From 7b415a9fe9c0fc46efd8a2d034c2bdd0d9e0eef6 Mon Sep 17 00:00:00 2001
From: Ayush Chaurasia
Date: Mon, 11 Jul 2022 12:38:04 +0530
Subject: [PATCH 001/247] initial instance segmentation support

---
 evaluator.py              |  827 +++++++++++++++++++
 models/yolo.py            |   85 +-
 models/yolov5l_seg.yaml   |   48 ++
 models/yolov5m_seg.yaml   |   48 ++
 models/yolov5n_seg.yaml   |   48 ++
 models/yolov5s_seg.yaml   |   48 ++
 models/yolov5x_seg.yaml   |   48 ++
 seg_augmentations.py      |  368 +++++++++
 seg_dataloaders.py        | 1640 +++++++++++++++++++++++++++++++++++++
 train_instseg.py          |  680 +++++++++++++++
 utils/general.py          |   16 +-
 utils/loggers/__init__.py |  247 +++++-
 utils/metrics.py          |  380 ++++++---
 utils/plots.py            |  899 ++++++++++++++++++++
 utils/seg_loss.py         |  459 +++++++++++
 utils/segment.py          |  318 +++++++
 16 files changed, 6015 insertions(+), 144 deletions(-)
 create mode 100644 evaluator.py
 create mode 100644 models/yolov5l_seg.yaml
 create mode 100644 models/yolov5m_seg.yaml
 create mode 100644 models/yolov5n_seg.yaml
 create mode 100644 models/yolov5s_seg.yaml
 create mode 100644 models/yolov5x_seg.yaml
 create mode 100644 seg_augmentations.py
 create mode 100644 seg_dataloaders.py
 create mode 100644 train_instseg.py
 create mode 100644 utils/seg_loss.py
 create mode 100644 utils/segment.py

diff --git a/evaluator.py b/evaluator.py
new file mode 100644
index 000000000000..e15d090ad625
--- /dev/null
+++ b/evaluator.py
@@ -0,0 +1,827 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Validate a trained YOLOv5 model accuracy on a custom dataset
+
+Usage:
+    $ python path/to/val.py --data coco128.yaml --weights yolov5s.pt --img 640
+"""
+
+import json
+from pathlib import Path
+from threading import Thread
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+import pycocotools.mask as mask_util  # required by save_one_json for RLE mask encoding
+from tqdm import tqdm
+
+from models.experimental import attempt_load
+from seg_dataloaders import create_dataloader
+from utils.general import (
+    coco80_to_coco91_class,
+    increment_path,
+    colorstr,
+)
+from utils.general import (
+    check_dataset,
+    check_img_size,
+    check_suffix,
+)
+from utils.general import (
+    box_iou,
+    non_max_suppression,
+    scale_coords,
+    xyxy2xywh,
+    xywh2xyxy,
+)
+from utils.segment import (
+    non_max_suppression_masks,
+    mask_iou,
+    process_mask,
+    process_mask_upsample,
+    scale_masks,
+)
+from utils.metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix
+from utils.plots import output_to_target, plot_images_boxes_and_masks
+from utils.torch_utils import select_device, time_sync
+from PIL import Image
+
+def save_one_txt(predn, save_conf, shape, file):
+    # Save one txt result
+    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain whwh
+    for *xyxy, conf, cls in predn.tolist():
+        xywh = (
+            (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
+        )  # normalized xywh
+        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
+        with open(file, "a") as f:
+            f.write(("%g " * len(line)).rstrip() % line + "\n")
+
+
+def save_one_json(predn, jdict, path, class_map, pred_masks=None):
+    # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
+    image_id = int(path.stem) if path.stem.isnumeric() else path.stem
+    box = xyxy2xywh(predn[:, :4])  # xywh
+    box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
+
+    if pred_masks is not None:
+        pred_masks = np.transpose(pred_masks, (2, 0, 1))
+        rles = [
+            mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0]
+            for mask in pred_masks
+        ]
+        for rle in rles:
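+            # pycocotools returns RLE "counts" as bytes; decode to str so json.dump can serialize it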
+            rle["counts"] = rle["counts"].decode("utf-8")
+
+    for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())):
+        pred_dict = {
+            "image_id": image_id,
+            "category_id": class_map[int(p[5])],
+            "bbox": [round(x, 3) for x in b],
+            "score": round(p[4], 5),
+        }
+        if pred_masks is not None:
+            pred_dict["segmentation"] = rles[i]
+        jdict.append(pred_dict)
+
+
+@torch.no_grad()
+class Yolov5Evaluator:
+    def __init__(
+        self,
+        data,
+        conf_thres=0.001,
+        iou_thres=0.6,
+        device="",
+        single_cls=False,
+        augment=False,
+        verbose=False,
+        project="runs/val",
+        name="exp",
+        exist_ok=False,
+        half=True,
+        save_dir=Path(""),
+        nosave=False,
+        plots=True,
+        mask=False,
+        mask_downsample_ratio=1,
+    ) -> None:
+        self.data = check_dataset(data)  # check
+        self.conf_thres = conf_thres  # confidence threshold
+        self.iou_thres = iou_thres  # NMS IoU threshold
+        self.device = device  # cuda device, i.e. 0 or 0,1,2,3 or cpu
+        self.single_cls = single_cls  # treat as single-class dataset
+        self.augment = augment  # augmented inference
+        self.verbose = verbose  # verbose output
+        self.project = project  # save to project/name
+        self.name = name  # save to project/name
+        self.exist_ok = exist_ok  # existing project/name ok, do not increment
+        self.half = half  # use FP16 half-precision inference
+        self.save_dir = save_dir
+        self.nosave = nosave
+        self.plots = plots
+        self.mask = mask
+        self.mask_downsample_ratio = mask_downsample_ratio
+
+        self.nc = 1 if self.single_cls else int(self.data["nc"])  # number of classes
+        self.iouv = torch.linspace(0.5, 0.95, 10)  # iou vector for mAP@0.5:0.95
+        self.niou = self.iouv.numel()
+        self.confusion_matrix = ConfusionMatrix(nc=self.nc)
+        self.dt = [0.0, 0.0, 0.0]
+        self.names = {k: v for k, v in enumerate(self.data["names"])}
+        self.s = (
+            ("%20s" + "%11s" * 10)
+            % (
+                "Class",
+                "Images",
+                "Labels",
+                "Box:{P",
+                "R",
+                "mAP@.5",
+                "mAP@.5:.95}",
+                "Mask:{P",
+                "R",
+                "mAP@.5",
+                "mAP@.5:.95}",
+            )
+            if self.mask
+            else ("%20s" + "%11s" * 6)
+            % (
+                "Class",
+                "Images",
+                "Labels",
+                "P",
+                "R",
+                "mAP@.5",
+                "mAP@.5:.95",
+            )
+        )
+
+        # coco stuff
+        self.is_coco = isinstance(self.data.get("val"), str) and self.data[
+            "val"
+        ].endswith(
+            "coco/val2017.txt"
+        )  # COCO dataset
+        self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000))
+        self.jdict = []
+        self.iou_thres = 0.65 if self.is_coco else self.iou_thres
+
+        # masks stuff
+        self.pred_masks = []  # for mask visualization
+
+        # metric stuff
+        self.seen = 0
+        self.stats = []
+        self.total_loss = torch.zeros((4 if self.mask else 3))
+        self.metric = Metrics() if self.mask else Metric()
+
+    def run_training(self, model, dataloader, compute_loss=None):
+        """This is for evaluation when training."""
+        self.seen = 0
+        self.device = next(model.parameters()).device  # get model device
+        # self.iouv.to(self.device)
+        self.total_loss = torch.zeros((4 if self.mask else 3), device=self.device)
+        self.half &= self.device.type != "cpu"  # half precision only supported on CUDA
+        model.half() if self.half else model.float()
+        # Configure
+        model.eval()
+
+        # inference
+        # masks will be `None` when training plain object detection.
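+        # each batch: img (B,3,H,W) uint8; targets (n,6) rows of (batch_idx, cls, normalized xywh);
+        # masks are per-object binary masks, possibly downsampled by mask_downsample_ratio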
+        for batch_i, (img, targets, paths, shapes, masks) in enumerate(
+            tqdm(dataloader, desc=self.s)
+        ):
+            # reset pred_masks
+            self.pred_masks = []
+            img = img.to(self.device, non_blocking=True)
+            targets = targets.to(self.device)
+            if masks is not None:
+                masks = masks.to(self.device)
+            out, train_out = self.inference(model, img, targets, masks, compute_loss)
+
+            # Statistics per image
+            for si, pred in enumerate(out):
+                self.seen += 1
+
+                # eval in every image level
+                labels = targets[targets[:, 0] == si, 1:]
+                gt_masksi = masks[targets[:, 0] == si] if masks is not None else None
+
+                # get prediction masks
+                proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
+                pred_maski = self.get_predmasks(
+                    pred,
+                    proto_out,
+                    gt_masksi.shape[1:] if gt_masksi is not None else None,
+                )
+
+                # for visualization
+                if self.plots and batch_i < 3 and pred_maski is not None:
+                    self.pred_masks.append(pred_maski.cpu())
+
+                # NOTE: eval in training image-size space
+                self.compute_stat(pred, pred_maski, labels, gt_masksi)
+
+            if batch_i < 3:
+                self.plot_images(batch_i, img, targets, masks, out, paths)
+
+        # compute mAP and print it.
+        t = self.after_infer()
+
+        # Return results
+        model.float()  # for training
+        return (
+            (
+                *self.metric.mean_results(),
+                *(self.total_loss.cpu() / len(dataloader)).tolist(),
+            ),
+            self.metric.get_maps(self.nc),
+            t,
+        )
+
+    def run(
+        self,
+        weights,
+        batch_size,
+        imgsz,
+        save_txt=False,
+        save_conf=False,
+        save_json=False,
+        task="val",
+    ):
+        """This is for native evaluation."""
+        model, dataloader, imgsz = self.before_infer(
+            weights, batch_size, imgsz, save_txt, task
+        )
+        self.seen = 0
+        # self.iouv.to(self.device)
+        self.half &= self.device.type != "cpu"  # half precision only supported on CUDA
+        model.half() if self.half else model.float()
+        # Configure
+        model.eval()
+
+        # inference
+        for batch_i, (img, targets, paths, shapes, masks) in enumerate(
+            tqdm(dataloader, desc=self.s)
+        ):
+            # reset pred_masks
+            self.pred_masks = []
+            img = img.to(self.device, non_blocking=True)
+            targets = targets.to(self.device)
+            if masks is not None:
+                masks = masks.to(self.device)
+            out, train_out = self.inference(model, img, targets, masks)
+
+            # Statistics per image
+            for si, pred in enumerate(out):
+                self.seen += 1
+                path = Path(paths[si])
+                shape = shapes[si][0]
+                ratio_pad = shapes[si][1]
+
+                # eval in every image level
+                labels = targets[targets[:, 0] == si, 1:]
+                gt_masksi = masks[targets[:, 0] == si] if masks is not None else None
+
+                # get prediction masks
+                proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
+                pred_maski = self.get_predmasks(
+                    pred,
+                    proto_out,
+                    gt_masksi.shape[1:] if gt_masksi is not None else None,
+                )
+
+                # for visualization
+                if self.plots and batch_i < 3 and pred_maski is not None:
+                    self.pred_masks.append(pred_maski.cpu())
+
+                # NOTE: eval in training image-size space
+                self.compute_stat(pred, pred_maski, labels, gt_masksi)
+
+                # no predictions, nothing to save
+                if len(pred) == 0:
+                    continue
+
+                if save_txt or save_json:
+                    # clone() so plot_images still works on the unscaled boxes
+                    predn = pred.clone()
+                    # val uses 0.5 padding, which differs from the dataloader padding, so ratio_pad must be passed in
+                    scale_coords(
+                        img[si].shape[1:], predn[:, :4], shape, ratio_pad
+                    )  # native-space pred
+
+                # Save/log
+                if save_txt and self.save_dir.exists():
+                    # NOTE: convert coords to native space when saving txt.
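+                    # predn is already in native (original-image) space here via scale_coords above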
+                    # only box predictions are saved
+                    save_one_txt(
+                        predn,
+                        save_conf,
+                        shape,
+                        file=self.save_dir / "labels" / (path.stem + ".txt"),
+                    )
+                if save_json and self.save_dir.exists():
+                    # NOTE: convert coords to native space when saving json.
+                    # if pred_maski is not None:
+                    # h, w, n
+                    pred_maski = scale_masks(
+                        img[si].shape[1:],
+                        pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(),
+                        shape,
+                        ratio_pad,
+                    )
+                    save_one_json(
+                        predn,
+                        self.jdict,
+                        path,
+                        self.class_map,
+                        pred_maski,
+                    )  # append to COCO-JSON dictionary
+
+            if self.plots and batch_i < 3:
+                self.plot_images(batch_i, img, targets, masks, out, paths)
+
+        # compute mAP and print it.
+        t = self.after_infer()
+
+        # save json
+        if self.save_dir.exists() and save_json:
+            pred_json = str(self.save_dir / "predictions.json")  # predictions json
+            print(f"\nEvaluating pycocotools mAP... saving {pred_json}...")
+            with open(pred_json, "w") as f:
+                json.dump(self.jdict, f)
+
+        # Print speeds
+        shape = (batch_size, 3, imgsz, imgsz)
+        print(
+            f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}"
+            % t
+        )
+
+        s = (
+            f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}"
+            if save_txt and self.save_dir.exists()
+            else ""
+        )
+        print(
+            f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}"
+        )
+
+        # Return results
+        return (
+            (
+                *self.metric.mean_results(),
+                *(self.total_loss.cpu() / len(dataloader)).tolist(),
+            ),
+            self.metric.get_maps(self.nc),
+            t,
+        )
+
+    def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"):
+        """Prepare for evaluation without training."""
+        self.device = select_device(self.device, batch_size=batch_size)
+
+        # Directories
+        self.save_dir = increment_path(
+            Path(self.project) / self.name, exist_ok=self.exist_ok
+        )  # increment run
+        if not self.nosave:
+            (self.save_dir / "labels" if save_txt else self.save_dir).mkdir(
+                parents=True, exist_ok=True
+            )  # make dir
+
+        # Load model
+        check_suffix(weights, ".pt")
+        model = attempt_load(weights, map_location=self.device)  # load FP32 model
+        gs = max(int(model.stride.max()), 32)  # grid size (max stride)
+        imgsz = check_img_size(imgsz, s=gs)  # check image size
+
+        # Data
+        if self.device.type != "cpu":
+            model(
+                torch.zeros(1, 3, imgsz, imgsz)
+                .to(self.device)
+                .type_as(next(model.parameters()))
+            )  # run once
+        pad = 0.0 if task == "speed" else 0.5
+        task = (
+            task if task in ("train", "val", "test") else "val"
+        )  # path to train/val/test images
+        dataloader = create_dataloader(
+            self.data[task],
+            imgsz,
+            batch_size,
+            gs,
+            self.single_cls,
+            pad=pad,
+            rect=True,
+            prefix=colorstr(f"{task}: "),
+            mask_head=self.mask,
+            mask_downsample_ratio=self.mask_downsample_ratio,
+        )[0]
+        return model, dataloader, imgsz
+
+    def inference(self, model, img, targets, masks=None, compute_loss=None):
+        """Inference"""
+        t1 = time_sync()
+        img = img.half() if self.half else img.float()  # uint8 to fp16/32
+        img /= 255.0  # 0 - 255 to 0.0 - 1.0
+        _, _, height, width = img.shape  # batch size, channels, height, width
+        t2 = time_sync()
+        self.dt[0] += t2 - t1
+
+        # Run model
+        out, train_out = model(
+            img, augment=self.augment
+        )  # inference and training outputs
+        self.dt[1] += time_sync() - t2
+
+        # Compute loss
+        if compute_loss:
+            self.total_loss += compute_loss(train_out, targets, masks)[
+                1
+            ]  # box, obj, cls
+
+        # Run NMS
+        targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(
+            self.device
+        )  # to pixels
+        t3 = time_sync()
+        out = self.nms(
+            prediction=out,
+            conf_thres=self.conf_thres,
+            iou_thres=self.iou_thres,
+            multi_label=True,
+            agnostic=self.single_cls,
+        )
+        self.dt[2] += time_sync() - t3
+        return out, train_out
+
+    def after_infer(self):
+        """Do something after inference, such as plotting and computing metrics.
+        Return:
+            t(tuple): speeds per image.
+        """
+        # Plot confusion matrix
+        if self.plots and self.save_dir.exists():
+            self.confusion_matrix.plot(
+                save_dir=self.save_dir, names=list(self.names.values())
+            )
+
+        # Compute statistics
+        stats = [np.concatenate(x, 0) for x in zip(*self.stats)]  # to numpy
+        box_or_mask_any = stats[0].any() or stats[1].any()
+        stats = stats[1:] if not self.mask else stats
+        if len(stats) and box_or_mask_any:
+            results = self.ap_per_class(
+                *stats,
+                self.plots,
+                self.save_dir if self.save_dir.exists() else None,
+                self.names,
+            )
+            self.metric.update(results)
+            nt = np.bincount(
+                stats[(3 if not self.mask else 4)].astype(np.int64), minlength=self.nc
+            )  # number of targets per class
+        else:
+            nt = torch.zeros(1)
+
+        # empty self.stats; keeping the accumulator on self avoids duplicated code between run() and run_training().
+        self.stats = []
+        # print information
+        self.print_metric(nt, stats)
+        t = tuple(x / self.seen * 1e3 for x in self.dt)  # speeds per image
+        return t
+
+    def process_batch(self, detections, labels, iouv):
+        """
+        Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
+        Arguments:
+            detections (Array[N, 6]), x1, y1, x2, y2, conf, class
+            labels (Array[M, 5]), class, x1, y1, x2, y2
+        Returns:
+            correct (Array[N, 10]), for 10 IoU levels
+        """
+        correct = torch.zeros(
+            detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device
+        )
+        iou = box_iou(labels[:, 1:], detections[:, :4])
+        x = torch.where(
+            (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5])
+        )  # IoU above threshold and classes match
+        if x[0].shape[0]:
+            matches = (
+                torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1)
+                .cpu()
+                .numpy()
+            )  # [label, detection, iou]
+            if x[0].shape[0] > 1:
+                matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+                # matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+            matches = torch.Tensor(matches).to(iouv.device)
+            correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
+        return correct
+
+    def get_predmasks(self, pred, proto_out, gt_shape):
+        """Get pred masks in different ways.
+        1. process_mask, for val when training: eval with low-quality masks
+           (1/mask_ratio of the original size) to save CUDA memory.
+        2. process_mask_upsample, for val after training: get high-quality masks
+           (original size).
+
+        Args:
+            pred(torch.Tensor): output of network, (N, 5 + mask_dim + class).
+            proto_out(torch.Tensor): output of mask prototype, (mask_dim, mask_h, mask_w).
+            gt_shape(tuple): shape of gt mask; may not equal the input size of the
+                input image because of mask_downsample_ratio.
+        Return:
+            pred_mask(torch.Tensor): prediction of final masks with the same size as the
+                input image, (N, input_h, input_w).
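+        Note:
+            The upsample variant is selected when plotting, since drawing masks
+            requires input-resolution output; otherwise the cheaper low-resolution
+            path is used.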
+ """ + if proto_out is None or len(pred) == 0: + return None + process = process_mask_upsample if self.plots else process_mask + gt_shape = ( + gt_shape[0] * self.mask_downsample_ratio, + gt_shape[1] * self.mask_downsample_ratio, + ) + # n, h, w + pred_mask = ( + process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape) + .permute(2, 0, 1) + .contiguous() + ) + return pred_mask + + def process_batch_masks(self, predn, pred_maski, gt_masksi, labels): + assert not ( + (pred_maski is None) ^ (gt_masksi is None) + ), "`proto_out` and `gt_masksi` should be both None or both exist." + if pred_maski is None and gt_masksi is None: + return torch.zeros(0, self.niou, dtype=torch.bool) + + correct = torch.zeros( + predn.shape[0], + self.iouv.shape[0], + dtype=torch.bool, + device=self.iouv.device, + ) + + if not self.plots: + gt_masksi = F.interpolate( + gt_masksi.unsqueeze(0), + pred_maski.shape[1:], + mode="bilinear", + align_corners=False, + ).squeeze(0) + + iou = mask_iou( + gt_masksi.view(gt_masksi.shape[0], -1), + pred_maski.view(pred_maski.shape[0], -1), + ) + x = torch.where( + (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5]) + ) # IoU above threshold and classes match + if x[0].shape[0]: + matches = ( + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) + .cpu() + .numpy() + ) # [label, detection, iou] + if x[0].shape[0] > 1: + matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 1], return_index=True)[1]] + # matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 0], return_index=True)[1]] + matches = torch.Tensor(matches).to(self.iouv.device) + correct[matches[:, 1].long()] = matches[:, 2:3] >= self.iouv + return correct + + def compute_stat(self, predn, pred_maski, labels, gt_maski): + """Compute states about ious. with boxs size in training img-size space.""" + nl = len(labels) + tcls = labels[:, 0].tolist() if nl else [] # target class + + if len(predn) == 0: + if nl: + self.stats.append( + ( + torch.zeros(0, self.niou, dtype=torch.bool), # boxes + torch.zeros(0, self.niou, dtype=torch.bool), # masks + torch.Tensor(), + torch.Tensor(), + tcls, + ) + ) + return + + # Predictions + if self.single_cls: + predn[:, 5] = 0 + + # Evaluate + if nl: + tbox = xywh2xyxy(labels[:, 1:5]) # target boxes + labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels + # boxes + correct_boxes = self.process_batch(predn, labelsn, self.iouv) + + # masks + correct_masks = self.process_batch_masks( + predn, pred_maski, gt_maski, labelsn + ) + + if self.plots: + self.confusion_matrix.process_batch(predn, labelsn) + else: + correct_boxes = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) + correct_masks = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) + self.stats.append( + ( + correct_masks.cpu(), + correct_boxes.cpu(), + predn[:, 4].cpu(), + predn[:, 5].cpu(), + tcls, + ) + ) # (correct, conf, pcls, tcls) + + def print_metric(self, nt, stats): + # Print results + pf = "%20s" + "%11i" * 2 + "%11.3g" * (8 if self.mask else 4) + print(pf % ("all", self.seen, nt.sum(), *self.metric.mean_results())) + + # Print results per class + # TODO: self.seen support verbose. 
+        if self.verbose and self.nc > 1 and len(stats):
+            for i, c in enumerate(self.metric.ap_class_index):
+                print(
+                    pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i))
+                )
+
+    def plot_images(self, i, img, targets, masks, out, paths):
+        if not self.save_dir.exists():
+            return
+        # plot ground truth
+        f = self.save_dir / f"val_batch{i}_labels.jpg"  # labels
+
+        Thread(
+            target=plot_images_boxes_and_masks,
+            args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])),
+            daemon=True,
+        ).start()
+        f = self.save_dir / f"val_batch{i}_pred.jpg"  # predictions
+
+        # plot predictions
+        if len(self.pred_masks):
+            pred_masks = (
+                torch.cat(self.pred_masks, dim=0)
+                if len(self.pred_masks) > 1
+                else self.pred_masks[0]
+            )
+        else:
+            pred_masks = None
+        Thread(
+            target=plot_images_boxes_and_masks,
+            args=(
+                img,
+                output_to_target(out),
+                pred_masks,
+                paths,
+                f,
+                self.names,
+                max(img.shape[2:]),
+            ),
+            daemon=True,
+        ).start()
+        import wandb
+        if wandb.run:
+            res = plot_images_boxes_and_masks(img, output_to_target(out), pred_masks, paths, f, self.names, max(img.shape[2:]))
+            res = Image.fromarray(res)
+            wandb.log({f"pred_{i}": wandb.Image(res)})
+
+    def nms(self, **kwargs):
+        return (
+            non_max_suppression_masks(**kwargs)
+            if self.mask
+            else non_max_suppression(**kwargs)
+        )
+
+    def ap_per_class(self, *args):
+        return ap_per_class_box_and_mask(*args) if self.mask else ap_per_class(*args)
+
+
+class Metric:
+    def __init__(self) -> None:
+        self.p = []  # (nc, )
+        self.r = []  # (nc, )
+        self.f1 = []  # (nc, )
+        self.all_ap = []  # (nc, 10)
+        self.ap_class_index = []  # (nc, )
+
+    @property
+    def ap50(self):
+        """AP@0.5 of all classes.
+        Return:
+            (nc, ) or [].
+        """
+        return self.all_ap[:, 0] if len(self.all_ap) else []
+
+    @property
+    def ap(self):
+        """AP@0.5:0.95
+        Return:
+            (nc, ) or [].
+        """
+        return self.all_ap.mean(1) if len(self.all_ap) else []
+
+    @property
+    def mp(self):
+        """Mean precision of all classes.
+        Return:
+            float.
+        """
+        return self.p.mean() if len(self.p) else 0.0
+
+    @property
+    def mr(self):
+        """Mean recall of all classes.
+        Return:
+            float.
+        """
+        return self.r.mean() if len(self.r) else 0.0
+
+    @property
+    def map50(self):
+        """Mean AP@0.5 of all classes.
+        Return:
+            float.
+        """
+        return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0
+
+    @property
+    def map(self):
+        """Mean AP@0.5:0.95 of all classes.
+        Return:
+            float.
+ """ + return self.all_ap.mean() if len(self.all_ap) else 0.0 + + def mean_results(self): + """Mean of results, return mp, mr, map50, map""" + return (self.mp, self.mr, self.map50, self.map) + + def class_result(self, i): + """class-aware result, return p[i], r[i], ap50[i], ap[i]""" + return (self.p[i], self.r[i], self.ap50[i], self.ap[i]) + + def get_maps(self, nc): + maps = np.zeros(nc) + self.map + for i, c in enumerate(self.ap_class_index): + maps[c] = self.ap[i] + return maps + + def update(self, results): + """ + Args: + results: tuple(p, r, ap, f1, ap_class) + """ + p, r, all_ap, f1, ap_class_index = results + self.p = p + self.r = r + self.all_ap = all_ap + self.f1 = f1 + self.ap_class_index = ap_class_index + + +class Metrics: + """Metric for boxes and masks.""" + + def __init__(self) -> None: + self.metric_box = Metric() + self.metric_mask = Metric() + + def update(self, results): + """ + Args: + results: Dict{'boxes': Dict{}, 'masks': Dict{}} + """ + self.metric_box.update(list(results["boxes"].values())) + self.metric_mask.update(list(results["masks"].values())) + + def mean_results(self): + return self.metric_box.mean_results() + self.metric_mask.mean_results() + + def class_result(self, i): + return self.metric_box.class_result(i) + self.metric_mask.class_result(i) + + def get_maps(self, nc): + return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) + + @property + def ap_class_index(self): + # boxes and masks have the same ap_class_index + return self.metric_box.ap_class_index diff --git a/models/yolo.py b/models/yolo.py index 02660e6c4130..e6860a9d7435 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -88,6 +88,64 @@ def _make_grid(self, nx=20, ny=20, i=0): anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape) return grid, anchor_grid +class DetectSegment(Detect): + def __init__(self, nc=80, anchors=(), mask_dim=32, proto_channel=256, ch=(), inplace=True): + super().__init__(nc, anchors, ch, inplace) + self.mask_dim = mask_dim + self.no = nc + 5 + self.mask_dim # number of outputs per anchor + self.nm = 5 + self.mask_dim + self.proto_c = proto_channel + self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) + for x in ch) # output conv + + # p3作为输入 + self.proto_net = nn.Sequential( + nn.Conv2d(ch[0], self.proto_c, kernel_size=3, stride=1, padding=1), + nn.SiLU(inplace=True), + # nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), + # nn.SiLU(inplace=True), + # nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), + # nn.SiLU(inplace=True), + # nn.Upsample(scale_factor=2, mode='nearest'), + nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False), + nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), + nn.SiLU(inplace=True), + nn.Conv2d(self.proto_c, self.mask_dim, kernel_size=1, padding=0), + nn.SiLU(inplace=True)) + + def forward(self, x): + z = [] # inference output + for i in range(self.nl): + if i == 0: + proto_out = self.proto_net(x[i]) + + x[i] = self.m[i](x[i]) # conv + bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) + x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() + + if not self.training: # inference + if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic: + self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i) + + y = x[i].clone() + y[..., 0:5] = y[..., 0:5].sigmoid() + y[..., self.nm:] = y[..., self.nm:].sigmoid() + if self.inplace: + y[..., 0:2] = (y[..., 0:2] 
* 2. - 0.5 + self.grid[i]) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 + xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy + wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + y = torch.cat((xy.type_as(y), wh.type_as(y), y[..., 4:]), -1) + z.append(y.view(-1, self.na * ny * nx, self.no)) + + # TODO: export + if torch.onnx.is_in_onnx_export(): + output = torch.cat(z, 1) + return output # keep the same type with x + else: + return (x, proto_out) if self.training else (torch.cat(z, 1), (x, proto_out)) + class Model(nn.Module): # YOLOv5 model @@ -115,7 +173,15 @@ def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, i # Build strides, anchors m = self.model[-1] # Detect() - if isinstance(m, Detect): + if isinstance(m, DetectSegment): + s = 256 # 2x min stride + m.inplace = self.inplace + m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[0]]) # forward + m.anchors /= m.stride.view(-1, 1, 1) + check_anchor_order(m) + self.stride = m.stride + self._initialize_biases() # only run once + elif isinstance(m, Detect): s = 256 # 2x min stride m.inplace = self.inplace m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward @@ -207,9 +273,9 @@ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. m = self.model[-1] # Detect() module for mi, s in zip(m.m, m.stride): # from - b = mi.bias.view(m.na, -1).detach() # conv.bias(255) to (3,85) - b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) - b[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # cls + b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) + b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) + b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) def _print_biases(self): @@ -234,6 +300,12 @@ def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers self.info() return self + def autoshape(self): # add AutoShape module + LOGGER.info('Adding AutoShape... 
') + m = AutoShape(self) # wrap model + copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes + return m + def info(self, verbose=False, img_size=640): # print model information model_info(self, verbose, img_size) @@ -279,10 +351,13 @@ def parse_model(d, ch): # model_dict, input_channels(3) args = [ch[f]] elif m is Concat: c2 = sum(ch[x] for x in f) - elif m is Detect: + # TODO: channel, gw, gd + elif m in [Detect, DetectSegment]: args.append([ch[x] for x in f]) if isinstance(args[1], int): # number of anchors args[1] = [list(range(args[1] * 2))] * len(f) + if m is DetectSegment: + args[3] = make_divisible(args[3] * gw, 8) elif m is Contract: c2 = ch[f] * args[0] ** 2 elif m is Expand: diff --git a/models/yolov5l_seg.yaml b/models/yolov5l_seg.yaml new file mode 100644 index 000000000000..98fbe51addfe --- /dev/null +++ b/models/yolov5l_seg.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + ] diff --git a/models/yolov5m_seg.yaml b/models/yolov5m_seg.yaml new file mode 100644 index 000000000000..6b19539786b2 --- /dev/null +++ b/models/yolov5m_seg.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +depth_multiple: 0.67 # model depth multiple +width_multiple: 0.75 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, 
[256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + ] diff --git a/models/yolov5n_seg.yaml b/models/yolov5n_seg.yaml new file mode 100644 index 000000000000..40a0409aac46 --- /dev/null +++ b/models/yolov5n_seg.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +depth_multiple: 0.33 # model depth multiple +width_multiple: 0.25 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + ] diff --git a/models/yolov5s_seg.yaml b/models/yolov5s_seg.yaml new file mode 100644 index 000000000000..cb71f5853de6 --- /dev/null +++ b/models/yolov5s_seg.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters 1767976 +nc: 3 # number of classes +depth_multiple: 0.33 # model depth multiple +width_multiple: 0.5 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + ] \ No newline at end of file diff --git a/models/yolov5x_seg.yaml b/models/yolov5x_seg.yaml new 
file mode 100644 index 000000000000..e1f91c584dca --- /dev/null +++ b/models/yolov5x_seg.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +depth_multiple: 1.33 # model depth multiple +width_multiple: 1.25 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + ] diff --git a/seg_augmentations.py b/seg_augmentations.py new file mode 100644 index 000000000000..63055f640390 --- /dev/null +++ b/seg_augmentations.py @@ -0,0 +1,368 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Image augmentation functions +""" + +import logging +import math +import random + +import cv2 +import numpy as np + +from utils.general import colorstr, check_version +from utils.segment import segment2box, resample_segments +from utils.metrics import bbox_ioa + + +class Albumentations: + # YOLOv5 Albumentations class (optional, only used if package is installed) + def __init__(self): + self.transform = None + try: + import albumentations as A + + check_version(A.__version__, "1.0.3") # version requirement + + self.transform = A.Compose( + [ + A.Blur(p=0.01), + A.MedianBlur(p=0.01), + A.ToGray(p=0.01), + A.CLAHE(p=0.01), + A.RandomBrightnessContrast(p=0.0), + A.RandomGamma(p=0.0), + A.ImageCompression(quality_lower=75, p=0.0), + ], + bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]), + ) + + logging.info( + colorstr("albumentations: ") + + ", ".join(f"{x}" for x in self.transform.transforms if x.p) + ) + except ImportError: # package not installed, skip + pass + except Exception as e: + logging.info(colorstr("albumentations: ") + f"{e}") + + def __call__(self, im, labels, p=1.0): + if self.transform and random.random() < p: + new = self.transform( + image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0] + ) # transformed + im, labels = new["image"], np.array( + [[c, *b] for c, b in zip(new["class_labels"], new["bboxes"])] + ) + return im, labels + + +def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5): + # HSV color-space augmentation + if hgain or sgain or vgain: + r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains + hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV)) + dtype = im.dtype # uint8 + + x = np.arange(0, 256, dtype=r.dtype) + lut_hue = ((x * r[0]) % 180).astype(dtype) + lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) + lut_val = 
np.clip(x * r[2], 0, 255).astype(dtype) + + im_hsv = cv2.merge( + (cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)) + ) + cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed + + +def hist_equalize(im, clahe=True, bgr=False): + # Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255 + yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV) + if clahe: + c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) + yuv[:, :, 0] = c.apply(yuv[:, :, 0]) + else: + yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram + return cv2.cvtColor( + yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB + ) # convert YUV image to RGB + + +def replicate(im, labels): + # Replicate labels + h, w = im.shape[:2] + boxes = labels[:, 1:].astype(int) + x1, y1, x2, y2 = boxes.T + s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels) + for i in s.argsort()[: round(s.size * 0.5)]: # smallest indices + x1b, y1b, x2b, y2b = boxes[i] + bh, bw = y2b - y1b, x2b - x1b + yc, xc = int(random.uniform(0, h - bh)), int( + random.uniform(0, w - bw) + ) # offset x, y + x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh] + im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] # im4[ymin:ymax, xmin:xmax] + labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0) + + return im, labels + + +def letterbox( + im, + new_shape=(640, 640), + color=(114, 114, 114), + auto=True, + scaleFill=False, + scaleup=True, + stride=32, + center=True, # center padding or left top padding +): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + if center: + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)) if center else 0, int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)) if center else 0, int(round(dw + 0.1)) + im = cv2.copyMakeBorder( + im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color + ) # add border + return im, ratio, (dw, dh) + + +def random_perspective( + im, + targets=(), + segments=(), + degrees=10, + translate=0.1, + scale=0.1, + shear=10, + perspective=0.0, + border=(0, 0), + area_thr=0.2, + return_seg=False, +): + # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) + # targets = [cls, xyxy] + + height = im.shape[0] + border[0] * 2 # shape(h,w,c) + width = im.shape[1] + border[1] * 2 + + # Center + C = np.eye(3) + C[0, 2] = -im.shape[1] / 2 # x translation (pixels) + C[1, 2] = -im.shape[0] / 2 # y translation (pixels) + + # Perspective + P = np.eye(3) + P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) + P[2, 1] = 
random.uniform(-perspective, perspective) # y perspective (about x) + + # Rotation and Scale + R = np.eye(3) + a = random.uniform(-degrees, degrees) + # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations + s = random.uniform(1 - scale, 1 + scale) + # s = 2 ** random.uniform(-scale, scale) + R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) + + # Shear + S = np.eye(3) + S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) + + # Translation + T = np.eye(3) + T[0, 2] = ( + random.uniform(0.5 - translate, 0.5 + translate) * width + ) # x translation (pixels) + T[1, 2] = ( + random.uniform(0.5 - translate, 0.5 + translate) * height + ) # y translation (pixels) + + # Combined rotation matrix + M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT + if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed + if perspective: + im = cv2.warpPerspective( + im, M, dsize=(width, height), borderValue=(114, 114, 114) + ) + else: # affine + im = cv2.warpAffine( + im, M[:2], dsize=(width, height), borderValue=(114, 114, 114) + ) + + # Visualize + # import matplotlib.pyplot as plt + # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() + # ax[0].imshow(im[:, :, ::-1]) # base + # ax[1].imshow(im2[:, :, ::-1]) # warped + + # Transform label coordinates + n = len(targets) + new_segments = [] + if n: + use_segments = any(x.any() for x in segments) + new = np.zeros((n, 4)) + if use_segments: # warp segments + segments = resample_segments(segments) # upsample + for i, segment in enumerate(segments): + xy = np.ones((len(segment), 3)) + xy[:, :2] = segment + xy = xy @ M.T # transform + xy = ( + xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] + ) # perspective rescale or affine + + # clip + new[i] = segment2box(xy, width, height) + new_segments.append(xy) + + else: # warp boxes + xy = np.ones((n * 4, 3)) + xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape( + n * 4, 2 + ) # x1y1, x2y2, x1y2, x2y1 + xy = xy @ M.T # transform + xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape( + n, 8 + ) # perspective rescale or affine + + # create new boxes + x = xy[:, [0, 2, 4, 6]] + y = xy[:, [1, 3, 5, 7]] + new = ( + np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T + ) + + # clip + new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) + new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) + + # filter candidates + i = box_candidates( + box1=targets[:, 1:5].T * s, + box2=new.T, + cls=targets[:, 0], + # area_thr=0.01 if use_segments else 0.10, + area_thr=area_thr, + ) + targets = targets[i] + targets[:, 1:5] = new[i] + new_segments = ( + np.array(new_segments)[i] if len(new_segments) else np.array(new_segments) + ) + + return (im, targets, new_segments) if return_seg else (im, targets) + + +def copy_paste(im, labels, segments, p=0.5): + # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy) + n = len(segments) + if p and n: + h, w, c = im.shape # height, width, channels + im_new = np.zeros(im.shape, np.uint8) + for j in random.sample(range(n), k=round(p * n)): + l, s = labels[j], segments[j] + box = w - l[3], l[2], w - l[1], l[4] + ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area + if (ioa < 0.30).all(): # allow 30% obscuration of existing labels + labels = np.concatenate((labels, [[l[0], *box]]), 0) + 
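+                # the appended box and segment are mirrored in x (w - x) to match the cv2.flip(result, 1) below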
segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)) + cv2.drawContours( + im_new, + [segments[j].astype(np.int32)], + -1, + (255, 255, 255), + cv2.FILLED, + ) + + result = cv2.bitwise_and(src1=im, src2=im_new) + result = cv2.flip(result, 1) # augment segments (flip left-right) + i = result > 0 # pixels to replace + # i[:, :] = result.max(2).reshape(h, w, 1) # act over ch + im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug + + return im, labels, segments + + +def cutout(im, labels, p=0.5): + # Applies image cutout augmentation https://arxiv.org/abs/1708.04552 + if random.random() < p: + h, w = im.shape[:2] + scales = ( + [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 + ) # image size fraction + for s in scales: + mask_h = random.randint(1, int(h * s)) # create random masks + mask_w = random.randint(1, int(w * s)) + + # box + xmin = max(0, random.randint(0, w) - mask_w // 2) + ymin = max(0, random.randint(0, h) - mask_h // 2) + xmax = min(w, xmin + mask_w) + ymax = min(h, ymin + mask_h) + + # apply random color mask + im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)] + + # return unobscured labels + if len(labels) and s > 0.03: + box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32) + ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area + labels = labels[ioa < 0.60] # remove >60% obscured labels + + return labels + + +def mixup(im, labels, im2, labels2): + # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf + r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 + im = (im * r + im2 * (1 - r)).astype(np.uint8) + labels = np.concatenate((labels, labels2), 0) + return im, labels + + +def box_candidates( + box1, box2, cls, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16 +): # box1(4,n), box2(4,n) + # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio + w1, h1 = box1[2] - box1[0], box1[3] - box1[1] + w2, h2 = box2[2] - box2[0], box2[3] - box2[1] + area_thr = ( + np.array(area_thr)[cls.astype(np.int)] + if isinstance(area_thr, list) + else area_thr + ) + if isinstance(area_thr, list) and len(area_thr) == 1: + area_thr = area_thr[0] + ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio + return ( + (w2 > wh_thr) + & (h2 > wh_thr) + & (w2 * h2 / (w1 * h1 + eps) > area_thr) + & (ar < ar_thr) + ) # candidates \ No newline at end of file diff --git a/seg_dataloaders.py b/seg_dataloaders.py new file mode 100644 index 000000000000..31fb0a1872ba --- /dev/null +++ b/seg_dataloaders.py @@ -0,0 +1,1640 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Dataloaders +""" + +import glob +import logging +import os +import time +import json +import yaml +import random +from itertools import repeat +from multiprocessing.pool import ThreadPool, Pool +from PIL import Image +from pathlib import Path +from functools import wraps +from zipfile import ZipFile + +import cv2 +import numpy as np +import torch +import torch.nn.functional as F +from torch.utils.data import distributed +from torch.utils.data import Dataset as torchDataset +from torch.utils.data.sampler import RandomSampler +from tqdm import tqdm + + +from seg_augmentations import ( + Albumentations, + augment_hsv, + copy_paste, + letterbox, + mixup, + random_perspective, +) +from utils.general import colorstr, check_dataset, check_yaml, xywhn2xyxy, xyxy2xywhn, xyn2xy +from utils.torch_utils import torch_distributed_zero_first + + +from torch.utils.data.sampler import BatchSampler as 
torchBatchSampler +from torch.utils.data.sampler import Sampler + +class _RepeatSampler: + """ Sampler that repeats forever + + Args: + sampler (Sampler) + """ + + def __init__(self, sampler): + self.sampler = sampler + + def __iter__(self): + while True: + yield from iter(self.sampler) +class YoloBatchSampler(torchBatchSampler): + """ + This batch sampler will generate mini-batches of (mosaic, index) tuples from another sampler. + It works just like the :class:`torch.utils.data.sampler.BatchSampler`, + but it will turn on/off the mosaic aug. + """ + + def __init__(self, *args, augment=True, **kwargs): + super().__init__(*args, **kwargs) + self.augment = augment + + def __iter__(self): + for batch in super().__iter__(): + yield [(self.augment, idx) for idx in batch] + +def create_dataloader_ori( + path, + imgsz, + batch_size, + stride, + single_cls=False, + hyp=None, + augment=False, + cache=False, + pad=0.0, + rect=False, + rank=-1, + workers=8, + image_weights=False, + quad=False, + prefix="", + shuffle=False, + neg_dir="", + bg_dir="", + area_thr=0.2, + mask_head=False, + mask_downsample_ratio=1, +): + if rect and shuffle: + print("WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False") + shuffle = False + # Make sure only the first process in DDP process the dataset first, and the following others can use the cache + data_load = LoadImagesAndLabelsAndMasks if mask_head else LoadImagesAndLabels + with torch_distributed_zero_first(rank): + dataset = data_load( + path, + imgsz, + batch_size, + augment=augment, # augment images + hyp=hyp, # augmentation hyperparameters + rect=rect, # rectangular training + cache_images=cache, + single_cls=single_cls, + stride=int(stride), + pad=pad, + image_weights=image_weights, + prefix=prefix, + neg_dir=neg_dir, + bg_dir=bg_dir, + area_thr=area_thr, + ) + if mask_head: + dataset.downsample_ratio = mask_downsample_ratio + + batch_size = min(batch_size, len(dataset)) + nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers]) # number of workers + sampler = distributed.DistributedSampler(dataset, shuffle=shuffle) if rank != -1 else None + loader = DataLoader if image_weights else InfiniteDataLoader + # Use torch.utils.data.DataLoader() if dataset.properties will update during training else InfiniteDataLoader() + dataloader = loader( + dataset, + batch_size=batch_size, + num_workers=nw, + shuffle=shuffle and sampler is None, + sampler=sampler, + pin_memory=True, + collate_fn=data_load.collate_fn4 if quad else data_load.collate_fn, + ) + return dataloader, dataset + + +def create_dataloader( + path, + imgsz, + batch_size, + stride, + single_cls=False, + hyp=None, + augment=False, + cache=False, + pad=0.0, + rect=False, + rank=-1, + workers=8, + image_weights=False, + quad=False, + prefix="", + shuffle=False, + neg_dir="", + bg_dir="", + area_thr=0.2, + mask_head=False, + mask_downsample_ratio=1, +): + if rect and shuffle: + print("WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False") + shuffle = False + data_load = LoadImagesAndLabelsAndMasks if mask_head else LoadImagesAndLabels + # Make sure only the first process in DDP process the dataset first, and the following others can use the cache + with torch_distributed_zero_first(rank): + dataset = data_load( + path, + imgsz, + batch_size, + augment=augment, # augment images + hyp=hyp, # augmentation hyperparameters + rect=rect, # rectangular training + cache_images=cache, + single_cls=single_cls, + stride=int(stride), + pad=pad, + 
image_weights=image_weights, + prefix=prefix, + neg_dir=neg_dir, + bg_dir=bg_dir, + area_thr=area_thr, + ) + if mask_head: + dataset.downsample_ratio = mask_downsample_ratio + + batch_size = min(batch_size, len(dataset)) + nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers]) # number of workers + # sampler = InfiniteSampler(len(dataset), seed=0) + sampler = ( + distributed.DistributedSampler(dataset, shuffle=shuffle) + if rank != -1 + else RandomSampler(dataset) + ) + + batch_sampler = ( + YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + augment=augment, + ) + if not rect + else None + ) + dataloader = DataLoader( + dataset, + num_workers=nw, + batch_size=1 + if batch_sampler is not None + else batch_size, # batch-size and batch-sampler is exclusion + batch_sampler=batch_sampler, + pin_memory=True, + collate_fn=data_load.collate_fn4 if quad else data_load.collate_fn, + # Make sure each process has different random seed, especially for 'fork' method. + # Check https://github.com/pytorch/pytorch/issues/63311 for more details. + # but this will make init_seed() not work. + # worker_init_fn=worker_init_reset_seed, + ) + return dataloader, dataset + + +class Dataset(torchDataset): + """This class is a subclass of the base :class:`torch.utils.data.Dataset`, + that enables on the fly resizing of the ``input_dim``. + + Args: + input_dimension (tuple): (width,height) tuple with default dimensions of the network + """ + + def __init__(self, augment=True): + super().__init__() + self.augment = augment + + @staticmethod + def mosaic_getitem(getitem_fn): + """ + Decorator method that needs to be used around the ``__getitem__`` method. |br| + This decorator enables the closing mosaic + + Example: + >>> class CustomSet(ln.data.Dataset): + ... def __len__(self): + ... return 10 + ... @ln.data.Dataset.mosaic_getitem + ... def __getitem__(self, index): + ... 
return self.enable_mosaic + """ + + @wraps(getitem_fn) + def wrapper(self, index): + if not isinstance(index, int): + self.augment = index[0] + index = index[1] + + ret_val = getitem_fn(self, index) + + return ret_val + + return wrapper + + +class LoadImagesAndLabels(Dataset): + # YOLOv5 train_loader/val_loader, loads images and labels for training and validation + cache_version = 0.6 # dataset labels *.cache version + + def __init__( + self, + path, + img_size=640, + batch_size=16, + augment=False, + hyp=None, + rect=False, + image_weights=False, + cache_images=False, + single_cls=False, + stride=32, + pad=0.0, + prefix="", + neg_dir="", + bg_dir="", + area_thr=0.2, + ): + super().__init__(augment=augment) + self.img_size = img_size + self.hyp = hyp + self.image_weights = image_weights + self.rect = False if image_weights else rect + self.mosaic = ( + self.augment and not self.rect + ) # load 4 images at a time into a mosaic (only during training) + self.mosaic_border = [-img_size // 2, -img_size // 2] + self.stride = stride + self.path = path + self.albumentations = Albumentations() if augment else None + + # additional feature + self.img_neg_files, self.img_bg_files = self.get_neg_and_bg(neg_dir, bg_dir) + self.area_thr = area_thr + + p = Path(path) # os-agnostic + self.img_files = self.get_img_files(p, prefix) + self.label_files = img2label_paths(self.img_files) # labels + # Check cache + cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix(".cache") + labels, shapes, segments, img_files, label_files = self.load_cache(cache_path, prefix) + + self.segments = segments + self.labels = list(labels) + self.shapes = np.array(shapes, dtype=np.float64) + self.img_files = img_files # update + self.label_files = label_files # update + + num_imgs = len(shapes) # number of images + batch_index = np.floor(np.arange(num_imgs) / batch_size).astype(np.int) # batch index + self.batch_index = batch_index # batch index of image + self.num_imgs = num_imgs + self.indices = range(num_imgs) + + # Update labels + for i, (_, segment) in enumerate(zip(self.labels, self.segments)): + if single_cls: # single-class training, merge all classes into 0 + self.labels[i][:, 0] = 0 + if segment: + self.segments[i][:, 0] = 0 + + # Rectangular Training + if self.rect: + num_batches = batch_index[-1] + 1 # number of batches + self.update_rect(num_batches, pad) + + # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM) + self.imgs, self.img_npy = [None] * num_imgs, [None] * num_imgs + if cache_images: + self.cache_images(cache_images, prefix) + + def cache_images(self, cache_images, prefix): + """Cache images to disk or ram for faster speed.""" + if cache_images == "disk": + self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + "_npy") + self.img_npy = [ + self.im_cache_dir / Path(f).with_suffix(".npy").name for f in self.img_files + ] + self.im_cache_dir.mkdir(parents=True, exist_ok=True) + gb = 0 # Gigabytes of cached images + self.img_hw0, self.img_hw = [None] * self.num_imgs, [None] * self.num_imgs + results = ThreadPool(NUM_THREADS).imap( + lambda x: load_image(*x), zip(repeat(self), range(self.num_imgs)) + ) + pbar = tqdm(enumerate(results), total=self.num_imgs) + for i, x in pbar: + if cache_images == "disk": + if not self.img_npy[i].exists(): + np.save(self.img_npy[i].as_posix(), x[0]) + gb += self.img_npy[i].stat().st_size + else: + ( + self.imgs[i], + self.img_hw0[i], + self.img_hw[i], + ) = x # im, hw_orig, hw_resized = 
load_image(self, i) + gb += self.imgs[i].nbytes + pbar.desc = f"{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})" + pbar.close() + + def get_img_files(self, p, prefix): + """Read image files.""" + try: + f = [] # image files + if p.is_dir(): # dir + f += glob.glob(str(p / "**" / "*.*"), recursive=True) + # f = list(p.rglob('*.*')) # pathlib + elif p.is_file(): # file + with open(p, "r") as t: + t = t.read().strip().splitlines() + parent = str(p.parent) + os.sep + f += [ + x.replace("./", parent) if x.startswith("./") else x for x in t + ] # local to global path + # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib) + else: + raise Exception(f"{prefix}{p} does not exist") + img_files = sorted( + [x.replace("/", os.sep) for x in f if x.split(".")[-1].lower() in IMG_FORMATS] + ) + # img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib + assert img_files, f"{prefix}No images found" + except Exception as e: + raise Exception(f"{prefix}Error loading data from {str(p)}: {e}\nSee {HELP_URL}") + return img_files + + def get_neg_and_bg(self, neg_dir, bg_dir): + """Get negative images and background images.""" + img_neg_files, img_bg_files = [], [] + if os.path.isdir(neg_dir): + img_neg_files = [os.path.join(neg_dir, i) for i in os.listdir(neg_dir)] + logging.info( + colorstr("Negative dir: ") + + f"'{neg_dir}', using {len(img_neg_files)} images from the dir as negative samples during training" + ) + + if os.path.isdir(bg_dir): + img_bg_files = [os.path.join(bg_dir, i) for i in os.listdir(bg_dir)] + logging.info( + colorstr("Background dir: ") + + f"{bg_dir}, using {len(img_bg_files)} images from the dir as backgrounds during training" + ) + return img_neg_files, img_bg_files + + def load_cache(self, cache_path, prefix): + """Load labels from a *.cache file.""" + try: + cache, exists = ( + np.load(cache_path, allow_pickle=True).item(), + True, + ) # load dict + assert cache["version"] == self.cache_version # same version + assert cache["hash"] == get_hash(self.label_files + self.img_files) # same hash + except Exception: # any cache failure: re-cache labels + cache, exists = self.cache_labels(cache_path, prefix), False # cache + + # Display cache + nf, nm, ne, nc, n = cache.pop("results") # found, missing, empty, corrupted, total + if exists: + d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted" + tqdm(None, desc=prefix + d, total=n, initial=n) # display cache results + if cache["msgs"]: + logging.info("\n".join(cache["msgs"])) # display warnings + assert ( + nf > 0 or not self.augment + ), f"{prefix}No labels in {cache_path}. Cannot train without labels.
See {HELP_URL}" + + # Read cache + [cache.pop(k) for k in ("hash", "version", "msgs")] # remove items + labels, shapes, segments = zip(*cache.values()) + img_files = list(cache.keys()) # update + label_files = img2label_paths(cache.keys()) # update + return labels, shapes, segments, img_files, label_files + + def update_rect(self, num_batches, pad): + """Update attr if rect is True.""" + # Sort by aspect ratio + s = self.shapes # wh + ar = s[:, 1] / s[:, 0] # aspect ratio + irect = ar.argsort() + self.img_files = [self.img_files[i] for i in irect] + self.label_files = [self.label_files[i] for i in irect] + self.labels = [self.labels[i] for i in irect] + self.segments = [self.segments[i] for i in irect] + self.shapes = s[irect] # wh + ar = ar[irect] + + # Set training image shapes + shapes = [[1, 1]] * num_batches + for i in range(num_batches): + ari = ar[self.batch_index == i] + mini, maxi = ari.min(), ari.max() + if maxi < 1: + shapes[i] = [maxi, 1] + elif mini > 1: + shapes[i] = [1, 1 / mini] + + self.batch_shapes = ( + np.ceil(np.array(shapes) * self.img_size / self.stride + pad).astype(np.int) * self.stride + ) + + def cache_labels(self, path=Path("./labels.cache"), prefix=""): + """Cache labels to *.cache file if there is no *.cache file in local.""" + # Cache dataset labels, check images and read shapes + x = {} # dict + nm, nf, ne, nc, msgs = ( + 0, + 0, + 0, + 0, + [], + ) # number missing, found, empty, corrupt, messages + desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..." + + with Pool(NUM_THREADS) as pool: + pbar = tqdm( + pool.imap( + verify_image_label, + zip(self.img_files, self.label_files, repeat(prefix)), + ), + desc=desc, + total=len(self.img_files), + ) + for im_file, l, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar: + nm += nm_f + nf += nf_f + ne += ne_f + nc += nc_f + if im_file: + x[im_file] = [l, shape, segments] + if msg: + msgs.append(msg) + pbar.desc = f"{desc}{nf} found, {nm} missing, {ne} empty, {nc} corrupted" + + pbar.close() + if msgs: + logging.info("\n".join(msgs)) + if nf == 0: + logging.info(f"{prefix}WARNING: No labels found in {path}. 
See {HELP_URL}") + x["hash"] = get_hash(self.label_files + self.img_files) + x["results"] = nf, nm, ne, nc, len(self.img_files) + x["msgs"] = msgs # warnings + x["version"] = self.cache_version # cache version + try: + np.save(path, x) # save cache for next time + path.with_suffix(".cache.npy").rename(path) # remove .npy suffix + logging.info(f"{prefix}New cache created: {path}") + except Exception as e: + logging.info( + f"{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}" + ) # path not writeable + return x + + def __len__(self): + return len(self.img_files) + + # def __iter__(self): + # self.count = -1 + # print('ran dataset iter') + # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) + # return self + + @Dataset.mosaic_getitem + def __getitem__(self, index): + index = self.indices[index] # linear, shuffled, or image_weights + + hyp = self.hyp + self.mosaic = self.augment and not self.rect + mosaic = self.mosaic and random.random() < hyp["mosaic"] + if mosaic: + # Load mosaic + img, labels = load_mosaic(self, index) + shapes = None + + # MixUp augmentation + if random.random() < hyp["mixup"]: + img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.num_imgs - 1))) + + else: + # Load image + img, (h0, w0), (h, w) = load_image(self, index) + + # Letterbox + shape = ( + self.batch_shapes[self.batch_index[index]] if self.rect else self.img_size + ) # final letterboxed shape + img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) + shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + + labels = self.labels[index].copy() + if labels.size: # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy( + labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1] + ) + + if self.augment: + img, labels = random_perspective( + img, + labels, + degrees=hyp["degrees"], + translate=hyp["translate"], + scale=hyp["scale"], + shear=hyp["shear"], + perspective=hyp["perspective"], + ) + + nl = len(labels) # number of labels + if nl: + labels[:, 1:5] = xyxy2xywhn( + labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3 + ) + + if self.augment: + # Albumentations + img, labels = self.albumentations(img, labels) + nl = len(labels) # update after albumentations + + # HSV color-space + augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"]) + + # Flip up-down + if random.random() < hyp["flipud"]: + img = np.flipud(img) + if nl: + labels[:, 2] = 1 - labels[:, 2] + + # Flip left-right + if random.random() < hyp["fliplr"]: + img = np.fliplr(img) + if nl: + labels[:, 1] = 1 - labels[:, 1] + + # Cutouts + # labels = cutout(img, labels, p=0.5) + + labels_out = torch.zeros((nl, 6)) + if nl: + labels_out[:, 1:] = torch.from_numpy(labels) + + # Convert + img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) + + return torch.from_numpy(img), labels_out, self.img_files[index], shapes + + @staticmethod + def collate_fn(batch): + img, label, path, shapes = zip(*batch) # transposed + for i, l in enumerate(label): + l[:, 0] = i # add target image index for build_targets() + return torch.stack(img, 0), torch.cat(label, 0), path, shapes, None + + @staticmethod + def collate_fn4(batch): + img, label, path, shapes = zip(*batch) # transposed + n = len(shapes) // 4 + img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n] + + ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]]) + wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]]) + s = 
torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]]) # scale + for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW + i *= 4 + if random.random() < 0.5: + im = F.interpolate( + img[i].unsqueeze(0).float(), + scale_factor=2.0, + mode="bilinear", + align_corners=False, + )[0].type(img[i].type()) + l = label[i] + else: + im = torch.cat( + ( + torch.cat((img[i], img[i + 1]), 1), + torch.cat((img[i + 2], img[i + 3]), 1), + ), + 2, + ) + l = ( + torch.cat( + ( + label[i], + label[i + 1] + ho, + label[i + 2] + wo, + label[i + 3] + ho + wo, + ), + 0, + ) + * s + ) + img4.append(im) + label4.append(l) + + for i, l in enumerate(label4): + l[:, 0] = i # add target image index for build_targets() + + return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4 + + +class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing + def __init__( + self, + path, + img_size=640, + batch_size=16, + augment=False, + hyp=None, + rect=False, + image_weights=False, + cache_images=False, + single_cls=False, + stride=32, + pad=0, + prefix="", + neg_dir="", + bg_dir="", + area_thr=0.2, + downsample_ratio=1, # return downsampled masks + ): + super().__init__( + path, + img_size, + batch_size, + augment, + hyp, + rect, + image_weights, + cache_images, + single_cls, + stride, + pad, + prefix, + neg_dir, + bg_dir, + area_thr, + ) + self.downsample_ratio = downsample_ratio + + @Dataset.mosaic_getitem + def __getitem__(self, index): + index = self.indices[index] # linear, shuffled, or image_weights + + hyp = self.hyp + self.mosaic = self.augment and not self.rect + mosaic = self.mosaic and random.random() < hyp["mosaic"] + masks = [] + if mosaic: + # Load mosaic + img, labels, segments = load_mosaic(self, index, return_seg=True) + shapes = None + + # TODO: MixUp does not support segments yet + # MixUp augmentation + if random.random() < hyp["mixup"]: + img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.num_imgs - 1))) + + else: + # Load image + img, (h0, w0), (h, w) = load_image(self, index) + + # Letterbox + shape = ( + self.batch_shapes[self.batch_index[index]] if self.rect else self.img_size + ) # final letterboxed shape + img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) + shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + + labels = self.labels[index].copy() + # [array, array, ....], array.shape=(num_points, 2), xyxyxyxy + segments = self.segments[index].copy() + # TODO + if len(segments): + for i_s in range(len(segments)): + segments[i_s] = xyn2xy( + segments[i_s], + ratio[0] * w, + ratio[1] * h, + padw=pad[0], + padh=pad[1], + ) + if labels.size: # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy( + labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1] + ) + + if self.augment: + img, labels, segments = random_perspective( + img, + labels, + segments=segments, + degrees=hyp["degrees"], + translate=hyp["translate"], + scale=hyp["scale"], + shear=hyp["shear"], + perspective=hyp["perspective"], + return_seg=True, + ) + + nl = len(labels) # number of labels + if nl: + labels[:, 1:5] = xyxy2xywhn( + labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3 + ) + for si in range(len(segments)): + mask = polygon2mask_downsample( + img.shape[:2], + [segments[si].reshape(-1)], + downsample_ratio=self.downsample_ratio, + ) + masks.append(torch.from_numpy(mask.astype(np.float32))) + + masks = ( + torch.stack(masks, axis=0) + if len(masks) + else torch.zeros( + nl, img.shape[0] // self.downsample_ratio,
img.shape[1] // self.downsample_ratio + ) + ) + # TODO: albumentations support + if self.augment: + # Albumentations + # some augmentations do not change boxes or masks, + # so this is acceptable for now. + img, labels = self.albumentations(img, labels) + nl = len(labels) # update after albumentations + + # HSV color-space + augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"]) + + # Flip up-down + if random.random() < hyp["flipud"]: + img = np.flipud(img) + if nl: + labels[:, 2] = 1 - labels[:, 2] + masks = torch.flip(masks, dims=[1]) + + # Flip left-right + if random.random() < hyp["fliplr"]: + img = np.fliplr(img) + if nl: + labels[:, 1] = 1 - labels[:, 1] + masks = torch.flip(masks, dims=[2]) + + # Cutouts + # labels = cutout(img, labels, p=0.5) + + labels_out = torch.zeros((nl, 6)) + if nl: + labels_out[:, 1:] = torch.from_numpy(labels) + + # Convert + img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) + + return (torch.from_numpy(img), labels_out, self.img_files[index], shapes, masks) + + @staticmethod + def collate_fn(batch): + img, label, path, shapes, masks = zip(*batch) # transposed + batched_masks = torch.cat(masks, 0) + # print(batched_masks.shape) + # print('batched_masks:', (batched_masks > 0).sum()) + for i, l in enumerate(label): + l[:, 0] = i # add target image index for build_targets() + return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks + + +# Ancillary functions -------------------------------------------------------------------------------------------------- +def load_image(self, i): + # loads 1 image from dataset index 'i', returns im, original hw, resized hw + im = self.imgs[i] + if im is None: # not cached in ram + npy = self.img_npy[i] + if npy and npy.exists(): # load npy + im = np.load(npy) + else: # read image + path = self.img_files[i] + im = cv2.imread(path) # BGR + assert im is not None, "Image Not Found " + path + h0, w0 = im.shape[:2] # orig hw + r = self.img_size / max(h0, w0) # ratio + if r != 1: # if sizes are not equal + im = cv2.resize( + im, + (int(w0 * r), int(h0 * r)), + interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR, + ) + return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized + else: + return ( + self.imgs[i], + self.img_hw0[i], + self.img_hw[i], + ) # im, hw_original, hw_resized + + +def load_neg_image(self, index): + path = self.img_neg_files[index] + img = cv2.imread(path) # BGR + assert img is not None, "Image Not Found " + path + h0, w0 = img.shape[:2] # orig hw + r = self.img_size / max(h0, w0) # resize image to img_size + if r != 1: # always resize down, only resize up if training with augmentation + interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR + img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) + return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized + + +def load_bg_image(self, index): + path = self.img_files[index] + bg_path = self.img_bg_files[np.random.randint(0, len(self.img_bg_files))] + img, coord, _, (w, h) = paste1( + path, bg_path, bg_size=self.img_size, fg_scale=random.uniform(1.5, 5) + ) + label = self.labels[index] + label[:, 1] = (label[:, 1] * w + coord[0]) / img.shape[1] + label[:, 2] = (label[:, 2] * h + coord[1]) / img.shape[0] + label[:, 3] = label[:, 3] * w / img.shape[1] + label[:, 4] = label[:, 4] * h / img.shape[0] + + assert img is not None, "Image Not Found " + path + h0, w0 = img.shape[:2] # orig hw + r =
self.img_size / max(h0, w0) # resize image to img_size + if r != 1: # always resize down, only resize up if training with augmentation + interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR + img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) + return img, (h0, w0), img.shape[:2], label # img, hw_original, hw_resized + + +def load_mosaic(self, index, return_seg=False): + # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic + labels4, segments4 = [], [] + s = self.img_size + yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y + + num_neg = random.randint(0, 2) if len(self.img_neg_files) else 0 + # 3 additional image indices + indices = [index] + random.choices(self.indices, k=(3 - num_neg)) + indices = indices + random.choices(range(len(self.img_neg_files)), k=num_neg) + ri = list(range(4)) + random.shuffle(ri) + for j, (i, index) in enumerate(zip(ri, indices)): + temp_label = None + # Load image + # TODO + if j < (4 - num_neg): + if len(self.img_bg_files) and (random.uniform(0, 1) > 0.5): + img, _, (h, w), temp_label = load_bg_image(self, index) + else: + img, _, (h, w) = load_image(self, index) + else: + img, _, (h, w) = load_neg_image(self, index) + # place img in img4 + if j == 0: + img4 = np.full( + (s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8 + ) # base image with 4 tiles + if i == 0: # top left + x1a, y1a, x2a, y2a = ( + max(xc - w, 0), + max(yc - h, 0), + xc, + yc, + ) # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = ( + w - (x2a - x1a), + h - (y2a - y1a), + w, + h, + ) # xmin, ymin, xmax, ymax (small image) + elif i == 1: # top right + x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc + x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h + elif i == 2: # bottom left + x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) + x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) + elif i == 3: # bottom right + x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) + x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) + + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + padw = x1a - x1b + padh = y1a - y1b + + # Labels + if j >= (4 - num_neg): + continue + + # TODO: deal with segments + if len(self.img_bg_files) and temp_label is not None: + labels, segments = temp_label, [] + else: + labels, segments = self.labels[index].copy(), self.segments[index].copy() + + if labels.size: + labels[:, 1:] = xywhn2xyxy( + labels[:, 1:], w, h, padw, padh + ) # normalized xywh to pixel xyxy format + segments = [xyn2xy(x, w, h, padw, padh) for x in segments] + labels4.append(labels) + segments4.extend(segments) + + # Concat/clip labels + labels4 = np.concatenate(labels4, 0) + for x in (labels4[:, 1:], *segments4): + np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() + # img4, labels4 = replicate(img4, labels4) # replicate + + # Augment + img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"]) + results = random_perspective( + img4, + labels4, + segments4, + degrees=self.hyp["degrees"], + translate=self.hyp["translate"], + scale=self.hyp["scale"], + shear=self.hyp["shear"], + perspective=self.hyp["perspective"], + border=self.mosaic_border, + area_thr=self.area_thr, + return_seg=return_seg, + ) # border to remove + # return (img4, labels4, segments4) if return_seg else (img4, labels4) + return results + + +def load_mosaic9(self, 
index): + # YOLOv5 9-mosaic loader. Loads 1 image + 8 random images into a 9-image mosaic + labels9, segments9 = [], [] + s = self.img_size + indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices + random.shuffle(indices) + for i, index in enumerate(indices): + # Load image + img, _, (h, w) = load_image(self, index) + + # place img in img9 + if i == 0: # center + img9 = np.full( + (s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8 + ) # base image with 4 tiles + h0, w0 = h, w + c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates + elif i == 1: # top + c = s, s - h, s + w, s + elif i == 2: # top right + c = s + wp, s - h, s + wp + w, s + elif i == 3: # right + c = s + w0, s, s + w0 + w, s + h + elif i == 4: # bottom right + c = s + w0, s + hp, s + w0 + w, s + hp + h + elif i == 5: # bottom + c = s + w0 - w, s + h0, s + w0, s + h0 + h + elif i == 6: # bottom left + c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h + elif i == 7: # left + c = s - w, s + h0 - h, s, s + h0 + elif i == 8: # top left + c = s - w, s + h0 - hp - h, s, s + h0 - hp + + padx, pady = c[:2] + x1, y1, x2, y2 = [max(x, 0) for x in c] # allocate coords + + # Labels + labels, segments = self.labels[index].copy(), self.segments[index].copy() + if labels.size: + labels[:, 1:] = xywhn2xyxy( + labels[:, 1:], w, h, padx, pady + ) # normalized xywh to pixel xyxy format + segments = [xyn2xy(x, w, h, padx, pady) for x in segments] + labels9.append(labels) + segments9.extend(segments) + + # Image + img9[y1:y2, x1:x2] = img[y1 - pady :, x1 - padx :] # img9[ymin:ymax, xmin:xmax] + hp, wp = h, w # height, width previous + + # Offset + yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border] # mosaic center x, y + img9 = img9[yc : yc + 2 * s, xc : xc + 2 * s] + + # Concat/clip labels + labels9 = np.concatenate(labels9, 0) + labels9[:, [1, 3]] -= xc + labels9[:, [2, 4]] -= yc + c = np.array([xc, yc]) # centers + segments9 = [x - c for x in segments9] + + for x in (labels9[:, 1:], *segments9): + np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() + # img9, labels9 = replicate(img9, labels9) # replicate + + # Augment + img9, labels9 = random_perspective( + img9, + labels9, + segments9, + degrees=self.hyp["degrees"], + translate=self.hyp["translate"], + scale=self.hyp["scale"], + shear=self.hyp["shear"], + perspective=self.hyp["perspective"], + border=self.mosaic_border, + ) # border to remove + + return img9, labels9 + + +def dataset_stats(path="coco128.yaml", autodownload=False, verbose=False, profile=False, hub=False): + """Return dataset statistics dictionary with images and instances counts per split per class + To run in parent directory: export PYTHONPATH="$PWD/yolov5" + Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True) + Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip') + Arguments + path: Path to data.yaml or data.zip (with data.yaml inside data.zip) + autodownload: Attempt to download dataset if not found locally + verbose: Print stats dictionary + """ + + def round_labels(labels): + # Update labels to integer class and 6 decimal place floats + return [[int(c), *[round(x, 4) for x in points]] for c, *points in labels] + + def unzip(path): + # Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/' + if str(path).endswith(".zip"): # path is data.zip + assert Path(path).is_file(), f"Error unzipping {path}, file not found" + ZipFile(path).extractall(path=path.parent) # 
unzip + dir = path.with_suffix("") # dataset directory == zip name + return ( + True, + str(dir), + next(dir.rglob("*.yaml")), + ) # zipped, data_dir, yaml_path + else: # path is data.yaml + return False, None, path + + def hub_ops(f, max_dim=1920): + # HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing + f_new = im_dir / Path(f).name # dataset-hub image filename + try: # use PIL + im = Image.open(f) + r = max_dim / max(im.height, im.width) # ratio + if r < 1.0: # image too large + im = im.resize((int(im.width * r), int(im.height * r))) + im.save(f_new, quality=75) # save + except Exception as e: # use OpenCV + print(f"WARNING: HUB ops PIL failure {f}: {e}") + im = cv2.imread(f) + im_height, im_width = im.shape[:2] + r = max_dim / max(im_height, im_width) # ratio + if r < 1.0: # image too large + im = cv2.resize( + im, + (int(im_width * r), int(im_height * r)), + interpolation=cv2.INTER_LINEAR, + ) + cv2.imwrite(str(f_new), im) + + zipped, data_dir, yaml_path = unzip(Path(path)) + with open(check_yaml(yaml_path), errors="ignore") as f: + data = yaml.safe_load(f) # data dict + if zipped: + data["path"] = data_dir # TODO: should this be dir.resolve()? + check_dataset(data, autodownload) # download dataset if missing + hub_dir = Path(data["path"] + ("-hub" if hub else "")) + stats = {"nc": data["nc"], "names": data["names"]} # statistics dictionary + for split in "train", "val", "test": + if data.get(split) is None: + stats[split] = None # i.e. no test set + continue + x = [] + dataset = LoadImagesAndLabels(data[split]) # load dataset + for label in tqdm(dataset.labels, total=dataset.num_imgs, desc="Statistics"): + x.append(np.bincount(label[:, 0].astype(int), minlength=data["nc"])) + x = np.array(x) # shape(128x80) + stats[split] = { + "instance_stats": {"total": int(x.sum()), "per_class": x.sum(0).tolist()}, + "image_stats": { + "total": dataset.num_imgs, + "unlabelled": int(np.all(x == 0, 1).sum()), + "per_class": (x > 0).sum(0).tolist(), + }, + "labels": [ + {str(Path(k).name): round_labels(v.tolist())} + for k, v in zip(dataset.img_files, dataset.labels) + ], + } + + if hub: + im_dir = hub_dir / "images" + im_dir.mkdir(parents=True, exist_ok=True) + for _ in tqdm( + ThreadPool(NUM_THREADS).imap(hub_ops, dataset.img_files), + total=dataset.num_imgs, + desc="HUB Ops", + ): + pass + + # Profile + stats_path = hub_dir / "stats.json" + if profile: + for _ in range(1): + file = stats_path.with_suffix(".npy") + t1 = time.time() + np.save(file, stats) + t2 = time.time() + x = np.load(file, allow_pickle=True) + print(f"stats.npy times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write") + + file = stats_path.with_suffix(".json") + t1 = time.time() + with open(file, "w") as f: + json.dump(stats, f) # save stats *.json + t2 = time.time() + with open(file, "r") as f: + x = json.load(f) # load hyps dict + print(f"stats.json times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write") + + # Save, print and return + if hub: + print(f"Saving {stats_path.resolve()}...") + with open(stats_path, "w") as f: + json.dump(stats, f) # save stats.json + if verbose: + print(json.dumps(stats, indent=2, sort_keys=False)) + return stats + + +import os +import glob +import shutil +import hashlib +import uuid +import torch +import cv2 +import numpy as np +import random +from pathlib import Path +from PIL import Image, ImageOps, ExifTags +from utils.segment import segments2boxes +from utils.general import xywh2xyxy + + +# Parameters +HELP_URL = 
"https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data" +IMG_FORMATS = [ + "bmp", + "jpg", + "jpeg", + "png", + "tif", + "tiff", + "dng", + "webp", + "mpo", +] # acceptable image suffixes +VID_FORMATS = [ + "mov", + "avi", + "mp4", + "mpg", + "mpeg", + "m4v", + "wmv", + "mkv", + "vdo", + "flv", +] # acceptable video suffixes +NUM_THREADS = min(8, os.cpu_count()) # number of multiprocessing threads + +# Get orientation exif tag +for orientation in ExifTags.TAGS.keys(): + if ExifTags.TAGS[orientation] == "Orientation": + break + +def get_hash(paths): + # Returns a single hash value of a list of paths (files or dirs) + size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes + h = hashlib.md5(str(size).encode()) # hash sizes + h.update("".join(paths).encode()) # hash paths + return h.hexdigest() # return hash + + +def exif_size(img): + # Returns exif-corrected PIL size + s = img.size # (width, height) + try: + rotation = dict(img._getexif().items())[orientation] + if rotation == 6: # rotation 270 + s = (s[1], s[0]) + elif rotation == 8: # rotation 90 + s = (s[1], s[0]) + except: + pass + + return s + + +def exif_transpose(image): + """ + Transpose a PIL image accordingly if it has an EXIF Orientation tag. + Inplace version of https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py exif_transpose() + + :param image: The image to transpose. + :return: An image. + """ + exif = image.getexif() + orientation = exif.get(0x0112, 1) # default 1 + if orientation > 1: + method = { + 2: Image.FLIP_LEFT_RIGHT, + 3: Image.ROTATE_180, + 4: Image.FLIP_TOP_BOTTOM, + 5: Image.TRANSPOSE, + 6: Image.ROTATE_270, + 7: Image.TRANSVERSE, + 8: Image.ROTATE_90, + }.get(orientation) + if method is not None: + image = image.transpose(method) + del exif[0x0112] + image.info["exif"] = exif.tobytes() + return image + + +def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): + """ + Args: + img_size (tuple): The image size. + polygons (np.ndarray): [N, M], N is the number of polygons, + M is the number of points(Be divided by 2). + """ + img_size = ( + img_size[0] // downsample_ratio, + img_size[1] // downsample_ratio + ) + mask = np.zeros(img_size, dtype=np.uint8) + polygons = np.asarray(polygons) / downsample_ratio + polygons = polygons.astype(np.int32) + shape = polygons.shape + polygons = polygons.reshape(shape[0], -1, 2) + cv2.fillPoly(mask, polygons, color=color) + return mask + + +def worker_init_reset_seed(worker_id): + seed = uuid.uuid4().int % 2 ** 32 + random.seed(seed) + torch.set_rng_state(torch.manual_seed(seed).get_state()) + np.random.seed(seed) + + +def polygon2mask_downsample(img_size, polygons, color=1, downsample_ratio=1): + """ + Args: + img_size (tuple): The image size. + polygons (np.ndarray): [N, M], N is the number of polygons, + M is the number of points(Be divided by 2). 
+ """ + mask = np.zeros(img_size, dtype=np.uint8) + polygons = np.asarray(polygons) + polygons = polygons.astype(np.int32) + shape = polygons.shape + polygons = polygons.reshape(shape[0], -1, 2) + cv2.fillPoly(mask, polygons, color=color) + nh, nw = ( + img_size[0] // downsample_ratio, + img_size[1] // downsample_ratio + ) + mask = cv2.resize(mask, (nw, nh)) + return mask + +def img2label_paths(img_paths): + # Define label paths as a function of image paths + sa, sb = ( + os.sep + "images" + os.sep, + os.sep + "labels" + os.sep, + ) # /images/, /labels/ substrings + return [sb.join(x.rsplit(sa, 1)).rsplit(".", 1)[0] + ".txt" for x in img_paths] + + +def create_folder(path="./new"): + # Create folder + if os.path.exists(path): + shutil.rmtree(path) # delete output folder + os.makedirs(path) # make new output folder + + +def flatten_recursive(path="../datasets/coco128"): + # Flatten a recursive directory by bringing all files to top level + new_path = Path(path + "_flat") + create_folder(new_path) + for file in tqdm(glob.glob(str(Path(path)) + "/**/*.*", recursive=True)): + shutil.copyfile(file, new_path / Path(file).name) + +def extract_boxes( + path="../datasets/coco128", +): # from utils.datasets import *; extract_boxes() + # Convert detection dataset into classification dataset, with one directory per class + path = Path(path) # images dir + shutil.rmtree(path / "classifier") if ( + path / "classifier" + ).is_dir() else None # remove existing + files = list(path.rglob("*.*")) + n = len(files) # number of files + for im_file in tqdm(files, total=n): + if im_file.suffix[1:] in IMG_FORMATS: + # image + im = cv2.imread(str(im_file))[..., ::-1] # BGR to RGB + h, w = im.shape[:2] + + # labels + lb_file = Path(img2label_paths([str(im_file)])[0]) + if Path(lb_file).exists(): + with open(lb_file, "r") as f: + lb = np.array( + [x.split() for x in f.read().strip().splitlines()], + dtype=np.float32, + ) # labels + + for j, x in enumerate(lb): + c = int(x[0]) # class + f = ( + (path / "classifier") + / f"{c}" + / f"{path.stem}_{im_file.stem}_{j}.jpg" + ) # new filename + if not f.parent.is_dir(): + f.parent.mkdir(parents=True) + + b = x[1:] * [w, h, w, h] # box + # b[2:] = b[2:].max() # rectangle to square + b[2:] = b[2:] * 1.2 + 3 # pad + b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) + + b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image + b[[1, 3]] = np.clip(b[[1, 3]], 0, h) + assert cv2.imwrite( + str(f), im[b[1] : b[3], b[0] : b[2]] + ), f"box failure in {f}" + + +def autosplit( + path="../datasets/coco128/images", weights=(0.9, 0.1, 0.0), annotated_only=False +): + """Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files + Usage: from utils.datasets import *; autosplit() + Arguments + path: Path to images directory + weights: Train, val, test weights (list, tuple) + annotated_only: Only use images with an annotated txt file + """ + path = Path(path) # images dir + files = sorted( + [x for x in path.rglob("*.*") if x.suffix[1:].lower() in IMG_FORMATS] + ) # image files only + n = len(files) # number of files + random.seed(0) # for reproducibility + indices = random.choices( + [0, 1, 2], weights=weights, k=n + ) # assign each image to a split + + txt = [ + "autosplit_train.txt", + "autosplit_val.txt", + "autosplit_test.txt", + ] # 3 txt files + [(path.parent / x).unlink(missing_ok=True) for x in txt] # remove existing + + print( + f"Autosplitting images from {path}" + + ", using *.txt labeled images only" * annotated_only + ) + for i, img in 
tqdm(zip(indices, files), total=n): + if ( + not annotated_only or Path(img2label_paths([str(img)])[0]).exists() + ): # check label + with open(path.parent / txt[i], "a") as f: + f.write( + "./" + img.relative_to(path.parent).as_posix() + "\n" + ) # add image to txt file + + +def verify_image_label(args): + # Verify one image-label pair + im_file, lb_file, prefix = args + nm, nf, ne, nc, msg, segments = ( + 0, + 0, + 0, + 0, + "", + [], + ) # number (missing, found, empty, corrupt), message, segments + try: + # verify images + im = Image.open(im_file) + im.verify() # PIL verify + shape = exif_size(im) # image size + assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels" + assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}" + if im.format.lower() in ("jpg", "jpeg"): + with open(im_file, "rb") as f: + f.seek(-2, 2) + if f.read() != b"\xff\xd9": # corrupt JPEG + ImageOps.exif_transpose(Image.open(im_file)).save( + im_file, "JPEG", subsampling=0, quality=100 + ) + msg = f"{prefix}WARNING: {im_file}: corrupt JPEG restored and saved" + + # verify labels + if os.path.isfile(lb_file): + nf = 1 # label found + with open(lb_file, "r") as f: + l = [x.split() for x in f.read().strip().splitlines() if len(x)] + if any([len(x) > 6 for x in l]): # is segment + classes = np.array([x[0] for x in l], dtype=np.float32) + segments = [ + np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l + ] # (cls, xy1...) + l = np.concatenate( + (classes.reshape(-1, 1), segments2boxes(segments)), 1 + ) # (cls, xywh) + l = np.array(l, dtype=np.float32) + nl = len(l) + if nl: + assert ( + l.shape[1] == 5 + ), f"labels require 5 columns, {l.shape[1]} columns detected" + assert (l >= 0).all(), f"negative label values {l[l < 0]}" + assert ( + l[:, 1:] <= 1 + ).all(), f"non-normalized or out of bounds coordinates {l[:, 1:][l[:, 1:] > 1]}" + l, idx = np.unique(l, axis=0, return_index=True) # remove duplicate rows + # NOTE: `np.unique` will change the order of `l`, so adjust the segments order too. + segments = [segments[i] for i in idx] if len(segments) > 0 else segments + if len(l) < nl: + msg = f"{prefix}WARNING: {im_file}: {nl - len(l)} duplicate labels removed" + else: + ne = 1 # label empty + l = np.zeros((0, 5), dtype=np.float32) + else: + nm = 1 # label missing + l = np.zeros((0, 5), dtype=np.float32) + return im_file, l, shape, segments, nm, nf, ne, nc, msg + except Exception as e: + nc = 1 + msg = f"{prefix}WARNING: {im_file}: ignoring corrupt image/label: {e}" + return [None, None, None, None, nm, nf, ne, nc, msg] + +from torch.utils.data import DataLoader as torchDataLoader + +class DataLoader(torchDataLoader): + """ + Lightnet dataloader that enables on the fly resizing of the images. + See :class:`torch.utils.data.DataLoader` for more information on the arguments. 
+ Check more on the following website: + https://gitlab.com/EAVISE/lightnet/-/blob/master/lightnet/data/_dataloading.py + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def close_augment(self): + self.batch_sampler.augment = False + + +class InfiniteDataLoader(torchDataLoader): + """Dataloader that reuses workers. + + Uses the same syntax as the vanilla DataLoader. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler)) + self.iterator = super().__iter__() + + def __len__(self): + return len(self.batch_sampler.sampler) + + def __iter__(self): + for i in range(len(self)): + yield next(self.iterator) + + +# NEW FILE +from PIL import Image, ImageDraw +import numpy as np +from PIL import ImageFile +# import numbers + +ImageFile.LOAD_TRUNCATED_IMAGES = True + + +def get_raito(new_size, original_size): + """Get the ratio between input_size and original_size""" + # # mmdet way + # iw, ih = new_size + # ow, oh = original_size + # max_long_edge = max(iw, ih) + # max_short_edge = min(iw, ih) + # ratio = min(max_long_edge / max(ow, oh), max_short_edge / min(ow, oh)) + # return ratio + + # # yolov5 way + return min(new_size[0] / original_size[0], new_size[1] / original_size[1]) + +def imresize(img, new_size): + """Resize the image to new_size with PIL, keeping the aspect ratio. + + Args: + img (PIL): The original image. + new_size (tuple): The new size (w, h). + """ + if isinstance(new_size, int): + new_size = (new_size, new_size) + old_size = img.size + ratio = get_raito(new_size, old_size) + img = img.resize((int(old_size[0] * ratio), int(old_size[1] * ratio))) + return img + +def get_wh(a, b): + return np.random.randint(a, b) + + +def paste2(sample1, sample2, background, scale=1.2): + sample1 = Image.open(sample1) + d_w1, d_h1 = sample1.size + + sample2 = Image.open(sample2) + d_w2, d_h2 = sample2.size + + # print(sample.size) + background = Image.open(background) + background = background.resize((int((d_w1 + d_w2) * scale), int((d_h1 + d_h2) * scale))) + bw, bh = background.size + + x1, y1 = get_wh(0, int(d_w1 * scale) - d_w1), get_wh(0, bh - d_h1) + x2, y2 = get_wh(int(d_w1 * scale), bw - d_w2), get_wh(0, bh - d_h2) + # x1, y1 = get_wh(0, int(bw / 2) - d_w1), get_wh(0, bh - d_h1) + # x2, y2 = get_wh(int(bw / 2), bw - d_w2), get_wh(0, bh - d_h2) + + background.paste(sample1, (x1, y1)) + background.paste(sample2, (x2, y2)) + # background = background.resize((416, 416)) + + return np.array(background), (x1, y1, x2, y2), background + # print(background.size) + # background.show() + + +def paste1(sample, background, bg_size, fg_scale=1.5): + sample = Image.open(sample) + background = Image.open(background) + background = imresize(background, bg_size) + bw, bh = background.size + # background = background.resize((int(d_w * scale), int(d_h * scale))) + new_w, new_h = int(bw / fg_scale), int(bh / fg_scale) + sample = imresize(sample, (new_w, new_h)) + + d_w, d_h = sample.size + x1, y1 = get_wh(0, bw - d_w), get_wh(0, bh - d_h) + background.paste(sample, (x1, y1)) + # draw = ImageDraw.Draw(background) + # draw.rectangle((x1 + 240, y1 + 254, x1 + 240 + 5, y1 + 254 + 5), 'red', 'green') + # draw.rectangle((x1 + 80, y1 + 28, x1 + 400, y1 + 480), None, 'green') + # background = background.resize((416, 416)) + + return np.array(background.convert('RGB'))[:, :, ::-1], (x1, y1), background, (d_w, d_h) diff --git a/train_instseg.py b/train_instseg.py new file mode 100644 index
000000000000..ff85f1eb36b5 --- /dev/null +++ b/train_instseg.py @@ -0,0 +1,680 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Train a YOLOv5 model on a custom dataset. + +Models and datasets download automatically from the latest YOLOv5 release. +Models: https://github.com/ultralytics/yolov5/tree/master/models +Datasets: https://github.com/ultralytics/yolov5/tree/master/data +Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data + +Usage: + $ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640 # from pretrained (RECOMMENDED) + $ python path/to/train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640 # from scratch +""" + +import argparse +import math +import os +import random +import sys +import time +from copy import deepcopy +from datetime import datetime +from pathlib import Path + +import numpy as np +import torch +import torch.distributed as dist +import torch.nn as nn +import yaml +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.optim import SGD, Adam, AdamW, lr_scheduler +from tqdm import tqdm + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[0] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH +ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative + +import val # for end-of-epoch mAP +from models.experimental import attempt_load +from models.yolo import Model +from utils.autoanchor import check_anchors +from utils.autobatch import check_train_batch_size +from utils.callbacks import Callbacks +from seg_dataloaders import create_dataloader +from utils.downloads import attempt_download +from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, fitness, + check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, + increment_path, init_seeds, intersect_dicts, labels_to_class_weights, + labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer) +from utils.loggers import Loggers, NewLoggersMask +from utils.loggers.wandb.wandb_utils import check_wandb_resume +from utils.seg_loss import ComputeLoss +#from utils.metrics import fitness +from utils.plots import plot_evolve, plot_labels +from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first +from evaluator import Yolov5Evaluator + +LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +RANK = int(os.getenv('RANK', -1)) +WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) + + +def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary + save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio = \ + Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ + opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio + callbacks.run('on_pretrain_routine_start') + + # Directories + w = save_dir / 'weights' #
weights dir + (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir + last, best, last_mosiac = w / 'last.pt', w / 'best.pt', w / "last_mosaic.pt" + + # Hyperparameters + if isinstance(hyp, str): + with open(hyp, errors='ignore') as f: + hyp = yaml.safe_load(f) # load hyps dict + LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items())) + + # Save run settings + if not evolve: + with open(save_dir / 'hyp.yaml', 'w') as f: + yaml.safe_dump(hyp, f, sort_keys=False) + with open(save_dir / 'opt.yaml', 'w') as f: + yaml.safe_dump(vars(opt), f, sort_keys=False) + + # Loggers + data_dict = None + if RANK in {-1, 0}: + newloggers = NewLoggersMask + loggers = newloggers( + save_dir=save_dir, opt=opt, logger=LOGGER + ) # loggers instance + + # Register actions + for k in methods(loggers): + callbacks.register_action(k, callback=getattr(loggers, k)) + + # Config + plots = not evolve and not opt.noplots # create plots + cuda = device.type != 'cpu' + init_seeds(opt.seed + 1 + RANK) + with torch_distributed_zero_first(LOCAL_RANK): + data_dict = data_dict or check_dataset(data) # check if None + train_path, val_path = data_dict['train'], data_dict['val'] + nc = 1 if single_cls else int(data_dict['nc']) # number of classes + names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names + assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}' # check + is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt') # COCO dataset + + # Model + check_suffix(weights, '.pt') # check weights + pretrained = weights.endswith('.pt') + if pretrained: + with torch_distributed_zero_first(LOCAL_RANK): + weights = attempt_download(weights) # download if not found locally + ckpt = torch.load(weights, map_location='cpu') # load checkpoint to CPU to avoid CUDA memory leak + model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create + exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys + csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 + csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect + model.load_state_dict(csd, strict=False) # load + LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report + else: + model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create + amp = check_amp(model) # check AMP + + # Freeze + freeze = [f'model.{x}.' 
for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze + for k, v in model.named_parameters(): + v.requires_grad = True # train all layers + if any(x in k for x in freeze): + LOGGER.info(f'freezing {k}') + v.requires_grad = False + + # Image size + gs = max(int(model.stride.max()), 32) # grid size (max stride) + imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple + + # Batch size + if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size + batch_size = check_train_batch_size(model, imgsz, amp) + loggers.on_params_update({"batch_size": batch_size}) + + # Optimizer + nbs = 64 # nominal batch size + accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing + hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay + LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}") + evaluator = Yolov5Evaluator( + data = data, + single_cls=single_cls, + save_dir=save_dir, + mask=True, + verbose=False, + mask_downsample_ratio=mask_ratio, + plots=False + ) + g = [], [], [] # optimizer parameter groups + bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. BatchNorm2d() + for v in model.modules(): + if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias + g[2].append(v.bias) + if isinstance(v, bn): # weight (no decay) + g[1].append(v.weight) + elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay) + g[0].append(v.weight) + + if opt.optimizer == 'Adam': + optimizer = Adam(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum + elif opt.optimizer == 'AdamW': + optimizer = AdamW(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum + else: + optimizer = SGD(g[2], lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) + + optimizer.add_param_group({'params': g[0], 'weight_decay': hyp['weight_decay']}) # add g0 with weight_decay + optimizer.add_param_group({'params': g[1]}) # add g1 (BatchNorm2d weights) + LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups " + f"{len(g[1])} weight (no decay), {len(g[0])} weight, {len(g[2])} bias") + del g + + # Scheduler + if opt.cos_lr: + lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf'] + else: + lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) + + # EMA + ema = ModelEMA(model) if RANK in {-1, 0} else None + + # Resume + start_epoch, best_fitness = 0, 0.0 + if pretrained: + # Optimizer + if ckpt['optimizer'] is not None: + optimizer.load_state_dict(ckpt['optimizer']) + best_fitness = ckpt['best_fitness'] + + # EMA + if ema and ckpt.get('ema'): + ema.ema.load_state_dict(ckpt['ema'].float().state_dict()) + ema.updates = ckpt['updates'] + + # Epochs + start_epoch = ckpt['epoch'] + 1 + if resume: + assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.' + if epochs < start_epoch: + LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. 
Fine-tuning for {epochs} more epochs.") + epochs += ckpt['epoch'] # finetune additional epochs + + del ckpt, csd + + # DP mode + if cuda and RANK == -1 and torch.cuda.device_count() > 1: + LOGGER.warning('WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n' + 'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.') + model = torch.nn.DataParallel(model) + + # SyncBatchNorm + if opt.sync_bn and cuda and RANK != -1: + model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) + LOGGER.info('Using SyncBatchNorm()') + + # Trainloader + train_loader, dataset = create_dataloader(train_path, + imgsz, + batch_size // WORLD_SIZE, + gs, + single_cls, + hyp=hyp, + augment=True, + cache=None if opt.cache == 'val' else opt.cache, + rect=opt.rect, + rank=LOCAL_RANK, + workers=workers, + image_weights=opt.image_weights, + quad=opt.quad, + prefix=colorstr('train: '), + mask_head=True, + shuffle=True) + mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class + nb = len(train_loader) # number of batches + assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}' + + # Process 0 + if RANK in {-1, 0}: + val_loader = create_dataloader(val_path, + imgsz, + batch_size // WORLD_SIZE * 2, + gs, + single_cls, + hyp=hyp, + cache=None if noval else opt.cache, + rect=True, + rank=-1, + workers=workers * 2, + pad=0.5, + mask_head=True, + prefix=colorstr('val: '))[0] + + if not resume: + labels = np.concatenate(dataset.labels, 0) + # c = torch.tensor(labels[:, 0]) # classes + # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency + # model._initialize_biases(cf.to(device)) + if plots: + plot_labels(labels, names, save_dir) + + # Anchors + if not opt.noautoanchor: + check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) + model.half().float() # pre-reduce anchor precision + + callbacks.run('on_pretrain_routine_end') + + # DDP mode + if cuda and RANK != -1: + if check_version(torch.__version__, '1.11.0'): + model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, static_graph=True) + else: + model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK) + + # Model attributes + nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps) + hyp['box'] *= 3 / nl # scale to layers + hyp['cls'] *= nc / 80 * 3 / nl # scale to classes and layers + hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl # scale to image size and layers + hyp['label_smoothing'] = opt.label_smoothing + model.nc = nc # attach number of classes to model + model.hyp = hyp # attach hyperparameters to model + model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights + model.names = names + + # Start training + t0 = time.time() + nw = max(round(hyp['warmup_epochs'] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations) + # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training + last_opt_step = -1 + maps = np.zeros(nc) # mAP per class + results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) + scheduler.last_epoch = start_epoch - 1 # do not move + scaler = torch.cuda.amp.GradScaler(enabled=amp) + stopper, stop = EarlyStopping(patience=opt.patience), False + compute_loss = ComputeLoss(model) # init loss class + callbacks.run('on_train_start') + LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n' + f'Using {train_loader.num_workers * 
WORLD_SIZE} dataloader workers\n' + f"Logging results to {colorstr('bold', save_dir)}\n" + f'Starting training for {epochs} epochs...') + for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ + callbacks.run('on_train_epoch_start') + model.train() + + # Update image weights (optional, single-GPU only) + if opt.image_weights: + cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights + iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights + dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx + + # Update mosaic border (optional) + # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) + # dataset.mosaic_border = [b - imgsz, -b] # height, width borders + + mloss = torch.zeros(4, device=device) # mean losses + if RANK != -1: + train_loader.sampler.set_epoch(epoch) + pbar = enumerate(train_loader) + LOGGER.info( ("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "seg", "obj", "cls", "labels", "img_size")) + if RANK in {-1, 0}: + pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar + optimizer.zero_grad() + for i, (imgs, targets, paths, _, masks) in pbar: # batch ------------------------------------------------------------- + callbacks.run('on_train_batch_start') + ni = i + nb * epoch # number integrated batches (since train start) + imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 + + # Warmup + if ni <= nw: + xi = [0, nw] # x interp + # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) + accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round()) + for j, x in enumerate(optimizer.param_groups): + # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 + x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 0 else 0.0, x['initial_lr'] * lf(epoch)]) + if 'momentum' in x: + x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) + + # Multi-scale + if opt.multi_scale: + sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size + sf = sz / max(imgs.shape[2:]) # scale factor + if sf != 1: + ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) + imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) + + # Forward + with torch.cuda.amp.autocast(amp): + pred = model(imgs) # forward + loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device)) # loss scaled by batch_size + if RANK != -1: + loss *= WORLD_SIZE # gradient averaged between devices in DDP mode + if opt.quad: + loss *= 4. 
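+                # A minimal worked example of the warmup interpolation above (editorial sketch;
+                # the concrete numbers nw=300, nbs=64, batch_size=16 are illustrative assumptions):
+                #   np.interp(150, [0, 300], [1, 64 / 16]).round()  # -> 2.0
+                # i.e. halfway through warmup the optimizer steps every 2 batches, ramping to
+                # every 4 batches (one nominal 64-image batch) once warmup ends.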
+ + # Backward + scaler.scale(loss).backward() + + # Optimize + if ni - last_opt_step >= accumulate: + scaler.step(optimizer) # optimizer.step + scaler.update() + optimizer.zero_grad() + if ema: + ema.update(model) + last_opt_step = ni + + # Log + if RANK in {-1, 0}: + mloss = (mloss * i + loss_items) / (i + 1) # update mean losses + mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) + pbar.set_description(("%10s" * 2 + "%10.4g" * 6) + % (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0],imgs.shape[-1])) + callbacks.run('on_train_batch_end', ni, model, imgs, targets,masks, paths, plots, opt.sync_bn, None) + + if callbacks.stop_training: + return + # end batch ------------------------------------------------------------------------------------------------ + + # Scheduler + lr = [x['lr'] for x in optimizer.param_groups] # for loggers + scheduler.step() + + if RANK in {-1, 0}: + # mAP + callbacks.run('on_train_epoch_end', epoch=epoch) + ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) + final_epoch = (epoch + 1 == epochs) or stopper.possible_stop + if not noval or final_epoch: # Calculate mAP + results, maps, _ = evaluator.run_training( + model=ema.ema, + dataloader=val_loader, + compute_loss=compute_loss, + ) + # Update best mAP + def fitness(x): + w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] + return (x[:, :8] * w).sum(1) + fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] + stop = stopper(epoch=epoch, fitness=fi) # early stop check + if fi > best_fitness: + best_fitness = fi + log_vals = list(mloss) + list(results) + lr + callbacks.run('on_fit_epoch_end', log_vals, epoch) + + # Save model + if (not nosave) or (final_epoch and not evolve): # if save + ckpt = { + 'epoch': epoch, + 'best_fitness': best_fitness, + 'model': deepcopy(de_parallel(model)).half(), + 'ema': deepcopy(ema.ema).half(), + 'updates': ema.updates, + 'optimizer': optimizer.state_dict(), + #'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None, + 'date': datetime.now().isoformat()} + + # Save last, best and delete + torch.save(ckpt, last) + if best_fitness == fi: + torch.save(ckpt, best) + if opt.save_period > 0 and epoch % opt.save_period == 0: + torch.save(ckpt, w / f'epoch{epoch}.pt') + del ckpt + callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi) + + # EarlyStopping + if RANK != -1: # if DDP training + broadcast_list = [stop if RANK == 0 else None] + dist.broadcast_object_list(broadcast_list, 0) # broadcast 'stop' to all ranks + if RANK != 0: + stop = broadcast_list[0] + if stop: + break # must break all DDP ranks + + # end epoch ---------------------------------------------------------------------------------------------------- + # end training ----------------------------------------------------------------------------------------------------- + if RANK in {-1, 0}: + LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.') + for f in last, best: + if f.exists(): + strip_optimizer(f) # strip optimizers + if f is best: + LOGGER.info(f'\nValidating {f}...') + results, _, _ = evaluator.run_training( + model=attempt_load(f, device).half(), + dataloader=val_loader, + compute_loss=compute_loss, + ) # val best model with plots + if is_coco: + callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch) + + callbacks.run('on_train_end', last, best, plots, epoch, results, masks=True) + + 
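+        # A minimal sketch of the local fitness() defined above (editorial; the 8-column
+        # layout of `results` as box metrics followed by mask metrics is an assumption
+        # inferred from the weight vector, not stated in this file):
+        #   results = (P_box, R_box, mAP50_box, mAP50-95_box,
+        #              P_mask, R_mask, mAP50_mask, mAP50-95_mask)
+        #   w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9]
+        #   (np.array([[0.7, 0.6, 0.65, 0.45, 0.68, 0.58, 0.60, 0.40]]) * w).sum(1)  # -> ~0.89
+        # so only the four mAP terms contribute, with mAP@.5:.95 weighted 9x over mAP@.5.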
+    torch.cuda.empty_cache()
+    return results
+
+
+def parse_opt(known=False):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
+    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
+    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
+    parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
+    parser.add_argument('--epochs', type=int, default=300)
+    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
+    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
+    parser.add_argument('--rect', action='store_true', help='rectangular training')
+    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
+    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
+    parser.add_argument('--noval', action='store_true', help='only validate final epoch')
+    parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
+    parser.add_argument('--noplots', action='store_true', help='save no plot files')
+    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
+    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
+    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
+    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
+    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
+    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
+    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
+    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
+    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
+    parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
+    parser.add_argument('--name', default='exp', help='save to project/name')
+    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
+    parser.add_argument('--quad', action='store_true', help='quad dataloader')
+    parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
+    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
+    parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
+    parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
+    parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
+    parser.add_argument('--seed', type=int, default=0, help='Global training seed')
+    parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
+    parser.add_argument('--mask-ratio', type=int, default=1, help='mask ratio')
+
+    # Weights & Biases arguments
+    parser.add_argument('--entity', default=None, help='W&B: Entity')
+    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
+    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
+    parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')
+
+    opt = parser.parse_known_args()[0] if known else parser.parse_args()
+    return opt
+
+
+def main(opt, callbacks=Callbacks()):
+    # Checks
+    if RANK in {-1, 0}:
+        print_args(vars(opt))
+        check_git_status()
+        check_requirements(exclude=['thop'])
+
+    # Resume
+    if opt.resume and not check_wandb_resume(opt) and not opt.evolve:  # resume an interrupted run
+        ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run()  # specified or most recent path
+        assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
+        with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f:
+            opt = argparse.Namespace(**yaml.safe_load(f))  # replace
+        opt.cfg, opt.weights, opt.resume = '', ckpt, True  # reinstate
+        LOGGER.info(f'Resuming training from {ckpt}')
+    else:
+        opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \
+            check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project)  # checks
+        assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
+        if opt.evolve:
+            if opt.project == str(ROOT / 'runs/train'):  # if default project name, rename to runs/evolve
+                opt.project = str(ROOT / 'runs/evolve')
+            opt.exist_ok, opt.resume = opt.resume, False  # pass resume to exist_ok and disable resume
+        if opt.name == 'cfg':
+            opt.name = Path(opt.cfg).stem  # use model.yaml as name
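# parse_opt(known=True) above relies on argparse.parse_known_args, which
# returns the parsed namespace plus any unrecognized tokens instead of
# erroring; this is what lets the run(**kwargs) helper further below override
# options programmatically. A minimal sketch of that behavior:
import argparse
p = argparse.ArgumentParser()
p.add_argument('--imgsz', type=int, default=640)
opt, unknown = p.parse_known_args(['--imgsz', '320', '--not-a-flag'])
print(opt.imgsz, unknown)  # 320 ['--not-a-flag']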
opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) + + # DDP mode + device = select_device(opt.device, batch_size=opt.batch_size) + if LOCAL_RANK != -1: + msg = 'is not compatible with YOLOv5 Multi-GPU DDP training' + assert not opt.image_weights, f'--image-weights {msg}' + assert not opt.evolve, f'--evolve {msg}' + assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size' + assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE' + assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' + torch.cuda.set_device(LOCAL_RANK) + device = torch.device('cuda', LOCAL_RANK) + dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") + + # Train + if not opt.evolve: + train(opt.hyp, opt, device, callbacks) + if WORLD_SIZE > 1 and RANK == 0: + LOGGER.info('Destroying process group... ') + dist.destroy_process_group() + + # Evolve hyperparameters (optional) + else: + # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit) + meta = { + 'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3) + 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) + 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1 + 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay + 'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok) + 'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum + 'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr + 'box': (1, 0.02, 0.2), # box loss gain + 'cls': (1, 0.2, 4.0), # cls loss gain + 'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight + 'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels) + 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight + 'iou_t': (0, 0.1, 0.7), # IoU training threshold + 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold + 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) + 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) + 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction) + 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) + 'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction) + 'degrees': (1, 0.0, 45.0), # image rotation (+/- deg) + 'translate': (1, 0.0, 0.9), # image translation (+/- fraction) + 'scale': (1, 0.0, 0.9), # image scale (+/- gain) + 'shear': (1, 0.0, 10.0), # image shear (+/- deg) + 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 + 'flipud': (1, 0.0, 1.0), # image flip up-down (probability) + 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability) + 'mosaic': (1, 0.0, 1.0), # image mixup (probability) + 'mixup': (1, 0.0, 1.0), # image mixup (probability) + 'copy_paste': (1, 0.0, 1.0)} # segment copy-paste (probability) + + with open(opt.hyp, errors='ignore') as f: + hyp = yaml.safe_load(f) # load hyps dict + if 'anchors' not in hyp: # anchors commented in hyp.yaml + hyp['anchors'] = 3 + opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch + # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices + evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv' + if opt.bucket: + os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}') # download evolve.csv if exists + + for _ in range(opt.evolve): # generations to evolve + if evolve_csv.exists(): # 
if evolve.csv exists: select best hyps and mutate + # Select parent(s) + parent = 'single' # parent selection method: 'single' or 'weighted' + x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1) + n = min(5, len(x)) # number of previous results to consider + x = x[np.argsort(-fitness(x))][:n] # top n mutations + w = fitness(x) - fitness(x).min() + 1E-6 # weights (sum > 0) + if parent == 'single' or len(x) == 1: + # x = x[random.randint(0, n - 1)] # random selection + x = x[random.choices(range(n), weights=w)[0]] # weighted selection + elif parent == 'weighted': + x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination + + # Mutate + mp, s = 0.8, 0.2 # mutation probability, sigma + npr = np.random + npr.seed(int(time.time())) + g = np.array([meta[k][0] for k in hyp.keys()]) # gains 0-1 + ng = len(meta) + v = np.ones(ng) + while all(v == 1): # mutate until a change occurs (prevent duplicates) + v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0) + for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300) + hyp[k] = float(x[i + 7] * v[i]) # mutate + + # Constrain to limits + for k, v in meta.items(): + hyp[k] = max(hyp[k], v[1]) # lower limit + hyp[k] = min(hyp[k], v[2]) # upper limit + hyp[k] = round(hyp[k], 5) # significant digits + + # Train mutation + results = train(hyp.copy(), opt, device, callbacks) + callbacks = Callbacks() + # Write mutation results + print_mutation(results, hyp.copy(), save_dir, opt.bucket) + + # Plot results + plot_evolve(evolve_csv) + LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n' + f"Results saved to {colorstr('bold', save_dir)}\n" + f'Usage example: $ python train.py --hyp {evolve_yaml}') + + +def run(**kwargs): + # Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt') + opt = parse_opt(True) + for k, v in kwargs.items(): + setattr(opt, k, v) + main(opt) + return opt + + +if __name__ == "__main__": + opt = parse_opt() + main(opt) diff --git a/utils/general.py b/utils/general.py index a85a2915a31a..178e3073eb76 100755 --- a/utils/general.py +++ b/utils/general.py @@ -24,6 +24,7 @@ from subprocess import check_output from typing import Optional from zipfile import ZipFile +from PIL import ImageFont import cv2 import numpy as np @@ -444,7 +445,7 @@ def check_file(file, suffix=''): assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique return files[0] # return file - +''' def check_font(font=FONT, progress=False): # Download font to CONFIG_DIR if necessary font = Path(font) @@ -453,7 +454,18 @@ def check_font(font=FONT, progress=False): url = "https://ultralytics.com/assets/" + font.name LOGGER.info(f'Downloading {url} to {file}...') torch.hub.download_url_to_file(url, str(file), progress=progress) - +''' +def check_font(font="Arial.ttf", size=10, progress=False): + # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary + font = Path(font) + font = font if font.exists() else (CONFIG_DIR / font.name) + try: + return ImageFont.truetype(str(font) if font.exists() else font.name, size) + except Exception as e: # download if missing + url = "https://ultralytics.com/assets/" + font.name + print(f"Downloading {url} to {font}...") + torch.hub.download_url_to_file(url, str(font), progress=progress) + return ImageFont.truetype(str(font), size) def check_dataset(data, autodownload=True): # Download, check and/or unzip dataset if not found locally diff --git a/utils/loggers/__init__.py 
b/utils/loggers/__init__.py index 42b696ba644f..bf95d82203b8 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -3,6 +3,7 @@ Logging utils """ +from ast import Import import os import warnings @@ -12,7 +13,7 @@ from utils.general import colorstr, cv2, emojis from utils.loggers.wandb.wandb_utils import WandbLogger -from utils.plots import plot_images, plot_results +from utils.plots import plot_images, plot_results, plot_results_with_masks, plot_images_and_masks from utils.torch_utils import de_parallel LOGGERS = ('csv', 'tb', 'wandb') # text-file, TensorBoard, Weights & Biases @@ -157,8 +158,9 @@ def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1: self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi) - def on_train_end(self, last, best, plots, epoch, results): + def on_train_end(self, last, best, plots, epoch, results, masks=False): # Callback runs on training end + plot_results = plot_results_with_masks if masks else plot_results if plots: plot_results(file=self.save_dir / 'results.csv') # save results.png files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] @@ -185,3 +187,244 @@ def on_params_update(self, params): # params: A dict containing {param: value} pairs if self.wandb: self.wandb.wandb_run.config.update(params, allow_val_change=True) + +from threading import Thread + +class NewLoggers: + """Loggers without wandb, cause I don't really use `wandb` and `wandb` related codes are noisy.""" + def __init__( + self, + save_dir=None, + opt=None, + logger=None, + include=LOGGERS, + ): + self.save_dir = save_dir + self.opt = opt + self.logger = logger # for printing results to console + self.include = include + self.keys = [ + "train/box_loss", + "train/obj_loss", + "train/cls_loss", # train loss + "metrics/precision", + "metrics/recall", + "metrics/mAP_0.5", + "metrics/mAP_0.5:0.95", # metrics + "val/box_loss", + "val/obj_loss", + "val/cls_loss", # val loss + "x/lr0", + "x/lr1", + "x/lr2", + ] # params + self.best_keys = [ + "best/epoch", + "best/precision", + "best/recall", + "best/mAP_0.5", + "best/mAP_0.5:0.95", + ] + for k in LOGGERS: + setattr(self, k, None) # init empty logger dictionary + self.csv = True # always log to csv + + # TensorBoard + s = self.save_dir + if "tb" in self.include and s.exists(): + prefix = colorstr("TensorBoard: ") + self.logger.info( + f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/" + ) + self.tb = SummaryWriter(str(s)) + try: + import wandb + from wandb import __version__ + wandb.init(project="YOLOv5-Inst-seg", config=opt) + except ImportError: + wandb = None + pass + self.wandb = wandb + + def on_pretrain_routine_end(self): + pass + + def on_train_batch_end( + self, ni, model, imgs, targets, masks, paths, plots, sync_bn, plot_idx + ): + # Callback runs on train batch end + if plots and self.save_dir.exists(): + if ni == 0: + if ( + not sync_bn + ): # tb.add_graph() --sync known issue https://github.com/ultralytics/yolov5/issues/3754 + with warnings.catch_warnings(): + warnings.simplefilter("ignore") # suppress jit trace warning + self.tb.add_graph( + torch.jit.trace( + de_parallel(model), imgs[0:1], strict=False + ), + [], + ) + if plot_idx is not None and ni in plot_idx: + f = self.save_dir / f"train_batch{ni}.jpg" # filename + Thread( + target=plot_images, args=(imgs, targets, paths, f), daemon=True + 
).start() + # if ni < 3: + # f = self.save_dir / f'train_batch{ni}.jpg' # filename + # Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start() + + def on_train_epoch_end(self, epoch): + # Callback runs on train epoch end + pass + + def on_val_image_end(self, imgs, targets, masks, paths): + # Callback runs on val image end + pass + + def on_val_end(self): + # Callback runs on val end + pass + + def on_fit_epoch_end(self, vals, epoch): + # Callback runs at the end of each fit (train+val) epoch + x = {k: v for k, v in zip(self.keys, vals)} # dict + if self.csv and self.save_dir.exists(): + file = self.save_dir / "results.csv" + n = len(x) + 1 # number of cols + s = ( + "" + if file.exists() + else (("%20s," * n % tuple(["epoch"] + self.keys)).rstrip(",") + "\n") + ) # add header + with open(file, "a") as f: + f.write(s + ("%20.5g," * n % tuple([epoch] + vals)).rstrip(",") + "\n") + + if self.tb: + for k, v in x.items(): + self.tb.add_scalar(k, v, epoch) + if self.wandb: + wandb.log(x) + + def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): + # Callback runs on model save event + pass + + def on_train_end(self, plots, epoch, masks=False): + plts = plot_results_with_masks if masks else plot_results + # Callback runs on training end + if plots and self.save_dir.exists(): + plts(file=self.save_dir / "results.csv") # save results.png + files = [ + "results.png", + "confusion_matrix.png", + *(f"{x}_curve.png" for x in ("F1", "PR", "P", "R")), + ] + files = [ + (self.save_dir / f) for f in files if (self.save_dir / f).exists() + ] # filter + + if self.tb: + import cv2 + + for f in files: + self.tb.add_image( + f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats="HWC" + ) + + def on_params_update(self): + # Update hyperparams or configs of the experiment + # params: A dict containing {param: value} pairs + pass + +class NewLoggersMask(NewLoggers): + def __init__( + self, + save_dir=None, + opt=None, + logger=None, + include=LOGGERS, + ): + super().__init__(save_dir, opt, logger, include) + self.keys = [ + "train/box_loss", + "train/seg_loss", # train loss + "train/obj_loss", + "train/cls_loss", + "metrics/precision(B)", + "metrics/recall(B)", + "metrics/mAP_0.5(B)", + "metrics/mAP_0.5:0.95(B)", # metrics + "metrics/precision(M)", + "metrics/recall(M)", + "metrics/mAP_0.5(M)", + "metrics/mAP_0.5:0.95(M)", # metrics + "val/box_loss", + "val/seg_loss", # val loss + "val/obj_loss", + "val/cls_loss", + "x/lr0", + "x/lr1", + "x/lr2", + ] # params + self.best_keys = [ + "best/epoch", + "best/precision", + "best/recall", + "best/mAP_0.5", + "best/mAP_0.5:0.95", + ] + + + def on_train_batch_end( + self, ni, model, imgs, targets, masks, paths, plots, sync_bn, plot_idx + ): + # Callback runs on train batch end + if plots and self.save_dir.exists(): + if ni == 0: + if ( + not sync_bn + ): # tb.add_graph() --sync known issue https://github.com/ultralytics/yolov5/issues/3754 + with warnings.catch_warnings(): + warnings.simplefilter("ignore") # suppress jit trace warning + self.tb.add_graph( + torch.jit.trace( + de_parallel(model), imgs[0:1], strict=False + ), + [], + ) + if plot_idx is not None and ni in plot_idx: + # if ni < 3: + f = self.save_dir / f"train_batch{ni}.jpg" # filename + Thread( + target=plot_images_and_masks, + args=(imgs, targets, masks, paths, f), + daemon=True, + ).start() + if ni==0: + if self.wandb: + res = plot_images_and_masks(imgs, targets, masks, paths) + wandb.log({"train_labels": wandb.Image(res)}) + + + + def on_fit_epoch_end(self, vals, 
epoch): + # Callback runs at the end of each fit (train+val) epoch + x = {k: v for k, v in zip(self.keys, vals)} # dict + if self.csv and self.save_dir.exists(): + file = self.save_dir / "results.csv" + n = len(x) + 1 # number of cols + s = ( + "" + if file.exists() + else (("%20s," * n % tuple(["epoch"] + self.keys)).rstrip(",") + "\n") + ) # add header + with open(file, "a") as f: + f.write(s + ("%20.5g," * n % tuple([epoch] + vals)).rstrip(",") + "\n") + + if self.tb: + for k, v in x.items(): + self.tb.add_scalar(k, v, epoch) + if self.wandb: + wandb.log(x) diff --git a/utils/metrics.py b/utils/metrics.py index e17747b703fa..8646931bed00 100644 --- a/utils/metrics.py +++ b/utils/metrics.py @@ -5,6 +5,7 @@ import math import warnings +from easydict import EasyDict as edict from pathlib import Path import matplotlib.pyplot as plt @@ -12,22 +13,19 @@ import torch -def fitness(x): +def fitness(x, masks=False): # Model fitness as a weighted combination of metrics + if masks: + w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] + return (x[:, :8] * w).sum(1) w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] return (x[:, :4] * w).sum(1) -def smooth(y, f=0.05): - # Box filter of fraction f - nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd) - p = np.ones(nf // 2) # ones padding - yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded - return np.convolve(yp, np.ones(nf) / nf, mode='valid') # y-smoothed - - -def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16): - """ Compute the average precision, given the recall and precision curves. +def ap_per_class( + tp, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), prefix="" +): + """Compute the average precision, given the recall and precision curves. Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. # Arguments tp: True positives (nparray, nx1 or nx10). @@ -35,7 +33,8 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names pred_cls: Predicted object classes (nparray). target_cls: True object classes (nparray). plot: Plot precision-recall curve at mAP@0.5 - save_dir: Plot save directory + save_dir: Plot save directory. + prefix: prefix. # Returns The average precision as computed in py-faster-rcnn. 
""" @@ -45,7 +44,7 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] # Find unique classes - unique_classes, nt = np.unique(target_cls, return_counts=True) + unique_classes = np.unique(target_cls) nc = unique_classes.shape[0] # number of classes, number of detections # Create Precision-Recall curve and compute AP for each class @@ -53,48 +52,114 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) for ci, c in enumerate(unique_classes): i = pred_cls == c - n_l = nt[ci] # number of labels + n_l = (target_cls == c).sum() # number of labels n_p = i.sum() # number of predictions + if n_p == 0 or n_l == 0: continue - - # Accumulate FPs and TPs - fpc = (1 - tp[i]).cumsum(0) - tpc = tp[i].cumsum(0) - - # Recall - recall = tpc / (n_l + eps) # recall curve - r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases - - # Precision - precision = tpc / (tpc + fpc) # precision curve - p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score - - # AP from recall-precision curve - for j in range(tp.shape[1]): - ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) - if plot and j == 0: - py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 + else: + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum(0) + tpc = tp[i].cumsum(0) + + # Recall + recall = tpc / (n_l + 1e-16) # recall curve + r[ci] = np.interp( + -px, -conf[i], recall[:, 0], left=0 + ) # negative x, xp because xp decreases + + # Precision + precision = tpc / (tpc + fpc) # precision curve + p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score + + # AP from recall-precision curve + for j in range(tp.shape[1]): + ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) + if plot and j == 0: + py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 # Compute F1 (harmonic mean of precision and recall) - f1 = 2 * p * r / (p + r + eps) - names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data - names = dict(enumerate(names)) # to dict - if plot: - plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names) - plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1') - plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision') - plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall') - - i = smooth(f1.mean(0), 0.1).argmax() # max F1 index - p, r, f1 = p[:, i], r[:, i], f1[:, i] - tp = (r * nt).round() # true positives - fp = (tp / (p + eps) - tp).round() # false positives - return tp, fp, p, r, f1, ap, unique_classes.astype(int) + f1 = 2 * p * r / (p + r + 1e-16) + names = [ + v for k, v in names.items() if k in unique_classes + ] # list: only classes that have data + names = {i: v for i, v in enumerate(names)} # to dict + if plot and save_dir is not None: + plot_pr_curve(px, py, ap, Path(save_dir) / f"{prefix}PR_curve.png", names) + plot_mc_curve( + px, f1, Path(save_dir) / f"{prefix}F1_curve.png", names, ylabel="F1" + ) + plot_mc_curve( + px, p, Path(save_dir) / f"{prefix}P_curve.png", names, ylabel="Precision" + ) + plot_mc_curve( + px, r, Path(save_dir) / f"{prefix}R_curve.png", names, ylabel="Recall" + ) + + i = f1.mean(0).argmax() # max F1 index + return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype("int32") + + +def 
ap_per_class_box_and_mask( + tp_m, + tp_b, + conf, + pred_cls, + target_cls, + plot=False, + save_dir=".", + names=(), +): + """ + Args: + tp_b: tp of boxes. + tp_m: tp of masks. + other arguments see `func: ap_per_class`. + """ + results_boxes = ap_per_class( + tp_b, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Box", + ) + results_masks = ap_per_class( + tp_m, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Mask", + ) + + results = edict( + { + "boxes": { + "p": results_boxes[0], + "r": results_boxes[1], + "ap": results_boxes[2], + "f1": results_boxes[3], + "ap_class": results_boxes[4], + }, + "masks": { + "p": results_masks[0], + "r": results_masks[1], + "ap": results_masks[2], + "f1": results_masks[3], + "ap_class": results_masks[4], + }, + } + ) + return results def compute_ap(recall, precision): - """ Compute the average precision, given the recall and precision curves + """Compute the average precision, given the recall and precision curves # Arguments recall: The recall curve (list) precision: The precision curve (list) @@ -110,8 +175,8 @@ def compute_ap(recall, precision): mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) # Integrate area under curve - method = 'interp' # methods: 'continuous', 'interp' - if method == 'interp': + method = "interp" # methods: 'continuous', 'interp' + if method == "interp": x = np.linspace(0, 1, 101) # 101-point interp (COCO) ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate else: # 'continuous' @@ -146,7 +211,11 @@ def process_batch(self, detections, labels): x = torch.where(iou > self.iou_thres) if x[0].shape[0]: - matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() + matches = ( + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) + .cpu() + .numpy() + ) if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -156,7 +225,7 @@ def process_batch(self, detections, labels): matches = np.zeros((0, 3)) n = matches.shape[0] > 0 - m0, m1, _ = matches.transpose().astype(int) + m0, m1, _ = matches.transpose().astype(np.int16) for i, gc in enumerate(gt_classes): j = m0 == i if n and sum(j) == 1: @@ -172,91 +241,98 @@ def process_batch(self, detections, labels): def matrix(self): return self.matrix - def tp_fp(self): - tp = self.matrix.diagonal() # true positives - fp = self.matrix.sum(1) - tp # false positives - # fn = self.matrix.sum(0) - tp # false negatives (missed detections) - return tp[:-1], fp[:-1] # remove background class - - def plot(self, normalize=True, save_dir='', names=()): + def plot(self, normalize=True, save_dir="", names=()): try: import seaborn as sn - array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1E-9) if normalize else 1) # normalize columns + array = self.matrix / ( + (self.matrix.sum(0).reshape(1, -1) + 1e-6) if normalize else 1 + ) # normalize columns array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) fig = plt.figure(figsize=(12, 9), tight_layout=True) - nc, nn = self.nc, len(names) # number of classes, names - sn.set(font_scale=1.0 if nc < 50 else 0.8) # for label size - labels = (0 < nn < 99) and (nn == nc) # apply names to ticklabels + sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size + labels = (0 < len(names) < 99) and len( + names + ) == self.nc # apply names to ticklabels with warnings.catch_warnings(): - warnings.simplefilter('ignore') # suppress empty 
matrix RuntimeWarning: All-NaN slice encountered - sn.heatmap(array, - annot=nc < 30, - annot_kws={ - "size": 8}, - cmap='Blues', - fmt='.2f', - square=True, - vmin=0.0, - xticklabels=names + ['background FP'] if labels else "auto", - yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1)) - fig.axes[0].set_xlabel('True') - fig.axes[0].set_ylabel('Predicted') - fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) + warnings.simplefilter( + "ignore" + ) # suppress empty matrix RuntimeWarning: All-NaN slice encountered + sn.heatmap( + array, + annot=self.nc < 30, + annot_kws={"size": 8}, + cmap="Blues", + fmt=".2f", + square=True, + xticklabels=names + ["background FP"] if labels else "auto", + yticklabels=names + ["background FN"] if labels else "auto", + ).set_facecolor((1, 1, 1)) + fig.axes[0].set_xlabel("True") + fig.axes[0].set_ylabel("Predicted") + fig.savefig(Path(save_dir) / "confusion_matrix.png", dpi=250) plt.close() except Exception as e: - print(f'WARNING: ConfusionMatrix plot failure: {e}') + print(f"WARNING: ConfusionMatrix plot failure: {e}") def print(self): for i in range(self.nc + 1): - print(' '.join(map(str, self.matrix[i]))) + print(" ".join(map(str, self.matrix[i]))) -def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): - # Returns Intersection over Union (IoU) of box1(1,4) to box2(n,4) +def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): + # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4 + box2 = box2.T # Get the coordinates of bounding boxes - if xywh: # transform from xywh to xyxy - (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, 1), box2.chunk(4, 1) - w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2 - b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_ - b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_ - else: # x1, y1, x2, y2 = box1 - b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, 1) - b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, 1) - w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps - w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps + if x1y1x2y2: # x1, y1, x2, y2 = box1 + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + else: # transform from xywh to xyxy + b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 + b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 + b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 + b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 # Intersection area - inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ - (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) + inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * ( + torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1) + ).clamp(0) # Union Area + w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps + w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps union = w1 * h1 + w2 * h2 - inter + eps - # IoU iou = inter / union - if CIoU or DIoU or GIoU: - cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width + if GIoU or DIoU or CIoU: + cw = torch.max(b1_x2, b2_x2) - torch.min( + b1_x1, b2_x1 + ) # convex (smallest enclosing box) width ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared - rho2 = ((b2_x1 + b2_x2 - b1_x1 - 
b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center dist ** 2 - if CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 - v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) + rho2 = ( + (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2 + ) / 4 # center distance squared + if DIoU: + return iou - rho2 / c2 # DIoU + elif ( + CIoU + ): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + v = (4 / math.pi ** 2) * torch.pow( + torch.atan(w2 / h2) - torch.atan(w1 / h1), 2 + ) with torch.no_grad(): alpha = v / (v - iou + (1 + eps)) return iou - (rho2 / c2 + v * alpha) # CIoU - return iou - rho2 / c2 # DIoU - c_area = cw * ch + eps # convex area - return iou - (c_area - union) / c_area # GIoU https://arxiv.org/pdf/1902.09630.pdf - return iou # IoU - - -def box_area(box): - # box = xyxy(4,n) - return (box[2] - box[0]) * (box[3] - box[1]) + else: # GIoU https://arxiv.org/pdf/1902.09630.pdf + c_area = cw * ch + eps # convex area + return iou - (c_area - union) / c_area # GIoU + else: + return iou # IoU def box_iou(box1, box2): @@ -272,28 +348,44 @@ def box_iou(box1, box2): IoU values for every element in boxes1 and boxes2 """ - # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) - (a1, a2), (b1, b2) = box1[:, None].chunk(2, 2), box2.chunk(2, 1) - inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2) - - # IoU = inter / (area1 + area2 - inter) - return inter / (box_area(box1.T)[:, None] + box_area(box2.T) - inter) + def box_area(box): + # box = 4xn + return (box[2] - box[0]) * (box[3] - box[1]) + area1 = box_area(box1.T) + area2 = box_area(box2.T) -def bbox_ioa(box1, box2, eps=1E-7): - """ Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2 + # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) + inter = ( + ( + torch.min(box1[:, None, 2:], box2[:, 2:]) + - torch.max(box1[:, None, :2], box2[:, :2]) + ) + .clamp(0) + .prod(2) + ) + return inter / ( + area1[:, None] + area2 - inter + ) # iou = inter / (area1 + area2 - inter) + + +def bbox_ioa(box1, box2, eps=1e-7): + """Returns the intersection over box2 area given box1, box2. 
Boxes are x1y1x2y2 box1: np.array of shape(4) box2: np.array of shape(nx4) returns: np.array of shape(n) """ + box2 = box2.transpose() + # Get the coordinates of bounding boxes - b1_x1, b1_y1, b1_x2, b1_y2 = box1 - b2_x1, b2_y1, b2_x2, b2_y2 = box2.T + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] # Intersection area - inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \ - (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0) + inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * ( + np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1) + ).clip(0) # box2 area box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps @@ -307,49 +399,67 @@ def wh_iou(wh1, wh2): wh1 = wh1[:, None] # [N,1,2] wh2 = wh2[None] # [1,M,2] inter = torch.min(wh1, wh2).prod(2) # [N,M] - return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter) + return inter / ( + wh1.prod(2) + wh2.prod(2) - inter + ) # iou = inter / (area1 + area2 - inter) # Plots ---------------------------------------------------------------------------------------------------------------- -def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=()): +def plot_pr_curve(px, py, ap, save_dir="pr_curve.png", names=()): # Precision-recall curve fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) py = np.stack(py, axis=1) if 0 < len(names) < 21: # display per-class legend if < 21 classes for i, y in enumerate(py.T): - ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision) + ax.plot( + px, y, linewidth=1, label=f"{names[i]} {ap[i, 0]:.3f}" + ) # plot(recall, precision) else: - ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision) - - ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) - ax.set_xlabel('Recall') - ax.set_ylabel('Precision') + ax.plot(px, py, linewidth=1, color="grey") # plot(recall, precision) + + ax.plot( + px, + py.mean(1), + linewidth=3, + color="blue", + label="all classes %.3f mAP@0.5" % ap[:, 0].mean(), + ) + ax.set_xlabel("Recall") + ax.set_ylabel("Precision") ax.set_xlim(0, 1) ax.set_ylim(0, 1) plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") - fig.savefig(save_dir, dpi=250) + fig.savefig(Path(save_dir), dpi=250) plt.close() -def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabel='Confidence', ylabel='Metric'): +def plot_mc_curve( + px, py, save_dir="mc_curve.png", names=(), xlabel="Confidence", ylabel="Metric" +): # Metric-confidence curve fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) if 0 < len(names) < 21: # display per-class legend if < 21 classes for i, y in enumerate(py): - ax.plot(px, y, linewidth=1, label=f'{names[i]}') # plot(confidence, metric) + ax.plot(px, y, linewidth=1, label=f"{names[i]}") # plot(confidence, metric) else: - ax.plot(px, py.T, linewidth=1, color='grey') # plot(confidence, metric) - - y = smooth(py.mean(0), 0.05) - ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}') + ax.plot(px, py.T, linewidth=1, color="grey") # plot(confidence, metric) + + y = py.mean(0) + ax.plot( + px, + y, + linewidth=3, + color="blue", + label=f"all classes {y.max():.2f} at {px[y.argmax()]:.3f}", + ) ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) ax.set_xlim(0, 1) ax.set_ylim(0, 1) plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") - 
fig.savefig(save_dir, dpi=250) + fig.savefig(Path(save_dir), dpi=250) plt.close() diff --git a/utils/plots.py b/utils/plots.py index 1bbb9c09c33a..94e59fc8866c 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -487,3 +487,902 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, # cv2.imwrite(f, crop) # https://github.com/ultralytics/yolov5/issues/7007 chroma subsampling issue Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)).save(f, quality=95, subsampling=0) return crop + + +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Plotting utils +""" + +import math +import os +from copy import copy +from pathlib import Path + +import cv2 +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sn +import torch +from PIL import Image, ImageDraw +from itertools import repeat + +from .metrics import fitness + +# Settings +RANK = int(os.getenv("RANK", -1)) +matplotlib.rc("font", **{"size": 11}) +matplotlib.use("Agg") # for writing to files only + + +class Colors: + # Ultralytics color palette https://ultralytics.com/ + def __init__(self): + # hex = matplotlib.colors.TABLEAU_COLORS.values() + hex = ( + "FF3838", + "FF9D97", + "FF701F", + "FFB21D", + "CFD231", + "48F90A", + "92CC17", + "3DDB86", + "1A9334", + "00D4BB", + "2C99A8", + "00C2FF", + "344593", + "6473FF", + "0018EC", + "8438FF", + "520085", + "CB38FF", + "FF95C8", + "FF37C7", + ) + self.palette = [self.hex2rgb("#" + c) for c in hex] + self.n = len(self.palette) + + def __call__(self, i, bgr=False): + c = self.palette[int(i) % self.n] + return (c[2], c[1], c[0]) if bgr else c + + @staticmethod + def hex2rgb(h): # rgb order (PIL) + return tuple(int(h[1 + i : 1 + i + 2], 16) for i in (0, 2, 4)) + + +colors = Colors() # create instance for 'from utils.plots import colors' + + +class Annotator: + if RANK in (-1, 0): + check_font() # download TTF if necessary + + # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations + def __init__( + self, + im, + line_width=None, + font_size=None, + font="Arial.ttf", + pil=False, + example="abc", + ): + assert ( + im.data.contiguous + ), "Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images." 
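# The contiguity assert above matters because transposed or sliced views are
# not C-contiguous, and the PIL/cv2 drawing paths generally expect contiguous
# memory. A hypothetical caller-side guard (np assumed imported as numpy):
#   im = np.ascontiguousarray(im)   # copies only if im is not already contiguous
#   assert im.data.contiguous       # now always passes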
+ self.pil = pil or not is_ascii(example) + if self.pil: # use PIL + self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) + self.draw = ImageDraw.Draw(self.im) + self.font = check_font( + font="Arial.Unicode.ttf", + ) + else: # use cv2 + self.im = im + self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width + + def box_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255)): + # Add one xyxy box to image with label + if self.pil or not is_ascii(label): + self.draw.rectangle(box, width=self.lw, outline=color) # box + if label: + w, h = self.font.getsize(label) # text width, height + outside = box[1] - h >= 0 # label fits outside box + self.draw.rectangle( + [ + box[0], + box[1] - h if outside else box[1], + box[0] + w + 1, + box[1] + 1 if outside else box[1] + h + 1, + ], + fill=color, + ) + # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0 + self.draw.text( + (box[0], box[1] - h if outside else box[1]), + label, + fill=txt_color, + font=self.font, + ) + else: # cv2 + p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) + cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA) + if label: + tf = max(self.lw - 1, 1) # font thickness + w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[ + 0 + ] # text width, height + outside = p1[1] - h - 3 >= 0 # label fits outside box + p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 + cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled + cv2.putText( + self.im, + label, + (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), + 0, + self.lw / 3, + txt_color, + thickness=tf, + lineType=cv2.LINE_AA, + ) + + def rectangle(self, xy, fill=None, outline=None, width=1): + # Add rectangle to image (PIL-only) + self.draw.rectangle(xy, fill, outline, width) + + def text(self, xy, text, txt_color=(255, 255, 255)): + # Add text to image (PIL-only) + w, h = self.font.getsize(text) # text width, height + self.draw.text((xy[0], xy[1] - h + 1), text, fill=txt_color, font=self.font) + + def result(self): + # Return annotated image as array + return np.asarray(self.im) + +class Visualizer(object): + """Visualization of one model.""" + def __init__(self, names) -> None: + super().__init__() + self.names = names + + def draw_one_img(self, img, output, vis_conf=0.4): + """Visualize one images. + + Args: + imgs (numpy.ndarray): one image. + outputs (torch.Tensor): one output, (num_boxes, classes+5) + vis_confs (float, optional): Visualize threshold. + Return: + img (numpy.ndarray): Image after visualization. + """ + if isinstance(output, list): + output = output[0] + if output is None or len(output) == 0: + return img + for (*xyxy, conf, cls) in reversed(output[:, :6]): + if conf < vis_conf: + continue + label = '%s %.2f' % (self.names[int(cls)], conf) + color = colors(int(cls)) + plot_one_box(xyxy, img, label=label, + color=color, + line_thickness=2) + return img + + def draw_multi_img(self, imgs, outputs, vis_confs=0.4): + """Visualize multi images. + + Args: + imgs (List[numpy.array]): multi images. + outputs (List[torch.Tensor]): multi outputs, List[num_boxes, classes+5]. + vis_confs (float | tuple[float], optional): Visualize threshold. + Return: + imgs (List[numpy.ndarray]): Images after visualization. 
+ """ + if isinstance(vis_confs, float): + vis_confs = list(repeat(vis_confs, len(imgs))) + assert len(imgs) == len(outputs) == len(vis_confs) + for i, output in enumerate(outputs): # detections per image + self.draw_one_img(imgs[i], output, vis_confs[i]) + return imgs + + def draw_imgs(self, imgs, outputs, vis_confs=0.4): + if isinstance(imgs, np.ndarray): + return self.draw_one_img(imgs, outputs, vis_confs) + else: + return self.draw_multi_img(imgs, outputs, vis_confs) + + def __call__(self, imgs, outputs, vis_confs=0.4): + return self.draw_imgs(imgs, outputs, vis_confs) + + +def hist2d(x, y, n=100): + # 2d histogram used in labels.png and evolve.png + xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n) + hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges)) + xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1) + yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1) + return np.log(hist[xidx, yidx]) + + +def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5): + from scipy.signal import butter, filtfilt + + # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy + def butter_lowpass(cutoff, fs, order): + nyq = 0.5 * fs + normal_cutoff = cutoff / nyq + return butter(order, normal_cutoff, btype="low", analog=False) + + b, a = butter_lowpass(cutoff, fs, order=order) + return filtfilt(b, a, data) # forward-backward filter + + +def output_to_target(output): + # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] + targets = [] + for i, o in enumerate(output): + for *box, conf, cls in o.cpu().numpy()[:, :6]: + targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) + return np.array(targets) + + +def plot_images( + images, + targets, + paths=None, + fname="images.jpg", + names=None, + max_size=1920, + max_subplots=16, +): + # Plot image grid with labels + if isinstance(images, torch.Tensor): + images = images.cpu().float().numpy() + if isinstance(targets, torch.Tensor): + targets = targets.cpu().numpy() + if np.max(images[0]) <= 1: + images *= 255.0 # de-normalise (optional) + bs, _, h, w = images.shape # batch size, _, height, width + bs = min(bs, max_subplots) # limit plot images + ns = np.ceil(bs ** 0.5) # number of subplots (square) + + # Build Image + mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init + for i, im in enumerate(images): + if i == max_subplots: # if last batch has fewer images than we expect + break + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + im = im.transpose(1, 2, 0) + mosaic[y : y + h, x : x + w, :] = im + + # Resize (optional) + scale = max_size / ns / max(h, w) + if scale < 1: + h = math.ceil(scale * h) + w = math.ceil(scale * w) + mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) + + # Annotate + fs = int((h + w) * ns * 0.01) # font size + annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True) + for i in range(i + 1): + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders + if paths: + annotator.text( + (x + 5, y + 5 + h), + text=Path(paths[i]).name[:40], + txt_color=(220, 220, 220), + ) # filenames + if len(targets) > 0: + ti = targets[targets[:, 0] == i] # image targets + boxes = xywh2xyxy(ti[:, 2:6]).T + classes = ti[:, 1].astype("int") + labels = ti.shape[1] == 6 # labels if no conf column + conf = None if labels else ti[:, 6] # check for confidence 
presence (label vs pred) + + if boxes.shape[1]: + if boxes.max() <= 1.01: # if normalized with tolerance 0.01 + boxes[[0, 2]] *= w # scale to pixels + boxes[[1, 3]] *= h + elif scale < 1: # absolute coords need scale if image scales + boxes *= scale + boxes[[0, 2]] += x + boxes[[1, 3]] += y + for j, box in enumerate(boxes.T.tolist()): + cls = classes[j] + color = colors(cls) + cls = names[cls] if names else cls + if labels or conf[j] > 0.25: # 0.25 conf thresh + label = f"{cls}" if labels else f"{cls} {conf[j]:.1f}" + annotator.box_label(box, label, color=color) + annotator.im.save(fname) # save + return annotator.result() + + +def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=""): + # Plot LR simulating training for full epochs + optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals + y = [] + for _ in range(epochs): + scheduler.step() + y.append(optimizer.param_groups[0]["lr"]) + plt.plot(y, ".-", label="LR") + plt.xlabel("epoch") + plt.ylabel("LR") + plt.grid() + plt.xlim(0, epochs) + plt.ylim(0) + plt.savefig(Path(save_dir) / "LR.png", dpi=200) + plt.close() + + +def plot_val_txt(): # from utils.plots import *; plot_val() + # Plot val.txt histograms + x = np.loadtxt("val.txt", dtype=np.float32) + box = xyxy2xywh(x[:, :4]) + cx, cy = box[:, 0], box[:, 1] + + fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True) + ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0) + ax.set_aspect("equal") + plt.savefig("hist2d.png", dpi=300) + + fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True) + ax[0].hist(cx, bins=600) + ax[1].hist(cy, bins=600) + plt.savefig("hist1d.png", dpi=200) + + +def plot_targets_txt(): # from utils.plots import *; plot_targets_txt() + # Plot targets.txt histograms + x = np.loadtxt("targets.txt", dtype=np.float32).T + s = ["x targets", "y targets", "width targets", "height targets"] + fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) + ax = ax.ravel() + for i in range(4): + ax[i].hist(x[i], bins=100, label="%.3g +/- %.3g" % (x[i].mean(), x[i].std())) + ax[i].legend() + ax[i].set_title(s[i]) + plt.savefig("targets.jpg", dpi=200) + + +def plot_val_study(file="", dir="", x=None): # from utils.plots import *; plot_val_study() + # Plot file=study.txt generated by val.py (or plot all study*.txt in dir) + save_dir = Path(file).parent if file else Path(dir) + plot2 = False # plot additional results + if plot2: + ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)[1].ravel() + + fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True) + # for f in [save_dir / f'study_coco_{x}.txt' for x in ['yolov5n6', 'yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]: + for f in sorted(save_dir.glob("study*.txt")): + y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T + x = np.arange(y.shape[1]) if x is None else np.array(x) + if plot2: + s = [ + "P", + "R", + "mAP@.5", + "mAP@.5:.95", + "t_preprocess (ms/img)", + "t_inference (ms/img)", + "t_NMS (ms/img)", + ] + for i in range(7): + ax[i].plot(x, y[i], ".-", linewidth=2, markersize=8) + ax[i].set_title(s[i]) + + j = y[3].argmax() + 1 + ax2.plot( + y[5, 1:j], + y[3, 1:j] * 1e2, + ".-", + linewidth=2, + markersize=8, + label=f.stem.replace("study_coco_", "").replace("yolo", "YOLO"), + ) + + ax2.plot( + 1e3 / np.array([209, 140, 97, 58, 35, 18]), + [34.6, 40.5, 43.0, 47.5, 49.7, 51.5], + "k.-", + linewidth=2, + markersize=8, + alpha=0.25, + label="EfficientDet", + ) + + ax2.grid(alpha=0.2) + ax2.set_yticks(np.arange(20, 60, 5)) + 
ax2.set_xlim(0, 57) + ax2.set_ylim(25, 55) + ax2.set_xlabel("GPU Speed (ms/img)") + ax2.set_ylabel("COCO AP val") + ax2.legend(loc="lower right") + f = save_dir / "study.png" + print(f"Saving {f}...") + plt.savefig(f, dpi=300) + + +def plot_labels(labels, names=(), save_dir=Path("")): + # plot dataset labels + print("Plotting labels... ") + c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes + nc = int(c.max() + 1) # number of classes + x = pd.DataFrame(b.transpose(), columns=["x", "y", "width", "height"]) + + # seaborn correlogram + sn.pairplot( + x, + corner=True, + diag_kind="auto", + kind="hist", + diag_kws=dict(bins=50), + plot_kws=dict(pmax=0.9), + ) + plt.savefig(save_dir / "labels_correlogram.jpg", dpi=200) + plt.close() + + # matplotlib labels + matplotlib.use("svg") # faster + ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel() + y = ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8) + # [y[2].patches[i].set_color([x / 255 for x in colors(i)]) for i in range(nc)] # update colors bug #3195 + ax[0].set_ylabel("instances") + if 0 < len(names) < 30: + ax[0].set_xticks(range(len(names))) + ax[0].set_xticklabels(names, rotation=90, fontsize=10) + else: + ax[0].set_xlabel("classes") + sn.histplot(x, x="x", y="y", ax=ax[2], bins=50, pmax=0.9) + sn.histplot(x, x="width", y="height", ax=ax[3], bins=50, pmax=0.9) + + # rectangles + labels[:, 1:3] = 0.5 # center + labels[:, 1:] = xywh2xyxy(labels[:, 1:]) * 2000 + img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255) + for cls, *box in labels[:1000]: + ImageDraw.Draw(img).rectangle(box, width=1, outline=colors(cls)) # plot + ax[1].imshow(img) + ax[1].axis("off") + + for a in [0, 1, 2, 3]: + for s in ["top", "right", "left", "bottom"]: + ax[a].spines[s].set_visible(False) + + plt.savefig(save_dir / "labels.jpg", dpi=200) + matplotlib.use("Agg") + plt.close() + + +def profile_idetection(start=0, stop=0, labels=(), save_dir=""): + # Plot iDetection '*.txt' per-image logs. 
from utils.plots import *; profile_idetection() + ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel() + s = [ + "Images", + "Free Storage (GB)", + "RAM Usage (GB)", + "Battery", + "dt_raw (ms)", + "dt_smooth (ms)", + "real-world FPS", + ] + files = list(Path(save_dir).glob("frames*.txt")) + for fi, f in enumerate(files): + try: + results = np.loadtxt(f, ndmin=2).T[:, 90:-30] # clip first and last rows + n = results.shape[1] # number of rows + x = np.arange(start, min(stop, n) if stop else n) + results = results[:, x] + t = results[0] - results[0].min() # set t0=0s + results[0] = x + for i, a in enumerate(ax): + if i < len(results): + label = labels[fi] if len(labels) else f.stem.replace("frames_", "") + a.plot( + t, + results[i], + marker=".", + label=label, + linewidth=1, + markersize=5, + ) + a.set_title(s[i]) + a.set_xlabel("time (s)") + # if fi == len(files) - 1: + # a.set_ylim(bottom=0) + for side in ["top", "right"]: + a.spines[side].set_visible(False) + else: + a.remove() + except Exception as e: + print("Warning: Plotting error for %s; %s" % (f, e)) + ax[1].legend() + plt.savefig(Path(save_dir) / "idetection_profile.png", dpi=200) + + +def plot_evolve( + evolve_csv="path/to/evolve.csv", +): # from utils.plots import *; plot_evolve() + # Plot evolve.csv hyp evolution results + evolve_csv = Path(evolve_csv) + data = pd.read_csv(evolve_csv) + keys = [x.strip() for x in data.columns] + x = data.values + f = fitness(x) + j = np.argmax(f) # max fitness index + plt.figure(figsize=(10, 12), tight_layout=True) + matplotlib.rc("font", **{"size": 8}) + for i, k in enumerate(keys[7:]): + v = x[:, 7 + i] + mu = v[j] # best single result + plt.subplot(6, 5, i + 1) + plt.scatter(v, f, c=hist2d(v, f, 20), cmap="viridis", alpha=0.8, edgecolors="none") + plt.plot(mu, f.max(), "k+", markersize=15) + plt.title("%s = %.3g" % (k, mu), fontdict={"size": 9}) # limit to 40 characters + if i % 5 != 0: + plt.yticks([]) + print("%15s: %.3g" % (k, mu)) + f = evolve_csv.with_suffix(".png") # filename + plt.savefig(f, dpi=200) + plt.close() + print(f"Saved {f}") + + +def plot_results(file="path/to/results.csv", dir="", best=True): + # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') + save_dir = Path(file).parent if file else Path(dir) + fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) + ax = ax.ravel() + files = list(save_dir.glob("results*.csv")) + assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." 
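# plot_results reads the results*.csv files written by the loggers; since
# headers are written with "%20s," they carry left padding, so names must be
# stripped before use (as the loop below does). A minimal sketch of the round
# trip, with the file path assumed for illustration:
import pandas as pd
data = pd.read_csv("runs/train/exp/results.csv")
s = [x.strip() for x in data.columns]  # "      train/box_loss" -> "train/box_loss"
print(s[:4])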
+ for _, f in enumerate(files): + try: + data = pd.read_csv(f) + index = np.argmax(0.9 * data.values[:, 7] + 0.1 * data.values[:, 6]) + s = [x.strip() for x in data.columns] + x = data.values[:, 0] + for i, j in enumerate([1, 2, 3, 4, 5, 8, 9, 10, 6, 7]): + y = data.values[:, j] + # y[y == 0] = np.nan # don't show zero values + ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) + if best: + # best + ax[i].scatter( + index, + y[index], + color="r", + label=f"best:{index}", + marker="*", + linewidth=3, + ) + ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") + else: + # last + ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) + ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}") + # if j in [8, 9, 10]: # share train and val loss y axes + # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) + except Exception as e: + print(f"Warning: Plotting error for {f}: {e}") + ax[1].legend() + fig.savefig(save_dir / "results.png", dpi=200) + plt.close() + + +def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): + # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') + save_dir = Path(file).parent if file else Path(dir) + fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) + ax = ax.ravel() + files = list(save_dir.glob("results*.csv")) + assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." + for _, f in enumerate(files): + try: + data = pd.read_csv(f) + index = np.argmax( + 0.9 * data.values[:, 8] + + 0.1 * data.values[:, 7] + + 0.9 * data.values[:, 12] + + 0.1 * data.values[:, 11], + ) + s = [x.strip() for x in data.columns] + x = data.values[:, 0] + for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): + y = data.values[:, j] + # y[y == 0] = np.nan # don't show zero values + ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) + if best: + # best + ax[i].scatter( + index, + y[index], + color="r", + label=f"best:{index}", + marker="*", + linewidth=3, + ) + ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") + else: + # last + ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) + ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}") + # if j in [8, 9, 10]: # share train and val loss y axes + # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) + except Exception as e: + print(f"Warning: Plotting error for {f}: {e}") + ax[1].legend() + fig.savefig(save_dir / "results.png", dpi=200) + plt.close() + + +def plot_one_box(x, img, color=None, label=None, line_thickness=None): + import random + + # Plots one bounding box on image img + tl = ( + line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 + ) # line/font thickness + color = color or [random.randint(0, 255) for _ in range(3)] + c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) + cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) + if label: + tf = max(tl - 1, 1) # font thickness + t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] + c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 + cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled + cv2.putText( + img, + label, + (c1[0], c1[1] - 2), + 0, + tl / 3, + [225, 255, 255], + thickness=tf, + lineType=cv2.LINE_AA, + ) + + +def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detect/exp")): + """ + x: Features to be visualized + module_type: Module type + stage: Module stage within model + n: Maximum number of 
+    save_dir: Directory to save results
+    """
+    if "Detect" not in module_type:
+        batch, channels, height, width = x.shape  # batch, channels, height, width
+        if height > 1 and width > 1:
+            f = f"stage{stage}_{module_type.split('.')[-1]}_features.png"  # filename
+
+            blocks = torch.chunk(
+                x[0].cpu(), channels, dim=0
+            )  # select batch index 0, block by channels
+            n = min(n, channels)  # number of plots
+            fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True)  # 8 rows x n/8 cols
+            ax = ax.ravel()
+            plt.subplots_adjust(wspace=0.05, hspace=0.05)
+            for i in range(n):
+                ax[i].imshow(blocks[i].squeeze())  # cmap='gray'
+                ax[i].axis("off")
+
+            print(f"Saving {save_dir / f}... ({n}/{channels})")
+            plt.savefig(save_dir / f, dpi=300, bbox_inches="tight")
+            plt.close()
+
+
+def plot_images_and_masks(
+    images,
+    targets,
+    masks,
+    paths=None,
+    fname="images.jpg",
+    names=None,
+    max_size=640,
+    max_subplots=16,
+):
+    # Plot image grid with labels
+    if isinstance(images, torch.Tensor):
+        images = images.cpu().float().numpy()
+    if isinstance(targets, torch.Tensor):
+        targets = targets.cpu().numpy()
+    if isinstance(masks, torch.Tensor):
+        masks = masks.cpu().numpy()
+        masks = masks.astype(int)
+
+    # un-normalise
+    if np.max(images[0]) <= 1:
+        images *= 255
+
+    tl = 3  # line thickness
+    tf = max(tl - 1, 1)  # font thickness
+    bs, _, h, w = images.shape  # batch size, _, height, width
+    bs = min(bs, max_subplots)  # limit plot images
+    ns = np.ceil(bs ** 0.5)  # number of subplots (square)
+
+    # Check if we should resize
+    scale_factor = max_size / max(h, w)
+    if scale_factor < 1:
+        h = math.ceil(scale_factor * h)
+        w = math.ceil(scale_factor * w)
+
+    mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
+    for i, img in enumerate(images):
+        if i == max_subplots:  # if last batch has fewer images than we expect
+            break
+
+        block_x = int(w * (i // ns))
+        block_y = int(h * (i % ns))
+
+        img = img.transpose(1, 2, 0)
+        if scale_factor < 1:
+            img = cv2.resize(img, (w, h))
+
+        mosaic[block_y : block_y + h, block_x : block_x + w, :] = img
+        if len(targets) > 0:
+            idx = (targets[:, 0]).astype(int)
+            image_targets = targets[idx == i]
+            image_masks = masks[idx == i]
+            boxes = xywh2xyxy(image_targets[:, 2:6]).T
+            classes = image_targets[:, 1].astype("int")
+            labels = image_targets.shape[1] == 6  # labels if no conf column
+            conf = (
+                None if labels else image_targets[:, 6]
+            )  # check for confidence presence (label vs pred)
+
+            if boxes.shape[1]:
+                if boxes.max() <= 1.01:  # if normalized with tolerance 0.01
+                    boxes[[0, 2]] *= w  # scale to pixels
+                    boxes[[1, 3]] *= h
+                elif scale_factor < 1:  # absolute coords need scale if image scales
+                    boxes *= scale_factor
+            boxes[[0, 2]] += block_x
+            boxes[[1, 3]] += block_y
+            for j, box in enumerate(boxes.T):
+                cls = int(classes[j])
+                color = colors(cls)
+                cls = names[cls] if names else cls
+                mask = image_masks[j].astype(bool)  # np.bool is deprecated; use builtin bool
+                if labels or conf[j] > 0.25:  # 0.25 conf thresh
+                    label = "%s" % cls if labels else "%s %.1f" % (cls, conf[j])
+                    plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl)
+                    mosaic[block_y : block_y + h, block_x : block_x + w, :][mask] = mosaic[
+                        block_y : block_y + h, block_x : block_x + w, :
+                    ][mask] * 0.35 + (np.array(color) * 0.65)
+
+        # Draw image filename labels
+        if paths:
+            label = Path(paths[i]).name[:40]  # trim to 40 char
+            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
+            cv2.putText(
+                mosaic,
+                label,
+                (block_x + 5, block_y + t_size[1] + 5),
+                0,
+                tl / 3,
+                [220, 220, 220],
+                thickness=tf,
+                lineType=cv2.LINE_AA,
+            )
+
+        # Image border
+        cv2.rectangle(
+            mosaic,
+            (block_x, block_y),
+            (block_x + w, block_y + h),
+            (255, 255, 255),
+            thickness=3,
+        )
+
+    if fname:
+        r = min(1280.0 / max(h, w) / ns, 1.0)  # ratio to limit image size
+        mosaic = cv2.resize(
+            mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA
+        )
+        # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB))  # cv2 save
+        Image.fromarray(mosaic).save(fname)  # PIL save
+    return mosaic
+
+
+def plot_images_boxes_and_masks(
+    images,
+    targets,
+    masks=None,
+    paths=None,
+    fname="images.jpg",
+    names=None,
+    max_size=640,
+    max_subplots=16,
+):
+    if masks is not None:
+        return plot_images_and_masks(images, targets, masks, paths, fname, names, max_size, max_subplots)
+    else:
+        return plot_images(images, targets, paths, fname, names, max_size, max_subplots)
+
+
+def plot_masks(img, masks, colors, alpha=0.5):
+    """
+    Args:
+        img (tensor): img on cuda, shape: [3, h, w], range: [0, 1]
+        masks (tensor): predicted masks on cuda, shape: [n, h, w]
+        colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n]
+    Return:
+        img after drawing masks, shape: [h, w, 3]
+
+    Transforming the colors and moving img_gpu to the CPU take most of the time.
+    """
+    img_gpu = img.clone()
+    num_masks = len(masks)
+    # [n, 1, 1, 3]; building the color tensor this way is faster
+    colors = torch.tensor(colors, device=img.device).float() / 255.0
+    colors = colors[:, None, None, :]
+    # [n, h, w, 1]
+    masks = masks[:, :, :, None]
+    masks_color = masks.repeat(1, 1, 1, 3) * colors * alpha
+    inv_alph_masks = masks * (-alpha) + 1
+    masks_color_summand = masks_color[0]
+    if num_masks > 1:
+        inv_alph_cumul = inv_alph_masks[: (num_masks - 1)].cumprod(dim=0)
+        masks_color_cumul = masks_color[1:] * inv_alph_cumul
+        masks_color_summand += masks_color_cumul.sum(dim=0)
+
+    img_gpu = img_gpu.flip(dims=[0])  # flip channels for OpenCV (RGB -> BGR)
+    img_gpu = img_gpu.permute(1, 2, 0).contiguous()
+    # [h, w, 3]
+    img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
+    return (img_gpu * 255).byte().cpu().numpy()
+
+def visualize(self, images, outputs, out_masks, vis_confs=0.4):
+    """Image visualize.
+    If images is a list of ndarrays, a list is returned; if it is a single
+    ndarray, an ndarray is returned.
+    Args:
+        outputs: bbox+conf+cls, List[torch.Tensor(num_boxes, 6)]xB.
+        out_masks: binary masks, List[torch.Tensor(num_boxes, img_h, img_w)]xB.
+    """
+    ori_type = type(images)
+    # get original shapes, since self.ori_hw will be cleared
+    images = images if isinstance(images, list) else [images]
+    ori_hw = [img.shape[:2] for img in images]
+    # init the list to keep images with masks.
+    # TODO: fix this bug when output is empty.
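+    # Per-image flow below (sketch): keep detections with conf > vis_confs, pick
+    # one color per class, alpha-blend masks onto the image via plot_masks(),
+    # then rescale the result back to the original image size with scale_masks().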
+ masks_images = [] + # draw masks + for i, output in enumerate(outputs): + if output is None or len(output) == 0: + continue + idx = output[:, 4] > vis_confs + masks = out_masks[i][idx] + mcolors = [colors(int(cls)) for cls in output[:, 5]] + # NOTE: this way to draw masks is faster, + # from https://github.com/dbolya/yolact + # image with masks, (img_h, img_w, 3) + img_masks = plot_masks(self.imgs[i], masks, mcolors) + # scale image to original hw + from utils.segment import scale_masks + img_masks = scale_masks(self.imgs[i].shape[1:], img_masks, ori_hw[i]) + masks_images.append(img_masks) + # TODO: make this(ori_type stuff) clean + images = masks_images[0] if (len(masks_images) == 1) and type(masks_images) != ori_type else images[0] + return self.vis(images, outputs, vis_confs) \ No newline at end of file diff --git a/utils/seg_loss.py b/utils/seg_loss.py new file mode 100644 index 000000000000..d4cf26401bc6 --- /dev/null +++ b/utils/seg_loss.py @@ -0,0 +1,459 @@ +import torch +from utils.torch_utils import de_parallel, is_parallel +from utils.general import xywh2xyxy +from utils.segment import mask_iou, masks_iou, crop +import torch.nn.functional as F +import torch.nn as nn +from utils.loss import smooth_BCE, FocalLoss + + +class ComputeLoss: + # Compute losses + def __init__(self, model, autobalance=False): + self.sort_obj_iou = False + device = next(model.parameters()).device # get model device + h = model.hyp # hyperparameters + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device)) + BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["obj_pw"]], device=device)) + + self.mask_loss = MaskIOULoss() + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE( + eps=h.get("label_smoothing", 0.0) + ) # positive, negative BCE targets + + # Focal loss + g = h["fl_gamma"] # focal loss gamma + if g > 0: + BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) + + det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index + self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = ( + BCEcls, + BCEobj, + 1.0, + h, + autobalance, + ) + for k in "na", "nc", "nl", "anchors", "nm": + if hasattr(det, k): + setattr(self, k, getattr(det, k)) + + def __call__(self, p, targets, masks=None): # predictions, targets, model + if masks is not None: + return self.loss_segment(p, targets, masks) + return self.loss_detection(p, targets) + + def loss_detection(self, p, targets): + device = targets.device + lcls, lbox, lobj = ( + torch.zeros(1, device=device), + torch.zeros(1, device=device), + torch.zeros(1, device=device), + ) + tcls, tbox, indices, anchors = self.build_targets(p, targets) # targets + + # Losses + for i, pi in enumerate(p): # layer index, layer predictions + b, a, gj, gi = indices[i] # image, anchor, gridy, gridx + tobj = torch.zeros_like(pi[..., 0], device=device) # target obj + + n = b.shape[0] # number of targets + if n: + ps = pi[b, a, gj, gi] # prediction subset corresponding to targets + + # Regression + pxy = ps[:, :2].sigmoid() * 2.0 - 0.5 + pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] + pbox = torch.cat((pxy, pwh), 1) # predicted box + iou = bbox_iou( + pbox.T, tbox[i], x1y1x2y2=False, CIoU=True + ) # iou(prediction, target) + lbox += (1.0 - iou).mean() # iou loss + + # 
Objectness
+                score_iou = iou.detach().clamp(0).type(tobj.dtype)
+                if self.sort_obj_iou:
+                    sort_id = torch.argsort(score_iou)
+                    b, a, gj, gi, score_iou = (
+                        b[sort_id],
+                        a[sort_id],
+                        gj[sort_id],
+                        gi[sort_id],
+                        score_iou[sort_id],
+                    )
+                tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * score_iou  # iou ratio
+
+                # Classification
+                if self.nc > 1:  # cls loss (only if multiple classes)
+                    t = torch.full_like(ps[:, 5:], self.cn, device=device)  # targets
+                    t[range(n), tcls[i]] = self.cp
+                    lcls += self.BCEcls(ps[:, 5:], t)  # BCE
+
+                # Append targets to text file
+                # with open('targets.txt', 'a') as file:
+                #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
+
+            obji = self.BCEobj(pi[..., 4], tobj)
+            lobj += obji * self.balance[i]  # obj loss
+            if self.autobalance:
+                self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()
+
+        if self.autobalance:
+            self.balance = [x / self.balance[self.ssi] for x in self.balance]
+        lbox *= self.hyp["box"]
+        lobj *= self.hyp["obj"]
+        lcls *= self.hyp["cls"]
+        bs = tobj.shape[0]  # batch size
+
+        return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach()
+
+    def loss_segment(self, preds, targets, masks):
+        """
+        proto_out: [batch_size, mask_dim, mask_height, mask_width]
+        masks: [batch_size * num_objs, image_height, image_width]
+        The number of objects differs per image, so images with fewer
+        objects are padded when the masks are processed.
+        """
+        p = preds[0]
+        proto_out = preds[1]
+        mask_h, mask_w = proto_out.shape[2:]
+        proto_out = proto_out.permute(0, 2, 3, 1)
+
+        device = targets.device
+        lcls, lbox, lobj, lseg = (
+            torch.zeros(1, device=device),
+            torch.zeros(1, device=device),
+            torch.zeros(1, device=device),
+            torch.zeros(1, device=device),
+        )
+        tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets_for_masks(
+            p, targets
+        )  # targets
+
+        # Losses
+        for i, pi in enumerate(p):  # layer index, layer predictions
+            b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
+            tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj
+
+            n = b.shape[0]  # number of targets
+            if n:
+                ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets
+
+                # Regression
+                pxy = ps[:, :2].sigmoid() * 2.0 - 0.5
+                pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
+                pbox = torch.cat((pxy, pwh), 1)  # predicted box
+                iou = bbox_iou(
+                    pbox.T, tbox[i], x1y1x2y2=False, CIoU=True
+                )  # iou(prediction, target)
+                lbox += (1.0 - iou).mean()  # iou loss
+
+                # Objectness
+                score_iou = iou.detach().clamp(0).type(tobj.dtype)
+                if self.sort_obj_iou:
+                    sort_id = torch.argsort(score_iou)
+                    b, a, gj, gi, score_iou = (
+                        b[sort_id],
+                        a[sort_id],
+                        gj[sort_id],
+                        gi[sort_id],
+                        score_iou[sort_id],
+                    )
+                tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * score_iou  # iou ratio
+
+                # Classification
+                if self.nc > 1:  # cls loss (only if multiple classes)
+                    t = torch.full_like(ps[:, self.nm :], self.cn, device=device)  # targets
+                    t[range(n), tcls[i]] = self.cp
+                    lcls += self.BCEcls(ps[:, self.nm :], t)  # BCE
+
+                # Mask Regression
+                mask_gt = masks[tidxs[i]]
+                downsampled_masks = F.interpolate(
+                    mask_gt[None, :],
+                    (mask_h, mask_w),
+                    mode="bilinear",
+                    align_corners=False,
+                ).squeeze(0)
+
+                mxywh = xywh[i]
+                mws, mhs = mxywh[:, 2:].T
+                mws, mhs = mws / pi.shape[3], mhs / pi.shape[2]
+                mxywhs = (
+                    mxywh
+                    / torch.tensor(pi.shape, device=mxywh.device)[[3, 2, 3, 2]]
+                    * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=mxywh.device)
+                )
+                mxyxys = xywh2xyxy(mxywhs)
+
+                batch_lseg = torch.zeros(1, device=device)
+                for bi in b.unique():
+                    index = b == bi
+                    mask_gti = downsampled_masks[index]
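+                    # (n_obj, mask_h, mask_w) -> (mask_h, mask_w, n_obj): matches the
+                    # (h, w, mask_dim) layout of proto_out[bi] for the matmul in single_mask_loss()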
+                    mask_gti = mask_gti.permute(1, 2, 0).contiguous()
+
+                    mw, mh = mws[index], mhs[index]
+                    mxyxy = mxyxys[index]
+                    psi = ps[index][:, 5 : self.nm]
+                    proto = proto_out[bi]
+
+                    batch_lseg += self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh)
+                lseg += batch_lseg / len(b.unique())
+
+            obji = self.BCEobj(pi[..., 4], tobj)
+            lobj += obji * self.balance[i]  # obj loss
+            if self.autobalance:
+                self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()
+
+        if self.autobalance:
+            self.balance = [x / self.balance[self.ssi] for x in self.balance]
+        lbox *= self.hyp["box"]
+        lobj *= self.hyp["obj"]
+        lcls *= self.hyp["cls"]
+        lseg *= self.hyp["box"]
+        bs = tobj.shape[0]  # batch size
+
+        loss = lbox + lobj + lcls + lseg
+        return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach()
+
+    def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h):
+        """Mask loss for a single image."""
+        # (80, 80, 32) @ (32, n) -> (80, 80, n)
+        pred_mask = proto @ pred.tanh().T
+        lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none")
+        lseg = crop(lseg, xyxy)
+        lseg = lseg.mean(dim=(0, 1)) / w / h
+        return lseg.mean()
+
+    def mask_loss(self, gt_masks, preds, protos, xyxys, ws, hs):
+        """Batched mask loss (not implemented; note that the instance attribute
+        self.mask_loss assigned in __init__ shadows this method)."""
+        pass
+
+    def build_targets(self, p, targets):
+        # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
+        na, nt = self.na, targets.shape[0]  # number of anchors, targets
+        tcls, tbox, indices, anch = [], [], [], []
+        gain = torch.ones(7, device=targets.device)  # normalized to gridspace gain
+        ai = (
+            torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)
+        )  # same as .repeat_interleave(nt)
+        targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  # append anchor indices
+
+        g = 0.5  # bias
+        off = (
+            torch.tensor(
+                [
+                    [0, 0],
+                    [1, 0],
+                    [0, 1],
+                    [-1, 0],
+                    [0, -1],  # j,k,l,m
+                    # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
+                ],
+                device=targets.device,
+            ).float()
+            * g
+        )  # offsets
+
+        for i in range(self.nl):
+            anchors = self.anchors[i]
+            gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain
+
+            # Match targets to anchors
+            t = targets * gain
+            if nt:
+                # Matches
+                r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
+                j = torch.max(r, 1.0 / r).max(2)[0] < self.hyp["anchor_t"]  # compare
+                # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
+                t = t[j]  # filter
+
+                # Offsets
+                gxy = t[:, 2:4]  # grid xy
+                gxi = gain[[2, 3]] - gxy  # inverse
+                j, k = ((gxy % 1.0 < g) & (gxy > 1.0)).T
+                l, m = ((gxi % 1.0 < g) & (gxi > 1.0)).T
+                j = torch.stack((torch.ones_like(j), j, k, l, m))
+                t = t.repeat((5, 1, 1))[j]
+                offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
+            else:
+                t = targets[0]
+                offsets = 0
+
+            # Define
+            b, c = t[:, :2].long().T  # image, class
+            gxy = t[:, 2:4]  # grid xy
+            gwh = t[:, 4:6]  # grid wh
+            gij = (gxy - offsets).long()
+            gi, gj = gij.T  # grid xy indices
+
+            # Append
+            a = t[:, 6].long()  # anchor indices
+            indices.append(
+                (b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))
+            )  # image, anchor, grid indices
+            tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
+            anch.append(anchors[a])  # anchors
+            tcls.append(c)  # class
+
+        return tcls, tbox, indices, anch
+
+    def build_targets_for_masks(self, p, targets):
+        # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
+        na, nt = self.na, targets.shape[0]  # number of anchors, targets
+        tcls, tbox, indices, anch, tidxs, xywh = [], [], [], [], [], []
+        gain = torch.ones(8,
device=targets.device) # normalized to gridspace gain + ai = ( + torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) + ) # same as .repeat_interleave(nt) + ti = ( + torch.arange(nt, device=targets.device).float().view(1, nt).repeat(na, 1) + ) # same as .repeat_interleave(nt) + + targets = torch.cat( + (targets.repeat(na, 1, 1), ai[:, :, None], ti[:, :, None]), 2 + ) # append anchor indices + + g = 0.5 # bias + off = ( + torch.tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + device=targets.device, + ).float() + * g + ) # offsets + + for i in range(self.nl): + anchors = self.anchors[i] + gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain + + # Match targets to anchors + t = targets * gain + if nt: + # Matches + r = t[:, :, 4:6] / anchors[:, None] # wh ratio + j = torch.max(r, 1.0 / r).max(2)[0] < self.hyp["anchor_t"] # compare + # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) + t = t[j] # filter + + # Offsets + gxy = t[:, 2:4] # grid xy + gxi = gain[[2, 3]] - gxy # inverse + j, k = ((gxy % 1.0 < g) & (gxy > 1.0)).T + l, m = ((gxi % 1.0 < g) & (gxi > 1.0)).T + j = torch.stack((torch.ones_like(j), j, k, l, m)) + t = t.repeat((5, 1, 1))[j] + offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] + else: + t = targets[0] + offsets = 0 + + # Define + b, c = t[:, :2].long().T # image, class + gxy = t[:, 2:4] # grid xy + gwh = t[:, 4:6] # grid wh + gij = (gxy - offsets).long() + gi, gj = gij.T # grid xy indices + + # Append + a = t[:, 6].long() # anchor indices + tidx = t[:, 7].long() + indices.append( + (b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)) + ) # image, anchor, grid indices + tbox.append(torch.cat((gxy - gij, gwh), 1)) # box + anch.append(anchors[a]) # anchors + tcls.append(c) # class + tidxs.append(tidx) + xywh.append(torch.cat((gxy, gwh), 1)) + + return tcls, tbox, indices, anch, tidxs, xywh + + +class MaskIOULoss(nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, pred_mask, gt_mask, mxyxy=None): + """ + Args: + pred_mask (torch.Tensor): prediction of masks, (80/160, 80/160, n) + gt_mask (torch.Tensor): ground truth of masks, (80/160, 80/160, n) + mxyxy (torch.Tensor): ground truth of boxes, (n, 4) + """ + _, _, n = pred_mask.shape # same as gt_mask + pred_mask = pred_mask.sigmoid() + if mxyxy is not None: + pred_mask = crop(pred_mask, mxyxy) + gt_mask = crop(gt_mask, mxyxy) + pred_mask = pred_mask.permute(2, 0, 1).view(n, -1) + gt_mask = gt_mask.permute(2, 0, 1).view(n, -1) + iou = masks_iou(pred_mask, gt_mask) + return 1.0 - iou + +import math + +def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): + # Returns the IoU of box1 to box2. 
box1 is 4, box2 is nx4 + box2 = box2.T + + # Get the coordinates of bounding boxes + if x1y1x2y2: # x1, y1, x2, y2 = box1 + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + else: # transform from xywh to xyxy + b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 + b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 + b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 + b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 + + # Intersection area + inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * ( + torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1) + ).clamp(0) + + # Union Area + w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps + w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps + union = w1 * h1 + w2 * h2 - inter + eps + + iou = inter / union + if GIoU or DIoU or CIoU: + cw = torch.max(b1_x2, b2_x2) - torch.min( + b1_x1, b2_x1 + ) # convex (smallest enclosing box) width + ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height + if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 + c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared + rho2 = ( + (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2 + ) / 4 # center distance squared + if DIoU: + return iou - rho2 / c2 # DIoU + elif ( + CIoU + ): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + v = (4 / math.pi ** 2) * torch.pow( + torch.atan(w2 / h2) - torch.atan(w1 / h1), 2 + ) + with torch.no_grad(): + alpha = v / (v - iou + (1 + eps)) + return iou - (rho2 / c2 + v * alpha) # CIoU + else: # GIoU https://arxiv.org/pdf/1902.09630.pdf + c_area = cw * ch + eps # convex area + return iou - (c_area - union) / c_area # GIoU + else: + return iou # IoU \ No newline at end of file diff --git a/utils/segment.py b/utils/segment.py new file mode 100644 index 000000000000..89f6627a6259 --- /dev/null +++ b/utils/segment.py @@ -0,0 +1,318 @@ +import numpy as np +import time +import cv2 +import torch.nn.functional as F +import torch +import torchvision +from .general import xyxy2xywh, xywh2xyxy +from .metrics import box_iou + +def segment2box(segment, width=640, height=640): + # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) + x, y = segment.T # segment xy + inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) + x, y, = ( + x[inside], + y[inside], + ) + return ( + np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) + ) # xyxy + + +def segments2boxes(segments): + # Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) 
to (cls, xywh)
+    boxes = []
+    for s in segments:
+        x, y = s.T  # segment xy
+        boxes.append([x.min(), y.min(), x.max(), y.max()])  # xyxy
+    return xyxy2xywh(np.array(boxes))  # xywh
+
+
+def resample_segments(segments, n=1000):
+    # Up-sample an (n,2) segment
+    for i, s in enumerate(segments):
+        x = np.linspace(0, len(s) - 1, n)
+        xp = np.arange(len(s))
+        segments[i] = (
+            np.concatenate([np.interp(x, xp, s[:, j]) for j in range(2)])
+            .reshape(2, -1)
+            .T
+        )  # segment xy
+    return segments
+
+def non_max_suppression_masks(
+    prediction,
+    conf_thres=0.25,
+    iou_thres=0.45,
+    classes=None,
+    agnostic=False,
+    multi_label=False,
+    labels=(),
+    max_det=300,
+    mask_dim=32,
+):
+    """Runs Non-Maximum Suppression (NMS) on inference results with mask coefficients
+
+    Returns:
+        list of detections, one (n, 6 + mask_dim) tensor per image [xyxy, conf, cls, mask coeffs]
+    """
+
+    nc = prediction.shape[2] - 5  # number of classes
+    xc = prediction[..., 4] > conf_thres  # candidates
+
+    # Checks
+    assert (
+        0 <= conf_thres <= 1
+    ), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
+    assert (
+        0 <= iou_thres <= 1
+    ), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"
+
+    # Settings
+    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
+    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
+    time_limit = 10.0  # seconds to quit after
+    redundant = True  # require redundant detections
+    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
+    merge = False  # use merge-NMS
+    nm = 5 + mask_dim
+
+    t = time.time()
+    output = [
+        torch.zeros((0, 6 + mask_dim), device=prediction.device)
+    ] * prediction.shape[0]
+    for xi, x in enumerate(prediction):  # image index, image inference
+        # Apply constraints
+        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
+        x = x[xc[xi]]  # confidence
+        pred_masks = x[:, 5:nm]
+
+        # Cat apriori labels if autolabelling
+        if labels and len(labels[xi]):
+            lb = labels[xi]
+            v = torch.zeros((len(lb), nc + 5), device=x.device)
+            v[:, :4] = lb[:, 1:5]  # box
+            v[:, 4] = 1.0  # conf
+            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
+            # NOTE: v has no mask-coefficient columns, so this cat fails when labels are supplied
+            x = torch.cat((x, v), 0)
+
+        # If none remain process next image
+        if not x.shape[0]:
+            continue
+
+        # Compute conf
+        # NOTE: pred_masks is a view of x, so this in-place multiply also scales the mask coefficients
+        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
+
+        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
+        box = xywh2xyxy(x[:, :4])
+
+        # Detections matrix nx(6 + mask_dim) (xyxy, conf, cls, mask coeffs)
+        if multi_label:
+            i, j = (x[:, nm:] > conf_thres).nonzero(as_tuple=False).T
+            x = torch.cat(
+                (box[i], x[i, j + nm, None], j[:, None].float(), pred_masks[i]), 1
+            )
+        else:  # best class only
+            conf, j = x[:, nm:].max(1, keepdim=True)
+            x = torch.cat((box, conf, j.float(), pred_masks), 1)[
+                conf.view(-1) > conf_thres
+            ]
+
+        # Filter by class
+        if classes is not None:
+            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
+
+        # Apply finite constraint
+        # if not torch.isfinite(x).all():
+        #     x = x[torch.isfinite(x).all(1)]
+
+        # Check shape
+        n = x.shape[0]  # number of boxes
+        if not n:  # no boxes
+            continue
+        elif n > max_nms:  # excess boxes
+            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence
+
+        # Batched NMS
+        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
+        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
+        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
+        if i.shape[0] > max_det:  # limit detections
+            i = i[:max_det]
+        if merge and (1 < n < 3e3):  # Merge NMS (boxes merged using weighted mean)
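+            # NOTE: merge is hard-coded to False above, so this Merge-NMS branch
+            # (IoU-and-score-weighted box averaging) is effectively disabled.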
+            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
+            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
+            weights = iou * scores[None]  # box weights
+            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(
+                1, keepdim=True
+            )  # merged boxes
+            if redundant:
+                i = i[iou.sum(1) > 1]  # require redundancy
+
+        output[xi] = x[i]
+        if (time.time() - t) > time_limit:
+            print(f"WARNING: NMS time limit {time_limit}s exceeded")
+            break  # time limit exceeded
+
+    return output
+
+def crop(masks, boxes):
+    """
+    "Crop" predicted masks by zeroing out everything not in the predicted bbox.
+    Vectorized by Chong (thanks Chong).
+
+    Args:
+        - masks should be a size [h, w, n] tensor of masks
+        - boxes should be a size [n, 4] tensor of bbox coords in relative point form
+    """
+    h, w, n = masks.size()
+    x1, x2 = boxes[:, 0], boxes[:, 2]
+    y1, y2 = boxes[:, 1], boxes[:, 3]
+
+    rows = (
+        torch.arange(w, device=masks.device, dtype=x1.dtype)
+        .view(1, -1, 1)
+        .expand(h, w, n)
+    )
+    cols = (
+        torch.arange(h, device=masks.device, dtype=x1.dtype)
+        .view(-1, 1, 1)
+        .expand(h, w, n)
+    )
+
+    # (h, w, n) grids compared against (1, 1, n) box edges, broadcast per mask
+    masks_left = rows >= x1.view(1, 1, -1)
+    masks_right = rows < x2.view(1, 1, -1)
+    masks_up = cols >= y1.view(1, 1, -1)
+    masks_down = cols < y2.view(1, 1, -1)
+
+    # (h, w, n)
+    crop_mask = masks_left * masks_right * masks_up * masks_down
+
+    return masks * crop_mask.float()
+
+def process_mask_upsample(proto_out, out_masks, bboxes, shape):
+    """
+    Crop after upsampling.
+    proto_out: [mask_dim, mask_h, mask_w]
+    out_masks: [n, mask_dim], n is the number of masks after NMS
+    bboxes: [n, 4], n is the number of masks after NMS
+    shape: input image size, (h, w)
+
+    return: h, w, n
+    """
+    # mask_h, mask_w, n
+    masks = proto_out.float().permute(
+        1, 2, 0).contiguous() @ out_masks.float().tanh().T
+    masks = masks.sigmoid()
+    masks = masks.permute(2, 0, 1).contiguous()
+    # [n, mask_h, mask_w]
+    masks = F.interpolate(masks.unsqueeze(0), shape, mode='bilinear', align_corners=False).squeeze(0)
+    # [mask_h, mask_w, n]
+    masks = crop(masks.permute(1, 2, 0).contiguous(), bboxes)
+    return masks.gt_(0.5)
+
+def process_mask(proto_out, out_masks, bboxes, shape, upsample=False):
+    """
+    Crop before upsampling.
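+
+    Usage sketch (illustrative, not from this patch; assumes det is one image's
+    detections from non_max_suppression_masks, laid out as [xyxy, conf, cls, coeffs]):
+        masks = process_mask(proto_out[i], det[:, 6:], det[:, :4], img.shape[2:])
+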
+    proto_out: [mask_dim, mask_h, mask_w]
+    out_masks: [n, mask_dim], n is the number of masks after NMS
+    bboxes: [n, 4], n is the number of masks after NMS
+    shape: input image size, (h, w)
+
+    return: h, w, n
+    """
+    downsampled_bboxes = bboxes.clone()
+    mh, mw = proto_out.shape[1:]
+    ih, iw = shape
+    # mask_h, mask_w, n
+    masks = proto_out.float().permute(
+        1, 2, 0).contiguous() @ out_masks.float().tanh().T
+    masks = masks.sigmoid()
+    downsampled_bboxes[:, 0] = downsampled_bboxes[:, 0] / iw * mw
+    downsampled_bboxes[:, 2] = downsampled_bboxes[:, 2] / iw * mw
+    downsampled_bboxes[:, 1] = downsampled_bboxes[:, 1] / ih * mh
+    downsampled_bboxes[:, 3] = downsampled_bboxes[:, 3] / ih * mh
+    masks = crop(masks, downsampled_bboxes)
+    masks = masks.permute(2, 0, 1).contiguous()
+    # [n, mask_h, mask_w]
+    if upsample:
+        masks = F.interpolate(masks.unsqueeze(0), shape, mode='bilinear', align_corners=False).squeeze(0)
+    return masks.gt_(0.5).permute(1, 2, 0).contiguous()
+
+def scale_masks(img1_shape, masks, img0_shape, ratio_pad=None):
+    """
+    img1_shape: model input shape, [h, w]
+    img0_shape: original image shape, [h, w, 3]
+    masks: [h, w, num]
+    Resizing takes most of the time.
+    """
+    # Rescale coords (xyxy) from img1_shape to img0_shape
+    if ratio_pad is None:  # calculate from img0_shape
+        gain = min(img1_shape[0] / img0_shape[0],
+                   img1_shape[1] / img0_shape[1])  # gain = old / new
+        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (
+            img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
+    else:
+        gain = ratio_pad[0][0]
+        pad = ratio_pad[1]
+    tl_pad = int(pad[1]), int(pad[0])  # y, x
+    br_pad = int(img1_shape[0] - pad[1]), int(img1_shape[1] - pad[0])
+
+    if len(masks.shape) < 2:
+        raise ValueError(f'masks should have 2 or 3 dimensions, but got {len(masks.shape)}')
+    # masks_h, masks_w, n
+    masks = masks[tl_pad[0]:br_pad[0], tl_pad[1]:br_pad[1]]
+    # masks_h, masks_w, n
+    masks = cv2.resize(masks, (img0_shape[1], img0_shape[0]))
+
+    # keepdim
+    if len(masks.shape) == 2:
+        masks = masks[:, :, None]
+
+    return masks
+
+def mask_iou(mask1, mask2):
+    """
+    mask1: [N, n], N is the number of predicted objects
+    mask2: [M, n], M is the number of ground-truth objects
+    Note: n means image_w x image_h
+
+    return: masks iou, [N, M]
+    """
+    intersection = torch.matmul(mask1, mask2.t()).clamp(0)
+    area1 = torch.sum(mask1, dim=1).view(1, -1)
+    area2 = torch.sum(mask2, dim=1).view(1, -1)
+    union = (area1.t() + area2) - intersection
+
+    return intersection / (union + 1e-7)
+
+def masks_iou(mask1, mask2):
+    """
+    mask1: [N, n], N is the number of predicted objects
+    mask2: [N, n], N is the number of ground-truth objects
+    Note: n means image_w x image_h
+
+    return: masks iou, (N, )
+    """
+    intersection = (mask1 * mask2).sum(1).clamp(0)  # (N, )
+    area1 = torch.sum(mask1, dim=1)  # (N, ); no .view(1, -1), which would give a (1, N) result
+    area2 = torch.sum(mask2, dim=1)
+    union = (area1 + area2) - intersection
+    return intersection / (union + 1e-7)
\ No newline at end of file

From e151ee177750a5568d872cb346128567d5d5ff27 Mon Sep 17 00:00:00 2001
From: Ayush Chaurasia
Date: Mon, 11 Jul 2022 13:12:08 +0530
Subject: [PATCH 002/247] deterministic

---
 train_instseg.py | 2 +-
utils/loggers/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/train_instseg.py b/train_instseg.py index ff85f1eb36b5..825edc6b7415 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -113,7 +113,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Config plots = not evolve and not opt.noplots # create plots cuda = device.type != 'cpu' - init_seeds(opt.seed + 1 + RANK) + init_seeds(opt.seed + 1 + RANK, True) with torch_distributed_zero_first(LOCAL_RANK): data_dict = data_dict or check_dataset(data) # check if None train_path, val_path = data_dict['train'], data_dict['val'] diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index bf95d82203b8..65c673c64498 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -240,7 +240,7 @@ def __init__( try: import wandb from wandb import __version__ - wandb.init(project="YOLOv5-Inst-seg", config=opt) + wandb.init(project=opt.project, name=opt.name, config=opt) except ImportError: wandb = None pass From bcb5bcb617917c89d73665f679eb1e4507b88d5c Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 15:58:38 +0530 Subject: [PATCH 003/247] allow mask_ratio --- train_instseg.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/train_instseg.py b/train_instseg.py index 825edc6b7415..ad7ced91e4c2 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -29,6 +29,7 @@ import torch.nn as nn import yaml from torch.nn.parallel import DistributedDataParallel as DDP +import torch.nn.functional as F from torch.optim import SGD, Adam, AdamW, lr_scheduler from tqdm import tqdm @@ -253,7 +254,9 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio quad=opt.quad, prefix=colorstr('train: '), mask_head=True, - shuffle=True) + shuffle=True, + mask_downsample_ratio=mask_ratio + ) mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class nb = len(train_loader) # number of batches assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. 
Possible class labels are 0-{nc - 1}' @@ -272,6 +275,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio workers=workers * 2, pad=0.5, mask_head=True, + mask_downsample_ratio=mask_ratio, prefix=colorstr('val: '))[0] if not resume: @@ -396,6 +400,14 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) pbar.set_description(("%10s" * 2 + "%10.4g" * 6) % (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0],imgs.shape[-1])) + # for plots + if mask_ratio != 1: + masks = F.interpolate( + masks[None, :], + (imgsz, imgsz), + mode="bilinear", + align_corners=False, + ).squeeze(0) callbacks.run('on_train_batch_end', ni, model, imgs, targets,masks, paths, plots, opt.sync_bn, None) if callbacks.stop_training: From 472a45015843029c80a39af98e0cbb65ef8e72af Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 21:41:03 +0530 Subject: [PATCH 004/247] attempt class renaming --- train_instseg.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/train_instseg.py b/train_instseg.py index ad7ced91e4c2..314c18be2c75 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -45,7 +45,7 @@ from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size from utils.callbacks import Callbacks -from seg_dataloaders import create_dataloader +from seg_dataloaders import create_dataloader, create_dataloader_ori from utils.downloads import attempt_download from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, fitness, check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, @@ -239,7 +239,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio LOGGER.info('Using SyncBatchNorm()') # Trainloader - train_loader, dataset = create_dataloader(train_path, + train_loader, dataset = create_dataloader_ori(train_path, imgsz, batch_size // WORLD_SIZE, gs, @@ -263,7 +263,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Process 0 if RANK in {-1, 0}: - val_loader = create_dataloader(val_path, + val_loader = create_dataloader_ori(val_path, imgsz, batch_size // WORLD_SIZE * 2, gs, From 9871073b37127e5cf1b3bd799cdd560239674edc Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 21:42:43 +0530 Subject: [PATCH 005/247] attempt class --- data/coco.yaml | 4 +++- train_instseg.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/data/coco.yaml b/data/coco.yaml index 0c0c4adab05d..c07c27816796 100644 --- a/data/coco.yaml +++ b/data/coco.yaml @@ -14,7 +14,8 @@ val: val2017.txt # val images (relative to 'path') 5000 images test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 # Classes -nc: 80 # number of classes +nc: 91 # number of classes +''' names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', @@ -24,6 +25,7 @@ names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 't 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] # class names +''' # Download script/URL (optional) diff --git a/train_instseg.py b/train_instseg.py index 314c18be2c75..ad7ced91e4c2 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -45,7 +45,7 @@ from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size from utils.callbacks import Callbacks -from seg_dataloaders import create_dataloader, create_dataloader_ori +from seg_dataloaders import create_dataloader from utils.downloads import attempt_download from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, fitness, check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, @@ -239,7 +239,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio LOGGER.info('Using SyncBatchNorm()') # Trainloader - train_loader, dataset = create_dataloader_ori(train_path, + train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // WORLD_SIZE, gs, @@ -263,7 +263,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Process 0 if RANK in {-1, 0}: - val_loader = create_dataloader_ori(val_path, + val_loader = create_dataloader(val_path, imgsz, batch_size // WORLD_SIZE * 2, gs, From a426c30326ca8c0654c9723d96dd3453cf6e1685 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 21:44:27 +0530 Subject: [PATCH 006/247] attempt cls format --- data/coco.yaml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/data/coco.yaml b/data/coco.yaml index c07c27816796..35d3001404fc 100644 --- a/data/coco.yaml +++ b/data/coco.yaml @@ -15,18 +15,6 @@ test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions. 
# Classes nc: 91 # number of classes -''' -names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', - 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', - 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', - 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', - 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', - 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', - 'hair drier', 'toothbrush'] # class names -''' - # Download script/URL (optional) download: | From 19ec985e02a7d95cd19861b22599e3c1bc69e50b Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 21:57:41 +0530 Subject: [PATCH 007/247] revert --- data/coco.yaml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/data/coco.yaml b/data/coco.yaml index 35d3001404fc..0c0c4adab05d 100644 --- a/data/coco.yaml +++ b/data/coco.yaml @@ -14,7 +14,17 @@ val: val2017.txt # val images (relative to 'path') 5000 images test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 # Classes -nc: 91 # number of classes +nc: 80 # number of classes +names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', + 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', + 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', + 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', + 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', + 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', + 'hair drier', 'toothbrush'] # class names + # Download script/URL (optional) download: | From 7f552344a72f0da50efe77246ddb8c9743e6d6d2 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 22:03:28 +0530 Subject: [PATCH 008/247] attempt --- models/yolov5m_seg.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/yolov5m_seg.yaml b/models/yolov5m_seg.yaml index 6b19539786b2..37a0bb3f6050 100644 --- a/models/yolov5m_seg.yaml +++ b/models/yolov5m_seg.yaml @@ -45,4 +45,4 @@ head: [-1, 3, C3, [1024, False]], # 23 (P5/32-large) [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) - ] + ] \ No newline at end of file From ff5f80f0258322a4fa41df5928c8ff3610f3f9fb Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 22:22:33 +0530 Subject: [PATCH 009/247] print mlc --- train_instseg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/train_instseg.py b/train_instseg.py index ad7ced91e4c2..9be5c42a9ef7 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -258,6 +258,7 @@ def 
train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio mask_downsample_ratio=mask_ratio ) mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class + print("mlc , nc ", mlc, " ", nc ) nb = len(train_loader) # number of batches assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}' From 0d9f53df03d0b7fc0b5432ced3b8ff0c9647d80b Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 22:35:23 +0530 Subject: [PATCH 010/247] add pdb --- utils/seg_loss.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/seg_loss.py b/utils/seg_loss.py index d4cf26401bc6..c0ccb7525c56 100644 --- a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -184,6 +184,7 @@ def loss_segment(self, preds, targets, masks): mxywh = xywh[i] mws, mhs = mxywh[:, 2:].T mws, mhs = mws / pi.shape[3], mhs / pi.shape[2] + import pdb;pdb.set_trace() mxywhs = ( mxywh / torch.tensor(pi.shape, device=mxywh.device)[[3, 2, 3, 2]] From 0ee84c5cd1131af25ef2a9115cf997da40eb28e7 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 22:39:37 +0530 Subject: [PATCH 011/247] pdb --- utils/seg_loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/seg_loss.py b/utils/seg_loss.py index c0ccb7525c56..02cb65711ff4 100644 --- a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -171,6 +171,7 @@ def loss_segment(self, preds, targets, masks): t = torch.full_like(ps[:, self.nm :], self.cn, device=device) # targets t[range(n), tcls[i]] = self.cp lcls += self.BCEcls(ps[:, self.nm :], t) # BCE + import pdb;pdb.set_trace() # Mask Regression mask_gt = masks[tidxs[i]] @@ -184,7 +185,6 @@ def loss_segment(self, preds, targets, masks): mxywh = xywh[i] mws, mhs = mxywh[:, 2:].T mws, mhs = mws / pi.shape[3], mhs / pi.shape[2] - import pdb;pdb.set_trace() mxywhs = ( mxywh / torch.tensor(pi.shape, device=mxywh.device)[[3, 2, 3, 2]] From 63fce9a9e9da5ce7ac96a5edc7f2dde61233efc3 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 23:12:46 +0530 Subject: [PATCH 012/247] revert --- utils/seg_loss.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/seg_loss.py b/utils/seg_loss.py index 02cb65711ff4..d4cf26401bc6 100644 --- a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -171,7 +171,6 @@ def loss_segment(self, preds, targets, masks): t = torch.full_like(ps[:, self.nm :], self.cn, device=device) # targets t[range(n), tcls[i]] = self.cp lcls += self.BCEcls(ps[:, self.nm :], t) # BCE - import pdb;pdb.set_trace() # Mask Regression mask_gt = masks[tidxs[i]] From 023255fc8c5735202b6a0809f9f2d4f019d970f6 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 12 Jul 2022 11:53:31 +0530 Subject: [PATCH 013/247] allow plotting --- train_instseg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_instseg.py b/train_instseg.py index 9be5c42a9ef7..b5a307097368 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -169,7 +169,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio mask=True, verbose=False, mask_downsample_ratio=mask_ratio, - plots=False + plots=True ) g = [], [], [] # optimizer parameter groups bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. 
BatchNorm2d() From d533e303d2bf039e870dcf68d77753e7a9b5b695 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 13 Jul 2022 10:35:50 +0530 Subject: [PATCH 014/247] make compatible with train.py --- evaluator.py | 2 +- utils/metrics.py | 382 +++++++++++++---------------------- utils/seg_metrics.py | 465 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 602 insertions(+), 247 deletions(-) create mode 100644 utils/seg_metrics.py diff --git a/evaluator.py b/evaluator.py index e15d090ad625..3ed19bc529b0 100644 --- a/evaluator.py +++ b/evaluator.py @@ -42,7 +42,7 @@ process_mask_upsample, scale_masks, ) -from utils.metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix +from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix from utils.plots import output_to_target, plot_images_boxes_and_masks from utils.torch_utils import select_device, time_sync from PIL import Image diff --git a/utils/metrics.py b/utils/metrics.py index 8646931bed00..cfdfbdb88b2c 100644 --- a/utils/metrics.py +++ b/utils/metrics.py @@ -5,7 +5,6 @@ import math import warnings -from easydict import EasyDict as edict from pathlib import Path import matplotlib.pyplot as plt @@ -13,19 +12,22 @@ import torch -def fitness(x, masks=False): +def fitness(x): # Model fitness as a weighted combination of metrics - if masks: - w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] - return (x[:, :8] * w).sum(1) w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] return (x[:, :4] * w).sum(1) -def ap_per_class( - tp, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), prefix="" -): - """Compute the average precision, given the recall and precision curves. +def smooth(y, f=0.05): + # Box filter of fraction f + nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd) + p = np.ones(nf // 2) # ones padding + yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded + return np.convolve(yp, np.ones(nf) / nf, mode='valid') # y-smoothed + + +def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16): + """ Compute the average precision, given the recall and precision curves. Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. # Arguments tp: True positives (nparray, nx1 or nx10). @@ -33,8 +35,7 @@ def ap_per_class( pred_cls: Predicted object classes (nparray). target_cls: True object classes (nparray). plot: Plot precision-recall curve at mAP@0.5 - save_dir: Plot save directory. - prefix: prefix. + save_dir: Plot save directory # Returns The average precision as computed in py-faster-rcnn. 
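+    Example (illustrative sketch; array shapes are assumptions, not from this patch):
+        # tp: (n_pred, 10) booleans at 10 IoU thresholds; conf, pred_cls: (n_pred,); target_cls: (n_gt,)
+        tp, fp, p, r, f1, ap, ap_class = ap_per_class(tp, conf, pred_cls, target_cls)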
""" @@ -44,7 +45,7 @@ def ap_per_class( tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] # Find unique classes - unique_classes = np.unique(target_cls) + unique_classes, nt = np.unique(target_cls, return_counts=True) nc = unique_classes.shape[0] # number of classes, number of detections # Create Precision-Recall curve and compute AP for each class @@ -52,114 +53,48 @@ def ap_per_class( ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) for ci, c in enumerate(unique_classes): i = pred_cls == c - n_l = (target_cls == c).sum() # number of labels + n_l = nt[ci] # number of labels n_p = i.sum() # number of predictions - if n_p == 0 or n_l == 0: continue - else: - # Accumulate FPs and TPs - fpc = (1 - tp[i]).cumsum(0) - tpc = tp[i].cumsum(0) - - # Recall - recall = tpc / (n_l + 1e-16) # recall curve - r[ci] = np.interp( - -px, -conf[i], recall[:, 0], left=0 - ) # negative x, xp because xp decreases - - # Precision - precision = tpc / (tpc + fpc) # precision curve - p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score - - # AP from recall-precision curve - for j in range(tp.shape[1]): - ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) - if plot and j == 0: - py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 + + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum(0) + tpc = tp[i].cumsum(0) + + # Recall + recall = tpc / (n_l + eps) # recall curve + r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases + + # Precision + precision = tpc / (tpc + fpc) # precision curve + p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score + + # AP from recall-precision curve + for j in range(tp.shape[1]): + ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) + if plot and j == 0: + py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 # Compute F1 (harmonic mean of precision and recall) - f1 = 2 * p * r / (p + r + 1e-16) - names = [ - v for k, v in names.items() if k in unique_classes - ] # list: only classes that have data - names = {i: v for i, v in enumerate(names)} # to dict - if plot and save_dir is not None: - plot_pr_curve(px, py, ap, Path(save_dir) / f"{prefix}PR_curve.png", names) - plot_mc_curve( - px, f1, Path(save_dir) / f"{prefix}F1_curve.png", names, ylabel="F1" - ) - plot_mc_curve( - px, p, Path(save_dir) / f"{prefix}P_curve.png", names, ylabel="Precision" - ) - plot_mc_curve( - px, r, Path(save_dir) / f"{prefix}R_curve.png", names, ylabel="Recall" - ) - - i = f1.mean(0).argmax() # max F1 index - return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype("int32") - - -def ap_per_class_box_and_mask( - tp_m, - tp_b, - conf, - pred_cls, - target_cls, - plot=False, - save_dir=".", - names=(), -): - """ - Args: - tp_b: tp of boxes. - tp_m: tp of masks. - other arguments see `func: ap_per_class`. 
- """ - results_boxes = ap_per_class( - tp_b, - conf, - pred_cls, - target_cls, - plot=plot, - save_dir=save_dir, - names=names, - prefix="Box", - ) - results_masks = ap_per_class( - tp_m, - conf, - pred_cls, - target_cls, - plot=plot, - save_dir=save_dir, - names=names, - prefix="Mask", - ) - - results = edict( - { - "boxes": { - "p": results_boxes[0], - "r": results_boxes[1], - "ap": results_boxes[2], - "f1": results_boxes[3], - "ap_class": results_boxes[4], - }, - "masks": { - "p": results_masks[0], - "r": results_masks[1], - "ap": results_masks[2], - "f1": results_masks[3], - "ap_class": results_masks[4], - }, - } - ) - return results + f1 = 2 * p * r / (p + r + eps) + names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data + names = dict(enumerate(names)) # to dict + if plot: + plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names) + plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1') + plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision') + plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall') + + i = smooth(f1.mean(0), 0.1).argmax() # max F1 index + p, r, f1 = p[:, i], r[:, i], f1[:, i] + tp = (r * nt).round() # true positives + fp = (tp / (p + eps) - tp).round() # false positives + return tp, fp, p, r, f1, ap, unique_classes.astype(int) def compute_ap(recall, precision): - """Compute the average precision, given the recall and precision curves + """ Compute the average precision, given the recall and precision curves # Arguments recall: The recall curve (list) precision: The precision curve (list) @@ -175,8 +110,8 @@ def compute_ap(recall, precision): mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) # Integrate area under curve - method = "interp" # methods: 'continuous', 'interp' - if method == "interp": + method = 'interp' # methods: 'continuous', 'interp' + if method == 'interp': x = np.linspace(0, 1, 101) # 101-point interp (COCO) ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate else: # 'continuous' @@ -211,11 +146,7 @@ def process_batch(self, detections, labels): x = torch.where(iou > self.iou_thres) if x[0].shape[0]: - matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) - .cpu() - .numpy() - ) + matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -225,7 +156,7 @@ def process_batch(self, detections, labels): matches = np.zeros((0, 3)) n = matches.shape[0] > 0 - m0, m1, _ = matches.transpose().astype(np.int16) + m0, m1, _ = matches.transpose().astype(int) for i, gc in enumerate(gt_classes): j = m0 == i if n and sum(j) == 1: @@ -241,101 +172,94 @@ def process_batch(self, detections, labels): def matrix(self): return self.matrix - def plot(self, normalize=True, save_dir="", names=()): + def tp_fp(self): + tp = self.matrix.diagonal() # true positives + fp = self.matrix.sum(1) - tp # false positives + # fn = self.matrix.sum(0) - tp # false negatives (missed detections) + return tp[:-1], fp[:-1] # remove background class + + def plot(self, normalize=True, save_dir='', names=()): try: import seaborn as sn - array = self.matrix / ( - (self.matrix.sum(0).reshape(1, -1) + 1e-6) if normalize else 1 - ) # normalize columns + array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1E-9) if normalize else 1) # normalize columns array[array < 0.005] = np.nan # don't 
annotate (would appear as 0.00) fig = plt.figure(figsize=(12, 9), tight_layout=True) - sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size - labels = (0 < len(names) < 99) and len( - names - ) == self.nc # apply names to ticklabels + nc, nn = self.nc, len(names) # number of classes, names + sn.set(font_scale=1.0 if nc < 50 else 0.8) # for label size + labels = (0 < nn < 99) and (nn == nc) # apply names to ticklabels with warnings.catch_warnings(): - warnings.simplefilter( - "ignore" - ) # suppress empty matrix RuntimeWarning: All-NaN slice encountered - sn.heatmap( - array, - annot=self.nc < 30, - annot_kws={"size": 8}, - cmap="Blues", - fmt=".2f", - square=True, - xticklabels=names + ["background FP"] if labels else "auto", - yticklabels=names + ["background FN"] if labels else "auto", - ).set_facecolor((1, 1, 1)) - fig.axes[0].set_xlabel("True") - fig.axes[0].set_ylabel("Predicted") - fig.savefig(Path(save_dir) / "confusion_matrix.png", dpi=250) + warnings.simplefilter('ignore') # suppress empty matrix RuntimeWarning: All-NaN slice encountered + sn.heatmap(array, + annot=nc < 30, + annot_kws={ + "size": 8}, + cmap='Blues', + fmt='.2f', + square=True, + vmin=0.0, + xticklabels=names + ['background FP'] if labels else "auto", + yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1)) + fig.axes[0].set_xlabel('True') + fig.axes[0].set_ylabel('Predicted') + fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) plt.close() except Exception as e: - print(f"WARNING: ConfusionMatrix plot failure: {e}") + print(f'WARNING: ConfusionMatrix plot failure: {e}') def print(self): for i in range(self.nc + 1): - print(" ".join(map(str, self.matrix[i]))) + print(' '.join(map(str, self.matrix[i]))) -def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): - # Returns the IoU of box1 to box2. 
box1 is 4, box2 is nx4 - box2 = box2.T +def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): + # Returns Intersection over Union (IoU) of box1(1,4) to box2(n,4) # Get the coordinates of bounding boxes - if x1y1x2y2: # x1, y1, x2, y2 = box1 - b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] - b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] - else: # transform from xywh to xyxy - b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 - b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 - b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 - b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 + if xywh: # transform from xywh to xyxy + (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, 1), box2.chunk(4, 1) + w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2 + b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_ + b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_ + else: # x1, y1, x2, y2 = box1 + b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, 1) + b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, 1) + w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 # Intersection area - inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * ( - torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1) - ).clamp(0) + inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ + (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) # Union Area - w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps - w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps union = w1 * h1 + w2 * h2 - inter + eps + # IoU iou = inter / union - if GIoU or DIoU or CIoU: - cw = torch.max(b1_x2, b2_x2) - torch.min( - b1_x1, b2_x1 - ) # convex (smallest enclosing box) width + if CIoU or DIoU or GIoU: + cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared - rho2 = ( - (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 - + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2 - ) / 4 # center distance squared - if DIoU: - return iou - rho2 / c2 # DIoU - elif ( - CIoU - ): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 - v = (4 / math.pi ** 2) * torch.pow( - torch.atan(w2 / h2) - torch.atan(w1 / h1), 2 - ) + rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center dist ** 2 + if CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / (h2 + eps)) - torch.atan(w1 / (h1 + eps)), 2) with torch.no_grad(): alpha = v / (v - iou + (1 + eps)) return iou - (rho2 / c2 + v * alpha) # CIoU - else: # GIoU https://arxiv.org/pdf/1902.09630.pdf - c_area = cw * ch + eps # convex area - return iou - (c_area - union) / c_area # GIoU - else: - return iou # IoU + return iou - rho2 / c2 # DIoU + c_area = cw * ch + eps # convex area + return iou - (c_area - union) / c_area # GIoU https://arxiv.org/pdf/1902.09630.pdf + return iou # IoU + + +def box_area(box): + # box = xyxy(4,n) + return (box[2] - box[0]) * (box[3] - box[1]) -def box_iou(box1, box2): +def box_iou(box1, box2, eps=1e-7): # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py """ Return intersection-over-union (Jaccard index) of boxes. 
@@ -348,44 +272,28 @@ def box_iou(box1, box2): IoU values for every element in boxes1 and boxes2 """ - def box_area(box): - # box = 4xn - return (box[2] - box[0]) * (box[3] - box[1]) - - area1 = box_area(box1.T) - area2 = box_area(box2.T) - # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) - inter = ( - ( - torch.min(box1[:, None, 2:], box2[:, 2:]) - - torch.max(box1[:, None, :2], box2[:, :2]) - ) - .clamp(0) - .prod(2) - ) - return inter / ( - area1[:, None] + area2 - inter - ) # iou = inter / (area1 + area2 - inter) + (a1, a2), (b1, b2) = box1[:, None].chunk(2, 2), box2.chunk(2, 1) + inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2) + + # IoU = inter / (area1 + area2 - inter) + return inter / (box_area(box1.T)[:, None] + box_area(box2.T) - inter + eps) def bbox_ioa(box1, box2, eps=1e-7): - """Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2 + """ Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2 box1: np.array of shape(4) box2: np.array of shape(nx4) returns: np.array of shape(n) """ - box2 = box2.transpose() - # Get the coordinates of bounding boxes - b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] - b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + b1_x1, b1_y1, b1_x2, b1_y2 = box1 + b2_x1, b2_y1, b2_x2, b2_y2 = box2.T # Intersection area - inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * ( - np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1) - ).clip(0) + inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \ + (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0) # box2 area box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps @@ -394,72 +302,54 @@ def bbox_ioa(box1, box2, eps=1e-7): return inter_area / box2_area -def wh_iou(wh1, wh2): +def wh_iou(wh1, wh2, eps=1e-7): # Returns the nxm IoU matrix. 
wh1 is nx2, wh2 is mx2 wh1 = wh1[:, None] # [N,1,2] wh2 = wh2[None] # [1,M,2] inter = torch.min(wh1, wh2).prod(2) # [N,M] - return inter / ( - wh1.prod(2) + wh2.prod(2) - inter - ) # iou = inter / (area1 + area2 - inter) + return inter / (wh1.prod(2) + wh2.prod(2) - inter + eps) # iou = inter / (area1 + area2 - inter) # Plots ---------------------------------------------------------------------------------------------------------------- -def plot_pr_curve(px, py, ap, save_dir="pr_curve.png", names=()): +def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=()): # Precision-recall curve fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) py = np.stack(py, axis=1) if 0 < len(names) < 21: # display per-class legend if < 21 classes for i, y in enumerate(py.T): - ax.plot( - px, y, linewidth=1, label=f"{names[i]} {ap[i, 0]:.3f}" - ) # plot(recall, precision) + ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision) else: - ax.plot(px, py, linewidth=1, color="grey") # plot(recall, precision) - - ax.plot( - px, - py.mean(1), - linewidth=3, - color="blue", - label="all classes %.3f mAP@0.5" % ap[:, 0].mean(), - ) - ax.set_xlabel("Recall") - ax.set_ylabel("Precision") + ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision) + + ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) + ax.set_xlabel('Recall') + ax.set_ylabel('Precision') ax.set_xlim(0, 1) ax.set_ylim(0, 1) plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") - fig.savefig(Path(save_dir), dpi=250) + fig.savefig(save_dir, dpi=250) plt.close() -def plot_mc_curve( - px, py, save_dir="mc_curve.png", names=(), xlabel="Confidence", ylabel="Metric" -): +def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabel='Confidence', ylabel='Metric'): # Metric-confidence curve fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) if 0 < len(names) < 21: # display per-class legend if < 21 classes for i, y in enumerate(py): - ax.plot(px, y, linewidth=1, label=f"{names[i]}") # plot(confidence, metric) + ax.plot(px, y, linewidth=1, label=f'{names[i]}') # plot(confidence, metric) else: - ax.plot(px, py.T, linewidth=1, color="grey") # plot(confidence, metric) - - y = py.mean(0) - ax.plot( - px, - y, - linewidth=3, - color="blue", - label=f"all classes {y.max():.2f} at {px[y.argmax()]:.3f}", - ) + ax.plot(px, py.T, linewidth=1, color='grey') # plot(confidence, metric) + + y = smooth(py.mean(0), 0.05) + ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}') ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) ax.set_xlim(0, 1) ax.set_ylim(0, 1) plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") - fig.savefig(Path(save_dir), dpi=250) - plt.close() + fig.savefig(save_dir, dpi=250) + plt.close() \ No newline at end of file diff --git a/utils/seg_metrics.py b/utils/seg_metrics.py new file mode 100644 index 000000000000..8646931bed00 --- /dev/null +++ b/utils/seg_metrics.py @@ -0,0 +1,465 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Model validation metrics +""" + +import math +import warnings +from easydict import EasyDict as edict +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import torch + + +def fitness(x, masks=False): + # Model fitness as a weighted combination of metrics + if masks: + w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] + return (x[:, :8] * w).sum(1) + w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, 
mAP@0.5:0.95] + return (x[:, :4] * w).sum(1) + + +def ap_per_class( + tp, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), prefix="" +): + """Compute the average precision, given the recall and precision curves. + Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. + # Arguments + tp: True positives (nparray, nx1 or nx10). + conf: Objectness value from 0-1 (nparray). + pred_cls: Predicted object classes (nparray). + target_cls: True object classes (nparray). + plot: Plot precision-recall curve at mAP@0.5 + save_dir: Plot save directory. + prefix: prefix. + # Returns + The average precision as computed in py-faster-rcnn. + """ + + # Sort by objectness + i = np.argsort(-conf) + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] + + # Find unique classes + unique_classes = np.unique(target_cls) + nc = unique_classes.shape[0] # number of classes, number of detections + + # Create Precision-Recall curve and compute AP for each class + px, py = np.linspace(0, 1, 1000), [] # for plotting + ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) + for ci, c in enumerate(unique_classes): + i = pred_cls == c + n_l = (target_cls == c).sum() # number of labels + n_p = i.sum() # number of predictions + + if n_p == 0 or n_l == 0: + continue + else: + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum(0) + tpc = tp[i].cumsum(0) + + # Recall + recall = tpc / (n_l + 1e-16) # recall curve + r[ci] = np.interp( + -px, -conf[i], recall[:, 0], left=0 + ) # negative x, xp because xp decreases + + # Precision + precision = tpc / (tpc + fpc) # precision curve + p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score + + # AP from recall-precision curve + for j in range(tp.shape[1]): + ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) + if plot and j == 0: + py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 + + # Compute F1 (harmonic mean of precision and recall) + f1 = 2 * p * r / (p + r + 1e-16) + names = [ + v for k, v in names.items() if k in unique_classes + ] # list: only classes that have data + names = {i: v for i, v in enumerate(names)} # to dict + if plot and save_dir is not None: + plot_pr_curve(px, py, ap, Path(save_dir) / f"{prefix}PR_curve.png", names) + plot_mc_curve( + px, f1, Path(save_dir) / f"{prefix}F1_curve.png", names, ylabel="F1" + ) + plot_mc_curve( + px, p, Path(save_dir) / f"{prefix}P_curve.png", names, ylabel="Precision" + ) + plot_mc_curve( + px, r, Path(save_dir) / f"{prefix}R_curve.png", names, ylabel="Recall" + ) + + i = f1.mean(0).argmax() # max F1 index + return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype("int32") + + +def ap_per_class_box_and_mask( + tp_m, + tp_b, + conf, + pred_cls, + target_cls, + plot=False, + save_dir=".", + names=(), +): + """ + Args: + tp_b: tp of boxes. + tp_m: tp of masks. + other arguments see `func: ap_per_class`. 
+ """ + results_boxes = ap_per_class( + tp_b, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Box", + ) + results_masks = ap_per_class( + tp_m, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Mask", + ) + + results = edict( + { + "boxes": { + "p": results_boxes[0], + "r": results_boxes[1], + "ap": results_boxes[2], + "f1": results_boxes[3], + "ap_class": results_boxes[4], + }, + "masks": { + "p": results_masks[0], + "r": results_masks[1], + "ap": results_masks[2], + "f1": results_masks[3], + "ap_class": results_masks[4], + }, + } + ) + return results + + +def compute_ap(recall, precision): + """Compute the average precision, given the recall and precision curves + # Arguments + recall: The recall curve (list) + precision: The precision curve (list) + # Returns + Average precision, precision curve, recall curve + """ + + # Append sentinel values to beginning and end + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre = np.concatenate(([1.0], precision, [0.0])) + + # Compute the precision envelope + mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) + + # Integrate area under curve + method = "interp" # methods: 'continuous', 'interp' + if method == "interp": + x = np.linspace(0, 1, 101) # 101-point interp (COCO) + ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate + else: # 'continuous' + i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve + + return ap, mpre, mrec + + +class ConfusionMatrix: + # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix + def __init__(self, nc, conf=0.25, iou_thres=0.45): + self.matrix = np.zeros((nc + 1, nc + 1)) + self.nc = nc # number of classes + self.conf = conf + self.iou_thres = iou_thres + + def process_batch(self, detections, labels): + """ + Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
+ Arguments: + detections (Array[N, 6]), x1, y1, x2, y2, conf, class + labels (Array[M, 5]), class, x1, y1, x2, y2 + Returns: + None, updates confusion matrix accordingly + """ + detections = detections[detections[:, 4] > self.conf] + gt_classes = labels[:, 0].int() + detection_classes = detections[:, 5].int() + iou = box_iou(labels[:, 1:], detections[:, :4]) + + x = torch.where(iou > self.iou_thres) + if x[0].shape[0]: + matches = ( + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) + .cpu() + .numpy() + ) + if x[0].shape[0] > 1: + matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 1], return_index=True)[1]] + matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 0], return_index=True)[1]] + else: + matches = np.zeros((0, 3)) + + n = matches.shape[0] > 0 + m0, m1, _ = matches.transpose().astype(np.int16) + for i, gc in enumerate(gt_classes): + j = m0 == i + if n and sum(j) == 1: + self.matrix[detection_classes[m1[j]], gc] += 1 # correct + else: + self.matrix[self.nc, gc] += 1 # background FP + + if n: + for i, dc in enumerate(detection_classes): + if not any(m1 == i): + self.matrix[dc, self.nc] += 1 # background FN + + def matrix(self): + return self.matrix + + def plot(self, normalize=True, save_dir="", names=()): + try: + import seaborn as sn + + array = self.matrix / ( + (self.matrix.sum(0).reshape(1, -1) + 1e-6) if normalize else 1 + ) # normalize columns + array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) + + fig = plt.figure(figsize=(12, 9), tight_layout=True) + sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size + labels = (0 < len(names) < 99) and len( + names + ) == self.nc # apply names to ticklabels + with warnings.catch_warnings(): + warnings.simplefilter( + "ignore" + ) # suppress empty matrix RuntimeWarning: All-NaN slice encountered + sn.heatmap( + array, + annot=self.nc < 30, + annot_kws={"size": 8}, + cmap="Blues", + fmt=".2f", + square=True, + xticklabels=names + ["background FP"] if labels else "auto", + yticklabels=names + ["background FN"] if labels else "auto", + ).set_facecolor((1, 1, 1)) + fig.axes[0].set_xlabel("True") + fig.axes[0].set_ylabel("Predicted") + fig.savefig(Path(save_dir) / "confusion_matrix.png", dpi=250) + plt.close() + except Exception as e: + print(f"WARNING: ConfusionMatrix plot failure: {e}") + + def print(self): + for i in range(self.nc + 1): + print(" ".join(map(str, self.matrix[i]))) + + +def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): + # Returns the IoU of box1 to box2. 
box1 is 4, box2 is nx4 + box2 = box2.T + + # Get the coordinates of bounding boxes + if x1y1x2y2: # x1, y1, x2, y2 = box1 + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + else: # transform from xywh to xyxy + b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 + b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 + b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 + b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 + + # Intersection area + inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * ( + torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1) + ).clamp(0) + + # Union Area + w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps + w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps + union = w1 * h1 + w2 * h2 - inter + eps + + iou = inter / union + if GIoU or DIoU or CIoU: + cw = torch.max(b1_x2, b2_x2) - torch.min( + b1_x1, b2_x1 + ) # convex (smallest enclosing box) width + ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height + if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 + c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared + rho2 = ( + (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2 + ) / 4 # center distance squared + if DIoU: + return iou - rho2 / c2 # DIoU + elif ( + CIoU + ): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + v = (4 / math.pi ** 2) * torch.pow( + torch.atan(w2 / h2) - torch.atan(w1 / h1), 2 + ) + with torch.no_grad(): + alpha = v / (v - iou + (1 + eps)) + return iou - (rho2 / c2 + v * alpha) # CIoU + else: # GIoU https://arxiv.org/pdf/1902.09630.pdf + c_area = cw * ch + eps # convex area + return iou - (c_area - union) / c_area # GIoU + else: + return iou # IoU + + +def box_iou(box1, box2): + # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py + """ + Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: + box1 (Tensor[N, 4]) + box2 (Tensor[M, 4]) + Returns: + iou (Tensor[N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + + def box_area(box): + # box = 4xn + return (box[2] - box[0]) * (box[3] - box[1]) + + area1 = box_area(box1.T) + area2 = box_area(box2.T) + + # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) + inter = ( + ( + torch.min(box1[:, None, 2:], box2[:, 2:]) + - torch.max(box1[:, None, :2], box2[:, :2]) + ) + .clamp(0) + .prod(2) + ) + return inter / ( + area1[:, None] + area2 - inter + ) # iou = inter / (area1 + area2 - inter) + + +def bbox_ioa(box1, box2, eps=1e-7): + """Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2 + box1: np.array of shape(4) + box2: np.array of shape(nx4) + returns: np.array of shape(n) + """ + + box2 = box2.transpose() + + # Get the coordinates of bounding boxes + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + + # Intersection area + inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * ( + np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1) + ).clip(0) + + # box2 area + box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps + + # Intersection over box2 area + return inter_area / box2_area + + +def wh_iou(wh1, wh2): + # Returns the nxm IoU matrix. 
wh1 is nx2, wh2 is mx2 + wh1 = wh1[:, None] # [N,1,2] + wh2 = wh2[None] # [1,M,2] + inter = torch.min(wh1, wh2).prod(2) # [N,M] + return inter / ( + wh1.prod(2) + wh2.prod(2) - inter + ) # iou = inter / (area1 + area2 - inter) + + +# Plots ---------------------------------------------------------------------------------------------------------------- + + +def plot_pr_curve(px, py, ap, save_dir="pr_curve.png", names=()): + # Precision-recall curve + fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) + py = np.stack(py, axis=1) + + if 0 < len(names) < 21: # display per-class legend if < 21 classes + for i, y in enumerate(py.T): + ax.plot( + px, y, linewidth=1, label=f"{names[i]} {ap[i, 0]:.3f}" + ) # plot(recall, precision) + else: + ax.plot(px, py, linewidth=1, color="grey") # plot(recall, precision) + + ax.plot( + px, + py.mean(1), + linewidth=3, + color="blue", + label="all classes %.3f mAP@0.5" % ap[:, 0].mean(), + ) + ax.set_xlabel("Recall") + ax.set_ylabel("Precision") + ax.set_xlim(0, 1) + ax.set_ylim(0, 1) + plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") + fig.savefig(Path(save_dir), dpi=250) + plt.close() + + +def plot_mc_curve( + px, py, save_dir="mc_curve.png", names=(), xlabel="Confidence", ylabel="Metric" +): + # Metric-confidence curve + fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) + + if 0 < len(names) < 21: # display per-class legend if < 21 classes + for i, y in enumerate(py): + ax.plot(px, y, linewidth=1, label=f"{names[i]}") # plot(confidence, metric) + else: + ax.plot(px, py.T, linewidth=1, color="grey") # plot(confidence, metric) + + y = py.mean(0) + ax.plot( + px, + y, + linewidth=3, + color="blue", + label=f"all classes {y.max():.2f} at {px[y.argmax()]:.3f}", + ) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + ax.set_xlim(0, 1) + ax.set_ylim(0, 1) + plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") + fig.savefig(Path(save_dir), dpi=250) + plt.close() From 6a706e26e526d2014ae2f585c817c1cef64146ba Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 13 Jul 2022 10:37:04 +0530 Subject: [PATCH 015/247] use seg_metrics --- utils/segment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment.py b/utils/segment.py index 89f6627a6259..7a32ce518033 100644 --- a/utils/segment.py +++ b/utils/segment.py @@ -5,7 +5,7 @@ import torch import torchvision from .general import xyxy2xywh, xywh2xyxy -from .metrics import box_iou +from .seg_metrics import box_iou def segment2box(segment, width=640, height=640): # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) 
to (xyxy) From 4c59f284566c6d8a07697570205ce64873ea78cd Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 13 Jul 2022 11:18:46 +0530 Subject: [PATCH 016/247] add TODOs --- evaluator.py | 2 ++ seg_augmentations.py | 4 ++- seg_dataloaders.py | 71 ++++---------------------------------------- utils/seg_loss.py | 5 ++-- 4 files changed, 13 insertions(+), 69 deletions(-) diff --git a/evaluator.py b/evaluator.py index 3ed19bc529b0..096befddeb5c 100644 --- a/evaluator.py +++ b/evaluator.py @@ -1,3 +1,5 @@ +# TODO: Optimize plotting, losses & merge with val.py + # YOLOv5 🚀 by Ultralytics, GPL-3.0 license """ Validate a trained YOLOv5 model accuracy on a custom dataset diff --git a/seg_augmentations.py b/seg_augmentations.py index 63055f640390..eddf1e31da22 100644 --- a/seg_augmentations.py +++ b/seg_augmentations.py @@ -1,3 +1,5 @@ +# TODO: Move to utils, merge with augmentations.py + # YOLOv5 🚀 by Ultralytics, GPL-3.0 license """ Image augmentation functions @@ -12,7 +14,7 @@ from utils.general import colorstr, check_version from utils.segment import segment2box, resample_segments -from utils.metrics import bbox_ioa +from utils.seg_metrics import bbox_ioa class Albumentations: diff --git a/seg_dataloaders.py b/seg_dataloaders.py index 31fb0a1872ba..7a3266e0e6f9 100644 --- a/seg_dataloaders.py +++ b/seg_dataloaders.py @@ -1,3 +1,5 @@ +## TODO: Move to utils, merge with dataloaders.py + # YOLOv5 🚀 by Ultralytics, GPL-3.0 license """ Dataloaders @@ -55,6 +57,7 @@ def __init__(self, sampler): def __iter__(self): while True: yield from iter(self.sampler) + class YoloBatchSampler(torchBatchSampler): """ This batch sampler will generate mini-batches of (mosaic, index) tuples from another sampler. @@ -70,71 +73,6 @@ def __iter__(self): for batch in super().__iter__(): yield [(self.augment, idx) for idx in batch] -def create_dataloader_ori( - path, - imgsz, - batch_size, - stride, - single_cls=False, - hyp=None, - augment=False, - cache=False, - pad=0.0, - rect=False, - rank=-1, - workers=8, - image_weights=False, - quad=False, - prefix="", - shuffle=False, - neg_dir="", - bg_dir="", - area_thr=0.2, - mask_head=False, - mask_downsample_ratio=1, -): - if rect and shuffle: - print("WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False") - shuffle = False - # Make sure only the first process in DDP process the dataset first, and the following others can use the cache - data_load = LoadImagesAndLabelsAndMasks if mask_head else LoadImagesAndLabels - with torch_distributed_zero_first(rank): - dataset = data_load( - path, - imgsz, - batch_size, - augment=augment, # augment images - hyp=hyp, # augmentation hyperparameters - rect=rect, # rectangular training - cache_images=cache, - single_cls=single_cls, - stride=int(stride), - pad=pad, - image_weights=image_weights, - prefix=prefix, - neg_dir=neg_dir, - bg_dir=bg_dir, - area_thr=area_thr, - ) - if mask_head: - dataset.downsample_ratio = mask_downsample_ratio - - batch_size = min(batch_size, len(dataset)) - nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers]) # number of workers - sampler = distributed.DistributedSampler(dataset, shuffle=shuffle) if rank != -1 else None - loader = DataLoader if image_weights else InfiniteDataLoader - # Use torch.utils.data.DataLoader() if dataset.properties will update during training else InfiniteDataLoader() - dataloader = loader( - dataset, - batch_size=batch_size, - num_workers=nw, - shuffle=shuffle and sampler is None, - sampler=sampler, - pin_memory=True, - 
collate_fn=data_load.collate_fn4 if quad else data_load.collate_fn, - ) - return dataloader, dataset - def create_dataloader( path, @@ -1196,6 +1134,7 @@ def hub_ops(f, max_dim=1920): return stats +# REFACTOR IN NEW FILE import os import glob import shutil @@ -1554,7 +1493,7 @@ def __iter__(self): yield next(self.iterator) -# NEW FILE +# REFACTOR IN A NEW FILE from PIL import Image, ImageDraw import numpy as np from PIL import ImageFile diff --git a/utils/seg_loss.py b/utils/seg_loss.py index d4cf26401bc6..d8d155739273 100644 --- a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -1,6 +1,8 @@ +# TODO: merge with loss.py.. Optimize speed + import torch from utils.torch_utils import de_parallel, is_parallel -from utils.general import xywh2xyxy +from utils.general import xywh2xyxy, Profile from utils.segment import mask_iou, masks_iou, crop import torch.nn.functional as F import torch.nn as nn @@ -134,7 +136,6 @@ def loss_segment(self, preds, targets, masks): tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets_for_masks( p, targets ) # targets - # Losses for i, pi in enumerate(p): # layer index, layer predictions b, a, gj, gi = indices[i] # image, anchor, gridy, gridx From 4aef933c072db5e33a68999a0992125da302f717 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 13 Jul 2022 14:15:03 +0530 Subject: [PATCH 017/247] increase line length --- evaluator.py | 414 +++++++---------------------- seg_augmentations.py | 147 +++------- seg_dataloaders.py | 621 ++++++++++--------------------------------- utils/seg_loss.py | 170 ++++-------- utils/seg_metrics.py | 180 +++---------- utils/segment.py | 103 +++---- 6 files changed, 375 insertions(+), 1260 deletions(-) diff --git a/evaluator.py b/evaluator.py index 096befddeb5c..636c73482c98 100644 --- a/evaluator.py +++ b/evaluator.py @@ -15,47 +15,26 @@ import numpy as np import torch import torch.nn.functional as F -#import pycocotools.mask as mask_util +from PIL import Image +# import pycocotools.mask as mask_util from tqdm import tqdm from models.experimental import attempt_load from seg_dataloaders import create_dataloader -from utils.general import ( - coco80_to_coco91_class, - increment_path, - colorstr, -) -from utils.general import ( - check_dataset, - check_img_size, - check_suffix, -) -from utils.general import ( - box_iou, - non_max_suppression, - scale_coords, - xyxy2xywh, - xywh2xyxy, -) -from utils.segment import ( - non_max_suppression_masks, - mask_iou, - process_mask, - process_mask_upsample, - scale_masks, -) -from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix +from utils.general import (box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, ) +from utils.general import (check_dataset, check_img_size, check_suffix, ) +from utils.general import (coco80_to_coco91_class, increment_path, colorstr, ) from utils.plots import output_to_target, plot_images_boxes_and_masks +from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix +from utils.segment import (non_max_suppression_masks, mask_iou, process_mask, process_mask_upsample, scale_masks, ) from utils.torch_utils import select_device, time_sync -from PIL import Image + def save_one_txt(predn, save_conf, shape, file): # Save one txt result gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh for *xyxy, conf, cls in predn.tolist(): - xywh = ( - (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() - ) # normalized xywh + xywh = ((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / 
gn).view(-1).tolist()) # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format with open(file, "a") as f: f.write(("%g " * len(line)).rstrip() % line + "\n") @@ -69,20 +48,13 @@ def save_one_json(predn, jdict, path, class_map, pred_masks=None): if pred_masks is not None: pred_masks = np.transpose(pred_masks, (2, 0, 1)) - rles = [ - mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] - for mask in pred_masks - ] + rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks] for rle in rles: rle["counts"] = rle["counts"].decode("utf-8") for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): - pred_dict = { - "image_id": image_id, - "category_id": class_map[int(p[5])], - "bbox": [round(x, 3) for x in b], - "score": round(p[4], 5), - } + pred_dict = {"image_id": image_id, "category_id": class_map[int(p[5])], "bbox": [round(x, 3) for x in b], + "score": round(p[4], 5), } if pred_masks is not None: pred_dict["segmentation"] = rles[i] jdict.append(pred_dict) @@ -90,25 +62,9 @@ def save_one_json(predn, jdict, path, class_map, pred_masks=None): @torch.no_grad() class Yolov5Evaluator: - def __init__( - self, - data, - conf_thres=0.001, - iou_thres=0.6, - device="", - single_cls=False, - augment=False, - verbose=False, - project="runs/val", - name="exp", - exist_ok=False, - half=True, - save_dir=Path(""), - nosave=False, - plots=True, - mask=False, - mask_downsample_ratio=1, - ) -> None: + def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls=False, augment=False, verbose=False, + project="runs/val", name="exp", exist_ok=False, half=True, save_dir=Path(""), nosave=False, plots=True, + mask=False, mask_downsample_ratio=1, ) -> None: self.data = check_dataset(data) # check self.conf_thres = conf_thres # confidence threshold self.iou_thres = iou_thres # NMS IoU threshold @@ -132,40 +88,14 @@ def __init__( self.confusion_matrix = ConfusionMatrix(nc=self.nc) self.dt = [0.0, 0.0, 0.0] self.names = {k: v for k, v in enumerate(self.data["names"])} - self.s = ( - ("%20s" + "%11s" * 10) - % ( - "Class", - "Images", - "Labels", - "Box:{P", - "R", - "mAP@.5", - "mAP@.5:.95}", - "Mask:{P", - "R", - "mAP@.5", - "mAP@.5:.95}", - ) - if self.mask - else ("%20s" + "%11s" * 6) - % ( - "Class", - "Images", - "Labels", - "P", - "R", - "mAP@.5", - "mAP@.5:.95", - ) - ) + self.s = (("%20s" + "%11s" * 10) % ( + "Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", "Mask:{P", "R", "mAP@.5", + "mAP@.5:.95}",) if self.mask else ("%20s" + "%11s" * 6) % ( + "Class", "Images", "Labels", "P", "R", "mAP@.5", "mAP@.5:.95",)) # coco stuff - self.is_coco = isinstance(self.data.get("val"), str) and self.data[ - "val" - ].endswith( - "coco/val2017.txt" - ) # COCO dataset + self.is_coco = isinstance(self.data.get("val"), str) and self.data["val"].endswith( + "coco/val2017.txt") # COCO dataset self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000)) self.jdict = [] self.iou_thres = 0.65 if self.is_coco else self.iou_thres @@ -192,9 +122,7 @@ def run_training(self, model, dataloader, compute_loss=None): # inference # masks will be `None` if training objection. 
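# A rough sketch of the per-image flow in the loop below (names as in this
# patch; gt masks are presumably `None` when training plain box detection):
#     pred_maski = self.get_predmasks(pred, proto_out, gt_masksi.shape[1:])       # via process_mask()
#     correct_b = self.process_batch(predn, labelsn, self.iouv)                   # box matches
#     correct_m = self.process_batch_masks(predn, pred_maski, gt_maski, labelsn)  # mask matches
# Boxes and masks are matched to labels independently, at the same ten IoU
# thresholds held in self.iouv.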
-        for batch_i, (img, targets, paths, shapes, masks) in enumerate(
-            tqdm(dataloader, desc=self.s)
-        ):
+        for batch_i, (img, targets, paths, shapes, masks) in enumerate(tqdm(dataloader, desc=self.s)):
             # reset pred_masks
             self.pred_masks = []
             img = img.to(self.device, non_blocking=True)
@@ -213,11 +141,8 @@ def run_training(self, model, dataloader, compute_loss=None):
                 # get prediction masks
                 proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
-                pred_maski = self.get_predmasks(
-                    pred,
-                    proto_out,
-                    gt_masksi.shape[1:] if gt_masksi is not None else None,
-                )
+                pred_maski = self.get_predmasks(pred, proto_out,
+                                                gt_masksi.shape[1:] if gt_masksi is not None else None, )

                 # for visualization
                 if self.plots and batch_i < 3 and pred_maski is not None:
@@ -234,29 +159,12 @@ def run_training(self, model, dataloader, compute_loss=None):
         # Return results
         model.float()  # for training
-        return (
-            (
-                *self.metric.mean_results(),
-                *(self.total_loss.cpu() / len(dataloader)).tolist(),
-            ),
-            self.metric.get_maps(self.nc),
-            t,
-        )
-
-    def run(
-        self,
-        weights,
-        batch_size,
-        imgsz,
-        save_txt=False,
-        save_conf=False,
-        save_json=False,
-        task="val",
-    ):
+        return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),),
+                self.metric.get_maps(self.nc), t,)
+
+    def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_json=False, task="val", ):
         """This is for native evaluation."""
-        model, dataloader, imgsz = self.before_infer(
-            weights, batch_size, imgsz, save_txt, task
-        )
+        model, dataloader, imgsz = self.before_infer(weights, batch_size, imgsz, save_txt, task)
         self.seen = 0
         # self.iouv.to(self.device)
         self.half &= self.device.type != "cpu"  # half precision only supported on CUDA
@@ -265,9 +173,7 @@ def run(
         model.eval()

         # inference
-        for batch_i, (img, targets, paths, shapes, masks) in enumerate(
-            tqdm(dataloader, desc=self.s)
-        ):
+        for batch_i, (img, targets, paths, shapes, masks) in enumerate(tqdm(dataloader, desc=self.s)):
             # reset pred_masks
             self.pred_masks = []
             img = img.to(self.device, non_blocking=True)
@@ -289,11 +195,8 @@ def run(
                 # get prediction masks
                 proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
-                pred_maski = self.get_predmasks(
-                    pred,
-                    proto_out,
-                    gt_masksi.shape[1:] if gt_masksi is not None else None,
-                )
+                pred_maski = self.get_predmasks(pred, proto_out,
+                                                gt_masksi.shape[1:] if gt_masksi is not None else None, )

                 # for visualization
                 if self.plots and batch_i < 3 and pred_maski is not None:
@@ -310,37 +213,21 @@ def run(
                 # clone() so that plot_images works correctly
                 predn = pred.clone()
                 # test-time letterboxing adds 0.5 padding, which differs from the padding
                # used by the dataloader, so ratio_pad must be passed in here
-                scale_coords(
-                    img[si].shape[1:], predn[:, :4], shape, ratio_pad
-                )  # native-space pred
+                scale_coords(img[si].shape[1:], predn[:, :4], shape, ratio_pad)  # native-space pred
+
                 # Save/log
                 if save_txt and self.save_dir.exists():
                     # NOTE: convert coords to native space when saving txt.
                     # supports saving box predictions only
-                    save_one_txt(
-                        predn,
-                        save_conf,
-                        shape,
-                        file=self.save_dir / "labels" / (path.stem + ".txt"),
-                    )
+                    save_one_txt(predn, save_conf, shape, file=self.save_dir / "labels" / (path.stem + ".txt"), )
                 if save_json and self.save_dir.exists():
                     # NOTE: convert coords to native space when saving json.
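# A minimal sketch of the RLE step save_one_json performs for each predicted
# mask (pycocotools is imported as mask_util, currently commented out at the
# top of this file):
#     rle = mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0]
#     rle["counts"] = rle["counts"].decode("utf-8")  # bytes -> str so json.dump works
# which makes the saved JSON scorable with the official COCO mask AP tooling.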
# if pred_maski is not None: # h, w, n - pred_maski = scale_masks( - img[si].shape[1:], - pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(), - shape, - ratio_pad, - ) - save_one_json( - predn, - self.jdict, - path, - self.class_map, - pred_maski, - ) # append to COCO-JSON dictionary + pred_maski = scale_masks(img[si].shape[1:], pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(), + shape, ratio_pad, ) + save_one_json(predn, self.jdict, path, self.class_map, + pred_maski, ) # append to COCO-JSON dictionary if self.plots and batch_i < 3: self.plot_images(batch_i, img, targets, masks, out, paths) @@ -357,42 +244,24 @@ def run( # Print speeds shape = (batch_size, 3, imgsz, imgsz) - print( - f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" - % t - ) + print(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" % t) s = ( - f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}" - if save_txt and self.save_dir.exists() - else "" - ) - print( - f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}" - ) + f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}" if save_txt and self.save_dir.exists() else "") + print(f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}") # Return results - return ( - ( - *self.metric.mean_results(), - *(self.total_loss.cpu() / len(dataloader)).tolist(), - ), - self.metric.get_maps(self.nc), - t, - ) + return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),), + self.metric.get_maps(self.nc), t,) def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"): "prepare for evaluation without training." 
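# Hypothetical end-to-end use of this evaluator (the yaml and weights names
# are illustrative, not part of this patch):
#     evaluator = Yolov5Evaluator(data="data/coco128.yaml", mask=True)
#     metrics, maps, times = evaluator.run(weights="weights/seg.pt", batch_size=32, imgsz=640)
# run() prepares the model and dataloader here in before_infer(), then
# mirrors run_training() but additionally handles txt/JSON saving, plotting
# and speed reporting.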
self.device = select_device(self.device, batch_size=batch_size) # Directories - self.save_dir = increment_path( - Path(self.project) / self.name, exist_ok=self.exist_ok - ) # increment run + self.save_dir = increment_path(Path(self.project) / self.name, exist_ok=self.exist_ok) # increment run if not self.nosave: - (self.save_dir / "labels" if save_txt else self.save_dir).mkdir( - parents=True, exist_ok=True - ) # make dir + (self.save_dir / "labels" if save_txt else self.save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model check_suffix(weights, ".pt") @@ -402,27 +271,11 @@ def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"): # Data if self.device.type != "cpu": - model( - torch.zeros(1, 3, imgsz, imgsz) - .to(self.device) - .type_as(next(model.parameters())) - ) # run once + model(torch.zeros(1, 3, imgsz, imgsz).to(self.device).type_as(next(model.parameters()))) # run once pad = 0.0 if task == "speed" else 0.5 - task = ( - task if task in ("train", "val", "test") else "val" - ) # path to train/val/test images - dataloader = create_dataloader( - self.data[task], - imgsz, - batch_size, - gs, - self.single_cls, - pad=pad, - rect=True, - prefix=colorstr(f"{task}: "), - mask_head=self.mask, - mask_downsample_ratio=self.mask_downsample_ratio, - )[0] + task = (task if task in ("train", "val", "test") else "val") # path to train/val/test images + dataloader = create_dataloader(self.data[task], imgsz, batch_size, gs, self.single_cls, pad=pad, rect=True, + prefix=colorstr(f"{task}: "), mask_head=self.mask, mask_downsample_ratio=self.mask_downsample_ratio, )[0] return model, dataloader, imgsz def inference(self, model, img, targets, masks=None, compute_loss=None): @@ -435,29 +288,18 @@ def inference(self, model, img, targets, masks=None, compute_loss=None): self.dt[0] += t2 - t1 # Run model - out, train_out = model( - img, augment=self.augment - ) # inference and training outputs + out, train_out = model(img, augment=self.augment) # inference and training outputs self.dt[1] += time_sync() - t2 # Compute loss if compute_loss: - self.total_loss += compute_loss(train_out, targets, masks)[ - 1 - ] # box, obj, cls + self.total_loss += compute_loss(train_out, targets, masks)[1] # box, obj, cls # Run NMS - targets[:, 2:] *= torch.Tensor([width, height, width, height]).to( - self.device - ) # to pixels + targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(self.device) # to pixels t3 = time_sync() - out = self.nms( - prediction=out, - conf_thres=self.conf_thres, - iou_thres=self.iou_thres, - multi_label=True, - agnostic=self.single_cls, - ) + out = self.nms(prediction=out, conf_thres=self.conf_thres, iou_thres=self.iou_thres, multi_label=True, + agnostic=self.single_cls, ) self.dt[2] += time_sync() - t3 return out, train_out @@ -468,25 +310,18 @@ def after_infer(self): """ # Plot confusion matrix if self.plots and self.save_dir.exists(): - self.confusion_matrix.plot( - save_dir=self.save_dir, names=list(self.names.values()) - ) + self.confusion_matrix.plot(save_dir=self.save_dir, names=list(self.names.values())) # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*self.stats)] # to numpy box_or_mask_any = stats[0].any() or stats[1].any() stats = stats[1:] if not self.mask else stats if len(stats) and box_or_mask_any: - results = self.ap_per_class( - *stats, - self.plots, - self.save_dir if self.save_dir.exists() else None, - self.names, - ) + results = self.ap_per_class(*stats, self.plots, self.save_dir if self.save_dir.exists() else None, + 
self.names, ) self.metric.update(results) - nt = np.bincount( - stats[(3 if not self.mask else 4)].astype(np.int64), minlength=self.nc - ) # number of targets per class + nt = np.bincount(stats[(3 if not self.mask else 4)].astype(np.int64), + minlength=self.nc) # number of targets per class else: nt = torch.zeros(1) @@ -506,19 +341,13 @@ def process_batch(self, detections, labels, iouv): Returns: correct (Array[N, 10]), for 10 IoU levels """ - correct = torch.zeros( - detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device - ) + correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device) iou = box_iou(labels[:, 1:], detections[:, :4]) x = torch.where( - (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5]) - ) # IoU above threshold and classes match + (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5])) # IoU above threshold and classes match if x[0].shape[0]: matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) - .cpu() - .numpy() - ) # [label, detection, iou] + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -546,53 +375,29 @@ def get_predmasks(self, pred, proto_out, gt_shape): if proto_out is None or len(pred) == 0: return None process = process_mask_upsample if self.plots else process_mask - gt_shape = ( - gt_shape[0] * self.mask_downsample_ratio, - gt_shape[1] * self.mask_downsample_ratio, - ) + gt_shape = (gt_shape[0] * self.mask_downsample_ratio, gt_shape[1] * self.mask_downsample_ratio,) # n, h, w - pred_mask = ( - process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape) - .permute(2, 0, 1) - .contiguous() - ) + pred_mask = (process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape).permute(2, 0, 1).contiguous()) return pred_mask def process_batch_masks(self, predn, pred_maski, gt_masksi, labels): - assert not ( - (pred_maski is None) ^ (gt_masksi is None) - ), "`proto_out` and `gt_masksi` should be both None or both exist." + assert not ((pred_maski is None) ^ ( + gt_masksi is None)), "`proto_out` and `gt_masksi` should be both None or both exist." 
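# mask_iou (utils/segment.py) receives masks flattened to (N, H*W). For
# binary float masks it reduces to this sketch (an assumption about the
# helper, whose body is not shown in this patch):
#     inter = gt_flat @ pred_flat.T                        # (N_gt, N_pred)
#     union = gt_flat.sum(1)[:, None] + pred_flat.sum(1) - inter
#     iou = inter / (union + 1e-7)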
if pred_maski is None and gt_masksi is None: return torch.zeros(0, self.niou, dtype=torch.bool) - correct = torch.zeros( - predn.shape[0], - self.iouv.shape[0], - dtype=torch.bool, - device=self.iouv.device, - ) + correct = torch.zeros(predn.shape[0], self.iouv.shape[0], dtype=torch.bool, device=self.iouv.device, ) if not self.plots: - gt_masksi = F.interpolate( - gt_masksi.unsqueeze(0), - pred_maski.shape[1:], - mode="bilinear", - align_corners=False, - ).squeeze(0) - - iou = mask_iou( - gt_masksi.view(gt_masksi.shape[0], -1), - pred_maski.view(pred_maski.shape[0], -1), - ) + gt_masksi = F.interpolate(gt_masksi.unsqueeze(0), pred_maski.shape[1:], mode="bilinear", + align_corners=False, ).squeeze(0) + + iou = mask_iou(gt_masksi.view(gt_masksi.shape[0], -1), pred_maski.view(pred_maski.shape[0], -1), ) x = torch.where( - (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5]) - ) # IoU above threshold and classes match + (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match if x[0].shape[0]: matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) - .cpu() - .numpy() - ) # [label, detection, iou] + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -609,15 +414,9 @@ def compute_stat(self, predn, pred_maski, labels, gt_maski): if len(predn) == 0: if nl: - self.stats.append( - ( - torch.zeros(0, self.niou, dtype=torch.bool), # boxes - torch.zeros(0, self.niou, dtype=torch.bool), # masks - torch.Tensor(), - torch.Tensor(), - tcls, - ) - ) + self.stats.append((torch.zeros(0, self.niou, dtype=torch.bool), # boxes + torch.zeros(0, self.niou, dtype=torch.bool), # masks + torch.Tensor(), torch.Tensor(), tcls,)) return # Predictions @@ -632,24 +431,15 @@ def compute_stat(self, predn, pred_maski, labels, gt_maski): correct_boxes = self.process_batch(predn, labelsn, self.iouv) # masks - correct_masks = self.process_batch_masks( - predn, pred_maski, gt_maski, labelsn - ) + correct_masks = self.process_batch_masks(predn, pred_maski, gt_maski, labelsn) if self.plots: self.confusion_matrix.process_batch(predn, labelsn) else: correct_boxes = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) correct_masks = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) - self.stats.append( - ( - correct_masks.cpu(), - correct_boxes.cpu(), - predn[:, 4].cpu(), - predn[:, 5].cpu(), - tcls, - ) - ) # (correct, conf, pcls, tcls) + self.stats.append((correct_masks.cpu(), correct_boxes.cpu(), predn[:, 4].cpu(), predn[:, 5].cpu(), + tcls,)) # (correct, conf, pcls, tcls) def print_metric(self, nt, stats): # Print results @@ -660,9 +450,7 @@ def print_metric(self, nt, stats): # TODO: self.seen support verbose. 
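# The verbose branch below emits one row per class with the same columns as
# the summary row: class name, images seen, label count, then the box (and,
# with mask=True, mask) P, R, mAP@.5 and mAP@.5:.95 values returned by
# self.metric.class_result(i).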
if self.verbose and self.nc > 1 and len(stats): for i, c in enumerate(self.metric.ap_class_index): - print( - pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i)) - ) + print(pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i))) def plot_images(self, i, img, targets, masks, out, paths): if not self.save_dir.exists(): @@ -670,47 +458,27 @@ def plot_images(self, i, img, targets, masks, out, paths): # plot ground truth f = self.save_dir / f"val_batch{i}_labels.jpg" # labels - Thread( - target=plot_images_boxes_and_masks, - args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), - daemon=True, - ).start() + Thread(target=plot_images_boxes_and_masks, args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), + daemon=True, ).start() f = self.save_dir / f"val_batch{i}_pred.jpg" # predictions # plot predition if len(self.pred_masks): - pred_masks = ( - torch.cat(self.pred_masks, dim=0) - if len(self.pred_masks) > 1 - else self.pred_masks[0] - ) + pred_masks = (torch.cat(self.pred_masks, dim=0) if len(self.pred_masks) > 1 else self.pred_masks[0]) else: pred_masks = None - Thread( - target=plot_images_boxes_and_masks, - args=( - img, - output_to_target(out), - pred_masks, - paths, - f, - self.names, - max(img.shape[2:]), - ), - daemon=True, - ).start() + Thread(target=plot_images_boxes_and_masks, + args=(img, output_to_target(out), pred_masks, paths, f, self.names, max(img.shape[2:]),), + daemon=True, ).start() import wandb if wandb.run: - res = plot_images_boxes_and_masks(img, output_to_target(out), pred_masks, paths, f, self.names, max(img.shape[2:])) + res = plot_images_boxes_and_masks(img, output_to_target(out), pred_masks, paths, f, self.names, + max(img.shape[2:])) res = Image.fromarray(res) - wandb.log({f"pred_{i}":wandb.Image(res)}) + wandb.log({f"pred_{i}": wandb.Image(res)}) def nms(self, **kwargs): - return ( - non_max_suppression_masks(**kwargs) - if self.mask - else non_max_suppression(**kwargs) - ) + return (non_max_suppression_masks(**kwargs) if self.mask else non_max_suppression(**kwargs)) def ap_per_class(self, *args): return ap_per_class_box_and_mask(*args) if self.mask else ap_per_class(*args) diff --git a/seg_augmentations.py b/seg_augmentations.py index eddf1e31da22..409e021772b3 100644 --- a/seg_augmentations.py +++ b/seg_augmentations.py @@ -13,8 +13,8 @@ import numpy as np from utils.general import colorstr, check_version -from utils.segment import segment2box, resample_segments from utils.seg_metrics import bbox_ioa +from utils.segment import segment2box, resample_segments class Albumentations: @@ -26,23 +26,11 @@ def __init__(self): check_version(A.__version__, "1.0.3") # version requirement - self.transform = A.Compose( - [ - A.Blur(p=0.01), - A.MedianBlur(p=0.01), - A.ToGray(p=0.01), - A.CLAHE(p=0.01), - A.RandomBrightnessContrast(p=0.0), - A.RandomGamma(p=0.0), - A.ImageCompression(quality_lower=75, p=0.0), - ], - bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]), - ) - - logging.info( - colorstr("albumentations: ") - + ", ".join(f"{x}" for x in self.transform.transforms if x.p) - ) + self.transform = A.Compose([A.Blur(p=0.01), A.MedianBlur(p=0.01), A.ToGray(p=0.01), A.CLAHE(p=0.01), + A.RandomBrightnessContrast(p=0.0), A.RandomGamma(p=0.0), A.ImageCompression(quality_lower=75, p=0.0), ], + bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]), ) + + logging.info(colorstr("albumentations: ") + ", ".join(f"{x}" for x in self.transform.transforms if x.p)) except ImportError: # 
package not installed, skip pass except Exception as e: @@ -50,12 +38,8 @@ def __init__(self): def __call__(self, im, labels, p=1.0): if self.transform and random.random() < p: - new = self.transform( - image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0] - ) # transformed - im, labels = new["image"], np.array( - [[c, *b] for c, b in zip(new["class_labels"], new["bboxes"])] - ) + new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed + im, labels = new["image"], np.array([[c, *b] for c, b in zip(new["class_labels"], new["bboxes"])]) return im, labels @@ -71,9 +55,7 @@ def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5): lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) lut_val = np.clip(x * r[2], 0, 255).astype(dtype) - im_hsv = cv2.merge( - (cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)) - ) + im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed @@ -85,9 +67,7 @@ def hist_equalize(im, clahe=True, bgr=False): yuv[:, :, 0] = c.apply(yuv[:, :, 0]) else: yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram - return cv2.cvtColor( - yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB - ) # convert YUV image to RGB + return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB def replicate(im, labels): @@ -99,9 +79,7 @@ def replicate(im, labels): for i in s.argsort()[: round(s.size * 0.5)]: # smallest indices x1b, y1b, x2b, y2b = boxes[i] bh, bw = y2b - y1b, x2b - x1b - yc, xc = int(random.uniform(0, h - bh)), int( - random.uniform(0, w - bw) - ) # offset x, y + yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh] im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] # im4[ymin:ymax, xmin:xmax] labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0) @@ -109,15 +87,8 @@ def replicate(im, labels): return im, labels -def letterbox( - im, - new_shape=(640, 640), - color=(114, 114, 114), - auto=True, - scaleFill=False, - scaleup=True, - stride=32, - center=True, # center padding or left top padding +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32, + center=True, # center padding or left top padding ): # Resize and pad image while meeting stride-multiple constraints shape = im.shape[:2] # current shape [height, width] @@ -148,25 +119,12 @@ def letterbox( im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) top, bottom = int(round(dh - 0.1)) if center else 0, int(round(dh + 0.1)) left, right = int(round(dw - 0.1)) if center else 0, int(round(dw + 0.1)) - im = cv2.copyMakeBorder( - im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color - ) # add border + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border return im, ratio, (dw, dh) -def random_perspective( - im, - targets=(), - segments=(), - degrees=10, - translate=0.1, - scale=0.1, - shear=10, - perspective=0.0, - border=(0, 0), - area_thr=0.2, - return_seg=False, -): +def random_perspective(im, targets=(), segments=(), degrees=10, translate=0.1, scale=0.1, shear=10, perspective=0.0, + border=(0, 0), area_thr=0.2, return_seg=False, ): # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) # targets = [cls, xyxy] @@ -198,24 +156,16 @@ def 
random_perspective( # Translation T = np.eye(3) - T[0, 2] = ( - random.uniform(0.5 - translate, 0.5 + translate) * width - ) # x translation (pixels) - T[1, 2] = ( - random.uniform(0.5 - translate, 0.5 + translate) * height - ) # y translation (pixels) + T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels) + T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels) # Combined rotation matrix M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed if perspective: - im = cv2.warpPerspective( - im, M, dsize=(width, height), borderValue=(114, 114, 114) - ) + im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) else: # affine - im = cv2.warpAffine( - im, M[:2], dsize=(width, height), borderValue=(114, 114, 114) - ) + im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) # Visualize # import matplotlib.pyplot as plt @@ -235,9 +185,7 @@ def random_perspective( xy = np.ones((len(segment), 3)) xy[:, :2] = segment xy = xy @ M.T # transform - xy = ( - xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] - ) # perspective rescale or affine + xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine # clip new[i] = segment2box(xy, width, height) @@ -245,38 +193,26 @@ def random_perspective( else: # warp boxes xy = np.ones((n * 4, 3)) - xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape( - n * 4, 2 - ) # x1y1, x2y2, x1y2, x2y1 + xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 xy = xy @ M.T # transform - xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape( - n, 8 - ) # perspective rescale or affine + xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine # create new boxes x = xy[:, [0, 2, 4, 6]] y = xy[:, [1, 3, 5, 7]] - new = ( - np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T - ) + new = (np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T) # clip new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) # filter candidates - i = box_candidates( - box1=targets[:, 1:5].T * s, - box2=new.T, - cls=targets[:, 0], + i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, cls=targets[:, 0], # area_thr=0.01 if use_segments else 0.10, - area_thr=area_thr, - ) + area_thr=area_thr, ) targets = targets[i] targets[:, 1:5] = new[i] - new_segments = ( - np.array(new_segments)[i] if len(new_segments) else np.array(new_segments) - ) + new_segments = (np.array(new_segments)[i] if len(new_segments) else np.array(new_segments)) return (im, targets, new_segments) if return_seg else (im, targets) @@ -294,13 +230,7 @@ def copy_paste(im, labels, segments, p=0.5): if (ioa < 0.30).all(): # allow 30% obscuration of existing labels labels = np.concatenate((labels, [[l[0], *box]]), 0) segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)) - cv2.drawContours( - im_new, - [segments[j].astype(np.int32)], - -1, - (255, 255, 255), - cv2.FILLED, - ) + cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED, ) result = cv2.bitwise_and(src1=im, src2=im_new) result = cv2.flip(result, 1) # augment segments (flip left-right) @@ -315,9 +245,7 @@ def cutout(im, labels, p=0.5): # Applies image cutout augmentation 
https://arxiv.org/abs/1708.04552 if random.random() < p: h, w = im.shape[:2] - scales = ( - [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 - ) # image size fraction + scales = ([0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16) # image size fraction for s in scales: mask_h = random.randint(1, int(h * s)) # create random masks mask_w = random.randint(1, int(w * s)) @@ -348,23 +276,12 @@ def mixup(im, labels, im2, labels2): return im, labels -def box_candidates( - box1, box2, cls, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16 -): # box1(4,n), box2(4,n) +def box_candidates(box1, box2, cls, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n) # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio w1, h1 = box1[2] - box1[0], box1[3] - box1[1] w2, h2 = box2[2] - box2[0], box2[3] - box2[1] - area_thr = ( - np.array(area_thr)[cls.astype(np.int)] - if isinstance(area_thr, list) - else area_thr - ) + area_thr = (np.array(area_thr)[cls.astype(np.int)] if isinstance(area_thr, list) else area_thr) if isinstance(area_thr, list) and len(area_thr) == 1: area_thr = area_thr[0] ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio - return ( - (w2 > wh_thr) - & (h2 > wh_thr) - & (w2 * h2 / (w1 * h1 + eps) > area_thr) - & (ar < ar_thr) - ) # candidates \ No newline at end of file + return ((w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)) # candidates diff --git a/seg_dataloaders.py b/seg_dataloaders.py index 7a3266e0e6f9..32f3e0af7127 100644 --- a/seg_dataloaders.py +++ b/seg_dataloaders.py @@ -5,45 +5,29 @@ Dataloaders """ -import glob +import json import logging -import os import time -import json -import yaml -import random +from functools import wraps from itertools import repeat from multiprocessing.pool import ThreadPool, Pool -from PIL import Image from pathlib import Path -from functools import wraps from zipfile import ZipFile -import cv2 -import numpy as np -import torch import torch.nn.functional as F -from torch.utils.data import distributed +import yaml from torch.utils.data import Dataset as torchDataset +from torch.utils.data import distributed +from torch.utils.data.sampler import BatchSampler as torchBatchSampler from torch.utils.data.sampler import RandomSampler +from torch.utils.data.sampler import Sampler from tqdm import tqdm - -from seg_augmentations import ( - Albumentations, - augment_hsv, - copy_paste, - letterbox, - mixup, - random_perspective, -) +from seg_augmentations import (Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective, ) from utils.general import colorstr, check_dataset, check_yaml, xywhn2xyxy, xyxy2xywhn, xyn2xy from utils.torch_utils import torch_distributed_zero_first -from torch.utils.data.sampler import BatchSampler as torchBatchSampler -from torch.utils.data.sampler import Sampler - class _RepeatSampler: """ Sampler that repeats forever @@ -58,6 +42,7 @@ def __iter__(self): while True: yield from iter(self.sampler) + class YoloBatchSampler(torchBatchSampler): """ This batch sampler will generate mini-batches of (mosaic, index) tuples from another sampler. 
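A minimal, self-contained sketch of the same (augment, index) batching idea (illustrative only, not part of this patch; the class and variable names below are hypothetical):

    # Wrap another sampler so each mini-batch yields (flag, index) tuples,
    # letting Dataset.__getitem__ decide per batch whether to build a mosaic.
    from torch.utils.data.sampler import BatchSampler, SequentialSampler

    class FlaggedBatchSampler(BatchSampler):
        def __init__(self, *args, flag=True, **kwargs):
            super().__init__(*args, **kwargs)
            self.flag = flag

        def __iter__(self):
            for batch in super().__iter__():
                yield [(self.flag, idx) for idx in batch]

    # usage: every index in a batch carries the same augment flag
    sampler = FlaggedBatchSampler(SequentialSampler(range(8)), batch_size=4, drop_last=False, flag=True)
    print(next(iter(sampler)))  # [(True, 0), (True, 1), (True, 2), (True, 3)]
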
@@ -74,82 +59,33 @@ def __iter__(self): yield [(self.augment, idx) for idx in batch] -def create_dataloader( - path, - imgsz, - batch_size, - stride, - single_cls=False, - hyp=None, - augment=False, - cache=False, - pad=0.0, - rect=False, - rank=-1, - workers=8, - image_weights=False, - quad=False, - prefix="", - shuffle=False, - neg_dir="", - bg_dir="", - area_thr=0.2, - mask_head=False, - mask_downsample_ratio=1, -): +def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0, + rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix="", shuffle=False, neg_dir="", + bg_dir="", area_thr=0.2, mask_head=False, mask_downsample_ratio=1, ): if rect and shuffle: print("WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False") shuffle = False data_load = LoadImagesAndLabelsAndMasks if mask_head else LoadImagesAndLabels # Make sure only the first process in DDP process the dataset first, and the following others can use the cache with torch_distributed_zero_first(rank): - dataset = data_load( - path, - imgsz, - batch_size, - augment=augment, # augment images + dataset = data_load(path, imgsz, batch_size, augment=augment, # augment images hyp=hyp, # augmentation hyperparameters rect=rect, # rectangular training - cache_images=cache, - single_cls=single_cls, - stride=int(stride), - pad=pad, - image_weights=image_weights, - prefix=prefix, - neg_dir=neg_dir, - bg_dir=bg_dir, - area_thr=area_thr, - ) + cache_images=cache, single_cls=single_cls, stride=int(stride), pad=pad, image_weights=image_weights, + prefix=prefix, neg_dir=neg_dir, bg_dir=bg_dir, area_thr=area_thr, ) if mask_head: dataset.downsample_ratio = mask_downsample_ratio batch_size = min(batch_size, len(dataset)) nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers]) # number of workers # sampler = InfiniteSampler(len(dataset), seed=0) - sampler = ( - distributed.DistributedSampler(dataset, shuffle=shuffle) - if rank != -1 - else RandomSampler(dataset) - ) + sampler = (distributed.DistributedSampler(dataset, shuffle=shuffle) if rank != -1 else RandomSampler(dataset)) - batch_sampler = ( - YoloBatchSampler( - sampler=sampler, - batch_size=batch_size, - drop_last=False, - augment=augment, - ) - if not rect - else None - ) - dataloader = DataLoader( - dataset, - num_workers=nw, - batch_size=1 - if batch_sampler is not None - else batch_size, # batch-size and batch-sampler is exclusion - batch_sampler=batch_sampler, - pin_memory=True, + batch_sampler = (YoloBatchSampler(sampler=sampler, batch_size=batch_size, drop_last=False, + augment=augment, ) if not rect else None) + dataloader = DataLoader(dataset, num_workers=nw, batch_size=1 if batch_sampler is not None else batch_size, + # batch-size and batch-sampler is exclusion + batch_sampler=batch_sampler, pin_memory=True, collate_fn=data_load.collate_fn4 if quad else data_load.collate_fn, # Make sure each process has different random seed, especially for 'fork' method. # Check https://github.com/pytorch/pytorch/issues/63311 for more details. 
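For reference, a hedged usage sketch of this new entry point (illustrative: the dataset path and hyp dict are placeholders, and it assumes the function returns the loader itself, since the actual return statement falls outside this hunk):

    train_loader = create_dataloader(
        "data/coco128-seg/images/train",   # hypothetical dataset path
        imgsz=640,
        batch_size=16,
        stride=32,
        hyp=hyp,                           # augmentation hyperparameters (dict)
        augment=True,
        rank=-1,                           # single-process, non-DDP
        shuffle=True,
        mask_head=True,                    # selects LoadImagesAndLabelsAndMasks
        mask_downsample_ratio=4,           # masks returned at 1/4 resolution
    )
    for batch in train_loader:
        ...                                # batches additionally carry instance masks
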
@@ -203,32 +139,14 @@ class LoadImagesAndLabels(Dataset): # YOLOv5 train_loader/val_loader, loads images and labels for training and validation cache_version = 0.6 # dataset labels *.cache version - def __init__( - self, - path, - img_size=640, - batch_size=16, - augment=False, - hyp=None, - rect=False, - image_weights=False, - cache_images=False, - single_cls=False, - stride=32, - pad=0.0, - prefix="", - neg_dir="", - bg_dir="", - area_thr=0.2, - ): + def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, + cache_images=False, single_cls=False, stride=32, pad=0.0, prefix="", neg_dir="", bg_dir="", area_thr=0.2, ): super().__init__(augment=augment) self.img_size = img_size self.hyp = hyp self.image_weights = image_weights self.rect = False if image_weights else rect - self.mosaic = ( - self.augment and not self.rect - ) # load 4 images at a time into a mosaic (only during training) + self.mosaic = (self.augment and not self.rect) # load 4 images at a time into a mosaic (only during training) self.mosaic_border = [-img_size // 2, -img_size // 2] self.stride = stride self.path = path @@ -278,15 +196,11 @@ def cache_images(self, cache_images, prefix): """Cache images to disk or ram for faster speed.""" if cache_images == "disk": self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + "_npy") - self.img_npy = [ - self.im_cache_dir / Path(f).with_suffix(".npy").name for f in self.img_files - ] + self.img_npy = [self.im_cache_dir / Path(f).with_suffix(".npy").name for f in self.img_files] self.im_cache_dir.mkdir(parents=True, exist_ok=True) gb = 0 # Gigabytes of cached images self.img_hw0, self.img_hw = [None] * self.num_imgs, [None] * self.num_imgs - results = ThreadPool(NUM_THREADS).imap( - lambda x: load_image(*x), zip(repeat(self), range(self.num_imgs)) - ) + results = ThreadPool(NUM_THREADS).imap(lambda x: load_image(*x), zip(repeat(self), range(self.num_imgs))) pbar = tqdm(enumerate(results), total=self.num_imgs) for i, x in pbar: if cache_images == "disk": @@ -294,11 +208,7 @@ def cache_images(self, cache_images, prefix): np.save(self.img_npy[i].as_posix(), x[0]) gb += self.img_npy[i].stat().st_size else: - ( - self.imgs[i], - self.img_hw0[i], - self.img_hw[i], - ) = x # im, hw_orig, hw_resized = load_image(self, i) + (self.imgs[i], self.img_hw0[i], self.img_hw[i],) = x # im, hw_orig, hw_resized = load_image(self, i) gb += self.imgs[i].nbytes pbar.desc = f"{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})" pbar.close() @@ -308,21 +218,16 @@ def get_img_files(self, p, prefix): try: f = [] # image files if p.is_dir(): # dir - f += glob.glob(str(p / "**" / "*.*"), recursive=True) - # f = list(p.rglob('*.*')) # pathlib + f += glob.glob(str(p / "**" / "*.*"), recursive=True) # f = list(p.rglob('*.*')) # pathlib elif p.is_file(): # file with open(p, "r") as t: t = t.read().strip().splitlines() parent = str(p.parent) + os.sep - f += [ - x.replace("./", parent) if x.startswith("./") else x for x in t - ] # local to global path - # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib) + f += [x.replace("./", parent) if x.startswith("./") else x for x in + t] # local to global path # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib) else: raise Exception(f"{prefix}{p} does not exist") - img_files = sorted( - [x.replace("/", os.sep) for x in f if x.split(".")[-1].lower() in IMG_FORMATS] - ) + img_files = sorted([x.replace("/", os.sep) for x in f if 
x.split(".")[-1].lower() in IMG_FORMATS]) # img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib assert img_files, f"{prefix}No images found" except Exception as e: @@ -334,26 +239,19 @@ def get_neg_and_bg(self, neg_dir, bg_dir): img_neg_files, img_bg_files = [], [] if os.path.isdir(neg_dir): img_neg_files = [os.path.join(neg_dir, i) for i in os.listdir(neg_dir)] - logging.info( - colorstr("Negative dir: ") - + f"'{neg_dir}', using {len(img_neg_files)} pictures from the dir as negative samples during training" - ) + logging.info(colorstr( + "Negative dir: ") + f"'{neg_dir}', using {len(img_neg_files)} pictures from the dir as negative samples during training") if os.path.isdir(bg_dir): img_bg_files = [os.path.join(bg_dir, i) for i in os.listdir(bg_dir)] - logging.info( - colorstr("Background dir: ") - + f"{bg_dir}, using {len(img_bg_files)} pictures from the dir as background during training" - ) + logging.info(colorstr( + "Background dir: ") + f"{bg_dir}, using {len(img_bg_files)} pictures from the dir as background during training") return img_neg_files, img_bg_files def load_cache(self, cache_path, prefix): """Load labels from *.cache file.""" try: - cache, exists = ( - np.load(cache_path, allow_pickle=True).item(), - True, - ) # load dict + cache, exists = (np.load(cache_path, allow_pickle=True).item(), True,) # load dict assert cache["version"] == self.cache_version # same version assert cache["hash"] == get_hash(self.label_files + self.img_files) # same hash except: @@ -367,8 +265,7 @@ def load_cache(self, cache_path, prefix): if cache["msgs"]: logging.info("\n".join(cache["msgs"])) # display warnings assert ( - nf > 0 or not self.augment - ), f"{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}" + nf > 0 or not self.augment), f"{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}" # Read cache [cache.pop(k) for k in ("hash", "version", "msgs")] # remove items @@ -400,32 +297,18 @@ def update_rect(self, num_batches, pad): elif mini > 1: shapes[i] = [1, 1 / mini] - self.batch_shapes = ( - np.ceil(np.array(shapes) * self.img_size / self.stride + pad).astype(np.int) * self.stride - ) + self.batch_shapes = (np.ceil(np.array(shapes) * self.img_size / self.stride + pad).astype(np.int) * self.stride) def cache_labels(self, path=Path("./labels.cache"), prefix=""): """Cache labels to *.cache file if there is no *.cache file in local.""" # Cache dataset labels, check images and read shapes x = {} # dict - nm, nf, ne, nc, msgs = ( - 0, - 0, - 0, - 0, - [], - ) # number missing, found, empty, corrupt, messages + nm, nf, ne, nc, msgs = (0, 0, 0, 0, [],) # number missing, found, empty, corrupt, messages desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..." 
with Pool(NUM_THREADS) as pool: - pbar = tqdm( - pool.imap( - verify_image_label, - zip(self.img_files, self.label_files, repeat(prefix)), - ), - desc=desc, - total=len(self.img_files), - ) + pbar = tqdm(pool.imap(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix)), ), + desc=desc, total=len(self.img_files), ) for im_file, l, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar: nm += nm_f nf += nf_f @@ -451,9 +334,7 @@ def cache_labels(self, path=Path("./labels.cache"), prefix=""): path.with_suffix(".cache.npy").rename(path) # remove .npy suffix logging.info(f"{prefix}New cache created: {path}") except Exception as e: - logging.info( - f"{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}" - ) # path not writeable + logging.info(f"{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}") # path not writeable return x def __len__(self): @@ -487,33 +368,21 @@ def __getitem__(self, index): # Letterbox shape = ( - self.batch_shapes[self.batch_index[index]] if self.rect else self.img_size - ) # final letterboxed shape + self.batch_shapes[self.batch_index[index]] if self.rect else self.img_size) # final letterboxed shape img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling labels = self.labels[index].copy() if labels.size: # normalized xywh to pixel xyxy format - labels[:, 1:] = xywhn2xyxy( - labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1] - ) + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) if self.augment: - img, labels = random_perspective( - img, - labels, - degrees=hyp["degrees"], - translate=hyp["translate"], - scale=hyp["scale"], - shear=hyp["shear"], - perspective=hyp["perspective"], - ) + img, labels = random_perspective(img, labels, degrees=hyp["degrees"], translate=hyp["translate"], + scale=hyp["scale"], shear=hyp["shear"], perspective=hyp["perspective"], ) nl = len(labels) # number of labels if nl: - labels[:, 1:5] = xyxy2xywhn( - labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3 - ) + labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) if self.augment: # Albumentations @@ -535,8 +404,7 @@ def __getitem__(self, index): if nl: labels[:, 1] = 1 - labels[:, 1] - # Cutouts - # labels = cutout(img, labels, p=0.5) + # Cutouts # labels = cutout(img, labels, p=0.5) labels_out = torch.zeros((nl, 6)) if nl: @@ -567,33 +435,13 @@ def collate_fn4(batch): for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW i *= 4 if random.random() < 0.5: - im = F.interpolate( - img[i].unsqueeze(0).float(), - scale_factor=2.0, - mode="bilinear", - align_corners=False, - )[0].type(img[i].type()) + im = \ + F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2.0, mode="bilinear", align_corners=False, )[ + 0].type(img[i].type()) l = label[i] else: - im = torch.cat( - ( - torch.cat((img[i], img[i + 1]), 1), - torch.cat((img[i + 2], img[i + 3]), 1), - ), - 2, - ) - l = ( - torch.cat( - ( - label[i], - label[i + 1] + ho, - label[i + 2] + wo, - label[i + 3] + ho + wo, - ), - 0, - ) - * s - ) + im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1),), 2, ) + l = (torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo,), 0, ) * s) img4.append(im) label4.append(l) @@ -604,42 +452,12 @@ def collate_fn4(batch): class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for 
training/testing - def __init__( - self, - path, - img_size=640, - batch_size=16, - augment=False, - hyp=None, - rect=False, - image_weights=False, - cache_images=False, - single_cls=False, - stride=32, - pad=0, - prefix="", - neg_dir="", - bg_dir="", - area_thr=0.2, - downsample_ratio=1, # return dowmsample mask + def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, + cache_images=False, single_cls=False, stride=32, pad=0, prefix="", neg_dir="", bg_dir="", area_thr=0.2, + downsample_ratio=1, # return dowmsample mask ): - super().__init__( - path, - img_size, - batch_size, - augment, - hyp, - rect, - image_weights, - cache_images, - single_cls, - stride, - pad, - prefix, - neg_dir, - bg_dir, - area_thr, - ) + super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls, + stride, pad, prefix, neg_dir, bg_dir, area_thr, ) self.downsample_ratio = downsample_ratio @Dataset.mosaic_getitem @@ -666,8 +484,7 @@ def __getitem__(self, index): # Letterbox shape = ( - self.batch_shapes[self.batch_index[index]] if self.rect else self.img_size - ) # final letterboxed shape + self.batch_shapes[self.batch_index[index]] if self.rect else self.img_size) # final letterboxed shape img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling @@ -677,51 +494,25 @@ def __getitem__(self, index): # TODO if len(segments): for i_s in range(len(segments)): - segments[i_s] = xyn2xy( - segments[i_s], - ratio[0] * w, - ratio[1] * h, - padw=pad[0], - padh=pad[1], - ) + segments[i_s] = xyn2xy(segments[i_s], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1], ) if labels.size: # normalized xywh to pixel xyxy format - labels[:, 1:] = xywhn2xyxy( - labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1] - ) + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) if self.augment: - img, labels, segments = random_perspective( - img, - labels, - segments=segments, - degrees=hyp["degrees"], - translate=hyp["translate"], - scale=hyp["scale"], - shear=hyp["shear"], - perspective=hyp["perspective"], - return_seg=True, - ) + img, labels, segments = random_perspective(img, labels, segments=segments, degrees=hyp["degrees"], + translate=hyp["translate"], scale=hyp["scale"], shear=hyp["shear"], perspective=hyp["perspective"], + return_seg=True, ) nl = len(labels) # number of labels if nl: - labels[:, 1:5] = xyxy2xywhn( - labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3 - ) + labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) for si in range(len(segments)): - mask = polygon2mask_downsample( - img.shape[:2], - [segments[si].reshape(-1)], - downsample_ratio=self.downsample_ratio, - ) + mask = polygon2mask_downsample(img.shape[:2], [segments[si].reshape(-1)], + downsample_ratio=self.downsample_ratio, ) masks.append(torch.from_numpy(mask.astype(np.float32))) - masks = ( - torch.stack(masks, axis=0) - if len(masks) - else torch.zeros( - nl, img.shape[0] // self.downsample_ratio, img.shape[1] // self.downsample_ratio - ) - ) + masks = (torch.stack(masks, axis=0) if len(masks) else torch.zeros(nl, img.shape[0] // self.downsample_ratio, + img.shape[1] // self.downsample_ratio)) # TODO: albumentations support if self.augment: # Albumentations @@ -747,8 +538,7 @@ def __getitem__(self, index): labels[:, 1] = 1 - labels[:, 1] masks = 
torch.flip(masks, dims=[2]) - # Cutouts - # labels = cutout(img, labels, p=0.5) + # Cutouts # labels = cutout(img, labels, p=0.5) labels_out = torch.zeros((nl, 6)) if nl: @@ -786,18 +576,11 @@ def load_image(self, i): h0, w0 = im.shape[:2] # orig hw r = self.img_size / max(h0, w0) # ratio if r != 1: # if sizes are not equal - im = cv2.resize( - im, - (int(w0 * r), int(h0 * r)), - interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR, - ) + im = cv2.resize(im, (int(w0 * r), int(h0 * r)), + interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR, ) return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized else: - return ( - self.imgs[i], - self.img_hw0[i], - self.img_hw[i], - ) # im, hw_original, hw_resized + return (self.imgs[i], self.img_hw0[i], self.img_hw[i],) # im, hw_original, hw_resized def load_neg_image(self, index): @@ -815,9 +598,7 @@ def load_neg_image(self, index): def load_bg_image(self, index): path = self.img_files[index] bg_path = self.img_bg_files[np.random.randint(0, len(self.img_bg_files))] - img, coord, _, (w, h) = paste1( - path, bg_path, bg_size=self.img_size, fg_scale=random.uniform(1.5, 5) - ) + img, coord, _, (w, h) = paste1(path, bg_path, bg_size=self.img_size, fg_scale=random.uniform(1.5, 5)) label = self.labels[index] label[:, 1] = (label[:, 1] * w + coord[0]) / img.shape[1] label[:, 2] = (label[:, 2] * h + coord[1]) / img.shape[0] @@ -858,22 +639,10 @@ def load_mosaic(self, index, return_seg=False): img, _, (h, w) = load_neg_image(self, index) # place img in img4 if j == 0: - img4 = np.full( - (s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8 - ) # base image with 4 tiles + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles if i == 0: # top left - x1a, y1a, x2a, y2a = ( - max(xc - w, 0), - max(yc - h, 0), - xc, - yc, - ) # xmin, ymin, xmax, ymax (large image) - x1b, y1b, x2b, y2b = ( - w - (x2a - x1a), - h - (y2a - y1a), - w, - h, - ) # xmin, ymin, xmax, ymax (small image) + x1a, y1a, x2a, y2a = (max(xc - w, 0), max(yc - h, 0), xc, yc,) # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = (w - (x2a - x1a), h - (y2a - y1a), w, h,) # xmin, ymin, xmax, ymax (small image) elif i == 1: # top right x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h @@ -899,9 +668,7 @@ def load_mosaic(self, index, return_seg=False): labels, segments = self.labels[index].copy(), self.segments[index].copy() if labels.size: - labels[:, 1:] = xywhn2xyxy( - labels[:, 1:], w, h, padw, padh - ) # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format segments = [xyn2xy(x, w, h, padw, padh) for x in segments] labels4.append(labels) segments4.extend(segments) @@ -914,19 +681,9 @@ def load_mosaic(self, index, return_seg=False): # Augment img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"]) - results = random_perspective( - img4, - labels4, - segments4, - degrees=self.hyp["degrees"], - translate=self.hyp["translate"], - scale=self.hyp["scale"], - shear=self.hyp["shear"], - perspective=self.hyp["perspective"], - border=self.mosaic_border, - area_thr=self.area_thr, - return_seg=return_seg, - ) # border to remove + results = random_perspective(img4, labels4, segments4, degrees=self.hyp["degrees"], translate=self.hyp["translate"], + scale=self.hyp["scale"], shear=self.hyp["shear"], 
perspective=self.hyp["perspective"], + border=self.mosaic_border, area_thr=self.area_thr, return_seg=return_seg, ) # border to remove # return (img4, labels4, segments4) if return_seg else (img4, labels4) return results @@ -943,9 +700,7 @@ def load_mosaic9(self, index): # place img in img9 if i == 0: # center - img9 = np.full( - (s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8 - ) # base image with 4 tiles + img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles h0, w0 = h, w c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates elif i == 1: # top @@ -971,20 +726,18 @@ def load_mosaic9(self, index): # Labels labels, segments = self.labels[index].copy(), self.segments[index].copy() if labels.size: - labels[:, 1:] = xywhn2xyxy( - labels[:, 1:], w, h, padx, pady - ) # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format segments = [xyn2xy(x, w, h, padx, pady) for x in segments] labels9.append(labels) segments9.extend(segments) # Image - img9[y1:y2, x1:x2] = img[y1 - pady :, x1 - padx :] # img9[ymin:ymax, xmin:xmax] + img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax] hp, wp = h, w # height, width previous # Offset yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border] # mosaic center x, y - img9 = img9[yc : yc + 2 * s, xc : xc + 2 * s] + img9 = img9[yc: yc + 2 * s, xc: xc + 2 * s] # Concat/clip labels labels9 = np.concatenate(labels9, 0) @@ -998,17 +751,9 @@ def load_mosaic9(self, index): # img9, labels9 = replicate(img9, labels9) # replicate # Augment - img9, labels9 = random_perspective( - img9, - labels9, - segments9, - degrees=self.hyp["degrees"], - translate=self.hyp["translate"], - scale=self.hyp["scale"], - shear=self.hyp["shear"], - perspective=self.hyp["perspective"], - border=self.mosaic_border, - ) # border to remove + img9, labels9 = random_perspective(img9, labels9, segments9, degrees=self.hyp["degrees"], + translate=self.hyp["translate"], scale=self.hyp["scale"], shear=self.hyp["shear"], + perspective=self.hyp["perspective"], border=self.mosaic_border, ) # border to remove return img9, labels9 @@ -1034,11 +779,7 @@ def unzip(path): assert Path(path).is_file(), f"Error unzipping {path}, file not found" ZipFile(path).extractall(path=path.parent) # unzip dir = path.with_suffix("") # dataset directory == zip name - return ( - True, - str(dir), - next(dir.rglob("*.yaml")), - ) # zipped, data_dir, yaml_path + return (True, str(dir), next(dir.rglob("*.yaml")),) # zipped, data_dir, yaml_path else: # path is data.yaml return False, None, path @@ -1057,11 +798,7 @@ def hub_ops(f, max_dim=1920): im_height, im_width = im.shape[:2] r = max_dim / max(im_height, im_width) # ratio if r < 1.0: # image too large - im = cv2.resize( - im, - (int(im_width * r), int(im_height * r)), - interpolation=cv2.INTER_LINEAR, - ) + im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_LINEAR, ) cv2.imwrite(str(f_new), im) zipped, data_dir, yaml_path = unzip(Path(path)) @@ -1081,27 +818,17 @@ def hub_ops(f, max_dim=1920): for label in tqdm(dataset.labels, total=dataset.num_imgs, desc="Statistics"): x.append(np.bincount(label[:, 0].astype(int), minlength=data["nc"])) x = np.array(x) # shape(128x80) - stats[split] = { - "instance_stats": {"total": int(x.sum()), "per_class": x.sum(0).tolist()}, - "image_stats": { - "total": dataset.num_imgs, - "unlabelled": int(np.all(x == 0, 1).sum()), - "per_class": (x > 
0).sum(0).tolist(), - }, - "labels": [ - {str(Path(k).name): round_labels(v.tolist())} - for k, v in zip(dataset.img_files, dataset.labels) - ], - } + stats[split] = {"instance_stats": {"total": int(x.sum()), "per_class": x.sum(0).tolist()}, + "image_stats": {"total": dataset.num_imgs, "unlabelled": int(np.all(x == 0, 1).sum()), + "per_class": (x > 0).sum(0).tolist(), }, + "labels": [{str(Path(k).name): round_labels(v.tolist())} for k, v in + zip(dataset.img_files, dataset.labels)], } if hub: im_dir = hub_dir / "images" im_dir.mkdir(parents=True, exist_ok=True) - for _ in tqdm( - ThreadPool(NUM_THREADS).imap(hub_ops, dataset.img_files), - total=dataset.num_imgs, - desc="HUB Ops", - ): + for _ in tqdm(ThreadPool(NUM_THREADS).imap(hub_ops, dataset.img_files), total=dataset.num_imgs, + desc="HUB Ops", ): pass # Profile @@ -1142,39 +869,16 @@ def hub_ops(f, max_dim=1920): import uuid import torch import cv2 -import numpy as np import random from pathlib import Path -from PIL import Image, ImageOps, ExifTags +from PIL import ImageOps, ExifTags from utils.segment import segments2boxes from utils.general import xywh2xyxy - # Parameters HELP_URL = "https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data" -IMG_FORMATS = [ - "bmp", - "jpg", - "jpeg", - "png", - "tif", - "tiff", - "dng", - "webp", - "mpo", -] # acceptable image suffixes -VID_FORMATS = [ - "mov", - "avi", - "mp4", - "mpg", - "mpeg", - "m4v", - "wmv", - "mkv", - "vdo", - "flv", -] # acceptable video suffixes +IMG_FORMATS = ["bmp", "jpg", "jpeg", "png", "tif", "tiff", "dng", "webp", "mpo", ] # acceptable image suffixes +VID_FORMATS = ["mov", "avi", "mp4", "mpg", "mpeg", "m4v", "wmv", "mkv", "vdo", "flv", ] # acceptable video suffixes NUM_THREADS = min(8, os.cpu_count()) # number of multiprocessing threads # Get orientation exif tag @@ -1182,6 +886,7 @@ def hub_ops(f, max_dim=1920): if ExifTags.TAGS[orientation] == "Orientation": break + def get_hash(paths): # Returns a single hash value of a list of paths (files or dirs) size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes @@ -1216,15 +921,8 @@ def exif_transpose(image): exif = image.getexif() orientation = exif.get(0x0112, 1) # default 1 if orientation > 1: - method = { - 2: Image.FLIP_LEFT_RIGHT, - 3: Image.ROTATE_180, - 4: Image.FLIP_TOP_BOTTOM, - 5: Image.TRANSPOSE, - 6: Image.ROTATE_270, - 7: Image.TRANSVERSE, - 8: Image.ROTATE_90, - }.get(orientation) + method = {2: Image.FLIP_LEFT_RIGHT, 3: Image.ROTATE_180, 4: Image.FLIP_TOP_BOTTOM, 5: Image.TRANSPOSE, + 6: Image.ROTATE_270, 7: Image.TRANSVERSE, 8: Image.ROTATE_90, }.get(orientation) if method is not None: image = image.transpose(method) del exif[0x0112] @@ -1239,10 +937,7 @@ def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): polygons (np.ndarray): [N, M], N is the number of polygons, M is the number of points(Be divided by 2). 
""" - img_size = ( - img_size[0] // downsample_ratio, - img_size[1] // downsample_ratio - ) + img_size = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio) mask = np.zeros(img_size, dtype=np.uint8) polygons = np.asarray(polygons) / downsample_ratio polygons = polygons.astype(np.int32) @@ -1272,19 +967,14 @@ def polygon2mask_downsample(img_size, polygons, color=1, downsample_ratio=1): shape = polygons.shape polygons = polygons.reshape(shape[0], -1, 2) cv2.fillPoly(mask, polygons, color=color) - nh, nw = ( - img_size[0] // downsample_ratio, - img_size[1] // downsample_ratio - ) + nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio) mask = cv2.resize(mask, (nw, nh)) return mask + def img2label_paths(img_paths): # Define label paths as a function of image paths - sa, sb = ( - os.sep + "images" + os.sep, - os.sep + "labels" + os.sep, - ) # /images/, /labels/ substrings + sa, sb = (os.sep + "images" + os.sep, os.sep + "labels" + os.sep,) # /images/, /labels/ substrings return [sb.join(x.rsplit(sa, 1)).rsplit(".", 1)[0] + ".txt" for x in img_paths] @@ -1302,14 +992,11 @@ def flatten_recursive(path="../datasets/coco128"): for file in tqdm(glob.glob(str(Path(path)) + "/**/*.*", recursive=True)): shutil.copyfile(file, new_path / Path(file).name) -def extract_boxes( - path="../datasets/coco128", -): # from utils.datasets import *; extract_boxes() + +def extract_boxes(path="../datasets/coco128", ): # from utils.datasets import *; extract_boxes() # Convert detection dataset into classification dataset, with one directory per class path = Path(path) # images dir - shutil.rmtree(path / "classifier") if ( - path / "classifier" - ).is_dir() else None # remove existing + shutil.rmtree(path / "classifier") if (path / "classifier").is_dir() else None # remove existing files = list(path.rglob("*.*")) n = len(files) # number of files for im_file in tqdm(files, total=n): @@ -1322,18 +1009,11 @@ def extract_boxes( lb_file = Path(img2label_paths([str(im_file)])[0]) if Path(lb_file).exists(): with open(lb_file, "r") as f: - lb = np.array( - [x.split() for x in f.read().strip().splitlines()], - dtype=np.float32, - ) # labels + lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32, ) # labels for j, x in enumerate(lb): c = int(x[0]) # class - f = ( - (path / "classifier") - / f"{c}" - / f"{path.stem}_{im_file.stem}_{j}.jpg" - ) # new filename + f = ((path / "classifier") / f"{c}" / f"{path.stem}_{im_file.stem}_{j}.jpg") # new filename if not f.parent.is_dir(): f.parent.mkdir(parents=True) @@ -1344,14 +1024,10 @@ def extract_boxes( b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image b[[1, 3]] = np.clip(b[[1, 3]], 0, h) - assert cv2.imwrite( - str(f), im[b[1] : b[3], b[0] : b[2]] - ), f"box failure in {f}" + assert cv2.imwrite(str(f), im[b[1]: b[3], b[0]: b[2]]), f"box failure in {f}" -def autosplit( - path="../datasets/coco128/images", weights=(0.9, 0.1, 0.0), annotated_only=False -): +def autosplit(path="../datasets/coco128/images", weights=(0.9, 0.1, 0.0), annotated_only=False): """Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files Usage: from utils.datasets import *; autosplit() Arguments @@ -1360,47 +1036,25 @@ def autosplit( annotated_only: Only use images with an annotated txt file """ path = Path(path) # images dir - files = sorted( - [x for x in path.rglob("*.*") if x.suffix[1:].lower() in IMG_FORMATS] - ) # image files only + files = sorted([x for x in path.rglob("*.*") if x.suffix[1:].lower() in 
IMG_FORMATS]) # image files only n = len(files) # number of files random.seed(0) # for reproducibility - indices = random.choices( - [0, 1, 2], weights=weights, k=n - ) # assign each image to a split - - txt = [ - "autosplit_train.txt", - "autosplit_val.txt", - "autosplit_test.txt", - ] # 3 txt files + indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split + + txt = ["autosplit_train.txt", "autosplit_val.txt", "autosplit_test.txt", ] # 3 txt files [(path.parent / x).unlink(missing_ok=True) for x in txt] # remove existing - print( - f"Autosplitting images from {path}" - + ", using *.txt labeled images only" * annotated_only - ) + print(f"Autosplitting images from {path}" + ", using *.txt labeled images only" * annotated_only) for i, img in tqdm(zip(indices, files), total=n): - if ( - not annotated_only or Path(img2label_paths([str(img)])[0]).exists() - ): # check label + if (not annotated_only or Path(img2label_paths([str(img)])[0]).exists()): # check label with open(path.parent / txt[i], "a") as f: - f.write( - "./" + img.relative_to(path.parent).as_posix() + "\n" - ) # add image to txt file + f.write("./" + img.relative_to(path.parent).as_posix() + "\n") # add image to txt file def verify_image_label(args): # Verify one image-label pair im_file, lb_file, prefix = args - nm, nf, ne, nc, msg, segments = ( - 0, - 0, - 0, - 0, - "", - [], - ) # number (missing, found, empty, corrupt), message, segments + nm, nf, ne, nc, msg, segments = (0, 0, 0, 0, "", [],) # number (missing, found, empty, corrupt), message, segments try: # verify images im = Image.open(im_file) @@ -1412,9 +1066,7 @@ def verify_image_label(args): with open(im_file, "rb") as f: f.seek(-2, 2) if f.read() != b"\xff\xd9": # corrupt JPEG - ImageOps.exif_transpose(Image.open(im_file)).save( - im_file, "JPEG", subsampling=0, quality=100 - ) + ImageOps.exif_transpose(Image.open(im_file)).save(im_file, "JPEG", subsampling=0, quality=100) msg = f"{prefix}WARNING: {im_file}: corrupt JPEG restored and saved" # verify labels @@ -1424,22 +1076,14 @@ def verify_image_label(args): l = [x.split() for x in f.read().strip().splitlines() if len(x)] if any([len(x) > 6 for x in l]): # is segment classes = np.array([x[0] for x in l], dtype=np.float32) - segments = [ - np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l - ] # (cls, xy1...) - l = np.concatenate( - (classes.reshape(-1, 1), segments2boxes(segments)), 1 - ) # (cls, xywh) + segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l] # (cls, xy1...) + l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh) l = np.array(l, dtype=np.float32) nl = len(l) if nl: - assert ( - l.shape[1] == 5 - ), f"labels require 5 columns, {l.shape[1]} columns detected" + assert (l.shape[1] == 5), f"labels require 5 columns, {l.shape[1]} columns detected" assert (l >= 0).all(), f"negative label values {l[l < 0]}" - assert ( - l[:, 1:] <= 1 - ).all(), f"non-normalized or out of bounds coordinates {l[:, 1:][l[:, 1:] > 1]}" + assert (l[:, 1:] <= 1).all(), f"non-normalized or out of bounds coordinates {l[:, 1:][l[:, 1:] > 1]}" l, idx = np.unique(l, axis=0, return_index=True) # remove duplicate rows # NOTE: `np.unique` will change the order of `l`, so adjust the segments order too. 
segments = [segments[i] for i in idx] if len(segments) > 0 else segments @@ -1456,9 +1100,11 @@ def verify_image_label(args): nc = 1 msg = f"{prefix}WARNING: {im_file}: ignoring corrupt image/label: {e}" return [None, None, None, None, nm, nf, ne, nc, msg] - + + from torch.utils.data import DataLoader as torchDataLoader + class DataLoader(torchDataLoader): """ Lightnet dataloader that enables on the fly resizing of the images. @@ -1494,9 +1140,10 @@ def __iter__(self): # REFACTOR IN A NEW FILE -from PIL import Image, ImageDraw +from PIL import Image import numpy as np from PIL import ImageFile + # import numbers ImageFile.LOAD_TRUNCATED_IMAGES = True @@ -1515,6 +1162,7 @@ def get_raito(new_size, original_size): # # yolov5 way return min(new_size[0] / original_size[0], new_size[1] / original_size[1]) + def imresize(img, new_size): """Resize the img with new_size by PIL(keep aspect). @@ -1529,6 +1177,7 @@ def imresize(img, new_size): img = img.resize((int(old_size[0] * ratio), int(old_size[1] * ratio))) return img + def get_wh(a, b): return np.random.randint(a, b) @@ -1554,9 +1203,7 @@ def paste2(sample1, sample2, background, scale=1.2): background.paste(sample2, (x2, y2)) # background = background.resize((416, 416)) - return np.array(background), (x1, y1, x2, y2), background - # print(background.size) - # background.show() + return np.array(background), (x1, y1, x2, y2), background # print(background.size) # background.show() def paste1(sample, background, bg_size, fg_scale=1.5): diff --git a/utils/seg_loss.py b/utils/seg_loss.py index d8d155739273..8ffb4439c2f0 100644 --- a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -1,12 +1,13 @@ # TODO: merge with loss.py.. Optimize speed import torch -from utils.torch_utils import de_parallel, is_parallel -from utils.general import xywh2xyxy, Profile -from utils.segment import mask_iou, masks_iou, crop -import torch.nn.functional as F import torch.nn as nn +import torch.nn.functional as F + +from utils.general import xywh2xyxy from utils.loss import smooth_BCE, FocalLoss +from utils.segment import masks_iou, crop +from utils.torch_utils import is_parallel class ComputeLoss: @@ -23,9 +24,7 @@ def __init__(self, model, autobalance=False): self.mask_loss = MaskIOULoss() # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 - self.cp, self.cn = smooth_BCE( - eps=h.get("label_smoothing", 0.0) - ) # positive, negative BCE targets + self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0)) # positive, negative BCE targets # Focal loss g = h["fl_gamma"] # focal loss gamma @@ -35,13 +34,7 @@ def __init__(self, model, autobalance=False): det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index - self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = ( - BCEcls, - BCEobj, - 1.0, - h, - autobalance, - ) + self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = (BCEcls, BCEobj, 1.0, h, autobalance,) for k in "na", "nc", "nl", "anchors", "nm": if hasattr(det, k): setattr(self, k, getattr(det, k)) @@ -54,10 +47,7 @@ def __call__(self, p, targets, masks=None): # predictions, targets, model def loss_detection(self, p, targets): device = targets.device lcls, lbox, lobj = ( - torch.zeros(1, device=device), - torch.zeros(1, device=device), - torch.zeros(1, device=device), - ) + torch.zeros(1, device=device), torch.zeros(1, device=device), 
torch.zeros(1, device=device),) tcls, tbox, indices, anchors = self.build_targets(p, targets) # targets # Losses @@ -73,22 +63,14 @@ def loss_detection(self, p, targets): pxy = ps[:, :2].sigmoid() * 2.0 - 0.5 pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box - iou = bbox_iou( - pbox.T, tbox[i], x1y1x2y2=False, CIoU=True - ) # iou(prediction, target) + iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) lbox += (1.0 - iou).mean() # iou loss # Objectness score_iou = iou.detach().clamp(0).type(tobj.dtype) if self.sort_obj_iou: sort_id = torch.argsort(score_iou) - b, a, gj, gi, score_iou = ( - b[sort_id], - a[sort_id], - gj[sort_id], - gi[sort_id], - score_iou[sort_id], - ) + b, a, gj, gi, score_iou = (b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], score_iou[sort_id],) tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * score_iou # iou ratio # Classification @@ -97,9 +79,7 @@ def loss_detection(self, p, targets): t[range(n), tcls[i]] = self.cp lcls += self.BCEcls(ps[:, 5:], t) # BCE - # Append targets to text file - # with open('targets.txt', 'a') as file: - # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] + # Append targets to text file # with open('targets.txt', 'a') as file: # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] obji = self.BCEobj(pi[..., 4], tobj) lobj += obji * self.balance[i] # obj loss @@ -128,14 +108,9 @@ def loss_segment(self, preds, targets, masks): device = targets.device lcls, lbox, lobj, lseg = ( - torch.zeros(1, device=device), - torch.zeros(1, device=device), - torch.zeros(1, device=device), - torch.zeros(1, device=device), - ) - tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets_for_masks( - p, targets - ) # targets + torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device), + torch.zeros(1, device=device),) + tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets_for_masks(p, targets) # targets # Losses for i, pi in enumerate(p): # layer index, layer predictions b, a, gj, gi = indices[i] # image, anchor, gridy, gridx @@ -149,47 +124,32 @@ def loss_segment(self, preds, targets, masks): pxy = ps[:, :2].sigmoid() * 2.0 - 0.5 pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box - iou = bbox_iou( - pbox.T, tbox[i], x1y1x2y2=False, CIoU=True - ) # iou(prediction, target) + iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) lbox += (1.0 - iou).mean() # iou loss # Objectness score_iou = iou.detach().clamp(0).type(tobj.dtype) if self.sort_obj_iou: sort_id = torch.argsort(score_iou) - b, a, gj, gi, score_iou = ( - b[sort_id], - a[sort_id], - gj[sort_id], - gi[sort_id], - score_iou[sort_id], - ) + b, a, gj, gi, score_iou = (b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], score_iou[sort_id],) tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * score_iou # iou ratio # Classification if self.nc > 1: # cls loss (only if multiple classes) - t = torch.full_like(ps[:, self.nm :], self.cn, device=device) # targets + t = torch.full_like(ps[:, self.nm:], self.cn, device=device) # targets t[range(n), tcls[i]] = self.cp - lcls += self.BCEcls(ps[:, self.nm :], t) # BCE + lcls += self.BCEcls(ps[:, self.nm:], t) # BCE # Mask Regression mask_gt = masks[tidxs[i]] - downsampled_masks = F.interpolate( - mask_gt[None, :], - (mask_h, mask_w), - mode="bilinear", - align_corners=False, - ).squeeze(0) + 
downsampled_masks = F.interpolate(mask_gt[None, :], (mask_h, mask_w), mode="bilinear", + align_corners=False, ).squeeze(0) mxywh = xywh[i] mws, mhs = mxywh[:, 2:].T mws, mhs = mws / pi.shape[3], mhs / pi.shape[2] - mxywhs = ( - mxywh - / torch.tensor(pi.shape, device=mxywh.device)[[3, 2, 3, 2]] - * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=mxywh.device) - ) + mxywhs = (mxywh / torch.tensor(pi.shape, device=mxywh.device)[[3, 2, 3, 2]] * torch.tensor( + [mask_w, mask_h, mask_w, mask_h], device=mxywh.device)) mxyxys = xywh2xyxy(mxywhs) batch_lseg = torch.zeros(1, device=device) @@ -200,7 +160,7 @@ def loss_segment(self, preds, targets, masks): mw, mh = mws[index], mhs[index] mxyxy = mxyxys[index] - psi = ps[index][:, 5 : self.nm] + psi = ps[index][:, 5: self.nm] proto = proto_out[bi] batch_lseg += self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) @@ -241,25 +201,13 @@ def build_targets(self, p, targets): tcls, tbox, indices, anch = [], [], [], [] gain = torch.ones(7, device=targets.device) # normalized to gridspace gain ai = ( - torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) - ) # same as .repeat_interleave(nt) + torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)) # same as .repeat_interleave(nt) targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices g = 0.5 # bias - off = ( - torch.tensor( - [ - [0, 0], - [1, 0], - [0, 1], - [-1, 0], - [0, -1], # j,k,l,m - # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm - ], - device=targets.device, - ).float() - * g - ) # offsets + off = (torch.tensor([[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], device=targets.device, ).float() * g) # offsets for i in range(self.nl): anchors = self.anchors[i] @@ -295,9 +243,7 @@ def build_targets(self, p, targets): # Append a = t[:, 6].long() # anchor indices - indices.append( - (b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)) - ) # image, anchor, grid indices + indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices tbox.append(torch.cat((gxy - gij, gwh), 1)) # box anch.append(anchors[a]) # anchors tcls.append(c) # class @@ -310,31 +256,16 @@ def build_targets_for_masks(self, p, targets): tcls, tbox, indices, anch, tidxs, xywh = [], [], [], [], [], [] gain = torch.ones(8, device=targets.device) # normalized to gridspace gain ai = ( - torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) - ) # same as .repeat_interleave(nt) + torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)) # same as .repeat_interleave(nt) ti = ( - torch.arange(nt, device=targets.device).float().view(1, nt).repeat(na, 1) - ) # same as .repeat_interleave(nt) + torch.arange(nt, device=targets.device).float().view(1, nt).repeat(na, 1)) # same as .repeat_interleave(nt) - targets = torch.cat( - (targets.repeat(na, 1, 1), ai[:, :, None], ti[:, :, None]), 2 - ) # append anchor indices + targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None], ti[:, :, None]), 2) # append anchor indices g = 0.5 # bias - off = ( - torch.tensor( - [ - [0, 0], - [1, 0], - [0, 1], - [-1, 0], - [0, -1], # j,k,l,m - # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm - ], - device=targets.device, - ).float() - * g - ) # offsets + off = (torch.tensor([[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], device=targets.device, ).float() * g) # 
offsets for i in range(self.nl): anchors = self.anchors[i] @@ -371,9 +302,7 @@ def build_targets_for_masks(self, p, targets): # Append a = t[:, 6].long() # anchor indices tidx = t[:, 7].long() - indices.append( - (b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)) - ) # image, anchor, grid indices + indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices tbox.append(torch.cat((gxy - gij, gwh), 1)) # box anch.append(anchors[a]) # anchors tcls.append(c) # class @@ -381,7 +310,7 @@ def build_targets_for_masks(self, p, targets): xywh.append(torch.cat((gxy, gwh), 1)) return tcls, tbox, indices, anch, tidxs, xywh - + class MaskIOULoss(nn.Module): def __init__(self) -> None: @@ -404,7 +333,9 @@ def forward(self, pred_mask, gt_mask, mxyxy=None): iou = masks_iou(pred_mask, gt_mask) return 1.0 - iou -import math + +import math + def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4 @@ -422,8 +353,7 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= # Intersection area inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * ( - torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1) - ).clamp(0) + torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) # Union Area w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps @@ -432,24 +362,16 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= iou = inter / union if GIoU or DIoU or CIoU: - cw = torch.max(b1_x2, b2_x2) - torch.min( - b1_x1, b2_x1 - ) # convex (smallest enclosing box) width + cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared - rho2 = ( - (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 - + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2 - ) / 4 # center distance squared + rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + ( + b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared if DIoU: return iou - rho2 / c2 # DIoU - elif ( - CIoU - ): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 - v = (4 / math.pi ** 2) * torch.pow( - torch.atan(w2 / h2) - torch.atan(w1 / h1), 2 - ) + elif (CIoU): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) with torch.no_grad(): alpha = v / (v - iou + (1 + eps)) return iou - (rho2 / c2 + v * alpha) # CIoU @@ -457,4 +379,4 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= c_area = cw * ch + eps # convex area return iou - (c_area - union) / c_area # GIoU else: - return iou # IoU \ No newline at end of file + return iou # IoU diff --git a/utils/seg_metrics.py b/utils/seg_metrics.py index 8646931bed00..9c6133118dfa 100644 --- a/utils/seg_metrics.py +++ b/utils/seg_metrics.py @@ -5,12 +5,12 @@ import math import warnings -from easydict import EasyDict as edict from pathlib import Path import matplotlib.pyplot as plt import numpy as np import torch +from easydict import EasyDict as edict def fitness(x, masks=False): @@ -22,9 +22,7 @@ def fitness(x, masks=False): return (x[:, :4] * w).sum(1) -def ap_per_class( - tp, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), prefix="" 
-): +def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), prefix=""): """Compute the average precision, given the recall and precision curves. Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. # Arguments @@ -64,9 +62,7 @@ def ap_per_class( # Recall recall = tpc / (n_l + 1e-16) # recall curve - r[ci] = np.interp( - -px, -conf[i], recall[:, 0], left=0 - ) # negative x, xp because xp decreases + r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases # Precision precision = tpc / (tpc + fpc) # precision curve @@ -80,81 +76,35 @@ def ap_per_class( # Compute F1 (harmonic mean of precision and recall) f1 = 2 * p * r / (p + r + 1e-16) - names = [ - v for k, v in names.items() if k in unique_classes - ] # list: only classes that have data + names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data names = {i: v for i, v in enumerate(names)} # to dict if plot and save_dir is not None: plot_pr_curve(px, py, ap, Path(save_dir) / f"{prefix}PR_curve.png", names) - plot_mc_curve( - px, f1, Path(save_dir) / f"{prefix}F1_curve.png", names, ylabel="F1" - ) - plot_mc_curve( - px, p, Path(save_dir) / f"{prefix}P_curve.png", names, ylabel="Precision" - ) - plot_mc_curve( - px, r, Path(save_dir) / f"{prefix}R_curve.png", names, ylabel="Recall" - ) + plot_mc_curve(px, f1, Path(save_dir) / f"{prefix}F1_curve.png", names, ylabel="F1") + plot_mc_curve(px, p, Path(save_dir) / f"{prefix}P_curve.png", names, ylabel="Precision") + plot_mc_curve(px, r, Path(save_dir) / f"{prefix}R_curve.png", names, ylabel="Recall") i = f1.mean(0).argmax() # max F1 index return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype("int32") -def ap_per_class_box_and_mask( - tp_m, - tp_b, - conf, - pred_cls, - target_cls, - plot=False, - save_dir=".", - names=(), -): +def ap_per_class_box_and_mask(tp_m, tp_b, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), ): """ Args: tp_b: tp of boxes. tp_m: tp of masks. other arguments see `func: ap_per_class`. 
""" - results_boxes = ap_per_class( - tp_b, - conf, - pred_cls, - target_cls, - plot=plot, - save_dir=save_dir, - names=names, - prefix="Box", - ) - results_masks = ap_per_class( - tp_m, - conf, - pred_cls, - target_cls, - plot=plot, - save_dir=save_dir, - names=names, - prefix="Mask", - ) - - results = edict( - { - "boxes": { - "p": results_boxes[0], - "r": results_boxes[1], - "ap": results_boxes[2], - "f1": results_boxes[3], - "ap_class": results_boxes[4], - }, - "masks": { - "p": results_masks[0], - "r": results_masks[1], - "ap": results_masks[2], - "f1": results_masks[3], - "ap_class": results_masks[4], - }, - } - ) + results_boxes = ap_per_class(tp_b, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, + prefix="Box", ) + results_masks = ap_per_class(tp_m, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, + prefix="Mask", ) + + results = edict({ + "boxes": {"p": results_boxes[0], "r": results_boxes[1], "ap": results_boxes[2], "f1": results_boxes[3], + "ap_class": results_boxes[4], }, + "masks": {"p": results_masks[0], "r": results_masks[1], "ap": results_masks[2], "f1": results_masks[3], + "ap_class": results_masks[4], }, }) return results @@ -211,11 +161,7 @@ def process_batch(self, detections, labels): x = torch.where(iou > self.iou_thres) if x[0].shape[0]: - matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) - .cpu() - .numpy() - ) + matches = (torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -245,30 +191,17 @@ def plot(self, normalize=True, save_dir="", names=()): try: import seaborn as sn - array = self.matrix / ( - (self.matrix.sum(0).reshape(1, -1) + 1e-6) if normalize else 1 - ) # normalize columns + array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1e-6) if normalize else 1) # normalize columns array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) fig = plt.figure(figsize=(12, 9), tight_layout=True) sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size - labels = (0 < len(names) < 99) and len( - names - ) == self.nc # apply names to ticklabels + labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels with warnings.catch_warnings(): - warnings.simplefilter( - "ignore" - ) # suppress empty matrix RuntimeWarning: All-NaN slice encountered - sn.heatmap( - array, - annot=self.nc < 30, - annot_kws={"size": 8}, - cmap="Blues", - fmt=".2f", - square=True, + warnings.simplefilter("ignore") # suppress empty matrix RuntimeWarning: All-NaN slice encountered + sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap="Blues", fmt=".2f", square=True, xticklabels=names + ["background FP"] if labels else "auto", - yticklabels=names + ["background FN"] if labels else "auto", - ).set_facecolor((1, 1, 1)) + yticklabels=names + ["background FN"] if labels else "auto", ).set_facecolor((1, 1, 1)) fig.axes[0].set_xlabel("True") fig.axes[0].set_ylabel("Predicted") fig.savefig(Path(save_dir) / "confusion_matrix.png", dpi=250) @@ -297,8 +230,7 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= # Intersection area inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * ( - torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1) - ).clamp(0) + torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) # Union Area w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps @@ 
-307,24 +239,16 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= iou = inter / union if GIoU or DIoU or CIoU: - cw = torch.max(b1_x2, b2_x2) - torch.min( - b1_x1, b2_x1 - ) # convex (smallest enclosing box) width + cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared - rho2 = ( - (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 - + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2 - ) / 4 # center distance squared + rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + ( + b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared if DIoU: return iou - rho2 / c2 # DIoU - elif ( - CIoU - ): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 - v = (4 / math.pi ** 2) * torch.pow( - torch.atan(w2 / h2) - torch.atan(w1 / h1), 2 - ) + elif (CIoU): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) with torch.no_grad(): alpha = v / (v - iou + (1 + eps)) return iou - (rho2 / c2 + v * alpha) # CIoU @@ -356,17 +280,8 @@ def box_area(box): area2 = box_area(box2.T) # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) - inter = ( - ( - torch.min(box1[:, None, 2:], box2[:, 2:]) - - torch.max(box1[:, None, :2], box2[:, :2]) - ) - .clamp(0) - .prod(2) - ) - return inter / ( - area1[:, None] + area2 - inter - ) # iou = inter / (area1 + area2 - inter) + inter = ((torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)) + return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter) def bbox_ioa(box1, box2, eps=1e-7): @@ -384,8 +299,7 @@ def bbox_ioa(box1, box2, eps=1e-7): # Intersection area inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * ( - np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1) - ).clip(0) + np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0) # box2 area box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps @@ -399,9 +313,7 @@ def wh_iou(wh1, wh2): wh1 = wh1[:, None] # [N,1,2] wh2 = wh2[None] # [1,M,2] inter = torch.min(wh1, wh2).prod(2) # [N,M] - return inter / ( - wh1.prod(2) + wh2.prod(2) - inter - ) # iou = inter / (area1 + area2 - inter) + return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter) # Plots ---------------------------------------------------------------------------------------------------------------- @@ -414,19 +326,11 @@ def plot_pr_curve(px, py, ap, save_dir="pr_curve.png", names=()): if 0 < len(names) < 21: # display per-class legend if < 21 classes for i, y in enumerate(py.T): - ax.plot( - px, y, linewidth=1, label=f"{names[i]} {ap[i, 0]:.3f}" - ) # plot(recall, precision) + ax.plot(px, y, linewidth=1, label=f"{names[i]} {ap[i, 0]:.3f}") # plot(recall, precision) else: ax.plot(px, py, linewidth=1, color="grey") # plot(recall, precision) - ax.plot( - px, - py.mean(1), - linewidth=3, - color="blue", - label="all classes %.3f mAP@0.5" % ap[:, 0].mean(), - ) + ax.plot(px, py.mean(1), linewidth=3, color="blue", label="all classes %.3f mAP@0.5" % ap[:, 0].mean(), ) ax.set_xlabel("Recall") ax.set_ylabel("Precision") ax.set_xlim(0, 1) @@ -436,9 +340,7 @@ def plot_pr_curve(px, py, ap, save_dir="pr_curve.png", names=()): plt.close() 
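As a quick sanity check on the vectorized IoU-matrix helper above (box_iou in this file; illustrative, values computed by hand):

    import torch

    a = torch.tensor([[0., 0., 10., 10.],
                      [0., 0.,  5.,  5.]])
    b = torch.tensor([[0., 0., 10., 10.]])
    print(box_iou(a, b))
    # tensor([[1.0000],
    #         [0.2500]])  -> shape (N, M) = (2, 1)
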
-def plot_mc_curve( - px, py, save_dir="mc_curve.png", names=(), xlabel="Confidence", ylabel="Metric" -): +def plot_mc_curve(px, py, save_dir="mc_curve.png", names=(), xlabel="Confidence", ylabel="Metric"): # Metric-confidence curve fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) @@ -449,13 +351,7 @@ def plot_mc_curve( ax.plot(px, py.T, linewidth=1, color="grey") # plot(confidence, metric) y = py.mean(0) - ax.plot( - px, - y, - linewidth=3, - color="blue", - label=f"all classes {y.max():.2f} at {px[y.argmax()]:.3f}", - ) + ax.plot(px, y, linewidth=3, color="blue", label=f"all classes {y.max():.2f} at {px[y.argmax()]:.3f}", ) ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) ax.set_xlim(0, 1) diff --git a/utils/segment.py b/utils/segment.py index 7a32ce518033..01d2d1cafd9f 100644 --- a/utils/segment.py +++ b/utils/segment.py @@ -1,23 +1,21 @@ -import numpy as np import time + import cv2 -import torch.nn.functional as F +import numpy as np import torch +import torch.nn.functional as F import torchvision + from .general import xyxy2xywh, xywh2xyxy from .seg_metrics import box_iou + def segment2box(segment, width=640, height=640): # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) x, y = segment.T # segment xy inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) - x, y, = ( - x[inside], - y[inside], - ) - return ( - np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) - ) # xyxy + x, y, = (x[inside], y[inside],) + return (np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4))) # xyxy def segments2boxes(segments): @@ -34,24 +32,12 @@ def resample_segments(segments, n=1000): for i, s in enumerate(segments): x = np.linspace(0, len(s) - 1, n) xp = np.arange(len(s)) - segments[i] = ( - np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]) - .reshape(2, -1) - .T - ) # segment xy + segments[i] = (np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T) # segment xy return segments -def non_max_suppression_masks( - prediction, - conf_thres=0.25, - iou_thres=0.45, - classes=None, - agnostic=False, - multi_label=False, - labels=(), - max_det=300, - mask_dim=32, -): + +def non_max_suppression_masks(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, + multi_label=False, labels=(), max_det=300, mask_dim=32, ): """Runs Non-Maximum Suppression (NMS) on inference results Returns: @@ -62,12 +48,8 @@ def non_max_suppression_masks( xc = prediction[..., 4] > conf_thres # candidates # Checks - assert ( - 0 <= conf_thres <= 1 - ), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0" - assert ( - 0 <= iou_thres <= 1 - ), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" + assert (0 <= conf_thres <= 1), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0" + assert (0 <= iou_thres <= 1), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" # Settings min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height @@ -79,9 +61,7 @@ def non_max_suppression_masks( nm = 5 + mask_dim t = time.time() - output = [ - torch.zeros((0, 6 + mask_dim), device=prediction.device) - ] * prediction.shape[0] + output = [torch.zeros((0, 6 + mask_dim), device=prediction.device)] * prediction.shape[0] for xi, x in enumerate(prediction): # image index, image inference # Apply constraints # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # 
width-height @@ -110,14 +90,10 @@ def non_max_suppression_masks( # Detections matrix nx6 (xyxy, conf, cls) if multi_label: i, j = (x[:, nm:] > conf_thres).nonzero(as_tuple=False).T - x = torch.cat( - (box[i], x[i, j + nm, None], j[:, None].float(), pred_masks[i]), 1 - ) + x = torch.cat((box[i], x[i, j + nm, None], j[:, None].float(), pred_masks[i]), 1) else: # best class only conf, j = x[:, nm:].max(1, keepdim=True) - x = torch.cat((box, conf, j.float(), pred_masks), 1)[ - conf.view(-1) > conf_thres - ] + x = torch.cat((box, conf, j.float(), pred_masks), 1)[conf.view(-1) > conf_thres] # Filter by class if classes is not None: @@ -144,9 +120,7 @@ def non_max_suppression_masks( # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix weights = iou * scores[None] # box weights - x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum( - 1, keepdim=True - ) # merged boxes + x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes if redundant: i = i[iou.sum(1) > 1] # require redundancy @@ -157,6 +131,7 @@ def non_max_suppression_masks( return output + def crop(masks, boxes): """ "Crop" predicted masks by zeroing out everything not in the predicted bbox. @@ -168,21 +143,10 @@ def crop(masks, boxes): """ h, w, n = masks.size() x1, x2 = boxes[:, 0], boxes[:, 2] - y1, y2 = ( - boxes[:, 1], - boxes[:, 3], - ) - - rows = ( - torch.arange(w, device=masks.device, dtype=x1.dtype) - .view(1, -1, 1) - .expand(h, w, n) - ) - cols = ( - torch.arange(h, device=masks.device, dtype=x1.dtype) - .view(-1, 1, 1) - .expand(h, w, n) - ) + y1, y2 = (boxes[:, 1], boxes[:, 3],) + + rows = (torch.arange(w, device=masks.device, dtype=x1.dtype).view(1, -1, 1).expand(h, w, n)) + cols = (torch.arange(h, device=masks.device, dtype=x1.dtype).view(-1, 1, 1).expand(h, w, n)) # (1, w, 1), (1, 1, n) -> (1, w, n) masks_left = rows >= x1.view(1, 1, -1) @@ -196,6 +160,7 @@ def crop(masks, boxes): return masks * crop_mask.float() + def process_mask_upsample(proto_out, out_masks, bboxes, shape): """ Crop after unsample. 
@@ -207,8 +172,7 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): return: h, w, n """ # mask_h, mask_w, n - masks = proto_out.float().permute( - 1, 2, 0).contiguous() @ out_masks.float().tanh().T + masks = proto_out.float().permute(1, 2, 0).contiguous() @ out_masks.float().tanh().T # print(masks.shape) masks = masks.sigmoid() # print('after sigmoid:', masks) @@ -217,7 +181,8 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): masks = F.interpolate(masks.unsqueeze(0), shape, mode='bilinear', align_corners=False).squeeze(0) # [mask_h, mask_w, n] masks = crop(masks.permute(1, 2, 0).contiguous(), bboxes) - return masks.gt_(0.5) # .gt_(0.2) + return masks.gt_(0.5) # .gt_(0.2) + def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): """ @@ -233,8 +198,7 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): mh, mw = proto_out.shape[1:] ih, iw = shape # mask_h, mask_w, n - masks = proto_out.float().permute( - 1, 2, 0).contiguous() @ out_masks.float().tanh().T + masks = proto_out.float().permute(1, 2, 0).contiguous() @ out_masks.float().tanh().T # print(masks) masks = masks.sigmoid() # print('after sigmoid:', masks) @@ -249,6 +213,7 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): masks = F.interpolate(masks.unsqueeze(0), shape, mode='bilinear', align_corners=False).squeeze(0) return masks.gt_(0.5).permute(1, 2, 0).contiguous() + def scale_masks(img1_shape, masks, img0_shape, ratio_pad=None): """ img1_shape: model input shape, [h, w] @@ -258,16 +223,14 @@ def scale_masks(img1_shape, masks, img0_shape, ratio_pad=None): """ # Rescale coords (xyxy) from img1_shape to img0_shape if ratio_pad is None: # calculate from img0_shape - gain = min(img1_shape[0] / img0_shape[0], - img1_shape[1] / img0_shape[1]) # gain = old / new - pad = (img1_shape[1] - img0_shape[1] * gain) / 2, ( - img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding + gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new + pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding else: gain = ratio_pad[0][0] pad = ratio_pad[1] tl_pad = int(pad[1]), int(pad[0]) # y, x br_pad = int(img1_shape[0] - pad[1]), int(img1_shape[1] - pad[0]) - + if len(masks.shape) < 2: raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') # masks_h, masks_w, n @@ -286,6 +249,7 @@ def scale_masks(img1_shape, masks, img0_shape, ratio_pad=None): return masks + def mask_iou(mask1, mask2): """ mask1: [N, n] m1 means number of predicted objects @@ -303,6 +267,7 @@ def mask_iou(mask1, mask2): return intersection / (union + 1e-7) + def masks_iou(mask1, mask2): """ mask1: [N, n] m1 means number of predicted objects @@ -315,4 +280,4 @@ def masks_iou(mask1, mask2): area1 = torch.sum(mask1, dim=1).view(1, -1) area2 = torch.sum(mask2, dim=1).view(1, -1) union = (area1 + area2) - intersection - return intersection / (union + 1e-7) \ No newline at end of file + return intersection / (union + 1e-7) From 6c1adea65014e2bcc3060c43e7cbf42c885ec7aa Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 13 Jul 2022 14:22:58 +0530 Subject: [PATCH 018/247] fix test --- utils/loggers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 65c673c64498..24e98ab3304f 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -160,7 +160,7 @@ def on_model_save(self, last, epoch, 
final_epoch, best_fitness, fi): def on_train_end(self, last, best, plots, epoch, results, masks=False): # Callback runs on training end - plot_results = plot_results_with_masks if masks else plot_results + # plot_results = plot_results_with_masks if masks else plot_results if plots: plot_results(file=self.save_dir / 'results.csv') # save results.png files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] From c50fd2286bf2447db55919d12784597ac59d83f3 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Wed, 13 Jul 2022 17:47:09 +0800 Subject: [PATCH 019/247] add limit=10 for plotting while training --- evaluator.py | 12 +++++++----- utils/plots.py | 5 +++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/evaluator.py b/evaluator.py index 636c73482c98..83b6afc18126 100644 --- a/evaluator.py +++ b/evaluator.py @@ -64,7 +64,7 @@ def save_one_json(predn, jdict, path, class_map, pred_masks=None): class Yolov5Evaluator: def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls=False, augment=False, verbose=False, project="runs/val", name="exp", exist_ok=False, half=True, save_dir=Path(""), nosave=False, plots=True, - mask=False, mask_downsample_ratio=1, ) -> None: + max_plot_dets=10, mask=False, mask_downsample_ratio=1, ) -> None: self.data = check_dataset(data) # check self.conf_thres = conf_thres # confidence threshold self.iou_thres = iou_thres # NMS IoU threshold @@ -79,6 +79,7 @@ def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls= self.save_dir = save_dir self.nosave = nosave self.plots = plots + self.max_plot_dets = max_plot_dets self.mask = mask self.mask_downsample_ratio = mask_downsample_ratio @@ -146,7 +147,7 @@ def run_training(self, model, dataloader, compute_loss=None): # for visualization if self.plots and batch_i < 3 and pred_maski is not None: - self.pred_masks.append(pred_maski.cpu()) + self.pred_masks.append(pred_maski[:self.max_plot_dets].cpu()) # NOTE: eval in training image-size space self.compute_stat(pred, pred_maski, labels, gt_masksi) @@ -200,7 +201,7 @@ def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_ # for visualization if self.plots and batch_i < 3 and pred_maski is not None: - self.pred_masks.append(pred_maski.cpu()) + self.pred_masks.append(pred_maski[:self.max_plot_dets].cpu()) # NOTE: eval in training image-size space self.compute_stat(pred, pred_maski, labels, gt_masksi) @@ -468,11 +469,12 @@ def plot_images(self, i, img, targets, masks, out, paths): else: pred_masks = None Thread(target=plot_images_boxes_and_masks, - args=(img, output_to_target(out), pred_masks, paths, f, self.names, max(img.shape[2:]),), + args=(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:]),), daemon=True, ).start() import wandb if wandb.run: - res = plot_images_boxes_and_masks(img, output_to_target(out), pred_masks, paths, f, self.names, + res = plot_images_boxes_and_masks(img, output_to_target(out, filter_dets=self.max_plot_dets), + pred_masks, paths, f, self.names, max(img.shape[2:])) res = Image.fromarray(res) wandb.log({f"pred_{i}": wandb.Image(res)}) diff --git a/utils/plots.py b/utils/plots.py index 94e59fc8866c..f0c9b9ece4d4 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -723,10 +723,11 @@ def butter_lowpass(cutoff, fs, order): return filtfilt(b, a, data) # forward-backward filter -def output_to_target(output): +def output_to_target(output, filter_dets=10): # 
Convert model output to target format [batch_id, class_id, x, y, w, h, conf] targets = [] for i, o in enumerate(output): + o = o[:filter_dets] for *box, conf, cls in o.cpu().numpy()[:, :6]: targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) return np.array(targets) @@ -1385,4 +1386,4 @@ def visualize(self, images, outputs, out_masks, vis_confs=0.4): masks_images.append(img_masks) # TODO: make this(ori_type stuff) clean images = masks_images[0] if (len(masks_images) == 1) and type(masks_images) != ori_type else images[0] - return self.vis(images, outputs, vis_confs) \ No newline at end of file + return self.vis(images, outputs, vis_confs) From 6f01da0a5a9227b90902f443d0bb1d4f602a0005 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 14 Jul 2022 10:03:26 +0800 Subject: [PATCH 020/247] add object sorting in mask_nms --- utils/segment.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/segment.py b/utils/segment.py index 01d2d1cafd9f..4439b862b508 100644 --- a/utils/segment.py +++ b/utils/segment.py @@ -109,6 +109,8 @@ def non_max_suppression_masks(prediction, conf_thres=0.25, iou_thres=0.45, class continue elif n > max_nms: # excess boxes x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence + # else: + # x = x[x[:, 4].argsort(descending=True)] # sort by confidence # Batched NMS c = x[:, 5:6] * (0 if agnostic else max_wh) # classes From fd88f0624159d39d8b0b63f24f3a4431ad0ed43b Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 14 Jul 2022 10:03:35 +0800 Subject: [PATCH 021/247] fix on_train_end --- train_instseg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_instseg.py b/train_instseg.py index b5a307097368..acee896085ca 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -488,7 +488,7 @@ def fitness(x): if is_coco: callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch) - callbacks.run('on_train_end', last, best, plots, epoch, results, masks=True) + callbacks.run('on_train_end', plots, epoch, masks=True) torch.cuda.empty_cache() return results From 529c5401d5c2b371d6909c1d4c874f37d41b2ff1 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Thu, 14 Jul 2022 09:55:10 +0530 Subject: [PATCH 022/247] support noplots --- train_instseg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_instseg.py b/train_instseg.py index acee896085ca..e147d58f2106 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -169,7 +169,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio mask=True, verbose=False, mask_downsample_ratio=mask_ratio, - plots=True + plots=plots ) g = [], [], [] # optimizer parameter groups bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. 
BatchNorm2d() From afb81468ee61354dc86e8cc7d67a4dd362d82a0a Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Thu, 14 Jul 2022 10:31:58 +0530 Subject: [PATCH 023/247] attempt memory leak fix --- evaluator.py | 8 ++------ utils/loggers/__init__.py | 3 +-- utils/plots.py | 3 ++- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/evaluator.py b/evaluator.py index 83b6afc18126..2bbe62f52912 100644 --- a/evaluator.py +++ b/evaluator.py @@ -152,7 +152,7 @@ def run_training(self, model, dataloader, compute_loss=None): # NOTE: eval in training image-size space self.compute_stat(pred, pred_maski, labels, gt_masksi) - if batch_i < 3: + if self.plots and batch_i < 3: self.plot_images(batch_i, img, targets, masks, out, paths) # compute map and print it. @@ -473,11 +473,7 @@ def plot_images(self, i, img, targets, masks, out, paths): daemon=True, ).start() import wandb if wandb.run: - res = plot_images_boxes_and_masks(img, output_to_target(out, filter_dets=self.max_plot_dets), - pred_masks, paths, f, self.names, - max(img.shape[2:])) - res = Image.fromarray(res) - wandb.log({f"pred_{i}": wandb.Image(res)}) + wandb.log({f"pred_{i}": wandb.Image(f)}) def nms(self, **kwargs): return (non_max_suppression_masks(**kwargs) if self.mask else non_max_suppression(**kwargs)) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 24e98ab3304f..22c94d75f23b 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -404,8 +404,7 @@ def on_train_batch_end( ).start() if ni==0: if self.wandb: - res = plot_images_and_masks(imgs, targets, masks, paths) - wandb.log({"train_labels": wandb.Image(res)}) + wandb.log({"train_labels": wandb.Image(f)}) diff --git a/utils/plots.py b/utils/plots.py index f0c9b9ece4d4..f5cd3578929d 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -1300,7 +1300,8 @@ def plot_images_and_masks( mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA ) # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save - Image.fromarray(mosaic).save(fname) # PIL save + with Image.fromarray(mosaic) as im: + im.save(fname) return mosaic From ff65f54e475a93c313f8b2f86cdf461005f77a91 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 14 Jul 2022 13:03:15 +0800 Subject: [PATCH 024/247] fix object sorting in mask_nms --- utils/segment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/segment.py b/utils/segment.py index 4439b862b508..d9773784eafa 100644 --- a/utils/segment.py +++ b/utils/segment.py @@ -109,8 +109,8 @@ def non_max_suppression_masks(prediction, conf_thres=0.25, iou_thres=0.45, class continue elif n > max_nms: # excess boxes x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence - # else: - # x = x[x[:, 4].argsort(descending=True)] # sort by confidence + else: + x = x[x[:, 4].argsort(descending=True)] # sort by confidence # Batched NMS c = x[:, 5:6] * (0 if agnostic else max_wh) # classes From a8ae73f1be023fb14982e50fd72113eee65614ed Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 14 Jul 2022 13:03:34 +0800 Subject: [PATCH 025/247] update evaluator --- evaluator.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/evaluator.py b/evaluator.py index 83b6afc18126..0cb037b8b6bd 100644 --- a/evaluator.py +++ b/evaluator.py @@ -110,6 +110,7 @@ def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls= self.total_loss = torch.zeros((4 if self.mask else 3)) self.metric = Metrics() if self.mask 
else Metric() + @torch.no_grad() def run_training(self, model, dataloader, compute_loss=None): """This is for evaluation when training.""" self.seen = 0 @@ -389,7 +390,7 @@ def process_batch_masks(self, predn, pred_maski, gt_masksi, labels): correct = torch.zeros(predn.shape[0], self.iouv.shape[0], dtype=torch.bool, device=self.iouv.device, ) - if not self.plots: + if gt_masksi.shape[1:] != pred_maski.shape[1:]: gt_masksi = F.interpolate(gt_masksi.unsqueeze(0), pred_maski.shape[1:], mode="bilinear", align_corners=False, ).squeeze(0) @@ -458,6 +459,14 @@ def plot_images(self, i, img, targets, masks, out, paths): return # plot ground truth f = self.save_dir / f"val_batch{i}_labels.jpg" # labels + + if masks.shape[1:] != img.shape[2:]: + masks = F.interpolate( + masks.unsqueeze(0), + img.shape[2:], + mode="bilinear", + align_corners=False, + ).squeeze(0) Thread(target=plot_images_boxes_and_masks, args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), daemon=True, ).start() From 69a116a8b8f05335d9f5b564fb2134b3e5edbe98 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 14 Jul 2022 18:32:31 +0800 Subject: [PATCH 026/247] fix masks==None --- evaluator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluator.py b/evaluator.py index 0cb037b8b6bd..3a547bbbbcc0 100644 --- a/evaluator.py +++ b/evaluator.py @@ -460,7 +460,7 @@ def plot_images(self, i, img, targets, masks, out, paths): # plot ground truth f = self.save_dir / f"val_batch{i}_labels.jpg" # labels - if masks.shape[1:] != img.shape[2:]: + if masks is not None and masks.shape[1:] != img.shape[2:]: masks = F.interpolate( masks.unsqueeze(0), img.shape[2:], From fc439519c959c9553078c00875cea22a01176ba2 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Thu, 14 Jul 2022 18:20:08 +0530 Subject: [PATCH 027/247] add pdb --- evaluator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/evaluator.py b/evaluator.py index 2bbe62f52912..ecebe74d7170 100644 --- a/evaluator.py +++ b/evaluator.py @@ -153,6 +153,7 @@ def run_training(self, model, dataloader, compute_loss=None): self.compute_stat(pred, pred_maski, labels, gt_masksi) if self.plots and batch_i < 3: + import pdb;pdb.set_trace() self.plot_images(batch_i, img, targets, masks, out, paths) # compute map and print it. From 8bd03e49ffca74796ce616e1e6ebfd685f33999f Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Fri, 15 Jul 2022 09:11:30 +0530 Subject: [PATCH 028/247] remove redundant if --- utils/loggers/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 22c94d75f23b..5b229f5a0af2 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -402,7 +402,6 @@ def on_train_batch_end( args=(imgs, targets, masks, paths, f), daemon=True, ).start() - if ni==0: if self.wandb: wandb.log({"train_labels": wandb.Image(f)}) From 04f78b55bdaa5b0fd6b8d6d11cba6337410e6a1c Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Fri, 15 Jul 2022 09:40:39 +0530 Subject: [PATCH 029/247] remove pdb --- evaluator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/evaluator.py b/evaluator.py index b6e63f7c3af2..fb4745c27b7f 100644 --- a/evaluator.py +++ b/evaluator.py @@ -154,7 +154,6 @@ def run_training(self, model, dataloader, compute_loss=None): self.compute_stat(pred, pred_maski, labels, gt_masksi) if self.plots and batch_i < 3: - import pdb;pdb.set_trace() self.plot_images(batch_i, img, targets, masks, out, paths) # compute map and print it. 
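[Editor's note: patches 025/026 above resize ground-truth masks to the prediction resolution before scoring, so mask IoU is always computed on same-sized grids. Below is a minimal standalone sketch of that flow; `mask_iou_sketch` and the tensor shapes are hypothetical names for illustration only, not part of the patch series. It mirrors the flattened-mask IoU in utils/segment.py (mask_iou) plus the F.interpolate guard that patch 025 adds to process_batch_masks.

    import torch
    import torch.nn.functional as F

    def mask_iou_sketch(gt_masks, pred_masks, eps=1e-7):
        # gt_masks: (N, Hg, Wg), pred_masks: (M, Hp, Wp), binary {0, 1} tensors
        if gt_masks.shape[1:] != pred_masks.shape[1:]:
            # bring GT masks to the prediction resolution, then re-binarize
            gt_masks = F.interpolate(gt_masks[None].float(), pred_masks.shape[1:],
                                     mode="bilinear", align_corners=False)[0].gt_(0.5)
        gt = gt_masks.reshape(gt_masks.shape[0], -1).float()      # (N, Hp*Wp)
        pr = pred_masks.reshape(pred_masks.shape[0], -1).float()  # (M, Hp*Wp)
        inter = gt @ pr.T                                         # (N, M) overlap in pixels
        union = gt.sum(1)[:, None] + pr.sum(1)[None] - inter
        return inter / (union + eps)                              # (N, M) pairwise mask IoU

Resizing the few ground-truth masks, rather than upsampling every predicted mask, keeps evaluation cheap while matching grids, which is why the guard only interpolates gt_masksi.]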
From a8861ca68adb62a860eecb10d90f6c4189a0f4cb Mon Sep 17 00:00:00 2001
From: Ayush Chaurasia
Date: Fri, 15 Jul 2022 10:47:26 +0530
Subject: [PATCH 030/247] str typecast wandb image

---
 evaluator.py              | 2 +-
 utils/loggers/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/evaluator.py b/evaluator.py
index fb4745c27b7f..0fc6660785aa 100644
--- a/evaluator.py
+++ b/evaluator.py
@@ -482,7 +482,7 @@ def plot_images(self, i, img, targets, masks, out, paths):
             daemon=True, ).start()
         import wandb
         if wandb.run:
-            wandb.log({f"pred_{i}": wandb.Image(f)})
+            wandb.log({f"pred_{i}": wandb.Image(str(f))})

     def nms(self, **kwargs):
         return (non_max_suppression_masks(**kwargs) if self.mask else non_max_suppression(**kwargs))
diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py
index 5b229f5a0af2..c80c8077baca 100644
--- a/utils/loggers/__init__.py
+++ b/utils/loggers/__init__.py
@@ -403,7 +403,7 @@ def on_train_batch_end(
                 daemon=True, ).start()
             if self.wandb:
-                wandb.log({"train_labels": wandb.Image(f)})
+                wandb.log({"train_labels": wandb.Image(str(f))})



From 1cf9fd31a246ba33aeb1ebc48dd81f86aeed0637 Mon Sep 17 00:00:00 2001
From: Ayush Chaurasia
Date: Fri, 15 Jul 2022 11:28:27 +0530
Subject: [PATCH 031/247] fix thread race condition temporarily: allow log

---
 evaluator.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/evaluator.py b/evaluator.py
index 0fc6660785aa..5d92244af5c0 100644
--- a/evaluator.py
+++ b/evaluator.py
@@ -153,7 +153,7 @@ def run_training(self, model, dataloader, compute_loss=None):
         # NOTE: eval in training image-size space
         self.compute_stat(pred, pred_maski, labels, gt_masksi)

-        if self.plots and batch_i < 3:
+        if self.plots and batch_i < 2:
             self.plot_images(batch_i, img, targets, masks, out, paths)

     # compute map and print it.
@@ -477,9 +477,10 @@ def plot_images(self, i, img, targets, masks, out, paths): pred_masks = (torch.cat(self.pred_masks, dim=0) if len(self.pred_masks) > 1 else self.pred_masks[0]) else: pred_masks = None - Thread(target=plot_images_boxes_and_masks, - args=(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:]),), - daemon=True, ).start() + plot_images_boxes_and_masks(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:])) + #Thread(target=plot_images_boxes_and_masks, + # args=(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:]),), + # daemon=True, ).start() import wandb if wandb.run: wandb.log({f"pred_{i}": wandb.Image(str(f))}) From 67bae3d67b963592a8cac2a9e619b5177a0dc439 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Fri, 15 Jul 2022 16:18:56 +0800 Subject: [PATCH 032/247] fix ddp issue --- train_instseg.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/train_instseg.py b/train_instseg.py index e147d58f2106..304b86282fa8 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -21,7 +21,6 @@ from copy import deepcopy from datetime import datetime from pathlib import Path -from matplotlib.pyplot import plot import numpy as np import torch @@ -58,10 +57,6 @@ from utils.plots import plot_evolve, plot_labels from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first -LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html -RANK = int(os.getenv('RANK', -1)) -WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) - LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) @@ -71,7 +66,6 @@ from torch.optim import AdamW import yaml from datetime import datetime -from distutils import dist from evaluator import Yolov5Evaluator def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary @@ -344,7 +338,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio mloss = torch.zeros(4, device=device) # mean losses if RANK != -1: - train_loader.sampler.set_epoch(epoch) + train_loader.batch_sampler.sampler.set_epoch(epoch) pbar = enumerate(train_loader) LOGGER.info( ("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "seg", "obj", "cls", "labels", "img_size")) if RANK in {-1, 0}: From ff85de368df95e7374cd3c24f1d352778f60a7bd Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 18 Jul 2022 10:44:47 +0530 Subject: [PATCH 033/247] update loss tensor ops --- utils/seg_loss.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/seg_loss.py b/utils/seg_loss.py index 8ffb4439c2f0..e74b82adae7b 100644 --- a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -210,8 +210,8 @@ def build_targets(self, p, targets): ], device=targets.device, ).float() * g) # offsets for i in range(self.nl): - anchors = self.anchors[i] - gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain + anchors, shape = self.anchors[i], p[i].shape + gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain # Match targets to anchors t = targets * gain @@ -243,7 +243,7 @@ def build_targets(self, p, targets): # Append a = t[:, 6].long() # anchor indices - indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices + indices.append((b, a, 
gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid tbox.append(torch.cat((gxy - gij, gwh), 1)) # box anch.append(anchors[a]) # anchors tcls.append(c) # class From 2ee7f43f5706d1cd91cbc6f7bfcdb3d88267fcd0 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 19 Jul 2022 12:38:20 +0530 Subject: [PATCH 034/247] attempt to fix eval calculation --- evaluator.py => eval_seg.py | 431 ++++++++++++++++------ train_instseg.py | 4 +- utils/boxes.py | 298 ++++++++++++++++ utils/seg_plots.py | 689 ++++++++++++++++++++++++++++++++++++ 4 files changed, 1307 insertions(+), 115 deletions(-) rename evaluator.py => eval_seg.py (66%) create mode 100644 utils/boxes.py create mode 100644 utils/seg_plots.py diff --git a/evaluator.py b/eval_seg.py similarity index 66% rename from evaluator.py rename to eval_seg.py index 5d92244af5c0..24f2e40b95db 100644 --- a/evaluator.py +++ b/eval_seg.py @@ -1,12 +1,3 @@ -# TODO: Optimize plotting, losses & merge with val.py - -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license -""" -Validate a trained YOLOv5 model accuracy on a custom dataset - -Usage: - $ python path/to/val.py --data coco128.yaml --weights yolov5s.pt --img 640 -""" import json from pathlib import Path @@ -15,18 +6,33 @@ import numpy as np import torch import torch.nn.functional as F -from PIL import Image # import pycocotools.mask as mask_util from tqdm import tqdm from models.experimental import attempt_load from seg_dataloaders import create_dataloader -from utils.general import (box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, ) -from utils.general import (check_dataset, check_img_size, check_suffix, ) -from utils.general import (coco80_to_coco91_class, increment_path, colorstr, ) -from utils.plots import output_to_target, plot_images_boxes_and_masks +from utils.general import ( + coco80_to_coco91_class, + increment_path, + colorstr, check_dataset, check_img_size, check_suffix +) + +from utils.segment import ( + non_max_suppression_masks, + mask_iou, + process_mask, + process_mask_upsample, + scale_masks, +) +from utils.boxes import ( + box_iou, + non_max_suppression, + scale_coords, + xyxy2xywh, + xywh2xyxy, +) from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix -from utils.segment import (non_max_suppression_masks, mask_iou, process_mask, process_mask_upsample, scale_masks, ) +from utils.seg_plots import output_to_target, plot_images_boxes_and_masks from utils.torch_utils import select_device, time_sync @@ -34,7 +40,9 @@ def save_one_txt(predn, save_conf, shape, file): # Save one txt result gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh for *xyxy, conf, cls in predn.tolist(): - xywh = ((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()) # normalized xywh + xywh = ( + (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() + ) # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format with open(file, "a") as f: f.write(("%g " * len(line)).rstrip() % line + "\n") @@ -48,13 +56,20 @@ def save_one_json(predn, jdict, path, class_map, pred_masks=None): if pred_masks is not None: pred_masks = np.transpose(pred_masks, (2, 0, 1)) - rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks] + rles = [ + mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] + for mask in pred_masks + ] for rle in rles: rle["counts"] = rle["counts"].decode("utf-8") for i, (p, b) in enumerate(zip(predn.tolist(), 
box.tolist())): - pred_dict = {"image_id": image_id, "category_id": class_map[int(p[5])], "bbox": [round(x, 3) for x in b], - "score": round(p[4], 5), } + pred_dict = { + "image_id": image_id, + "category_id": class_map[int(p[5])], + "bbox": [round(x, 3) for x in b], + "score": round(p[4], 5), + } if pred_masks is not None: pred_dict["segmentation"] = rles[i] jdict.append(pred_dict) @@ -62,9 +77,25 @@ def save_one_json(predn, jdict, path, class_map, pred_masks=None): @torch.no_grad() class Yolov5Evaluator: - def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls=False, augment=False, verbose=False, - project="runs/val", name="exp", exist_ok=False, half=True, save_dir=Path(""), nosave=False, plots=True, - max_plot_dets=10, mask=False, mask_downsample_ratio=1, ) -> None: + def __init__( + self, + data, + conf_thres=0.001, + iou_thres=0.6, + device="", + single_cls=False, + augment=False, + verbose=False, + project="runs/val", + name="exp", + exist_ok=False, + half=True, + save_dir=Path(""), + nosave=False, + plots=True, + mask=False, + mask_downsample_ratio=1, + ) -> None: self.data = check_dataset(data) # check self.conf_thres = conf_thres # confidence threshold self.iou_thres = iou_thres # NMS IoU threshold @@ -79,7 +110,6 @@ def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls= self.save_dir = save_dir self.nosave = nosave self.plots = plots - self.max_plot_dets = max_plot_dets self.mask = mask self.mask_downsample_ratio = mask_downsample_ratio @@ -89,14 +119,40 @@ def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls= self.confusion_matrix = ConfusionMatrix(nc=self.nc) self.dt = [0.0, 0.0, 0.0] self.names = {k: v for k, v in enumerate(self.data["names"])} - self.s = (("%20s" + "%11s" * 10) % ( - "Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", "Mask:{P", "R", "mAP@.5", - "mAP@.5:.95}",) if self.mask else ("%20s" + "%11s" * 6) % ( - "Class", "Images", "Labels", "P", "R", "mAP@.5", "mAP@.5:.95",)) + self.s = ( + ("%20s" + "%11s" * 10) + % ( + "Class", + "Images", + "Labels", + "Box:{P", + "R", + "mAP@.5", + "mAP@.5:.95}", + "Mask:{P", + "R", + "mAP@.5", + "mAP@.5:.95}", + ) + if self.mask + else ("%20s" + "%11s" * 6) + % ( + "Class", + "Images", + "Labels", + "P", + "R", + "mAP@.5", + "mAP@.5:.95", + ) + ) # coco stuff - self.is_coco = isinstance(self.data.get("val"), str) and self.data["val"].endswith( - "coco/val2017.txt") # COCO dataset + self.is_coco = isinstance(self.data.get("val"), str) and self.data[ + "val" + ].endswith( + "coco/val2017.txt" + ) # COCO dataset self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000)) self.jdict = [] self.iou_thres = 0.65 if self.is_coco else self.iou_thres @@ -110,7 +166,6 @@ def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls= self.total_loss = torch.zeros((4 if self.mask else 3)) self.metric = Metrics() if self.mask else Metric() - @torch.no_grad() def run_training(self, model, dataloader, compute_loss=None): """This is for evaluation when training.""" self.seen = 0 @@ -124,7 +179,9 @@ def run_training(self, model, dataloader, compute_loss=None): # inference # masks will be `None` if training objection. 
-        for batch_i, (img, targets, paths, shapes, masks) in enumerate(tqdm(dataloader, desc=self.s)):
+        for batch_i, (img, targets, paths, shapes, masks) in enumerate(
+            tqdm(dataloader, desc=self.s)
+        ):
             # reset pred_masks
             self.pred_masks = []
             img = img.to(self.device, non_blocking=True)
@@ -143,17 +200,20 @@ def run_training(self, model, dataloader, compute_loss=None):

                 # get prediction masks
                 proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
-                pred_maski = self.get_predmasks(pred, proto_out,
-                                                gt_masksi.shape[1:] if gt_masksi is not None else None, )
+                pred_maski = self.get_predmasks(
+                    pred,
+                    proto_out,
+                    gt_masksi.shape[1:] if gt_masksi is not None else None,
+                )

                 # for visualization
                 if self.plots and batch_i < 3 and pred_maski is not None:
-                    self.pred_masks.append(pred_maski[:self.max_plot_dets].cpu())
+                    self.pred_masks.append(pred_maski.cpu())

                 # NOTE: eval in training image-size space
                 self.compute_stat(pred, pred_maski, labels, gt_masksi)

-            if self.plots and batch_i < 2:
+            if self.plots and batch_i < 3:
                 self.plot_images(batch_i, img, targets, masks, out, paths)

         # compute map and print it.
@@ -161,12 +221,29 @@ def run_training(self, model, dataloader, compute_loss=None):

         # Return results
         model.float()  # for training
-        return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),),
-                self.metric.get_maps(self.nc), t,)
-
-    def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_json=False, task="val", ):
+        return (
+            (
+                *self.metric.mean_results(),
+                *(self.total_loss.cpu() / len(dataloader)).tolist(),
+            ),
+            self.metric.get_maps(self.nc),
+            t,
+        )
+
+    def run(
+        self,
+        weights,
+        batch_size,
+        imgsz,
+        save_txt=False,
+        save_conf=False,
+        save_json=False,
+        task="val",
+    ):
         """This is for native evaluation."""
-        model, dataloader, imgsz = self.before_infer(weights, batch_size, imgsz, save_txt, task)
+        model, dataloader, imgsz = self.before_infer(
+            weights, batch_size, imgsz, save_txt, task
+        )
         self.seen = 0
         # self.iouv.to(self.device)
         self.half &= self.device.type != "cpu"  # half precision only supported on CUDA
@@ -175,7 +252,9 @@ def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_
         model.eval()

         # inference
-        for batch_i, (img, targets, paths, shapes, masks) in enumerate(tqdm(dataloader, desc=self.s)):
+        for batch_i, (img, targets, paths, shapes, masks) in enumerate(
+            tqdm(dataloader, desc=self.s)
+        ):
             # reset pred_masks
             self.pred_masks = []
             img = img.to(self.device, non_blocking=True)
@@ -197,12 +276,15 @@ def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_

                 # get prediction masks
                 proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
-                pred_maski = self.get_predmasks(pred, proto_out,
-                                                gt_masksi.shape[1:] if gt_masksi is not None else None, )
+                pred_maski = self.get_predmasks(
+                    pred,
+                    proto_out,
+                    gt_masksi.shape[1:] if gt_masksi is not None else None,
+                )

                 # for visualization
                 if self.plots and batch_i < 3 and pred_maski is not None:
-                    self.pred_masks.append(pred_maski[:self.max_plot_dets].cpu())
+                    self.pred_masks.append(pred_maski.cpu())

                 # NOTE: eval in training image-size space
                 self.compute_stat(pred, pred_maski, labels, gt_masksi)
@@ -215,21 +297,36 @@ def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_
                 # clone() so that plot_images works correctly
                 predn = pred.clone()
                 # test-time loading adds 0.5 padding, which differs from the dataloader padding, so ratio_pad must be passed in
-                scale_coords(img[si].shape[1:], predn[:, :4], shape, ratio_pad)  # native-space pred
-
+                scale_coords(
+                    img[si].shape[1:], predn[:, :4], shape, ratio_pad
+                )  # native-space pred

                 # Save/log
                 if save_txt and self.save_dir.exists():
                     # NOTE: convert coords to native space when save txt.
                     # only box predictions are saved
-                    save_one_txt(predn, save_conf, shape, file=self.save_dir / "labels" / (path.stem + ".txt"), )
+                    save_one_txt(
+                        predn,
+                        save_conf,
+                        shape,
+                        file=self.save_dir / "labels" / (path.stem + ".txt"),
+                    )

                 if save_json and self.save_dir.exists():
                     # NOTE: convert coords to native space when save json.
                     # if pred_maski is not None:
                     # h, w, n
-                    pred_maski = scale_masks(img[si].shape[1:], pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(),
-                                             shape, ratio_pad, )
+                    pred_maski = scale_masks(
+                        img[si].shape[1:],
+                        pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(),
+                        shape,
+                        ratio_pad,
+                    )
-                    save_one_json(predn, self.jdict, path, self.class_map,
-                                  pred_maski, )  # append to COCO-JSON dictionary
+                    save_one_json(
+                        predn,
+                        self.jdict,
+                        path,
+                        self.class_map,
+                        pred_maski,
+                    )  # append to COCO-JSON dictionary

             if self.plots and batch_i < 3:
                 self.plot_images(batch_i, img, targets, masks, out, paths)
@@ -246,24 +343,42 @@ def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_

         # Print speeds
         shape = (batch_size, 3, imgsz, imgsz)
-        print(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" % t)
+        print(
+            f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}"
+            % t
+        )
         s = (
-            f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}" if save_txt and self.save_dir.exists() else "")
-        print(f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}")
+            f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}"
+            if save_txt and self.save_dir.exists()
+            else ""
+        )
+        print(
+            f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}"
+        )

         # Return results
-        return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),),
-                self.metric.get_maps(self.nc), t,)
+        return (
+            (
+                *self.metric.mean_results(),
+                *(self.total_loss.cpu() / len(dataloader)).tolist(),
+            ),
+            self.metric.get_maps(self.nc),
+            t,
+        )

     def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"):
         "prepare for evaluation without training."
self.device = select_device(self.device, batch_size=batch_size) # Directories - self.save_dir = increment_path(Path(self.project) / self.name, exist_ok=self.exist_ok) # increment run + self.save_dir = increment_path( + Path(self.project) / self.name, exist_ok=self.exist_ok + ) # increment run if not self.nosave: - (self.save_dir / "labels" if save_txt else self.save_dir).mkdir(parents=True, exist_ok=True) # make dir + (self.save_dir / "labels" if save_txt else self.save_dir).mkdir( + parents=True, exist_ok=True + ) # make dir # Load model check_suffix(weights, ".pt") @@ -273,11 +388,27 @@ def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"): # Data if self.device.type != "cpu": - model(torch.zeros(1, 3, imgsz, imgsz).to(self.device).type_as(next(model.parameters()))) # run once + model( + torch.zeros(1, 3, imgsz, imgsz) + .to(self.device) + .type_as(next(model.parameters())) + ) # run once pad = 0.0 if task == "speed" else 0.5 - task = (task if task in ("train", "val", "test") else "val") # path to train/val/test images - dataloader = create_dataloader(self.data[task], imgsz, batch_size, gs, self.single_cls, pad=pad, rect=True, - prefix=colorstr(f"{task}: "), mask_head=self.mask, mask_downsample_ratio=self.mask_downsample_ratio, )[0] + task = ( + task if task in ("train", "val", "test") else "val" + ) # path to train/val/test images + dataloader = create_dataloader( + self.data[task], + imgsz, + batch_size, + gs, + self.single_cls, + pad=pad, + rect=True, + prefix=colorstr(f"{task}: "), + mask_head=self.mask, + mask_downsample_ratio=self.mask_downsample_ratio, + )[0] return model, dataloader, imgsz def inference(self, model, img, targets, masks=None, compute_loss=None): @@ -290,18 +421,29 @@ def inference(self, model, img, targets, masks=None, compute_loss=None): self.dt[0] += t2 - t1 # Run model - out, train_out = model(img, augment=self.augment) # inference and training outputs + out, train_out = model( + img, augment=self.augment + ) # inference and training outputs self.dt[1] += time_sync() - t2 # Compute loss if compute_loss: - self.total_loss += compute_loss(train_out, targets, masks)[1] # box, obj, cls + self.total_loss += compute_loss(train_out, targets, masks)[ + 1 + ] # box, obj, cls # Run NMS - targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(self.device) # to pixels + targets[:, 2:] *= torch.Tensor([width, height, width, height]).to( + self.device + ) # to pixels t3 = time_sync() - out = self.nms(prediction=out, conf_thres=self.conf_thres, iou_thres=self.iou_thres, multi_label=True, - agnostic=self.single_cls, ) + out = self.nms( + prediction=out, + conf_thres=self.conf_thres, + iou_thres=self.iou_thres, + multi_label=True, + agnostic=self.single_cls, + ) self.dt[2] += time_sync() - t3 return out, train_out @@ -312,18 +454,25 @@ def after_infer(self): """ # Plot confusion matrix if self.plots and self.save_dir.exists(): - self.confusion_matrix.plot(save_dir=self.save_dir, names=list(self.names.values())) + self.confusion_matrix.plot( + save_dir=self.save_dir, names=list(self.names.values()) + ) # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*self.stats)] # to numpy box_or_mask_any = stats[0].any() or stats[1].any() stats = stats[1:] if not self.mask else stats if len(stats) and box_or_mask_any: - results = self.ap_per_class(*stats, self.plots, self.save_dir if self.save_dir.exists() else None, - self.names, ) + results = self.ap_per_class( + *stats, + self.plots, + self.save_dir if self.save_dir.exists() else None, + 
self.names, + ) self.metric.update(results) - nt = np.bincount(stats[(3 if not self.mask else 4)].astype(np.int64), - minlength=self.nc) # number of targets per class + nt = np.bincount( + stats[(3 if not self.mask else 4)].astype(np.int64), minlength=self.nc + ) # number of targets per class else: nt = torch.zeros(1) @@ -343,13 +492,19 @@ def process_batch(self, detections, labels, iouv): Returns: correct (Array[N, 10]), for 10 IoU levels """ - correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device) + correct = torch.zeros( + detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device + ) iou = box_iou(labels[:, 1:], detections[:, :4]) x = torch.where( - (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5])) # IoU above threshold and classes match + (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5]) + ) # IoU above threshold and classes match if x[0].shape[0]: matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) + .cpu() + .numpy() + ) # [label, detection, iou] if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -377,29 +532,53 @@ def get_predmasks(self, pred, proto_out, gt_shape): if proto_out is None or len(pred) == 0: return None process = process_mask_upsample if self.plots else process_mask - gt_shape = (gt_shape[0] * self.mask_downsample_ratio, gt_shape[1] * self.mask_downsample_ratio,) + gt_shape = ( + gt_shape[0] * self.mask_downsample_ratio, + gt_shape[1] * self.mask_downsample_ratio, + ) # n, h, w - pred_mask = (process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape).permute(2, 0, 1).contiguous()) + pred_mask = ( + process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape) + .permute(2, 0, 1) + .contiguous() + ) return pred_mask def process_batch_masks(self, predn, pred_maski, gt_masksi, labels): - assert not ((pred_maski is None) ^ ( - gt_masksi is None)), "`proto_out` and `gt_masksi` should be both None or both exist." + assert not ( + (pred_maski is None) ^ (gt_masksi is None) + ), "`proto_out` and `gt_masksi` should be both None or both exist." 
if pred_maski is None and gt_masksi is None: return torch.zeros(0, self.niou, dtype=torch.bool) - correct = torch.zeros(predn.shape[0], self.iouv.shape[0], dtype=torch.bool, device=self.iouv.device, ) - - if gt_masksi.shape[1:] != pred_maski.shape[1:]: - gt_masksi = F.interpolate(gt_masksi.unsqueeze(0), pred_maski.shape[1:], mode="bilinear", - align_corners=False, ).squeeze(0) + correct = torch.zeros( + predn.shape[0], + self.iouv.shape[0], + dtype=torch.bool, + device=self.iouv.device, + ) + + if not self.plots: + gt_masksi = F.interpolate( + gt_masksi.unsqueeze(0), + pred_maski.shape[1:], + mode="bilinear", + align_corners=False, + ).squeeze(0) - iou = mask_iou(gt_masksi.view(gt_masksi.shape[0], -1), pred_maski.view(pred_maski.shape[0], -1), ) + iou = mask_iou( + gt_masksi.view(gt_masksi.shape[0], -1), + pred_maski.view(pred_maski.shape[0], -1), + ) x = torch.where( - (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match + (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5]) + ) # IoU above threshold and classes match if x[0].shape[0]: matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) + .cpu() + .numpy() + ) # [label, detection, iou] if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -416,9 +595,15 @@ def compute_stat(self, predn, pred_maski, labels, gt_maski): if len(predn) == 0: if nl: - self.stats.append((torch.zeros(0, self.niou, dtype=torch.bool), # boxes - torch.zeros(0, self.niou, dtype=torch.bool), # masks - torch.Tensor(), torch.Tensor(), tcls,)) + self.stats.append( + ( + torch.zeros(0, self.niou, dtype=torch.bool), # boxes + torch.zeros(0, self.niou, dtype=torch.bool), # masks + torch.Tensor(), + torch.Tensor(), + tcls, + ) + ) return # Predictions @@ -433,15 +618,24 @@ def compute_stat(self, predn, pred_maski, labels, gt_maski): correct_boxes = self.process_batch(predn, labelsn, self.iouv) # masks - correct_masks = self.process_batch_masks(predn, pred_maski, gt_maski, labelsn) + correct_masks = self.process_batch_masks( + predn, pred_maski, gt_maski, labelsn + ) if self.plots: self.confusion_matrix.process_batch(predn, labelsn) else: correct_boxes = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) correct_masks = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) - self.stats.append((correct_masks.cpu(), correct_boxes.cpu(), predn[:, 4].cpu(), predn[:, 5].cpu(), - tcls,)) # (correct, conf, pcls, tcls) + self.stats.append( + ( + correct_masks.cpu(), + correct_boxes.cpu(), + predn[:, 4].cpu(), + predn[:, 5].cpu(), + tcls, + ) + ) # (correct, conf, pcls, tcls) def print_metric(self, nt, stats): # Print results @@ -452,41 +646,52 @@ def print_metric(self, nt, stats): # TODO: self.seen support verbose. 
if self.verbose and self.nc > 1 and len(stats): for i, c in enumerate(self.metric.ap_class_index): - print(pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i))) + print( + pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i)) + ) def plot_images(self, i, img, targets, masks, out, paths): if not self.save_dir.exists(): return # plot ground truth f = self.save_dir / f"val_batch{i}_labels.jpg" # labels - - if masks is not None and masks.shape[1:] != img.shape[2:]: - masks = F.interpolate( - masks.unsqueeze(0), - img.shape[2:], - mode="bilinear", - align_corners=False, - ).squeeze(0) - Thread(target=plot_images_boxes_and_masks, args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), - daemon=True, ).start() + Thread( + target=plot_images_boxes_and_masks, + args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), + daemon=True, + ).start() f = self.save_dir / f"val_batch{i}_pred.jpg" # predictions # plot predition if len(self.pred_masks): - pred_masks = (torch.cat(self.pred_masks, dim=0) if len(self.pred_masks) > 1 else self.pred_masks[0]) + pred_masks = ( + torch.cat(self.pred_masks, dim=0) + if len(self.pred_masks) > 1 + else self.pred_masks[0] + ) else: pred_masks = None - plot_images_boxes_and_masks(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:])) - #Thread(target=plot_images_boxes_and_masks, - # args=(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:]),), - # daemon=True, ).start() - import wandb - if wandb.run: - wandb.log({f"pred_{i}": wandb.Image(str(f))}) + Thread( + target=plot_images_boxes_and_masks, + args=( + img, + output_to_target(out), + pred_masks, + paths, + f, + self.names, + max(img.shape[2:]), + ), + daemon=True, + ).start() def nms(self, **kwargs): - return (non_max_suppression_masks(**kwargs) if self.mask else non_max_suppression(**kwargs)) + return ( + non_max_suppression_masks(**kwargs) + if self.mask + else non_max_suppression(**kwargs) + ) def ap_per_class(self, *args): return ap_per_class_box_and_mask(*args) if self.mask else ap_per_class(*args) diff --git a/train_instseg.py b/train_instseg.py index 304b86282fa8..f5ef6b15a580 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -54,7 +54,7 @@ from utils.loggers.wandb.wandb_utils import check_wandb_resume from utils.seg_loss import ComputeLoss #from utils.metrics import fitness -from utils.plots import plot_evolve, plot_labels +from utils.seg_plots import plot_evolve, plot_labels from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first @@ -66,7 +66,7 @@ from torch.optim import AdamW import yaml from datetime import datetime -from evaluator import Yolov5Evaluator +from eval_seg import Yolov5Evaluator def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary print(device) diff --git a/utils/boxes.py b/utils/boxes.py new file mode 100644 index 000000000000..1881dde83c81 --- /dev/null +++ b/utils/boxes.py @@ -0,0 +1,298 @@ +import time + +import cv2 +import numpy as np +import torch +import torchvision + +from utils.general import clip_coords, scale_coords, xywh2xyxy, xyxy2xywh +from .general import increment_path +from .metrics import box_iou + + +def nms_numpy(boxes, scores, class_id, threshold, method=None, agnostic=False): + """ + :param boxes: numpy(N, 4), xyxy + :param scores: numpy(N, ) + :param class_id: numpy(N, ) + :param threshold: float + :param 
method: optional, "Min" divides the overlap by the smaller box area instead of the union
+    :return: indices of kept boxes
+    """
+    if boxes.size == 0:
+        return np.empty((0,), dtype=np.int8)
+    max_wh = 4096
+    if isinstance(boxes, torch.Tensor):
+        boxes = boxes.cpu().numpy()
+    if isinstance(scores, torch.Tensor):
+        scores = scores.cpu().numpy()
+    if isinstance(class_id, torch.Tensor):
+        class_id = class_id.cpu().numpy()
+
+    if boxes.ndim == 1:
+        boxes = boxes[None, :]
+    assert boxes.shape[1] == 4, f"expected boxes shape [N, 4], but got {boxes.shape}"
+    if len(class_id.shape) == 1:
+        class_id = class_id[:, None]
+
+    assert (boxes.shape[0] == class_id.shape[0] == scores.shape[0]), "boxes, class_id and scores shapes must be equal"
+
+    c = class_id * (0 if agnostic else max_wh)
+    boxes = boxes + c
+    x1 = boxes[:, 0].copy()
+    y1 = boxes[:, 1].copy()
+    x2 = boxes[:, 2].copy()
+    y2 = boxes[:, 3].copy()
+
+    s = scores
+    area = (x2 - x1 + 1) * (y2 - y1 + 1)
+
+    I = np.argsort(s)  # indices that sort scores in ascending order
+    pick = np.zeros_like(s, dtype=np.int16)
+    counter = 0
+    while I.size > 0:
+        i = I[-1]
+        pick[counter] = i
+        counter += 1
+        idx = I[0:-1]
+
+        xx1 = np.maximum(x1[i], x1[idx]).copy()
+        yy1 = np.maximum(y1[i], y1[idx]).copy()
+        xx2 = np.minimum(x2[i], x2[idx]).copy()
+        yy2 = np.minimum(y2[i], y2[idx]).copy()
+
+        w = np.maximum(0.0, xx2 - xx1 + 1).copy()
+        h = np.maximum(0.0, yy2 - yy1 + 1).copy()
+
+        inter = w * h
+        if method == "Min":
+            o = inter / np.minimum(area[i], area[idx])
+        else:
+            o = inter / (area[i] + area[idx] - inter)
+        I = I[np.where(o <= threshold)]
+
+    pick = pick[:counter].copy()
+    return pick
+
+
+def save_one_box(xyxy, im, file="image.jpg", gain=1.02, pad=10, square=False, BGR=False, save=True):
+    # Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop
+    xyxy = torch.tensor(xyxy).view(-1, 4)
+    b = xyxy2xywh(xyxy)  # boxes
+    if square:
+        b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
+    b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
+    xyxy = xywh2xyxy(b).long()
+    clip_coords(xyxy, im.shape)
+    crop = im[int(xyxy[0, 1]): int(xyxy[0, 3]), int(xyxy[0, 0]): int(xyxy[0, 2]), :: (1 if BGR else -1), ]
+    if save:
+        cv2.imwrite(str(increment_path(file, mkdir=True).with_suffix(".jpg")), crop)
+    return crop
+
+
+def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
+                        labels=(), max_det=300, ):
+    """Runs Non-Maximum Suppression (NMS) on inference results
+
+    Returns:
+         list of detections, on (n,6) tensor per image [xyxy, conf, cls]
+    """
+
+    nc = prediction.shape[2] - 5  # number of classes
+    xc = prediction[..., 4] > conf_thres  # candidates
+
+    # Checks
+    assert (0 <= conf_thres <= 1), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
+    assert (0 <= iou_thres <= 1), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"
+
+    # Settings
+    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
+    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
+    time_limit = 10.0  # seconds to quit after
+    redundant = True  # require redundant detections
+    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
+    merge = False  # use merge-NMS
+
+    t = time.time()
+    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
+    for xi, x in enumerate(prediction):  # image index, image inference
+        # Apply constraints
+        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
+        x = x[xc[xi]]  # confidence
+
+        # Cat apriori labels if autolabelling
+        if labels and len(labels[xi]):
+            l = labels[xi]
+            v = torch.zeros((len(l), nc + 5), device=x.device)
+            v[:, :4] = l[:, 1:5]  # box
+            v[:, 4] = 1.0  # conf
+            v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
+            x = torch.cat((x, v), 0)
+
+        # If none remain process next image
+        if not x.shape[0]:
+            continue
+
+        # Compute conf
+        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
+
+        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
+        box = xywh2xyxy(x[:, :4])
+
+        # Detections matrix nx6 (xyxy, conf, cls)
+        if multi_label:
+            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
+            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
+        else:  # best class only
+            conf, j = x[:, 5:].max(1, keepdim=True)
+            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
+
+        # Filter by class
+        if classes is not None:
+            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
+
+        # Apply finite constraint
+        # if not torch.isfinite(x).all():
+        #     x = x[torch.isfinite(x).all(1)]
+
+        # Check shape
+        n = x.shape[0]  # number of boxes
+        if not n:  # no boxes
+            continue
+        elif n > max_nms:  # excess boxes
+            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence
+
+        # Batched NMS
+        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
+        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
+        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
+        if i.shape[0] > max_det:  # limit detections
+            i = i[:max_det]
+        if merge and (1 < n < 3e3):  # Merge NMS (boxes merged using weighted mean)
+            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
+            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
+            weights = iou * scores[None]  # box weights
+            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
+            if redundant:
+                i = i[iou.sum(1) > 1]  # require redundancy
+
+        output[xi] = x[i]
+        if (time.time() - t) > time_limit:
+            print(f"WARNING: NMS time limit {time_limit}s exceeded")
+            break  # time limit exceeded
+
+    return output
+
+
+def non_max_suppression_numpy(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False,
+                              multi_label=False, labels=(), max_det=300, ):
+    """Runs Non-Maximum Suppression (NMS) on inference results
+
+    Returns:
+         list of detections, on (n,6) array per image [xyxy, conf, cls]
+    """
+
+    nc = prediction.shape[2] - 5  # number of classes
+    xc = prediction[..., 4] > conf_thres  # candidates
+
+    # Checks
+    assert (0 <= conf_thres <= 1), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
+    assert (0 <= iou_thres <= 1), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"
+
+    # Settings
+    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
+    time_limit = 10.0  # seconds to quit after
+    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
+
+    t = time.time()
+    output = [np.zeros((0, 6))] * prediction.shape[0]
+    for xi, x in enumerate(prediction):  # image index, image inference
+        # Apply constraints
+        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
+        x = x[xc[xi]]  # confidence
+
+        # Cat apriori labels if autolabelling
+        if labels and len(labels[xi]):
+            l = labels[xi]
+            v = np.zeros((len(l), nc + 5), dtype=np.float32)
+            v[:, :4] = l[:, 1:5]  # box
+            v[:, 4] = 1.0  # conf
+            v[range(len(l)), l[:, 0].astype(int) + 5] = 1.0  # cls
+            x = np.concatenate((x, v), 0)
+
+        # If none remain process next image
+        if not x.shape[0]:
+            continue
+
+        # Compute conf
+        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
+
+        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
+        box = xywh2xyxy(x[:, :4])
+
+        # Detections matrix nx6 (xyxy, conf, cls)
+        if multi_label:
+            i, j = np.nonzero(x[:, 5:] > conf_thres)
+            x = np.concatenate((box[i], x[i, j + 5, None], j[:, None].astype(np.float32)), 1)
+        else:  # best class only
+            conf, j = x[:, 5:].max(1), x[:, 5:].argmax(1)
+            x = np.concatenate((box, conf[:, None], j.astype(np.float32)[:, None]), 1)[conf > conf_thres]
+
+        # Filter by class
+        if classes is not None:
+            x = x[(x[:, 5:6] == np.array(classes)).any(1)]
+
+        # Check shape
+        n = x.shape[0]  # number of boxes
+        if not n:  # no boxes
+            continue
+        elif n > max_nms:  # excess boxes
+            x = x[x[:, 4].argsort()[::-1][:max_nms]]  # sort by confidence, descending
+
+        # Batched NMS
+        boxes, scores, cls = x[:, :4], x[:, 4], x[:, 5]
+        i = nms_numpy(boxes, scores, cls, iou_thres, agnostic=agnostic)  # NMS
+        if i.shape[0] > max_det:  # limit detections
+            i = i[:max_det]
+
+        output[xi] = x[i][None, :] if x[i].ndim == 1 else x[i]
+        if (time.time() - t) > time_limit:
+            print(f"WARNING: NMS time limit {time_limit}s exceeded")
+            break  # time limit exceeded
+
+    return output
+
+
+def apply_classifier(x, model, img, im0):
+    # Apply a second stage classifier to yolo outputs
+    im0 = [im0] if isinstance(im0, np.ndarray) else im0
+    for i, d in enumerate(x):  # per image
+        if d is not None and len(d):
+            d = d.clone()
+
+            # Reshape and pad cutouts
+            b = xyxy2xywh(d[:, :4])  # boxes
+            b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # rectangle to square
+            b[:, 2:] = b[:, 2:] * 1.3 + 30  # pad
+            d[:, :4] = xywh2xyxy(b).long()
+
+            # Rescale boxes from img_size to im0 size
+            scale_coords(img.shape[2:], d[:, :4], im0[i].shape)
+
+            # Classes
+            pred_cls1 = d[:, 5].long()
+            ims = []
+            for j, a in enumerate(d):  # per item
+                cutout = im0[i][int(a[1]): int(a[3]), int(a[0]): int(a[2])]
+                im = cv2.resize(cutout, (224, 224))  # BGR
+                # cv2.imwrite('example%i.jpg' % j, cutout)
+
+                im = im[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
+                im = np.ascontiguousarray(im, dtype=np.float32)  # uint8 to float32
+                im /= 255.0  # 0 - 255 to 0.0 - 1.0
+                ims.append(im)
+
+            pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1)  # classifier prediction
+            x[i] = x[i][pred_cls1 == pred_cls2]  # retain matching class detections
+
+    return x
diff --git a/utils/seg_plots.py b/utils/seg_plots.py
new file mode 100644
index 000000000000..3f09d2ad272c
--- /dev/null
+++ b/utils/seg_plots.py
@@ -0,0 +1,689 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Plotting utils
+"""
+
+import math
+import os
+from copy import copy
+from itertools import repeat
+from pathlib import Path
+
+import cv2
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sn
+import torch
+from PIL import Image, ImageDraw
+
+from utils.general import check_font, is_ascii, is_chinese
+from utils.seg_metrics import fitness
+from .boxes import xywh2xyxy, xyxy2xywh
+
+# Settings
+RANK = int(os.getenv("RANK", -1))
+matplotlib.rc("font", **{"size": 11})
+matplotlib.use("Agg")  # for writing to files only
+
+
+class Colors:
+    # Ultralytics color palette https://ultralytics.com/
+    def __init__(self):
+        # hex = matplotlib.colors.TABLEAU_COLORS.values()
+        hex = ("FF3838", "FF9D97", "FF701F", "FFB21D", "CFD231", "48F90A", "92CC17", "3DDB86", "1A9334", "00D4BB",
+               "2C99A8", "00C2FF", "344593", "6473FF", "0018EC", "8438FF", "520085", "CB38FF", "FF95C8", "FF37C7",)
+        self.palette = [self.hex2rgb("#" + c) for c in hex]
+        self.n = 
len(self.palette) + + def __call__(self, i, bgr=False): + c = self.palette[int(i) % self.n] + return (c[2], c[1], c[0]) if bgr else c + + @staticmethod + def hex2rgb(h): # rgb order (PIL) + return tuple(int(h[1 + i: 1 + i + 2], 16) for i in (0, 2, 4)) + + +colors = Colors() # create instance for 'from utils.plots import colors' + + +class Annotator: + if RANK in (-1, 0): + check_font() # download TTF if necessary + + # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations + def __init__(self, im, line_width=None, font_size=None, font="Arial.ttf", pil=False, example="abc", ): + assert (im.data.contiguous), "Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images." + self.pil = pil or not is_ascii(example) or is_chinese(example) + if self.pil: # use PIL + self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) + self.draw = ImageDraw.Draw(self.im) + self.font = check_font(font="Arial.Unicode.ttf" if is_chinese(example) else font, + size=font_size or max(round(sum(self.im.size) / 2 * 0.035), 12), ) + else: # use cv2 + self.im = im + self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width + + def box_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255)): + # Add one xyxy box to image with label + if self.pil or not is_ascii(label): + self.draw.rectangle(box, width=self.lw, outline=color) # box + if label: + w, h = self.font.getsize(label) # text width, height + outside = box[1] - h >= 0 # label fits outside box + self.draw.rectangle([box[0], box[1] - h if outside else box[1], box[0] + w + 1, + box[1] + 1 if outside else box[1] + h + 1, ], fill=color, ) + # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0 + self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font, ) + else: # cv2 + p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) + cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA) + if label: + tf = max(self.lw - 1, 1) # font thickness + w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height + outside = p1[1] - h - 3 >= 0 # label fits outside box + p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 + cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled + cv2.putText(self.im, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, self.lw / 3, txt_color, + thickness=tf, lineType=cv2.LINE_AA, ) + + def rectangle(self, xy, fill=None, outline=None, width=1): + # Add rectangle to image (PIL-only) + self.draw.rectangle(xy, fill, outline, width) + + def text(self, xy, text, txt_color=(255, 255, 255)): + # Add text to image (PIL-only) + w, h = self.font.getsize(text) # text width, height + self.draw.text((xy[0], xy[1] - h + 1), text, fill=txt_color, font=self.font) + + def result(self): + # Return annotated image as array + return np.asarray(self.im) + + +class Visualizer(object): + """Visualization of one model.""" + + def __init__(self, names) -> None: + super().__init__() + self.names = names + + def draw_one_img(self, img, output, vis_conf=0.4): + """Visualize one images. + + Args: + imgs (numpy.ndarray): one image. + outputs (torch.Tensor): one output, (num_boxes, classes+5) + vis_confs (float, optional): Visualize threshold. + Return: + img (numpy.ndarray): Image after visualization. 
+ """ + if isinstance(output, list): + output = output[0] + if output is None or len(output) == 0: + return img + for (*xyxy, conf, cls) in reversed(output[:, :6]): + if conf < vis_conf: + continue + label = '%s %.2f' % (self.names[int(cls)], conf) + color = colors(int(cls)) + plot_one_box(xyxy, img, label=label, color=color, line_thickness=2) + return img + + def draw_multi_img(self, imgs, outputs, vis_confs=0.4): + """Visualize multi images. + + Args: + imgs (List[numpy.array]): multi images. + outputs (List[torch.Tensor]): multi outputs, List[num_boxes, classes+5]. + vis_confs (float | tuple[float], optional): Visualize threshold. + Return: + imgs (List[numpy.ndarray]): Images after visualization. + """ + if isinstance(vis_confs, float): + vis_confs = list(repeat(vis_confs, len(imgs))) + assert len(imgs) == len(outputs) == len(vis_confs) + for i, output in enumerate(outputs): # detections per image + self.draw_one_img(imgs[i], output, vis_confs[i]) + return imgs + + def draw_imgs(self, imgs, outputs, vis_confs=0.4): + if isinstance(imgs, np.ndarray): + return self.draw_one_img(imgs, outputs, vis_confs) + else: + return self.draw_multi_img(imgs, outputs, vis_confs) + + def __call__(self, imgs, outputs, vis_confs=0.4): + return self.draw_imgs(imgs, outputs, vis_confs) + + +def hist2d(x, y, n=100): + # 2d histogram used in labels.png and evolve.png + xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n) + hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges)) + xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1) + yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1) + return np.log(hist[xidx, yidx]) + + +def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5): + from scipy.signal import butter, filtfilt + + # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy + def butter_lowpass(cutoff, fs, order): + nyq = 0.5 * fs + normal_cutoff = cutoff / nyq + return butter(order, normal_cutoff, btype="low", analog=False) + + b, a = butter_lowpass(cutoff, fs, order=order) + return filtfilt(b, a, data) # forward-backward filter + + +def output_to_target(output): + # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] + targets = [] + for i, o in enumerate(output): + for *box, conf, cls in o.cpu().numpy()[:, :6]: + targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) + return np.array(targets) + + +def plot_images(images, targets, paths=None, fname="images.jpg", names=None, max_size=1920, max_subplots=16, ): + # Plot image grid with labels + if isinstance(images, torch.Tensor): + images = images.cpu().float().numpy() + if isinstance(targets, torch.Tensor): + targets = targets.cpu().numpy() + if np.max(images[0]) <= 1: + images *= 255.0 # de-normalise (optional) + bs, _, h, w = images.shape # batch size, _, height, width + bs = min(bs, max_subplots) # limit plot images + ns = np.ceil(bs ** 0.5) # number of subplots (square) + + # Build Image + mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init + for i, im in enumerate(images): + if i == max_subplots: # if last batch has fewer images than we expect + break + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + im = im.transpose(1, 2, 0) + mosaic[y: y + h, x: x + w, :] = im + + # Resize (optional) + scale = max_size / ns / max(h, w) + if scale < 1: + h = math.ceil(scale * h) + w = math.ceil(scale * w) + mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) + + # 
Annotate + fs = int((h + w) * ns * 0.01) # font size + annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True) + for i in range(i + 1): + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders + if paths: + annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220), ) # filenames + if len(targets) > 0: + ti = targets[targets[:, 0] == i] # image targets + boxes = xywh2xyxy(ti[:, 2:6]).T + classes = ti[:, 1].astype("int") + labels = ti.shape[1] == 6 # labels if no conf column + conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) + + if boxes.shape[1]: + if boxes.max() <= 1.01: # if normalized with tolerance 0.01 + boxes[[0, 2]] *= w # scale to pixels + boxes[[1, 3]] *= h + elif scale < 1: # absolute coords need scale if image scales + boxes *= scale + boxes[[0, 2]] += x + boxes[[1, 3]] += y + for j, box in enumerate(boxes.T.tolist()): + cls = classes[j] + color = colors(cls) + cls = names[cls] if names else cls + if labels or conf[j] > 0.25: # 0.25 conf thresh + label = f"{cls}" if labels else f"{cls} {conf[j]:.1f}" + annotator.box_label(box, label, color=color) + annotator.im.save(fname) # save + return annotator.result() + + +def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=""): + # Plot LR simulating training for full epochs + optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals + y = [] + for _ in range(epochs): + scheduler.step() + y.append(optimizer.param_groups[0]["lr"]) + plt.plot(y, ".-", label="LR") + plt.xlabel("epoch") + plt.ylabel("LR") + plt.grid() + plt.xlim(0, epochs) + plt.ylim(0) + plt.savefig(Path(save_dir) / "LR.png", dpi=200) + plt.close() + + +def plot_val_txt(): # from utils.plots import *; plot_val() + # Plot val.txt histograms + x = np.loadtxt("val.txt", dtype=np.float32) + box = xyxy2xywh(x[:, :4]) + cx, cy = box[:, 0], box[:, 1] + + fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True) + ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0) + ax.set_aspect("equal") + plt.savefig("hist2d.png", dpi=300) + + fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True) + ax[0].hist(cx, bins=600) + ax[1].hist(cy, bins=600) + plt.savefig("hist1d.png", dpi=200) + + +def plot_targets_txt(): # from utils.plots import *; plot_targets_txt() + # Plot targets.txt histograms + x = np.loadtxt("targets.txt", dtype=np.float32).T + s = ["x targets", "y targets", "width targets", "height targets"] + fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) + ax = ax.ravel() + for i in range(4): + ax[i].hist(x[i], bins=100, label="%.3g +/- %.3g" % (x[i].mean(), x[i].std())) + ax[i].legend() + ax[i].set_title(s[i]) + plt.savefig("targets.jpg", dpi=200) + + +def plot_val_study(file="", dir="", x=None): # from utils.plots import *; plot_val_study() + # Plot file=study.txt generated by val.py (or plot all study*.txt in dir) + save_dir = Path(file).parent if file else Path(dir) + plot2 = False # plot additional results + if plot2: + ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)[1].ravel() + + fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True) + # for f in [save_dir / f'study_coco_{x}.txt' for x in ['yolov5n6', 'yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]: + for f in sorted(save_dir.glob("study*.txt")): + y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T + x = np.arange(y.shape[1]) if x is None 
else np.array(x) + if plot2: + s = ["P", "R", "mAP@.5", "mAP@.5:.95", "t_preprocess (ms/img)", "t_inference (ms/img)", "t_NMS (ms/img)", ] + for i in range(7): + ax[i].plot(x, y[i], ".-", linewidth=2, markersize=8) + ax[i].set_title(s[i]) + + j = y[3].argmax() + 1 + ax2.plot(y[5, 1:j], y[3, 1:j] * 1e2, ".-", linewidth=2, markersize=8, + label=f.stem.replace("study_coco_", "").replace("yolo", "YOLO"), ) + + ax2.plot(1e3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5], "k.-", linewidth=2, + markersize=8, alpha=0.25, label="EfficientDet", ) + + ax2.grid(alpha=0.2) + ax2.set_yticks(np.arange(20, 60, 5)) + ax2.set_xlim(0, 57) + ax2.set_ylim(25, 55) + ax2.set_xlabel("GPU Speed (ms/img)") + ax2.set_ylabel("COCO AP val") + ax2.legend(loc="lower right") + f = save_dir / "study.png" + print(f"Saving {f}...") + plt.savefig(f, dpi=300) + + +def plot_labels(labels, names=(), save_dir=Path("")): + # plot dataset labels + print("Plotting labels... ") + c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes + nc = int(c.max() + 1) # number of classes + x = pd.DataFrame(b.transpose(), columns=["x", "y", "width", "height"]) + + # seaborn correlogram + sn.pairplot(x, corner=True, diag_kind="auto", kind="hist", diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9), ) + plt.savefig(save_dir / "labels_correlogram.jpg", dpi=200) + plt.close() + + # matplotlib labels + matplotlib.use("svg") # faster + ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel() + y = ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8) + # [y[2].patches[i].set_color([x / 255 for x in colors(i)]) for i in range(nc)] # update colors bug #3195 + ax[0].set_ylabel("instances") + if 0 < len(names) < 30: + ax[0].set_xticks(range(len(names))) + ax[0].set_xticklabels(names, rotation=90, fontsize=10) + else: + ax[0].set_xlabel("classes") + sn.histplot(x, x="x", y="y", ax=ax[2], bins=50, pmax=0.9) + sn.histplot(x, x="width", y="height", ax=ax[3], bins=50, pmax=0.9) + + # rectangles + labels[:, 1:3] = 0.5 # center + labels[:, 1:] = xywh2xyxy(labels[:, 1:]) * 2000 + img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255) + for cls, *box in labels[:1000]: + ImageDraw.Draw(img).rectangle(box, width=1, outline=colors(cls)) # plot + ax[1].imshow(img) + ax[1].axis("off") + + for a in [0, 1, 2, 3]: + for s in ["top", "right", "left", "bottom"]: + ax[a].spines[s].set_visible(False) + + plt.savefig(save_dir / "labels.jpg", dpi=200) + matplotlib.use("Agg") + plt.close() + + +def profile_idetection(start=0, stop=0, labels=(), save_dir=""): + # Plot iDetection '*.txt' per-image logs. 
from utils.plots import *; profile_idetection() + ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel() + s = ["Images", "Free Storage (GB)", "RAM Usage (GB)", "Battery", "dt_raw (ms)", "dt_smooth (ms)", + "real-world FPS", ] + files = list(Path(save_dir).glob("frames*.txt")) + for fi, f in enumerate(files): + try: + results = np.loadtxt(f, ndmin=2).T[:, 90:-30] # clip first and last rows + n = results.shape[1] # number of rows + x = np.arange(start, min(stop, n) if stop else n) + results = results[:, x] + t = results[0] - results[0].min() # set t0=0s + results[0] = x + for i, a in enumerate(ax): + if i < len(results): + label = labels[fi] if len(labels) else f.stem.replace("frames_", "") + a.plot(t, results[i], marker=".", label=label, linewidth=1, markersize=5, ) + a.set_title(s[i]) + a.set_xlabel("time (s)") + # if fi == len(files) - 1: + # a.set_ylim(bottom=0) + for side in ["top", "right"]: + a.spines[side].set_visible(False) + else: + a.remove() + except Exception as e: + print("Warning: Plotting error for %s; %s" % (f, e)) + ax[1].legend() + plt.savefig(Path(save_dir) / "idetection_profile.png", dpi=200) + + +def plot_evolve(evolve_csv="path/to/evolve.csv", ): # from utils.plots import *; plot_evolve() + # Plot evolve.csv hyp evolution results + evolve_csv = Path(evolve_csv) + data = pd.read_csv(evolve_csv) + keys = [x.strip() for x in data.columns] + x = data.values + f = fitness(x) + j = np.argmax(f) # max fitness index + plt.figure(figsize=(10, 12), tight_layout=True) + matplotlib.rc("font", **{"size": 8}) + for i, k in enumerate(keys[7:]): + v = x[:, 7 + i] + mu = v[j] # best single result + plt.subplot(6, 5, i + 1) + plt.scatter(v, f, c=hist2d(v, f, 20), cmap="viridis", alpha=0.8, edgecolors="none") + plt.plot(mu, f.max(), "k+", markersize=15) + plt.title("%s = %.3g" % (k, mu), fontdict={"size": 9}) # limit to 40 characters + if i % 5 != 0: + plt.yticks([]) + print("%15s: %.3g" % (k, mu)) + f = evolve_csv.with_suffix(".png") # filename + plt.savefig(f, dpi=200) + plt.close() + print(f"Saved {f}") + + +def plot_results(file="path/to/results.csv", dir="", best=True): + # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') + save_dir = Path(file).parent if file else Path(dir) + fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) + ax = ax.ravel() + files = list(save_dir.glob("results*.csv")) + assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." 
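+    # NOTE: the "best" epoch below is selected by the standard YOLOv5 fitness score,
+    # fitness = 0.9 * mAP@0.5:0.95 + 0.1 * mAP@0.5 (columns 7 and 6 of results.csv,
+    # assuming the usual column order: epoch, train losses, P, R, mAP@0.5, mAP@0.5:0.95, ...)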
+ for _, f in enumerate(files): + try: + data = pd.read_csv(f) + index = np.argmax(0.9 * data.values[:, 7] + 0.1 * data.values[:, 6]) + s = [x.strip() for x in data.columns] + x = data.values[:, 0] + for i, j in enumerate([1, 2, 3, 4, 5, 8, 9, 10, 6, 7]): + y = data.values[:, j] + # y[y == 0] = np.nan # don't show zero values + ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) + if best: + # best + ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3, ) + ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") + else: + # last + ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) + ax[i].set_title(s[ + j] + f"\n{round(y[-1], 5)}") # if j in [8, 9, 10]: # share train and val loss y axes # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) + except Exception as e: + print(f"Warning: Plotting error for {f}: {e}") + ax[1].legend() + fig.savefig(save_dir / "results.png", dpi=200) + plt.close() + + +def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): + # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') + save_dir = Path(file).parent if file else Path(dir) + fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) + ax = ax.ravel() + files = list(save_dir.glob("results*.csv")) + assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." + for _, f in enumerate(files): + try: + data = pd.read_csv(f) + index = np.argmax( + 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + 0.1 * data.values[:, + 11], ) + s = [x.strip() for x in data.columns] + x = data.values[:, 0] + for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): + y = data.values[:, j] + # y[y == 0] = np.nan # don't show zero values + ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) + if best: + # best + ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3, ) + ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") + else: + # last + ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) + ax[i].set_title(s[ + j] + f"\n{round(y[-1], 5)}") # if j in [8, 9, 10]: # share train and val loss y axes # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) + except Exception as e: + print(f"Warning: Plotting error for {f}: {e}") + ax[1].legend() + fig.savefig(save_dir / "results.png", dpi=200) + plt.close() + + +def plot_one_box(x, img, color=None, label=None, line_thickness=None): + import random + + # Plots one bounding box on image img + tl = (line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1) # line/font thickness + color = color or [random.randint(0, 255) for _ in range(3)] + c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) + cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) + if label: + tf = max(tl - 1, 1) # font thickness + t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] + c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 + cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled + cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA, ) + + +def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detect/exp")): + """ + x: Features to be visualized + module_type: Module type + stage: Module stage within model + n: Maximum number of feature maps to plot + save_dir: Directory to save results + """ + 
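+    # Usage sketch (illustrative; names assumed from models/yolo.py): typically
+    # called once per layer during a forward pass, e.g. feature_visualization(x, m.type, m.i)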
if "Detect" not in module_type: + batch, channels, height, width = x.shape # batch, channels, height, width + if height > 1 and width > 1: + f = f"stage{stage}_{module_type.split('.')[-1]}_features.png" # filename + + blocks = torch.chunk(x[0].cpu(), channels, dim=0) # select batch index 0, block by channels + n = min(n, channels) # number of plots + fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True) # 8 rows x n/8 cols + ax = ax.ravel() + plt.subplots_adjust(wspace=0.05, hspace=0.05) + for i in range(n): + ax[i].imshow(blocks[i].squeeze()) # cmap='gray' + ax[i].axis("off") + + print(f"Saving {save_dir / f}... ({n}/{channels})") + plt.savefig(save_dir / f, dpi=300, bbox_inches="tight") + plt.close() + + +def plot_images_and_masks(images, targets, masks, paths=None, fname="images.jpg", names=None, max_size=640, + max_subplots=16, ): + # Plot image grid with labels + # print("targets:", targets.shape) + # print("masks:", masks.shape) + # print('--------------------------') + + if isinstance(images, torch.Tensor): + images = images.cpu().float().numpy() + if isinstance(targets, torch.Tensor): + targets = targets.cpu().numpy() + if isinstance(masks, torch.Tensor): + masks = masks.cpu().numpy() + masks = masks.astype(int) + + # un-normalise + if np.max(images[0]) <= 1: + images *= 255 + + tl = 3 # line thickness + tf = max(tl - 1, 1) # font thickness + bs, _, h, w = images.shape # batch size, _, height, width + bs = min(bs, max_subplots) # limit plot images + ns = np.ceil(bs ** 0.5) # number of subplots (square) + + # Check if we should resize + scale_factor = max_size / max(h, w) + if scale_factor < 1: + h = math.ceil(scale_factor * h) + w = math.ceil(scale_factor * w) + + mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init + for i, img in enumerate(images): + if i == max_subplots: # if last batch has fewer images than we expect + break + + block_x = int(w * (i // ns)) + block_y = int(h * (i % ns)) + + img = img.transpose(1, 2, 0) + if scale_factor < 1: + img = cv2.resize(img, (w, h)) + + mosaic[block_y: block_y + h, block_x: block_x + w, :] = img + if len(targets) > 0: + idx = (targets[:, 0]).astype(int) + image_targets = targets[idx == i] + # print(targets.shape) + # print(masks.shape) + image_masks = masks[idx == i] + # mosaic_masks + # mosaic_masks[block_y:block_y + h, + # block_x:block_x + w, :] = image_masks + boxes = xywh2xyxy(image_targets[:, 2:6]).T + classes = image_targets[:, 1].astype("int") + labels = image_targets.shape[1] == 6 # labels if no conf column + conf = (None if labels else image_targets[:, 6]) # check for confidence presence (label vs pred) + + if boxes.shape[1]: + if boxes.max() <= 1.01: # if normalized with tolerance 0.01 + boxes[[0, 2]] *= w # scale to pixels + boxes[[1, 3]] *= h + elif scale_factor < 1: # absolute coords need scale if image scales + boxes *= scale_factor + boxes[[0, 2]] += block_x + boxes[[1, 3]] += block_y + for j, box in enumerate(boxes.T): + cls = int(classes[j]) + color = colors(cls) + cls = names[cls] if names else cls + mask = image_masks[j].astype(np.bool) + # print(mask.shape) + # print(mosaic.shape) + if labels or conf[j] > 0.25: # 0.25 conf thresh + label = "%s" % cls if labels else "%s %.1f" % (cls, conf[j]) + plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) + mosaic[block_y: block_y + h, block_x: block_x + w, :][mask] = \ + mosaic[block_y: block_y + h, block_x: block_x + w, :][mask] * 0.35 + (np.array(color) * 0.65) + + # Draw image filename labels + if paths: + label = 
Path(paths[i]).name[:40] # trim to 40 char + t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] + cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf, + lineType=cv2.LINE_AA, ) + + # Image border + cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3, ) + + if fname: + r = min(1280.0 / max(h, w) / ns, 1.0) # ratio to limit image size + mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA) + # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save + Image.fromarray(mosaic).save(fname) # PIL save + return mosaic + + +def plot_images_boxes_and_masks(images, targets, masks=None, paths=None, fname="images.jpg", names=None, max_size=640, + max_subplots=16, ): + if masks is not None: + return plot_images_and_masks(images, targets, masks, paths, fname, names, max_size, max_subplots) + else: + return plot_images(images, targets, paths, fname, names, max_size, max_subplots) + + +def plot_masks(img, masks, colors, alpha=0.5): + """ + Args: + img (tensor): img on cuda, shape: [3, h, w], range: [0, 1] + masks (tensor): predicted masks on cuda, shape: [n, h, w] + colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] + Return: + img after draw masks, shape: [h, w, 3] + + transform colors and send img_gpu to cpu for the most time. + """ + img_gpu = img.clone() + num_masks = len(masks) + # [n, 1, 1, 3] + # faster this way to transform colors + colors = torch.tensor(colors, device=img.device).float() / 255.0 + colors = colors[:, None, None, :] + # [n, h, w, 1] + masks = masks[:, :, :, None] + masks_color = masks.repeat(1, 1, 1, 3) * colors * alpha + inv_alph_masks = masks * (-alpha) + 1 + masks_color_summand = masks_color[0] + if num_masks > 1: + inv_alph_cumul = inv_alph_masks[: (num_masks - 1)].cumprod(dim=0) + masks_color_cumul = masks_color[1:] * inv_alph_cumul + masks_color_summand += masks_color_cumul.sum(dim=0) + + # print(inv_alph_masks.prod(dim=0).shape) # [h, w, 1] + img_gpu = img_gpu.flip(dims=[0]) # filp channel for opencv + img_gpu = img_gpu.permute(1, 2, 0).contiguous() + # [h, w, 3] + img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand + return (img_gpu * 255).byte().cpu().numpy() From 16a0163bd67232102c9ba5d1dda058119bf52c44 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Tue, 19 Jul 2022 15:32:54 +0800 Subject: [PATCH 035/247] add overlap mask option --- evaluator.py | 22 +++++++--- seg_dataloaders.py | 85 +++++++++++++++++++++++++++------------ train_instseg.py | 21 ++++++---- utils/loggers/__init__.py | 13 ++---- utils/plots.py | 17 +++++--- utils/seg_loss.py | 40 +++++++++++++----- 6 files changed, 131 insertions(+), 67 deletions(-) diff --git a/evaluator.py b/evaluator.py index 5d92244af5c0..3b20f4971635 100644 --- a/evaluator.py +++ b/evaluator.py @@ -64,7 +64,7 @@ def save_one_json(predn, jdict, path, class_map, pred_masks=None): class Yolov5Evaluator: def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls=False, augment=False, verbose=False, project="runs/val", name="exp", exist_ok=False, half=True, save_dir=Path(""), nosave=False, plots=True, - max_plot_dets=10, mask=False, mask_downsample_ratio=1, ) -> None: + max_plot_dets=10, mask=False, mask_downsample_ratio=1, overlap=False) -> None: self.data = check_dataset(data) # check self.conf_thres = conf_thres # confidence threshold self.iou_thres = iou_thres # NMS IoU threshold @@ 
-82,6 +82,7 @@ def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls= self.max_plot_dets = max_plot_dets self.mask = mask self.mask_downsample_ratio = mask_downsample_ratio + self.overlap = overlap self.nc = 1 if self.single_cls else int(self.data["nc"]) # number of classes self.iouv = torch.linspace(0.5, 0.95, 10) # iou vector for mAP@0.5:0.95 @@ -130,7 +131,7 @@ def run_training(self, model, dataloader, compute_loss=None): img = img.to(self.device, non_blocking=True) targets = targets.to(self.device) if masks is not None: - masks = masks.to(self.device) + masks = masks.to(self.device).float() out, train_out = self.inference(model, img, targets, masks, compute_loss) # Statistics per image @@ -139,7 +140,8 @@ def run_training(self, model, dataloader, compute_loss=None): # eval in every image level labels = targets[targets[:, 0] == si, 1:] - gt_masksi = masks[targets[:, 0] == si] if masks is not None else None + midx = [si] if self.overlap else targets[:, 0] == si + gt_masksi = masks[midx] if masks is not None else None # get predition masks proto_out = train_out[1][si] if isinstance(train_out, tuple) else None @@ -181,7 +183,7 @@ def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_ img = img.to(self.device, non_blocking=True) targets = targets.to(self.device) if masks is not None: - masks = masks.to(self.device) + masks = masks.to(self.device).float() out, train_out = self.inference(model, img, targets, masks) # Statistics per image @@ -193,7 +195,8 @@ def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_ # eval in every image level labels = targets[targets[:, 0] == si, 1:] - gt_masksi = masks[targets[:, 0] == si] if masks is not None else None + midx = [si] if self.overlap else targets[:, 0] == si + gt_masksi = masks[midx] if masks is not None else None # get predition masks proto_out = train_out[1][si] if isinstance(train_out, tuple) else None @@ -390,6 +393,13 @@ def process_batch_masks(self, predn, pred_maski, gt_masksi, labels): correct = torch.zeros(predn.shape[0], self.iouv.shape[0], dtype=torch.bool, device=self.iouv.device, ) + # convert masks (1, 640, 640) -> (n, 640, 640) + if self.overlap: + nl = len(labels) + index = torch.arange(nl, device=gt_masksi.device).view(nl, 1, 1) + 1 + gt_masksi = gt_masksi.repeat(nl, 1, 1) + gt_masksi = torch.where(gt_masksi == index, 1.0, 0.0) + if gt_masksi.shape[1:] != pred_maski.shape[1:]: gt_masksi = F.interpolate(gt_masksi.unsqueeze(0), pred_maski.shape[1:], mode="bilinear", align_corners=False, ).squeeze(0) @@ -462,7 +472,7 @@ def plot_images(self, i, img, targets, masks, out, paths): if masks is not None and masks.shape[1:] != img.shape[2:]: masks = F.interpolate( - masks.unsqueeze(0), + masks.unsqueeze(0).float(), img.shape[2:], mode="bilinear", align_corners=False, diff --git a/seg_dataloaders.py b/seg_dataloaders.py index 32f3e0af7127..ac6da36fab09 100644 --- a/seg_dataloaders.py +++ b/seg_dataloaders.py @@ -61,7 +61,7 @@ def __iter__(self): def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0, rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix="", shuffle=False, neg_dir="", - bg_dir="", area_thr=0.2, mask_head=False, mask_downsample_ratio=1, ): + bg_dir="", area_thr=0.2, mask_head=False, mask_downsample_ratio=1, overlap_mask=False): if rect and shuffle: print("WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False") shuffle = False @@ -75,6 +75,7 @@ 
def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=Non prefix=prefix, neg_dir=neg_dir, bg_dir=bg_dir, area_thr=area_thr, ) if mask_head: dataset.downsample_ratio = mask_downsample_ratio + dataset.overlap = overlap_mask batch_size = min(batch_size, len(dataset)) nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers]) # number of workers @@ -454,11 +455,12 @@ def collate_fn4(batch): class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, cache_images=False, single_cls=False, stride=32, pad=0, prefix="", neg_dir="", bg_dir="", area_thr=0.2, - downsample_ratio=1, # return dowmsample mask + downsample_ratio=1, overlap=False, ): super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls, stride, pad, prefix, neg_dir, bg_dir, area_thr, ) self.downsample_ratio = downsample_ratio + self.overlap = overlap @Dataset.mosaic_getitem def __getitem__(self, index): @@ -506,13 +508,18 @@ def __getitem__(self, index): nl = len(labels) # number of labels if nl: labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) - for si in range(len(segments)): - mask = polygon2mask_downsample(img.shape[:2], [segments[si].reshape(-1)], - downsample_ratio=self.downsample_ratio, ) - masks.append(torch.from_numpy(mask.astype(np.float32))) + if self.overlap: + masks, sorted_idx = polygons2masks_overlap(img.shape[:2], segments, + downsample_ratio=self.downsample_ratio) + masks = masks[None] # (640, 640) -> (1, 640, 640) + labels = labels[sorted_idx] + else: + masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio) - masks = (torch.stack(masks, axis=0) if len(masks) else torch.zeros(nl, img.shape[0] // self.downsample_ratio, - img.shape[1] // self.downsample_ratio)) + masks = (torch.from_numpy(masks) if len(masks) else + torch.zeros(1 if self.overlap else nl, + img.shape[0] // self.downsample_ratio, + img.shape[1] // self.downsample_ratio)) # TODO: albumentations support if self.augment: # Albumentations @@ -930,23 +937,6 @@ def exif_transpose(image): return image -def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): - """ - Args: - img_size (tuple): The image size. - polygons (np.ndarray): [N, M], N is the number of polygons, - M is the number of points(Be divided by 2). - """ - img_size = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio) - mask = np.zeros(img_size, dtype=np.uint8) - polygons = np.asarray(polygons) / downsample_ratio - polygons = polygons.astype(np.int32) - shape = polygons.shape - polygons = polygons.reshape(shape[0], -1, 2) - cv2.fillPoly(mask, polygons, color=color) - return mask - - def worker_init_reset_seed(worker_id): seed = uuid.uuid4().int % 2 ** 32 random.seed(seed) @@ -954,7 +944,7 @@ def worker_init_reset_seed(worker_id): np.random.seed(seed) -def polygon2mask_downsample(img_size, polygons, color=1, downsample_ratio=1): +def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): """ Args: img_size (tuple): The image size. 
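+        Example (editor's sketch, illustrative values): rasterize one flattened
+        [x1, y1, x2, y2, ...] polygon at half resolution:
+            polygon2mask((640, 640), [np.array([10., 10., 300., 10., 150., 200.])],
+                         color=1, downsample_ratio=2)  # -> (320, 320) uint8 mask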
@@ -968,10 +958,53 @@ def polygon2mask_downsample(img_size, polygons, color=1, downsample_ratio=1):
     polygons = polygons.reshape(shape[0], -1, 2)
     cv2.fillPoly(mask, polygons, color=color)
     nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio)
+    # NOTE: fillPoly at full resolution followed by a resize keeps the loss
+    # computed the same way as when mask-ratio=1.
     mask = cv2.resize(mask, (nw, nh))
     return mask
 
 
+def polygons2masks(img_size, polygons, color, downsample_ratio=1):
+    """
+    Args:
+        img_size (tuple): The image size.
+        polygons (list[np.ndarray]): a list of polygons, one per instance; each
+            polygon is an array of points that is flattened to
+            [x1, y1, x2, y2, ...] before rasterization.
+    """
+    masks = []
+    for si in range(len(polygons)):
+        mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color,
+                            downsample_ratio)
+        masks.append(mask)
+    return np.array(masks)
+
+
+def polygons2masks_overlap(img_size, segments, downsample_ratio=1):
+    """Return a single (h // downsample_ratio, w // downsample_ratio) mask in which
+    each instance is encoded as a distinct 1-based integer id; instances are drawn
+    in decreasing-area order, so smaller instances stay on top."""
+    masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio),
+                     dtype=np.uint8)
+    areas = []
+    ms = []
+    for si in range(len(segments)):
+        mask = polygon2mask(
+            img_size,
+            [segments[si].reshape(-1)],
+            downsample_ratio=downsample_ratio,
+            color=1,
+        )
+        ms.append(mask)
+        areas.append(mask.sum())
+    areas = np.asarray(areas)
+    index = np.argsort(-areas)
+    ms = np.array(ms)[index]
+    for i in range(len(segments)):
+        mask = ms[i] * (i + 1)
+        masks = masks + mask
+        masks = np.clip(masks, a_min=0, a_max=i + 1)
+    return masks, index
+
+
 def img2label_paths(img_paths):
     # Define label paths as a function of image paths
     sa, sb = (os.sep + "images" + os.sep, os.sep + "labels" + os.sep,)  # /images/, /labels/ substrings
diff --git a/train_instseg.py b/train_instseg.py
index 304b86282fa8..29e9eb8c9536 100644
--- a/train_instseg.py
+++ b/train_instseg.py
@@ -78,7 +78,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
     # Directories
     w = save_dir / 'weights'  # weights dir
     (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)  # make dir
-    last, best, last_mosiac = w / 'last.pt', w / 'best.pt', w / "last_mosaic.pt"
+    last, best = w / 'last.pt', w / 'best.pt'
 
     # Hyperparameters
     if isinstance(hyp, str):
@@ -107,6 +107,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
 
     # Config
     plots = not evolve and not opt.noplots  # create plots
+    overlap = opt.overlap_mask
     cuda = device.type != 'cpu'
     init_seeds(opt.seed + 1 + RANK, True)
     with torch_distributed_zero_first(LOCAL_RANK):
@@ -163,7 +164,8 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
                                mask=True,
                                verbose=False,
                                mask_downsample_ratio=mask_ratio,
-                               plots=plots
+                               plots=plots,
+                               overlap=overlap
                                )
     g = [], [], []  # optimizer parameter groups
     bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k)  # normalization layers, i.e. 
BatchNorm2d()
@@ -249,7 +251,8 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
                                           prefix=colorstr('train: '),
                                           mask_head=True,
                                           shuffle=True,
-                                          mask_downsample_ratio=mask_ratio
+                                          mask_downsample_ratio=mask_ratio,
+                                          overlap_mask=overlap,
                                           )
     mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max())  # max label class
     print("mlc , nc ", mlc, " ", nc )
@@ -271,6 +274,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
                                      pad=0.5,
                                      mask_head=True,
                                      mask_downsample_ratio=mask_ratio,
+                                     overlap_mask=overlap,
                                      prefix=colorstr('val: '))[0]
 
     if not resume:
@@ -316,7 +320,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
     scheduler.last_epoch = start_epoch - 1  # do not move
     scaler = torch.cuda.amp.GradScaler(enabled=amp)
     stopper, stop = EarlyStopping(patience=opt.patience), False
-    compute_loss = ComputeLoss(model)  # init loss class
+    compute_loss = ComputeLoss(model, overlap=overlap)  # init loss class
     callbacks.run('on_train_start')
     LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
                 f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n'
@@ -371,7 +375,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
             # Forward
             with torch.cuda.amp.autocast(amp):
                 pred = model(imgs)  # forward
-                loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device))  # loss scaled by batch_size
+                loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device).float())  # loss scaled by batch_size
                 if RANK != -1:
                     loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode
                 if opt.quad:
@@ -398,12 +402,12 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
                 # for plots
                 if mask_ratio != 1:
                     masks = F.interpolate(
-                        masks[None, :],
+                        masks[None, :].float(),
                         (imgsz, imgsz),
                         mode="bilinear",
                         align_corners=False,
                     ).squeeze(0)
-                callbacks.run('on_train_batch_end', ni, model, imgs, targets,masks, paths, plots, opt.sync_bn, None)
+                callbacks.run('on_train_batch_end', ni, model, imgs, targets, masks, paths, plots, opt.sync_bn)
                 if callbacks.stop_training:
                     return
@@ -525,7 +529,8 @@ def parse_opt(known=False):
     parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
     parser.add_argument('--seed', type=int, default=0, help='Global training seed')
    parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
-    parser.add_argument('--mask-ratio', type=int, default=1, help='mask ratio')
+    parser.add_argument('--mask-ratio', type=int, default=1, help='Downsample the gt masks to save memory')
+    parser.add_argument('--overlap-mask', action='store_true', help='Overlapping masks train faster at the cost of a slight accuracy decrease')
 
     # Weights & Biases arguments
     parser.add_argument('--entity', default=None, help='W&B: Entity')
diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py
index c80c8077baca..a142f607561e 100644
--- a/utils/loggers/__init__.py
+++ b/utils/loggers/__init__.py
@@ -377,9 +377,7 @@ def __init__(
         ]
 
-    def on_train_batch_end(
-        self, ni, model, imgs, targets, masks, paths, plots, sync_bn, plot_idx
-    ):
+    def on_train_batch_end(self, ni, model, imgs, targets, masks, paths, plots, sync_bn):
         # Callback runs on train batch end
         if plots and self.save_dir.exists():
             if ni == 0:
@@ -394,14 +392,9 @@ def on_train_batch_end(
                 ),
                 [],
             )
-            if plot_idx is not None and ni in plot_idx:
-                # if ni < 3:
+            if ni < 3:
+                f = 
self.save_dir / f"train_batch{ni}.jpg" # filename - Thread( - target=plot_images_and_masks, - args=(imgs, targets, masks, paths, f), - daemon=True, - ).start() + plot_images_and_masks(imgs, targets, masks, paths, f) if self.wandb: wandb.log({"train_labels": wandb.Image(str(f))}) diff --git a/utils/plots.py b/utils/plots.py index f5cd3578929d..11a0c859e152 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -1235,12 +1235,17 @@ def plot_images_and_masks( if len(targets) > 0: idx = (targets[:, 0]).astype(int) image_targets = targets[idx == i] - # print(targets.shape) - # print(masks.shape) - image_masks = masks[idx == i] - # mosaic_masks - # mosaic_masks[block_y:block_y + h, - # block_x:block_x + w, :] = image_masks + + if masks.max() > 1.0: # mean that masks are overlap + image_masks = masks[[i]] # (1, 640, 640) + # convert masks (1, 640, 640) -> (n, 640, 640) + nl = len(image_targets) + index = np.arange(nl).reshape(nl, 1, 1) + 1 + image_masks = np.repeat(image_masks, nl, axis=0) + image_masks = np.where(image_masks == index, 1.0, 0.0) + else: + image_masks = masks[idx == i] + boxes = xywh2xyxy(image_targets[:, 2:6]).T classes = image_targets[:, 1].astype("int") labels = image_targets.shape[1] == 6 # labels if no conf column diff --git a/utils/seg_loss.py b/utils/seg_loss.py index e74b82adae7b..e5294a5300f7 100644 --- a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -12,8 +12,9 @@ class ComputeLoss: # Compute losses - def __init__(self, model, autobalance=False): + def __init__(self, model, autobalance=False, overlap=False): self.sort_obj_iou = False + self.overlap = overlap device = next(model.parameters()).device # get model device h = model.hyp # hyperparameters @@ -141,8 +142,7 @@ def loss_segment(self, preds, targets, masks): lcls += self.BCEcls(ps[:, self.nm:], t) # BCE # Mask Regression - mask_gt = masks[tidxs[i]] - downsampled_masks = F.interpolate(mask_gt[None, :], (mask_h, mask_w), mode="bilinear", + downsampled_masks = F.interpolate(masks[None, :], (mask_h, mask_w), mode="bilinear", align_corners=False, ).squeeze(0) mxywh = xywh[i] @@ -155,8 +155,15 @@ def loss_segment(self, preds, targets, masks): batch_lseg = torch.zeros(1, device=device) for bi in b.unique(): index = b == bi - mask_gti = downsampled_masks[index] - mask_gti = mask_gti.permute(1, 2, 0).contiguous() + if self.overlap: + mask_index = tidxs[i][index] + # h, w, n + mask_gti = downsampled_masks[bi][:, :, None].repeat(1, 1, index.sum()) + # h, w, n + mask_gti = torch.where(mask_gti == mask_index, 1.0, 0.0) + else: + mask_gti = downsampled_masks[tidxs[i]][index] + mask_gti = mask_gti.permute(1, 2, 0).contiguous() mw, mh = mws[index], mhs[index] mxyxy = mxyxys[index] @@ -191,10 +198,6 @@ def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): lseg = lseg.mean(dim=(0, 1)) / w / h return lseg.mean() - def mask_loss(self, gt_masks, preds, protos, xyxys, ws, hs): - """mask loss of batches.""" - pass - def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets @@ -257,8 +260,23 @@ def build_targets_for_masks(self, p, targets): gain = torch.ones(8, device=targets.device) # normalized to gridspace gain ai = ( torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)) # same as .repeat_interleave(nt) - ti = ( - torch.arange(nt, device=targets.device).float().view(1, nt).repeat(na, 1)) # same as .repeat_interleave(nt) + if self.overlap: + batch = p[0].shape[0] + ti = [] + for i in range(batch): + 
# find number of targets of each image + num = (targets[:, 0] == i).sum() + # (na, num) + ti.append( + torch.arange(num, device=targets.device) + .float() + .view(1, num) + .repeat(na, 1) + 1) + # (na, nt) + ti = torch.cat(ti, 1) + else: + ti = ( + torch.arange(nt, device=targets.device).float().view(1, nt).repeat(na, 1)) # same as .repeat_interleave(nt) targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None], ti[:, :, None]), 2) # append anchor indices From efa5dcf567be3f4cae637f8d30e7be4df47b60e0 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Tue, 19 Jul 2022 17:12:37 +0800 Subject: [PATCH 036/247] fix mAP issue --- models/yolo.py | 2 +- train_instseg.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/models/yolo.py b/models/yolo.py index e6860a9d7435..245d65453828 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -131,7 +131,7 @@ def forward(self, x): y[..., 0:5] = y[..., 0:5].sigmoid() y[..., self.nm:] = y[..., self.nm:].sigmoid() if self.inplace: - y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy + y[..., 0:2] = (y[..., 0:2] * 2 + self.grid[i]) * self.stride[i] # xy y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy diff --git a/train_instseg.py b/train_instseg.py index 29e9eb8c9536..b1ea72ff5757 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -316,7 +316,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training last_opt_step = -1 maps = np.zeros(nc) # mAP per class - results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) + results = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = torch.cuda.amp.GradScaler(enabled=amp) stopper, stop = EarlyStopping(patience=opt.patience), False From e260a10f906e032d9b3376474020581c4fb10a25 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 19 Jul 2022 18:42:55 +0530 Subject: [PATCH 037/247] restore evaluate.py --- evaluate.py | 810 +++++++++++++++++++++++++++++++++++++++++++++++ train_instseg.py | 2 +- 2 files changed, 811 insertions(+), 1 deletion(-) create mode 100644 evaluate.py diff --git a/evaluate.py b/evaluate.py new file mode 100644 index 000000000000..24f2e40b95db --- /dev/null +++ b/evaluate.py @@ -0,0 +1,810 @@ + +import json +from pathlib import Path +from threading import Thread + +import numpy as np +import torch +import torch.nn.functional as F +# import pycocotools.mask as mask_util +from tqdm import tqdm + +from models.experimental import attempt_load +from seg_dataloaders import create_dataloader +from utils.general import ( + coco80_to_coco91_class, + increment_path, + colorstr, check_dataset, check_img_size, check_suffix +) + +from utils.segment import ( + non_max_suppression_masks, + mask_iou, + process_mask, + process_mask_upsample, + scale_masks, +) +from utils.boxes import ( + box_iou, + non_max_suppression, + scale_coords, + xyxy2xywh, + xywh2xyxy, +) +from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix +from utils.seg_plots import output_to_target, plot_images_boxes_and_masks +from utils.torch_utils import select_device, time_sync + + +def save_one_txt(predn, save_conf, shape, file): + # Save one txt 
result + gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh + for *xyxy, conf, cls in predn.tolist(): + xywh = ( + (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() + ) # normalized xywh + line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format + with open(file, "a") as f: + f.write(("%g " * len(line)).rstrip() % line + "\n") + + +def save_one_json(predn, jdict, path, class_map, pred_masks=None): + # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} + image_id = int(path.stem) if path.stem.isnumeric() else path.stem + box = xyxy2xywh(predn[:, :4]) # xywh + box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner + + if pred_masks is not None: + pred_masks = np.transpose(pred_masks, (2, 0, 1)) + rles = [ + mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] + for mask in pred_masks + ] + for rle in rles: + rle["counts"] = rle["counts"].decode("utf-8") + + for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): + pred_dict = { + "image_id": image_id, + "category_id": class_map[int(p[5])], + "bbox": [round(x, 3) for x in b], + "score": round(p[4], 5), + } + if pred_masks is not None: + pred_dict["segmentation"] = rles[i] + jdict.append(pred_dict) + + +@torch.no_grad() +class Yolov5Evaluator: + def __init__( + self, + data, + conf_thres=0.001, + iou_thres=0.6, + device="", + single_cls=False, + augment=False, + verbose=False, + project="runs/val", + name="exp", + exist_ok=False, + half=True, + save_dir=Path(""), + nosave=False, + plots=True, + mask=False, + mask_downsample_ratio=1, + ) -> None: + self.data = check_dataset(data) # check + self.conf_thres = conf_thres # confidence threshold + self.iou_thres = iou_thres # NMS IoU threshold + self.device = device # cuda device, i.e. 
0 or 0,1,2,3 or cpu + self.single_cls = single_cls # treat as single-class dataset + self.augment = augment # augmented inference + self.verbose = verbose # verbose output + self.project = project # save to project/name + self.name = name # save to project/name + self.exist_ok = exist_ok # existing project/name ok, do not increment + self.half = half # use FP16 half-precision inference + self.save_dir = save_dir + self.nosave = nosave + self.plots = plots + self.mask = mask + self.mask_downsample_ratio = mask_downsample_ratio + + self.nc = 1 if self.single_cls else int(self.data["nc"]) # number of classes + self.iouv = torch.linspace(0.5, 0.95, 10) # iou vector for mAP@0.5:0.95 + self.niou = self.iouv.numel() + self.confusion_matrix = ConfusionMatrix(nc=self.nc) + self.dt = [0.0, 0.0, 0.0] + self.names = {k: v for k, v in enumerate(self.data["names"])} + self.s = ( + ("%20s" + "%11s" * 10) + % ( + "Class", + "Images", + "Labels", + "Box:{P", + "R", + "mAP@.5", + "mAP@.5:.95}", + "Mask:{P", + "R", + "mAP@.5", + "mAP@.5:.95}", + ) + if self.mask + else ("%20s" + "%11s" * 6) + % ( + "Class", + "Images", + "Labels", + "P", + "R", + "mAP@.5", + "mAP@.5:.95", + ) + ) + + # coco stuff + self.is_coco = isinstance(self.data.get("val"), str) and self.data[ + "val" + ].endswith( + "coco/val2017.txt" + ) # COCO dataset + self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000)) + self.jdict = [] + self.iou_thres = 0.65 if self.is_coco else self.iou_thres + + # masks stuff + self.pred_masks = [] # for mask visualization + + # metric stuff + self.seen = 0 + self.stats = [] + self.total_loss = torch.zeros((4 if self.mask else 3)) + self.metric = Metrics() if self.mask else Metric() + + def run_training(self, model, dataloader, compute_loss=None): + """This is for evaluation when training.""" + self.seen = 0 + self.device = next(model.parameters()).device # get model device + # self.iouv.to(self.device) + self.total_loss = torch.zeros((4 if self.mask else 3), device=self.device) + self.half &= self.device.type != "cpu" # half precision only supported on CUDA + model.half() if self.half else model.float() + # Configure + model.eval() + + # inference + # masks will be `None` if training objection. + for batch_i, (img, targets, paths, shapes, masks) in enumerate( + tqdm(dataloader, desc=self.s) + ): + # reset pred_masks + self.pred_masks = [] + img = img.to(self.device, non_blocking=True) + targets = targets.to(self.device) + if masks is not None: + masks = masks.to(self.device) + out, train_out = self.inference(model, img, targets, masks, compute_loss) + + # Statistics per image + for si, pred in enumerate(out): + self.seen += 1 + + # eval in every image level + labels = targets[targets[:, 0] == si, 1:] + gt_masksi = masks[targets[:, 0] == si] if masks is not None else None + + # get predition masks + proto_out = train_out[1][si] if isinstance(train_out, tuple) else None + pred_maski = self.get_predmasks( + pred, + proto_out, + gt_masksi.shape[1:] if gt_masksi is not None else None, + ) + + # for visualization + if self.plots and batch_i < 3 and pred_maski is not None: + self.pred_masks.append(pred_maski.cpu()) + + # NOTE: eval in training image-size space + self.compute_stat(pred, pred_maski, labels, gt_masksi) + + if self.plots and batch_i < 3: + self.plot_images(batch_i, img, targets, masks, out, paths) + + # compute map and print it. 
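+        # (after_infer also resets self.stats and returns per-image speeds in ms:
+        # pre-process, inference, NMS)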
+        t = self.after_infer()
+
+        # Return results
+        model.float()  # for training
+        return (
+            (
+                *self.metric.mean_results(),
+                *(self.total_loss.cpu() / len(dataloader)).tolist(),
+            ),
+            self.metric.get_maps(self.nc),
+            t,
+        )
+
+    def run(
+        self,
+        weights,
+        batch_size,
+        imgsz,
+        save_txt=False,
+        save_conf=False,
+        save_json=False,
+        task="val",
+    ):
+        """This is for native evaluation."""
+        model, dataloader, imgsz = self.before_infer(
+            weights, batch_size, imgsz, save_txt, task
+        )
+        self.seen = 0
+        # self.iouv.to(self.device)
+        self.half &= self.device.type != "cpu"  # half precision only supported on CUDA
+        model.half() if self.half else model.float()
+        # Configure
+        model.eval()
+
+        # inference
+        for batch_i, (img, targets, paths, shapes, masks) in enumerate(
+            tqdm(dataloader, desc=self.s)
+        ):
+            # reset pred_masks
+            self.pred_masks = []
+            img = img.to(self.device, non_blocking=True)
+            targets = targets.to(self.device)
+            if masks is not None:
+                masks = masks.to(self.device)
+            out, train_out = self.inference(model, img, targets, masks)
+
+            # Statistics per image
+            for si, pred in enumerate(out):
+                self.seen += 1
+                path = Path(paths[si])
+                shape = shapes[si][0]
+                ratio_pad = shapes[si][1]
+
+                # eval in every image level
+                labels = targets[targets[:, 0] == si, 1:]
+                gt_masksi = masks[targets[:, 0] == si] if masks is not None else None
+
+                # get prediction masks
+                proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
+                pred_maski = self.get_predmasks(
+                    pred,
+                    proto_out,
+                    gt_masksi.shape[1:] if gt_masksi is not None else None,
+                )
+
+                # for visualization
+                if self.plots and batch_i < 3 and pred_maski is not None:
+                    self.pred_masks.append(pred_maski.cpu())
+
+                # NOTE: eval in training image-size space
+                self.compute_stat(pred, pred_maski, labels, gt_masksi)
+
+                # no predictions, nothing to save
+                if len(pred) == 0:
+                    continue
+
+                if save_txt or save_json:
+                    # clone() so that plot_images still works correctly
+                    predn = pred.clone()
+                    # test-time loading adds 0.5 padding, which differs from the
+                    # training dataloader's padding, so ratio_pad must be passed in
+                    scale_coords(
+                        img[si].shape[1:], predn[:, :4], shape, ratio_pad
+                    )  # native-space pred
+                # Save/log
+                if save_txt and self.save_dir.exists():
+                    # NOTE: convert coords to native space when saving txt;
+                    # only box predictions are saved
+                    save_one_txt(
+                        predn,
+                        save_conf,
+                        shape,
+                        file=self.save_dir / "labels" / (path.stem + ".txt"),
+                    )
+                if save_json and self.save_dir.exists():
+                    # NOTE: convert coords to native space when saving json.
+                    # if pred_maski is not None:
+                    # h, w, n
+                    pred_maski = scale_masks(
+                        img[si].shape[1:],
+                        pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(),
+                        shape,
+                        ratio_pad,
+                    )
+                    save_one_json(
+                        predn,
+                        self.jdict,
+                        path,
+                        self.class_map,
+                        pred_maski,
+                    )  # append to COCO-JSON dictionary
+
+            if self.plots and batch_i < 3:
+                self.plot_images(batch_i, img, targets, masks, out, paths)
+
+        # compute mAP and print it.
+        t = self.after_infer()
+
+        # save json
+        if self.save_dir.exists() and save_json:
+            pred_json = str(self.save_dir / "predictions.json")  # predictions json
+            print(f"\nEvaluating pycocotools mAP... 
+            with open(pred_json, "w") as f:
+                json.dump(self.jdict, f)
+
+        # Print speeds
+        shape = (batch_size, 3, imgsz, imgsz)
+        print(
+            f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}"
+            % t
+        )
+
+        s = (
+            f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}"
+            if save_txt and self.save_dir.exists()
+            else ""
+        )
+        print(
+            f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}"
+        )
+
+        # Return results
+        return (
+            (
+                *self.metric.mean_results(),
+                *(self.total_loss.cpu() / len(dataloader)).tolist(),
+            ),
+            self.metric.get_maps(self.nc),
+            t,
+        )
+
+    def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"):
+        """Prepare model and dataloader for evaluation outside of training."""
+        self.device = select_device(self.device, batch_size=batch_size)
+
+        # Directories
+        self.save_dir = increment_path(
+            Path(self.project) / self.name, exist_ok=self.exist_ok
+        )  # increment run
+        if not self.nosave:
+            (self.save_dir / "labels" if save_txt else self.save_dir).mkdir(
+                parents=True, exist_ok=True
+            )  # make dir
+
+        # Load model
+        check_suffix(weights, ".pt")
+        model = attempt_load(weights, map_location=self.device)  # load FP32 model
+        gs = max(int(model.stride.max()), 32)  # grid size (max stride)
+        imgsz = check_img_size(imgsz, s=gs)  # check image size
+
+        # Data
+        if self.device.type != "cpu":
+            model(
+                torch.zeros(1, 3, imgsz, imgsz)
+                .to(self.device)
+                .type_as(next(model.parameters()))
+            )  # run once
+        pad = 0.0 if task == "speed" else 0.5
+        task = (
+            task if task in ("train", "val", "test") else "val"
+        )  # path to train/val/test images
+        dataloader = create_dataloader(
+            self.data[task],
+            imgsz,
+            batch_size,
+            gs,
+            self.single_cls,
+            pad=pad,
+            rect=True,
+            prefix=colorstr(f"{task}: "),
+            mask_head=self.mask,
+            mask_downsample_ratio=self.mask_downsample_ratio,
+        )[0]
+        return model, dataloader, imgsz
+
+    def inference(self, model, img, targets, masks=None, compute_loss=None):
+        """Inference"""
+        t1 = time_sync()
+        img = img.half() if self.half else img.float()  # uint8 to fp16/32
+        img /= 255.0  # 0 - 255 to 0.0 - 1.0
+        _, _, height, width = img.shape  # batch size, channels, height, width
+        t2 = time_sync()
+        self.dt[0] += t2 - t1
+
+        # Run model
+        out, train_out = model(
+            img, augment=self.augment
+        )  # inference and training outputs
+        self.dt[1] += time_sync() - t2
+
+        # Compute loss
+        if compute_loss:
+            self.total_loss += compute_loss(train_out, targets, masks)[
+                1
+            ]  # box, obj, cls
+
+        # Run NMS
+        targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(
+            self.device
+        )  # to pixels
+        t3 = time_sync()
+        out = self.nms(
+            prediction=out,
+            conf_thres=self.conf_thres,
+            iou_thres=self.iou_thres,
+            multi_label=True,
+            agnostic=self.single_cls,
+        )
+        self.dt[2] += time_sync() - t3
+        return out, train_out
+
+    def after_infer(self):
+        """Post-inference bookkeeping: plot what was collected and compute metrics.
+        Return:
+            t (tuple): per-image speeds in ms.
+        """
+        # Plot confusion matrix
+        if self.plots and self.save_dir.exists():
+            self.confusion_matrix.plot(
+                save_dir=self.save_dir, names=list(self.names.values())
+            )
+
+        # Compute statistics
+        stats = [np.concatenate(x, 0) for x in zip(*self.stats)]  # to numpy
+        box_or_mask_any = stats[0].any() or stats[1].any()
+        stats = stats[1:] if not self.mask else stats
+        if len(stats) and box_or_mask_any:
+            results = self.ap_per_class(
+                *stats,
+                self.plots,
+                self.save_dir if self.save_dir.exists() else None,
+                self.names,
+            )
+            self.metric.update(results)
+            nt = np.bincount(
+                stats[(3 if not self.mask else 4)].astype(np.int64), minlength=self.nc
+            )  # number of targets per class
+        else:
+            nt = torch.zeros(1)
+
+        # reset stats; keeping them on `self` lets run() and run_training() share this code.
+        self.stats = []
+        # print information
+        self.print_metric(nt, stats)
+        t = tuple(x / self.seen * 1e3 for x in self.dt)  # speeds per image
+        return t
+
+    def process_batch(self, detections, labels, iouv):
+        """
+        Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
+        Arguments:
+            detections (Array[N, 6]), x1, y1, x2, y2, conf, class
+            labels (Array[M, 5]), class, x1, y1, x2, y2
+        Returns:
+            correct (Array[N, 10]), for 10 IoU levels
+        """
+        correct = torch.zeros(
+            detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device
+        )
+        iou = box_iou(labels[:, 1:], detections[:, :4])
+        x = torch.where(
+            (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5])
+        )  # IoU above threshold and classes match
+        if x[0].shape[0]:
+            matches = (
+                torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1)
+                .cpu()
+                .numpy()
+            )  # [label, detection, iou]
+            if x[0].shape[0] > 1:
+                matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+                # matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+            matches = torch.Tensor(matches).to(iouv.device)
+            correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
+        return correct
+
+    def get_predmasks(self, pred, proto_out, gt_shape):
+        """Get prediction masks in one of two ways.
+        1. process_mask, for val during training: evaluates a low-quality mask
+           (1/mask_ratio of the original size) to save CUDA memory.
+        2. process_mask_upsample, for val after training: produces a high-quality,
+           original-size mask.
+
+        Args:
+            pred(torch.Tensor): output of network, (N, 5 + mask_dim + class).
+            proto_out(torch.Tensor): output of mask prototype, (mask_dim, mask_h, mask_w).
+            gt_shape(tuple): shape of the gt mask, which may differ from the network
+                input size because of mask_downsample_ratio.
+        Return:
+            pred_mask(torch.Tensor): final predicted masks at input-image size,
+                (N, input_h, input_w).
+        """
+        if proto_out is None or len(pred) == 0:
+            return None
+        process = process_mask_upsample if self.plots else process_mask
+        gt_shape = (
+            gt_shape[0] * self.mask_downsample_ratio,
+            gt_shape[1] * self.mask_downsample_ratio,
+        )
+        # n, h, w
+        pred_mask = (
+            process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape)
+            .permute(2, 0, 1)
+            .contiguous()
+        )
+        return pred_mask
+
+    def process_batch_masks(self, predn, pred_maski, gt_masksi, labels):
+        assert not (
+            (pred_maski is None) ^ (gt_masksi is None)
+        ), "`pred_maski` and `gt_masksi` should both be None or both exist."
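+        # For reference, mask_iou below works on masks flattened to (n, h*w); for
+        # binary masks a and b it reduces to intersection over union. A minimal
+        # sketch with made-up tensors:
+        #   a = torch.tensor([[1, 1, 0, 0]]); b = torch.tensor([[0, 1, 1, 0]])
+        #   inter = (a * b).sum(1); union = a.sum(1) + b.sum(1) - inter  # IoU = 1/3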
+        if pred_maski is None and gt_masksi is None:
+            return torch.zeros(0, self.niou, dtype=torch.bool)
+
+        correct = torch.zeros(
+            predn.shape[0],
+            self.iouv.shape[0],
+            dtype=torch.bool,
+            device=self.iouv.device,
+        )
+
+        if not self.plots:
+            gt_masksi = F.interpolate(
+                gt_masksi.unsqueeze(0),
+                pred_maski.shape[1:],
+                mode="bilinear",
+                align_corners=False,
+            ).squeeze(0)
+
+        iou = mask_iou(
+            gt_masksi.view(gt_masksi.shape[0], -1),
+            pred_maski.view(pred_maski.shape[0], -1),
+        )
+        x = torch.where(
+            (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5])
+        )  # IoU above threshold and classes match
+        if x[0].shape[0]:
+            matches = (
+                torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1)
+                .cpu()
+                .numpy()
+            )  # [label, detection, iou]
+            if x[0].shape[0] > 1:
+                matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+                # matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+            matches = torch.Tensor(matches).to(self.iouv.device)
+            correct[matches[:, 1].long()] = matches[:, 2:3] >= self.iouv
+        return correct
+
+    def compute_stat(self, predn, pred_maski, labels, gt_maski):
+        """Compute correctness statistics from IoUs, with boxes in training image-size space."""
+        nl = len(labels)
+        tcls = labels[:, 0].tolist() if nl else []  # target class
+
+        if len(predn) == 0:
+            if nl:
+                self.stats.append(
+                    (
+                        torch.zeros(0, self.niou, dtype=torch.bool),  # boxes
+                        torch.zeros(0, self.niou, dtype=torch.bool),  # masks
+                        torch.Tensor(),
+                        torch.Tensor(),
+                        tcls,
+                    )
+                )
+            return
+
+        # Predictions
+        if self.single_cls:
+            predn[:, 5] = 0
+
+        # Evaluate
+        if nl:
+            tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
+            labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
+            # boxes
+            correct_boxes = self.process_batch(predn, labelsn, self.iouv)
+
+            # masks
+            correct_masks = self.process_batch_masks(
+                predn, pred_maski, gt_maski, labelsn
+            )
+
+            if self.plots:
+                self.confusion_matrix.process_batch(predn, labelsn)
+        else:
+            correct_boxes = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool)
+            correct_masks = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool)
+        self.stats.append(
+            (
+                correct_masks.cpu(),
+                correct_boxes.cpu(),
+                predn[:, 4].cpu(),
+                predn[:, 5].cpu(),
+                tcls,
+            )
+        )  # (correct, conf, pcls, tcls)
+
+    def print_metric(self, nt, stats):
+        # Print results
+        pf = "%20s" + "%11i" * 2 + "%11.3g" * (8 if self.mask else 4)
+        print(pf % ("all", self.seen, nt.sum(), *self.metric.mean_results()))
+
+        # Print results per class
+        # TODO: make self.seen support verbose (per-class image counts).
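+        # Illustrative shape of one printed row with mask=True (numbers invented):
+        #                  all        128        929      0.724      0.637      0.685      0.452      0.698      0.611      0.655      0.388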
+        if self.verbose and self.nc > 1 and len(stats):
+            for i, c in enumerate(self.metric.ap_class_index):
+                print(
+                    pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i))
+                )
+
+    def plot_images(self, i, img, targets, masks, out, paths):
+        if not self.save_dir.exists():
+            return
+        # plot ground truth
+        f = self.save_dir / f"val_batch{i}_labels.jpg"  # labels
+
+        Thread(
+            target=plot_images_boxes_and_masks,
+            args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])),
+            daemon=True,
+        ).start()
+        f = self.save_dir / f"val_batch{i}_pred.jpg"  # predictions
+
+        # plot predictions
+        if len(self.pred_masks):
+            pred_masks = (
+                torch.cat(self.pred_masks, dim=0)
+                if len(self.pred_masks) > 1
+                else self.pred_masks[0]
+            )
+        else:
+            pred_masks = None
+        Thread(
+            target=plot_images_boxes_and_masks,
+            args=(
+                img,
+                output_to_target(out),
+                pred_masks,
+                paths,
+                f,
+                self.names,
+                max(img.shape[2:]),
+            ),
+            daemon=True,
+        ).start()
+
+    def nms(self, **kwargs):
+        return (
+            non_max_suppression_masks(**kwargs)
+            if self.mask
+            else non_max_suppression(**kwargs)
+        )
+
+    def ap_per_class(self, *args):
+        return ap_per_class_box_and_mask(*args) if self.mask else ap_per_class(*args)
+
+
+class Metric:
+    def __init__(self) -> None:
+        self.p = []  # (nc, )
+        self.r = []  # (nc, )
+        self.f1 = []  # (nc, )
+        self.all_ap = []  # (nc, 10)
+        self.ap_class_index = []  # (nc, )
+
+    @property
+    def ap50(self):
+        """AP@0.5 of all classes.
+        Return:
+            (nc, ) or [].
+        """
+        return self.all_ap[:, 0] if len(self.all_ap) else []
+
+    @property
+    def ap(self):
+        """AP@0.5:0.95 of all classes.
+        Return:
+            (nc, ) or [].
+        """
+        return self.all_ap.mean(1) if len(self.all_ap) else []
+
+    @property
+    def mp(self):
+        """Mean precision over all classes.
+        Return:
+            float.
+        """
+        return self.p.mean() if len(self.p) else 0.0
+
+    @property
+    def mr(self):
+        """Mean recall over all classes.
+        Return:
+            float.
+        """
+        return self.r.mean() if len(self.r) else 0.0
+
+    @property
+    def map50(self):
+        """Mean AP@0.5 over all classes.
+        Return:
+            float.
+        """
+        return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0
+
+    @property
+    def map(self):
+        """Mean AP@0.5:0.95 over all classes.
+        Return:
+            float.
+ """ + return self.all_ap.mean() if len(self.all_ap) else 0.0 + + def mean_results(self): + """Mean of results, return mp, mr, map50, map""" + return (self.mp, self.mr, self.map50, self.map) + + def class_result(self, i): + """class-aware result, return p[i], r[i], ap50[i], ap[i]""" + return (self.p[i], self.r[i], self.ap50[i], self.ap[i]) + + def get_maps(self, nc): + maps = np.zeros(nc) + self.map + for i, c in enumerate(self.ap_class_index): + maps[c] = self.ap[i] + return maps + + def update(self, results): + """ + Args: + results: tuple(p, r, ap, f1, ap_class) + """ + p, r, all_ap, f1, ap_class_index = results + self.p = p + self.r = r + self.all_ap = all_ap + self.f1 = f1 + self.ap_class_index = ap_class_index + + +class Metrics: + """Metric for boxes and masks.""" + + def __init__(self) -> None: + self.metric_box = Metric() + self.metric_mask = Metric() + + def update(self, results): + """ + Args: + results: Dict{'boxes': Dict{}, 'masks': Dict{}} + """ + self.metric_box.update(list(results["boxes"].values())) + self.metric_mask.update(list(results["masks"].values())) + + def mean_results(self): + return self.metric_box.mean_results() + self.metric_mask.mean_results() + + def class_result(self, i): + return self.metric_box.class_result(i) + self.metric_mask.class_result(i) + + def get_maps(self, nc): + return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) + + @property + def ap_class_index(self): + # boxes and masks have the same ap_class_index + return self.metric_box.ap_class_index diff --git a/train_instseg.py b/train_instseg.py index f5ef6b15a580..b3c699c182e9 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -66,7 +66,7 @@ from torch.optim import AdamW import yaml from datetime import datetime -from eval_seg import Yolov5Evaluator +from evaluate import Yolov5Evaluator def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary print(device) From 0a5944a8aea1cff40d9379b7e935bfd4bc61f9cc Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 19 Jul 2022 18:43:55 +0530 Subject: [PATCH 038/247] restore evaluator --- evaluate.py | 810 ------------------------------------ eval_seg.py => evaluator.py | 0 train_instseg.py | 2 +- 3 files changed, 1 insertion(+), 811 deletions(-) delete mode 100644 evaluate.py rename eval_seg.py => evaluator.py (100%) diff --git a/evaluate.py b/evaluate.py deleted file mode 100644 index 24f2e40b95db..000000000000 --- a/evaluate.py +++ /dev/null @@ -1,810 +0,0 @@ - -import json -from pathlib import Path -from threading import Thread - -import numpy as np -import torch -import torch.nn.functional as F -# import pycocotools.mask as mask_util -from tqdm import tqdm - -from models.experimental import attempt_load -from seg_dataloaders import create_dataloader -from utils.general import ( - coco80_to_coco91_class, - increment_path, - colorstr, check_dataset, check_img_size, check_suffix -) - -from utils.segment import ( - non_max_suppression_masks, - mask_iou, - process_mask, - process_mask_upsample, - scale_masks, -) -from utils.boxes import ( - box_iou, - non_max_suppression, - scale_coords, - xyxy2xywh, - xywh2xyxy, -) -from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix -from utils.seg_plots import output_to_target, plot_images_boxes_and_masks -from utils.torch_utils import select_device, time_sync - - -def save_one_txt(predn, save_conf, shape, file): - # Save one txt result - gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh - for *xyxy, conf, cls in 
predn.tolist(): - xywh = ( - (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() - ) # normalized xywh - line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format - with open(file, "a") as f: - f.write(("%g " * len(line)).rstrip() % line + "\n") - - -def save_one_json(predn, jdict, path, class_map, pred_masks=None): - # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} - image_id = int(path.stem) if path.stem.isnumeric() else path.stem - box = xyxy2xywh(predn[:, :4]) # xywh - box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner - - if pred_masks is not None: - pred_masks = np.transpose(pred_masks, (2, 0, 1)) - rles = [ - mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] - for mask in pred_masks - ] - for rle in rles: - rle["counts"] = rle["counts"].decode("utf-8") - - for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): - pred_dict = { - "image_id": image_id, - "category_id": class_map[int(p[5])], - "bbox": [round(x, 3) for x in b], - "score": round(p[4], 5), - } - if pred_masks is not None: - pred_dict["segmentation"] = rles[i] - jdict.append(pred_dict) - - -@torch.no_grad() -class Yolov5Evaluator: - def __init__( - self, - data, - conf_thres=0.001, - iou_thres=0.6, - device="", - single_cls=False, - augment=False, - verbose=False, - project="runs/val", - name="exp", - exist_ok=False, - half=True, - save_dir=Path(""), - nosave=False, - plots=True, - mask=False, - mask_downsample_ratio=1, - ) -> None: - self.data = check_dataset(data) # check - self.conf_thres = conf_thres # confidence threshold - self.iou_thres = iou_thres # NMS IoU threshold - self.device = device # cuda device, i.e. 0 or 0,1,2,3 or cpu - self.single_cls = single_cls # treat as single-class dataset - self.augment = augment # augmented inference - self.verbose = verbose # verbose output - self.project = project # save to project/name - self.name = name # save to project/name - self.exist_ok = exist_ok # existing project/name ok, do not increment - self.half = half # use FP16 half-precision inference - self.save_dir = save_dir - self.nosave = nosave - self.plots = plots - self.mask = mask - self.mask_downsample_ratio = mask_downsample_ratio - - self.nc = 1 if self.single_cls else int(self.data["nc"]) # number of classes - self.iouv = torch.linspace(0.5, 0.95, 10) # iou vector for mAP@0.5:0.95 - self.niou = self.iouv.numel() - self.confusion_matrix = ConfusionMatrix(nc=self.nc) - self.dt = [0.0, 0.0, 0.0] - self.names = {k: v for k, v in enumerate(self.data["names"])} - self.s = ( - ("%20s" + "%11s" * 10) - % ( - "Class", - "Images", - "Labels", - "Box:{P", - "R", - "mAP@.5", - "mAP@.5:.95}", - "Mask:{P", - "R", - "mAP@.5", - "mAP@.5:.95}", - ) - if self.mask - else ("%20s" + "%11s" * 6) - % ( - "Class", - "Images", - "Labels", - "P", - "R", - "mAP@.5", - "mAP@.5:.95", - ) - ) - - # coco stuff - self.is_coco = isinstance(self.data.get("val"), str) and self.data[ - "val" - ].endswith( - "coco/val2017.txt" - ) # COCO dataset - self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000)) - self.jdict = [] - self.iou_thres = 0.65 if self.is_coco else self.iou_thres - - # masks stuff - self.pred_masks = [] # for mask visualization - - # metric stuff - self.seen = 0 - self.stats = [] - self.total_loss = torch.zeros((4 if self.mask else 3)) - self.metric = Metrics() if self.mask else Metric() - - def run_training(self, model, dataloader, compute_loss=None): - """This is for 
evaluation when training.""" - self.seen = 0 - self.device = next(model.parameters()).device # get model device - # self.iouv.to(self.device) - self.total_loss = torch.zeros((4 if self.mask else 3), device=self.device) - self.half &= self.device.type != "cpu" # half precision only supported on CUDA - model.half() if self.half else model.float() - # Configure - model.eval() - - # inference - # masks will be `None` if training objection. - for batch_i, (img, targets, paths, shapes, masks) in enumerate( - tqdm(dataloader, desc=self.s) - ): - # reset pred_masks - self.pred_masks = [] - img = img.to(self.device, non_blocking=True) - targets = targets.to(self.device) - if masks is not None: - masks = masks.to(self.device) - out, train_out = self.inference(model, img, targets, masks, compute_loss) - - # Statistics per image - for si, pred in enumerate(out): - self.seen += 1 - - # eval in every image level - labels = targets[targets[:, 0] == si, 1:] - gt_masksi = masks[targets[:, 0] == si] if masks is not None else None - - # get predition masks - proto_out = train_out[1][si] if isinstance(train_out, tuple) else None - pred_maski = self.get_predmasks( - pred, - proto_out, - gt_masksi.shape[1:] if gt_masksi is not None else None, - ) - - # for visualization - if self.plots and batch_i < 3 and pred_maski is not None: - self.pred_masks.append(pred_maski.cpu()) - - # NOTE: eval in training image-size space - self.compute_stat(pred, pred_maski, labels, gt_masksi) - - if self.plots and batch_i < 3: - self.plot_images(batch_i, img, targets, masks, out, paths) - - # compute map and print it. - t = self.after_infer() - - # Return results - model.float() # for training - return ( - ( - *self.metric.mean_results(), - *(self.total_loss.cpu() / len(dataloader)).tolist(), - ), - self.metric.get_maps(self.nc), - t, - ) - - def run( - self, - weights, - batch_size, - imgsz, - save_txt=False, - save_conf=False, - save_json=False, - task="val", - ): - """This is for native evaluation.""" - model, dataloader, imgsz = self.before_infer( - weights, batch_size, imgsz, save_txt, task - ) - self.seen = 0 - # self.iouv.to(self.device) - self.half &= self.device.type != "cpu" # half precision only supported on CUDA - model.half() if self.half else model.float() - # Configure - model.eval() - - # inference - for batch_i, (img, targets, paths, shapes, masks) in enumerate( - tqdm(dataloader, desc=self.s) - ): - # reset pred_masks - self.pred_masks = [] - img = img.to(self.device, non_blocking=True) - targets = targets.to(self.device) - if masks is not None: - masks = masks.to(self.device) - out, train_out = self.inference(model, img, targets, masks) - - # Statistics per image - for si, pred in enumerate(out): - self.seen += 1 - path = Path(paths[si]) - shape = shapes[si][0] - ratio_pad = shapes[si][1] - - # eval in every image level - labels = targets[targets[:, 0] == si, 1:] - gt_masksi = masks[targets[:, 0] == si] if masks is not None else None - - # get predition masks - proto_out = train_out[1][si] if isinstance(train_out, tuple) else None - pred_maski = self.get_predmasks( - pred, - proto_out, - gt_masksi.shape[1:] if gt_masksi is not None else None, - ) - - # for visualization - if self.plots and batch_i < 3 and pred_maski is not None: - self.pred_masks.append(pred_maski.cpu()) - - # NOTE: eval in training image-size space - self.compute_stat(pred, pred_maski, labels, gt_masksi) - - # no preditions, not save anything - if len(pred) == 0: - continue - - if save_txt or save_json: - # clone() is for plot_images work 
correctly - predn = pred.clone() - # 因为test时添加了0.5的padding,因此这里与数据加载的padding不一致,所以需要转入ratio_pad - scale_coords( - img[si].shape[1:], predn[:, :4], shape, ratio_pad - ) # native-space pred - # Save/log - if save_txt and self.save_dir.exists(): - # NOTE: convert coords to native space when save txt. - # support save box preditions only - save_one_txt( - predn, - save_conf, - shape, - file=self.save_dir / "labels" / (path.stem + ".txt"), - ) - if save_json and self.save_dir.exists(): - # NOTE: convert coords to native space when save json. - # if pred_maski is not None: - # h, w, n - pred_maski = scale_masks( - img[si].shape[1:], - pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(), - shape, - ratio_pad, - ) - save_one_json( - predn, - self.jdict, - path, - self.class_map, - pred_maski, - ) # append to COCO-JSON dictionary - - if self.plots and batch_i < 3: - self.plot_images(batch_i, img, targets, masks, out, paths) - - # compute map and print it. - t = self.after_infer() - - # save json - if self.save_dir.exists() and save_json: - pred_json = str(self.save_dir / f"predictions.json") # predictions json - print(f"\nEvaluating pycocotools mAP... saving {pred_json}...") - with open(pred_json, "w") as f: - json.dump(self.jdict, f) - - # Print speeds - shape = (batch_size, 3, imgsz, imgsz) - print( - f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" - % t - ) - - s = ( - f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}" - if save_txt and self.save_dir.exists() - else "" - ) - print( - f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}" - ) - - # Return results - return ( - ( - *self.metric.mean_results(), - *(self.total_loss.cpu() / len(dataloader)).tolist(), - ), - self.metric.get_maps(self.nc), - t, - ) - - def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"): - "prepare for evaluation without training." 
- self.device = select_device(self.device, batch_size=batch_size) - - # Directories - self.save_dir = increment_path( - Path(self.project) / self.name, exist_ok=self.exist_ok - ) # increment run - if not self.nosave: - (self.save_dir / "labels" if save_txt else self.save_dir).mkdir( - parents=True, exist_ok=True - ) # make dir - - # Load model - check_suffix(weights, ".pt") - model = attempt_load(weights, map_location=self.device) # load FP32 model - gs = max(int(model.stride.max()), 32) # grid size (max stride) - imgsz = check_img_size(imgsz, s=gs) # check image size - - # Data - if self.device.type != "cpu": - model( - torch.zeros(1, 3, imgsz, imgsz) - .to(self.device) - .type_as(next(model.parameters())) - ) # run once - pad = 0.0 if task == "speed" else 0.5 - task = ( - task if task in ("train", "val", "test") else "val" - ) # path to train/val/test images - dataloader = create_dataloader( - self.data[task], - imgsz, - batch_size, - gs, - self.single_cls, - pad=pad, - rect=True, - prefix=colorstr(f"{task}: "), - mask_head=self.mask, - mask_downsample_ratio=self.mask_downsample_ratio, - )[0] - return model, dataloader, imgsz - - def inference(self, model, img, targets, masks=None, compute_loss=None): - """Inference""" - t1 = time_sync() - img = img.half() if self.half else img.float() # uint8 to fp16/32 - img /= 255.0 # 0 - 255 to 0.0 - 1.0 - _, _, height, width = img.shape # batch size, channels, height, width - t2 = time_sync() - self.dt[0] += t2 - t1 - - # Run model - out, train_out = model( - img, augment=self.augment - ) # inference and training outputs - self.dt[1] += time_sync() - t2 - - # Compute loss - if compute_loss: - self.total_loss += compute_loss(train_out, targets, masks)[ - 1 - ] # box, obj, cls - - # Run NMS - targets[:, 2:] *= torch.Tensor([width, height, width, height]).to( - self.device - ) # to pixels - t3 = time_sync() - out = self.nms( - prediction=out, - conf_thres=self.conf_thres, - iou_thres=self.iou_thres, - multi_label=True, - agnostic=self.single_cls, - ) - self.dt[2] += time_sync() - t3 - return out, train_out - - def after_infer(self): - """Do something after inference, such as plots and get metrics. - Return: - t(tuple): speeds of per image. - """ - # Plot confusion matrix - if self.plots and self.save_dir.exists(): - self.confusion_matrix.plot( - save_dir=self.save_dir, names=list(self.names.values()) - ) - - # Compute statistics - stats = [np.concatenate(x, 0) for x in zip(*self.stats)] # to numpy - box_or_mask_any = stats[0].any() or stats[1].any() - stats = stats[1:] if not self.mask else stats - if len(stats) and box_or_mask_any: - results = self.ap_per_class( - *stats, - self.plots, - self.save_dir if self.save_dir.exists() else None, - self.names, - ) - self.metric.update(results) - nt = np.bincount( - stats[(3 if not self.mask else 4)].astype(np.int64), minlength=self.nc - ) # number of targets per class - else: - nt = torch.zeros(1) - - # make this empty, cause make `stats` self is for reduce some duplicated codes. - self.stats = [] - # print information - self.print_metric(nt, stats) - t = tuple(x / self.seen * 1e3 for x in self.dt) # speeds per image - return t - - def process_batch(self, detections, labels, iouv): - """ - Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format. 
- Arguments: - detections (Array[N, 6]), x1, y1, x2, y2, conf, class - labels (Array[M, 5]), class, x1, y1, x2, y2 - Returns: - correct (Array[N, 10]), for 10 IoU levels - """ - correct = torch.zeros( - detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device - ) - iou = box_iou(labels[:, 1:], detections[:, :4]) - x = torch.where( - (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5]) - ) # IoU above threshold and classes match - if x[0].shape[0]: - matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) - .cpu() - .numpy() - ) # [label, detection, iou] - if x[0].shape[0] > 1: - matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 1], return_index=True)[1]] - # matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 0], return_index=True)[1]] - matches = torch.Tensor(matches).to(iouv.device) - correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv - return correct - - def get_predmasks(self, pred, proto_out, gt_shape): - """Get pred masks in different ways. - 1. process_mask, for val when training, eval with low quality(1/mask_ratio of original size) - mask for saving cuda memory. - 2. process_mask_upsample, for val after training to get high quality mask(original size). - - Args: - pred(torch.Tensor): output of network, (N, 5 + mask_dim + class). - proto_out(torch.Tensor): output of mask prototype, (mask_dim, mask_h, mask_w). - gt_shape(tuple): shape of gt mask, this shape may not equal to input size of - input image, Cause the mask_downsample_ratio. - Return: - pred_mask(torch.Tensor): predition of final masks with the same size with - input image, (N, input_h, input_w). - """ - if proto_out is None or len(pred) == 0: - return None - process = process_mask_upsample if self.plots else process_mask - gt_shape = ( - gt_shape[0] * self.mask_downsample_ratio, - gt_shape[1] * self.mask_downsample_ratio, - ) - # n, h, w - pred_mask = ( - process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape) - .permute(2, 0, 1) - .contiguous() - ) - return pred_mask - - def process_batch_masks(self, predn, pred_maski, gt_masksi, labels): - assert not ( - (pred_maski is None) ^ (gt_masksi is None) - ), "`proto_out` and `gt_masksi` should be both None or both exist." - if pred_maski is None and gt_masksi is None: - return torch.zeros(0, self.niou, dtype=torch.bool) - - correct = torch.zeros( - predn.shape[0], - self.iouv.shape[0], - dtype=torch.bool, - device=self.iouv.device, - ) - - if not self.plots: - gt_masksi = F.interpolate( - gt_masksi.unsqueeze(0), - pred_maski.shape[1:], - mode="bilinear", - align_corners=False, - ).squeeze(0) - - iou = mask_iou( - gt_masksi.view(gt_masksi.shape[0], -1), - pred_maski.view(pred_maski.shape[0], -1), - ) - x = torch.where( - (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5]) - ) # IoU above threshold and classes match - if x[0].shape[0]: - matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) - .cpu() - .numpy() - ) # [label, detection, iou] - if x[0].shape[0] > 1: - matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 1], return_index=True)[1]] - # matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 0], return_index=True)[1]] - matches = torch.Tensor(matches).to(self.iouv.device) - correct[matches[:, 1].long()] = matches[:, 2:3] >= self.iouv - return correct - - def compute_stat(self, predn, pred_maski, labels, gt_maski): - """Compute states about ious. 
with boxs size in training img-size space.""" - nl = len(labels) - tcls = labels[:, 0].tolist() if nl else [] # target class - - if len(predn) == 0: - if nl: - self.stats.append( - ( - torch.zeros(0, self.niou, dtype=torch.bool), # boxes - torch.zeros(0, self.niou, dtype=torch.bool), # masks - torch.Tensor(), - torch.Tensor(), - tcls, - ) - ) - return - - # Predictions - if self.single_cls: - predn[:, 5] = 0 - - # Evaluate - if nl: - tbox = xywh2xyxy(labels[:, 1:5]) # target boxes - labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels - # boxes - correct_boxes = self.process_batch(predn, labelsn, self.iouv) - - # masks - correct_masks = self.process_batch_masks( - predn, pred_maski, gt_maski, labelsn - ) - - if self.plots: - self.confusion_matrix.process_batch(predn, labelsn) - else: - correct_boxes = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) - correct_masks = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) - self.stats.append( - ( - correct_masks.cpu(), - correct_boxes.cpu(), - predn[:, 4].cpu(), - predn[:, 5].cpu(), - tcls, - ) - ) # (correct, conf, pcls, tcls) - - def print_metric(self, nt, stats): - # Print results - pf = "%20s" + "%11i" * 2 + "%11.3g" * (8 if self.mask else 4) - print(pf % ("all", self.seen, nt.sum(), *self.metric.mean_results())) - - # Print results per class - # TODO: self.seen support verbose. - if self.verbose and self.nc > 1 and len(stats): - for i, c in enumerate(self.metric.ap_class_index): - print( - pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i)) - ) - - def plot_images(self, i, img, targets, masks, out, paths): - if not self.save_dir.exists(): - return - # plot ground truth - f = self.save_dir / f"val_batch{i}_labels.jpg" # labels - - Thread( - target=plot_images_boxes_and_masks, - args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), - daemon=True, - ).start() - f = self.save_dir / f"val_batch{i}_pred.jpg" # predictions - - # plot predition - if len(self.pred_masks): - pred_masks = ( - torch.cat(self.pred_masks, dim=0) - if len(self.pred_masks) > 1 - else self.pred_masks[0] - ) - else: - pred_masks = None - Thread( - target=plot_images_boxes_and_masks, - args=( - img, - output_to_target(out), - pred_masks, - paths, - f, - self.names, - max(img.shape[2:]), - ), - daemon=True, - ).start() - - def nms(self, **kwargs): - return ( - non_max_suppression_masks(**kwargs) - if self.mask - else non_max_suppression(**kwargs) - ) - - def ap_per_class(self, *args): - return ap_per_class_box_and_mask(*args) if self.mask else ap_per_class(*args) - - -class Metric: - def __init__(self) -> None: - self.p = [] # (nc, ) - self.r = [] # (nc, ) - self.f1 = [] # (nc, ) - self.all_ap = [] # (nc, 10) - self.ap_class_index = [] # (nc, ) - - @property - def ap50(self): - """AP@0.5 of all classes. - Return: - (nc, ) or []. - """ - return self.all_ap[:, 0] if len(self.all_ap) else [] - - @property - def ap(self): - """AP@0.5:0.95 - Return: - (nc, ) or []. - """ - return self.all_ap.mean(1) if len(self.all_ap) else [] - - @property - def mp(self): - """mean precision of all classes. - Return: - float. - """ - return self.p.mean() if len(self.p) else 0.0 - - @property - def mr(self): - """mean recall of all classes. - Return: - float. - """ - return self.r.mean() if len(self.r) else 0.0 - - @property - def map50(self): - """Mean AP@0.5 of all classes. - Return: - float. - """ - return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0 - - @property - def map(self): - """Mean AP@0.5:0.95 of all classes. 
- Return: - float. - """ - return self.all_ap.mean() if len(self.all_ap) else 0.0 - - def mean_results(self): - """Mean of results, return mp, mr, map50, map""" - return (self.mp, self.mr, self.map50, self.map) - - def class_result(self, i): - """class-aware result, return p[i], r[i], ap50[i], ap[i]""" - return (self.p[i], self.r[i], self.ap50[i], self.ap[i]) - - def get_maps(self, nc): - maps = np.zeros(nc) + self.map - for i, c in enumerate(self.ap_class_index): - maps[c] = self.ap[i] - return maps - - def update(self, results): - """ - Args: - results: tuple(p, r, ap, f1, ap_class) - """ - p, r, all_ap, f1, ap_class_index = results - self.p = p - self.r = r - self.all_ap = all_ap - self.f1 = f1 - self.ap_class_index = ap_class_index - - -class Metrics: - """Metric for boxes and masks.""" - - def __init__(self) -> None: - self.metric_box = Metric() - self.metric_mask = Metric() - - def update(self, results): - """ - Args: - results: Dict{'boxes': Dict{}, 'masks': Dict{}} - """ - self.metric_box.update(list(results["boxes"].values())) - self.metric_mask.update(list(results["masks"].values())) - - def mean_results(self): - return self.metric_box.mean_results() + self.metric_mask.mean_results() - - def class_result(self, i): - return self.metric_box.class_result(i) + self.metric_mask.class_result(i) - - def get_maps(self, nc): - return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) - - @property - def ap_class_index(self): - # boxes and masks have the same ap_class_index - return self.metric_box.ap_class_index diff --git a/eval_seg.py b/evaluator.py similarity index 100% rename from eval_seg.py rename to evaluator.py diff --git a/train_instseg.py b/train_instseg.py index b3c699c182e9..b141037d7e18 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -66,7 +66,7 @@ from torch.optim import AdamW import yaml from datetime import datetime -from evaluate import Yolov5Evaluator +from evaluator import Yolov5Evaluator def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary print(device) From bf82d7452023111108cfeac935681867322628f2 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 19 Jul 2022 18:47:56 +0530 Subject: [PATCH 039/247] fix conflicts with laughing-q --- evaluator.py | 451 +++++++++++++++------------------------------------ 1 file changed, 128 insertions(+), 323 deletions(-) diff --git a/evaluator.py b/evaluator.py index 24f2e40b95db..3e5e3ded21f0 100644 --- a/evaluator.py +++ b/evaluator.py @@ -1,3 +1,12 @@ +# TODO: Optimize plotting, losses & merge with val.py + +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Validate a trained YOLOv5 model accuracy on a custom dataset + +Usage: + $ python path/to/val.py --data coco128.yaml --weights yolov5s.pt --img 640 +""" import json from pathlib import Path @@ -6,33 +15,18 @@ import numpy as np import torch import torch.nn.functional as F +from PIL import Image # import pycocotools.mask as mask_util from tqdm import tqdm from models.experimental import attempt_load from seg_dataloaders import create_dataloader -from utils.general import ( - coco80_to_coco91_class, - increment_path, - colorstr, check_dataset, check_img_size, check_suffix -) - -from utils.segment import ( - non_max_suppression_masks, - mask_iou, - process_mask, - process_mask_upsample, - scale_masks, -) -from utils.boxes import ( - box_iou, - non_max_suppression, - scale_coords, - xyxy2xywh, - xywh2xyxy, -) +from utils.general import (box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, ) +from utils.general import 
(check_dataset, check_img_size, check_suffix, ) +from utils.general import (coco80_to_coco91_class, increment_path, colorstr, ) +from utils.plots import output_to_target, plot_images_boxes_and_masks from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix -from utils.seg_plots import output_to_target, plot_images_boxes_and_masks +from utils.segment import (non_max_suppression_masks, mask_iou, process_mask, process_mask_upsample, scale_masks, ) from utils.torch_utils import select_device, time_sync @@ -40,9 +34,7 @@ def save_one_txt(predn, save_conf, shape, file): # Save one txt result gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh for *xyxy, conf, cls in predn.tolist(): - xywh = ( - (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() - ) # normalized xywh + xywh = ((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()) # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format with open(file, "a") as f: f.write(("%g " * len(line)).rstrip() % line + "\n") @@ -56,20 +48,13 @@ def save_one_json(predn, jdict, path, class_map, pred_masks=None): if pred_masks is not None: pred_masks = np.transpose(pred_masks, (2, 0, 1)) - rles = [ - mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] - for mask in pred_masks - ] + rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks] for rle in rles: rle["counts"] = rle["counts"].decode("utf-8") for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): - pred_dict = { - "image_id": image_id, - "category_id": class_map[int(p[5])], - "bbox": [round(x, 3) for x in b], - "score": round(p[4], 5), - } + pred_dict = {"image_id": image_id, "category_id": class_map[int(p[5])], "bbox": [round(x, 3) for x in b], + "score": round(p[4], 5), } if pred_masks is not None: pred_dict["segmentation"] = rles[i] jdict.append(pred_dict) @@ -77,25 +62,9 @@ def save_one_json(predn, jdict, path, class_map, pred_masks=None): @torch.no_grad() class Yolov5Evaluator: - def __init__( - self, - data, - conf_thres=0.001, - iou_thres=0.6, - device="", - single_cls=False, - augment=False, - verbose=False, - project="runs/val", - name="exp", - exist_ok=False, - half=True, - save_dir=Path(""), - nosave=False, - plots=True, - mask=False, - mask_downsample_ratio=1, - ) -> None: + def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls=False, augment=False, verbose=False, + project="runs/val", name="exp", exist_ok=False, half=True, save_dir=Path(""), nosave=False, plots=True, + max_plot_dets=10, mask=False, mask_downsample_ratio=1, overlap=False) -> None: self.data = check_dataset(data) # check self.conf_thres = conf_thres # confidence threshold self.iou_thres = iou_thres # NMS IoU threshold @@ -110,8 +79,10 @@ def __init__( self.save_dir = save_dir self.nosave = nosave self.plots = plots + self.max_plot_dets = max_plot_dets self.mask = mask self.mask_downsample_ratio = mask_downsample_ratio + self.overlap = overlap self.nc = 1 if self.single_cls else int(self.data["nc"]) # number of classes self.iouv = torch.linspace(0.5, 0.95, 10) # iou vector for mAP@0.5:0.95 @@ -119,40 +90,14 @@ def __init__( self.confusion_matrix = ConfusionMatrix(nc=self.nc) self.dt = [0.0, 0.0, 0.0] self.names = {k: v for k, v in enumerate(self.data["names"])} - self.s = ( - ("%20s" + "%11s" * 10) - % ( - "Class", - "Images", - "Labels", - "Box:{P", - "R", - "mAP@.5", - "mAP@.5:.95}", - "Mask:{P", - "R", - "mAP@.5", - 
"mAP@.5:.95}", - ) - if self.mask - else ("%20s" + "%11s" * 6) - % ( - "Class", - "Images", - "Labels", - "P", - "R", - "mAP@.5", - "mAP@.5:.95", - ) - ) + self.s = (("%20s" + "%11s" * 10) % ( + "Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", "Mask:{P", "R", "mAP@.5", + "mAP@.5:.95}",) if self.mask else ("%20s" + "%11s" * 6) % ( + "Class", "Images", "Labels", "P", "R", "mAP@.5", "mAP@.5:.95",)) # coco stuff - self.is_coco = isinstance(self.data.get("val"), str) and self.data[ - "val" - ].endswith( - "coco/val2017.txt" - ) # COCO dataset + self.is_coco = isinstance(self.data.get("val"), str) and self.data["val"].endswith( + "coco/val2017.txt") # COCO dataset self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000)) self.jdict = [] self.iou_thres = 0.65 if self.is_coco else self.iou_thres @@ -166,6 +111,7 @@ def __init__( self.total_loss = torch.zeros((4 if self.mask else 3)) self.metric = Metrics() if self.mask else Metric() + @torch.no_grad() def run_training(self, model, dataloader, compute_loss=None): """This is for evaluation when training.""" self.seen = 0 @@ -179,15 +125,13 @@ def run_training(self, model, dataloader, compute_loss=None): # inference # masks will be `None` if training objection. - for batch_i, (img, targets, paths, shapes, masks) in enumerate( - tqdm(dataloader, desc=self.s) - ): + for batch_i, (img, targets, paths, shapes, masks) in enumerate(tqdm(dataloader, desc=self.s)): # reset pred_masks self.pred_masks = [] img = img.to(self.device, non_blocking=True) targets = targets.to(self.device) if masks is not None: - masks = masks.to(self.device) + masks = masks.to(self.device).float() out, train_out = self.inference(model, img, targets, masks, compute_loss) # Statistics per image @@ -196,24 +140,22 @@ def run_training(self, model, dataloader, compute_loss=None): # eval in every image level labels = targets[targets[:, 0] == si, 1:] - gt_masksi = masks[targets[:, 0] == si] if masks is not None else None + midx = [si] if self.overlap else targets[:, 0] == si + gt_masksi = masks[midx] if masks is not None else None # get predition masks proto_out = train_out[1][si] if isinstance(train_out, tuple) else None - pred_maski = self.get_predmasks( - pred, - proto_out, - gt_masksi.shape[1:] if gt_masksi is not None else None, - ) + pred_maski = self.get_predmasks(pred, proto_out, + gt_masksi.shape[1:] if gt_masksi is not None else None, ) # for visualization if self.plots and batch_i < 3 and pred_maski is not None: - self.pred_masks.append(pred_maski.cpu()) + self.pred_masks.append(pred_maski[:self.max_plot_dets].cpu()) # NOTE: eval in training image-size space self.compute_stat(pred, pred_maski, labels, gt_masksi) - if self.plots and batch_i < 3: + if self.plots and batch_i < 2: self.plot_images(batch_i, img, targets, masks, out, paths) # compute map and print it. 
@@ -221,29 +163,12 @@ def run_training(self, model, dataloader, compute_loss=None): # Return results model.float() # for training - return ( - ( - *self.metric.mean_results(), - *(self.total_loss.cpu() / len(dataloader)).tolist(), - ), - self.metric.get_maps(self.nc), - t, - ) - - def run( - self, - weights, - batch_size, - imgsz, - save_txt=False, - save_conf=False, - save_json=False, - task="val", - ): + return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),), + self.metric.get_maps(self.nc), t,) + + def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_json=False, task="val", ): """This is for native evaluation.""" - model, dataloader, imgsz = self.before_infer( - weights, batch_size, imgsz, save_txt, task - ) + model, dataloader, imgsz = self.before_infer(weights, batch_size, imgsz, save_txt, task) self.seen = 0 # self.iouv.to(self.device) self.half &= self.device.type != "cpu" # half precision only supported on CUDA @@ -252,15 +177,13 @@ def run( model.eval() # inference - for batch_i, (img, targets, paths, shapes, masks) in enumerate( - tqdm(dataloader, desc=self.s) - ): + for batch_i, (img, targets, paths, shapes, masks) in enumerate(tqdm(dataloader, desc=self.s)): # reset pred_masks self.pred_masks = [] img = img.to(self.device, non_blocking=True) targets = targets.to(self.device) if masks is not None: - masks = masks.to(self.device) + masks = masks.to(self.device).float() out, train_out = self.inference(model, img, targets, masks) # Statistics per image @@ -272,19 +195,17 @@ def run( # eval in every image level labels = targets[targets[:, 0] == si, 1:] - gt_masksi = masks[targets[:, 0] == si] if masks is not None else None + midx = [si] if self.overlap else targets[:, 0] == si + gt_masksi = masks[midx] if masks is not None else None # get predition masks proto_out = train_out[1][si] if isinstance(train_out, tuple) else None - pred_maski = self.get_predmasks( - pred, - proto_out, - gt_masksi.shape[1:] if gt_masksi is not None else None, - ) + pred_maski = self.get_predmasks(pred, proto_out, + gt_masksi.shape[1:] if gt_masksi is not None else None, ) # for visualization if self.plots and batch_i < 3 and pred_maski is not None: - self.pred_masks.append(pred_maski.cpu()) + self.pred_masks.append(pred_maski[:self.max_plot_dets].cpu()) # NOTE: eval in training image-size space self.compute_stat(pred, pred_maski, labels, gt_masksi) @@ -297,36 +218,21 @@ def run( # clone() is for plot_images work correctly predn = pred.clone() # 因为test时添加了0.5的padding,因此这里与数据加载的padding不一致,所以需要转入ratio_pad - scale_coords( - img[si].shape[1:], predn[:, :4], shape, ratio_pad - ) # native-space pred + scale_coords(img[si].shape[1:], predn[:, :4], shape, ratio_pad) # native-space pred + # Save/log if save_txt and self.save_dir.exists(): # NOTE: convert coords to native space when save txt. # support save box preditions only - save_one_txt( - predn, - save_conf, - shape, - file=self.save_dir / "labels" / (path.stem + ".txt"), - ) + save_one_txt(predn, save_conf, shape, file=self.save_dir / "labels" / (path.stem + ".txt"), ) if save_json and self.save_dir.exists(): # NOTE: convert coords to native space when save json. 
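                    # Reminder: scale_masks returns the masks as an (h, w, n) numpy
                    # array; save_one_json then RLE-encodes each mask with pycocotools
                    # and utf-8 decodes "counts" so the entry stays JSON-serializable.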
# if pred_maski is not None: # h, w, n - pred_maski = scale_masks( - img[si].shape[1:], - pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(), - shape, - ratio_pad, - ) - save_one_json( - predn, - self.jdict, - path, - self.class_map, - pred_maski, - ) # append to COCO-JSON dictionary + pred_maski = scale_masks(img[si].shape[1:], pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(), + shape, ratio_pad, ) + save_one_json(predn, self.jdict, path, self.class_map, + pred_maski, ) # append to COCO-JSON dictionary if self.plots and batch_i < 3: self.plot_images(batch_i, img, targets, masks, out, paths) @@ -343,42 +249,24 @@ def run( # Print speeds shape = (batch_size, 3, imgsz, imgsz) - print( - f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" - % t - ) + print(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" % t) s = ( - f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}" - if save_txt and self.save_dir.exists() - else "" - ) - print( - f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}" - ) + f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}" if save_txt and self.save_dir.exists() else "") + print(f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}") # Return results - return ( - ( - *self.metric.mean_results(), - *(self.total_loss.cpu() / len(dataloader)).tolist(), - ), - self.metric.get_maps(self.nc), - t, - ) + return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),), + self.metric.get_maps(self.nc), t,) def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"): "prepare for evaluation without training." 
self.device = select_device(self.device, batch_size=batch_size) # Directories - self.save_dir = increment_path( - Path(self.project) / self.name, exist_ok=self.exist_ok - ) # increment run + self.save_dir = increment_path(Path(self.project) / self.name, exist_ok=self.exist_ok) # increment run if not self.nosave: - (self.save_dir / "labels" if save_txt else self.save_dir).mkdir( - parents=True, exist_ok=True - ) # make dir + (self.save_dir / "labels" if save_txt else self.save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model check_suffix(weights, ".pt") @@ -388,27 +276,11 @@ def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"): # Data if self.device.type != "cpu": - model( - torch.zeros(1, 3, imgsz, imgsz) - .to(self.device) - .type_as(next(model.parameters())) - ) # run once + model(torch.zeros(1, 3, imgsz, imgsz).to(self.device).type_as(next(model.parameters()))) # run once pad = 0.0 if task == "speed" else 0.5 - task = ( - task if task in ("train", "val", "test") else "val" - ) # path to train/val/test images - dataloader = create_dataloader( - self.data[task], - imgsz, - batch_size, - gs, - self.single_cls, - pad=pad, - rect=True, - prefix=colorstr(f"{task}: "), - mask_head=self.mask, - mask_downsample_ratio=self.mask_downsample_ratio, - )[0] + task = (task if task in ("train", "val", "test") else "val") # path to train/val/test images + dataloader = create_dataloader(self.data[task], imgsz, batch_size, gs, self.single_cls, pad=pad, rect=True, + prefix=colorstr(f"{task}: "), mask_head=self.mask, mask_downsample_ratio=self.mask_downsample_ratio, )[0] return model, dataloader, imgsz def inference(self, model, img, targets, masks=None, compute_loss=None): @@ -421,29 +293,18 @@ def inference(self, model, img, targets, masks=None, compute_loss=None): self.dt[0] += t2 - t1 # Run model - out, train_out = model( - img, augment=self.augment - ) # inference and training outputs + out, train_out = model(img, augment=self.augment) # inference and training outputs self.dt[1] += time_sync() - t2 # Compute loss if compute_loss: - self.total_loss += compute_loss(train_out, targets, masks)[ - 1 - ] # box, obj, cls + self.total_loss += compute_loss(train_out, targets, masks)[1] # box, obj, cls # Run NMS - targets[:, 2:] *= torch.Tensor([width, height, width, height]).to( - self.device - ) # to pixels + targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(self.device) # to pixels t3 = time_sync() - out = self.nms( - prediction=out, - conf_thres=self.conf_thres, - iou_thres=self.iou_thres, - multi_label=True, - agnostic=self.single_cls, - ) + out = self.nms(prediction=out, conf_thres=self.conf_thres, iou_thres=self.iou_thres, multi_label=True, + agnostic=self.single_cls, ) self.dt[2] += time_sync() - t3 return out, train_out @@ -454,25 +315,18 @@ def after_infer(self): """ # Plot confusion matrix if self.plots and self.save_dir.exists(): - self.confusion_matrix.plot( - save_dir=self.save_dir, names=list(self.names.values()) - ) + self.confusion_matrix.plot(save_dir=self.save_dir, names=list(self.names.values())) # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*self.stats)] # to numpy box_or_mask_any = stats[0].any() or stats[1].any() stats = stats[1:] if not self.mask else stats if len(stats) and box_or_mask_any: - results = self.ap_per_class( - *stats, - self.plots, - self.save_dir if self.save_dir.exists() else None, - self.names, - ) + results = self.ap_per_class(*stats, self.plots, self.save_dir if self.save_dir.exists() else None, + 
self.names, ) self.metric.update(results) - nt = np.bincount( - stats[(3 if not self.mask else 4)].astype(np.int64), minlength=self.nc - ) # number of targets per class + nt = np.bincount(stats[(3 if not self.mask else 4)].astype(np.int64), + minlength=self.nc) # number of targets per class else: nt = torch.zeros(1) @@ -492,19 +346,13 @@ def process_batch(self, detections, labels, iouv): Returns: correct (Array[N, 10]), for 10 IoU levels """ - correct = torch.zeros( - detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device - ) + correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device) iou = box_iou(labels[:, 1:], detections[:, :4]) x = torch.where( - (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5]) - ) # IoU above threshold and classes match + (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5])) # IoU above threshold and classes match if x[0].shape[0]: matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) - .cpu() - .numpy() - ) # [label, detection, iou] + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -532,53 +380,36 @@ def get_predmasks(self, pred, proto_out, gt_shape): if proto_out is None or len(pred) == 0: return None process = process_mask_upsample if self.plots else process_mask - gt_shape = ( - gt_shape[0] * self.mask_downsample_ratio, - gt_shape[1] * self.mask_downsample_ratio, - ) + gt_shape = (gt_shape[0] * self.mask_downsample_ratio, gt_shape[1] * self.mask_downsample_ratio,) # n, h, w - pred_mask = ( - process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape) - .permute(2, 0, 1) - .contiguous() - ) + pred_mask = (process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape).permute(2, 0, 1).contiguous()) return pred_mask def process_batch_masks(self, predn, pred_maski, gt_masksi, labels): - assert not ( - (pred_maski is None) ^ (gt_masksi is None) - ), "`proto_out` and `gt_masksi` should be both None or both exist." + assert not ((pred_maski is None) ^ ( + gt_masksi is None)), "`proto_out` and `gt_masksi` should be both None or both exist." 
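+        # when self.overlap is set, gt_masksi arrives index-encoded as (1, h, w);
+        # the block below expands it to per-instance binary masks (see the sketch
+        # after the run_training hunk above)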
if pred_maski is None and gt_masksi is None: return torch.zeros(0, self.niou, dtype=torch.bool) - correct = torch.zeros( - predn.shape[0], - self.iouv.shape[0], - dtype=torch.bool, - device=self.iouv.device, - ) - - if not self.plots: - gt_masksi = F.interpolate( - gt_masksi.unsqueeze(0), - pred_maski.shape[1:], - mode="bilinear", - align_corners=False, - ).squeeze(0) + correct = torch.zeros(predn.shape[0], self.iouv.shape[0], dtype=torch.bool, device=self.iouv.device, ) + + # convert masks (1, 640, 640) -> (n, 640, 640) + if self.overlap: + nl = len(labels) + index = torch.arange(nl, device=gt_masksi.device).view(nl, 1, 1) + 1 + gt_masksi = gt_masksi.repeat(nl, 1, 1) + gt_masksi = torch.where(gt_masksi == index, 1.0, 0.0) + + if gt_masksi.shape[1:] != pred_maski.shape[1:]: + gt_masksi = F.interpolate(gt_masksi.unsqueeze(0), pred_maski.shape[1:], mode="bilinear", + align_corners=False, ).squeeze(0) - iou = mask_iou( - gt_masksi.view(gt_masksi.shape[0], -1), - pred_maski.view(pred_maski.shape[0], -1), - ) + iou = mask_iou(gt_masksi.view(gt_masksi.shape[0], -1), pred_maski.view(pred_maski.shape[0], -1), ) x = torch.where( - (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5]) - ) # IoU above threshold and classes match + (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match if x[0].shape[0]: matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) - .cpu() - .numpy() - ) # [label, detection, iou] + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -595,15 +426,9 @@ def compute_stat(self, predn, pred_maski, labels, gt_maski): if len(predn) == 0: if nl: - self.stats.append( - ( - torch.zeros(0, self.niou, dtype=torch.bool), # boxes - torch.zeros(0, self.niou, dtype=torch.bool), # masks - torch.Tensor(), - torch.Tensor(), - tcls, - ) - ) + self.stats.append((torch.zeros(0, self.niou, dtype=torch.bool), # boxes + torch.zeros(0, self.niou, dtype=torch.bool), # masks + torch.Tensor(), torch.Tensor(), tcls,)) return # Predictions @@ -618,24 +443,15 @@ def compute_stat(self, predn, pred_maski, labels, gt_maski): correct_boxes = self.process_batch(predn, labelsn, self.iouv) # masks - correct_masks = self.process_batch_masks( - predn, pred_maski, gt_maski, labelsn - ) + correct_masks = self.process_batch_masks(predn, pred_maski, gt_maski, labelsn) if self.plots: self.confusion_matrix.process_batch(predn, labelsn) else: correct_boxes = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) correct_masks = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) - self.stats.append( - ( - correct_masks.cpu(), - correct_boxes.cpu(), - predn[:, 4].cpu(), - predn[:, 5].cpu(), - tcls, - ) - ) # (correct, conf, pcls, tcls) + self.stats.append((correct_masks.cpu(), correct_boxes.cpu(), predn[:, 4].cpu(), predn[:, 5].cpu(), + tcls,)) # (correct, conf, pcls, tcls) def print_metric(self, nt, stats): # Print results @@ -646,52 +462,41 @@ def print_metric(self, nt, stats): # TODO: self.seen support verbose. 
if self.verbose and self.nc > 1 and len(stats): for i, c in enumerate(self.metric.ap_class_index): - print( - pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i)) - ) + print(pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i))) def plot_images(self, i, img, targets, masks, out, paths): if not self.save_dir.exists(): return # plot ground truth f = self.save_dir / f"val_batch{i}_labels.jpg" # labels + + if masks is not None and masks.shape[1:] != img.shape[2:]: + masks = F.interpolate( + masks.unsqueeze(0).float(), + img.shape[2:], + mode="bilinear", + align_corners=False, + ).squeeze(0) - Thread( - target=plot_images_boxes_and_masks, - args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), - daemon=True, - ).start() + Thread(target=plot_images_boxes_and_masks, args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), + daemon=True, ).start() f = self.save_dir / f"val_batch{i}_pred.jpg" # predictions # plot predition if len(self.pred_masks): - pred_masks = ( - torch.cat(self.pred_masks, dim=0) - if len(self.pred_masks) > 1 - else self.pred_masks[0] - ) + pred_masks = (torch.cat(self.pred_masks, dim=0) if len(self.pred_masks) > 1 else self.pred_masks[0]) else: pred_masks = None - Thread( - target=plot_images_boxes_and_masks, - args=( - img, - output_to_target(out), - pred_masks, - paths, - f, - self.names, - max(img.shape[2:]), - ), - daemon=True, - ).start() + plot_images_boxes_and_masks(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:])) + #Thread(target=plot_images_boxes_and_masks, + # args=(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:]),), + # daemon=True, ).start() + import wandb + if wandb.run: + wandb.log({f"pred_{i}": wandb.Image(str(f))}) def nms(self, **kwargs): - return ( - non_max_suppression_masks(**kwargs) - if self.mask - else non_max_suppression(**kwargs) - ) + return (non_max_suppression_masks(**kwargs) if self.mask else non_max_suppression(**kwargs)) def ap_per_class(self, *args): return ap_per_class_box_and_mask(*args) if self.mask else ap_per_class(*args) @@ -807,4 +612,4 @@ def get_maps(self, nc): @property def ap_class_index(self): # boxes and masks have the same ap_class_index - return self.metric_box.ap_class_index + return self.metric_box.ap_class_index \ No newline at end of file From 6ad389bb6fa6afe164f6c8e6156c8a37b0556611 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Wed, 20 Jul 2022 18:00:11 +0800 Subject: [PATCH 040/247] add val_instseg.py&&remove useless code --- evaluator.py | 4 +- seg_dataloaders.py | 199 ++----------- train_instseg.py | 2 +- utils/boxes.py | 298 -------------------- utils/seg_plots.py | 689 --------------------------------------------- val_instseg.py | 85 ++++++ 6 files changed, 105 insertions(+), 1172 deletions(-) delete mode 100644 utils/boxes.py delete mode 100644 utils/seg_plots.py create mode 100644 val_instseg.py diff --git a/evaluator.py b/evaluator.py index 3e5e3ded21f0..27533c3048f1 100644 --- a/evaluator.py +++ b/evaluator.py @@ -270,7 +270,7 @@ def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"): # Load model check_suffix(weights, ".pt") - model = attempt_load(weights, map_location=self.device) # load FP32 model + model = attempt_load(weights, device=self.device) # load FP32 model gs = max(int(model.stride.max()), 32) # grid size (max stride) imgsz = check_img_size(imgsz, s=gs) # check image size @@ 
-612,4 +612,4 @@ def get_maps(self, nc): @property def ap_class_index(self): # boxes and masks have the same ap_class_index - return self.metric_box.ap_class_index \ No newline at end of file + return self.metric_box.ap_class_index diff --git a/seg_dataloaders.py b/seg_dataloaders.py index ac6da36fab09..4d74bb00c1a9 100644 --- a/seg_dataloaders.py +++ b/seg_dataloaders.py @@ -8,11 +8,14 @@ import json import logging import time +import numpy as np from functools import wraps from itertools import repeat from multiprocessing.pool import ThreadPool, Pool from pathlib import Path from zipfile import ZipFile +from PIL import Image +from tqdm import tqdm import torch.nn.functional as F import yaml @@ -20,8 +23,6 @@ from torch.utils.data import distributed from torch.utils.data.sampler import BatchSampler as torchBatchSampler from torch.utils.data.sampler import RandomSampler -from torch.utils.data.sampler import Sampler -from tqdm import tqdm from seg_augmentations import (Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective, ) from utils.general import colorstr, check_dataset, check_yaml, xywhn2xyxy, xyxy2xywhn, xyn2xy @@ -60,8 +61,8 @@ def __iter__(self): def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0, - rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix="", shuffle=False, neg_dir="", - bg_dir="", area_thr=0.2, mask_head=False, mask_downsample_ratio=1, overlap_mask=False): + rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix="", shuffle=False, + area_thr=0.2, mask_head=False, mask_downsample_ratio=1, overlap_mask=False): if rect and shuffle: print("WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False") shuffle = False @@ -72,7 +73,7 @@ def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=Non hyp=hyp, # augmentation hyperparameters rect=rect, # rectangular training cache_images=cache, single_cls=single_cls, stride=int(stride), pad=pad, image_weights=image_weights, - prefix=prefix, neg_dir=neg_dir, bg_dir=bg_dir, area_thr=area_thr, ) + prefix=prefix, area_thr=area_thr, ) if mask_head: dataset.downsample_ratio = mask_downsample_ratio dataset.overlap = overlap_mask @@ -88,10 +89,6 @@ def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=Non # batch-size and batch-sampler is exclusion batch_sampler=batch_sampler, pin_memory=True, collate_fn=data_load.collate_fn4 if quad else data_load.collate_fn, - # Make sure each process has different random seed, especially for 'fork' method. - # Check https://github.com/pytorch/pytorch/issues/63311 for more details. - # but this will make init_seed() not work. 
- # worker_init_fn=worker_init_reset_seed, ) return dataloader, dataset @@ -141,7 +138,7 @@ class LoadImagesAndLabels(Dataset): cache_version = 0.6 # dataset labels *.cache version def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False, stride=32, pad=0.0, prefix="", neg_dir="", bg_dir="", area_thr=0.2, ): + cache_images=False, single_cls=False, stride=32, pad=0.0, prefix="", area_thr=0.2, ): super().__init__(augment=augment) self.img_size = img_size self.hyp = hyp @@ -154,7 +151,6 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r self.albumentations = Albumentations() if augment else None # additional feature - self.img_neg_files, self.img_bg_files = self.get_neg_and_bg(neg_dir, bg_dir) self.area_thr = area_thr p = Path(path) # os-agnostic @@ -235,20 +231,6 @@ def get_img_files(self, p, prefix): raise Exception(f"{prefix}Error loading data from {str(p)}: {e}\nSee {HELP_URL}") return img_files - def get_neg_and_bg(self, neg_dir, bg_dir): - """Get negative pictures and background pictures.""" - img_neg_files, img_bg_files = [], [] - if os.path.isdir(neg_dir): - img_neg_files = [os.path.join(neg_dir, i) for i in os.listdir(neg_dir)] - logging.info(colorstr( - "Negative dir: ") + f"'{neg_dir}', using {len(img_neg_files)} pictures from the dir as negative samples during training") - - if os.path.isdir(bg_dir): - img_bg_files = [os.path.join(bg_dir, i) for i in os.listdir(bg_dir)] - logging.info(colorstr( - "Background dir: ") + f"{bg_dir}, using {len(img_bg_files)} pictures from the dir as background during training") - return img_neg_files, img_bg_files - def load_cache(self, cache_path, prefix): """Load labels from *.cache file.""" try: @@ -454,11 +436,11 @@ def collate_fn4(batch): class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False, stride=32, pad=0, prefix="", neg_dir="", bg_dir="", area_thr=0.2, + cache_images=False, single_cls=False, stride=32, pad=0, prefix="", area_thr=0.2, downsample_ratio=1, overlap=False, ): super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls, - stride, pad, prefix, neg_dir, bg_dir, area_thr, ) + stride, pad, prefix, area_thr, ) self.downsample_ratio = downsample_ratio self.overlap = overlap @@ -590,66 +572,23 @@ def load_image(self, i): return (self.imgs[i], self.img_hw0[i], self.img_hw[i],) # im, hw_original, hw_resized -def load_neg_image(self, index): - path = self.img_neg_files[index] - img = cv2.imread(path) # BGR - assert img is not None, "Image Not Found " + path - h0, w0 = img.shape[:2] # orig hw - r = self.img_size / max(h0, w0) # resize image to img_size - if r != 1: # always resize down, only resize up if training with augmentation - interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR - img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) - return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized - - -def load_bg_image(self, index): - path = self.img_files[index] - bg_path = self.img_bg_files[np.random.randint(0, len(self.img_bg_files))] - img, coord, _, (w, h) = paste1(path, bg_path, bg_size=self.img_size, fg_scale=random.uniform(1.5, 5)) - label = self.labels[index] - label[:, 1] = (label[:, 1] * w + coord[0]) / img.shape[1] - label[:, 2] = 
(label[:, 2] * h + coord[1]) / img.shape[0] - label[:, 3] = label[:, 3] * w / img.shape[1] - label[:, 4] = label[:, 4] * h / img.shape[0] - - assert img is not None, "Image Not Found " + path - h0, w0 = img.shape[:2] # orig hw - r = self.img_size / max(h0, w0) # resize image to img_size - if r != 1: # always resize down, only resize up if training with augmentation - interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR - img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) - return img, (h0, w0), img.shape[:2], label # img, hw_original, hw_resized - - def load_mosaic(self, index, return_seg=False): # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic labels4, segments4 = [], [] s = self.img_size yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y - num_neg = random.randint(0, 2) if len(self.img_neg_files) else 0 # 3 additional image indices - indices = [index] + random.choices(self.indices, k=(3 - num_neg)) - indices = indices + random.choices(range(len(self.img_neg_files)), k=num_neg) - ri = list(range(4)) - random.shuffle(ri) - for j, (i, index) in enumerate(zip(ri, indices)): - temp_label = None + indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices + for i, index in enumerate(indices): # Load image - # TODO - if j < (4 - num_neg): - if len(self.img_bg_files) and (random.uniform(0, 1) > 0.5): - img, _, (h, w), temp_label = load_bg_image(self, index) - else: - img, _, (h, w) = load_image(self, index) - else: - img, _, (h, w) = load_neg_image(self, index) + img, _, (h, w) = load_image(self, index) + # place img in img4 - if j == 0: - img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles if i == 0: # top left - x1a, y1a, x2a, y2a = (max(xc - w, 0), max(yc - h, 0), xc, yc,) # xmin, ymin, xmax, ymax (large image) - x1b, y1b, x2b, y2b = (w - (x2a - x1a), h - (y2a - y1a), w, h,) # xmin, ymin, xmax, ymax (small image) + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) elif i == 1: # top right x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h @@ -664,15 +603,7 @@ def load_mosaic(self, index, return_seg=False): padw = x1a - x1b padh = y1a - y1b - # Labels - if j >= (4 - num_neg): - continue - - # TODO: deal with segments - if len(self.img_bg_files) and temp_label is not None: - labels, segments = temp_label, [] - else: - labels, segments = self.labels[index].copy(), self.segments[index].copy() + labels, segments = self.labels[index].copy(), self.segments[index].copy() if labels.size: labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format @@ -873,7 +804,6 @@ def hub_ops(f, max_dim=1920): import glob import shutil import hashlib -import uuid import torch import cv2 import random @@ -936,14 +866,6 @@ def exif_transpose(image): image.info["exif"] = exif.tobytes() return image - -def worker_init_reset_seed(worker_id): - seed = uuid.uuid4().int % 2 ** 32 - random.seed(seed) - torch.set_rng_state(torch.manual_seed(seed).get_state()) - np.random.seed(seed) - - def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): """ Args: @@ -1170,90 +1092,3 @@ def 
__len__(self): def __iter__(self): for i in range(len(self)): yield next(self.iterator) - - -# REFACTOR IN A NEW FILE -from PIL import Image -import numpy as np -from PIL import ImageFile - -# import numbers - -ImageFile.LOAD_TRUNCATED_IMAGES = True - - -def get_raito(new_size, original_size): - """Get the ratio bewtten input_size and original_size""" - # # mmdet way - # iw, ih = new_size - # ow, oh = original_size - # max_long_edge = max(iw, ih) - # max_short_edge = min(iw, ih) - # ratio = min(max_long_edge / max(ow, oh), max_short_edge / min(ow, oh)) - # return ratio - - # # yolov5 way - return min(new_size[0] / original_size[0], new_size[1] / original_size[1]) - - -def imresize(img, new_size): - """Resize the img with new_size by PIL(keep aspect). - - Args: - img (PIL): The original image. - new_size (tuple): The new size(w, h). - """ - if isinstance(new_size, int): - new_size = (new_size, new_size) - old_size = img.size - ratio = get_raito(new_size, old_size) - img = img.resize((int(old_size[0] * ratio), int(old_size[1] * ratio))) - return img - - -def get_wh(a, b): - return np.random.randint(a, b) - - -def paste2(sample1, sample2, background, scale=1.2): - sample1 = Image.open(sample1) - d_w1, d_h1 = sample1.size - - sample2 = Image.open(sample2) - d_w2, d_h2 = sample2.size - - # print(sample.size) - background = Image.open(background) - background = background.resize((int((d_w1 + d_w2) * scale), int((d_h1 + d_h2) * scale))) - bw, bh = background.size - - x1, y1 = get_wh(0, int(d_w1 * scale) - d_w1), get_wh(0, bh - d_h1) - x2, y2 = get_wh(int(d_w1 * scale), bw - d_w2), get_wh(0, bh - d_h2) - # x1, y1 = get_wh(0, int(bw / 2) - d_w1), get_wh(0, bh - d_h1) - # x2, y2 = get_wh(int(bw / 2), bw - d_w2), get_wh(0, bh - d_h2) - - background.paste(sample1, (x1, y1)) - background.paste(sample2, (x2, y2)) - # background = background.resize((416, 416)) - - return np.array(background), (x1, y1, x2, y2), background # print(background.size) # background.show() - - -def paste1(sample, background, bg_size, fg_scale=1.5): - sample = Image.open(sample) - background = Image.open(background) - background = imresize(background, bg_size) - bw, bh = background.size - # background = background.resize((int(d_w * scale), int(d_h * scale))) - new_w, new_h = int(bw / fg_scale), int(bh / fg_scale) - sample = imresize(sample, (new_w, new_h)) - - d_w, d_h = sample.size - x1, y1 = get_wh(0, bw - d_w), get_wh(0, bh - d_h) - background.paste(sample, (x1, y1)) - # draw = ImageDraw.Draw(background) - # draw.rectangle((x1 + 240, y1 + 254, x1 + 240 + 5, y1 + 254 + 5), 'red', 'green') - # draw.rectangle((x1 + 80, y1 + 28, x1 + 400, y1 + 480), None, 'green') - # background = background.resize((416, 416)) - - return np.array(background.convert('RGB'))[:, :, ::-1], (x1, y1), background, (d_w, d_h) diff --git a/train_instseg.py b/train_instseg.py index eaffd189e574..b1ea72ff5757 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -54,7 +54,7 @@ from utils.loggers.wandb.wandb_utils import check_wandb_resume from utils.seg_loss import ComputeLoss #from utils.metrics import fitness -from utils.seg_plots import plot_evolve, plot_labels +from utils.plots import plot_evolve, plot_labels from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first diff --git a/utils/boxes.py b/utils/boxes.py deleted file mode 100644 index 1881dde83c81..000000000000 --- a/utils/boxes.py +++ /dev/null @@ -1,298 +0,0 @@ -import time - -import cv2 -import numpy as np -import torch -import torchvision 
-
-from utils.general import clip_coords, scale_coords, xywh2xyxy, xyxy2xywh
-from .general import increment_path
-from .metrics import box_iou
-
-
-def nms_numpy(boxes, scores, class_id, threshold, method=None, agnostic=False):
-    """
-    :param boxes: numpy(N, 4), xyxy
-    :param scores: numpy(N, )
-    :param class_id: numpy(N, )
-    :param threshold: float
-    :param method:
-    :return: indices of kept boxes
-    """
-    if boxes.size == 0:
-        return np.empty((0,), dtype=np.int8)
-    max_wh = 4096
-    if isinstance(boxes, torch.Tensor):
-        boxes = boxes.cpu().numpy()
-    if isinstance(scores, torch.Tensor):
-        scores = scores.cpu().numpy()
-    if isinstance(class_id, torch.Tensor):
-        class_id = class_id.cpu().numpy()
-
-    if boxes.ndim == 1:
-        boxes = boxes[None, :]
-    assert boxes.shape[1] == 4, f"expected boxes shape [N, 4], but got {boxes.shape}"
-    if len(class_id.shape) == 1:
-        class_id = class_id[:, None]
-
-    assert (boxes.shape[0] == class_id.shape[0] == scores.shape[0]), f"boxes, class_id and scores shapes must be equal"
-
-    c = class_id * (0 if agnostic else max_wh)
-    boxes = boxes + c
-    x1 = boxes[:, 0].copy()
-    y1 = boxes[:, 1].copy()
-    x2 = boxes[:, 2].copy()
-    y2 = boxes[:, 3].copy()
-
-    s = scores
-    area = (x2 - x1 + 1) * (y2 - y1 + 1)
-
-    I = np.argsort(s)  # indices that sort scores ascending (highest score taken from the end)
-    pick = np.zeros_like(s, dtype=np.int16)
-    counter = 0
-    while I.size > 0:
-        i = I[-1]
-        pick[counter] = i
-        counter += 1
-        idx = I[0:-1]
-
-        xx1 = np.maximum(x1[i], x1[idx]).copy()
-        yy1 = np.maximum(y1[i], y1[idx]).copy()
-        xx2 = np.minimum(x2[i], x2[idx]).copy()
-        yy2 = np.minimum(y2[i], y2[idx]).copy()
-
-        w = np.maximum(0.0, xx2 - xx1 + 1).copy()
-        h = np.maximum(0.0, yy2 - yy1 + 1).copy()
-
-        inter = w * h
-        if method == "Min":
-            o = inter / np.minimum(area[i], area[idx])
-        else:
-            o = inter / (area[i] + area[idx] - inter)
-        I = I[np.where(o <= threshold)]
-
-    pick = pick[:counter].copy()
-    return pick
-
-
-def save_one_box(xyxy, im, file="image.jpg", gain=1.02, pad=10, square=False, BGR=False, save=True):
-    # Save image crop as {file} with crop size multiple {gain} and {pad} pixels.
Save and/or return crop - xyxy = torch.tensor(xyxy).view(-1, 4) - b = xyxy2xywh(xyxy) # boxes - if square: - b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # attempt rectangle to square - b[:, 2:] = b[:, 2:] * gain + pad # box wh * gain + pad - xyxy = xywh2xyxy(b).long() - clip_coords(xyxy, im.shape) - crop = im[int(xyxy[0, 1]): int(xyxy[0, 3]), int(xyxy[0, 0]): int(xyxy[0, 2]), :: (1 if BGR else -1), ] - if save: - cv2.imwrite(str(increment_path(file, mkdir=True).with_suffix(".jpg")), crop) - return crop - - -def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, - labels=(), max_det=300, ): - """Runs Non-Maximum Suppression (NMS) on inference results - - Returns: - list of detections, on (n,6) tensor per image [xyxy, conf, cls] - """ - - nc = prediction.shape[2] - 5 # number of classes - xc = prediction[..., 4] > conf_thres # candidates - - # Checks - assert (0 <= conf_thres <= 1), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0" - assert (0 <= iou_thres <= 1), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" - - # Settings - min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height - max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() - time_limit = 10.0 # seconds to quit after - redundant = True # require redundant detections - multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) - merge = False # use merge-NMS - - t = time.time() - output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0] - for xi, x in enumerate(prediction): # image index, image inference - # Apply constraints - # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height - x = x[xc[xi]] # confidence - - # Cat apriori labels if autolabelling - if labels and len(labels[xi]): - l = labels[xi] - v = torch.zeros((len(l), nc + 5), device=x.device) - v[:, :4] = l[:, 1:5] # box - v[:, 4] = 1.0 # conf - v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls - x = torch.cat((x, v), 0) - - # If none remain process next image - if not x.shape[0]: - continue - - # Compute conf - x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf - - # Box (center x, center y, width, height) to (x1, y1, x2, y2) - box = xywh2xyxy(x[:, :4]) - - # Detections matrix nx6 (xyxy, conf, cls) - if multi_label: - i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T - x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) - else: # best class only - conf, j = x[:, 5:].max(1, keepdim=True) - x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] - - # Filter by class - if classes is not None: - x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] - - # Apply finite constraint - # if not torch.isfinite(x).all(): - # x = x[torch.isfinite(x).all(1)] - - # Check shape - n = x.shape[0] # number of boxes - if not n: # no boxes - continue - elif n > max_nms: # excess boxes - x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence - - # Batched NMS - c = x[:, 5:6] * (0 if agnostic else max_wh) # classes - boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores - i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS - if i.shape[0] > max_det: # limit detections - i = i[:max_det] - if merge and (1 < n < 3e3): # Merge NMS (boxes merged using weighted mean) - # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) - iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix - weights = iou * scores[None] 
# box weights - x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes - if redundant: - i = i[iou.sum(1) > 1] # require redundancy - - output[xi] = x[i] - if (time.time() - t) > time_limit: - print(f"WARNING: NMS time limit {time_limit}s exceeded") - break # time limit exceeded - - return output - - -def non_max_suppression_numpy(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, - multi_label=False, labels=(), max_det=300, ): - """Runs Non-Maximum Suppression (NMS) on inference results - - Returns: - list of detections, on (n,6) tensor per image [xyxy, conf, cls] - """ - - nc = prediction.shape[2] - 5 # number of classes - xc = prediction[..., 4] > conf_thres # candidates - - # Checks - assert (0 <= conf_thres <= 1), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0" - assert (0 <= iou_thres <= 1), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" - - # Settings - max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() - time_limit = 10.0 # seconds to quit after - multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) - - t = time.time() - output = [np.zeros((0, 6))] * prediction.shape[0] - for xi, x in enumerate(prediction): # image index, image inference - # Apply constraints - # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height - x = x[xc[xi]] # confidence - - # Cat apriori labels if autolabelling - if labels and len(labels[xi]): - l = labels[xi] - v = np.zeros((len(l), nc + 5), device=x.device) - v[:, :4] = l[:, 1:5] # box - v[:, 4] = 1.0 # conf - v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls - x = np.concatenate((x, v), 0) - - # If none remain process next image - if not x.shape[0]: - continue - - # Compute conf - x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf - - # Box (center x, center y, width, height) to (x1, y1, x2, y2) - box = xywh2xyxy(x[:, :4]) - - # Detections matrix nx6 (xyxy, conf, cls) - if multi_label: - i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T - x = np.concatenate((box[i], x[i, j + 5, None], j[:, None].float()), 1) - else: # best class only - conf, j = x[:, 5:].max(1), x[:, 5:].argmax(1) - x = np.concatenate((box, conf[:, None], j.astype(np.float)[:, None]), 1)[conf > conf_thres] - - # Filter by class - if classes is not None: - x = x[(x[:, 5:6] == np.array(classes)).any(1)] - - # Check shape - n = x.shape[0] # number of boxes - if not n: # no boxes - continue - elif n > max_nms: # excess boxes - x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence - - # Batched NMS - boxes, scores, cls = x[:, :4], x[:, 4], x[:, 5] - i = nms_numpy(boxes, scores, cls, iou_thres, agnostic) # NMS - if i.shape[0] > max_det: # limit detections - i = i[:max_det] - - output[xi] = x[i][None, :] if x[i].ndim == 1 else x[i] - if (time.time() - t) > time_limit: - print(f"WARNING: NMS time limit {time_limit}s exceeded") - break # time limit exceeded - - return output - - -def apply_classifier(x, model, img, im0): - # Apply a second stage classifier to yolo outputs - im0 = [im0] if isinstance(im0, np.ndarray) else im0 - for i, d in enumerate(x): # per image - if d is not None and len(d): - d = d.clone() - - # Reshape and pad cutouts - b = xyxy2xywh(d[:, :4]) # boxes - b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # rectangle to square - b[:, 2:] = b[:, 2:] * 1.3 + 30 # pad - d[:, :4] = xywh2xyxy(b).long() - - # Rescale boxes from img_size to im0 size - scale_coords(img.shape[2:], d[:, :4], 
im0[i].shape) - - # Classes - pred_cls1 = d[:, 5].long() - ims = [] - for j, a in enumerate(d): # per item - cutout = im0[i][int(a[1]): int(a[3]), int(a[0]): int(a[2])] - im = cv2.resize(cutout, (224, 224)) # BGR - # cv2.imwrite('example%i.jpg' % j, cutout) - - im = im[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 - im = np.ascontiguousarray(im, dtype=np.float32) # uint8 to float32 - im /= 255.0 # 0 - 255 to 0.0 - 1.0 - ims.append(im) - - pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1) # classifier prediction - x[i] = x[i][pred_cls1 == pred_cls2] # retain matching class detections - - return x diff --git a/utils/seg_plots.py b/utils/seg_plots.py deleted file mode 100644 index 3f09d2ad272c..000000000000 --- a/utils/seg_plots.py +++ /dev/null @@ -1,689 +0,0 @@ -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license -""" -Plotting utils -""" - -import math -import os -from copy import copy -from itertools import repeat -from pathlib import Path - -import cv2 -import matplotlib -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import seaborn as sn -import torch -from PIL import Image, ImageDraw - -from utils.general import check_font, is_ascii, is_chinese -from utils.seg_metrics import fitness -from .boxes import xywh2xyxy, xyxy2xywh - -# Settings -RANK = int(os.getenv("RANK", -1)) -matplotlib.rc("font", **{"size": 11}) -matplotlib.use("Agg") # for writing to files only - - -class Colors: - # Ultralytics color palette https://ultralytics.com/ - def __init__(self): - # hex = matplotlib.colors.TABLEAU_COLORS.values() - hex = ("FF3838", "FF9D97", "FF701F", "FFB21D", "CFD231", "48F90A", "92CC17", "3DDB86", "1A9334", "00D4BB", - "2C99A8", "00C2FF", "344593", "6473FF", "0018EC", "8438FF", "520085", "CB38FF", "FF95C8", "FF37C7",) - self.palette = [self.hex2rgb("#" + c) for c in hex] - self.n = len(self.palette) - - def __call__(self, i, bgr=False): - c = self.palette[int(i) % self.n] - return (c[2], c[1], c[0]) if bgr else c - - @staticmethod - def hex2rgb(h): # rgb order (PIL) - return tuple(int(h[1 + i: 1 + i + 2], 16) for i in (0, 2, 4)) - - -colors = Colors() # create instance for 'from utils.plots import colors' - - -class Annotator: - if RANK in (-1, 0): - check_font() # download TTF if necessary - - # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations - def __init__(self, im, line_width=None, font_size=None, font="Arial.ttf", pil=False, example="abc", ): - assert (im.data.contiguous), "Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images." 
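# The contiguity assert above matters because views created by slicing or
# transposing share storage without being contiguous; a quick illustration:
import numpy as np

im = np.zeros((480, 640, 3), dtype=np.uint8).transpose(1, 0, 2)  # transposed view, non-contiguous
assert not im.data.contiguous
im = np.ascontiguousarray(im)  # copy into contiguous memory
assert im.data.contiguous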
- self.pil = pil or not is_ascii(example) or is_chinese(example) - if self.pil: # use PIL - self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) - self.draw = ImageDraw.Draw(self.im) - self.font = check_font(font="Arial.Unicode.ttf" if is_chinese(example) else font, - size=font_size or max(round(sum(self.im.size) / 2 * 0.035), 12), ) - else: # use cv2 - self.im = im - self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width - - def box_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255)): - # Add one xyxy box to image with label - if self.pil or not is_ascii(label): - self.draw.rectangle(box, width=self.lw, outline=color) # box - if label: - w, h = self.font.getsize(label) # text width, height - outside = box[1] - h >= 0 # label fits outside box - self.draw.rectangle([box[0], box[1] - h if outside else box[1], box[0] + w + 1, - box[1] + 1 if outside else box[1] + h + 1, ], fill=color, ) - # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0 - self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font, ) - else: # cv2 - p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) - cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA) - if label: - tf = max(self.lw - 1, 1) # font thickness - w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height - outside = p1[1] - h - 3 >= 0 # label fits outside box - p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 - cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled - cv2.putText(self.im, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, self.lw / 3, txt_color, - thickness=tf, lineType=cv2.LINE_AA, ) - - def rectangle(self, xy, fill=None, outline=None, width=1): - # Add rectangle to image (PIL-only) - self.draw.rectangle(xy, fill, outline, width) - - def text(self, xy, text, txt_color=(255, 255, 255)): - # Add text to image (PIL-only) - w, h = self.font.getsize(text) # text width, height - self.draw.text((xy[0], xy[1] - h + 1), text, fill=txt_color, font=self.font) - - def result(self): - # Return annotated image as array - return np.asarray(self.im) - - -class Visualizer(object): - """Visualization of one model.""" - - def __init__(self, names) -> None: - super().__init__() - self.names = names - - def draw_one_img(self, img, output, vis_conf=0.4): - """Visualize one images. - - Args: - imgs (numpy.ndarray): one image. - outputs (torch.Tensor): one output, (num_boxes, classes+5) - vis_confs (float, optional): Visualize threshold. - Return: - img (numpy.ndarray): Image after visualization. - """ - if isinstance(output, list): - output = output[0] - if output is None or len(output) == 0: - return img - for (*xyxy, conf, cls) in reversed(output[:, :6]): - if conf < vis_conf: - continue - label = '%s %.2f' % (self.names[int(cls)], conf) - color = colors(int(cls)) - plot_one_box(xyxy, img, label=label, color=color, line_thickness=2) - return img - - def draw_multi_img(self, imgs, outputs, vis_confs=0.4): - """Visualize multi images. - - Args: - imgs (List[numpy.array]): multi images. - outputs (List[torch.Tensor]): multi outputs, List[num_boxes, classes+5]. - vis_confs (float | tuple[float], optional): Visualize threshold. - Return: - imgs (List[numpy.ndarray]): Images after visualization. 
- """ - if isinstance(vis_confs, float): - vis_confs = list(repeat(vis_confs, len(imgs))) - assert len(imgs) == len(outputs) == len(vis_confs) - for i, output in enumerate(outputs): # detections per image - self.draw_one_img(imgs[i], output, vis_confs[i]) - return imgs - - def draw_imgs(self, imgs, outputs, vis_confs=0.4): - if isinstance(imgs, np.ndarray): - return self.draw_one_img(imgs, outputs, vis_confs) - else: - return self.draw_multi_img(imgs, outputs, vis_confs) - - def __call__(self, imgs, outputs, vis_confs=0.4): - return self.draw_imgs(imgs, outputs, vis_confs) - - -def hist2d(x, y, n=100): - # 2d histogram used in labels.png and evolve.png - xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n) - hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges)) - xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1) - yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1) - return np.log(hist[xidx, yidx]) - - -def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5): - from scipy.signal import butter, filtfilt - - # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy - def butter_lowpass(cutoff, fs, order): - nyq = 0.5 * fs - normal_cutoff = cutoff / nyq - return butter(order, normal_cutoff, btype="low", analog=False) - - b, a = butter_lowpass(cutoff, fs, order=order) - return filtfilt(b, a, data) # forward-backward filter - - -def output_to_target(output): - # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] - targets = [] - for i, o in enumerate(output): - for *box, conf, cls in o.cpu().numpy()[:, :6]: - targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) - return np.array(targets) - - -def plot_images(images, targets, paths=None, fname="images.jpg", names=None, max_size=1920, max_subplots=16, ): - # Plot image grid with labels - if isinstance(images, torch.Tensor): - images = images.cpu().float().numpy() - if isinstance(targets, torch.Tensor): - targets = targets.cpu().numpy() - if np.max(images[0]) <= 1: - images *= 255.0 # de-normalise (optional) - bs, _, h, w = images.shape # batch size, _, height, width - bs = min(bs, max_subplots) # limit plot images - ns = np.ceil(bs ** 0.5) # number of subplots (square) - - # Build Image - mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init - for i, im in enumerate(images): - if i == max_subplots: # if last batch has fewer images than we expect - break - x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin - im = im.transpose(1, 2, 0) - mosaic[y: y + h, x: x + w, :] = im - - # Resize (optional) - scale = max_size / ns / max(h, w) - if scale < 1: - h = math.ceil(scale * h) - w = math.ceil(scale * w) - mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) - - # Annotate - fs = int((h + w) * ns * 0.01) # font size - annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True) - for i in range(i + 1): - x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin - annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders - if paths: - annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220), ) # filenames - if len(targets) > 0: - ti = targets[targets[:, 0] == i] # image targets - boxes = xywh2xyxy(ti[:, 2:6]).T - classes = ti[:, 1].astype("int") - labels = ti.shape[1] == 6 # labels if no conf column - conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) - 
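# Convention assumed throughout these plot helpers: a target row is
# [image_index, class, x, y, w, h] for ground truth, with a trailing confidence
# column added for predictions, so the column count tells the two apart:
import numpy as np

ti = np.array([[0, 1, 0.5, 0.5, 0.2, 0.3]])  # one normalized-xywh label for image 0
labels = ti.shape[1] == 6                    # True -> ground truth (no conf column)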
- if boxes.shape[1]: - if boxes.max() <= 1.01: # if normalized with tolerance 0.01 - boxes[[0, 2]] *= w # scale to pixels - boxes[[1, 3]] *= h - elif scale < 1: # absolute coords need scale if image scales - boxes *= scale - boxes[[0, 2]] += x - boxes[[1, 3]] += y - for j, box in enumerate(boxes.T.tolist()): - cls = classes[j] - color = colors(cls) - cls = names[cls] if names else cls - if labels or conf[j] > 0.25: # 0.25 conf thresh - label = f"{cls}" if labels else f"{cls} {conf[j]:.1f}" - annotator.box_label(box, label, color=color) - annotator.im.save(fname) # save - return annotator.result() - - -def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=""): - # Plot LR simulating training for full epochs - optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals - y = [] - for _ in range(epochs): - scheduler.step() - y.append(optimizer.param_groups[0]["lr"]) - plt.plot(y, ".-", label="LR") - plt.xlabel("epoch") - plt.ylabel("LR") - plt.grid() - plt.xlim(0, epochs) - plt.ylim(0) - plt.savefig(Path(save_dir) / "LR.png", dpi=200) - plt.close() - - -def plot_val_txt(): # from utils.plots import *; plot_val() - # Plot val.txt histograms - x = np.loadtxt("val.txt", dtype=np.float32) - box = xyxy2xywh(x[:, :4]) - cx, cy = box[:, 0], box[:, 1] - - fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True) - ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0) - ax.set_aspect("equal") - plt.savefig("hist2d.png", dpi=300) - - fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True) - ax[0].hist(cx, bins=600) - ax[1].hist(cy, bins=600) - plt.savefig("hist1d.png", dpi=200) - - -def plot_targets_txt(): # from utils.plots import *; plot_targets_txt() - # Plot targets.txt histograms - x = np.loadtxt("targets.txt", dtype=np.float32).T - s = ["x targets", "y targets", "width targets", "height targets"] - fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) - ax = ax.ravel() - for i in range(4): - ax[i].hist(x[i], bins=100, label="%.3g +/- %.3g" % (x[i].mean(), x[i].std())) - ax[i].legend() - ax[i].set_title(s[i]) - plt.savefig("targets.jpg", dpi=200) - - -def plot_val_study(file="", dir="", x=None): # from utils.plots import *; plot_val_study() - # Plot file=study.txt generated by val.py (or plot all study*.txt in dir) - save_dir = Path(file).parent if file else Path(dir) - plot2 = False # plot additional results - if plot2: - ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)[1].ravel() - - fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True) - # for f in [save_dir / f'study_coco_{x}.txt' for x in ['yolov5n6', 'yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]: - for f in sorted(save_dir.glob("study*.txt")): - y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T - x = np.arange(y.shape[1]) if x is None else np.array(x) - if plot2: - s = ["P", "R", "mAP@.5", "mAP@.5:.95", "t_preprocess (ms/img)", "t_inference (ms/img)", "t_NMS (ms/img)", ] - for i in range(7): - ax[i].plot(x, y[i], ".-", linewidth=2, markersize=8) - ax[i].set_title(s[i]) - - j = y[3].argmax() + 1 - ax2.plot(y[5, 1:j], y[3, 1:j] * 1e2, ".-", linewidth=2, markersize=8, - label=f.stem.replace("study_coco_", "").replace("yolo", "YOLO"), ) - - ax2.plot(1e3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5], "k.-", linewidth=2, - markersize=8, alpha=0.25, label="EfficientDet", ) - - ax2.grid(alpha=0.2) - ax2.set_yticks(np.arange(20, 60, 5)) - ax2.set_xlim(0, 57) - ax2.set_ylim(25, 55) - ax2.set_xlabel("GPU Speed (ms/img)") 
- ax2.set_ylabel("COCO AP val") - ax2.legend(loc="lower right") - f = save_dir / "study.png" - print(f"Saving {f}...") - plt.savefig(f, dpi=300) - - -def plot_labels(labels, names=(), save_dir=Path("")): - # plot dataset labels - print("Plotting labels... ") - c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes - nc = int(c.max() + 1) # number of classes - x = pd.DataFrame(b.transpose(), columns=["x", "y", "width", "height"]) - - # seaborn correlogram - sn.pairplot(x, corner=True, diag_kind="auto", kind="hist", diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9), ) - plt.savefig(save_dir / "labels_correlogram.jpg", dpi=200) - plt.close() - - # matplotlib labels - matplotlib.use("svg") # faster - ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel() - y = ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8) - # [y[2].patches[i].set_color([x / 255 for x in colors(i)]) for i in range(nc)] # update colors bug #3195 - ax[0].set_ylabel("instances") - if 0 < len(names) < 30: - ax[0].set_xticks(range(len(names))) - ax[0].set_xticklabels(names, rotation=90, fontsize=10) - else: - ax[0].set_xlabel("classes") - sn.histplot(x, x="x", y="y", ax=ax[2], bins=50, pmax=0.9) - sn.histplot(x, x="width", y="height", ax=ax[3], bins=50, pmax=0.9) - - # rectangles - labels[:, 1:3] = 0.5 # center - labels[:, 1:] = xywh2xyxy(labels[:, 1:]) * 2000 - img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255) - for cls, *box in labels[:1000]: - ImageDraw.Draw(img).rectangle(box, width=1, outline=colors(cls)) # plot - ax[1].imshow(img) - ax[1].axis("off") - - for a in [0, 1, 2, 3]: - for s in ["top", "right", "left", "bottom"]: - ax[a].spines[s].set_visible(False) - - plt.savefig(save_dir / "labels.jpg", dpi=200) - matplotlib.use("Agg") - plt.close() - - -def profile_idetection(start=0, stop=0, labels=(), save_dir=""): - # Plot iDetection '*.txt' per-image logs. 
from utils.plots import *; profile_idetection() - ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel() - s = ["Images", "Free Storage (GB)", "RAM Usage (GB)", "Battery", "dt_raw (ms)", "dt_smooth (ms)", - "real-world FPS", ] - files = list(Path(save_dir).glob("frames*.txt")) - for fi, f in enumerate(files): - try: - results = np.loadtxt(f, ndmin=2).T[:, 90:-30] # clip first and last rows - n = results.shape[1] # number of rows - x = np.arange(start, min(stop, n) if stop else n) - results = results[:, x] - t = results[0] - results[0].min() # set t0=0s - results[0] = x - for i, a in enumerate(ax): - if i < len(results): - label = labels[fi] if len(labels) else f.stem.replace("frames_", "") - a.plot(t, results[i], marker=".", label=label, linewidth=1, markersize=5, ) - a.set_title(s[i]) - a.set_xlabel("time (s)") - # if fi == len(files) - 1: - # a.set_ylim(bottom=0) - for side in ["top", "right"]: - a.spines[side].set_visible(False) - else: - a.remove() - except Exception as e: - print("Warning: Plotting error for %s; %s" % (f, e)) - ax[1].legend() - plt.savefig(Path(save_dir) / "idetection_profile.png", dpi=200) - - -def plot_evolve(evolve_csv="path/to/evolve.csv", ): # from utils.plots import *; plot_evolve() - # Plot evolve.csv hyp evolution results - evolve_csv = Path(evolve_csv) - data = pd.read_csv(evolve_csv) - keys = [x.strip() for x in data.columns] - x = data.values - f = fitness(x) - j = np.argmax(f) # max fitness index - plt.figure(figsize=(10, 12), tight_layout=True) - matplotlib.rc("font", **{"size": 8}) - for i, k in enumerate(keys[7:]): - v = x[:, 7 + i] - mu = v[j] # best single result - plt.subplot(6, 5, i + 1) - plt.scatter(v, f, c=hist2d(v, f, 20), cmap="viridis", alpha=0.8, edgecolors="none") - plt.plot(mu, f.max(), "k+", markersize=15) - plt.title("%s = %.3g" % (k, mu), fontdict={"size": 9}) # limit to 40 characters - if i % 5 != 0: - plt.yticks([]) - print("%15s: %.3g" % (k, mu)) - f = evolve_csv.with_suffix(".png") # filename - plt.savefig(f, dpi=200) - plt.close() - print(f"Saved {f}") - - -def plot_results(file="path/to/results.csv", dir="", best=True): - # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') - save_dir = Path(file).parent if file else Path(dir) - fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) - ax = ax.ravel() - files = list(save_dir.glob("results*.csv")) - assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." 
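# The best-epoch index computed in the loop below weights two metric columns
# 0.1/0.9, i.e. the usual YOLOv5 box fitness 0.1 * mAP@0.5 + 0.9 * mAP@0.5:0.95
# (column meanings assumed from the results.csv layout). On dummy values:
import numpy as np

vals = np.array([[0.50, 0.30],
                 [0.55, 0.33],
                 [0.54, 0.35]])  # per-epoch [mAP@0.5, mAP@0.5:0.95] (dummy)
best_epoch = np.argmax(0.1 * vals[:, 0] + 0.9 * vals[:, 1])  # -> 2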
- for _, f in enumerate(files): - try: - data = pd.read_csv(f) - index = np.argmax(0.9 * data.values[:, 7] + 0.1 * data.values[:, 6]) - s = [x.strip() for x in data.columns] - x = data.values[:, 0] - for i, j in enumerate([1, 2, 3, 4, 5, 8, 9, 10, 6, 7]): - y = data.values[:, j] - # y[y == 0] = np.nan # don't show zero values - ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) - if best: - # best - ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3, ) - ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") - else: - # last - ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) - ax[i].set_title(s[ - j] + f"\n{round(y[-1], 5)}") # if j in [8, 9, 10]: # share train and val loss y axes # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) - except Exception as e: - print(f"Warning: Plotting error for {f}: {e}") - ax[1].legend() - fig.savefig(save_dir / "results.png", dpi=200) - plt.close() - - -def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): - # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') - save_dir = Path(file).parent if file else Path(dir) - fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) - ax = ax.ravel() - files = list(save_dir.glob("results*.csv")) - assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." - for _, f in enumerate(files): - try: - data = pd.read_csv(f) - index = np.argmax( - 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + 0.1 * data.values[:, - 11], ) - s = [x.strip() for x in data.columns] - x = data.values[:, 0] - for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): - y = data.values[:, j] - # y[y == 0] = np.nan # don't show zero values - ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) - if best: - # best - ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3, ) - ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") - else: - # last - ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) - ax[i].set_title(s[ - j] + f"\n{round(y[-1], 5)}") # if j in [8, 9, 10]: # share train and val loss y axes # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) - except Exception as e: - print(f"Warning: Plotting error for {f}: {e}") - ax[1].legend() - fig.savefig(save_dir / "results.png", dpi=200) - plt.close() - - -def plot_one_box(x, img, color=None, label=None, line_thickness=None): - import random - - # Plots one bounding box on image img - tl = (line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1) # line/font thickness - color = color or [random.randint(0, 255) for _ in range(3)] - c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) - cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) - if label: - tf = max(tl - 1, 1) # font thickness - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 - cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled - cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA, ) - - -def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detect/exp")): - """ - x: Features to be visualized - module_type: Module type - stage: Module stage within model - n: Maximum number of feature maps to plot - save_dir: Directory to save results - """ - 
if "Detect" not in module_type: - batch, channels, height, width = x.shape # batch, channels, height, width - if height > 1 and width > 1: - f = f"stage{stage}_{module_type.split('.')[-1]}_features.png" # filename - - blocks = torch.chunk(x[0].cpu(), channels, dim=0) # select batch index 0, block by channels - n = min(n, channels) # number of plots - fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True) # 8 rows x n/8 cols - ax = ax.ravel() - plt.subplots_adjust(wspace=0.05, hspace=0.05) - for i in range(n): - ax[i].imshow(blocks[i].squeeze()) # cmap='gray' - ax[i].axis("off") - - print(f"Saving {save_dir / f}... ({n}/{channels})") - plt.savefig(save_dir / f, dpi=300, bbox_inches="tight") - plt.close() - - -def plot_images_and_masks(images, targets, masks, paths=None, fname="images.jpg", names=None, max_size=640, - max_subplots=16, ): - # Plot image grid with labels - # print("targets:", targets.shape) - # print("masks:", masks.shape) - # print('--------------------------') - - if isinstance(images, torch.Tensor): - images = images.cpu().float().numpy() - if isinstance(targets, torch.Tensor): - targets = targets.cpu().numpy() - if isinstance(masks, torch.Tensor): - masks = masks.cpu().numpy() - masks = masks.astype(int) - - # un-normalise - if np.max(images[0]) <= 1: - images *= 255 - - tl = 3 # line thickness - tf = max(tl - 1, 1) # font thickness - bs, _, h, w = images.shape # batch size, _, height, width - bs = min(bs, max_subplots) # limit plot images - ns = np.ceil(bs ** 0.5) # number of subplots (square) - - # Check if we should resize - scale_factor = max_size / max(h, w) - if scale_factor < 1: - h = math.ceil(scale_factor * h) - w = math.ceil(scale_factor * w) - - mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init - for i, img in enumerate(images): - if i == max_subplots: # if last batch has fewer images than we expect - break - - block_x = int(w * (i // ns)) - block_y = int(h * (i % ns)) - - img = img.transpose(1, 2, 0) - if scale_factor < 1: - img = cv2.resize(img, (w, h)) - - mosaic[block_y: block_y + h, block_x: block_x + w, :] = img - if len(targets) > 0: - idx = (targets[:, 0]).astype(int) - image_targets = targets[idx == i] - # print(targets.shape) - # print(masks.shape) - image_masks = masks[idx == i] - # mosaic_masks - # mosaic_masks[block_y:block_y + h, - # block_x:block_x + w, :] = image_masks - boxes = xywh2xyxy(image_targets[:, 2:6]).T - classes = image_targets[:, 1].astype("int") - labels = image_targets.shape[1] == 6 # labels if no conf column - conf = (None if labels else image_targets[:, 6]) # check for confidence presence (label vs pred) - - if boxes.shape[1]: - if boxes.max() <= 1.01: # if normalized with tolerance 0.01 - boxes[[0, 2]] *= w # scale to pixels - boxes[[1, 3]] *= h - elif scale_factor < 1: # absolute coords need scale if image scales - boxes *= scale_factor - boxes[[0, 2]] += block_x - boxes[[1, 3]] += block_y - for j, box in enumerate(boxes.T): - cls = int(classes[j]) - color = colors(cls) - cls = names[cls] if names else cls - mask = image_masks[j].astype(np.bool) - # print(mask.shape) - # print(mosaic.shape) - if labels or conf[j] > 0.25: # 0.25 conf thresh - label = "%s" % cls if labels else "%s %.1f" % (cls, conf[j]) - plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) - mosaic[block_y: block_y + h, block_x: block_x + w, :][mask] = \ - mosaic[block_y: block_y + h, block_x: block_x + w, :][mask] * 0.35 + (np.array(color) * 0.65) - - # Draw image filename labels - if paths: - label = 
Path(paths[i]).name[:40] # trim to 40 char - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf, - lineType=cv2.LINE_AA, ) - - # Image border - cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3, ) - - if fname: - r = min(1280.0 / max(h, w) / ns, 1.0) # ratio to limit image size - mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA) - # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save - Image.fromarray(mosaic).save(fname) # PIL save - return mosaic - - -def plot_images_boxes_and_masks(images, targets, masks=None, paths=None, fname="images.jpg", names=None, max_size=640, - max_subplots=16, ): - if masks is not None: - return plot_images_and_masks(images, targets, masks, paths, fname, names, max_size, max_subplots) - else: - return plot_images(images, targets, paths, fname, names, max_size, max_subplots) - - -def plot_masks(img, masks, colors, alpha=0.5): - """ - Args: - img (tensor): img on cuda, shape: [3, h, w], range: [0, 1] - masks (tensor): predicted masks on cuda, shape: [n, h, w] - colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] - Return: - img after draw masks, shape: [h, w, 3] - - transform colors and send img_gpu to cpu for the most time. - """ - img_gpu = img.clone() - num_masks = len(masks) - # [n, 1, 1, 3] - # faster this way to transform colors - colors = torch.tensor(colors, device=img.device).float() / 255.0 - colors = colors[:, None, None, :] - # [n, h, w, 1] - masks = masks[:, :, :, None] - masks_color = masks.repeat(1, 1, 1, 3) * colors * alpha - inv_alph_masks = masks * (-alpha) + 1 - masks_color_summand = masks_color[0] - if num_masks > 1: - inv_alph_cumul = inv_alph_masks[: (num_masks - 1)].cumprod(dim=0) - masks_color_cumul = masks_color[1:] * inv_alph_cumul - masks_color_summand += masks_color_cumul.sum(dim=0) - - # print(inv_alph_masks.prod(dim=0).shape) # [h, w, 1] - img_gpu = img_gpu.flip(dims=[0]) # filp channel for opencv - img_gpu = img_gpu.permute(1, 2, 0).contiguous() - # [h, w, 3] - img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand - return (img_gpu * 255).byte().cpu().numpy() diff --git a/val_instseg.py b/val_instseg.py new file mode 100644 index 000000000000..20183b6d7118 --- /dev/null +++ b/val_instseg.py @@ -0,0 +1,85 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Validate a trained YOLOv5 model accuracy on a custom dataset + +Usage: + $ python path/to/val.py --data coco128.yaml --weights yolov5s.pt --img 640 +""" + +import argparse +from evaluator import Yolov5Evaluator + +from utils.general import ( + set_logging, + print_args, + check_yaml, + check_requirements, +) + + +def parse_opt(): + parser = argparse.ArgumentParser() + parser.add_argument('-d', '--data', type=str, default='data/coco128.yaml', help='dataset.yaml path') + parser.add_argument('-w', '--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)') + parser.add_argument('-b', '--batch-size', type=int, default=32, help='batch size') + parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') + parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') + parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold') + parser.add_argument('--task', default='val', help='train, val, 
test, speed or study') + parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') + parser.add_argument('--augment', action='store_true', help='augmented inference') + parser.add_argument('--verbose', action='store_true', help='report mAP by class') + parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') + parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') + parser.add_argument('--save-json', action='store_true', help='save a COCO-JSON results file') + parser.add_argument('--nosave', action='store_true', help='do not save anything.') + parser.add_argument('--project', default='runs/val', help='save to project/name') + parser.add_argument('--name', default='exp', help='save to project/name') + parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') + parser.add_argument('--overlap-mask', action='store_true', help='Eval overlapping masks') + + opt = parser.parse_args() + opt.data = check_yaml(opt.data) # check YAML + opt.save_json |= opt.data.endswith('coco.yaml') + print_args(vars(opt)) + return opt + +def main(opt): + set_logging() + check_requirements(exclude=("tensorboard", "thop")) + evaluator = Yolov5Evaluator( + data=opt.data, + conf_thres=opt.conf_thres, + iou_thres=opt.iou_thres, + device=opt.device, + single_cls=opt.single_cls, + augment=opt.augment, + verbose=opt.verbose, + project=opt.project, + name=opt.name, + exist_ok=opt.exist_ok, + half=opt.half, + mask=True, + nosave=opt.nosave, + overlap=opt.overlap_mask, + ) + + if opt.task in ("train", "val", "test"): # run normally + evaluator.run( + weights=opt.weights, + batch_size=opt.batch_size, + imgsz=opt.imgsz, + save_txt=opt.save_txt, + save_conf=opt.save_conf, + save_json=opt.save_json, + task=opt.task, + ) + else: + raise ValueError(f"not support task {opt.task}") + + +if __name__ == "__main__": + opt = parse_opt() + main(opt) From f45d6f3ffd25f1376e2af5bb5931a894cef1efe1 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 20 Jul 2022 17:26:04 +0530 Subject: [PATCH 041/247] log weights after last epoch --- utils/loggers/__init__.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index a142f607561e..e1edf484e5e6 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -332,6 +332,15 @@ def on_train_end(self, plots, epoch, masks=False): self.tb.add_image( f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats="HWC" ) + if self.wandb: + best = self.save_dir/ "weights" / "best.pt" + last = self.save_dir / "weights" / "last.pt" + wandb.log_artifact(str(best if best.exists() else last), + type='model', + name=f'run_{self.wandb.run.id}_model', + aliases=['latest', 'best', 'stripped']) + self.wandb.finish_run() + def on_params_update(self): # Update hyperparams or configs of the experiment From 8f0ca0ebf338fd536ffe7ef03ded7ea21c51ffcf Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 21 Jul 2022 20:16:09 +0800 Subject: [PATCH 042/247] add detect_instseg.py --- detect_instseg.py | 278 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 278 insertions(+) create mode 100644 detect_instseg.py diff --git a/detect_instseg.py b/detect_instseg.py new file mode 100644 
index 000000000000..2e67591fe936 --- /dev/null +++ b/detect_instseg.py @@ -0,0 +1,278 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Run inference on images, videos, directories, streams, etc. + +Usage - sources: + $ python path/to/detect.py --weights yolov5s.pt --source 0 # webcam + img.jpg # image + vid.mp4 # video + path/ # directory + path/*.jpg # glob + 'https://youtu.be/Zgi9g1ksQHc' # YouTube + 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream + +Usage - formats: + $ python path/to/detect.py --weights yolov5s.pt # PyTorch + yolov5s.torchscript # TorchScript + yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn + yolov5s.xml # OpenVINO + yolov5s.engine # TensorRT + yolov5s.mlmodel # CoreML (macOS-only) + yolov5s_saved_model # TensorFlow SavedModel + yolov5s.pb # TensorFlow GraphDef + yolov5s.tflite # TensorFlow Lite + yolov5s_edgetpu.tflite # TensorFlow Edge TPU +""" + +import argparse +import os +import sys +from pathlib import Path + +import torch +import torch.backends.cudnn as cudnn + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[0] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH +ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative + +from models.experimental import attempt_load +from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams +from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, + increment_path, print_args, scale_coords, strip_optimizer, xyxy2xywh) +from utils.plots import Annotator, colors, save_one_box, plot_masks +from utils.torch_utils import select_device, time_sync +from utils.segment import non_max_suppression_masks, scale_masks, process_mask_upsample + + +@torch.no_grad() +def run( + weights=ROOT / 'yolov5s.pt', # model.pt path(s) + source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam + imgsz=(640, 640), # inference size (height, width) + conf_thres=0.25, # confidence threshold + iou_thres=0.45, # NMS IOU threshold + max_det=1000, # maximum detections per image + device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu + view_img=False, # show results + save_txt=False, # save results to *.txt + save_conf=False, # save confidences in --save-txt labels + save_crop=False, # save cropped prediction boxes + nosave=False, # do not save images/videos + classes=None, # filter by class: --class 0, or --class 0 2 3 + agnostic_nms=False, # class-agnostic NMS + augment=False, # augmented inference + visualize=False, # visualize features + update=False, # update all models + project=ROOT / 'runs/detect', # save results to project/name + name='exp', # save results to project/name + exist_ok=False, # existing project/name ok, do not increment + line_thickness=3, # bounding box thickness (pixels) + hide_labels=False, # hide labels + hide_conf=False, # hide confidences + half=False, # use FP16 half-precision inference +): + source = str(source) + save_img = not nosave and not source.endswith('.txt') # save inference images + is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) + is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://')) + webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file) + if is_url and is_file: + source = check_file(source) # download + + # Directories + save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run + (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir + + # Load model + device = select_device(device) + model = attempt_load(weights, device=device, inplace=True, fuse=True) # attempt_load accepts a str or a list of weights + stride = max(int(model.stride.max()), 32) # model stride + names = model.module.names if hasattr(model, 'module') else model.names # get class names + model.half() if half else model.float() + pt = True + imgsz = check_img_size(imgsz, s=stride) # check image size + + # Dataloader + if webcam: + view_img = check_imshow() + cudnn.benchmark = True # set True to speed up constant image size inference + dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) + bs = len(dataset) # batch_size + else: + dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) + bs = 1 # batch_size + vid_path, vid_writer = [None] * bs, [None] * bs + + # Run inference + if device != "cpu": + im = torch.zeros(1, 3, *imgsz).to(device).half() # input image + model(im) # warmup + seen, windows, dt = 0, [], [0.0, 0.0, 0.0] + for path, im, im0s, vid_cap, s in dataset: + t1 = time_sync() + im = torch.from_numpy(im).to(device) + im = im.half() if half else im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + if len(im.shape) == 3: + im = im[None] # expand for batch dim + t2 = time_sync() + dt[0] += t2 - t1 + + # Inference + visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False + pred, out = model(im, augment=augment, visualize=visualize) + proto = out[1] + t3 = time_sync() + dt[1] += t3 - t2 + + # NMS + pred = non_max_suppression_masks(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) + dt[2] += time_sync() - t3 + + # Second-stage classifier (optional) + # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
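# NOTE: each det row returned by non_max_suppression_masks is laid out as
# [x1, y1, x2, y2, conf, cls, c1, ..., c32]: the usual six detection columns
# followed by that detection's 32 mask coefficients. As in dbolya/yolact, every
# instance mask is decoded as a linear combination of shared prototype masks,
# roughly (shapes illustrative; the training loss uses the channels-last form (80, 80, 32)):
#   pred_mask[y, x] = sigmoid(sum_k coeffs[k] * proto[y, x, k]) > threshold
# upsampled to the network input size and cropped to the predicted box, which is
# what process_mask_upsample does below before det is trimmed back to det[:, :6].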
+ + # Process predictions + for i, det in enumerate(pred): # per image + seen += 1 + if webcam: # batch_size >= 1 + p, im0, frame = path[i], im0s[i].copy(), dataset.count + s += f'{i}: ' + else: + p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0) + + p = Path(p) # to Path + save_path = str(save_dir / p.name) # im.jpg + txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # im.txt + s += '%gx%g ' % im.shape[2:] # print string + gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh + imc = im0.copy() if save_crop else im0 # for save_crop + annotator = Annotator(im0, line_width=line_thickness, example=str(names)) + if len(det): + # mask stuff + masks_conf = det[:, 6:] + # binary mask, (img_h, img_w, n) + masks = process_mask_upsample(proto[i], masks_conf, det[:, :4], im.shape[2:]) + # n, img_h, img_w + masks = masks.permute(2, 0, 1).contiguous() + # bbox stuff + det = det[:, :6] # drop the mask coefficients from outputs, keep [xyxy, conf, cls] + # Rescale boxes from img_size to im0 size + det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() + + # Print results + for c in det[:, -1].unique(): + n = (det[:, -1] == c).sum() # detections per class + s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string + + # plot masks + mcolors = [colors(int(cls)) for cls in det[:, 5]] + # NOTE: this way to draw masks is faster, + # but the image might get blurred, + # from https://github.com/dbolya/yolact + # image with masks, (img_h, img_w, 3) + img_masks = plot_masks(im[i], masks, mcolors) + # scale image to original hw + img_masks = scale_masks(im.shape[2:], img_masks, im0.shape) + annotator.im = img_masks + + # Write results + for *xyxy, conf, cls in reversed(det): + if save_txt: # Write to file + xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh + line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format + with open(f'{txt_path}.txt', 'a') as f: + f.write(('%g ' * len(line)).rstrip() % line + '\n') + + if save_img or save_crop or view_img: # Add bbox to image + c = int(cls) # integer class + label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') + annotator.box_label(xyxy, label, color=colors(c, True)) + if save_crop: + save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) + + # Stream results + im0 = annotator.result() + if view_img: + if p not in windows: + windows.append(p) + cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) + cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) + cv2.imshow(str(p), im0) + cv2.waitKey(1) # 1 millisecond + + # Save results (image with detections) + if save_img: + if dataset.mode == 'image': + cv2.imwrite(save_path, im0) + else: # 'video' or 'stream' + if vid_path[i] != save_path: # new video + vid_path[i] = save_path + if isinstance(vid_writer[i], cv2.VideoWriter): + vid_writer[i].release() # release previous video writer + if vid_cap: # video + fps = vid_cap.get(cv2.CAP_PROP_FPS) + w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + else: # stream + fps, w, h = 30, im0.shape[1], im0.shape[0] + save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos + vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) + vid_writer[i].write(im0) + + # Print time (inference-only) + LOGGER.info(f'{s}Done.
({t3 - t2:.3f}s)') + + # Print results + t = tuple(x / seen * 1E3 for x in dt) # speeds per image + LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t) + if save_txt or save_img: + s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' + LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") + if update: + strip_optimizer(weights) # update model (to fix SourceChangeWarning) + + +def parse_opt(): + parser = argparse.ArgumentParser() + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)') + parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') + parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') + parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') + parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') + parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image') + parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--view-img', action='store_true', help='show results') + parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') + parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') + parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes') + parser.add_argument('--nosave', action='store_true', help='do not save images/videos') + parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3') + parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') + parser.add_argument('--augment', action='store_true', help='augmented inference') + parser.add_argument('--visualize', action='store_true', help='visualize features') + parser.add_argument('--update', action='store_true', help='update all models') + parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name') + parser.add_argument('--name', default='exp', help='save results to project/name') + parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') + parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels') + parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') + parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') + opt = parser.parse_args() + opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand + print_args(vars(opt)) + return opt + + +def main(opt): + check_requirements(exclude=('tensorboard', 'thop')) + run(**vars(opt)) + + +if __name__ == "__main__": + opt = parse_opt() + main(opt) From 7fae119284224ba59885a6ab576f3f2962c75d5a Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Thu, 21 Jul 2022 18:07:07 +0530 Subject: [PATCH 043/247] fix check for device --- detect_instseg.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/detect_instseg.py b/detect_instseg.py index 2e67591fe936..34fab4372db4 100644 --- a/detect_instseg.py +++ b/detect_instseg.py @@ -88,6 +88,7 @@ def 
run( # Load model device = select_device(device) + import pdb;pdb.set_trace() model = attempt_load(weights, device=device, inplace=True, fuse=True) stride = max(int(model.stride.max()), 32) # model stride names = model.module.names if hasattr(model, 'module') else model.names # get class names @@ -107,7 +108,7 @@ def run( vid_path, vid_writer = [None] * bs, [None] * bs # Run inference - if device != "cpu": + if str(device) != "cpu": im = torch.zeros(1, 3, *imgsz).to(device).half() # input image model(im) # warmup seen, windows, dt = 0, [], [0.0, 0.0, 0.0]

From c307b45de45111e5e0dcd6849f8cb729558d9b7d Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Thu, 21 Jul 2022 18:07:42 +0530 Subject: [PATCH 044/247] remove pdb import --- detect_instseg.py | 1 - 1 file changed, 1 deletion(-)

diff --git a/detect_instseg.py b/detect_instseg.py index 34fab4372db4..a703f75d486b 100644 --- a/detect_instseg.py +++ b/detect_instseg.py @@ -88,7 +88,6 @@ def run( # Load model device = select_device(device) - import pdb;pdb.set_trace() model = attempt_load(weights, device=device, inplace=True, fuse=True) stride = max(int(model.stride.max()), 32) # model stride names = model.module.names if hasattr(model, 'module') else model.names # get class names

From fd6bfbbcb589806eb047528cf6a2887c507e50b3 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Fri, 22 Jul 2022 18:43:26 +0530 Subject: [PATCH 045/247] finish run --- utils/loggers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index e1edf484e5e6..d04d42ef6f3e 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -339,7 +339,7 @@ def on_train_end(self, plots, epoch, masks=False): type='model', name=f'run_{self.wandb.run.id}_model', aliases=['latest', 'best', 'stripped']) - self.wandb.finish_run() + self.wandb.finish() def on_params_update(self):

From 29e433be3795dbefc2dc6822e0b49558746a7bb2 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Wed, 3 Aug 2022 05:38:18 +0000 Subject: [PATCH 046/247] update bias init&&update obj loss --- evaluator.py | 8 ++++---- models/yolo.py | 2 +- train_instseg.py | 2 ++ utils/seg_loss.py | 18 +++++++++++++----- 4 files changed, 20 insertions(+), 10 deletions(-)
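With the mask branch added, the per-anchor prediction vector is laid out as [x, y, w, h, obj, m1 ... m_mask_dim, cls1 ... cls_nc], so the class-prior bias initialization has to start at channel 5 + mask_dim instead of 5, and the objectness target is rebalanced to take half its value from box IoU and half from mask IoU. A minimal sketch of the arithmetic behind the two changes (nc and mask_dim values illustrative):

    import math

    nc, mask_dim = 80, 32
    cls_start = 5 + mask_dim                 # [box(4), obj(1), coeffs(mask_dim), classes(nc)]
    cls_prior = math.log(0.6 / (nc - 0.99))  # same ~0.6/nc class prior as stock YOLOv5
    # objectness target of a matched anchor (see the seg_loss hunks below):
    # tobj = 0.5 * ((1 - gr) + gr * box_iou) + 0.5 * mask_iou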
diff --git a/evaluator.py b/evaluator.py index 27533c3048f1..74096e3f5b32 100644 --- a/evaluator.py +++ b/evaluator.py @@ -16,18 +16,18 @@ import torch import torch.nn.functional as F from PIL import Image -# import pycocotools.mask as mask_util +import pycocotools.mask as mask_util from tqdm import tqdm from models.experimental import attempt_load from seg_dataloaders import create_dataloader from utils.general import (box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, ) -from utils.general import (check_dataset, check_img_size, check_suffix, ) +from utils.general import (check_dataset, check_img_size, check_suffix) from utils.general import (coco80_to_coco91_class, increment_path, colorstr, ) from utils.plots import output_to_target, plot_images_boxes_and_masks from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix from utils.segment import (non_max_suppression_masks, mask_iou, process_mask, process_mask_upsample, scale_masks, ) -from utils.torch_utils import select_device, time_sync +from utils.torch_utils import select_device, time_sync, de_parallel def save_one_txt(predn, save_conf, shape, file): @@ -304,7 +304,7 @@ def inference(self, model, img, targets, masks=None, compute_loss=None): targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(self.device) # to pixels t3 = time_sync() out = self.nms(prediction=out, conf_thres=self.conf_thres, iou_thres=self.iou_thres, multi_label=True, - agnostic=self.single_cls, ) + agnostic=self.single_cls, mask_dim=de_parallel(model).model[-1].mask_dim) self.dt[2] += time_sync() - t3 return out, train_out

diff --git a/models/yolo.py b/models/yolo.py index 786120b4902a..1d46726cf502 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -276,7 +276,7 @@ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is for mi, s in zip(m.m, m.stride): # from b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) - b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls + b.data[:, 5+m.mask_dim:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) def _print_biases(self):

diff --git a/train_instseg.py b/train_instseg.py index b1ea72ff5757..5f98ff839ba6 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -177,6 +177,8 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay) g[0].append(v.weight) + # hyp['lr0'] = hyp['lr0'] / batch_size * 128 + # hyp['warmup_bias_lr'] = 0.01 if opt.optimizer == 'Adam': optimizer = Adam(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum elif opt.optimizer == 'AdamW':

diff --git a/utils/seg_loss.py b/utils/seg_loss.py index e5294a5300f7..94eebc7a0e5f 100644 --- a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -133,7 +133,7 @@ def loss_segment(self, preds, targets, masks): if self.sort_obj_iou: sort_id = torch.argsort(score_iou) b, a, gj, gi, score_iou = (b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], score_iou[sort_id],) - tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * score_iou # iou ratio + tobj[b, a, gj, gi] = 0.5 * ((1.0 - self.gr) + self.gr * score_iou) # iou ratio # Classification if self.nc > 1: # cls loss (only if multiple classes) @@ -170,7 +170,13 @@ def loss_segment(self, preds, targets, masks): psi = ps[index][:, 5: self.nm] proto = proto_out[bi] - batch_lseg += self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) + one_lseg, iou = self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) + batch_lseg += one_lseg + + # update tobj + iou = iou.detach().clamp(0).type(tobj.dtype) + tobj[b[index], a[index], gj[index], gi[index]] += 0.5 * iou[0] + lseg += batch_lseg / len(b.unique()) obji = self.BCEobj(pi[..., 4], tobj) @@ -193,10 +199,12 @@ def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): """mask loss of single pic.""" # (80, 80, 32) @ (32, n) -> (80, 80, n) pred_mask = proto @ pred.tanh().T + # lseg_iou = self.mask_loss(pred_mask, gt_mask, xyxy) + iou = self.mask_loss(pred_mask, gt_mask, xyxy, return_iou=True) lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") lseg = crop(lseg, xyxy) lseg = lseg.mean(dim=(0, 1)) / w / h - return lseg.mean() + return lseg.mean(), iou  # + lseg_iou.mean() def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
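# NOTE: single_mask_loss above mirrors inference-time mask decoding inside the
# loss (shapes as in the comments; illustrative only):
#   proto: (80, 80, 32), psi: (k, 32) coefficients of the k matched anchors
#   pred_mask = proto @ psi.tanh().T                  -> (80, 80, k) mask logits
#   lseg = BCE_with_logits(pred_mask, gt_mask)        # elementwise, reduction="none"
#   lseg = crop(lseg, xyxy).mean(dim=(0, 1)) / w / h  # only pixels inside the GT box
#                                                     # count, normalized by box area
# and the mask IoU computed with return_iou=True feeds the 0.5 * iou objectness
# term added in loss_segment above.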
@@ -334,7 +342,7 @@ class MaskIOULoss(nn.Module): def __init__(self) -> None: super().__init__() - def forward(self, pred_mask, gt_mask, mxyxy=None): + def forward(self, pred_mask, gt_mask, mxyxy=None, return_iou=False): """ Args: pred_mask (torch.Tensor): prediction of masks, (80/160, 80/160, n) @@ -349,7 +357,7 @@ def forward(self, pred_mask, gt_mask, mxyxy=None): pred_mask = pred_mask.permute(2, 0, 1).view(n, -1) gt_mask = gt_mask.permute(2, 0, 1).view(n, -1) iou = masks_iou(pred_mask, gt_mask) - return 1.0 - iou + return iou if return_iou else (1.0 - iou) import math

From 005f8cd390026b40136722b6a06cc04d3177bc88 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 3 Aug 2022 14:21:13 +0530 Subject: [PATCH 047/247] log at correct steps --- evaluator.py | 4 +++- utils/loggers/__init__.py | 4 +--- 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/evaluator.py b/evaluator.py index 74096e3f5b32..1db1ff28c7d0 100644 --- a/evaluator.py +++ b/evaluator.py @@ -94,6 +94,7 @@ def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls= "Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", "Mask:{P", "R", "mAP@.5", "mAP@.5:.95}",) if self.mask else ("%20s" + "%11s" * 6) % ( "Class", "Images", "Labels", "P", "R", "mAP@.5", "mAP@.5:.95",)) + self.step = 0 # coco stuff self.is_coco = isinstance(self.data.get("val"), str) and self.data["val"].endswith( @@ -163,6 +164,7 @@ def run_training(self, model, dataloader, compute_loss=None): # Return results model.float() # for training + self.step += 1 return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),), self.metric.get_maps(self.nc), t,) @@ -493,7 +495,7 @@ def plot_images(self, i, img, targets, masks, out, paths): # daemon=True, ).start() import wandb if wandb.run: - wandb.log({f"pred_{i}": wandb.Image(str(f))}) + wandb.log({f"pred_{i}": wandb.Image(str(f))}, step=self.step) def nms(self, **kwargs): return (non_max_suppression_masks(**kwargs) if self.mask else non_max_suppression(**kwargs))

diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 14a5009ac880..92328a6b1403 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -404,8 +404,6 @@ def on_train_batch_end(self, ni, model, imgs, targets, masks, paths, plots, sync if ni < 3: f = self.save_dir / f"train_batch{ni}.jpg" # filename plot_images_and_masks(imgs, targets, masks, paths, f) - if self.wandb: - wandb.log({"train_labels": wandb.Image(str(f))}) @@ -427,4 +425,4 @@ def on_fit_epoch_end(self, vals, epoch): for k, v in x.items(): self.tb.add_scalar(k, v, epoch) if self.wandb: - wandb.log(x) + wandb.log(x, step=epoch)

From a5cfa79f60702d716850cbde660b45676c86b672 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 3 Aug 2022 15:00:32 +0530 Subject: [PATCH 048/247] update logger step --- utils/loggers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 92328a6b1403..8e670a86b1b1 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -425,4 +425,4 @@ def on_fit_epoch_end(self, vals, epoch): for k, v in x.items(): self.tb.add_scalar(k, v, epoch) if self.wandb: - wandb.log(x, step=epoch) + wandb.log(x, step=epoch, commit=True)

From b89a2d65ef72e5042ad66cd0bb9419b2094a3840 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 3 Aug 2022 20:47:59 +0530 Subject: [PATCH 049/247] make compatible with torch 1.12 --- utils/seg_loss.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/utils/seg_loss.py b/utils/seg_loss.py index 94eebc7a0e5f..e0618f831e63 100644 ---
a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -294,8 +294,8 @@ def build_targets_for_masks(self, p, targets): ], device=targets.device, ).float() * g) # offsets for i in range(self.nl): - anchors = self.anchors[i] - gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain + anchors, shape = self.anchors[i], p[i].shape + gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain # Match targets to anchors t = targets * gain @@ -328,7 +328,7 @@ def build_targets_for_masks(self, p, targets): # Append a = t[:, 6].long() # anchor indices tidx = t[:, 7].long() - indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices + indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid tbox.append(torch.cat((gxy - gij, gwh), 1)) # box anch.append(anchors[a]) # anchors tcls.append(c) # class From 146d96869cf218122fc41ba5ff7b6c006901fc1a Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Fri, 5 Aug 2022 15:59:55 +0530 Subject: [PATCH 050/247] update --- models/yolo.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/models/yolo.py b/models/yolo.py index 1d46726cf502..885a1d7574c8 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -14,6 +14,8 @@ from copy import deepcopy from pathlib import Path +from torch import NoneType + FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLOv5 root directory if str(ROOT) not in sys.path: @@ -28,6 +30,7 @@ from utils.plots import feature_visualization from utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device, time_sync) +import torch.nn.functional as F try: import thop # for FLOPs computation @@ -108,7 +111,7 @@ def __init__(self, nc=80, anchors=(), mask_dim=32, proto_channel=256, ch=(), inp # nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), # nn.SiLU(inplace=True), # nn.Upsample(scale_factor=2, mode='nearest'), - nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False), + Upsample(scale_factor=2, mode='bilinear', align_corners=False), nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), nn.SiLU(inplace=True), nn.Conv2d(self.proto_c, self.mask_dim, kernel_size=1, padding=0), @@ -376,6 +379,18 @@ def parse_model(d, ch): # model_dict, input_channels(3) ch.append(c2) return nn.Sequential(*layers), sorted(save) +class Upsample(nn.Module): + ''' + deterministic upsample layer + ''' + def __init__(self, scale_factor, mode="bilinear", align_corners=False) -> None: + super().__init__() + self.scale_factor = scale_factor + self.mode = mode + self.align_corners = align_corners + + def forward(self, x): + return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode, align_corners=self.align_corners) if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -409,4 +424,4 @@ def parse_model(d, ch): # model_dict, input_channels(3) print(f'Error in {cfg}: {e}') else: # report fused model summary - model.fuse() + model.fuse() \ No newline at end of file From 3cbaa348e23e879d3cef8da21cf9fc5449479e55 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Wed, 10 Aug 2022 18:06:33 +0800 Subject: [PATCH 051/247] clean up --- models/yolo.py | 7 +- requirements.txt | 1 + seg_augmentations.py | 287 ------ seg_dataloaders.py | 1094 ---------------------- detect_instseg.py => segment/detect.py | 6 +- evaluator.py => segment/evaluator.py | 13 +- train_instseg.py => segment/train.py | 12 +- val_instseg.py => 
segment/val.py | 0 segment/val_new.py | 459 +++++++++ utils/dataloaders.py | 1 + utils/general.py | 0 utils/metrics.py | 12 +- utils/seg_metrics.py | 361 ------- utils/segment/__init__.py | 0 utils/segment/augmentations.py | 114 +++ utils/segment/dataloaders.py | 305 ++++++ utils/{segment.py => segment/general.py} | 30 +- utils/{seg_loss.py => segment/loss.py} | 230 +---- utils/segment/metrics.py | 149 +++ 19 files changed, 1089 insertions(+), 1992 deletions(-) delete mode 100644 seg_augmentations.py delete mode 100644 seg_dataloaders.py rename detect_instseg.py => segment/detect.py (98%) rename evaluator.py => segment/evaluator.py (98%) rename train_instseg.py => segment/train.py (98%) rename val_instseg.py => segment/val.py (100%) create mode 100644 segment/val_new.py mode change 100755 => 100644 utils/dataloaders.py mode change 100755 => 100644 utils/general.py delete mode 100644 utils/seg_metrics.py create mode 100644 utils/segment/__init__.py create mode 100644 utils/segment/augmentations.py create mode 100644 utils/segment/dataloaders.py rename utils/{segment.py => segment/general.py} (89%) rename utils/{seg_loss.py => segment/loss.py} (55%) create mode 100644 utils/segment/metrics.py diff --git a/models/yolo.py b/models/yolo.py index 885a1d7574c8..cd9248e7c8c2 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -279,7 +279,10 @@ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is for mi, s in zip(m.m, m.stride): # from b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) - b.data[:, 5+m.mask_dim:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls + if hasattr(m, "mask_dim"): + b.data[:, 5+m.mask_dim:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls + else: + b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) def _print_biases(self): @@ -424,4 +427,4 @@ def forward(self, x): print(f'Error in {cfg}: {e}') else: # report fused model summary - model.fuse() \ No newline at end of file + model.fuse() diff --git a/requirements.txt b/requirements.txt index 6313cecee578..8e5720ac50aa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,7 @@ scipy>=1.4.1 torch>=1.7.0 torchvision>=0.8.1 tqdm>=4.64.0 +easydict>=1.9 protobuf<=3.20.1 # https://github.com/ultralytics/yolov5/issues/8012 # Logging ------------------------------------- diff --git a/seg_augmentations.py b/seg_augmentations.py deleted file mode 100644 index 409e021772b3..000000000000 --- a/seg_augmentations.py +++ /dev/null @@ -1,287 +0,0 @@ -# TODO: Move to utils, merge with augmentations.py - -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license -""" -Image augmentation functions -""" - -import logging -import math -import random - -import cv2 -import numpy as np - -from utils.general import colorstr, check_version -from utils.seg_metrics import bbox_ioa -from utils.segment import segment2box, resample_segments - - -class Albumentations: - # YOLOv5 Albumentations class (optional, only used if package is installed) - def __init__(self): - self.transform = None - try: - import albumentations as A - - check_version(A.__version__, "1.0.3") # version requirement - - self.transform = A.Compose([A.Blur(p=0.01), A.MedianBlur(p=0.01), A.ToGray(p=0.01), A.CLAHE(p=0.01), - A.RandomBrightnessContrast(p=0.0), A.RandomGamma(p=0.0), A.ImageCompression(quality_lower=75, 
p=0.0), ], - bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]), ) - - logging.info(colorstr("albumentations: ") + ", ".join(f"{x}" for x in self.transform.transforms if x.p)) - except ImportError: # package not installed, skip - pass - except Exception as e: - logging.info(colorstr("albumentations: ") + f"{e}") - - def __call__(self, im, labels, p=1.0): - if self.transform and random.random() < p: - new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed - im, labels = new["image"], np.array([[c, *b] for c, b in zip(new["class_labels"], new["bboxes"])]) - return im, labels - - -def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5): - # HSV color-space augmentation - if hgain or sgain or vgain: - r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains - hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV)) - dtype = im.dtype # uint8 - - x = np.arange(0, 256, dtype=r.dtype) - lut_hue = ((x * r[0]) % 180).astype(dtype) - lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) - lut_val = np.clip(x * r[2], 0, 255).astype(dtype) - - im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) - cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed - - -def hist_equalize(im, clahe=True, bgr=False): - # Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255 - yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV) - if clahe: - c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) - yuv[:, :, 0] = c.apply(yuv[:, :, 0]) - else: - yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram - return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB - - -def replicate(im, labels): - # Replicate labels - h, w = im.shape[:2] - boxes = labels[:, 1:].astype(int) - x1, y1, x2, y2 = boxes.T - s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels) - for i in s.argsort()[: round(s.size * 0.5)]: # smallest indices - x1b, y1b, x2b, y2b = boxes[i] - bh, bw = y2b - y1b, x2b - x1b - yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y - x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh] - im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] # im4[ymin:ymax, xmin:xmax] - labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0) - - return im, labels - - -def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32, - center=True, # center padding or left top padding -): - # Resize and pad image while meeting stride-multiple constraints - shape = im.shape[:2] # current shape [height, width] - if isinstance(new_shape, int): - new_shape = (new_shape, new_shape) - - # Scale ratio (new / old) - r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) - if not scaleup: # only scale down, do not scale up (for better val mAP) - r = min(r, 1.0) - - # Compute padding - ratio = r, r # width, height ratios - new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) - dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding - if auto: # minimum rectangle - dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding - elif scaleFill: # stretch - dw, dh = 0.0, 0.0 - new_unpad = (new_shape[1], new_shape[0]) - ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios - - if center: - dw /= 2 # divide padding into 2 sides - dh /= 2 - - if shape[::-1] != new_unpad: # resize - im 
= cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) - top, bottom = int(round(dh - 0.1)) if center else 0, int(round(dh + 0.1)) - left, right = int(round(dw - 0.1)) if center else 0, int(round(dw + 0.1)) - im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border - return im, ratio, (dw, dh) - - -def random_perspective(im, targets=(), segments=(), degrees=10, translate=0.1, scale=0.1, shear=10, perspective=0.0, - border=(0, 0), area_thr=0.2, return_seg=False, ): - # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) - # targets = [cls, xyxy] - - height = im.shape[0] + border[0] * 2 # shape(h,w,c) - width = im.shape[1] + border[1] * 2 - - # Center - C = np.eye(3) - C[0, 2] = -im.shape[1] / 2 # x translation (pixels) - C[1, 2] = -im.shape[0] / 2 # y translation (pixels) - - # Perspective - P = np.eye(3) - P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) - P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) - - # Rotation and Scale - R = np.eye(3) - a = random.uniform(-degrees, degrees) - # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations - s = random.uniform(1 - scale, 1 + scale) - # s = 2 ** random.uniform(-scale, scale) - R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) - - # Shear - S = np.eye(3) - S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) - S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) - - # Translation - T = np.eye(3) - T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels) - T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels) - - # Combined rotation matrix - M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT - if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed - if perspective: - im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) - else: # affine - im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) - - # Visualize - # import matplotlib.pyplot as plt - # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() - # ax[0].imshow(im[:, :, ::-1]) # base - # ax[1].imshow(im2[:, :, ::-1]) # warped - - # Transform label coordinates - n = len(targets) - new_segments = [] - if n: - use_segments = any(x.any() for x in segments) - new = np.zeros((n, 4)) - if use_segments: # warp segments - segments = resample_segments(segments) # upsample - for i, segment in enumerate(segments): - xy = np.ones((len(segment), 3)) - xy[:, :2] = segment - xy = xy @ M.T # transform - xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine - - # clip - new[i] = segment2box(xy, width, height) - new_segments.append(xy) - - else: # warp boxes - xy = np.ones((n * 4, 3)) - xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 - xy = xy @ M.T # transform - xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine - - # create new boxes - x = xy[:, [0, 2, 4, 6]] - y = xy[:, [1, 3, 5, 7]] - new = (np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T) - - # clip - new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) - new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) - - # filter candidates - i = 
box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, cls=targets[:, 0], - # area_thr=0.01 if use_segments else 0.10, - area_thr=area_thr, ) - targets = targets[i] - targets[:, 1:5] = new[i] - new_segments = (np.array(new_segments)[i] if len(new_segments) else np.array(new_segments)) - - return (im, targets, new_segments) if return_seg else (im, targets) - - -def copy_paste(im, labels, segments, p=0.5): - # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy) - n = len(segments) - if p and n: - h, w, c = im.shape # height, width, channels - im_new = np.zeros(im.shape, np.uint8) - for j in random.sample(range(n), k=round(p * n)): - l, s = labels[j], segments[j] - box = w - l[3], l[2], w - l[1], l[4] - ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area - if (ioa < 0.30).all(): # allow 30% obscuration of existing labels - labels = np.concatenate((labels, [[l[0], *box]]), 0) - segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)) - cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED, ) - - result = cv2.bitwise_and(src1=im, src2=im_new) - result = cv2.flip(result, 1) # augment segments (flip left-right) - i = result > 0 # pixels to replace - # i[:, :] = result.max(2).reshape(h, w, 1) # act over ch - im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug - - return im, labels, segments - - -def cutout(im, labels, p=0.5): - # Applies image cutout augmentation https://arxiv.org/abs/1708.04552 - if random.random() < p: - h, w = im.shape[:2] - scales = ([0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16) # image size fraction - for s in scales: - mask_h = random.randint(1, int(h * s)) # create random masks - mask_w = random.randint(1, int(w * s)) - - # box - xmin = max(0, random.randint(0, w) - mask_w // 2) - ymin = max(0, random.randint(0, h) - mask_h // 2) - xmax = min(w, xmin + mask_w) - ymax = min(h, ymin + mask_h) - - # apply random color mask - im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)] - - # return unobscured labels - if len(labels) and s > 0.03: - box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32) - ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area - labels = labels[ioa < 0.60] # remove >60% obscured labels - - return labels - - -def mixup(im, labels, im2, labels2): - # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf - r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 - im = (im * r + im2 * (1 - r)).astype(np.uint8) - labels = np.concatenate((labels, labels2), 0) - return im, labels - - -def box_candidates(box1, box2, cls, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n) - # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio - w1, h1 = box1[2] - box1[0], box1[3] - box1[1] - w2, h2 = box2[2] - box2[0], box2[3] - box2[1] - area_thr = (np.array(area_thr)[cls.astype(np.int)] if isinstance(area_thr, list) else area_thr) - if isinstance(area_thr, list) and len(area_thr) == 1: - area_thr = area_thr[0] - ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio - return ((w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)) # candidates diff --git a/seg_dataloaders.py b/seg_dataloaders.py deleted file mode 100644 index 4d74bb00c1a9..000000000000 --- a/seg_dataloaders.py +++ /dev/null @@ -1,1094 +0,0 @@ -## TODO: Move to utils, merge with dataloaders.py - -# YOLOv5 🚀 by Ultralytics, 
GPL-3.0 license -""" -Dataloaders -""" - -import json -import logging -import time -import numpy as np -from functools import wraps -from itertools import repeat -from multiprocessing.pool import ThreadPool, Pool -from pathlib import Path -from zipfile import ZipFile -from PIL import Image -from tqdm import tqdm - -import torch.nn.functional as F -import yaml -from torch.utils.data import Dataset as torchDataset -from torch.utils.data import distributed -from torch.utils.data.sampler import BatchSampler as torchBatchSampler -from torch.utils.data.sampler import RandomSampler - -from seg_augmentations import (Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective, ) -from utils.general import colorstr, check_dataset, check_yaml, xywhn2xyxy, xyxy2xywhn, xyn2xy -from utils.torch_utils import torch_distributed_zero_first - - -class _RepeatSampler: - """ Sampler that repeats forever - - Args: - sampler (Sampler) - """ - - def __init__(self, sampler): - self.sampler = sampler - - def __iter__(self): - while True: - yield from iter(self.sampler) - - -class YoloBatchSampler(torchBatchSampler): - """ - This batch sampler will generate mini-batches of (mosaic, index) tuples from another sampler. - It works just like the :class:`torch.utils.data.sampler.BatchSampler`, - but it will turn on/off the mosaic aug. - """ - - def __init__(self, *args, augment=True, **kwargs): - super().__init__(*args, **kwargs) - self.augment = augment - - def __iter__(self): - for batch in super().__iter__(): - yield [(self.augment, idx) for idx in batch] - - -def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0, - rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix="", shuffle=False, - area_thr=0.2, mask_head=False, mask_downsample_ratio=1, overlap_mask=False): - if rect and shuffle: - print("WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False") - shuffle = False - data_load = LoadImagesAndLabelsAndMasks if mask_head else LoadImagesAndLabels - # Make sure only the first process in DDP process the dataset first, and the following others can use the cache - with torch_distributed_zero_first(rank): - dataset = data_load(path, imgsz, batch_size, augment=augment, # augment images - hyp=hyp, # augmentation hyperparameters - rect=rect, # rectangular training - cache_images=cache, single_cls=single_cls, stride=int(stride), pad=pad, image_weights=image_weights, - prefix=prefix, area_thr=area_thr, ) - if mask_head: - dataset.downsample_ratio = mask_downsample_ratio - dataset.overlap = overlap_mask - - batch_size = min(batch_size, len(dataset)) - nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers]) # number of workers - # sampler = InfiniteSampler(len(dataset), seed=0) - sampler = (distributed.DistributedSampler(dataset, shuffle=shuffle) if rank != -1 else RandomSampler(dataset)) - - batch_sampler = (YoloBatchSampler(sampler=sampler, batch_size=batch_size, drop_last=False, - augment=augment, ) if not rect else None) - dataloader = DataLoader(dataset, num_workers=nw, batch_size=1 if batch_sampler is not None else batch_size, - # batch-size and batch-sampler is exclusion - batch_sampler=batch_sampler, pin_memory=True, - collate_fn=data_load.collate_fn4 if quad else data_load.collate_fn, - ) - return dataloader, dataset - - -class Dataset(torchDataset): - """This class is a subclass of the base :class:`torch.utils.data.Dataset`, - that enables on the fly resizing of the 
``input_dim``. - - Args: - input_dimension (tuple): (width,height) tuple with default dimensions of the network - """ - - def __init__(self, augment=True): - super().__init__() - self.augment = augment - - @staticmethod - def mosaic_getitem(getitem_fn): - """ - Decorator method that needs to be used around the ``__getitem__`` method. |br| - This decorator enables the closing mosaic - - Example: - >>> class CustomSet(ln.data.Dataset): - ... def __len__(self): - ... return 10 - ... @ln.data.Dataset.mosaic_getitem - ... def __getitem__(self, index): - ... return self.enable_mosaic - """ - - @wraps(getitem_fn) - def wrapper(self, index): - if not isinstance(index, int): - self.augment = index[0] - index = index[1] - - ret_val = getitem_fn(self, index) - - return ret_val - - return wrapper - - -class LoadImagesAndLabels(Dataset): - # YOLOv5 train_loader/val_loader, loads images and labels for training and validation - cache_version = 0.6 # dataset labels *.cache version - - def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False, stride=32, pad=0.0, prefix="", area_thr=0.2, ): - super().__init__(augment=augment) - self.img_size = img_size - self.hyp = hyp - self.image_weights = image_weights - self.rect = False if image_weights else rect - self.mosaic = (self.augment and not self.rect) # load 4 images at a time into a mosaic (only during training) - self.mosaic_border = [-img_size // 2, -img_size // 2] - self.stride = stride - self.path = path - self.albumentations = Albumentations() if augment else None - - # additional feature - self.area_thr = area_thr - - p = Path(path) # os-agnostic - self.img_files = self.get_img_files(p, prefix) - self.label_files = img2label_paths(self.img_files) # labels - # Check cache - cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix(".cache") - labels, shapes, segments, img_files, label_files = self.load_cache(cache_path, prefix) - - self.segments = segments - self.labels = list(labels) - self.shapes = np.array(shapes, dtype=np.float64) - self.img_files = img_files # update - self.label_files = label_files # update - - num_imgs = len(shapes) # number of images - batch_index = np.floor(np.arange(num_imgs) / batch_size).astype(np.int) # batch index - self.batch_index = batch_index # batch index of image - self.num_imgs = num_imgs - self.indices = range(num_imgs) - - # Update labels - for i, (_, segment) in enumerate(zip(self.labels, self.segments)): - if single_cls: # single-class training, merge all classes into 0 - self.labels[i][:, 0] = 0 - if segment: - self.segments[i][:, 0] = 0 - - # Rectangular Training - if self.rect: - num_batches = batch_index[-1] + 1 # number of batches - self.update_rect(num_batches, pad) - - # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM) - self.imgs, self.img_npy = [None] * num_imgs, [None] * num_imgs - if cache_images: - self.cache_images(cache_images, prefix) - - def cache_images(self, cache_images, prefix): - """Cache images to disk or ram for faster speed.""" - if cache_images == "disk": - self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + "_npy") - self.img_npy = [self.im_cache_dir / Path(f).with_suffix(".npy").name for f in self.img_files] - self.im_cache_dir.mkdir(parents=True, exist_ok=True) - gb = 0 # Gigabytes of cached images - self.img_hw0, self.img_hw = [None] * self.num_imgs, [None] * self.num_imgs - results = 
ThreadPool(NUM_THREADS).imap(lambda x: load_image(*x), zip(repeat(self), range(self.num_imgs))) - pbar = tqdm(enumerate(results), total=self.num_imgs) - for i, x in pbar: - if cache_images == "disk": - if not self.img_npy[i].exists(): - np.save(self.img_npy[i].as_posix(), x[0]) - gb += self.img_npy[i].stat().st_size - else: - (self.imgs[i], self.img_hw0[i], self.img_hw[i],) = x # im, hw_orig, hw_resized = load_image(self, i) - gb += self.imgs[i].nbytes - pbar.desc = f"{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})" - pbar.close() - - def get_img_files(self, p, prefix): - """Read image files.""" - try: - f = [] # image files - if p.is_dir(): # dir - f += glob.glob(str(p / "**" / "*.*"), recursive=True) # f = list(p.rglob('*.*')) # pathlib - elif p.is_file(): # file - with open(p, "r") as t: - t = t.read().strip().splitlines() - parent = str(p.parent) + os.sep - f += [x.replace("./", parent) if x.startswith("./") else x for x in - t] # local to global path # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib) - else: - raise Exception(f"{prefix}{p} does not exist") - img_files = sorted([x.replace("/", os.sep) for x in f if x.split(".")[-1].lower() in IMG_FORMATS]) - # img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib - assert img_files, f"{prefix}No images found" - except Exception as e: - raise Exception(f"{prefix}Error loading data from {str(p)}: {e}\nSee {HELP_URL}") - return img_files - - def load_cache(self, cache_path, prefix): - """Load labels from *.cache file.""" - try: - cache, exists = (np.load(cache_path, allow_pickle=True).item(), True,) # load dict - assert cache["version"] == self.cache_version # same version - assert cache["hash"] == get_hash(self.label_files + self.img_files) # same hash - except: - cache, exists = self.cache_labels(cache_path, prefix), False # cache - - # Display cache - nf, nm, ne, nc, n = cache.pop("results") # found, missing, empty, corrupted, total - if exists: - d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted" - tqdm(None, desc=prefix + d, total=n, initial=n) # display cache results - if cache["msgs"]: - logging.info("\n".join(cache["msgs"])) # display warnings - assert ( - nf > 0 or not self.augment), f"{prefix}No labels in {cache_path}. Can not train without labels. 
See {HELP_URL}" - - # Read cache - [cache.pop(k) for k in ("hash", "version", "msgs")] # remove items - labels, shapes, segments = zip(*cache.values()) - img_files = list(cache.keys()) # update - label_files = img2label_paths(cache.keys()) # update - return labels, shapes, segments, img_files, label_files - - def update_rect(self, num_batches, pad): - """Update attr if rect is True.""" - # Sort by aspect ratio - s = self.shapes # wh - ar = s[:, 1] / s[:, 0] # aspect ratio - irect = ar.argsort() - self.img_files = [self.img_files[i] for i in irect] - self.label_files = [self.label_files[i] for i in irect] - self.labels = [self.labels[i] for i in irect] - self.segments = [self.segments[i] for i in irect] - self.shapes = s[irect] # wh - ar = ar[irect] - - # Set training image shapes - shapes = [[1, 1]] * num_batches - for i in range(num_batches): - ari = ar[self.batch_index == i] - mini, maxi = ari.min(), ari.max() - if maxi < 1: - shapes[i] = [maxi, 1] - elif mini > 1: - shapes[i] = [1, 1 / mini] - - self.batch_shapes = (np.ceil(np.array(shapes) * self.img_size / self.stride + pad).astype(np.int) * self.stride) - - def cache_labels(self, path=Path("./labels.cache"), prefix=""): - """Cache labels to *.cache file if there is no *.cache file in local.""" - # Cache dataset labels, check images and read shapes - x = {} # dict - nm, nf, ne, nc, msgs = (0, 0, 0, 0, [],) # number missing, found, empty, corrupt, messages - desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..." - - with Pool(NUM_THREADS) as pool: - pbar = tqdm(pool.imap(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix)), ), - desc=desc, total=len(self.img_files), ) - for im_file, l, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar: - nm += nm_f - nf += nf_f - ne += ne_f - nc += nc_f - if im_file: - x[im_file] = [l, shape, segments] - if msg: - msgs.append(msg) - pbar.desc = f"{desc}{nf} found, {nm} missing, {ne} empty, {nc} corrupted" - - pbar.close() - if msgs: - logging.info("\n".join(msgs)) - if nf == 0: - logging.info(f"{prefix}WARNING: No labels found in {path}. 
See {HELP_URL}") - x["hash"] = get_hash(self.label_files + self.img_files) - x["results"] = nf, nm, ne, nc, len(self.img_files) - x["msgs"] = msgs # warnings - x["version"] = self.cache_version # cache version - try: - np.save(path, x) # save cache for next time - path.with_suffix(".cache.npy").rename(path) # remove .npy suffix - logging.info(f"{prefix}New cache created: {path}") - except Exception as e: - logging.info(f"{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}") # path not writeable - return x - - def __len__(self): - return len(self.img_files) - - # def __iter__(self): - # self.count = -1 - # print('ran dataset iter') - # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) - # return self - - @Dataset.mosaic_getitem - def __getitem__(self, index): - index = self.indices[index] # linear, shuffled, or image_weights - - hyp = self.hyp - self.mosaic = self.augment and not self.rect - mosaic = self.mosaic and random.random() < hyp["mosaic"] - if mosaic: - # Load mosaic - img, labels = load_mosaic(self, index) - shapes = None - - # MixUp augmentation - if random.random() < hyp["mixup"]: - img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.num_imgs - 1))) - - else: - # Load image - img, (h0, w0), (h, w) = load_image(self, index) - - # Letterbox - shape = ( - self.batch_shapes[self.batch_index[index]] if self.rect else self.img_size) # final letterboxed shape - img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) - shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling - - labels = self.labels[index].copy() - if labels.size: # normalized xywh to pixel xyxy format - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) - - if self.augment: - img, labels = random_perspective(img, labels, degrees=hyp["degrees"], translate=hyp["translate"], - scale=hyp["scale"], shear=hyp["shear"], perspective=hyp["perspective"], ) - - nl = len(labels) # number of labels - if nl: - labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) - - if self.augment: - # Albumentations - img, labels = self.albumentations(img, labels) - nl = len(labels) # update after albumentations - - # HSV color-space - augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"]) - - # Flip up-down - if random.random() < hyp["flipud"]: - img = np.flipud(img) - if nl: - labels[:, 2] = 1 - labels[:, 2] - - # Flip left-right - if random.random() < hyp["fliplr"]: - img = np.fliplr(img) - if nl: - labels[:, 1] = 1 - labels[:, 1] - - # Cutouts # labels = cutout(img, labels, p=0.5) - - labels_out = torch.zeros((nl, 6)) - if nl: - labels_out[:, 1:] = torch.from_numpy(labels) - - # Convert - img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB - img = np.ascontiguousarray(img) - - return torch.from_numpy(img), labels_out, self.img_files[index], shapes - - @staticmethod - def collate_fn(batch): - img, label, path, shapes = zip(*batch) # transposed - for i, l in enumerate(label): - l[:, 0] = i # add target image index for build_targets() - return torch.stack(img, 0), torch.cat(label, 0), path, shapes, None - - @staticmethod - def collate_fn4(batch): - img, label, path, shapes = zip(*batch) # transposed - n = len(shapes) // 4 - img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n] - - ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]]) - wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]]) - s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 
0.5]]) # scale - for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW - i *= 4 - if random.random() < 0.5: - im = \ - F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2.0, mode="bilinear", align_corners=False, )[ - 0].type(img[i].type()) - l = label[i] - else: - im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1),), 2, ) - l = (torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo,), 0, ) * s) - img4.append(im) - label4.append(l) - - for i, l in enumerate(label4): - l[:, 0] = i # add target image index for build_targets() - - return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4 - - -class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing - def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False, stride=32, pad=0, prefix="", area_thr=0.2, - downsample_ratio=1, overlap=False, - ): - super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls, - stride, pad, prefix, area_thr, ) - self.downsample_ratio = downsample_ratio - self.overlap = overlap - - @Dataset.mosaic_getitem - def __getitem__(self, index): - index = self.indices[index] # linear, shuffled, or image_weights - - hyp = self.hyp - self.mosaic = self.augment and not self.rect - mosaic = self.mosaic and random.random() < hyp["mosaic"] - masks = [] - if mosaic: - # Load mosaic - img, labels, segments = load_mosaic(self, index, return_seg=True) - shapes = None - - # TODO: Mixup not support segment for now - # MixUp augmentation - if random.random() < hyp["mixup"]: - img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.num_imgs - 1))) - - else: - # Load image - img, (h0, w0), (h, w) = load_image(self, index) - - # Letterbox - shape = ( - self.batch_shapes[self.batch_index[index]] if self.rect else self.img_size) # final letterboxed shape - img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) - shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling - - labels = self.labels[index].copy() - # [array, array, ....], array.shape=(num_points, 2), xyxyxyxy - segments = self.segments[index].copy() - # TODO - if len(segments): - for i_s in range(len(segments)): - segments[i_s] = xyn2xy(segments[i_s], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1], ) - if labels.size: # normalized xywh to pixel xyxy format - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) - - if self.augment: - img, labels, segments = random_perspective(img, labels, segments=segments, degrees=hyp["degrees"], - translate=hyp["translate"], scale=hyp["scale"], shear=hyp["shear"], perspective=hyp["perspective"], - return_seg=True, ) - - nl = len(labels) # number of labels - if nl: - labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) - if self.overlap: - masks, sorted_idx = polygons2masks_overlap(img.shape[:2], segments, - downsample_ratio=self.downsample_ratio) - masks = masks[None] # (640, 640) -> (1, 640, 640) - labels = labels[sorted_idx] - else: - masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio) - - masks = (torch.from_numpy(masks) if len(masks) else - torch.zeros(1 if self.overlap else nl, - img.shape[0] // self.downsample_ratio, - img.shape[1] // self.downsample_ratio)) - # TODO: albumentations support - if self.augment: - # 
Albumentations - # there are some augmentation that won't change boxes and masks, - # so just be it for now. - img, labels = self.albumentations(img, labels) - nl = len(labels) # update after albumentations - - # HSV color-space - augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"]) - - # Flip up-down - if random.random() < hyp["flipud"]: - img = np.flipud(img) - if nl: - labels[:, 2] = 1 - labels[:, 2] - masks = torch.flip(masks, dims=[1]) - - # Flip left-right - if random.random() < hyp["fliplr"]: - img = np.fliplr(img) - if nl: - labels[:, 1] = 1 - labels[:, 1] - masks = torch.flip(masks, dims=[2]) - - # Cutouts # labels = cutout(img, labels, p=0.5) - - labels_out = torch.zeros((nl, 6)) - if nl: - labels_out[:, 1:] = torch.from_numpy(labels) - - # Convert - img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB - img = np.ascontiguousarray(img) - - return (torch.from_numpy(img), labels_out, self.img_files[index], shapes, masks) - - @staticmethod - def collate_fn(batch): - img, label, path, shapes, masks = zip(*batch) # transposed - batched_masks = torch.cat(masks, 0) - # print(batched_masks.shape) - # print('batched_masks:', (batched_masks > 0).sum()) - for i, l in enumerate(label): - l[:, 0] = i # add target image index for build_targets() - return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks - - -# Ancillary functions -------------------------------------------------------------------------------------------------- -def load_image(self, i): - # loads 1 image from dataset index 'i', returns im, original hw, resized hw - im = self.imgs[i] - if im is None: # not cached in ram - npy = self.img_npy[i] - if npy and npy.exists(): # load npy - im = np.load(npy) - else: # read image - path = self.img_files[i] - im = cv2.imread(path) # BGR - assert im is not None, "Image Not Found " + path - h0, w0 = im.shape[:2] # orig hw - r = self.img_size / max(h0, w0) # ratio - if r != 1: # if sizes are not equal - im = cv2.resize(im, (int(w0 * r), int(h0 * r)), - interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR, ) - return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized - else: - return (self.imgs[i], self.img_hw0[i], self.img_hw[i],) # im, hw_original, hw_resized - - -def load_mosaic(self, index, return_seg=False): - # YOLOv5 4-mosaic loader. 
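A note on the label layout these collate functions rely on: every `__getitem__` returns an `(nl, 6)` tensor whose first column is left at zero, and `collate_fn` fills it with the image's position in the batch so that `build_targets()` can tell instances apart after concatenation. A minimal standalone sketch with made-up shapes:

    import torch

    # columns: [batch_index, cls, x, y, w, h]; batch_index is 0 for every image at first
    labels = [torch.zeros(2, 6), torch.zeros(1, 6)]  # image 0 has 2 labels, image 1 has 1
    for i, l in enumerate(labels):
        l[:, 0] = i                                  # tag each label with its image index
    targets = torch.cat(labels, 0)                   # (3, 6); rows 0-1 -> image 0, row 2 -> image 1
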
Loads 1 image + 3 random images into a 4-image mosaic - labels4, segments4 = [], [] - s = self.img_size - yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y - - # 3 additional image indices - indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices - for i, index in enumerate(indices): - # Load image - img, _, (h, w) = load_image(self, index) - - # place img in img4 - if i == 0: # top left - img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles - x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) - x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) - elif i == 1: # top right - x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc - x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h - elif i == 2: # bottom left - x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) - x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) - elif i == 3: # bottom right - x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) - x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) - - img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] - padw = x1a - x1b - padh = y1a - y1b - - labels, segments = self.labels[index].copy(), self.segments[index].copy() - - if labels.size: - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format - segments = [xyn2xy(x, w, h, padw, padh) for x in segments] - labels4.append(labels) - segments4.extend(segments) - - # Concat/clip labels - labels4 = np.concatenate(labels4, 0) - for x in (labels4[:, 1:], *segments4): - np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() - # img4, labels4 = replicate(img4, labels4) # replicate - - # Augment - img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"]) - results = random_perspective(img4, labels4, segments4, degrees=self.hyp["degrees"], translate=self.hyp["translate"], - scale=self.hyp["scale"], shear=self.hyp["shear"], perspective=self.hyp["perspective"], - border=self.mosaic_border, area_thr=self.area_thr, return_seg=return_seg, ) # border to remove - # return (img4, labels4, segments4) if return_seg else (img4, labels4) - return results - - -def load_mosaic9(self, index): - # YOLOv5 9-mosaic loader. 
Loads 1 image + 8 random images into a 9-image mosaic - labels9, segments9 = [], [] - s = self.img_size - indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices - random.shuffle(indices) - for i, index in enumerate(indices): - # Load image - img, _, (h, w) = load_image(self, index) - - # place img in img9 - if i == 0: # center - img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles - h0, w0 = h, w - c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates - elif i == 1: # top - c = s, s - h, s + w, s - elif i == 2: # top right - c = s + wp, s - h, s + wp + w, s - elif i == 3: # right - c = s + w0, s, s + w0 + w, s + h - elif i == 4: # bottom right - c = s + w0, s + hp, s + w0 + w, s + hp + h - elif i == 5: # bottom - c = s + w0 - w, s + h0, s + w0, s + h0 + h - elif i == 6: # bottom left - c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h - elif i == 7: # left - c = s - w, s + h0 - h, s, s + h0 - elif i == 8: # top left - c = s - w, s + h0 - hp - h, s, s + h0 - hp - - padx, pady = c[:2] - x1, y1, x2, y2 = [max(x, 0) for x in c] # allocate coords - - # Labels - labels, segments = self.labels[index].copy(), self.segments[index].copy() - if labels.size: - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format - segments = [xyn2xy(x, w, h, padx, pady) for x in segments] - labels9.append(labels) - segments9.extend(segments) - - # Image - img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax] - hp, wp = h, w # height, width previous - - # Offset - yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border] # mosaic center x, y - img9 = img9[yc: yc + 2 * s, xc: xc + 2 * s] - - # Concat/clip labels - labels9 = np.concatenate(labels9, 0) - labels9[:, [1, 3]] -= xc - labels9[:, [2, 4]] -= yc - c = np.array([xc, yc]) # centers - segments9 = [x - c for x in segments9] - - for x in (labels9[:, 1:], *segments9): - np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() - # img9, labels9 = replicate(img9, labels9) # replicate - - # Augment - img9, labels9 = random_perspective(img9, labels9, segments9, degrees=self.hyp["degrees"], - translate=self.hyp["translate"], scale=self.hyp["scale"], shear=self.hyp["shear"], - perspective=self.hyp["perspective"], border=self.mosaic_border, ) # border to remove - - return img9, labels9 - - -def dataset_stats(path="coco128.yaml", autodownload=False, verbose=False, profile=False, hub=False): - """Return dataset statistics dictionary with images and instances counts per split per class - To run in parent directory: export PYTHONPATH="$PWD/yolov5" - Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True) - Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip') - Arguments - path: Path to data.yaml or data.zip (with data.yaml inside data.zip) - autodownload: Attempt to download dataset if not found locally - verbose: Print stats dictionary - """ - - def round_labels(labels): - # Update labels to integer class and 6 decimal place floats - return [[int(c), *[round(x, 4) for x in points]] for c, *points in labels] - - def unzip(path): - # Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/' - if str(path).endswith(".zip"): # path is data.zip - assert Path(path).is_file(), f"Error unzipping {path}, file not found" - ZipFile(path).extractall(path=path.parent) # unzip - dir = path.with_suffix("") # dataset directory == zip name - 
return (True, str(dir), next(dir.rglob("*.yaml")),) # zipped, data_dir, yaml_path - else: # path is data.yaml - return False, None, path - - def hub_ops(f, max_dim=1920): - # HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing - f_new = im_dir / Path(f).name # dataset-hub image filename - try: # use PIL - im = Image.open(f) - r = max_dim / max(im.height, im.width) # ratio - if r < 1.0: # image too large - im = im.resize((int(im.width * r), int(im.height * r))) - im.save(f_new, quality=75) # save - except Exception as e: # use OpenCV - print(f"WARNING: HUB ops PIL failure {f}: {e}") - im = cv2.imread(f) - im_height, im_width = im.shape[:2] - r = max_dim / max(im_height, im_width) # ratio - if r < 1.0: # image too large - im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_LINEAR, ) - cv2.imwrite(str(f_new), im) - - zipped, data_dir, yaml_path = unzip(Path(path)) - with open(check_yaml(yaml_path), errors="ignore") as f: - data = yaml.safe_load(f) # data dict - if zipped: - data["path"] = data_dir # TODO: should this be dir.resolve()? - check_dataset(data, autodownload) # download dataset if missing - hub_dir = Path(data["path"] + ("-hub" if hub else "")) - stats = {"nc": data["nc"], "names": data["names"]} # statistics dictionary - for split in "train", "val", "test": - if data.get(split) is None: - stats[split] = None # i.e. no test set - continue - x = [] - dataset = LoadImagesAndLabels(data[split]) # load dataset - for label in tqdm(dataset.labels, total=dataset.num_imgs, desc="Statistics"): - x.append(np.bincount(label[:, 0].astype(int), minlength=data["nc"])) - x = np.array(x) # shape(128x80) - stats[split] = {"instance_stats": {"total": int(x.sum()), "per_class": x.sum(0).tolist()}, - "image_stats": {"total": dataset.num_imgs, "unlabelled": int(np.all(x == 0, 1).sum()), - "per_class": (x > 0).sum(0).tolist(), }, - "labels": [{str(Path(k).name): round_labels(v.tolist())} for k, v in - zip(dataset.img_files, dataset.labels)], } - - if hub: - im_dir = hub_dir / "images" - im_dir.mkdir(parents=True, exist_ok=True) - for _ in tqdm(ThreadPool(NUM_THREADS).imap(hub_ops, dataset.img_files), total=dataset.num_imgs, - desc="HUB Ops", ): - pass - - # Profile - stats_path = hub_dir / "stats.json" - if profile: - for _ in range(1): - file = stats_path.with_suffix(".npy") - t1 = time.time() - np.save(file, stats) - t2 = time.time() - x = np.load(file, allow_pickle=True) - print(f"stats.npy times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write") - - file = stats_path.with_suffix(".json") - t1 = time.time() - with open(file, "w") as f: - json.dump(stats, f) # save stats *.json - t2 = time.time() - with open(file, "r") as f: - x = json.load(f) # load hyps dict - print(f"stats.json times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write") - - # Save, print and return - if hub: - print(f"Saving {stats_path.resolve()}...") - with open(stats_path, "w") as f: - json.dump(stats, f) # save stats.json - if verbose: - print(json.dumps(stats, indent=2, sort_keys=False)) - return stats - - -# REFACTOR IN NEW FILE -import os -import glob -import shutil -import hashlib -import torch -import cv2 -import random -from pathlib import Path -from PIL import ImageOps, ExifTags -from utils.segment import segments2boxes -from utils.general import xywh2xyxy - -# Parameters -HELP_URL = "https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data" -IMG_FORMATS = ["bmp", "jpg", "jpeg", "png", "tif", "tiff", "dng", "webp", "mpo", ] # acceptable 
image suffixes -VID_FORMATS = ["mov", "avi", "mp4", "mpg", "mpeg", "m4v", "wmv", "mkv", "vdo", "flv", ] # acceptable video suffixes -NUM_THREADS = min(8, os.cpu_count()) # number of multiprocessing threads - -# Get orientation exif tag -for orientation in ExifTags.TAGS.keys(): - if ExifTags.TAGS[orientation] == "Orientation": - break - - -def get_hash(paths): - # Returns a single hash value of a list of paths (files or dirs) - size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes - h = hashlib.md5(str(size).encode()) # hash sizes - h.update("".join(paths).encode()) # hash paths - return h.hexdigest() # return hash - - -def exif_size(img): - # Returns exif-corrected PIL size - s = img.size # (width, height) - try: - rotation = dict(img._getexif().items())[orientation] - if rotation == 6: # rotation 270 - s = (s[1], s[0]) - elif rotation == 8: # rotation 90 - s = (s[1], s[0]) - except: - pass - - return s - - -def exif_transpose(image): - """ - Transpose a PIL image accordingly if it has an EXIF Orientation tag. - Inplace version of https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py exif_transpose() - - :param image: The image to transpose. - :return: An image. - """ - exif = image.getexif() - orientation = exif.get(0x0112, 1) # default 1 - if orientation > 1: - method = {2: Image.FLIP_LEFT_RIGHT, 3: Image.ROTATE_180, 4: Image.FLIP_TOP_BOTTOM, 5: Image.TRANSPOSE, - 6: Image.ROTATE_270, 7: Image.TRANSVERSE, 8: Image.ROTATE_90, }.get(orientation) - if method is not None: - image = image.transpose(method) - del exif[0x0112] - image.info["exif"] = exif.tobytes() - return image - -def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): - """ - Args: - img_size (tuple): The image size. - polygons (np.ndarray): [N, M], N is the number of polygons, - M is the number of points(Be divided by 2). - """ - mask = np.zeros(img_size, dtype=np.uint8) - polygons = np.asarray(polygons) - polygons = polygons.astype(np.int32) - shape = polygons.shape - polygons = polygons.reshape(shape[0], -1, 2) - cv2.fillPoly(mask, polygons, color=color) - nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio) - # NOTE: fillPoly then resize is trying the keep the same way - # of loss calculation when mask-ratio=1. - mask = cv2.resize(mask, (nw, nh)) - return mask - - -def polygons2masks(img_size, polygons, color, downsample_ratio=1): - """ - Args: - img_size (tuple): The image size. - polygons (list[np.ndarray]): each polygon is [N, M], - N is the number of polygons, - M is the number of points(Be divided by 2). 
- """ - masks = [] - for si in range(len(polygons)): - mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, - downsample_ratio) - masks.append(mask) - return np.array(masks) - - -def polygons2masks_overlap(img_size, segments, downsample_ratio=1): - """Return a (640, 640) overlap mask.""" - masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio), - dtype=np.uint8) - areas = [] - ms = [] - for si in range(len(segments)): - mask = polygon2mask( - img_size, - [segments[si].reshape(-1)], - downsample_ratio=downsample_ratio, - color=1, - ) - ms.append(mask) - areas.append(mask.sum()) - areas = np.asarray(areas) - index = np.argsort(-areas) - ms = np.array(ms)[index] - for i in range(len(segments)): - mask = ms[i] * (i + 1) - masks = masks + mask - masks = np.clip(masks, a_min=0, a_max=i + 1) - return masks, index - - -def img2label_paths(img_paths): - # Define label paths as a function of image paths - sa, sb = (os.sep + "images" + os.sep, os.sep + "labels" + os.sep,) # /images/, /labels/ substrings - return [sb.join(x.rsplit(sa, 1)).rsplit(".", 1)[0] + ".txt" for x in img_paths] - - -def create_folder(path="./new"): - # Create folder - if os.path.exists(path): - shutil.rmtree(path) # delete output folder - os.makedirs(path) # make new output folder - - -def flatten_recursive(path="../datasets/coco128"): - # Flatten a recursive directory by bringing all files to top level - new_path = Path(path + "_flat") - create_folder(new_path) - for file in tqdm(glob.glob(str(Path(path)) + "/**/*.*", recursive=True)): - shutil.copyfile(file, new_path / Path(file).name) - - -def extract_boxes(path="../datasets/coco128", ): # from utils.datasets import *; extract_boxes() - # Convert detection dataset into classification dataset, with one directory per class - path = Path(path) # images dir - shutil.rmtree(path / "classifier") if (path / "classifier").is_dir() else None # remove existing - files = list(path.rglob("*.*")) - n = len(files) # number of files - for im_file in tqdm(files, total=n): - if im_file.suffix[1:] in IMG_FORMATS: - # image - im = cv2.imread(str(im_file))[..., ::-1] # BGR to RGB - h, w = im.shape[:2] - - # labels - lb_file = Path(img2label_paths([str(im_file)])[0]) - if Path(lb_file).exists(): - with open(lb_file, "r") as f: - lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32, ) # labels - - for j, x in enumerate(lb): - c = int(x[0]) # class - f = ((path / "classifier") / f"{c}" / f"{path.stem}_{im_file.stem}_{j}.jpg") # new filename - if not f.parent.is_dir(): - f.parent.mkdir(parents=True) - - b = x[1:] * [w, h, w, h] # box - # b[2:] = b[2:].max() # rectangle to square - b[2:] = b[2:] * 1.2 + 3 # pad - b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) - - b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image - b[[1, 3]] = np.clip(b[[1, 3]], 0, h) - assert cv2.imwrite(str(f), im[b[1]: b[3], b[0]: b[2]]), f"box failure in {f}" - - -def autosplit(path="../datasets/coco128/images", weights=(0.9, 0.1, 0.0), annotated_only=False): - """Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files - Usage: from utils.datasets import *; autosplit() - Arguments - path: Path to images directory - weights: Train, val, test weights (list, tuple) - annotated_only: Only use images with an annotated txt file - """ - path = Path(path) # images dir - files = sorted([x for x in path.rglob("*.*") if x.suffix[1:].lower() in IMG_FORMATS]) # image files only - n = len(files) # number of files - 
random.seed(0) # for reproducibility - indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split - - txt = ["autosplit_train.txt", "autosplit_val.txt", "autosplit_test.txt", ] # 3 txt files - [(path.parent / x).unlink(missing_ok=True) for x in txt] # remove existing - - print(f"Autosplitting images from {path}" + ", using *.txt labeled images only" * annotated_only) - for i, img in tqdm(zip(indices, files), total=n): - if (not annotated_only or Path(img2label_paths([str(img)])[0]).exists()): # check label - with open(path.parent / txt[i], "a") as f: - f.write("./" + img.relative_to(path.parent).as_posix() + "\n") # add image to txt file - - -def verify_image_label(args): - # Verify one image-label pair - im_file, lb_file, prefix = args - nm, nf, ne, nc, msg, segments = (0, 0, 0, 0, "", [],) # number (missing, found, empty, corrupt), message, segments - try: - # verify images - im = Image.open(im_file) - im.verify() # PIL verify - shape = exif_size(im) # image size - assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels" - assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}" - if im.format.lower() in ("jpg", "jpeg"): - with open(im_file, "rb") as f: - f.seek(-2, 2) - if f.read() != b"\xff\xd9": # corrupt JPEG - ImageOps.exif_transpose(Image.open(im_file)).save(im_file, "JPEG", subsampling=0, quality=100) - msg = f"{prefix}WARNING: {im_file}: corrupt JPEG restored and saved" - - # verify labels - if os.path.isfile(lb_file): - nf = 1 # label found - with open(lb_file, "r") as f: - l = [x.split() for x in f.read().strip().splitlines() if len(x)] - if any([len(x) > 6 for x in l]): # is segment - classes = np.array([x[0] for x in l], dtype=np.float32) - segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l] # (cls, xy1...) - l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh) - l = np.array(l, dtype=np.float32) - nl = len(l) - if nl: - assert (l.shape[1] == 5), f"labels require 5 columns, {l.shape[1]} columns detected" - assert (l >= 0).all(), f"negative label values {l[l < 0]}" - assert (l[:, 1:] <= 1).all(), f"non-normalized or out of bounds coordinates {l[:, 1:][l[:, 1:] > 1]}" - l, idx = np.unique(l, axis=0, return_index=True) # remove duplicate rows - # NOTE: `np.unique` will change the order of `l`, so adjust the segments order too. - segments = [segments[i] for i in idx] if len(segments) > 0 else segments - if len(l) < nl: - msg = f"{prefix}WARNING: {im_file}: {nl - len(l)} duplicate labels removed" - else: - ne = 1 # label empty - l = np.zeros((0, 5), dtype=np.float32) - else: - nm = 1 # label missing - l = np.zeros((0, 5), dtype=np.float32) - return im_file, l, shape, segments, nm, nf, ne, nc, msg - except Exception as e: - nc = 1 - msg = f"{prefix}WARNING: {im_file}: ignoring corrupt image/label: {e}" - return [None, None, None, None, nm, nf, ne, nc, msg] - - -from torch.utils.data import DataLoader as torchDataLoader - - -class DataLoader(torchDataLoader): - """ - Lightnet dataloader that enables on the fly resizing of the images. - See :class:`torch.utils.data.DataLoader` for more information on the arguments. 
- Check more on the following website: - https://gitlab.com/EAVISE/lightnet/-/blob/master/lightnet/data/_dataloading.py - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def close_augment(self): - self.batch_sampler.augment = False - - -class InfiniteDataLoader(torchDataLoader): - """Dataloader that reuses workers - - Uses same syntax as vanilla DataLoader - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler)) - self.iterator = super().__iter__() - - def __len__(self): - return len(self.batch_sampler.sampler) - - def __iter__(self): - for i in range(len(self)): - yield next(self.iterator) diff --git a/detect_instseg.py b/segment/detect.py similarity index 98% rename from detect_instseg.py rename to segment/detect.py index a703f75d486b..d8e6150873f6 100644 --- a/detect_instseg.py +++ b/segment/detect.py @@ -169,7 +169,7 @@ def run( s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # plot masks - mcolors = [colors(int(cls)) for cls in det[:, 5]] + mcolors = [colors(int(cls)) for cls in range(len(det[:, 5]))] # NOTE: this way to draw masks is faster, # but the image might get blurred, # from https://github.com/dbolya/yolact @@ -180,7 +180,7 @@ def run( annotator.im = img_masks # Write results - for *xyxy, conf, cls in reversed(det): + for i, (*xyxy, conf, cls) in enumerate(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format @@ -190,7 +190,7 @@ def run( if save_img or save_crop or view_img: # Add bbox to image c = int(cls) # integer class label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') - annotator.box_label(xyxy, label, color=colors(c, True)) + annotator.box_label(xyxy, label, color=colors(i, True)) if save_crop: save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) diff --git a/evaluator.py b/segment/evaluator.py similarity index 98% rename from evaluator.py rename to segment/evaluator.py index 1db1ff28c7d0..acf8f94e42cf 100644 --- a/evaluator.py +++ b/segment/evaluator.py @@ -15,18 +15,17 @@ import numpy as np import torch import torch.nn.functional as F -from PIL import Image import pycocotools.mask as mask_util from tqdm import tqdm from models.experimental import attempt_load -from seg_dataloaders import create_dataloader +from utils.segment.dataloaders import create_dataloader from utils.general import (box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, ) from utils.general import (check_dataset, check_img_size, check_suffix) from utils.general import (coco80_to_coco91_class, increment_path, colorstr, ) from utils.plots import output_to_target, plot_images_boxes_and_masks -from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix -from utils.segment import (non_max_suppression_masks, mask_iou, process_mask, process_mask_upsample, scale_masks, ) +from utils.segment.metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix +from utils.segment.general import (non_max_suppression_masks, mask_iou, process_mask, process_mask_upsample, scale_masks, ) from utils.torch_utils import select_device, time_sync, de_parallel @@ -493,9 +492,9 @@ def plot_images(self, i, img, targets, masks, out, paths): #Thread(target=plot_images_boxes_and_masks, # args=(img, 
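`InfiniteDataLoader` above wraps its `batch_sampler` in `_RepeatSampler`, which is not shown in this hunk. For reference, in the YOLOv5 codebase it is essentially the following: a sampler that re-yields its wrapped sampler forever, so DataLoader worker processes are never torn down between epochs.

    class _RepeatSampler:
        # Sampler that repeats forever so DataLoader workers persist across epochs
        def __init__(self, sampler):
            self.sampler = sampler

        def __iter__(self):
            while True:
                yield from iter(self.sampler)
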
output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:]),), # daemon=True, ).start() - import wandb - if wandb.run: - wandb.log({f"pred_{i}": wandb.Image(str(f))}, step=self.step) + # import wandb + # if wandb.run: + # wandb.log({f"pred_{i}": wandb.Image(str(f))}, step=self.step) def nms(self, **kwargs): return (non_max_suppression_masks(**kwargs) if self.mask else non_max_suppression(**kwargs)) diff --git a/train_instseg.py b/segment/train.py similarity index 98% rename from train_instseg.py rename to segment/train.py index 5f98ff839ba6..1bee611e4ec1 100644 --- a/train_instseg.py +++ b/segment/train.py @@ -33,7 +33,7 @@ from tqdm import tqdm FILE = Path(__file__).resolve() -ROOT = FILE.parents[0] # YOLOv5 root directory +ROOT = FILE.parents[1] # YOLOv5 root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative @@ -44,15 +44,15 @@ from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size from utils.callbacks import Callbacks -from seg_dataloaders import create_dataloader +from utils.segment.dataloaders import create_dataloader from utils.downloads import attempt_download from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, fitness, check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer) -from utils.loggers import Loggers, NewLoggersMask +from utils.loggers import NewLoggersMask from utils.loggers.wandb.wandb_utils import check_wandb_resume -from utils.seg_loss import ComputeLoss +from utils.segment.loss import ComputeLoss #from utils.metrics import fitness from utils.plots import plot_evolve, plot_labels from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first @@ -251,7 +251,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio image_weights=opt.image_weights, quad=opt.quad, prefix=colorstr('train: '), - mask_head=True, shuffle=True, mask_downsample_ratio=mask_ratio, overlap_mask=overlap, @@ -274,7 +273,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio rank=-1, workers=workers * 2, pad=0.5, - mask_head=True, mask_downsample_ratio=mask_ratio, overlap_mask=overlap, prefix=colorstr('val: '))[0] @@ -344,7 +342,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio mloss = torch.zeros(4, device=device) # mean losses if RANK != -1: - train_loader.batch_sampler.sampler.set_epoch(epoch) + train_loader.sampler.set_epoch(epoch) pbar = enumerate(train_loader) LOGGER.info( ("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "seg", "obj", "cls", "labels", "img_size")) if RANK in {-1, 0}: diff --git a/val_instseg.py b/segment/val.py similarity index 100% rename from val_instseg.py rename to segment/val.py diff --git a/segment/val_new.py b/segment/val_new.py new file mode 100644 index 000000000000..033dec732bd8 --- /dev/null +++ b/segment/val_new.py @@ -0,0 +1,459 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Validate a trained YOLOv5 model accuracy on a custom dataset + +Usage: + $ python path/to/val.py --weights yolov5s.pt --data coco128.yaml --img 640 + +Usage - formats: + $ python path/to/val.py --weights yolov5s.pt # PyTorch + 
                                 yolov5s.torchscript        # TorchScript
+                                 yolov5s.onnx               # ONNX Runtime or OpenCV DNN with --dnn
+                                 yolov5s.xml                # OpenVINO
+                                 yolov5s.engine             # TensorRT
+                                 yolov5s.mlmodel            # CoreML (macOS-only)
+                                 yolov5s_saved_model        # TensorFlow SavedModel
+                                 yolov5s.pb                 # TensorFlow GraphDef
+                                 yolov5s.tflite             # TensorFlow Lite
+                                 yolov5s_edgetpu.tflite     # TensorFlow Edge TPU
+"""
+
+import argparse
+import json
+import os
+import sys
+from pathlib import Path
+
+import numpy as np
+import torch
+from tqdm import tqdm
+
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[1]  # YOLOv5 root directory (this file lives in segment/)
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
+
+import torch.nn.functional as F
+import pycocotools.mask as mask_util
+from models.common import DetectMultiBackend
+from utils.callbacks import Callbacks
+from utils.segment.dataloaders import create_dataloader
+from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml,
+                           coco80_to_coco91_class, colorstr, emojis, increment_path, print_args,
+                           scale_coords, xywh2xyxy, xyxy2xywh)
+from utils.segment.general import non_max_suppression_masks, process_mask_upsample, mask_iou, scale_masks
+from utils.metrics import ConfusionMatrix, ap_per_class, box_iou
+from utils.segment.metrics import ap_per_class_box_and_mask, Metrics
+from utils.plots import output_to_target, plot_images, plot_val_study
+from utils.torch_utils import select_device, time_sync, de_parallel
+
+
+def save_one_txt(predn, save_conf, shape, file):
+    # Save one txt result
+    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain whwh
+    for *xyxy, conf, cls in predn.tolist():
+        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
+        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
+        with open(file, 'a') as f:
+            f.write(('%g ' * len(line)).rstrip() % line + '\n')
+
+
+def save_one_json(predn, jdict, path, class_map, pred_masks):
+    # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
+    image_id = int(path.stem) if path.stem.isnumeric() else path.stem
+    box = xyxy2xywh(predn[:, :4])  # xywh
+    box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
+
+    pred_masks = np.transpose(pred_masks, (2, 0, 1))
+    rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks]
+    for rle in rles:
+        rle["counts"] = rle["counts"].decode("utf-8")
+
+    for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())):
+        pred_dict = {
+            'image_id': image_id,
+            'category_id': class_map[int(p[5])],
+            'bbox': [round(x, 3) for x in b],
+            'score': round(p[4], 5)}
+        pred_dict["segmentation"] = rles[i]
+        jdict.append(pred_dict)
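`save_one_json` above serializes each predicted mask to COCO run-length encoding. `mask_util.encode` expects a Fortran-ordered `uint8` array of shape `(H, W, n)` and returns one dict per mask whose `counts` field is `bytes`, hence the `decode("utf-8")` before JSON dumping. A standalone sketch with a dummy mask:

    import numpy as np
    import pycocotools.mask as mask_util

    mask = np.zeros((4, 4), dtype=np.uint8)
    mask[1:3, 1:3] = 1                                    # a 2x2 square of foreground
    rle = mask_util.encode(np.asarray(mask[:, :, None], order="F"))[0]
    rle["counts"] = rle["counts"].decode("utf-8")         # now JSON-serializable
    # rle -> {'size': [4, 4], 'counts': '...'} (counts string depends on the mask)
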
+
+
+def process_batch(detections, labels, iouv):
+    """
+    Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
+    Arguments:
+        detections (Array[N, 6]), x1, y1, x2, y2, conf, class
+        labels (Array[M, 5]), class, x1, y1, x2, y2
+    Returns:
+        correct (Array[N, 10]), for 10 IoU levels
+    """
+    correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
+    iou = box_iou(labels[:, 1:], detections[:, :4])
+    correct_class = labels[:, 0:1] == detections[:, 5]
+    for i in range(len(iouv)):
+        x = torch.where((iou >= iouv[i]) & correct_class)  # IoU > threshold and classes match
+        if x[0].shape[0]:
+            matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()  # [label, detect, iou]
+            if x[0].shape[0] > 1:
+                matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+                # matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+            correct[matches[:, 1].astype(int), i] = True
+    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
+
+
+def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap):
+    correct = torch.zeros(predn.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device)
+    # decode overlap-encoded masks (1, 640, 640) -> per-instance masks (n, 640, 640)
+    if overlap:
+        nl = len(labels)
+        index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
+        gt_masks = gt_masks.repeat(nl, 1, 1)
+        gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
+
+    if gt_masks.shape[1:] != pred_masks.shape[1:]:
+        gt_masks = F.interpolate(gt_masks.unsqueeze(0), pred_masks.shape[1:], mode="bilinear",
+                                 align_corners=False).squeeze(0)
+
+    iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
+    x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == predn[:, 5]))  # IoU above threshold and classes match
+    if x[0].shape[0]:
+        matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()  # [label, detection, iou]
+        if x[0].shape[0] > 1:
+            matches = matches[matches[:, 2].argsort()[::-1]]
+            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+            # matches = matches[matches[:, 2].argsort()[::-1]]
+            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+        matches = torch.Tensor(matches).to(iouv.device)
+        correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
+    return correct
+
+
+@torch.no_grad()
+def run(
+        data,
+        weights=None,  # model.pt path(s)
+        batch_size=32,  # batch size
+        imgsz=640,  # inference size (pixels)
+        conf_thres=0.001,  # confidence threshold
+        iou_thres=0.6,  # NMS IoU threshold
+        task='val',  # train, val, test, speed or study
+        device='',  # cuda device, i.e.
0 or 0,1,2,3 or cpu + workers=8, # max dataloader workers (per RANK in DDP mode) + single_cls=False, # treat as single-class dataset + augment=False, # augmented inference + verbose=False, # verbose output + save_txt=False, # save results to *.txt + save_hybrid=False, # save label+prediction hybrid results to *.txt + save_conf=False, # save confidences in --save-txt labels + save_json=False, # save a COCO-JSON results file + project=ROOT / 'runs/val', # save to project/name + name='exp', # save to project/name + exist_ok=False, # existing project/name ok, do not increment + half=True, # use FP16 half-precision inference + dnn=False, # use OpenCV DNN for ONNX inference + model=None, + dataloader=None, + save_dir=Path(''), + plots=True, + overlap=False, + mask_downsample_ratio=1, + callbacks=Callbacks(), + compute_loss=None, +): + # Initialize/load model and set device + training = model is not None + if training: # called by train.py + device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model + half &= device.type != 'cpu' # half precision only supported on CUDA + model.half() if half else model.float() + else: # called directly + device = select_device(device, batch_size=batch_size) + + # Directories + save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run + (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir + + # Load model + model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) + stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine + imgsz = check_img_size(imgsz, s=stride) # check image size + half = model.fp16 # FP16 supported on limited backends with CUDA + if engine: + batch_size = model.batch_size + else: + device = model.device + if not (pt or jit): + batch_size = 1 # export.py models default to batch-size 1 + LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') + + # Data + data = check_dataset(data) # check + + # Configure + model.eval() + cuda = device.type != 'cpu' + is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'coco{os.sep}val2017.txt') # COCO dataset + nc = 1 if single_cls else int(data['nc']) # number of classes + iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95 + niou = iouv.numel() + + # Dataloader + if not training: + if pt and not single_cls: # check --weights are trained on --data + ncm = model.model.nc + assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \ + f'classes). Pass correct combination of --weights and --data that are trained together.' 
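For reference, the `iouv` vector configured above is the ten-threshold grid behind mAP@0.5:0.95; column `i` of each `correct` matrix produced by `process_batch` marks whether a prediction counts as a true positive at threshold `iouv[i]`:

    import torch

    iouv = torch.linspace(0.5, 0.95, 10)  # tensor([0.5000, 0.5500, ..., 0.9500])
    niou = iouv.numel()                   # 10; one TP column per IoU threshold
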
+        model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz))  # warmup
+        pad = 0.0 if task in ('speed', 'benchmark') else 0.5
+        rect = False if task == 'benchmark' else pt  # square inference for benchmarks
+        task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
+        dataloader = create_dataloader(data[task],
+                                       imgsz,
+                                       batch_size,
+                                       stride,
+                                       single_cls,
+                                       pad=pad,
+                                       rect=rect,
+                                       workers=workers,
+                                       prefix=colorstr(f'{task}: '),
+                                       overlap_mask=overlap,
+                                       mask_downsample_ratio=mask_downsample_ratio)[0]
+
+    seen = 0
+    confusion_matrix = ConfusionMatrix(nc=nc)
+    names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
+    class_map = coco80_to_coco91_class() if is_coco else list(range(1000))
+    s = ("%20s" + "%11s" * 10) % ("Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}",
+                                  "Mask:{P", "R", "mAP@.5", "mAP@.5:.95}")
+    dt = [0.0, 0.0, 0.0]
+    metrics = Metrics()
+    loss = torch.zeros(4, device=device)
+    jdict, stats = [], []
+    callbacks.run('on_val_start')
+    pbar = tqdm(dataloader, desc=s, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')  # progress bar
+    for batch_i, (im, targets, paths, shapes, masks) in enumerate(pbar):
+        callbacks.run('on_val_batch_start')
+        t1 = time_sync()
+        if cuda:
+            im = im.to(device, non_blocking=True)
+            targets = targets.to(device)
+            masks = masks.to(device).float()
+        im = im.half() if half else im.float()  # uint8 to fp16/32
+        im /= 255  # 0 - 255 to 0.0 - 1.0
+        nb, _, height, width = im.shape  # batch size, channels, height, width
+        t2 = time_sync()
+        dt[0] += t2 - t1
+
+        # Inference
+        out, train_out = model(im) if training else model(im, augment=augment, val=True)  # inference, loss outputs
+        dt[1] += time_sync() - t2
+
+        # Loss
+        if compute_loss:
+            loss += compute_loss([x.float() for x in train_out], targets, masks)[1]  # box, seg, obj, cls
+
+        # NMS
+        targets[:, 2:] *= torch.tensor((width, height, width, height), device=device)  # to pixels
+        lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
+        t3 = time_sync()
+        out = non_max_suppression_masks(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls,
+                                        mask_dim=de_parallel(model).model[-1].mask_dim)
+        dt[2] += time_sync() - t3
+
+        # Metrics
+        for si, pred in enumerate(out):
+            labels = targets[targets[:, 0] == si, 1:]
+            midx = [si] if overlap else targets[:, 0] == si
+            gt_masks = masks[midx]
+            proto_out = train_out[1][si]
+            # combine mask prototypes with per-detection coefficients, then upsample to input size
+            pred_masks = process_mask_upsample(proto_out, pred[:, 6:], pred[:, :4],
+                                               shape=im[si].shape[1:]).permute(2, 0, 1).contiguous()
+
+            nl, npr = labels.shape[0], pred.shape[0]  # number of labels, predictions
+            path, shape = Path(paths[si]), shapes[si][0]
+            correct_masks = torch.zeros(npr, niou, dtype=torch.bool, device=device)  # init
+            correct_bboxes = torch.zeros(npr, niou, dtype=torch.bool, device=device)  # init
+            seen += 1
+
+            if npr == 0:
+                if nl:
+                    stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0]))
+                continue
+
+            # Predictions
+            if single_cls:
+                pred[:, 5] = 0
+            predn = pred.clone()
+            scale_coords(im[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # native-space pred
+
+            # Evaluate
+            if nl:
+                tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
+                scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
+                labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
+                correct_bboxes = process_batch(predn, labelsn, iouv)
+                correct_masks = process_batch_masks(predn, pred_masks, gt_masks, labelsn, iouv, overlap=overlap)
+                if plots:
+                    confusion_matrix.process_batch(predn, labelsn)
+            stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0]))  # (correct_masks, correct_bboxes, conf, pcls, tcls)
+
+            # Save/log
+            if save_txt:
+                save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt'))
+            if save_json:
+                pred_masks = scale_masks(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(),
+                                         shape, shapes[si][1])
+                save_one_json(predn, jdict, path, class_map, pred_masks)  # append to COCO-JSON dictionary
+            callbacks.run('on_val_image_end', pred, predn, path, names, im[si])
+
+        # Plot images
+        if plots and batch_i < 3:
+            # TODO: plot with masks
+            plot_images(im, targets, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names)  # labels
+            plot_images(im, output_to_target(out), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names)  # pred
+
+        callbacks.run('on_val_batch_end')
+
+    # Compute metrics
+    stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)]  # to numpy
+    if len(stats) and stats[0].any():
+        results = ap_per_class_box_and_mask(*stats, plot=plots, save_dir=save_dir, names=names)
+        metrics.update(results)
+        # ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
+        # mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
+        nt = np.bincount(stats[4].astype(int), minlength=nc)  # number of targets per class
+    else:
+        nt = torch.zeros(1)
+
+    # Print results
+    pf = '%20s' + '%11i' * 2 + '%11.3g' * 8  # print format
+    LOGGER.info(pf % ("all", seen, nt.sum(), *metrics.mean_results()))
+
+    # Print results per class
+    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
+        for i, c in enumerate(metrics.ap_class_index):
+            LOGGER.info(pf % (names[c], seen, nt[c], *metrics.class_result(i)))
+
+    # Print speeds
+    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
+    if not training:
+        shape = (batch_size, 3, imgsz, imgsz)
+        LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t)
+
+    # Plots
+    if plots:
+        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
+        callbacks.run('on_val_end')
+
+    # Save JSON
+    if save_json and len(jdict):
+        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
+        anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json')  # annotations json
+        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
+        LOGGER.info(f'\nEvaluating pycocotools mAP...
saving {pred_json}...') + with open(pred_json, 'w') as f: + json.dump(jdict, f) + + try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb + check_requirements(['pycocotools']) + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, 'bbox') + if is_coco: + eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # image IDs to evaluate + eval.evaluate() + eval.accumulate() + eval.summarize() + # TODO: update these to metrics + map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) + except Exception as e: + LOGGER.info(f'pycocotools unable to run: {e}') + + # Return results + model.float() # for training + if not training: + s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' + LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") + return ((*metrics.mean_results(), *(loss.cpu() / len(dataloader)).tolist()), + metrics.get_maps(nc), t,) + # maps = np.zeros(nc) + map + # for i, c in enumerate(ap_class): + # maps[c] = ap[i] + # return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t + + +def parse_opt(): + parser = argparse.ArgumentParser() + parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)') + parser.add_argument('--batch-size', type=int, default=32, help='batch size') + parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') + parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') + parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold') + parser.add_argument('--task', default='val', help='train, val, test, speed or study') + parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') + parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') + parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') + parser.add_argument('--augment', action='store_true', help='augmented inference') + parser.add_argument('--verbose', action='store_true', help='report mAP by class') + parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') + parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt') + parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') + parser.add_argument('--save-json', action='store_true', help='save a COCO-JSON results file') + parser.add_argument('--project', default=ROOT / 'runs/val', help='save to project/name') + parser.add_argument('--name', default='exp', help='save to project/name') + parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') + parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') + opt = parser.parse_args() + opt.data = check_yaml(opt.data) # check YAML + opt.save_json |= opt.data.endswith('coco.yaml') + opt.save_txt |= opt.save_hybrid + print_args(vars(opt)) + return opt + + +def main(opt): + check_requirements(requirements=ROOT / 'requirements.txt', exclude=('tensorboard', 'thop')) + + if opt.task in ('train', 'val', 'test'): # run normally + if opt.conf_thres > 0.001: # https://github.com/ultralytics/yolov5/issues/1466 + LOGGER.info(emojis(f'WARNING: confidence threshold {opt.conf_thres} > 0.001 produces invalid results ⚠️')) + run(**vars(opt)) + + else: + weights = opt.weights if isinstance(opt.weights, list) else [opt.weights] + opt.half = True # FP16 for fastest results + if opt.task == 'speed': # speed benchmarks + # python val.py --task speed --data coco.yaml --batch 1 --weights yolov5n.pt yolov5s.pt... + opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False + for opt.weights in weights: + run(**vars(opt), plots=False) + + elif opt.task == 'study': # speed vs mAP benchmarks + # python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n.pt yolov5s.pt... 
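Each study run (see the sweep below) writes one `study_*.txt` per weights file, with one row per image size containing the returned metrics followed by the three per-image timings; `plot_val_study` reads these files back. The rows can also be inspected directly, as in this sketch (the filename is hypothetical, matching the pattern the loop generates):

    import numpy as np

    y = np.loadtxt('study_coco_yolov5s.txt', ndmin=2)   # hypothetical study file
    for imgsz, row in zip(range(256, 1536 + 128, 128), y):
        print(imgsz, row[-3:])                          # image size and per-image timings (ms)
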
+ for opt.weights in weights: + f = f'study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt' # filename to save to + x, y = list(range(256, 1536 + 128, 128)), [] # x axis (image sizes), y axis + for opt.imgsz in x: # img-size + LOGGER.info(f'\nRunning {f} --imgsz {opt.imgsz}...') + r, _, t = run(**vars(opt), plots=False) + y.append(r + t) # results and times + np.savetxt(f, y, fmt='%10.4g') # save + os.system('zip -r study.zip study_*.txt') + plot_val_study(x=x) # plot + + +if __name__ == "__main__": + opt = parse_opt() + main(opt) diff --git a/utils/dataloaders.py b/utils/dataloaders.py old mode 100755 new mode 100644 index 9ccfe2545d75..260fb6a97da9 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -511,6 +511,7 @@ def __init__(self, self.im_files = [self.im_files[i] for i in irect] self.label_files = [self.label_files[i] for i in irect] self.labels = [self.labels[i] for i in irect] + self.segments = [self.segments[i] for i in irect] self.shapes = s[irect] # wh ar = ar[irect] diff --git a/utils/general.py b/utils/general.py old mode 100755 new mode 100644 diff --git a/utils/metrics.py b/utils/metrics.py index cfdfbdb88b2c..605e692de04d 100644 --- a/utils/metrics.py +++ b/utils/metrics.py @@ -26,7 +26,7 @@ def smooth(y, f=0.05): return np.convolve(yp, np.ones(nf) / nf, mode='valid') # y-smoothed -def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16): +def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16, prefix=""): """ Compute the average precision, given the recall and precision curves. Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. # Arguments @@ -81,10 +81,10 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data names = dict(enumerate(names)) # to dict if plot: - plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names) - plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1') - plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision') - plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall') + plot_pr_curve(px, py, ap, Path(save_dir) / f'{prefix}PR_curve.png', names) + plot_mc_curve(px, f1, Path(save_dir) / f'{prefix}F1_curve.png', names, ylabel='F1') + plot_mc_curve(px, p, Path(save_dir) / f'{prefix}P_curve.png', names, ylabel='Precision') + plot_mc_curve(px, r, Path(save_dir) / f'{prefix}R_curve.png', names, ylabel='Recall') i = smooth(f1.mean(0), 0.1).argmax() # max F1 index p, r, f1 = p[:, i], r[:, i], f1[:, i] @@ -352,4 +352,4 @@ def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabel='Confi ax.set_ylim(0, 1) plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") fig.savefig(save_dir, dpi=250) - plt.close() \ No newline at end of file + plt.close() diff --git a/utils/seg_metrics.py b/utils/seg_metrics.py deleted file mode 100644 index 9c6133118dfa..000000000000 --- a/utils/seg_metrics.py +++ /dev/null @@ -1,361 +0,0 @@ -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license -""" -Model validation metrics -""" - -import math -import warnings -from pathlib import Path - -import matplotlib.pyplot as plt -import numpy as np -import torch -from easydict import EasyDict as edict - - -def fitness(x, masks=False): - # Model fitness as a weighted combination of metrics - if masks: - w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] - return (x[:, :8] * w).sum(1) - w = [0.0, 0.0, 
0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] - return (x[:, :4] * w).sum(1) - - -def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), prefix=""): - """Compute the average precision, given the recall and precision curves. - Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. - # Arguments - tp: True positives (nparray, nx1 or nx10). - conf: Objectness value from 0-1 (nparray). - pred_cls: Predicted object classes (nparray). - target_cls: True object classes (nparray). - plot: Plot precision-recall curve at mAP@0.5 - save_dir: Plot save directory. - prefix: prefix. - # Returns - The average precision as computed in py-faster-rcnn. - """ - - # Sort by objectness - i = np.argsort(-conf) - tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] - - # Find unique classes - unique_classes = np.unique(target_cls) - nc = unique_classes.shape[0] # number of classes, number of detections - - # Create Precision-Recall curve and compute AP for each class - px, py = np.linspace(0, 1, 1000), [] # for plotting - ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) - for ci, c in enumerate(unique_classes): - i = pred_cls == c - n_l = (target_cls == c).sum() # number of labels - n_p = i.sum() # number of predictions - - if n_p == 0 or n_l == 0: - continue - else: - # Accumulate FPs and TPs - fpc = (1 - tp[i]).cumsum(0) - tpc = tp[i].cumsum(0) - - # Recall - recall = tpc / (n_l + 1e-16) # recall curve - r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases - - # Precision - precision = tpc / (tpc + fpc) # precision curve - p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score - - # AP from recall-precision curve - for j in range(tp.shape[1]): - ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) - if plot and j == 0: - py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 - - # Compute F1 (harmonic mean of precision and recall) - f1 = 2 * p * r / (p + r + 1e-16) - names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data - names = {i: v for i, v in enumerate(names)} # to dict - if plot and save_dir is not None: - plot_pr_curve(px, py, ap, Path(save_dir) / f"{prefix}PR_curve.png", names) - plot_mc_curve(px, f1, Path(save_dir) / f"{prefix}F1_curve.png", names, ylabel="F1") - plot_mc_curve(px, p, Path(save_dir) / f"{prefix}P_curve.png", names, ylabel="Precision") - plot_mc_curve(px, r, Path(save_dir) / f"{prefix}R_curve.png", names, ylabel="Recall") - - i = f1.mean(0).argmax() # max F1 index - return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype("int32") - - -def ap_per_class_box_and_mask(tp_m, tp_b, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), ): - """ - Args: - tp_b: tp of boxes. - tp_m: tp of masks. - other arguments see `func: ap_per_class`. 
- """ - results_boxes = ap_per_class(tp_b, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, - prefix="Box", ) - results_masks = ap_per_class(tp_m, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, - prefix="Mask", ) - - results = edict({ - "boxes": {"p": results_boxes[0], "r": results_boxes[1], "ap": results_boxes[2], "f1": results_boxes[3], - "ap_class": results_boxes[4], }, - "masks": {"p": results_masks[0], "r": results_masks[1], "ap": results_masks[2], "f1": results_masks[3], - "ap_class": results_masks[4], }, }) - return results - - -def compute_ap(recall, precision): - """Compute the average precision, given the recall and precision curves - # Arguments - recall: The recall curve (list) - precision: The precision curve (list) - # Returns - Average precision, precision curve, recall curve - """ - - # Append sentinel values to beginning and end - mrec = np.concatenate(([0.0], recall, [1.0])) - mpre = np.concatenate(([1.0], precision, [0.0])) - - # Compute the precision envelope - mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) - - # Integrate area under curve - method = "interp" # methods: 'continuous', 'interp' - if method == "interp": - x = np.linspace(0, 1, 101) # 101-point interp (COCO) - ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate - else: # 'continuous' - i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes - ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve - - return ap, mpre, mrec - - -class ConfusionMatrix: - # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix - def __init__(self, nc, conf=0.25, iou_thres=0.45): - self.matrix = np.zeros((nc + 1, nc + 1)) - self.nc = nc # number of classes - self.conf = conf - self.iou_thres = iou_thres - - def process_batch(self, detections, labels): - """ - Return intersection-over-union (Jaccard index) of boxes. - Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
- Arguments: - detections (Array[N, 6]), x1, y1, x2, y2, conf, class - labels (Array[M, 5]), class, x1, y1, x2, y2 - Returns: - None, updates confusion matrix accordingly - """ - detections = detections[detections[:, 4] > self.conf] - gt_classes = labels[:, 0].int() - detection_classes = detections[:, 5].int() - iou = box_iou(labels[:, 1:], detections[:, :4]) - - x = torch.where(iou > self.iou_thres) - if x[0].shape[0]: - matches = (torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) - if x[0].shape[0] > 1: - matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 1], return_index=True)[1]] - matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 0], return_index=True)[1]] - else: - matches = np.zeros((0, 3)) - - n = matches.shape[0] > 0 - m0, m1, _ = matches.transpose().astype(np.int16) - for i, gc in enumerate(gt_classes): - j = m0 == i - if n and sum(j) == 1: - self.matrix[detection_classes[m1[j]], gc] += 1 # correct - else: - self.matrix[self.nc, gc] += 1 # background FP - - if n: - for i, dc in enumerate(detection_classes): - if not any(m1 == i): - self.matrix[dc, self.nc] += 1 # background FN - - def matrix(self): - return self.matrix - - def plot(self, normalize=True, save_dir="", names=()): - try: - import seaborn as sn - - array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1e-6) if normalize else 1) # normalize columns - array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) - - fig = plt.figure(figsize=(12, 9), tight_layout=True) - sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size - labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels - with warnings.catch_warnings(): - warnings.simplefilter("ignore") # suppress empty matrix RuntimeWarning: All-NaN slice encountered - sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap="Blues", fmt=".2f", square=True, - xticklabels=names + ["background FP"] if labels else "auto", - yticklabels=names + ["background FN"] if labels else "auto", ).set_facecolor((1, 1, 1)) - fig.axes[0].set_xlabel("True") - fig.axes[0].set_ylabel("Predicted") - fig.savefig(Path(save_dir) / "confusion_matrix.png", dpi=250) - plt.close() - except Exception as e: - print(f"WARNING: ConfusionMatrix plot failure: {e}") - - def print(self): - for i in range(self.nc + 1): - print(" ".join(map(str, self.matrix[i]))) - - -def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): - # Returns the IoU of box1 to box2. 
box1 is 4, box2 is nx4 - box2 = box2.T - - # Get the coordinates of bounding boxes - if x1y1x2y2: # x1, y1, x2, y2 = box1 - b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] - b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] - else: # transform from xywh to xyxy - b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 - b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 - b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 - b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 - - # Intersection area - inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * ( - torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) - - # Union Area - w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps - w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps - union = w1 * h1 + w2 * h2 - inter + eps - - iou = inter / union - if GIoU or DIoU or CIoU: - cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width - ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height - if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 - c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared - rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + ( - b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared - if DIoU: - return iou - rho2 / c2 # DIoU - elif (CIoU): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 - v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) - with torch.no_grad(): - alpha = v / (v - iou + (1 + eps)) - return iou - (rho2 / c2 + v * alpha) # CIoU - else: # GIoU https://arxiv.org/pdf/1902.09630.pdf - c_area = cw * ch + eps # convex area - return iou - (c_area - union) / c_area # GIoU - else: - return iou # IoU - - -def box_iou(box1, box2): - # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py - """ - Return intersection-over-union (Jaccard index) of boxes. - Both sets of boxes are expected to be in (x1, y1, x2, y2) format. - Arguments: - box1 (Tensor[N, 4]) - box2 (Tensor[M, 4]) - Returns: - iou (Tensor[N, M]): the NxM matrix containing the pairwise - IoU values for every element in boxes1 and boxes2 - """ - - def box_area(box): - # box = 4xn - return (box[2] - box[0]) * (box[3] - box[1]) - - area1 = box_area(box1.T) - area2 = box_area(box2.T) - - # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) - inter = ((torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)) - return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter) - - -def bbox_ioa(box1, box2, eps=1e-7): - """Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2 - box1: np.array of shape(4) - box2: np.array of shape(nx4) - returns: np.array of shape(n) - """ - - box2 = box2.transpose() - - # Get the coordinates of bounding boxes - b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] - b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] - - # Intersection area - inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * ( - np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0) - - # box2 area - box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps - - # Intersection over box2 area - return inter_area / box2_area - - -def wh_iou(wh1, wh2): - # Returns the nxm IoU matrix. 
wh1 is nx2, wh2 is mx2 - wh1 = wh1[:, None] # [N,1,2] - wh2 = wh2[None] # [1,M,2] - inter = torch.min(wh1, wh2).prod(2) # [N,M] - return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter) - - -# Plots ---------------------------------------------------------------------------------------------------------------- - - -def plot_pr_curve(px, py, ap, save_dir="pr_curve.png", names=()): - # Precision-recall curve - fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) - py = np.stack(py, axis=1) - - if 0 < len(names) < 21: # display per-class legend if < 21 classes - for i, y in enumerate(py.T): - ax.plot(px, y, linewidth=1, label=f"{names[i]} {ap[i, 0]:.3f}") # plot(recall, precision) - else: - ax.plot(px, py, linewidth=1, color="grey") # plot(recall, precision) - - ax.plot(px, py.mean(1), linewidth=3, color="blue", label="all classes %.3f mAP@0.5" % ap[:, 0].mean(), ) - ax.set_xlabel("Recall") - ax.set_ylabel("Precision") - ax.set_xlim(0, 1) - ax.set_ylim(0, 1) - plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") - fig.savefig(Path(save_dir), dpi=250) - plt.close() - - -def plot_mc_curve(px, py, save_dir="mc_curve.png", names=(), xlabel="Confidence", ylabel="Metric"): - # Metric-confidence curve - fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) - - if 0 < len(names) < 21: # display per-class legend if < 21 classes - for i, y in enumerate(py): - ax.plot(px, y, linewidth=1, label=f"{names[i]}") # plot(confidence, metric) - else: - ax.plot(px, py.T, linewidth=1, color="grey") # plot(confidence, metric) - - y = py.mean(0) - ax.plot(px, y, linewidth=3, color="blue", label=f"all classes {y.max():.2f} at {px[y.argmax()]:.3f}", ) - ax.set_xlabel(xlabel) - ax.set_ylabel(ylabel) - ax.set_xlim(0, 1) - ax.set_ylim(0, 1) - plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") - fig.savefig(Path(save_dir), dpi=250) - plt.close() diff --git a/utils/segment/__init__.py b/utils/segment/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/utils/segment/augmentations.py b/utils/segment/augmentations.py new file mode 100644 index 000000000000..be788a81ea94 --- /dev/null +++ b/utils/segment/augmentations.py @@ -0,0 +1,114 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Image augmentation functions +""" + +import math +import random + +import cv2 +import numpy as np + +from ..general import segment2box, resample_segments +from ..augmentations import box_candidates + + +def random_perspective(im, + targets=(), + segments=(), + degrees=10, + translate=.1, + scale=.1, + shear=10, + perspective=0.0, + border=(0, 0)): + # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) + # targets = [cls, xyxy] + + height = im.shape[0] + border[0] * 2 # shape(h,w,c) + width = im.shape[1] + border[1] * 2 + + # Center + C = np.eye(3) + C[0, 2] = -im.shape[1] / 2 # x translation (pixels) + C[1, 2] = -im.shape[0] / 2 # y translation (pixels) + + # Perspective + P = np.eye(3) + P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) + P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) + + # Rotation and Scale + R = np.eye(3) + a = random.uniform(-degrees, degrees) + # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations + s = random.uniform(1 - scale, 1 + scale) + # s = 2 ** random.uniform(-scale, scale) + R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) + + # Shear + S = np.eye(3) + S[0, 1] = 
math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) + + # Translation + T = np.eye(3) + T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels) + T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels) + + # Combined rotation matrix + M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT + if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed + if perspective: + im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) + else: # affine + im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) + + # Visualize + # import matplotlib.pyplot as plt + # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() + # ax[0].imshow(im[:, :, ::-1]) # base + # ax[1].imshow(im2[:, :, ::-1]) # warped + + # Transform label coordinates + n = len(targets) + new_segments = [] + if n: + use_segments = any(x.any() for x in segments) + new = np.zeros((n, 4)) + if use_segments: # warp segments + segments = resample_segments(segments) # upsample + for i, segment in enumerate(segments): + xy = np.ones((len(segment), 3)) + xy[:, :2] = segment + xy = xy @ M.T # transform + xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine + + # clip + new[i] = segment2box(xy, width, height) + new_segments.append(xy) + + else: # warp boxes + xy = np.ones((n * 4, 3)) + xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 + xy = xy @ M.T # transform + xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine + + # create new boxes + x = xy[:, [0, 2, 4, 6]] + y = xy[:, [1, 3, 5, 7]] + new = (np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T) + + # clip + new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) + new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) + + # filter candidates + i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10) + targets = targets[i] + targets[:, 1:5] = new[i] + new_segments = np.array(new_segments)[i] + + return im, targets, new_segments + + diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py new file mode 100644 index 000000000000..0230bcee13d2 --- /dev/null +++ b/utils/segment/dataloaders.py @@ -0,0 +1,305 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Dataloaders +""" + +import numpy as np +import cv2 +import random +import os +import torch + +from torch.utils.data import DataLoader +from torch.utils.data import distributed + +from ..augmentations import augment_hsv, copy_paste, letterbox, mixup +from ..dataloaders import LoadImagesAndLabels, InfiniteDataLoader, seed_worker +from ..general import xywhn2xyxy, xyxy2xywhn, xyn2xy, LOGGER +from ..torch_utils import torch_distributed_zero_first +from .augmentations import random_perspective + + +def create_dataloader(path, + imgsz, + batch_size, + stride, + single_cls=False, + hyp=None, + augment=False, + cache=False, + pad=0.0, + rect=False, + rank=-1, + workers=8, + image_weights=False, + quad=False, + prefix='', + shuffle=False, + mask_downsample_ratio=1, + overlap_mask=False): + if rect and shuffle: + LOGGER.warning('WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False') + shuffle = False + with torch_distributed_zero_first(rank): # init dataset 
*.cache only once if DDP + dataset = LoadImagesAndLabelsAndMasks( + path, + imgsz, + batch_size, + augment=augment, # augmentation + hyp=hyp, # hyperparameters + rect=rect, # rectangular batches + cache_images=cache, + single_cls=single_cls, + stride=int(stride), + pad=pad, + image_weights=image_weights, + prefix=prefix, + downsample_ratio=mask_downsample_ratio, + overlap=overlap_mask) + + batch_size = min(batch_size, len(dataset)) + nd = torch.cuda.device_count() # number of CUDA devices + nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers + sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) + loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates + generator = torch.Generator() + generator.manual_seed(0) + return loader(dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=True, + collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn, + worker_init_fn=seed_worker, + generator=generator), dataset + + +class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing + def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, + cache_images=False, single_cls=False, stride=32, pad=0, prefix="", + downsample_ratio=1, overlap=False, + ): + super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls, + stride, pad, prefix) + self.downsample_ratio = downsample_ratio + self.overlap = overlap + + def __getitem__(self, index): + index = self.indices[index] # linear, shuffled, or image_weights + + hyp = self.hyp + mosaic = self.mosaic and random.random() < hyp['mosaic'] + masks = [] + if mosaic: + # Load mosaic + img, labels, segments = self.load_mosaic(index) + shapes = None + + # TODO: MixUp does not support segments yet + # MixUp augmentation + if random.random() < hyp["mixup"]: + img, labels = mixup(img, labels, *self.load_mosaic(random.randint(0, self.num_imgs - 1))) + + else: + # Load image + img, (h0, w0), (h, w) = self.load_image(index) + + # Letterbox + shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape + img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) + shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + + labels = self.labels[index].copy() + # [array, array, ....], array.shape=(num_points, 2), xyxyxyxy + segments = self.segments[index].copy() + if len(segments): + for i_s in range(len(segments)): + segments[i_s] = xyn2xy(segments[i_s], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1], ) + if labels.size: # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) + + if self.augment: + img, labels, segments = random_perspective(img, labels, segments=segments, degrees=hyp["degrees"], + translate=hyp["translate"], scale=hyp["scale"], shear=hyp["shear"], perspective=hyp["perspective"]) + + nl = len(labels) # number of labels + if nl: + labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) + if self.overlap: + masks, sorted_idx = polygons2masks_overlap(img.shape[:2], segments, + downsample_ratio=self.downsample_ratio) + masks = masks[None] # (640, 640) -> (1, 640, 640) + labels = 
labels[sorted_idx] + else: + masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio) + + masks = (torch.from_numpy(masks) if len(masks) else + torch.zeros(1 if self.overlap else nl, + img.shape[0] // self.downsample_ratio, + img.shape[1] // self.downsample_ratio)) + # TODO: albumentations support + if self.augment: + # Albumentations + # some augmentations do not alter boxes or masks, + # so they can be applied as-is for now. + img, labels = self.albumentations(img, labels) + nl = len(labels) # update after albumentations + + # HSV color-space + augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"]) + + # Flip up-down + if random.random() < hyp["flipud"]: + img = np.flipud(img) + if nl: + labels[:, 2] = 1 - labels[:, 2] + masks = torch.flip(masks, dims=[1]) + + # Flip left-right + if random.random() < hyp["fliplr"]: + img = np.fliplr(img) + if nl: + labels[:, 1] = 1 - labels[:, 1] + masks = torch.flip(masks, dims=[2]) + + # Cutouts # labels = cutout(img, labels, p=0.5) + + labels_out = torch.zeros((nl, 6)) + if nl: + labels_out[:, 1:] = torch.from_numpy(labels) + + # Convert + img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) + + return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks) + + def load_mosaic(self, index): + # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic + labels4, segments4 = [], [] + s = self.img_size + yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y + + # 3 additional image indices + indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices + for i, index in enumerate(indices): + # Load image + img, _, (h, w) = self.load_image(index) + + # place img in img4 + if i == 0: # top left + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) + elif i == 1: # top right + x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc + x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h + elif i == 2: # bottom left + x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) + x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) + elif i == 3: # bottom right + x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) + x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) + + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + padw = x1a - x1b + padh = y1a - y1b + + labels, segments = self.labels[index].copy(), self.segments[index].copy() + + if labels.size: + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format + segments = [xyn2xy(x, w, h, padw, padh) for x in segments] + labels4.append(labels) + segments4.extend(segments) + + # Concat/clip labels + labels4 = np.concatenate(labels4, 0) + for x in (labels4[:, 1:], *segments4): + np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() + # img4, labels4 = replicate(img4, labels4) # replicate + + # Augment + img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"]) + img4, labels4, segments4 = random_perspective( + img4, + labels4, + segments4, + degrees=self.hyp["degrees"], + translate=self.hyp["translate"], 
+ scale=self.hyp["scale"], + shear=self.hyp["shear"], + perspective=self.hyp["perspective"], + border=self.mosaic_border) # border to remove + return img4, labels4, segments4 + + @staticmethod + def collate_fn(batch): + img, label, path, shapes, masks = zip(*batch) # transposed + batched_masks = torch.cat(masks, 0) + for i, l in enumerate(label): + l[:, 0] = i # add target image index for build_targets() + return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks + + +def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): + """ + Args: + img_size (tuple): The image size. + polygons (np.ndarray): [N, M], N is the number of polygons, + M is the number of points (must be divisible by 2). + """ + mask = np.zeros(img_size, dtype=np.uint8) + polygons = np.asarray(polygons) + polygons = polygons.astype(np.int32) + shape = polygons.shape + polygons = polygons.reshape(shape[0], -1, 2) + cv2.fillPoly(mask, polygons, color=color) + nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio) + # NOTE: fillPoly first, then resize, to keep the loss calculation + # consistent with the mask-ratio=1 case. + mask = cv2.resize(mask, (nw, nh)) + return mask + + +def polygons2masks(img_size, polygons, color, downsample_ratio=1): + """ + Args: + img_size (tuple): The image size. + polygons (list[np.ndarray]): each polygon is [N, M], + N is the number of polygons, + M is the number of points (must be divisible by 2). + """ + masks = [] + for si in range(len(polygons)): + mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, + downsample_ratio) + masks.append(mask) + return np.array(masks) + + +def polygons2masks_overlap(img_size, segments, downsample_ratio=1): + """Return a (640, 640) overlap mask.""" + masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio), + dtype=np.uint8) + areas = [] + ms = [] + for si in range(len(segments)): + mask = polygon2mask( + img_size, + [segments[si].reshape(-1)], + downsample_ratio=downsample_ratio, + color=1, + ) + ms.append(mask) + areas.append(mask.sum()) + areas = np.asarray(areas) + index = np.argsort(-areas) + ms = np.array(ms)[index] + for i in range(len(segments)): + mask = ms[i] * (i + 1) + masks = masks + mask + masks = np.clip(masks, a_min=0, a_max=i + 1) + return masks, index diff --git a/utils/segment.py b/utils/segment/general.py similarity index 89% rename from utils/segment.py rename to utils/segment/general.py index d9773784eafa..70056a4bbb31 100644 --- a/utils/segment.py +++ b/utils/segment/general.py @@ -6,34 +6,8 @@ import torch.nn.functional as F import torchvision -from .general import xyxy2xywh, xywh2xyxy -from .seg_metrics import box_iou - - -def segment2box(segment, width=640, height=640): - # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) - x, y = segment.T # segment xy - inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) - x, y, = (x[inside], y[inside],) - return (np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4))) # xyxy - - -def segments2boxes(segments): - # Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) 
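polygons2masks_overlap above packs every instance of an image into one uint8 map: instances are painted in descending-area order, and the add-then-clip loop leaves each overlapping pixel holding index + 1 of the smaller instance covering it. A minimal sketch of the inverse operation, decoding such a map back into per-instance binary masks with the same arange/where trick that process_batch_masks in segment/val.py uses further below; decode_overlap_mask is an illustrative helper name, not part of this patch:

import torch

def decode_overlap_mask(overlap, n):
    # overlap: (H, W) map where pixel value k in 1..n marks instance k - 1, 0 is background
    index = torch.arange(n, device=overlap.device).view(n, 1, 1) + 1
    masks = overlap.unsqueeze(0).repeat(n, 1, 1)  # (n, H, W)
    return torch.where(masks == index, 1.0, 0.0)  # one binary mask per instance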
to (cls, xywh) - boxes = [] - for s in segments: - x, y = s.T # segment xy - boxes.append([x.min(), y.min(), x.max(), y.max()]) # cls, xyxy - return xyxy2xywh(np.array(boxes)) # cls, xywh - - -def resample_segments(segments, n=1000): - # Up-sample an (n,2) segment - for i, s in enumerate(segments): - x = np.linspace(0, len(s) - 1, n) - xp = np.arange(len(s)) - segments[i] = (np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T) # segment xy - return segments +from ..general import xywh2xyxy +from .metrics import box_iou def non_max_suppression_masks(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, diff --git a/utils/seg_loss.py b/utils/segment/loss.py similarity index 55% rename from utils/seg_loss.py rename to utils/segment/loss.py index e0618f831e63..47fed765f990 100644 --- a/utils/seg_loss.py +++ b/utils/segment/loss.py @@ -1,14 +1,33 @@ -# TODO: merge with loss.py.. Optimize speed - import torch import torch.nn as nn import torch.nn.functional as F -from utils.general import xywh2xyxy -from utils.loss import smooth_BCE, FocalLoss -from utils.segment import masks_iou, crop -from utils.torch_utils import is_parallel +from ..general import xywh2xyxy +from ..loss import smooth_BCE, FocalLoss +from ..torch_utils import is_parallel +from ..metrics import bbox_iou +from .general import masks_iou, crop +class MaskIOULoss(nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, pred_mask, gt_mask, mxyxy=None, return_iou=False): + """ + Args: + pred_mask (torch.Tensor): prediction of masks, (80/160, 80/160, n) + gt_mask (torch.Tensor): ground truth of masks, (80/160, 80/160, n) + mxyxy (torch.Tensor): ground truth of boxes, (n, 4) + """ + _, _, n = pred_mask.shape # same as gt_mask + pred_mask = pred_mask.sigmoid() + if mxyxy is not None: + pred_mask = crop(pred_mask, mxyxy) + gt_mask = crop(gt_mask, mxyxy) + pred_mask = pred_mask.permute(2, 0, 1).view(n, -1) + gt_mask = gt_mask.permute(2, 0, 1).view(n, -1) + iou = masks_iou(pred_mask, gt_mask) + return iou if return_iou else (1.0 - iou) class ComputeLoss: # Compute losses @@ -40,69 +59,9 @@ def __init__(self, model, autobalance=False, overlap=False): if hasattr(det, k): setattr(self, k, getattr(det, k)) - def __call__(self, p, targets, masks=None): # predictions, targets, model - if masks is not None: - return self.loss_segment(p, targets, masks) - return self.loss_detection(p, targets) - - def loss_detection(self, p, targets): - device = targets.device - lcls, lbox, lobj = ( - torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device),) - tcls, tbox, indices, anchors = self.build_targets(p, targets) # targets - - # Losses - for i, pi in enumerate(p): # layer index, layer predictions - b, a, gj, gi = indices[i] # image, anchor, gridy, gridx - tobj = torch.zeros_like(pi[..., 0], device=device) # target obj - - n = b.shape[0] # number of targets - if n: - ps = pi[b, a, gj, gi] # prediction subset corresponding to targets - - # Regression - pxy = ps[:, :2].sigmoid() * 2.0 - 0.5 - pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] - pbox = torch.cat((pxy, pwh), 1) # predicted box - iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) - lbox += (1.0 - iou).mean() # iou loss - - # Objectness - score_iou = iou.detach().clamp(0).type(tobj.dtype) - if self.sort_obj_iou: - sort_id = torch.argsort(score_iou) - b, a, gj, gi, score_iou = (b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], score_iou[sort_id],) - 
tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * score_iou # iou ratio - - # Classification - if self.nc > 1: # cls loss (only if multiple classes) - t = torch.full_like(ps[:, 5:], self.cn, device=device) # targets - t[range(n), tcls[i]] = self.cp - lcls += self.BCEcls(ps[:, 5:], t) # BCE - - # Append targets to text file # with open('targets.txt', 'a') as file: # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] - - obji = self.BCEobj(pi[..., 4], tobj) - lobj += obji * self.balance[i] # obj loss - if self.autobalance: - self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() - - if self.autobalance: - self.balance = [x / self.balance[self.ssi] for x in self.balance] - lbox *= self.hyp["box"] - lobj *= self.hyp["obj"] - lcls *= self.hyp["cls"] - bs = tobj.shape[0] # batch size - - return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach() - - def loss_segment(self, preds, targets, masks): - """ - proto_out:[batch-size, mask_dim, mask_height, mask_width] - masks:[batch-size * num_objs, image_height, image_width] - each image has a different number of objects; pad the short ones when batching - """ + def __call__(self, preds, targets, masks): # predictions, targets, model p = preds[0] + # [batch-size, mask_dim, mask_height, mask_width] proto_out = preds[1] mask_h, mask_w = proto_out.shape[2:] proto_out = proto_out.permute(0, 2, 3, 1) @@ -111,7 +70,7 @@ def loss_segment(self, preds, targets, masks): lcls, lbox, lobj, lseg = ( torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device),) - tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets_for_masks(p, targets) # targets + tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets(p, targets) # targets # Losses for i, pi in enumerate(p): # layer index, layer predictions b, a, gj, gi = indices[i] # image, anchor, gridy, gridx @@ -125,7 +84,7 @@ def loss_segment(self, preds, targets, masks): pxy = ps[:, :2].sigmoid() * 2.0 - 0.5 pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box - iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) + iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target) lbox += (1.0 - iou).mean() # iou loss # Objectness @@ -142,8 +101,10 @@ def loss_segment(self, preds, targets, masks): lcls += self.BCEcls(ps[:, self.nm:], t) # BCE # Mask Regression + # TODO: + # [bs * num_objs, img_h, img_w] -> [bs * num_objs, mask_h, mask_w] downsampled_masks = F.interpolate(masks[None, :], (mask_h, mask_w), mode="bilinear", - align_corners=False, ).squeeze(0) + align_corners=False).squeeze(0) mxywh = xywh[i] mws, mhs = mxywh[:, 2:].T @@ -196,7 +157,7 @@ def loss_segment(self, preds, targets, masks): return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach() def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): - """mask loss of single pic.""" + """Mask loss of a single image.""" # (80, 80, 32) @ (32, n) -> (80, 80, n) pred_mask = proto @ pred.tanh().T # lseg_iou = self.mask_loss(pred_mask, gt_mask, xyxy) @@ -207,61 +168,6 @@ def loss_segment(self, preds, targets, masks): return lseg.mean(), iou  # + lseg_iou.mean() def build_targets(self, p, targets): - # Build targets for compute_loss(), input targets(image,class,x,y,w,h) - na, nt = self.na, targets.shape[0] # number of anchors, targets - tcls, tbox, indices, anch = [], [], [], [] - gain = torch.ones(7, device=targets.device) # normalized to gridspace gain - ai = ( - 
torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)) # same as .repeat_interleave(nt) - targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices - - g = 0.5 # bias - off = (torch.tensor([[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m - # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm - ], device=targets.device, ).float() * g) # offsets - - for i in range(self.nl): - anchors, shape = self.anchors[i], p[i].shape - gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain - - # Match targets to anchors - t = targets * gain - if nt: - # Matches - r = t[:, :, 4:6] / anchors[:, None] # wh ratio - j = torch.max(r, 1.0 / r).max(2)[0] < self.hyp["anchor_t"] # compare - # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) - t = t[j] # filter - - # Offsets - gxy = t[:, 2:4] # grid xy - gxi = gain[[2, 3]] - gxy # inverse - j, k = ((gxy % 1.0 < g) & (gxy > 1.0)).T - l, m = ((gxi % 1.0 < g) & (gxi > 1.0)).T - j = torch.stack((torch.ones_like(j), j, k, l, m)) - t = t.repeat((5, 1, 1))[j] - offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] - else: - t = targets[0] - offsets = 0 - - # Define - b, c = t[:, :2].long().T # image, class - gxy = t[:, 2:4] # grid xy - gwh = t[:, 4:6] # grid wh - gij = (gxy - offsets).long() - gi, gj = gij.T # grid xy indices - - # Append - a = t[:, 6].long() # anchor indices - indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid - tbox.append(torch.cat((gxy - gij, gwh), 1)) # box - anch.append(anchors[a]) # anchors - tcls.append(c) # class - - return tcls, tbox, indices, anch - - def build_targets_for_masks(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets tcls, tbox, indices, anch, tidxs, xywh = [], [], [], [], [], [] @@ -336,73 +242,3 @@ def build_targets_for_masks(self, p, targets): xywh.append(torch.cat((gxy, gwh), 1)) return tcls, tbox, indices, anch, tidxs, xywh - - -class MaskIOULoss(nn.Module): - def __init__(self) -> None: - super().__init__() - - def forward(self, pred_mask, gt_mask, mxyxy=None, return_iou=False): - """ - Args: - pred_mask (torch.Tensor): prediction of masks, (80/160, 80/160, n) - gt_mask (torch.Tensor): ground truth of masks, (80/160, 80/160, n) - mxyxy (torch.Tensor): ground truth of boxes, (n, 4) - """ - _, _, n = pred_mask.shape # same as gt_mask - pred_mask = pred_mask.sigmoid() - if mxyxy is not None: - pred_mask = crop(pred_mask, mxyxy) - gt_mask = crop(gt_mask, mxyxy) - pred_mask = pred_mask.permute(2, 0, 1).view(n, -1) - gt_mask = gt_mask.permute(2, 0, 1).view(n, -1) - iou = masks_iou(pred_mask, gt_mask) - return iou if return_iou else (1.0 - iou) - - -import math - - -def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): - # Returns the IoU of box1 to box2. 
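The offset logic in build_targets assigns each target not only to its own grid cell but also to the neighbouring cells whose border its centre lies within g = 0.5 of, roughly tripling the positive samples per target. A small hedged demo of the j/k/l/m selection with an invented target centre on an 80x80 grid:

import torch

g = 0.5  # bias
gxy = torch.tensor([[2.3, 4.7]])           # target centre in grid units
gxi = torch.tensor([[80.0, 80.0]]) - gxy   # same centre measured from the far edge
j, k = ((gxy % 1.0 < g) & (gxy > 1.0)).T   # close to the left / top cell border?
l, m = ((gxi % 1.0 < g) & (gxi > 1.0)).T   # close to the right / bottom cell border?
print(j.item(), k.item(), l.item(), m.item())  # True False False True
# x fraction 0.3 < 0.5 -> also assign the cell to the left; y fraction 0.7 -> the cell below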
box1 is 4, box2 is nx4 - box2 = box2.T - - # Get the coordinates of bounding boxes - if x1y1x2y2: # x1, y1, x2, y2 = box1 - b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] - b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] - else: # transform from xywh to xyxy - b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 - b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 - b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 - b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 - - # Intersection area - inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * ( - torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) - - # Union Area - w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps - w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps - union = w1 * h1 + w2 * h2 - inter + eps - - iou = inter / union - if GIoU or DIoU or CIoU: - cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width - ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height - if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 - c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared - rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + ( - b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared - if DIoU: - return iou - rho2 / c2 # DIoU - elif (CIoU): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 - v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) - with torch.no_grad(): - alpha = v / (v - iou + (1 + eps)) - return iou - (rho2 / c2 + v * alpha) # CIoU - else: # GIoU https://arxiv.org/pdf/1902.09630.pdf - c_area = cw * ch + eps # convex area - return iou - (c_area - union) / c_area # GIoU - else: - return iou # IoU diff --git a/utils/segment/metrics.py b/utils/segment/metrics.py new file mode 100644 index 000000000000..602623377402 --- /dev/null +++ b/utils/segment/metrics.py @@ -0,0 +1,149 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Model validation metrics +""" + +import numpy as np +from easydict import EasyDict as edict +from ..metrics import ap_per_class + + +def fitness(x, masks=False): + # Model fitness as a weighted combination of metrics + if masks: + w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] + return (x[:, :8] * w).sum(1) + w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + return (x[:, :4] * w).sum(1) + + +def ap_per_class_box_and_mask(tp_m, tp_b, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), ): + """ + Args: + tp_b: tp of boxes. + tp_m: tp of masks. + other arguments see `func: ap_per_class`. + """ + results_boxes = ap_per_class(tp_b, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, + prefix="Box")[2:] + results_masks = ap_per_class(tp_m, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, + prefix="Mask")[2:] + + results = edict({ + "boxes": {"p": results_boxes[0], "r": results_boxes[1], "ap": results_boxes[2], "f1": results_boxes[3], + "ap_class": results_boxes[4]}, + "masks": {"p": results_masks[0], "r": results_masks[1], "ap": results_masks[2], "f1": results_masks[3], + "ap_class": results_masks[4]}}) + return results + +class Metric: + def __init__(self) -> None: + self.p = [] # (nc, ) + self.r = [] # (nc, ) + self.f1 = [] # (nc, ) + self.all_ap = [] # (nc, 10) + self.ap_class_index = [] # (nc, ) + + @property + def ap50(self): + """AP@0.5 of all classes. + Return: + (nc, ) or []. 
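fitness above extends the detection weighting to eight metrics, [P, R, mAP@0.5, mAP@0.5:0.95] for boxes followed by the same four for masks, putting 0.1/0.9 weight on the two mAPs of each task and zero weight on P and R. A quick worked example of the masks=True branch with invented metric values:

import numpy as np

# [P, R, mAP@.5, mAP@.5:.95] for boxes, then the same four for masks (illustrative numbers)
x = np.array([[0.70, 0.60, 0.55, 0.35, 0.65, 0.55, 0.50, 0.30]])
w = np.array([0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9])
fi = (x[:, :8] * w).sum(1)
print(fi)  # [0.69] = 0.1 * 0.55 + 0.9 * 0.35 + 0.1 * 0.50 + 0.9 * 0.30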
+ """ + return self.all_ap[:, 0] if len(self.all_ap) else [] + + @property + def ap(self): + """AP@0.5:0.95 + Return: + (nc, ) or []. + """ + return self.all_ap.mean(1) if len(self.all_ap) else [] + + @property + def mp(self): + """mean precision of all classes. + Return: + float. + """ + return self.p.mean() if len(self.p) else 0.0 + + @property + def mr(self): + """mean recall of all classes. + Return: + float. + """ + return self.r.mean() if len(self.r) else 0.0 + + @property + def map50(self): + """Mean AP@0.5 of all classes. + Return: + float. + """ + return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0 + + @property + def map(self): + """Mean AP@0.5:0.95 of all classes. + Return: + float. + """ + return self.all_ap.mean() if len(self.all_ap) else 0.0 + + def mean_results(self): + """Mean of results, return mp, mr, map50, map""" + return (self.mp, self.mr, self.map50, self.map) + + def class_result(self, i): + """class-aware result, return p[i], r[i], ap50[i], ap[i]""" + return (self.p[i], self.r[i], self.ap50[i], self.ap[i]) + + def get_maps(self, nc): + maps = np.zeros(nc) + self.map + for i, c in enumerate(self.ap_class_index): + maps[c] = self.ap[i] + return maps + + def update(self, results): + """ + Args: + results: tuple(p, r, ap, f1, ap_class) + """ + p, r, all_ap, f1, ap_class_index = results + self.p = p + self.r = r + self.all_ap = all_ap + self.f1 = f1 + self.ap_class_index = ap_class_index + + +class Metrics: + """Metric for boxes and masks.""" + + def __init__(self) -> None: + self.metric_box = Metric() + self.metric_mask = Metric() + + def update(self, results): + """ + Args: + results: Dict{'boxes': Dict{}, 'masks': Dict{}} + """ + self.metric_box.update(list(results["boxes"].values())) + self.metric_mask.update(list(results["masks"].values())) + + def mean_results(self): + return self.metric_box.mean_results() + self.metric_mask.mean_results() + + def class_result(self, i): + return self.metric_box.class_result(i) + self.metric_mask.class_result(i) + + def get_maps(self, nc): + return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) + + @property + def ap_class_index(self): + # boxes and masks have the same ap_class_index + return self.metric_box.ap_class_index From 1540351877733ddd413b64ef298a2eb9b4d14adc Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 11 Aug 2022 18:50:52 +0800 Subject: [PATCH 052/247] update val.py for segment --- segment/detect.py | 7 +- segment/train.py | 72 +++--- segment/val.py | 498 ++++++++++++++++++++++++++++++++++---- segment/val_new.py | 459 ----------------------------------- utils/loggers/__init__.py | 259 +++++--------------- utils/plots.py | 317 ------------------------ utils/segment/general.py | 2 +- utils/segment/metrics.py | 13 +- utils/segment/plots.py | 353 +++++++++++++++++++++++++++ 9 files changed, 914 insertions(+), 1066 deletions(-) delete mode 100644 segment/val_new.py create mode 100644 utils/segment/plots.py diff --git a/segment/detect.py b/segment/detect.py index d8e6150873f6..c751e39a06b8 100644 --- a/segment/detect.py +++ b/segment/detect.py @@ -33,7 +33,7 @@ import torch.backends.cudnn as cudnn FILE = Path(__file__).resolve() -ROOT = FILE.parents[0] # YOLOv5 root directory +ROOT = FILE.parents[1] # YOLOv5 root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative @@ -42,9 +42,10 @@ from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams from utils.general import 
(LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, increment_path, print_args, scale_coords, strip_optimizer, xyxy2xywh) -from utils.plots import Annotator, colors, save_one_box, plot_masks +from utils.plots import Annotator, colors, save_one_box +from utils.segment.plots import plot_masks from utils.torch_utils import select_device, time_sync -from utils.segment import non_max_suppression_masks, scale_masks, process_mask_upsample +from utils.segment.general import non_max_suppression_masks, scale_masks, process_mask_upsample @torch.no_grad() diff --git a/segment/train.py b/segment/train.py index 1bee611e4ec1..3a06915eb061 100644 --- a/segment/train.py +++ b/segment/train.py @@ -22,6 +22,7 @@ from datetime import datetime from pathlib import Path +import val # for end-of-epoch mAP import numpy as np import torch import torch.distributed as dist @@ -38,7 +39,6 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -import val # for end-of-epoch mAP from models.experimental import attempt_load from models.yolo import Model from utils.autoanchor import check_anchors @@ -46,14 +46,14 @@ from utils.callbacks import Callbacks from utils.segment.dataloaders import create_dataloader from utils.downloads import attempt_download -from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, fitness, +from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer) -from utils.loggers import NewLoggersMask +from utils.loggers import LoggersMask from utils.loggers.wandb.wandb_utils import check_wandb_resume from utils.segment.loss import ComputeLoss -#from utils.metrics import fitness +from utils.segment.metrics import fitness from utils.plots import plot_evolve, plot_labels from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first @@ -66,7 +66,6 @@ from torch.optim import AdamW import yaml from datetime import datetime -from evaluator import Yolov5Evaluator def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary print(device) @@ -96,8 +95,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Loggers data_dict = None if RANK in {-1, 0}: - newloggers = NewLoggersMask - loggers = newloggers( + loggers = LoggersMask( save_dir=save_dir, opt=opt, logger=LOGGER ) # loggers instance @@ -157,16 +155,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}") - evaluator = Yolov5Evaluator( - data = data, - single_cls=single_cls, - save_dir=save_dir, - mask=True, - verbose=False, - mask_downsample_ratio=mask_ratio, - plots=plots, - overlap=overlap - ) + g = [], [], [] # optimizer parameter groups bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. 
BatchNorm2d() for v in model.modules(): @@ -407,7 +396,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio mode="bilinear", align_corners=False, ).squeeze(0) - callbacks.run('on_train_batch_end', ni, model, imgs, targets, masks, paths, plots, opt.sync_bn) + callbacks.run('on_train_batch_end', ni, model, imgs, targets, masks, paths, plots) if callbacks.stop_training: return @@ -423,21 +412,25 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) final_epoch = (epoch + 1 == epochs) or stopper.possible_stop if not noval or final_epoch: # Calculate mAP - results, maps, _ = evaluator.run_training( - model=ema.ema, - dataloader=val_loader, - compute_loss=compute_loss, - ) + results, maps, _ = val.run(data_dict, + batch_size=batch_size // WORLD_SIZE * 2, + imgsz=imgsz, + model=ema.ema, + single_cls=single_cls, + dataloader=val_loader, + save_dir=save_dir, + plots=False, + callbacks=callbacks, + compute_loss=compute_loss, + mask_downsample_ratio=mask_ratio, + overlap=overlap) # Update best mAP - def fitness(x): - w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] - return (x[:, :8] * w).sum(1) fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] stop = stopper(epoch=epoch, fitness=fi) # early stop check if fi > best_fitness: best_fitness = fi log_vals = list(mloss) + list(results) + lr - callbacks.run('on_fit_epoch_end', log_vals, epoch) + callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi) # Save model if (not nosave) or (final_epoch and not evolve): # if save @@ -478,15 +471,26 @@ def fitness(x): strip_optimizer(f) # strip optimizers if f is best: LOGGER.info(f'\nValidating {f}...') - results, _, _ = evaluator.run_training( - model=attempt_load(f, device).half(), - dataloader=val_loader, - compute_loss=compute_loss, - ) # val best model with plots + results, _, _ = val.run( + data_dict, + batch_size=batch_size // WORLD_SIZE * 2, + imgsz=imgsz, + model=attempt_load(f, device).half(), + iou_thres=0.65 if is_coco else 0.60, # best pycocotools results at 0.65 + single_cls=single_cls, + dataloader=val_loader, + save_dir=save_dir, + save_json=is_coco, + verbose=True, + plots=plots, + callbacks=callbacks, + compute_loss=compute_loss, + mask_downsample_ratio=1, + overlap=overlap) # val best model with plots if is_coco: - callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch) + callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) - callbacks.run('on_train_end', plots, epoch, masks=True) + callbacks.run('on_train_end', last, best, plots, epoch, results) torch.cuda.empty_cache() return results diff --git a/segment/val.py b/segment/val.py index 20183b6d7118..a2a4eb526773 100644 --- a/segment/val.py +++ b/segment/val.py @@ -3,81 +3,491 @@ Validate a trained YOLOv5 model accuracy on a custom dataset Usage: - $ python path/to/val.py --data coco128.yaml --weights yolov5s.pt --img 640 + $ python path/to/val.py --weights yolov5s.pt --data coco128.yaml --img 640 + +Usage - formats: + $ python path/to/val.py --weights yolov5s.pt # PyTorch + yolov5s.torchscript # TorchScript + yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn + yolov5s.xml # OpenVINO + yolov5s.engine # TensorRT + yolov5s.mlmodel # CoreML (macOS-only) + yolov5s_saved_model # TensorFlow SavedModel + yolov5s.pb # TensorFlow GraphDef + yolov5s.tflite # TensorFlow Lite + 
yolov5s_edgetpu.tflite # TensorFlow Edge TPU """ import argparse -from evaluator import Yolov5Evaluator +import json +import os +import sys +from pathlib import Path + +import numpy as np +import torch +from tqdm import tqdm + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[1] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH +ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative + +import torch.nn.functional as F +import pycocotools.mask as mask_util +from models.common import DetectMultiBackend +from utils.callbacks import Callbacks +from utils.segment.dataloaders import create_dataloader +from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, + coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, + scale_coords, xywh2xyxy, xyxy2xywh) +from utils.segment.general import (non_max_suppression_masks, process_mask_upsample, mask_iou, + scale_masks, process_mask) +from utils.metrics import ConfusionMatrix, box_iou +from utils.segment.metrics import ap_per_class_box_and_mask, Metrics +from utils.segment.plots import plot_images_and_masks +from utils.plots import output_to_target, plot_val_study +from utils.torch_utils import select_device, time_sync, de_parallel + + +def save_one_txt(predn, save_conf, shape, file): + # Save one txt result + gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh + for *xyxy, conf, cls in predn.tolist(): + xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh + line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format + with open(file, 'a') as f: + f.write(('%g ' * len(line)).rstrip() % line + '\n') + + +def save_one_json(predn, jdict, path, class_map, pred_masks): + # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} + image_id = int(path.stem) if path.stem.isnumeric() else path.stem + box = xyxy2xywh(predn[:, :4]) # xywh + box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner + + pred_masks = np.transpose(pred_masks, (2, 0, 1)) + rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks] + for rle in rles: + rle["counts"] = rle["counts"].decode("utf-8") + + for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): + pred_dict = { + 'image_id': image_id, + 'category_id': class_map[int(p[5])], + 'bbox': [round(x, 3) for x in b], + 'score': round(p[4], 5)} + pred_dict["segmentation"] = rles[i] + jdict.append(pred_dict) + + +def process_batch(detections, labels, iouv): + """ + Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format. 
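save_one_json above stores each predicted mask in COCO run-length encoding: pycocotools expects a Fortran-ordered uint8 array of shape (H, W, 1), and the bytes in the counts field must be decoded to str before json.dump can serialize the record. A small round-trip sketch under those assumptions:

import numpy as np
import pycocotools.mask as mask_util

mask = np.zeros((160, 160), dtype=np.uint8)
mask[40:80, 40:80] = 1  # one square instance
rle = mask_util.encode(np.asarray(mask[:, :, None], order="F"))[0]
assert (mask_util.decode(rle) == mask).all()   # RLE is lossless for binary masks
rle["counts"] = rle["counts"].decode("utf-8")  # bytes -> str so json.dump accepts it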
+ Arguments: + detections (Array[N, 6]), x1, y1, x2, y2, conf, class + labels (Array[M, 5]), class, x1, y1, x2, y2 + Returns: + correct (Array[N, 10]), for 10 IoU levels + """ + correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool) + iou = box_iou(labels[:, 1:], detections[:, :4]) + correct_class = labels[:, 0:1] == detections[:, 5] + for i in range(len(iouv)): + x = torch.where((iou >= iouv[i]) & correct_class) # IoU > threshold and classes match + if x[0].shape[0]: + matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() # [label, detect, iou] + if x[0].shape[0] > 1: + matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 1], return_index=True)[1]] + # matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 0], return_index=True)[1]] + correct[matches[:, 1].astype(int), i] = True + return torch.tensor(correct, dtype=torch.bool, device=iouv.device) + + +def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap): + correct = torch.zeros(predn.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device) + # convert masks (1, 640, 640) -> (n, 640, 640) + if overlap: + nl = len(labels) + index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1 + gt_masks = gt_masks.repeat(nl, 1, 1) + gt_masks = torch.where(gt_masks == index, 1.0, 0.0) + + if gt_masks.shape[1:] != pred_masks.shape[1:]: + gt_masks = F.interpolate(gt_masks.unsqueeze(0), pred_masks.shape[1:], mode="bilinear", + align_corners=False, ).squeeze(0) + + iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1), ) + x = torch.where( + (iou >= iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match + if x[0].shape[0]: + matches = ( + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] + if x[0].shape[0] > 1: + matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 1], return_index=True)[1]] + # matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 0], return_index=True)[1]] + matches = torch.Tensor(matches).to(iouv.device) + correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv + return correct + + +@torch.no_grad() +def run( + data, + weights=None, # model.pt path(s) + batch_size=32, # batch size + imgsz=640, # inference size (pixels) + conf_thres=0.001, # confidence threshold + iou_thres=0.6, # NMS IoU threshold + task='val', # train, val, test, speed or study + device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu + workers=8, # max dataloader workers (per RANK in DDP mode) + single_cls=False, # treat as single-class dataset + augment=False, # augmented inference + verbose=False, # verbose output + save_txt=False, # save results to *.txt + save_hybrid=False, # save label+prediction hybrid results to *.txt + save_conf=False, # save confidences in --save-txt labels + save_json=False, # save a COCO-JSON results file + project=ROOT / 'runs/val', # save to project/name + name='exp', # save to project/name + exist_ok=False, # existing project/name ok, do not increment + half=True, # use FP16 half-precision inference + dnn=False, # use OpenCV DNN for ONNX inference + model=None, + dataloader=None, + save_dir=Path(''), + plots=True, + overlap=False, + mask_downsample_ratio=1, + callbacks=Callbacks(), + compute_loss=None, +): + process = process_mask_upsample if plots else process_mask + # Initialize/load model and set device + training = model is not None + if training: # called by train.py + device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model + half &= device.type != 'cpu' # half precision only supported on CUDA + model.half() if half else model.float() + else: # called directly + device = select_device(device, batch_size=batch_size) + + # Directories + save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run + (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir + + # Load model + model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) + stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine + imgsz = check_img_size(imgsz, s=stride) # check image size + half = model.fp16 # FP16 supported on limited backends with CUDA + if engine: + batch_size = model.batch_size + else: + device = model.device + if not (pt or jit): + batch_size = 1 # export.py models default to batch-size 1 + LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') + + # Data + data = check_dataset(data) # check + + # Configure + model.eval() + cuda = device.type != 'cpu' + is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'coco{os.sep}val2017.txt') # COCO dataset + nc = 1 if single_cls else int(data['nc']) # number of classes + iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95 + niou = iouv.numel() + + # Dataloader + if not training: + if pt and not single_cls: # check --weights are trained on --data + ncm = model.model.nc + assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \ + f'classes). Pass correct combination of --weights and --data that are trained together.' 
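process_batch above fills the N x 10 correct matrix by greedy matching: all (label, detection) pairs over the IoU threshold are sorted by IoU and then deduplicated on the detection index and on the label index, so each side is matched at most once. A hedged numeric walk-through of a single threshold with invented IoUs:

import numpy as np
import torch

iou = torch.tensor([[0.90, 0.60],   # label 0 vs detections 0 and 1
                    [0.70, 0.80]])  # label 1 vs detections 0 and 1
correct_class = torch.ones(2, 2, dtype=torch.bool)  # assume all classes match
x = torch.where((iou >= 0.5) & correct_class)
matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).numpy()
matches = matches[matches[:, 2].argsort()[::-1]]                   # best IoU first
matches = matches[np.unique(matches[:, 1], return_index=True)[1]]  # each detection once
matches = matches[np.unique(matches[:, 0], return_index=True)[1]]  # each label once
print(matches)  # [[0. 0. 0.9], [1. 1. 0.8]] -> label 0 <-> det 0, label 1 <-> det 1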
+ model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup + pad = 0.0 if task in ('speed', 'benchmark') else 0.5 + rect = False if task == 'benchmark' else pt # square inference for benchmarks + task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images + dataloader = create_dataloader(data[task], + imgsz, + batch_size, + stride, + single_cls, + pad=pad, + rect=rect, + workers=workers, + prefix=colorstr(f'{task}: '), + overlap_mask=overlap, + mask_downsample_ratio=mask_downsample_ratio)[0] + + seen = 0 + confusion_matrix = ConfusionMatrix(nc=nc) + names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)} + class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) + s = ("%20s" + "%11s" * 10) % ("Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", + "Mask:{P", "R", "mAP@.5", "mAP@.5:.95}") + dt = [0.0, 0.0, 0.0] + metrics = Metrics() + loss = torch.zeros(4, device=device) + jdict, stats = [], [] + callbacks.run('on_val_start') + pbar = tqdm(dataloader, desc=s, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar + for batch_i, (im, targets, paths, shapes, masks) in enumerate(pbar): + callbacks.run('on_val_batch_start') + t1 = time_sync() + if cuda: + im = im.to(device, non_blocking=True) + targets = targets.to(device) + masks = masks.to(device).float() + im = im.half() if half else im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + nb, _, height, width = im.shape # batch size, channels, height, width + t2 = time_sync() + dt[0] += t2 - t1 + + # Inference + out, train_out = model(im) if training else model(im, augment=augment, val=True) # inference, loss outputs + dt[1] += time_sync() - t2 + + # Loss + if compute_loss: + loss += compute_loss(train_out, targets, masks)[1] # box, obj, cls -from utils.general import ( - set_logging, - print_args, - check_yaml, - check_requirements, -) + # NMS + targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels + lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling + t3 = time_sync() + out = non_max_suppression_masks(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, + mask_dim=de_parallel(model).model[-1].mask_dim) + dt[2] += time_sync() - t3 + + # keep pred masks for plotting + plot_masks = [] + # Metrics + for si, pred in enumerate(out): + labels = targets[targets[:, 0] == si, 1:] + midx = [si] if overlap else targets[:, 0] == si + gt_masks = masks[midx] + proto_out = train_out[1][si] + pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], + shape=im[si].shape[1:]).permute(2, 0, 1).contiguous() + if plots and batch_i < 3: + plot_masks.append(pred_masks[:15].cpu()) + + nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions + path, shape = Path(paths[si]), shapes[si][0] + correct_masks = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init + correct_bboxes = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init + seen += 1 + + if npr == 0: + if nl: + stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0])) + continue + + # Predictions + if single_cls: + pred[:, 5] = 0 + predn = pred.clone() + scale_coords(im[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred + + # Evaluate + if nl: + tbox = xywh2xyxy(labels[:, 1:5]) # target boxes + scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1]) # native-space 
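In the metrics loop below, each detection row carries mask coefficients after its box/conf/class columns, and process/process_mask turns them into per-instance masks by a single matrix product with the prototype tensor. A rough sketch of the core operation, mirroring the proto @ coef product that single_mask_loss in utils/segment/loss.py uses (HWC prototypes, tanh-squashed coefficients); the actual process_mask in utils/segment/general.py additionally crops each mask to its box and optionally upsamples:

import torch

h, w, c = 160, 160, 32                     # prototype resolution and channel count
proto = torch.randn(h, w, c)               # prototypes from the segmentation head
coef = torch.randn(5, c)                   # one 32-vector per detection
masks = (proto @ coef.tanh().T).sigmoid()  # (160, 160, 5): one channel per instance
binary = (masks > 0.5).permute(2, 0, 1)    # (5, 160, 160) boolean instance masks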
labels + labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels + correct_bboxes = process_batch(predn, labelsn, iouv) + correct_masks = process_batch_masks(predn, pred_masks, gt_masks, labelsn, iouv, overlap=overlap) + if plots: + confusion_matrix.process_batch(predn, labelsn) + stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct_masks, correct_bboxes, conf, pcls, tcls) + + # Save/log + if save_txt: + save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt')) + if save_json: + pred_masks = scale_masks(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), + shape, shapes[si][1]) + save_one_json(predn, jdict, path, class_map, pred_masks) # append to COCO-JSON dictionary + callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) + + # Plot images + if plots and batch_i < 3: + if masks.shape[1:] != im.shape[2:]: + masks = F.interpolate( + masks.unsqueeze(0).float(), + im.shape[2:], + mode="bilinear", + align_corners=False, + ).squeeze(0) + plot_images_and_masks(im, targets, masks, paths, + save_dir / f'val_batch{batch_i}_labels.jpg', names) # labels + plot_masks = torch.cat(plot_masks, dim=0) + plot_images_and_masks(im, output_to_target(out, filter_dets=15), plot_masks, paths, + save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred + + callbacks.run('on_val_batch_end') + + # Compute metrics + stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy + if len(stats) and stats[0].any(): + results = ap_per_class_box_and_mask(*stats, plot=plots, save_dir=save_dir, names=names) + metrics.update(results) + nt = np.bincount(stats[4].astype(int), minlength=nc) # number of targets per class + else: + nt = torch.zeros(1) + + # Print results + pf = '%20s' + '%11i' * 2 + '%11.3g' * 8 # print format + LOGGER.info(pf % ("all", seen, nt.sum(), *metrics.mean_results())) + + # Print results per class + if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats): + for i, c in enumerate(metrics.ap_class_index): + LOGGER.info(pf % (names[c], seen, nt[c], *metrics.class_result(i))) + + # Print speeds + t = tuple(x / seen * 1E3 for x in dt) # speeds per image + if not training: + shape = (batch_size, 3, imgsz, imgsz) + LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t) + + # Plots + if plots: + confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) + callbacks.run('on_val_end') + + # unpack mean results now; pycocotools may overwrite the mAP values below + ( + mp_bbox, + mr_bbox, + map50_bbox, + map_bbox, + mp_mask, + mr_mask, + map50_mask, + map_mask, + ) = metrics.mean_results() + # Save JSON + if save_json and len(jdict): + w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights + anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json + pred_json = str(save_dir / f"{w}_predictions.json") # predictions json + LOGGER.info(f'\nEvaluating pycocotools mAP... 
saving {pred_json}...') + with open(pred_json, 'w') as f: + json.dump(jdict, f) + + try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb + check_requirements(['pycocotools']) + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval_bbox = COCOeval(anno, pred, 'bbox') + eval_mask = COCOeval(anno, pred, 'segm') + if is_coco: + eval_bbox.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # image IDs to evaluate + eval_mask.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # image IDs to evaluate + eval_bbox.evaluate() + eval_bbox.accumulate() + eval_bbox.summarize() + map_bbox, map50_bbox = eval_bbox.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) + + eval_mask.evaluate() + eval_mask.accumulate() + eval_mask.summarize() + map_mask, map50_mask = eval_mask.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) + except Exception as e: + LOGGER.info(f'pycocotools unable to run: {e}') + + # Return results + model.float() # for training + if not training: + s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' + LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") + final_metric = ( + mp_bbox, + mr_bbox, + map50_bbox, + map_bbox, + mp_mask, + mr_mask, + map50_mask, + map_mask, + ) + return ((*final_metric, *(loss.cpu() / len(dataloader)).tolist()), + metrics.get_maps(nc), t,) def parse_opt(): parser = argparse.ArgumentParser() - parser.add_argument('-d', '--data', type=str, default='data/coco128.yaml', help='dataset.yaml path') - parser.add_argument('-w', '--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)') - parser.add_argument('-b', '--batch-size', type=int, default=32, help='batch size') + parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)') + parser.add_argument('--batch-size', type=int, default=32, help='batch size') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold') parser.add_argument('--task', default='val', help='train, val, test, speed or study') parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') + parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') parser.add_argument('--augment', action='store_true', help='augmented inference') parser.add_argument('--verbose', action='store_true', help='report mAP by class') parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') + parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt') parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') parser.add_argument('--save-json', action='store_true', help='save a COCO-JSON results file') - parser.add_argument('--nosave', action='store_true', help='do not save anything.') - parser.add_argument('--project', default='runs/val', help='save to project/name') + parser.add_argument('--project', default=ROOT / 'runs/val', help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') - parser.add_argument('--overlap-mask', action='store_true', help='Eval overlapping masks') - + parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') opt = parser.parse_args() opt.data = check_yaml(opt.data) # check YAML opt.save_json |= opt.data.endswith('coco.yaml') + opt.save_txt |= opt.save_hybrid print_args(vars(opt)) return opt + def main(opt): - set_logging() - check_requirements(exclude=("tensorboard", "thop")) - evaluator = Yolov5Evaluator( - data=opt.data, - conf_thres=opt.conf_thres, - iou_thres=opt.iou_thres, - device=opt.device, - single_cls=opt.single_cls, - augment=opt.augment, - verbose=opt.verbose, - project=opt.project, - name=opt.name, - exist_ok=opt.exist_ok, - half=opt.half, - mask=True, - nosave=opt.nosave, - overlap=opt.overlap_mask, - ) + check_requirements(requirements=ROOT / 'requirements.txt', exclude=('tensorboard', 'thop')) + + if opt.task in ('train', 'val', 'test'): # run normally + if opt.conf_thres > 0.001: # https://github.com/ultralytics/yolov5/issues/1466 + LOGGER.info(emojis(f'WARNING: confidence threshold {opt.conf_thres} > 0.001 produces invalid results ⚠️')) + run(**vars(opt)) - if opt.task in ("train", "val", "test"): # run normally - evaluator.run( - weights=opt.weights, - batch_size=opt.batch_size, - imgsz=opt.imgsz, - save_txt=opt.save_txt, - save_conf=opt.save_conf, - save_json=opt.save_json, - task=opt.task, - ) else: - raise ValueError(f"not support task {opt.task}") + weights = opt.weights if isinstance(opt.weights, list) else [opt.weights] + opt.half = True # FP16 for fastest results + if opt.task == 'speed': # speed benchmarks + # python val.py --task speed --data coco.yaml --batch 1 --weights yolov5n.pt yolov5s.pt... + opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False + for opt.weights in weights: + run(**vars(opt), plots=False) + + elif opt.task == 'study': # speed vs mAP benchmarks + # python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n.pt yolov5s.pt... 
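Besides the CLI tasks above, run() is a plain function and can also be called programmatically. A minimal sketch; the module name and the dataset/checkpoint paths are illustrative assumptions, not names fixed by this patch:

```python
# Hedged sketch: invoke the segmentation validator from Python.
import val  # assumption: whatever module this script is saved as

(results, maps, times) = val.run(
    data='data/coco128.yaml',  # dataset yaml (assumed to exist)
    weights='yolov5s-seg.pt',  # hypothetical segmentation checkpoint
    imgsz=640,
    conf_thres=0.001,          # keep low so mAP covers the full PR curve
    iou_thres=0.6,
)
# run() returns 8 mean metrics (box P, R, mAP@.5, mAP@.5:.95, then the
# mask equivalents) followed by the averaged validation losses
mp_b, mr_b, map50_b, map_b, mp_m, mr_m, map50_m, map_m = results[:8]
print(f'box mAP@0.5:0.95 {map_b:.3f} | mask mAP@0.5:0.95 {map_m:.3f}')
```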
+ for opt.weights in weights: + f = f'study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt' # filename to save to + x, y = list(range(256, 1536 + 128, 128)), [] # x axis (image sizes), y axis + for opt.imgsz in x: # img-size + LOGGER.info(f'\nRunning {f} --imgsz {opt.imgsz}...') + r, _, t = run(**vars(opt), plots=False) + y.append(r + t) # results and times + np.savetxt(f, y, fmt='%10.4g') # save + os.system('zip -r study.zip study_*.txt') + plot_val_study(x=x) # plot if __name__ == "__main__": diff --git a/segment/val_new.py b/segment/val_new.py deleted file mode 100644 index 033dec732bd8..000000000000 --- a/segment/val_new.py +++ /dev/null @@ -1,459 +0,0 @@ -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license -""" -Validate a trained YOLOv5 model accuracy on a custom dataset - -Usage: - $ python path/to/val.py --weights yolov5s.pt --data coco128.yaml --img 640 - -Usage - formats: - $ python path/to/val.py --weights yolov5s.pt # PyTorch - yolov5s.torchscript # TorchScript - yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn - yolov5s.xml # OpenVINO - yolov5s.engine # TensorRT - yolov5s.mlmodel # CoreML (macOS-only) - yolov5s_saved_model # TensorFlow SavedModel - yolov5s.pb # TensorFlow GraphDef - yolov5s.tflite # TensorFlow Lite - yolov5s_edgetpu.tflite # TensorFlow Edge TPU -""" - -import argparse -import json -import os -import sys -from pathlib import Path - -import numpy as np -import torch -from tqdm import tqdm - -FILE = Path(__file__).resolve() -ROOT = FILE.parents[0] # YOLOv5 root directory -if str(ROOT) not in sys.path: - sys.path.append(str(ROOT)) # add ROOT to PATH -ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative - -import torch.nn.functional as F -import pycocotools.mask as mask_util -from models.common import DetectMultiBackend -from utils.callbacks import Callbacks -from utils.segment.dataloaders import create_dataloader -from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, - coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, - scale_coords, xywh2xyxy, xyxy2xywh, de_parallel) -from utils.segment.general import non_max_suppression_masks, process_mask_upsample, mask_iou, scale_masks -from utils.metrics import ConfusionMatrix, ap_per_class, box_iou -from utils.segment.metrics import ap_per_class_box_and_mask, Metrics -from utils.plots import output_to_target, plot_images, plot_val_study -from utils.torch_utils import select_device, time_sync - - -def save_one_txt(predn, save_conf, shape, file): - # Save one txt result - gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh - for *xyxy, conf, cls in predn.tolist(): - xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh - line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format - with open(file, 'a') as f: - f.write(('%g ' * len(line)).rstrip() % line + '\n') - - -def save_one_json(predn, jdict, path, class_map, pred_masks): - # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} - image_id = int(path.stem) if path.stem.isnumeric() else path.stem - box = xyxy2xywh(predn[:, :4]) # xywh - box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner - - pred_masks = np.transpose(pred_masks, (2, 0, 1)) - rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks] - for rle in rles: - rle["counts"] = rle["counts"].decode("utf-8") - - for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): 
- pred_dict = { - 'image_id': image_id, - 'category_id': class_map[int(p[5])], - 'bbox': [round(x, 3) for x in b], - 'score': round(p[4], 5)} - pred_dict["segmentation"] = rles[i] - jdict.append(pred_dict) - - -def process_batch(detections, labels, iouv): - """ - Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format. - Arguments: - detections (Array[N, 6]), x1, y1, x2, y2, conf, class - labels (Array[M, 5]), class, x1, y1, x2, y2 - Returns: - correct (Array[N, 10]), for 10 IoU levels - """ - correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool) - iou = box_iou(labels[:, 1:], detections[:, :4]) - correct_class = labels[:, 0:1] == detections[:, 5] - for i in range(len(iouv)): - x = torch.where((iou >= iouv[i]) & correct_class) # IoU > threshold and classes match - if x[0].shape[0]: - matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() # [label, detect, iou] - if x[0].shape[0] > 1: - matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 1], return_index=True)[1]] - # matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 0], return_index=True)[1]] - correct[matches[:, 1].astype(int), i] = True - return torch.tensor(correct, dtype=torch.bool, device=iouv.device) - - -def process_batch_masks(self, predn, pred_masks, gt_masks, labels, iouv, overlap): - correct = torch.zeros(predn.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device) - # convert masks (1, 640, 640) -> (n, 640, 640) - if overlap: - nl = len(labels) - index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1 - gt_masks = gt_masks.repeat(nl, 1, 1) - gt_masks = torch.where(gt_masks == index, 1.0, 0.0) - - if gt_masks.shape[1:] != pred_masks.shape[1:]: - gt_masks = F.interpolate(gt_masks.unsqueeze(0), pred_masks.shape[1:], mode="bilinear", - align_corners=False, ).squeeze(0) - - iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1), ) - x = torch.where( - (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match - if x[0].shape[0]: - matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] - if x[0].shape[0] > 1: - matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 1], return_index=True)[1]] - # matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 0], return_index=True)[1]] - matches = torch.Tensor(matches).to(self.iouv.device) - correct[matches[:, 1].long()] = matches[:, 2:3] >= self.iouv - return correct - - -@torch.no_grad() -def run( - data, - weights=None, # model.pt path(s) - batch_size=32, # batch size - imgsz=640, # inference size (pixels) - conf_thres=0.001, # confidence threshold - iou_thres=0.6, # NMS IoU threshold - task='val', # train, val, test, speed or study - device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu - workers=8, # max dataloader workers (per RANK in DDP mode) - single_cls=False, # treat as single-class dataset - augment=False, # augmented inference - verbose=False, # verbose output - save_txt=False, # save results to *.txt - save_hybrid=False, # save label+prediction hybrid results to *.txt - save_conf=False, # save confidences in --save-txt labels - save_json=False, # save a COCO-JSON results file - project=ROOT / 'runs/val', # save to project/name - name='exp', # save to project/name - exist_ok=False, # existing project/name ok, do not increment - half=True, # use FP16 half-precision inference - dnn=False, # use OpenCV DNN for ONNX inference - model=None, - dataloader=None, - save_dir=Path(''), - plots=True, - overlap=False, - mask_downsample_ratio=1, - callbacks=Callbacks(), - compute_loss=None, -): - # Initialize/load model and set device - training = model is not None - if training: # called by train.py - device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model - half &= device.type != 'cpu' # half precision only supported on CUDA - model.half() if half else model.float() - else: # called directly - device = select_device(device, batch_size=batch_size) - - # Directories - save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run - (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir - - # Load model - model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) - stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine - imgsz = check_img_size(imgsz, s=stride) # check image size - half = model.fp16 # FP16 supported on limited backends with CUDA - if engine: - batch_size = model.batch_size - else: - device = model.device - if not (pt or jit): - batch_size = 1 # export.py models default to batch-size 1 - LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') - - # Data - data = check_dataset(data) # check - - # Configure - model.eval() - cuda = device.type != 'cpu' - is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'coco{os.sep}val2017.txt') # COCO dataset - nc = 1 if single_cls else int(data['nc']) # number of classes - iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95 - niou = iouv.numel() - - # Dataloader - if not training: - if pt and not single_cls: # check --weights are trained on --data - ncm = model.model.nc - assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \ - f'classes). Pass correct combination of --weights and --data that are trained together.' 
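The `correct` matrix built by process_batch above has one column per IoU threshold; a self-contained sketch of how a single class-matched prediction fills its row:

```python
import torch

iouv = torch.linspace(0.5, 0.95, 10)  # the 10 thresholds behind mAP@0.5:0.95
matched_iou = 0.72                    # assumed IoU of one class-matched prediction
print(matched_iou >= iouv)
# tensor([ True,  True,  True,  True,  True, False, False, False, False, False])
# i.e. the detection is a TP for mAP@0.5..0.70 but not for 0.75..0.95
```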
- model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup - pad = 0.0 if task in ('speed', 'benchmark') else 0.5 - rect = False if task == 'benchmark' else pt # square inference for benchmarks - task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images - dataloader = create_dataloader(data[task], - imgsz, - batch_size, - stride, - single_cls, - pad=pad, - rect=rect, - workers=workers, - prefix=colorstr(f'{task}: '), - mask_downsample_ratio=1, - overlap_mask=overlap, - mask_downsample_ratio=mask_downsample_ratio)[0] - - seen = 0 - confusion_matrix = ConfusionMatrix(nc=nc) - names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)} - class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) - s = ("%20s" + "%11s" * 10) % ("Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", - "Mask:{P", "R", "mAP@.5", "mAP@.5:.95}") - dt = [0.0, 0.0, 0.0] - metrics = Metrics() - loss = torch.zeros(4, device=device) - jdict, stats = [], [] - callbacks.run('on_val_start') - pbar = tqdm(dataloader, desc=s, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar - for batch_i, (im, targets, paths, shapes, masks) in enumerate(pbar): - callbacks.run('on_val_batch_start') - t1 = time_sync() - if cuda: - im = im.to(device, non_blocking=True) - targets = targets.to(device) - masks = masks.to(device).float() - im = im.half() if half else im.float() # uint8 to fp16/32 - im /= 255 # 0 - 255 to 0.0 - 1.0 - nb, _, height, width = im.shape # batch size, channels, height, width - t2 = time_sync() - dt[0] += t2 - t1 - - # Inference - out, train_out = model(im) if training else model(im, augment=augment, val=True) # inference, loss outputs - dt[1] += time_sync() - t2 - - # Loss - if compute_loss: - loss += compute_loss([x.float() for x in train_out], targets, masks)[1] # box, obj, cls - - # NMS - targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels - lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling - t3 = time_sync() - out = non_max_suppression_masks(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, - mask_dim=de_parallel(model).model[-1].mask_dim) - dt[2] += time_sync() - t3 - - # Metrics - for si, pred in enumerate(out): - labels = targets[targets[:, 0] == si, 1:] - midx = [si] if overlap else targets[:, 0] == si - gt_masks = masks[midx] - proto_out = train_out[1][si] - pred_masks = process_mask_upsample(proto_out, pred[:, 6:], pred[:, :4], - shape=im[si].shape[1:]).permute(2, 0, 1).contiguous() - - nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions - path, shape = Path(paths[si]), shapes[si][0] - correct_masks = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init - correct_bboxes = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init - seen += 1 - - if npr == 0: - if nl: - stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0])) - continue - - # Predictions - if single_cls: - pred[:, 5] = 0 - predn = pred.clone() - scale_coords(im[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred - - # Evaluate - if nl: - tbox = xywh2xyxy(labels[:, 1:5]) # target boxes - scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels - labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels - correct_bboxes = process_batch(predn, labelsn, iouv) - correct_masks = 
process_batch_masks(predn, pred_masks, gt_masks, labelsn, iouv, overlap=overlap) - if plots: - confusion_matrix.process_batch(predn, labelsn) - stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct, conf, pcls, tcls) - - # Save/log - if save_txt: - save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt')) - if save_json: - pred_masks = scale_masks(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), - shape, shapes[si][1]) - save_one_json(predn, jdict, path, class_map) # append to COCO-JSON dictionary - callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) - - # Plot images - if plots and batch_i < 3: - # TODO: plot with masks - plot_images(im, targets, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) # labels - plot_images(im, output_to_target(out), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred - - callbacks.run('on_val_batch_end') - - # Compute metrics - stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy - if len(stats) and stats[0].any(): - results = ap_per_class_box_and_mask(*stats, plot=plots, save_dir=save_dir, names=names) - metrics.update(results) - # ap50, ap = ap[:, 0], ap.mean(1) # AP@0.5, AP@0.5:0.95 - # mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() - nt = np.bincount(stats[4].astype(int), minlength=nc) # number of targets per class - else: - nt = torch.zeros(1) - - # Print results - pf = '%20s' + '%11i' * 2 + '%11.3g' * 8 # print format - LOGGER.info(pf % ("all", seen, nt.sum(), *metrics.mean_results())) - - # Print results per class - if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats): - for i, c in enumerate(metrics.ap_class_index): - LOGGER.info(pf % (names[c], seen, nt[c], *metrics.class_result(i))) - - # Print speeds - t = tuple(x / seen * 1E3 for x in dt) # speeds per image - if not training: - shape = (batch_size, 3, imgsz, imgsz) - LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t) - - # Plots - if plots: - confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) - callbacks.run('on_val_end') - - # Save JSON - if save_json and len(jdict): - w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights - anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json - pred_json = str(save_dir / f"{w}_predictions.json") # predictions json - LOGGER.info(f'\nEvaluating pycocotools mAP... 
saving {pred_json}...') - with open(pred_json, 'w') as f: - json.dump(jdict, f) - - try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb - check_requirements(['pycocotools']) - from pycocotools.coco import COCO - from pycocotools.cocoeval import COCOeval - - anno = COCO(anno_json) # init annotations api - pred = anno.loadRes(pred_json) # init predictions api - eval = COCOeval(anno, pred, 'bbox') - if is_coco: - eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # image IDs to evaluate - eval.evaluate() - eval.accumulate() - eval.summarize() - # TODO: update these to metrics - map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) - except Exception as e: - LOGGER.info(f'pycocotools unable to run: {e}') - - # Return results - model.float() # for training - if not training: - s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' - LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") - return ((*metrics.mean_results(), *(loss.cpu() / len(dataloader)).tolist()), - metrics.get_maps(nc), t,) - # maps = np.zeros(nc) + map - # for i, c in enumerate(ap_class): - # maps[c] = ap[i] - # return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t - - -def parse_opt(): - parser = argparse.ArgumentParser() - parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') - parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)') - parser.add_argument('--batch-size', type=int, default=32, help='batch size') - parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') - parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') - parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold') - parser.add_argument('--task', default='val', help='train, val, test, speed or study') - parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') - parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') - parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') - parser.add_argument('--augment', action='store_true', help='augmented inference') - parser.add_argument('--verbose', action='store_true', help='report mAP by class') - parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') - parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt') - parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') - parser.add_argument('--save-json', action='store_true', help='save a COCO-JSON results file') - parser.add_argument('--project', default=ROOT / 'runs/val', help='save to project/name') - parser.add_argument('--name', default='exp', help='save to project/name') - parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') - parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') - parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') - opt = parser.parse_args() - opt.data = check_yaml(opt.data) # check YAML - opt.save_json |= opt.data.endswith('coco.yaml') - opt.save_txt |= opt.save_hybrid - print_args(vars(opt)) - return opt - - -def main(opt): - check_requirements(requirements=ROOT / 'requirements.txt', exclude=('tensorboard', 'thop')) - - if opt.task in ('train', 'val', 'test'): # run normally - if opt.conf_thres > 0.001: # https://github.com/ultralytics/yolov5/issues/1466 - LOGGER.info(emojis(f'WARNING: confidence threshold {opt.conf_thres} > 0.001 produces invalid results ⚠️')) - run(**vars(opt)) - - else: - weights = opt.weights if isinstance(opt.weights, list) else [opt.weights] - opt.half = True # FP16 for fastest results - if opt.task == 'speed': # speed benchmarks - # python val.py --task speed --data coco.yaml --batch 1 --weights yolov5n.pt yolov5s.pt... - opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False - for opt.weights in weights: - run(**vars(opt), plots=False) - - elif opt.task == 'study': # speed vs mAP benchmarks - # python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n.pt yolov5s.pt... 
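For reference, the pycocotools evaluation wired in above reduces to a handful of calls; a sketch assuming COCO-format annotation and prediction files are already on disk (both paths are assumptions):

```python
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

anno = COCO('annotations/instances_val2017.json')  # ground-truth annotations
pred = anno.loadRes('exp_predictions.json')        # boxes (and RLE masks) from save_one_json
for iou_type in ('bbox', 'segm'):                  # box mAP, then mask mAP
    ev = COCOeval(anno, pred, iou_type)
    ev.evaluate()
    ev.accumulate()
    ev.summarize()
    map_all, map50 = ev.stats[:2]                  # mAP@0.5:0.95, mAP@0.5
```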
- for opt.weights in weights: - f = f'study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt' # filename to save to - x, y = list(range(256, 1536 + 128, 128)), [] # x axis (image sizes), y axis - for opt.imgsz in x: # img-size - LOGGER.info(f'\nRunning {f} --imgsz {opt.imgsz}...') - r, _, t = run(**vars(opt), plots=False) - y.append(r + t) # results and times - np.savetxt(f, y, fmt='%10.4g') # save - os.system('zip -r study.zip study_*.txt') - plot_val_study(x=x) # plot - - -if __name__ == "__main__": - opt = parse_opt() - main(opt) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 8e670a86b1b1..6e3696718b6b 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -3,7 +3,6 @@ Logging utils """ -from ast import Import import os import warnings @@ -13,7 +12,8 @@ from utils.general import colorstr, cv2, emojis from utils.loggers.wandb.wandb_utils import WandbLogger -from utils.plots import plot_images, plot_results, plot_results_with_masks, plot_images_and_masks +from utils.plots import plot_images, plot_results +from utils.segment.plots import plot_results_with_masks, plot_images_and_masks from utils.torch_utils import de_parallel LOGGERS = ('csv', 'tb', 'wandb') # text-file, TensorBoard, Weights & Biases @@ -158,7 +158,7 @@ def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1: self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi) - def on_train_end(self, last, best, plots, epoch, results, masks=False): + def on_train_end(self, last, best, plots, epoch, results): # Callback runs on training end # plot_results = plot_results_with_masks if masks else plot_results if plots: @@ -188,174 +188,10 @@ def on_params_update(self, params): if self.wandb: self.wandb.wandb_run.config.update(params, allow_val_change=True) -from threading import Thread - -class NewLoggers: - """Loggers without wandb, cause I don't really use `wandb` and `wandb` related codes are noisy.""" - def __init__( - self, - save_dir=None, - opt=None, - logger=None, - include=LOGGERS, - ): - self.save_dir = save_dir - self.opt = opt - self.logger = logger # for printing results to console - self.include = include - self.keys = [ - "train/box_loss", - "train/obj_loss", - "train/cls_loss", # train loss - "metrics/precision", - "metrics/recall", - "metrics/mAP_0.5", - "metrics/mAP_0.5:0.95", # metrics - "val/box_loss", - "val/obj_loss", - "val/cls_loss", # val loss - "x/lr0", - "x/lr1", - "x/lr2", - ] # params - self.best_keys = [ - "best/epoch", - "best/precision", - "best/recall", - "best/mAP_0.5", - "best/mAP_0.5:0.95", - ] - for k in LOGGERS: - setattr(self, k, None) # init empty logger dictionary - self.csv = True # always log to csv - - # TensorBoard - s = self.save_dir - if "tb" in self.include and s.exists(): - prefix = colorstr("TensorBoard: ") - self.logger.info( - f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/" - ) - self.tb = SummaryWriter(str(s)) - try: - import wandb - from wandb import __version__ - wandb.init(project=opt.project, name=opt.name, config=opt) - except ImportError: - wandb = None - pass - self.wandb = wandb - - def on_pretrain_routine_end(self): - pass - def on_train_batch_end( - self, ni, model, imgs, targets, masks, paths, plots, sync_bn, plot_idx - ): - # Callback runs on train batch end - if plots and self.save_dir.exists(): - if ni == 0: - if ( - not sync_bn - ): # tb.add_graph() 
--sync known issue https://github.com/ultralytics/yolov5/issues/3754 - with warnings.catch_warnings(): - warnings.simplefilter("ignore") # suppress jit trace warning - self.tb.add_graph( - torch.jit.trace( - de_parallel(model), imgs[0:1], strict=False - ), - [], - ) - if plot_idx is not None and ni in plot_idx: - f = self.save_dir / f"train_batch{ni}.jpg" # filename - Thread( - target=plot_images, args=(imgs, targets, paths, f), daemon=True - ).start() - # if ni < 3: - # f = self.save_dir / f'train_batch{ni}.jpg' # filename - # Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start() - - def on_train_epoch_end(self, epoch): - # Callback runs on train epoch end - pass - - def on_val_image_end(self, imgs, targets, masks, paths): - # Callback runs on val image end - pass - - def on_val_end(self): - # Callback runs on val end - pass - - def on_fit_epoch_end(self, vals, epoch): - # Callback runs at the end of each fit (train+val) epoch - x = {k: v for k, v in zip(self.keys, vals)} # dict - if self.csv and self.save_dir.exists(): - file = self.save_dir / "results.csv" - n = len(x) + 1 # number of cols - s = ( - "" - if file.exists() - else (("%20s," * n % tuple(["epoch"] + self.keys)).rstrip(",") + "\n") - ) # add header - with open(file, "a") as f: - f.write(s + ("%20.5g," * n % tuple([epoch] + vals)).rstrip(",") + "\n") - - if self.tb: - for k, v in x.items(): - self.tb.add_scalar(k, v, epoch) - if self.wandb: - wandb.log(x) - - def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): - # Callback runs on model save event - pass - - def on_train_end(self, plots, epoch, masks=False): - plts = plot_results_with_masks if masks else plot_results - # Callback runs on training end - if plots and self.save_dir.exists(): - plts(file=self.save_dir / "results.csv") # save results.png - files = [ - "results.png", - "confusion_matrix.png", - *(f"{x}_curve.png" for x in ("F1", "PR", "P", "R")), - ] - files = [ - (self.save_dir / f) for f in files if (self.save_dir / f).exists() - ] # filter - - if self.tb: - import cv2 - - for f in files: - self.tb.add_image( - f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats="HWC" - ) - if self.wandb: - best = self.save_dir/ "weights" / "best.pt" - last = self.save_dir / "weights" / "last.pt" - wandb.log_artifact(str(best if best.exists() else last), - type='model', - name=f'run_{self.wandb.run.id}_model', - aliases=['latest', 'best', 'stripped']) - self.wandb.finish() - - - def on_params_update(self): - # Update hyperparams or configs of the experiment - # params: A dict containing {param: value} pairs - pass - -class NewLoggersMask(NewLoggers): - def __init__( - self, - save_dir=None, - opt=None, - logger=None, - include=LOGGERS, - ): - super().__init__(save_dir, opt, logger, include) +class LoggersMask(Loggers): + def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS): + super().__init__(save_dir, weights, opt, hyp, logger, include) self.keys = [ "train/box_loss", "train/seg_loss", # train loss @@ -379,50 +215,73 @@ def __init__( ] # params self.best_keys = [ "best/epoch", - "best/precision", - "best/recall", - "best/mAP_0.5", - "best/mAP_0.5:0.95", + "best/precision(B)", + "best/recall(B)", + "best/mAP_0.5(B)", + "best/mAP_0.5:0.95(B)", + "best/precision(M)", + "best/recall(M)", + "best/mAP_0.5(M)", + "best/mAP_0.5:0.95(M)", ] - def on_train_batch_end(self, ni, model, imgs, targets, masks, paths, plots, sync_bn): - # Callback runs on train batch end - if plots and 
self.save_dir.exists(): + def on_train_batch_end(self, ni, model, imgs, targets, masks, paths, plots): + if plots: if ni == 0: - if ( - not sync_bn - ): # tb.add_graph() --sync known issue https://github.com/ultralytics/yolov5/issues/3754 + if self.tb and not self.opt.sync_bn: # --sync known issue https://github.com/ultralytics/yolov5/issues/3754 with warnings.catch_warnings(): - warnings.simplefilter("ignore") # suppress jit trace warning - self.tb.add_graph( - torch.jit.trace( - de_parallel(model), imgs[0:1], strict=False - ), - [], - ) + warnings.simplefilter('ignore') # suppress jit trace warning + self.tb.add_graph(torch.jit.trace(de_parallel(model), imgs[0:1], strict=False), []) if ni < 3: f = self.save_dir / f"train_batch{ni}.jpg" # filename plot_images_and_masks(imgs, targets, masks, paths, f) - + if self.wandb and ni == 10: + files = sorted(self.save_dir.glob('train*.jpg')) + self.wandb.log({'Mosaics': [wandb.Image(str(f), caption=f.name) for f in files if f.exists()]}) - def on_fit_epoch_end(self, vals, epoch): + def on_fit_epoch_end(self, vals, epoch, best_fitness, fi): # Callback runs at the end of each fit (train+val) epoch - x = {k: v for k, v in zip(self.keys, vals)} # dict - if self.csv and self.save_dir.exists(): - file = self.save_dir / "results.csv" + x = dict(zip(self.keys, vals)) + if self.csv: + file = self.save_dir / 'results.csv' n = len(x) + 1 # number of cols - s = ( - "" - if file.exists() - else (("%20s," * n % tuple(["epoch"] + self.keys)).rstrip(",") + "\n") - ) # add header - with open(file, "a") as f: - f.write(s + ("%20.5g," * n % tuple([epoch] + vals)).rstrip(",") + "\n") + s = '' if file.exists() else (('%20s,' * n % tuple(['epoch'] + self.keys)).rstrip(',') + '\n') # add header + with open(file, 'a') as f: + f.write(s + ('%20.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n') if self.tb: for k, v in x.items(): self.tb.add_scalar(k, v, epoch) + + if self.wandb: + if best_fitness == fi: + best_results = [epoch] + vals[4:12] + for i, name in enumerate(self.best_keys): + self.wandb.wandb_run.summary[name] = best_results[i] # log best results in the summary + self.wandb.log(x) + self.wandb.end_epoch(best_result=best_fitness == fi) + + def on_train_end(self, last, best, plots, epoch, results): + # Callback runs on training end + if plots: + plot_results_with_masks(file=self.save_dir / 'results.csv') # save results.png + files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] + files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()] # filter + self.logger.info(f"Results saved to {colorstr('bold', self.save_dir)}") + + if self.tb: + for f in files: + self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC') + if self.wandb: - wandb.log(x, step=epoch, commit=True) + self.wandb.log(dict(zip(self.keys[4:16], results))) + self.wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]}) + # Calling wandb.log. 
TODO: Refactor this into WandbLogger.log_model + if not self.opt.evolve: + wandb.log_artifact(str(best if best.exists() else last), + type='model', + name=f'run_{self.wandb.wandb_run.id}_model', + aliases=['latest', 'best', 'stripped']) + self.wandb.finish_run() diff --git a/utils/plots.py b/utils/plots.py index 49d014abd9e0..1747a83284e4 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -644,62 +644,6 @@ def result(self): # Return annotated image as array return np.asarray(self.im) -class Visualizer(object): - """Visualization of one model.""" - def __init__(self, names) -> None: - super().__init__() - self.names = names - - def draw_one_img(self, img, output, vis_conf=0.4): - """Visualize one images. - - Args: - imgs (numpy.ndarray): one image. - outputs (torch.Tensor): one output, (num_boxes, classes+5) - vis_confs (float, optional): Visualize threshold. - Return: - img (numpy.ndarray): Image after visualization. - """ - if isinstance(output, list): - output = output[0] - if output is None or len(output) == 0: - return img - for (*xyxy, conf, cls) in reversed(output[:, :6]): - if conf < vis_conf: - continue - label = '%s %.2f' % (self.names[int(cls)], conf) - color = colors(int(cls)) - plot_one_box(xyxy, img, label=label, - color=color, - line_thickness=2) - return img - - def draw_multi_img(self, imgs, outputs, vis_confs=0.4): - """Visualize multi images. - - Args: - imgs (List[numpy.array]): multi images. - outputs (List[torch.Tensor]): multi outputs, List[num_boxes, classes+5]. - vis_confs (float | tuple[float], optional): Visualize threshold. - Return: - imgs (List[numpy.ndarray]): Images after visualization. - """ - if isinstance(vis_confs, float): - vis_confs = list(repeat(vis_confs, len(imgs))) - assert len(imgs) == len(outputs) == len(vis_confs) - for i, output in enumerate(outputs): # detections per image - self.draw_one_img(imgs[i], output, vis_confs[i]) - return imgs - - def draw_imgs(self, imgs, outputs, vis_confs=0.4): - if isinstance(imgs, np.ndarray): - return self.draw_one_img(imgs, outputs, vis_confs) - else: - return self.draw_multi_img(imgs, outputs, vis_confs) - - def __call__(self, imgs, outputs, vis_confs=0.4): - return self.draw_imgs(imgs, outputs, vis_confs) - def hist2d(x, y, n=100): # 2d histogram used in labels.png and evolve.png @@ -1078,52 +1022,6 @@ def plot_results(file="path/to/results.csv", dir="", best=True): plt.close() -def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): - # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') - save_dir = Path(file).parent if file else Path(dir) - fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) - ax = ax.ravel() - files = list(save_dir.glob("results*.csv")) - assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." 
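The plot_results_with_masks function removed below (re-added under utils/segment/plots.py later in this patch) picks the "best" epoch with the same 0.1/0.9 weighting of mAP@.5 and mAP@.5:.95 applied to both box and mask metrics. A toy sketch of that selection, assuming the results.csv column layout indexed in the code:

```python
import numpy as np

# Sketch: best-epoch selection, assuming columns 7/8 hold box mAP@.5/mAP@.5:.95
# and columns 11/12 the mask equivalents; fake data stands in for results.csv.
log = np.random.rand(50, 17)  # 50 epochs x 17 logged columns
fitness = (0.1 * log[:, 7] + 0.9 * log[:, 8] +
           0.1 * log[:, 11] + 0.9 * log[:, 12])
best_epoch = int(np.argmax(fitness))
```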
- for _, f in enumerate(files): - try: - data = pd.read_csv(f) - index = np.argmax( - 0.9 * data.values[:, 8] - + 0.1 * data.values[:, 7] - + 0.9 * data.values[:, 12] - + 0.1 * data.values[:, 11], - ) - s = [x.strip() for x in data.columns] - x = data.values[:, 0] - for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): - y = data.values[:, j] - # y[y == 0] = np.nan # don't show zero values - ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) - if best: - # best - ax[i].scatter( - index, - y[index], - color="r", - label=f"best:{index}", - marker="*", - linewidth=3, - ) - ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") - else: - # last - ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) - ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}") - # if j in [8, 9, 10]: # share train and val loss y axes - # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) - except Exception as e: - print(f"Warning: Plotting error for {f}: {e}") - ax[1].legend() - fig.savefig(save_dir / "results.png", dpi=200) - plt.close() - - def plot_one_box(x, img, color=None, label=None, line_thickness=None): import random @@ -1178,218 +1076,3 @@ def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detec print(f"Saving {save_dir / f}... ({n}/{channels})") plt.savefig(save_dir / f, dpi=300, bbox_inches="tight") plt.close() - - -def plot_images_and_masks( - images, - targets, - masks, - paths=None, - fname="images.jpg", - names=None, - max_size=640, - max_subplots=16, -): - # Plot image grid with labels - # print("targets:", targets.shape) - # print("masks:", masks.shape) - # print('--------------------------') - - if isinstance(images, torch.Tensor): - images = images.cpu().float().numpy() - if isinstance(targets, torch.Tensor): - targets = targets.cpu().numpy() - if isinstance(masks, torch.Tensor): - masks = masks.cpu().numpy() - masks = masks.astype(int) - - # un-normalise - if np.max(images[0]) <= 1: - images *= 255 - - tl = 3 # line thickness - tf = max(tl - 1, 1) # font thickness - bs, _, h, w = images.shape # batch size, _, height, width - bs = min(bs, max_subplots) # limit plot images - ns = np.ceil(bs ** 0.5) # number of subplots (square) - - # Check if we should resize - scale_factor = max_size / max(h, w) - if scale_factor < 1: - h = math.ceil(scale_factor * h) - w = math.ceil(scale_factor * w) - - mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init - for i, img in enumerate(images): - if i == max_subplots: # if last batch has fewer images than we expect - break - - block_x = int(w * (i // ns)) - block_y = int(h * (i % ns)) - - img = img.transpose(1, 2, 0) - if scale_factor < 1: - img = cv2.resize(img, (w, h)) - - mosaic[block_y : block_y + h, block_x : block_x + w, :] = img - if len(targets) > 0: - idx = (targets[:, 0]).astype(int) - image_targets = targets[idx == i] - - if masks.max() > 1.0: # mean that masks are overlap - image_masks = masks[[i]] # (1, 640, 640) - # convert masks (1, 640, 640) -> (n, 640, 640) - nl = len(image_targets) - index = np.arange(nl).reshape(nl, 1, 1) + 1 - image_masks = np.repeat(image_masks, nl, axis=0) - image_masks = np.where(image_masks == index, 1.0, 0.0) - else: - image_masks = masks[idx == i] - - boxes = xywh2xyxy(image_targets[:, 2:6]).T - classes = image_targets[:, 1].astype("int") - labels = image_targets.shape[1] == 6 # labels if no conf column - conf = ( - None if labels else image_targets[:, 6] - ) # check for confidence presence (label vs pred) - - if 
boxes.shape[1]: - if boxes.max() <= 1.01: # if normalized with tolerance 0.01 - boxes[[0, 2]] *= w # scale to pixels - boxes[[1, 3]] *= h - elif scale_factor < 1: # absolute coords need scale if image scales - boxes *= scale_factor - boxes[[0, 2]] += block_x - boxes[[1, 3]] += block_y - for j, box in enumerate(boxes.T): - cls = int(classes[j]) - color = colors(cls) - cls = names[cls] if names else cls - mask = image_masks[j].astype(np.bool) - # print(mask.shape) - # print(mosaic.shape) - if labels or conf[j] > 0.25: # 0.25 conf thresh - label = "%s" % cls if labels else "%s %.1f" % (cls, conf[j]) - plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) - mosaic[block_y : block_y + h, block_x : block_x + w, :][mask] = mosaic[ - block_y : block_y + h, block_x : block_x + w, : - ][mask] * 0.35 + (np.array(color) * 0.65) - - # Draw image filename labels - if paths: - label = Path(paths[i]).name[:40] # trim to 40 char - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - cv2.putText( - mosaic, - label, - (block_x + 5, block_y + t_size[1] + 5), - 0, - tl / 3, - [220, 220, 220], - thickness=tf, - lineType=cv2.LINE_AA, - ) - - # Image border - cv2.rectangle( - mosaic, - (block_x, block_y), - (block_x + w, block_y + h), - (255, 255, 255), - thickness=3, - ) - - if fname: - r = min(1280.0 / max(h, w) / ns, 1.0) # ratio to limit image size - mosaic = cv2.resize( - mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA - ) - # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save - with Image.fromarray(mosaic) as im: - im.save(fname) - return mosaic - - -def plot_images_boxes_and_masks( - images, - targets, - masks=None, - paths=None, - fname="images.jpg", - names=None, - max_size=640, - max_subplots=16, -): - if masks is not None: - return plot_images_and_masks(images, targets, masks, paths, fname, names, max_size, max_subplots) - else: - return plot_images(images, targets, paths, fname, names, max_size, max_subplots) - - -def plot_masks(img, masks, colors, alpha=0.5): - """ - Args: - img (tensor): img on cuda, shape: [3, h, w], range: [0, 1] - masks (tensor): predicted masks on cuda, shape: [n, h, w] - colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] - Return: - img after draw masks, shape: [h, w, 3] - - transform colors and send img_gpu to cpu for the most time. - """ - img_gpu = img.clone() - num_masks = len(masks) - # [n, 1, 1, 3] - # faster this way to transform colors - colors = torch.tensor(colors, device=img.device).float() / 255.0 - colors = colors[:, None, None, :] - # [n, h, w, 1] - masks = masks[:, :, :, None] - masks_color = masks.repeat(1, 1, 1, 3) * colors * alpha - inv_alph_masks = masks * (-alpha) + 1 - masks_color_summand = masks_color[0] - if num_masks > 1: - inv_alph_cumul = inv_alph_masks[: (num_masks - 1)].cumprod(dim=0) - masks_color_cumul = masks_color[1:] * inv_alph_cumul - masks_color_summand += masks_color_cumul.sum(dim=0) - - # print(inv_alph_masks.prod(dim=0).shape) # [h, w, 1] - img_gpu = img_gpu.flip(dims=[0]) # filp channel for opencv - img_gpu = img_gpu.permute(1, 2, 0).contiguous() - # [h, w, 3] - img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand - return (img_gpu * 255).byte().cpu().numpy() - -def visualize(self, images, outputs, out_masks, vis_confs=0.4): - """Image visualize - if images is a List of ndarray, then will return a List. - if images is a ndarray, then return ndarray. - Args: - outputs: bbox+conf+cls, List[torch.Tensor(num_boxes, 6)]xB. 
- masks: binary masks, List[torch.Tensor(num_boxes, img_h, img_w)]xB. - """ - ori_type = type(images) - # get original shape, cause self.ori_hw will be cleared - images = images if isinstance(images, list) else [images] - ori_hw = [img.shape[:2] for img in images] - # init the list to keep image with masks. - # TODO: fix this bug when output is empty. - masks_images = [] - # draw masks - for i, output in enumerate(outputs): - if output is None or len(output) == 0: - continue - idx = output[:, 4] > vis_confs - masks = out_masks[i][idx] - mcolors = [colors(int(cls)) for cls in output[:, 5]] - # NOTE: this way to draw masks is faster, - # from https://github.com/dbolya/yolact - # image with masks, (img_h, img_w, 3) - img_masks = plot_masks(self.imgs[i], masks, mcolors) - # scale image to original hw - from utils.segment import scale_masks - img_masks = scale_masks(self.imgs[i].shape[1:], img_masks, ori_hw[i]) - masks_images.append(img_masks) - # TODO: make this(ori_type stuff) clean - images = masks_images[0] if (len(masks_images) == 1) and type(masks_images) != ori_type else images[0] - return self.vis(images, outputs, vis_confs) diff --git a/utils/segment/general.py b/utils/segment/general.py index 70056a4bbb31..d24b263bcc59 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -7,7 +7,7 @@ import torchvision from ..general import xywh2xyxy -from .metrics import box_iou +from ..metrics import box_iou def non_max_suppression_masks(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, diff --git a/utils/segment/metrics.py b/utils/segment/metrics.py index 602623377402..a3c0acd23920 100644 --- a/utils/segment/metrics.py +++ b/utils/segment/metrics.py @@ -8,13 +8,10 @@ from ..metrics import ap_per_class -def fitness(x, masks=False): +def fitness(x): # Model fitness as a weighted combination of metrics - if masks: - w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] - return (x[:, :8] * w).sum(1) - w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] - return (x[:, :4] * w).sum(1) + w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] + return (x[:, :8] * w).sum(1) def ap_per_class_box_and_mask(tp_m, tp_b, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), ): @@ -30,9 +27,9 @@ def ap_per_class_box_and_mask(tp_m, tp_b, conf, pred_cls, target_cls, plot=False prefix="Mask")[2:] results = edict({ - "boxes": {"p": results_boxes[0], "r": results_boxes[1], "ap": results_boxes[2], "f1": results_boxes[3], + "boxes": {"p": results_boxes[0], "r": results_boxes[1], "ap": results_boxes[3], "f1": results_boxes[2], "ap_class": results_boxes[4]}, - "masks": {"p": results_masks[0], "r": results_masks[1], "ap": results_masks[2], "f1": results_masks[3], + "masks": {"p": results_masks[0], "r": results_masks[1], "ap": results_masks[3], "f1": results_masks[2], "ap_class": results_masks[4]}}) return results diff --git a/utils/segment/plots.py b/utils/segment/plots.py new file mode 100644 index 000000000000..77fb983fe8d1 --- /dev/null +++ b/utils/segment/plots.py @@ -0,0 +1,353 @@ +import cv2 +import torch +import math +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd +from pathlib import Path +from PIL import Image + +from ..plots import colors, Annotator +from ..general import xywh2xyxy + + +def plot_masks(img, masks, colors, alpha=0.5): + """ + Args: + img (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] + masks (tensor): predicted masks on cuda, shape: [n, h, w] + colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * 
n]
+    Return:
+        ndarray: img after draw masks, shape: [h, w, 3]
+
+    Colors are transformed up front and img_gpu is moved to the CPU only once
+    at the end, which is faster.
+    """
+    img_gpu = img.clone()
+    num_masks = len(masks)
+    if num_masks == 0:
+        # keep dtype consistent with the masked path below
+        return (img.permute(1, 2, 0).contiguous().cpu().numpy() * 255).astype(np.uint8)
+
+    # [n, 1, 1, 3]
+    # faster this way to transform colors
+    colors = torch.tensor(colors, device=img.device).float() / 255.0
+    colors = colors[:, None, None, :]
+    # [n, h, w, 1]
+    masks = masks[:, :, :, None]
+    masks_color = masks.repeat(1, 1, 1, 3) * colors * alpha
+    inv_alph_masks = masks * (-alpha) + 1
+    masks_color_summand = masks_color[0]
+    if num_masks > 1:
+        inv_alph_cumul = inv_alph_masks[: (num_masks - 1)].cumprod(dim=0)
+        masks_color_cumul = masks_color[1:] * inv_alph_cumul
+        masks_color_summand += masks_color_cumul.sum(dim=0)
+
+    # print(inv_alph_masks.prod(dim=0).shape)  # [h, w, 1]
+    img_gpu = img_gpu.flip(dims=[0])  # flip channel for opencv
+    img_gpu = img_gpu.permute(1, 2, 0).contiguous()
+    # [h, w, 3]
+    img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
+    return (img_gpu * 255).byte().cpu().numpy()
+
+def plot_one_box(x, img, color=None, label=None, line_thickness=None):
+    import random
+
+    # Plots one bounding box on image img
+    tl = (
+        line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
+    )  # line/font thickness
+    color = color or [random.randint(0, 255) for _ in range(3)]
+    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
+    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
+    if label:
+        tf = max(tl - 1, 1)  # font thickness
+        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
+        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
+        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
+        cv2.putText(
+            img,
+            label,
+            (c1[0], c1[1] - 2),
+            0,
+            tl / 3,
+            [225, 255, 255],
+            thickness=tf,
+            lineType=cv2.LINE_AA,
+        )
+
+def plot_images_and_masks(
+    images,
+    targets,
+    masks,
+    paths=None,
+    fname="images.jpg",
+    names=None,
+    max_size=640,
+    max_subplots=16,
+):
+    if isinstance(images, torch.Tensor):
+        images = images.cpu().float().numpy()
+    if isinstance(targets, torch.Tensor):
+        targets = targets.cpu().numpy()
+    if isinstance(masks, torch.Tensor):
+        masks = masks.cpu().numpy()
+        masks = masks.astype(int)
+
+    # un-normalise
+    if np.max(images[0]) <= 1:
+        images *= 255
+
+    tl = 3  # line thickness
+    tf = max(tl - 1, 1)  # font thickness
+    bs, _, h, w = images.shape  # batch size, _, height, width
+    bs = min(bs, max_subplots)  # limit plot images
+    ns = np.ceil(bs ** 0.5)  # number of subplots (square)
+
+    # Check if we should resize
+    scale_factor = max_size / max(h, w)
+    if scale_factor < 1:
+        h = math.ceil(scale_factor * h)
+        w = math.ceil(scale_factor * w)
+
+    mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
+    for i, img in enumerate(images):
+        if i == max_subplots:  # if last batch has fewer images than we expect
+            break
+
+        block_x = int(w * (i // ns))
+        block_y = int(h * (i % ns))
+
+        img = img.transpose(1, 2, 0)
+        if scale_factor < 1:
+            img = cv2.resize(img, (w, h))
+
+        mosaic[block_y : block_y + h, block_x : block_x + w, :] = img
+        if len(targets) > 0:
+            idx = (targets[:, 0]).astype(int)
+            image_targets = targets[idx == i]
+
+            if masks.max() > 1.0:  # means that masks overlap
+                image_masks = masks[[i]]  # (1, 640, 640)
+                # convert masks (1, 640, 640) -> (n, 640, 640)
+                nl = len(image_targets)
+                index = np.arange(nl).reshape(nl, 1, 1) + 1
+                image_masks = np.repeat(image_masks, nl, axis=0)
+                image_masks = np.where(image_masks == index, 1.0, 0.0)
+            else:
+                image_masks = masks[idx == i]
+
+            boxes = xywh2xyxy(image_targets[:, 2:6]).T
+            classes = image_targets[:, 1].astype("int")
+            labels = image_targets.shape[1] == 6  # labels if no conf column
+            conf = (
+                None if labels else image_targets[:, 6]
+            )  # check for confidence presence (label vs pred)
+
+            if boxes.shape[1]:
+                if boxes.max() <= 1.01:  # if normalized with tolerance 0.01
+                    boxes[[0, 2]] *= w  # scale to pixels
+                    boxes[[1, 3]] *= h
+                elif scale_factor < 1:  # absolute coords need scale if image scales
+                    boxes *= scale_factor
+            boxes[[0, 2]] += block_x
+            boxes[[1, 3]] += block_y
+            for j, box in enumerate(boxes.T):
+                cls = int(classes[j])
+                color = colors(cls)
+                cls = names[cls] if names else cls
+                if scale_factor < 1:
+                    mask = image_masks[j].astype(np.uint8)
+                    mask = cv2.resize(mask, (w, h))
+                    mask = mask.astype(bool)  # np.bool is deprecated
+                else:
+                    mask = image_masks[j].astype(bool)  # np.bool is deprecated
+                if labels or conf[j] > 0.25:  # 0.25 conf thresh
+                    label = "%s" % cls if labels else "%s %.1f" % (cls, conf[j])
+                    plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl)
+                    mosaic[block_y : block_y + h, block_x : block_x + w, :][mask] = mosaic[
+                        block_y : block_y + h, block_x : block_x + w, :
+                    ][mask] * 0.35 + (np.array(color) * 0.65)
+
+        # Draw image filename labels
+        if paths:
+            label = Path(paths[i]).name[:40]  # trim to 40 char
+            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
+            cv2.putText(
+                mosaic,
+                label,
+                (block_x + 5, block_y + t_size[1] + 5),
+                0,
+                tl / 3,
+                [220, 220, 220],
+                thickness=tf,
+                lineType=cv2.LINE_AA,
+            )
+
+        # Image border
+        cv2.rectangle(
+            mosaic,
+            (block_x, block_y),
+            (block_x + w, block_y + h),
+            (255, 255, 255),
+            thickness=3,
+        )
+
+    if fname:
+        r = min(1280.0 / max(h, w) / ns, 1.0)  # ratio to limit image size
+        mosaic = cv2.resize(
+            mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA
+        )
+        # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB))  # cv2 save
+        with Image.fromarray(mosaic) as im:
+            im.save(fname)
+    return mosaic
+
+# def plot_images_and_masks(
+#     images,
+#     targets,
+#     masks,
+#     paths=None,
+#     fname="images.jpg",
+#     names=None,
+#     max_size=640,
+#     max_subplots=16,
+# ):
+#     # plot masks first in torch way,
+#     # this is faster if masks are in cuda.
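The overlap-mask branch above stores all instances of an image in a single integer map (0 = background, instance k encoded as the value k+1) and expands it on demand; a self-contained sketch of that decoding:

```python
import numpy as np

overlap = np.array([[0, 1, 1],
                    [2, 2, 1],
                    [0, 2, 3]])                      # toy 3x3 map with 3 instances
n = int(overlap.max())
index = np.arange(n).reshape(n, 1, 1) + 1            # values 1..n, shape (n, 1, 1)
binary = np.where(overlap[None] == index, 1.0, 0.0)  # (n, h, w) via broadcasting
assert binary[1].sum() == (overlap == 2).sum()       # binary[1] is instance 2's mask
```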
+# masks = torch.as_tensor(masks, dtype=torch.float32) +# images = torch.as_tensor(images, dtype=torch.float32, device=masks.device) +# if isinstance(targets, torch.Tensor): +# targets = targets.cpu().numpy() +# +# # normalize +# if images[0].max() > 1: +# images /= 255 +# +# images_with_masks = [] +# for i, img in enumerate(images): +# if len(targets) == 0: +# continue +# idx = (targets[:, 0]).astype(int) +# image_targets = targets[idx == i] +# mcolors = np.array([colors(int(cls), bgr=True) for cls in image_targets[:, 1]]) +# labels = image_targets.shape[1] == 6 # labels if no conf column +# conf = ( +# None if labels else image_targets[:, 6] +# ) # check for confidence presence (label vs pred) +# +# if masks.max() > 1.0: # mean that masks are overlap +# image_masks = masks[[i]] # (1, 640, 640) +# # convert masks (1, 640, 640) -> (n, 640, 640) +# nl = len(image_targets) +# index = torch.arange(nl, device=image_masks.device).view(nl, 1, 1) + 1 +# image_masks = image_masks.repeat(nl, 1, 1) +# image_masks = torch.where(image_masks == index, 1.0, 0.0) +# else: +# image_masks = masks[idx == i] +# if conf is not None: +# image_masks = image_masks[conf > 0.25] +# mcolors = mcolors[conf > 0.25] +# image_with_masks = plot_masks(img, image_masks, mcolors) +# images_with_masks.append(image_with_masks[..., ::-1]) +# images = np.stack(images_with_masks, axis=0) +# +# bs, h, w, _,= images.shape # batch size, _, height, width +# bs = min(bs, max_subplots) # limit plot images +# ns = np.ceil(bs ** 0.5) # number of subplots (square) +# +# mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init +# for i, im in enumerate(images): +# if i == max_subplots: # if last batch has fewer images than we expect +# break +# x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin +# mosaic[y : y + h, x : x + w, :] = im +# +# # Resize (optional) +# scale = max_size / ns / max(h, w) +# if scale < 1: +# h = math.ceil(scale * h) +# w = math.ceil(scale * w) +# mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) +# +# # Annotate +# fs = int((h + w) * ns * 0.01) # font size +# annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True) +# for i in range(i + 1): +# x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin +# annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders +# if paths: +# annotator.text( +# (x + 5, y + 5 + h), +# text=Path(paths[i]).name[:40], +# txt_color=(220, 220, 220), +# ) # filenames +# if len(targets) > 0: +# ti = targets[targets[:, 0] == i] # image targets +# boxes = xywh2xyxy(ti[:, 2:6]).T +# classes = ti[:, 1].astype("int") +# labels = ti.shape[1] == 6 # labels if no conf column +# conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) +# +# if boxes.shape[1]: +# if boxes.max() <= 1.01: # if normalized with tolerance 0.01 +# boxes[[0, 2]] *= w # scale to pixels +# boxes[[1, 3]] *= h +# elif scale < 1: # absolute coords need scale if image scales +# boxes *= scale +# boxes[[0, 2]] += x +# boxes[[1, 3]] += y +# for j, box in enumerate(boxes.T.tolist()): +# cls = classes[j] +# color = colors(cls) +# cls = names[cls] if names else cls +# if labels or conf[j] > 0.25: # 0.25 conf thresh +# label = f"{cls}" if labels else f"{cls} {conf[j]:.1f}" +# annotator.box_label(box, label, color=color) +# annotator.im.save(fname) # save +# return annotator.result() + + +def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): + # Plot training results.csv. 
Usage: from utils.plots import *; plot_results('path/to/results.csv') + save_dir = Path(file).parent if file else Path(dir) + fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) + ax = ax.ravel() + files = list(save_dir.glob("results*.csv")) + assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." + for _, f in enumerate(files): + try: + data = pd.read_csv(f) + index = np.argmax( + 0.9 * data.values[:, 8] + + 0.1 * data.values[:, 7] + + 0.9 * data.values[:, 12] + + 0.1 * data.values[:, 11], + ) + s = [x.strip() for x in data.columns] + x = data.values[:, 0] + for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): + y = data.values[:, j] + # y[y == 0] = np.nan # don't show zero values + ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) + if best: + # best + ax[i].scatter( + index, + y[index], + color="r", + label=f"best:{index}", + marker="*", + linewidth=3, + ) + ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") + else: + # last + ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) + ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}") + # if j in [8, 9, 10]: # share train and val loss y axes + # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) + except Exception as e: + print(f"Warning: Plotting error for {f}: {e}") + ax[1].legend() + fig.savefig(save_dir / "results.png", dpi=200) + plt.close() + From 9d5bbf77732ed19b3b3dbf4349a8f1fe9813a5df Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 11 Aug 2022 19:31:15 +0800 Subject: [PATCH 053/247] revert yolo.py&&remove evaluator.py --- models/yolo.py | 18 +- segment/evaluator.py | 616 ------------------------------------------- segment/val.py | 2 +- 3 files changed, 2 insertions(+), 634 deletions(-) delete mode 100644 segment/evaluator.py diff --git a/models/yolo.py b/models/yolo.py index cd9248e7c8c2..f991cdc7ec66 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -14,8 +14,6 @@ from copy import deepcopy from pathlib import Path -from torch import NoneType - FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLOv5 root directory if str(ROOT) not in sys.path: @@ -30,7 +28,6 @@ from utils.plots import feature_visualization from utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device, time_sync) -import torch.nn.functional as F try: import thop # for FLOPs computation @@ -110,8 +107,7 @@ def __init__(self, nc=80, anchors=(), mask_dim=32, proto_channel=256, ch=(), inp # nn.SiLU(inplace=True), # nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), # nn.SiLU(inplace=True), - # nn.Upsample(scale_factor=2, mode='nearest'), - Upsample(scale_factor=2, mode='bilinear', align_corners=False), + nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False), nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), nn.SiLU(inplace=True), nn.Conv2d(self.proto_c, self.mask_dim, kernel_size=1, padding=0), @@ -382,18 +378,6 @@ def parse_model(d, ch): # model_dict, input_channels(3) ch.append(c2) return nn.Sequential(*layers), sorted(save) -class Upsample(nn.Module): - ''' - deterministic upsample layer - ''' - def __init__(self, scale_factor, mode="bilinear", align_corners=False) -> None: - super().__init__() - self.scale_factor = scale_factor - self.mode = mode - self.align_corners = align_corners - - def forward(self, x): - return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode, 
align_corners=self.align_corners) if __name__ == '__main__': parser = argparse.ArgumentParser() diff --git a/segment/evaluator.py b/segment/evaluator.py deleted file mode 100644 index acf8f94e42cf..000000000000 --- a/segment/evaluator.py +++ /dev/null @@ -1,616 +0,0 @@ -# TODO: Optimize plotting, losses & merge with val.py - -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license -""" -Validate a trained YOLOv5 model accuracy on a custom dataset - -Usage: - $ python path/to/val.py --data coco128.yaml --weights yolov5s.pt --img 640 -""" - -import json -from pathlib import Path -from threading import Thread - -import numpy as np -import torch -import torch.nn.functional as F -import pycocotools.mask as mask_util -from tqdm import tqdm - -from models.experimental import attempt_load -from utils.segment.dataloaders import create_dataloader -from utils.general import (box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, ) -from utils.general import (check_dataset, check_img_size, check_suffix) -from utils.general import (coco80_to_coco91_class, increment_path, colorstr, ) -from utils.plots import output_to_target, plot_images_boxes_and_masks -from utils.segment.metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix -from utils.segment.general import (non_max_suppression_masks, mask_iou, process_mask, process_mask_upsample, scale_masks, ) -from utils.torch_utils import select_device, time_sync, de_parallel - - -def save_one_txt(predn, save_conf, shape, file): - # Save one txt result - gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh - for *xyxy, conf, cls in predn.tolist(): - xywh = ((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()) # normalized xywh - line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format - with open(file, "a") as f: - f.write(("%g " * len(line)).rstrip() % line + "\n") - - -def save_one_json(predn, jdict, path, class_map, pred_masks=None): - # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} - image_id = int(path.stem) if path.stem.isnumeric() else path.stem - box = xyxy2xywh(predn[:, :4]) # xywh - box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner - - if pred_masks is not None: - pred_masks = np.transpose(pred_masks, (2, 0, 1)) - rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks] - for rle in rles: - rle["counts"] = rle["counts"].decode("utf-8") - - for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): - pred_dict = {"image_id": image_id, "category_id": class_map[int(p[5])], "bbox": [round(x, 3) for x in b], - "score": round(p[4], 5), } - if pred_masks is not None: - pred_dict["segmentation"] = rles[i] - jdict.append(pred_dict) - - -@torch.no_grad() -class Yolov5Evaluator: - def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls=False, augment=False, verbose=False, - project="runs/val", name="exp", exist_ok=False, half=True, save_dir=Path(""), nosave=False, plots=True, - max_plot_dets=10, mask=False, mask_downsample_ratio=1, overlap=False) -> None: - self.data = check_dataset(data) # check - self.conf_thres = conf_thres # confidence threshold - self.iou_thres = iou_thres # NMS IoU threshold - self.device = device # cuda device, i.e. 
0 or 0,1,2,3 or cpu
- self.single_cls = single_cls # treat as single-class dataset
- self.augment = augment # augmented inference
- self.verbose = verbose # verbose output
- self.project = project # save to project/name
- self.name = name # save to project/name
- self.exist_ok = exist_ok # existing project/name ok, do not increment
- self.half = half # use FP16 half-precision inference
- self.save_dir = save_dir
- self.nosave = nosave
- self.plots = plots
- self.max_plot_dets = max_plot_dets
- self.mask = mask
- self.mask_downsample_ratio = mask_downsample_ratio
- self.overlap = overlap
-
- self.nc = 1 if self.single_cls else int(self.data["nc"]) # number of classes
- self.iouv = torch.linspace(0.5, 0.95, 10) # iou vector for mAP@0.5:0.95
- self.niou = self.iouv.numel()
- self.confusion_matrix = ConfusionMatrix(nc=self.nc)
- self.dt = [0.0, 0.0, 0.0]
- self.names = {k: v for k, v in enumerate(self.data["names"])}
- self.s = (("%20s" + "%11s" * 10) % (
- "Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", "Mask:{P", "R", "mAP@.5",
- "mAP@.5:.95}",) if self.mask else ("%20s" + "%11s" * 6) % (
- "Class", "Images", "Labels", "P", "R", "mAP@.5", "mAP@.5:.95",))
- self.step = 0
-
- # coco stuff
- self.is_coco = isinstance(self.data.get("val"), str) and self.data["val"].endswith(
- "coco/val2017.txt") # COCO dataset
- self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000))
- self.jdict = []
- self.iou_thres = 0.65 if self.is_coco else self.iou_thres
-
- # masks stuff
- self.pred_masks = [] # for mask visualization
-
- # metric stuff
- self.seen = 0
- self.stats = []
- self.total_loss = torch.zeros((4 if self.mask else 3))
- self.metric = Metrics() if self.mask else Metric()
-
- @torch.no_grad()
- def run_training(self, model, dataloader, compute_loss=None):
- """This is for evaluation when training."""
- self.seen = 0
- self.device = next(model.parameters()).device # get model device
- # self.iouv.to(self.device)
- self.total_loss = torch.zeros((4 if self.mask else 3), device=self.device)
- self.half &= self.device.type != "cpu" # half precision only supported on CUDA
- model.half() if self.half else model.float()
- # Configure
- model.eval()
-
- # inference
- # masks will be `None` when training detection only.
- for batch_i, (img, targets, paths, shapes, masks) in enumerate(tqdm(dataloader, desc=self.s)):
- # reset pred_masks
- self.pred_masks = []
- img = img.to(self.device, non_blocking=True)
- targets = targets.to(self.device)
- if masks is not None:
- masks = masks.to(self.device).float()
- out, train_out = self.inference(model, img, targets, masks, compute_loss)
-
- # Statistics per image
- for si, pred in enumerate(out):
- self.seen += 1
-
- # evaluate at the per-image level
- labels = targets[targets[:, 0] == si, 1:]
- midx = [si] if self.overlap else targets[:, 0] == si
- gt_masksi = masks[midx] if masks is not None else None
-
- # get prediction masks
- proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
- pred_maski = self.get_predmasks(pred, proto_out,
- gt_masksi.shape[1:] if gt_masksi is not None else None, )
-
- # for visualization
- if self.plots and batch_i < 3 and pred_maski is not None:
- self.pred_masks.append(pred_maski[:self.max_plot_dets].cpu())
-
- # NOTE: eval in training image-size space
- self.compute_stat(pred, pred_maski, labels, gt_masksi)
-
- if self.plots and batch_i < 2:
- self.plot_images(batch_i, img, targets, masks, out, paths)
-
- # compute map and print it.
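# A minimal, self-contained sketch of the "compute map" step above: `after_infer()`
# delegates to `ap_per_class`, which ranks predictions by confidence, accumulates
# TP/FP counts against the (N, 10) `correct` matrix, and integrates precision over
# recall. Names, shapes and the 101-point sampling are illustrative assumptions,
# not the repo implementation.
import numpy as np

def ap_sketch(correct, conf, n_gt):
    # correct: (N, 10) bool, one column per IoU threshold 0.5:0.95; conf: (N,)
    order = np.argsort(-conf)              # rank predictions by confidence
    tp = correct[order].cumsum(0)          # cumulative true positives
    fp = (~correct[order]).cumsum(0)       # cumulative false positives
    recall = tp / (n_gt + 1e-16)           # (N, 10)
    precision = tp / (tp + fp + 1e-16)     # (N, 10)
    x = np.linspace(0, 1, 101)             # COCO-style 101-point interpolation
    return np.array([np.trapz(np.interp(x, recall[:, i], precision[:, i]), x)
                     for i in range(correct.shape[1])])  # ap[0]=AP@.5, ap.mean()=AP@.5:.95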
- t = self.after_infer()
-
- # Return results
- model.float() # for training
- self.step += 1
- return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),),
- self.metric.get_maps(self.nc), t,)
-
- def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_json=False, task="val", ):
- """This is for native evaluation."""
- model, dataloader, imgsz = self.before_infer(weights, batch_size, imgsz, save_txt, task)
- self.seen = 0
- # self.iouv.to(self.device)
- self.half &= self.device.type != "cpu" # half precision only supported on CUDA
- model.half() if self.half else model.float()
- # Configure
- model.eval()
-
- # inference
- for batch_i, (img, targets, paths, shapes, masks) in enumerate(tqdm(dataloader, desc=self.s)):
- # reset pred_masks
- self.pred_masks = []
- img = img.to(self.device, non_blocking=True)
- targets = targets.to(self.device)
- if masks is not None:
- masks = masks.to(self.device).float()
- out, train_out = self.inference(model, img, targets, masks)
-
- # Statistics per image
- for si, pred in enumerate(out):
- self.seen += 1
- path = Path(paths[si])
- shape = shapes[si][0]
- ratio_pad = shapes[si][1]
-
- # evaluate at the per-image level
- labels = targets[targets[:, 0] == si, 1:]
- midx = [si] if self.overlap else targets[:, 0] == si
- gt_masksi = masks[midx] if masks is not None else None
-
- # get prediction masks
- proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
- pred_maski = self.get_predmasks(pred, proto_out,
- gt_masksi.shape[1:] if gt_masksi is not None else None, )
-
- # for visualization
- if self.plots and batch_i < 3 and pred_maski is not None:
- self.pred_masks.append(pred_maski[:self.max_plot_dets].cpu())
-
- # NOTE: eval in training image-size space
- self.compute_stat(pred, pred_maski, labels, gt_masksi)
-
- # no predictions, nothing to save
- if len(pred) == 0:
- continue
-
- if save_txt or save_json:
- # clone() so plot_images still works on the original pred
- predn = pred.clone()
- # test-time uses 0.5 padding, which differs from the dataloader padding, so ratio_pad must be passed in
- scale_coords(img[si].shape[1:], predn[:, :4], shape, ratio_pad) # native-space pred
-
- # Save/log
- if save_txt and self.save_dir.exists():
- # NOTE: convert coords to native space when saving txt.
- # only box predictions are saved
- save_one_txt(predn, save_conf, shape, file=self.save_dir / "labels" / (path.stem + ".txt"), )
- if save_json and self.save_dir.exists():
- # NOTE: convert coords to native space when saving json.
- # if pred_maski is not None:
- # h, w, n
- pred_maski = scale_masks(img[si].shape[1:], pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(),
- shape, ratio_pad, )
- save_one_json(predn, self.jdict, path, self.class_map,
- pred_maski, ) # append to COCO-JSON dictionary
-
- if self.plots and batch_i < 3:
- self.plot_images(batch_i, img, targets, masks, out, paths)
-
- # compute map and print it.
- t = self.after_infer()
-
- # save json
- if self.save_dir.exists() and save_json:
- pred_json = str(self.save_dir / f"predictions.json") # predictions json
- print(f"\nEvaluating pycocotools mAP...
saving {pred_json}...") - with open(pred_json, "w") as f: - json.dump(self.jdict, f) - - # Print speeds - shape = (batch_size, 3, imgsz, imgsz) - print(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" % t) - - s = ( - f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}" if save_txt and self.save_dir.exists() else "") - print(f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}") - - # Return results - return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),), - self.metric.get_maps(self.nc), t,) - - def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"): - "prepare for evaluation without training." - self.device = select_device(self.device, batch_size=batch_size) - - # Directories - self.save_dir = increment_path(Path(self.project) / self.name, exist_ok=self.exist_ok) # increment run - if not self.nosave: - (self.save_dir / "labels" if save_txt else self.save_dir).mkdir(parents=True, exist_ok=True) # make dir - - # Load model - check_suffix(weights, ".pt") - model = attempt_load(weights, device=self.device) # load FP32 model - gs = max(int(model.stride.max()), 32) # grid size (max stride) - imgsz = check_img_size(imgsz, s=gs) # check image size - - # Data - if self.device.type != "cpu": - model(torch.zeros(1, 3, imgsz, imgsz).to(self.device).type_as(next(model.parameters()))) # run once - pad = 0.0 if task == "speed" else 0.5 - task = (task if task in ("train", "val", "test") else "val") # path to train/val/test images - dataloader = create_dataloader(self.data[task], imgsz, batch_size, gs, self.single_cls, pad=pad, rect=True, - prefix=colorstr(f"{task}: "), mask_head=self.mask, mask_downsample_ratio=self.mask_downsample_ratio, )[0] - return model, dataloader, imgsz - - def inference(self, model, img, targets, masks=None, compute_loss=None): - """Inference""" - t1 = time_sync() - img = img.half() if self.half else img.float() # uint8 to fp16/32 - img /= 255.0 # 0 - 255 to 0.0 - 1.0 - _, _, height, width = img.shape # batch size, channels, height, width - t2 = time_sync() - self.dt[0] += t2 - t1 - - # Run model - out, train_out = model(img, augment=self.augment) # inference and training outputs - self.dt[1] += time_sync() - t2 - - # Compute loss - if compute_loss: - self.total_loss += compute_loss(train_out, targets, masks)[1] # box, obj, cls - - # Run NMS - targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(self.device) # to pixels - t3 = time_sync() - out = self.nms(prediction=out, conf_thres=self.conf_thres, iou_thres=self.iou_thres, multi_label=True, - agnostic=self.single_cls, mask_dim=de_parallel(model).model[-1].mask_dim) - self.dt[2] += time_sync() - t3 - return out, train_out - - def after_infer(self): - """Do something after inference, such as plots and get metrics. - Return: - t(tuple): speeds of per image. 
- """ - # Plot confusion matrix - if self.plots and self.save_dir.exists(): - self.confusion_matrix.plot(save_dir=self.save_dir, names=list(self.names.values())) - - # Compute statistics - stats = [np.concatenate(x, 0) for x in zip(*self.stats)] # to numpy - box_or_mask_any = stats[0].any() or stats[1].any() - stats = stats[1:] if not self.mask else stats - if len(stats) and box_or_mask_any: - results = self.ap_per_class(*stats, self.plots, self.save_dir if self.save_dir.exists() else None, - self.names, ) - self.metric.update(results) - nt = np.bincount(stats[(3 if not self.mask else 4)].astype(np.int64), - minlength=self.nc) # number of targets per class - else: - nt = torch.zeros(1) - - # make this empty, cause make `stats` self is for reduce some duplicated codes. - self.stats = [] - # print information - self.print_metric(nt, stats) - t = tuple(x / self.seen * 1e3 for x in self.dt) # speeds per image - return t - - def process_batch(self, detections, labels, iouv): - """ - Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format. - Arguments: - detections (Array[N, 6]), x1, y1, x2, y2, conf, class - labels (Array[M, 5]), class, x1, y1, x2, y2 - Returns: - correct (Array[N, 10]), for 10 IoU levels - """ - correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device) - iou = box_iou(labels[:, 1:], detections[:, :4]) - x = torch.where( - (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5])) # IoU above threshold and classes match - if x[0].shape[0]: - matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] - if x[0].shape[0] > 1: - matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 1], return_index=True)[1]] - # matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 0], return_index=True)[1]] - matches = torch.Tensor(matches).to(iouv.device) - correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv - return correct - - def get_predmasks(self, pred, proto_out, gt_shape): - """Get pred masks in different ways. - 1. process_mask, for val when training, eval with low quality(1/mask_ratio of original size) - mask for saving cuda memory. - 2. process_mask_upsample, for val after training to get high quality mask(original size). - - Args: - pred(torch.Tensor): output of network, (N, 5 + mask_dim + class). - proto_out(torch.Tensor): output of mask prototype, (mask_dim, mask_h, mask_w). - gt_shape(tuple): shape of gt mask, this shape may not equal to input size of - input image, Cause the mask_downsample_ratio. - Return: - pred_mask(torch.Tensor): predition of final masks with the same size with - input image, (N, input_h, input_w). - """ - if proto_out is None or len(pred) == 0: - return None - process = process_mask_upsample if self.plots else process_mask - gt_shape = (gt_shape[0] * self.mask_downsample_ratio, gt_shape[1] * self.mask_downsample_ratio,) - # n, h, w - pred_mask = (process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape).permute(2, 0, 1).contiguous()) - return pred_mask - - def process_batch_masks(self, predn, pred_maski, gt_masksi, labels): - assert not ((pred_maski is None) ^ ( - gt_masksi is None)), "`proto_out` and `gt_masksi` should be both None or both exist." 
- if pred_maski is None and gt_masksi is None:
- return torch.zeros(0, self.niou, dtype=torch.bool)
-
- correct = torch.zeros(predn.shape[0], self.iouv.shape[0], dtype=torch.bool, device=self.iouv.device, )
-
- # convert masks (1, 640, 640) -> (n, 640, 640)
- if self.overlap:
- nl = len(labels)
- index = torch.arange(nl, device=gt_masksi.device).view(nl, 1, 1) + 1
- gt_masksi = gt_masksi.repeat(nl, 1, 1)
- gt_masksi = torch.where(gt_masksi == index, 1.0, 0.0)
-
- if gt_masksi.shape[1:] != pred_maski.shape[1:]:
- gt_masksi = F.interpolate(gt_masksi.unsqueeze(0), pred_maski.shape[1:], mode="bilinear",
- align_corners=False, ).squeeze(0)
-
- iou = mask_iou(gt_masksi.view(gt_masksi.shape[0], -1), pred_maski.view(pred_maski.shape[0], -1), )
- x = torch.where(
- (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match
- if x[0].shape[0]:
- matches = (
- torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou]
- if x[0].shape[0] > 1:
- matches = matches[matches[:, 2].argsort()[::-1]]
- matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
- # matches = matches[matches[:, 2].argsort()[::-1]]
- matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
- matches = torch.Tensor(matches).to(self.iouv.device)
- correct[matches[:, 1].long()] = matches[:, 2:3] >= self.iouv
- return correct
-
- def compute_stat(self, predn, pred_maski, labels, gt_maski):
- """Compute IoU statistics, with boxes in training image-size space."""
- nl = len(labels)
- tcls = labels[:, 0].tolist() if nl else [] # target class
-
- if len(predn) == 0:
- if nl:
- self.stats.append((torch.zeros(0, self.niou, dtype=torch.bool), # masks
- torch.zeros(0, self.niou, dtype=torch.bool), # boxes
- torch.Tensor(), torch.Tensor(), tcls,))
- return
-
- # Predictions
- if self.single_cls:
- predn[:, 5] = 0
-
- # Evaluate
- if nl:
- tbox = xywh2xyxy(labels[:, 1:5]) # target boxes
- labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels
- # boxes
- correct_boxes = self.process_batch(predn, labelsn, self.iouv)
-
- # masks
- correct_masks = self.process_batch_masks(predn, pred_maski, gt_maski, labelsn)
-
- if self.plots:
- self.confusion_matrix.process_batch(predn, labelsn)
- else:
- correct_boxes = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool)
- correct_masks = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool)
- self.stats.append((correct_masks.cpu(), correct_boxes.cpu(), predn[:, 4].cpu(), predn[:, 5].cpu(),
- tcls,)) # (correct_masks, correct_boxes, conf, pcls, tcls)
-
- def print_metric(self, nt, stats):
- # Print results
- pf = "%20s" + "%11i" * 2 + "%11.3g" * (8 if self.mask else 4)
- print(pf % ("all", self.seen, nt.sum(), *self.metric.mean_results()))
-
- # Print results per class
- # TODO: self.seen support verbose.
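# A minimal sketch (assumed shapes; not the repo helper) of the `mask_iou` call used
# by `process_batch_masks` above: both mask sets are flattened to (n, H*W) vectors so
# a single matrix product yields all pairwise intersections at once.
import torch

def mask_iou_sketch(m1, m2, eps=1e-7):
    # m1: (N, H*W), m2: (M, H*W), binary {0., 1.} float masks
    inter = m1 @ m2.T                                     # (N, M) pairwise overlap
    union = m1.sum(1)[:, None] + m2.sum(1)[None] - inter  # inclusion-exclusion
    return inter / (union + eps)                          # (N, M) IoU matrix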
- if self.verbose and self.nc > 1 and len(stats): - for i, c in enumerate(self.metric.ap_class_index): - print(pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i))) - - def plot_images(self, i, img, targets, masks, out, paths): - if not self.save_dir.exists(): - return - # plot ground truth - f = self.save_dir / f"val_batch{i}_labels.jpg" # labels - - if masks is not None and masks.shape[1:] != img.shape[2:]: - masks = F.interpolate( - masks.unsqueeze(0).float(), - img.shape[2:], - mode="bilinear", - align_corners=False, - ).squeeze(0) - - Thread(target=plot_images_boxes_and_masks, args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), - daemon=True, ).start() - f = self.save_dir / f"val_batch{i}_pred.jpg" # predictions - - # plot predition - if len(self.pred_masks): - pred_masks = (torch.cat(self.pred_masks, dim=0) if len(self.pred_masks) > 1 else self.pred_masks[0]) - else: - pred_masks = None - plot_images_boxes_and_masks(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:])) - #Thread(target=plot_images_boxes_and_masks, - # args=(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:]),), - # daemon=True, ).start() - # import wandb - # if wandb.run: - # wandb.log({f"pred_{i}": wandb.Image(str(f))}, step=self.step) - - def nms(self, **kwargs): - return (non_max_suppression_masks(**kwargs) if self.mask else non_max_suppression(**kwargs)) - - def ap_per_class(self, *args): - return ap_per_class_box_and_mask(*args) if self.mask else ap_per_class(*args) - - -class Metric: - def __init__(self) -> None: - self.p = [] # (nc, ) - self.r = [] # (nc, ) - self.f1 = [] # (nc, ) - self.all_ap = [] # (nc, 10) - self.ap_class_index = [] # (nc, ) - - @property - def ap50(self): - """AP@0.5 of all classes. - Return: - (nc, ) or []. - """ - return self.all_ap[:, 0] if len(self.all_ap) else [] - - @property - def ap(self): - """AP@0.5:0.95 - Return: - (nc, ) or []. - """ - return self.all_ap.mean(1) if len(self.all_ap) else [] - - @property - def mp(self): - """mean precision of all classes. - Return: - float. - """ - return self.p.mean() if len(self.p) else 0.0 - - @property - def mr(self): - """mean recall of all classes. - Return: - float. - """ - return self.r.mean() if len(self.r) else 0.0 - - @property - def map50(self): - """Mean AP@0.5 of all classes. - Return: - float. - """ - return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0 - - @property - def map(self): - """Mean AP@0.5:0.95 of all classes. - Return: - float. 
- """ - return self.all_ap.mean() if len(self.all_ap) else 0.0 - - def mean_results(self): - """Mean of results, return mp, mr, map50, map""" - return (self.mp, self.mr, self.map50, self.map) - - def class_result(self, i): - """class-aware result, return p[i], r[i], ap50[i], ap[i]""" - return (self.p[i], self.r[i], self.ap50[i], self.ap[i]) - - def get_maps(self, nc): - maps = np.zeros(nc) + self.map - for i, c in enumerate(self.ap_class_index): - maps[c] = self.ap[i] - return maps - - def update(self, results): - """ - Args: - results: tuple(p, r, ap, f1, ap_class) - """ - p, r, all_ap, f1, ap_class_index = results - self.p = p - self.r = r - self.all_ap = all_ap - self.f1 = f1 - self.ap_class_index = ap_class_index - - -class Metrics: - """Metric for boxes and masks.""" - - def __init__(self) -> None: - self.metric_box = Metric() - self.metric_mask = Metric() - - def update(self, results): - """ - Args: - results: Dict{'boxes': Dict{}, 'masks': Dict{}} - """ - self.metric_box.update(list(results["boxes"].values())) - self.metric_mask.update(list(results["masks"].values())) - - def mean_results(self): - return self.metric_box.mean_results() + self.metric_mask.mean_results() - - def class_result(self, i): - return self.metric_box.class_result(i) + self.metric_mask.class_result(i) - - def get_maps(self, nc): - return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) - - @property - def ap_class_index(self): - # boxes and masks have the same ap_class_index - return self.metric_box.ap_class_index diff --git a/segment/val.py b/segment/val.py index a2a4eb526773..877296e2b6dc 100644 --- a/segment/val.py +++ b/segment/val.py @@ -317,7 +317,7 @@ def run( pred_masks = scale_masks(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]) save_one_json(predn, jdict, path, class_map) # append to COCO-JSON dictionary - callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) + callbacks.run('on_val_image_end', pred[:, :6], predn[:, :6], path, names, im[si]) # Plot images if plots and batch_i < 3: From 71776e1cbc01d2933211a67a794b29c615879557 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 11 Aug 2022 20:23:04 +0800 Subject: [PATCH 054/247] fix mixup --- segment/val.py | 21 ++++++++++++--------- utils/segment/augmentations.py | 7 +++++++ utils/segment/dataloaders.py | 8 ++++---- utils/segment/general.py | 2 -- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/segment/val.py b/segment/val.py index 877296e2b6dc..7387e2a5c30b 100644 --- a/segment/val.py +++ b/segment/val.py @@ -274,14 +274,6 @@ def run( # Metrics for si, pred in enumerate(out): labels = targets[targets[:, 0] == si, 1:] - midx = [si] if overlap else targets[:, 0] == si - gt_masks = masks[midx] - proto_out = train_out[1][si] - pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], - shape=im[si].shape[1:]).permute(2, 0, 1).contiguous() - if plots and batch_i < 3: - plot_masks.append(pred_masks[:15].cpu()) - nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions path, shape = Path(paths[si]), shapes[si][0] correct_masks = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init @@ -293,6 +285,16 @@ def run( stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0])) continue + # deal with masks + midx = [si] if overlap else targets[:, 0] == si + gt_masks = masks[midx] + proto_out = train_out[1][si] + pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], + 
shape=im[si].shape[1:]).permute(2, 0, 1).contiguous() + if plots and batch_i < 3: + # filter top 15 to plot + plot_masks.append(pred_masks[:15].cpu()) + # Predictions if single_cls: pred[:, 5] = 0 @@ -379,7 +381,8 @@ def run( # Save JSON if save_json and len(jdict): w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights - anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json + # anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json + anno_json = "/d/dataset/COCO/annotations/instances_val2017.json" pred_json = str(save_dir / f"{w}_predictions.json") # predictions json LOGGER.info(f'\nEvaluating pycocotools mAP... saving {pred_json}...') with open(pred_json, 'w') as f: diff --git a/utils/segment/augmentations.py b/utils/segment/augmentations.py index be788a81ea94..c532119c5058 100644 --- a/utils/segment/augmentations.py +++ b/utils/segment/augmentations.py @@ -12,6 +12,13 @@ from ..general import segment2box, resample_segments from ..augmentations import box_candidates +def mixup(im, labels, segments, im2, labels2, segments2): + # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf + r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 + im = (im * r + im2 * (1 - r)).astype(np.uint8) + labels = np.concatenate((labels, labels2), 0) + segments = np.concatenate((segments, segments2), 0) + return im, labels, segments def random_perspective(im, targets=(), diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py index 0230bcee13d2..89ac50dba401 100644 --- a/utils/segment/dataloaders.py +++ b/utils/segment/dataloaders.py @@ -12,11 +12,11 @@ from torch.utils.data import DataLoader from torch.utils.data import distributed -from ..augmentations import augment_hsv, copy_paste, letterbox, mixup +from ..augmentations import augment_hsv, copy_paste, letterbox from ..dataloaders import LoadImagesAndLabels, InfiniteDataLoader, seed_worker from ..general import xywhn2xyxy, xyxy2xywhn, xyn2xy, LOGGER from ..torch_utils import torch_distributed_zero_first -from .augmentations import random_perspective +from .augmentations import random_perspective, mixup def create_dataloader(path, @@ -96,10 +96,10 @@ def __getitem__(self, index): img, labels, segments = self.load_mosaic(index) shapes = None - # TODO: Mixup not support segment for now # MixUp augmentation if random.random() < hyp["mixup"]: - img, labels = mixup(img, labels, *self.load_mosaic(random.randint(0, self.num_imgs - 1))) + img, labels, segments = mixup(img, labels, segments, + *self.load_mosaic(random.randint(0, self.n - 1))) else: # Load image diff --git a/utils/segment/general.py b/utils/segment/general.py index d24b263bcc59..00367e7268fd 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -149,9 +149,7 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): """ # mask_h, mask_w, n masks = proto_out.float().permute(1, 2, 0).contiguous() @ out_masks.float().tanh().T - # print(masks.shape) masks = masks.sigmoid() - # print('after sigmoid:', masks) masks = masks.permute(2, 0, 1).contiguous() # [n, mask_h, mask_w] masks = F.interpolate(masks.unsqueeze(0), shape, mode='bilinear', align_corners=False).squeeze(0) From a24b3fd1698078f47a91e2570701631ebf8bc24e Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 11 Aug 2022 20:40:03 +0800 Subject: [PATCH 055/247] update val(temp way) --- segment/val.py | 23 
+++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/segment/val.py b/segment/val.py index 7387e2a5c30b..63a6f479b89f 100644 --- a/segment/val.py +++ b/segment/val.py @@ -37,6 +37,7 @@ import torch.nn.functional as F import pycocotools.mask as mask_util from models.common import DetectMultiBackend +from models.experimental import attempt_load # scoped to avoid circular import from utils.callbacks import Callbacks from utils.segment.dataloaders import create_dataloader from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, @@ -183,14 +184,20 @@ def run( (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model - model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) - stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine + # model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) + model = attempt_load(weights, device=device) # load FP32 model + stride = 32 + pt, jit, engine = True, False, False + # stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine imgsz = check_img_size(imgsz, s=stride) # check image size - half = model.fp16 # FP16 supported on limited backends with CUDA + # half = model.fp16 # FP16 supported on limited backends with CUDA + half = device.type != 'cpu' + if half: + model.half() if engine: batch_size = model.batch_size else: - device = model.device + # device = model.device if not (pt or jit): batch_size = 1 # export.py models default to batch-size 1 LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') @@ -209,10 +216,10 @@ def run( # Dataloader if not training: if pt and not single_cls: # check --weights are trained on --data - ncm = model.model.nc + ncm = model.nc assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \ f'classes). Pass correct combination of --weights and --data that are trained together.' 
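# A sketch of the COCO RLE serialization that `save_one_json` applies to the scaled
# masks further below; the mask here is a dummy value. pycocotools expects a
# Fortran-ordered uint8 (H, W, 1) array, and the RLE byte counts must be decoded
# to str before json.dump.
import numpy as np
import pycocotools.mask as mask_util

m = (np.random.rand(160, 160) > 0.5).astype(np.uint8)       # dummy binary mask
rle = mask_util.encode(np.asarray(m[:, :, None], order="F"))[0]
rle["counts"] = rle["counts"].decode("utf-8")               # JSON-serializable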
- model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup + # model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup pad = 0.0 if task in ('speed', 'benchmark') else 0.5 rect = False if task == 'benchmark' else pt # square inference for benchmarks task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images @@ -254,7 +261,7 @@ def run( dt[0] += t2 - t1 # Inference - out, train_out = model(im) if training else model(im, augment=augment, val=True) # inference, loss outputs + out, train_out = model(im) #if training else model(im, augment=augment, val=True) # inference, loss outputs dt[1] += time_sync() - t2 # Loss @@ -318,7 +325,7 @@ def run( if save_json: pred_masks = scale_masks(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]) - save_one_json(predn, jdict, path, class_map) # append to COCO-JSON dictionary + save_one_json(predn, jdict, path, class_map, pred_masks) # append to COCO-JSON dictionary callbacks.run('on_val_image_end', pred[:, :6], predn[:, :6], path, names, im[si]) # Plot images From 8cf90edc2dd95da59d5c9b205945c2a3669417c6 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Sun, 14 Aug 2022 02:41:26 +0000 Subject: [PATCH 056/247] clean up --- utils/segment/augmentations.py | 33 +++-------- utils/segment/plots.py | 104 --------------------------------- 2 files changed, 8 insertions(+), 129 deletions(-) diff --git a/utils/segment/augmentations.py b/utils/segment/augmentations.py index c532119c5058..dc29df6ad8ad 100644 --- a/utils/segment/augmentations.py +++ b/utils/segment/augmentations.py @@ -81,37 +81,20 @@ def random_perspective(im, n = len(targets) new_segments = [] if n: - use_segments = any(x.any() for x in segments) new = np.zeros((n, 4)) - if use_segments: # warp segments - segments = resample_segments(segments) # upsample - for i, segment in enumerate(segments): - xy = np.ones((len(segment), 3)) - xy[:, :2] = segment - xy = xy @ M.T # transform - xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine - - # clip - new[i] = segment2box(xy, width, height) - new_segments.append(xy) - - else: # warp boxes - xy = np.ones((n * 4, 3)) - xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 + segments = resample_segments(segments) # upsample + for i, segment in enumerate(segments): + xy = np.ones((len(segment), 3)) + xy[:, :2] = segment xy = xy @ M.T # transform - xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine - - # create new boxes - x = xy[:, [0, 2, 4, 6]] - y = xy[:, [1, 3, 5, 7]] - new = (np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T) + xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine # clip - new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) - new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) + new[i] = segment2box(xy, width, height) + new_segments.append(xy) # filter candidates - i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10) + i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01) targets = targets[i] targets[:, 1:5] = new[i] new_segments = np.array(new_segments)[i] diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 77fb983fe8d1..eb1e9b61d01a 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -201,110 +201,6 @@ def plot_images_and_masks( 
im.save(fname) return mosaic -# def plot_images_and_masks( -# images, -# targets, -# masks, -# paths=None, -# fname="images.jpg", -# names=None, -# max_size=640, -# max_subplots=16, -# ): -# # plot masks first in torch way, -# # this is faster if masks are in cuda. -# masks = torch.as_tensor(masks, dtype=torch.float32) -# images = torch.as_tensor(images, dtype=torch.float32, device=masks.device) -# if isinstance(targets, torch.Tensor): -# targets = targets.cpu().numpy() -# -# # normalize -# if images[0].max() > 1: -# images /= 255 -# -# images_with_masks = [] -# for i, img in enumerate(images): -# if len(targets) == 0: -# continue -# idx = (targets[:, 0]).astype(int) -# image_targets = targets[idx == i] -# mcolors = np.array([colors(int(cls), bgr=True) for cls in image_targets[:, 1]]) -# labels = image_targets.shape[1] == 6 # labels if no conf column -# conf = ( -# None if labels else image_targets[:, 6] -# ) # check for confidence presence (label vs pred) -# -# if masks.max() > 1.0: # mean that masks are overlap -# image_masks = masks[[i]] # (1, 640, 640) -# # convert masks (1, 640, 640) -> (n, 640, 640) -# nl = len(image_targets) -# index = torch.arange(nl, device=image_masks.device).view(nl, 1, 1) + 1 -# image_masks = image_masks.repeat(nl, 1, 1) -# image_masks = torch.where(image_masks == index, 1.0, 0.0) -# else: -# image_masks = masks[idx == i] -# if conf is not None: -# image_masks = image_masks[conf > 0.25] -# mcolors = mcolors[conf > 0.25] -# image_with_masks = plot_masks(img, image_masks, mcolors) -# images_with_masks.append(image_with_masks[..., ::-1]) -# images = np.stack(images_with_masks, axis=0) -# -# bs, h, w, _,= images.shape # batch size, _, height, width -# bs = min(bs, max_subplots) # limit plot images -# ns = np.ceil(bs ** 0.5) # number of subplots (square) -# -# mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init -# for i, im in enumerate(images): -# if i == max_subplots: # if last batch has fewer images than we expect -# break -# x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin -# mosaic[y : y + h, x : x + w, :] = im -# -# # Resize (optional) -# scale = max_size / ns / max(h, w) -# if scale < 1: -# h = math.ceil(scale * h) -# w = math.ceil(scale * w) -# mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) -# -# # Annotate -# fs = int((h + w) * ns * 0.01) # font size -# annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True) -# for i in range(i + 1): -# x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin -# annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders -# if paths: -# annotator.text( -# (x + 5, y + 5 + h), -# text=Path(paths[i]).name[:40], -# txt_color=(220, 220, 220), -# ) # filenames -# if len(targets) > 0: -# ti = targets[targets[:, 0] == i] # image targets -# boxes = xywh2xyxy(ti[:, 2:6]).T -# classes = ti[:, 1].astype("int") -# labels = ti.shape[1] == 6 # labels if no conf column -# conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) -# -# if boxes.shape[1]: -# if boxes.max() <= 1.01: # if normalized with tolerance 0.01 -# boxes[[0, 2]] *= w # scale to pixels -# boxes[[1, 3]] *= h -# elif scale < 1: # absolute coords need scale if image scales -# boxes *= scale -# boxes[[0, 2]] += x -# boxes[[1, 3]] += y -# for j, box in enumerate(boxes.T.tolist()): -# cls = classes[j] -# color = colors(cls) -# cls = names[cls] if names else cls -# if labels or conf[j] > 0.25: # 0.25 conf thresh -# label = f"{cls}" if labels else 
f"{cls} {conf[j]:.1f}" -# annotator.box_label(box, label, color=color) -# annotator.im.save(fname) # save -# return annotator.result() - def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') From 8ecbde80334a0799ca094c3bc3b9f28a31cb1781 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Sun, 14 Aug 2022 02:41:54 +0000 Subject: [PATCH 057/247] cancel generator --- utils/dataloaders.py | 7 ++++--- utils/segment/dataloaders.py | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/utils/dataloaders.py b/utils/dataloaders.py index 260fb6a97da9..2a06762c9c86 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -135,8 +135,8 @@ def create_dataloader(path, nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates - generator = torch.Generator() - generator.manual_seed(0) + # generator = torch.Generator() + # generator.manual_seed(0) return loader(dataset, batch_size=batch_size, shuffle=shuffle and sampler is None, @@ -145,7 +145,8 @@ def create_dataloader(path, pin_memory=True, collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, worker_init_fn=seed_worker, - generator=generator), dataset + # generator=generator, + ), dataset class InfiniteDataLoader(dataloader.DataLoader): diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py index 89ac50dba401..f4af39617dea 100644 --- a/utils/segment/dataloaders.py +++ b/utils/segment/dataloaders.py @@ -62,8 +62,8 @@ def create_dataloader(path, nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates - generator = torch.Generator() - generator.manual_seed(0) + # generator = torch.Generator() + # generator.manual_seed(0) return loader(dataset, batch_size=batch_size, shuffle=shuffle and sampler is None, @@ -72,7 +72,8 @@ def create_dataloader(path, pin_memory=True, collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn, worker_init_fn=seed_worker, - generator=generator), dataset + # generator=generator, + ), dataset class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing From e714bc112305079f3ba924a9d480f50146ea96bc Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Sun, 14 Aug 2022 02:42:13 +0000 Subject: [PATCH 058/247] revert loss --- utils/segment/loss.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 47fed765f990..992fe98499ff 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -92,7 +92,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model if self.sort_obj_iou: sort_id = torch.argsort(score_iou) b, a, gj, gi, score_iou = (b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], score_iou[sort_id],) - tobj[b, a, gj, gi] = 0.5 * ((1.0 - self.gr) + self.gr * score_iou) # iou ratio + tobj[b, a, gj, gi] = 1.0 * ((1.0 - self.gr) + self.gr * score_iou) # iou 
ratio # Classification if self.nc > 1: # cls loss (only if multiple classes) @@ -131,12 +131,12 @@ def __call__(self, preds, targets, masks): # predictions, targets, model psi = ps[index][:, 5: self.nm] proto = proto_out[bi] - one_lseg, iou = self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) + one_lseg = self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) batch_lseg += one_lseg - # update tobj - iou = iou.detach().clamp(0).type(tobj.dtype) - tobj[b[index], a[index], gj[index], gi[index]] += 0.5 * iou[0] + # # update tobj + # iou = iou.detach().clamp(0).type(tobj.dtype) + # tobj[b[index], a[index], gj[index], gi[index]] += 0.5 * iou[0] lseg += batch_lseg / len(b.unique()) @@ -161,11 +161,11 @@ def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): # (80, 80, 32) @ (32, n) -> (80, 80, n) pred_mask = proto @ pred.tanh().T # lseg_iou = self.mask_loss(pred_mask, gt_mask, xyxy) - iou = self.mask_loss(pred_mask, gt_mask, xyxy, return_iou=True) + # iou = self.mask_loss(pred_mask, gt_mask, xyxy, return_iou=True) lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") lseg = crop(lseg, xyxy) lseg = lseg.mean(dim=(0, 1)) / w / h - return lseg.mean(), iou# + lseg_iou.mean() + return lseg.mean()#, iou# + lseg_iou.mean() def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) From 83b4020d14cde2a08caf7012d1a1bed3b8c4bf6a Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Sun, 14 Aug 2022 02:42:35 +0000 Subject: [PATCH 059/247] update train.py&&val.py --- segment/train.py | 5 ++--- segment/val.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/segment/train.py b/segment/train.py index 3a06915eb061..ea3ca58316c0 100644 --- a/segment/train.py +++ b/segment/train.py @@ -68,7 +68,6 @@ from datetime import datetime def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary - print(device) save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio= \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio @@ -419,7 +418,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio single_cls=single_cls, dataloader=val_loader, save_dir=save_dir, - plots=False, + plots=plots, callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, @@ -485,7 +484,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio plots=plots, callbacks=callbacks, compute_loss=compute_loss, - mask_downsample_ratio=1, + mask_downsample_ratio=mask_ratio, overlap=overlap) # val best model with plots if is_coco: callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) diff --git a/segment/val.py b/segment/val.py index 63a6f479b89f..1045a3959793 100644 --- a/segment/val.py +++ b/segment/val.py @@ -300,7 +300,7 @@ def run( shape=im[si].shape[1:]).permute(2, 0, 1).contiguous() if plots and batch_i < 3: # filter top 15 to plot - plot_masks.append(pred_masks[:15].cpu()) + plot_masks.append(torch.as_tensor(pred_masks[:15], dtype=torch.uint8).cpu()) # Predictions if single_cls: @@ -388,8 +388,7 @@ def run( # Save JSON if save_json and len(jdict): w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights - # anno_json = 
str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json - anno_json = "/d/dataset/COCO/annotations/instances_val2017.json" + anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json pred_json = str(save_dir / f"{w}_predictions.json") # predictions json LOGGER.info(f'\nEvaluating pycocotools mAP... saving {pred_json}...') with open(pred_json, 'w') as f: From c381d575f57a1c51a42c5c6a57d02f36831d1a1d Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 17 Aug 2022 15:47:43 +0530 Subject: [PATCH 060/247] rearrange model files --- models/{ => segment}/yolov5l_seg.yaml | 0 models/{ => segment}/yolov5m_seg.yaml | 0 models/{ => segment}/yolov5n_seg.yaml | 0 models/{ => segment}/yolov5s_seg.yaml | 0 models/{ => segment}/yolov5x_seg.yaml | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename models/{ => segment}/yolov5l_seg.yaml (100%) rename models/{ => segment}/yolov5m_seg.yaml (100%) rename models/{ => segment}/yolov5n_seg.yaml (100%) rename models/{ => segment}/yolov5s_seg.yaml (100%) rename models/{ => segment}/yolov5x_seg.yaml (100%) diff --git a/models/yolov5l_seg.yaml b/models/segment/yolov5l_seg.yaml similarity index 100% rename from models/yolov5l_seg.yaml rename to models/segment/yolov5l_seg.yaml diff --git a/models/yolov5m_seg.yaml b/models/segment/yolov5m_seg.yaml similarity index 100% rename from models/yolov5m_seg.yaml rename to models/segment/yolov5m_seg.yaml diff --git a/models/yolov5n_seg.yaml b/models/segment/yolov5n_seg.yaml similarity index 100% rename from models/yolov5n_seg.yaml rename to models/segment/yolov5n_seg.yaml diff --git a/models/yolov5s_seg.yaml b/models/segment/yolov5s_seg.yaml similarity index 100% rename from models/yolov5s_seg.yaml rename to models/segment/yolov5s_seg.yaml diff --git a/models/yolov5x_seg.yaml b/models/segment/yolov5x_seg.yaml similarity index 100% rename from models/yolov5x_seg.yaml rename to models/segment/yolov5x_seg.yaml From 0a965bf16dadf723bbd7f0c4e7ad2a40a34fddcc Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 19:59:34 +0530 Subject: [PATCH 061/247] create temp trainer --- segment/train_temp.py | 708 ++++++++++++++++++++++++++++++++++++++ utils/loggers/__init__.py | 79 +++++ utils/segment/metrics.py | 34 ++ 3 files changed, 821 insertions(+) create mode 100644 segment/train_temp.py diff --git a/segment/train_temp.py b/segment/train_temp.py new file mode 100644 index 000000000000..57c23811cf53 --- /dev/null +++ b/segment/train_temp.py @@ -0,0 +1,708 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Train a YOLOv5 model on a custom dataset. + +Models and datasets download automatically from the latest YOLOv5 release. 
Models: https://github.com/ultralytics/yolov5/tree/master/models
+Datasets: https://github.com/ultralytics/yolov5/tree/master/data
+Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data
+
+Usage:
+ $ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640 # from pretrained (RECOMMENDED)
+ $ python path/to/train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640 # from scratch
+"""
+
+import argparse
+import math
+import os
+import random
+import sys
+import time
+from copy import deepcopy
+from datetime import datetime
+from pathlib import Path
+
+import val # for end-of-epoch mAP
+import numpy as np
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+import yaml
+from torch.nn.parallel import DistributedDataParallel as DDP
+import torch.nn.functional as F
+from torch.optim import SGD, Adam, AdamW, lr_scheduler
+from tqdm import tqdm
+
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[1] # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+ sys.path.append(str(ROOT)) # add ROOT to PATH
+ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
+
+from models.experimental import attempt_load
+from models.yolo import Model
+from utils.autoanchor import check_anchors
+from utils.autobatch import check_train_batch_size
+from utils.callbacks import Callbacks
+from utils.segment.dataloaders import create_dataloader
+from utils.downloads import attempt_download
+from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size,
+ check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run,
+ increment_path, init_seeds, intersect_dicts, labels_to_class_weights,
+ labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer)
+from utils.loggers import GenericLogger
+from utils.loggers.wandb.wandb_utils import check_wandb_resume
+from utils.segment.loss import ComputeLoss
+from utils.segment.metrics import fitness
+from utils.plots import plot_evolve, plot_labels
+from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first
+
+
+LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html
+RANK = int(os.getenv('RANK', -1))
+WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
+from utils.segment.plots import plot_images_and_masks, plot_results_with_masks
+from utils.segment.metrics import KEYS, BEST_KEYS
+
+def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary
+ save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio = \
+ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
+ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio
+
+ # Directories
+ w = save_dir / 'weights' # weights dir
+ (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir
+ last, best = w / 'last.pt', w / 'best.pt'
+
+ # Hyperparameters
+ if isinstance(hyp, str):
+ with open(hyp, errors='ignore') as f:
+ hyp = yaml.safe_load(f) # load hyps dict
+ LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
+
+ # Save run settings
+ if not evolve:
+ with open(save_dir / 'hyp.yaml',
'w') as f: + yaml.safe_dump(hyp, f, sort_keys=False) + with open(save_dir / 'opt.yaml', 'w') as f: + yaml.safe_dump(vars(opt), f, sort_keys=False) + + # Loggers + data_dict = None + if RANK in {-1, 0}: + logger = GenericLogger( + opt=opt, console_logger=LOGGER + ) # loggers instance + + # Register actions + # for k in methods(loggers): + # callbacks.register_action(k, callback=getattr(loggers, k)) + + # Config + plots = not evolve and not opt.noplots # create plots + overlap = opt.overlap_mask + cuda = device.type != 'cpu' + init_seeds(opt.seed + 1 + RANK, True) + with torch_distributed_zero_first(LOCAL_RANK): + data_dict = data_dict or check_dataset(data) # check if None + train_path, val_path = data_dict['train'], data_dict['val'] + nc = 1 if single_cls else int(data_dict['nc']) # number of classes + names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names + assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}' # check + is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt') # COCO dataset + + # Model + check_suffix(weights, '.pt') # check weights + pretrained = weights.endswith('.pt') + if pretrained: + with torch_distributed_zero_first(LOCAL_RANK): + weights = attempt_download(weights) # download if not found locally + ckpt = torch.load(weights, map_location='cpu') # load checkpoint to CPU to avoid CUDA memory leak + model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create + exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys + csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 + csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect + model.load_state_dict(csd, strict=False) # load + LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report + else: + model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create + amp = check_amp(model) # check AMP + + # Freeze + freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze + for k, v in model.named_parameters(): + v.requires_grad = True # train all layers + if any(x in k for x in freeze): + LOGGER.info(f'freezing {k}') + v.requires_grad = False + + # Image size + gs = max(int(model.stride.max()), 32) # grid size (max stride) + imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple + + # Batch size + if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size + batch_size = check_train_batch_size(model, imgsz, amp) + logger.update_params({"batch_size": batch_size}) + + # Optimizer + nbs = 64 # nominal batch size + accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing + hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay + LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}") + + g = [], [], [] # optimizer parameter groups + bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. 
BatchNorm2d() + for v in model.modules(): + if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias + g[2].append(v.bias) + if isinstance(v, bn): # weight (no decay) + g[1].append(v.weight) + elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay) + g[0].append(v.weight) + + # hyp['lr0'] = hyp['lr0'] / batch_size * 128 + # hyp['warmup_bias_lr'] = 0.01 + if opt.optimizer == 'Adam': + optimizer = Adam(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum + elif opt.optimizer == 'AdamW': + optimizer = AdamW(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum + else: + optimizer = SGD(g[2], lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) + + optimizer.add_param_group({'params': g[0], 'weight_decay': hyp['weight_decay']}) # add g0 with weight_decay + optimizer.add_param_group({'params': g[1]}) # add g1 (BatchNorm2d weights) + LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups " + f"{len(g[1])} weight (no decay), {len(g[0])} weight, {len(g[2])} bias") + del g + + # Scheduler + if opt.cos_lr: + lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf'] + else: + lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) + + # EMA + ema = ModelEMA(model) if RANK in {-1, 0} else None + + # Resume + start_epoch, best_fitness = 0, 0.0 + if pretrained: + # Optimizer + if ckpt['optimizer'] is not None: + optimizer.load_state_dict(ckpt['optimizer']) + best_fitness = ckpt['best_fitness'] + + # EMA + if ema and ckpt.get('ema'): + ema.ema.load_state_dict(ckpt['ema'].float().state_dict()) + ema.updates = ckpt['updates'] + + # Epochs + start_epoch = ckpt['epoch'] + 1 + if resume: + assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.' + if epochs < start_epoch: + LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs.") + epochs += ckpt['epoch'] # finetune additional epochs + + del ckpt, csd + + # DP mode + if cuda and RANK == -1 and torch.cuda.device_count() > 1: + LOGGER.warning('WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n' + 'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.') + model = torch.nn.DataParallel(model) + + # SyncBatchNorm + if opt.sync_bn and cuda and RANK != -1: + model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) + LOGGER.info('Using SyncBatchNorm()') + + # Trainloader + train_loader, dataset = create_dataloader(train_path, + imgsz, + batch_size // WORLD_SIZE, + gs, + single_cls, + hyp=hyp, + augment=True, + cache=None if opt.cache == 'val' else opt.cache, + rect=opt.rect, + rank=LOCAL_RANK, + workers=workers, + image_weights=opt.image_weights, + quad=opt.quad, + prefix=colorstr('train: '), + shuffle=True, + mask_downsample_ratio=mask_ratio, + overlap_mask=overlap, + ) + mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class + print("mlc , nc ", mlc, " ", nc ) + nb = len(train_loader) # number of batches + assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. 
Possible class labels are 0-{nc - 1}' + + # Process 0 + if RANK in {-1, 0}: + val_loader = create_dataloader(val_path, + imgsz, + batch_size // WORLD_SIZE * 2, + gs, + single_cls, + hyp=hyp, + cache=None if noval else opt.cache, + rect=True, + rank=-1, + workers=workers * 2, + pad=0.5, + mask_downsample_ratio=mask_ratio, + overlap_mask=overlap, + prefix=colorstr('val: '))[0] + + if not resume: + labels = np.concatenate(dataset.labels, 0) + # c = torch.tensor(labels[:, 0]) # classes + # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency + # model._initialize_biases(cf.to(device)) + if plots: + plot_labels(labels, names, save_dir) + + # Anchors + if not opt.noautoanchor: + check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) + model.half().float() # pre-reduce anchor precision + + # DDP mode + if cuda and RANK != -1: + if check_version(torch.__version__, '1.11.0'): + model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, static_graph=True) + else: + model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK) + + # Model attributes + nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps) + hyp['box'] *= 3 / nl # scale to layers + hyp['cls'] *= nc / 80 * 3 / nl # scale to classes and layers + hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl # scale to image size and layers + hyp['label_smoothing'] = opt.label_smoothing + model.nc = nc # attach number of classes to model + model.hyp = hyp # attach hyperparameters to model + model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights + model.names = names + + # Start training + t0 = time.time() + nw = max(round(hyp['warmup_epochs'] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations) + # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training + last_opt_step = -1 + maps = np.zeros(nc) # mAP per class + results = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) + scheduler.last_epoch = start_epoch - 1 # do not move + scaler = torch.cuda.amp.GradScaler(enabled=amp) + stopper, stop = EarlyStopping(patience=opt.patience), False + compute_loss = ComputeLoss(model, overlap=overlap) # init loss class + LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n' + f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n' + f"Logging results to {colorstr('bold', save_dir)}\n" + f'Starting training for {epochs} epochs...') + for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ + model.train() + + # Update image weights (optional, single-GPU only) + if opt.image_weights: + cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights + iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights + dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx + + # Update mosaic border (optional) + # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) + # dataset.mosaic_border = [b - imgsz, -b] # height, width borders + + mloss = torch.zeros(4, device=device) # mean losses + if RANK != -1: + train_loader.sampler.set_epoch(epoch) + pbar = enumerate(train_loader) + LOGGER.info( ("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "seg", "obj", "cls", "labels", "img_size")) + if RANK in {-1, 0}: + pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar + 
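+        # Zero once at the start of the epoch; inside the loop, gradients are only
+        # cleared after an optimizer step, so batches accumulate gradients until
+        # ni - last_opt_step >= accumulate (emulating the nominal batch size nbs=64).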
optimizer.zero_grad() + for i, (imgs, targets, paths, _, masks) in pbar: # batch ------------------------------------------------------------- + ni = i + nb * epoch # number integrated batches (since train start) + imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 + + # Warmup + if ni <= nw: + xi = [0, nw] # x interp + # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) + accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round()) + for j, x in enumerate(optimizer.param_groups): + # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 + x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 0 else 0.0, x['initial_lr'] * lf(epoch)]) + if 'momentum' in x: + x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) + + # Multi-scale + if opt.multi_scale: + sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size + sf = sz / max(imgs.shape[2:]) # scale factor + if sf != 1: + ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) + imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) + + # Forward + with torch.cuda.amp.autocast(amp): + pred = model(imgs) # forward + loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device).float()) # loss scaled by batch_size + if RANK != -1: + loss *= WORLD_SIZE # gradient averaged between devices in DDP mode + if opt.quad: + loss *= 4. + + # Backward + scaler.scale(loss).backward() + + # Optimize + if ni - last_opt_step >= accumulate: + scaler.step(optimizer) # optimizer.step + scaler.update() + optimizer.zero_grad() + if ema: + ema.update(model) + last_opt_step = ni + + # Log + if RANK in {-1, 0}: + mloss = (mloss * i + loss_items) / (i + 1) # update mean losses + mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) + pbar.set_description(("%10s" * 2 + "%10.4g" * 6) + % (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0],imgs.shape[-1])) + # for plots + if mask_ratio != 1: + masks = F.interpolate( + masks[None, :].float(), + (imgsz, imgsz), + mode="bilinear", + align_corners=False, + ).squeeze(0) + #callbacks.run('on_train_batch_end', ni, model, imgs, targets, masks, paths, plots) + if plots: + if ni < 3: + f = save_dir / f"train_batch{ni}.jpg" # filename + plot_images_and_masks(imgs, targets, masks, paths, f) + + if ni == 10: + files = sorted(save_dir.glob('train*.jpg')) + logger.log_images(files, "Mosaics") + # end batch ------------------------------------------------------------------------------------------------ + + # Scheduler + lr = [x['lr'] for x in optimizer.param_groups] # for loggers + scheduler.step() + + if RANK in {-1, 0}: + # mAP + # callbacks.run('on_train_epoch_end', epoch=epoch) + ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) + final_epoch = (epoch + 1 == epochs) or stopper.possible_stop + if not noval or final_epoch: # Calculate mAP + results, maps, _ = val.run(data_dict, + batch_size=batch_size // WORLD_SIZE * 2, + imgsz=imgsz, + model=ema.ema, + single_cls=single_cls, + dataloader=val_loader, + save_dir=save_dir, + plots=plots, + #callbacks=callbacks, + compute_loss=compute_loss, + mask_downsample_ratio=mask_ratio, + overlap=overlap) + # Update best mAP + fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] + stop = stopper(epoch=epoch, fitness=fi) # early stop check 
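+            # Track the best fitness seen so far; best.pt below is only written
+            # when this epoch's fi equals best_fitness.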
+ if fi > best_fitness: + best_fitness = fi + log_vals = list(mloss) + list(results) + lr + metrics_dict = dict(zip(KEYS, log_vals)) + logger.log_metrics(metrics_dict, epoch) + #callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi) + + # Save model + if (not nosave) or (final_epoch and not evolve): # if save + ckpt = { + 'epoch': epoch, + 'best_fitness': best_fitness, + 'model': deepcopy(de_parallel(model)).half(), + 'ema': deepcopy(ema.ema).half(), + 'updates': ema.updates, + 'optimizer': optimizer.state_dict(), + #'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None, + 'date': datetime.now().isoformat()} + + # Save last, best and delete + torch.save(ckpt, last) + if best_fitness == fi: + torch.save(ckpt, best) + if opt.save_period > 0 and epoch % opt.save_period == 0: + torch.save(ckpt, w / f'epoch{epoch}.pt') + logger.log_model(w / f'epoch{epoch}.pt') + del ckpt + + + # EarlyStopping + if RANK != -1: # if DDP training + broadcast_list = [stop if RANK == 0 else None] + dist.broadcast_object_list(broadcast_list, 0) # broadcast 'stop' to all ranks + if RANK != 0: + stop = broadcast_list[0] + if stop: + break # must break all DDP ranks + + # end epoch ---------------------------------------------------------------------------------------------------- + # end training ----------------------------------------------------------------------------------------------------- + if RANK in {-1, 0}: + LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.') + for f in last, best: + if f.exists(): + strip_optimizer(f) # strip optimizers + if f is best: + LOGGER.info(f'\nValidating {f}...') + results, _, _ = val.run( + data_dict, + batch_size=batch_size // WORLD_SIZE * 2, + imgsz=imgsz, + model=attempt_load(f, device).half(), + iou_thres=0.65 if is_coco else 0.60, # best pycocotools results at 0.65 + single_cls=single_cls, + dataloader=val_loader, + save_dir=save_dir, + save_json=is_coco, + verbose=True, + plots=plots, + #callbacks=callbacks, + compute_loss=compute_loss, + mask_downsample_ratio=mask_ratio, + overlap=overlap) # val best model with plots + if is_coco: + metrics_dict = dict(zip(KEYS, list(mloss) + list(results) + lr)) + logger.log_metrics(metrics_dict, epoch) + #callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) + # on train end callback using genericLogger + logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs+1) + if plots: + plot_results_with_masks(file=save_dir / 'results.csv') # save results.png + files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] + files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter + LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") + logger.log_images(files) + # callbacks.run('on_train_end', last, best, plots, epoch, results) + + torch.cuda.empty_cache() + return results + + + +def parse_opt(known=False): + parser = argparse.ArgumentParser() + parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path') + parser.add_argument('--cfg', type=str, default='', help='model.yaml path') + parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') + parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') + parser.add_argument('--epochs', type=int, default=300) + parser.add_argument('--batch-size', type=int, default=16, 
help='total batch size for all GPUs, -1 for autobatch')
+    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
+    parser.add_argument('--rect', action='store_true', help='rectangular training')
+    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
+    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
+    parser.add_argument('--noval', action='store_true', help='only validate final epoch')
+    parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
+    parser.add_argument('--noplots', action='store_true', help='save no plot files')
+    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
+    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
+    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
+    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
+    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
+    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
+    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
+    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
+    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
+    parser.add_argument('--project', default=ROOT / 'runs/train_segment', help='save to project/name')
+    parser.add_argument('--name', default='exp', help='save to project/name')
+    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
+    parser.add_argument('--quad', action='store_true', help='quad dataloader')
+    parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
+    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
+    parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
+    parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
+    parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
+    parser.add_argument('--seed', type=int, default=0, help='Global training seed')
+    parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
+    parser.add_argument('--mask-ratio', type=int, default=1, help='Downsample the gt masks to save memory')
+    parser.add_argument('--overlap-mask', action='store_true', help='Overlapping masks train faster at the cost of a slight accuracy decrease')
+
+    # Weights & Biases arguments
+    parser.add_argument('--entity', default=None, help='W&B: Entity')
+    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
+    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
+    parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to
use') + + opt = parser.parse_known_args()[0] if known else parser.parse_args() + return opt + + +def main(opt, callbacks=Callbacks()): + # Checks + if RANK in {-1, 0}: + print_args(vars(opt)) + check_git_status() + check_requirements(exclude=['thop']) + + # Resume + if opt.resume and not check_wandb_resume(opt) and not opt.evolve: # resume an interrupted run + ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path + assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist' + with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f: + opt = argparse.Namespace(**yaml.safe_load(f)) # replace + opt.cfg, opt.weights, opt.resume = '', ckpt, True # reinstate + LOGGER.info(f'Resuming training from {ckpt}') + else: + opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \ + check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project) # checks + assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified' + if opt.evolve: + if opt.project == str(ROOT / 'runs/train'): # if default project name, rename to runs/evolve + opt.project = str(ROOT / 'runs/evolve') + opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume + if opt.name == 'cfg': + opt.name = Path(opt.cfg).stem # use model.yaml as name + opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) + + # DDP mode + device = select_device(opt.device, batch_size=opt.batch_size) + if LOCAL_RANK != -1: + msg = 'is not compatible with YOLOv5 Multi-GPU DDP training' + assert not opt.image_weights, f'--image-weights {msg}' + assert not opt.evolve, f'--evolve {msg}' + assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size' + assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE' + assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' + torch.cuda.set_device(LOCAL_RANK) + device = torch.device('cuda', LOCAL_RANK) + dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") + + # Train + if not opt.evolve: + train(opt.hyp, opt, device, callbacks) + if WORLD_SIZE > 1 and RANK == 0: + LOGGER.info('Destroying process group... 
') + dist.destroy_process_group() + + # Evolve hyperparameters (optional) + else: + # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit) + meta = { + 'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3) + 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) + 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1 + 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay + 'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok) + 'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum + 'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr + 'box': (1, 0.02, 0.2), # box loss gain + 'cls': (1, 0.2, 4.0), # cls loss gain + 'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight + 'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels) + 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight + 'iou_t': (0, 0.1, 0.7), # IoU training threshold + 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold + 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) + 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) + 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction) + 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) + 'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction) + 'degrees': (1, 0.0, 45.0), # image rotation (+/- deg) + 'translate': (1, 0.0, 0.9), # image translation (+/- fraction) + 'scale': (1, 0.0, 0.9), # image scale (+/- gain) + 'shear': (1, 0.0, 10.0), # image shear (+/- deg) + 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 + 'flipud': (1, 0.0, 1.0), # image flip up-down (probability) + 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability) + 'mosaic': (1, 0.0, 1.0), # image mixup (probability) + 'mixup': (1, 0.0, 1.0), # image mixup (probability) + 'copy_paste': (1, 0.0, 1.0)} # segment copy-paste (probability) + + with open(opt.hyp, errors='ignore') as f: + hyp = yaml.safe_load(f) # load hyps dict + if 'anchors' not in hyp: # anchors commented in hyp.yaml + hyp['anchors'] = 3 + opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch + # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices + evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv' + if opt.bucket: + os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}') # download evolve.csv if exists + + for _ in range(opt.evolve): # generations to evolve + if evolve_csv.exists(): # if evolve.csv exists: select best hyps and mutate + # Select parent(s) + parent = 'single' # parent selection method: 'single' or 'weighted' + x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1) + n = min(5, len(x)) # number of previous results to consider + x = x[np.argsort(-fitness(x))][:n] # top n mutations + w = fitness(x) - fitness(x).min() + 1E-6 # weights (sum > 0) + if parent == 'single' or len(x) == 1: + # x = x[random.randint(0, n - 1)] # random selection + x = x[random.choices(range(n), weights=w)[0]] # weighted selection + elif parent == 'weighted': + x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination + + # Mutate + mp, s = 0.8, 0.2 # mutation probability, sigma + npr = np.random + npr.seed(int(time.time())) + g = np.array([meta[k][0] for k in hyp.keys()]) # gains 0-1 + ng = len(meta) + v = np.ones(ng) + while all(v == 1): # mutate until a change occurs (prevent duplicates) + v = (g * (npr.random(ng) < mp) * 
npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0) + for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300) + hyp[k] = float(x[i + 7] * v[i]) # mutate + + # Constrain to limits + for k, v in meta.items(): + hyp[k] = max(hyp[k], v[1]) # lower limit + hyp[k] = min(hyp[k], v[2]) # upper limit + hyp[k] = round(hyp[k], 5) # significant digits + + # Train mutation + results = train(hyp.copy(), opt, device, callbacks) + callbacks = Callbacks() + # Write mutation results + print_mutation(results, hyp.copy(), save_dir, opt.bucket) + + # Plot results + plot_evolve(evolve_csv) + LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n' + f"Results saved to {colorstr('bold', save_dir)}\n" + f'Usage example: $ python train.py --hyp {evolve_yaml}') + + +def run(**kwargs): + # Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt') + opt = parse_opt(True) + for k, v in kwargs.items(): + setattr(opt, k, v) + main(opt) + return opt + + +if __name__ == "__main__": + opt = parse_opt() + main(opt) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 6e3696718b6b..e82bfc74a1bc 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -5,6 +5,7 @@ import os import warnings +from pathlib import Path import pkg_resources as pkg import torch @@ -285,3 +286,81 @@ def on_train_end(self, last, best, plots, epoch, results): name=f'run_{self.wandb.wandb_run.id}_model', aliases=['latest', 'best', 'stripped']) self.wandb.finish_run() + +class GenericLogger: + """ + YOLOv5 General purpose logger for non-task specific logging + Usage: from utils.loggers import GenericLogger; logger = GenericLogger(...) + Arguments + opt: Run arguments + console_logger: Console logger + include: loggers to include + """ + + def __init__(self, opt, console_logger, include=('tb', 'wandb')): + # init default loggers + self.save_dir = Path(opt.save_dir) + self.include = include + self.console_logger = console_logger + if 'tb' in self.include: + prefix = colorstr('TensorBoard: ') + self.console_logger.info( + f"{prefix}Start with 'tensorboard --logdir {self.save_dir.parent}', view at http://localhost:6006/") + self.tb = SummaryWriter(str(self.save_dir)) + + if wandb and 'wandb' in self.include: + self.wandb = wandb.init(project="YOLOv5" if opt.project == "runs/train_segment" else opt.project, + name=None if opt.name == "exp" else opt.name, + config=opt) + else: + self.wandb = None + + def log_metrics(self, metrics_dict, epoch): + # Log metrics dictionary to all loggers + if self.tb: + for k, v in metrics_dict.items(): + self.tb.add_scalar(k, v, epoch) + + if self.wandb: + self.wandb.log(metrics_dict, step=epoch) + + def log_images(self, files, name='Images', epoch=0): + # Log images to all loggers + files = [Path(f) for f in (files if isinstance(files, (tuple, list)) else [files])] # to Path + files = [f for f in files if f.exists()] # filter by exists + + if self.tb: + for f in files: + self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC') + + if self.wandb: + self.wandb.log({name: [wandb.Image(str(f), caption=f.name) for f in files]}, step=epoch) + + def log_graph(self, model, imgsz=(640, 640)): + # Log model graph to all loggers + if self.tb: + log_tensorboard_graph(self.tb, model, imgsz) + + def log_model(self, model_path, epoch=0, metadata={}): + # Log model to all loggers + if self.wandb: + art = wandb.Artifact(name=f"run_{wandb.run.id}_model", type="model", metadata=metadata) + art.add_file(str(model_path)) + 
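+            # Logging under the fixed name f"run_{wandb.run.id}_model" makes W&B
+            # version the artifact, so each saved checkpoint remains retrievable.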
wandb.log_artifact(art) + + def update_params(self, params): + # Update the paramters logged + if self.wandb: + wandb.run.config.update(params, allow_val_change=True) + +def log_tensorboard_graph(tb, model, imgsz=(640, 640)): + # Log model graph to TensorBoard + try: + p = next(model.parameters()) # for device, type + imgsz = (imgsz, imgsz) if isinstance(imgsz, int) else imgsz # expand + im = torch.empty((1, 3, *imgsz)).to(p.device).type_as(p) # input image + with warnings.catch_warnings(): + warnings.simplefilter('ignore') # suppress jit trace warning + tb.add_graph(torch.jit.trace(de_parallel(model), im, strict=False), []) + except Exception: + print('WARNING: TensorBoard graph visualization failure') \ No newline at end of file diff --git a/utils/segment/metrics.py b/utils/segment/metrics.py index a3c0acd23920..65e3011f9f12 100644 --- a/utils/segment/metrics.py +++ b/utils/segment/metrics.py @@ -144,3 +144,37 @@ def get_maps(self, nc): def ap_class_index(self): # boxes and masks have the same ap_class_index return self.metric_box.ap_class_index + +KEYS = [ + "train/box_loss", + "train/seg_loss", # train loss + "train/obj_loss", + "train/cls_loss", + "metrics/precision(B)", + "metrics/recall(B)", + "metrics/mAP_0.5(B)", + "metrics/mAP_0.5:0.95(B)", # metrics + "metrics/precision(M)", + "metrics/recall(M)", + "metrics/mAP_0.5(M)", + "metrics/mAP_0.5:0.95(M)", # metrics + "val/box_loss", + "val/seg_loss", # val loss + "val/obj_loss", + "val/cls_loss", + "x/lr0", + "x/lr1", + "x/lr2", + ] + +BEST_KEYS = [ + "best/epoch", + "best/precision(B)", + "best/recall(B)", + "best/mAP_0.5(B)", + "best/mAP_0.5:0.95(B)", + "best/precision(M)", + "best/recall(M)", + "best/mAP_0.5(M)", + "best/mAP_0.5:0.95(M)", + ] \ No newline at end of file From 3e2312d7b748d0e8dbc9813d14a2edbc4671bc12 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 20:04:53 +0530 Subject: [PATCH 062/247] no deterministic behaviour --- utils/general.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/general.py b/utils/general.py index 040da33c85f3..0846278083cf 100644 --- a/utils/general.py +++ b/utils/general.py @@ -202,7 +202,7 @@ def init_seeds(seed=0, deterministic=False): import torch.backends.cudnn as cudnn if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213 - torch.use_deterministic_algorithms(True) + #torch.use_deterministic_algorithms(True) os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' os.environ['PYTHONHASHSEED'] = str(seed) From aead6e92aa7f0a246da74bb9bf650d454907c89d Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 20:27:19 +0530 Subject: [PATCH 063/247] update --- segment/train_temp.py | 7 ++++--- segment/val.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/segment/train_temp.py b/segment/train_temp.py index 57c23811cf53..b1293387cb63 100644 --- a/segment/train_temp.py +++ b/segment/train_temp.py @@ -324,7 +324,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Update mosaic border (optional) # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders - mloss = torch.zeros(4, device=device) # mean losses if RANK != -1: train_loader.sampler.set_epoch(epoch) @@ -430,10 +429,12 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio if fi > best_fitness: best_fitness = fi log_vals = list(mloss) + list(results) + lr + # Log val metrics 
and media metrics_dict = dict(zip(KEYS, log_vals)) logger.log_metrics(metrics_dict, epoch) - #callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi) - + if plots: + files = sorted(save_dir.glob('val*.jpg')) + logger.log_images(files, "Validation") # Save model if (not nosave) or (final_epoch and not evolve): # if save ckpt = { diff --git a/segment/val.py b/segment/val.py index 1045a3959793..a301f636fb7d 100644 --- a/segment/val.py +++ b/segment/val.py @@ -372,7 +372,7 @@ def run( # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) - callbacks.run('on_val_end') + #callbacks.run('on_val_end') # in case the cocoeval will update map ( From 31670a20b6cbd3205a37290a314bf99024735857 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 20:41:34 +0530 Subject: [PATCH 064/247] update --- segment/train_temp.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/segment/train_temp.py b/segment/train_temp.py index b1293387cb63..91168d533320 100644 --- a/segment/train_temp.py +++ b/segment/train_temp.py @@ -497,12 +497,14 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio #callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) # on train end callback using genericLogger logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs+1) + if not opt.evolve: + logger.log_model(best, epoch+1) if plots: plot_results_with_masks(file=save_dir / 'results.csv') # save results.png files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") - logger.log_images(files) + logger.log_images(files, "Results") # callbacks.run('on_train_end', last, best, plots, epoch, results) torch.cuda.empty_cache() From 8a83f6509d136274b6eec289e56b7e948115980f Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 20:52:11 +0530 Subject: [PATCH 065/247] update train --- segment/train.py | 80 +++-- segment/train_temp.py | 711 ------------------------------------------ 2 files changed, 46 insertions(+), 745 deletions(-) delete mode 100644 segment/train_temp.py diff --git a/segment/train.py b/segment/train.py index ea3ca58316c0..5986aa4278ad 100644 --- a/segment/train.py +++ b/segment/train.py @@ -50,7 +50,7 @@ check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer) -from utils.loggers import LoggersMask +from utils.loggers import GenericLogger from utils.loggers.wandb.wandb_utils import check_wandb_resume from utils.segment.loss import ComputeLoss from utils.segment.metrics import fitness @@ -63,6 +63,8 @@ WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) from utils.general import LOGGER, check_amp, check_version from utils.autobatch import check_train_batch_size +from utils.segment.plots import plot_images_and_masks, plot_results_with_masks +from utils.segment.metrics import KEYS, BEST_KEYS from torch.optim import AdamW import yaml from datetime import datetime @@ -71,7 +73,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio= \ Path(opt.save_dir), opt.epochs, 
opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio - callbacks.run('on_pretrain_routine_start') # Directories w = save_dir / 'weights' # weights dir @@ -94,13 +95,13 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Loggers data_dict = None if RANK in {-1, 0}: - loggers = LoggersMask( - save_dir=save_dir, opt=opt, logger=LOGGER + logger = GenericLogger( + opt=opt, console_logger=LOGGER ) # loggers instance # Register actions - for k in methods(loggers): - callbacks.register_action(k, callback=getattr(loggers, k)) + # for k in methods(loggers): + # callbacks.register_action(k, callback=getattr(loggers, k)) # Config plots = not evolve and not opt.noplots # create plots @@ -147,7 +148,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Batch size if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size batch_size = check_train_batch_size(model, imgsz, amp) - loggers.on_params_update({"batch_size": batch_size}) + logger.update_params({"batch_size": batch_size}) # Optimizer nbs = 64 # nominal batch size @@ -278,8 +279,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) model.half().float() # pre-reduce anchor precision - callbacks.run('on_pretrain_routine_end') - # DDP mode if cuda and RANK != -1: if check_version(torch.__version__, '1.11.0'): @@ -309,13 +308,11 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio scaler = torch.cuda.amp.GradScaler(enabled=amp) stopper, stop = EarlyStopping(patience=opt.patience), False compute_loss = ComputeLoss(model, overlap=overlap) # init loss class - callbacks.run('on_train_start') LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n' f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n' f"Logging results to {colorstr('bold', save_dir)}\n" f'Starting training for {epochs} epochs...') for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ - callbacks.run('on_train_epoch_start') model.train() # Update image weights (optional, single-GPU only) @@ -327,7 +324,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Update mosaic border (optional) # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders - mloss = torch.zeros(4, device=device) # mean losses if RANK != -1: train_loader.sampler.set_epoch(epoch) @@ -337,7 +333,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar optimizer.zero_grad() for i, (imgs, targets, paths, _, masks) in pbar: # batch ------------------------------------------------------------- - callbacks.run('on_train_batch_start') ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 @@ -395,10 +390,15 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio mode="bilinear", align_corners=False, ).squeeze(0) - callbacks.run('on_train_batch_end', ni, model, imgs, targets, masks, paths, plots) - - if callbacks.stop_training: - return + #callbacks.run('on_train_batch_end', ni, model, 
imgs, targets, masks, paths, plots) + if plots: + if ni < 3: + f = save_dir / f"train_batch{ni}.jpg" # filename + plot_images_and_masks(imgs, targets, masks, paths, f) + + if ni == 10: + files = sorted(save_dir.glob('train*.jpg')) + logger.log_images(files, "Mosaics") # end batch ------------------------------------------------------------------------------------------------ # Scheduler @@ -407,7 +407,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio if RANK in {-1, 0}: # mAP - callbacks.run('on_train_epoch_end', epoch=epoch) + # callbacks.run('on_train_epoch_end', epoch=epoch) ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) final_epoch = (epoch + 1 == epochs) or stopper.possible_stop if not noval or final_epoch: # Calculate mAP @@ -419,7 +419,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio dataloader=val_loader, save_dir=save_dir, plots=plots, - callbacks=callbacks, + #callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, overlap=overlap) @@ -429,8 +429,12 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio if fi > best_fitness: best_fitness = fi log_vals = list(mloss) + list(results) + lr - callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi) - + # Log val metrics and media + metrics_dict = dict(zip(KEYS, log_vals)) + logger.log_metrics(metrics_dict, epoch) + if plots: + files = sorted(save_dir.glob('val*.jpg')) + logger.log_images(files, "Validation") # Save model if (not nosave) or (final_epoch and not evolve): # if save ckpt = { @@ -449,8 +453,9 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio torch.save(ckpt, best) if opt.save_period > 0 and epoch % opt.save_period == 0: torch.save(ckpt, w / f'epoch{epoch}.pt') + logger.log_model(w / f'epoch{epoch}.pt') del ckpt - callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi) + # EarlyStopping if RANK != -1: # if DDP training @@ -482,14 +487,25 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio save_json=is_coco, verbose=True, plots=plots, - callbacks=callbacks, + #callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, overlap=overlap) # val best model with plots if is_coco: - callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) - - callbacks.run('on_train_end', last, best, plots, epoch, results) + metrics_dict = dict(zip(KEYS, list(mloss) + list(results) + lr)) + logger.log_metrics(metrics_dict, epoch) + #callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) + # on train end callback using genericLogger + logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs+1) + if not opt.evolve: + logger.log_model(best, epoch+1) + if plots: + plot_results_with_masks(file=save_dir / 'results.csv') # save results.png + files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] + files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter + LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") + logger.log_images(files, "Results") + # callbacks.run('on_train_end', last, best, plots, epoch, results) torch.cuda.empty_cache() return results @@ -521,7 +537,7 @@ def parse_opt(known=False): parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer') 
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
     parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
-    parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
+    parser.add_argument('--project', default=ROOT / 'runs/train_segment', help='save to project/name')
     parser.add_argument('--name', default='exp', help='save to project/name')
     parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
     parser.add_argument('--quad', action='store_true', help='quad dataloader')
@@ -532,15 +548,11 @@ def parse_opt(known=False):
     parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
     parser.add_argument('--seed', type=int, default=0, help='Global training seed')
     parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
-    parser.add_argument('--mask-ratio', type=int, default=1, help='Downsample the gt masks to save memory')
+
+    # Instance Segmentation Args
+    parser.add_argument('--mask-ratio', type=int, default=4, help='Downsample the gt masks to save memory')
     parser.add_argument('--overlap-mask', action='store_true', help='Overlapping masks train faster at the cost of a slight accuracy decrease')
 
-    # Weights & Biases arguments
-    parser.add_argument('--entity', default=None, help='W&B: Entity')
-    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
-    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
-    parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')
-
     opt = parser.parse_known_args()[0] if known else parser.parse_args()
     return opt
 
diff --git a/segment/train_temp.py b/segment/train_temp.py
deleted file mode 100644
index 91168d533320..000000000000
--- a/segment/train_temp.py
+++ /dev/null
@@ -1,711 +0,0 @@
-# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
-"""
-Train a YOLOv5 model on a custom dataset.
-
-Models and datasets download automatically from the latest YOLOv5 release.
-Models: https://github.com/ultralytics/yolov5/tree/master/models -Datasets: https://github.com/ultralytics/yolov5/tree/master/data -Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data - -Usage: - $ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640 # from pretrained (RECOMMENDED) - $ python path/to/train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640 # from scratch -""" - -import argparse -import math -import os -import random -import sys -import time -from copy import deepcopy -from datetime import datetime -from pathlib import Path - -import val # for end-of-epoch mAP -import numpy as np -import torch -import torch.distributed as dist -import torch.nn as nn -import yaml -from torch.nn.parallel import DistributedDataParallel as DDP -import torch.nn.functional as F -from torch.optim import SGD, Adam, AdamW, lr_scheduler -from tqdm import tqdm - -FILE = Path(__file__).resolve() -ROOT = FILE.parents[1] # YOLOv5 root directory -if str(ROOT) not in sys.path: - sys.path.append(str(ROOT)) # add ROOT to PATH -ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative - -from models.experimental import attempt_load -from models.yolo import Model -from utils.autoanchor import check_anchors -from utils.autobatch import check_train_batch_size -from utils.callbacks import Callbacks -from utils.segment.dataloaders import create_dataloader -from utils.downloads import attempt_download -from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, - check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, - increment_path, init_seeds, intersect_dicts, labels_to_class_weights, - labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer) -from utils.loggers import GenericLogger -from utils.loggers.wandb.wandb_utils import check_wandb_resume -from utils.segment.loss import ComputeLoss -from utils.segment.metrics import fitness -from utils.plots import plot_evolve, plot_labels -from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first - - -LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html -RANK = int(os.getenv('RANK', -1)) -WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) -from utils.general import LOGGER, check_amp, check_version -from utils.autobatch import check_train_batch_size -from utils.segment.plots import plot_images_and_masks, plot_results_with_masks -from utils.segment.metrics import KEYS, BEST_KEYS -from torch.optim import AdamW -import yaml -from datetime import datetime - -def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary - save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio= \ - Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ - opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio - - # Directories - w = save_dir / 'weights' # weights dir - (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir - last, best = w / 'last.pt', w / 'best.pt' - - # Hyperparameters - if isinstance(hyp, str): - with open(hyp, errors='ignore') as f: - hyp = yaml.safe_load(f) # load hyps dict - LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items())) - - # Save run settings - if not evolve: - with open(save_dir / 'hyp.yaml', 
'w') as f: - yaml.safe_dump(hyp, f, sort_keys=False) - with open(save_dir / 'opt.yaml', 'w') as f: - yaml.safe_dump(vars(opt), f, sort_keys=False) - - # Loggers - data_dict = None - if RANK in {-1, 0}: - logger = GenericLogger( - opt=opt, console_logger=LOGGER - ) # loggers instance - - # Register actions - # for k in methods(loggers): - # callbacks.register_action(k, callback=getattr(loggers, k)) - - # Config - plots = not evolve and not opt.noplots # create plots - overlap = opt.overlap_mask - cuda = device.type != 'cpu' - init_seeds(opt.seed + 1 + RANK, True) - with torch_distributed_zero_first(LOCAL_RANK): - data_dict = data_dict or check_dataset(data) # check if None - train_path, val_path = data_dict['train'], data_dict['val'] - nc = 1 if single_cls else int(data_dict['nc']) # number of classes - names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names - assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}' # check - is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt') # COCO dataset - - # Model - check_suffix(weights, '.pt') # check weights - pretrained = weights.endswith('.pt') - if pretrained: - with torch_distributed_zero_first(LOCAL_RANK): - weights = attempt_download(weights) # download if not found locally - ckpt = torch.load(weights, map_location='cpu') # load checkpoint to CPU to avoid CUDA memory leak - model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create - exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys - csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 - csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect - model.load_state_dict(csd, strict=False) # load - LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report - else: - model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create - amp = check_amp(model) # check AMP - - # Freeze - freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze - for k, v in model.named_parameters(): - v.requires_grad = True # train all layers - if any(x in k for x in freeze): - LOGGER.info(f'freezing {k}') - v.requires_grad = False - - # Image size - gs = max(int(model.stride.max()), 32) # grid size (max stride) - imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple - - # Batch size - if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size - batch_size = check_train_batch_size(model, imgsz, amp) - logger.update_params({"batch_size": batch_size}) - - # Optimizer - nbs = 64 # nominal batch size - accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing - hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay - LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}") - - g = [], [], [] # optimizer parameter groups - bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. 
BatchNorm2d() - for v in model.modules(): - if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias - g[2].append(v.bias) - if isinstance(v, bn): # weight (no decay) - g[1].append(v.weight) - elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay) - g[0].append(v.weight) - - # hyp['lr0'] = hyp['lr0'] / batch_size * 128 - # hyp['warmup_bias_lr'] = 0.01 - if opt.optimizer == 'Adam': - optimizer = Adam(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum - elif opt.optimizer == 'AdamW': - optimizer = AdamW(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum - else: - optimizer = SGD(g[2], lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) - - optimizer.add_param_group({'params': g[0], 'weight_decay': hyp['weight_decay']}) # add g0 with weight_decay - optimizer.add_param_group({'params': g[1]}) # add g1 (BatchNorm2d weights) - LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups " - f"{len(g[1])} weight (no decay), {len(g[0])} weight, {len(g[2])} bias") - del g - - # Scheduler - if opt.cos_lr: - lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf'] - else: - lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear - scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) - - # EMA - ema = ModelEMA(model) if RANK in {-1, 0} else None - - # Resume - start_epoch, best_fitness = 0, 0.0 - if pretrained: - # Optimizer - if ckpt['optimizer'] is not None: - optimizer.load_state_dict(ckpt['optimizer']) - best_fitness = ckpt['best_fitness'] - - # EMA - if ema and ckpt.get('ema'): - ema.ema.load_state_dict(ckpt['ema'].float().state_dict()) - ema.updates = ckpt['updates'] - - # Epochs - start_epoch = ckpt['epoch'] + 1 - if resume: - assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.' - if epochs < start_epoch: - LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs.") - epochs += ckpt['epoch'] # finetune additional epochs - - del ckpt, csd - - # DP mode - if cuda and RANK == -1 and torch.cuda.device_count() > 1: - LOGGER.warning('WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n' - 'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.') - model = torch.nn.DataParallel(model) - - # SyncBatchNorm - if opt.sync_bn and cuda and RANK != -1: - model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) - LOGGER.info('Using SyncBatchNorm()') - - # Trainloader - train_loader, dataset = create_dataloader(train_path, - imgsz, - batch_size // WORLD_SIZE, - gs, - single_cls, - hyp=hyp, - augment=True, - cache=None if opt.cache == 'val' else opt.cache, - rect=opt.rect, - rank=LOCAL_RANK, - workers=workers, - image_weights=opt.image_weights, - quad=opt.quad, - prefix=colorstr('train: '), - shuffle=True, - mask_downsample_ratio=mask_ratio, - overlap_mask=overlap, - ) - mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class - print("mlc , nc ", mlc, " ", nc ) - nb = len(train_loader) # number of batches - assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. 
Possible class labels are 0-{nc - 1}' - - # Process 0 - if RANK in {-1, 0}: - val_loader = create_dataloader(val_path, - imgsz, - batch_size // WORLD_SIZE * 2, - gs, - single_cls, - hyp=hyp, - cache=None if noval else opt.cache, - rect=True, - rank=-1, - workers=workers * 2, - pad=0.5, - mask_downsample_ratio=mask_ratio, - overlap_mask=overlap, - prefix=colorstr('val: '))[0] - - if not resume: - labels = np.concatenate(dataset.labels, 0) - # c = torch.tensor(labels[:, 0]) # classes - # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency - # model._initialize_biases(cf.to(device)) - if plots: - plot_labels(labels, names, save_dir) - - # Anchors - if not opt.noautoanchor: - check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) - model.half().float() # pre-reduce anchor precision - - # DDP mode - if cuda and RANK != -1: - if check_version(torch.__version__, '1.11.0'): - model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, static_graph=True) - else: - model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK) - - # Model attributes - nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps) - hyp['box'] *= 3 / nl # scale to layers - hyp['cls'] *= nc / 80 * 3 / nl # scale to classes and layers - hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl # scale to image size and layers - hyp['label_smoothing'] = opt.label_smoothing - model.nc = nc # attach number of classes to model - model.hyp = hyp # attach hyperparameters to model - model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights - model.names = names - - # Start training - t0 = time.time() - nw = max(round(hyp['warmup_epochs'] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations) - # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training - last_opt_step = -1 - maps = np.zeros(nc) # mAP per class - results = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) - scheduler.last_epoch = start_epoch - 1 # do not move - scaler = torch.cuda.amp.GradScaler(enabled=amp) - stopper, stop = EarlyStopping(patience=opt.patience), False - compute_loss = ComputeLoss(model, overlap=overlap) # init loss class - LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n' - f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n' - f"Logging results to {colorstr('bold', save_dir)}\n" - f'Starting training for {epochs} epochs...') - for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ - model.train() - - # Update image weights (optional, single-GPU only) - if opt.image_weights: - cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights - iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights - dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx - - # Update mosaic border (optional) - # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) - # dataset.mosaic_border = [b - imgsz, -b] # height, width borders - mloss = torch.zeros(4, device=device) # mean losses - if RANK != -1: - train_loader.sampler.set_epoch(epoch) - pbar = enumerate(train_loader) - LOGGER.info( ("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "seg", "obj", "cls", "labels", "img_size")) - if RANK in {-1, 0}: - pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar - 
optimizer.zero_grad() - for i, (imgs, targets, paths, _, masks) in pbar: # batch ------------------------------------------------------------- - ni = i + nb * epoch # number integrated batches (since train start) - imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 - - # Warmup - if ni <= nw: - xi = [0, nw] # x interp - # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) - accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round()) - for j, x in enumerate(optimizer.param_groups): - # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 - x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 0 else 0.0, x['initial_lr'] * lf(epoch)]) - if 'momentum' in x: - x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) - - # Multi-scale - if opt.multi_scale: - sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size - sf = sz / max(imgs.shape[2:]) # scale factor - if sf != 1: - ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) - imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) - - # Forward - with torch.cuda.amp.autocast(amp): - pred = model(imgs) # forward - loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device).float()) # loss scaled by batch_size - if RANK != -1: - loss *= WORLD_SIZE # gradient averaged between devices in DDP mode - if opt.quad: - loss *= 4. - - # Backward - scaler.scale(loss).backward() - - # Optimize - if ni - last_opt_step >= accumulate: - scaler.step(optimizer) # optimizer.step - scaler.update() - optimizer.zero_grad() - if ema: - ema.update(model) - last_opt_step = ni - - # Log - if RANK in {-1, 0}: - mloss = (mloss * i + loss_items) / (i + 1) # update mean losses - mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) - pbar.set_description(("%10s" * 2 + "%10.4g" * 6) - % (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0],imgs.shape[-1])) - # for plots - if mask_ratio != 1: - masks = F.interpolate( - masks[None, :].float(), - (imgsz, imgsz), - mode="bilinear", - align_corners=False, - ).squeeze(0) - #callbacks.run('on_train_batch_end', ni, model, imgs, targets, masks, paths, plots) - if plots: - if ni < 3: - f = save_dir / f"train_batch{ni}.jpg" # filename - plot_images_and_masks(imgs, targets, masks, paths, f) - - if ni == 10: - files = sorted(save_dir.glob('train*.jpg')) - logger.log_images(files, "Mosaics") - # end batch ------------------------------------------------------------------------------------------------ - - # Scheduler - lr = [x['lr'] for x in optimizer.param_groups] # for loggers - scheduler.step() - - if RANK in {-1, 0}: - # mAP - # callbacks.run('on_train_epoch_end', epoch=epoch) - ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) - final_epoch = (epoch + 1 == epochs) or stopper.possible_stop - if not noval or final_epoch: # Calculate mAP - results, maps, _ = val.run(data_dict, - batch_size=batch_size // WORLD_SIZE * 2, - imgsz=imgsz, - model=ema.ema, - single_cls=single_cls, - dataloader=val_loader, - save_dir=save_dir, - plots=plots, - #callbacks=callbacks, - compute_loss=compute_loss, - mask_downsample_ratio=mask_ratio, - overlap=overlap) - # Update best mAP - fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] - stop = stopper(epoch=epoch, fitness=fi) # early stop check 
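For reference, the fitness(...) call above reduces the twelve validation results to one scalar used for checkpoint selection and early stopping. A minimal sketch of that reduction follows; the weights shown are an assumption (the detection-style weighting, with mAP@0.5:0.95 dominant, applied to both box and mask metrics), not values read from this patch:

    import numpy as np

    def fitness(x):
        # x: one row of [P, R, mAP@.5, mAP@.5:.95] for boxes, then the same
        # four for masks; trailing val-loss terms are ignored by the slice.
        w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9]  # assumed metric weights
        return (np.asarray(x)[:, :8] * w).sum(1)

    fi = fitness(np.zeros((1, 12)))  # higher is better; compared against best_fitness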
- if fi > best_fitness: - best_fitness = fi - log_vals = list(mloss) + list(results) + lr - # Log val metrics and media - metrics_dict = dict(zip(KEYS, log_vals)) - logger.log_metrics(metrics_dict, epoch) - if plots: - files = sorted(save_dir.glob('val*.jpg')) - logger.log_images(files, "Validation") - # Save model - if (not nosave) or (final_epoch and not evolve): # if save - ckpt = { - 'epoch': epoch, - 'best_fitness': best_fitness, - 'model': deepcopy(de_parallel(model)).half(), - 'ema': deepcopy(ema.ema).half(), - 'updates': ema.updates, - 'optimizer': optimizer.state_dict(), - #'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None, - 'date': datetime.now().isoformat()} - - # Save last, best and delete - torch.save(ckpt, last) - if best_fitness == fi: - torch.save(ckpt, best) - if opt.save_period > 0 and epoch % opt.save_period == 0: - torch.save(ckpt, w / f'epoch{epoch}.pt') - logger.log_model(w / f'epoch{epoch}.pt') - del ckpt - - - # EarlyStopping - if RANK != -1: # if DDP training - broadcast_list = [stop if RANK == 0 else None] - dist.broadcast_object_list(broadcast_list, 0) # broadcast 'stop' to all ranks - if RANK != 0: - stop = broadcast_list[0] - if stop: - break # must break all DDP ranks - - # end epoch ---------------------------------------------------------------------------------------------------- - # end training ----------------------------------------------------------------------------------------------------- - if RANK in {-1, 0}: - LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.') - for f in last, best: - if f.exists(): - strip_optimizer(f) # strip optimizers - if f is best: - LOGGER.info(f'\nValidating {f}...') - results, _, _ = val.run( - data_dict, - batch_size=batch_size // WORLD_SIZE * 2, - imgsz=imgsz, - model=attempt_load(f, device).half(), - iou_thres=0.65 if is_coco else 0.60, # best pycocotools results at 0.65 - single_cls=single_cls, - dataloader=val_loader, - save_dir=save_dir, - save_json=is_coco, - verbose=True, - plots=plots, - #callbacks=callbacks, - compute_loss=compute_loss, - mask_downsample_ratio=mask_ratio, - overlap=overlap) # val best model with plots - if is_coco: - metrics_dict = dict(zip(KEYS, list(mloss) + list(results) + lr)) - logger.log_metrics(metrics_dict, epoch) - #callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) - # on train end callback using genericLogger - logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs+1) - if not opt.evolve: - logger.log_model(best, epoch+1) - if plots: - plot_results_with_masks(file=save_dir / 'results.csv') # save results.png - files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] - files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter - LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") - logger.log_images(files, "Results") - # callbacks.run('on_train_end', last, best, plots, epoch, results) - - torch.cuda.empty_cache() - return results - - - -def parse_opt(known=False): - parser = argparse.ArgumentParser() - parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path') - parser.add_argument('--cfg', type=str, default='', help='model.yaml path') - parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') - parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') - 
parser.add_argument('--epochs', type=int, default=300)
-    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
-    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
-    parser.add_argument('--rect', action='store_true', help='rectangular training')
-    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
-    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
-    parser.add_argument('--noval', action='store_true', help='only validate final epoch')
-    parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
-    parser.add_argument('--noplots', action='store_true', help='save no plot files')
-    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
-    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
-    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
-    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
-    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
-    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
-    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
-    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
-    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
-    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
-    parser.add_argument('--project', default=ROOT / 'runs/train_segment', help='save to project/name')
-    parser.add_argument('--name', default='exp', help='save to project/name')
-    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
-    parser.add_argument('--quad', action='store_true', help='quad dataloader')
-    parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
-    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
-    parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
-    parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
-    parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
-    parser.add_argument('--seed', type=int, default=0, help='Global training seed')
-    parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
-    parser.add_argument('--mask-ratio', type=int, default=1, help='Downsample the gt masks to save memory')
-    parser.add_argument('--overlap-mask', action='store_true', help='Overlapping masks train faster at the cost of slight accuracy decrease')
-
-    # Weights & Biases arguments
-    parser.add_argument('--entity', default=None, help='W&B: Entity')
-    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
-    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
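The --mask-ratio flag above trades mask resolution for memory: ground-truth masks are kept at 1/r scale during training and upsampled only when full resolution is needed, as the plotting path in the training loop does. A minimal round-trip sketch with illustrative shapes (not code from this patch):

    import torch
    import torch.nn.functional as F

    masks = torch.rand(16, 640, 640)  # hypothetical batch of full-size GT masks
    r = 4  # --mask-ratio
    small = F.interpolate(masks[None], scale_factor=1 / r, mode='bilinear',
                          align_corners=False).squeeze(0)  # (16, 160, 160), ~r^2 less memory
    back = F.interpolate(small[None], size=(640, 640), mode='bilinear',
                         align_corners=False).squeeze(0)  # restored only for plotting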
- parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use') - - opt = parser.parse_known_args()[0] if known else parser.parse_args() - return opt - - -def main(opt, callbacks=Callbacks()): - # Checks - if RANK in {-1, 0}: - print_args(vars(opt)) - check_git_status() - check_requirements(exclude=['thop']) - - # Resume - if opt.resume and not check_wandb_resume(opt) and not opt.evolve: # resume an interrupted run - ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path - assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist' - with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f: - opt = argparse.Namespace(**yaml.safe_load(f)) # replace - opt.cfg, opt.weights, opt.resume = '', ckpt, True # reinstate - LOGGER.info(f'Resuming training from {ckpt}') - else: - opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \ - check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project) # checks - assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified' - if opt.evolve: - if opt.project == str(ROOT / 'runs/train'): # if default project name, rename to runs/evolve - opt.project = str(ROOT / 'runs/evolve') - opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume - if opt.name == 'cfg': - opt.name = Path(opt.cfg).stem # use model.yaml as name - opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) - - # DDP mode - device = select_device(opt.device, batch_size=opt.batch_size) - if LOCAL_RANK != -1: - msg = 'is not compatible with YOLOv5 Multi-GPU DDP training' - assert not opt.image_weights, f'--image-weights {msg}' - assert not opt.evolve, f'--evolve {msg}' - assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size' - assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE' - assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' - torch.cuda.set_device(LOCAL_RANK) - device = torch.device('cuda', LOCAL_RANK) - dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") - - # Train - if not opt.evolve: - train(opt.hyp, opt, device, callbacks) - if WORLD_SIZE > 1 and RANK == 0: - LOGGER.info('Destroying process group... 
') - dist.destroy_process_group() - - # Evolve hyperparameters (optional) - else: - # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit) - meta = { - 'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3) - 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) - 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1 - 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay - 'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok) - 'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum - 'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr - 'box': (1, 0.02, 0.2), # box loss gain - 'cls': (1, 0.2, 4.0), # cls loss gain - 'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight - 'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels) - 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight - 'iou_t': (0, 0.1, 0.7), # IoU training threshold - 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold - 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) - 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) - 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction) - 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) - 'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction) - 'degrees': (1, 0.0, 45.0), # image rotation (+/- deg) - 'translate': (1, 0.0, 0.9), # image translation (+/- fraction) - 'scale': (1, 0.0, 0.9), # image scale (+/- gain) - 'shear': (1, 0.0, 10.0), # image shear (+/- deg) - 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 - 'flipud': (1, 0.0, 1.0), # image flip up-down (probability) - 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability) - 'mosaic': (1, 0.0, 1.0), # image mixup (probability) - 'mixup': (1, 0.0, 1.0), # image mixup (probability) - 'copy_paste': (1, 0.0, 1.0)} # segment copy-paste (probability) - - with open(opt.hyp, errors='ignore') as f: - hyp = yaml.safe_load(f) # load hyps dict - if 'anchors' not in hyp: # anchors commented in hyp.yaml - hyp['anchors'] = 3 - opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch - # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices - evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv' - if opt.bucket: - os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}') # download evolve.csv if exists - - for _ in range(opt.evolve): # generations to evolve - if evolve_csv.exists(): # if evolve.csv exists: select best hyps and mutate - # Select parent(s) - parent = 'single' # parent selection method: 'single' or 'weighted' - x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1) - n = min(5, len(x)) # number of previous results to consider - x = x[np.argsort(-fitness(x))][:n] # top n mutations - w = fitness(x) - fitness(x).min() + 1E-6 # weights (sum > 0) - if parent == 'single' or len(x) == 1: - # x = x[random.randint(0, n - 1)] # random selection - x = x[random.choices(range(n), weights=w)[0]] # weighted selection - elif parent == 'weighted': - x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination - - # Mutate - mp, s = 0.8, 0.2 # mutation probability, sigma - npr = np.random - npr.seed(int(time.time())) - g = np.array([meta[k][0] for k in hyp.keys()]) # gains 0-1 - ng = len(meta) - v = np.ones(ng) - while all(v == 1): # mutate until a change occurs (prevent duplicates) - v = (g * (npr.random(ng) < mp) * 
npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0) - for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300) - hyp[k] = float(x[i + 7] * v[i]) # mutate - - # Constrain to limits - for k, v in meta.items(): - hyp[k] = max(hyp[k], v[1]) # lower limit - hyp[k] = min(hyp[k], v[2]) # upper limit - hyp[k] = round(hyp[k], 5) # significant digits - - # Train mutation - results = train(hyp.copy(), opt, device, callbacks) - callbacks = Callbacks() - # Write mutation results - print_mutation(results, hyp.copy(), save_dir, opt.bucket) - - # Plot results - plot_evolve(evolve_csv) - LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n' - f"Results saved to {colorstr('bold', save_dir)}\n" - f'Usage example: $ python train.py --hyp {evolve_yaml}') - - -def run(**kwargs): - # Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt') - opt = parse_opt(True) - for k, v in kwargs.items(): - setattr(opt, k, v) - main(opt) - return opt - - -if __name__ == "__main__": - opt = parse_opt() - main(opt) From b17237ecf4206e35df36e2bf55dbb5c9d88812b1 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 20:54:38 +0530 Subject: [PATCH 066/247] torevert: use newlabels --- data/coco.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/coco.yaml b/data/coco.yaml index 0c0c4adab05d..b354236e25a0 100644 --- a/data/coco.yaml +++ b/data/coco.yaml @@ -8,7 +8,7 @@ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] -path: ../datasets/coco # dataset root dir +path: /datasets/coco # dataset root dir train: train2017.txt # train images (relative to 'path') 118287 images val: val2017.txt # val images (relative to 'path') 5000 images test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 From a5aa7f7ad407ba6d6a6588fbc96cc9e49b4dbd71 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 20:57:11 +0530 Subject: [PATCH 067/247] update --- data/coco.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/coco.yaml b/data/coco.yaml index b354236e25a0..bb9cb849abc9 100644 --- a/data/coco.yaml +++ b/data/coco.yaml @@ -8,7 +8,7 @@ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
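The path key changed below is the dataset root against which the train/val/test entries resolve. A minimal sketch of how a consumer reads such a YAML (simplified; assumes PyYAML and a single .txt entry per split, as in this file):

    import yaml
    from pathlib import Path

    data = yaml.safe_load(Path('data/coco.yaml').read_text())
    root = Path(data['path'])  # dataset root dir
    train, val = root / data['train'], root / data['val']  # e.g. root/train2017.txt
    print(train, val)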
-path: /datasets/coco # dataset root dir +path: datasets/coco # dataset root dir train: train2017.txt # train images (relative to 'path') 118287 images val: val2017.txt # val images (relative to 'path') 5000 images test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 From d67711a20a0485beb4fac9b60f0b37d6e6930d8b Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 21:04:19 +0530 Subject: [PATCH 068/247] cleanup --- utils/loggers/__init__.py | 97 --------------------------------------- 1 file changed, 97 deletions(-) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index e82bfc74a1bc..a0a6b063bb59 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -190,103 +190,6 @@ def on_params_update(self, params): self.wandb.wandb_run.config.update(params, allow_val_change=True) -class LoggersMask(Loggers): - def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS): - super().__init__(save_dir, weights, opt, hyp, logger, include) - self.keys = [ - "train/box_loss", - "train/seg_loss", # train loss - "train/obj_loss", - "train/cls_loss", - "metrics/precision(B)", - "metrics/recall(B)", - "metrics/mAP_0.5(B)", - "metrics/mAP_0.5:0.95(B)", # metrics - "metrics/precision(M)", - "metrics/recall(M)", - "metrics/mAP_0.5(M)", - "metrics/mAP_0.5:0.95(M)", # metrics - "val/box_loss", - "val/seg_loss", # val loss - "val/obj_loss", - "val/cls_loss", - "x/lr0", - "x/lr1", - "x/lr2", - ] # params - self.best_keys = [ - "best/epoch", - "best/precision(B)", - "best/recall(B)", - "best/mAP_0.5(B)", - "best/mAP_0.5:0.95(B)", - "best/precision(M)", - "best/recall(M)", - "best/mAP_0.5(M)", - "best/mAP_0.5:0.95(M)", - ] - - - def on_train_batch_end(self, ni, model, imgs, targets, masks, paths, plots): - if plots: - if ni == 0: - if self.tb and not self.opt.sync_bn: # --sync known issue https://github.com/ultralytics/yolov5/issues/3754 - with warnings.catch_warnings(): - warnings.simplefilter('ignore') # suppress jit trace warning - self.tb.add_graph(torch.jit.trace(de_parallel(model), imgs[0:1], strict=False), []) - if ni < 3: - f = self.save_dir / f"train_batch{ni}.jpg" # filename - plot_images_and_masks(imgs, targets, masks, paths, f) - - if self.wandb and ni == 10: - files = sorted(self.save_dir.glob('train*.jpg')) - self.wandb.log({'Mosaics': [wandb.Image(str(f), caption=f.name) for f in files if f.exists()]}) - - def on_fit_epoch_end(self, vals, epoch, best_fitness, fi): - # Callback runs at the end of each fit (train+val) epoch - x = dict(zip(self.keys, vals)) - if self.csv: - file = self.save_dir / 'results.csv' - n = len(x) + 1 # number of cols - s = '' if file.exists() else (('%20s,' * n % tuple(['epoch'] + self.keys)).rstrip(',') + '\n') # add header - with open(file, 'a') as f: - f.write(s + ('%20.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n') - - if self.tb: - for k, v in x.items(): - self.tb.add_scalar(k, v, epoch) - - if self.wandb: - if best_fitness == fi: - best_results = [epoch] + vals[4:12] - for i, name in enumerate(self.best_keys): - self.wandb.wandb_run.summary[name] = best_results[i] # log best results in the summary - self.wandb.log(x) - self.wandb.end_epoch(best_result=best_fitness == fi) - - def on_train_end(self, last, best, plots, epoch, results): - # Callback runs on training end - if plots: - plot_results_with_masks(file=self.save_dir / 'results.csv') # save results.png - files = ['results.png', 'confusion_matrix.png', 
*(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] - files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()] # filter - self.logger.info(f"Results saved to {colorstr('bold', self.save_dir)}") - - if self.tb: - for f in files: - self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC') - - if self.wandb: - self.wandb.log(dict(zip(self.keys[4:16], results))) - self.wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]}) - # Calling wandb.log. TODO: Refactor this into WandbLogger.log_model - if not self.opt.evolve: - wandb.log_artifact(str(best if best.exists() else last), - type='model', - name=f'run_{self.wandb.wandb_run.id}_model', - aliases=['latest', 'best', 'stripped']) - self.wandb.finish_run() - class GenericLogger: """ YOLOv5 General purpose logger for non-task specific logging From 3de63fb51337bbbd45b4d19d0d7934f2f45ea0b1 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 21:48:06 +0530 Subject: [PATCH 069/247] update --- segment/detect.py | 6 +- segment/predict.py | 279 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 282 insertions(+), 3 deletions(-) create mode 100644 segment/predict.py diff --git a/segment/detect.py b/segment/detect.py index c751e39a06b8..2eac4e46321f 100644 --- a/segment/detect.py +++ b/segment/detect.py @@ -89,7 +89,7 @@ def run( # Load model device = select_device(device) - model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=True) + model = attempt_load(weights, device=device, inplace=True, fuse=True) stride = max(int(model.stride.max()), 32) # model stride names = model.module.names if hasattr(model, 'module') else model.names # get class names model.half() if half else model.float() @@ -181,7 +181,7 @@ def run( annotator.im = img_masks # Write results - for i, (*xyxy, conf, cls) in enumerate(det): + for j, (*xyxy, conf, cls) in enumerate(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format @@ -191,7 +191,7 @@ def run( if save_img or save_crop or view_img: # Add bbox to image c = int(cls) # integer class label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') - annotator.box_label(xyxy, label, color=colors(i, True)) + annotator.box_label(xyxy, label, color=colors(j, True)) if save_crop: save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) diff --git a/segment/predict.py b/segment/predict.py new file mode 100644 index 000000000000..dd45690266ec --- /dev/null +++ b/segment/predict.py @@ -0,0 +1,279 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Run inference on images, videos, directories, streams, etc. 
+
+Usage - sources:
+    $ python path/to/predict.py --weights yolov5s.pt --source 0  # webcam
+                                                      img.jpg  # image
+                                                      vid.mp4  # video
+                                                      path/  # directory
+                                                      path/*.jpg  # glob
+                                                      'https://youtu.be/Zgi9g1ksQHc'  # YouTube
+                                                      'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream
+
+Usage - formats:
+    $ python path/to/predict.py --weights yolov5s.pt  # PyTorch
+                                          yolov5s.torchscript  # TorchScript
+                                          yolov5s.onnx  # ONNX Runtime or OpenCV DNN with --dnn
+                                          yolov5s.xml  # OpenVINO
+                                          yolov5s.engine  # TensorRT
+                                          yolov5s.mlmodel  # CoreML (macOS-only)
+                                          yolov5s_saved_model  # TensorFlow SavedModel
+                                          yolov5s.pb  # TensorFlow GraphDef
+                                          yolov5s.tflite  # TensorFlow Lite
+                                          yolov5s_edgetpu.tflite  # TensorFlow Edge TPU
+"""
+
+import argparse
+import os
+import sys
+from pathlib import Path
+
+import torch
+import torch.backends.cudnn as cudnn
+
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[1]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
+
+from models.experimental import attempt_load
+from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
+from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
+                           increment_path, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.plots import Annotator, colors, save_one_box
+from utils.segment.plots import plot_masks
+from utils.torch_utils import select_device, time_sync
+from utils.segment.general import non_max_suppression_masks, scale_masks, process_mask_upsample
+
+
+@torch.no_grad()
+def run(
+        weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
+        source=ROOT / 'data/images',  # file/dir/URL/glob, 0 for webcam
+        imgsz=(640, 640),  # inference size (height, width)
+        conf_thres=0.25,  # confidence threshold
+        iou_thres=0.45,  # NMS IOU threshold
+        max_det=1000,  # maximum detections per image
+        device='',  # cuda device, i.e. 
0 or 0,1,2,3 or cpu + view_img=False, # show results + save_txt=False, # save results to *.txt + save_conf=False, # save confidences in --save-txt labels + save_crop=False, # save cropped prediction boxes + nosave=False, # do not save images/videos + classes=None, # filter by class: --class 0, or --class 0 2 3 + agnostic_nms=False, # class-agnostic NMS + augment=False, # augmented inference + visualize=False, # visualize features + update=False, # update all models + project=ROOT / 'runs/predict_segment', # save results to project/name + name='exp', # save results to project/name + exist_ok=False, # existing project/name ok, do not increment + line_thickness=3, # bounding box thickness (pixels) + hide_labels=False, # hide labels + hide_conf=False, # hide confidences + half=False, # use FP16 half-precision inference +): + source = str(source) + save_img = not nosave and not source.endswith('.txt') # save inference images + is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) + is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://')) + webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file) + if is_url and is_file: + source = check_file(source) # download + + # Directories + save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run + (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir + + # Load model + device = select_device(device) + model = attempt_load(weights, device=device, inplace=True, fuse=True) + stride = max(int(model.stride.max()), 32) # model stride + names = model.module.names if hasattr(model, 'module') else model.names # get class names + model.half() if half else model.float() + pt = True + imgsz = check_img_size(imgsz, s=stride) # check image size + + # Dataloader + if webcam: + view_img = check_imshow() + cudnn.benchmark = True # set True to speed up constant image size inference + dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) + bs = len(dataset) # batch_size + else: + dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) + bs = 1 # batch_size + vid_path, vid_writer = [None] * bs, [None] * bs + + # Run inference + if str(device) != "cpu": + im = torch.zeros(1, 3, *imgsz).to(device).half() # input image + model(im) # warmup + seen, windows, dt = 0, [], [0.0, 0.0, 0.0] + for path, im, im0s, vid_cap, s in dataset: + t1 = time_sync() + im = torch.from_numpy(im).to(device) + im = im.half() if half else im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + if len(im.shape) == 3: + im = im[None] # expand for batch dim + t2 = time_sync() + dt[0] += t2 - t1 + + # Inference + visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False + pred, out = model(im, augment=augment, visualize=visualize) + proto = out[1] + t3 = time_sync() + dt[1] += t3 - t2 + + # NMS + pred = non_max_suppression_masks(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) + dt[2] += time_sync() - t3 + + # Second-stage classifier (optional) + # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s) + + # Process predictions + for i, det in enumerate(pred): # per image + seen += 1 + if webcam: # batch_size >= 1 + p, im0, frame = path[i], im0s[i].copy(), dataset.count + s += f'{i}: ' + else: + p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0) + + p = Path(p) # to Path + save_path = str(save_dir / p.name) # im.jpg + txt_path = str(save_dir / 'labels' / 
p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # im.txt + s += '%gx%g ' % im.shape[2:] # print string + gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh + imc = im0.copy() if save_crop else im0 # for save_crop + annotator = Annotator(im0, line_width=line_thickness, example=str(names)) + if len(det): + # mask stuff + masks_conf = det[:, 6:] + # binary mask, (img_h, img_w, n) + masks = process_mask_upsample(proto[i], masks_conf, det[:, :4], im.shape[2:]) + # n, img_h, img_w + masks = masks.permute(2, 0, 1).contiguous() + # bbox stuff + det = det[:, :6] # update the value in outputs, remove mask part. + # Rescale boxes from img_size to im0 size + det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() + + # Print results + for c in det[:, -1].unique(): + n = (det[:, -1] == c).sum() # detections per class + s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string + + # plot masks + mcolors = [colors(int(cls)) for cls in range(len(det[:, 5]))] + # NOTE: this way to draw masks is faster, + # but the image might get blurred, + # from https://github.com/dbolya/yolact + # image with masks, (img_h, img_w, 3) + img_masks = plot_masks(im[i], masks, mcolors) + # scale image to original hw + img_masks = scale_masks(im.shape[2:], img_masks, im0.shape) + annotator.im = img_masks + + # Write results + for j, (*xyxy, conf, cls) in enumerate(det): + if save_txt: # Write to file + xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh + line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format + with open(f'{txt_path}.txt', 'a') as f: + f.write(('%g ' * len(line)).rstrip() % line + '\n') + + if save_img or save_crop or view_img: # Add bbox to image + c = int(cls) # integer class + label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') + annotator.box_label(xyxy, label, color=colors(j, True)) + if save_crop: + save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) + + # Stream results + im0 = annotator.result() + if view_img: + if p not in windows: + windows.append(p) + cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) + cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) + cv2.imshow(str(p), im0) + cv2.waitKey(1) # 1 millisecond + + # Save results (image with detections) + if save_img: + if dataset.mode == 'image': + cv2.imwrite(save_path, im0) + else: # 'video' or 'stream' + if vid_path[i] != save_path: # new video + vid_path[i] = save_path + if isinstance(vid_writer[i], cv2.VideoWriter): + vid_writer[i].release() # release previous video writer + if vid_cap: # video + fps = vid_cap.get(cv2.CAP_PROP_FPS) + w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + else: # stream + fps, w, h = 30, im0.shape[1], im0.shape[0] + save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos + vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) + vid_writer[i].write(im0) + + # Print time (inference-only) + LOGGER.info(f'{s}Done. 
({t3 - t2:.3f}s)') + + # Print results + t = tuple(x / seen * 1E3 for x in dt) # speeds per image + LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t) + if save_txt or save_img: + s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' + LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") + if update: + strip_optimizer(weights) # update model (to fix SourceChangeWarning) + + +def parse_opt(): + parser = argparse.ArgumentParser() + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)') + parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') + parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') + parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') + parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') + parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image') + parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--view-img', action='store_true', help='show results') + parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') + parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') + parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes') + parser.add_argument('--nosave', action='store_true', help='do not save images/videos') + parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3') + parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') + parser.add_argument('--augment', action='store_true', help='augmented inference') + parser.add_argument('--visualize', action='store_true', help='visualize features') + parser.add_argument('--update', action='store_true', help='update all models') + parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name') + parser.add_argument('--name', default='exp', help='save results to project/name') + parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') + parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels') + parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') + parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') + opt = parser.parse_args() + opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand + print_args(vars(opt)) + return opt + + +def main(opt): + check_requirements(exclude=('tensorboard', 'thop')) + run(**vars(opt)) + + +if __name__ == "__main__": + opt = parse_opt() + main(opt) From ad7a3430055b7e61631a80e10ab029b3f4b89a3b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 20 Aug 2022 17:11:45 +0000 Subject: [PATCH 070/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- models/yolo.py | 11 ++- segment/detect.py | 2 +- segment/predict.py | 2 +- segment/train.py | 
119 +++++++++++++++--------
 segment/val.py                 |  86 ++++++++++++---------
 utils/dataloaders.py           |  21 +++---
 utils/general.py               |   6 +-
 utils/loggers/__init__.py      |   5 +-
 utils/plots.py                 |  45 ++++-------
 utils/segment/augmentations.py |   6 +-
 utils/segment/dataloaders.py   | 132 +++++++++++++++++++--------
 utils/segment/general.py       |  22 ++++--
 utils/segment/loss.py          |  79 +++++++++++++-------
 utils/segment/metrics.py       | 111 +++++++++++++++++----------
 utils/segment/plots.py         |  45 +++++------
 15 files changed, 396 insertions(+), 296 deletions(-)

diff --git a/models/yolo.py b/models/yolo.py
index c1f32d19fef9..e5065347bc70 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -89,15 +89,16 @@ def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version
         anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
         return grid, anchor_grid
 
+
 class DetectSegment(Detect):
+
     def __init__(self, nc=80, anchors=(), mask_dim=32, proto_channel=256, ch=(), inplace=True):
         super().__init__(nc, anchors, ch, inplace)
         self.mask_dim = mask_dim
         self.no = nc + 5 + self.mask_dim  # number of outputs per anchor
         self.nm = 5 + self.mask_dim
         self.proto_c = proto_channel
-        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1)
-                               for x in ch)  # output conv
+        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
 
         # P3 is used as the input
         self.proto_net = nn.Sequential(
@@ -106,7 +107,7 @@ def __init__(self, nc=80, anchors=(), mask_dim=32, proto_channel=256, ch=(), inp
             # nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1),
             # nn.SiLU(inplace=True),
             # nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1),
-            # nn.SiLU(inplace=True), 
+            # nn.SiLU(inplace=True),
             nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
             nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1),
             nn.SiLU(inplace=True),
@@ -306,7 +307,8 @@ def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is
             b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
             b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
             if hasattr(m, "mask_dim"):
-                b.data[:, 5+m.mask_dim:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
+                b.data[:, 5 + m.mask_dim:] += math.log(0.6 /
+                                                       (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
             else:
                 b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
             mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
@@ -314,6 +316,7 @@ def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is
 
 Model = DetectionModel  # retain YOLOv5 'Model' class for backwards compatibility
 
+
 class ClassificationModel(BaseModel):
     # YOLOv5 classification model
     def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):  # yaml, model, number of classes, cutoff index
diff --git a/segment/detect.py b/segment/detect.py
index 2eac4e46321f..24d1dd47a0f5 100644
--- a/segment/detect.py
+++ b/segment/detect.py
@@ -43,9 +43,9 @@
 from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
                            increment_path, print_args, scale_coords, strip_optimizer, xyxy2xywh)
 from utils.plots import Annotator, colors, save_one_box
+from utils.segment.general import non_max_suppression_masks, process_mask_upsample, scale_masks
 from utils.segment.plots import plot_masks
 from utils.torch_utils import select_device, time_sync
-from utils.segment.general import 
non_max_suppression_masks, scale_masks, process_mask_upsample @torch.no_grad() diff --git a/segment/predict.py b/segment/predict.py index dd45690266ec..09efa844c6df 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -43,9 +43,9 @@ from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, increment_path, print_args, scale_coords, strip_optimizer, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box +from utils.segment.general import non_max_suppression_masks, process_mask_upsample, scale_masks from utils.segment.plots import plot_masks from utils.torch_utils import select_device, time_sync -from utils.segment.general import non_max_suppression_masks, scale_masks, process_mask_upsample @torch.no_grad() diff --git a/segment/train.py b/segment/train.py index 5986aa4278ad..9771c6472f83 100644 --- a/segment/train.py +++ b/segment/train.py @@ -22,17 +22,18 @@ from datetime import datetime from pathlib import Path -import val # for end-of-epoch mAP import numpy as np import torch import torch.distributed as dist import torch.nn as nn +import torch.nn.functional as F import yaml from torch.nn.parallel import DistributedDataParallel as DDP -import torch.nn.functional as F from torch.optim import SGD, Adam, AdamW, lr_scheduler from tqdm import tqdm +import val # for end-of-epoch mAP + FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLOv5 root directory if str(ROOT) not in sys.path: @@ -44,7 +45,6 @@ from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size from utils.callbacks import Callbacks -from utils.segment.dataloaders import create_dataloader from utils.downloads import attempt_download from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, @@ -52,22 +52,25 @@ labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer) from utils.loggers import GenericLogger from utils.loggers.wandb.wandb_utils import check_wandb_resume +from utils.plots import plot_evolve, plot_labels +from utils.segment.dataloaders import create_dataloader from utils.segment.loss import ComputeLoss from utils.segment.metrics import fitness -from utils.plots import plot_evolve, plot_labels from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first - LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) -from utils.general import LOGGER, check_amp, check_version +from datetime import datetime + +import yaml +from torch.optim import AdamW + from utils.autobatch import check_train_batch_size +from utils.general import LOGGER, check_amp, check_version +from utils.segment.metrics import BEST_KEYS, KEYS from utils.segment.plots import plot_images_and_masks, plot_results_with_masks -from utils.segment.metrics import KEYS, BEST_KEYS -from torch.optim import AdamW -import yaml -from datetime import datetime + def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio= \ @@ -95,9 +98,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Loggers data_dict = None if RANK in {-1, 0}: - logger = 
GenericLogger( - opt=opt, console_logger=LOGGER - ) # loggers instance + logger = GenericLogger(opt=opt, console_logger=LOGGER) # loggers instance # Register actions # for k in methods(loggers): @@ -226,26 +227,27 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio LOGGER.info('Using SyncBatchNorm()') # Trainloader - train_loader, dataset = create_dataloader(train_path, - imgsz, - batch_size // WORLD_SIZE, - gs, - single_cls, - hyp=hyp, - augment=True, - cache=None if opt.cache == 'val' else opt.cache, - rect=opt.rect, - rank=LOCAL_RANK, - workers=workers, - image_weights=opt.image_weights, - quad=opt.quad, - prefix=colorstr('train: '), - shuffle=True, - mask_downsample_ratio=mask_ratio, - overlap_mask=overlap, - ) + train_loader, dataset = create_dataloader( + train_path, + imgsz, + batch_size // WORLD_SIZE, + gs, + single_cls, + hyp=hyp, + augment=True, + cache=None if opt.cache == 'val' else opt.cache, + rect=opt.rect, + rank=LOCAL_RANK, + workers=workers, + image_weights=opt.image_weights, + quad=opt.quad, + prefix=colorstr('train: '), + shuffle=True, + mask_downsample_ratio=mask_ratio, + overlap_mask=overlap, + ) mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class - print("mlc , nc ", mlc, " ", nc ) + print("mlc , nc ", mlc, " ", nc) nb = len(train_loader) # number of batches assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}' @@ -328,11 +330,12 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio if RANK != -1: train_loader.sampler.set_epoch(epoch) pbar = enumerate(train_loader) - LOGGER.info( ("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "seg", "obj", "cls", "labels", "img_size")) + LOGGER.info(("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "seg", "obj", "cls", "labels", "img_size")) if RANK in {-1, 0}: pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar optimizer.zero_grad() - for i, (imgs, targets, paths, _, masks) in pbar: # batch ------------------------------------------------------------- + for i, (imgs, targets, paths, _, + masks) in pbar: # batch ------------------------------------------------------------- ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 @@ -358,7 +361,8 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Forward with torch.cuda.amp.autocast(amp): pred = model(imgs) # forward - loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device).float()) # loss scaled by batch_size + loss, loss_items = compute_loss(pred, targets.to(device), + masks=masks.to(device).float()) # loss scaled by batch_size if RANK != -1: loss *= WORLD_SIZE # gradient averaged between devices in DDP mode if opt.quad: @@ -380,9 +384,9 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio if RANK in {-1, 0}: mloss = (mloss * i + loss_items) / (i + 1) # update mean losses mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) - pbar.set_description(("%10s" * 2 + "%10.4g" * 6) - % (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0],imgs.shape[-1])) - # for plots + pbar.set_description(("%10s" * 2 + "%10.4g" * 6) % + (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0], imgs.shape[-1])) + # for plots if mask_ratio != 1: masks = F.interpolate( masks[None, :].float(), @@ -395,7 
+399,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
                 if ni < 3:
                     f = save_dir / f"train_batch{ni}.jpg"  # filename
                     plot_images_and_masks(imgs, targets, masks, paths, f)
-
+
                 if ni == 10:
                     files = sorted(save_dir.glob('train*.jpg'))
                     logger.log_images(files, "Mosaics")
@@ -411,18 +415,19 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
             ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
             final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
             if not noval or final_epoch:  # Calculate mAP
-                results, maps, _ = val.run(data_dict,
-                                           batch_size=batch_size // WORLD_SIZE * 2,
-                                           imgsz=imgsz,
-                                           model=ema.ema,
-                                           single_cls=single_cls,
-                                           dataloader=val_loader,
-                                           save_dir=save_dir,
-                                           plots=plots,
-                                           #callbacks=callbacks,
-                                           compute_loss=compute_loss,
-                                           mask_downsample_ratio=mask_ratio,
-                                           overlap=overlap)
+                results, maps, _ = val.run(
+                    data_dict,
+                    batch_size=batch_size // WORLD_SIZE * 2,
+                    imgsz=imgsz,
+                    model=ema.ema,
+                    single_cls=single_cls,
+                    dataloader=val_loader,
+                    save_dir=save_dir,
+                    plots=plots,
+                    #callbacks=callbacks,
+                    compute_loss=compute_loss,
+                    mask_downsample_ratio=mask_ratio,
+                    overlap=overlap)
                 # Update best mAP
                 fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
                 stop = stopper(epoch=epoch, fitness=fi)  # early stop check
@@ -455,7 +460,6 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
                         torch.save(ckpt, w / f'epoch{epoch}.pt')
                         logger.log_model(w / f'epoch{epoch}.pt')
                     del ckpt
-
             # EarlyStopping
@@ -496,9 +500,9 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
                     logger.log_metrics(metrics_dict, epoch)
             #callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)
         # on train end callback using genericLogger
-        logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs+1)
+        logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs + 1)
         if not opt.evolve:
-            logger.log_model(best, epoch+1)
+            logger.log_model(best, epoch + 1)
         if plots:
             plot_results_with_masks(file=save_dir / 'results.csv')  # save results.png
             files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))]
@@ -511,7 +515,6 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
     return results
 
-
 def parse_opt(known=False):
     parser = argparse.ArgumentParser()
     parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
@@ -548,10 +551,12 @@ def parse_opt(known=False):
     parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
     parser.add_argument('--seed', type=int, default=0, help='Global training seed')
     parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
-
+
+    # Instance Segmentation Args
     parser.add_argument('--mask-ratio', type=int, default=4, help='Downsample the gt masks to save memory')
-    parser.add_argument('--overlap-mask', action='store_true', help='Overlapping masks train faster at the cost of slight accuracy decrease')
+    parser.add_argument('--overlap-mask',
+                        action='store_true',
+                        help='Overlapping masks train faster at the cost of slight accuracy decrease')
 
     opt = parser.parse_known_args()[0] if known else parser.parse_args()
     return opt
diff --git a/segment/val.py b/segment/val.py
index a301f636fb7d..06aba5e8459a 100644
--- 
a/segment/val.py +++ b/segment/val.py @@ -34,22 +34,22 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -import torch.nn.functional as F import pycocotools.mask as mask_util +import torch.nn.functional as F + from models.common import DetectMultiBackend from models.experimental import attempt_load # scoped to avoid circular import from utils.callbacks import Callbacks -from utils.segment.dataloaders import create_dataloader from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, - coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, - scale_coords, xywh2xyxy, xyxy2xywh) -from utils.segment.general import (non_max_suppression_masks, process_mask_upsample, mask_iou, - scale_masks, process_mask) + coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, scale_coords, + xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, box_iou -from utils.segment.metrics import ap_per_class_box_and_mask, Metrics -from utils.segment.plots import plot_images_and_masks from utils.plots import output_to_target, plot_val_study -from utils.torch_utils import select_device, time_sync, de_parallel +from utils.segment.dataloaders import create_dataloader +from utils.segment.general import mask_iou, non_max_suppression_masks, process_mask, process_mask_upsample, scale_masks +from utils.segment.metrics import Metrics, ap_per_class_box_and_mask +from utils.segment.plots import plot_images_and_masks +from utils.torch_utils import de_parallel, select_device, time_sync def save_one_txt(predn, save_conf, shape, file): @@ -118,15 +118,20 @@ def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap): gt_masks = torch.where(gt_masks == index, 1.0, 0.0) if gt_masks.shape[1:] != pred_masks.shape[1:]: - gt_masks = F.interpolate(gt_masks.unsqueeze(0), pred_masks.shape[1:], mode="bilinear", - align_corners=False, ).squeeze(0) - - iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1), ) - x = torch.where( - (iou >= iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match + gt_masks = F.interpolate( + gt_masks.unsqueeze(0), + pred_masks.shape[1:], + mode="bilinear", + align_corners=False, + ).squeeze(0) + + iou = mask_iou( + gt_masks.view(gt_masks.shape[0], -1), + pred_masks.view(pred_masks.shape[0], -1), + ) + x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match if x[0].shape[0]: - matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] + matches = (torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -239,8 +244,8 @@ def run( confusion_matrix = ConfusionMatrix(nc=nc) names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)} class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) - s = ("%20s" + "%11s" * 10) % ("Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", - "Mask:{P", "R", "mAP@.5", "mAP@.5:.95}") + s = ("%20s" + "%11s" * 10) % ("Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", "Mask:{P", "R", + "mAP@.5", "mAP@.5:.95}") dt = [0.0, 0.0, 0.0] metrics = Metrics() loss = torch.zeros(4, device=device) @@ -261,7 +266,7 
@@ def run( dt[0] += t2 - t1 # Inference - out, train_out = model(im) #if training else model(im, augment=augment, val=True) # inference, loss outputs + out, train_out = model(im) #if training else model(im, augment=augment, val=True) # inference, loss outputs dt[1] += time_sync() - t2 # Loss @@ -272,7 +277,12 @@ def run( targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling t3 = time_sync() - out = non_max_suppression_masks(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, + out = non_max_suppression_masks(out, + conf_thres, + iou_thres, + labels=lb, + multi_label=True, + agnostic=single_cls, mask_dim=de_parallel(model).model[-1].mask_dim) dt[2] += time_sync() - t3 @@ -296,8 +306,8 @@ def run( midx = [si] if overlap else targets[:, 0] == si gt_masks = masks[midx] proto_out = train_out[1][si] - pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], - shape=im[si].shape[1:]).permute(2, 0, 1).contiguous() + pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).permute(2, 0, + 1).contiguous() if plots and batch_i < 3: # filter top 15 to plot plot_masks.append(torch.as_tensor(pred_masks[:15], dtype=torch.uint8).cpu()) @@ -317,14 +327,15 @@ def run( correct_masks = process_batch_masks(predn, pred_masks, gt_masks, labelsn, iouv, overlap=overlap) if plots: confusion_matrix.process_batch(predn, labelsn) - stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct, conf, pcls, tcls) + stats.append( + (correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct, conf, pcls, tcls) # Save/log if save_txt: save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt')) if save_json: - pred_masks = scale_masks(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), - shape, shapes[si][1]) + pred_masks = scale_masks(im[si].shape[1:], + pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]) save_one_json(predn, jdict, path, class_map, pred_masks) # append to COCO-JSON dictionary callbacks.run('on_val_image_end', pred[:, :6], predn[:, :6], path, names, im[si]) @@ -337,11 +348,11 @@ def run( mode="bilinear", align_corners=False, ).squeeze(0) - plot_images_and_masks(im, targets, masks, paths, - save_dir / f'val_batch{batch_i}_labels.jpg', names) # labels + plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', + names) # labels plot_masks = torch.cat(plot_masks, dim=0) - plot_images_and_masks(im, output_to_target(out, filter_dets=15), plot_masks, paths, - save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred + plot_images_and_masks(im, output_to_target(out, filter_dets=15), plot_masks, paths, + save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred callbacks.run('on_val_batch_end') @@ -372,7 +383,7 @@ def run( # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) - #callbacks.run('on_val_end') + #callbacks.run('on_val_end') # in case the cocoeval will update map ( @@ -404,8 +415,10 @@ def run( eval_bbox = COCOeval(anno, pred, 'bbox') eval_mask = COCOeval(anno, pred, 'segm') if is_coco: - eval_bbox.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # image IDs to evaluate - eval_mask.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # image IDs to evaluate + 
eval_bbox.params.imgIds = [int(Path(x).stem) + for x in dataloader.dataset.im_files] # image IDs to evaluate + eval_mask.params.imgIds = [int(Path(x).stem) + for x in dataloader.dataset.im_files] # image IDs to evaluate eval_bbox.evaluate() eval_bbox.accumulate() eval_bbox.summarize() @@ -433,8 +446,11 @@ def run( map50_mask, map_mask, ) - return ((*final_metric, *(loss.cpu() / len(dataloader)).tolist()), - metrics.get_maps(nc), t,) + return ( + (*final_metric, *(loss.cpu() / len(dataloader)).tolist()), + metrics.get_maps(nc), + t, + ) def parse_opt(): diff --git a/utils/dataloaders.py b/utils/dataloaders.py index ca70bfcbdac7..08d8a293fc31 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -139,16 +139,17 @@ def create_dataloader(path, loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates # generator = torch.Generator() # generator.manual_seed(0) - return loader(dataset, - batch_size=batch_size, - shuffle=shuffle and sampler is None, - num_workers=nw, - sampler=sampler, - pin_memory=True, - collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, - worker_init_fn=seed_worker, - # generator=generator, - ), dataset + return loader( + dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=True, + collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, + worker_init_fn=seed_worker, + # generator=generator, + ), dataset class InfiniteDataLoader(dataloader.DataLoader): diff --git a/utils/general.py b/utils/general.py index fcad90041fb3..3f81e8733139 100644 --- a/utils/general.py +++ b/utils/general.py @@ -25,7 +25,6 @@ from subprocess import check_output from typing import Optional from zipfile import ZipFile -from PIL import ImageFont import cv2 import numpy as np @@ -34,6 +33,7 @@ import torch import torchvision import yaml +from PIL import ImageFont from utils.downloads import gsutil_getsize from utils.metrics import box_iou, fitness @@ -465,6 +465,7 @@ def check_file(file, suffix=''): assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique return files[0] # return file + ''' def check_font(font=FONT, progress=False): # Download font to CONFIG_DIR if necessary @@ -475,6 +476,8 @@ def check_font(font=FONT, progress=False): LOGGER.info(f'Downloading {url} to {file}...') torch.hub.download_url_to_file(url, str(file), progress=progress) ''' + + def check_font(font="Arial.ttf", size=10, progress=False): # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary font = Path(font) @@ -487,6 +490,7 @@ def check_font(font="Arial.ttf", size=10, progress=False): torch.hub.download_url_to_file(url, str(font), progress=progress) return ImageFont.truetype(str(font), size) + def check_dataset(data, autodownload=True): # Download, check and/or unzip dataset if not found locally diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 5069f2d2b16b..1bb5de45320f 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -15,7 +15,7 @@ from utils.loggers.clearml.clearml_utils import ClearmlLogger from utils.loggers.wandb.wandb_utils import WandbLogger from utils.plots import plot_images, plot_results -from utils.segment.plots import plot_results_with_masks, plot_images_and_masks +from utils.segment.plots import plot_images_and_masks, plot_results_with_masks from utils.torch_utils import de_parallel LOGGERS = ('csv', 'tb', 'wandb', 
'clearml') # *.csv, TensorBoard, Weights & Biases, ClearML @@ -295,12 +295,13 @@ def log_model(self, model_path, epoch=0, metadata={}): art = wandb.Artifact(name=f"run_{wandb.run.id}_model", type="model", metadata=metadata) art.add_file(str(model_path)) wandb.log_artifact(art) - + def update_params(self, params): # Update the paramters logged if self.wandb: wandb.run.config.update(params, allow_val_change=True) + def log_tensorboard_graph(tb, model, imgsz=(640, 640)): # Log model graph to TensorBoard try: diff --git a/utils/plots.py b/utils/plots.py index 2f5741f88ae6..ed227008e113 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -527,6 +527,7 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, import math import os from copy import copy +from itertools import repeat from pathlib import Path import cv2 @@ -537,7 +538,6 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, import seaborn as sn import torch from PIL import Image, ImageDraw -from itertools import repeat from .metrics import fitness @@ -582,7 +582,7 @@ def __call__(self, i, bgr=False): @staticmethod def hex2rgb(h): # rgb order (PIL) - return tuple(int(h[1 + i : 1 + i + 2], 16) for i in (0, 2, 4)) + return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)) colors = Colors() # create instance for 'from utils.plots import colors' @@ -602,16 +602,12 @@ def __init__( pil=False, example="abc", ): - assert ( - im.data.contiguous - ), "Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images." + assert (im.data.contiguous), "Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images." self.pil = pil or not is_ascii(example) if self.pil: # use PIL self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) self.draw = ImageDraw.Draw(self.im) - self.font = check_font( - font="Arial.Unicode.ttf", - ) + self.font = check_font(font="Arial.Unicode.ttf",) else: # use cv2 self.im = im self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width @@ -628,8 +624,7 @@ def box_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 2 box[0], box[1] - h if outside else box[1], box[0] + w + 1, - box[1] + 1 if outside else box[1] + h + 1, - ], + box[1] + 1 if outside else box[1] + h + 1,], fill=color, ) # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0 @@ -644,9 +639,7 @@ def box_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 2 cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA) if label: tf = max(self.lw - 1, 1) # font thickness - w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[ - 0 - ] # text width, height + w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height outside = p1[1] - h - 3 >= 0 # label fits outside box p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled @@ -734,7 +727,7 @@ def plot_images( break x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin im = im.transpose(1, 2, 0) - mosaic[y : y + h, x : x + w, :] = im + mosaic[y:y + h, x:x + w, :] = im # Resize (optional) scale = max_size / ns / max(h, w) @@ -822,7 +815,7 @@ def plot_targets_txt(): # from utils.plots import *; plot_targets_txt() fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) ax = ax.ravel() for i in range(4): - ax[i].hist(x[i], bins=100, label="%.3g +/- %.3g" % 
(x[i].mean(), x[i].std())) + ax[i].hist(x[i], bins=100, label="{:.3g} +/- {:.3g}".format(x[i].mean(), x[i].std())) ax[i].legend() ax[i].set_title(s[i]) plt.savefig("targets.jpg", dpi=200) @@ -848,8 +841,7 @@ def plot_val_study(file="", dir="", x=None): # from utils.plots import *; plot_ "mAP@.5:.95", "t_preprocess (ms/img)", "t_inference (ms/img)", - "t_NMS (ms/img)", - ] + "t_NMS (ms/img)",] for i in range(7): ax[i].plot(x, y[i], ".-", linewidth=2, markersize=8) ax[i].set_title(s[i]) @@ -947,8 +939,7 @@ def profile_idetection(start=0, stop=0, labels=(), save_dir=""): "Battery", "dt_raw (ms)", "dt_smooth (ms)", - "real-world FPS", - ] + "real-world FPS",] files = list(Path(save_dir).glob("frames*.txt")) for fi, f in enumerate(files): try: @@ -978,14 +969,12 @@ def profile_idetection(start=0, stop=0, labels=(), save_dir=""): else: a.remove() except Exception as e: - print("Warning: Plotting error for %s; %s" % (f, e)) + print("Warning: Plotting error for {}; {}".format(f, e)) ax[1].legend() plt.savefig(Path(save_dir) / "idetection_profile.png", dpi=200) -def plot_evolve( - evolve_csv="path/to/evolve.csv", -): # from utils.plots import *; plot_evolve() +def plot_evolve(evolve_csv="path/to/evolve.csv",): # from utils.plots import *; plot_evolve() # Plot evolve.csv hyp evolution results evolve_csv = Path(evolve_csv) data = pd.read_csv(evolve_csv) @@ -1001,7 +990,7 @@ def plot_evolve( plt.subplot(6, 5, i + 1) plt.scatter(v, f, c=hist2d(v, f, 20), cmap="viridis", alpha=0.8, edgecolors="none") plt.plot(mu, f.max(), "k+", markersize=15) - plt.title("%s = %.3g" % (k, mu), fontdict={"size": 9}) # limit to 40 characters + plt.title("{} = {:.3g}".format(k, mu), fontdict={"size": 9}) # limit to 40 characters if i % 5 != 0: plt.yticks([]) print("%15s: %.3g" % (k, mu)) @@ -1056,9 +1045,7 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=None): import random # Plots one bounding box on image img - tl = ( - line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 - ) # line/font thickness + tl = (line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1) # line/font thickness color = color or [random.randint(0, 255) for _ in range(3)] c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) @@ -1092,9 +1079,7 @@ def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detec if height > 1 and width > 1: f = f"stage{stage}_{module_type.split('.')[-1]}_features.png" # filename - blocks = torch.chunk( - x[0].cpu(), channels, dim=0 - ) # select batch index 0, block by channels + blocks = torch.chunk(x[0].cpu(), channels, dim=0) # select batch index 0, block by channels n = min(n, channels) # number of plots fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True) # 8 rows x n/8 cols ax = ax.ravel() diff --git a/utils/segment/augmentations.py b/utils/segment/augmentations.py index dc29df6ad8ad..169addedf0f5 100644 --- a/utils/segment/augmentations.py +++ b/utils/segment/augmentations.py @@ -9,8 +9,9 @@ import cv2 import numpy as np -from ..general import segment2box, resample_segments from ..augmentations import box_candidates +from ..general import resample_segments, segment2box + def mixup(im, labels, segments, im2, labels2, segments2): # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf @@ -20,6 +21,7 @@ def mixup(im, labels, segments, im2, labels2, segments2): segments = np.concatenate((segments, segments2), 0) return im, labels, segments + def 
random_perspective(im, targets=(), segments=(), @@ -100,5 +102,3 @@ def random_perspective(im, new_segments = np.array(new_segments)[i] return im, targets, new_segments - - diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py index f4af39617dea..f6fe642d077f 100644 --- a/utils/segment/dataloaders.py +++ b/utils/segment/dataloaders.py @@ -3,20 +3,19 @@ Dataloaders """ -import numpy as np -import cv2 -import random import os -import torch +import random -from torch.utils.data import DataLoader -from torch.utils.data import distributed +import cv2 +import numpy as np +import torch +from torch.utils.data import DataLoader, distributed from ..augmentations import augment_hsv, copy_paste, letterbox -from ..dataloaders import LoadImagesAndLabels, InfiniteDataLoader, seed_worker -from ..general import xywhn2xyxy, xyxy2xywhn, xyn2xy, LOGGER +from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, seed_worker +from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn from ..torch_utils import torch_distributed_zero_first -from .augmentations import random_perspective, mixup +from .augmentations import mixup, random_perspective def create_dataloader(path, @@ -35,7 +34,7 @@ def create_dataloader(path, quad=False, prefix='', shuffle=False, - mask_downsample_ratio=1, + mask_downsample_ratio=1, overlap_mask=False): if rect and shuffle: LOGGER.warning('WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False') @@ -64,25 +63,40 @@ def create_dataloader(path, loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates # generator = torch.Generator() # generator.manual_seed(0) - return loader(dataset, - batch_size=batch_size, - shuffle=shuffle and sampler is None, - num_workers=nw, - sampler=sampler, - pin_memory=True, - collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn, - worker_init_fn=seed_worker, - # generator=generator, - ), dataset + return loader( + dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=True, + collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn, + worker_init_fn=seed_worker, + # generator=generator, + ), dataset class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing - def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False, stride=32, pad=0, prefix="", - downsample_ratio=1, overlap=False, + + def __init__( + self, + path, + img_size=640, + batch_size=16, + augment=False, + hyp=None, + rect=False, + image_weights=False, + cache_images=False, + single_cls=False, + stride=32, + pad=0, + prefix="", + downsample_ratio=1, + overlap=False, ): super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls, - stride, pad, prefix) + stride, pad, prefix) self.downsample_ratio = downsample_ratio self.overlap = overlap @@ -99,8 +113,7 @@ def __getitem__(self, index): # MixUp augmentation if random.random() < hyp["mixup"]: - img, labels, segments = mixup(img, labels, segments, - *self.load_mosaic(random.randint(0, self.n - 1))) + img, labels, segments = mixup(img, labels, segments, *self.load_mosaic(random.randint(0, self.n - 1))) else: # Load image @@ -116,30 +129,44 @@ def __getitem__(self, index): segments = self.segments[index].copy() if len(segments): for i_s in 
range(len(segments)): - segments[i_s] = xyn2xy(segments[i_s], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1], ) + segments[i_s] = xyn2xy( + segments[i_s], + ratio[0] * w, + ratio[1] * h, + padw=pad[0], + padh=pad[1], + ) if labels.size: # normalized xywh to pixel xyxy format labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) if self.augment: - img, labels, segments = random_perspective(img, labels, segments=segments, degrees=hyp["degrees"], - translate=hyp["translate"], scale=hyp["scale"], shear=hyp["shear"], perspective=hyp["perspective"], - return_seg=True, ) + img, labels, segments = random_perspective( + img, + labels, + segments=segments, + degrees=hyp["degrees"], + translate=hyp["translate"], + scale=hyp["scale"], + shear=hyp["shear"], + perspective=hyp["perspective"], + return_seg=True, + ) nl = len(labels) # number of labels if nl: labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) if self.overlap: - masks, sorted_idx = polygons2masks_overlap(img.shape[:2], segments, - downsample_ratio=self.downsample_ratio) + masks, sorted_idx = polygons2masks_overlap(img.shape[:2], + segments, + downsample_ratio=self.downsample_ratio) masks = masks[None] # (640, 640) -> (1, 640, 640) labels = labels[sorted_idx] else: masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio) - masks = (torch.from_numpy(masks) if len(masks) else - torch.zeros(1 if self.overlap else nl, - img.shape[0] // self.downsample_ratio, - img.shape[1] // self.downsample_ratio)) + masks = (torch.from_numpy(masks) if len(masks) else torch.zeros(1 if self.overlap else nl, img.shape[0] // + self.downsample_ratio, img.shape[1] // + self.downsample_ratio)) # TODO: albumentations support if self.augment: # Albumentations @@ -174,14 +201,14 @@ def __getitem__(self, index): # Convert img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB img = np.ascontiguousarray(img) - + return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks) def load_mosaic(self, index): # YOLOv5 4-mosaic loader. 
Loads 1 image + 3 random images into a 4-image mosaic labels4, segments4 = [], [] s = self.img_size - yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y + yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y # 3 additional image indices indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices @@ -224,16 +251,15 @@ def load_mosaic(self, index): # Augment img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"]) - img4, labels4, segments4 = random_perspective( - img4, - labels4, - segments4, - degrees=self.hyp["degrees"], - translate=self.hyp["translate"], - scale=self.hyp["scale"], - shear=self.hyp["shear"], - perspective=self.hyp["perspective"], - border=self.mosaic_border) # border to remove + img4, labels4, segments4 = random_perspective(img4, + labels4, + segments4, + degrees=self.hyp["degrees"], + translate=self.hyp["translate"], + scale=self.hyp["scale"], + shear=self.hyp["shear"], + perspective=self.hyp["perspective"], + border=self.mosaic_border) # border to remove return img4, labels4, segments4 @staticmethod @@ -259,7 +285,7 @@ def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): polygons = polygons.reshape(shape[0], -1, 2) cv2.fillPoly(mask, polygons, color=color) nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio) - # NOTE: fillPoly firstly then resize is trying the keep the same way + # NOTE: fillPoly first and then resize tries to keep the same way # of loss calculation when mask-ratio=1. mask = cv2.resize(mask, (nw, nh)) return mask @@ -269,22 +295,20 @@ def polygons2masks(img_size, polygons, color, downsample_ratio=1): """ Args: img_size (tuple): The image size. - polygons (list[np.ndarray]): each polygon is [N, M], + polygons (list[np.ndarray]): each polygon is [N, M], N is the number of polygons, M is the number of points(Be divided by 2).
""" masks = [] for si in range(len(polygons)): - mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, - downsample_ratio) + mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, downsample_ratio) masks.append(mask) return np.array(masks) def polygons2masks_overlap(img_size, segments, downsample_ratio=1): """Return a (640, 640) overlap mask.""" - masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio), - dtype=np.uint8) + masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio), dtype=np.uint8) areas = [] ms = [] for si in range(len(segments)): diff --git a/utils/segment/general.py b/utils/segment/general.py index 00367e7268fd..675fac4fbd92 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -10,8 +10,17 @@ from ..metrics import box_iou -def non_max_suppression_masks(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, - multi_label=False, labels=(), max_det=300, mask_dim=32, ): +def non_max_suppression_masks( + prediction, + conf_thres=0.25, + iou_thres=0.45, + classes=None, + agnostic=False, + multi_label=False, + labels=(), + max_det=300, + mask_dim=32, +): """Runs Non-Maximum Suppression (NMS) on inference results Returns: @@ -119,7 +128,10 @@ def crop(masks, boxes): """ h, w, n = masks.size() x1, x2 = boxes[:, 0], boxes[:, 2] - y1, y2 = (boxes[:, 1], boxes[:, 3],) + y1, y2 = ( + boxes[:, 1], + boxes[:, 3], + ) rows = (torch.arange(w, device=masks.device, dtype=x1.dtype).view(1, -1, 1).expand(h, w, n)) cols = (torch.arange(h, device=masks.device, dtype=x1.dtype).view(-1, 1, 1).expand(h, w, n)) @@ -226,7 +238,7 @@ def scale_masks(img1_shape, masks, img0_shape, ratio_pad=None): def mask_iou(mask1, mask2): """ - mask1: [N, n] m1 means number of predicted objects + mask1: [N, n] m1 means number of predicted objects mask2: [M, n] m2 means number of gt objects Note: n means image_w x image_h @@ -244,7 +256,7 @@ def mask_iou(mask1, mask2): def masks_iou(mask1, mask2): """ - mask1: [N, n] m1 means number of predicted objects + mask1: [N, n] m1 means number of predicted objects mask2: [N, n] m2 means number of gt objects Note: n means image_w x image_h diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 992fe98499ff..d1027a387f7e 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -3,12 +3,14 @@ import torch.nn.functional as F from ..general import xywh2xyxy -from ..loss import smooth_BCE, FocalLoss -from ..torch_utils import is_parallel +from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou -from .general import masks_iou, crop +from ..torch_utils import is_parallel +from .general import crop, masks_iou + class MaskIOULoss(nn.Module): + def __init__(self) -> None: super().__init__() @@ -29,6 +31,7 @@ def forward(self, pred_mask, gt_mask, mxyxy=None, return_iou=False): iou = masks_iou(pred_mask, gt_mask) return iou if return_iou else (1.0 - iou) + class ComputeLoss: # Compute losses def __init__(self, model, autobalance=False, overlap=False): @@ -54,7 +57,13 @@ def __init__(self, model, autobalance=False, overlap=False): det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index - self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = (BCEcls, BCEobj, 1.0, h, autobalance,) + self.BCEcls, self.BCEobj, self.gr, self.hyp, 
self.autobalance = ( + BCEcls, + BCEobj, + 1.0, + h, + autobalance, + ) for k in "na", "nc", "nl", "anchors", "nm": if hasattr(det, k): setattr(self, k, getattr(det, k)) @@ -68,8 +77,11 @@ def __call__(self, preds, targets, masks): # predictions, targets, model device = targets.device lcls, lbox, lobj, lseg = ( - torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device), - torch.zeros(1, device=device),) + torch.zeros(1, device=device), + torch.zeros(1, device=device), + torch.zeros(1, device=device), + torch.zeros(1, device=device), + ) tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets(p, targets) # targets # Losses for i, pi in enumerate(p): # layer index, layer predictions @@ -91,7 +103,13 @@ def __call__(self, preds, targets, masks): # predictions, targets, model score_iou = iou.detach().clamp(0).type(tobj.dtype) if self.sort_obj_iou: sort_id = torch.argsort(score_iou) - b, a, gj, gi, score_iou = (b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], score_iou[sort_id],) + b, a, gj, gi, score_iou = ( + b[sort_id], + a[sort_id], + gj[sort_id], + gi[sort_id], + score_iou[sort_id], + ) tobj[b, a, gj, gi] = 1.0 * ((1.0 - self.gr) + self.gr * score_iou) # iou ratio # Classification @@ -103,14 +121,15 @@ def __call__(self, preds, targets, masks): # predictions, targets, model # Mask Regression # TODO: # [bs * num_objs, img_h, img_w] -> [bs * num_objs, mask_h, mask_w] - downsampled_masks = F.interpolate(masks[None, :], (mask_h, mask_w), mode="bilinear", - align_corners=False).squeeze(0) + downsampled_masks = F.interpolate(masks[None, :], (mask_h, mask_w), + mode="bilinear", + align_corners=False).squeeze(0) mxywh = xywh[i] mws, mhs = mxywh[:, 2:].T mws, mhs = mws / pi.shape[3], mhs / pi.shape[2] - mxywhs = (mxywh / torch.tensor(pi.shape, device=mxywh.device)[[3, 2, 3, 2]] * torch.tensor( - [mask_w, mask_h, mask_w, mask_h], device=mxywh.device)) + mxywhs = (mxywh / torch.tensor(pi.shape, device=mxywh.device)[[3, 2, 3, 2]] * + torch.tensor([mask_w, mask_h, mask_w, mask_h], device=mxywh.device)) mxyxys = xywh2xyxy(mxywhs) batch_lseg = torch.zeros(1, device=device) @@ -128,7 +147,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model mw, mh = mws[index], mhs[index] mxyxy = mxyxys[index] - psi = ps[index][:, 5: self.nm] + psi = ps[index][:, 5:self.nm] proto = proto_out[bi] one_lseg = self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) @@ -165,15 +184,15 @@ def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") lseg = crop(lseg, xyxy) lseg = lseg.mean(dim=(0, 1)) / w / h - return lseg.mean()#, iou# + lseg_iou.mean() + return lseg.mean() #, iou# + lseg_iou.mean() def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets tcls, tbox, indices, anch, tidxs, xywh = [], [], [], [], [], [] gain = torch.ones(8, device=targets.device) # normalized to gridspace gain - ai = ( - torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)) # same as .repeat_interleave(nt) + ai = (torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, + nt)) # same as .repeat_interleave(nt) if self.overlap: batch = p[0].shape[0] ti = [] @@ -181,27 +200,33 @@ def build_targets(self, p, targets): # find number of targets of each image num = (targets[:, 0] == i).sum() # (na, num) - ti.append( - torch.arange(num, device=targets.device) 
- .float() - .view(1, num) - .repeat(na, 1) + 1) + ti.append(torch.arange(num, device=targets.device).float().view(1, num).repeat(na, 1) + 1) # (na, nt) ti = torch.cat(ti, 1) else: - ti = ( - torch.arange(nt, device=targets.device).float().view(1, nt).repeat(na, 1)) # same as .repeat_interleave(nt) + ti = (torch.arange(nt, device=targets.device).float().view(1, + nt).repeat(na, + 1)) # same as .repeat_interleave(nt) targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None], ti[:, :, None]), 2) # append anchor indices g = 0.5 # bias - off = (torch.tensor([[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m - # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm - ], device=targets.device, ).float() * g) # offsets + off = ( + torch.tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + device=targets.device, + ).float() * g) # offsets for i in range(self.nl): anchors, shape = self.anchors[i], p[i].shape - gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain + gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain # Match targets to anchors t = targets * gain @@ -234,7 +259,7 @@ def build_targets(self, p, targets): # Append a = t[:, 6].long() # anchor indices tidx = t[:, 7].long() - indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid + indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid tbox.append(torch.cat((gxy - gij, gwh), 1)) # box anch.append(anchors[a]) # anchors tcls.append(c) # class diff --git a/utils/segment/metrics.py b/utils/segment/metrics.py index 65e3011f9f12..981d90252ec9 100644 --- a/utils/segment/metrics.py +++ b/utils/segment/metrics.py @@ -5,6 +5,7 @@ import numpy as np from easydict import EasyDict as edict + from ..metrics import ap_per_class @@ -14,26 +15,57 @@ def fitness(x): return (x[:, :8] * w).sum(1) -def ap_per_class_box_and_mask(tp_m, tp_b, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), ): +def ap_per_class_box_and_mask( + tp_m, + tp_b, + conf, + pred_cls, + target_cls, + plot=False, + save_dir=".", + names=(), +): """ Args: tp_b: tp of boxes. tp_m: tp of masks. other arguments see `func: ap_per_class`. 
""" - results_boxes = ap_per_class(tp_b, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, - prefix="Box")[2:] - results_masks = ap_per_class(tp_m, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, - prefix="Mask")[2:] + results_boxes = ap_per_class(tp_b, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Box")[2:] + results_masks = ap_per_class(tp_m, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Mask")[2:] results = edict({ - "boxes": {"p": results_boxes[0], "r": results_boxes[1], "ap": results_boxes[3], "f1": results_boxes[2], + "boxes": { + "p": results_boxes[0], + "r": results_boxes[1], + "ap": results_boxes[3], + "f1": results_boxes[2], "ap_class": results_boxes[4]}, - "masks": {"p": results_masks[0], "r": results_masks[1], "ap": results_masks[3], "f1": results_masks[2], + "masks": { + "p": results_masks[0], + "r": results_masks[1], + "ap": results_masks[3], + "f1": results_masks[2], "ap_class": results_masks[4]}}) return results + class Metric: + def __init__(self) -> None: self.p = [] # (nc, ) self.r = [] # (nc, ) @@ -145,36 +177,35 @@ def ap_class_index(self): # boxes and masks have the same ap_class_index return self.metric_box.ap_class_index -KEYS = [ - "train/box_loss", - "train/seg_loss", # train loss - "train/obj_loss", - "train/cls_loss", - "metrics/precision(B)", - "metrics/recall(B)", - "metrics/mAP_0.5(B)", - "metrics/mAP_0.5:0.95(B)", # metrics - "metrics/precision(M)", - "metrics/recall(M)", - "metrics/mAP_0.5(M)", - "metrics/mAP_0.5:0.95(M)", # metrics - "val/box_loss", - "val/seg_loss", # val loss - "val/obj_loss", - "val/cls_loss", - "x/lr0", - "x/lr1", - "x/lr2", - ] - -BEST_KEYS = [ - "best/epoch", - "best/precision(B)", - "best/recall(B)", - "best/mAP_0.5(B)", - "best/mAP_0.5:0.95(B)", - "best/precision(M)", - "best/recall(M)", - "best/mAP_0.5(M)", - "best/mAP_0.5:0.95(M)", - ] \ No newline at end of file + +KEYS = [ + "train/box_loss", + "train/seg_loss", # train loss + "train/obj_loss", + "train/cls_loss", + "metrics/precision(B)", + "metrics/recall(B)", + "metrics/mAP_0.5(B)", + "metrics/mAP_0.5:0.95(B)", # metrics + "metrics/precision(M)", + "metrics/recall(M)", + "metrics/mAP_0.5(M)", + "metrics/mAP_0.5:0.95(M)", # metrics + "val/box_loss", + "val/seg_loss", # val loss + "val/obj_loss", + "val/cls_loss", + "x/lr0", + "x/lr1", + "x/lr2",] + +BEST_KEYS = [ + "best/epoch", + "best/precision(B)", + "best/recall(B)", + "best/mAP_0.5(B)", + "best/mAP_0.5:0.95(B)", + "best/precision(M)", + "best/recall(M)", + "best/mAP_0.5(M)", + "best/mAP_0.5:0.95(M)",] diff --git a/utils/segment/plots.py b/utils/segment/plots.py index eb1e9b61d01a..b0774213ede0 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -1,14 +1,15 @@ -import cv2 -import torch import math -import numpy as np +from pathlib import Path + +import cv2 import matplotlib.pyplot as plt +import numpy as np import pandas as pd -from pathlib import Path +import torch from PIL import Image -from ..plots import colors, Annotator from ..general import xywh2xyxy +from ..plots import Annotator, colors def plot_masks(img, masks, colors, alpha=0.5): @@ -37,7 +38,7 @@ def plot_masks(img, masks, colors, alpha=0.5): inv_alph_masks = masks * (-alpha) + 1 masks_color_summand = masks_color[0] if num_masks > 1: - inv_alph_cumul = inv_alph_masks[: (num_masks - 1)].cumprod(dim=0) + inv_alph_cumul = inv_alph_masks[:(num_masks - 1)].cumprod(dim=0) masks_color_cumul = masks_color[1:] * 
inv_alph_cumul masks_color_summand += masks_color_cumul.sum(dim=0) @@ -48,13 +49,12 @@ def plot_masks(img, masks, colors, alpha=0.5): img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand return (img_gpu * 255).byte().cpu().numpy() + def plot_one_box(x, img, color=None, label=None, line_thickness=None): import random # Plots one bounding box on image img - tl = ( - line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 - ) # line/font thickness + tl = (line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1) # line/font thickness color = color or [random.randint(0, 255) for _ in range(3)] c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) @@ -74,6 +74,7 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=None): lineType=cv2.LINE_AA, ) + def plot_images_and_masks( images, targets, @@ -120,7 +121,7 @@ def plot_images_and_masks( if scale_factor < 1: img = cv2.resize(img, (w, h)) - mosaic[block_y : block_y + h, block_x : block_x + w, :] = img + mosaic[block_y:block_y + h, block_x:block_x + w, :] = img if len(targets) > 0: idx = (targets[:, 0]).astype(int) image_targets = targets[idx == i] @@ -138,9 +139,7 @@ def plot_images_and_masks( boxes = xywh2xyxy(image_targets[:, 2:6]).T classes = image_targets[:, 1].astype("int") labels = image_targets.shape[1] == 6 # labels if no conf column - conf = ( - None if labels else image_targets[:, 6] - ) # check for confidence presence (label vs pred) + conf = (None if labels else image_targets[:, 6]) # check for confidence presence (label vs pred) if boxes.shape[1]: if boxes.max() <= 1.01: # if normalized with tolerance 0.01 @@ -161,11 +160,11 @@ def plot_images_and_masks( else: mask = image_masks[j].astype(np.bool) if labels or conf[j] > 0.25: # 0.25 conf thresh - label = "%s" % cls if labels else "%s %.1f" % (cls, conf[j]) + label = "%s" % cls if labels else "{} {:.1f}".format(cls, conf[j]) plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) - mosaic[block_y : block_y + h, block_x : block_x + w, :][mask] = mosaic[ - block_y : block_y + h, block_x : block_x + w, : - ][mask] * 0.35 + (np.array(color) * 0.65) + mosaic[block_y:block_y + h, block_x:block_x + + w, :][mask] = mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] * 0.35 + ( + np.array(color) * 0.65) # Draw image filename labels if paths: @@ -193,9 +192,7 @@ def plot_images_and_masks( if fname: r = min(1280.0 / max(h, w) / ns, 1.0) # ratio to limit image size - mosaic = cv2.resize( - mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA - ) + mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA) # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save with Image.fromarray(mosaic) as im: im.save(fname) @@ -213,11 +210,8 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): try: data = pd.read_csv(f) index = np.argmax( - 0.9 * data.values[:, 8] - + 0.1 * data.values[:, 7] - + 0.9 * data.values[:, 12] - + 0.1 * data.values[:, 11], - ) + 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + + 0.1 * data.values[:, 11],) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): @@ -246,4 +240,3 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): ax[1].legend() fig.savefig(save_dir / "results.png", dpi=200) plt.close() 
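The mask drawing in utils/segment/plots.py above composites all instance masks in one vectorized pass rather than blending them one at a time: each mask contributes alpha * color on its own pixels and multiplies everything beneath it by (1 - alpha), so the whole stack reduces to a cumulative product of the (1 - alpha) layers. A minimal, self-contained sketch of that identity follows; composite_masks, its shapes, and the argument layout are illustrative assumptions, not the shipped function:

    import torch

    def composite_masks(img, masks, colors, alpha=0.5):
        """Blend n instance masks into an image in a single pass (sketch).

        img: (3, h, w) float tensor in [0, 1]
        masks: (n, h, w) binary float tensor
        colors: (n, 3) float tensor in [0, 1]
        """
        a = masks[:, None] * alpha     # (n, 1, h, w) per-pixel alpha of each layer
        c = colors[:, :, None, None]   # (n, 3, 1, 1) per-layer colour
        inv = 1.0 - a                  # transmittance of each layer
        # layer k is attenuated by the transmittance of the layers drawn before it
        vis = torch.cat([torch.ones_like(inv[:1]), inv[:-1]]).cumprod(0)
        return img * inv.prod(0) + (a * c * vis).sum(0)

This cumulative product is the same trick the hunk above keeps as inv_alph_masks[:(num_masks - 1)].cumprod(dim=0); it avoids a Python loop over masks, which is why the code credits the faster YOLACT-style drawing.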
- From 9f7633f5ce267a1662b87d7d34b9e448584cc4d4 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 23:03:12 +0530 Subject: [PATCH 071/247] Update coco.yaml --- data/coco.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/coco.yaml b/data/coco.yaml index 37d30d63f7f0..d64dfc7fed76 100644 --- a/data/coco.yaml +++ b/data/coco.yaml @@ -8,7 +8,7 @@ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] -path: datasets/coco # dataset root dir +path: ../datasets/coco # dataset root dir train: train2017.txt # train images (relative to 'path') 118287 images val: val2017.txt # val images (relative to 'path') 5000 images test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 From 1ed6e1a4b3a04a40e89e8fd2832e08fdbbd1f485 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 20 Aug 2022 17:33:34 +0000 Subject: [PATCH 072/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/plots.py | 6 +++--- utils/segment/plots.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/plots.py b/utils/plots.py index ed227008e113..f23876e0a170 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -815,7 +815,7 @@ def plot_targets_txt(): # from utils.plots import *; plot_targets_txt() fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) ax = ax.ravel() for i in range(4): - ax[i].hist(x[i], bins=100, label="{:.3g} +/- {:.3g}".format(x[i].mean(), x[i].std())) + ax[i].hist(x[i], bins=100, label=f"{x[i].mean():.3g} +/- {x[i].std():.3g}") ax[i].legend() ax[i].set_title(s[i]) plt.savefig("targets.jpg", dpi=200) @@ -969,7 +969,7 @@ def profile_idetection(start=0, stop=0, labels=(), save_dir=""): else: a.remove() except Exception as e: - print("Warning: Plotting error for {}; {}".format(f, e)) + print(f"Warning: Plotting error for {f}; {e}") ax[1].legend() plt.savefig(Path(save_dir) / "idetection_profile.png", dpi=200) @@ -990,7 +990,7 @@ def plot_evolve(evolve_csv="path/to/evolve.csv",): # from utils.plots import *; plt.subplot(6, 5, i + 1) plt.scatter(v, f, c=hist2d(v, f, 20), cmap="viridis", alpha=0.8, edgecolors="none") plt.plot(mu, f.max(), "k+", markersize=15) - plt.title("{} = {:.3g}".format(k, mu), fontdict={"size": 9}) # limit to 40 characters + plt.title(f"{k} = {mu:.3g}", fontdict={"size": 9}) # limit to 40 characters if i % 5 != 0: plt.yticks([]) print("%15s: %.3g" % (k, mu)) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index b0774213ede0..8974fdfe1274 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -160,7 +160,7 @@ def plot_images_and_masks( else: mask = image_masks[j].astype(np.bool) if labels or conf[j] > 0.25: # 0.25 conf thresh - label = "%s" % cls if labels else "{} {:.1f}".format(cls, conf[j]) + label = "%s" % cls if labels else f"{cls} {conf[j]:.1f}" plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] = mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] * 0.35 + ( From e040d5c09beee9873d35255b2059a02c95343695 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 23:13:23 +0530 Subject: [PATCH 073/247] cleanup --- segment/detect.py | 279 ---------------------------------------------- segment/train.py | 35 ++---- segment/val.py | 8 -- 3 files changed, 8 
insertions(+), 314 deletions(-) delete mode 100644 segment/detect.py diff --git a/segment/detect.py b/segment/detect.py deleted file mode 100644 index 2eac4e46321f..000000000000 --- a/segment/detect.py +++ /dev/null @@ -1,279 +0,0 @@ -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license -""" -Run inference on images, videos, directories, streams, etc. - -Usage - sources: - $ python path/to/detect.py --weights yolov5s.pt --source 0 # webcam - img.jpg # image - vid.mp4 # video - path/ # directory - path/*.jpg # glob - 'https://youtu.be/Zgi9g1ksQHc' # YouTube - 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream - -Usage - formats: - $ python path/to/detect.py --weights yolov5s.pt # PyTorch - yolov5s.torchscript # TorchScript - yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn - yolov5s.xml # OpenVINO - yolov5s.engine # TensorRT - yolov5s.mlmodel # CoreML (macOS-only) - yolov5s_saved_model # TensorFlow SavedModel - yolov5s.pb # TensorFlow GraphDef - yolov5s.tflite # TensorFlow Lite - yolov5s_edgetpu.tflite # TensorFlow Edge TPU -""" - -import argparse -import os -import sys -from pathlib import Path - -import torch -import torch.backends.cudnn as cudnn - -FILE = Path(__file__).resolve() -ROOT = FILE.parents[1] # YOLOv5 root directory -if str(ROOT) not in sys.path: - sys.path.append(str(ROOT)) # add ROOT to PATH -ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative - -from models.experimental import attempt_load -from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams -from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, - increment_path, print_args, scale_coords, strip_optimizer, xyxy2xywh) -from utils.plots import Annotator, colors, save_one_box -from utils.segment.plots import plot_masks -from utils.torch_utils import select_device, time_sync -from utils.segment.general import non_max_suppression_masks, scale_masks, process_mask_upsample - - -@torch.no_grad() -def run( - weights=ROOT / 'yolov5s.pt', # model.pt path(s) - source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam - imgsz=(640, 640), # inference size (height, width) - conf_thres=0.25, # confidence threshold - iou_thres=0.45, # NMS IOU threshold - max_det=1000, # maximum detections per image - device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu - view_img=False, # show results - save_txt=False, # save results to *.txt - save_conf=False, # save confidences in --save-txt labels - save_crop=False, # save cropped prediction boxes - nosave=False, # do not save images/videos - classes=None, # filter by class: --class 0, or --class 0 2 3 - agnostic_nms=False, # class-agnostic NMS - augment=False, # augmented inference - visualize=False, # visualize features - update=False, # update all models - project=ROOT / 'runs/detect', # save results to project/name - name='exp', # save results to project/name - exist_ok=False, # existing project/name ok, do not increment - line_thickness=3, # bounding box thickness (pixels) - hide_labels=False, # hide labels - hide_conf=False, # hide confidences - half=False, # use FP16 half-precision inference -): - source = str(source) - save_img = not nosave and not source.endswith('.txt') # save inference images - is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) - is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://')) - webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file) - if is_url and is_file: - source = check_file(source) # download - - # Directories - save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run - (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir - - # Load model - device = select_device(device) - model = attempt_load(weights, device=device, inplace=True, fuse=True) - stride = max(int(model.stride.max()), 32) # model stride - names = model.module.names if hasattr(model, 'module') else model.names # get class names - model.half() if half else model.float() - pt = True - imgsz = check_img_size(imgsz, s=stride) # check image size - - # Dataloader - if webcam: - view_img = check_imshow() - cudnn.benchmark = True # set True to speed up constant image size inference - dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) - bs = len(dataset) # batch_size - else: - dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) - bs = 1 # batch_size - vid_path, vid_writer = [None] * bs, [None] * bs - - # Run inference - if str(device) != "cpu": - im = torch.zeros(1, 3, *imgsz).to(device).half() # input image - model(im) # warmup - seen, windows, dt = 0, [], [0.0, 0.0, 0.0] - for path, im, im0s, vid_cap, s in dataset: - t1 = time_sync() - im = torch.from_numpy(im).to(device) - im = im.half() if half else im.float() # uint8 to fp16/32 - im /= 255 # 0 - 255 to 0.0 - 1.0 - if len(im.shape) == 3: - im = im[None] # expand for batch dim - t2 = time_sync() - dt[0] += t2 - t1 - - # Inference - visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False - pred, out = model(im, augment=augment, visualize=visualize) - proto = out[1] - t3 = time_sync() - dt[1] += t3 - t2 - - # NMS - pred = non_max_suppression_masks(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) - dt[2] += time_sync() - t3 - - # Second-stage classifier (optional) - # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s) - - # Process predictions - for i, det in enumerate(pred): # per image - seen += 1 - if webcam: # batch_size >= 1 - p, im0, frame = path[i], im0s[i].copy(), dataset.count - s += f'{i}: ' - else: - p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0) - - p = Path(p) # to Path - save_path = str(save_dir / p.name) # im.jpg - txt_path = str(save_dir / 'labels' / p.stem) + ('' 
if dataset.mode == 'image' else f'_{frame}') # im.txt - s += '%gx%g ' % im.shape[2:] # print string - gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh - imc = im0.copy() if save_crop else im0 # for save_crop - annotator = Annotator(im0, line_width=line_thickness, example=str(names)) - if len(det): - # mask stuff - masks_conf = det[:, 6:] - # binary mask, (img_h, img_w, n) - masks = process_mask_upsample(proto[i], masks_conf, det[:, :4], im.shape[2:]) - # n, img_h, img_w - masks = masks.permute(2, 0, 1).contiguous() - # bbox stuff - det = det[:, :6] # update the value in outputs, remove mask part. - # Rescale boxes from img_size to im0 size - det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() - - # Print results - for c in det[:, -1].unique(): - n = (det[:, -1] == c).sum() # detections per class - s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string - - # plot masks - mcolors = [colors(int(cls)) for cls in range(len(det[:, 5]))] - # NOTE: this way to draw masks is faster, - # but the image might get blurred, - # from https://github.com/dbolya/yolact - # image with masks, (img_h, img_w, 3) - img_masks = plot_masks(im[i], masks, mcolors) - # scale image to original hw - img_masks = scale_masks(im.shape[2:], img_masks, im0.shape) - annotator.im = img_masks - - # Write results - for j, (*xyxy, conf, cls) in enumerate(det): - if save_txt: # Write to file - xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh - line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format - with open(f'{txt_path}.txt', 'a') as f: - f.write(('%g ' * len(line)).rstrip() % line + '\n') - - if save_img or save_crop or view_img: # Add bbox to image - c = int(cls) # integer class - label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') - annotator.box_label(xyxy, label, color=colors(j, True)) - if save_crop: - save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) - - # Stream results - im0 = annotator.result() - if view_img: - if p not in windows: - windows.append(p) - cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) - cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) - cv2.imshow(str(p), im0) - cv2.waitKey(1) # 1 millisecond - - # Save results (image with detections) - if save_img: - if dataset.mode == 'image': - cv2.imwrite(save_path, im0) - else: # 'video' or 'stream' - if vid_path[i] != save_path: # new video - vid_path[i] = save_path - if isinstance(vid_writer[i], cv2.VideoWriter): - vid_writer[i].release() # release previous video writer - if vid_cap: # video - fps = vid_cap.get(cv2.CAP_PROP_FPS) - w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - else: # stream - fps, w, h = 30, im0.shape[1], im0.shape[0] - save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos - vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) - vid_writer[i].write(im0) - - # Print time (inference-only) - LOGGER.info(f'{s}Done. 
({t3 - t2:.3f}s)') - - # Print results - t = tuple(x / seen * 1E3 for x in dt) # speeds per image - LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t) - if save_txt or save_img: - s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' - LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") - if update: - strip_optimizer(weights) # update model (to fix SourceChangeWarning) - - -def parse_opt(): - parser = argparse.ArgumentParser() - parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)') - parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') - parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') - parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') - parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') - parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image') - parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') - parser.add_argument('--view-img', action='store_true', help='show results') - parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') - parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') - parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes') - parser.add_argument('--nosave', action='store_true', help='do not save images/videos') - parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3') - parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') - parser.add_argument('--augment', action='store_true', help='augmented inference') - parser.add_argument('--visualize', action='store_true', help='visualize features') - parser.add_argument('--update', action='store_true', help='update all models') - parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name') - parser.add_argument('--name', default='exp', help='save results to project/name') - parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') - parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') - parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels') - parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') - parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') - opt = parser.parse_args() - opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand - print_args(vars(opt)) - return opt - - -def main(opt): - check_requirements(exclude=('tensorboard', 'thop')) - run(**vars(opt)) - - -if __name__ == "__main__": - opt = parse_opt() - main(opt) diff --git a/segment/train.py b/segment/train.py index 5986aa4278ad..fc8753f14c65 100644 --- a/segment/train.py +++ b/segment/train.py @@ -43,15 +43,13 @@ from models.yolo import Model from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size -from utils.callbacks import Callbacks from utils.segment.dataloaders import create_dataloader from utils.downloads import 
attempt_download from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights, - labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer) + labels_to_image_weights, one_cycle, print_args, print_mutation, strip_optimizer) from utils.loggers import GenericLogger -from utils.loggers.wandb.wandb_utils import check_wandb_resume from utils.segment.loss import ComputeLoss from utils.segment.metrics import fitness from utils.plots import plot_evolve, plot_labels @@ -69,7 +67,7 @@ import yaml from datetime import datetime -def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary +def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio= \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio @@ -97,11 +95,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio if RANK in {-1, 0}: logger = GenericLogger( opt=opt, console_logger=LOGGER - ) # loggers instance - - # Register actions - # for k in methods(loggers): - # callbacks.register_action(k, callback=getattr(loggers, k)) + ) # Config plots = not evolve and not opt.noplots # create plots @@ -166,8 +160,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay) g[0].append(v.weight) - # hyp['lr0'] = hyp['lr0'] / batch_size * 128 - # hyp['warmup_bias_lr'] = 0.01 if opt.optimizer == 'Adam': optimizer = Adam(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum elif opt.optimizer == 'AdamW': @@ -384,13 +376,8 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio % (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0],imgs.shape[-1])) # for plots if mask_ratio != 1: - masks = F.interpolate( - masks[None, :].float(), - (imgsz, imgsz), - mode="bilinear", - align_corners=False, + masks = F.interpolate(masks[None, :].float(), (imgsz, imgsz), mode="bilinear", align_corners=False, ).squeeze(0) - #callbacks.run('on_train_batch_end', ni, model, imgs, targets, masks, paths, plots) if plots: if ni < 3: f = save_dir / f"train_batch{ni}.jpg" # filename @@ -407,7 +394,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio if RANK in {-1, 0}: # mAP - # callbacks.run('on_train_epoch_end', epoch=epoch) ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) final_epoch = (epoch + 1 == epochs) or stopper.possible_stop if not noval or final_epoch: # Calculate mAP @@ -419,7 +405,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio dataloader=val_loader, save_dir=save_dir, plots=plots, - #callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, overlap=overlap) @@ -487,14 +472,12 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio save_json=is_coco, verbose=True, plots=plots, - #callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, overlap=overlap) # val best model with plots if 
is_coco: metrics_dict = dict(zip(KEYS, list(mloss) + list(results) + lr)) logger.log_metrics(metrics_dict, epoch) - #callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) # on train end callback using genericLogger logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs+1) if not opt.evolve: @@ -505,7 +488,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") logger.log_images(files, "Results") - # callbacks.run('on_train_end', last, best, plots, epoch, results) torch.cuda.empty_cache() return results @@ -557,7 +539,7 @@ def parse_opt(known=False): return opt -def main(opt, callbacks=Callbacks()): +def main(opt): # Checks if RANK in {-1, 0}: print_args(vars(opt)) @@ -565,7 +547,7 @@ def main(opt, callbacks=Callbacks()): check_requirements(exclude=['thop']) # Resume - if opt.resume and not check_wandb_resume(opt) and not opt.evolve: # resume an interrupted run + if opt.resume and not opt.evolve: # resume an interrupted run ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist' with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f: @@ -599,7 +581,7 @@ def main(opt, callbacks=Callbacks()): # Train if not opt.evolve: - train(opt.hyp, opt, device, callbacks) + train(opt.hyp, opt, device) if WORLD_SIZE > 1 and RANK == 0: LOGGER.info('Destroying process group... ') dist.destroy_process_group() @@ -681,8 +663,7 @@ def main(opt, callbacks=Callbacks()): hyp[k] = round(hyp[k], 5) # significant digits # Train mutation - results = train(hyp.copy(), opt, device, callbacks) - callbacks = Callbacks() + results = train(hyp.copy(), opt, device) # Write mutation results print_mutation(results, hyp.copy(), save_dir, opt.bucket) diff --git a/segment/val.py b/segment/val.py index a301f636fb7d..11e83f3aaec7 100644 --- a/segment/val.py +++ b/segment/val.py @@ -38,7 +38,6 @@ import pycocotools.mask as mask_util from models.common import DetectMultiBackend from models.experimental import attempt_load # scoped to avoid circular import -from utils.callbacks import Callbacks from utils.segment.dataloaders import create_dataloader from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, @@ -166,7 +165,6 @@ def run( plots=True, overlap=False, mask_downsample_ratio=1, - callbacks=Callbacks(), compute_loss=None, ): process = process_mask_upsample if plots else process_mask @@ -245,10 +243,8 @@ def run( metrics = Metrics() loss = torch.zeros(4, device=device) jdict, stats = [], [] - callbacks.run('on_val_start') pbar = tqdm(dataloader, desc=s, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar for batch_i, (im, targets, paths, shapes, masks) in enumerate(pbar): - callbacks.run('on_val_batch_start') t1 = time_sync() if cuda: im = im.to(device, non_blocking=True) @@ -326,7 +322,6 @@ def run( pred_masks = scale_masks(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]) save_one_json(predn, jdict, path, class_map, pred_masks) # append to COCO-JSON dictionary - callbacks.run('on_val_image_end', pred[:, :6], predn[:, :6], path, names, im[si]) # Plot images if plots and batch_i < 3: @@ -343,8 +338,6 @@ def run( 
plot_images_and_masks(im, output_to_target(out, filter_dets=15), plot_masks, paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred - callbacks.run('on_val_batch_end') - # Compute metrics stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): @@ -372,7 +365,6 @@ def run( # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) - #callbacks.run('on_val_end') # in case the cocoeval will update map ( From c7756b00cacd64c11637eae938f11848809038ca Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 20 Aug 2022 17:47:57 +0000 Subject: [PATCH 074/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/train.py | 14 ++++++++------ segment/val.py | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/segment/train.py b/segment/train.py index 7c3faad86900..cd48e909a9f3 100644 --- a/segment/train.py +++ b/segment/train.py @@ -44,7 +44,6 @@ from models.yolo import Model from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size -from utils.segment.dataloaders import create_dataloader from utils.downloads import attempt_download from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, @@ -52,6 +51,7 @@ labels_to_image_weights, one_cycle, print_args, print_mutation, strip_optimizer) from utils.loggers import GenericLogger from utils.plots import plot_evolve, plot_labels +from utils.segment.dataloaders import create_dataloader from utils.segment.loss import ComputeLoss from utils.segment.metrics import fitness from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first @@ -96,9 +96,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary # Loggers data_dict = None if RANK in {-1, 0}: - logger = GenericLogger( - opt=opt, console_logger=LOGGER - ) + logger = GenericLogger(opt=opt, console_logger=LOGGER) # Config plots = not evolve and not opt.noplots # create plots @@ -382,7 +380,11 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0], imgs.shape[-1])) # for plots if mask_ratio != 1: - masks = F.interpolate(masks[None, :].float(), (imgsz, imgsz), mode="bilinear", align_corners=False, + masks = F.interpolate( + masks[None, :].float(), + (imgsz, imgsz), + mode="bilinear", + align_corners=False, ).squeeze(0) if plots: if ni < 3: @@ -411,7 +413,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary dataloader=val_loader, save_dir=save_dir, plots=plots, - compute_loss=compute_loss, + compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, overlap=overlap) # Update best mAP diff --git a/segment/val.py b/segment/val.py index f86893b3d95a..2cd0c36264ef 100644 --- a/segment/val.py +++ b/segment/val.py @@ -39,12 +39,12 @@ from models.common import DetectMultiBackend from models.experimental import attempt_load # scoped to avoid circular import -from utils.segment.dataloaders import create_dataloader from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, scale_coords, xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, 
box_iou from utils.plots import output_to_target, plot_val_study +from utils.segment.dataloaders import create_dataloader from utils.segment.general import mask_iou, non_max_suppression_masks, process_mask, process_mask_upsample, scale_masks from utils.segment.metrics import Metrics, ap_per_class_box_and_mask from utils.segment.plots import plot_images_and_masks From 8643c17c11161391ba25392b79e76dcaa57cef49 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sat, 20 Aug 2022 19:56:38 +0200 Subject: [PATCH 075/247] Fix duplicate plots.py --- utils/plots.py | 574 ------------------------------------------------- 1 file changed, 574 deletions(-) diff --git a/utils/plots.py b/utils/plots.py index f23876e0a170..7417308c4d82 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -517,577 +517,3 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, # cv2.imwrite(f, crop) # save BGR, https://github.com/ultralytics/yolov5/issues/7007 chroma subsampling issue Image.fromarray(crop[..., ::-1]).save(f, quality=95, subsampling=0) # save RGB return crop - - -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license -""" -Plotting utils -""" - -import math -import os -from copy import copy -from itertools import repeat -from pathlib import Path - -import cv2 -import matplotlib -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import seaborn as sn -import torch -from PIL import Image, ImageDraw - -from .metrics import fitness - -# Settings -RANK = int(os.getenv("RANK", -1)) -matplotlib.rc("font", **{"size": 11}) -matplotlib.use("Agg") # for writing to files only - - -class Colors: - # Ultralytics color palette https://ultralytics.com/ - def __init__(self): - # hex = matplotlib.colors.TABLEAU_COLORS.values() - hex = ( - "FF3838", - "FF9D97", - "FF701F", - "FFB21D", - "CFD231", - "48F90A", - "92CC17", - "3DDB86", - "1A9334", - "00D4BB", - "2C99A8", - "00C2FF", - "344593", - "6473FF", - "0018EC", - "8438FF", - "520085", - "CB38FF", - "FF95C8", - "FF37C7", - ) - self.palette = [self.hex2rgb("#" + c) for c in hex] - self.n = len(self.palette) - - def __call__(self, i, bgr=False): - c = self.palette[int(i) % self.n] - return (c[2], c[1], c[0]) if bgr else c - - @staticmethod - def hex2rgb(h): # rgb order (PIL) - return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)) - - -colors = Colors() # create instance for 'from utils.plots import colors' - - -class Annotator: - if RANK in (-1, 0): - check_font() # download TTF if necessary - - # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations - def __init__( - self, - im, - line_width=None, - font_size=None, - font="Arial.ttf", - pil=False, - example="abc", - ): - assert (im.data.contiguous), "Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images." 
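# (A minimal usage sketch for the Colors palette shown above; this deleted block
# duplicates the copy kept earlier in utils/plots.py, per this commit's subject.)
# colors = Colors()      # 20 hex entries
# colors(0)              # -> (255, 56, 56), i.e. '#FF3838' parsed by hex2rgb
# colors(20)             # -> (255, 56, 56) again: int(i) % n wraps the palette
# colors(0, bgr=True)    # -> (56, 56, 255), channel order for cv2 drawing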
- self.pil = pil or not is_ascii(example) - if self.pil: # use PIL - self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) - self.draw = ImageDraw.Draw(self.im) - self.font = check_font(font="Arial.Unicode.ttf",) - else: # use cv2 - self.im = im - self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width - - def box_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255)): - # Add one xyxy box to image with label - if self.pil or not is_ascii(label): - self.draw.rectangle(box, width=self.lw, outline=color) # box - if label: - w, h = self.font.getsize(label) # text width, height - outside = box[1] - h >= 0 # label fits outside box - self.draw.rectangle( - [ - box[0], - box[1] - h if outside else box[1], - box[0] + w + 1, - box[1] + 1 if outside else box[1] + h + 1,], - fill=color, - ) - # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0 - self.draw.text( - (box[0], box[1] - h if outside else box[1]), - label, - fill=txt_color, - font=self.font, - ) - else: # cv2 - p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) - cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA) - if label: - tf = max(self.lw - 1, 1) # font thickness - w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height - outside = p1[1] - h - 3 >= 0 # label fits outside box - p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 - cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled - cv2.putText( - self.im, - label, - (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), - 0, - self.lw / 3, - txt_color, - thickness=tf, - lineType=cv2.LINE_AA, - ) - - def rectangle(self, xy, fill=None, outline=None, width=1): - # Add rectangle to image (PIL-only) - self.draw.rectangle(xy, fill, outline, width) - - def text(self, xy, text, txt_color=(255, 255, 255)): - # Add text to image (PIL-only) - w, h = self.font.getsize(text) # text width, height - self.draw.text((xy[0], xy[1] - h + 1), text, fill=txt_color, font=self.font) - - def result(self): - # Return annotated image as array - return np.asarray(self.im) - - -def hist2d(x, y, n=100): - # 2d histogram used in labels.png and evolve.png - xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n) - hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges)) - xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1) - yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1) - return np.log(hist[xidx, yidx]) - - -def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5): - from scipy.signal import butter, filtfilt - - # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy - def butter_lowpass(cutoff, fs, order): - nyq = 0.5 * fs - normal_cutoff = cutoff / nyq - return butter(order, normal_cutoff, btype="low", analog=False) - - b, a = butter_lowpass(cutoff, fs, order=order) - return filtfilt(b, a, data) # forward-backward filter - - -def output_to_target(output, filter_dets=10): - # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] - targets = [] - for i, o in enumerate(output): - o = o[:filter_dets] - for *box, conf, cls in o.cpu().numpy()[:, :6]: - targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) - return np.array(targets) - - -def plot_images( - images, - targets, - paths=None, - fname="images.jpg", - names=None, - max_size=1920, - max_subplots=16, -): - # Plot image 
grid with labels - if isinstance(images, torch.Tensor): - images = images.cpu().float().numpy() - if isinstance(targets, torch.Tensor): - targets = targets.cpu().numpy() - if np.max(images[0]) <= 1: - images *= 255.0 # de-normalise (optional) - bs, _, h, w = images.shape # batch size, _, height, width - bs = min(bs, max_subplots) # limit plot images - ns = np.ceil(bs ** 0.5) # number of subplots (square) - - # Build Image - mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init - for i, im in enumerate(images): - if i == max_subplots: # if last batch has fewer images than we expect - break - x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin - im = im.transpose(1, 2, 0) - mosaic[y:y + h, x:x + w, :] = im - - # Resize (optional) - scale = max_size / ns / max(h, w) - if scale < 1: - h = math.ceil(scale * h) - w = math.ceil(scale * w) - mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) - - # Annotate - fs = int((h + w) * ns * 0.01) # font size - annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True) - for i in range(i + 1): - x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin - annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders - if paths: - annotator.text( - (x + 5, y + 5 + h), - text=Path(paths[i]).name[:40], - txt_color=(220, 220, 220), - ) # filenames - if len(targets) > 0: - ti = targets[targets[:, 0] == i] # image targets - boxes = xywh2xyxy(ti[:, 2:6]).T - classes = ti[:, 1].astype("int") - labels = ti.shape[1] == 6 # labels if no conf column - conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) - - if boxes.shape[1]: - if boxes.max() <= 1.01: # if normalized with tolerance 0.01 - boxes[[0, 2]] *= w # scale to pixels - boxes[[1, 3]] *= h - elif scale < 1: # absolute coords need scale if image scales - boxes *= scale - boxes[[0, 2]] += x - boxes[[1, 3]] += y - for j, box in enumerate(boxes.T.tolist()): - cls = classes[j] - color = colors(cls) - cls = names[cls] if names else cls - if labels or conf[j] > 0.25: # 0.25 conf thresh - label = f"{cls}" if labels else f"{cls} {conf[j]:.1f}" - annotator.box_label(box, label, color=color) - annotator.im.save(fname) # save - return annotator.result() - - -def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=""): - # Plot LR simulating training for full epochs - optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals - y = [] - for _ in range(epochs): - scheduler.step() - y.append(optimizer.param_groups[0]["lr"]) - plt.plot(y, ".-", label="LR") - plt.xlabel("epoch") - plt.ylabel("LR") - plt.grid() - plt.xlim(0, epochs) - plt.ylim(0) - plt.savefig(Path(save_dir) / "LR.png", dpi=200) - plt.close() - - -def plot_val_txt(): # from utils.plots import *; plot_val() - # Plot val.txt histograms - x = np.loadtxt("val.txt", dtype=np.float32) - box = xyxy2xywh(x[:, :4]) - cx, cy = box[:, 0], box[:, 1] - - fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True) - ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0) - ax.set_aspect("equal") - plt.savefig("hist2d.png", dpi=300) - - fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True) - ax[0].hist(cx, bins=600) - ax[1].hist(cy, bins=600) - plt.savefig("hist1d.png", dpi=200) - - -def plot_targets_txt(): # from utils.plots import *; plot_targets_txt() - # Plot targets.txt histograms - x = np.loadtxt("targets.txt", dtype=np.float32).T - s = ["x targets", "y targets", "width targets", "height targets"] - fig, ax = 
plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) - ax = ax.ravel() - for i in range(4): - ax[i].hist(x[i], bins=100, label=f"{x[i].mean():.3g} +/- {x[i].std():.3g}") - ax[i].legend() - ax[i].set_title(s[i]) - plt.savefig("targets.jpg", dpi=200) - - -def plot_val_study(file="", dir="", x=None): # from utils.plots import *; plot_val_study() - # Plot file=study.txt generated by val.py (or plot all study*.txt in dir) - save_dir = Path(file).parent if file else Path(dir) - plot2 = False # plot additional results - if plot2: - ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)[1].ravel() - - fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True) - # for f in [save_dir / f'study_coco_{x}.txt' for x in ['yolov5n6', 'yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]: - for f in sorted(save_dir.glob("study*.txt")): - y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T - x = np.arange(y.shape[1]) if x is None else np.array(x) - if plot2: - s = [ - "P", - "R", - "mAP@.5", - "mAP@.5:.95", - "t_preprocess (ms/img)", - "t_inference (ms/img)", - "t_NMS (ms/img)",] - for i in range(7): - ax[i].plot(x, y[i], ".-", linewidth=2, markersize=8) - ax[i].set_title(s[i]) - - j = y[3].argmax() + 1 - ax2.plot( - y[5, 1:j], - y[3, 1:j] * 1e2, - ".-", - linewidth=2, - markersize=8, - label=f.stem.replace("study_coco_", "").replace("yolo", "YOLO"), - ) - - ax2.plot( - 1e3 / np.array([209, 140, 97, 58, 35, 18]), - [34.6, 40.5, 43.0, 47.5, 49.7, 51.5], - "k.-", - linewidth=2, - markersize=8, - alpha=0.25, - label="EfficientDet", - ) - - ax2.grid(alpha=0.2) - ax2.set_yticks(np.arange(20, 60, 5)) - ax2.set_xlim(0, 57) - ax2.set_ylim(25, 55) - ax2.set_xlabel("GPU Speed (ms/img)") - ax2.set_ylabel("COCO AP val") - ax2.legend(loc="lower right") - f = save_dir / "study.png" - print(f"Saving {f}...") - plt.savefig(f, dpi=300) - - -def plot_labels(labels, names=(), save_dir=Path("")): - # plot dataset labels - print("Plotting labels... 
") - c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes - nc = int(c.max() + 1) # number of classes - x = pd.DataFrame(b.transpose(), columns=["x", "y", "width", "height"]) - - # seaborn correlogram - sn.pairplot( - x, - corner=True, - diag_kind="auto", - kind="hist", - diag_kws=dict(bins=50), - plot_kws=dict(pmax=0.9), - ) - plt.savefig(save_dir / "labels_correlogram.jpg", dpi=200) - plt.close() - - # matplotlib labels - matplotlib.use("svg") # faster - ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel() - y = ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8) - # [y[2].patches[i].set_color([x / 255 for x in colors(i)]) for i in range(nc)] # update colors bug #3195 - ax[0].set_ylabel("instances") - if 0 < len(names) < 30: - ax[0].set_xticks(range(len(names))) - ax[0].set_xticklabels(names, rotation=90, fontsize=10) - else: - ax[0].set_xlabel("classes") - sn.histplot(x, x="x", y="y", ax=ax[2], bins=50, pmax=0.9) - sn.histplot(x, x="width", y="height", ax=ax[3], bins=50, pmax=0.9) - - # rectangles - labels[:, 1:3] = 0.5 # center - labels[:, 1:] = xywh2xyxy(labels[:, 1:]) * 2000 - img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255) - for cls, *box in labels[:1000]: - ImageDraw.Draw(img).rectangle(box, width=1, outline=colors(cls)) # plot - ax[1].imshow(img) - ax[1].axis("off") - - for a in [0, 1, 2, 3]: - for s in ["top", "right", "left", "bottom"]: - ax[a].spines[s].set_visible(False) - - plt.savefig(save_dir / "labels.jpg", dpi=200) - matplotlib.use("Agg") - plt.close() - - -def profile_idetection(start=0, stop=0, labels=(), save_dir=""): - # Plot iDetection '*.txt' per-image logs. from utils.plots import *; profile_idetection() - ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel() - s = [ - "Images", - "Free Storage (GB)", - "RAM Usage (GB)", - "Battery", - "dt_raw (ms)", - "dt_smooth (ms)", - "real-world FPS",] - files = list(Path(save_dir).glob("frames*.txt")) - for fi, f in enumerate(files): - try: - results = np.loadtxt(f, ndmin=2).T[:, 90:-30] # clip first and last rows - n = results.shape[1] # number of rows - x = np.arange(start, min(stop, n) if stop else n) - results = results[:, x] - t = results[0] - results[0].min() # set t0=0s - results[0] = x - for i, a in enumerate(ax): - if i < len(results): - label = labels[fi] if len(labels) else f.stem.replace("frames_", "") - a.plot( - t, - results[i], - marker=".", - label=label, - linewidth=1, - markersize=5, - ) - a.set_title(s[i]) - a.set_xlabel("time (s)") - # if fi == len(files) - 1: - # a.set_ylim(bottom=0) - for side in ["top", "right"]: - a.spines[side].set_visible(False) - else: - a.remove() - except Exception as e: - print(f"Warning: Plotting error for {f}; {e}") - ax[1].legend() - plt.savefig(Path(save_dir) / "idetection_profile.png", dpi=200) - - -def plot_evolve(evolve_csv="path/to/evolve.csv",): # from utils.plots import *; plot_evolve() - # Plot evolve.csv hyp evolution results - evolve_csv = Path(evolve_csv) - data = pd.read_csv(evolve_csv) - keys = [x.strip() for x in data.columns] - x = data.values - f = fitness(x) - j = np.argmax(f) # max fitness index - plt.figure(figsize=(10, 12), tight_layout=True) - matplotlib.rc("font", **{"size": 8}) - for i, k in enumerate(keys[7:]): - v = x[:, 7 + i] - mu = v[j] # best single result - plt.subplot(6, 5, i + 1) - plt.scatter(v, f, c=hist2d(v, f, 20), cmap="viridis", alpha=0.8, edgecolors="none") - plt.plot(mu, f.max(), "k+", markersize=15) - plt.title(f"{k} = {mu:.3g}", fontdict={"size": 9}) # 
limit to 40 characters - if i % 5 != 0: - plt.yticks([]) - print("%15s: %.3g" % (k, mu)) - f = evolve_csv.with_suffix(".png") # filename - plt.savefig(f, dpi=200) - plt.close() - print(f"Saved {f}") - - -def plot_results(file="path/to/results.csv", dir="", best=True): - # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') - save_dir = Path(file).parent if file else Path(dir) - fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) - ax = ax.ravel() - files = list(save_dir.glob("results*.csv")) - assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." - for _, f in enumerate(files): - try: - data = pd.read_csv(f) - index = np.argmax(0.9 * data.values[:, 7] + 0.1 * data.values[:, 6]) - s = [x.strip() for x in data.columns] - x = data.values[:, 0] - for i, j in enumerate([1, 2, 3, 4, 5, 8, 9, 10, 6, 7]): - y = data.values[:, j] - # y[y == 0] = np.nan # don't show zero values - ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) - if best: - # best - ax[i].scatter( - index, - y[index], - color="r", - label=f"best:{index}", - marker="*", - linewidth=3, - ) - ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") - else: - # last - ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) - ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}") - # if j in [8, 9, 10]: # share train and val loss y axes - # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) - except Exception as e: - print(f"Warning: Plotting error for {f}: {e}") - ax[1].legend() - fig.savefig(save_dir / "results.png", dpi=200) - plt.close() - - -def plot_one_box(x, img, color=None, label=None, line_thickness=None): - import random - - # Plots one bounding box on image img - tl = (line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1) # line/font thickness - color = color or [random.randint(0, 255) for _ in range(3)] - c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) - cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) - if label: - tf = max(tl - 1, 1) # font thickness - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 - cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled - cv2.putText( - img, - label, - (c1[0], c1[1] - 2), - 0, - tl / 3, - [225, 255, 255], - thickness=tf, - lineType=cv2.LINE_AA, - ) - - -def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detect/exp")): - """ - x: Features to be visualized - module_type: Module type - stage: Module stage within model - n: Maximum number of feature maps to plot - save_dir: Directory to save results - """ - if "Detect" not in module_type: - batch, channels, height, width = x.shape # batch, channels, height, width - if height > 1 and width > 1: - f = f"stage{stage}_{module_type.split('.')[-1]}_features.png" # filename - - blocks = torch.chunk(x[0].cpu(), channels, dim=0) # select batch index 0, block by channels - n = min(n, channels) # number of plots - fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True) # 8 rows x n/8 cols - ax = ax.ravel() - plt.subplots_adjust(wspace=0.05, hspace=0.05) - for i in range(n): - ax[i].imshow(blocks[i].squeeze()) # cmap='gray' - ax[i].axis("off") - - print(f"Saving {save_dir / f}... 
({n}/{channels})") - plt.savefig(save_dir / f, dpi=300, bbox_inches="tight") - plt.close() From 2298fcfd047c27084503cc649a71cbe17c3e7a22 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sat, 20 Aug 2022 20:00:23 +0200 Subject: [PATCH 076/247] Fix check_font() --- utils/general.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/utils/general.py b/utils/general.py index 3f81e8733139..91566681c695 100644 --- a/utils/general.py +++ b/utils/general.py @@ -466,7 +466,6 @@ def check_file(file, suffix=''): return files[0] # return file -''' def check_font(font=FONT, progress=False): # Download font to CONFIG_DIR if necessary font = Path(font) @@ -475,20 +474,6 @@ def check_font(font=FONT, progress=False): url = "https://ultralytics.com/assets/" + font.name LOGGER.info(f'Downloading {url} to {file}...') torch.hub.download_url_to_file(url, str(file), progress=progress) -''' - - -def check_font(font="Arial.ttf", size=10, progress=False): - # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary - font = Path(font) - font = font if font.exists() else (CONFIG_DIR / font.name) - try: - return ImageFont.truetype(str(font) if font.exists() else font.name, size) - except Exception as e: # download if missing - url = "https://ultralytics.com/assets/" + font.name - print(f"Downloading {url} to {font}...") - torch.hub.download_url_to_file(url, str(font), progress=progress) - return ImageFont.truetype(str(font), size) def check_dataset(data, autodownload=True): From c9b376da97addb653ee05dbc9af85e4e998c16c9 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sat, 20 Aug 2022 20:04:02 +0200 Subject: [PATCH 077/247] # torch.use_deterministic_algorithms(True) --- utils/general.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/general.py b/utils/general.py index 91566681c695..30ad949e06cf 100644 --- a/utils/general.py +++ b/utils/general.py @@ -242,7 +242,7 @@ def init_seeds(seed=0, deterministic=False): import torch.backends.cudnn as cudnn if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213 - #torch.use_deterministic_algorithms(True) + # torch.use_deterministic_algorithms(True) os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' os.environ['PYTHONHASHSEED'] = str(seed) From ab00c7b5c375719fad49ea27f402228a60b2ebc4 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 23:36:09 +0530 Subject: [PATCH 078/247] update doc detect->predict --- segment/predict.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 09efa844c6df..2bc2a5629d6f 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -3,7 +3,7 @@ Run inference on images, videos, directories, streams, etc. 
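# (A usage sketch with hypothetical paths, limited to flags this patch touches;
# see the parse_opt() change below for the new runs/predict_segment default.)
# $ python segment/predict.py --weights yolov5s-seg.pt --source data/images \
#     --project runs/predict_segment --name exp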
Usage - sources: - $ python path/to/detect.py --weights yolov5s.pt --source 0 # webcam + $ python path/to/predict.py --weights yolov5s-seg.pt --source 0 # webcam img.jpg # image vid.mp4 # video path/ # directory @@ -12,7 +12,7 @@ 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream Usage - formats: - $ python path/to/detect.py --weights yolov5s.pt # PyTorch + $ python path/to/predict.py --weights yolov5s.pt # PyTorch yolov5s.torchscript # TorchScript yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn yolov5s.xml # OpenVINO @@ -256,7 +256,7 @@ def parse_opt(): parser.add_argument('--augment', action='store_true', help='augmented inference') parser.add_argument('--visualize', action='store_true', help='visualize features') parser.add_argument('--update', action='store_true', help='update all models') - parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name') + parser.add_argument('--project', default=ROOT / 'runs/predict_segment', help='save results to project/name') parser.add_argument('--name', default='exp', help='save results to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') From 38d210ea7f7e63d8b934d95a00db5a3a3cd3535b Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 20 Aug 2022 20:17:00 +0200 Subject: [PATCH 079/247] Resolve precommit for segment/train and segment/val --- segment/train.py | 15 ++++++--------- segment/val.py | 3 +-- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/segment/train.py b/segment/train.py index cd48e909a9f3..fe91893ad08d 100644 --- a/segment/train.py +++ b/segment/train.py @@ -19,7 +19,6 @@ import sys import time from copy import deepcopy -from datetime import datetime from pathlib import Path import numpy as np @@ -27,9 +26,8 @@ import torch.distributed as dist import torch.nn as nn import torch.nn.functional as F -import yaml from torch.nn.parallel import DistributedDataParallel as DDP -from torch.optim import SGD, Adam, AdamW, lr_scheduler +from torch.optim import SGD, Adam, lr_scheduler from tqdm import tqdm import val # for end-of-epoch mAP @@ -43,10 +41,9 @@ from models.experimental import attempt_load from models.yolo import Model from utils.autoanchor import check_anchors -from utils.autobatch import check_train_batch_size from utils.downloads import attempt_download -from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, - check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, +from utils.general import (check_dataset, check_file, check_git_status, check_img_size, + check_requirements, check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, one_cycle, print_args, print_mutation, strip_optimizer) from utils.loggers import GenericLogger @@ -66,12 +63,12 @@ from utils.autobatch import check_train_batch_size from utils.general import LOGGER, check_amp, check_version -from utils.segment.metrics import BEST_KEYS, KEYS +from utils.segment.metrics import KEYS from utils.segment.plots import plot_images_and_masks, plot_results_with_masks def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary - save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio= \ + save_dir, 
epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio @@ -437,7 +434,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary 'ema': deepcopy(ema.ema).half(), 'updates': ema.updates, 'optimizer': optimizer.state_dict(), - #'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None, + # 'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None, 'date': datetime.now().isoformat()} # Save last, best and delete diff --git a/segment/val.py b/segment/val.py index 2cd0c36264ef..2632e6deacd4 100644 --- a/segment/val.py +++ b/segment/val.py @@ -37,7 +37,6 @@ import pycocotools.mask as mask_util import torch.nn.functional as F -from models.common import DetectMultiBackend from models.experimental import attempt_load # scoped to avoid circular import from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, scale_coords, @@ -262,7 +261,7 @@ def run( dt[0] += t2 - t1 # Inference - out, train_out = model(im) #if training else model(im, augment=augment, val=True) # inference, loss outputs + out, train_out = model(im) # if training else model(im, augment=augment, val=True) # inference, loss outputs dt[1] += time_sync() - t2 # Loss From e56df79dba6d8fe031e5eac0a64f9510ce5b6ffb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 20 Aug 2022 18:17:24 +0000 Subject: [PATCH 080/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/train.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/segment/train.py b/segment/train.py index fe91893ad08d..102de6239342 100644 --- a/segment/train.py +++ b/segment/train.py @@ -42,10 +42,10 @@ from models.yolo import Model from utils.autoanchor import check_anchors from utils.downloads import attempt_download -from utils.general import (check_dataset, check_file, check_git_status, check_img_size, - check_requirements, check_suffix, check_yaml, colorstr, get_latest_run, - increment_path, init_seeds, intersect_dicts, labels_to_class_weights, - labels_to_image_weights, one_cycle, print_args, print_mutation, strip_optimizer) +from utils.general import (check_dataset, check_file, check_git_status, check_img_size, check_requirements, + check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, + intersect_dicts, labels_to_class_weights, labels_to_image_weights, one_cycle, print_args, + print_mutation, strip_optimizer) from utils.loggers import GenericLogger from utils.plots import plot_evolve, plot_labels from utils.segment.dataloaders import create_dataloader From 28de97b49ef4a9ee667663ccbaeb66589374b027 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 20 Aug 2022 20:19:29 +0200 Subject: [PATCH 081/247] Resolve precommit for utils/segment --- utils/segment/dataloaders.py | 2 +- utils/segment/general.py | 1 - utils/segment/loss.py | 4 ++-- utils/segment/plots.py | 7 +++---- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py index f6fe642d077f..ced6f23bf151 100644 --- a/utils/segment/dataloaders.py +++ b/utils/segment/dataloaders.py @@ -11,11 +11,11 
@@ import torch from torch.utils.data import DataLoader, distributed +from .augmentations import mixup, random_perspective from ..augmentations import augment_hsv, copy_paste, letterbox from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, seed_worker from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn from ..torch_utils import torch_distributed_zero_first -from .augmentations import mixup, random_perspective def create_dataloader(path, diff --git a/utils/segment/general.py b/utils/segment/general.py index 675fac4fbd92..bed445312cde 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -1,7 +1,6 @@ import time import cv2 -import numpy as np import torch import torch.nn.functional as F import torchvision diff --git a/utils/segment/loss.py b/utils/segment/loss.py index d1027a387f7e..e0a1823d43f0 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F +from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import is_parallel -from .general import crop, masks_iou class MaskIOULoss(nn.Module): @@ -184,7 +184,7 @@ def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") lseg = crop(lseg, xyxy) lseg = lseg.mean(dim=(0, 1)) / w / h - return lseg.mean() #, iou# + lseg_iou.mean() + return lseg.mean() # , iou# + lseg_iou.mean() def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 8974fdfe1274..2de7a54135d5 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -9,7 +9,7 @@ from PIL import Image from ..general import xywh2xyxy -from ..plots import Annotator, colors +from ..plots import colors def plot_masks(img, masks, colors, alpha=0.5): @@ -162,9 +162,8 @@ def plot_images_and_masks( if labels or conf[j] > 0.25: # 0.25 conf thresh label = "%s" % cls if labels else f"{cls} {conf[j]:.1f}" plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) - mosaic[block_y:block_y + h, block_x:block_x + - w, :][mask] = mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] * 0.35 + ( - np.array(color) * 0.65) + mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] = \ + mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] * 0.35 + (np.array(color) * 0.65) # Draw image filename labels if paths: From 1f7138733d90c27e8f4930f38400b31c6bc90d92 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 20 Aug 2022 18:20:00 +0000 Subject: [PATCH 082/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/dataloaders.py | 2 +- utils/segment/loss.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py index ced6f23bf151..f6fe642d077f 100644 --- a/utils/segment/dataloaders.py +++ b/utils/segment/dataloaders.py @@ -11,11 +11,11 @@ import torch from torch.utils.data import DataLoader, distributed -from .augmentations import mixup, random_perspective from ..augmentations import augment_hsv, copy_paste, letterbox from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, seed_worker from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn from 
..torch_utils import torch_distributed_zero_first +from .augmentations import mixup, random_perspective def create_dataloader(path, diff --git a/utils/segment/loss.py b/utils/segment/loss.py index e0a1823d43f0..bff4b25ca867 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F -from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import is_parallel +from .general import crop, masks_iou class MaskIOULoss(nn.Module): From fd9ffb0009852289c431ca5e31e763b47b8eb191 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 20 Aug 2022 20:22:54 +0200 Subject: [PATCH 083/247] Resolve precommit min_wh --- utils/segment/general.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index bed445312cde..a4999845a79d 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -34,7 +34,8 @@ def non_max_suppression_masks( assert (0 <= iou_thres <= 1), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" # Settings - min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height + # min_wh = 2 # (pixels) minimum box width and height + max_wh = 7680 # (pixels) maximum box width and height max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() time_limit = 10.0 # seconds to quit after redundant = True # require redundant detections From 74eabbffc1a265c070be69f566872e0b96d012fc Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 20 Aug 2022 20:23:30 +0200 Subject: [PATCH 084/247] Resolve precommit utils/segment/plots --- utils/segment/plots.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 2de7a54135d5..17877505ac4d 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -76,14 +76,14 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=None): def plot_images_and_masks( - images, - targets, - masks, - paths=None, - fname="images.jpg", - names=None, - max_size=640, - max_subplots=16, + images, + targets, + masks, + paths=None, + fname="images.jpg", + names=None, + max_size=640, + max_subplots=16, ): if isinstance(images, torch.Tensor): images = images.cpu().float().numpy() @@ -163,7 +163,7 @@ def plot_images_and_masks( label = "%s" % cls if labels else f"{cls} {conf[j]:.1f}" plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] = \ - mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] * 0.35 + (np.array(color) * 0.65) + mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] * 0.35 + (np.array(color) * 0.65) # Draw image filename labels if paths: @@ -210,7 +210,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11],) + 0.1 * data.values[:, 11], ) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From 791a905dc752828a981195a46c0db9fb65b1d03b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 20 Aug 2022 18:23:55 +0000 Subject: [PATCH 085/247] [pre-commit.ci] auto fixes from 
pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/plots.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 17877505ac4d..dafe5f9eb31a 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -76,14 +76,14 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=None): def plot_images_and_masks( - images, - targets, - masks, - paths=None, - fname="images.jpg", - names=None, - max_size=640, - max_subplots=16, + images, + targets, + masks, + paths=None, + fname="images.jpg", + names=None, + max_size=640, + max_subplots=16, ): if isinstance(images, torch.Tensor): images = images.cpu().float().numpy() @@ -210,7 +210,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11], ) + 0.1 * data.values[:, 11],) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From a752e671f1ee434e479ecdb88f02e5d755b7ee68 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 20 Aug 2022 20:27:17 +0200 Subject: [PATCH 086/247] Resolve precommit utils/segment/general --- utils/segment/general.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index a4999845a79d..f1655e488944 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -5,7 +5,7 @@ import torch.nn.functional as F import torchvision -from ..general import xywh2xyxy +from ..general import LOGGER, xywh2xyxy from ..metrics import box_iou @@ -53,11 +53,11 @@ def non_max_suppression_masks( # Cat apriori labels if autolabelling if labels and len(labels[xi]): - l = labels[xi] - v = torch.zeros((len(l), nc + 5), device=x.device) - v[:, :4] = l[:, 1:5] # box + lb = labels[xi] + v = torch.zeros((len(lb), nc + 5), device=x.device) + v[:, :4] = lb[:, 1:5] # box v[:, 4] = 1.0 # conf - v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls + v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls x = torch.cat((x, v), 0) # If none remain process next image @@ -101,7 +101,7 @@ def non_max_suppression_masks( i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS if i.shape[0] > max_det: # limit detections i = i[:max_det] - if merge and (1 < n < 3e3): # Merge NMS (boxes merged using weighted mean) + if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix weights = iou * scores[None] # box weights @@ -111,7 +111,7 @@ def non_max_suppression_masks( output[xi] = x[i] if (time.time() - t) > time_limit: - print(f"WARNING: NMS time limit {time_limit}s exceeded") + LOGGER.warning(f'WARNING: NMS time limit {time_limit:.3f}s exceeded') break # time limit exceeded return output From 1a84f47a6a802431cad1fb1bfc93d5c35929e4bc Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 20 Aug 2022 20:37:00 +0200 Subject: [PATCH 087/247] Align NMS-seg closer to NMS --- utils/segment/general.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index f1655e488944..075dce192ddf 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -20,31 +20,32 @@ def non_max_suppression_masks( 
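# (A sketch of the batch-aware limits this hunk introduces; the expressions are
# taken from the diff below, with bs = 16 chosen only for illustration.)
# bs = prediction.shape[0]         # batch size, new in this patch
# time_limit = 0.6 + 0.06 * bs     # bs = 16 -> 1.56 s before NMS gives up
# output = [torch.zeros((0, 6 + mask_dim), device=prediction.device)] * bs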
max_det=300, mask_dim=32, ): - """Runs Non-Maximum Suppression (NMS) on inference results + """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections Returns: list of detections, on (n,6) tensor per image [xyxy, conf, cls] """ + bs = prediction.shape[0] # batch size nc = prediction.shape[2] - 5 # number of classes xc = prediction[..., 4] > conf_thres # candidates # Checks - assert (0 <= conf_thres <= 1), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0" - assert (0 <= iou_thres <= 1), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" + assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' + assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' # Settings # min_wh = 2 # (pixels) minimum box width and height max_wh = 7680 # (pixels) maximum box width and height max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() - time_limit = 10.0 # seconds to quit after + time_limit = 0.6 + 0.06 * bs # seconds to quit after redundant = True # require redundant detections multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) merge = False # use merge-NMS nm = 5 + mask_dim t = time.time() - output = [torch.zeros((0, 6 + mask_dim), device=prediction.device)] * prediction.shape[0] + output = [torch.zeros((0, 6 + mask_dim), device=prediction.device)] * bs for xi, x in enumerate(prediction): # image index, image inference # Apply constraints # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height @@ -92,8 +93,6 @@ def non_max_suppression_masks( continue elif n > max_nms: # excess boxes x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence - else: - x = x[x[:, 4].argsort(descending=True)] # sort by confidence # Batched NMS c = x[:, 5:6] * (0 if agnostic else max_wh) # classes From 8b70e64fca7a3b7d9f77012dc0371a7af72f9c78 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 21 Aug 2022 00:34:22 +0200 Subject: [PATCH 088/247] restore deterministic init_seeds code --- segment/train.py | 2 +- utils/general.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/segment/train.py b/segment/train.py index 102de6239342..d1d7b8dc1686 100644 --- a/segment/train.py +++ b/segment/train.py @@ -99,7 +99,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary plots = not evolve and not opt.noplots # create plots overlap = opt.overlap_mask cuda = device.type != 'cpu' - init_seeds(opt.seed + 1 + RANK, True) + init_seeds(opt.seed + 1 + RANK, deterministic=False) with torch_distributed_zero_first(LOCAL_RANK): data_dict = data_dict or check_dataset(data) # check if None train_path, val_path = data_dict['train'], data_dict['val'] diff --git a/utils/general.py b/utils/general.py index 30ad949e06cf..35c2e52cb6b3 100644 --- a/utils/general.py +++ b/utils/general.py @@ -242,7 +242,7 @@ def init_seeds(seed=0, deterministic=False): import torch.backends.cudnn as cudnn if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213 - # torch.use_deterministic_algorithms(True) + torch.use_deterministic_algorithms(True) os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' os.environ['PYTHONHASHSEED'] = str(seed) From 6e4fbebc7e3a97cc71027edd9de5b1d48623c6a8 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sun, 21 Aug 2022 10:45:42 +0530 Subject: [PATCH 089/247] remove easydict dependency --- requirements.txt | 
1 - utils/segment/metrics.py | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 241bbe62b6ac..10620566ca66 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,6 @@ scipy>=1.4.1 torch>=1.7.0 torchvision>=0.8.1 tqdm>=4.64.0 -easydict>=1.9 protobuf<=3.20.1 # https://github.com/ultralytics/yolov5/issues/8012 # Logging ------------------------------------- diff --git a/utils/segment/metrics.py b/utils/segment/metrics.py index 981d90252ec9..b09ce23fb9e3 100644 --- a/utils/segment/metrics.py +++ b/utils/segment/metrics.py @@ -4,7 +4,6 @@ """ import numpy as np -from easydict import EasyDict as edict from ..metrics import ap_per_class @@ -48,7 +47,7 @@ def ap_per_class_box_and_mask( names=names, prefix="Mask")[2:] - results = edict({ + results = { "boxes": { "p": results_boxes[0], "r": results_boxes[1], @@ -60,7 +59,7 @@ def ap_per_class_box_and_mask( "r": results_masks[1], "ap": results_masks[3], "f1": results_masks[2], - "ap_class": results_masks[4]}}) + "ap_class": results_masks[4]}} return results From c6f3b6ed1a98f2a8f4aff6aa6d93a41d073a35cc Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sun, 21 Aug 2022 10:47:47 +0530 Subject: [PATCH 090/247] update --- segment/train.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/segment/train.py b/segment/train.py index d1d7b8dc1686..8323bcdc137d 100644 --- a/segment/train.py +++ b/segment/train.py @@ -390,7 +390,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary if ni == 10: files = sorted(save_dir.glob('train*.jpg')) - logger.log_images(files, "Mosaics") + logger.log_images(files, "Mosaics", epoch) # end batch ------------------------------------------------------------------------------------------------ # Scheduler @@ -424,7 +424,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary logger.log_metrics(metrics_dict, epoch) if plots: files = sorted(save_dir.glob('val*.jpg')) - logger.log_images(files, "Validation") + logger.log_images(files, "Validation", epoch) # Save model if (not nosave) or (final_epoch and not evolve): # if save ckpt = { @@ -491,7 +491,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") - logger.log_images(files, "Results") + logger.log_images(files, "Results", epoch+1) torch.cuda.empty_cache() return results From d1327d2b76d521b9d65caa2c8cdfca06eff836e8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 21 Aug 2022 05:18:14 +0000 Subject: [PATCH 091/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/train.py b/segment/train.py index 8323bcdc137d..bc0792d5267c 100644 --- a/segment/train.py +++ b/segment/train.py @@ -491,7 +491,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") - logger.log_images(files, "Results", epoch+1) + 
logger.log_images(files, "Results", epoch + 1) torch.cuda.empty_cache() return results From 466ab71f56d879ddda53c0262e81577fd4fb06ff Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sun, 21 Aug 2022 13:39:34 +0530 Subject: [PATCH 092/247] restore output_to_target mask --- segment/val.py | 4 ++-- utils/segment/plots.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/segment/val.py b/segment/val.py index 2632e6deacd4..76f6af4f8859 100644 --- a/segment/val.py +++ b/segment/val.py @@ -42,11 +42,11 @@ coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, scale_coords, xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, box_iou -from utils.plots import output_to_target, plot_val_study +from utils.plots import plot_val_study from utils.segment.dataloaders import create_dataloader from utils.segment.general import mask_iou, non_max_suppression_masks, process_mask, process_mask_upsample, scale_masks from utils.segment.metrics import Metrics, ap_per_class_box_and_mask -from utils.segment.plots import plot_images_and_masks +from utils.segment.plots import plot_images_and_masks, output_to_target from utils.torch_utils import de_parallel, select_device, time_sync diff --git a/utils/segment/plots.py b/utils/segment/plots.py index dafe5f9eb31a..641988e1030d 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -239,3 +239,12 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): ax[1].legend() fig.savefig(save_dir / "results.png", dpi=200) plt.close() + +def output_to_target(output, filter_dets=10): + # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] + targets = [] + for i, o in enumerate(output): + o = o[:filter_dets] + for *box, conf, cls in o.cpu().numpy()[:, :6]: + targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) + return np.array(targets) From 7724c710be9e42e9138ba29982cfb2c607bc1ae0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 21 Aug 2022 08:10:01 +0000 Subject: [PATCH 093/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/val.py | 2 +- utils/segment/plots.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/segment/val.py b/segment/val.py index 76f6af4f8859..c17f41458603 100644 --- a/segment/val.py +++ b/segment/val.py @@ -46,7 +46,7 @@ from utils.segment.dataloaders import create_dataloader from utils.segment.general import mask_iou, non_max_suppression_masks, process_mask, process_mask_upsample, scale_masks from utils.segment.metrics import Metrics, ap_per_class_box_and_mask -from utils.segment.plots import plot_images_and_masks, output_to_target +from utils.segment.plots import output_to_target, plot_images_and_masks from utils.torch_utils import de_parallel, select_device, time_sync diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 641988e1030d..c810d2182d80 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -240,6 +240,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): fig.savefig(save_dir / "results.png", dpi=200) plt.close() + def output_to_target(output, filter_dets=10): # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] targets = [] From 08735aa91f7b69b0e5ec65d44721b23b8e0661eb Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sun, 21 Aug 2022 13:53:32 +0530 Subject: [PATCH 094/247] update --- 
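A worked example (values illustrative) of the rows that the output_to_target()
helper restored above emits for plotting:

# one detection [x1, y1, x2, y2, conf, cls] = [10, 10, 50, 30, 0.9, 2] in image 0
# xyxy2xywh gives center (30, 20) and size (40, 20), so the target row becomes
# [batch_id, class_id, x, y, w, h, conf] = [0, 2.0, 30.0, 20.0, 40.0, 20.0, 0.9]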
utils/segment/plots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 641988e1030d..431880de84b5 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -8,7 +8,7 @@ import torch from PIL import Image -from ..general import xywh2xyxy +from ..general import xywh2xyxy, xyxy2xywh from ..plots import colors From 75f617f81a68255577081a5560a0511ce661b90e Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sun, 21 Aug 2022 23:13:02 +0530 Subject: [PATCH 095/247] cleanup --- utils/loggers/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 6a2734b26782..97df1371ec6f 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -15,7 +15,6 @@ from utils.loggers.clearml.clearml_utils import ClearmlLogger from utils.loggers.wandb.wandb_utils import WandbLogger from utils.plots import plot_images, plot_labels, plot_results -from utils.segment.plots import plot_images_and_masks, plot_results_with_masks from utils.torch_utils import de_parallel LOGGERS = ('csv', 'tb', 'wandb', 'clearml') # *.csv, TensorBoard, Weights & Biases, ClearML From ba62c62f2a363960f4da07e7dcd9bddeacde109d Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 21 Aug 2022 23:48:03 +0200 Subject: [PATCH 096/247] Remove unused ImageFont import --- utils/general.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/general.py b/utils/general.py index 47fdcf6d9b3c..3bc6fbc22d57 100644 --- a/utils/general.py +++ b/utils/general.py @@ -33,7 +33,6 @@ import torch import torchvision import yaml -from PIL import ImageFont from utils.downloads import gsutil_getsize from utils.metrics import box_iou, fitness From 55ef06a660f4707c5d9a7916acfa50154c731a7b Mon Sep 17 00:00:00 2001 From: glennjocher Date: Mon, 22 Aug 2022 00:28:57 +0200 Subject: [PATCH 097/247] Unified NMS --- segment/predict.py | 6 +- segment/val.py | 20 +++---- utils/general.py | 24 ++++---- utils/segment/general.py | 122 +-------------------------------------- 4 files changed, 30 insertions(+), 142 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 2bc2a5629d6f..1adb02348da3 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -41,9 +41,9 @@ from models.experimental import attempt_load from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, - increment_path, print_args, scale_coords, strip_optimizer, xyxy2xywh) + increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box -from utils.segment.general import non_max_suppression_masks, process_mask_upsample, scale_masks +from utils.segment.general import process_mask_upsample, scale_masks from utils.segment.plots import plot_masks from utils.torch_utils import select_device, time_sync @@ -130,7 +130,7 @@ def run( dt[1] += t3 - t2 # NMS - pred = non_max_suppression_masks(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) + pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, masks=32) dt[2] += time_sync() - t3 # Second-stage classifier (optional) diff --git a/segment/val.py b/segment/val.py index c17f41458603..fdc318d3930b 100644 --- a/segment/val.py +++ b/segment/val.py @@ -39,12 +39,12 @@ from models.experimental import attempt_load # scoped to avoid 
circular import from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, - coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, scale_coords, - xywh2xyxy, xyxy2xywh) + coco80_to_coco91_class, colorstr, emojis, increment_path, non_max_suppression, print_args, + scale_coords, xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, box_iou from utils.plots import plot_val_study from utils.segment.dataloaders import create_dataloader -from utils.segment.general import mask_iou, non_max_suppression_masks, process_mask, process_mask_upsample, scale_masks +from utils.segment.general import mask_iou, process_mask, process_mask_upsample, scale_masks from utils.segment.metrics import Metrics, ap_per_class_box_and_mask from utils.segment.plots import output_to_target, plot_images_and_masks from utils.torch_utils import de_parallel, select_device, time_sync @@ -272,13 +272,13 @@ def run( targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling t3 = time_sync() - out = non_max_suppression_masks(out, - conf_thres, - iou_thres, - labels=lb, - multi_label=True, - agnostic=single_cls, - mask_dim=de_parallel(model).model[-1].mask_dim) + out = non_max_suppression(out, + conf_thres, + iou_thres, + labels=lb, + multi_label=True, + agnostic=single_cls, + masks=de_parallel(model).model[-1].mask_dim) dt[2] += time_sync() - t3 # keep pred masks for plotting diff --git a/utils/general.py b/utils/general.py index 3bc6fbc22d57..e00f69309fcf 100644 --- a/utils/general.py +++ b/utils/general.py @@ -823,8 +823,10 @@ def non_max_suppression(prediction, agnostic=False, multi_label=False, labels=(), - max_det=300): - """Non-Maximum Suppression (NMS) on inference results to reject overlapping bounding boxes + max_det=300, + masks=0, + ): + """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections Returns: list of detections, on (n,6) tensor per image [xyxy, conf, cls] @@ -842,13 +844,14 @@ def non_max_suppression(prediction, # min_wh = 2 # (pixels) minimum box width and height max_wh = 7680 # (pixels) maximum box width and height max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() - time_limit = 0.3 + 0.03 * bs # seconds to quit after + time_limit = 0.5 + 0.05 * bs # seconds to quit after redundant = True # require redundant detections multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) merge = False # use merge-NMS t = time.time() - output = [torch.zeros((0, 6), device=prediction.device)] * bs + si = 5 + masks # box/mask start index + output = [torch.zeros((0, 6 + masks), device=prediction.device)] * bs for xi, x in enumerate(prediction): # image index, image inference # Apply constraints # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height @@ -870,16 +873,17 @@ def non_max_suppression(prediction, # Compute conf x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf - # Box (center x, center y, width, height) to (x1, y1, x2, y2) - box = xywh2xyxy(x[:, :4]) + # Box/Mask + box = xywh2xyxy(x[:, :4]) # center_x, center_y, width, height) to (x1, y1, x2, y2) + mask = x[:, 5:si] # zero columns if no masks # Detections matrix nx6 (xyxy, conf, cls) if multi_label: - i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T - x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) + i, j = (x[:, si:] > conf_thres).nonzero(as_tuple=False).T + 
x = torch.cat((box[i], x[i, j + si, None], j[:, None].float(), mask[i]), 1) else: # best class only - conf, j = x[:, 5:].max(1, keepdim=True) - x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] + conf, j = x[:, si:].max(1, keepdim=True) + x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres] # Filter by class if classes is not None: diff --git a/utils/segment/general.py b/utils/segment/general.py index 075dce192ddf..c1ca23c344fa 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -1,119 +1,6 @@ -import time - import cv2 import torch import torch.nn.functional as F -import torchvision - -from ..general import LOGGER, xywh2xyxy -from ..metrics import box_iou - - -def non_max_suppression_masks( - prediction, - conf_thres=0.25, - iou_thres=0.45, - classes=None, - agnostic=False, - multi_label=False, - labels=(), - max_det=300, - mask_dim=32, -): - """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections - - Returns: - list of detections, on (n,6) tensor per image [xyxy, conf, cls] - """ - - bs = prediction.shape[0] # batch size - nc = prediction.shape[2] - 5 # number of classes - xc = prediction[..., 4] > conf_thres # candidates - - # Checks - assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' - assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' - - # Settings - # min_wh = 2 # (pixels) minimum box width and height - max_wh = 7680 # (pixels) maximum box width and height - max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() - time_limit = 0.6 + 0.06 * bs # seconds to quit after - redundant = True # require redundant detections - multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) - merge = False # use merge-NMS - nm = 5 + mask_dim - - t = time.time() - output = [torch.zeros((0, 6 + mask_dim), device=prediction.device)] * bs - for xi, x in enumerate(prediction): # image index, image inference - # Apply constraints - # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height - x = x[xc[xi]] # confidence - pred_masks = x[:, 5:nm] - - # Cat apriori labels if autolabelling - if labels and len(labels[xi]): - lb = labels[xi] - v = torch.zeros((len(lb), nc + 5), device=x.device) - v[:, :4] = lb[:, 1:5] # box - v[:, 4] = 1.0 # conf - v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls - x = torch.cat((x, v), 0) - - # If none remain process next image - if not x.shape[0]: - continue - - # Compute conf - x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf - - # Box (center x, center y, width, height) to (x1, y1, x2, y2) - box = xywh2xyxy(x[:, :4]) - - # Detections matrix nx6 (xyxy, conf, cls) - if multi_label: - i, j = (x[:, nm:] > conf_thres).nonzero(as_tuple=False).T - x = torch.cat((box[i], x[i, j + nm, None], j[:, None].float(), pred_masks[i]), 1) - else: # best class only - conf, j = x[:, nm:].max(1, keepdim=True) - x = torch.cat((box, conf, j.float(), pred_masks), 1)[conf.view(-1) > conf_thres] - - # Filter by class - if classes is not None: - x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] - - # Apply finite constraint - # if not torch.isfinite(x).all(): - # x = x[torch.isfinite(x).all(1)] - - # Check shape - n = x.shape[0] # number of boxes - if not n: # no boxes - continue - elif n > max_nms: # excess boxes - x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence - - # Batched NMS - c = x[:, 5:6] * (0 if agnostic else 
max_wh) # classes - boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores - i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS - if i.shape[0] > max_det: # limit detections - i = i[:max_det] - if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) - # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) - iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix - weights = iou * scores[None] # box weights - x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes - if redundant: - i = i[iou.sum(1) > 1] # require redundancy - - output[xi] = x[i] - if (time.time() - t) > time_limit: - LOGGER.warning(f'WARNING: NMS time limit {time_limit:.3f}s exceeded') - break # time limit exceeded - - return output def crop(masks, boxes): @@ -127,10 +14,7 @@ def crop(masks, boxes): """ h, w, n = masks.size() x1, x2 = boxes[:, 0], boxes[:, 2] - y1, y2 = ( - boxes[:, 1], - boxes[:, 3], - ) + y1, y2 = boxes[:, 1], boxes[:, 3] rows = (torch.arange(w, device=masks.device, dtype=x1.dtype).view(1, -1, 1).expand(h, w, n)) cols = (torch.arange(h, device=masks.device, dtype=x1.dtype).view(-1, 1, 1).expand(h, w, n)) @@ -150,7 +34,7 @@ def crop(masks, boxes): def process_mask_upsample(proto_out, out_masks, bboxes, shape): """ - Crop after unsample. + Crop after upsample. proto_out: [mask_dim, mask_h, mask_w] out_masks: [n, mask_dim], n is number of masks after nms bboxes: [n, 4], n is number of masks after nms @@ -171,7 +55,7 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): """ - Crop before unsample. + Crop before upsample. proto_out: [mask_dim, mask_h, mask_w] out_masks: [n, mask_dim], n is number of masks after nms bboxes: [n, 4], n is number of masks after nms From 7ce737835b401949b0ecc3c3b29c78ab629adebf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:29:25 +0000 Subject: [PATCH 098/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/general.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/utils/general.py b/utils/general.py index e00f69309fcf..565b53b6496d 100644 --- a/utils/general.py +++ b/utils/general.py @@ -816,16 +816,17 @@ def clip_coords(boxes, shape): boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 -def non_max_suppression(prediction, - conf_thres=0.25, - iou_thres=0.45, - classes=None, - agnostic=False, - multi_label=False, - labels=(), - max_det=300, - masks=0, - ): +def non_max_suppression( + prediction, + conf_thres=0.25, + iou_thres=0.45, + classes=None, + agnostic=False, + multi_label=False, + labels=(), + max_det=300, + masks=0, +): """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections Returns: From 7ebd19d1e1148d58e9cc284118355f05720b5e86 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Mon, 22 Aug 2022 01:47:39 +0200 Subject: [PATCH 099/247] DetectMultiBackend compatibility --- models/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/models/common.py b/models/common.py index d308244c4a44..5d49da77a35e 100644 --- a/models/common.py +++ b/models/common.py @@ -333,6 +333,7 @@ def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, names = model.module.names if hasattr(model, 'module') else model.names # get class names model.half() if fp16 else 
model.float() self.model = model # explicitly assign for to(), cpu(), cuda(), half() + segmentation_model = type(model.model[-1]).__name__ == 'DetectSegment' elif jit: # TorchScript LOGGER.info(f'Loading {w} for TorchScript inference...') extra_files = {'config.txt': ''} # model metadata @@ -466,7 +467,7 @@ def forward(self, im, augment=False, visualize=False, val=False): if self.pt: # PyTorch y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im) - if isinstance(y, tuple): + if isinstance(y, tuple) and not self.segmentation_model: y = y[0] elif self.jit: # TorchScript y = self.model(im)[0] From 261bec1ee6018f536f632332fb4462278ffb6dcc Mon Sep 17 00:00:00 2001 From: glennjocher Date: Mon, 22 Aug 2022 01:48:06 +0200 Subject: [PATCH 100/247] segment/predict.py update --- segment/predict.py | 135 +++++++++++++++++++++------------------------ 1 file changed, 64 insertions(+), 71 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 1adb02348da3..1b6eb9d35c95 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -1,31 +1,32 @@ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license """ -Run inference on images, videos, directories, streams, etc. +Run YOLOv5 segmentation inference on images, videos, directories, streams, etc. Usage - sources: - $ python path/to/predict.py --weights yolov5s-seg.pt --source 0 # webcam - img.jpg # image - vid.mp4 # video - path/ # directory - path/*.jpg # glob - 'https://youtu.be/Zgi9g1ksQHc' # YouTube - 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream + $ python segment/predict.py --weights yolov5s-seg.pt --source 0 # webcam + img.jpg # image + vid.mp4 # video + path/ # directory + 'path/*.jpg' # glob + 'https://youtu.be/Zgi9g1ksQHc' # YouTube + 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream Usage - formats: - $ python path/to/predict.py --weights yolov5s.pt # PyTorch - yolov5s.torchscript # TorchScript - yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn - yolov5s.xml # OpenVINO - yolov5s.engine # TensorRT - yolov5s.mlmodel # CoreML (macOS-only) - yolov5s_saved_model # TensorFlow SavedModel - yolov5s.pb # TensorFlow GraphDef - yolov5s.tflite # TensorFlow Lite - yolov5s_edgetpu.tflite # TensorFlow Edge TPU + $ python segment/predict.py --weights yolov5s-seg.pt # PyTorch + yolov5s-seg.torchscript # TorchScript + yolov5s-seg.onnx # ONNX Runtime or OpenCV DNN with --dnn + yolov5s-seg.xml # OpenVINO + yolov5s-seg.engine # TensorRT + yolov5s-seg.mlmodel # CoreML (macOS-only) + yolov5s-seg_saved_model # TensorFlow SavedModel + yolov5s-seg.pb # TensorFlow GraphDef + yolov5s-seg.tflite # TensorFlow Lite + yolov5s-seg_edgetpu.tflite # TensorFlow Edge TPU """ import argparse import os +import platform import sys from pathlib import Path @@ -38,20 +39,21 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -from models.experimental import attempt_load +from models.common import DetectMultiBackend from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams -from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, +from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box from utils.segment.general import process_mask_upsample, scale_masks from utils.segment.plots 
import plot_masks -from utils.torch_utils import select_device, time_sync +from utils.torch_utils import select_device, smart_inference_mode -@torch.no_grad() +@smart_inference_mode() def run( - weights=ROOT / 'yolov5s.pt', # model.pt path(s) + weights=ROOT / 'yolov5s-seg.pt', # model.pt path(s) source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam + data=ROOT / 'data/coco128.yaml', # dataset.yaml path imgsz=(640, 640), # inference size (height, width) conf_thres=0.25, # confidence threshold iou_thres=0.45, # NMS IOU threshold @@ -67,13 +69,14 @@ def run( augment=False, # augmented inference visualize=False, # visualize features update=False, # update all models - project=ROOT / 'runs/predict_segment', # save results to project/name + project=ROOT / 'runs/predict-seg', # save results to project/name name='exp', # save results to project/name exist_ok=False, # existing project/name ok, do not increment line_thickness=3, # bounding box thickness (pixels) hide_labels=False, # hide labels hide_conf=False, # hide confidences half=False, # use FP16 half-precision inference + dnn=False, # use OpenCV DNN for ONNX inference ): source = str(source) save_img = not nosave and not source.endswith('.txt') # save inference images @@ -89,11 +92,8 @@ def run( # Load model device = select_device(device) - model = attempt_load(weights, device=device, inplace=True, fuse=True) - stride = max(int(model.stride.max()), 32) # model stride - names = model.module.names if hasattr(model, 'module') else model.names # get class names - model.half() if half else model.float() - pt = True + model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) + stride, names, pt = model.stride, model.names, model.pt imgsz = check_img_size(imgsz, s=stride) # check image size # Dataloader @@ -108,30 +108,25 @@ def run( vid_path, vid_writer = [None] * bs, [None] * bs # Run inference - if str(device) != "cpu": - im = torch.zeros(1, 3, *imgsz).to(device).half() # input image - model(im) # warmup - seen, windows, dt = 0, [], [0.0, 0.0, 0.0] + model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup + seen, windows, dt = 0, [], (Profile(), Profile(), Profile()) for path, im, im0s, vid_cap, s in dataset: - t1 = time_sync() - im = torch.from_numpy(im).to(device) - im = im.half() if half else im.float() # uint8 to fp16/32 - im /= 255 # 0 - 255 to 0.0 - 1.0 - if len(im.shape) == 3: - im = im[None] # expand for batch dim - t2 = time_sync() - dt[0] += t2 - t1 + with dt[0]: + im = torch.from_numpy(im).to(device) + im = im.half() if model.fp16 else im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + if len(im.shape) == 3: + im = im[None] # expand for batch dim # Inference - visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False - pred, out = model(im, augment=augment, visualize=visualize) - proto = out[1] - t3 = time_sync() - dt[1] += t3 - t2 + with dt[1]: + visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False + pred, out = model(im, augment=augment, visualize=visualize) + proto = out[1] # NMS - pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, masks=32) - dt[2] += time_sync() - t3 + with dt[2]: + pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, masks=32) # Second-stage classifier (optional) # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s) @@ -153,14 +148,13 @@ def run( imc = im0.copy() if save_crop else im0 # 
for save_crop annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): - # mask stuff + # Mask additions --------------------------------------------------------------------------------------- masks_conf = det[:, 6:] - # binary mask, (img_h, img_w, n) - masks = process_mask_upsample(proto[i], masks_conf, det[:, :4], im.shape[2:]) - # n, img_h, img_w - masks = masks.permute(2, 0, 1).contiguous() - # bbox stuff - det = det[:, :6] # update the value in outputs, remove mask part. + masks = process_mask_upsample(proto[i], masks_conf, det[:, :4], im.shape[2:]) # binary_mask(imh,imw,n) + masks = masks.permute(2, 0, 1).contiguous() # shape(n,imh,imw) + det = det[:, :6] # remove masks + # Mask additions --------------------------------------------------------------------------------------- + # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() @@ -169,19 +163,16 @@ def run( n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string - # plot masks + # Mask plotting ---------------------------------------------------------------------------------------- mcolors = [colors(int(cls)) for cls in range(len(det[:, 5]))] - # NOTE: this way to draw masks is faster, - # but the image might get blurred, - # from https://github.com/dbolya/yolact - # image with masks, (img_h, img_w, 3) - img_masks = plot_masks(im[i], masks, mcolors) - # scale image to original hw - img_masks = scale_masks(im.shape[2:], img_masks, im0.shape) + # NOTE: this plot method is faster, but the image might get blurred https://github.com/dbolya/yolact + img_masks = plot_masks(im[i], masks, mcolors) # image with masks shape(imh,imw,3) + img_masks = scale_masks(im.shape[2:], img_masks, im0.shape) # scale to original h, w annotator.im = img_masks + # Mask plotting ---------------------------------------------------------------------------------------- # Write results - for j, (*xyxy, conf, cls) in enumerate(det): + for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format @@ -191,14 +182,14 @@ def run( if save_img or save_crop or view_img: # Add bbox to image c = int(cls) # integer class label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') - annotator.box_label(xyxy, label, color=colors(j, True)) + annotator.box_label(xyxy, label, color=colors(c, True)) if save_crop: save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) # Stream results im0 = annotator.result() if view_img: - if p not in windows: + if platform.system() == 'Linux' and p not in windows: windows.append(p) cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) @@ -225,22 +216,23 @@ def run( vid_writer[i].write(im0) # Print time (inference-only) - LOGGER.info(f'{s}Done. 
({t3 - t2:.3f}s)') + LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms") # Print results - t = tuple(x / seen * 1E3 for x in dt) # speeds per image + t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") if update: - strip_optimizer(weights) # update model (to fix SourceChangeWarning) + strip_optimizer(weights[0]) # update model (to fix SourceChangeWarning) def parse_opt(): parser = argparse.ArgumentParser() - parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model path(s)') parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') + parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path') parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') @@ -256,13 +248,14 @@ def parse_opt(): parser.add_argument('--augment', action='store_true', help='augmented inference') parser.add_argument('--visualize', action='store_true', help='visualize features') parser.add_argument('--update', action='store_true', help='update all models') - parser.add_argument('--project', default=ROOT / 'runs/predict_segment', help='save results to project/name') + parser.add_argument('--project', default=ROOT / 'runs/predict-seg', help='save results to project/name') parser.add_argument('--name', default='exp', help='save results to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels') parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') + parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') opt = parser.parse_args() opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand print_args(vars(opt)) From 0095547f6211a9bd7d4f40c829dc70b60dc0c8ca Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 22 Aug 2022 11:59:17 +0800 Subject: [PATCH 101/247] update plot colors --- segment/predict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/predict.py b/segment/predict.py index 1b6eb9d35c95..c24869a8866c 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -164,7 +164,7 @@ def run( s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Mask plotting ---------------------------------------------------------------------------------------- - mcolors = [colors(int(cls)) for cls in range(len(det[:, 5]))] + mcolors = [colors(int(cls), True) for cls in det[:, 5]] # NOTE: this plot method is faster, but the image might get 
blurred https://github.com/dbolya/yolact
                    img_masks = plot_masks(im[i], masks, mcolors)  # image with masks shape(imh,imw,3)
                    img_masks = scale_masks(im.shape[2:], img_masks, im0.shape)  # scale to original h, w

From 139640cf84f312118febcab5864377f49e058bab Mon Sep 17 00:00:00 2001
From: Laughing-q <1185102784@qq.com>
Date: Mon, 22 Aug 2022 11:59:40 +0800
Subject: [PATCH 102/247] fix bbox shifted

---
 models/yolo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/models/yolo.py b/models/yolo.py
index 15ef5023acf3..e6c2143e4b31 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -135,7 +135,7 @@ def forward(self, x):
                     y[..., 0:2] = (y[..., 0:2] * 2 + self.grid[i]) * self.stride[i]  # xy
                     y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                 else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
-                    xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
+                    xy = (y[..., 0:2] * 2. + self.grid[i]) * self.stride[i]  # xy
                     wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                     y = torch.cat((xy.type_as(y), wh.type_as(y), y[..., 4:]), -1)
                 z.append(y.view(-1, self.na * ny * nx, self.no))

From cabb99d61a0765440991cdeea6c9098932f7e345 Mon Sep 17 00:00:00 2001
From: Laughing-q <1185102784@qq.com>
Date: Mon, 22 Aug 2022 11:59:59 +0800
Subject: [PATCH 103/247] sort bbox by confidence

---
 utils/general.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/utils/general.py b/utils/general.py
index 565b53b6496d..1468c8c4d21f 100644
--- a/utils/general.py
+++ b/utils/general.py
@@ -900,6 +900,8 @@ def non_max_suppression(
             continue
         elif n > max_nms:  # excess boxes
             x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence
+        else:
+            x = x[x[:, 4].argsort(descending=True)]  # sort by confidence

         # Batched NMS
         c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes

From 63daead45242ae5b69ce0f7807389d4a0f420b05 Mon Sep 17 00:00:00 2001
From: Ayush Chaurasia
Date: Mon, 22 Aug 2022 11:33:55 +0530
Subject: [PATCH 104/247] enable overlap by default

---
 segment/train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/segment/train.py b/segment/train.py
index bc0792d5267c..55b9c53a7ef8 100644
--- a/segment/train.py
+++ b/segment/train.py
@@ -97,7 +97,7 @@ def train(hyp, opt, device):  # hyp is path/to/hyp.yaml or hyp dictionary
     # Config
     plots = not evolve and not opt.noplots  # create plots
-    overlap = opt.overlap_mask
+    overlap = not opt.no_overlap
     cuda = device.type != 'cpu'
     init_seeds(opt.seed + 1 + RANK, deterministic=False)
     with torch_distributed_zero_first(LOCAL_RANK):
@@ -536,7 +536,7 @@ def parse_opt(known=False):

     # Instance Segmentation Args
     parser.add_argument('--mask-ratio', type=int, default=4, help='Downsample the gt masks to save memory')
-    parser.add_argument('--overlap-mask',
+    parser.add_argument('--no-overlap',
                         action='store_true',
                         help='Overlapping masks train faster at the cost of slight accuracy decrease')

From 28ff5fe06b1a3b497ac389f7113451ac51eae143 Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Mon, 22 Aug 2022 23:36:25 +0200
Subject: [PATCH 105/247] Merge detect/segment output_to_target() function

---
 segment/val.py         |  6 +++---
 utils/plots.py         | 10 ++++++----
 utils/segment/plots.py | 10 ----------
 3 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/segment/val.py b/segment/val.py
index fdc318d3930b..7438426dfb88 100644
--- a/segment/val.py
+++ b/segment/val.py
@@ -42,11 +42,11 @@
                            coco80_to_coco91_class, colorstr, emojis, increment_path, non_max_suppression, print_args,
                            scale_coords, xywh2xyxy,
xyxy2xywh) from utils.metrics import ConfusionMatrix, box_iou -from utils.plots import plot_val_study +from utils.plots import output_to_target, plot_val_study from utils.segment.dataloaders import create_dataloader from utils.segment.general import mask_iou, process_mask, process_mask_upsample, scale_masks from utils.segment.metrics import Metrics, ap_per_class_box_and_mask -from utils.segment.plots import output_to_target, plot_images_and_masks +from utils.segment.plots import plot_images_and_masks from utils.torch_utils import de_parallel, select_device, time_sync @@ -345,7 +345,7 @@ def run( plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) # labels plot_masks = torch.cat(plot_masks, dim=0) - plot_images_and_masks(im, output_to_target(out, filter_dets=15), plot_masks, paths, + plot_images_and_masks(im, output_to_target(out, max_det=15), plot_masks, paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred # Compute metrics diff --git a/utils/plots.py b/utils/plots.py index d35e2bdd168a..cbdbd7da1428 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -177,12 +177,14 @@ def butter_lowpass(cutoff, fs, order): return filtfilt(b, a, data) # forward-backward filter -def output_to_target(output): - # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] +def output_to_target(output, max_det=300): + # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting targets = [] for i, o in enumerate(output): - targets.extend([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf] for *box, conf, cls in o.cpu().numpy()) - return np.array(targets) + box, conf, cls = o[:max_det].cpu().split((4, 1, 1), 1) + j = torch.full((conf.shape[0], 1), i) + targets.append(torch.cat((j, cls, xyxy2xywh(box), conf), 1)) + return torch.cat(targets, 0).numpy() @threaded diff --git a/utils/segment/plots.py b/utils/segment/plots.py index c1afa60786c6..da87245ab885 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -239,13 +239,3 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): ax[1].legend() fig.savefig(save_dir / "results.png", dpi=200) plt.close() - - -def output_to_target(output, filter_dets=10): - # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] - targets = [] - for i, o in enumerate(output): - o = o[:filter_dets] - for *box, conf, cls in o.cpu().numpy()[:, :6]: - targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) - return np.array(targets) From 8c0eb6d276b339ae75eb8e6c8c11b135b8f8e864 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Mon, 22 Aug 2022 23:49:31 +0200 Subject: [PATCH 106/247] Start segmentation CI --- .github/workflows/ci-testing.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 4ef930c61233..bd3a31bf2379 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -123,6 +123,29 @@ jobs: model = torch.hub.load('.', 'custom', path=path, source='local') print(model('data/images/bus.jpg')) EOF + - name: Test segmentation + shell: bash # for Windows compatibility + run: | + m=${{ matrix.model }}-seg # official weights + b=runs/train-seg/exp/weights/best # best.pt checkpoint + python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg $m.yaml --epochs 1 --device cpu # train +# python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 
--device cpu # train
+# for d in cpu; do # devices
+# for w in $m $b; do # weights
+# python segment/val.py --imgsz 64 --batch 32 --weights $w.pt --device $d # val
+# python segment/predict.py --imgsz 64 --weights $w.pt --device $d # detect
+# done
+# done
+# # python hubconf.py --model $m # hub
+# # python models/tf.py --weights $m.pt # build TF model
+# python models/yolo.py --cfg $m.yaml # build PyTorch model
+# python export.py --weights $m.pt --img 64 --include torchscript # export
+# python - <

Date: Tue, 23 Aug 2022 22:09:53 +0530
Subject: [PATCH 107/247] fix plots

---
 utils/plots.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/plots.py b/utils/plots.py
index cbdbd7da1428..4628ca632a46 100644
--- a/utils/plots.py
+++ b/utils/plots.py
@@ -181,7 +181,7 @@ def output_to_target(output, max_det=300):
     # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting
     targets = []
     for i, o in enumerate(output):
-        box, conf, cls = o[:max_det].cpu().split((4, 1, 1), 1)
+        box, conf, cls = o[:max_det, :6].cpu().split((4, 1, 1), 1)
         j = torch.full((conf.shape[0], 1), i)
         targets.append(torch.cat((j, cls, xyxy2xywh(box), conf), 1))
     return torch.cat(targets, 0).numpy()

From 17a979c520e672ef395a1de9aa1db689fe04b3c1 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Tue, 23 Aug 2022 18:55:34 +0200
Subject: [PATCH 108/247] Update ci-testing.yml

---
 .github/workflows/ci-testing.yml | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml
index bd3a31bf2379..5074dd4093f3 100644
--- a/.github/workflows/ci-testing.yml
+++ b/.github/workflows/ci-testing.yml
@@ -128,7 +128,14 @@ jobs:
       run: |
         m=${{ matrix.model }}-seg # official weights
         b=runs/train-seg/exp/weights/best # best.pt checkpoint
-        python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg $m.yaml --epochs 1 --device cpu # train
+
+        # Temporary tests until yolov5n-seg.pt and COCO128-seg is ready ---------------------------------------------
+        m=yolov5s-seg # official weights
+        python segment/predict.py --imgsz 64 --weights $m.pt --device cpu # detect
+        python export.py --weights $m.pt --img 64 --include torchscript # export
+        # Temporary tests until yolov5n-seg.pt and COCO128-seg is ready ---------------------------------------------
+
+# python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg $m.yaml --epochs 1 --device cpu # train
 # python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train

From 8b8ea38c184c04e3bea0a449f8358bbf769f7595 Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Tue, 23 Aug 2022 19:50:06 +0200
Subject: [PATCH 109/247] fix training whitespace

---
 .github/workflows/ci-testing.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml
index 5074dd4093f3..0edd05dbf86c 100644
--- a/.github/workflows/ci-testing.yml
+++ b/.github/workflows/ci-testing.yml
@@ -128,13 +128,13 @@ jobs:
       run: |
         m=${{ matrix.model }}-seg # official weights
         b=runs/train-seg/exp/weights/best # best.pt checkpoint
-
+
         # Temporary tests until yolov5n-seg.pt and COCO128-seg is ready ---------------------------------------------
         m=yolov5s-seg # official weights
         python segment/predict.py --imgsz 64 --weights $m.pt --device cpu # detect
         python export.py --weights $m.pt --img 64 --include torchscript # export
         # Temporary tests until yolov5n-seg.pt and COCO128-seg is ready ---------------------------------------------
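Note: the next patch condenses the two mask-decoding helpers in utils/segment/general.py.
Both implement the same YOLACT-style pipeline and differ only in whether cropping happens
before or after upsampling: prototype masks are combined linearly with per-detection
coefficients, passed through a sigmoid, cropped to each box, and thresholded at 0.5. A rough
PyTorch sketch of the shared core under assumed shapes — proto (c, mh, mw), coefficients
(n, c), boxes in pixel xyxy — with a simplified inline crop; this is a sketch, not the exact
repository code:

    import torch
    import torch.nn.functional as F

    def decode_masks(proto, coeffs, boxes, out_shape):
        c, mh, mw = proto.shape
        # linear combination of prototypes, then sigmoid -> (n, mh, mw)
        masks = (coeffs.tanh() @ proto.view(c, -1)).sigmoid().view(-1, mh, mw)
        # upsample to the network input size
        masks = F.interpolate(masks[None], out_shape, mode='bilinear', align_corners=False)[0]
        # zero out everything outside each detection's box
        h, w = out_shape
        xs = torch.arange(w, device=proto.device).view(1, 1, -1)
        ys = torch.arange(h, device=proto.device).view(1, -1, 1)
        x1, y1, x2, y2 = (boxes[:, i].view(-1, 1, 1) for i in range(4))
        inside = (xs >= x1) & (xs < x2) & (ys >= y1) & (ys < y2)
        return (masks * inside) > 0.5  # binary (n, h, w)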
From b2c6d0917e6dbb6232b62f4bc43b848b1504243d Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Tue, 23 Aug 2022 21:49:15 +0200
Subject: [PATCH 110/247] optimize process mask functions (can we merge both?)

---
 utils/segment/general.py | 41 ++++++++++++++++++----------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/utils/segment/general.py b/utils/segment/general.py
index c1ca23c344fa..fe4898b2cdd4 100644
--- a/utils/segment/general.py
+++ b/utils/segment/general.py
@@ -42,15 +42,12 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape):
     return: h, w, n
     """
-    # mask_h, mask_w, n
-    masks = proto_out.float().permute(1, 2, 0).contiguous() @ out_masks.float().tanh().T
-    masks = masks.sigmoid()
-    masks = masks.permute(2, 0, 1).contiguous()
-    # [n, mask_h, mask_w]
-    masks = F.interpolate(masks.unsqueeze(0), shape, mode='bilinear', align_corners=False).squeeze(0)
-    # [mask_h, mask_w, n]
-    masks = crop(masks.permute(1, 2, 0).contiguous(), bboxes)
-    return masks.gt_(0.5)  # .gt_(0.2)
+
+    c, mh, mw = proto_out.shape  # CHW
+    masks = (out_masks.tanh() @ proto_out.view(c, -1)).sigmoid().view(-1, mh, mw)
+    masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
+    masks = crop(masks.permute(1, 2, 0).contiguous(), bboxes)  # HWC
+    return masks.gt_(0.5)


 def process_mask(proto_out, out_masks, bboxes, shape, upsample=False):
@@ -63,23 +60,21 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False):
     return: h, w, n
     """
-    downsampled_bboxes = bboxes.clone()
-    mh, mw = proto_out.shape[1:]
+
+    c, mh, mw = proto_out.shape  # CHW
     ih, iw = shape
-    # mask_h, mask_w, n
-    masks = proto_out.float().permute(1, 2, 0).contiguous() @ out_masks.float().tanh().T
-    # print(masks)
-    masks = masks.sigmoid()
-    # print('after sigmoid:', masks)
-    downsampled_bboxes[:, 0] = downsampled_bboxes[:, 0] / iw * mw
-    downsampled_bboxes[:, 2] = downsampled_bboxes[:, 2] / iw * mw
-    downsampled_bboxes[:, 1] = downsampled_bboxes[:, 1] / ih * mh
-    downsampled_bboxes[:, 3] = downsampled_bboxes[:, 3] / ih * mh
-    masks = crop(masks, downsampled_bboxes)
+    masks = (out_masks.tanh() @ proto_out.view(c, -1)).sigmoid().view(-1, mh, mw)  # CHW
+
+    downsampled_bboxes = bboxes.clone()
+    downsampled_bboxes[:, 0] *= mw / iw
+    downsampled_bboxes[:, 2] *= mw / iw
+    downsampled_bboxes[:, 3] *= mh / ih
+    downsampled_bboxes[:, 1] *= mh / ih
+    masks = crop(masks.permute(1, 2, 0).contiguous(), downsampled_bboxes)  # HWC
+
     masks = masks.permute(2, 0, 1).contiguous()
-    # [n, mask_h, mask_w]
     if upsample:
-        masks = F.interpolate(masks.unsqueeze(0), shape, mode='bilinear', align_corners=False).squeeze(0)
+        masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
     return masks.gt_(0.5).permute(1, 2, 0).contiguous()

From d189aabe3d43ee2b4410134bd2b659e202f929fe Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Tue, 23 Aug 2022 22:15:06 +0200
Subject: [PATCH 111/247] Update predict/detect

---
 detect.py          |  4 ++--
 segment/predict.py | 18 +++++++-----------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/detect.py b/detect.py
index 60a821b59a03..3af6baa0edc2 100644
--- a/detect.py
+++ b/detect.py
@@ -149,8
+149,8 @@ def run( det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() # Print results - for c in det[:, -1].unique(): - n = (det[:, -1] == c).sum() # detections per class + for c in det[:, 5].unique(): + n = (det[:, 5] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results diff --git a/segment/predict.py b/segment/predict.py index c24869a8866c..b29f3d2dfd8a 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -149,30 +149,26 @@ def run( annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): # Mask additions --------------------------------------------------------------------------------------- - masks_conf = det[:, 6:] - masks = process_mask_upsample(proto[i], masks_conf, det[:, :4], im.shape[2:]) # binary_mask(imh,imw,n) - masks = masks.permute(2, 0, 1).contiguous() # shape(n,imh,imw) - det = det[:, :6] # remove masks + masks = process_mask_upsample(proto[i], det[:, 6:], det[:, :4], im.shape[2:]) # HWC + masks = masks.permute(2, 0, 1).contiguous() # CHW # Mask additions --------------------------------------------------------------------------------------- # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() # Print results - for c in det[:, -1].unique(): - n = (det[:, -1] == c).sum() # detections per class + for c in det[:, 5].unique(): + n = (det[:, 5] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Mask plotting ---------------------------------------------------------------------------------------- mcolors = [colors(int(cls), True) for cls in det[:, 5]] - # NOTE: this plot method is faster, but the image might get blurred https://github.com/dbolya/yolact - img_masks = plot_masks(im[i], masks, mcolors) # image with masks shape(imh,imw,3) - img_masks = scale_masks(im.shape[2:], img_masks, im0.shape) # scale to original h, w - annotator.im = img_masks + im_masks = plot_masks(im[i], masks, mcolors) # image with masks shape(imh,imw,3) + annotator.im = scale_masks(im.shape[2:], im_masks, im0.shape) # scale to original h, w # Mask plotting ---------------------------------------------------------------------------------------- # Write results - for *xyxy, conf, cls in reversed(det): + for *xyxy, conf, cls in reversed(det[:, :6]): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format From 4cf5775abee316a5cc22612a8c2c1dabeec59315 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Tue, 23 Aug 2022 22:50:15 +0200 Subject: [PATCH 112/247] Update plot_images --- utils/plots.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/utils/plots.py b/utils/plots.py index 4628ca632a46..e7dbe92ced1a 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -188,17 +188,20 @@ def output_to_target(output, max_det=300): @threaded -def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=1920, max_subplots=16): +def plot_images(images, targets, paths=None, fname='images.jpg', names=None): # Plot image grid with labels if isinstance(images, torch.Tensor): images = images.cpu().float().numpy() if isinstance(targets, torch.Tensor): targets = targets.cpu().numpy() - if np.max(images[0]) <= 1: - images *= 255 # de-normalise (optional) + + max_size = 1920 # max image size + max_subplots = 16 # max image subplots, 
i.e. 4x4 bs, _, h, w = images.shape # batch size, _, height, width bs = min(bs, max_subplots) # limit plot images ns = np.ceil(bs ** 0.5) # number of subplots (square) + if np.max(images[0]) <= 1: + images *= 255 # de-normalise (optional) # Build Image mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init From 00a7117413901348a9ed8157532578992a4e6ff8 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Tue, 23 Aug 2022 23:00:10 +0200 Subject: [PATCH 113/247] Update plot_images_and_masks --- utils/plots.py | 5 ++ utils/segment/plots.py | 182 ++++++++++++++--------------------------- 2 files changed, 68 insertions(+), 119 deletions(-) diff --git a/utils/plots.py b/utils/plots.py index e7dbe92ced1a..3c1fc92d167a 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -121,6 +121,11 @@ def text(self, xy, text, txt_color=(255, 255, 255)): w, h = self.font.getsize(text) # text width, height self.draw.text((xy[0], xy[1] - h + 1), text, fill=txt_color, font=self.font) + def fromarray(self, im): + # Update self.im from a numpy array + self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) + self.draw = ImageDraw.Draw(self.im) + def result(self): # Return annotated image as array return np.asarray(self.im) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index da87245ab885..b7b2c0fca0de 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -6,10 +6,9 @@ import numpy as np import pandas as pd import torch -from PIL import Image -from ..general import xywh2xyxy, xyxy2xywh -from ..plots import colors +from ..general import threaded, xywh2xyxy +from ..plots import Annotator, colors def plot_masks(img, masks, colors, alpha=0.5): @@ -50,152 +49,97 @@ def plot_masks(img, masks, colors, alpha=0.5): return (img_gpu * 255).byte().cpu().numpy() -def plot_one_box(x, img, color=None, label=None, line_thickness=None): - import random - - # Plots one bounding box on image img - tl = (line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1) # line/font thickness - color = color or [random.randint(0, 255) for _ in range(3)] - c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) - cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) - if label: - tf = max(tl - 1, 1) # font thickness - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 - cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled - cv2.putText( - img, - label, - (c1[0], c1[1] - 2), - 0, - tl / 3, - [225, 255, 255], - thickness=tf, - lineType=cv2.LINE_AA, - ) - - -def plot_images_and_masks( - images, - targets, - masks, - paths=None, - fname="images.jpg", - names=None, - max_size=640, - max_subplots=16, -): +@threaded +def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg', names=None): + # Plot image grid with labels if isinstance(images, torch.Tensor): images = images.cpu().float().numpy() if isinstance(targets, torch.Tensor): targets = targets.cpu().numpy() if isinstance(masks, torch.Tensor): - masks = masks.cpu().numpy() - masks = masks.astype(int) + masks = masks.cpu().numpy().astype(int) - # un-normalise - if np.max(images[0]) <= 1: - images *= 255 - - tl = 3 # line thickness - tf = max(tl - 1, 1) # font thickness + max_size = 1920 # max image size + max_subplots = 16 # max image subplots, i.e. 
4x4 bs, _, h, w = images.shape # batch size, _, height, width bs = min(bs, max_subplots) # limit plot images ns = np.ceil(bs ** 0.5) # number of subplots (square) + if np.max(images[0]) <= 1: + images *= 255 # de-normalise (optional) - # Check if we should resize - scale_factor = max_size / max(h, w) - if scale_factor < 1: - h = math.ceil(scale_factor * h) - w = math.ceil(scale_factor * w) - + # Build Image mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init - for i, img in enumerate(images): + for i, im in enumerate(images): if i == max_subplots: # if last batch has fewer images than we expect break - - block_x = int(w * (i // ns)) - block_y = int(h * (i % ns)) - - img = img.transpose(1, 2, 0) - if scale_factor < 1: - img = cv2.resize(img, (w, h)) - - mosaic[block_y:block_y + h, block_x:block_x + w, :] = img + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + im = im.transpose(1, 2, 0) + mosaic[y:y + h, x:x + w, :] = im + + # Resize (optional) + scale = max_size / ns / max(h, w) + if scale < 1: + h = math.ceil(scale * h) + w = math.ceil(scale * w) + mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) + + # Annotate + fs = int((h + w) * ns * 0.01) # font size + annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names) + for i in range(i + 1): + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders + if paths: + annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames if len(targets) > 0: - idx = (targets[:, 0]).astype(int) - image_targets = targets[idx == i] + j = targets[:, 0] == i + ti = targets[j] # image targets if masks.max() > 1.0: # mean that masks are overlap image_masks = masks[[i]] # (1, 640, 640) # convert masks (1, 640, 640) -> (n, 640, 640) - nl = len(image_targets) + nl = len(ti) index = np.arange(nl).reshape(nl, 1, 1) + 1 image_masks = np.repeat(image_masks, nl, axis=0) image_masks = np.where(image_masks == index, 1.0, 0.0) else: - image_masks = masks[idx == i] + image_masks = masks[j] - boxes = xywh2xyxy(image_targets[:, 2:6]).T - classes = image_targets[:, 1].astype("int") - labels = image_targets.shape[1] == 6 # labels if no conf column - conf = (None if labels else image_targets[:, 6]) # check for confidence presence (label vs pred) + boxes = xywh2xyxy(ti[:, 2:6]).T + classes = ti[:, 1].astype('int') + labels = ti.shape[1] == 6 # labels if no conf column + conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) if boxes.shape[1]: if boxes.max() <= 1.01: # if normalized with tolerance 0.01 boxes[[0, 2]] *= w # scale to pixels boxes[[1, 3]] *= h - elif scale_factor < 1: # absolute coords need scale if image scales - boxes *= scale_factor - boxes[[0, 2]] += block_x - boxes[[1, 3]] += block_y - for j, box in enumerate(boxes.T): - cls = int(classes[j]) + elif scale < 1: # absolute coords need scale if image scales + boxes *= scale + boxes[[0, 2]] += x + boxes[[1, 3]] += y + for j, box in enumerate(boxes.T.tolist()): + cls = classes[j] color = colors(cls) cls = names[cls] if names else cls - if scale_factor < 1: - mask = image_masks[j].astype(np.uint8) - mask = cv2.resize(mask, (w, h)) - mask = mask.astype(np.bool) - else: - mask = image_masks[j].astype(np.bool) if labels or conf[j] > 0.25: # 0.25 conf thresh - label = "%s" % cls if labels else f"{cls} {conf[j]:.1f}" - plot_one_box(box, mosaic, label=label, color=color, 
line_thickness=tl) - mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] = \ - mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] * 0.35 + (np.array(color) * 0.65) - - # Draw image filename labels - if paths: - label = Path(paths[i]).name[:40] # trim to 40 char - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - cv2.putText( - mosaic, - label, - (block_x + 5, block_y + t_size[1] + 5), - 0, - tl / 3, - [220, 220, 220], - thickness=tf, - lineType=cv2.LINE_AA, - ) - - # Image border - cv2.rectangle( - mosaic, - (block_x, block_y), - (block_x + w, block_y + h), - (255, 255, 255), - thickness=3, - ) - - if fname: - r = min(1280.0 / max(h, w) / ns, 1.0) # ratio to limit image size - mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA) - # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save - with Image.fromarray(mosaic) as im: - im.save(fname) - return mosaic + label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}' + annotator.box_label(box, label, color=color) + + # Plot masks + im = np.asarray(annotator.im) + for j, box in enumerate(boxes.T.tolist()): + if conf[j] > 0.25: # 0.25 conf thresh + color = colors(classes[j]) + if scale < 1: + mask = image_masks[j].astype(np.uint8) + mask = cv2.resize(mask, (w, h)) + mask = mask.astype(np.bool) + else: + mask = image_masks[j].astype(np.bool) + im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 + annotator.fromarray(im) + annotator.im.save(fname) # save def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): @@ -210,7 +154,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11],) + 0.1 * data.values[:, 11], ) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From 1c94b4d48ec2e0978f42d0ab9aeaf4b40212715c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 23 Aug 2022 21:01:12 +0000 Subject: [PATCH 114/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/plots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index b7b2c0fca0de..5ca1ba707a3d 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -154,7 +154,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11], ) + 0.1 * data.values[:, 11],) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From 75cbbbb577a178acb0748397b730fb655f3637b0 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Tue, 23 Aug 2022 23:18:39 +0200 Subject: [PATCH 115/247] fix --- utils/segment/plots.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index b7b2c0fca0de..b98fbb770a51 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -127,9 +127,9 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' annotator.box_label(box, label, 
color=color) # Plot masks - im = np.asarray(annotator.im) + im = np.asarray(annotator.im).copy() for j, box in enumerate(boxes.T.tolist()): - if conf[j] > 0.25: # 0.25 conf thresh + if labels or conf[j] > 0.25: # 0.25 conf thresh color = colors(classes[j]) if scale < 1: mask = image_masks[j].astype(np.uint8) From 56be6c44de5256d69825b7b79bd92c5debe4b67d Mon Sep 17 00:00:00 2001 From: glennjocher Date: Tue, 23 Aug 2022 23:40:40 +0200 Subject: [PATCH 116/247] Add train to CI --- .github/workflows/ci-testing.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 0edd05dbf86c..da09b9017dd3 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -128,11 +128,18 @@ jobs: run: | m=${{ matrix.model }}-seg # official weights b=runs/train-seg/exp/weights/best # best.pt checkpoint + + d='../datasets' # unzip directory + f='coco128.zip' # or 'coco128-segments.zip', 68 MB + rm -rf $d + curl -L https://github.com/ultralytics/yolov5/releases/download/v1.0/$f -o $f -# && unzip -q $f -d $d && rm $f # Temporary tests untill yolov5n-seg.pt and COCO128-seg is ready --------------------------------------------- m=yolov5s-seg # official weights python segment/predict.py --imgsz 64 --weights $m.pt --device cpu # detect python export.py --weights $m.pt --img 64 --include torchscript # export + python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg yolov5n_seg.yaml --epochs 1 --device cpu # train + # Temporary tests untill yolov5n-seg.pt and COCO128-seg is ready --------------------------------------------- # python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg $m.yaml --epochs 1 --device cpu # train From f820f329aa5133ddc627eca310f747c4d12bef4d Mon Sep 17 00:00:00 2001 From: glennjocher Date: Tue, 23 Aug 2022 23:42:59 +0200 Subject: [PATCH 117/247] fix precommit --- .github/workflows/ci-testing.yml | 38 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index da09b9017dd3..ba341f11843e 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -128,7 +128,7 @@ jobs: run: | m=${{ matrix.model }}-seg # official weights b=runs/train-seg/exp/weights/best # best.pt checkpoint - + d='../datasets' # unzip directory f='coco128.zip' # or 'coco128-segments.zip', 68 MB rm -rf $d @@ -142,24 +142,24 @@ jobs: # Temporary tests untill yolov5n-seg.pt and COCO128-seg is ready --------------------------------------------- -# python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg $m.yaml --epochs 1 --device cpu # train -# python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train -# for d in cpu; do # devices -# for w in $m $b; do # weights -# python segment/val.py --imgsz 64 --batch 32 --weights $w.pt --device $d # val -# python segment/predict.py --imgsz 64 --weights $w.pt --device $d # detect -# done -# done -# # python hubconf.py --model $m # hub -# # python models/tf.py --weights $m.pt # build TF model -# python models/yolo.py --cfg $m.yaml # build PyTorch model -# python export.py --weights $m.pt --img 64 --include torchscript # export -# python - < Date: Tue, 23 Aug 2022 23:44:04 +0200 Subject: [PATCH 118/247] fix precommit CI --- .github/workflows/ci-testing.yml | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/.github/workflows/ci-testing.yml 
b/.github/workflows/ci-testing.yml
index ba341f11843e..2f8b65ae70dd 100644
--- a/.github/workflows/ci-testing.yml
+++ b/.github/workflows/ci-testing.yml
@@ -130,36 +130,15 @@ jobs:
         b=runs/train-seg/exp/weights/best # best.pt checkpoint

         d='../datasets' # unzip directory
-        f='coco128.zip' # or 'coco128-segments.zip', 68 MB
+        f='coco128-segments.zip'
         rm -rf $d
         curl -L https://github.com/ultralytics/yolov5/releases/download/v1.0/$f -o $f -# && unzip -q $f -d $d && rm $f
-        # Temporary tests until yolov5n-seg.pt and COCO128-seg is ready ---------------------------------------------
         m=yolov5s-seg # official weights
         python segment/predict.py --imgsz 64 --weights $m.pt --device cpu # detect
         python export.py --weights $m.pt --img 64 --include torchscript # export
         python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg yolov5n_seg.yaml --epochs 1 --device cpu # train
-        # Temporary tests until yolov5n-seg.pt and COCO128-seg is ready ---------------------------------------------
-
-        # python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg $m.yaml --epochs 1 --device cpu # train
-        # python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train
-        # for d in cpu; do # devices
-        # for w in $m $b; do # weights
-        # python segment/val.py --imgsz 64 --batch 32 --weights $w.pt --device $d # val
-        # python segment/predict.py --imgsz 64 --weights $w.pt --device $d # detect
-        # done
-        # done
-        # # python hubconf.py --model $m # hub
-        # # python models/tf.py --weights $m.pt # build TF model
-        # python models/yolo.py --cfg $m.yaml # build PyTorch model
-        # python export.py --weights $m.pt --img 64 --include torchscript # export
-        # python - <

Date: Tue, 23 Aug 2022 23:53:40 +0200
Subject: [PATCH 119/247] fix precommit pycocotools

---
 .github/workflows/ci-testing.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml
index 2f8b65ae70dd..eb05e72e95fd 100644
--- a/.github/workflows/ci-testing.yml
+++ b/.github/workflows/ci-testing.yml
@@ -134,6 +134,7 @@ jobs:
         rm -rf $d
         curl -L https://github.com/ultralytics/yolov5/releases/download/v1.0/$f -o $f -# && unzip -q $f -d $d && rm $f
+        pip install pycocotools
         m=yolov5s-seg # official weights
         python segment/predict.py --imgsz 64 --weights $m.pt --device cpu # detect
         python export.py --weights $m.pt --img 64 --include torchscript # export

From 492c89148f5c752113f19577a5c901c579396e21 Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Wed, 24 Aug 2022 00:09:35 +0200
Subject: [PATCH 120/247] fix val float issues

---
 segment/val.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/segment/val.py b/segment/val.py
index 7438426dfb88..273c22b01bc9 100644
--- a/segment/val.py
+++ b/segment/val.py
@@ -299,10 +299,9 @@ def run(
            # deal with masks
            midx = [si] if overlap else targets[:, 0] == si
-           gt_masks = masks[midx]
+           gt_masks = masks[midx].float()
            proto_out = train_out[1][si]
-           pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).permute(2, 0,
-                                                                                                     1).contiguous()
+           pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).permute(2, 0, 1).contiguous().float()

            if plots and batch_i < 3:
                # filter top 15 to plot
                plot_masks.append(torch.as_tensor(pred_masks[:15], dtype=torch.uint8).cpu())
@@ -447,9 +447,9 @@ def parse_opt():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
-    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)')
-    parser.add_argument('--batch-size', type=int, default=32, help='batch size')
-    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model.pt path(s)')
+    parser.add_argument('--batch-size', type=int, default=8, help='batch size')
+    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=320, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold')
    parser.add_argument('--task', default='val', help='train, val, test, speed or study')
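Note: the .float() casts introduced above matter because the ground-truth masks arrive as
integer tensors and the predicted masks as booleans, while the mask IoU used by the metrics
is computed with a matrix multiplication, which needs floating-point inputs. A small sketch
of that computation under the assumption of flattened binary masks — illustrative only, not
the exact repository mask_iou:

    import torch

    def mask_iou_sketch(m1, m2, eps=1e-7):
        # m1: (n, h*w), m2: (k, h*w), float tensors of 0s and 1s
        inter = m1 @ m2.T                                   # (n, k) overlapping pixel counts
        union = m1.sum(1)[:, None] + m2.sum(1)[None] - inter
        return inter / (union + eps)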
parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)') - parser.add_argument('--batch-size', type=int, default=32, help='batch size') - parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model.pt path(s)') + parser.add_argument('--batch-size', type=int, default=8, help='batch size') + parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=320, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold') parser.add_argument('--task', default='val', help='train, val, test, speed or study') From a86311444444d16f70d528645cd5915d75c0bd17 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 23 Aug 2022 22:10:07 +0000 Subject: [PATCH 121/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/val.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/segment/val.py b/segment/val.py index 273c22b01bc9..9b16c30a0bd4 100644 --- a/segment/val.py +++ b/segment/val.py @@ -301,7 +301,8 @@ def run( midx = [si] if overlap else targets[:, 0] == si gt_masks = masks[midx].float() proto_out = train_out[1][si] - pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).permute(2, 0, 1).contiguous().float() + pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], + shape=im[si].shape[1:]).permute(2, 0, 1).contiguous().float() if plots and batch_i < 3: # filter top 15 to plot plot_masks.append(torch.as_tensor(pred_masks[:15], dtype=torch.uint8).cpu()) From ba46f44c3b498182b7b5db6132dd614bd849dd2e Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 00:21:46 +0200 Subject: [PATCH 122/247] fix masks float float issues --- segment/val.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/segment/val.py b/segment/val.py index 9b16c30a0bd4..e3d514df1023 100644 --- a/segment/val.py +++ b/segment/val.py @@ -253,7 +253,8 @@ def run( if cuda: im = im.to(device, non_blocking=True) targets = targets.to(device) - masks = masks.to(device).float() + masks = masks.to(device) + masks = masks.float() im = im.half() if half else im.float() # uint8 to fp16/32 im /= 255 # 0 - 255 to 0.0 - 1.0 nb, _, height, width = im.shape # batch size, channels, height, width @@ -299,7 +300,7 @@ def run( # deal with masks midx = [si] if overlap else targets[:, 0] == si - gt_masks = masks[midx].float() + gt_masks = masks[midx] proto_out = train_out[1][si] pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).permute(2, 0, 1).contiguous().float() From bdb79e4bf34c434e8603e97a4f1964d796c48cc1 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 00:47:22 +0200 Subject: [PATCH 123/247] suppress errors --- utils/segment/plots.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 9367ad80b404..724ef7b6e7f8 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -1,3 +1,4 @@ +import contextlib import math from pathlib import Path @@ -137,7 +138,8 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' mask = 
mask.astype(np.bool) else: mask = image_masks[j].astype(np.bool) - im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 + with contextlib.suppress(Exception): + im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 annotator.fromarray(im) annotator.im.save(fname) # save @@ -149,12 +151,12 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): ax = ax.ravel() files = list(save_dir.glob("results*.csv")) assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." - for _, f in enumerate(files): + for f in files: try: data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11],) + 0.1 * data.values[:, 11], ) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): @@ -163,14 +165,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) if best: # best - ax[i].scatter( - index, - y[index], - color="r", - label=f"best:{index}", - marker="*", - linewidth=3, - ) + ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3) ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") else: # last From e66c15cecff07dc5c71f27e3dc3565b28d6765b9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 23 Aug 2022 22:48:01 +0000 Subject: [PATCH 124/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/plots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 724ef7b6e7f8..986b334f6606 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -156,7 +156,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11], ) + 0.1 * data.values[:, 11],) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From d6979baa5197c75b739e089095458f3921a4aaa9 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 02:00:25 +0200 Subject: [PATCH 125/247] fix no-predictions plotting bug --- segment/val.py | 6 +++--- utils/segment/plots.py | 48 ++++++++++++++++++++++-------------------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/segment/val.py b/segment/val.py index e3d514df1023..96c8354428dd 100644 --- a/segment/val.py +++ b/segment/val.py @@ -343,9 +343,9 @@ def run( mode="bilinear", align_corners=False, ).squeeze(0) - plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', - names) # labels - plot_masks = torch.cat(plot_masks, dim=0) + plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) + if any(plot_masks): + plot_masks = torch.cat(plot_masks, dim=0) plot_images_and_masks(im, output_to_target(out, max_det=15), plot_masks, paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 986b334f6606..280e6e6fe05a 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py 
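# Editor's sketch (not part of the patch): the "no-predictions plotting bug" fixed in
# PATCH 125 is torch.cat() raising a RuntimeError when a batch yields no detections and
# plot_masks stays an empty list; the later switch from any() to len() (PATCH 128) also
# avoids calling bool() on a multi-element tensor, which PyTorch rejects as ambiguous.
#
#   import torch
#   plot_masks = []                  # nothing collected for this batch
#   if len(plot_masks):              # torch.cat([]) would raise RuntimeError
#       plot_masks = torch.cat(plot_masks, dim=0)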
@@ -96,15 +96,16 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' j = targets[:, 0] == i ti = targets[j] # image targets - if masks.max() > 1.0: # means that the masks overlap - image_masks = masks[[i]] # (1, 640, 640) - # convert masks (1, 640, 640) -> (n, 640, 640) - nl = len(ti) - index = np.arange(nl).reshape(nl, 1, 1) + 1 - image_masks = np.repeat(image_masks, nl, axis=0) - image_masks = np.where(image_masks == index, 1.0, 0.0) - else: - image_masks = masks[j] + if any(masks): + if masks.max() > 1.0: # means that the masks overlap + image_masks = masks[[i]] # (1, 640, 640) + # convert masks (1, 640, 640) -> (n, 640, 640) + nl = len(ti) + index = np.arange(nl).reshape(nl, 1, 1) + 1 + image_masks = np.repeat(image_masks, nl, axis=0) + image_masks = np.where(image_masks == index, 1.0, 0.0) + else: + image_masks = masks[j] boxes = xywh2xyxy(ti[:, 2:6]).T classes = ti[:, 1].astype('int') @@ -128,19 +129,20 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' annotator.box_label(box, label, color=color) # Plot masks - im = np.asarray(annotator.im).copy() - for j, box in enumerate(boxes.T.tolist()): - if labels or conf[j] > 0.25: # 0.25 conf thresh - color = colors(classes[j]) - if scale < 1: - mask = image_masks[j].astype(np.uint8) - mask = cv2.resize(mask, (w, h)) - mask = mask.astype(np.bool) - else: - mask = image_masks[j].astype(np.bool) - with contextlib.suppress(Exception): - im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 - annotator.fromarray(im) + if any(masks): + im = np.asarray(annotator.im).copy() + for j, box in enumerate(boxes.T.tolist()): + if labels or conf[j] > 0.25: # 0.25 conf thresh + color = colors(classes[j]) + if scale < 1: + mask = image_masks[j].astype(np.uint8) + mask = cv2.resize(mask, (w, h)) + mask = mask.astype(np.bool) + else: + mask = image_masks[j].astype(np.bool) + with contextlib.suppress(Exception): + im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 + annotator.fromarray(im) annotator.im.save(fname) # save @@ -156,7 +158,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11],) + 0.1 * data.values[:, 11], ) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From 034b1a63c6a592a9c6df6e7ea0791a341ae4f3c2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 Aug 2022 00:02:12 +0000 Subject: [PATCH 126/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/plots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 280e6e6fe05a..ef7940f2dad1 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -158,7 +158,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11], ) + 0.1 * data.values[:, 11],) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From
5035ebd3974751ed1537e994a50b6a9c20b52d68 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 02:38:54 +0200 Subject: [PATCH 127/247] Add CSV Logger --- utils/loggers/__init__.py | 14 +++++++++++--- utils/segment/plots.py | 22 ++++++++++------------ 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index a59dbd31c073..42673d211f8d 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -245,6 +245,7 @@ def __init__(self, opt, console_logger, include=('tb', 'wandb')): self.save_dir = Path(opt.save_dir) self.include = include self.console_logger = console_logger + self.csv = self.save_dir / 'results.csv' # CSV logger if 'tb' in self.include: prefix = colorstr('TensorBoard: ') self.console_logger.info( @@ -258,14 +259,21 @@ def __init__(self, opt, console_logger, include=('tb', 'wandb')): else: self.wandb = None - def log_metrics(self, metrics_dict, epoch): + def log_metrics(self, metrics, epoch): # Log metrics dictionary to all loggers + if self.csv: + keys, vals = list(metrics.keys()), list(metrics.values()) + n = len(metrics) + 1 # number of cols + s = '' if self.csv.exists() else (('%23s,' * n % tuple(['epoch'] + keys)).rstrip(',') + '\n') # header + with open(self.csv, 'a') as f: + f.write(s + ('%23.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n') + if self.tb: - for k, v in metrics_dict.items(): + for k, v in metrics.items(): self.tb.add_scalar(k, v, epoch) if self.wandb: - self.wandb.log(metrics_dict, step=epoch) + self.wandb.log(metrics, step=epoch) def log_images(self, files, name='Images', epoch=0): # Log images to all loggers diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 280e6e6fe05a..e9ee819c2fe7 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -96,17 +96,6 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' j = targets[:, 0] == i ti = targets[j] # image targets - if any(masks): - if masks.max() > 1.0: # means that the masks overlap - image_masks = masks[[i]] # (1, 640, 640) - # convert masks (1, 640, 640) -> (n, 640, 640) - nl = len(ti) - index = np.arange(nl).reshape(nl, 1, 1) + 1 - image_masks = np.repeat(image_masks, nl, axis=0) - image_masks = np.where(image_masks == index, 1.0, 0.0) - else: - image_masks = masks[j] - boxes = xywh2xyxy(ti[:, 2:6]).T classes = ti[:, 1].astype('int') labels = ti.shape[1] == 6 # labels if no conf column @@ -129,7 +118,16 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' annotator.box_label(box, label, color=color) # Plot masks - if any(masks): + if len(masks): + if masks.max() > 1.0: # means that the masks overlap + image_masks = masks[[i]] # (1, 640, 640) + nl = len(ti) + index = np.arange(nl).reshape(nl, 1, 1) + 1 + image_masks = np.repeat(image_masks, nl, axis=0) + image_masks = np.where(image_masks == index, 1.0, 0.0) + else: + image_masks = masks[j] + im = np.asarray(annotator.im).copy() for j, box in enumerate(boxes.T.tolist()): if labels or conf[j] > 0.25: # 0.25 conf thresh From be5a244be9a3ce470ccdc3468e26b96876684e0b Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 02:40:24 +0200 Subject: [PATCH 128/247] fix val len(plot_masks) --- segment/val.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/val.py b/segment/val.py index 96c8354428dd..12f7c9fc3476 100644 --- a/segment/val.py +++ b/segment/val.py @@ -344,7 +344,7 @@ def run( align_corners=False, ).squeeze(0) plot_images_and_masks(im,
targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) - if any(plot_masks): + if len(plot_masks): plot_masks = torch.cat(plot_masks, dim=0) plot_images_and_masks(im, output_to_target(out, max_det=15), plot_masks, paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred From 4fad59cfb45253a651dd536bd468fd09ff09ffa9 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Wed, 24 Aug 2022 10:52:53 +0800 Subject: [PATCH 129/247] speed up evaluation --- segment/val.py | 19 ++++++++----------- utils/segment/plots.py | 3 ++- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/segment/val.py b/segment/val.py index 12f7c9fc3476..d014131b7ddd 100644 --- a/segment/val.py +++ b/segment/val.py @@ -122,6 +122,7 @@ def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap): mode="bilinear", align_corners=False, ).squeeze(0) + gt_masks = gt_masks.gt_(0.5) iou = mask_iou( gt_masks.view(gt_masks.shape[0], -1), @@ -171,7 +172,7 @@ def run( mask_downsample_ratio=1, compute_loss=None, ): - process = process_mask_upsample if plots else process_mask + process = process_mask_upsample if save_json else process_mask # Initialize/load model and set device training = model is not None if training: # called by train.py @@ -304,9 +305,6 @@ def run( proto_out = train_out[1][si] pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).permute(2, 0, 1).contiguous().float() - if plots and batch_i < 3: - # filter top 15 to plot - plot_masks.append(torch.as_tensor(pred_masks[:15], dtype=torch.uint8).cpu()) # Predictions if single_cls: @@ -326,6 +324,12 @@ def run( stats.append( (correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct, conf, pcls, tcls) + # convert pred_masks to uint8 + pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8) + if plots and batch_i < 3: + # filter top 15 to plot + plot_masks.append(pred_masks[:15].cpu()) + # Save/log if save_txt: save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt')) @@ -336,13 +340,6 @@ def run( # Plot images if plots and batch_i < 3: - if masks.shape[1:] != im.shape[2:]: - masks = F.interpolate( - masks.unsqueeze(0).float(), - im.shape[2:], - mode="bilinear", - align_corners=False, - ).squeeze(0) plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) if len(plot_masks): plot_masks = torch.cat(plot_masks, dim=0) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 11b7081f4995..6303103ed084 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -132,7 +132,8 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' for j, box in enumerate(boxes.T.tolist()): if labels or conf[j] > 0.25: # 0.25 conf thresh color = colors(classes[j]) - if scale < 1: + mh, mw = image_masks[j].shape[:2] + if mh != h or mw != w: mask = image_masks[j].astype(np.uint8) mask = cv2.resize(mask, (w, h)) mask = mask.astype(np.bool) From ce6d849cf536e11172e95a45102c40c48612bf16 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Wed, 24 Aug 2022 11:26:31 +0800 Subject: [PATCH 130/247] fix process_mask --- utils/segment/general.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index fe4898b2cdd4..80286e3fd94b 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -44,7 +44,7 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): """ c, mh, 
mw = proto_out.shape # CHW - masks = (out_masks.tanh() @ proto_out.view(c, -1)).sigmoid().view(-1, mh, mw) + masks = (out_masks.tanh() @ proto_out.float().view(c, -1)).sigmoid().view(-1, mh, mw) masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW masks = crop(masks.permute(1, 2, 0).contiguous(), bboxes) # HWC return masks.gt_(0.5) @@ -63,7 +63,7 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): c, mh, mw = proto_out.shape # CHW ih, iw = shape - masks = (out_masks.tanh() @ proto_out.view(c, -1)).sigmoid().view(-1, mh, mw) # CHW + masks = (out_masks.tanh() @ proto_out.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW downsampled_bboxes = bboxes.clone() downsampled_bboxes[:, 0] *= mw / iw From 61212a6a22aab6965e28dae25a5ec841965031eb Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Wed, 24 Aug 2022 11:26:40 +0800 Subject: [PATCH 131/247] fix plots --- utils/segment/plots.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 6303103ed084..4517ff455cba 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -93,8 +93,8 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' if paths: annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames if len(targets) > 0: - j = targets[:, 0] == i - ti = targets[j] # image targets + idx = targets[:, 0] == i + ti = targets[idx] # image targets boxes = xywh2xyxy(ti[:, 2:6]).T classes = ti[:, 1].astype('int') @@ -126,13 +126,13 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' image_masks = np.repeat(image_masks, nl, axis=0) image_masks = np.where(image_masks == index, 1.0, 0.0) else: - image_masks = masks[j] + image_masks = masks[idx] im = np.asarray(annotator.im).copy() for j, box in enumerate(boxes.T.tolist()): if labels or conf[j] > 0.25: # 0.25 conf thresh color = colors(classes[j]) - mh, mw = image_masks[j].shape[:2] + mh, mw = image_masks[j].shape if mh != h or mw != w: mask = image_masks[j].astype(np.uint8) mask = cv2.resize(mask, (w, h)) From c00c632da2afcc310c6f7861238d43a1c0524923 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 12:54:59 +0200 Subject: [PATCH 132/247] update segment/utils build_targets --- utils/segment/loss.py | 65 ++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 38 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index bff4b25ca867..c8bdc6a36dac 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F +from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import is_parallel -from .general import crop, masks_iou class MaskIOULoss(nn.Module): @@ -39,6 +39,7 @@ def __init__(self, model, autobalance=False, overlap=False): self.overlap = overlap device = next(model.parameters()).device # get model device h = model.hyp # hyperparameters + self.device = device # Define criteria BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device)) @@ -190,58 +191,49 @@ def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets tcls, tbox, indices, anch, tidxs, xywh = [], [], [], [], [], 
[] - gain = torch.ones(8, device=targets.device) # normalized to gridspace gain - ai = (torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, - nt)) # same as .repeat_interleave(nt) + gain = torch.ones(8, device=self.device) # normalized to gridspace gain + ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) if self.overlap: batch = p[0].shape[0] ti = [] for i in range(batch): - # find number of targets of each image - num = (targets[:, 0] == i).sum() - # (na, num) - ti.append(torch.arange(num, device=targets.device).float().view(1, num).repeat(na, 1) + 1) - # (na, nt) - ti = torch.cat(ti, 1) + num = (targets[:, 0] == i).sum() # find number of targets of each image + ti.append(torch.arange(num, device=targets.device).float().view(1, num).repeat(na, 1) + 1) # (na, num) + ti = torch.cat(ti, 1) # (na, nt) else: - ti = (torch.arange(nt, device=targets.device).float().view(1, - nt).repeat(na, - 1)) # same as .repeat_interleave(nt) - - targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None], ti[:, :, None]), 2) # append anchor indices + ti = torch.arange(nt, device=targets.device).float().view(1, nt).repeat(na, 1) + targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None], ti[..., None]), 2) # append anchor indices g = 0.5 # bias - off = ( - torch.tensor( - [ - [0, 0], - [1, 0], - [0, 1], - [-1, 0], - [0, -1], # j,k,l,m - # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm - ], - device=targets.device, - ).float() * g) # offsets + off = torch.tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + device=self.device).float() * g # offsets for i in range(self.nl): anchors, shape = self.anchors[i], p[i].shape gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain # Match targets to anchors - t = targets * gain + t = targets * gain # shape(3,n,7) if nt: # Matches - r = t[:, :, 4:6] / anchors[:, None] # wh ratio - j = torch.max(r, 1.0 / r).max(2)[0] < self.hyp["anchor_t"] # compare + r = t[..., 4:6] / anchors[:, None] # wh ratio + j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t'] # compare # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) t = t[j] # filter # Offsets gxy = t[:, 2:4] # grid xy gxi = gain[[2, 3]] - gxy # inverse - j, k = ((gxy % 1.0 < g) & (gxy > 1.0)).T - l, m = ((gxi % 1.0 < g) & (gxi > 1.0)).T + j, k = ((gxy % 1 < g) & (gxy > 1)).T + l, m = ((gxi % 1 < g) & (gxi > 1)).T j = torch.stack((torch.ones_like(j), j, k, l, m)) t = t.repeat((5, 1, 1))[j] offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] @@ -250,15 +242,12 @@ def build_targets(self, p, targets): offsets = 0 # Define - b, c = t[:, :2].long().T # image, class - gxy = t[:, 2:4] # grid xy - gwh = t[:, 4:6] # grid wh + bc, gxy, gwh, at = t.chunk(4, 1) # (image, class), grid xy, grid wh, anchors + (a, tidx), (b, c) = at.long().T, bc.long().T # anchors, image, class gij = (gxy - offsets).long() - gi, gj = gij.T # grid xy indices + gi, gj = gij.T # grid indices # Append - a = t[:, 6].long() # anchor indices - tidx = t[:, 7].long() indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid tbox.append(torch.cat((gxy - gij, gwh), 1)) # box anch.append(anchors[a]) # anchors From bd277b76d1104e7bea962c38c17016c34a4b0706 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 Aug 2022 10:57:43 +0000 Subject: 
[PATCH 133/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index c8bdc6a36dac..94f64dfcc3a7 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F -from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import is_parallel +from .general import crop, masks_iou class MaskIOULoss(nn.Module): From c37820f4959273551107bf8d32997820e47a9c19 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 14:02:47 +0200 Subject: [PATCH 134/247] optimize utils/segment/general crop() --- utils/segment/general.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index 80286e3fd94b..e9a5c904fdc1 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -12,24 +12,20 @@ def crop(masks, boxes): - masks should be a size [h, w, n] tensor of masks - boxes should be a size [n, 4] tensor of bbox coords in relative point form """ - h, w, n = masks.size() - x1, x2 = boxes[:, 0], boxes[:, 2] - y1, y2 = boxes[:, 1], boxes[:, 3] + h, w, n = masks.shape + x1, y1, x2, y2 = torch.chunk(boxes.T[None], 4, 1) # x1 shape(1,1,n) - rows = (torch.arange(w, device=masks.device, dtype=x1.dtype).view(1, -1, 1).expand(h, w, n)) - cols = (torch.arange(h, device=masks.device, dtype=x1.dtype).view(-1, 1, 1).expand(h, w, n)) + rows = torch.arange(w, device=masks.device, dtype=x1.dtype).view(1, -1, 1).expand(h, w, n) # shape(h,w,n) + cols = torch.arange(h, device=masks.device, dtype=x1.dtype).view(-1, 1, 1).expand(h, w, n) # shape(h,w,n) # (1, w, 1), (1, 1, n) -> (1, w, n) - masks_left = rows >= x1.view(1, 1, -1) - masks_right = rows < x2.view(1, 1, -1) + masks_left = rows >= x1 # shape(h,w,n) + masks_right = rows < x2 # shape(h,w,n) # (h, 1, 1), (1, 1, n) -> (h, 1, n) - masks_up = cols >= y1.view(1, 1, -1) - masks_down = cols < y2.view(1, 1, -1) + masks_up = cols >= y1 # shape(h,w,n) + masks_down = cols < y2 # shape(h,w,n) - # (h, w, n) - crop_mask = masks_left * masks_right * masks_up * masks_down - - return masks * crop_mask.float() + return masks * (masks_left * masks_right * masks_up * masks_down).float() def process_mask_upsample(proto_out, out_masks, bboxes, shape): From bf9e19af3c56a2a7af73d69a7c58ca37e1490ced Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 15:30:17 +0200 Subject: [PATCH 135/247] optimize utils/segment/general crop() 2 --- utils/segment/general.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index e9a5c904fdc1..cc8cc2997541 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -12,20 +12,12 @@ def crop(masks, boxes): - masks should be a size [h, w, n] tensor of masks - boxes should be a size [n, 4] tensor of bbox coords in relative point form """ + h, w, n = masks.shape x1, y1, x2, y2 = torch.chunk(boxes.T[None], 4, 1) # x1 shape(1,1,n) - - rows = torch.arange(w, device=masks.device, dtype=x1.dtype).view(1, -1, 1).expand(h, w, n) # shape(h,w,n) - cols = torch.arange(h, device=masks.device, dtype=x1.dtype).view(-1, 1, 1).expand(h, w, n) # shape(h,w,n) - - # (1, w, 1), (1, 1, n) -> (1, w, n) - masks_left = 
rows >= x1 # shape(h,w,n) - masks_right = rows < x2 # shape(h,w,n) - # (h, 1, 1), (1, 1, n) -> (h, 1, n) - masks_up = cols >= y1 # shape(h,w,n) - masks_down = cols < y2 # shape(h,w,n) - - return masks * (masks_left * masks_right * masks_up * masks_down).float() + r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, :, None] # rows shape(1,w,1) + c = torch.arange(h, device=masks.device, dtype=x1.dtype)[:, None, None] # cols shape(h,1,1) + return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)).float() def process_mask_upsample(proto_out, out_masks, bboxes, shape): From d276b6791189589bdb0552165511ba5aba20220e Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 16:00:15 +0200 Subject: [PATCH 136/247] minor updates --- segment/val.py | 2 +- utils/segment/loss.py | 15 ++++++--------- utils/segment/plots.py | 2 +- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/segment/val.py b/segment/val.py index d014131b7ddd..7f8361a53c24 100644 --- a/segment/val.py +++ b/segment/val.py @@ -113,7 +113,7 @@ def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap): nl = len(labels) index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1 gt_masks = gt_masks.repeat(nl, 1, 1) - gt_masks = torch.where(gt_masks == index, 1.0, 0.0) + gt_masks = torch.where(gt_masks == index) if gt_masks.shape[1:] != pred_masks.shape[1:]: gt_masks = F.interpolate( diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 94f64dfcc3a7..5aa617ae2065 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F +from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import is_parallel -from .general import crop, masks_iou class MaskIOULoss(nn.Module): @@ -122,9 +122,9 @@ def __call__(self, preds, targets, masks): # predictions, targets, model # Mask Regression # TODO: # [bs * num_objs, img_h, img_w] -> [bs * num_objs, mask_h, mask_w] - downsampled_masks = F.interpolate(masks[None, :], (mask_h, mask_w), + downsampled_masks = F.interpolate(masks[None], (mask_h, mask_w), mode="bilinear", - align_corners=False).squeeze(0) + align_corners=False)[0] mxywh = xywh[i] mws, mhs = mxywh[:, 2:].T @@ -138,10 +138,8 @@ def __call__(self, preds, targets, masks): # predictions, targets, model index = b == bi if self.overlap: mask_index = tidxs[i][index] - # h, w, n - mask_gti = downsampled_masks[bi][:, :, None].repeat(1, 1, index.sum()) - # h, w, n - mask_gti = torch.where(mask_gti == mask_index, 1.0, 0.0) + mask_gti = downsampled_masks[bi][:, :, None].repeat(1, 1, index.sum()) # shape(h,w,n) + mask_gti = torch.where(mask_gti == mask_index) # shape(h,w,n) else: mask_gti = downsampled_masks[tidxs[i]][index] mask_gti = mask_gti.permute(1, 2, 0).contiguous() @@ -151,8 +149,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model psi = ps[index][:, 5:self.nm] proto = proto_out[bi] - one_lseg = self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) - batch_lseg += one_lseg + batch_lseg += self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) # # update tobj # iou = iou.detach().clamp(0).type(tobj.dtype) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 4517ff455cba..901d55888f44 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -124,7 +124,7 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' nl = len(ti) 
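# Editor's sketch (illustrative values only, not part of the patch): the overlap
# encoding decoded here packs all instances of one image into a single (h,w) map whose
# pixel values are instance ids 1..n, with 0 as background; comparing it against an
# (n,1,1) index array broadcasts it back out to n binary masks.
#
#   import numpy as np
#   overlap = np.array([[0, 1, 1], [2, 2, 0]])         # one (h,w) map, ids 1..2
#   idx = np.arange(2).reshape(2, 1, 1) + 1            # shape (n,1,1)
#   binary = np.where(overlap[None] == idx, 1.0, 0.0)  # shape (n,h,w)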
index = np.arange(nl).reshape(nl, 1, 1) + 1 image_masks = np.repeat(image_masks, nl, axis=0) - image_masks = np.where(image_masks == index, 1.0, 0.0) + image_masks = np.where(image_masks == index) else: image_masks = masks[idx] From 854f728c1cc96ebae2237ce345ec256f84f8309e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 Aug 2022 14:00:40 +0000 Subject: [PATCH 137/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/loss.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 5aa617ae2065..bd9501205038 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F -from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import is_parallel +from .general import crop, masks_iou class MaskIOULoss(nn.Module): @@ -122,8 +122,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model # Mask Regression # TODO: # [bs * num_objs, img_h, img_w] -> [bs * num_objs, mask_h, mask_w] - downsampled_masks = F.interpolate(masks[None], (mask_h, mask_w), - mode="bilinear", + downsampled_masks = F.interpolate(masks[None], (mask_h, mask_w), mode="bilinear", align_corners=False)[0] mxywh = xywh[i] From f1a533d207894c71804bd090ee8bb41c1bf28326 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 16:47:29 +0200 Subject: [PATCH 138/247] torch.where revert --- segment/val.py | 2 +- utils/segment/loss.py | 2 +- utils/segment/plots.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/segment/val.py b/segment/val.py index 7f8361a53c24..d014131b7ddd 100644 --- a/segment/val.py +++ b/segment/val.py @@ -113,7 +113,7 @@ def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap): nl = len(labels) index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1 gt_masks = gt_masks.repeat(nl, 1, 1) - gt_masks = torch.where(gt_masks == index) + gt_masks = torch.where(gt_masks == index, 1.0, 0.0) if gt_masks.shape[1:] != pred_masks.shape[1:]: gt_masks = F.interpolate( diff --git a/utils/segment/loss.py b/utils/segment/loss.py index bd9501205038..13c62ab274eb 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -138,7 +138,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model if self.overlap: mask_index = tidxs[i][index] mask_gti = downsampled_masks[bi][:, :, None].repeat(1, 1, index.sum()) # shape(h,w,n) - mask_gti = torch.where(mask_gti == mask_index) # shape(h,w,n) + mask_gti = torch.where(mask_gti == mask_index, 1.0, 0.0) # shape(h,w,n) else: mask_gti = downsampled_masks[tidxs[i]][index] mask_gti = mask_gti.permute(1, 2, 0).contiguous() diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 901d55888f44..4517ff455cba 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -124,7 +124,7 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' nl = len(ti) index = np.arange(nl).reshape(nl, 1, 1) + 1 image_masks = np.repeat(image_masks, nl, axis=0) - image_masks = np.where(image_masks == index) + image_masks = np.where(image_masks == index, 1.0, 0.0) else: image_masks = masks[idx] From e0e256d7886edddb9b2723204dd7a160db660d35 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 
17:03:19 +0200 Subject: [PATCH 139/247] downsample only if different shape --- utils/segment/loss.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 13c62ab274eb..f757fde30c30 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F +from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import is_parallel -from .general import crop, masks_iou class MaskIOULoss(nn.Module): @@ -120,10 +120,9 @@ def __call__(self, preds, targets, masks): # predictions, targets, model lcls += self.BCEcls(ps[:, self.nm:], t) # BCE # Mask Regression - # TODO: - # [bs * num_objs, img_h, img_w] -> [bs * num_objs, mask_h, mask_w] - downsampled_masks = F.interpolate(masks[None], (mask_h, mask_w), mode="bilinear", - align_corners=False)[0] + if tuple(masks.shape[-2:]) != (mask_h, mask_w): + # downsample shape(bs * num_objs,img_h,img_w) -> (bs * num_objs,mask_h,mask_w) + masks = F.interpolate(masks[None], (mask_h, mask_w), mode="bilinear", align_corners=False)[0] mxywh = xywh[i] mws, mhs = mxywh[:, 2:].T @@ -137,10 +136,10 @@ def __call__(self, preds, targets, masks): # predictions, targets, model index = b == bi if self.overlap: mask_index = tidxs[i][index] - mask_gti = downsampled_masks[bi][:, :, None].repeat(1, 1, index.sum()) # shape(h,w,n) + mask_gti = masks[bi][:, :, None].repeat(1, 1, index.sum()) # shape(h,w,n) mask_gti = torch.where(mask_gti == mask_index, 1.0, 0.0) # shape(h,w,n) else: - mask_gti = downsampled_masks[tidxs[i]][index] + mask_gti = masks[tidxs[i]][index] mask_gti = mask_gti.permute(1, 2, 0).contiguous() mw, mh = mws[index], mhs[index] From da11068ea9dade8958574ca3b85b16037c07ce54 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 Aug 2022 15:03:52 +0000 Subject: [PATCH 140/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index f757fde30c30..4469cb7658a7 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F -from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import is_parallel +from .general import crop, masks_iou class MaskIOULoss(nn.Module): From 95a999434ec200fa7a3bc41e83a38d517a873a2f Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 17:30:18 +0200 Subject: [PATCH 141/247] loss cleanup --- utils/segment/loss.py | 40 ++++++++++++++-------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index f757fde30c30..473e2f65dad5 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -45,8 +45,6 @@ def __init__(self, model, autobalance=False, overlap=False): BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device)) BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["obj_pw"]], device=device)) - self.mask_loss = MaskIOULoss() - # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0)) # 
positive, negative BCE targets @@ -58,13 +56,8 @@ def __init__(self, model, autobalance=False, overlap=False): det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index - self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = ( - BCEcls, - BCEobj, - 1.0, - h, - autobalance, - ) + self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance, + self.mask_loss = MaskIOULoss() for k in "na", "nc", "nl", "anchors", "nm": if hasattr(det, k): setattr(self, k, getattr(det, k)) @@ -76,13 +69,12 @@ def __call__(self, preds, targets, masks): # predictions, targets, model mask_h, mask_w = proto_out.shape[2:] proto_out = proto_out.permute(0, 2, 3, 1) - device = targets.device - lcls, lbox, lobj, lseg = ( - torch.zeros(1, device=device), - torch.zeros(1, device=device), - torch.zeros(1, device=device), - torch.zeros(1, device=device), - ) + device = self.device + lcls = torch.zeros(1, device=device) + lbox = torch.zeros(1, device=device) + lobj = torch.zeros(1, device=device) + lseg = torch.zeros(1, device=device) + tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets(p, targets) # targets # Losses for i, pi in enumerate(p): # layer index, layer predictions @@ -101,17 +93,13 @@ def __call__(self, preds, targets, masks): # predictions, targets, model lbox += (1.0 - iou).mean() # iou loss # Objectness - score_iou = iou.detach().clamp(0).type(tobj.dtype) + iou = iou.detach().clamp(0).type(tobj.dtype) if self.sort_obj_iou: - sort_id = torch.argsort(score_iou) - b, a, gj, gi, score_iou = ( - b[sort_id], - a[sort_id], - gj[sort_id], - gi[sort_id], - score_iou[sort_id], - ) - tobj[b, a, gj, gi] = 1.0 * ((1.0 - self.gr) + self.gr * score_iou) # iou ratio + j = iou.argsort() + b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j] + if self.gr < 1: + iou = (1.0 - self.gr) + self.gr * iou + tobj[b, a, gj, gi] = iou # iou ratio # Classification if self.nc > 1: # cls loss (only if multiple classes) From 868385f915e240f0c4306b2b527cd1bcabab6ada Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 19:00:03 +0200 Subject: [PATCH 142/247] loss cleanup 2 --- utils/segment/loss.py | 60 +++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 5c37f5d19ca2..7d2880712d29 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F +from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou -from ..torch_utils import is_parallel -from .general import crop, masks_iou +from ..torch_utils import de_parallel class MaskIOULoss(nn.Module): @@ -42,52 +42,52 @@ def __init__(self, model, autobalance=False, overlap=False): self.device = device # Define criteria - BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device)) - BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["obj_pw"]], device=device)) + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) + BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 - self.cp, self.cn = 
smooth_BCE(eps=h.get("label_smoothing", 0.0)) # positive, negative BCE targets + self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets # Focal loss - g = h["fl_gamma"] # focal loss gamma + g = h['fl_gamma'] # focal loss gamma if g > 0: BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) - det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module - self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 - self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index - self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance, + m = de_parallel(model).model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index + self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance self.mask_loss = MaskIOULoss() - for k in "na", "nc", "nl", "anchors", "nm": - if hasattr(det, k): - setattr(self, k, getattr(det, k)) + self.na = m.na # number of anchors + self.nc = m.nc # number of classes + self.nl = m.nl # number of layers + self.nm = m.nm # number of masks + self.anchors = m.anchors + self.device = device def __call__(self, preds, targets, masks): # predictions, targets, model - p = preds[0] - # [batch-size, mask_dim, mask_height, mask_width] - proto_out = preds[1] + p, proto_out = preds # proto_out shape(bs, masks, mask_h, mask_w) mask_h, mask_w = proto_out.shape[2:] proto_out = proto_out.permute(0, 2, 3, 1) - device = self.device - lcls = torch.zeros(1, device=device) - lbox = torch.zeros(1, device=device) - lobj = torch.zeros(1, device=device) - lseg = torch.zeros(1, device=device) + lcls = torch.zeros(1, device=self.device) + lbox = torch.zeros(1, device=self.device) + lobj = torch.zeros(1, device=self.device) + lseg = torch.zeros(1, device=self.device) tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets(p, targets) # targets + # Losses for i, pi in enumerate(p): # layer index, layer predictions b, a, gj, gi = indices[i] # image, anchor, gridy, gridx - tobj = torch.zeros_like(pi[..., 0], device=device) # target obj + tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device) # target obj n = b.shape[0] # number of targets if n: - ps = pi[b, a, gj, gi] # prediction subset corresponding to targets + pxy, pwh, _, pmask, pcls, = pi[b, a, gj, gi].split((2, 2, 1, 32, self.nc), 1) # subset of predictions # Regression - pxy = ps[:, :2].sigmoid() * 2.0 - 0.5 - pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] + pxy = pxy.sigmoid() * 2 - 0.5 + pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target) lbox += (1.0 - iou).mean() # iou loss @@ -103,9 +103,9 @@ def __call__(self, preds, targets, masks): # predictions, targets, model # Classification if self.nc > 1: # cls loss (only if multiple classes) - t = torch.full_like(ps[:, self.nm:], self.cn, device=device) # targets + t = torch.full_like(pcls, self.cn, device=self.device) # targets t[range(n), tcls[i]] = self.cp - lcls += self.BCEcls(ps[:, self.nm:], t) # BCE + lcls += self.BCEcls(pcls, t) # BCE # Mask Regression if tuple(masks.shape[-2:]) != (mask_h, mask_w): @@ -119,7 +119,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model torch.tensor([mask_w, mask_h,
mask_w, mask_h], device=mxywh.device)) mxyxys = xywh2xyxy(mxywhs) - batch_lseg = torch.zeros(1, device=device) + batch_lseg = torch.zeros(1, device=self.device) for bi in b.unique(): index = b == bi if self.overlap: @@ -132,7 +132,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model mw, mh = mws[index], mhs[index] mxyxy = mxyxys[index] - psi = ps[index][:, 5:self.nm] + psi = pmask[index] proto = proto_out[bi] batch_lseg += self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) From 226c96ae866e9b3898f6d603bb0a070253e9283f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 Aug 2022 17:00:41 +0000 Subject: [PATCH 143/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 7d2880712d29..f01ef1683399 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F -from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import de_parallel +from .general import crop, masks_iou class MaskIOULoss(nn.Module): From 5b52c941126d2b833e25b2f14dfc1db2feab19dc Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 19:12:07 +0200 Subject: [PATCH 144/247] loss cleanup 3 --- utils/segment/loss.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index f01ef1683399..719424478621 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -66,9 +66,9 @@ def __init__(self, model, autobalance=False, overlap=False): self.device = device def __call__(self, preds, targets, masks): # predictions, targets, model - p, proto_out = preds # proto_out shape(bs, masks, mask_h, mask_w) - mask_h, mask_w = proto_out.shape[2:] - proto_out = proto_out.permute(0, 2, 3, 1) + p, proto = preds + bs, nm, mask_h, mask_w = proto.shape # proto shape(bs, mask_h, mask_w, num_masks) + proto = proto.permute(0, 2, 3, 1) lcls = torch.zeros(1, device=self.device) lbox = torch.zeros(1, device=self.device) @@ -83,7 +83,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model n = b.shape[0] # number of targets if n: - pxy, pwh, _, pmask, pcls, = pi[b, a, gj, gi].split((2, 2, 1, 32, self.nc), 1) # subset of predictions + pxy, pwh, _, pmask, pcls, = pi[b, a, gj, gi].split((2, 2, 1, nm, self.nc), 1) # subset of predictions # Regression pxy = pxy.sigmoid() * 2 - 0.5 @@ -121,21 +121,16 @@ def __call__(self, preds, targets, masks): # predictions, targets, model batch_lseg = torch.zeros(1, device=self.device) for bi in b.unique(): - index = b == bi + j = b == bi # matching index if self.overlap: - mask_index = tidxs[i][index] - mask_gti = masks[bi][:, :, None].repeat(1, 1, index.sum()) # shape(h,w,n) + mask_index = tidxs[i][j] + mask_gti = masks[bi][:, :, None].repeat(1, 1, j.sum()) # shape(h,w,n) mask_gti = torch.where(mask_gti == mask_index, 1.0, 0.0) # shape(h,w,n) else: - mask_gti = masks[tidxs[i]][index] + mask_gti = masks[tidxs[i]][j] mask_gti = mask_gti.permute(1, 2, 0).contiguous() - mw, mh = mws[index], mhs[index] - mxyxy = mxyxys[index] - psi = pmask[index] - proto = proto_out[bi] - - batch_lseg += self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) + 
batch_lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxys[j], mws[j], mhs[j]) # # update tobj # iou = iou.detach().clamp(0).type(tobj.dtype) From cf40b17375c22b05ca6b644853a4847654cd2126 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 19:24:19 +0200 Subject: [PATCH 145/247] update project names --- segment/train.py | 4 ++-- segment/val.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/segment/train.py b/segment/train.py index 55b9c53a7ef8..223e331db6ab 100644 --- a/segment/train.py +++ b/segment/train.py @@ -499,7 +499,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary def parse_opt(known=False): parser = argparse.ArgumentParser() - parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path') + parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s-seg.pt', help='initial weights path') parser.add_argument('--cfg', type=str, default='', help='model.yaml path') parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') @@ -522,7 +522,7 @@ def parse_opt(known=False): parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer') parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') - parser.add_argument('--project', default=ROOT / 'runs/train_segment', help='save to project/name') + parser.add_argument('--project', default=ROOT / 'runs/train-seg', help='save results to project/name') parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--quad', action='store_true', help='quad dataloader') diff --git a/segment/val.py b/segment/val.py index d014131b7ddd..ec53f589e8cc 100644 --- a/segment/val.py +++ b/segment/val.py @@ -445,7 +445,7 @@ def run( def parse_opt(): parser = argparse.ArgumentParser() parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') - parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model.pt path(s)') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model path(s)') parser.add_argument('--batch-size', type=int, default=8, help='batch size') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=320, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') @@ -460,7 +460,7 @@ def parse_opt(): parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt') parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') parser.add_argument('--save-json', action='store_true', help='save a COCO-JSON results file') - parser.add_argument('--project', default=ROOT / 'runs/val', help='save to project/name') + parser.add_argument('--project', default=ROOT / 'runs/val-seg', help='save results to project/name') parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', 
help='existing project/name ok, do not increment') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') From dae6549e200096feb49cd47938b299cf00cbcde6 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 21:21:39 +0200 Subject: [PATCH 146/247] Rename -seg yamls from _underscore to -dash --- models/segment/{yolov5l_seg.yaml => yolov5l-seg.yaml} | 0 models/segment/{yolov5m_seg.yaml => yolov5m-seg.yaml} | 0 models/segment/{yolov5n_seg.yaml => yolov5n-seg.yaml} | 0 models/segment/{yolov5s_seg.yaml => yolov5s-seg.yaml} | 0 models/segment/{yolov5x_seg.yaml => yolov5x-seg.yaml} | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename models/segment/{yolov5l_seg.yaml => yolov5l-seg.yaml} (100%) rename models/segment/{yolov5m_seg.yaml => yolov5m-seg.yaml} (100%) rename models/segment/{yolov5n_seg.yaml => yolov5n-seg.yaml} (100%) rename models/segment/{yolov5s_seg.yaml => yolov5s-seg.yaml} (100%) rename models/segment/{yolov5x_seg.yaml => yolov5x-seg.yaml} (100%) diff --git a/models/segment/yolov5l_seg.yaml b/models/segment/yolov5l-seg.yaml similarity index 100% rename from models/segment/yolov5l_seg.yaml rename to models/segment/yolov5l-seg.yaml diff --git a/models/segment/yolov5m_seg.yaml b/models/segment/yolov5m-seg.yaml similarity index 100% rename from models/segment/yolov5m_seg.yaml rename to models/segment/yolov5m-seg.yaml diff --git a/models/segment/yolov5n_seg.yaml b/models/segment/yolov5n-seg.yaml similarity index 100% rename from models/segment/yolov5n_seg.yaml rename to models/segment/yolov5n-seg.yaml diff --git a/models/segment/yolov5s_seg.yaml b/models/segment/yolov5s-seg.yaml similarity index 100% rename from models/segment/yolov5s_seg.yaml rename to models/segment/yolov5s-seg.yaml diff --git a/models/segment/yolov5x_seg.yaml b/models/segment/yolov5x-seg.yaml similarity index 100% rename from models/segment/yolov5x_seg.yaml rename to models/segment/yolov5x-seg.yaml From 532958539b8756c434d9e13aa665dcd813895b65 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 23:08:43 +0200 Subject: [PATCH 147/247] prepare for yolov5n-seg.pt --- .github/workflows/ci-testing.yml | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index eb05e72e95fd..718dd377eafb 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -126,20 +126,23 @@ jobs: - name: Test segmentation shell: bash # for Windows compatibility run: | - m=${{ matrix.model }}-seg # official weights - b=runs/train-seg/exp/weights/best # best.pt checkpoint - + pip install pycocotools + d='../datasets' # unzip directory f='coco128-segments.zip' rm -rf $d curl -L https://github.com/ultralytics/yolov5/releases/download/v1.0/$f -o $f -# && unzip -q $f -d $d && rm $f - pip install pycocotools - m=yolov5s-seg # official weights - python segment/predict.py --imgsz 64 --weights $m.pt --device cpu # detect + m=${{ matrix.model }}-seg # official weights + b=runs/train-seg/exp/weights/best # best.pt checkpoint + python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train + for d in cpu; do # devices + for w in $m $b; do # weights + python segment/val.py --imgsz 64 --batch 32 --weights $w.pt --device $d # val + python segment/predict.py --imgsz 64 --weights $w.pt --device $d # predict + done + done python export.py --weights $m.pt --img 64 --include torchscript # export - python segment/train.py --imgsz 64 
--batch 32 --weights '' --cfg yolov5n_seg.yaml --epochs 1 --device cpu # train - - name: Test classification shell: bash # for Windows compatibility run: | From 5c9306984c58f929612406f89a66e39e58618f03 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 23:09:26 +0200 Subject: [PATCH 148/247] precommit space fix --- .github/workflows/ci-testing.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 718dd377eafb..6494a8cf23f8 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -127,7 +127,7 @@ jobs: shell: bash # for Windows compatibility run: | pip install pycocotools - + d='../datasets' # unzip directory f='coco128-segments.zip' rm -rf $d From 156c01284d751edba14f01c33b19a25b2292f0d9 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 23:19:17 +0200 Subject: [PATCH 149/247] add coco128-seg.yaml --- .github/workflows/ci-testing.yml | 8 +-- data/coco128-seg.yaml | 101 +++++++++++++++++++++++++++++++ segment/train.py | 2 +- segment/val.py | 8 +-- 4 files changed, 107 insertions(+), 12 deletions(-) create mode 100644 data/coco128-seg.yaml diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 6494a8cf23f8..5be0884991c9 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -126,13 +126,7 @@ jobs: - name: Test segmentation shell: bash # for Windows compatibility run: | - pip install pycocotools - - d='../datasets' # unzip directory - f='coco128-segments.zip' - rm -rf $d - curl -L https://github.com/ultralytics/yolov5/releases/download/v1.0/$f -o $f -# && unzip -q $f -d $d && rm $f - + pip install -q pycocotools m=${{ matrix.model }}-seg # official weights b=runs/train-seg/exp/weights/best # best.pt checkpoint python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train diff --git a/data/coco128-seg.yaml b/data/coco128-seg.yaml new file mode 100644 index 000000000000..db05534c526e --- /dev/null +++ b/data/coco128-seg.yaml @@ -0,0 +1,101 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics +# Example usage: python train.py --data coco128.yaml +# parent +# ├── yolov5 +# └── datasets +# └── coco128 ← downloads here (7 MB) + + +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
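# Editor's note (not part of the patch): alongside each image, a segment dataset such
# as coco128-seg carries a *.txt label file with one row per instance, a class index
# followed by the polygon's normalized xy pairs, e.g. `0 0.681 0.485 0.670 0.487 ...`
# (coordinates here are illustrative).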
+path: ../datasets/coco128-seg # dataset root dir +train: images/train2017 # train images (relative to 'path') 128 images +val: images/train2017 # val images (relative to 'path') 128 images +test: # test images (optional) + +# Classes +names: + 0: person + 1: bicycle + 2: car + 3: motorcycle + 4: airplane + 5: bus + 6: train + 7: truck + 8: boat + 9: traffic light + 10: fire hydrant + 11: stop sign + 12: parking meter + 13: bench + 14: bird + 15: cat + 16: dog + 17: horse + 18: sheep + 19: cow + 20: elephant + 21: bear + 22: zebra + 23: giraffe + 24: backpack + 25: umbrella + 26: handbag + 27: tie + 28: suitcase + 29: frisbee + 30: skis + 31: snowboard + 32: sports ball + 33: kite + 34: baseball bat + 35: baseball glove + 36: skateboard + 37: surfboard + 38: tennis racket + 39: bottle + 40: wine glass + 41: cup + 42: fork + 43: knife + 44: spoon + 45: bowl + 46: banana + 47: apple + 48: sandwich + 49: orange + 50: broccoli + 51: carrot + 52: hot dog + 53: pizza + 54: donut + 55: cake + 56: chair + 57: couch + 58: potted plant + 59: bed + 60: dining table + 61: toilet + 62: tv + 63: laptop + 64: mouse + 65: remote + 66: keyboard + 67: cell phone + 68: microwave + 69: oven + 70: toaster + 71: sink + 72: refrigerator + 73: book + 74: clock + 75: vase + 76: scissors + 77: teddy bear + 78: hair drier + 79: toothbrush + + +# Download script/URL (optional) +download: https://ultralytics.com/assets/coco128-segments.zip diff --git a/segment/train.py b/segment/train.py index 223e331db6ab..ca23b4256317 100644 --- a/segment/train.py +++ b/segment/train.py @@ -501,7 +501,7 @@ def parse_opt(known=False): parser = argparse.ArgumentParser() parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s-seg.pt', help='initial weights path') parser.add_argument('--cfg', type=str, default='', help='model.yaml path') - parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') + parser.add_argument('--data', type=str, default=ROOT / 'data/coco128-seg.yaml', help='dataset.yaml path') parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') parser.add_argument('--epochs', type=int, default=300) parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch') diff --git a/segment/val.py b/segment/val.py index ec53f589e8cc..884d32ffdf59 100644 --- a/segment/val.py +++ b/segment/val.py @@ -159,7 +159,7 @@ def run( save_hybrid=False, # save label+prediction hybrid results to *.txt save_conf=False, # save confidences in --save-txt labels save_json=False, # save a COCO-JSON results file - project=ROOT / 'runs/val', # save to project/name + project=ROOT / 'runs/val-seg', # save to project/name name='exp', # save to project/name exist_ok=False, # existing project/name ok, do not increment half=True, # use FP16 half-precision inference @@ -444,10 +444,10 @@ def run( def parse_opt(): parser = argparse.ArgumentParser() - parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') + parser.add_argument('--data', type=str, default=ROOT / 'data/coco128-seg.yaml', help='dataset.yaml path') parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model path(s)') - parser.add_argument('--batch-size', type=int, default=8, help='batch size') - parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=320, help='inference size (pixels)') + parser.add_argument('--batch-size', type=int, 
default=32, help='batch size')
+    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
     parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold')
     parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold')
     parser.add_argument('--task', default='val', help='train, val, test, speed or study')

From 92a3ff0497d83dcc42178a19f7e2f84029dc6ff2 Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Wed, 24 Aug 2022 23:29:20 +0200
Subject: [PATCH 150/247] update coco128-seg comments

---
 data/coco128.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/data/coco128.yaml b/data/coco128.yaml
index 12556736a571..d551e994e379 100644
--- a/data/coco128.yaml
+++ b/data/coco128.yaml
@@ -1,10 +1,10 @@
 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
-# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
+# COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
 # Example usage: python train.py --data coco128.yaml
 # parent
 # ├── yolov5
 # └── datasets
-# └── coco128 ← downloads here (7 MB)
+# └── coco128-seg ← downloads here (7 MB)


 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]

From e2b07026cc4d89bff802b106d3ad56544119762a Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Wed, 24 Aug 2022 23:38:52 +0200
Subject: [PATCH 151/247] cleanup val.py

---
 segment/val.py | 29 ++++++++++++++++-------------
 val.py         |  2 +-
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/segment/val.py b/segment/val.py
index 884d32ffdf59..ac604cb59575 100644
--- a/segment/val.py
+++ b/segment/val.py
@@ -1,21 +1,22 @@
 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
 """
-Validate a trained YOLOv5 model accuracy on a custom dataset
+Validate a trained YOLOv5 segment model on a segment dataset

 Usage:
-    $ python path/to/val.py --weights yolov5s.pt --data coco128.yaml --img 640
+    $ bash data/scripts/get_coco.sh --val --segments  # download COCO-segments val split (1G, 5000 images)
+    $ python segment/val.py --weights yolov5s-seg.pt --data coco.yaml --img 640  # validate COCO-segments

 Usage - formats:
-    $ python path/to/val.py --weights yolov5s.pt                 # PyTorch
-                                      yolov5s.torchscript        # TorchScript
-                                      yolov5s.onnx               # ONNX Runtime or OpenCV DNN with --dnn
-                                      yolov5s.xml                # OpenVINO
-                                      yolov5s.engine             # TensorRT
-                                      yolov5s.mlmodel            # CoreML (macOS-only)
-                                      yolov5s_saved_model        # TensorFlow SavedModel
-                                      yolov5s.pb                 # TensorFlow GraphDef
-                                      yolov5s.tflite             # TensorFlow Lite
-                                      yolov5s_edgetpu.tflite     # TensorFlow Edge TPU
+    $ python segment/val.py --weights yolov5s-seg.pt             # PyTorch
+                                      yolov5s-seg.torchscript    # TorchScript
+                                      yolov5s-seg.onnx           # ONNX Runtime or OpenCV DNN with --dnn
+                                      yolov5s-seg.xml            # OpenVINO
+                                      yolov5s-seg.engine         # TensorRT
+                                      yolov5s-seg.mlmodel        # CoreML (macOS-only)
+                                      yolov5s-seg_saved_model    # TensorFlow SavedModel
+                                      yolov5s-seg.pb             # TensorFlow GraphDef
+                                      yolov5s-seg.tflite         # TensorFlow Lite
+                                      yolov5s-seg_edgetpu.tflite # TensorFlow Edge TPU
 """

 import argparse
@@ -478,7 +479,9 @@ def main(opt):
     if opt.task in ('train', 'val', 'test'):  # run normally
         if opt.conf_thres > 0.001:  # https://github.com/ultralytics/yolov5/issues/1466
-            LOGGER.info(emojis(f'WARNING: confidence threshold {opt.conf_thres} > 0.001 produces invalid results ⚠️'))
+            LOGGER.info(f'WARNING: confidence threshold {opt.conf_thres} > 0.001 produces invalid results ⚠️')
+
if opt.save_hybrid: + LOGGER.info('WARNING: --save-hybrid will return high mAP from hybrid labels, not from predictions alone ⚠️') run(**vars(opt)) else: diff --git a/val.py b/val.py index 58b9c9e1bec0..9cacd47b3ab3 100644 --- a/val.py +++ b/val.py @@ -331,7 +331,7 @@ def run( def parse_opt(): parser = argparse.ArgumentParser() parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') - parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)') parser.add_argument('--batch-size', type=int, default=32, help='batch size') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') From c4e84f1d552f1e86940a4ad030c07836bbc92afe Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 00:37:01 +0200 Subject: [PATCH 152/247] Major val.py cleanup --- segment/val.py | 200 +++++++++++++++++++------------------------------ val.py | 2 +- 2 files changed, 80 insertions(+), 122 deletions(-) diff --git a/segment/val.py b/segment/val.py index ac604cb59575..ffe0c7cc5148 100644 --- a/segment/val.py +++ b/segment/val.py @@ -35,20 +35,24 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative +from models.common import DetectMultiBackend +from models.yolo import DetectionModel +from utils.callbacks import Callbacks import pycocotools.mask as mask_util import torch.nn.functional as F -from models.experimental import attempt_load # scoped to avoid circular import -from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, - coco80_to_coco91_class, colorstr, emojis, increment_path, non_max_suppression, print_args, +from utils.dataloaders import create_dataloader +from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml, + coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, scale_coords, xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, box_iou from utils.plots import output_to_target, plot_val_study +from utils.torch_utils import smart_inference_mode from utils.segment.dataloaders import create_dataloader from utils.segment.general import mask_iou, process_mask, process_mask_upsample, scale_masks from utils.segment.metrics import Metrics, ap_per_class_box_and_mask from utils.segment.plots import plot_images_and_masks -from utils.torch_utils import de_parallel, select_device, time_sync +from utils.torch_utils import de_parallel, select_device def save_one_txt(predn, save_conf, shape, file): @@ -66,20 +70,17 @@ def save_one_json(predn, jdict, path, class_map, pred_masks): image_id = int(path.stem) if path.stem.isnumeric() else path.stem box = xyxy2xywh(predn[:, :4]) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner - pred_masks = np.transpose(pred_masks, (2, 0, 1)) rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks] for rle in rles: rle["counts"] = rle["counts"].decode("utf-8") - for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): - pred_dict = { + jdict.append({ 'image_id': image_id, 'category_id': class_map[int(p[5])], 'bbox': [round(x, 3) for x in b], - 'score': round(p[4], 5)} - 
pred_dict["segmentation"] = rles[i] - jdict.append(pred_dict) + 'score': round(p[4], 5), + 'segmentation': rles[i]}) def process_batch(detections, labels, iouv): @@ -117,18 +118,9 @@ def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap): gt_masks = torch.where(gt_masks == index, 1.0, 0.0) if gt_masks.shape[1:] != pred_masks.shape[1:]: - gt_masks = F.interpolate( - gt_masks.unsqueeze(0), - pred_masks.shape[1:], - mode="bilinear", - align_corners=False, - ).squeeze(0) - gt_masks = gt_masks.gt_(0.5) - - iou = mask_iou( - gt_masks.view(gt_masks.shape[0], -1), - pred_masks.view(pred_masks.shape[0], -1), - ) + gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode="bilinear", align_corners=False)[0].gt_(0.5) + + iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1)) x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match if x[0].shape[0]: matches = (torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] @@ -142,7 +134,7 @@ def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap): return correct -@torch.no_grad() +@smart_inference_mode() def run( data, weights=None, # model.pt path(s) @@ -172,6 +164,7 @@ def run( overlap=False, mask_downsample_ratio=1, compute_loss=None, + callbacks=Callbacks(), ): process = process_mask_upsample if save_json else process_mask # Initialize/load model and set device @@ -180,6 +173,7 @@ def run( device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model half &= device.type != 'cpu' # half precision only supported on CUDA model.half() if half else model.float() + nm = de_parallel(model).model[-1].mask_dim # number of masks else: # called directly device = select_device(device, batch_size=batch_size) @@ -188,20 +182,15 @@ def run( (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model - # model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) - model = attempt_load(weights, device=device) # load FP32 model - stride = 32 - pt, jit, engine = True, False, False - # stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine + model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) + stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine imgsz = check_img_size(imgsz, s=stride) # check image size - # half = model.fp16 # FP16 supported on limited backends with CUDA - half = device.type != 'cpu' - if half: - model.half() + half = model.fp16 # FP16 supported on limited backends with CUDA + nm = de_parallel(model).model.model[-1].mask_dim if isinstance(model, DetectionModel) else 32 # number of masks if engine: batch_size = model.batch_size else: - # device = model.device + device = model.device if not (pt or jit): batch_size = 1 # export.py models default to batch-size 1 LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') @@ -220,10 +209,10 @@ def run( # Dataloader if not training: if pt and not single_cls: # check --weights are trained on --data - ncm = model.nc + ncm = model.model.nc assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \ f'classes). Pass correct combination of --weights and --data that are trained together.' 
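The process_batch_masks() hunk above first brings ground-truth masks onto the predicted-mask grid, then flattens both sides for mask_iou(). A self-contained sketch of that alignment step, assuming float [M, H, W] ground-truth masks and [N, h, w] predicted masks:

# Sketch: resize ground-truth masks to the predicted-mask resolution before IoU
import torch
import torch.nn.functional as F

def align_gt_masks(gt_masks, pred_masks):
    # gt_masks: [M, H, W] float in {0, 1}; pred_masks: [N, h, w]
    if gt_masks.shape[1:] != pred_masks.shape[1:]:
        gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:],
                                 mode='bilinear', align_corners=False)[0]
        gt_masks = gt_masks.gt_(0.5)  # re-binarize after bilinear resampling
    return gt_masks

gt = (torch.rand(2, 640, 640) > 0.5).float()
pred = torch.rand(5, 160, 160)
print(align_gt_masks(gt, pred).shape)  # torch.Size([2, 160, 160])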
- # model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup + model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup pad = 0.0 if task in ('speed', 'benchmark') else 0.5 rect = False if task == 'benchmark' else pt # square inference for benchmarks task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images @@ -241,31 +230,33 @@ def run( seen = 0 confusion_matrix = ConfusionMatrix(nc=nc) - names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)} + names = model.names if hasattr(model, 'names') else model.module.names # get class names + if isinstance(names, (list, tuple)): # old format + names = dict(enumerate(names)) class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) - s = ("%20s" + "%11s" * 10) % ("Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", "Mask:{P", "R", - "mAP@.5", "mAP@.5:.95}") - dt = [0.0, 0.0, 0.0] + s = ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', "R", "mAP50", "mAP50-95)", "Mask(P", "R", + "mAP50", "mAP50-95)") + dt = Profile(), Profile(), Profile() metrics = Metrics() loss = torch.zeros(4, device=device) jdict, stats = [], [] + # callbacks.run('on_val_start') pbar = tqdm(dataloader, desc=s, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar for batch_i, (im, targets, paths, shapes, masks) in enumerate(pbar): - t1 = time_sync() - if cuda: - im = im.to(device, non_blocking=True) - targets = targets.to(device) - masks = masks.to(device) - masks = masks.float() - im = im.half() if half else im.float() # uint8 to fp16/32 - im /= 255 # 0 - 255 to 0.0 - 1.0 - nb, _, height, width = im.shape # batch size, channels, height, width - t2 = time_sync() - dt[0] += t2 - t1 + # callbacks.run('on_val_batch_start') + with dt[0]: + if cuda: + im = im.to(device, non_blocking=True) + targets = targets.to(device) + masks = masks.to(device) + masks = masks.float() + im = im.half() if half else im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + nb, _, height, width = im.shape # batch size, channels, height, width # Inference - out, train_out = model(im) # if training else model(im, augment=augment, val=True) # inference, loss outputs - dt[1] += time_sync() - t2 + with dt[1]: + out, train_out = model(im) # if training else model(im, augment=augment, val=True) # inference, loss # Loss if compute_loss: @@ -274,19 +265,12 @@ def run( # NMS targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling - t3 = time_sync() - out = non_max_suppression(out, - conf_thres, - iou_thres, - labels=lb, - multi_label=True, - agnostic=single_cls, - masks=de_parallel(model).model[-1].mask_dim) - dt[2] += time_sync() - t3 - - # keep pred masks for plotting - plot_masks = [] + with dt[2]: + out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, + masks=nm) + # Metrics + plot_masks = [] # masks for plotting for si, pred in enumerate(out): labels = targets[targets[:, 0] == si, 1:] nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions @@ -298,14 +282,16 @@ def run( if npr == 0: if nl: stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0])) + if plots: + confusion_matrix.process_batch(detections=None, labels=labels[:, 0]) continue - # deal with masks + # Masks midx = [si] if overlap else 
targets[:, 0] == si gt_masks = masks[midx] proto_out = train_out[1][si] - pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], - shape=im[si].shape[1:]).permute(2, 0, 1).contiguous().float() + pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).permute(2, 0, + 1).contiguous().float() # Predictions if single_cls: @@ -322,43 +308,43 @@ def run( correct_masks = process_batch_masks(predn, pred_masks, gt_masks, labelsn, iouv, overlap=overlap) if plots: confusion_matrix.process_batch(predn, labelsn) - stats.append( - (correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct, conf, pcls, tcls) + stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (conf, pcls, tcls) - # convert pred_masks to uint8 pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8) if plots and batch_i < 3: - # filter top 15 to plot - plot_masks.append(pred_masks[:15].cpu()) + plot_masks.append(pred_masks[:15].cpu()) # filter top 15 to plot # Save/log if save_txt: - save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt')) + save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt') if save_json: pred_masks = scale_masks(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]) save_one_json(predn, jdict, path, class_map, pred_masks) # append to COCO-JSON dictionary + # callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) # Plot images if plots and batch_i < 3: - plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) if len(plot_masks): plot_masks = torch.cat(plot_masks, dim=0) + plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) plot_images_and_masks(im, output_to_target(out, max_det=15), plot_masks, paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred + # callbacks.run('on_val_batch_end') + # Compute metrics stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): results = ap_per_class_box_and_mask(*stats, plot=plots, save_dir=save_dir, names=names) metrics.update(results) - nt = np.bincount(stats[4].astype(int), minlength=nc) # number of targets per class - else: - nt = torch.zeros(1) + nt = np.bincount(stats[4].astype(int), minlength=nc) # number of targets per class # Print results - pf = '%20s' + '%11i' * 2 + '%11.3g' * 8 # print format + pf = '%22s' + '%11i' * 2 + '%11.3g' * 8 # print format LOGGER.info(pf % ("all", seen, nt.sum(), *metrics.mean_results())) + if nt.sum() == 0: + LOGGER.warning(f'WARNING: no labels found in {task} set, can not compute metrics without labels ⚠️') # Print results per class if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats): @@ -366,7 +352,7 @@ def run( LOGGER.info(pf % (names[c], seen, nt[c], *metrics.class_result(i))) # Print speeds - t = tuple(x / seen * 1E3 for x in dt) # speeds per image + t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image if not training: shape = (batch_size, 3, imgsz, imgsz) LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t) @@ -374,18 +360,10 @@ def run( # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) + # callbacks.run('on_val_end') + + mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask = metrics.mean_results() - # in case the cocoeval will update map - ( - mp_bbox, - 
mr_bbox, - map50_bbox, - map_bbox, - mp_mask, - mr_mask, - map50_mask, - map_mask, - ) = metrics.mean_results() # Save JSON if save_json and len(jdict): w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights @@ -402,22 +380,15 @@ def run( anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api - eval_bbox = COCOeval(anno, pred, 'bbox') - eval_mask = COCOeval(anno, pred, 'segm') - if is_coco: - eval_bbox.params.imgIds = [int(Path(x).stem) - for x in dataloader.dataset.im_files] # image IDs to evaluate - eval_mask.params.imgIds = [int(Path(x).stem) - for x in dataloader.dataset.im_files] # image IDs to evaluate - eval_bbox.evaluate() - eval_bbox.accumulate() - eval_bbox.summarize() - map_bbox, map50_bbox = eval_bbox.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) - - eval_mask.evaluate() - eval_mask.accumulate() - eval_mask.summarize() - map_mask, map50_mask = eval_mask.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) + results = [] + for eval in COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'segm'): + if is_coco: + eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # img ID to evaluate + eval.evaluate() + eval.accumulate() + eval.summarize() + results.extend(eval.stats[:2]) # update results (mAP@0.5:0.95, mAP@0.5) + map_bbox, map50_bbox, map_mask, map50_mask = results except Exception as e: LOGGER.info(f'pycocotools unable to run: {e}') @@ -426,21 +397,8 @@ def run( if not training: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") - final_metric = ( - mp_bbox, - mr_bbox, - map50_bbox, - map_bbox, - mp_mask, - mr_mask, - map50_mask, - map_mask, - ) - return ( - (*final_metric, *(loss.cpu() / len(dataloader)).tolist()), - metrics.get_maps(nc), - t, - ) + final_metric = mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask + return (*final_metric, *(loss.cpu() / len(dataloader)).tolist()), metrics.get_maps(nc), t def parse_opt(): diff --git a/val.py b/val.py index 9cacd47b3ab3..d120b625e474 100644 --- a/val.py +++ b/val.py @@ -186,7 +186,7 @@ def run( if isinstance(names, (list, tuple)): # old format names = dict(enumerate(names)) class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) - s = ('%22s' + '%11s' * 6) % ('Class', 'Images', 'Instances', 'P', 'R', 'mAP@.5', 'mAP@.5:.95') + s = ('%22s' + '%11s' * 6) % ('Class', 'Images', 'Instances', 'P', 'R', 'mAP50', 'mAP50-95') dt, p, r, f1, mp, mr, map50, map = (Profile(), Profile(), Profile()), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 loss = torch.zeros(3, device=device) jdict, stats, ap, ap_class = [], [], [], [] From 4710a400b933aabad3921ca7b73514a453957299 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 Aug 2022 22:37:31 +0000 Subject: [PATCH 153/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/val.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/segment/val.py b/segment/val.py index ffe0c7cc5148..407d061b6d3a 100644 --- a/segment/val.py +++ b/segment/val.py @@ -35,24 +35,23 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -from models.common import DetectMultiBackend -from models.yolo import DetectionModel -from 
utils.callbacks import Callbacks import pycocotools.mask as mask_util import torch.nn.functional as F +from models.common import DetectMultiBackend +from models.yolo import DetectionModel +from utils.callbacks import Callbacks from utils.dataloaders import create_dataloader from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, scale_coords, xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, box_iou from utils.plots import output_to_target, plot_val_study -from utils.torch_utils import smart_inference_mode from utils.segment.dataloaders import create_dataloader from utils.segment.general import mask_iou, process_mask, process_mask_upsample, scale_masks from utils.segment.metrics import Metrics, ap_per_class_box_and_mask from utils.segment.plots import plot_images_and_masks -from utils.torch_utils import de_parallel, select_device +from utils.torch_utils import de_parallel, select_device, smart_inference_mode def save_one_txt(predn, save_conf, shape, file): @@ -266,7 +265,12 @@ def run( targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling with dt[2]: - out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, + out = non_max_suppression(out, + conf_thres, + iou_thres, + labels=lb, + multi_label=True, + agnostic=single_cls, masks=nm) # Metrics @@ -290,8 +294,8 @@ def run( midx = [si] if overlap else targets[:, 0] == si gt_masks = masks[midx] proto_out = train_out[1][si] - pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).permute(2, 0, - 1).contiguous().float() + pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], + shape=im[si].shape[1:]).permute(2, 0, 1).contiguous().float() # Predictions if single_cls: From 744058e026e9c27b5ff7489277dfc094f3759d41 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 00:42:41 +0200 Subject: [PATCH 154/247] precommit fix --- segment/val.py | 1 - 1 file changed, 1 deletion(-) diff --git a/segment/val.py b/segment/val.py index ffe0c7cc5148..3840d7c8fd0d 100644 --- a/segment/val.py +++ b/segment/val.py @@ -41,7 +41,6 @@ import pycocotools.mask as mask_util import torch.nn.functional as F -from utils.dataloaders import create_dataloader from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, scale_coords, xywh2xyxy, xyxy2xywh) From 858db3477bda367b80b06fe46b2aeb349848fd48 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 00:43:40 +0200 Subject: [PATCH 155/247] precommit fix --- segment/val.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/segment/val.py b/segment/val.py index 76c1b60625a9..ec5070bcc7c4 100644 --- a/segment/val.py +++ b/segment/val.py @@ -35,9 +35,6 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -import pycocotools.mask as mask_util -import torch.nn.functional as F - from models.common import DetectMultiBackend from models.yolo import DetectionModel from utils.callbacks import Callbacks From 1a00dda1eba1b59d6274901ef780750cda445e17 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 Aug 2022 
22:44:18 +0000 Subject: [PATCH 156/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/val.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/segment/val.py b/segment/val.py index ec5070bcc7c4..57cf60907715 100644 --- a/segment/val.py +++ b/segment/val.py @@ -35,12 +35,12 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -from models.common import DetectMultiBackend -from models.yolo import DetectionModel -from utils.callbacks import Callbacks import pycocotools.mask as mask_util import torch.nn.functional as F +from models.common import DetectMultiBackend +from models.yolo import DetectionModel +from utils.callbacks import Callbacks from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, scale_coords, xywh2xyxy, xyxy2xywh) From 6820f84af5992805701c98705e045ce46eec6999 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 00:57:20 +0200 Subject: [PATCH 157/247] optional pycocotools --- segment/val.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/segment/val.py b/segment/val.py index ec5070bcc7c4..22cff1e67403 100644 --- a/segment/val.py +++ b/segment/val.py @@ -38,7 +38,6 @@ from models.common import DetectMultiBackend from models.yolo import DetectionModel from utils.callbacks import Callbacks -import pycocotools.mask as mask_util import torch.nn.functional as F from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml, @@ -65,11 +64,12 @@ def save_one_txt(predn, save_conf, shape, file): def save_one_json(predn, jdict, path, class_map, pred_masks): # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} + from pycocotools.mask import encode image_id = int(path.stem) if path.stem.isnumeric() else path.stem box = xyxy2xywh(predn[:, :4]) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner pred_masks = np.transpose(pred_masks, (2, 0, 1)) - rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks] + rles = [encode(np.asarray(x[:, :, None], order="F", dtype="uint8"))[0] for x in pred_masks] for rle in rles: rle["counts"] = rle["counts"].decode("utf-8") for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): @@ -164,7 +164,12 @@ def run( compute_loss=None, callbacks=Callbacks(), ): - process = process_mask_upsample if save_json else process_mask + if save_json: + check_requirements(['pycocotools']) + process = process_mask_upsample # more accurate + else: + process = process_mask # faster + # Initialize/load model and set device training = model is not None if training: # called by train.py @@ -377,7 +382,6 @@ def run( json.dump(jdict, f) try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb - check_requirements(['pycocotools']) from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval From 114485bd6a1fe32457c1d9f023196f907dcbca21 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 00:59:05 +0200 Subject: [PATCH 158/247] remove CI pip install pycocotools (auto-installed now) --- .github/workflows/ci-testing.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 
5be0884991c9..044ece544648 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -126,7 +126,6 @@ jobs: - name: Test segmentation shell: bash # for Windows compatibility run: | - pip install -q pycocotools m=${{ matrix.model }}-seg # official weights b=runs/train-seg/exp/weights/best # best.pt checkpoint python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train From 0acd727a323043b4bcbd0e08340fd38b064c8456 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 01:11:47 +0200 Subject: [PATCH 159/247] seg yaml fix --- data/coco128-seg.yaml | 4 ++-- data/coco128.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/data/coco128-seg.yaml b/data/coco128-seg.yaml index db05534c526e..a0319670a92e 100644 --- a/data/coco128-seg.yaml +++ b/data/coco128-seg.yaml @@ -1,10 +1,10 @@ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license -# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics +# COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics # Example usage: python train.py --data coco128.yaml # parent # ├── yolov5 # └── datasets -# └── coco128 ← downloads here (7 MB) +# └── coco128-seg ← downloads here (7 MB) # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] diff --git a/data/coco128.yaml b/data/coco128.yaml index d551e994e379..12556736a571 100644 --- a/data/coco128.yaml +++ b/data/coco128.yaml @@ -1,10 +1,10 @@ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license -# COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics +# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics # Example usage: python train.py --data coco128.yaml # parent # ├── yolov5 # └── datasets -# └── coco128-seg ← downloads here (7 MB) +# └── coco128 ← downloads here (7 MB) # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
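Patch 157 above defers the pycocotools import into save_one_json() so the dependency is only pulled in when --save-json is set. A sketch of the RLE round-trip that function performs per predicted mask, assuming pycocotools is installed and the mask is already binary:

# Sketch: run-length-encode one binary mask for a COCO-JSON 'segmentation' field
import numpy as np
from pycocotools.mask import decode, encode

mask = (np.random.rand(160, 160) > 0.5).astype(np.uint8)  # [H, W] binary mask (illustrative)
rles = encode(np.asarray(mask[:, :, None], order='F', dtype='uint8'))  # Fortran-order [H, W, 1]
assert (decode(rles)[:, :, 0] == mask).all()  # encoding is lossless
rle = rles[0]
rle['counts'] = rle['counts'].decode('utf-8')  # bytes -> str so json.dump() can serialize it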
From 2c9719dcdd1a1cdce8cebc22a79b22a95ec5a4de Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 03:08:17 +0200 Subject: [PATCH 160/247] optimize mask_iou() and masks_iou() --- utils/segment/general.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index cc8cc2997541..b97d289f09c1 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -102,7 +102,7 @@ def scale_masks(img1_shape, masks, img0_shape, ratio_pad=None): return masks -def mask_iou(mask1, mask2): +def mask_iou(mask1, mask2, eps=1e-7): """ mask1: [N, n] m1 means number of predicted objects mask2: [M, n] m2 means number of gt objects @@ -110,17 +110,12 @@ def mask_iou(mask1, mask2): return: masks iou, [N, M] """ - # print(mask1.shape) - # print(mask2.shape) intersection = torch.matmul(mask1, mask2.t()).clamp(0) - area1 = torch.sum(mask1, dim=1).view(1, -1) - area2 = torch.sum(mask2, dim=1).view(1, -1) - union = (area1.t() + area2) - intersection + union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection # (area1 + area2) - intersection + return intersection / (union + eps) - return intersection / (union + 1e-7) - -def masks_iou(mask1, mask2): +def masks_iou(mask1, mask2, eps=1e-7): """ mask1: [N, n] m1 means number of predicted objects mask2: [N, n] m2 means number of gt objects @@ -129,7 +124,5 @@ def masks_iou(mask1, mask2): return: masks iou, (N, ) """ intersection = (mask1 * mask2).sum(1).clamp(0) # (N, ) - area1 = torch.sum(mask1, dim=1).view(1, -1) - area2 = torch.sum(mask2, dim=1).view(1, -1) - union = (area1 + area2) - intersection - return intersection / (union + 1e-7) + union = (mask1.sum(1) + mask2.sum(1))[None] - intersection # (area1 + area2) - intersection + return intersection / (union + eps) From 4863beb745417e70d30ef905a8f0b7301077f870 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 15:03:17 +0200 Subject: [PATCH 161/247] threaded fix --- utils/segment/plots.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 4517ff455cba..eac46d9853aa 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -8,7 +8,8 @@ import pandas as pd import torch -from ..general import threaded, xywh2xyxy +from .. import threaded +from ..general import xywh2xyxy from ..plots import Annotator, colors From 347968b38bca8d4baf5926a66f3cb64d3b4c4ce7 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 18:00:10 +0200 Subject: [PATCH 162/247] Major train.py update --- segment/train.py | 283 ++++++++++++++++++++++------------------------- 1 file changed, 135 insertions(+), 148 deletions(-) diff --git a/segment/train.py b/segment/train.py index ca23b4256317..b9f284b33eea 100644 --- a/segment/train.py +++ b/segment/train.py @@ -1,15 +1,18 @@ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license """ -Train a YOLOv5 model on a custom dataset. - +Train a YOLOv5 segment model on a segment dataset Models and datasets download automatically from the latest YOLOv5 release. 
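The rewritten mask_iou() in the patch above computes every pairwise intersection with a single matrix product over flattened binary masks, replacing the explicit area tensors with broadcast sums. A quick numeric check that the broadcast form matches a naive pairwise loop (assumes binary float inputs):

# Sketch: verify the matmul form of mask_iou() against a naive reference
import torch

def mask_iou(mask1, mask2, eps=1e-7):
    intersection = torch.matmul(mask1, mask2.t()).clamp(0)  # [N, M] shared pixels
    union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection
    return intersection / (union + eps)

m1 = (torch.rand(3, 64) > 0.5).float()  # N=3 predicted masks, flattened
m2 = (torch.rand(2, 64) > 0.5).float()  # M=2 ground-truth masks, flattened
iou = mask_iou(m1, m2)
for i in range(3):
    for j in range(2):
        inter = (m1[i] * m2[j]).sum()
        assert torch.isclose(iou[i, j], inter / (m1[i].sum() + m2[j].sum() - inter + 1e-7))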
-Models: https://github.com/ultralytics/yolov5/tree/master/models -Datasets: https://github.com/ultralytics/yolov5/tree/master/data -Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data -Usage: - $ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640 # from pretrained (RECOMMENDED) - $ python path/to/train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640 # from scratch +Usage - Single-GPU training: + $ python segment/train.py --data coco128-seg.yaml --weights yolov5s-seg.pt --img 640 # from pretrained (recommended) + $ python segment/train.py --data coco128-seg.yaml --weights '' --cfg yolov5s-seg.yaml --img 640 # from scratch + +Usage - Multi-GPU DDP training: + $ python -m torch.distributed.run --nproc_per_node 4 --master_port 1 segment/train.py --data coco128-seg.yaml --weights yolov5s-seg.pt --img 640 --device 0,1,2,3 + +Models: https://github.com/ultralytics/yolov5/tree/master/models +Datasets: https://github.com/ultralytics/yolov5/tree/master/data +Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data """ import argparse @@ -19,58 +22,54 @@ import sys import time from copy import deepcopy +from datetime import datetime from pathlib import Path import numpy as np import torch import torch.distributed as dist import torch.nn as nn -import torch.nn.functional as F -from torch.nn.parallel import DistributedDataParallel as DDP -from torch.optim import SGD, Adam, lr_scheduler +import yaml +from torch.optim import lr_scheduler from tqdm import tqdm -import val # for end-of-epoch mAP - FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLOv5 root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative +import torch.nn.functional as F +import segment.val as validate # for end-of-epoch mAP from models.experimental import attempt_load from models.yolo import Model from utils.autoanchor import check_anchors -from utils.downloads import attempt_download -from utils.general import (check_dataset, check_file, check_git_status, check_img_size, check_requirements, - check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, - intersect_dicts, labels_to_class_weights, labels_to_image_weights, one_cycle, print_args, - print_mutation, strip_optimizer) +from utils.autobatch import check_train_batch_size +from utils.callbacks import Callbacks +from utils.downloads import attempt_download, is_url +from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, + check_requirements, check_suffix, check_yaml, colorstr, get_latest_run, increment_path, + init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, one_cycle, + print_args, print_mutation, strip_optimizer, yaml_save) from utils.loggers import GenericLogger -from utils.plots import plot_evolve, plot_labels from utils.segment.dataloaders import create_dataloader from utils.segment.loss import ComputeLoss -from utils.segment.metrics import fitness -from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first +from utils.segment.metrics import fitness, KEYS +from utils.segment.plots import plot_images_and_masks, plot_results_with_masks +from utils.plots import plot_evolve, plot_labels +from utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel, select_device, smart_DDP, smart_optimizer, + smart_resume, torch_distributed_zero_first) LOCAL_RANK = 
int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) -from datetime import datetime - -import yaml -from torch.optim import AdamW - -from utils.autobatch import check_train_batch_size -from utils.general import LOGGER, check_amp, check_version -from utils.segment.metrics import KEYS -from utils.segment.plots import plot_images_and_masks, plot_results_with_masks -def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary +def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio + # callbacks.run('on_pretrain_routine_start') # Directories w = save_dir / 'weights' # weights dir @@ -82,18 +81,28 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary with open(hyp, errors='ignore') as f: hyp = yaml.safe_load(f) # load hyps dict LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items())) + opt.hyp = hyp.copy() # for saving hyps to checkpoints # Save run settings if not evolve: - with open(save_dir / 'hyp.yaml', 'w') as f: - yaml.safe_dump(hyp, f, sort_keys=False) - with open(save_dir / 'opt.yaml', 'w') as f: - yaml.safe_dump(vars(opt), f, sort_keys=False) + yaml_save(save_dir / 'hyp.yaml', hyp) + yaml_save(save_dir / 'opt.yaml', vars(opt)) # Loggers data_dict = None if RANK in {-1, 0}: logger = GenericLogger(opt=opt, console_logger=LOGGER) + # loggers = Loggers(save_dir, weights, opt, hyp, LOGGER) # loggers instance + # if loggers.clearml: + # data_dict = loggers.clearml.data_dict # None if no ClearML dataset or filled in by ClearML + # if loggers.wandb: + # data_dict = loggers.wandb.data_dict + # if resume: + # weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, opt.batch_size + # + # # Register actions + # for k in methods(loggers): + # callbacks.register_action(k, callback=getattr(loggers, k)) # Config plots = not evolve and not opt.noplots # create plots @@ -104,8 +113,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary data_dict = data_dict or check_dataset(data) # check if None train_path, val_path = data_dict['train'], data_dict['val'] nc = 1 if single_cls else int(data_dict['nc']) # number of classes - names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names - assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}' # check + names = {0: 'item'} if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt') # COCO dataset # Model @@ -129,6 +137,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary freeze = [f'model.{x}.' 
for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze for k, v in model.named_parameters(): v.requires_grad = True # train all layers + # v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0 (commented for erratic training results) if any(x in k for x in freeze): LOGGER.info(f'freezing {k}') v.requires_grad = False @@ -141,35 +150,13 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size batch_size = check_train_batch_size(model, imgsz, amp) logger.update_params({"batch_size": batch_size}) + # loggers.on_params_update({"batch_size": batch_size}) # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay - LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}") - - g = [], [], [] # optimizer parameter groups - bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. BatchNorm2d() - for v in model.modules(): - if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias - g[2].append(v.bias) - if isinstance(v, bn): # weight (no decay) - g[1].append(v.weight) - elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay) - g[0].append(v.weight) - - if opt.optimizer == 'Adam': - optimizer = Adam(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum - elif opt.optimizer == 'AdamW': - optimizer = AdamW(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum - else: - optimizer = SGD(g[2], lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) - - optimizer.add_param_group({'params': g[0], 'weight_decay': hyp['weight_decay']}) # add g0 with weight_decay - optimizer.add_param_group({'params': g[1]}) # add g1 (BatchNorm2d weights) - LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups " - f"{len(g[1])} weight (no decay), {len(g[0])} weight, {len(g[2])} bias") - del g + optimizer = smart_optimizer(model, opt.optimizer, hyp['lr0'], hyp['momentum'], hyp['weight_decay']) # Scheduler if opt.cos_lr: @@ -182,26 +169,10 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary ema = ModelEMA(model) if RANK in {-1, 0} else None # Resume - start_epoch, best_fitness = 0, 0.0 + best_fitness, start_epoch = 0.0, 0 if pretrained: - # Optimizer - if ckpt['optimizer'] is not None: - optimizer.load_state_dict(ckpt['optimizer']) - best_fitness = ckpt['best_fitness'] - - # EMA - if ema and ckpt.get('ema'): - ema.ema.load_state_dict(ckpt['ema'].float().state_dict()) - ema.updates = ckpt['updates'] - - # Epochs - start_epoch = ckpt['epoch'] + 1 if resume: - assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.' - if epochs < start_epoch: - LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. 
Fine-tuning for {epochs} more epochs.") - epochs += ckpt['epoch'] # finetune additional epochs - + best_fitness, start_epoch, epochs = smart_resume(ckpt, optimizer, ema, weights, epochs, resume) del ckpt, csd # DP mode @@ -235,9 +206,8 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary mask_downsample_ratio=mask_ratio, overlap_mask=overlap, ) - mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class - print("mlc , nc ", mlc, " ", nc) - nb = len(train_loader) # number of batches + labels = np.concatenate(dataset.labels, 0) + mlc = int(labels[:, 0].max()) # max label class assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}' # Process 0 @@ -258,24 +228,17 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary prefix=colorstr('val: '))[0] if not resume: - labels = np.concatenate(dataset.labels, 0) - # c = torch.tensor(labels[:, 0]) # classes - # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency - # model._initialize_biases(cf.to(device)) - if plots: - plot_labels(labels, names, save_dir) - - # Anchors if not opt.noautoanchor: - check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) + check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # run AutoAnchor model.half().float() # pre-reduce anchor precision + if plots: + plot_labels(labels, names, save_dir) + # callbacks.run('on_pretrain_routine_end', labels, names) + # DDP mode if cuda and RANK != -1: - if check_version(torch.__version__, '1.11.0'): - model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, static_graph=True) - else: - model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK) + model = smart_DDP(model) # Model attributes nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps) @@ -290,6 +253,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary # Start training t0 = time.time() + nb = len(train_loader) # number of batches nw = max(round(hyp['warmup_epochs'] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training last_opt_step = -1 @@ -299,11 +263,13 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary scaler = torch.cuda.amp.GradScaler(enabled=amp) stopper, stop = EarlyStopping(patience=opt.patience), False compute_loss = ComputeLoss(model, overlap=overlap) # init loss class + # callbacks.run('on_train_start') LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n' f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n' f"Logging results to {colorstr('bold', save_dir)}\n" f'Starting training for {epochs} epochs...') for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ + # callbacks.run('on_train_epoch_start') model.train() # Update image weights (optional, single-GPU only) @@ -315,16 +281,18 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary # Update mosaic border (optional) # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders + mloss = torch.zeros(4, device=device) # mean losses if RANK != -1: train_loader.sampler.set_epoch(epoch) pbar = enumerate(train_loader) - LOGGER.info(("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "seg", "obj", "cls", "labels", "img_size")) + LOGGER.info(('\n' + '%11s' * 
8) % + ('Epoch', 'GPU_mem', 'box_loss', 'seg_loss', 'obj_loss', 'cls_loss', 'Instances', 'Size')) if RANK in {-1, 0}: pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar optimizer.zero_grad() - for i, (imgs, targets, paths, _, - masks) in pbar: # batch ------------------------------------------------------------- + for i, (imgs, targets, paths, _, masks) in pbar: # batch ------------------------------------------------------ + # callbacks.run('on_train_batch_start') ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 @@ -350,8 +318,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary # Forward with torch.cuda.amp.autocast(amp): pred = model(imgs) # forward - loss, loss_items = compute_loss(pred, targets.to(device), - masks=masks.to(device).float()) # loss scaled by batch_size + loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device).float()) if RANK != -1: loss *= WORLD_SIZE # gradient averaged between devices in DDP mode if opt.quad: @@ -360,8 +327,10 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary # Backward scaler.scale(loss).backward() - # Optimize + # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html if ni - last_opt_step >= accumulate: + scaler.unscale_(optimizer) # unscale gradients + torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0) # clip gradients scaler.step(optimizer) # optimizer.step scaler.update() optimizer.zero_grad() @@ -373,21 +342,18 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary if RANK in {-1, 0}: mloss = (mloss * i + loss_items) / (i + 1) # update mean losses mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) - pbar.set_description(("%10s" * 2 + "%10.4g" * 6) % - (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0], imgs.shape[-1])) - # for plots + pbar.set_description(('%11s' * 2 + '%11.4g' * 6) % + (f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1])) + # callbacks.run('on_train_batch_end', model, ni, imgs, targets, paths) + # if callbacks.stop_training: + # return + + # Mosaic plots if mask_ratio != 1: - masks = F.interpolate( - masks[None, :].float(), - (imgsz, imgsz), - mode="bilinear", - align_corners=False, - ).squeeze(0) + masks = F.interpolate(masks[None].float(), (imgsz, imgsz), mode="bilinear", align_corners=False)[0] if plots: if ni < 3: - f = save_dir / f"train_batch{ni}.jpg" # filename - plot_images_and_masks(imgs, targets, masks, paths, f) - + plot_images_and_masks(imgs, targets, masks, paths, save_dir / f"train_batch{ni}.jpg") if ni == 10: files = sorted(save_dir.glob('train*.jpg')) logger.log_images(files, "Mosaics", epoch) @@ -399,32 +365,38 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary if RANK in {-1, 0}: # mAP + # callbacks.run('on_train_epoch_end', epoch=epoch) ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) final_epoch = (epoch + 1 == epochs) or stopper.possible_stop if not noval or final_epoch: # Calculate mAP - results, maps, _ = val.run(data_dict, - batch_size=batch_size // WORLD_SIZE * 2, - imgsz=imgsz, - model=ema.ema, - single_cls=single_cls, - dataloader=val_loader, - save_dir=save_dir, - plots=plots, - compute_loss=compute_loss, - mask_downsample_ratio=mask_ratio, - overlap=overlap) + results, maps, _ = 
validate.run(data_dict, + batch_size=batch_size // WORLD_SIZE * 2, + imgsz=imgsz, + half=amp, + model=ema.ema, + single_cls=single_cls, + dataloader=val_loader, + save_dir=save_dir, + plots=False, + callbacks=callbacks, + compute_loss=compute_loss, + mask_downsample_ratio=mask_ratio, + overlap=overlap) + # Update best mAP fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] stop = stopper(epoch=epoch, fitness=fi) # early stop check if fi > best_fitness: best_fitness = fi log_vals = list(mloss) + list(results) + lr + # callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi) # Log val metrics and media metrics_dict = dict(zip(KEYS, log_vals)) logger.log_metrics(metrics_dict, epoch) if plots: files = sorted(save_dir.glob('val*.jpg')) logger.log_images(files, "Validation", epoch) + # Save model if (not nosave) or (final_epoch and not evolve): # if save ckpt = { @@ -435,6 +407,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary 'updates': ema.updates, 'optimizer': optimizer.state_dict(), # 'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None, + 'opt': vars(opt), 'date': datetime.now().isoformat()} # Save last, best and delete @@ -445,6 +418,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary torch.save(ckpt, w / f'epoch{epoch}.pt') logger.log_model(w / f'epoch{epoch}.pt') del ckpt + # callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi) # EarlyStopping if RANK != -1: # if DDP training @@ -464,24 +438,28 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary strip_optimizer(f) # strip optimizers if f is best: LOGGER.info(f'\nValidating {f}...') - results, _, _ = val.run( + results, _, _ = validate.run( data_dict, batch_size=batch_size // WORLD_SIZE * 2, imgsz=imgsz, model=attempt_load(f, device).half(), - iou_thres=0.65 if is_coco else 0.60, # best pycocotools results at 0.65 + iou_thres=0.65 if is_coco else 0.60, # best pycocotools at iou 0.65 single_cls=single_cls, dataloader=val_loader, save_dir=save_dir, save_json=is_coco, verbose=True, plots=plots, + callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, overlap=overlap) # val best model with plots if is_coco: + # callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) metrics_dict = dict(zip(KEYS, list(mloss) + list(results) + lr)) logger.log_metrics(metrics_dict, epoch) + + # callbacks.run('on_train_end', last, best, epoch, results) # on train end callback using genericLogger logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs + 1) if not opt.evolve: @@ -503,7 +481,7 @@ def parse_opt(known=False): parser.add_argument('--cfg', type=str, default='', help='model.yaml path') parser.add_argument('--data', type=str, default=ROOT / 'data/coco128-seg.yaml', help='dataset.yaml path') parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') - parser.add_argument('--epochs', type=int, default=300) + parser.add_argument('--epochs', type=int, default=300, help='total training epochs') parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)') parser.add_argument('--rect', action='store_true', help='rectangular training') @@ -522,7 +500,7 @@ def parse_opt(known=False): 
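The inline resume block deleted earlier in this patch now lives behind the single smart_resume() call. Roughly what that helper restores, assuming a checkpoint dict with the keys this patch saves (epoch, best_fitness, optimizer, ema, updates) — a sketch, not the exact utils.torch_utils implementation:

# Sketch: what smart_resume() restores from a checkpoint (mirrors the removed inline code)
def smart_resume(ckpt, optimizer, ema=None, weights='yolov5s-seg.pt', epochs=300, resume=True):
    best_fitness, start_epoch = 0.0, ckpt['epoch'] + 1
    if ckpt.get('optimizer') is not None:
        optimizer.load_state_dict(ckpt['optimizer'])  # restore optimizer state
        best_fitness = ckpt['best_fitness']
    if ema and ckpt.get('ema'):
        ema.ema.load_state_dict(ckpt['ema'].float().state_dict())  # restore EMA weights
        ema.updates = ckpt['updates']
    if resume:
        assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.'
    if epochs < start_epoch:
        epochs += ckpt['epoch']  # fine-tune for `epochs` more epochs past the saved count
    return best_fitness, start_epoch, epochs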
parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer') parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') - parser.add_argument('--project', default=ROOT / 'runs/train-seg', help='save results to project/name') + parser.add_argument('--project', default=ROOT / 'runs/train-seg', help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--quad', action='store_true', help='quad dataloader') @@ -535,30 +513,39 @@ def parse_opt(known=False): parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify') # Instance Segmentation Args - parser.add_argument('--mask-ratio', type=int, default=4, help='Downsample the gt masks to saving memory') - parser.add_argument('--no-overlap', - action='store_true', - help='Overlapping masks train faster at the cost of slight accuray decrease') + parser.add_argument('--mask-ratio', type=int, default=4, help='Downsample the truth masks to saving memory') + parser.add_argument('--no-overlap', action='store_true', help='Overlap masks train faster at slightly less mAP') - opt = parser.parse_known_args()[0] if known else parser.parse_args() - return opt + # Weights & Biases arguments + # parser.add_argument('--entity', default=None, help='W&B: Entity') + # parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option') + # parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval') + # parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use') + return parser.parse_known_args()[0] if known else parser.parse_args() -def main(opt): + +def main(opt, callbacks=Callbacks()): # Checks if RANK in {-1, 0}: print_args(vars(opt)) check_git_status() - check_requirements(exclude=['thop']) + check_requirements() # Resume - if opt.resume and not opt.evolve: # resume an interrupted run - ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path - assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist' - with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f: - opt = argparse.Namespace(**yaml.safe_load(f)) # replace - opt.cfg, opt.weights, opt.resume = '', ckpt, True # reinstate - LOGGER.info(f'Resuming training from {ckpt}') + if opt.resume and not opt.evolve: # resume from specified or most recent last.pt + last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run()) + opt_yaml = last.parent.parent / 'opt.yaml' # train options yaml + opt_data = opt.data # original dataset + if opt_yaml.is_file(): + with open(opt_yaml, errors='ignore') as f: + d = yaml.safe_load(f) + else: + d = torch.load(last, map_location='cpu')['opt'] + opt = argparse.Namespace(**d) # replace + opt.cfg, opt.weights, opt.resume = '', str(last), True # reinstate + if is_url(opt_data): + opt.data = check_file(opt_data) # avoid HUB resume auth timeout else: opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \ check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project) # 
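The reworked resume block prefers the opt.yaml written beside the run and falls back to the 'opt' dict that this series now stores inside the checkpoint itself (see the ckpt changes earlier in the patch). The same logic isolated as a function; the default path is only an example:

    import argparse
    from pathlib import Path

    import torch
    import yaml

    def load_resume_opt(last='runs/train-seg/exp/weights/last.pt'):
        last = Path(last)
        opt_yaml = last.parent.parent / 'opt.yaml'       # train options yaml
        if opt_yaml.is_file():
            with open(opt_yaml, errors='ignore') as f:
                d = yaml.safe_load(f)
        else:                                            # fall back to ckpt['opt']
            d = torch.load(last, map_location='cpu')['opt']
        opt = argparse.Namespace(**d)
        opt.cfg, opt.weights, opt.resume = '', str(last), True  # reinstate
        return opt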
checks @@ -586,10 +573,7 @@ def main(opt): # Train if not opt.evolve: - train(opt.hyp, opt, device) - if WORLD_SIZE > 1 and RANK == 0: - LOGGER.info('Destroying process group... ') - dist.destroy_process_group() + train(opt.hyp, opt, device, callbacks) # Evolve hyperparameters (optional) else: @@ -629,6 +613,8 @@ def main(opt): hyp = yaml.safe_load(f) # load hyps dict if 'anchors' not in hyp: # anchors commented in hyp.yaml hyp['anchors'] = 3 + if opt.noautoanchor: + del hyp['anchors'], meta['anchors'] opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv' @@ -668,7 +654,8 @@ def main(opt): hyp[k] = round(hyp[k], 5) # significant digits # Train mutation - results = train(hyp.copy(), opt, device) + results = train(hyp.copy(), opt, device, callbacks) + callbacks = Callbacks() # Write mutation results print_mutation(results, hyp.copy(), save_dir, opt.bucket) From 5956e7dae3912aa268b7ab7fbf12a10a98d328ee Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 25 Aug 2022 16:00:42 +0000 Subject: [PATCH 163/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/train.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/segment/train.py b/segment/train.py index b9f284b33eea..ebeebf0c1eaa 100644 --- a/segment/train.py +++ b/segment/train.py @@ -40,6 +40,7 @@ ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative import torch.nn.functional as F + import segment.val as validate # for end-of-epoch mAP from models.experimental import attempt_load from models.yolo import Model @@ -52,11 +53,11 @@ init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, one_cycle, print_args, print_mutation, strip_optimizer, yaml_save) from utils.loggers import GenericLogger +from utils.plots import plot_evolve, plot_labels from utils.segment.dataloaders import create_dataloader from utils.segment.loss import ComputeLoss -from utils.segment.metrics import fitness, KEYS +from utils.segment.metrics import KEYS, fitness from utils.segment.plots import plot_images_and_masks, plot_results_with_masks -from utils.plots import plot_evolve, plot_labels from utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel, select_device, smart_DDP, smart_optimizer, smart_resume, torch_distributed_zero_first) From 252b8b32078e8f761385238813971e00653ffe93 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 20:43:54 +0200 Subject: [PATCH 164/247] Major segments/val/process_batch() update --- segment/val.py | 54 +++++++++++++++++++------------------------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/segment/val.py b/segment/val.py index 21a0b8f0c4fc..8997a5d9e119 100644 --- a/segment/val.py +++ b/segment/val.py @@ -81,17 +81,29 @@ def save_one_json(predn, jdict, path, class_map, pred_masks): 'segmentation': rles[i]}) -def process_batch(detections, labels, iouv): +def process_batch(detections, labels, iouv, pred_masks=None, gt_masks=None, overlap=False, masks=False): """ - Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format. 
+    Return correct prediction matrix
     Arguments:
-        detections (array[N, 6]), x1, y1, x2, y2, conf, class
-        labels (array[M, 5]), class, x1, y1, x2, y2
+        detections (array[N, 6]), x1, y1, x2, y2, conf, class
+        labels (array[M, 5]), class, x1, y1, x2, y2
     Returns:
-        correct (array[N, 10]), for 10 IoU levels
+        correct (array[N, 10]), for 10 IoU levels
     """
-    correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
-    iou = box_iou(labels[:, 1:], detections[:, :4])
+    if masks:
+        if overlap:
+            nl = len(labels)
+            index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
+            gt_masks = gt_masks.repeat(nl, 1, 1)  # shape(1,640,640) -> (n,640,640)
+            gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
+        if gt_masks.shape[1:] != pred_masks.shape[1:]:
+            gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode="bilinear", align_corners=False)[0]
+            gt_masks = gt_masks.gt_(0.5)
+        iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
+    else:  # boxes
+        iou = box_iou(labels[:, 1:], detections[:, :4])
+
+    correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device)
     correct_class = labels[:, 0:1] == detections[:, 5]
     for i in range(len(iouv)):
         x = torch.where((iou >= iouv[i]) & correct_class)  # IoU > threshold and classes match
@@ -103,32 +115,6 @@ def process_batch(detections, labels, iouv):
             # matches = matches[matches[:, 2].argsort()[::-1]]
             matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
             correct[matches[:, 1].astype(int), i] = True
-    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
-
-
-def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap):
-    correct = torch.zeros(predn.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device)
-    # convert masks (1, 640, 640) -> (n, 640, 640)
-    if overlap:
-        nl = len(labels)
-        index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
-        gt_masks = gt_masks.repeat(nl, 1, 1)
-        gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
-
-    if gt_masks.shape[1:] != pred_masks.shape[1:]:
-        gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode="bilinear", align_corners=False)[0].gt_(0.5)
-
-    iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
-    x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == predn[:, 5]))  # IoU above threshold and classes match
-    if x[0].shape[0]:
-        matches = (torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy())  # [label, detection, iou]
-        if x[0].shape[0] > 1:
-            matches = matches[matches[:, 2].argsort()[::-1]]
-            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
-            # matches = matches[matches[:, 2].argsort()[::-1]]
-            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
-        matches = torch.Tensor(matches).to(iouv.device)
-        correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
     return correct
 
 
@@ -313,7 +299,7 @@ def run(
                 scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
                 labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
                 correct_bboxes = process_batch(predn, labelsn, iouv)
-                correct_masks = process_batch_masks(predn, pred_masks, gt_masks, labelsn, iouv, overlap=overlap)
+                correct_masks = process_batch(predn, labelsn, iouv, pred_masks, gt_masks, overlap=overlap, masks=True)
                 if plots:
                     confusion_matrix.process_batch(predn, labelsn)
                 stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0]))  # (conf, pcls, tcls)
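The unified mask branch is worth replaying on toy tensors: "overlap" ground-truth masks pack every instance of an image into one (H,W) map whose pixel values are instance ids, IoU is computed on flattened binary masks, and matching is the same greedy one-to-one dedup as for boxes. mask_iou below is a minimal stand-in for utils.segment.metrics.mask_iou, not the repo source:

    import numpy as np
    import torch

    def mask_iou(m1, m2, eps=1e-7):  # m1 (N,HW), m2 (M,HW), binary float masks
        inter = m1 @ m2.T
        union = m1.sum(1)[:, None] + m2.sum(1)[None] - inter
        return inter / (union + eps)

    gt = torch.tensor([[0., 1., 1., 0.],
                       [2., 2., 0., 0.]])          # one (H,W) map, instance ids 1..2
    nl = 2
    index = torch.arange(nl).view(nl, 1, 1) + 1    # (nl,1,1) ids to compare against
    gt_masks = torch.where(gt.repeat(nl, 1, 1) == index, 1.0, 0.0)  # (nl,H,W) binary
    pred_masks = gt_masks.flip(0)                  # two "predictions", swapped order
    iou = mask_iou(gt_masks.view(nl, -1), pred_masks.view(nl, -1))

    x = torch.where(iou >= 0.5)                    # class check omitted for brevity
    matches = torch.cat((torch.stack(x, 1).float(), iou[x[0], x[1]][:, None]), 1).numpy()
    matches = matches[matches[:, 2].argsort()[::-1]]                   # best IoU first
    matches = matches[np.unique(matches[:, 1], return_index=True)[1]]  # one label per detection
    matches = matches[np.unique(matches[:, 0], return_index=True)[1]]  # one detection per label
    print(matches)  # [[0. 1. 1.] [1. 0. 1.]]: label i matched to detection 1-i at IoU 1.0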
From 0d08e0e8f26558b9ae0d59e8378632b27eefe8e7 Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Thu, 25 Aug 2022 21:11:11 +0200
Subject: [PATCH 165/247] yolov5/val updates from segment

---
 val.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/val.py b/val.py
index d120b625e474..3b17fdeeb8e1 100644
--- a/val.py
+++ b/val.py
@@ -70,12 +70,12 @@ def save_one_json(predn, jdict, path, class_map):
 def process_batch(detections, labels, iouv):
     """
-    Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
+    Return correct prediction matrix
     Arguments:
-        detections (Array[N, 6]), x1, y1, x2, y2, conf, class
-        labels (Array[M, 5]), class, x1, y1, x2, y2
+        detections (array[N, 6]), x1, y1, x2, y2, conf, class
+        labels (array[M, 5]), class, x1, y1, x2, y2
     Returns:
-        correct (Array[N, 10]), for 10 IoU levels
+        correct (array[N, 10]), for 10 IoU levels
     """
     correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
     iou = box_iou(labels[:, 1:], detections[:, :4])
@@ -90,7 +90,7 @@ def process_batch(detections, labels, iouv):
             # matches = matches[matches[:, 2].argsort()[::-1]]
             matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
             correct[matches[:, 1].astype(int), i] = True
-    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
+    return correct
 
 
 @smart_inference_mode()

From 865dfa7582fd123b34df1e471463d453f3e545d0 Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Thu, 25 Aug 2022 21:26:43 +0200
Subject: [PATCH 166/247] process_batch numpy/tensor fix

---
 segment/val.py | 4 ++--
 val.py         | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/segment/val.py b/segment/val.py
index 8997a5d9e119..87868f755b28 100644
--- a/segment/val.py
+++ b/segment/val.py
@@ -103,7 +103,7 @@ def process_batch(detections, labels, iouv, pred_masks=None, gt_masks=None, over
     else:  # boxes
         iou = box_iou(labels[:, 1:], detections[:, :4])
 
-    correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device)
+    correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
     correct_class = labels[:, 0:1] == detections[:, 5]
     for i in range(len(iouv)):
         x = torch.where((iou >= iouv[i]) & correct_class)  # IoU > threshold and classes match
@@ -115,7 +115,7 @@ def process_batch(detections, labels, iouv, pred_masks=None, gt_masks=None, over
             # matches = matches[matches[:, 2].argsort()[::-1]]
             matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
             correct[matches[:, 1].astype(int), i] = True
-    return correct
+    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
 
 
 @smart_inference_mode()
diff --git a/val.py b/val.py
index 3b17fdeeb8e1..7edf70ed5817 100644
--- a/val.py
+++ b/val.py
@@ -90,7 +90,7 @@ def process_batch(detections, labels, iouv):
             # matches = matches[matches[:, 2].argsort()[::-1]]
             matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
             correct[matches[:, 1].astype(int), i] = True
-    return correct
+    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
 
 
 @smart_inference_mode()

From 43ca2f293713f2f5db964513d15377e7c0eeb3da Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Thu, 25 Aug 2022 23:57:21 +0200
Subject: [PATCH 167/247] opt-in to pycocotools with --save-json

---
 data/coco128-seg.yaml | 2 +-
 segment/val.py        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/data/coco128-seg.yaml b/data/coco128-seg.yaml
index a0319670a92e..5e81910cc456 100644
--- a/data/coco128-seg.yaml
+++ b/data/coco128-seg.yaml
@@ -98,4 +98,4 @@ names:
 
 # Download 
script/URL (optional) -download: https://ultralytics.com/assets/coco128-segments.zip +download: https://ultralytics.com/assets/coco128-seg.zip diff --git a/segment/val.py b/segment/val.py index 87868f755b28..bb08383890fc 100644 --- a/segment/val.py +++ b/segment/val.py @@ -419,7 +419,7 @@ def parse_opt(): parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') opt = parser.parse_args() opt.data = check_yaml(opt.data) # check YAML - opt.save_json |= opt.data.endswith('coco.yaml') + # opt.save_json |= opt.data.endswith('coco.yaml') opt.save_txt |= opt.save_hybrid print_args(vars(opt)) return opt From 8c9f9069aed73f2512faf8ee0f266a806787e744 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 02:15:21 +0200 Subject: [PATCH 168/247] threaded pycocotools ops for 2x speed increase --- segment/val.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/segment/val.py b/segment/val.py index bb08383890fc..7640a59904d2 100644 --- a/segment/val.py +++ b/segment/val.py @@ -39,6 +39,7 @@ from models.common import DetectMultiBackend from models.yolo import DetectionModel +from utils import threaded from utils.callbacks import Callbacks from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, @@ -62,6 +63,7 @@ def save_one_txt(predn, save_conf, shape, file): f.write(('%g ' * len(line)).rstrip() % line + '\n') +@threaded def save_one_json(predn, jdict, path, class_map, pred_masks): # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} from pycocotools.mask import encode From db545f97ec18ead7cf04fac5ab5d335bf551fd74 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 02:44:27 +0200 Subject: [PATCH 169/247] Avoid permute contiguous if possible --- utils/segment/general.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index b97d289f09c1..2044d3f018ac 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -60,10 +60,11 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): downsampled_bboxes[:, 1] *= mh / ih masks = crop(masks.permute(1, 2, 0).contiguous(), downsampled_bboxes) # HWC - masks = masks.permute(2, 0, 1).contiguous() if upsample: + masks = masks.permute(2, 0, 1).contiguous() masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW - return masks.gt_(0.5).permute(1, 2, 0).contiguous() + masks = masks.permute(1, 2, 0).contiguous() + return masks.gt_(0.5) def scale_masks(img1_shape, masks, img0_shape, ratio_pad=None): From 20653374f530f23e04be57190232a88416135586 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 13:16:49 +0200 Subject: [PATCH 170/247] Add max_det=300 argument to both val.py and segment/val.py --- segment/val.py | 3 +++ val.py | 10 +++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/segment/val.py b/segment/val.py index 7640a59904d2..16bf9c79d0e7 100644 --- a/segment/val.py +++ b/segment/val.py @@ -128,6 +128,7 @@ def run( imgsz=640, # inference size (pixels) conf_thres=0.001, # confidence threshold iou_thres=0.6, # NMS IoU threshold + max_det=300, # maximum detections per image task='val', # train, val, test, speed or study device='', # cuda device, i.e. 
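save_one_json is wrapped in @threaded (imported from utils) so that RLE encoding and JSON accumulation run off the main evaluation loop. The decorator is presumably a small fire-and-forget wrapper along these lines (a sketch, not the repo source):

    import threading
    import time

    def threaded(func):
        # Run func in a daemon thread; the caller gets the thread handle back
        def wrapper(*args, **kwargs):
            t = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
            t.start()
            return t
        return wrapper

    @threaded
    def slow_save(i):
        time.sleep(0.1)
        print(f'saved {i}')

    handles = [slow_save(i) for i in range(3)]  # returns immediately
    for t in handles:
        t.join()                                # wait before relying on the results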
0 or 0,1,2,3 or cpu workers=8, # max dataloader workers (per RANK in DDP mode) @@ -263,6 +264,7 @@ def run( labels=lb, multi_label=True, agnostic=single_cls, + max_det=max_det, masks=nm) # Metrics @@ -404,6 +406,7 @@ def parse_opt(): parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold') + parser.add_argument('--max-det', type=int, default=300, help='maximum detections per image') parser.add_argument('--task', default='val', help='train, val, test, speed or study') parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') diff --git a/val.py b/val.py index 7edf70ed5817..358e5ab6f0f2 100644 --- a/val.py +++ b/val.py @@ -101,6 +101,7 @@ def run( imgsz=640, # inference size (pixels) conf_thres=0.001, # confidence threshold iou_thres=0.6, # NMS IoU threshold + max_det=300, # maximum detections per image task='val', # train, val, test, speed or study device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu workers=8, # max dataloader workers (per RANK in DDP mode) @@ -214,7 +215,13 @@ def run( targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling with dt[2]: - out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls) + out = non_max_suppression(out, + conf_thres, + iou_thres, + labels=lb, + multi_label=True, + agnostic=single_cls, + max_det=max_det) # Metrics for si, pred in enumerate(out): @@ -336,6 +343,7 @@ def parse_opt(): parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold') + parser.add_argument('--max-det', type=int, default=300, help='maximum detections per image') parser.add_argument('--task', default='val', help='train, val, test, speed or study') parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') From f21e3497c97a34696d3b8ad0de9660c0d734bbf2 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 14:48:13 +0200 Subject: [PATCH 171/247] fix onnx_dynamic --- models/yolo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/yolo.py b/models/yolo.py index b95f5d078fcf..2180792f15a4 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -125,7 +125,7 @@ def forward(self, x): x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() if not self.training: # inference - if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic: + if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.dynamic: self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i) y = x[i].clone() From cb1649309a7bf48f0f06b28e0427987a69fa3730 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Fri, 26 Aug 2022 21:54:31 +0800 Subject: [PATCH 172/247] speed up pycocotools ops --- segment/val.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/segment/val.py b/segment/val.py index 16bf9c79d0e7..8fbe13ee06a1 100644 --- a/segment/val.py +++ b/segment/val.py @@ -28,6 +28,7 @@ import numpy as np import torch from tqdm import tqdm +from multiprocessing.pool import ThreadPool FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLOv5 root directory @@ -39,7 +40,6 @@ from models.common import DetectMultiBackend from models.yolo import DetectionModel -from utils import threaded from utils.callbacks import Callbacks from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, @@ -51,6 +51,7 @@ from utils.segment.metrics import Metrics, ap_per_class_box_and_mask from utils.segment.plots import plot_images_and_masks from utils.torch_utils import de_parallel, select_device, smart_inference_mode +from utils.general import NUM_THREADS def save_one_txt(predn, save_conf, shape, file): @@ -63,17 +64,20 @@ def save_one_txt(predn, save_conf, shape, file): f.write(('%g ' * len(line)).rstrip() % line + '\n') -@threaded def save_one_json(predn, jdict, path, class_map, pred_masks): # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} from pycocotools.mask import encode + def single_encode(x): + rle = encode(np.asarray(x[:, :, None], order="F", dtype="uint8"))[0] + rle["counts"] = rle["counts"].decode("utf-8") + return rle + image_id = int(path.stem) if path.stem.isnumeric() else path.stem box = xyxy2xywh(predn[:, :4]) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner pred_masks = np.transpose(pred_masks, (2, 0, 1)) - rles = [encode(np.asarray(x[:, :, None], order="F", dtype="uint8"))[0] for x in pred_masks] - for rle in rles: - rle["counts"] = rle["counts"].decode("utf-8") + with ThreadPool(NUM_THREADS) as pool: + rles = pool.map(single_encode, pred_masks) for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): jdict.append({ 'image_id': image_id, From 6bec10efe928a81f5089698e4f7b74f442f3a764 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 17:24:37 +0200 Subject: [PATCH 173/247] faster process_mask(upsample=True) for predict --- segment/predict.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 
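single_encode above pushes the C-backed pycocotools encoder through a ThreadPool, one mask plane per task; the commit message credits this with roughly a 2x speedup. The same pattern on dummy masks (requires pycocotools):

    import numpy as np
    from multiprocessing.pool import ThreadPool
    from pycocotools.mask import encode

    def single_encode(x):  # x: (H,W) uint8 binary mask -> COCO RLE dict
        rle = encode(np.asarray(x[:, :, None], order='F', dtype='uint8'))[0]
        rle['counts'] = rle['counts'].decode('utf-8')  # bytes -> JSON-serializable str
        return rle

    masks = (np.random.rand(8, 160, 160) > 0.5).astype(np.uint8)  # 8 dummy masks
    with ThreadPool(4) as pool:
        rles = pool.map(single_encode, list(masks))
    print(len(rles), rles[0]['size'])  # 8 [160, 160]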
b29f3d2dfd8a..858d2736554a 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -44,7 +44,7 @@ from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box -from utils.segment.general import process_mask_upsample, scale_masks +from utils.segment.general import process_mask, scale_masks from utils.segment.plots import plot_masks from utils.torch_utils import select_device, smart_inference_mode @@ -149,7 +149,7 @@ def run( annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): # Mask additions --------------------------------------------------------------------------------------- - masks = process_mask_upsample(proto[i], det[:, 6:], det[:, :4], im.shape[2:]) # HWC + masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True) # HWC masks = masks.permute(2, 0, 1).contiguous() # CHW # Mask additions --------------------------------------------------------------------------------------- From 68f805b8bd25e0bd98f035399f8f46c8c9c3572c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 26 Aug 2022 15:30:06 +0000 Subject: [PATCH 174/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/val.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/segment/val.py b/segment/val.py index 8fbe13ee06a1..70053a8313fe 100644 --- a/segment/val.py +++ b/segment/val.py @@ -23,12 +23,12 @@ import json import os import sys +from multiprocessing.pool import ThreadPool from pathlib import Path import numpy as np import torch from tqdm import tqdm -from multiprocessing.pool import ThreadPool FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLOv5 root directory @@ -41,7 +41,7 @@ from models.common import DetectMultiBackend from models.yolo import DetectionModel from utils.callbacks import Callbacks -from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml, +from utils.general import (LOGGER, NUM_THREADS, Profile, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, scale_coords, xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, box_iou @@ -51,7 +51,6 @@ from utils.segment.metrics import Metrics, ap_per_class_box_and_mask from utils.segment.plots import plot_images_and_masks from utils.torch_utils import de_parallel, select_device, smart_inference_mode -from utils.general import NUM_THREADS def save_one_txt(predn, save_conf, shape, file): @@ -67,6 +66,7 @@ def save_one_txt(predn, save_conf, shape, file): def save_one_json(predn, jdict, path, class_map, pred_masks): # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} from pycocotools.mask import encode + def single_encode(x): rle = encode(np.asarray(x[:, :, None], order="F", dtype="uint8"))[0] rle["counts"] = rle["counts"].decode("utf-8") From 0d48eb59c3a5aa3034d8cf0d46cd0b0bd2864b2b Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 17:40:17 +0200 Subject: [PATCH 175/247] eliminate permutations for process_mask(upsample=True) --- segment/predict.py | 3 --- utils/segment/general.py | 8 ++++++-- 2 files changed, 6 
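process_mask(..., upsample=True) assembles one mask per detection from the 32 prototype maps and that detection's 32 predicted coefficients. The shape flow on dummy tensors (box cropping and the final 0.5 threshold omitted here; they follow the interpolation):

    import torch
    import torch.nn.functional as F

    nm, mh, mw, n = 32, 160, 160, 5          # prototypes are 160x160 for a 640 input
    proto = torch.randn(nm, mh, mw)          # prototype masks, CHW
    coef = torch.randn(n, nm)                # one 32-vector per detection
    masks = (coef.tanh() @ proto.view(nm, -1)).sigmoid().view(-1, mh, mw)  # (5,160,160)
    masks = F.interpolate(masks[None], (640, 640), mode='bilinear', align_corners=False)[0]
    print(masks.shape)                       # torch.Size([5, 640, 640])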
insertions(+), 5 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 858d2736554a..04944c8a5d84 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -148,10 +148,7 @@ def run( imc = im0.copy() if save_crop else im0 # for save_crop annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): - # Mask additions --------------------------------------------------------------------------------------- masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True) # HWC - masks = masks.permute(2, 0, 1).contiguous() # CHW - # Mask additions --------------------------------------------------------------------------------------- # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() diff --git a/utils/segment/general.py b/utils/segment/general.py index 2044d3f018ac..a21d69efceeb 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -34,7 +34,10 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): c, mh, mw = proto_out.shape # CHW masks = (out_masks.tanh() @ proto_out.float().view(c, -1)).sigmoid().view(-1, mh, mw) masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW + masks = crop(masks.permute(1, 2, 0).contiguous(), bboxes) # HWC + masks = masks.permute(2, 0, 1).contiguous() + return masks.gt_(0.5) @@ -58,12 +61,13 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): downsampled_bboxes[:, 2] *= mw / iw downsampled_bboxes[:, 3] *= mh / ih downsampled_bboxes[:, 1] *= mh / ih + masks = crop(masks.permute(1, 2, 0).contiguous(), downsampled_bboxes) # HWC + masks = masks.permute(2, 0, 1).contiguous() if upsample: - masks = masks.permute(2, 0, 1).contiguous() masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW - masks = masks.permute(1, 2, 0).contiguous() + return masks.gt_(0.5) From 60105076b6d2cfe74ef87b7c7f4e4ba6f5d0de15 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 18:10:16 +0200 Subject: [PATCH 176/247] eliminate permute-contiguous in crop(), use native dimension order --- segment/val.py | 3 +-- utils/segment/general.py | 30 ++++++++++++++++-------------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/segment/val.py b/segment/val.py index 70053a8313fe..f9ca1fcca4ad 100644 --- a/segment/val.py +++ b/segment/val.py @@ -292,8 +292,7 @@ def run( midx = [si] if overlap else targets[:, 0] == si gt_masks = masks[midx] proto_out = train_out[1][si] - pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], - shape=im[si].shape[1:]).permute(2, 0, 1).contiguous().float() + pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).float() # Predictions if single_cls: diff --git a/utils/segment/general.py b/utils/segment/general.py index a21d69efceeb..9e45d58fa0b0 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -3,7 +3,7 @@ import torch.nn.functional as F -def crop(masks, boxes): +def crop(masks, boxes, hwc=True): """ "Crop" predicted masks by zeroing out everything not in the predicted bbox. Vectorized by Chong (thanks Chong). 
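The downsampled_bboxes lines rescale detection boxes from network-input resolution to prototype resolution, so the low-resolution masks can be cropped before any upsampling. In isolation, with example numbers:

    import torch

    ih, iw, mh, mw = 640, 640, 160, 160               # input and prototype sizes
    bboxes = torch.tensor([[64., 128., 320., 480.]])  # xyxy at input resolution
    down = bboxes.clone()
    down[:, [0, 2]] *= mw / iw                        # scale x1, x2
    down[:, [1, 3]] *= mh / ih                        # scale y1, y2
    print(down)  # tensor([[ 16.,  32.,  80., 120.]])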
@@ -11,13 +11,21 @@ def crop(masks, boxes): Args: - masks should be a size [h, w, n] tensor of masks - boxes should be a size [n, 4] tensor of bbox coords in relative point form + - nwc: are masks in height-width-channel HWC order """ - h, w, n = masks.shape - x1, y1, x2, y2 = torch.chunk(boxes.T[None], 4, 1) # x1 shape(1,1,n) - r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, :, None] # rows shape(1,w,1) - c = torch.arange(h, device=masks.device, dtype=x1.dtype)[:, None, None] # cols shape(h,1,1) - return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)).float() + if hwc: # hwc used for loss + h, w, n = masks.shape + x1, y1, x2, y2 = torch.chunk(boxes.T[None], 4, 1) # x1 shape(1,1,n) + r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, :, None] # rows shape(1,w,1) + c = torch.arange(h, device=masks.device, dtype=x1.dtype)[:, None, None] # cols shape(h,1,1) + else: # chw format used for inference + n, h, w = masks.shape + x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) + r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) + c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) + + return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) def process_mask_upsample(proto_out, out_masks, bboxes, shape): @@ -34,10 +42,7 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): c, mh, mw = proto_out.shape # CHW masks = (out_masks.tanh() @ proto_out.float().view(c, -1)).sigmoid().view(-1, mh, mw) masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW - - masks = crop(masks.permute(1, 2, 0).contiguous(), bboxes) # HWC - masks = masks.permute(2, 0, 1).contiguous() - + masks = crop(masks, bboxes, hwc=False) # CHW return masks.gt_(0.5) @@ -62,12 +67,9 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): downsampled_bboxes[:, 3] *= mh / ih downsampled_bboxes[:, 1] *= mh / ih - masks = crop(masks.permute(1, 2, 0).contiguous(), downsampled_bboxes) # HWC - masks = masks.permute(2, 0, 1).contiguous() - + masks = crop(masks, downsampled_bboxes, hwc=False) # CHW if upsample: masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW - return masks.gt_(0.5) From d43b10118fd1100c120e7d904a8edd06503ea725 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 18:21:01 +0200 Subject: [PATCH 177/247] cleanup comment --- utils/segment/general.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index 9e45d58fa0b0..49e8b493ed15 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -11,7 +11,7 @@ def crop(masks, boxes, hwc=True): Args: - masks should be a size [h, w, n] tensor of masks - boxes should be a size [n, 4] tensor of bbox coords in relative point form - - nwc: are masks in height-width-channel HWC order + - hwc: True if masks in height-width-channel HWC order, pass False for CHW """ if hwc: # hwc used for loss From 61e282c55e7078d51ef486ef033be852a0cee573 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 19:05:01 +0200 Subject: [PATCH 178/247] Add Proto() module --- models/common.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/models/common.py b/models/common.py index 5d49da77a35e..79755e10ab61 100644 --- a/models/common.py +++ b/models/common.py @@ -761,8 +761,22 @@ def __str__(self): return '' +class Proto(nn.Module): + # YOLOv5 
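crop() is a single broadcast: each box's bounds are compared against row and column index ramps, producing an inside-box indicator per mask. A toy CHW run (note that r ramps along width and c along height, whatever the inline shape comments say):

    import torch

    n, h, w = 2, 4, 6
    masks = torch.ones(n, h, w)
    boxes = torch.tensor([[1., 1., 4., 3.],
                          [0., 2., 6., 4.]])                 # xyxy per mask
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)    # each (n,1,1)
    r = torch.arange(w, dtype=x1.dtype)[None, None, :]       # (1,1,w) column index
    c = torch.arange(h, dtype=x1.dtype)[None, :, None]       # (1,h,1) row index
    cropped = masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
    print(cropped[0])  # ones only in rows 1-2, cols 1-3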
mask proto module + def __init__(self, c1, c_, c2): # ch_in, number of protos, number of masks + super().__init__() + self.cv1 = Conv(c1, c_, k=3, p=1) + self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False) + # self.upsample = nn.Upsample(scale_factor=2, mode='nearest') + self.cv2 = Conv(c_, c_, k=3, p=1) + self.cv3 = Conv(c_, c2, k=1, p=0) + + def forward(self, x): + return self.cv3(self.cv2(self.upsample(self.cv1(x)))) + + class Classify(nn.Module): - # Classification head, i.e. x(b,c1,20,20) to x(b,c2) + # YOLOv5 classification head, i.e. x(b,c1,20,20) to x(b,c2) def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups super().__init__() c_ = 1280 # efficientnet_b0 size From 8823206d0dd544e5f13aa6cbaf87fcee49205292 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 22:12:42 +0200 Subject: [PATCH 179/247] fix class count --- models/segment/yolov5s-seg.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/segment/yolov5s-seg.yaml b/models/segment/yolov5s-seg.yaml index cb71f5853de6..8f26e6800e6b 100644 --- a/models/segment/yolov5s-seg.yaml +++ b/models/segment/yolov5s-seg.yaml @@ -1,7 +1,7 @@ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license -# Parameters 1767976 -nc: 3 # number of classes +# Parameters +nc: 80 # number of classes depth_multiple: 0.33 # model depth multiple width_multiple: 0.5 # layer channel multiple anchors: From ff59beb1a64a893fd1a284be492c5f9ab51ebe1f Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 22:16:22 +0200 Subject: [PATCH 180/247] fix anchor order --- models/yolo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/yolo.py b/models/yolo.py index e440d79f94f5..2d78a31fd088 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -231,8 +231,8 @@ def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, i s = 256 # 2x min stride m.inplace = self.inplace m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[0]]) # forward - m.anchors /= m.stride.view(-1, 1, 1) check_anchor_order(m) + m.anchors /= m.stride.view(-1, 1, 1) self.stride = m.stride self._initialize_biases() # only run once elif isinstance(m, Detect): From 1b10d12735e7be8b9e182abbecb433c2cf5d00e0 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 27 Aug 2022 01:14:42 +0200 Subject: [PATCH 181/247] broadcast mask_gti in loss for speed --- utils/segment/loss.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 719424478621..cffbc59a513b 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -123,16 +123,11 @@ def __call__(self, preds, targets, masks): # predictions, targets, model for bi in b.unique(): j = b == bi # matching index if self.overlap: - mask_index = tidxs[i][j] - mask_gti = masks[bi][:, :, None].repeat(1, 1, j.sum()) # shape(h,w,n) - mask_gti = torch.where(mask_gti == mask_index, 1.0, 0.0) # shape(h,w,n) + mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j], 1.0, 0.0) # shape(h,w,n) else: - mask_gti = masks[tidxs[i]][j] - mask_gti = mask_gti.permute(1, 2, 0).contiguous() - + mask_gti = masks[tidxs[i]][j].permute(1, 2, 0).contiguous() batch_lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxys[j], mws[j], mhs[j]) - - # # update tobj + # Update tobj # iou = iou.detach().clamp(0).type(tobj.dtype) # tobj[b[index], a[index], gj[index], gi[index]] += 0.5 * iou[0] From 
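Proto turns the P3 feature map into prototype masks at twice its resolution. A runnable shape check; the Conv below is a simplified stand-in for YOLOv5's Conv (Conv2d + BatchNorm + SiLU), which is not reproduced here:

    import torch
    import torch.nn as nn

    class Conv(nn.Module):  # stand-in for models.common.Conv, for a self-contained demo
        def __init__(self, c1, c2, k=1, p=0):
            super().__init__()
            self.conv = nn.Conv2d(c1, c2, k, 1, p, bias=False)
            self.bn = nn.BatchNorm2d(c2)
            self.act = nn.SiLU()

        def forward(self, x):
            return self.act(self.bn(self.conv(x)))

    class Proto(nn.Module):  # as in the patch above
        def __init__(self, c1, c_, c2):  # ch_in, number of protos, number of masks
            super().__init__()
            self.cv1 = Conv(c1, c_, k=3, p=1)
            self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
            self.cv2 = Conv(c_, c_, k=3, p=1)
            self.cv3 = Conv(c_, c2, k=1, p=0)

        def forward(self, x):
            return self.cv3(self.cv2(self.upsample(self.cv1(x))))

    p = Proto(128, 256, 32)(torch.randn(1, 128, 80, 80))
    print(p.shape)  # torch.Size([1, 32, 160, 160]): 32 prototypes at 2x P3 resolution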
af8663cb83fa4f93bda9fa6108cded32ae79cc2d Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 27 Aug 2022 01:37:17 +0200 Subject: [PATCH 182/247] Cleanup seg loss --- utils/segment/loss.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index cffbc59a513b..a57555735a9d 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -67,7 +67,7 @@ def __init__(self, model, autobalance=False, overlap=False): def __call__(self, preds, targets, masks): # predictions, targets, model p, proto = preds - bs, nm, mask_h, mask_w = proto.shape # proto shape(bs, mask_h, mask_w, num_masks) + bs, nm, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width proto = proto.permute(0, 2, 3, 1) lcls = torch.zeros(1, device=self.device) @@ -119,20 +119,17 @@ def __call__(self, preds, targets, masks): # predictions, targets, model torch.tensor([mask_w, mask_h, mask_w, mask_h], device=mxywh.device)) mxyxys = xywh2xyxy(mxywhs) - batch_lseg = torch.zeros(1, device=self.device) for bi in b.unique(): j = b == bi # matching index if self.overlap: mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j], 1.0, 0.0) # shape(h,w,n) else: mask_gti = masks[tidxs[i]][j].permute(1, 2, 0).contiguous() - batch_lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxys[j], mws[j], mhs[j]) + lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxys[j], mws[j], mhs[j]) # Update tobj # iou = iou.detach().clamp(0).type(tobj.dtype) # tobj[b[index], a[index], gj[index], gi[index]] += 0.5 * iou[0] - lseg += batch_lseg / len(b.unique()) - obji = self.BCEobj(pi[..., 4], tobj) lobj += obji * self.balance[i] # obj loss if self.autobalance: @@ -143,8 +140,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model lbox *= self.hyp["box"] lobj *= self.hyp["obj"] lcls *= self.hyp["cls"] - lseg *= self.hyp["box"] - bs = tobj.shape[0] # batch size + lseg *= self.hyp["box"] / bs loss = lbox + lobj + lcls + lseg return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach() From 97e15a024bd7aacecec320223672073e330ebe1c Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 27 Aug 2022 01:55:43 +0200 Subject: [PATCH 183/247] faster indexing --- utils/segment/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index a57555735a9d..9255177130fb 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -120,7 +120,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model mxyxys = xywh2xyxy(mxywhs) for bi in b.unique(): - j = b == bi # matching index + j = torch.nonzero(b == bi) # matching index if self.overlap: mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j], 1.0, 0.0) # shape(h,w,n) else: From 0b83f5d87b49fe92bfc05fe2ccd734de083c1671 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 27 Aug 2022 02:05:35 +0200 Subject: [PATCH 184/247] faster indexing fix --- utils/segment/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 9255177130fb..e5671a7e6b15 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -120,7 +120,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model mxyxys = xywh2xyxy(mxywhs) for bi in b.unique(): - j = torch.nonzero(b == bi) # matching index + j = torch.nonzero(b == bi).squeeze() # matching index if self.overlap: mask_gti = torch.where(masks[bi].unsqueeze(2) == 
tidxs[i][j], 1.0, 0.0) # shape(h,w,n) else: From 46c38bb49e54beb7681174e7eb0ea2755f1f2a92 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 27 Aug 2022 02:31:29 +0200 Subject: [PATCH 185/247] faster indexing fix2 --- utils/segment/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index e5671a7e6b15..b19bd1e7ef8e 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -122,7 +122,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model for bi in b.unique(): j = torch.nonzero(b == bi).squeeze() # matching index if self.overlap: - mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j], 1.0, 0.0) # shape(h,w,n) + mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j][None, None], 1.0, 0.0) else: mask_gti = masks[tidxs[i]][j].permute(1, 2, 0).contiguous() lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxys[j], mws[j], mhs[j]) From f273ecb05c53320b74094ec994f97d251a01b767 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 27 Aug 2022 02:39:08 +0200 Subject: [PATCH 186/247] revert faster indexing --- utils/segment/loss.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index b19bd1e7ef8e..45033dc87a91 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -120,9 +120,9 @@ def __call__(self, preds, targets, masks): # predictions, targets, model mxyxys = xywh2xyxy(mxywhs) for bi in b.unique(): - j = torch.nonzero(b == bi).squeeze() # matching index + j = b == bi # matching index if self.overlap: - mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j][None, None], 1.0, 0.0) + mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j], 1.0, 0.0) else: mask_gti = masks[tidxs[i]][j].permute(1, 2, 0).contiguous() lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxys[j], mws[j], mhs[j]) From fc814b79198ba1876e1c04c77c834891c30e4f7d Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 27 Aug 2022 22:22:24 +0530 Subject: [PATCH 187/247] fix validation plotting --- segment/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/train.py b/segment/train.py index ebeebf0c1eaa..98998adda69a 100644 --- a/segment/train.py +++ b/segment/train.py @@ -378,7 +378,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio single_cls=single_cls, dataloader=val_loader, save_dir=save_dir, - plots=False, + plots=plots, callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, From 422b8d2eeab972ffff266b635e084a2e29b00c6a Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 27 Aug 2022 22:28:20 +0200 Subject: [PATCH 188/247] Loss cleanup and mxyxy simplification --- utils/segment/loss.py | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 45033dc87a91..e33ab2391590 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -74,7 +74,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model lbox = torch.zeros(1, device=self.device) lobj = torch.zeros(1, device=self.device) lseg = torch.zeros(1, device=self.device) - tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets(p, targets) # targets + tcls, tbox, indices, anchors, tidxs, xywhn = self.build_targets(p, targets) # targets # Losses for i, pi in enumerate(p): # layer index, layer predictions @@ -85,7 +85,7 @@ def 
__call__(self, preds, targets, masks): # predictions, targets, model if n: pxy, pwh, _, pmask, pcls, = pi[b, a, gj, gi].split((2, 2, 1, nm, self.nc), 1) # subset of predictions - # Regression + # Box regression pxy = pxy.sigmoid() * 2 - 0.5 pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box @@ -107,28 +107,18 @@ def __call__(self, preds, targets, masks): # predictions, targets, model t[range(n), tcls[i]] = self.cp lcls += self.BCEcls(pcls, t) # BCE - # Mask Regression - if tuple(masks.shape[-2:]) != (mask_h, mask_w): - # downsample shape(bs * num_objs,img_h,img_w) -> (bs * num_objs,mask_h,mask_w) + # Mask regression + if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample masks = F.interpolate(masks[None], (mask_h, mask_w), mode="bilinear", align_corners=False)[0] - - mxywh = xywh[i] - mws, mhs = mxywh[:, 2:].T - mws, mhs = mws / pi.shape[3], mhs / pi.shape[2] - mxywhs = (mxywh / torch.tensor(pi.shape, device=mxywh.device)[[3, 2, 3, 2]] * - torch.tensor([mask_w, mask_h, mask_w, mask_h], device=mxywh.device)) - mxyxys = xywh2xyxy(mxywhs) - + mwn, mhn = xywhn[i][:, 2:].T # mask width, height normalized + mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device)) for bi in b.unique(): j = b == bi # matching index if self.overlap: mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j], 1.0, 0.0) else: mask_gti = masks[tidxs[i]][j].permute(1, 2, 0).contiguous() - lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxys[j], mws[j], mhs[j]) - # Update tobj - # iou = iou.detach().clamp(0).type(tobj.dtype) - # tobj[b[index], a[index], gj[index], gi[index]] += 0.5 * iou[0] + lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], mwn[j], mwn[j]) obji = self.BCEobj(pi[..., 4], tobj) lobj += obji * self.balance[i] # obj loss @@ -146,9 +136,8 @@ def __call__(self, preds, targets, masks): # predictions, targets, model return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach() def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): - """mask loss of one single pic.""" - # (80, 80, 32) @ (32, n) -> (80, 80, n) - pred_mask = proto @ pred.tanh().T + # Mask loss for one image + pred_mask = proto @ pred.tanh().T # shape(80,80,32) @ (32,n) -> (80,80,n) # lseg_iou = self.mask_loss(pred_mask, gt_mask, xyxy) # iou = self.mask_loss(pred_mask, gt_mask, xyxy, return_iou=True) lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") @@ -159,7 +148,7 @@ def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets - tcls, tbox, indices, anch, tidxs, xywh = [], [], [], [], [], [] + tcls, tbox, indices, anch, tidxs, xywhn = [], [], [], [], [], [] gain = torch.ones(8, device=self.device) # normalized to gridspace gain ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) if self.overlap: @@ -222,6 +211,6 @@ def build_targets(self, p, targets): anch.append(anchors[a]) # anchors tcls.append(c) # class tidxs.append(tidx) - xywh.append(torch.cat((gxy, gwh), 1)) + xywhn.append(torch.cat((gxy, gwh), 1) / gain[2:6]) # xywh normalized - return tcls, tbox, indices, anch, tidxs, xywh + return tcls, tbox, indices, anch, tidxs, xywhn From b5016683e6bbb650ef6078befc53720deb4d2da9 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 27 Aug 2022 22:51:33 +0200 
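Since build_targets now returns grid-normalized xywhn, mask-space boxes come from a single scale vector and the normalized box area used to normalise the mask loss is a simple product. One dummy target, with xywh2xyxy as in utils.general:

    import torch

    def xywh2xyxy(x):  # (cx,cy,w,h) -> (x1,y1,x2,y2), as in utils.general
        y = x.clone()
        y[:, 0] = x[:, 0] - x[:, 2] / 2
        y[:, 1] = x[:, 1] - x[:, 3] / 2
        y[:, 2] = x[:, 0] + x[:, 2] / 2
        y[:, 3] = x[:, 1] + x[:, 3] / 2
        return y

    mask_h = mask_w = 160
    xywhn = torch.tensor([[0.5, 0.5, 0.25, 0.5]])   # centred box, normalized coords
    mxyxy = xywh2xyxy(xywhn * torch.tensor([mask_w, mask_h, mask_w, mask_h], dtype=torch.float))
    marea = xywhn[:, 2:].prod(1)                    # normalized area, divides the loss
    print(mxyxy, marea)  # tensor([[ 60.,  40., 100., 120.]]) tensor([0.1250])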
Subject: [PATCH 189/247] Loss cleanup and mxyxy simplification 2 --- utils/segment/loss.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index e33ab2391590..2a451c62e2ed 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -110,7 +110,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model # Mask regression if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample masks = F.interpolate(masks[None], (mask_h, mask_w), mode="bilinear", align_corners=False)[0] - mwn, mhn = xywhn[i][:, 2:].T # mask width, height normalized + marea = xywhn[i][:, 2:].prod(1) # mask width, height normalized mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device)) for bi in b.unique(): j = b == bi # matching index @@ -118,7 +118,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j], 1.0, 0.0) else: mask_gti = masks[tidxs[i]][j].permute(1, 2, 0).contiguous() - lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], mwn[j], mwn[j]) + lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j]) obji = self.BCEobj(pi[..., 4], tobj) lobj += obji * self.balance[i] # obj loss @@ -135,16 +135,33 @@ def __call__(self, preds, targets, masks): # predictions, targets, model loss = lbox + lobj + lcls + lseg return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach() - def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): + def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): # Mask loss for one image - pred_mask = proto @ pred.tanh().T # shape(80,80,32) @ (32,n) -> (80,80,n) + pred_mask = proto @ pred.T # shape(80,80,32) @ (32,n) -> (80,80,n) # lseg_iou = self.mask_loss(pred_mask, gt_mask, xyxy) # iou = self.mask_loss(pred_mask, gt_mask, xyxy, return_iou=True) lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") lseg = crop(lseg, xyxy) - lseg = lseg.mean(dim=(0, 1)) / w / h + lseg = lseg.mean(dim=(0, 1)) / area return lseg.mean() # , iou# + lseg_iou.mean() + def single_mask_loss_v2(self, gt_mask, pred, proto, xyxy, area, fast=False): + pred_mask = proto @ pred.T + + # Crop + h, w, n = pred_mask.shape + x1, y1, x2, y2 = torch.chunk(xyxy.T[None], 4, 1) # x1 shape(1,1,n) + r = torch.arange(w, device=pred_mask.device, dtype=x1.dtype)[None, :, None] # rows shape(1,w,1) + c = torch.arange(h, device=pred_mask.device, dtype=x1.dtype)[:, None, None] + i = (r >= x1) * (r < x2) * (c >= y1) * (c < y2) + + if fast: + return F.binary_cross_entropy_with_logits(pred_mask[i], gt_mask[i]) + + loss = F.binary_cross_entropy_with_logits(pred_mask[i], gt_mask[i], reduction="none") + mask_area = i * area + return (loss / mask_area[i]).mean() * area.mean() + def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets From 8d8f79723170efb3b1120a1175914303a6408e47 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sun, 28 Aug 2022 02:25:39 +0530 Subject: [PATCH 190/247] revert validation plotting --- segment/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/train.py b/segment/train.py index 98998adda69a..ebeebf0c1eaa 100644 --- a/segment/train.py +++ b/segment/train.py @@ -378,7 +378,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio 
single_cls=single_cls, dataloader=val_loader, save_dir=save_dir, - plots=plots, + plots=False, callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, From 2361108ac02de8597dfcb9d595d016ef96a6dab0 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 28 Aug 2022 01:02:40 +0200 Subject: [PATCH 191/247] replace missing tanh --- utils/segment/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 2a451c62e2ed..5e0368e2909e 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -137,7 +137,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): # Mask loss for one image - pred_mask = proto @ pred.T # shape(80,80,32) @ (32,n) -> (80,80,n) + pred_mask = proto @ pred.tanh().T # shape(80,80,32) @ (32,n) -> (80,80,n) # lseg_iou = self.mask_loss(pred_mask, gt_mask, xyxy) # iou = self.mask_loss(pred_mask, gt_mask, xyxy, return_iou=True) lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") From 5bd95410f2d896342259f13e8af77332b1e6f1a7 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 28 Aug 2022 13:00:42 +0200 Subject: [PATCH 192/247] Eliminate last permutation --- utils/segment/loss.py | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 5e0368e2909e..0a54f30f7275 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -68,8 +68,6 @@ def __init__(self, model, autobalance=False, overlap=False): def __call__(self, preds, targets, masks): # predictions, targets, model p, proto = preds bs, nm, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width - proto = proto.permute(0, 2, 3, 1) - lcls = torch.zeros(1, device=self.device) lbox = torch.zeros(1, device=self.device) lobj = torch.zeros(1, device=self.device) @@ -115,9 +113,9 @@ def __call__(self, preds, targets, masks): # predictions, targets, model for bi in b.unique(): j = b == bi # matching index if self.overlap: - mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j], 1.0, 0.0) + mask_gti = torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0) else: - mask_gti = masks[tidxs[i]][j].permute(1, 2, 0).contiguous() + mask_gti = masks[tidxs[i]][j] lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j]) obji = self.BCEobj(pi[..., 4], tobj) @@ -137,30 +135,9 @@ def __call__(self, preds, targets, masks): # predictions, targets, model def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): # Mask loss for one image - pred_mask = proto @ pred.tanh().T # shape(80,80,32) @ (32,n) -> (80,80,n) - # lseg_iou = self.mask_loss(pred_mask, gt_mask, xyxy) - # iou = self.mask_loss(pred_mask, gt_mask, xyxy, return_iou=True) - lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") - lseg = crop(lseg, xyxy) - lseg = lseg.mean(dim=(0, 1)) / area - return lseg.mean() # , iou# + lseg_iou.mean() - - def single_mask_loss_v2(self, gt_mask, pred, proto, xyxy, area, fast=False): - pred_mask = proto @ pred.T - - # Crop - h, w, n = pred_mask.shape - x1, y1, x2, y2 = torch.chunk(xyxy.T[None], 4, 1) # x1 shape(1,1,n) - r = torch.arange(w, device=pred_mask.device, dtype=x1.dtype)[None, :, None] # rows shape(1,w,1) - c = torch.arange(h, device=pred_mask.device, dtype=x1.dtype)[:, None, None] - i = (r >= x1) * (r < x2) * (c >= y1) * (c < y2) - - if fast: - 
return F.binary_cross_entropy_with_logits(pred_mask[i], gt_mask[i]) - - loss = F.binary_cross_entropy_with_logits(pred_mask[i], gt_mask[i], reduction="none") - mask_area = i * area - return (loss / mask_area[i]).mean() * area.mean() + pred_mask = (pred.tanh() @ proto.view(32, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80) + loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") + return (crop(loss, xyxy, hwc=False).mean(dim=(1, 2)) / area).mean() def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) From 1aff5cc461e534abae58fa9664cdf9eb59d99fef Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 28 Aug 2022 13:16:48 +0200 Subject: [PATCH 193/247] delete unneeded .float() --- segment/val.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/val.py b/segment/val.py index f9ca1fcca4ad..b62a85cca19c 100644 --- a/segment/val.py +++ b/segment/val.py @@ -292,7 +292,7 @@ def run( midx = [si] if overlap else targets[:, 0] == si gt_masks = masks[midx] proto_out = train_out[1][si] - pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).float() + pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]) # Predictions if single_cls: From 8fe3f91af5d923d1ff2d7045c855b1d87e9808d5 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 28 Aug 2022 13:21:46 +0200 Subject: [PATCH 194/247] Remove MaskIOULoss and crop(if HWC) --- utils/segment/general.py | 21 +++++++-------------- utils/segment/loss.py | 28 ++-------------------------- 2 files changed, 9 insertions(+), 40 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index 49e8b493ed15..ba65eec68a9c 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -3,7 +3,7 @@ import torch.nn.functional as F -def crop(masks, boxes, hwc=True): +def crop(masks, boxes): """ "Crop" predicted masks by zeroing out everything not in the predicted bbox. Vectorized by Chong (thanks Chong). 
@@ -11,19 +11,12 @@ def crop(masks, boxes, hwc=True): Args: - masks should be a size [h, w, n] tensor of masks - boxes should be a size [n, 4] tensor of bbox coords in relative point form - - hwc: True if masks in height-width-channel HWC order, pass False for CHW """ - if hwc: # hwc used for loss - h, w, n = masks.shape - x1, y1, x2, y2 = torch.chunk(boxes.T[None], 4, 1) # x1 shape(1,1,n) - r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, :, None] # rows shape(1,w,1) - c = torch.arange(h, device=masks.device, dtype=x1.dtype)[:, None, None] # cols shape(h,1,1) - else: # chw format used for inference - n, h, w = masks.shape - x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) - r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) - c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) + n, h, w = masks.shape + x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) + r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) + c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) @@ -42,7 +35,7 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): c, mh, mw = proto_out.shape # CHW masks = (out_masks.tanh() @ proto_out.float().view(c, -1)).sigmoid().view(-1, mh, mw) masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW - masks = crop(masks, bboxes, hwc=False) # CHW + masks = crop(masks, bboxes) # CHW return masks.gt_(0.5) @@ -67,7 +60,7 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): downsampled_bboxes[:, 3] *= mh / ih downsampled_bboxes[:, 1] *= mh / ih - masks = crop(masks, downsampled_bboxes, hwc=False) # CHW + masks = crop(masks, downsampled_bboxes) # CHW if upsample: masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW return masks.gt_(0.5) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 0a54f30f7275..f29fad7f5ff8 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -6,30 +6,7 @@ from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import de_parallel -from .general import crop, masks_iou - - -class MaskIOULoss(nn.Module): - - def __init__(self) -> None: - super().__init__() - - def forward(self, pred_mask, gt_mask, mxyxy=None, return_iou=False): - """ - Args: - pred_mask (torch.Tensor): prediction of masks, (80/160, 80/160, n) - gt_mask (torch.Tensor): ground truth of masks, (80/160, 80/160, n) - mxyxy (torch.Tensor): ground truth of boxes, (n, 4) - """ - _, _, n = pred_mask.shape # same as gt_mask - pred_mask = pred_mask.sigmoid() - if mxyxy is not None: - pred_mask = crop(pred_mask, mxyxy) - gt_mask = crop(gt_mask, mxyxy) - pred_mask = pred_mask.permute(2, 0, 1).view(n, -1) - gt_mask = gt_mask.permute(2, 0, 1).view(n, -1) - iou = masks_iou(pred_mask, gt_mask) - return iou if return_iou else (1.0 - iou) +from .general import crop class ComputeLoss: @@ -57,7 +34,6 @@ def __init__(self, model, autobalance=False, overlap=False): self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance - self.mask_loss = MaskIOULoss() self.na = m.na # number of anchors self.nc 
= m.nc # number of classes self.nl = m.nl # number of layers @@ -137,7 +113,7 @@ def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): # Mask loss for one image pred_mask = (pred.tanh() @ proto.view(32, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80) loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") - return (crop(loss, xyxy, hwc=False).mean(dim=(1, 2)) / area).mean() + return (crop(loss, xyxy).mean(dim=(1, 2)) / area).mean() def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) From f99934c98adf8387989d12e70d4b2cd2c229a0bb Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 28 Aug 2022 18:48:30 +0200 Subject: [PATCH 195/247] Final v6.3 SegmentationModel architecture updates --- .github/workflows/ci-testing.yml | 9 ++- models/common.py | 15 ++--- models/segment/yolov5l-seg.yaml | 2 +- models/segment/yolov5m-seg.yaml | 2 +- models/segment/yolov5n-seg.yaml | 2 +- models/segment/yolov5s-seg.yaml | 2 +- models/segment/yolov5x-seg.yaml | 2 +- models/yolo.py | 108 +++++++++---------------------- segment/predict.py | 2 +- segment/train.py | 8 +-- segment/val.py | 8 +-- utils/general.py | 18 +++--- utils/segment/general.py | 12 ++-- utils/segment/loss.py | 8 +-- 14 files changed, 78 insertions(+), 120 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 044ece544648..65bba5bc366b 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -128,14 +128,17 @@ jobs: run: | m=${{ matrix.model }}-seg # official weights b=runs/train-seg/exp/weights/best # best.pt checkpoint - python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train + # python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train + python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg $m.yaml --epochs 1 --device cpu # train for d in cpu; do # devices - for w in $m $b; do # weights + # for w in $m $b; do # weights + for w in $b; do # weights python segment/val.py --imgsz 64 --batch 32 --weights $w.pt --device $d # val python segment/predict.py --imgsz 64 --weights $w.pt --device $d # predict done done - python export.py --weights $m.pt --img 64 --include torchscript # export + # python export.py --weights $m.pt --img 64 --include torchscript # export + python export.py --weights $b.pt --img 64 --include torchscript # export - name: Test classification shell: bash # for Windows compatibility run: | diff --git a/models/common.py b/models/common.py index 79755e10ab61..014cf2e98d58 100644 --- a/models/common.py +++ b/models/common.py @@ -333,7 +333,7 @@ def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, names = model.module.names if hasattr(model, 'module') else model.names # get class names model.half() if fp16 else model.float() self.model = model # explicitly assign for to(), cpu(), cuda(), half() - segmentation_model = type(model.model[-1]).__name__ == 'DetectSegment' + segmentation_model = type(model.model[-1]).__name__ == 'Segment' elif jit: # TorchScript LOGGER.info(f'Loading {w} for TorchScript inference...') extra_files = {'config.txt': ''} # model metadata @@ -762,14 +762,13 @@ def __str__(self): class Proto(nn.Module): - # YOLOv5 mask proto module - def __init__(self, c1, c_, c2): # ch_in, number of protos, number of masks + # YOLOv5 mask Proto module for segmentation models + def __init__(self, c1, c_=256, 
c2=32): # ch_in, number of protos, number of masks super().__init__() - self.cv1 = Conv(c1, c_, k=3, p=1) - self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False) - # self.upsample = nn.Upsample(scale_factor=2, mode='nearest') - self.cv2 = Conv(c_, c_, k=3, p=1) - self.cv3 = Conv(c_, c2, k=1, p=0) + self.cv1 = Conv(c1, c_, k=3) + self.upsample = nn.Upsample(scale_factor=2, mode='nearest') + self.cv2 = Conv(c_, c_, k=3) + self.cv3 = Conv(c_, c2) def forward(self, x): return self.cv3(self.cv2(self.upsample(self.cv1(x)))) diff --git a/models/segment/yolov5l-seg.yaml b/models/segment/yolov5l-seg.yaml index 98fbe51addfe..4782de11dd2d 100644 --- a/models/segment/yolov5l-seg.yaml +++ b/models/segment/yolov5l-seg.yaml @@ -44,5 +44,5 @@ head: [[-1, 10], 1, Concat, [1]], # cat head P5 [-1, 3, C3, [1024, False]], # 23 (P5/32-large) - [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) ] diff --git a/models/segment/yolov5m-seg.yaml b/models/segment/yolov5m-seg.yaml index 37a0bb3f6050..f73d1992ac19 100644 --- a/models/segment/yolov5m-seg.yaml +++ b/models/segment/yolov5m-seg.yaml @@ -44,5 +44,5 @@ head: [[-1, 10], 1, Concat, [1]], # cat head P5 [-1, 3, C3, [1024, False]], # 23 (P5/32-large) - [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) ] \ No newline at end of file diff --git a/models/segment/yolov5n-seg.yaml b/models/segment/yolov5n-seg.yaml index 40a0409aac46..c28225ab4a50 100644 --- a/models/segment/yolov5n-seg.yaml +++ b/models/segment/yolov5n-seg.yaml @@ -44,5 +44,5 @@ head: [[-1, 10], 1, Concat, [1]], # cat head P5 [-1, 3, C3, [1024, False]], # 23 (P5/32-large) - [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) ] diff --git a/models/segment/yolov5s-seg.yaml b/models/segment/yolov5s-seg.yaml index 8f26e6800e6b..7cbdb36b425c 100644 --- a/models/segment/yolov5s-seg.yaml +++ b/models/segment/yolov5s-seg.yaml @@ -44,5 +44,5 @@ head: [[-1, 10], 1, Concat, [1]], # cat head P5 [-1, 3, C3, [1024, False]], # 23 (P5/32-large) - [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) ] \ No newline at end of file diff --git a/models/segment/yolov5x-seg.yaml b/models/segment/yolov5x-seg.yaml index e1f91c584dca..5d0c4524a99c 100644 --- a/models/segment/yolov5x-seg.yaml +++ b/models/segment/yolov5x-seg.yaml @@ -44,5 +44,5 @@ head: [[-1, 10], 1, Concat, [1]], # cat head P5 [-1, 3, C3, [1024, False]], # 23 (P5/32-large) - [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) ] diff --git a/models/yolo.py b/models/yolo.py index 2d78a31fd088..0d0c925d9654 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -36,6 +36,7 @@ class Detect(nn.Module): + # YOLOv5 Detect head for detection models stride = None # strides computed during build dynamic = False # force grid reconstruction export = False # export mode @@ -63,15 +64,16 @@ def forward(self, x): if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]: self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i) - y = x[i].sigmoid() + y = x[i].clone() + y[..., :5 + self.nc].sigmoid_() if self.inplace: y[..., 0:2] = (y[..., 
0:2] * 2 + self.grid[i]) * self.stride[i] # xy y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 - xy, wh, conf = y.split((2, 2, self.nc + 1), 4) # y.tensor_split((2, 4, 5), 4) # torch 1.8.0 + xy, wh, etc = y.split((2, 2, self.no - 4), 4) # tensor_split((2, 4, 5), 4) if torch 1.8.0 xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh - y = torch.cat((xy, wh, conf), 4) + y = torch.cat((xy, wh, etc), 4) z.append(y.view(bs, -1, self.no)) return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x) @@ -87,62 +89,21 @@ def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version return grid, anchor_grid -class DetectSegment(Detect): - - def __init__(self, nc=80, anchors=(), mask_dim=32, proto_channel=256, ch=(), inplace=True): +class Segment(Detect): + # YOLOv5 Segment head for segmentation models + def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True): super().__init__(nc, anchors, ch, inplace) - self.mask_dim = mask_dim - self.no = nc + 5 + self.mask_dim # number of outputs per anchor - self.nm = 5 + self.mask_dim - self.proto_c = proto_channel + self.nm = nm # number of masks + self.npr = npr # number of protos + self.no = 5 + nc + self.nm # number of outputs per anchor self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv - - # p3作为输入 - self.proto_net = nn.Sequential( - nn.Conv2d(ch[0], self.proto_c, kernel_size=3, stride=1, padding=1), - nn.SiLU(inplace=True), - # nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), - # nn.SiLU(inplace=True), - # nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), - # nn.SiLU(inplace=True), - nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False), - nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), - nn.SiLU(inplace=True), - nn.Conv2d(self.proto_c, self.mask_dim, kernel_size=1, padding=0), - nn.SiLU(inplace=True)) + self.proto = Proto(ch[0], self.npr, self.nm) # protos + self.detect = Detect.forward def forward(self, x): - z = [] # inference output - for i in range(self.nl): - if i == 0: - proto_out = self.proto_net(x[i]) - - x[i] = self.m[i](x[i]) # conv - bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) - x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() - - if not self.training: # inference - if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.dynamic: - self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i) - - y = x[i].clone() - y[..., 0:5] = y[..., 0:5].sigmoid() - y[..., self.nm:] = y[..., self.nm:].sigmoid() - if self.inplace: - y[..., 0:2] = (y[..., 0:2] * 2 + self.grid[i]) * self.stride[i] # xy - y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh - else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 - xy = (y[..., 0:2] * 2. 
+ self.grid[i]) * self.stride[i] # xy - wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh - y = torch.cat((xy.type_as(y), wh.type_as(y), y[..., 4:]), -1) - z.append(y.view(-1, self.na * ny * nx, self.no)) - - # TODO: export - if torch.onnx.is_in_onnx_export(): - output = torch.cat(z, 1) - return output # keep the same type with x - else: - return (x, proto_out) if self.training else (torch.cat(z, 1), (x, proto_out)) + p = self.proto(x[0]) + x = self.detect(self, x) + return (x, p) if self.training else (x[0], p) if self.export else (x[0], (x[1], p)) class BaseModel(nn.Module): @@ -193,7 +154,7 @@ def _apply(self, fn): # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers self = super()._apply(fn) m = self.model[-1] # Detect() - if isinstance(m, Detect): + if isinstance(m, (Detect, Segment)): m.stride = fn(m.stride) m.grid = list(map(fn, m.grid)) if isinstance(m.anchor_grid, list): @@ -227,22 +188,15 @@ def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, i # Build strides, anchors m = self.model[-1] # Detect() - if isinstance(m, DetectSegment): + if isinstance(m, (Detect, Segment)): s = 256 # 2x min stride m.inplace = self.inplace - m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[0]]) # forward + forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x) + m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward check_anchor_order(m) m.anchors /= m.stride.view(-1, 1, 1) self.stride = m.stride self._initialize_biases() # only run once - elif isinstance(m, Detect): - s = 256 # 2x min stride - m.inplace = self.inplace - m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.empty(1, ch, s, s))]) # forward - check_anchor_order(m) # must be in pixel-space (not grid-space) - m.anchors /= m.stride.view(-1, 1, 1) - self.stride = m.stride - self._initialize_biases() # only run once # Init weights, biases initialize_weights(self) @@ -303,17 +257,19 @@ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is for mi, s in zip(m.m, m.stride): # from b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) - if hasattr(m, "mask_dim"): - b.data[:, 5 + m.mask_dim:] += math.log(0.6 / - (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls - else: - b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls + b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility +class SegmentationModel(DetectionModel): + # YOLOv5 segmentation model + def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None): + super().__init__(cfg, ch, nc, anchors) + + class ClassificationModel(BaseModel): # YOLOv5 classification model def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index @@ -354,14 +310,14 @@ def parse_model(d, ch): # model_dict, input_channels(3) args[j] = eval(a) if isinstance(a, str) else a # eval strings n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain - if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, - BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, 
nn.ConvTranspose2d, DWConvTranspose2d, C3x): + if m in {Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, + BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}: c1, c2 = ch[f], args[0] if c2 != no: # if not output c2 = make_divisible(c2 * gw, 8) args = [c1, c2, *args[1:]] - if m in [BottleneckCSP, C3, C3TR, C3Ghost, C3x]: + if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}: args.insert(2, n) # number of repeats n = 1 elif m is nn.BatchNorm2d: @@ -369,11 +325,11 @@ def parse_model(d, ch): # model_dict, input_channels(3) elif m is Concat: c2 = sum(ch[x] for x in f) # TODO: channel, gw, gd - elif m in [Detect, DetectSegment]: + elif m in {Detect, Segment}: args.append([ch[x] for x in f]) if isinstance(args[1], int): # number of anchors args[1] = [list(range(args[1] * 2))] * len(f) - if m is DetectSegment: + if m is Segment: args[3] = make_divisible(args[3] * gw, 8) elif m is Contract: c2 = ch[f] * args[0] ** 2 diff --git a/segment/predict.py b/segment/predict.py index 04944c8a5d84..c5b755ad1d62 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -126,7 +126,7 @@ def run( # NMS with dt[2]: - pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, masks=32) + pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, nm=32) # Second-stage classifier (optional) # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s) diff --git a/segment/train.py b/segment/train.py index ebeebf0c1eaa..36e8f153f677 100644 --- a/segment/train.py +++ b/segment/train.py @@ -43,7 +43,7 @@ import segment.val as validate # for end-of-epoch mAP from models.experimental import attempt_load -from models.yolo import Model +from models.yolo import SegmentationModel from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size from utils.callbacks import Callbacks @@ -109,7 +109,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio plots = not evolve and not opt.noplots # create plots overlap = not opt.no_overlap cuda = device.type != 'cpu' - init_seeds(opt.seed + 1 + RANK, deterministic=False) + init_seeds(opt.seed + 1 + RANK, deterministic=True) with torch_distributed_zero_first(LOCAL_RANK): data_dict = data_dict or check_dataset(data) # check if None train_path, val_path = data_dict['train'], data_dict['val'] @@ -124,14 +124,14 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio with torch_distributed_zero_first(LOCAL_RANK): weights = attempt_download(weights) # download if not found locally ckpt = torch.load(weights, map_location='cpu') # load checkpoint to CPU to avoid CUDA memory leak - model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create + model = SegmentationModel(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect model.load_state_dict(csd, strict=False) # load LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report else: - model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create + model = SegmentationModel(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create amp = 
check_amp(model) # check AMP # Freeze diff --git a/segment/val.py b/segment/val.py index b62a85cca19c..c08f0bf5cce6 100644 --- a/segment/val.py +++ b/segment/val.py @@ -39,7 +39,7 @@ import torch.nn.functional as F from models.common import DetectMultiBackend -from models.yolo import DetectionModel +from models.yolo import SegmentationModel from utils.callbacks import Callbacks from utils.general import (LOGGER, NUM_THREADS, Profile, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, @@ -169,7 +169,7 @@ def run( device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model half &= device.type != 'cpu' # half precision only supported on CUDA model.half() if half else model.float() - nm = de_parallel(model).model[-1].mask_dim # number of masks + nm = de_parallel(model).model[-1].nm # number of masks else: # called directly device = select_device(device, batch_size=batch_size) @@ -182,7 +182,7 @@ def run( stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine imgsz = check_img_size(imgsz, s=stride) # check image size half = model.fp16 # FP16 supported on limited backends with CUDA - nm = de_parallel(model).model.model[-1].mask_dim if isinstance(model, DetectionModel) else 32 # number of masks + nm = de_parallel(model).model.model[-1].nm if isinstance(model, SegmentationModel) else 32 # number of masks if engine: batch_size = model.batch_size else: @@ -269,7 +269,7 @@ def run( multi_label=True, agnostic=single_cls, max_det=max_det, - masks=nm) + nm=nm) # Metrics plot_masks = [] # masks for plotting diff --git a/utils/general.py b/utils/general.py index 98b4aa348c33..d336ba91ba5b 100644 --- a/utils/general.py +++ b/utils/general.py @@ -811,7 +811,7 @@ def non_max_suppression( multi_label=False, labels=(), max_det=300, - masks=0, + nm=0, # number of masks ): """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections @@ -820,7 +820,7 @@ def non_max_suppression( """ bs = prediction.shape[0] # batch size - nc = prediction.shape[2] - 5 # number of classes + nc = prediction.shape[2] - nm - 5 # number of classes xc = prediction[..., 4] > conf_thres # candidates # Checks @@ -837,8 +837,8 @@ def non_max_suppression( merge = False # use merge-NMS t = time.time() - si = 5 + masks # box/mask start index - output = [torch.zeros((0, 6 + masks), device=prediction.device)] * bs + mi = 5 + nc # mask start index + output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs for xi, x in enumerate(prediction): # image index, image inference # Apply constraints # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height @@ -847,7 +847,7 @@ def non_max_suppression( # Cat apriori labels if autolabelling if labels and len(labels[xi]): lb = labels[xi] - v = torch.zeros((len(lb), nc + 5), device=x.device) + v = torch.zeros((len(lb), nc + nm + 5), device=x.device) v[:, :4] = lb[:, 1:5] # box v[:, 4] = 1.0 # conf v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls @@ -862,14 +862,14 @@ def non_max_suppression( # Box/Mask box = xywh2xyxy(x[:, :4]) # center_x, center_y, width, height) to (x1, y1, x2, y2) - mask = x[:, 5:si] # zero columns if no masks + mask = x[:, mi:] # zero columns if no masks # Detections matrix nx6 (xyxy, conf, cls) if multi_label: - i, j = (x[:, si:] > conf_thres).nonzero(as_tuple=False).T - x = torch.cat((box[i], x[i, j + si, None], j[:, None].float(), mask[i]), 1) + i, j = (x[:, 
5:mi] > conf_thres).nonzero(as_tuple=False).T + x = torch.cat((box[i], x[i, 5 + j, None], j[:, None].float(), mask[i]), 1) else: # best class only - conf, j = x[:, si:].max(1, keepdim=True) + conf, j = x[:, 5:mi].max(1, keepdim=True) x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres] # Filter by class diff --git a/utils/segment/general.py b/utils/segment/general.py index ba65eec68a9c..2c62e99b1389 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -21,7 +21,7 @@ def crop(masks, boxes): return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) -def process_mask_upsample(proto_out, out_masks, bboxes, shape): +def process_mask_upsample(protos, masks_in, bboxes, shape): """ Crop after upsample. proto_out: [mask_dim, mask_h, mask_w] @@ -32,14 +32,14 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): return: h, w, n """ - c, mh, mw = proto_out.shape # CHW - masks = (out_masks.tanh() @ proto_out.float().view(c, -1)).sigmoid().view(-1, mh, mw) + c, mh, mw = protos.shape # CHW + masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW masks = crop(masks, bboxes) # CHW return masks.gt_(0.5) -def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): +def process_mask(protos, masks_in, bboxes, shape, upsample=False): """ Crop before upsample. proto_out: [mask_dim, mask_h, mask_w] @@ -50,9 +50,9 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): return: h, w, n """ - c, mh, mw = proto_out.shape # CHW + c, mh, mw = protos.shape # CHW ih, iw = shape - masks = (out_masks.tanh() @ proto_out.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW + masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW downsampled_bboxes = bboxes.clone() downsampled_bboxes[:, 0] *= mw / iw diff --git a/utils/segment/loss.py b/utils/segment/loss.py index f29fad7f5ff8..fa1043488fd8 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -57,7 +57,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model n = b.shape[0] # number of targets if n: - pxy, pwh, _, pmask, pcls, = pi[b, a, gj, gi].split((2, 2, 1, nm, self.nc), 1) # subset of predictions + pxy, pwh, _, pcls, pmask = pi[b, a, gj, gi].split((2, 2, 1, self.nc, nm), 1) # subset of predictions # Box regression pxy = pxy.sigmoid() * 2 - 0.5 @@ -111,7 +111,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): # Mask loss for one image - pred_mask = (pred.tanh() @ proto.view(32, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80) + pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80) loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") return (crop(loss, xyxy).mean(dim=(1, 2)) / area).mean() @@ -126,10 +126,10 @@ def build_targets(self, p, targets): ti = [] for i in range(batch): num = (targets[:, 0] == i).sum() # find number of targets of each image - ti.append(torch.arange(num, device=targets.device).float().view(1, num).repeat(na, 1) + 1) # (na, num) + ti.append(torch.arange(num, device=self.device).float().view(1, num).repeat(na, 1) + 1) # (na, num) ti = torch.cat(ti, 1) # (na, nt) else: - ti = torch.arange(nt, device=targets.device).float().view(1, nt).repeat(na, 1) + ti = torch.arange(nt, device=self.device).float().view(1, 
nt).repeat(na, 1) targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None], ti[..., None]), 2) # append anchor indices g = 0.5 # bias From 00a23e0c1cd2798be633bc9023477e3235145790 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 28 Aug 2022 16:48:58 +0000 Subject: [PATCH 196/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- models/yolo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/models/yolo.py b/models/yolo.py index 0d0c925d9654..2d32226a6ba6 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -310,8 +310,9 @@ def parse_model(d, ch): # model_dict, input_channels(3) args[j] = eval(a) if isinstance(a, str) else a # eval strings n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain - if m in {Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, - BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}: + if m in { + Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, + BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}: c1, c2 = ch[f], args[0] if c2 != no: # if not output c2 = make_divisible(c2 * gw, 8) From 2a26bdb002f22a58deff02a6da77f51538116be6 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 30 Aug 2022 13:16:27 +0530 Subject: [PATCH 197/247] Add support for TF export --- models/tf.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/models/tf.py b/models/tf.py index ecb0d4d79c78..ed4f11324160 100644 --- a/models/tf.py +++ b/models/tf.py @@ -30,7 +30,7 @@ from models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv, DWConvTranspose2d, Focus, autopad) from models.experimental import MixConv2d, attempt_load -from models.yolo import Detect +from models.yolo import Detect, Segment from utils.activations import SiLU from utils.general import LOGGER, make_divisible, print_args @@ -319,6 +319,29 @@ def _make_grid(nx=20, ny=20): xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny)) return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32) +class TFSegment(TFDetect): + # YOLOv5 Segment head for segmentation models + def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None): + super().__init__(nc, anchors, ch, imgsz, w) + self.nm = nm # number of masks + self.npr = npr # number of protos + self.no = 5 + nc + self.nm # number of outputs per anchor + self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] # output conv + self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto) # protos + self.detect = TFDetect.call + + def forward(self, x): + p = self.proto(x[0]) + x = self.detect(self, x) + return (x, p) if self.training else (x[0], p) if self.export else (x[0], (x[1], p)) + +class TFProto(keras.layers.Layer): + def __init__(self, c1, c_=256, c2=32, w=None): + super().__init__() + self.cv1 = TFConv(c1, c_, k=3, w=w.cv1) + self.upsample = TFUpsample(None, scale_factor=2, mode='nearest') + self.cv2 = TFConv(c_, c_, k=3, w=w.cv2) + self.cv3 = TFConv(c_, c2, w=w.cv3) class TFUpsample(keras.layers.Layer): # TF version of torch.nn.Upsample() @@ -377,7 +400,9 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3) args = [ch[f]] elif m is Concat: c2 = sum(ch[-1 if x == -1 else x + 1] for x in f) - elif 
m is Detect: + elif m in [Detect, Segment]: + import pdb; + pdb.set_trace() args.append([ch[x + 1] for x in f]) if isinstance(args[1], int): # number of anchors args[1] = [list(range(args[1] * 2))] * len(f) From 0e0f9c0a4482fbd9286d7c60a29f49ead1057077 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 30 Aug 2022 13:25:43 +0530 Subject: [PATCH 198/247] remove debugger trace --- models/tf.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/models/tf.py b/models/tf.py index ed4f11324160..4d9456e565c3 100644 --- a/models/tf.py +++ b/models/tf.py @@ -401,8 +401,6 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3) elif m is Concat: c2 = sum(ch[-1 if x == -1 else x + 1] for x in f) elif m in [Detect, Segment]: - import pdb; - pdb.set_trace() args.append([ch[x + 1] for x in f]) if isinstance(args[1], int): # number of anchors args[1] = [list(range(args[1] * 2))] * len(f) From b6bca18fec29945e65de993b8b0ba6faa85dfd8e Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 30 Aug 2022 15:22:11 +0530 Subject: [PATCH 199/247] add call --- models/tf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/models/tf.py b/models/tf.py index 4d9456e565c3..d38461065421 100644 --- a/models/tf.py +++ b/models/tf.py @@ -342,6 +342,9 @@ def __init__(self, c1, c_=256, c2=32, w=None): self.upsample = TFUpsample(None, scale_factor=2, mode='nearest') self.cv2 = TFConv(c_, c_, k=3, w=w.cv2) self.cv3 = TFConv(c_, c2, w=w.cv3) + + def call(self, inputs): + return self.cv2(self.cv2(self.upsample(self.cv1(inputs)))) class TFUpsample(keras.layers.Layer): # TF version of torch.nn.Upsample() From c7a2ec9aeb3994c60114ad87072c995b8fbba4dc Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 30 Aug 2022 15:24:37 +0530 Subject: [PATCH 200/247] update --- models/tf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/tf.py b/models/tf.py index d38461065421..41cd3c9e8635 100644 --- a/models/tf.py +++ b/models/tf.py @@ -344,7 +344,7 @@ def __init__(self, c1, c_=256, c2=32, w=None): self.cv3 = TFConv(c_, c2, w=w.cv3) def call(self, inputs): - return self.cv2(self.cv2(self.upsample(self.cv1(inputs)))) + return self.cv3(self.cv2(self.upsample(self.cv1(inputs)))) class TFUpsample(keras.layers.Layer): # TF version of torch.nn.Upsample() From d2af8e1337b77a870c04c7542c56c0aea3dabefa Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 30 Aug 2022 16:45:07 +0530 Subject: [PATCH 201/247] update --- models/tf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/tf.py b/models/tf.py index 41cd3c9e8635..747ac55a14cb 100644 --- a/models/tf.py +++ b/models/tf.py @@ -330,7 +330,7 @@ def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto) # protos self.detect = TFDetect.call - def forward(self, x): + def call(self, x): p = self.proto(x[0]) x = self.detect(self, x) return (x, p) if self.training else (x[0], p) if self.export else (x[0], (x[1], p)) From 445680c2b54b055733560c3c1140e4ef3a21d700 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 1 Sep 2022 22:44:56 +0200 Subject: [PATCH 202/247] Merge master --- utils/dataloaders.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/utils/dataloaders.py b/utils/dataloaders.py index 3f011911ebf7..ff46b43270ad 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -214,7 +214,7 @@ def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None): self.auto = auto 
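
A note on the Segment and TFSegment heads introduced above: both reuse the parent detect head by storing its forward as an unbound function and passing self explicitly. A toy sketch of the pattern; the classes and numbers below are made up for illustration and are not the library API:

class Detect:
    def forward(self, x):
        return [xi * 2 for xi in x]

class Segment(Detect):
    def __init__(self):
        self.detect = Detect.forward  # plain function attribute, not a bound method

    def forward(self, x):
        p = sum(x)                # stand-in for the Proto() branch
        x = self.detect(self, x)  # run the parent head on self
        return x, p

print(Segment().forward([1, 2, 3]))  # ([2, 4, 6], 6)

This keeps the proto branch a thin wrapper rather than copying the grid and anchor logic into the subclass.
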
self.transforms = transforms # optional if any(videos): - self.new_video(videos[0]) # new video + self._new_video(videos[0]) # new video else: self.cap = None assert self.nf > 0, f'No images or videos found in {p}. ' \ @@ -239,10 +239,11 @@ def __next__(self): if self.count == self.nf: # last video raise StopIteration path = self.files[self.count] - self.new_video(path) + self._new_video(path) ret_val, im0 = self.cap.read() self.frame += 1 + # im0 = self._cv2_rotate(im0) # for use if cv2 auto rotation is False s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ' else: @@ -261,10 +262,23 @@ def __next__(self): return path, im, im0, self.cap, s - def new_video(self, path): + def _new_video(self, path): + # Create a new video capture object self.frame = 0 self.cap = cv2.VideoCapture(path) self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) + self.orientation = int(self.cap.get(cv2.CAP_PROP_ORIENTATION_META)) # rotation degrees + # self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) # disable https://github.com/ultralytics/yolov5/issues/8493 + + def _cv2_rotate(self, im): + # Rotate a cv2 video manually + if self.orientation == 0: + return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE) + elif self.orientation == 180: + return cv2.rotate(im, cv2.ROTATE_90_COUNTERCLOCKWISE) + elif self.orientation == 90: + return cv2.rotate(im, cv2.ROTATE_180) + return im def __len__(self): return self.nf # number of files From 342229409fa1d24c146f356fd06d919e5b7e1db0 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 1 Sep 2022 22:46:04 +0200 Subject: [PATCH 203/247] Merge master --- utils/dataloaders.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/utils/dataloaders.py b/utils/dataloaders.py index ff46b43270ad..f0a50d7c8dca 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -40,6 +40,7 @@ VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv' # include video suffixes BAR_FORMAT = '{l_bar}{bar:10}{r_bar}{bar:-10b}' # tqdm bar format LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true' # global pin_memory for dataloaders # Get orientation exif tag for orientation in ExifTags.TAGS.keys(): @@ -83,7 +84,7 @@ def exif_transpose(image): 5: Image.TRANSPOSE, 6: Image.ROTATE_270, 7: Image.TRANSVERSE, - 8: Image.ROTATE_90,}.get(orientation) + 8: Image.ROTATE_90}.get(orientation) if method is not None: image = image.transpose(method) del exif[0x0112] @@ -139,17 +140,16 @@ def create_dataloader(path, loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates # generator = torch.Generator() # generator.manual_seed(0) - return loader( - dataset, - batch_size=batch_size, - shuffle=shuffle and sampler is None, - num_workers=nw, - sampler=sampler, - pin_memory=True, - collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, - worker_init_fn=seed_worker, - # generator=generator, - ), dataset + return loader(dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=PIN_MEMORY, + collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, + worker_init_fn=seed_worker, + # generator=generator + ), dataset class InfiniteDataLoader(dataloader.DataLoader): @@ -528,7 +528,6 @@ def __init__(self, self.im_files = [self.im_files[i] for i in 
irect] self.label_files = [self.label_files[i] for i in irect] self.labels = [self.labels[i] for i in irect] - self.segments = [self.segments[i] for i in irect] self.shapes = s[irect] # wh ar = ar[irect] @@ -1169,6 +1168,6 @@ def create_classification_dataloader(path, shuffle=shuffle and sampler is None, num_workers=nw, sampler=sampler, - pin_memory=True, + pin_memory=PIN_MEMORY, worker_init_fn=seed_worker, generator=generator) # or DataLoader(persistent_workers=True) From 5d7ed132488d357eee3ee4f5eb5f87fc38ff59ea Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 1 Sep 2022 20:46:29 +0000 Subject: [PATCH 204/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/dataloaders.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/utils/dataloaders.py b/utils/dataloaders.py index f0a50d7c8dca..2e499f182f4e 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -140,16 +140,17 @@ def create_dataloader(path, loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates # generator = torch.Generator() # generator.manual_seed(0) - return loader(dataset, - batch_size=batch_size, - shuffle=shuffle and sampler is None, - num_workers=nw, - sampler=sampler, - pin_memory=PIN_MEMORY, - collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, - worker_init_fn=seed_worker, - # generator=generator - ), dataset + return loader( + dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=PIN_MEMORY, + collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, + worker_init_fn=seed_worker, + # generator=generator + ), dataset class InfiniteDataLoader(dataloader.DataLoader): From 2738352f8b54e969778d3af08c921016c9b51777 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 1 Sep 2022 22:48:16 +0200 Subject: [PATCH 205/247] Update dataloaders.py --- utils/dataloaders.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/dataloaders.py b/utils/dataloaders.py index 2e499f182f4e..bc5a66b71a76 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -529,6 +529,7 @@ def __init__(self, self.im_files = [self.im_files[i] for i in irect] self.label_files = [self.label_files[i] for i in irect] self.labels = [self.labels[i] for i in irect] + self.segments = [self.segments[i] for i in irect] self.shapes = s[irect] # wh ar = ar[irect] From 70e35e557fb7f2d7c87f6f46c95ea8adfb0413bb Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 1 Sep 2022 22:50:07 +0200 Subject: [PATCH 206/247] Restore CI --- .github/workflows/ci-testing.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 65bba5bc366b..d271f6a3786d 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -128,17 +128,15 @@ jobs: run: | m=${{ matrix.model }}-seg # official weights b=runs/train-seg/exp/weights/best # best.pt checkpoint - # python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train + python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg $m.yaml --epochs 1 --device cpu # train for d in cpu; do # devices - # 
for w in $m $b; do # weights - for w in $b; do # weights + for w in $m $b; do # weights python segment/val.py --imgsz 64 --batch 32 --weights $w.pt --device $d # val python segment/predict.py --imgsz 64 --weights $w.pt --device $d # predict + python export.py --weights $w.pt --img 64 --include torchscript --device $d # export done done - # python export.py --weights $m.pt --img 64 --include torchscript # export - python export.py --weights $b.pt --img 64 --include torchscript # export - name: Test classification shell: bash # for Windows compatibility run: | From e74c49f5807f7915b4c5a643efdeac9d5fe93014 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 1 Sep 2022 22:57:16 +0200 Subject: [PATCH 207/247] Update dataloaders.py --- utils/dataloaders.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/utils/dataloaders.py b/utils/dataloaders.py index bc5a66b71a76..837fea1926c9 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -138,19 +138,17 @@ def create_dataloader(path, nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates - # generator = torch.Generator() - # generator.manual_seed(0) - return loader( - dataset, - batch_size=batch_size, - shuffle=shuffle and sampler is None, - num_workers=nw, - sampler=sampler, - pin_memory=PIN_MEMORY, - collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, - worker_init_fn=seed_worker, - # generator=generator - ), dataset + generator = torch.Generator() + generator.manual_seed(0) + return loader(dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=PIN_MEMORY, + collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, + worker_init_fn=seed_worker, + generator=generator), dataset class InfiniteDataLoader(dataloader.DataLoader): From 52f2123e9876f7c29fa907a068f3fa51c67ae0cd Mon Sep 17 00:00:00 2001 From: Jiacong Fang Date: Fri, 2 Sep 2022 20:50:05 +0800 Subject: [PATCH 208/247] Fix TF/TFLite export for segmentation model --- models/tf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/models/tf.py b/models/tf.py index 747ac55a14cb..425ee33abb8f 100644 --- a/models/tf.py +++ b/models/tf.py @@ -333,7 +333,7 @@ def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w def call(self, x): p = self.proto(x[0]) x = self.detect(self, x) - return (x, p) if self.training else (x[0], p) if self.export else (x[0], (x[1], p)) + return (x, p) if self.training else ((x[0], p),) class TFProto(keras.layers.Layer): def __init__(self, c1, c_=256, c2=32, w=None): @@ -407,6 +407,8 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3) args.append([ch[x + 1] for x in f]) if isinstance(args[1], int): # number of anchors args[1] = [list(range(args[1] * 2))] * len(f) + if m is Segment: + args[3] = make_divisible(args[3] * gw, 8) args.append(imgsz) else: c2 = ch[f] From 74c3b252ae115ec6d87fbf98279558fd7888ce71 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 2 Sep 2022 15:36:50 +0200 Subject: [PATCH 209/247] Merge master --- segment/predict.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index c5b755ad1d62..7441d6af5777 100644 --- 
a/segment/predict.py +++ b/segment/predict.py @@ -31,7 +31,6 @@ from pathlib import Path import torch -import torch.backends.cudnn as cudnn FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLOv5 root directory @@ -99,7 +98,6 @@ def run( # Dataloader if webcam: view_img = check_imshow() - cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) bs = len(dataset) # batch_size else: From 5fdd16afe319f72ce6c98f1d828ad58894387d34 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 2 Sep 2022 15:58:10 +0200 Subject: [PATCH 210/247] Cleanup predict.py mask plotting --- segment/predict.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 7441d6af5777..314c93f077ca 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -156,11 +156,9 @@ def run( n = (det[:, 5] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string - # Mask plotting ---------------------------------------------------------------------------------------- - mcolors = [colors(int(cls), True) for cls in det[:, 5]] - im_masks = plot_masks(im[i], masks, mcolors) # image with masks shape(imh,imw,3) + # Mask plotting + im_masks = plot_masks(im[i], masks, colors=[colors(x, True) for x in det[:, 5]]) # shape(imh,imw,3) annotator.im = scale_masks(im.shape[2:], im_masks, im0.shape) # scale to original h, w - # Mask plotting ---------------------------------------------------------------------------------------- # Write results for *xyxy, conf, cls in reversed(det[:, :6]): From 4a3a5bdf0af9aa1b6fcf59d047ef62dc9571ab94 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 2 Sep 2022 19:01:47 +0200 Subject: [PATCH 211/247] cleanup scale_masks() --- utils/segment/general.py | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index 2c62e99b1389..facf2286dccc 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -66,39 +66,31 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False): return masks.gt_(0.5) -def scale_masks(img1_shape, masks, img0_shape, ratio_pad=None): +def scale_masks(im1_shape, masks, im0_shape, ratio_pad=None): """ img1_shape: model input shape, [h, w] img0_shape: origin pic shape, [h, w, 3] masks: [h, w, num] - resize for the most time """ - # Rescale coords (xyxy) from img1_shape to img0_shape - if ratio_pad is None: # calculate from img0_shape - gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new - pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding + # Rescale coordinates (xyxy) from im1_shape to im0_shape + if ratio_pad is None: # calculate from im0_shape + gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new + pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding else: - gain = ratio_pad[0][0] pad = ratio_pad[1] - tl_pad = int(pad[1]), int(pad[0]) # y, x - br_pad = int(img1_shape[0] - pad[1]), int(img1_shape[1] - pad[0]) + top, left = int(pad[1]), int(pad[0]) # y, x + bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0]) if len(masks.shape) < 2: raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') - # masks_h, masks_w, n - masks = masks[tl_pad[0]:br_pad[0], 
tl_pad[1]:br_pad[1]] - # 1, n, masks_h, masks_w - # masks = masks.permute(2, 0, 1).contiguous()[None, :] - # # shape = [1, n, masks_h, masks_w] after F.interpolate, so take first element - # masks = F.interpolate(masks, img0_shape[:2], mode='bilinear', align_corners=False)[0] + masks = masks[top:bottom, left:right] + # masks = masks.permute(2, 0, 1).contiguous() + # masks = F.interpolate(masks[None], im0_shape[:2], mode='bilinear', align_corners=False)[0] # masks = masks.permute(1, 2, 0).contiguous() - # masks_h, masks_w, n - masks = cv2.resize(masks, (img0_shape[1], img0_shape[0])) + masks = cv2.resize(masks, (im0_shape[1], im0_shape[0])) - # keepdim if len(masks.shape) == 2: masks = masks[:, :, None] - return masks From b7cd6ea0aee7d5b41d50abfaba32a08f0a9c1d24 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 2 Sep 2022 19:03:24 +0200 Subject: [PATCH 212/247] rename scale_masks to scale_image --- segment/predict.py | 4 ++-- segment/val.py | 4 ++-- utils/segment/general.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 314c93f077ca..fa68d4af6574 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -43,7 +43,7 @@ from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box -from utils.segment.general import process_mask, scale_masks +from utils.segment.general import process_mask, scale_image from utils.segment.plots import plot_masks from utils.torch_utils import select_device, smart_inference_mode @@ -158,7 +158,7 @@ def run( # Mask plotting im_masks = plot_masks(im[i], masks, colors=[colors(x, True) for x in det[:, 5]]) # shape(imh,imw,3) - annotator.im = scale_masks(im.shape[2:], im_masks, im0.shape) # scale to original h, w + annotator.im = scale_image(im.shape[2:], im_masks, im0.shape) # scale to original h, w # Write results for *xyxy, conf, cls in reversed(det[:, :6]): diff --git a/segment/val.py b/segment/val.py index c08f0bf5cce6..1ab33f7a2194 100644 --- a/segment/val.py +++ b/segment/val.py @@ -47,7 +47,7 @@ from utils.metrics import ConfusionMatrix, box_iou from utils.plots import output_to_target, plot_val_study from utils.segment.dataloaders import create_dataloader -from utils.segment.general import mask_iou, process_mask, process_mask_upsample, scale_masks +from utils.segment.general import mask_iou, process_mask, process_mask_upsample, scale_image from utils.segment.metrics import Metrics, ap_per_class_box_and_mask from utils.segment.plots import plot_images_and_masks from utils.torch_utils import de_parallel, select_device, smart_inference_mode @@ -319,7 +319,7 @@ def run( if save_txt: save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt') if save_json: - pred_masks = scale_masks(im[si].shape[1:], + pred_masks = scale_image(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]) save_one_json(predn, jdict, path, class_map, pred_masks) # append to COCO-JSON dictionary # callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) diff --git a/utils/segment/general.py b/utils/segment/general.py index facf2286dccc..9e68e45e8dcc 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -66,7 +66,7 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False): return masks.gt_(0.5) -def 
scale_masks(im1_shape, masks, im0_shape, ratio_pad=None): +def scale_image(im1_shape, masks, im0_shape, ratio_pad=None): """ img1_shape: model input shape, [h, w] img0_shape: origin pic shape, [h, w, 3] From 92cd027772aff2d2a8b257130c432dd108dc20a6 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 2 Sep 2022 20:22:21 +0200 Subject: [PATCH 213/247] cleanup/optimize plot_masks --- utils/segment/plots.py | 47 ++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index eac46d9853aa..d882dd07d56b 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -13,42 +13,31 @@ from ..plots import Annotator, colors -def plot_masks(img, masks, colors, alpha=0.5): +def plot_masks(im, masks, colors, alpha=0.5): """ Args: - img (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] + im (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] masks (tensor): predicted masks on cuda, shape: [n, h, w] colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] Return: ndarray: img after draw masks, shape: [h, w, 3] - transform colors and send img_gpu to cpu for the most time. """ - img_gpu = img.clone() - num_masks = len(masks) - if num_masks == 0: - return img.permute(1, 2, 0).contiguous().cpu().numpy() * 255 - - # [n, 1, 1, 3] - # faster this way to transform colors - colors = torch.tensor(colors, device=img.device).float() / 255.0 - colors = colors[:, None, None, :] - # [n, h, w, 1] - masks = masks[:, :, :, None] - masks_color = masks.repeat(1, 1, 1, 3) * colors * alpha - inv_alph_masks = masks * (-alpha) + 1 - masks_color_summand = masks_color[0] - if num_masks > 1: - inv_alph_cumul = inv_alph_masks[:(num_masks - 1)].cumprod(dim=0) - masks_color_cumul = masks_color[1:] * inv_alph_cumul - masks_color_summand += masks_color_cumul.sum(dim=0) - - # print(inv_alph_masks.prod(dim=0).shape) # [h, w, 1] - img_gpu = img_gpu.flip(dims=[0]) # filp channel for opencv - img_gpu = img_gpu.permute(1, 2, 0).contiguous() - # [h, w, 3] - img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand - return (img_gpu * 255).byte().cpu().numpy() + if len(masks) == 0: + return im.permute(1, 2, 0).contiguous().cpu().numpy() * 255 + + colors = torch.tensor(colors, device=im.device).float() / 255.0 + colors = colors[:, None, None] # shape(n,1,1,3) + masks = masks.unsqueeze(3) # shape(n,h,w,1) + masks_color = masks * (colors * alpha) # shape(n,h,w,3) + + inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1) + mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(n,h,w,3) + + im = im.flip(dims=[0]) # flip channel + im = im.permute(1, 2, 0).contiguous() # shape(h,w,3) + im = im * inv_alph_masks[-1] + mcs + return (im * 255).byte().cpu().numpy() @threaded @@ -158,7 +147,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11],) + 0.1 * data.values[:, 11]) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From c9156c4232910586727e7d903a5b6297efdb3e1e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 2 Sep 2022 18:22:58 +0000 Subject: [PATCH 214/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci --- utils/segment/plots.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index d882dd07d56b..d3fddf26e22a 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -145,9 +145,8 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): for f in files: try: data = pd.read_csv(f) - index = np.argmax( - 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11]) + index = np.argmax(0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + + 0.1 * data.values[:, 11]) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From d0f40c306061d77069ad6766e9544f0b69519e34 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 2 Sep 2022 21:03:52 +0200 Subject: [PATCH 215/247] Add Annotator.masks() --- utils/plots.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/utils/plots.py b/utils/plots.py index dd1c072a8846..b09be5d4afc9 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -113,6 +113,16 @@ def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 2 thickness=tf, lineType=cv2.LINE_AA) + def masks(self, masks, colors, alpha=0.5): + # Add multiple masks of shape(n,h,w) with colors list([r,g,b], [r,g,b], ...) + if len(masks): + masks = np.ascontiguousarray(masks).astype(np.float32)[..., None] # shape(n,h,w,1) + colors = np.array(colors, dtype=np.float32)[:, None, None] / 255.0 # shape(n,1,1,3) + masks_color = masks * (colors * alpha) # shape(n,h,w,3) + inv_alph_masks = (1 - masks * alpha).cumprod(0) + mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(h,w,3) + self.im[:] = self.im * inv_alph_masks[-1] + mcs + def rectangle(self, xy, fill=None, outline=None, width=1): # Add rectangle to image (PIL-only) self.draw.rectangle(xy, fill, outline, width) From b1056543c2b14349bf69399219165ed862d9e3bb Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 2 Sep 2022 21:23:21 +0200 Subject: [PATCH 216/247] Annotator.masks() fix --- utils/plots.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/plots.py b/utils/plots.py index b09be5d4afc9..500c203b3593 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -116,8 +116,8 @@ def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 2 def masks(self, masks, colors, alpha=0.5): # Add multiple masks of shape(n,h,w) with colors list([r,g,b], [r,g,b], ...) 
if len(masks): - masks = np.ascontiguousarray(masks).astype(np.float32)[..., None] # shape(n,h,w,1) - colors = np.array(colors, dtype=np.float32)[:, None, None] / 255.0 # shape(n,1,1,3) + masks = np.ascontiguousarray(masks).astype(np.float32)[..., None] / 255.0 # shape(n,h,w,1) + colors = np.array(colors, dtype=np.float32)[:, None, None] # shape(n,1,1,3) masks_color = masks * (colors * alpha) # shape(n,h,w,3) inv_alph_masks = (1 - masks * alpha).cumprod(0) mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(h,w,3) From 1dc663ffcfaa13a13a4801142b2e874799c2d8e0 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Fri, 2 Sep 2022 22:32:08 +0200 Subject: [PATCH 217/247] Update plots.py --- utils/plots.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/utils/plots.py b/utils/plots.py index 500c203b3593..0d79cf4ae3ef 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -114,14 +114,17 @@ def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 2 lineType=cv2.LINE_AA) def masks(self, masks, colors, alpha=0.5): - # Add multiple masks of shape(n,h,w) with colors list([r,g,b], [r,g,b], ...) - if len(masks): - masks = np.ascontiguousarray(masks).astype(np.float32)[..., None] / 255.0 # shape(n,h,w,1) - colors = np.array(colors, dtype=np.float32)[:, None, None] # shape(n,1,1,3) - masks_color = masks * (colors * alpha) # shape(n,h,w,3) - inv_alph_masks = (1 - masks * alpha).cumprod(0) - mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(h,w,3) - self.im[:] = self.im * inv_alph_masks[-1] + mcs + # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...) + n = masks.shape[2] # number of masks + if n: + im = self.im.astype(np.float32) + masks = np.array([alpha], dtype=np.float32) * masks[..., None] / 255.0 # shape(n,h,w,1) + colors = np.array(colors, dtype=np.uint8).reshape((n, 1, 1, 3)) # shape(n,1,1,3) + for i in range(n): + m = masks[:, :, i] + im *= 1.0 - m + im += colors[i] * m + self.im = im.astype(np.uint8) def rectangle(self, xy, fill=None, outline=None, width=1): # Add rectangle to image (PIL-only) From 558ee483d11fd4dfffa4ebb0d1c738d619c0ff19 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 3 Sep 2022 00:06:15 +0200 Subject: [PATCH 218/247] Annotator mask optimization --- utils/plots.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/utils/plots.py b/utils/plots.py index 500c203b3593..5948827f4f41 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -113,15 +113,14 @@ def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 2 thickness=tf, lineType=cv2.LINE_AA) - def masks(self, masks, colors, alpha=0.5): - # Add multiple masks of shape(n,h,w) with colors list([r,g,b], [r,g,b], ...) + def masks(self, masks, colors, alpha=0.5, eps=1e-7): + # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...) 
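The rewrite continuing below drops per-mask tensors entirely and does the whole overlay with one matrix product: with masks stored channel-last, (h,w,n) @ (n,3) gives each pixel the summed colors of every mask covering it, and dividing by the per-pixel count s averages them where masks overlap. A small NumPy sketch with made-up shapes (note that 1 - s * alpha can dip below zero under heavy overlap, which the retina-masks revision later in this series guards by clipping s into [0, 1]):

import numpy as np

h, w = 4, 4
masks = np.zeros((h, w, 2), np.float32)
masks[:, :2, 0] = 1.0                        # mask 0 covers the left half
masks[:2, :, 1] = 1.0                        # mask 1 covers the top half
colors = np.array([[255, 0, 0], [0, 0, 255]], np.float32)

s = masks.sum(2, keepdims=True)              # per-pixel mask count, (h,w,1)
blend = masks @ colors / (s + 1e-7)          # (h,w,2) @ (2,3) = averaged color, (h,w,3)
im = np.full((h, w, 3), 128, np.float32)
out = blend * 0.5 + im * (1 - s * 0.5)       # alpha = 0.5; where s = 2 the image term vanishes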
if len(masks): - masks = np.ascontiguousarray(masks).astype(np.float32)[..., None] / 255.0 # shape(n,h,w,1) - colors = np.array(colors, dtype=np.float32)[:, None, None] # shape(n,1,1,3) - masks_color = masks * (colors * alpha) # shape(n,h,w,3) - inv_alph_masks = (1 - masks * alpha).cumprod(0) - mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(h,w,3) - self.im[:] = self.im * inv_alph_masks[-1] + mcs + masks = masks.astype(np.float32) / 255.0 # shape(h,w,n) + colors = np.array(colors, dtype=np.uint8) # shape(n,3) + s = masks.sum(2, keepdims=True) + masks = masks @ colors / (s + eps) # (h,w,n) @ (n,3) = (h,w,3) + self.im[:] = masks * alpha + self.im * (1 - s * alpha) def rectangle(self, xy, fill=None, outline=None, width=1): # Add rectangle to image (PIL-only) From 2378091a4775413d29485131a4690a59e6b3acc3 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 3 Sep 2022 00:10:42 +0200 Subject: [PATCH 219/247] Rename crop() to crop_mask() --- utils/segment/general.py | 6 +++--- utils/segment/loss.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index 9e68e45e8dcc..36547ed0889c 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -3,7 +3,7 @@ import torch.nn.functional as F -def crop(masks, boxes): +def crop_mask(masks, boxes): """ "Crop" predicted masks by zeroing out everything not in the predicted bbox. Vectorized by Chong (thanks Chong). @@ -35,7 +35,7 @@ def process_mask_upsample(protos, masks_in, bboxes, shape): c, mh, mw = protos.shape # CHW masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW - masks = crop(masks, bboxes) # CHW + masks = crop_mask(masks, bboxes) # CHW return masks.gt_(0.5) @@ -60,7 +60,7 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False): downsampled_bboxes[:, 3] *= mh / ih downsampled_bboxes[:, 1] *= mh / ih - masks = crop(masks, downsampled_bboxes) # CHW + masks = crop_mask(masks, downsampled_bboxes) # CHW if upsample: masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW return masks.gt_(0.5) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index fa1043488fd8..955faf3a36b4 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -6,7 +6,7 @@ from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import de_parallel -from .general import crop +from .general import crop_mask class ComputeLoss: @@ -113,7 +113,7 @@ def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): # Mask loss for one image pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80) loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") - return (crop(loss, xyxy).mean(dim=(1, 2)) / area).mean() + return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean() def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) From 27c5563d66d5d9024fdd37df01628b6c8222a25f Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 3 Sep 2022 00:12:09 +0200 Subject: [PATCH 220/247] Do not crop in predict.py --- segment/predict.py | 2 +- utils/segment/general.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index fa68d4af6574..37a780f6cde2 100644 --- a/segment/predict.py +++ b/segment/predict.py 
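crop_mask's body is not quoted in this series, but its call sites (CHW masks and xyxy boxes on the same grid) pin down what it does: zero out every mask pixel falling outside its detection's box. A reconstruction of the vectorized form credited to Chong, offered as a sketch rather than the verbatim source:

import torch

def crop_mask(masks, boxes):
    # masks (n,h,w); boxes (n,4) as xyxy pixel coords on the mask grid
    n, h, w = masks.shape
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)                    # each (n,1,1)
    r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # cols (1,1,w)
    c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # rows (1,h,1)
    return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))             # broadcast to (n,h,w)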
@@ -146,7 +146,7 @@ def run( imc = im0.copy() if save_crop else im0 # for save_crop annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): - masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True) # HWC + masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], crop=False, upsample=True) # HWC # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() diff --git a/utils/segment/general.py b/utils/segment/general.py index 36547ed0889c..f37f13847cac 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -39,7 +39,7 @@ def process_mask_upsample(protos, masks_in, bboxes, shape): return masks.gt_(0.5) -def process_mask(protos, masks_in, bboxes, shape, upsample=False): +def process_mask(protos, masks_in, bboxes, shape, crop=True, upsample=False): """ Crop before upsample. proto_out: [mask_dim, mask_h, mask_w] @@ -60,7 +60,8 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False): downsampled_bboxes[:, 3] *= mh / ih downsampled_bboxes[:, 1] *= mh / ih - masks = crop_mask(masks, downsampled_bboxes) # CHW + if crop: + masks = crop_mask(masks, downsampled_bboxes) # CHW if upsample: masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW return masks.gt_(0.5) From d1e49e469b6f0c7c1b6d3153bac1cef79d77178a Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 3 Sep 2022 00:15:46 +0200 Subject: [PATCH 221/247] crop always --- segment/predict.py | 2 +- utils/segment/general.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 37a780f6cde2..fa68d4af6574 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -146,7 +146,7 @@ def run( imc = im0.copy() if save_crop else im0 # for save_crop annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): - masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], crop=False, upsample=True) # HWC + masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True) # HWC # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() diff --git a/utils/segment/general.py b/utils/segment/general.py index f37f13847cac..36547ed0889c 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -39,7 +39,7 @@ def process_mask_upsample(protos, masks_in, bboxes, shape): return masks.gt_(0.5) -def process_mask(protos, masks_in, bboxes, shape, crop=True, upsample=False): +def process_mask(protos, masks_in, bboxes, shape, upsample=False): """ Crop before upsample. 
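Behind both process_mask variants, a detection's mask is simply a sigmoid of a linear combination of the shared prototype maps, YOLACT-style: each box carries 32 coefficients that weight 32 (mh, mw) prototypes, as the masks_in @ protos line quoted above shows. A toy illustration with random tensors and hypothetical sizes:

import torch

protos = torch.randn(32, 160, 160)                  # (mask_dim, mh, mw)
coef = torch.randn(5, 32)                           # 5 detections x 32 mask coefficients
masks = (coef @ protos.view(32, -1)).sigmoid().view(-1, 160, 160)
print(masks.shape)                                  # torch.Size([5, 160, 160])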
proto_out: [mask_dim, mask_h, mask_w] @@ -60,8 +60,7 @@ def process_mask(protos, masks_in, bboxes, shape, crop=True, upsample=False): downsampled_bboxes[:, 3] *= mh / ih downsampled_bboxes[:, 1] *= mh / ih - if crop: - masks = crop_mask(masks, downsampled_bboxes) # CHW + masks = crop_mask(masks, downsampled_bboxes) # CHW if upsample: masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW return masks.gt_(0.5) From b1357c7fc34d2f52ecbded9c28aae98d64a44b8a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 4 Sep 2022 11:34:59 +0000 Subject: [PATCH 222/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- models/tf.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/models/tf.py b/models/tf.py index 425ee33abb8f..8cce147059d3 100644 --- a/models/tf.py +++ b/models/tf.py @@ -319,6 +319,7 @@ def _make_grid(nx=20, ny=20): xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny)) return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32) + class TFSegment(TFDetect): # YOLOv5 Segment head for segmentation models def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None): @@ -335,17 +336,20 @@ def call(self, x): x = self.detect(self, x) return (x, p) if self.training else ((x[0], p),) + class TFProto(keras.layers.Layer): + def __init__(self, c1, c_=256, c2=32, w=None): super().__init__() self.cv1 = TFConv(c1, c_, k=3, w=w.cv1) self.upsample = TFUpsample(None, scale_factor=2, mode='nearest') self.cv2 = TFConv(c_, c_, k=3, w=w.cv2) self.cv3 = TFConv(c_, c2, w=w.cv3) - + def call(self, inputs): return self.cv3(self.cv2(self.upsample(self.cv1(inputs)))) + class TFUpsample(keras.layers.Layer): # TF version of torch.nn.Upsample() def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w' From f34346f137f50eb4d44c2c8f0b7c2e2dc64b568f Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 16:41:37 +0200 Subject: [PATCH 223/247] Merge master --- segment/val.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/val.py b/segment/val.py index 1ab33f7a2194..faa3f03b3659 100644 --- a/segment/val.py +++ b/segment/val.py @@ -252,7 +252,7 @@ def run( # Inference with dt[1]: - out, train_out = model(im) # if training else model(im, augment=augment, val=True) # inference, loss + out, train_out = model(im) if compute_loss else (model(im, augment=augment), None) # Loss if compute_loss: From 82deb52cd7b59867a772ff2bacc9ee842bce67a6 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 17:19:19 +0200 Subject: [PATCH 224/247] Add vid-stride from master PR --- segment/predict.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index fa68d4af6574..8e4ebbd20028 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -76,6 +76,7 @@ def run( hide_conf=False, # hide confidences half=False, # use FP16 half-precision inference dnn=False, # use OpenCV DNN for ONNX inference + vid_stride=1, # video frame-rate stride ): source = str(source) save_img = not nosave and not source.endswith('.txt') # save inference images @@ -98,10 +99,10 @@ def run( # Dataloader if webcam: view_img = check_imshow() - dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) + dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride) bs 
= len(dataset) # batch_size else: - dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) + dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride) bs = 1 # batch_size vid_path, vid_writer = [None] * bs, [None] * bs @@ -245,6 +246,7 @@ def parse_opt(): parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') + parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride') opt = parser.parse_args() opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand print_args(vars(opt)) From 996a3e460b8521e53cad5276b7046813498311bd Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 17:42:35 +0200 Subject: [PATCH 225/247] Update seg model outputs --- models/yolo.py | 2 +- segment/predict.py | 3 +-- segment/val.py | 25 ++++++++++++------------- val.py | 20 ++++++++++---------- 4 files changed, 24 insertions(+), 26 deletions(-) diff --git a/models/yolo.py b/models/yolo.py index 2d32226a6ba6..d59034bd4041 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -103,7 +103,7 @@ def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True): def forward(self, x): p = self.proto(x[0]) x = self.detect(self, x) - return (x, p) if self.training else (x[0], p) if self.export else (x[0], (x[1], p)) + return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1]) class BaseModel(nn.Module): diff --git a/segment/predict.py b/segment/predict.py index 8e4ebbd20028..7761f036a714 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -120,8 +120,7 @@ def run( # Inference with dt[1]: visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False - pred, out = model(im, augment=augment, visualize=visualize) - proto = out[1] + pred, proto = model(im, augment=augment, visualize=visualize)[:2] # NMS with dt[2]: diff --git a/segment/val.py b/segment/val.py index faa3f03b3659..8576ef490033 100644 --- a/segment/val.py +++ b/segment/val.py @@ -252,7 +252,7 @@ def run( # Inference with dt[1]: - out, train_out = model(im) if compute_loss else (model(im, augment=augment), None) + preds, protos, train_out = model(im) if compute_loss else (*model(im, augment=augment)[:2], None) # Loss if compute_loss: @@ -262,18 +262,18 @@ def run( targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling with dt[2]: - out = non_max_suppression(out, - conf_thres, - iou_thres, - labels=lb, - multi_label=True, - agnostic=single_cls, - max_det=max_det, - nm=nm) + preds = non_max_suppression(preds, + conf_thres, + iou_thres, + labels=lb, + multi_label=True, + agnostic=single_cls, + max_det=max_det, + nm=nm) # Metrics plot_masks = [] # masks for plotting - for si, pred in enumerate(out): + for si, (pred, proto) in enumerate(zip(preds, protos)): labels = targets[targets[:, 0] == si, 1:] nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions path, shape = Path(paths[si]), shapes[si][0] @@ -291,8 +291,7 @@ def run( # Masks midx = [si] if overlap else targets[:, 0] == si gt_masks = masks[midx] - proto_out = train_out[1][si] - pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]) + pred_masks = process(proto, pred[:, 
6:], pred[:, :4], shape=im[si].shape[1:]) # Predictions if single_cls: @@ -329,7 +328,7 @@ def run( if len(plot_masks): plot_masks = torch.cat(plot_masks, dim=0) plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) - plot_images_and_masks(im, output_to_target(out, max_det=15), plot_masks, paths, + plot_images_and_masks(im, output_to_target(preds, max_det=15), plot_masks, paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred # callbacks.run('on_val_batch_end') diff --git a/val.py b/val.py index 32776acb261f..9dee4734214c 100644 --- a/val.py +++ b/val.py @@ -205,7 +205,7 @@ def run( # Inference with dt[1]: - out, train_out = model(im) if compute_loss else (model(im, augment=augment), None) + preds, train_out = model(im) if compute_loss else (model(im, augment=augment), None) # Loss if compute_loss: @@ -215,16 +215,16 @@ def run( targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling with dt[2]: - out = non_max_suppression(out, - conf_thres, - iou_thres, - labels=lb, - multi_label=True, - agnostic=single_cls, - max_det=max_det) + preds = non_max_suppression(preds, + conf_thres, + iou_thres, + labels=lb, + multi_label=True, + agnostic=single_cls, + max_det=max_det) # Metrics - for si, pred in enumerate(out): + for si, pred in enumerate(preds): labels = targets[targets[:, 0] == si, 1:] nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions path, shape = Path(paths[si]), shapes[si][0] @@ -264,7 +264,7 @@ def run( # Plot images if plots and batch_i < 3: plot_images(im, targets, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) # labels - plot_images(im, output_to_target(out), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred + plot_images(im, output_to_target(preds), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred callbacks.run('on_val_batch_end') From a014646144bc1656e63bb4e9b9d2da41cdb45636 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 17:52:21 +0200 Subject: [PATCH 226/247] Update seg model outputs --- segment/val.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/val.py b/segment/val.py index 8576ef490033..d2250cee9ca7 100644 --- a/segment/val.py +++ b/segment/val.py @@ -256,7 +256,7 @@ def run( # Loss if compute_loss: - loss += compute_loss(train_out, targets, masks)[1] # box, obj, cls + loss += compute_loss((train_out, protos), targets, masks)[1] # box, obj, cls # NMS targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels From 04eb59097c68a2b49dc748a91cac0c68044b63c8 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 18:26:50 +0200 Subject: [PATCH 227/247] Add segmentation benchmarks --- .github/workflows/ci-testing.yml | 4 ++-- utils/benchmarks.py | 18 +++++++++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 540df6088efa..98cf4aeea990 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -18,7 +18,7 @@ jobs: matrix: os: [ ubuntu-latest ] python-version: [ '3.9' ] # requires python<=3.9 - model: [ yolov5n ] + model: [ yolov5n, yolov5n-seg ] steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 @@ -39,7 +39,7 @@ jobs: pip list - name: Run benchmarks run: | - python utils/benchmarks.py --weights ${{ 
matrix.model }}.pt --img 320 --hard-fail 0.29 + python utils/benchmarks.py --weights ${{ matrix.model }}.pt --img 320 --hard-fail 0.10 Tests: timeout-minutes: 60 diff --git a/utils/benchmarks.py b/utils/benchmarks.py index d5f4c1d61fbe..bec0da5ce4b9 100644 --- a/utils/benchmarks.py +++ b/utils/benchmarks.py @@ -40,10 +40,13 @@ # ROOT = ROOT.relative_to(Path.cwd()) # relative import export -import val +from val import run as val_det +from segment.val import run as val_seg from utils import notebook_init from utils.general import LOGGER, check_yaml, file_size, print_args from utils.torch_utils import select_device +from models.experimental import attempt_load +from models.yolo import SegmentationModel def run( @@ -59,6 +62,7 @@ def run( ): y, t = [], time.time() device = select_device(device) + model_type = type(attempt_load(weights, fuse=False)) # DetectionModel, SegmentationModel, etc. for i, (name, f, suffix, cpu, gpu) in export.export_formats().iterrows(): # index, (name, file, suffix, CPU, GPU) try: assert i not in (9, 10), 'inference not supported' # Edge TPU and TF.js are unsupported @@ -76,10 +80,14 @@ def run( assert suffix in str(w), 'export failed' # Validate - result = val.run(data, w, batch_size, imgsz, plots=False, device=device, task='benchmark', half=half) - metrics = result[0] # metrics (mp, mr, map50, map, *losses(box, obj, cls)) - speeds = result[2] # times (preprocess, inference, postprocess) - y.append([name, round(file_size(w), 1), round(metrics[3], 4), round(speeds[1], 2)]) # MB, mAP, t_inference + if model_type == SegmentationModel: + result = val_seg(data, w, batch_size, imgsz, plots=False, device=device, task='benchmark', half=half) + metric = result[0][7] # (box(p, r, map50, map), mask(p, r, map50, map), *loss(box, obj, cls)) + else: # DetectionModel: + result = val_det(data, w, batch_size, imgsz, plots=False, device=device, task='benchmark', half=half) + metric = result[0][3] # (p, r, map50, map, *loss(box, obj, cls)) + speed = result[2][1] # times (preprocess, inference, postprocess) + y.append([name, round(file_size(w), 1), round(metric, 4), round(speed, 2)]) # MB, mAP, t_inference except Exception as e: if hard_fail: assert type(e) is AssertionError, f'Benchmark --hard-fail for {name}: {e}' From 11d27a7900a599d7ffebf7a29f07ff6954aa64b5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 4 Sep 2022 16:27:19 +0000 Subject: [PATCH 228/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/benchmarks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/benchmarks.py b/utils/benchmarks.py index bec0da5ce4b9..4301a8d9aa41 100644 --- a/utils/benchmarks.py +++ b/utils/benchmarks.py @@ -40,13 +40,13 @@ # ROOT = ROOT.relative_to(Path.cwd()) # relative import export -from val import run as val_det +from models.experimental import attempt_load +from models.yolo import SegmentationModel from segment.val import run as val_seg from utils import notebook_init from utils.general import LOGGER, check_yaml, file_size, print_args from utils.torch_utils import select_device -from models.experimental import attempt_load -from models.yolo import SegmentationModel +from val import run as val_det def run( From 4016d72807d729228c269bbca2b072753aed899b Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 19:15:21 +0200 Subject: [PATCH 229/247] Add segmentation benchmarks --- .github/workflows/ci-testing.yml | 3 ++- 
utils/benchmarks.py => benchmarks.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) rename utils/benchmarks.py => benchmarks.py (99%) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 98cf4aeea990..7fa3a467a1c9 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -15,6 +15,7 @@ jobs: Benchmarks: runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: os: [ ubuntu-latest ] python-version: [ '3.9' ] # requires python<=3.9 @@ -39,7 +40,7 @@ jobs: pip list - name: Run benchmarks run: | - python utils/benchmarks.py --weights ${{ matrix.model }}.pt --img 320 --hard-fail 0.10 + python benchmarks.py --weights ${{ matrix.model }}.pt --img 320 --hard-fail 0.10 Tests: timeout-minutes: 60 diff --git a/utils/benchmarks.py b/benchmarks.py similarity index 99% rename from utils/benchmarks.py rename to benchmarks.py index bec0da5ce4b9..54574eb73f2b 100644 --- a/utils/benchmarks.py +++ b/benchmarks.py @@ -34,7 +34,7 @@ import pandas as pd FILE = Path(__file__).resolve() -ROOT = FILE.parents[1] # YOLOv5 root directory +ROOT = FILE.parents[0] # YOLOv5 root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH # ROOT = ROOT.relative_to(Path.cwd()) # relative From e9ab8512fb04427c2f764040dc5fecf4124775ca Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 19:28:09 +0200 Subject: [PATCH 230/247] Add segmentation benchmarks --- .github/workflows/ci-testing.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 7fa3a467a1c9..45da7cb3769d 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -19,7 +19,7 @@ jobs: matrix: os: [ ubuntu-latest ] python-version: [ '3.9' ] # requires python<=3.9 - model: [ yolov5n, yolov5n-seg ] + model: [ yolov5n ] steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 @@ -38,9 +38,12 @@ jobs: python --version pip --version pip list - - name: Run benchmarks + - name: Benchmark DetectionModel + run: | + python benchmarks.py --weights ${{ matrix.model }}.pt --img 320 --hard-fail 0.29 + - name: Benchmark SegmentationModel run: | - python benchmarks.py --weights ${{ matrix.model }}.pt --img 320 --hard-fail 0.10 + python benchmarks.py --weights ${{ matrix.model }}-seg.pt --img 320 Tests: timeout-minutes: 60 From 5a1abb2510b4e32f05a2938301085b6973cd94da Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 19:39:29 +0200 Subject: [PATCH 231/247] Add segmentation benchmarks --- .github/workflows/ci-testing.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 45da7cb3769d..63b5f8276176 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -40,10 +40,10 @@ jobs: pip list - name: Benchmark DetectionModel run: | - python benchmarks.py --weights ${{ matrix.model }}.pt --img 320 --hard-fail 0.29 + python benchmarks.py --data coco128.yaml --weights ${{ matrix.model }}.pt --img 320 --hard-fail 0.29 - name: Benchmark SegmentationModel run: | - python benchmarks.py --weights ${{ matrix.model }}-seg.pt --img 320 + python benchmarks.py --data coco128-seg.yaml --weights ${{ matrix.model }}-seg.pt --img 320 Tests: timeout-minutes: 60 From 29c03dac5bc1199ce64ac28f8bad56a79b57b00f Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 19:49:50 +0200 Subject: [PATCH 232/247] Fix 
DetectMultiBackend for OpenVINO --- models/common.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/models/common.py b/models/common.py index bbdda5ceb6cc..746762004403 100644 --- a/models/common.py +++ b/models/common.py @@ -373,7 +373,6 @@ def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, if batch_dim.is_static: batch_size = batch_dim.get_length() executable_network = ie.compile_model(network, device_name="CPU") # device_name="MYRIAD" for Intel NCS2 - output_layer = next(iter(executable_network.outputs)) stride, names = self._load_metadata(Path(w).with_suffix('.yaml')) # load metadata elif engine: # TensorRT LOGGER.info(f'Loading {w} for TensorRT inference...') @@ -477,7 +476,7 @@ def forward(self, im, augment=False, visualize=False): y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im}) elif self.xml: # OpenVINO im = im.cpu().numpy() # FP32 - y = self.executable_network([im])[self.output_layer] + y = list(self.executable_network([im]).values()) elif self.engine: # TensorRT if self.dynamic and im.shape != self.bindings['images'].shape: i_in, i_out = (self.model.get_binding_index(x) for x in ('images', 'output')) From 3b63e8b79384351e449665dfd9ad3507308071e4 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 5 Sep 2022 14:49:41 +0800 Subject: [PATCH 233/247] update Annotator.masks --- segment/predict.py | 22 ++++++++++++------ utils/plots.py | 50 +++++++++++++++++++++++++++++++++++------ utils/segment/plots.py | 51 +++++++++++++++++------------------------- 3 files changed, 78 insertions(+), 45 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 7761f036a714..b17ba17daf94 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -77,6 +77,7 @@ def run( half=False, # use FP16 half-precision inference dnn=False, # use OpenCV DNN for ONNX inference vid_stride=1, # video frame-rate stride + retina_masks=False, ): source = str(source) save_img = not nosave and not source.endswith('.txt') # save inference images @@ -144,7 +145,7 @@ def run( s += '%gx%g ' % im.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh imc = im0.copy() if save_crop else im0 # for save_crop - annotator = Annotator(im0, line_width=line_thickness, example=str(names)) + annotator = Annotator(im0, line_width=line_thickness, example=str(names), pil=True) if len(det): masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True) # HWC @@ -157,8 +158,13 @@ def run( s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Mask plotting - im_masks = plot_masks(im[i], masks, colors=[colors(x, True) for x in det[:, 5]]) # shape(imh,imw,3) - annotator.im = scale_image(im.shape[2:], im_masks, im0.shape) # scale to original h, w + import time + tms = time.time() + annotator.masks(masks, colors=[colors(x, True) for x in det[:, 5]], img_gpu=im[i], retina_masks=retina_masks) + tme = time.time() + print("plot mask:", tme - tms) + # im_masks = plot_masks(im[i], masks, colors=[colors(x, True) for x in det[:, 5]]) # shape(imh,imw,3) + # annotator.im = scale_image(im.shape[2:], im_masks, im0.shape) # scale to original h, w # Write results for *xyxy, conf, cls in reversed(det[:, :6]): @@ -183,7 +189,8 @@ def run( cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) cv2.imshow(str(p), im0) - cv2.waitKey(1) # 1 millisecond + if cv2.waitKey(1) == ord('q'): # 1 
millisecond + exit() # Save results (image with detections) if save_img: @@ -205,7 +212,7 @@ def run( vid_writer[i].write(im0) # Print time (inference-only) - LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms") + # LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms") # Print results t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image @@ -219,8 +226,8 @@ def run( def parse_opt(): parser = argparse.ArgumentParser() - parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model path(s)') - parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') + parser.add_argument('--weights', nargs='+', type=str, default='../weights/yolov5n-seg.pt', help='model path(s)') + parser.add_argument('--source', type=str, default='/home/laughing/Downloads/MOT17-03-FRCNN-raw.mp4', help='file/dir/URL/glob, 0 for webcam') parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path') parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') @@ -246,6 +253,7 @@ def parse_opt(): parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride') + parser.add_argument('--retina-masks', default=True, action='store_true', help='whether to plot masks in native resolution') opt = parser.parse_args() opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand print_args(vars(opt)) diff --git a/utils/plots.py b/utils/plots.py index 5948827f4f41..0842688922ec 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -22,6 +22,7 @@ from utils import TryExcept, threaded from utils.general import (CONFIG_DIR, FONT, LOGGER, check_font, check_requirements, clip_coords, increment_path, is_ascii, xywh2xyxy, xyxy2xywh) +from utils.segment.general import scale_image from utils.metrics import fitness # Settings @@ -113,14 +114,49 @@ def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 2 thickness=tf, lineType=cv2.LINE_AA) - def masks(self, masks, colors, alpha=0.5, eps=1e-7): - # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...) - if len(masks): - masks = masks.astype(np.float32) / 255.0 # shape(h,w,n) - colors = np.array(colors, dtype=np.uint8) # shape(n,3) - s = masks.sum(2, keepdims=True) - masks = masks @ colors / (s + eps) # (h,w,n) @ (n,3) = (h,w,3) + def masks(self, masks, colors, img_gpu, retina_masks=False, alpha=0.5): + """Plot masks at once. + Args: + masks (tensor): predicted masks on cuda, shape: [n, h, w] + colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] + img_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] + retina_masks (bool): whether to plot masks in native resolution. + """ + if self.pil: + # convert to numpy first + self.im = np.asarray(self.im).copy() + if retina_masks: + # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...) 
+ if len(masks) == 0: + return + masks = torch.as_tensor(masks, dtype=torch.uint8) + masks = masks.permute(1, 2, 0).contiguous() + masks = masks.cpu().numpy() + masks = scale_image(img_gpu.shape[1:], masks, self.im.shape) + masks = np.asarray(masks, dtype=np.float32) + colors = np.asarray(colors, dtype=np.float32) # shape(n,3) + s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together + masks = (masks @ colors).clip(0, 255) # (h,w,n) @ (n,3) = (h,w,3) self.im[:] = masks * alpha + self.im * (1 - s * alpha) + else: + if len(masks) == 0: + self.im[:] = img_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255 + colors = torch.tensor(colors, device=img_gpu.device, dtype=torch.float32) / 255.0 + colors = colors[:, None, None] # shape(n,1,1,3) + masks = masks.unsqueeze(3) # shape(n,h,w,1) + masks_color = masks * (colors * alpha) # shape(n,h,w,3) + + inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1) + mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(n,h,w,3) + + img_gpu = img_gpu.flip(dims=[0]) # flip channel + img_gpu = img_gpu.permute(1, 2, 0).contiguous() # shape(h,w,3) + img_gpu = img_gpu * inv_alph_masks[-1] + mcs + im_mask = (img_gpu * 255).byte().cpu().numpy() + self.im[:] = scale_image(img_gpu.shape, im_mask, self.im.shape) + if self.pil: + # convert im back to PIL and update draw + self.fromarray(self.im) def rectangle(self, xy, fill=None, outline=None, width=1): # Add rectangle to image (PIL-only) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index d3fddf26e22a..21cbdfe1e6de 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -13,33 +13,6 @@ from ..plots import Annotator, colors -def plot_masks(im, masks, colors, alpha=0.5): - """ - Args: - im (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] - masks (tensor): predicted masks on cuda, shape: [n, h, w] - colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] - Return: - ndarray: img after draw masks, shape: [h, w, 3] - - """ - if len(masks) == 0: - return im.permute(1, 2, 0).contiguous().cpu().numpy() * 255 - - colors = torch.tensor(colors, device=im.device).float() / 255.0 - colors = colors[:, None, None] # shape(n,1,1,3) - masks = masks.unsqueeze(3) # shape(n,h,w,1) - masks_color = masks * (colors * alpha) # shape(n,h,w,3) - - inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1) - mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(n,h,w,3) - - im = im.flip(dims=[0]) # flip channel - im = im.permute(1, 2, 0).contiguous() # shape(h,w,3) - im = im * inv_alph_masks[-1] + mcs - return (im * 255).byte().cpu().numpy() - - @threaded def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg', names=None): # Plot image grid with labels @@ -119,7 +92,9 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' image_masks = masks[idx] im = np.asarray(annotator.im).copy() - for j, box in enumerate(boxes.T.tolist()): + resized_masks = [] + masks_colors = [] + for j in range(len(boxes)): if labels or conf[j] > 0.25: # 0.25 conf thresh color = colors(classes[j]) mh, mw = image_masks[j].shape @@ -129,9 +104,23 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' mask = mask.astype(np.bool) else: mask = image_masks[j].astype(np.bool) - with contextlib.suppress(Exception): - im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 - annotator.fromarray(im) + resized_masks.append(mask) + 
masks_colors.append(color) + annotator.masks(resized_masks, colors, images[0], retina_masks=True) + # + # for j, box in enumerate(boxes.T.tolist()): + # if labels or conf[j] > 0.25: # 0.25 conf thresh + # color = colors(classes[j]) + # mh, mw = image_masks[j].shape + # if mh != h or mw != w: + # mask = image_masks[j].astype(np.uint8) + # mask = cv2.resize(mask, (w, h)) + # mask = mask.astype(np.bool) + # else: + # mask = image_masks[j].astype(np.bool) + # with contextlib.suppress(Exception): + # im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 + # annotator.fromarray(im) annotator.im.save(fname) # save From 69e59936de0ef4cf461bdc3207d832c33224eefc Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 5 Sep 2022 15:17:59 +0800 Subject: [PATCH 234/247] fix val plot --- segment/predict.py | 2 +- utils/plots.py | 13 +++++++------ utils/segment/plots.py | 15 +++++++-------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index b17ba17daf94..fb5c578d3f86 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -160,7 +160,7 @@ def run( # Mask plotting import time tms = time.time() - annotator.masks(masks, colors=[colors(x, True) for x in det[:, 5]], img_gpu=im[i], retina_masks=retina_masks) + annotator.masks(masks, colors=[colors(x, True) for x in det[:, 5]], img_gpu=im[i] if retina_masks else None) tme = time.time() print("plot mask:", tme - tms) # im_masks = plot_masks(im[i], masks, colors=[colors(x, True) for x in det[:, 5]]) # shape(imh,imw,3) diff --git a/utils/plots.py b/utils/plots.py index 0842688922ec..681db44aede7 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -114,7 +114,7 @@ def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 2 thickness=tf, lineType=cv2.LINE_AA) - def masks(self, masks, colors, img_gpu, retina_masks=False, alpha=0.5): + def masks(self, masks, colors, img_gpu=None, alpha=0.5): """Plot masks at once. Args: masks (tensor): predicted masks on cuda, shape: [n, h, w] @@ -125,14 +125,15 @@ def masks(self, masks, colors, img_gpu, retina_masks=False, alpha=0.5): if self.pil: # convert to numpy first self.im = np.asarray(self.im).copy() - if retina_masks: + if img_gpu is None: # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...) 
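With this fix in place the caller picks the path: pass img_gpu to composite quickly at network resolution on the device, or pass None with masks for a native-resolution overlay. A usage sketch mirroring the predict.py call later in this series (det, proto, im, im0, names and retina_masks as defined there):

masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True)  # (n,h,w)
annotator = Annotator(im0, line_width=3, example=str(names))
annotator.masks(masks,
                colors=[colors(c, True) for c in det[:, 5]],
                img_gpu=None if retina_masks else im[i])  # None selects the native-res branch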
if len(masks) == 0: return - masks = torch.as_tensor(masks, dtype=torch.uint8) - masks = masks.permute(1, 2, 0).contiguous() - masks = masks.cpu().numpy() - masks = scale_image(img_gpu.shape[1:], masks, self.im.shape) + if isinstance(masks, torch.Tensor): + masks = torch.as_tensor(masks, dtype=torch.uint8) + masks = masks.cpu().numpy() + masks = np.ascontiguousarray(masks.transpose(1, 2, 0)) + masks = scale_image(masks.shape[1:], masks, self.im.shape) masks = np.asarray(masks, dtype=np.float32) colors = np.asarray(colors, dtype=np.float32) # shape(n,3) s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 21cbdfe1e6de..79d3c812c954 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -94,20 +94,19 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' im = np.asarray(annotator.im).copy() resized_masks = [] masks_colors = [] - for j in range(len(boxes)): + for j in range(len(boxes.T)): if labels or conf[j] > 0.25: # 0.25 conf thresh - color = colors(classes[j]) + color = np.array(colors(classes[j])) mh, mw = image_masks[j].shape + mask = image_masks[j].astype(np.uint8) if mh != h or mw != w: - mask = image_masks[j].astype(np.uint8) mask = cv2.resize(mask, (w, h)) - mask = mask.astype(np.bool) - else: - mask = image_masks[j].astype(np.bool) resized_masks.append(mask) masks_colors.append(color) - annotator.masks(resized_masks, colors, images[0], retina_masks=True) - # + if len(resized_masks): + resized_masks = np.stack(resized_masks, axis=0) + annotator.masks(resized_masks, masks_colors) + # for j, box in enumerate(boxes.T.tolist()): # if labels or conf[j] > 0.25: # 0.25 conf thresh # color = colors(classes[j]) From 6d0e952ab3afaa0e552f82a3e7f6bc78ddbeefba Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 5 Sep 2022 15:18:25 +0800 Subject: [PATCH 235/247] revert val plot --- utils/segment/plots.py | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 79d3c812c954..e882c14390f0 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -92,34 +92,19 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' image_masks = masks[idx] im = np.asarray(annotator.im).copy() - resized_masks = [] - masks_colors = [] - for j in range(len(boxes.T)): + for j, box in enumerate(boxes.T.tolist()): if labels or conf[j] > 0.25: # 0.25 conf thresh - color = np.array(colors(classes[j])) + color = colors(classes[j]) mh, mw = image_masks[j].shape - mask = image_masks[j].astype(np.uint8) if mh != h or mw != w: + mask = image_masks[j].astype(np.uint8) mask = cv2.resize(mask, (w, h)) - resized_masks.append(mask) - masks_colors.append(color) - if len(resized_masks): - resized_masks = np.stack(resized_masks, axis=0) - annotator.masks(resized_masks, masks_colors) - - # for j, box in enumerate(boxes.T.tolist()): - # if labels or conf[j] > 0.25: # 0.25 conf thresh - # color = colors(classes[j]) - # mh, mw = image_masks[j].shape - # if mh != h or mw != w: - # mask = image_masks[j].astype(np.uint8) - # mask = cv2.resize(mask, (w, h)) - # mask = mask.astype(np.bool) - # else: - # mask = image_masks[j].astype(np.bool) - # with contextlib.suppress(Exception): - # im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 - # annotator.fromarray(im) + mask = mask.astype(np.bool) + else: + mask = 
image_masks[j].astype(np.bool) + with contextlib.suppress(Exception): + im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 + annotator.fromarray(im) annotator.im.save(fname) # save From 71780b2a61279570d97d7a5d78ed04c9aa529310 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 5 Sep 2022 15:32:03 +0800 Subject: [PATCH 236/247] clean up --- segment/predict.py | 17 +++++------------ utils/plots.py | 5 +++-- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index fb5c578d3f86..8c07e6747d7f 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -43,8 +43,7 @@ from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box -from utils.segment.general import process_mask, scale_image -from utils.segment.plots import plot_masks +from utils.segment.general import process_mask from utils.torch_utils import select_device, smart_inference_mode @@ -158,13 +157,7 @@ def run( s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Mask plotting - import time - tms = time.time() - annotator.masks(masks, colors=[colors(x, True) for x in det[:, 5]], img_gpu=im[i] if retina_masks else None) - tme = time.time() - print("plot mask:", tme - tms) - # im_masks = plot_masks(im[i], masks, colors=[colors(x, True) for x in det[:, 5]]) # shape(imh,imw,3) - # annotator.im = scale_image(im.shape[2:], im_masks, im0.shape) # scale to original h, w + annotator.masks(masks, colors=[colors(x, True) for x in det[:, 5]], img_gpu=None if retina_masks else im[i]) # Write results for *xyxy, conf, cls in reversed(det[:, :6]): @@ -226,8 +219,8 @@ def run( def parse_opt(): parser = argparse.ArgumentParser() - parser.add_argument('--weights', nargs='+', type=str, default='../weights/yolov5n-seg.pt', help='model path(s)') - parser.add_argument('--source', type=str, default='/home/laughing/Downloads/MOT17-03-FRCNN-raw.mp4', help='file/dir/URL/glob, 0 for webcam') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model path(s)') + parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path') parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') @@ -253,7 +246,7 @@ def parse_opt(): parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride') - parser.add_argument('--retina-masks', default=True, action='store_true', help='whether to plot masks in native resolution') + parser.add_argument('--retina-masks', action='store_true', help='whether to plot masks in native resolution') opt = parser.parse_args() opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand print_args(vars(opt)) diff --git a/utils/plots.py b/utils/plots.py index 681db44aede7..acbd6c5c3ca2 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -131,9 +131,10 @@ def masks(self, masks, 
colors, img_gpu=None, alpha=0.5): return if isinstance(masks, torch.Tensor): masks = torch.as_tensor(masks, dtype=torch.uint8) + masks = masks.permute(1, 2, 0).contiguous() masks = masks.cpu().numpy() - masks = np.ascontiguousarray(masks.transpose(1, 2, 0)) - masks = scale_image(masks.shape[1:], masks, self.im.shape) + # masks = np.ascontiguousarray(masks.transpose(1, 2, 0)) + masks = scale_image(masks.shape[:2], masks, self.im.shape) masks = np.asarray(masks, dtype=np.float32) colors = np.asarray(colors, dtype=np.float32) # shape(n,3) s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together From d53c8256b51780841dfaf20155601d98c5789ae7 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 5 Sep 2022 15:36:49 +0800 Subject: [PATCH 237/247] revert pil --- segment/predict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/predict.py b/segment/predict.py index 8c07e6747d7f..a55f99b7093c 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -144,7 +144,7 @@ def run( s += '%gx%g ' % im.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh imc = im0.copy() if save_crop else im0 # for save_crop - annotator = Annotator(im0, line_width=line_thickness, example=str(names), pil=True) + annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True) # HWC From 78a42d2684cc2fc168ed01dcfbcec61d6c2bcb7b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 5 Sep 2022 08:19:13 +0000 Subject: [PATCH 238/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/predict.py | 62 ++++++++++++++++++++++++---------------------- utils/plots.py | 6 ++--- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index a55f99b7093c..fe1bf8d80af7 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -49,34 +49,34 @@ @smart_inference_mode() def run( - weights=ROOT / 'yolov5s-seg.pt', # model.pt path(s) - source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam - data=ROOT / 'data/coco128.yaml', # dataset.yaml path - imgsz=(640, 640), # inference size (height, width) - conf_thres=0.25, # confidence threshold - iou_thres=0.45, # NMS IOU threshold - max_det=1000, # maximum detections per image - device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu - view_img=False, # show results - save_txt=False, # save results to *.txt - save_conf=False, # save confidences in --save-txt labels - save_crop=False, # save cropped prediction boxes - nosave=False, # do not save images/videos - classes=None, # filter by class: --class 0, or --class 0 2 3 - agnostic_nms=False, # class-agnostic NMS - augment=False, # augmented inference - visualize=False, # visualize features - update=False, # update all models - project=ROOT / 'runs/predict-seg', # save results to project/name - name='exp', # save results to project/name - exist_ok=False, # existing project/name ok, do not increment - line_thickness=3, # bounding box thickness (pixels) - hide_labels=False, # hide labels - hide_conf=False, # hide confidences - half=False, # use FP16 half-precision inference - dnn=False, # use OpenCV DNN for ONNX inference - vid_stride=1, # video frame-rate stride - retina_masks=False, + weights=ROOT / 'yolov5s-seg.pt', # model.pt path(s) + source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam + data=ROOT / 'data/coco128.yaml', # dataset.yaml path + imgsz=(640, 640), # inference size (height, width) + conf_thres=0.25, # confidence threshold + iou_thres=0.45, # NMS IOU threshold + max_det=1000, # maximum detections per image + device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu + view_img=False, # show results + save_txt=False, # save results to *.txt + save_conf=False, # save confidences in --save-txt labels + save_crop=False, # save cropped prediction boxes + nosave=False, # do not save images/videos + classes=None, # filter by class: --class 0, or --class 0 2 3 + agnostic_nms=False, # class-agnostic NMS + augment=False, # augmented inference + visualize=False, # visualize features + update=False, # update all models + project=ROOT / 'runs/predict-seg', # save results to project/name + name='exp', # save results to project/name + exist_ok=False, # existing project/name ok, do not increment + line_thickness=3, # bounding box thickness (pixels) + hide_labels=False, # hide labels + hide_conf=False, # hide confidences + half=False, # use FP16 half-precision inference + dnn=False, # use OpenCV DNN for ONNX inference + vid_stride=1, # video frame-rate stride + retina_masks=False, ): source = str(source) save_img = not nosave and not source.endswith('.txt') # save inference images @@ -157,7 +157,9 @@ def run( s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Mask plotting - annotator.masks(masks, colors=[colors(x, True) for x in det[:, 5]], img_gpu=None if retina_masks else im[i]) + annotator.masks(masks, + colors=[colors(x, True) for x in det[:, 5]], + img_gpu=None if retina_masks else im[i]) # Write results for *xyxy, conf, cls in reversed(det[:, :6]): @@ -182,7 +184,7 @@ def run( cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) cv2.imshow(str(p), im0) - if cv2.waitKey(1) == ord('q'): # 1 millisecond + if cv2.waitKey(1) == ord('q'): # 1 millisecond exit() # Save results (image with detections) diff --git a/utils/plots.py b/utils/plots.py index acbd6c5c3ca2..c1298e9cc53a 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -22,8 +22,8 @@ from utils import TryExcept, threaded from utils.general import (CONFIG_DIR, FONT, LOGGER, check_font, check_requirements, clip_coords, increment_path, is_ascii, xywh2xyxy, xyxy2xywh) -from utils.segment.general import scale_image from utils.metrics import fitness +from utils.segment.general import 
scale_image # Settings RANK = int(os.getenv('RANK', -1)) @@ -137,8 +137,8 @@ def masks(self, masks, colors, img_gpu=None, alpha=0.5): masks = scale_image(masks.shape[:2], masks, self.im.shape) masks = np.asarray(masks, dtype=np.float32) colors = np.asarray(colors, dtype=np.float32) # shape(n,3) - s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together - masks = (masks @ colors).clip(0, 255) # (h,w,n) @ (n,3) = (h,w,3) + s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together + masks = (masks @ colors).clip(0, 255) # (h,w,n) @ (n,3) = (h,w,3) self.im[:] = masks * alpha + self.im * (1 - s * alpha) else: if len(masks) == 0: From 1b3bacb932322efd91fa0a8d2bd1d28829ad4e20 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 10 Sep 2022 12:13:57 +0300 Subject: [PATCH 239/247] Fix CI error --- val.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/val.py b/val.py index c46109545cc0..5763f49eb663 100644 --- a/val.py +++ b/val.py @@ -266,7 +266,7 @@ def run( plot_images(im, targets, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) # labels plot_images(im, output_to_target(preds), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred - callbacks.run('on_val_batch_end', batch_i, im, targets, paths, shapes, out) + callbacks.run('on_val_batch_end', batch_i, im, targets, paths, shapes, preds) # Compute metrics stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy From 87c7c68dde33d415d9447b4665bdd9bbfae8dd1f Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 12 Sep 2022 11:23:59 +0800 Subject: [PATCH 240/247] fix predict log --- segment/predict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/predict.py b/segment/predict.py index 7c11abebc910..24ad81774a3f 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -208,7 +208,7 @@ def run( vid_writer[i].write(im0) # Print time (inference-only) - # LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms") + LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms") # Print results t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image From 9d6fed15517a7a5bbb045380b3fa4416dea5349c Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 12 Sep 2022 11:24:19 +0800 Subject: [PATCH 241/247] remove upsample --- segment/train.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/segment/train.py b/segment/train.py index 36e8f153f677..b1e3648e5478 100644 --- a/segment/train.py +++ b/segment/train.py @@ -350,8 +350,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # return # Mosaic plots - if mask_ratio != 1: - masks = F.interpolate(masks[None].float(), (imgsz, imgsz), mode="bilinear", align_corners=False)[0] if plots: if ni < 3: plot_images_and_masks(imgs, targets, masks, paths, save_dir / f"train_batch{ni}.jpg") From 5a9d410f03a8f50cb8b84c0a30a84d512fe99a62 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 12 Sep 2022 11:25:00 +0800 Subject: [PATCH 242/247] update interpolate --- utils/segment/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 955faf3a36b4..b45b2c27e0a0 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -83,7 +83,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model # Mask regression if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample - masks = 
F.interpolate(masks[None], (mask_h, mask_w), mode="bilinear", align_corners=False)[0] + masks = F.interpolate(masks[None], (mask_h, mask_w), mode="nearest")[0] marea = xywhn[i][:, 2:].prod(1) # mask width, height normalized mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device)) for bi in b.unique(): From c2ea6c9b6c74a8f619bef188885fc2daaa5ab4f0 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 12 Sep 2022 13:10:15 +0530 Subject: [PATCH 243/247] fix validation plot logging --- segment/train.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/segment/train.py b/segment/train.py index b1e3648e5478..bda379176151 100644 --- a/segment/train.py +++ b/segment/train.py @@ -392,9 +392,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Log val metrics and media metrics_dict = dict(zip(KEYS, log_vals)) logger.log_metrics(metrics_dict, epoch) - if plots: - files = sorted(save_dir.glob('val*.jpg')) - logger.log_images(files, "Validation", epoch) # Save model if (not nosave) or (final_epoch and not evolve): # if save @@ -460,16 +457,16 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # callbacks.run('on_train_end', last, best, epoch, results) # on train end callback using genericLogger - logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs + 1) + logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs) if not opt.evolve: - logger.log_model(best, epoch + 1) + logger.log_model(best, epoch) if plots: plot_results_with_masks(file=save_dir / 'results.csv') # save results.png files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") logger.log_images(files, "Results", epoch + 1) - + logger.log_images(sorted(save_dir.glob('val*.jpg')), "Validation", epoch + 1) torch.cuda.empty_cache() return results From 52fbe315412e208bf5f71c4452fca71bd563064b Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 15 Sep 2022 23:47:35 +0200 Subject: [PATCH 244/247] Annotator.masks() cleanup --- segment/predict.py | 58 +++++++++++++++++++++++----------------------- utils/plots.py | 22 +++++++++--------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 24ad81774a3f..310c2222130b 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -50,34 +50,34 @@ @smart_inference_mode() def run( - weights=ROOT / 'yolov5s-seg.pt', # model.pt path(s) - source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam - data=ROOT / 'data/coco128.yaml', # dataset.yaml path - imgsz=(640, 640), # inference size (height, width) - conf_thres=0.25, # confidence threshold - iou_thres=0.45, # NMS IOU threshold - max_det=1000, # maximum detections per image - device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu - view_img=False, # show results - save_txt=False, # save results to *.txt - save_conf=False, # save confidences in --save-txt labels - save_crop=False, # save cropped prediction boxes - nosave=False, # do not save images/videos - classes=None, # filter by class: --class 0, or --class 0 2 3 - agnostic_nms=False, # class-agnostic NMS - augment=False, # augmented inference - visualize=False, # visualize features - update=False, # update all models - project=ROOT / 'runs/predict-seg', # save results to project/name - name='exp', # save results to project/name - exist_ok=False, # existing project/name ok, do not increment - line_thickness=3, # bounding box thickness (pixels) - hide_labels=False, # hide labels - hide_conf=False, # hide confidences - half=False, # use FP16 half-precision inference - dnn=False, # use OpenCV DNN for ONNX inference - vid_stride=1, # video frame-rate stride - retina_masks=False, + weights=ROOT / 'yolov5s-seg.pt', # model.pt path(s) + source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam + data=ROOT / 'data/coco128.yaml', # dataset.yaml path + imgsz=(640, 640), # inference size (height, width) + conf_thres=0.25, # confidence threshold + iou_thres=0.45, # NMS IOU threshold + max_det=1000, # maximum detections per image + device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu + view_img=False, # show results + save_txt=False, # save results to *.txt + save_conf=False, # save confidences in --save-txt labels + save_crop=False, # save cropped prediction boxes + nosave=False, # do not save images/videos + classes=None, # filter by class: --class 0, or --class 0 2 3 + agnostic_nms=False, # class-agnostic NMS + augment=False, # augmented inference + visualize=False, # visualize features + update=False, # update all models + project=ROOT / 'runs/predict-seg', # save results to project/name + name='exp', # save results to project/name + exist_ok=False, # existing project/name ok, do not increment + line_thickness=3, # bounding box thickness (pixels) + hide_labels=False, # hide labels + hide_conf=False, # hide confidences + half=False, # use FP16 half-precision inference + dnn=False, # use OpenCV DNN for ONNX inference + vid_stride=1, # video frame-rate stride + retina_masks=False, ): source = str(source) save_img = not nosave and not source.endswith('.txt') # save inference images @@ -160,7 +160,7 @@ def run( # Mask plotting annotator.masks(masks, colors=[colors(x, True) for x in det[:, 5]], - img_gpu=None if retina_masks else im[i]) + im_gpu=None if retina_masks else im[i]) # Write results for *xyxy, conf, cls in reversed(det[:, :6]): diff --git a/utils/plots.py b/utils/plots.py index 103364864a08..d8d5b225a774 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -114,18 +114,18 @@ def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 2 thickness=tf, lineType=cv2.LINE_AA) - def masks(self, masks, colors, img_gpu=None, alpha=0.5): + def masks(self, masks, colors, im_gpu=None, alpha=0.5): """Plot masks at once. Args: masks (tensor): predicted masks on cuda, shape: [n, h, w] colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] - img_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] - retina_masks (bool): whether to plot masks in native resolution. 
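The im_gpu=None branch of Annotator.masks() blends on the CPU with a single matrix multiply: n masks stacked as (h, w, n) against an (n, 3) color table. A standalone numpy sketch of that blend, where random masks and colors stand in for real predictions:

    import numpy as np

    h, w, n, alpha = 480, 640, 3, 0.5
    im = np.full((h, w, 3), 114, dtype=np.uint8)                # letterbox-grey canvas
    masks = (np.random.rand(h, w, n) > 0.8).astype(np.float32)  # n binary masks
    colors = np.array([[255, 56, 56], [56, 255, 56], [56, 56, 255]], np.float32)

    s = masks.sum(2, keepdims=True).clip(0, 1)   # union of all masks, (h,w,1)
    blend = (masks @ colors).clip(0, 255)        # (h,w,n) @ (n,3) = (h,w,3)
    im[:] = blend * alpha + im * (1 - s * alpha) # recolor only where s == 1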
+ im_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] + alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque """ if self.pil: # convert to numpy first self.im = np.asarray(self.im).copy() - if img_gpu is None: + if im_gpu is None: # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...) if len(masks) == 0: return @@ -142,8 +142,8 @@ def masks(self, masks, colors, img_gpu=None, alpha=0.5): self.im[:] = masks * alpha + self.im * (1 - s * alpha) else: if len(masks) == 0: - self.im[:] = img_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255 - colors = torch.tensor(colors, device=img_gpu.device, dtype=torch.float32) / 255.0 + self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255 + colors = torch.tensor(colors, device=im_gpu.device, dtype=torch.float32) / 255.0 colors = colors[:, None, None] # shape(n,1,1,3) masks = masks.unsqueeze(3) # shape(n,h,w,1) masks_color = masks * (colors * alpha) # shape(n,h,w,3) @@ -151,11 +151,11 @@ def masks(self, masks, colors, img_gpu=None, alpha=0.5): inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1) mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(n,h,w,3) - img_gpu = img_gpu.flip(dims=[0]) # flip channel - img_gpu = img_gpu.permute(1, 2, 0).contiguous() # shape(h,w,3) - img_gpu = img_gpu * inv_alph_masks[-1] + mcs - im_mask = (img_gpu * 255).byte().cpu().numpy() - self.im[:] = scale_image(img_gpu.shape, im_mask, self.im.shape) + im_gpu = im_gpu.flip(dims=[0]) # flip channel + im_gpu = im_gpu.permute(1, 2, 0).contiguous() # shape(h,w,3) + im_gpu = im_gpu * inv_alph_masks[-1] + mcs + im_mask = (im_gpu * 255).byte().cpu().numpy() + self.im[:] = scale_image(im_gpu.shape, im_mask, self.im.shape) if self.pil: # convert im back to PIL and update draw self.fromarray(self.im) From 6de176cc80514b9b63a2839c3c68c0e34ba5daf7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 15 Sep 2022 21:48:03 +0000 Subject: [PATCH 245/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/predict.py | 56 +++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 310c2222130b..ba4cf2905255 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -50,34 +50,34 @@ @smart_inference_mode() def run( - weights=ROOT / 'yolov5s-seg.pt', # model.pt path(s) - source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam - data=ROOT / 'data/coco128.yaml', # dataset.yaml path - imgsz=(640, 640), # inference size (height, width) - conf_thres=0.25, # confidence threshold - iou_thres=0.45, # NMS IOU threshold - max_det=1000, # maximum detections per image - device='', # cuda device, i.e. 
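The im_gpu branch is front-to-back alpha compositing: cumprod(0) accumulates the transparency of every mask drawn so far, so later masks are dimmed where earlier ones already cover a pixel. A sketch with assumed shapes; note the final * 2 offsets each mask also attenuating itself inside the cumprod, which is exact for alpha = 0.5:

    import torch

    n, h, w, alpha = 3, 480, 640, 0.5
    im_gpu = torch.rand(3, h, w)                        # image tensor, range [0, 1]
    masks = (torch.rand(n, h, w) > 0.8).float()         # n binary masks
    colors = torch.rand(n, 3)                           # colors already scaled to [0, 1]

    colors = colors[:, None, None]                      # (n,1,1,3)
    m = masks.unsqueeze(3)                              # (n,h,w,1)
    masks_color = m * (colors * alpha)                  # (n,h,w,3)
    inv_alph = (1 - m * alpha).cumprod(0)               # (n,h,w,1) remaining transparency
    mcs = (masks_color * inv_alph).sum(0) * 2           # (h,w,3) summed mask colors
    out = im_gpu.permute(1, 2, 0) * inv_alph[-1] + mcs  # background dimmed under all masks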
0 or 0,1,2,3 or cpu - view_img=False, # show results - save_txt=False, # save results to *.txt - save_conf=False, # save confidences in --save-txt labels - save_crop=False, # save cropped prediction boxes - nosave=False, # do not save images/videos - classes=None, # filter by class: --class 0, or --class 0 2 3 - agnostic_nms=False, # class-agnostic NMS - augment=False, # augmented inference - visualize=False, # visualize features - update=False, # update all models - project=ROOT / 'runs/predict-seg', # save results to project/name - name='exp', # save results to project/name - exist_ok=False, # existing project/name ok, do not increment - line_thickness=3, # bounding box thickness (pixels) - hide_labels=False, # hide labels - hide_conf=False, # hide confidences - half=False, # use FP16 half-precision inference - dnn=False, # use OpenCV DNN for ONNX inference - vid_stride=1, # video frame-rate stride - retina_masks=False, + weights=ROOT / 'yolov5s-seg.pt', # model.pt path(s) + source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam + data=ROOT / 'data/coco128.yaml', # dataset.yaml path + imgsz=(640, 640), # inference size (height, width) + conf_thres=0.25, # confidence threshold + iou_thres=0.45, # NMS IOU threshold + max_det=1000, # maximum detections per image + device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu + view_img=False, # show results + save_txt=False, # save results to *.txt + save_conf=False, # save confidences in --save-txt labels + save_crop=False, # save cropped prediction boxes + nosave=False, # do not save images/videos + classes=None, # filter by class: --class 0, or --class 0 2 3 + agnostic_nms=False, # class-agnostic NMS + augment=False, # augmented inference + visualize=False, # visualize features + update=False, # update all models + project=ROOT / 'runs/predict-seg', # save results to project/name + name='exp', # save results to project/name + exist_ok=False, # existing project/name ok, do not increment + line_thickness=3, # bounding box thickness (pixels) + hide_labels=False, # hide labels + hide_conf=False, # hide confidences + half=False, # use FP16 half-precision inference + dnn=False, # use OpenCV DNN for ONNX inference + vid_stride=1, # video frame-rate stride + retina_masks=False, ): source = str(source) save_img = not nosave and not source.endswith('.txt') # save inference images From 4958de59c712fc4bc9caa5ea2aaeb12b3c906d13 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 15 Sep 2022 23:48:52 +0200 Subject: [PATCH 246/247] Remove segmentation_model definition --- models/common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/models/common.py b/models/common.py index 8aea833388af..0d90ff4f8827 100644 --- a/models/common.py +++ b/models/common.py @@ -337,7 +337,6 @@ def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, names = model.module.names if hasattr(model, 'module') else model.names # get class names model.half() if fp16 else model.float() self.model = model # explicitly assign for to(), cpu(), cuda(), half() - segmentation_model = type(model.model[-1]).__name__ == 'Segment' elif jit: # TorchScript LOGGER.info(f'Loading {w} for TorchScript inference...') extra_files = {'config.txt': ''} # model metadata From 753adac58f00b97b1f0e58699cab36a280236469 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 15 Sep 2022 23:56:49 +0200 Subject: [PATCH 247/247] Restore 0.99999 decimals --- models/yolo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/yolo.py b/models/yolo.py index 
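The deleted segmentation_model assignment was never read inside DetectMultiBackend, but the same check remains the way to tell a segmentation checkpoint from a detection one; a hypothetical helper for illustration (Segment is the seg head module, Detect the plain box head):

    def is_segmentation_model(model):
        # the last module of a YOLOv5 model is its head: Detect or Segment
        return type(model.model[-1]).__name__ == 'Segment'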
d59034bd4041..a0702a7c0257 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -257,7 +257,7 @@ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is for mi, s in zip(m.m, m.stride): # from b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) - b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls + b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum()) # cls mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
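Restoring 0.99999 matters most for single-class models: the class bias is initialized to roughly log(0.6 / (nc - 1)), and the small epsilon keeps the nc = 1 case finite while pushing its initial class probability toward 1, which is correct when there is only one class to predict. A worked sketch of the arithmetic:

    import math

    def cls_bias(nc):
        # ~log(0.6 / (nc - 1)), with 0.99999 in place of 1 so nc == 1 stays finite
        return math.log(0.6 / (nc - 0.99999))

    for nc in (1, 80):
        b = cls_bias(nc)
        p = 1 / (1 + math.exp(-b))  # initial per-class probability after sigmoid
        print(f'nc={nc:>2}: bias={b:6.2f}, sigmoid={p:.4f}')
    # nc= 1: bias ~ 11.00, sigmoid ~ 1.0000
    # nc=80: bias ~ -4.88, sigmoid ~ 0.0075 (~0.6 / 79)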