From 52fd8b0977fe569811a636276e571240b947833c Mon Sep 17 00:00:00 2001
From: JingxianKe <983231802@qq.com>
Date: Fri, 22 Sep 2023 12:02:28 +0800
Subject: [PATCH 1/2] Replace Lightning task with YOLOX-style Exp, Trainer and data pipeline
---
exps/default/__init__.py | 3 +
exps/default/yolov5l.py | 20 ++
exps/default/yolov5m.py | 20 ++
exps/default/yolov5m6.py | 20 ++
exps/default/yolov5n.py | 20 ++
exps/default/yolov5n6.py | 20 ++
exps/default/yolov5s.py | 20 ++
exps/default/yolov5s6.py | 20 ++
exps/default/yolov5ts.py | 20 ++
requirements.txt | 4 +-
test/test_data_pipeline.py | 52 +--
test/test_trainer.py | 148 +++++---
tools/eval_metric.py | 2 +-
yolort/data/__init__.py | 10 +-
yolort/data/_helper.py | 74 +---
yolort/data/builtin_meta.py | 154 --------
yolort/data/coco.py | 115 ------
yolort/data/data_augment.py | 243 +++++++++++++
yolort/data/data_module.py | 2 +-
yolort/data/data_prefetcher.py | 51 +++
yolort/data/dataloading.py | 113 ++++++
yolort/data/datasets/__init__.py | 8 +
yolort/data/datasets/coco.py | 187 ++++++++++
yolort/data/datasets/coco_classes.py | 86 +++++
yolort/data/datasets/datasets_wrapper.py | 300 ++++++++++++++++
yolort/data/datasets/mosaicdetection.py | 234 ++++++++++++
yolort/data/samplers.py | 85 +++++
yolort/evaluators/__init__.py | 5 +
yolort/evaluators/coco_evaluator.py | 317 +++++++++++++++++
yolort/exp/__init__.py | 5 +
yolort/exp/base_exp.py | 90 +++++
yolort/exp/default/__init__.py | 28 ++
yolort/exp/yolox_base.py | 387 ++++++++++++++++++++
yolort/trainer/__init__.py | 4 +-
yolort/trainer/lightning_task.py | 143 --------
yolort/trainer/trainer.py | 392 ++++++++++++++++++++
yolort/utils/__init__.py | 9 +
yolort/utils/allreduce_norm.py | 103 ++++++
yolort/utils/boxes.py | 143 ++++++++
yolort/utils/checkpoint.py | 43 +++
yolort/utils/dist.py | 294 +++++++++++++++
yolort/utils/ema.py | 60 ++++
yolort/utils/logger.py | 434 ++++++++++++++++++++++-
yolort/utils/lr_scheduler.py | 205 +++++++++++
yolort/utils/metric.py | 137 +++++++
yolort/utils/model_utils.py | 58 +++
46 files changed, 4328 insertions(+), 560 deletions(-)
create mode 100644 exps/default/__init__.py
create mode 100644 exps/default/yolov5l.py
create mode 100644 exps/default/yolov5m.py
create mode 100644 exps/default/yolov5m6.py
create mode 100644 exps/default/yolov5n.py
create mode 100644 exps/default/yolov5n6.py
create mode 100644 exps/default/yolov5s.py
create mode 100644 exps/default/yolov5s6.py
create mode 100644 exps/default/yolov5ts.py
delete mode 100644 yolort/data/builtin_meta.py
delete mode 100644 yolort/data/coco.py
create mode 100644 yolort/data/data_augment.py
create mode 100644 yolort/data/data_prefetcher.py
create mode 100644 yolort/data/dataloading.py
create mode 100644 yolort/data/datasets/__init__.py
create mode 100644 yolort/data/datasets/coco.py
create mode 100644 yolort/data/datasets/coco_classes.py
create mode 100644 yolort/data/datasets/datasets_wrapper.py
create mode 100644 yolort/data/datasets/mosaicdetection.py
create mode 100644 yolort/data/samplers.py
create mode 100644 yolort/evaluators/__init__.py
create mode 100644 yolort/evaluators/coco_evaluator.py
create mode 100644 yolort/exp/__init__.py
create mode 100644 yolort/exp/base_exp.py
create mode 100644 yolort/exp/default/__init__.py
create mode 100644 yolort/exp/yolox_base.py
delete mode 100644 yolort/trainer/lightning_task.py
create mode 100644 yolort/trainer/trainer.py
create mode 100644 yolort/utils/allreduce_norm.py
create mode 100644 yolort/utils/boxes.py
create mode 100644 yolort/utils/checkpoint.py
create mode 100644 yolort/utils/dist.py
create mode 100644 yolort/utils/ema.py
create mode 100644 yolort/utils/lr_scheduler.py
create mode 100644 yolort/utils/metric.py
create mode 100644 yolort/utils/model_utils.py
diff --git a/exps/default/__init__.py b/exps/default/__init__.py
new file mode 100644
index 00000000..ce9fae06
--- /dev/null
+++ b/exps/default/__init__.py
@@ -0,0 +1,3 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
diff --git a/exps/default/yolov5l.py b/exps/default/yolov5l.py
new file mode 100644
index 00000000..b04d0f90
--- /dev/null
+++ b/exps/default/yolov5l.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+import yolort.models as models
+
+from yolort.exp import Exp as MyExp
+
+
+class Exp(MyExp):
+    """Experiment description that builds the ``yolov5l`` model."""
+
+    def __init__(self):
+        super().__init__()
+        # The experiment name is this file's stem, i.e. "yolov5l".
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
+
+    def get_model(self):
+        """Create the ``yolov5l`` model (upstream r6.0), put it in train mode and return it."""
+        self.model = models.yolov5l(upstream_version="r6.0")
+        self.model.train()
+        return self.model
diff --git a/exps/default/yolov5m.py b/exps/default/yolov5m.py
new file mode 100644
index 00000000..e33c2771
--- /dev/null
+++ b/exps/default/yolov5m.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+import yolort.models as models
+
+from yolort.exp import Exp as MyExp
+
+
+class Exp(MyExp):
+    """Experiment description that builds the ``yolov5m`` model."""
+
+    def __init__(self):
+        super().__init__()
+        # The experiment name is this file's stem, i.e. "yolov5m".
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
+
+    def get_model(self):
+        """Create the ``yolov5m`` model (upstream r6.0), put it in train mode and return it."""
+        self.model = models.yolov5m(upstream_version="r6.0")
+        self.model.train()
+        return self.model
diff --git a/exps/default/yolov5m6.py b/exps/default/yolov5m6.py
new file mode 100644
index 00000000..4ac71156
--- /dev/null
+++ b/exps/default/yolov5m6.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+import yolort.models as models
+
+from yolort.exp import Exp as MyExp
+
+
+class Exp(MyExp):
+    """Experiment description that builds the ``yolov5m6`` model."""
+
+    def __init__(self):
+        super().__init__()
+        # The experiment name is this file's stem, i.e. "yolov5m6".
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
+
+    def get_model(self):
+        """Create the ``yolov5m6`` model (upstream r6.0), put it in train mode and return it."""
+        self.model = models.yolov5m6(upstream_version="r6.0")
+        self.model.train()
+        return self.model
diff --git a/exps/default/yolov5n.py b/exps/default/yolov5n.py
new file mode 100644
index 00000000..72bf63e8
--- /dev/null
+++ b/exps/default/yolov5n.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+import yolort.models as models
+
+from yolort.exp import Exp as MyExp
+
+
+class Exp(MyExp):
+    """Experiment description that builds the ``yolov5n`` model."""
+
+    def __init__(self):
+        super().__init__()
+        # The experiment name is this file's stem, i.e. "yolov5n".
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
+
+    def get_model(self):
+        """Create the ``yolov5n`` model (upstream r6.0), put it in train mode and return it."""
+        self.model = models.yolov5n(upstream_version="r6.0")
+        self.model.train()
+        return self.model
diff --git a/exps/default/yolov5n6.py b/exps/default/yolov5n6.py
new file mode 100644
index 00000000..3ac2cfd2
--- /dev/null
+++ b/exps/default/yolov5n6.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+import yolort.models as models
+
+from yolort.exp import Exp as MyExp
+
+
+class Exp(MyExp):
+    """Experiment description that builds the ``yolov5n6`` model."""
+
+    def __init__(self):
+        super().__init__()
+        # The experiment name is this file's stem, i.e. "yolov5n6".
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
+
+    def get_model(self):
+        """Create the ``yolov5n6`` model (upstream r6.0), put it in train mode and return it."""
+        self.model = models.yolov5n6(upstream_version="r6.0")
+        self.model.train()
+        return self.model
diff --git a/exps/default/yolov5s.py b/exps/default/yolov5s.py
new file mode 100644
index 00000000..61736d25
--- /dev/null
+++ b/exps/default/yolov5s.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+import yolort.models as models
+
+from yolort.exp import Exp as MyExp
+
+
+class Exp(MyExp):
+    """Experiment description that builds the ``yolov5s`` model."""
+
+    def __init__(self):
+        super().__init__()
+        # The experiment name is this file's stem, i.e. "yolov5s".
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
+
+    def get_model(self):
+        """Create the ``yolov5s`` model (upstream r6.0), put it in train mode and return it."""
+        self.model = models.yolov5s(upstream_version="r6.0")
+        self.model.train()
+        return self.model
diff --git a/exps/default/yolov5s6.py b/exps/default/yolov5s6.py
new file mode 100644
index 00000000..cda2a942
--- /dev/null
+++ b/exps/default/yolov5s6.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+import yolort.models as models
+
+from yolort.exp import Exp as MyExp
+
+
+class Exp(MyExp):
+    """Experiment description that builds the ``yolov5s6`` model."""
+
+    def __init__(self):
+        super().__init__()
+        # The experiment name is this file's stem, i.e. "yolov5s6".
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
+
+    def get_model(self):
+        """Create the ``yolov5s6`` model (upstream r6.0), put it in train mode and return it."""
+        self.model = models.yolov5s6(upstream_version="r6.0")
+        self.model.train()
+        return self.model
diff --git a/exps/default/yolov5ts.py b/exps/default/yolov5ts.py
new file mode 100644
index 00000000..365eab09
--- /dev/null
+++ b/exps/default/yolov5ts.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+import yolort.models as models
+
+from yolort.exp import Exp as MyExp
+
+
+class Exp(MyExp):
+    """Experiment description that builds the ``yolov5ts`` model."""
+
+    def __init__(self):
+        super().__init__()
+        # The experiment name is this file's stem, i.e. "yolov5ts".
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
+
+    def get_model(self):
+        """Create the ``yolov5ts`` model (upstream r6.0), put it in train mode and return it."""
+        self.model = models.yolov5ts(upstream_version="r6.0")
+        self.model.train()
+        return self.model
diff --git a/requirements.txt b/requirements.txt
index 8a349747..af814771 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -29,4 +29,6 @@ pandas
# extras --------------------------------------
# pycocotools on PyPI needs python3.7 as minimal
# pycocotools>=2.0.2 # corresponds to https://github.com/ppwwyyxx/cocoapi
-thop # FLOPs computation
+thop # FLOPs computation
+loguru # Python logging made (stupidly) simple
+ninja # a small build system with a focus on speed
diff --git a/test/test_data_pipeline.py b/test/test_data_pipeline.py
index 2a597eb7..4e626a81 100644
--- a/test/test_data_pipeline.py
+++ b/test/test_data_pipeline.py
@@ -3,11 +3,23 @@
import numpy as np
import pytest
-import torch
+import sys
+sys.path.append("../yolort")
+import torch
from torch import Tensor
-from yolort.data import _helper as data_helper
+from yolort.exp import Exp
+from yolort.data import DataPrefetcher
from yolort.utils import contains_any_tensor
+from torch import distributed as dist
+
+
+def get_world_size() -> int:
+    """Return the number of distributed processes, or 1 when not running distributed."""
+    # torch.distributed may be unavailable in this build or simply not initialised.
+    if not (dist.is_available() and dist.is_initialized()):
+        return 1
+    return dist.get_world_size()
def test_contains_any_tensor():
@@ -21,28 +33,32 @@ def test_contains_any_tensor():
def test_get_dataset():
# Acquire the images and labels from the coco128 dataset
- train_dataset = data_helper.get_dataset(data_root="data-bin", mode="train")
+ train_dataset = Exp().get_dataset(data_root="data-bin", mode="train", cache_type=None)
# Test the datasets
- image, target = next(iter(train_dataset))
- assert isinstance(image, Tensor)
- assert isinstance(target, dict)
+ image, target, _, _ = next(iter(train_dataset))
+ assert image.shape == (3, 640, 640)
+ assert target.shape == (50, 5)
def test_get_dataloader():
batch_size = 8
- data_loader = data_helper.get_dataloader(data_root="data-bin", mode="train", batch_size=batch_size)
- # Test the dataloader
- images, targets = next(iter(data_loader))
+ is_distributed = get_world_size() > 1
+ data_loader = Exp().get_data_loader(
+ batch_size=batch_size,
+ is_distributed=is_distributed,
+ no_aug=False,
+ cache_img=None,
+ )
+ prefetcher = DataPrefetcher(data_loader)
+ images, targets = prefetcher.next()
assert len(images) == batch_size
assert isinstance(images[0], Tensor)
assert len(images[0]) == 3
assert len(targets) == batch_size
- assert isinstance(targets[0], dict)
- assert isinstance(targets[0]["image_id"], Tensor)
- assert isinstance(targets[0]["boxes"], Tensor)
- assert isinstance(targets[0]["labels"], Tensor)
- assert isinstance(targets[0]["orig_size"], Tensor)
+    # Per-image targets are plain tensors now rather than dicts of tensors.
+    # pytest collects and runs this test itself, so nothing is called at import time.
+    assert isinstance(targets[0], Tensor)
@pytest.mark.skip("Remove Lightning dependency")
@@ -65,11 +81,3 @@ def test_detection_data_module():
assert isinstance(targets[0]["image_id"], Tensor)
assert isinstance(targets[0]["boxes"], Tensor)
assert isinstance(targets[0]["labels"], Tensor)
-
-
-def test_prepare_coco128():
- data_path = Path("data-bin")
- coco128_dirname = "coco128"
- data_helper.prepare_coco128(data_path, dirname=coco128_dirname)
- annotation_file = data_path / coco128_dirname / "annotations" / "instances_train2017.json"
- assert annotation_file.is_file()
diff --git a/test/test_trainer.py b/test/test_trainer.py
index 9be94682..be1573c7 100644
--- a/test/test_trainer.py
+++ b/test/test_trainer.py
@@ -1,52 +1,108 @@
# Copyright (c) 2021, yolort team. All rights reserved.
-from pathlib import Path
+import argparse
+import importlib
-import pytest
-from yolort.data import _helper as data_helper
+
+def make_parser():
+    """Build the argument parser that supplies the training options used by these tests."""
+    parser = argparse.ArgumentParser("YOLOX train parser")
+    parser.add_argument("-expn", "--experiment-name", type=str, default="yolov5n")
+    parser.add_argument("-n", "--name", type=str, default="yolov5n", help="model name")
+
+    # distributed
+    parser.add_argument(
+        "--dist-backend", default="nccl", type=str, help="distributed backend"
+    )
+    parser.add_argument(
+        "--dist-url",
+        default=None,
+        type=str,
+        help="url used to set up distributed training",
+    )
+    parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size")
+    parser.add_argument(
+        "-d", "--devices", default=None, type=int, help="device for training"
+    )
+    parser.add_argument(
+        "-f",
+        "--exp_file",
+        default=None,
+        type=str,
+        help="plz input your experiment description file",
+    )
+    parser.add_argument(
+        "--resume", default=False, action="store_true", help="resume training"
+    )
+    parser.add_argument("-c", "--ckpt", default=None, type=str, help="checkpoint file")
+    parser.add_argument(
+        "-e",
+        "--start_epoch",
+        default=None,
+        type=int,
+        help="resume training start epoch",
+    )
+    parser.add_argument(
+        "--num_machines", default=1, type=int, help="num of node for training"
+    )
+    parser.add_argument(
+        "--machine_rank", default=0, type=int, help="node rank for multi-node training"
+    )
+    parser.add_argument(
+        "--fp16",
+        dest="fp16",
+        default=False,
+        action="store_true",
+        help="Adopting mix precision training.",
+    )
+    parser.add_argument(
+        "--cache",
+        type=str,
+        nargs="?",
+        const="ram",
+        help="Caching imgs to ram/disk for fast training.",
+    )
+    parser.add_argument(
+        "-o",
+        "--occupy",
+        dest="occupy",
+        default=False,
+        action="store_true",
+        help="occupy GPU memory first for training.",
+    )
+    parser.add_argument(
+        "-l",
+        "--logger",
+        type=str,
+        help="Logger to be used for metrics. "
+        "Implemented loggers include `tensorboard` and `wandb`.",
+        default="tensorboard"
+    )
+    parser.add_argument(
+        "opts",
+        help="Modify config options using the command-line",
+        default=None,
+        nargs=argparse.REMAINDER,
+    )
+    return parser
-@pytest.mark.skip("Remove Lightning dependency")
def test_training_step():
- import pytorch_lightning as pl
- from yolort.data.data_module import DetectionDataModule
- from yolort.trainer import DefaultTask
-
- # Setup the DataModule
- data_path = "data-bin"
- train_dataset = data_helper.get_dataset(data_root=data_path, mode="train")
- val_dataset = data_helper.get_dataset(data_root=data_path, mode="val")
- data_module = DetectionDataModule(train_dataset, val_dataset, batch_size=8)
- # Load model
- model = DefaultTask(arch="yolov5n")
- model = model.train()
- # Trainer
- trainer = pl.Trainer(max_epochs=1)
- trainer.fit(model, data_module)
-
-
-@pytest.mark.skip("Remove Lightning dependency")
-@pytest.mark.parametrize("arch, version, map5095, map50", [("yolov5s", "r4.0", 42.5, 65.3)])
-def test_test_epoch_end(arch, version, map5095, map50):
- import pytorch_lightning as pl
- from yolort.trainer import DefaultTask
-
- # Acquire the annotation file
- data_path = Path("data-bin")
- coco128_dirname = "coco128"
- data_helper.prepare_coco128(data_path, dirname=coco128_dirname)
- annotation_file = data_path / coco128_dirname / "annotations" / "instances_train2017.json"
-
- # Get dataloader to test
- val_dataloader = data_helper.get_dataloader(data_root=data_path, mode="val")
-
- # Load model
- model = DefaultTask(arch=arch, version=version, pretrained=True, annotation_path=annotation_file)
-
- # test step
- trainer = pl.Trainer(max_epochs=1)
- trainer.test(model, dataloaders=val_dataloader)
- # test epoch end
- results = model.evaluator.compute()
- assert results["AP"] > map5095
- assert results["AP50"] > map50
+    # Parse defaults only: under pytest, sys.argv holds pytest's own options.
+    args = make_parser().parse_args([])
+    module_name = ".".join(["yolort", "exp", "default", args.name])
+    exp = importlib.import_module(module_name).Exp()
+    exp.merge(args.opts)
+    h, w = exp.input_size
+    assert h % 32 == 0 and w % 32 == 0, "input size must be multiples of 32"
+
+    from yolort.trainer import Trainer
+    trainer = Trainer(exp, args)
+    trainer.train()
+
+
+def test_test_epoch_end():
+    import pytest
+
+    # TODO: run the evaluation loop here once an evaluation entry point exists.
+    pytest.skip("evaluation entry point is not implemented yet")
diff --git a/tools/eval_metric.py b/tools/eval_metric.py
index 0ab6adae..0538f0df 100644
--- a/tools/eval_metric.py
+++ b/tools/eval_metric.py
@@ -8,7 +8,7 @@
import torchvision
import yolort
from yolort.data import _helper as data_helper
-from yolort.data.coco import COCODetection
+from yolort.data.datasets.coco import COCODetection
from yolort.data.coco_eval import COCOEvaluator
from yolort.data.transforms import collate_fn, default_val_transforms
from yolort.utils.logger import MetricLogger
diff --git a/yolort/data/__init__.py b/yolort/data/__init__.py
index efd93ced..5740093a 100644
--- a/yolort/data/__init__.py
+++ b/yolort/data/__init__.py
@@ -1 +1,9 @@
-# Copyright (c) 2021, yolort team. All rights reserved.
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+from .data_augment import TrainTransform, ValTransform
+from .data_prefetcher import DataPrefetcher
+from .dataloading import DataLoader, get_yolox_datadir, worker_init_reset_seed
+from .datasets import *
+from .samplers import InfiniteSampler, YoloBatchSampler
\ No newline at end of file
diff --git a/yolort/data/_helper.py b/yolort/data/_helper.py
index 2a95af9a..66fbf0cb 100644
--- a/yolort/data/_helper.py
+++ b/yolort/data/_helper.py
@@ -7,8 +7,7 @@
import torch
from tabulate import tabulate
-from .coco import COCODetection
-from .transforms import collate_fn, default_train_transforms, default_val_transforms
+from .transforms import collate_fn
def create_small_table(small_dict):
@@ -45,74 +44,3 @@ def get_coco_api_from_dataset(dataset):
return dataset.coco
else:
raise NotImplementedError("Currently only supports COCO datasets")
-
-
-def prepare_coco128(
- data_path: PosixPath,
- dirname: str = "coco128",
-) -> None:
- """
- Prepare coco128 dataset to test.
-
- Args:
- data_path (PosixPath): root path of coco128 dataset.
- dirname (str): the directory name of coco128 dataset. Default: 'coco128'.
- """
- logger = logging.getLogger(__name__)
-
- if not data_path.is_dir():
- logger.info(f"Create a new directory: {data_path}")
- data_path.mkdir(parents=True, exist_ok=True)
-
- zip_path = data_path / "coco128.zip"
- coco128_url = "https://github.com/zhiqwang/yolort/releases/download/v0.3.0/coco128.zip"
- if not zip_path.is_file():
- logger.info(f"Downloading coco128 datasets form {coco128_url}")
- torch.hub.download_url_to_file(coco128_url, zip_path, hash_prefix="a67d2887")
-
- coco128_path = data_path / dirname
- if not coco128_path.is_dir():
- logger.info(f"Unzipping dataset to {coco128_path}")
- with ZipFile(zip_path, "r") as zip_obj:
- zip_obj.extractall(data_path)
-
-
-def get_dataset(data_root: str, mode: str = "val"):
- # Acquire the images and labels from the coco128 dataset
- data_path = Path(data_root)
- coco128_dirname = "coco128"
- coco128_path = data_path / coco128_dirname
- image_root = coco128_path / "images" / "train2017"
- annotation_file = coco128_path / "annotations" / "instances_train2017.json"
-
- if not annotation_file.is_file():
- prepare_coco128(data_path, dirname=coco128_dirname)
-
- if mode == "train":
- dataset = COCODetection(image_root, annotation_file, default_train_transforms())
- elif mode == "val":
- dataset = COCODetection(image_root, annotation_file, default_val_transforms())
- else:
- raise NotImplementedError(f"Currently not supports mode {mode}")
-
- return dataset
-
-
-def get_dataloader(data_root: str, mode: str = "val", batch_size: int = 4):
- # Prepare the datasets for training
- # Acquire the images and labels from the coco128 dataset
- dataset = get_dataset(data_root=data_root, mode=mode)
-
- # We adopt the sequential sampler in order to repeat the experiment
- sampler = torch.utils.data.SequentialSampler(dataset)
-
- loader = torch.utils.data.DataLoader(
- dataset,
- batch_size,
- sampler=sampler,
- drop_last=False,
- collate_fn=collate_fn,
- num_workers=0,
- )
-
- return loader
diff --git a/yolort/data/builtin_meta.py b/yolort/data/builtin_meta.py
deleted file mode 100644
index be2fc7ab..00000000
--- a/yolort/data/builtin_meta.py
+++ /dev/null
@@ -1,154 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (c) Facebook, Inc. and its affiliates.
-
-"""
-Note:
-For your custom dataset, there is no need to hard-code metadata anywhere in the code.
-For example, for COCO-format dataset, metadata will be obtained automatically
-when calling `load_coco_json`. For other dataset, metadata may also be obtained in other ways
-during loading.
-
-However, we hard-coded metadata for a few common dataset here.
-The only goal is to allow users who don't have these dataset to use pre-trained models.
-Users don't have to download a COCO json (which contains metadata), in order to visualize a
-COCO model (with correct class names and colors).
-"""
-
-
-# All coco categories, together with their nice-looking visualization colors
-# It's from https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json
-COCO_CATEGORIES = [
- {"id": 1, "color": [220, 20, 60], "isthing": 1, "name": "person"},
- {"id": 2, "color": [119, 11, 32], "isthing": 1, "name": "bicycle"},
- {"id": 3, "color": [0, 0, 142], "isthing": 1, "name": "car"},
- {"id": 4, "color": [0, 0, 230], "isthing": 1, "name": "motorcycle"},
- {"id": 5, "color": [106, 0, 228], "isthing": 1, "name": "airplane"},
- {"id": 6, "color": [0, 60, 100], "isthing": 1, "name": "bus"},
- {"id": 7, "color": [0, 80, 100], "isthing": 1, "name": "train"},
- {"id": 8, "color": [0, 0, 70], "isthing": 1, "name": "truck"},
- {"id": 9, "color": [0, 0, 192], "isthing": 1, "name": "boat"},
- {"id": 10, "color": [250, 170, 30], "isthing": 1, "name": "traffic light"},
- {"id": 11, "color": [100, 170, 30], "isthing": 1, "name": "fire hydrant"},
- {"id": 13, "color": [220, 220, 0], "isthing": 1, "name": "stop sign"},
- {"id": 14, "color": [175, 116, 175], "isthing": 1, "name": "parking meter"},
- {"id": 15, "color": [250, 0, 30], "isthing": 1, "name": "bench"},
- {"id": 16, "color": [165, 42, 42], "isthing": 1, "name": "bird"},
- {"id": 17, "color": [255, 77, 255], "isthing": 1, "name": "cat"},
- {"id": 18, "color": [0, 226, 252], "isthing": 1, "name": "dog"},
- {"id": 19, "color": [182, 182, 255], "isthing": 1, "name": "horse"},
- {"id": 20, "color": [0, 82, 0], "isthing": 1, "name": "sheep"},
- {"id": 21, "color": [120, 166, 157], "isthing": 1, "name": "cow"},
- {"id": 22, "color": [110, 76, 0], "isthing": 1, "name": "elephant"},
- {"id": 23, "color": [174, 57, 255], "isthing": 1, "name": "bear"},
- {"id": 24, "color": [199, 100, 0], "isthing": 1, "name": "zebra"},
- {"id": 25, "color": [72, 0, 118], "isthing": 1, "name": "giraffe"},
- {"id": 27, "color": [255, 179, 240], "isthing": 1, "name": "backpack"},
- {"id": 28, "color": [0, 125, 92], "isthing": 1, "name": "umbrella"},
- {"id": 31, "color": [209, 0, 151], "isthing": 1, "name": "handbag"},
- {"id": 32, "color": [188, 208, 182], "isthing": 1, "name": "tie"},
- {"id": 33, "color": [0, 220, 176], "isthing": 1, "name": "suitcase"},
- {"id": 34, "color": [255, 99, 164], "isthing": 1, "name": "frisbee"},
- {"id": 35, "color": [92, 0, 73], "isthing": 1, "name": "skis"},
- {"id": 36, "color": [133, 129, 255], "isthing": 1, "name": "snowboard"},
- {"id": 37, "color": [78, 180, 255], "isthing": 1, "name": "sports ball"},
- {"id": 38, "color": [0, 228, 0], "isthing": 1, "name": "kite"},
- {"id": 39, "color": [174, 255, 243], "isthing": 1, "name": "baseball bat"},
- {"id": 40, "color": [45, 89, 255], "isthing": 1, "name": "baseball glove"},
- {"id": 41, "color": [134, 134, 103], "isthing": 1, "name": "skateboard"},
- {"id": 42, "color": [145, 148, 174], "isthing": 1, "name": "surfboard"},
- {"id": 43, "color": [255, 208, 186], "isthing": 1, "name": "tennis racket"},
- {"id": 44, "color": [197, 226, 255], "isthing": 1, "name": "bottle"},
- {"id": 46, "color": [171, 134, 1], "isthing": 1, "name": "wine glass"},
- {"id": 47, "color": [109, 63, 54], "isthing": 1, "name": "cup"},
- {"id": 48, "color": [207, 138, 255], "isthing": 1, "name": "fork"},
- {"id": 49, "color": [151, 0, 95], "isthing": 1, "name": "knife"},
- {"id": 50, "color": [9, 80, 61], "isthing": 1, "name": "spoon"},
- {"id": 51, "color": [84, 105, 51], "isthing": 1, "name": "bowl"},
- {"id": 52, "color": [74, 65, 105], "isthing": 1, "name": "banana"},
- {"id": 53, "color": [166, 196, 102], "isthing": 1, "name": "apple"},
- {"id": 54, "color": [208, 195, 210], "isthing": 1, "name": "sandwich"},
- {"id": 55, "color": [255, 109, 65], "isthing": 1, "name": "orange"},
- {"id": 56, "color": [0, 143, 149], "isthing": 1, "name": "broccoli"},
- {"id": 57, "color": [179, 0, 194], "isthing": 1, "name": "carrot"},
- {"id": 58, "color": [209, 99, 106], "isthing": 1, "name": "hot dog"},
- {"id": 59, "color": [5, 121, 0], "isthing": 1, "name": "pizza"},
- {"id": 60, "color": [227, 255, 205], "isthing": 1, "name": "donut"},
- {"id": 61, "color": [147, 186, 208], "isthing": 1, "name": "cake"},
- {"id": 62, "color": [153, 69, 1], "isthing": 1, "name": "chair"},
- {"id": 63, "color": [3, 95, 161], "isthing": 1, "name": "couch"},
- {"id": 64, "color": [163, 255, 0], "isthing": 1, "name": "potted plant"},
- {"id": 65, "color": [119, 0, 170], "isthing": 1, "name": "bed"},
- {"id": 67, "color": [0, 182, 199], "isthing": 1, "name": "dining table"},
- {"id": 70, "color": [0, 165, 120], "isthing": 1, "name": "toilet"},
- {"id": 72, "color": [183, 130, 88], "isthing": 1, "name": "tv"},
- {"id": 73, "color": [95, 32, 0], "isthing": 1, "name": "laptop"},
- {"id": 74, "color": [130, 114, 135], "isthing": 1, "name": "mouse"},
- {"id": 75, "color": [110, 129, 133], "isthing": 1, "name": "remote"},
- {"id": 76, "color": [166, 74, 118], "isthing": 1, "name": "keyboard"},
- {"id": 77, "color": [219, 142, 185], "isthing": 1, "name": "cell phone"},
- {"id": 78, "color": [79, 210, 114], "isthing": 1, "name": "microwave"},
- {"id": 79, "color": [178, 90, 62], "isthing": 1, "name": "oven"},
- {"id": 80, "color": [65, 70, 15], "isthing": 1, "name": "toaster"},
- {"id": 81, "color": [127, 167, 115], "isthing": 1, "name": "sink"},
- {"id": 82, "color": [59, 105, 106], "isthing": 1, "name": "refrigerator"},
- {"id": 84, "color": [142, 108, 45], "isthing": 1, "name": "book"},
- {"id": 85, "color": [196, 172, 0], "isthing": 1, "name": "clock"},
- {"id": 86, "color": [95, 54, 80], "isthing": 1, "name": "vase"},
- {"id": 87, "color": [128, 76, 255], "isthing": 1, "name": "scissors"},
- {"id": 88, "color": [201, 57, 1], "isthing": 1, "name": "teddy bear"},
- {"id": 89, "color": [246, 0, 122], "isthing": 1, "name": "hair drier"},
- {"id": 90, "color": [191, 162, 208], "isthing": 1, "name": "toothbrush"},
- {"id": 92, "color": [255, 255, 128], "isthing": 0, "name": "banner"},
- {"id": 93, "color": [147, 211, 203], "isthing": 0, "name": "blanket"},
- {"id": 95, "color": [150, 100, 100], "isthing": 0, "name": "bridge"},
- {"id": 100, "color": [168, 171, 172], "isthing": 0, "name": "cardboard"},
- {"id": 107, "color": [146, 112, 198], "isthing": 0, "name": "counter"},
- {"id": 109, "color": [210, 170, 100], "isthing": 0, "name": "curtain"},
- {"id": 112, "color": [92, 136, 89], "isthing": 0, "name": "door-stuff"},
- {"id": 118, "color": [218, 88, 184], "isthing": 0, "name": "floor-wood"},
- {"id": 119, "color": [241, 129, 0], "isthing": 0, "name": "flower"},
- {"id": 122, "color": [217, 17, 255], "isthing": 0, "name": "fruit"},
- {"id": 125, "color": [124, 74, 181], "isthing": 0, "name": "gravel"},
- {"id": 128, "color": [70, 70, 70], "isthing": 0, "name": "house"},
- {"id": 130, "color": [255, 228, 255], "isthing": 0, "name": "light"},
- {"id": 133, "color": [154, 208, 0], "isthing": 0, "name": "mirror-stuff"},
- {"id": 138, "color": [193, 0, 92], "isthing": 0, "name": "net"},
- {"id": 141, "color": [76, 91, 113], "isthing": 0, "name": "pillow"},
- {"id": 144, "color": [255, 180, 195], "isthing": 0, "name": "platform"},
- {"id": 145, "color": [106, 154, 176], "isthing": 0, "name": "playingfield"},
- {"id": 147, "color": [230, 150, 140], "isthing": 0, "name": "railroad"},
- {"id": 148, "color": [60, 143, 255], "isthing": 0, "name": "river"},
- {"id": 149, "color": [128, 64, 128], "isthing": 0, "name": "road"},
- {"id": 151, "color": [92, 82, 55], "isthing": 0, "name": "roof"},
- {"id": 154, "color": [254, 212, 124], "isthing": 0, "name": "sand"},
- {"id": 155, "color": [73, 77, 174], "isthing": 0, "name": "sea"},
- {"id": 156, "color": [255, 160, 98], "isthing": 0, "name": "shelf"},
- {"id": 159, "color": [255, 255, 255], "isthing": 0, "name": "snow"},
- {"id": 161, "color": [104, 84, 109], "isthing": 0, "name": "stairs"},
- {"id": 166, "color": [169, 164, 131], "isthing": 0, "name": "tent"},
- {"id": 168, "color": [225, 199, 255], "isthing": 0, "name": "towel"},
- {"id": 171, "color": [137, 54, 74], "isthing": 0, "name": "wall-brick"},
- {"id": 175, "color": [135, 158, 223], "isthing": 0, "name": "wall-stone"},
- {"id": 176, "color": [7, 246, 231], "isthing": 0, "name": "wall-tile"},
- {"id": 177, "color": [107, 255, 200], "isthing": 0, "name": "wall-wood"},
- {"id": 178, "color": [58, 41, 149], "isthing": 0, "name": "water-other"},
- {"id": 180, "color": [183, 121, 142], "isthing": 0, "name": "window-blind"},
- {"id": 181, "color": [255, 73, 97], "isthing": 0, "name": "window-other"},
- {"id": 184, "color": [107, 142, 35], "isthing": 0, "name": "tree-merged"},
- {"id": 185, "color": [190, 153, 153], "isthing": 0, "name": "fence-merged"},
- {"id": 186, "color": [146, 139, 141], "isthing": 0, "name": "ceiling-merged"},
- {"id": 187, "color": [70, 130, 180], "isthing": 0, "name": "sky-other-merged"},
- {"id": 188, "color": [134, 199, 156], "isthing": 0, "name": "cabinet-merged"},
- {"id": 189, "color": [209, 226, 140], "isthing": 0, "name": "table-merged"},
- {"id": 190, "color": [96, 36, 108], "isthing": 0, "name": "floor-other-merged"},
- {"id": 191, "color": [96, 96, 96], "isthing": 0, "name": "pavement-merged"},
- {"id": 192, "color": [64, 170, 64], "isthing": 0, "name": "mountain-merged"},
- {"id": 193, "color": [152, 251, 152], "isthing": 0, "name": "grass-merged"},
- {"id": 194, "color": [208, 229, 228], "isthing": 0, "name": "dirt-merged"},
- {"id": 195, "color": [206, 186, 171], "isthing": 0, "name": "paper-merged"},
- {"id": 196, "color": [152, 161, 64], "isthing": 0, "name": "food-other-merged"},
- {"id": 197, "color": [116, 112, 0], "isthing": 0, "name": "building-other-merged"},
- {"id": 198, "color": [0, 114, 143], "isthing": 0, "name": "rock-merged"},
- {"id": 199, "color": [102, 102, 156], "isthing": 0, "name": "wall-other-merged"},
- {"id": 200, "color": [250, 141, 255], "isthing": 0, "name": "rug-merged"},
-]
diff --git a/yolort/data/coco.py b/yolort/data/coco.py
deleted file mode 100644
index 3e693ad4..00000000
--- a/yolort/data/coco.py
+++ /dev/null
@@ -1,115 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-"""
-COCO dataset which returns image_id for evaluation.
-Mostly copy-paste from https://github.com/pytorch/vision/blob/13b35ff/references/detection/coco_utils.py
-"""
-import torch
-import torchvision
-from yolort.utils import is_module_available, requires_module
-
-if is_module_available("pycocotools"):
- from pycocotools import mask as coco_mask
-
-
-class COCODetection(torchvision.datasets.CocoDetection):
- def __init__(self, img_folder, ann_file, transforms, return_masks=False):
- super().__init__(img_folder, ann_file)
- self._transforms = transforms
-
- json_category_id_to_contiguous_id = {v: i for i, v in enumerate(self.coco.getCatIds())}
- self.prepare = ConvertCocoPolysToMask(json_category_id_to_contiguous_id, return_masks)
-
- def __getitem__(self, idx):
- img, target = super().__getitem__(idx)
- image_id = self.ids[idx]
- target = {"image_id": image_id, "annotations": target}
- img, target = self.prepare(img, target)
- if self._transforms is not None:
- img, target = self._transforms(img, target)
- return img, target
-
-
-class ConvertCocoPolysToMask:
- def __init__(self, json_category_id_maps, return_masks=False):
- self.json_category_id_to_contiguous_id = json_category_id_maps
- self.return_masks = return_masks
-
- def __call__(self, image, target):
- w, h = image.size
-
- image_id = target["image_id"]
- image_id = torch.tensor([image_id])
-
- anno = target["annotations"]
-
- anno = [obj for obj in anno if "iscrowd" not in obj or obj["iscrowd"] == 0]
-
- boxes = [obj["bbox"] for obj in anno]
- # guard against no boxes via resizing
- boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
- # BoxMode: convert from XYWH_ABS to XYXY_ABS
- boxes[:, 2:] += boxes[:, :2]
- boxes[:, 0::2].clamp_(min=0, max=w)
- boxes[:, 1::2].clamp_(min=0, max=h)
-
- classes = [obj["category_id"] for obj in anno]
- classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
- classes = torch.tensor(classes, dtype=torch.int64)
-
- if self.return_masks:
- segmentations = [obj["segmentation"] for obj in anno]
- masks = convert_coco_poly_to_mask(segmentations, h, w)
-
- keypoints = None
- if anno and "keypoints" in anno[0]:
- keypoints = [obj["keypoints"] for obj in anno]
- keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
- num_keypoints = keypoints.shape[0]
- if num_keypoints:
- keypoints = keypoints.view(num_keypoints, -1, 3)
-
- keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
- boxes = boxes[keep]
- classes = classes[keep]
- if self.return_masks:
- masks = masks[keep]
- if keypoints is not None:
- keypoints = keypoints[keep]
-
- target = {}
- target["boxes"] = boxes
- target["labels"] = classes
- if self.return_masks:
- target["masks"] = masks
- target["image_id"] = image_id
- if keypoints is not None:
- target["keypoints"] = keypoints
-
- # for conversion to coco api
- area = torch.tensor([obj["area"] for obj in anno])
- iscrowd = torch.tensor([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in anno])
- target["area"] = area[keep]
- target["iscrowd"] = iscrowd[keep]
-
- target["orig_size"] = torch.as_tensor([int(h), int(w)])
- target["size"] = torch.as_tensor([int(h), int(w)])
-
- return image, target
-
-
-@requires_module("pycocotools")
-def convert_coco_poly_to_mask(segmentations, height, width):
- masks = []
- for polygons in segmentations:
- rles = coco_mask.frPyObjects(polygons, height, width)
- mask = coco_mask.decode(rles)
- if len(mask.shape) < 3:
- mask = mask[..., None]
- mask = torch.as_tensor(mask, dtype=torch.uint8)
- mask = mask.any(dim=2)
- masks.append(mask)
- if masks:
- masks = torch.stack(masks, dim=0)
- else:
- masks = torch.zeros((0, height, width), dtype=torch.uint8)
- return masks
diff --git a/yolort/data/data_augment.py b/yolort/data/data_augment.py
new file mode 100644
index 00000000..4e53f6c2
--- /dev/null
+++ b/yolort/data/data_augment.py
@@ -0,0 +1,243 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+"""
+Data augmentation functionality. Passed as callable transformations to
+Dataset classes.
+
+The data augmentation procedures were interpreted from @weiliu89's SSD paper
+http://arxiv.org/abs/1512.02325
+"""
+
+import math
+import random
+
+import cv2
+import numpy as np
+
+from yolort.utils import xyxy2cxcywh
+
+
+def augment_hsv(img, hgain=5, sgain=30, vgain=30):  # randomly shift hue/saturation/value of a BGR image; the result is written back into `img`
+ hsv_augs = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] # random gains, one per channel, each drawn from [-gain, gain)
+ hsv_augs *= np.random.randint(0, 2, 3) # random selection of h, s, v: each offset is kept (x1) or dropped (x0)
+ hsv_augs = hsv_augs.astype(np.int16)
+ img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int16)  # int16 so the signed offsets cannot wrap (assumes 8-bit input -- TODO confirm)
+
+ img_hsv[..., 0] = (img_hsv[..., 0] + hsv_augs[0]) % 180  # hue wraps around modulo 180
+ img_hsv[..., 1] = np.clip(img_hsv[..., 1] + hsv_augs[1], 0, 255)  # saturation is clamped to [0, 255]
+ img_hsv[..., 2] = np.clip(img_hsv[..., 2] + hsv_augs[2], 0, 255)  # value is clamped to [0, 255]
+
+ cv2.cvtColor(img_hsv.astype(img.dtype), cv2.COLOR_HSV2BGR, dst=img) # no return needed: the converted image is stored in `img` through dst
+
+
+def get_aug_params(value, center=0):
+    """Sample from ``center +/- value`` for a scalar (int or float) or from ``[lo, hi]`` for a pair."""
+    if isinstance(value, (int, float)):
+        return random.uniform(center - value, center + value)
+    if len(value) == 2:
+        return random.uniform(value[0], value[1])
+    raise ValueError(
+        "Affine params should be either a sequence containing two values "
+        "or a single number. Got {}".format(value)
+    )
+
+
+def get_affine_matrix(  # build a random 2x3 affine matrix (rotation + scale, shear, translation); returns (M, scale)
+ target_size,  # (width, height); only used to turn the `translate` fraction into pixels
+ degrees=10,
+ translate=0.1,
+ scales=0.1,
+ shear=10,
+):
+ twidth, theight = target_size
+
+ # Rotation and Scale
+ angle = get_aug_params(degrees)
+ scale = get_aug_params(scales, center=1.0)  # the scale factor is sampled around 1.0
+
+ if scale <= 0.0:
+ raise ValueError("Argument scale should be positive")
+
+ R = cv2.getRotationMatrix2D(angle=angle, center=(0, 0), scale=scale)  # rotation about the origin, scaled by `scale`
+
+ M = np.ones([2, 3])  # placeholder: both rows and then the translation column are overwritten below
+ # Shear
+ shear_x = math.tan(get_aug_params(shear) * math.pi / 180)  # sampled angle in degrees -> radians -> tangent
+ shear_y = math.tan(get_aug_params(shear) * math.pi / 180)
+
+ M[0] = R[0] + shear_y * R[1]
+ M[1] = R[1] + shear_x * R[0]
+
+ # Translation
+ translation_x = get_aug_params(translate) * twidth # x translation (pixels)
+ translation_y = get_aug_params(translate) * theight # y translation (pixels)
+
+ M[0, 2] = translation_x
+ M[1, 2] = translation_y
+
+ return M, scale
+
+
+def apply_affine_to_bboxes(targets, target_size, M, scale):  # warp the xyxy boxes in targets[:, :4] with M, in place; `scale` is not used here
+ num_gts = len(targets)
+
+ # warp corner points
+ twidth, theight = target_size
+ corner_points = np.ones((4 * num_gts, 3))  # homogeneous coordinates: the third column stays 1
+ corner_points[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
+ 4 * num_gts, 2
+ ) # x1y1, x2y2, x1y2, x2y1
+ corner_points = np.dot(corner_points, M.T) # apply affine transform
+ corner_points = corner_points.reshape(num_gts, 8)  # back to one row of four (x, y) corners per box
+
+ # create new boxes
+ corner_xs = corner_points[:, 0::2]
+ corner_ys = corner_points[:, 1::2]
+ new_bboxes = (  # axis-aligned box enclosing the four warped corners
+ np.concatenate(
+ (corner_xs.min(1), corner_ys.min(1), corner_xs.max(1), corner_ys.max(1))
+ )
+ .reshape(4, num_gts)
+ .T
+ )
+
+ # clip boxes
+ new_bboxes[:, 0::2] = new_bboxes[:, 0::2].clip(0, twidth)
+ new_bboxes[:, 1::2] = new_bboxes[:, 1::2].clip(0, theight)
+
+ targets[:, :4] = new_bboxes  # the caller's array is modified and also returned
+
+ return targets
+
+
+def random_affine(  # warp `img` with a random affine matrix and move the boxes along; returns (img, targets)
+ img,
+ targets=(),  # box array whose first four columns are xyxy; the empty default skips the box update
+ target_size=(640, 640),  # passed to cv2.warpAffine as dsize and used as the clipping bound for boxes
+ degrees=10,
+ translate=0.1,
+ scales=0.1,
+ shear=10,
+):
+ M, scale = get_affine_matrix(target_size, degrees, translate, scales, shear)
+
+ img = cv2.warpAffine(img, M, dsize=target_size, borderValue=(114, 114, 114))  # uncovered pixels are filled with 114
+
+ # Transform label coordinates
+ if len(targets) > 0:
+ targets = apply_affine_to_bboxes(targets, target_size, M, scale)
+
+ return img, targets
+
+
+def _mirror(image, boxes, prob=0.5):  # horizontal flip applied with probability `prob`; returns (image, boxes)
+ _, width, _ = image.shape  # expects a 3-dimensional image; axis 1 is taken as the width
+ if random.random() < prob:
+ image = image[:, ::-1]  # reversed view along axis 1
+ boxes[:, 0::2] = width - boxes[:, 2::-2]  # columns (2, 0) are read, so new x1 = width - x2 and new x2 = width - x1; `boxes` is edited in place
+ return image, boxes
+
+
+def preproc(img, input_size, swap=(2, 0, 1)):  # letterbox `img` into `input_size`; returns (float32 array transposed by `swap`, resize ratio r)
+ if len(img.shape) == 3:
+ padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114  # canvas pre-filled with the padding value 114
+ else:
+ padded_img = np.ones(input_size, dtype=np.uint8) * 114
+
+ r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])  # largest ratio at which both sides still fit
+ resized_img = cv2.resize(
+ img,
+ (int(img.shape[1] * r), int(img.shape[0] * r)),  # cv2.resize takes (width, height)
+ interpolation=cv2.INTER_LINEAR,
+ ).astype(np.uint8)
+ padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img  # image sits in the top-left corner, padding to the right/bottom
+
+ padded_img = padded_img.transpose(swap)  # NOTE(review): the default 3-axis `swap` presumably fails for a 2-D image -- confirm
+ padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
+ return padded_img, r
+
+
+class TrainTransform:  # training-time transform: HSV jitter, horizontal flip, letterbox, then fixed-size label padding
+ def __init__(self, max_labels=50, flip_prob=0.5, hsv_prob=1.0):
+ self.max_labels = max_labels  # number of rows in the label array returned by __call__
+ self.flip_prob = flip_prob  # probability handed to _mirror
+ self.hsv_prob = hsv_prob  # probability of calling augment_hsv
+
+ def __call__(self, image, targets, input_dim):  # targets rows are [x1, y1, x2, y2, label]; returns (image, float32 labels of shape (max_labels, 5))
+ boxes = targets[:, :4].copy()
+ labels = targets[:, 4].copy()
+ if len(boxes) == 0:  # no annotations: letterbox the image only and return all-zero labels
+ targets = np.zeros((self.max_labels, 5), dtype=np.float32)
+ image, r_o = preproc(image, input_dim)
+ return image, targets
+
+ image_o = image.copy()  # untouched copies, used as a fallback when augmentation leaves no usable box
+ targets_o = targets.copy()
+ height_o, width_o, _ = image_o.shape  # not used further down
+ boxes_o = targets_o[:, :4]
+ labels_o = targets_o[:, 4]
+ # bbox_o: [xyxy] to [c_x,c_y,w,h]
+ boxes_o = xyxy2cxcywh(boxes_o)
+
+ if random.random() < self.hsv_prob:
+ augment_hsv(image)  # modifies `image` in place; `image_o` was copied beforehand
+ image_t, boxes = _mirror(image, boxes, self.flip_prob)
+ height, width, _ = image_t.shape  # not used further down
+ image_t, r_ = preproc(image_t, input_dim)
+ # boxes [xyxy] 2 [cx,cy,w,h]
+ boxes = xyxy2cxcywh(boxes)
+ boxes *= r_  # bring the boxes to the scale of the resized image
+
+ mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1  # keep boxes whose smaller side exceeds 1 pixel
+ boxes_t = boxes[mask_b]
+ labels_t = labels[mask_b]
+
+ if len(boxes_t) == 0:  # every box was filtered out: fall back to the un-augmented image and boxes
+ image_t, r_o = preproc(image_o, input_dim)
+ boxes_o *= r_o
+ boxes_t = boxes_o
+ labels_t = labels_o
+
+ labels_t = np.expand_dims(labels_t, 1)
+
+ targets_t = np.hstack((labels_t, boxes_t))  # label first, then the four converted box values
+ padded_labels = np.zeros((self.max_labels, 5))
+ padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
+ : self.max_labels
+ ]  # copy at most max_labels rows; the remaining rows stay zero
+ padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
+ return image_t, padded_labels
+
+
+class ValTransform:
+ """
+ Validation/test-time transform: letterbox the image with ``preproc`` and,
+ when ``legacy`` is set, apply the legacy normalisation.
+
+ pad/resize -> axis swap -> (legacy only) reverse axis 0, /255, mean/std
+
+ Arguments:
+ swap ((int,int,int)): axis order handed to ``preproc``
+ (default (2, 0, 1))
+ legacy (bool): if True, reverse axis 0 of the swapped image, divide by
+ 255, subtract (0.485, 0.456, 0.406) and divide by (0.229, 0.224, 0.225)
+
+ Returns:
+ tuple: ``(img, np.zeros((1, 5)))`` -- the processed image and a
+ placeholder label array
+ """
+
+ def __init__(self, swap=(2, 0, 1), legacy=False):
+ self.swap = swap
+ self.legacy = legacy
+
+ # assume input is cv2 img for now
+ def __call__(self, img, res, input_size):  # `res` is accepted but not used
+ img, _ = preproc(img, input_size, self.swap)  # the resize ratio is discarded
+ if self.legacy:
+ img = img[::-1, :, :].copy()  # reverse axis 0 (the channel axis under the default swap)
+ img /= 255.0
+ img -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
+ img /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
+ return img, np.zeros((1, 5))
diff --git a/yolort/data/data_module.py b/yolort/data/data_module.py
index 55510da5..d17d7327 100644
--- a/yolort/data/data_module.py
+++ b/yolort/data/data_module.py
@@ -10,7 +10,7 @@
if is_module_available("pytorch_lightning"):
from pytorch_lightning import LightningDataModule
-from .coco import COCODetection
+from yolort.data.datasets.coco import COCODetection  # NOTE(review): the datasets/coco.py added by this patch defines COCODataset, not COCODetection -- this import looks broken, confirm
from .transforms import collate_fn, default_train_transforms, default_val_transforms
from .voc import VOCDetection
diff --git a/yolort/data/data_prefetcher.py b/yolort/data/data_prefetcher.py
new file mode 100644
index 00000000..a118cf4e
--- /dev/null
+++ b/yolort/data/data_prefetcher.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import torch
+
+
+class DataPrefetcher:
+ """
+ DataPrefetcher is inspired by code of following file:
+ https://github.com/NVIDIA/apex/blob/master/examples/imagenet/main_amp.py
+ It could speedup your pytorch dataloader. For more information, please check
+ https://github.com/NVIDIA/apex/issues/304#issuecomment-493562789.
+ """
+
+ def __init__(self, loader):  # wraps `loader` in an iterator and immediately preloads the first batch
+ self.loader = iter(loader)
+ self.stream = torch.cuda.Stream()  # side stream on which the host-to-device copies are issued
+ self.input_cuda = self._input_cuda_for_image
+ self.record_stream = DataPrefetcher._record_stream_for_image
+ self.preload()
+
+ def preload(self):  # fetch the next batch and start copying it to the GPU on the side stream
+ try:
+ self.next_input, self.next_target, _, _ = next(self.loader)  # each batch must unpack into four items; the last two are dropped
+ except StopIteration:
+ self.next_input = None  # exhausted loader: next() will hand back (None, None)
+ self.next_target = None
+ return
+
+ with torch.cuda.stream(self.stream):
+ self.input_cuda()
+ self.next_target = self.next_target.cuda(non_blocking=True)
+
+ def next(self):  # return the preloaded (input, target) pair and kick off loading of the following one
+ torch.cuda.current_stream().wait_stream(self.stream)  # make the current stream wait for the pending copies
+ input = self.next_input
+ target = self.next_target
+ if input is not None:
+ self.record_stream(input)
+ if target is not None:
+ target.record_stream(torch.cuda.current_stream())  # mark the tensor as in use by the current stream
+ self.preload()
+ return input, target
+
+ def _input_cuda_for_image(self):  # move the pending input batch to the GPU
+ self.next_input = self.next_input.cuda(non_blocking=True)
+
+ @staticmethod
+ def _record_stream_for_image(input):
+ input.record_stream(torch.cuda.current_stream())
diff --git a/yolort/data/dataloading.py b/yolort/data/dataloading.py
new file mode 100644
index 00000000..6fecf3f0
--- /dev/null
+++ b/yolort/data/dataloading.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+import random
+import uuid
+
+import numpy as np
+
+import torch
+from torch.utils.data.dataloader import DataLoader as torchDataLoader
+from torch.utils.data.dataloader import default_collate
+
+from .samplers import YoloBatchSampler
+
+
+def get_yolox_datadir():
+ """
+ Return the dataset root: the `YOLOX_DATADIR` environment variable if it is set,
+ otherwise `<parent of the installed yolox package directory>/datasets`.
+ """
+ yolox_datadir = os.getenv("YOLOX_DATADIR", None)
+ if yolox_datadir is None:
+ import yolox  # NOTE(review): this module lives in the yolort package; confirm that yolox is an intended dependency
+
+ yolox_path = os.path.dirname(os.path.dirname(yolox.__file__))  # two levels up from yolox/__init__.py
+ yolox_datadir = os.path.join(yolox_path, "datasets")
+ return yolox_datadir
+
+
+class DataLoader(torchDataLoader):
+    """
+    Lightnet dataloader that enables on the fly resizing of the images.
+    See :class:`torch.utils.data.DataLoader` for more information on the arguments.
+    Check more on the following website:
+    https://gitlab.com/EAVISE/lightnet/-/blob/master/lightnet/data/_dataloading.py
+
+    Unless the caller supplies a ``batch_sampler``, the one built by torch is
+    replaced with a :class:`YoloBatchSampler`; in that case the dataset has to
+    expose an ``input_dim`` attribute.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """
+        Build the torch loader, then swap in the YOLO batch sampler.
+
+        Args:
+            *args: positional arguments of :class:`torch.utils.data.DataLoader`
+                (``dataset, batch_size, shuffle, sampler, batch_sampler, ...``).
+            **kwargs: keyword arguments of :class:`torch.utils.data.DataLoader`.
+        """
+        super().__init__(*args, **kwargs)
+        # This class is also named ``DataLoader``, so ``self.__initialized`` mangles to
+        # ``_DataLoader__initialized``, the same attribute name the torch base class gets.
+        # Clearing it presumably lifts torch's post-init guard on ``batch_sampler``.
+        self.__initialized = False
+
+        def _argument(position, name, default):
+            # a loader argument may arrive positionally or by keyword
+            if len(args) > position:
+                return args[position]
+            return kwargs.get(name, default)
+
+        shuffle = _argument(2, "shuffle", False)
+        sampler = _argument(3, "sampler", None)
+        batch_sampler = _argument(4, "batch_sampler", None)
+
+        # Use custom BatchSampler
+        if batch_sampler is None:
+            if sampler is None:
+                if shuffle:
+                    sampler = torch.utils.data.sampler.RandomSampler(self.dataset)
+                else:
+                    sampler = torch.utils.data.sampler.SequentialSampler(self.dataset)
+            batch_sampler = YoloBatchSampler(
+                sampler,
+                self.batch_size,
+                self.drop_last,
+                input_dimension=self.dataset.input_dim,
+            )
+
+        self.batch_sampler = batch_sampler
+
+        self.__initialized = True
+
+    def close_mosaic(self):
+        """Turn mosaic off by clearing the ``mosaic`` flag on the batch sampler."""
+        self.batch_sampler.mosaic = False
+
+
+def list_collate(batch):
+ """
+ Function that collates lists or tuples together into one list (of lists/tuples).
+ Use this as the collate function in a Dataloader, if you want to have a list of
+ items as an output, as opposed to tensors (eg. Brambox.boxes).
+ """
+ items = list(zip(*batch))  # transpose: one tuple per field instead of one tuple per sample
+
+ for i in range(len(items)):
+ if isinstance(items[i][0], (list, tuple)):  # only the first sample's value decides how the field is treated
+ items[i] = list(items[i])  # list/tuple fields are kept as a plain Python list
+ else:
+ items[i] = default_collate(items[i])  # every other field goes through torch's default collation
+
+ return items
+
+
+def worker_init_reset_seed(worker_id):  # reseed random, torch and numpy with one fresh seed; `worker_id` is unused (presumably a DataLoader worker_init_fn)
+ seed = uuid.uuid4().int % 2**32  # random UUID reduced to a 32-bit seed
+ random.seed(seed)
+ torch.set_rng_state(torch.manual_seed(seed).get_state())
+ np.random.seed(seed)
diff --git a/yolort/data/datasets/__init__.py b/yolort/data/datasets/__init__.py
new file mode 100644
index 00000000..8a02c7f0
--- /dev/null
+++ b/yolort/data/datasets/__init__.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+from .coco import COCODataset
+from .coco_classes import COCO_CLASSES
+from .datasets_wrapper import CacheDataset, ConcatDataset, Dataset, MixConcatDataset
+from .mosaicdetection import MosaicDetection
diff --git a/yolort/data/datasets/coco.py b/yolort/data/datasets/coco.py
new file mode 100644
index 00000000..5ac225a0
--- /dev/null
+++ b/yolort/data/datasets/coco.py
@@ -0,0 +1,187 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+import copy
+import os
+
+import cv2
+import numpy as np
+from pycocotools.coco import COCO
+
+from .datasets_wrapper import CacheDataset, cache_read_img
+
+
+def remove_useless_info(coco):
+ """
+ Remove useless info in coco dataset. COCO object is modified inplace.
+ This function is mainly used for saving memory (save about 30% mem).
+ """
+ if isinstance(coco, COCO):  # any other object is left untouched
+ dataset = coco.dataset
+ dataset.pop("info", None)  # pop(..., None): keys that are absent are simply skipped
+ dataset.pop("licenses", None)
+ for img in dataset["images"]:
+ img.pop("license", None)
+ img.pop("coco_url", None)
+ img.pop("date_captured", None)
+ img.pop("flickr_url", None)
+ if "annotations" in coco.dataset:
+ for anno in coco.dataset["annotations"]:
+ anno.pop("segmentation", None)  # segmentation data is dropped from every annotation
+
+
+class COCODataset(CacheDataset):
+ """
+ COCO dataset class.
+ """
+
+ def __init__(
+ self,
+ data_dir=None,
+ json_file="instances_train2017.json",
+ name="train2017",
+ img_size=(416, 416),
+ preproc=None,
+ cache=False,
+ cache_type="ram",
+ ):
+ """
+ COCO dataset initialization. Annotation data are read into memory by COCO API.
+ Args:
+ data_dir (str): dataset root directory; None selects "data-bin/coco128"
+ json_file (str): COCO json file name, looked up under <data_dir>/annotations
+ name (str): COCO data name (e.g. 'train2017' or 'val2017'), the folder under <data_dir>/images
+ img_size (tuple): (height, width) that images and boxes are scaled to fit
+ preproc: data augmentation strategy; cache / cache_type are forwarded to CacheDataset
+ """
+ if data_dir is None:
+ data_dir = os.path.join("data-bin", "coco128")
+ self.data_dir = data_dir
+ self.json_file = json_file
+
+ self.coco = COCO(os.path.join(self.data_dir, "annotations", self.json_file))
+ remove_useless_info(self.coco)
+ self.ids = self.coco.getImgIds()
+ self.num_imgs = len(self.ids)
+ self.class_ids = sorted(self.coco.getCatIds())  # a category's position in this list is used as its class index
+ self.cats = self.coco.loadCats(self.coco.getCatIds())
+ self._classes = tuple([c["name"] for c in self.cats])
+ self.name = name
+ self.img_size = img_size
+ self.preproc = preproc
+ self.annotations = self._load_coco_annotations()  # one (labels, img_info, resized_info, file_name) tuple per image
+
+ path_filename = [os.path.join(name, anno[3]) for anno in self.annotations]  # anno[3] is the image file name
+ super().__init__(  # called last: the base class may start caching, which reads the attributes set above
+ input_dimension=img_size,
+ num_imgs=self.num_imgs,
+ data_dir=data_dir,
+ cache_dir_name=f"cache_{name}",
+ path_filename=path_filename,
+ cache=cache,
+ cache_type=cache_type
+ )
+
+ def __len__(self):
+ return self.num_imgs
+
+ def _load_coco_annotations(self):  # annotation tuples for every image id, in the order of self.ids
+ return [self.load_anno_from_ids(_ids) for _ids in self.ids]
+
+ def load_anno_from_ids(self, id_):  # returns (labels, (height, width), resized (height, width), file_name) for one image id
+ im_ann = self.coco.loadImgs(id_)[0]
+ width = im_ann["width"]
+ height = im_ann["height"]
+ anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=False)
+ annotations = self.coco.loadAnns(anno_ids)
+ objs = []
+ for obj in annotations:
+ x1 = np.max((0, obj["bbox"][0]))  # bbox is read as [x, y, w, h]; the corner is clamped at 0
+ y1 = np.max((0, obj["bbox"][1]))
+ x2 = np.min((width, x1 + np.max((0, obj["bbox"][2]))))  # far corner is clamped to the image size
+ y2 = np.min((height, y1 + np.max((0, obj["bbox"][3]))))
+ if obj["area"] > 0 and x2 >= x1 and y2 >= y1:  # annotations failing this check are dropped
+ obj["clean_bbox"] = [x1, y1, x2, y2]
+ objs.append(obj)
+
+ num_objs = len(objs)
+
+ res = np.zeros((num_objs, 5))  # rows are [x1, y1, x2, y2, class index]
+ for ix, obj in enumerate(objs):
+ cls = self.class_ids.index(obj["category_id"])
+ res[ix, 0:4] = obj["clean_bbox"]
+ res[ix, 4] = cls
+
+ r = min(self.img_size[0] / height, self.img_size[1] / width)  # same ratio as load_resized_img uses
+ res[:, :4] *= r  # boxes are stored at the resized-image scale
+
+ img_info = (height, width)
+ resized_info = (int(height * r), int(width * r))
+
+ file_name = (
+ im_ann["file_name"]
+ if "file_name" in im_ann
+ else "{:012}".format(id_) + ".jpg"  # fallback: the id zero-padded to 12 digits
+ )
+
+ return (res, img_info, resized_info, file_name)
+
+ def load_anno(self, index):  # label array only, for the image at `index`
+ return self.annotations[index][0]
+
+ def load_resized_img(self, index):  # image resized to fit img_size with its aspect ratio kept (no padding)
+ img = self.load_image(index)
+ r = min(self.img_size[0] / img.shape[0], self.img_size[1] / img.shape[1])
+ resized_img = cv2.resize(
+ img,
+ (int(img.shape[1] * r), int(img.shape[0] * r)),
+ interpolation=cv2.INTER_LINEAR,
+ ).astype(np.uint8)
+ return resized_img
+
+ def load_image(self, index):  # reads <data_dir>/images/<name>/<file_name> with cv2.imread
+ file_name = self.annotations[index][3]
+
+ img_file = os.path.join(self.data_dir, "images", self.name, file_name)
+
+ img = cv2.imread(img_file)
+ assert img is not None, f"file named {img_file} not found"  # cv2.imread yields None instead of raising
+
+ return img
+
+ @cache_read_img(use_cache=True)
+ def read_img(self, index):  # the decorator serves the image from the cache when caching is enabled
+ return self.load_resized_img(index)
+
+ def pull_item(self, index):  # returns (resized image, copy of labels, original (height, width), array holding the image id)
+ id_ = self.ids[index]
+ label, origin_image_size, _, _ = self.annotations[index]
+ img = self.read_img(index)
+
+ return img, copy.deepcopy(label), origin_image_size, np.array([id_])
+
+ @CacheDataset.mosaic_getitem
+ def __getitem__(self, index):
+ """
+ One image / label pair for the given index is picked up and pre-processed.
+
+ Args:
+ index (int): data index
+
+ Returns:
+ img (numpy.ndarray): image from ``pull_item``, passed through ``preproc``
+ when one is configured.
+ target: labels from ``pull_item``, i.e. rows of [x1, y1, x2, y2, class]
+ at the resized-image scale, or whatever ``preproc`` returns for them
+ when one is configured (presumably padded [class, xc, yc, w, h]
+ rows of shape [max_labels, 5]; this depends on the configured
+ ``preproc`` -- TODO confirm).
+ info_img : tuple of h, w.
+ h, w (int): original shape of the image
+ img_id (numpy.ndarray): one-element array with the COCO image id. Used for evaluation.
+ """
+ img, target, img_info, img_id = self.pull_item(index)
+
+ if self.preproc is not None:
+ img, target = self.preproc(img, target, self.input_dim)
+ return img, target, img_info, img_id
diff --git a/yolort/data/datasets/coco_classes.py b/yolort/data/datasets/coco_classes.py
new file mode 100644
index 00000000..17f5cbe6
--- /dev/null
+++ b/yolort/data/datasets/coco_classes.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+COCO_CLASSES = (
+ "person",
+ "bicycle",
+ "car",
+ "motorcycle",
+ "airplane",
+ "bus",
+ "train",
+ "truck",
+ "boat",
+ "traffic light",
+ "fire hydrant",
+ "stop sign",
+ "parking meter",
+ "bench",
+ "bird",
+ "cat",
+ "dog",
+ "horse",
+ "sheep",
+ "cow",
+ "elephant",
+ "bear",
+ "zebra",
+ "giraffe",
+ "backpack",
+ "umbrella",
+ "handbag",
+ "tie",
+ "suitcase",
+ "frisbee",
+ "skis",
+ "snowboard",
+ "sports ball",
+ "kite",
+ "baseball bat",
+ "baseball glove",
+ "skateboard",
+ "surfboard",
+ "tennis racket",
+ "bottle",
+ "wine glass",
+ "cup",
+ "fork",
+ "knife",
+ "spoon",
+ "bowl",
+ "banana",
+ "apple",
+ "sandwich",
+ "orange",
+ "broccoli",
+ "carrot",
+ "hot dog",
+ "pizza",
+ "donut",
+ "cake",
+ "chair",
+ "couch",
+ "potted plant",
+ "bed",
+ "dining table",
+ "toilet",
+ "tv",
+ "laptop",
+ "mouse",
+ "remote",
+ "keyboard",
+ "cell phone",
+ "microwave",
+ "oven",
+ "toaster",
+ "sink",
+ "refrigerator",
+ "book",
+ "clock",
+ "vase",
+ "scissors",
+ "teddy bear",
+ "hair drier",
+ "toothbrush",
+)
diff --git a/yolort/data/datasets/datasets_wrapper.py b/yolort/data/datasets/datasets_wrapper.py
new file mode 100644
index 00000000..c45fe380
--- /dev/null
+++ b/yolort/data/datasets/datasets_wrapper.py
@@ -0,0 +1,300 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import bisect
+import copy
+import os
+import random
+from abc import ABCMeta, abstractmethod
+from functools import partial, wraps
+from multiprocessing.pool import ThreadPool
+import psutil
+from loguru import logger
+from tqdm import tqdm
+
+import numpy as np
+
+from torch.utils.data.dataset import ConcatDataset as torchConcatDataset
+from torch.utils.data.dataset import Dataset as torchDataset
+
+
+class ConcatDataset(torchConcatDataset):  # torch ConcatDataset that also forwards pull_item() to the dataset owning the index
+ def __init__(self, datasets):
+ super(ConcatDataset, self).__init__(datasets)
+ if hasattr(self.datasets[0], "input_dim"):  # only the first dataset is inspected
+ self._input_dim = self.datasets[0].input_dim
+ self.input_dim = self.datasets[0].input_dim
+
+ def pull_item(self, idx):  # same index arithmetic as a concatenated __getitem__, but calls pull_item on the sub-dataset
+ if idx < 0:
+ if -idx > len(self):
+ raise ValueError(
+ "absolute value of index should not exceed dataset length"
+ )
+ idx = len(self) + idx  # negative indices count from the end
+ dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)  # which dataset the global index falls into
+ if dataset_idx == 0:
+ sample_idx = idx
+ else:
+ sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]  # offset inside that dataset
+ return self.datasets[dataset_idx].pull_item(sample_idx)
+
+
+class MixConcatDataset(torchConcatDataset):
+    """Concatenation of datasets that accepts plain int indices or 3-item index sequences."""
+
+    def __init__(self, datasets):
+        super(MixConcatDataset, self).__init__(datasets)
+        # mirror the first dataset's input size when it exposes one
+        if hasattr(self.datasets[0], "input_dim"):
+            self._input_dim = self.datasets[0].input_dim
+            self.input_dim = self.datasets[0].input_dim
+
+    def __getitem__(self, index):
+        """
+        Fetch a sample by global position.
+        ``index`` is either an int or a 3-item sequence whose item 1 is the position;
+        items 0 and 2 of a sequence are forwarded unchanged to the owning dataset.
+        """
+        is_plain = isinstance(index, int)
+        idx = index if is_plain else index[1]
+        if idx < 0:
+            if -idx > len(self):
+                raise ValueError("absolute value of index should not exceed dataset length")
+            idx = len(self) + idx
+        dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
+        sample_idx = idx if dataset_idx == 0 else idx - self.cumulative_sizes[dataset_idx - 1]
+        item = sample_idx if is_plain else (index[0], sample_idx, index[2])
+        return self.datasets[dataset_idx][item]
+
+
+class Dataset(torchDataset):
+ """ This class is a subclass of the base :class:`torch.utils.data.Dataset`,
+ that enables on the fly resizing of the ``input_dim``.
+
+ Args:
+ input_dimension (tuple): (width,height) tuple with default dimensions of the network
+ """
+
+ def __init__(self, input_dimension, mosaic=True):  # `mosaic` is the initial value of the enable_mosaic flag
+ super().__init__()
+ self.__input_dim = input_dimension[:2]  # only the first two entries are kept
+ self.enable_mosaic = mosaic
+
+ @property
+ def input_dim(self):
+ """
+ Dimension that can be used by transforms to set the correct image size, etc.
+ This allows transforms to have a single source of truth
+ for the input dimension of the network.
+
+ Return:
+ the ``_input_dim`` attribute when one has been set on the instance, otherwise the size given at construction
+ """
+ if hasattr(self, "_input_dim"):  # an externally assigned override takes precedence
+ return self._input_dim
+ return self.__input_dim
+
+ @staticmethod
+ def mosaic_getitem(getitem_fn):
+ """
+ Decorator method that needs to be used around the ``__getitem__`` method.
+ For a non-int index it stores ``index[0]`` in ``enable_mosaic`` and passes ``index[1]`` on.
+
+ Example:
+ >>> class CustomSet(Dataset):
+ ... def __len__(self):
+ ... return 10
+ ... @Dataset.mosaic_getitem
+ ... def __getitem__(self, index):
+ ... return self.enable_mosaic
+ """
+
+ @wraps(getitem_fn)
+ def wrapper(self, index):
+ if not isinstance(index, int):  # sequence index: item 0 is the mosaic flag, item 1 the real index
+ self.enable_mosaic = index[0]
+ index = index[1]
+
+ ret_val = getitem_fn(self, index)
+
+ return ret_val
+
+ return wrapper
+
+
+class CacheDataset(Dataset, metaclass=ABCMeta):
+    """ This class is a subclass of the base :class:`Dataset`,
+    that enables cache images to ram or disk.
+
+    Args:
+        input_dimension (tuple): (width,height) tuple with default dimensions of the network
+        num_imgs (int): dataset size
+        data_dir (str): the root directory of the dataset, e.g. `/path/to/COCO`.
+        cache_dir_name (str): the name of the directory to cache to disk,
+            e.g. `"custom_cache"`. The files cached to disk will be saved
+            under `/path/to/COCO/custom_cache`.
+        path_filename (list[str]): a list of paths to the data relative to the `data_dir`,
+            e.g. if you have data `/path/to/COCO/train/1.jpg`, `/path/to/COCO/train/2.jpg`,
+            then `path_filename = ['train/1.jpg', 'train/2.jpg']`.
+        cache (bool): whether to cache the images to ram or disk.
+        cache_type (str): the type of cache,
+            "ram" : Caching imgs to ram for fast training.
+            "disk": Caching imgs to disk for fast training.
+    """
+
+    def __init__(
+        self, input_dimension, num_imgs=None, data_dir=None, cache_dir_name=None,
+        path_filename=None, cache=False, cache_type="ram",
+    ):
+        super().__init__(input_dimension)
+        self.cache = cache
+        self.cache_type = cache_type
+        # defined for every cache type: cache_images() tests it for "disk" as well as "ram"
+        self.imgs = None
+
+        if self.cache and self.cache_type == "disk":
+            self.cache_dir = os.path.join(data_dir, cache_dir_name)
+            self.path_filename = path_filename
+
+        if self.cache:
+            self.cache_images(
+                num_imgs=num_imgs,
+                data_dir=data_dir,
+                cache_dir_name=cache_dir_name,
+                path_filename=path_filename,
+            )
+
+    def __del__(self):
+        # getattr/pop: __init__ may have raised before these attributes existed
+        if getattr(self, "cache", False) and getattr(self, "cache_type", None) == "ram":
+            self.__dict__.pop("imgs", None)
+
+    @abstractmethod
+    def read_img(self, index):
+        """
+        Given index, return the corresponding image
+
+        Args:
+            index (int): image index
+        """
+        raise NotImplementedError
+
+    def cache_images(self, num_imgs=None, data_dir=None, cache_dir_name=None, path_filename=None):
+        """
+        Read every image once and keep it in ``self.imgs`` (ram) or as ``.npy`` files (disk).
+
+        Images are read through ``self.read_img(index, use_cache=False)``, so the concrete
+        ``read_img`` has to accept ``use_cache`` (``cache_read_img`` adds that keyword).
+        RAM caching is switched off when the estimated size exceeds the available memory;
+        an existing disk cache directory is reused as is.
+        """
+        assert num_imgs is not None, "num_imgs must be specified as the size of the dataset"
+        if self.cache_type == "disk":
+            # test each argument: a chained ``a and b and c`` stops at the first falsy value
+            assert all(arg is not None for arg in (data_dir, cache_dir_name, path_filename)), \
+                "data_dir, cache_name and path_filename must be specified if cache_type is disk"
+            self.path_filename = path_filename
+
+        mem = psutil.virtual_memory()
+        mem_required = self.cal_cache_occupy(num_imgs)
+        gb = 1 << 30
+
+        if self.cache_type == "ram":
+            if mem_required > mem.available:
+                self.cache = False
+            else:
+                logger.info(
+                    f"{mem_required / gb:.1f}GB RAM required, "
+                    f"{mem.available / gb:.1f}/{mem.total / gb:.1f}GB RAM available, "
+                    f"Since the first thing we do is cache, "
+                    f"there is no guarantee that the remaining memory space is sufficient"
+                )
+
+        if not self.cache or self.imgs is not None:
+            return
+
+        if self.cache_type == 'ram':
+            self.imgs = [None] * num_imgs
+            logger.info("You are using cached images in RAM to accelerate training!")
+        elif os.path.exists(self.cache_dir):
+            logger.info(f"Found disk cache at {self.cache_dir}")
+            return
+        else:  # 'disk', nothing cached yet
+            os.mkdir(self.cache_dir)
+            logger.warning(
+                f"\n*******************************************************************\n"
+                f"You are using cached images in DISK to accelerate training.\n"
+                f"This requires large DISK space.\n"
+                f"Make sure you have {mem_required / gb:.1f} "
+                f"available DISK space for training your dataset.\n"
+                f"*******************************************************************\n"
+            )
+
+        logger.info(
+            "Caching images...\n"
+            "This might take some time for your dataset"
+        )
+
+        # os.cpu_count() returns None when the CPU count cannot be determined
+        num_threads = min(8, max(1, (os.cpu_count() or 1) - 1))
+        b = 0
+        # the context manager shuts the pool down once all images have been consumed
+        with ThreadPool(num_threads) as pool:
+            load_imgs = pool.imap(partial(self.read_img, use_cache=False), range(num_imgs))
+            pbar = tqdm(enumerate(load_imgs), total=num_imgs)
+            for i, x in pbar:  # x = self.read_img(self, i, use_cache=False)
+                if self.cache_type == 'ram':
+                    self.imgs[i] = x
+                else:  # 'disk'
+                    cache_filename = f'{self.path_filename[i].split(".")[0]}.npy'
+                    cache_path_filename = os.path.join(self.cache_dir, cache_filename)
+                    os.makedirs(os.path.dirname(cache_path_filename), exist_ok=True)
+                    np.save(cache_path_filename, x)
+                b += x.nbytes
+                pbar.desc = \
+                    f'Caching images ({b / gb:.1f}/{mem_required / gb:.1f}GB {self.cache_type})'
+            pbar.close()
+
+    def cal_cache_occupy(self, num_imgs):
+        """Estimate the cache size in bytes from up to 32 randomly chosen images."""
+        cache_bytes = 0
+        num_samples = min(num_imgs, 32)
+        if num_samples <= 0:  # empty dataset: nothing to sample, avoid dividing by zero
+            return 0
+        for _ in range(num_samples):
+            img = self.read_img(index=random.randint(0, num_imgs - 1), use_cache=False)
+            cache_bytes += img.nbytes
+        return cache_bytes * num_imgs / num_samples
+
+
+def cache_read_img(use_cache=True):  # decorator factory; `use_cache` becomes the default of the wrapped method's keyword
+ def decorator(read_img_fn):
+ """
+ Decorate the read_img function to cache the image
+
+ Args:
+ read_img_fn: read_img function
+ use_cache (bool, optional): For the decorated read_img function,
+ whether to read the image from cache.
+ Defaults to True.
+ """
+ @wraps(read_img_fn)
+ def wrapper(self, index, use_cache=use_cache):
+ cache = self.cache and use_cache  # the cache is consulted only if the dataset has caching on and the caller allows it
+ if cache:
+ if self.cache_type == "ram":
+ img = self.imgs[index]
+ img = copy.deepcopy(img)  # hand out a copy so callers cannot modify the cached image
+ elif self.cache_type == "disk":
+ img = np.load(
+ os.path.join(
+ self.cache_dir, f"{self.path_filename[index].split('.')[0]}.npy"))  # path text before the first "." plus ".npy"
+ else:
+ raise ValueError(f"Unknown cache type: {self.cache_type}")
+ else:
+ img = read_img_fn(self, index)  # cache bypassed: call the undecorated reader
+ return img
+ return wrapper
+ return decorator
diff --git a/yolort/data/datasets/mosaicdetection.py b/yolort/data/datasets/mosaicdetection.py
new file mode 100644
index 00000000..ba11cfdc
--- /dev/null
+++ b/yolort/data/datasets/mosaicdetection.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import random
+
+import cv2
+import numpy as np
+
+from yolort.utils import adjust_box_anns, get_local_rank
+
+from ..data_augment import random_affine
+from .datasets_wrapper import Dataset
+
+
+def get_mosaic_coordinate(mosaic_image, mosaic_index, xc, yc, w, h, input_h, input_w):
+ # Returns (x1, y1, x2, y2) on the mosaic canvas and the matching crop of the w x h tile; mosaic_image is unused.
+ # index0 to top left part of image
+ if mosaic_index == 0:
+ x1, y1, x2, y2 = max(xc - w, 0), max(yc - h, 0), xc, yc
+ small_coord = w - (x2 - x1), h - (y2 - y1), w, h
+ # index1 to top right part of image
+ elif mosaic_index == 1:
+ x1, y1, x2, y2 = xc, max(yc - h, 0), min(xc + w, input_w * 2), yc
+ small_coord = 0, h - (y2 - y1), min(w, x2 - x1), h
+ # index2 to bottom left part of image
+ elif mosaic_index == 2:
+ x1, y1, x2, y2 = max(xc - w, 0), yc, xc, min(input_h * 2, yc + h)
+ small_coord = w - (x2 - x1), 0, w, min(y2 - y1, h)
+ # index3 to bottom right part of image
+ elif mosaic_index == 3:
+ x1, y1, x2, y2 = xc, yc, min(xc + w, input_w * 2), min(input_h * 2, yc + h) # noqa
+ small_coord = 0, 0, min(w, x2 - x1), min(y2 - y1, h)
+ return (x1, y1, x2, y2), small_coord  # NOTE(review): names are unbound if mosaic_index is not in 0..3
+
+
+class MosaicDetection(Dataset):
+ """Detection dataset wrapper that performs mixup for normal dataset."""
+
+ def __init__(
+ self, dataset, img_size, mosaic=True, preproc=None,
+ degrees=10.0, translate=0.1, mosaic_scale=(0.5, 1.5),
+ mixup_scale=(0.5, 1.5), shear=2.0, enable_mixup=True,
+ mosaic_prob=1.0, mixup_prob=1.0, *args
+ ):
+ """
+ Wrap `dataset` so that items can be served with mosaic and mixup augmentation.
+ Args:
+ dataset(Dataset) : Pytorch dataset object.
+ img_size (tuple): forwarded to the base Dataset constructor.
+ mosaic (bool): enable mosaic augmentation or not.
+ preproc (func): called as preproc(img, labels, input_dim) on every returned sample.
+ degrees (float): passed to random_affine as `degrees`.
+ translate (float): passed to random_affine as `translate`.
+ mosaic_scale (tuple): passed to random_affine as `scales`.
+ mixup_scale (tuple): (low, high) range of the random resize factor drawn in mixup().
+ shear (float): passed to random_affine as `shear`.
+ enable_mixup (bool): enable mixup after mosaic or not.
+ *args(tuple) : accepted but not used by this class.
+ """
+ super().__init__(img_size, mosaic=mosaic)
+ self._dataset = dataset
+ self.preproc = preproc
+ self.degrees = degrees
+ self.translate = translate
+ self.scale = mosaic_scale
+ self.shear = shear
+ self.mixup_scale = mixup_scale
+ self.enable_mosaic = mosaic
+ self.enable_mixup = enable_mixup
+ self.mosaic_prob = mosaic_prob  # probability of building a mosaic for an item
+ self.mixup_prob = mixup_prob  # probability of applying mixup to a mosaic
+ self.local_rank = get_local_rank()
+
+ def __len__(self):  # same length as the wrapped dataset
+ return len(self._dataset)
+
+ @Dataset.mosaic_getitem
+ def __getitem__(self, idx):
+ if self.enable_mosaic and random.random() < self.mosaic_prob:
+ mosaic_labels = []
+ input_dim = self._dataset.input_dim
+ input_h, input_w = input_dim[0], input_dim[1]
+
+ # mosaic center (xc, yc), drawn between 0.5x and 1.5x of the input size
+ yc = int(random.uniform(0.5 * input_h, 1.5 * input_h))
+ xc = int(random.uniform(0.5 * input_w, 1.5 * input_w))
+
+ # 3 additional image indices
+ indices = [idx] + [random.randint(0, len(self._dataset) - 1) for _ in range(3)]
+
+ for i_mosaic, index in enumerate(indices):
+ img, _labels, _, img_id = self._dataset.pull_item(index)  # img_id ends up as the id of the last tile
+ h0, w0 = img.shape[:2] # orig hw
+ scale = min(1. * input_h / h0, 1. * input_w / w0)
+ img = cv2.resize(
+ img, (int(w0 * scale), int(h0 * scale)), interpolation=cv2.INTER_LINEAR
+ )
+ # generate output mosaic image
+ (h, w, c) = img.shape[:3]
+ if i_mosaic == 0:
+ mosaic_img = np.full((input_h * 2, input_w * 2, c), 114, dtype=np.uint8)
+
+ # suffix l means large image, while s means small image in mosaic aug.
+ (l_x1, l_y1, l_x2, l_y2), (s_x1, s_y1, s_x2, s_y2) = get_mosaic_coordinate(
+ mosaic_img, i_mosaic, xc, yc, w, h, input_h, input_w
+ )
+
+ mosaic_img[l_y1:l_y2, l_x1:l_x2] = img[s_y1:s_y2, s_x1:s_x2]
+ padw, padh = l_x1 - s_x1, l_y1 - s_y1
+
+ labels = _labels.copy()
+ # Scale label columns 0..3 by the resize ratio and shift them by the paste offset
+ if _labels.size > 0:
+ labels[:, 0] = scale * _labels[:, 0] + padw
+ labels[:, 1] = scale * _labels[:, 1] + padh
+ labels[:, 2] = scale * _labels[:, 2] + padw
+ labels[:, 3] = scale * _labels[:, 3] + padh
+ mosaic_labels.append(labels)
+
+ if len(mosaic_labels):
+ mosaic_labels = np.concatenate(mosaic_labels, 0)
+ np.clip(mosaic_labels[:, 0], 0, 2 * input_w, out=mosaic_labels[:, 0])
+ np.clip(mosaic_labels[:, 1], 0, 2 * input_h, out=mosaic_labels[:, 1])
+ np.clip(mosaic_labels[:, 2], 0, 2 * input_w, out=mosaic_labels[:, 2])
+ np.clip(mosaic_labels[:, 3], 0, 2 * input_h, out=mosaic_labels[:, 3])
+
+ mosaic_img, mosaic_labels = random_affine(
+ mosaic_img,
+ mosaic_labels,
+ target_size=(input_w, input_h),
+ degrees=self.degrees,
+ translate=self.translate,
+ scales=self.scale,
+ shear=self.shear,
+ )
+
+ # -----------------------------------------------------------------
+ # Mixup with another image via self.mixup; CopyPaste: https://arxiv.org/abs/2012.07177
+ # -----------------------------------------------------------------
+ if (
+ self.enable_mixup
+ and not len(mosaic_labels) == 0
+ and random.random() < self.mixup_prob
+ ):
+ mosaic_img, mosaic_labels = self.mixup(mosaic_img, mosaic_labels, self.input_dim)
+ mix_img, padded_labels = self.preproc(mosaic_img, mosaic_labels, self.input_dim)
+ img_info = (mix_img.shape[1], mix_img.shape[0])
+
+ # -----------------------------------------------------------------
+ # img_info and img_id are not used for training.
+ # They are also hard to be specified on a mosaic image.
+ # -----------------------------------------------------------------
+ return mix_img, padded_labels, img_info, img_id
+
+ else:
+ self._dataset._input_dim = self.input_dim  # keep the wrapped dataset's input size in sync
+ img, label, img_info, img_id = self._dataset.pull_item(idx)
+ img, label = self.preproc(img, label, self.input_dim)
+ return img, label, img_info, img_id
+
+ def mixup(self, origin_img, origin_labels, input_dim):
+ jit_factor = random.uniform(*self.mixup_scale)  # random resize factor for the second image
+ FLIP = random.uniform(0, 1) > 0.5  # horizontal flip with probability 0.5
+ cp_labels = []
+ while len(cp_labels) == 0:  # resample until load_anno returns a non-empty result
+ cp_index = random.randint(0, self.__len__() - 1)
+ cp_labels = self._dataset.load_anno(cp_index)
+ img, cp_labels, _, _ = self._dataset.pull_item(cp_index)
+
+ if len(img.shape) == 3:
+ cp_img = np.ones((input_dim[0], input_dim[1], 3), dtype=np.uint8) * 114
+ else:
+ cp_img = np.ones(input_dim, dtype=np.uint8) * 114
+
+ cp_scale_ratio = min(input_dim[0] / img.shape[0], input_dim[1] / img.shape[1])  # aspect-preserving fit into input_dim
+ resized_img = cv2.resize(
+ img,
+ (int(img.shape[1] * cp_scale_ratio), int(img.shape[0] * cp_scale_ratio)),
+ interpolation=cv2.INTER_LINEAR,
+ )
+
+ cp_img[
+ : int(img.shape[0] * cp_scale_ratio), : int(img.shape[1] * cp_scale_ratio)
+ ] = resized_img
+
+ cp_img = cv2.resize(
+ cp_img,
+ (int(cp_img.shape[1] * jit_factor), int(cp_img.shape[0] * jit_factor)),
+ )
+ cp_scale_ratio *= jit_factor  # total scale applied to the second image's boxes
+
+ if FLIP:
+ cp_img = cp_img[:, ::-1, :]  # NOTE(review): indexes three axes, so a 2-D image would fail here
+
+ origin_h, origin_w = cp_img.shape[:2]
+ target_h, target_w = origin_img.shape[:2]
+ padded_img = np.zeros(
+ (max(origin_h, target_h), max(origin_w, target_w), 3), dtype=np.uint8
+ )
+ padded_img[:origin_h, :origin_w] = cp_img
+
+ x_offset, y_offset = 0, 0
+ if padded_img.shape[0] > target_h:
+ y_offset = random.randint(0, padded_img.shape[0] - target_h - 1)  # random crop origin (rows)
+ if padded_img.shape[1] > target_w:
+ x_offset = random.randint(0, padded_img.shape[1] - target_w - 1)  # random crop origin (columns)
+ padded_cropped_img = padded_img[
+ y_offset: y_offset + target_h, x_offset: x_offset + target_w
+ ]
+
+ cp_bboxes_origin_np = adjust_box_anns(
+ cp_labels[:, :4].copy(), cp_scale_ratio, 0, 0, origin_w, origin_h
+ )
+ if FLIP:
+ cp_bboxes_origin_np[:, 0::2] = (
+ origin_w - cp_bboxes_origin_np[:, 0::2][:, ::-1]
+ )
+ cp_bboxes_transformed_np = cp_bboxes_origin_np.copy()
+ cp_bboxes_transformed_np[:, 0::2] = np.clip(
+ cp_bboxes_transformed_np[:, 0::2] - x_offset, 0, target_w
+ )
+ cp_bboxes_transformed_np[:, 1::2] = np.clip(
+ cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h
+ )
+
+ cls_labels = cp_labels[:, 4:5].copy()
+ box_labels = cp_bboxes_transformed_np
+ labels = np.hstack((box_labels, cls_labels))
+ origin_labels = np.vstack((origin_labels, labels))  # labels of both images are kept
+ origin_img = origin_img.astype(np.float32)
+ origin_img = 0.5 * origin_img + 0.5 * padded_cropped_img.astype(np.float32)  # equal-weight blend
+
+ return origin_img.astype(np.uint8), origin_labels
diff --git a/yolort/data/samplers.py b/yolort/data/samplers.py
new file mode 100644
index 00000000..6b7ea38d
--- /dev/null
+++ b/yolort/data/samplers.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import itertools
+from typing import Optional
+
+import torch
+import torch.distributed as dist
+from torch.utils.data.sampler import BatchSampler as torchBatchSampler
+from torch.utils.data.sampler import Sampler
+
+
+class YoloBatchSampler(torchBatchSampler):
+ """
+ This batch sampler will generate mini-batches of (mosaic, index) tuples from another sampler.
+ It works just like the :class:`torch.utils.data.sampler.BatchSampler`,
+ but it will turn on/off the mosaic aug.
+ """
+
+ def __init__(self, *args, mosaic=True, **kwargs):
+ super().__init__(*args, **kwargs)  # all other arguments go to the torch BatchSampler unchanged
+ self.mosaic = mosaic  # flag attached to every index that is yielded
+
+ def __iter__(self):
+ for batch in super().__iter__():
+ yield [(self.mosaic, idx) for idx in batch]  # self.mosaic is read at yield time
+
+
+class InfiniteSampler(Sampler):
+ """
+ In training, we only care about the "infinite stream" of training data.
+ So this sampler produces an infinite stream of indices and
+ all workers cooperate to correctly shuffle the indices and sample different indices.
+ The samplers in each worker effectively produces `indices[worker_id::num_workers]`
+ where `indices` is an infinite stream of indices consisting of
+ `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True)
+ or `range(size) + range(size) + ...` (if shuffle is False)
+ """
+
+ def __init__(
+ self,
+ size: int,
+ shuffle: bool = True,
+ seed: Optional[int] = 0,
+ rank=0,
+ world_size=1,
+ ):
+ """
+ Args:
+ size (int): the total number of data of the underlying dataset to sample from
+ shuffle (bool): whether to shuffle the indices or not
+ seed (int): the initial seed of the shuffle. Must be the same
+ across all workers. NOTE(review): None is not handled, int(None) raises TypeError.
+ rank, world_size: used only when torch.distributed is not available or not initialized.
+ """
+ self._size = size
+ assert size > 0
+ self._shuffle = shuffle
+ self._seed = int(seed)
+
+ if dist.is_available() and dist.is_initialized():
+ self._rank = dist.get_rank()  # the process group overrides the rank/world_size arguments
+ self._world_size = dist.get_world_size()
+ else:
+ self._rank = rank
+ self._world_size = world_size
+
+ def __iter__(self):
+ start = self._rank
+ yield from itertools.islice(  # every world_size-th index, starting at this rank
+ self._infinite_indices(), start, None, self._world_size
+ )
+
+ def _infinite_indices(self):
+ g = torch.Generator()
+ g.manual_seed(self._seed)  # same seed on every rank gives the same permutation stream
+ while True:
+ if self._shuffle:
+ yield from torch.randperm(self._size, generator=g)
+ else:
+ yield from torch.arange(self._size)
+
+ def __len__(self):
+ return self._size // self._world_size  # per-rank share of one pass; iteration itself never ends
diff --git a/yolort/evaluators/__init__.py b/yolort/evaluators/__init__.py
new file mode 100644
index 00000000..fc0b6875
--- /dev/null
+++ b/yolort/evaluators/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+from .coco_evaluator import COCOEvaluator
\ No newline at end of file
diff --git a/yolort/evaluators/coco_evaluator.py b/yolort/evaluators/coco_evaluator.py
new file mode 100644
index 00000000..a97c6d41
--- /dev/null
+++ b/yolort/evaluators/coco_evaluator.py
@@ -0,0 +1,317 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import contextlib
+import io
+import itertools
+import json
+import tempfile
+import time
+from collections import ChainMap, defaultdict
+from loguru import logger
+from tabulate import tabulate
+from tqdm import tqdm
+
+import numpy as np
+
+import torch
+
+from yolort.data.datasets import COCO_CLASSES
+from yolort.utils import (
+ gather,
+ is_main_process,
+ postprocess,
+ synchronize,
+ time_synchronized,
+ xyxy2xywh
+)
+
+
+def per_class_AR_table(coco_eval, class_names=COCO_CLASSES, headers=["class", "AR"], colums=6):  # per-class AR table
+ per_class_AR = {}
+ recalls = coco_eval.eval["recall"]
+ # dimension of recalls: [TxKxAxM]
+ # recall has dims (iou, cls, area range, max dets)
+ assert len(class_names) == recalls.shape[1]
+
+ for idx, name in enumerate(class_names):
+ recall = recalls[:, idx, 0, -1]  # area-range index 0, last max-dets setting
+ recall = recall[recall > -1]  # keep only entries greater than -1
+ ar = np.mean(recall) if recall.size else float("nan")
+ per_class_AR[name] = float(ar * 100)  # stored as a percentage
+
+ num_cols = min(colums, len(per_class_AR) * len(headers))
+ result_pair = [x for pair in per_class_AR.items() for x in pair]  # flat [name, value, name, value, ...]
+ row_pair = itertools.zip_longest(*[result_pair[i::num_cols] for i in range(num_cols)])  # rows of num_cols cells
+ table_headers = headers * (num_cols // len(headers))  # headers is only read, never mutated
+ table = tabulate(
+ row_pair, tablefmt="pipe", floatfmt=".3f", headers=table_headers, numalign="left",
+ )
+ return table
+
+
+def per_class_AP_table(coco_eval, class_names=COCO_CLASSES, headers=["class", "AP"], colums=6):  # per-class AP table
+ per_class_AP = {}
+ precisions = coco_eval.eval["precision"]
+ # dimension of precisions: [TxRxKxAxM]
+ # precision has dims (iou, recall, cls, area range, max dets)
+ assert len(class_names) == precisions.shape[2]
+
+ for idx, name in enumerate(class_names):
+ # area range index 0: all area ranges
+ # max dets index -1: typically 100 per image
+ precision = precisions[:, :, idx, 0, -1]
+ precision = precision[precision > -1]  # keep only entries greater than -1
+ ap = np.mean(precision) if precision.size else float("nan")
+ per_class_AP[name] = float(ap * 100)  # stored as a percentage
+
+ num_cols = min(colums, len(per_class_AP) * len(headers))
+ result_pair = [x for pair in per_class_AP.items() for x in pair]  # flat [name, value, name, value, ...]
+ row_pair = itertools.zip_longest(*[result_pair[i::num_cols] for i in range(num_cols)])  # rows of num_cols cells
+ table_headers = headers * (num_cols // len(headers))  # headers is only read, never mutated
+ table = tabulate(
+ row_pair, tablefmt="pipe", floatfmt=".3f", headers=table_headers, numalign="left",
+ )
+ return table
+
+
+class COCOEvaluator:
+ """
+ COCO AP Evaluation class. All the data in the val2017 dataset are processed
+ and evaluated by COCO API.
+ """
+
+ def __init__(
+ self,
+ dataloader,
+ img_size: int,
+ confthre: float,
+ nmsthre: float,
+ num_classes: int,
+ testdev: bool = False,
+ per_class_AP: bool = True,
+ per_class_AR: bool = True,
+ ):
+ """
+ Args:
+ dataloader (Dataloader): evaluate dataloader.
+ img_size: image size after preprocess. NOTE(review): annotated as int but
+ indexed as img_size[0] and img_size[1] in convert_to_coco_format.
+ confthre: confidence threshold ranging from 0 to 1, which
+ is defined in the config file.
+ nmsthre: IoU threshold of non-max suppression ranging from 0 to 1.
+ per_class_AP: Show per class AP during evaluation or not. Default to True.
+ per_class_AR: Show per class AR during evaluation or not. Default to True.
+ """
+ self.dataloader = dataloader
+ self.img_size = img_size
+ self.confthre = confthre
+ self.nmsthre = nmsthre
+ self.num_classes = num_classes  # forwarded to postprocess() in evaluate
+ self.testdev = testdev  # when True, detections are written to ./yolox_testdev_2017.json
+ self.per_class_AP = per_class_AP
+ self.per_class_AR = per_class_AR
+
+ def evaluate(
+ self, model, distributed=False, half=False, trt_file=None,
+ decoder=None, test_size=None, return_outputs=False
+ ):
+ """
+ COCO average precision (AP) Evaluation. Iterate inference on the test dataset
+ and the results are evaluated by COCO API.
+
+ NOTE: This function will change training mode to False, please save states if needed.
+
+ Args:
+ model : model to evaluate.
+ return_outputs: when True, the per-image output dict is returned as a second value.
+ Returns:
+ ap50_95 (float) : COCO AP of IoU=50:95
+ ap50 (float) : COCO AP of IoU=50
+ summary (str): summary info of evaluation.
+ """
+ # TODO half to amp_test
+ tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor  # CUDA tensor types are used unconditionally
+ model = model.eval()
+ if half:
+ model = model.half()
+ ids = []  # NOTE(review): immediately rebound by the loop variable below
+ data_list = []
+ output_data = defaultdict()  # no default factory is given
+ progress_bar = tqdm if is_main_process() else iter
+
+ inference_time = 0
+ nms_time = 0
+ n_samples = max(len(self.dataloader) - 1, 1)  # number of timed batches (the last batch is not timed)
+
+ if trt_file is not None:
+ from torch2trt import TRTModule
+
+ model_trt = TRTModule()
+ model_trt.load_state_dict(torch.load(trt_file))
+
+ x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
+ model(x)  # one forward pass of the original model before switching to the TRT module
+ model = model_trt
+
+ for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
+ progress_bar(self.dataloader)
+ ):
+ with torch.no_grad():
+ imgs = imgs.type(tensor_type)
+
+ # skip the last iters since batchsize might be not enough for batch inference
+ is_time_record = cur_iter < len(self.dataloader) - 1
+ if is_time_record:
+ start = time.time()
+
+ outputs = model(imgs)
+ if decoder is not None:
+ outputs = decoder(outputs, dtype=outputs.type())
+
+ if is_time_record:
+ infer_end = time_synchronized()
+ inference_time += infer_end - start
+
+ outputs = postprocess(
+ outputs, self.num_classes, self.confthre, self.nmsthre
+ )
+ if is_time_record:
+ nms_end = time_synchronized()
+ nms_time += nms_end - infer_end
+
+ data_list_elem, image_wise_data = self.convert_to_coco_format(
+ outputs, info_imgs, ids, return_outputs=True)
+ data_list.extend(data_list_elem)
+ output_data.update(image_wise_data)
+
+ statistics = torch.cuda.FloatTensor([inference_time, nms_time, n_samples])
+ if distributed:
+ # different process/device might have different speed,
+ # to make sure the process will not get stuck, sync func is used here.
+ synchronize()
+ data_list = gather(data_list, dst=0)
+ output_data = gather(output_data, dst=0)
+ data_list = list(itertools.chain(*data_list))
+ output_data = dict(ChainMap(*output_data))
+ torch.distributed.reduce(statistics, dst=0)  # reduces the timing statistics onto rank 0
+
+ eval_results = self.evaluate_prediction(data_list, statistics)
+ synchronize()
+
+ if return_outputs:
+ return eval_results, output_data
+ return eval_results
+
+ def convert_to_coco_format(self, outputs, info_imgs, ids, return_outputs=False):  # detections -> COCO result dicts
+ data_list = []
+ image_wise_data = defaultdict(dict)
+ for (output, img_h, img_w, img_id) in zip(
+ outputs, info_imgs[0], info_imgs[1], ids
+ ):
+ if output is None:  # nothing to convert for this image
+ continue
+ output = output.cpu()
+
+ bboxes = output[:, 0:4]
+
+ # preprocessing: resize
+ scale = min(
+ self.img_size[0] / float(img_h), self.img_size[1] / float(img_w)
+ )
+ bboxes /= scale  # in place: divide the boxes by the resize ratio
+ cls = output[:, 6]
+ scores = output[:, 4] * output[:, 5]  # presumably objectness * class confidence — TODO confirm column layout
+
+ image_wise_data.update({
+ int(img_id): {
+ "bboxes": [box.numpy().tolist() for box in bboxes],
+ "scores": [score.numpy().item() for score in scores],
+ "categories": [
+ self.dataloader.dataset.class_ids[int(cls[ind])]
+ for ind in range(bboxes.shape[0])
+ ],
+ }
+ })
+
+ bboxes = xyxy2xywh(bboxes)  # image_wise_data above was filled before this conversion
+
+ for ind in range(bboxes.shape[0]):
+ label = self.dataloader.dataset.class_ids[int(cls[ind])]  # class index -> dataset category id
+ pred_data = {
+ "image_id": int(img_id),
+ "category_id": label,
+ "bbox": bboxes[ind].numpy().tolist(),
+ "score": scores[ind].numpy().item(),
+ "segmentation": [],
+ } # COCO json format
+ data_list.append(pred_data)
+
+ if return_outputs:
+ return data_list, image_wise_data
+ return data_list
+
+ def evaluate_prediction(self, data_dict, statistics):
+     """
+     Score the collected detections with the COCO API (main process only).
+     Args:
+         data_dict (list): COCO-format detection dicts from convert_to_coco_format.
+         statistics: tensor holding [inference_time, nms_time, n_samples].
+     Returns:
+         (stats[0], stats[1], info); (0, 0, None) off the main process, (0, 0, info) if data_dict is empty.
+     """
+     if not is_main_process():
+         return 0, 0, None
+     logger.info("Evaluate in main process...")
+     inference_time = statistics[0].item()
+     nms_time = statistics[1].item()
+     n_samples = statistics[2].item()
+     a_infer_time = 1000 * inference_time / (n_samples * self.dataloader.batch_size)
+     a_nms_time = 1000 * nms_time / (n_samples * self.dataloader.batch_size)
+     time_info = ", ".join(
+         "Average {} time: {:.2f} ms".format(k, v)
+         for k, v in zip(
+             ["forward", "NMS", "inference"],
+             [a_infer_time, a_nms_time, (a_infer_time + a_nms_time)],
+         )
+     )
+     info = time_info + "\n"
+     if len(data_dict) == 0:
+         return 0, 0, info
+     # Evaluate the Dt (detection) json comparing with the ground truth
+     cocoGt = self.dataloader.dataset.coco
+     # TODO: since pycocotools can't process dict in py36, write data to json file.
+     if self.testdev:
+         # The context manager closes (and flushes) the file before loadRes reads it back.
+         with open("./yolox_testdev_2017.json", "w") as f:
+             json.dump(data_dict, f)
+         cocoDt = cocoGt.loadRes("./yolox_testdev_2017.json")
+     else:
+         # A temporary directory avoids the unclosed mkstemp descriptor and is removed on exit.
+         with tempfile.TemporaryDirectory() as tmp_dir:
+             tmp = tmp_dir + "/predictions.json"
+             with open(tmp, "w") as f:
+                 json.dump(data_dict, f)
+             cocoDt = cocoGt.loadRes(tmp)
+     try:
+         from yolox.layers import COCOeval_opt as COCOeval
+     except ImportError:
+         from pycocotools.cocoeval import COCOeval
+         logger.warning("Use standard COCOeval.")
+     cocoEval = COCOeval(cocoGt, cocoDt, "bbox")
+     cocoEval.evaluate()
+     cocoEval.accumulate()
+     redirect_string = io.StringIO()
+     with contextlib.redirect_stdout(redirect_string):
+         cocoEval.summarize()
+     info += redirect_string.getvalue()
+     cat_ids = list(cocoGt.cats.keys())
+     cat_names = [cocoGt.cats[catId]['name'] for catId in sorted(cat_ids)]
+     if self.per_class_AP:
+         AP_table = per_class_AP_table(cocoEval, class_names=cat_names)
+         info += "per class AP:\n" + AP_table + "\n"
+     if self.per_class_AR:
+         AR_table = per_class_AR_table(cocoEval, class_names=cat_names)
+         info += "per class AR:\n" + AR_table + "\n"
+     return cocoEval.stats[0], cocoEval.stats[1], info
\ No newline at end of file
diff --git a/yolort/exp/__init__.py b/yolort/exp/__init__.py
new file mode 100644
index 00000000..d7de27c8
--- /dev/null
+++ b/yolort/exp/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+# Copyright (c) Megvii Inc. All rights reserved.
+
+from .base_exp import BaseExp
+from .yolox_base import Exp
\ No newline at end of file
diff --git a/yolort/exp/base_exp.py b/yolort/exp/base_exp.py
new file mode 100644
index 00000000..c0ae45fe
--- /dev/null
+++ b/yolort/exp/base_exp.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+# Copyright (c) Megvii Inc. All rights reserved.
+
+import ast
+import pprint
+from abc import ABCMeta, abstractmethod
+from typing import Dict, List, Tuple
+from tabulate import tabulate
+
+import torch
+from torch.nn import Module
+
+from yolort.utils import LRScheduler
+
+
+class BaseExp(metaclass=ABCMeta):
+    """Basic class for any experiment."""
+
+    def __init__(self):
+        self.seed = None
+        self.output_dir = "./"
+        self.print_interval = 100
+        self.eval_interval = 10
+        self.dataset = None
+
+    @abstractmethod
+    def get_model(self) -> Module:
+        """Return the model (a torch.nn.Module) of this experiment."""
+
+    @abstractmethod
+    def get_dataset(self, cache: bool = False, cache_type: str = "ram"):
+        """Return the dataset; `cache` and `cache_type` select optional image caching."""
+
+    @abstractmethod
+    def get_data_loader(
+        self, batch_size: int, is_distributed: bool
+    ) -> Dict[str, torch.utils.data.DataLoader]:
+        """Return the data loader(s) for the given batch size."""
+
+    @abstractmethod
+    def get_optimizer(self, batch_size: int) -> torch.optim.Optimizer:
+        """Return the optimizer."""
+
+    @abstractmethod
+    def get_lr_scheduler(
+        self, lr: float, iters_per_epoch: int, **kwargs
+    ) -> LRScheduler:
+        """Return the learning-rate scheduler."""
+
+    @abstractmethod
+    def get_evaluator(self):
+        """Return the evaluator."""
+
+    @abstractmethod
+    def eval(self, model, evaluator, weights):
+        """Evaluate `model` with `evaluator`; must be implemented by subclasses."""
+
+    def __repr__(self):
+        table_header = ["keys", "values"]
+        exp_table = [
+            (str(k), pprint.pformat(v))
+            for k, v in vars(self).items()
+            if not k.startswith("_")
+        ]
+        return tabulate(exp_table, headers=table_header, tablefmt="fancy_grid")
+
+    def merge(self, cfg_list):
+        """Override existing attributes from a flat [key, value, key, value, ...] list."""
+        assert len(cfg_list) % 2 == 0, f"length must be even, check value here: {cfg_list}"
+        for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
+            # only update value with same key
+            if hasattr(self, k):
+                src_value = getattr(self, k)
+                src_type = type(src_value)
+                # pre-process input if source type is list or tuple
+                if isinstance(src_value, (List, Tuple)):
+                    v = v.strip("[]()")
+                    v = [t.strip() for t in v.split(",")]
+                    # find type of tuple
+                    if len(src_value) > 0:
+                        src_item_type = type(src_value[0])
+                        v = [src_item_type(t) for t in v]
+                if src_type is bool and isinstance(v, str):  # bool("False") is True
+                    v = v.strip().lower() not in ("", "0", "false", "no", "off")
+                if src_value is not None and src_type != type(v):
+                    try:
+                        v = src_type(v)
+                    except Exception:
+                        v = ast.literal_eval(v)
+                setattr(self, k, v)
diff --git a/yolort/exp/default/__init__.py b/yolort/exp/default/__init__.py
new file mode 100644
index 00000000..1f361d78
--- /dev/null
+++ b/yolort/exp/default/__init__.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii Inc. All rights reserved.
+
+# This file is used for package installation and find default exp file
+
+import sys
+from importlib import abc, util
+from pathlib import Path
+
+_EXP_PATH = Path(__file__).resolve().parent.parent.parent.parent / "exps" / "default"  # four levels up from this file
+
+if _EXP_PATH.is_dir():
+ # This is true only for in-place installation (pip install -e, setup.py develop),
+ # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230
+
+ class _ExpFinder(abc.MetaPathFinder):
+ # Maps a yolort.exp.default.<name> import to the file _EXP_PATH/<name>.py
+ def find_spec(self, name, path, target=None):
+ if not name.startswith("yolort.exp.default"):
+ return
+ project_name = name.split(".")[-1] + ".py"  # last dotted component becomes the file name
+ target_file = _EXP_PATH / project_name
+ if not target_file.is_file():
+ return  # returning None lets the remaining finders handle the import
+ return util.spec_from_file_location(name, target_file)
+
+ sys.meta_path.append(_ExpFinder())  # appended last, so the regular finders are tried first
diff --git a/yolort/exp/yolox_base.py b/yolort/exp/yolox_base.py
new file mode 100644
index 00000000..f3147743
--- /dev/null
+++ b/yolort/exp/yolox_base.py
@@ -0,0 +1,387 @@
+#!/usr/bin/env python3
+# Copyright (c) Megvii Inc. All rights reserved.
+
+import os
+import random
+import logging
+from zipfile import ZipFile
+from pathlib import Path, PosixPath
+
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+
+from .base_exp import BaseExp
+
+__all__ = ["Exp"]
+
+
+class Exp(BaseExp):
+ def __init__(self):
+ super().__init__()
+
+ # ---------------- model config ---------------- #
+ # detect classes number of model
+ self.num_classes = 80
+ # factor of model depth
+ self.depth = 1.00
+ # factor of model width
+ self.width = 1.00
+ # activation name. For example, if using "relu", then "silu" will be replaced to "relu".
+ self.act = "silu"
+
+ # ---------------- dataloader config ---------------- #
+ # set worker to 4 for shorter dataloader init time
+ # If your training process uses too much memory, reduce this value.
+ self.data_num_workers = 4
+ self.input_size = (640, 640) # (height, width)
+ # Actual multiscale ranges: [640 - 5 * 32, 640 + 5 * 32].
+ # To disable multiscale training, set the value to 0.
+ self.multiscale_range = 5
+ # You can uncomment this line to specify a multiscale range
+ # self.random_size = (14, 26)
+ # dir of dataset images, if data_dir is None, this project will use `datasets` dir
+ self.data_dir = None
+ # name of annotation file for training
+ self.train_ann = "instances_train2017.json"
+ # name of annotation file for evaluation
+ self.val_ann = "instances_val2017.json"
+ # name of annotation file for testing
+ self.test_ann = "instances_test2017.json"
+
+ # --------------- transform config ----------------- #
+ # prob of applying mosaic aug
+ self.mosaic_prob = 1.0
+ # prob of applying mixup aug
+ self.mixup_prob = 1.0
+ # prob of applying hsv aug
+ self.hsv_prob = 1.0
+ # prob of applying flip aug
+ self.flip_prob = 0.5
+ # rotation angle range, for example, if set to 2, the true range is (-2, 2)
+ self.degrees = 10.0
+ # translate range, for example, if set to 0.1, the true range is (-0.1, 0.1)
+ self.translate = 0.1
+ self.mosaic_scale = (0.1, 2)  # handed to MosaicDetection as mosaic_scale
+ # apply mixup aug or not
+ self.enable_mixup = True
+ self.mixup_scale = (0.5, 1.5)  # handed to MosaicDetection as mixup_scale
+ # shear angle range, for example, if set to 2, the true range is (-2, 2)
+ self.shear = 2.0
+
+ # -------------- training config --------------------- #
+ # epoch number used for warmup
+ self.warmup_epochs = 5
+ # max training epoch
+ self.max_epoch = 300
+ # minimum learning rate during warmup
+ self.warmup_lr = 0
+ self.min_lr_ratio = 0.05
+ # learning rate for one image. During training, lr will multiply batchsize.
+ self.basic_lr_per_img = 0.01 / 64.0
+ # name of LRScheduler
+ self.scheduler = "yoloxwarmcos"
+ # number of final epochs in which augmentation such as mosaic is turned off
+ self.no_aug_epochs = 15
+ # apply EMA during training
+ self.ema = True
+
+ # weight decay of optimizer
+ self.weight_decay = 5e-4
+ # momentum of optimizer
+ self.momentum = 0.9
+ # log period in iter, for example,
+ # if set to 1, user could see log every iteration.
+ self.print_interval = 10
+ # eval period in epoch, for example,
+ # if set to 1, model will be evaluated after every epoch.
+ self.eval_interval = 10
+ # save history checkpoint or not.
+ # If set to False, yolox will only save latest and best ckpt.
+ self.save_history_ckpt = True
+ # name of experiment
+ self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
+
+ # ----------------- testing config ------------------ #
+ # output image size during evaluation/test
+ self.test_size = (640, 640)
+ # confidence threshold during evaluation/test,
+ # boxes whose scores are less than test_conf will be filtered
+ self.test_conf = 0.01
+ # nms threshold
+ self.nmsthre = 0.65
+
+ def get_model(self):  # build the detector, store it on self.model and return it in train mode
+ import yolort.models as models
+
+ self.model = models.__dict__['yolov5n'](upstream_version="r6.0", )  # NOTE(review): variant is hard-coded; depth/width/num_classes are not used here
+ self.model.train()
+ return self.model
+
+ def get_dataset(self, data_root: str, mode: str = "val", cache: bool = False, cache_type: str = "ram"):
+ # Acquire the images and labels from the coco128 dataset
+ data_path = Path(data_root)
+ coco128_dirname = "coco128"
+ coco128_path = data_path / coco128_dirname
+ image_root = coco128_path / "images" / "train2017"  # NOTE(review): assigned but never used below
+ annotation_file = coco128_path / "annotations" / "instances_train2017.json"
+
+ from yolort.data import COCODataset, TrainTransform
+
+ if not annotation_file.is_file():
+ self.prepare_coco128(data_path, dirname=coco128_dirname)  # download and unzip coco128 when the annotation file is missing
+
+ if mode == "train":
+ dataset = COCODataset(
+ data_dir=self.data_dir,  # NOTE(review): self.data_dir, not the coco128 path prepared above — confirm
+ json_file=self.train_ann,
+ img_size=self.input_size,
+ preproc=TrainTransform(
+ max_labels=50,
+ flip_prob=self.flip_prob,
+ hsv_prob=self.hsv_prob
+ ),
+ cache=cache,
+ cache_type=cache_type,
+ )
+ elif mode == "val":
+ """ TODO """
+ dataset = COCODataset(  # NOTE(review): same arguments as the "train" branch (train_ann + TrainTransform)
+ data_dir=self.data_dir,
+ json_file=self.train_ann,
+ img_size=self.input_size,
+ preproc=TrainTransform(
+ max_labels=50,
+ flip_prob=self.flip_prob,
+ hsv_prob=self.hsv_prob
+ ),
+ cache=cache,
+ cache_type=cache_type,
+ )
+ else:
+ raise NotImplementedError(f"Currently not supports mode {mode}")
+
+ return dataset
+
+ def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img: str = None):
+ """
+ Build the training DataLoader (MosaicDetection + InfiniteSampler + YoloBatchSampler).
+ Args:
+ batch_size (int): divided by the world size when is_distributed is True.
+ no_aug (bool, optional): Whether to turn off mosaic data enhancement. Defaults to False.
+ cache_img (str, optional): cache_img is equivalent to cache_type ("ram" or "disk"). Defaults to None.
+ None: Do not use cache, in this case cache_data is also None.
+ """
+ from yolort.data import (
+ TrainTransform,
+ YoloBatchSampler,
+ DataLoader,
+ InfiniteSampler,
+ MosaicDetection,
+ worker_init_reset_seed,
+ )
+ from yolort.utils import wait_for_the_master
+
+ # if cache is True, we will create dataset before launch
+ # else we will create dataset after launch
+ if self.dataset is None:
+ with wait_for_the_master():
+ assert cache_img is None, \
+ "cache_img must be None if you didn't create dataset before launch"
+ self.dataset = self.get_dataset(data_root="data-bin", mode="train", cache=False, cache_type=cache_img)  # data root is hard-coded
+
+ self.dataset = MosaicDetection(  # self.dataset now refers to the augmenting wrapper
+ dataset=self.dataset,
+ mosaic=not no_aug,
+ img_size=self.input_size,
+ preproc=TrainTransform(
+ max_labels=120,
+ flip_prob=self.flip_prob,
+ hsv_prob=self.hsv_prob),
+ degrees=self.degrees,
+ translate=self.translate,
+ mosaic_scale=self.mosaic_scale,
+ mixup_scale=self.mixup_scale,
+ shear=self.shear,
+ enable_mixup=self.enable_mixup,
+ mosaic_prob=self.mosaic_prob,
+ mixup_prob=self.mixup_prob,
+ )
+
+ if is_distributed:
+ batch_size = batch_size // dist.get_world_size()  # per-process batch size
+
+ sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)  # a falsy seed (None or 0) becomes 0
+
+ batch_sampler = YoloBatchSampler(
+ sampler=sampler,
+ batch_size=batch_size,
+ drop_last=False,
+ mosaic=not no_aug,
+ )
+
+ dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
+ dataloader_kwargs["batch_sampler"] = batch_sampler
+
+ # Make sure each process has different random seed, especially for 'fork' method.
+ # Check https://github.com/pytorch/pytorch/issues/63311 for more details.
+ dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed
+
+ train_loader = DataLoader(self.dataset, **dataloader_kwargs)
+
+ return train_loader
+
+ def prepare_coco128(self,
+ data_path: PosixPath,
+ dirname: str = "coco128",
+ ) -> None:
+ """
+ Prepare coco128 dataset to test.
+
+ Args:
+ data_path (PosixPath): root path of coco128 dataset.
+ dirname (str): the directory name of coco128 dataset. Default: 'coco128'.
+ """
+ logger = logging.getLogger(__name__)
+
+ if not data_path.is_dir():
+ logger.info(f"Create a new directory: {data_path}")
+ data_path.mkdir(parents=True, exist_ok=True)
+
+ zip_path = data_path / "coco128.zip"
+ coco128_url = "https://github.com/zhiqwang/yolort/releases/download/v0.3.0/coco128.zip"
+ if not zip_path.is_file():  # download only when the archive is not already present
+ logger.info(f"Downloading coco128 datasets form {coco128_url}")
+ torch.hub.download_url_to_file(coco128_url, zip_path, hash_prefix="a67d2887")
+
+ coco128_path = data_path / dirname
+ if not coco128_path.is_dir():  # extract only when the target directory does not exist yet
+ logger.info(f"Unzipping dataset to {coco128_path}")
+ with ZipFile(zip_path, "r") as zip_obj:
+ zip_obj.extractall(data_path)  # NOTE(review): extracts into data_path, so dirname must match the archive's top-level folder — confirm
+
+    def random_resize(self, data_loader, epoch, rank, is_distributed):
+        """Draw a new multiscale input size on rank 0 and broadcast it to all ranks."""
+        # Use the GPU only when one exists so CPU-only runs do not crash on `.cuda()`.
+        tensor = torch.zeros(2, dtype=torch.long, device="cuda" if torch.cuda.is_available() else "cpu")
+        if rank == 0:
+            size_factor = self.input_size[1] * 1.0 / self.input_size[0]
+            if not hasattr(self, 'random_size'):
+                min_size = int(self.input_size[0] / 32) - self.multiscale_range
+                max_size = int(self.input_size[0] / 32) + self.multiscale_range
+                self.random_size = (min_size, max_size)
+            size = random.randint(*self.random_size)
+            size = (int(32 * size), 32 * int(size * size_factor))
+            tensor[0] = size[0]
+            tensor[1] = size[1]
+
+        if is_distributed:
+            dist.barrier()
+            dist.broadcast(tensor, 0)
+
+        return (tensor[0].item(), tensor[1].item())
+
+ def preprocess(self, inputs, targets, tsize):
+ scale_y = tsize[0] / self.input_size[0]
+ scale_x = tsize[1] / self.input_size[1]
+ if scale_x != 1 or scale_y != 1:
+ inputs = nn.functional.interpolate(
+ inputs, size=tsize, mode="bilinear", align_corners=False
+ )
+ targets[..., 1::2] = targets[..., 1::2] * scale_x
+ targets[..., 2::2] = targets[..., 2::2] * scale_y
+ return inputs, targets
+
+ def get_optimizer(self, batch_size):
+ if "optimizer" not in self.__dict__:
+ if self.warmup_epochs > 0:
+ lr = self.warmup_lr
+ else:
+ lr = self.basic_lr_per_img * batch_size
+
+ pg0, pg1, pg2 = [], [], [] # optimizer parameter groups
+
+ for k, v in self.model.named_modules():
+ if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter):
+ pg2.append(v.bias) # biases
+ if isinstance(v, nn.BatchNorm2d) or "bn" in k:
+ pg0.append(v.weight) # no decay
+ elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
+ pg1.append(v.weight) # apply decay
+
+ optimizer = torch.optim.SGD(
+ pg0, lr=lr, momentum=self.momentum, nesterov=True
+ )
+ optimizer.add_param_group(
+ {"params": pg1, "weight_decay": self.weight_decay}
+ ) # add pg1 with weight_decay
+ optimizer.add_param_group({"params": pg2})
+ self.optimizer = optimizer
+
+ return self.optimizer
+
+ def get_lr_scheduler(self, lr, iters_per_epoch):
+ from yolort.utils import LRScheduler
+
+ scheduler = LRScheduler(
+ self.scheduler,
+ lr,
+ iters_per_epoch,
+ self.max_epoch,
+ warmup_epochs=self.warmup_epochs,
+ warmup_lr_start=self.warmup_lr,
+ no_aug_epochs=self.no_aug_epochs,
+ min_lr_ratio=self.min_lr_ratio,
+ )
+ return scheduler
+
+    def get_eval_dataset(self, **kwargs):
+        from yolort.data import COCODataset, ValTransform
+        testdev = kwargs.get("testdev", False)
+        legacy = kwargs.get("legacy", False)
+
+        return COCODataset(
+            data_dir=self.data_dir,
+            json_file=self.train_ann,  # NOTE(review): author's note says this still needs to be changed
+            name="train2017" if not testdev else "train2017",  # test data; both branches are identical
+            img_size=self.test_size,
+            preproc=ValTransform(legacy=legacy),
+        )
+
+ def get_eval_loader(self, batch_size, is_distributed, **kwargs):
+ valdataset = self.get_eval_dataset(**kwargs)
+
+ if is_distributed:
+ batch_size = batch_size // dist.get_world_size()
+ sampler = torch.utils.data.distributed.DistributedSampler(
+ valdataset, shuffle=False
+ )
+ else:
+ sampler = torch.utils.data.SequentialSampler(valdataset)
+
+ dataloader_kwargs = {
+ "num_workers": self.data_num_workers,
+ "pin_memory": True,
+ "sampler": sampler,
+ }
+ dataloader_kwargs["batch_size"] = batch_size
+ val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)
+
+ return val_loader
+
+ def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False):
+ from yolort.evaluators import COCOEvaluator
+
+ return COCOEvaluator(
+ dataloader=self.get_eval_loader(batch_size, is_distributed,
+ testdev=testdev, legacy=legacy),
+ img_size=self.test_size,
+ confthre=self.test_conf,
+ nmsthre=self.nmsthre,
+ num_classes=self.num_classes,
+ testdev=testdev,
+ )
+
+ def eval(self, model, evaluator, is_distributed, half=False, return_outputs=False):
+ return evaluator.evaluate(model, is_distributed, half, return_outputs=return_outputs)
\ No newline at end of file
diff --git a/yolort/trainer/__init__.py b/yolort/trainer/__init__.py
index 34724c90..ba0e63aa 100644
--- a/yolort/trainer/__init__.py
+++ b/yolort/trainer/__init__.py
@@ -1,5 +1,5 @@
# Copyright (c) 2021, yolort team. All rights reserved.
-from .lightning_task import DefaultTask
+from .trainer import Trainer
-__all__ = ["DefaultTask"]
+__all__ = ["Trainer"]
diff --git a/yolort/trainer/lightning_task.py b/yolort/trainer/lightning_task.py
deleted file mode 100644
index c8cec1e1..00000000
--- a/yolort/trainer/lightning_task.py
+++ /dev/null
@@ -1,143 +0,0 @@
-# Copyright (c) 2021, yolort team. All rights reserved.
-
-import argparse
-from pathlib import PosixPath
-from typing import Any, Dict, List, Optional, Tuple, Union
-
-import torch
-import yolort.models as models
-from pytorch_lightning import LightningModule
-from torch import Tensor
-from torchvision.ops import box_iou
-from yolort.data.coco_eval import COCOEvaluator
-
-
-__all__ = ["DefaultTask"]
-
-
-def _evaluate_iou(target, pred):
- """
- Evaluate intersection over union (IOU) for target from dataset and
- output prediction from model
- """
- if pred["boxes"].shape[0] == 0:
- # no box detected, 0 IOU
- return torch.tensor(0.0, device=pred["boxes"].device)
- return box_iou(target["boxes"], pred["boxes"]).diag().mean()
-
-
-class DefaultTask(LightningModule):
- """
- Wrapping the trainer into the YOLOv5 Module.
-
- Args:
- arch (string): YOLOv5 model architecture. Default: 'yolov5s'
- version (str): model released by the upstream YOLOv5. Possible values
- are ['r6.0']. Default: 'r6.0'.
- lr (float): The initial learning rate
- annotation_path (Optional[Union[string, PosixPath]]): Path of the COCO annotation file
- Default: None.
- """
-
- def __init__(
- self,
- arch: str = "yolov5s",
- version: str = "r6.0",
- lr: float = 0.01,
- annotation_path: Optional[Union[str, PosixPath]] = None,
- **kwargs: Any,
- ) -> None:
-
- super().__init__()
-
- self.model = models.__dict__[arch](upstream_version=version, **kwargs)
- self.lr = lr
-
- # evaluators for validation datasets
- self.evaluator = None
- if annotation_path is not None:
- self.evaluator = COCOEvaluator(annotation_path, iou_type="bbox")
-
- # used only on torchscript mode
- self._has_warned = False
-
- def forward(
- self,
- inputs: List[Tensor],
- targets: Optional[List[Dict[str, Tensor]]] = None,
- ) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]:
- """
- This exists since PyTorchLightning forward are used for inference only (separate from
- ``training_step``). We keep ``targets`` here for Backward Compatible.
- """
- return self.model(inputs, targets)
-
- def training_step(self, batch, batch_idx):
- """
- The training step.
- """
- loss_dict = self.model(*batch)
- loss = sum(loss_dict.values())
- self.log_dict(loss_dict, on_step=True, on_epoch=True, prog_bar=True)
- return loss
-
- def validation_step(self, batch, batch_idx):
- images, targets = batch
- # fasterrcnn takes only images for eval() mode
- preds = self.model(images)
- iou = torch.stack([_evaluate_iou(t, o) for t, o in zip(targets, preds)]).mean()
- outs = {"val_iou": iou}
- self.log_dict(outs, on_step=True, on_epoch=True, prog_bar=True)
- return outs
-
- def validation_epoch_end(self, outs):
- avg_iou = torch.stack([o["val_iou"] for o in outs]).mean()
- self.log("avg_val_iou", avg_iou)
-
- def test_step(self, batch, batch_idx):
- """
- The test step.
- """
- images, targets = batch
- images = list(image.to(next(self.parameters()).device) for image in images)
- preds = self.model(images)
- results = self.evaluator(preds, targets)
- # log step metric
- self.log("eval_step", results, prog_bar=True, on_step=True)
-
- def test_epoch_end(self, outputs):
- return self.log("coco_eval", self.evaluator.compute())
-
- def configure_optimizers(self):
- return torch.optim.SGD(
- self.model.parameters(),
- lr=self.lr,
- momentum=0.9,
- weight_decay=5e-4,
- )
-
- @staticmethod
- def add_model_specific_args(parent_parser):
- parser = argparse.ArgumentParser(parents=[parent_parser], add_help=False)
- parser.add_argument("--arch", default="yolov5_darknet_pan_s_r40", help="model architecture")
- parser.add_argument(
- "--pretrained",
- action="store_true",
- help="Use pre-trained models from the modelzoo",
- )
- parser.add_argument(
- "--lr",
- default=0.01,
- type=float,
- help="initial learning rate, 0.01 is the default value for training "
- "on 8 gpus and 2 images_per_gpu",
- )
- parser.add_argument("--momentum", default=0.9, type=float, metavar="M", help="momentum")
- parser.add_argument(
- "--weight-decay",
- default=5e-4,
- type=float,
- metavar="W",
- help="weight decay (default: 5e-4)",
- )
- return parser
diff --git a/yolort/trainer/trainer.py b/yolort/trainer/trainer.py
new file mode 100644
index 00000000..28f1fbe1
--- /dev/null
+++ b/yolort/trainer/trainer.py
@@ -0,0 +1,392 @@
+#!/usr/bin/env python3
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import datetime
+import os
+import time
+from loguru import logger
+
+import torch
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.utils.tensorboard import SummaryWriter
+
+from yolort.data import DataPrefetcher
+from yolort.exp import Exp
+from yolort.utils import (
+ MeterBuffer,
+ ModelEMA,
+ WandbLogger,
+ adjust_status,
+ all_reduce_norm,
+ get_local_rank,
+ get_model_info,
+ get_rank,
+ get_world_size,
+ gpu_mem_usage,
+ is_parallel,
+ load_ckpt,
+ mem_usage,
+ occupy_mem,
+ save_checkpoint,
+ setup_logger,
+ synchronize
+)
+
+__all__ = ["Trainer"]
+
+class Trainer:
+ def __init__(self, exp: Exp, args):
+ # init function only defines some basic attr, other attrs like model, optimizer are built in
+ # before_train methods.
+ self.exp = exp
+ self.args = args
+
+ # training related attr
+ self.max_epoch = exp.max_epoch
+ self.amp_training = args.fp16
+ self.scaler = torch.cuda.amp.GradScaler(enabled=args.fp16)
+ self.is_distributed = get_world_size() > 1
+ self.rank = get_rank()
+ self.local_rank = get_local_rank()
+ self.device = "cuda:{}".format(self.local_rank) if torch.cuda.is_available() else 'cpu'
+ self.use_model_ema = exp.ema
+ self.save_history_ckpt = exp.save_history_ckpt
+
+ # data/dataloader related attr
+ self.data_type = torch.float16 if args.fp16 else torch.float32
+ self.input_size = exp.input_size
+ self.best_ap = 0
+
+ # metric record
+ self.meter = MeterBuffer(window_size=exp.print_interval)
+ self.file_name = os.path.join(exp.output_dir, args.experiment_name)
+
+ if self.rank == 0:
+ os.makedirs(self.file_name, exist_ok=True)
+
+ setup_logger(
+ self.file_name,
+ distributed_rank=self.rank,
+ filename="train_log.txt",
+ mode="a",
+ )
+
+    def train(self):
+        """Run setup and the epoch loop; ``after_train`` runs even if the loop raises."""
+        self.before_train()
+        try:
+            self.train_in_epoch()
+        finally:
+            # Exceptions propagate unchanged; no explicit re-raise clause is needed.
+            self.after_train()
+
+ def train_in_epoch(self):
+ for self.epoch in range(self.start_epoch, self.max_epoch):
+ self.before_epoch()
+ self.train_in_iter()
+ self.after_epoch()
+
+ def train_in_iter(self):
+ for self.iter in range(self.max_iter):
+ self.before_iter()
+ self.train_one_iter()
+ self.after_iter()
+
+ def train_one_iter(self):
+ iter_start_time = time.time()
+
+ inps, targets = self.prefetcher.next()
+ inps = inps.to(self.data_type)
+ targets = targets.to(self.data_type)
+ targets.requires_grad = False
+ inps, targets = self.exp.preprocess(inps, targets, self.input_size)
+ data_end_time = time.time()
+
+ with torch.cuda.amp.autocast(enabled=self.amp_training):
+ outputs = self.model(inps, targets)
+
+ loss = outputs["total_loss"]
+
+ self.optimizer.zero_grad()
+ self.scaler.scale(loss).backward()
+ self.scaler.step(self.optimizer)
+ self.scaler.update()
+
+ if self.use_model_ema:
+ self.ema_model.update(self.model)
+
+ lr = self.lr_scheduler.update_lr(self.progress_in_iter + 1)
+ for param_group in self.optimizer.param_groups:
+ param_group["lr"] = lr
+
+ iter_end_time = time.time()
+ self.meter.update(
+ iter_time=iter_end_time - iter_start_time,
+ data_time=data_end_time - iter_start_time,
+ lr=lr,
+ **outputs,
+ )
+
+ def before_train(self):
+ logger.info("args: {}".format(self.args))
+ logger.info("exp value:\n{}".format(self.exp))
+
+ # model related init
+ if self.device != 'cpu':
+ torch.cuda.set_device(self.local_rank)
+ model = self.exp.get_model()
+ logger.info(
+ "Model Summary: {}".format(get_model_info(model, self.exp.test_size))
+ )
+ model.to(self.device)
+
+ # solver related init
+ self.optimizer = self.exp.get_optimizer(self.args.batch_size)
+
+ # value of epoch will be set in `resume_train`
+ model = self.resume_train(model)
+
+ # data related init
+ self.no_aug = self.start_epoch >= self.max_epoch - self.exp.no_aug_epochs
+ self.train_loader = self.exp.get_data_loader(
+ batch_size=self.args.batch_size,
+ is_distributed=self.is_distributed,
+ no_aug=self.no_aug,
+ cache_img=self.args.cache,
+ )
+ logger.info("init prefetcher, this might take one minute or less...")
+ self.prefetcher = DataPrefetcher(self.train_loader)
+ # max_iter means iters per epoch
+ self.max_iter = len(self.train_loader)
+
+ self.lr_scheduler = self.exp.get_lr_scheduler(
+ self.exp.basic_lr_per_img * self.args.batch_size, self.max_iter
+ )
+ if self.args.occupy:
+ occupy_mem(self.local_rank)
+
+ if self.is_distributed:
+ model = DDP(model, device_ids=[self.local_rank], broadcast_buffers=False)
+
+ if self.use_model_ema:
+ self.ema_model = ModelEMA(model, 0.9998)
+ self.ema_model.updates = self.max_iter * self.start_epoch
+
+ self.model = model
+
+ self.evaluator = self.exp.get_evaluator(
+ batch_size=self.args.batch_size, is_distributed=self.is_distributed
+ )
+ # Tensorboard and Wandb loggers
+ if self.rank == 0:
+ if self.args.logger == "tensorboard":
+ self.tblogger = SummaryWriter(os.path.join(self.file_name, "tensorboard"))
+ elif self.args.logger == "wandb":
+ self.wandb_logger = WandbLogger.initialize_wandb_logger(
+ self.args,
+ self.exp,
+ self.evaluator.dataloader.dataset
+ )
+ else:
+ raise ValueError("logger must be either 'tensorboard' or 'wandb'")
+
+ logger.info("Training start...")
+ logger.info("\n{}".format(model))
+
+ def after_train(self):
+ logger.info(
+ "Training of experiment is done and the best AP is {:.2f}".format(self.best_ap * 100)
+ )
+ if self.rank == 0:
+ if self.args.logger == "wandb":
+ self.wandb_logger.finish()
+
+ def before_epoch(self):
+ logger.info("---> start train epoch{}".format(self.epoch + 1))
+
+ if self.epoch + 1 == self.max_epoch - self.exp.no_aug_epochs or self.no_aug:
+ logger.info("--->No mosaic aug now!")
+ self.train_loader.close_mosaic()
+ logger.info("--->Add additional L1 loss now!")
+ if self.is_distributed:
+ self.model.module.head.use_l1 = True
+ else:
+ self.model.head.use_l1 = True
+ self.exp.eval_interval = 1
+ if not self.no_aug:
+ self.save_ckpt(ckpt_name="last_mosaic_epoch")
+
+ def after_epoch(self):
+ self.save_ckpt(ckpt_name="latest")
+
+ if (self.epoch + 1) % self.exp.eval_interval == 0:
+ all_reduce_norm(self.model)
+ self.evaluate_and_save_model()
+
+ def before_iter(self):
+ pass
+
+ def after_iter(self):
+ """
+ `after_iter` contains two parts of logic:
+ * log information
+ * reset setting of resize
+ """
+ # log needed information
+ if (self.iter + 1) % self.exp.print_interval == 0:
+ # TODO check ETA logic
+ left_iters = self.max_iter * self.max_epoch - (self.progress_in_iter + 1)
+ eta_seconds = self.meter["iter_time"].global_avg * left_iters
+ eta_str = "ETA: {}".format(datetime.timedelta(seconds=int(eta_seconds)))
+
+ progress_str = "epoch: {}/{}, iter: {}/{}".format(
+ self.epoch + 1, self.max_epoch, self.iter + 1, self.max_iter
+ )
+ loss_meter = self.meter.get_filtered_meter("loss")
+ loss_str = ", ".join(
+ ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()]
+ )
+
+ time_meter = self.meter.get_filtered_meter("time")
+ time_str = ", ".join(
+ ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()]
+ )
+
+ mem_str = "gpu mem: {:.0f}Mb, mem: {:.1f}Gb".format(gpu_mem_usage(), mem_usage())
+
+ logger.info(
+ "{}, {}, {}, {}, lr: {:.3e}".format(
+ progress_str,
+ mem_str,
+ time_str,
+ loss_str,
+ self.meter["lr"].latest,
+ )
+ + (", size: {:d}, {}".format(self.input_size[0], eta_str))
+ )
+
+ if self.rank == 0:
+ if self.args.logger == "tensorboard":
+ self.tblogger.add_scalar(
+ "train/lr", self.meter["lr"].latest, self.progress_in_iter)
+ for k, v in loss_meter.items():
+ self.tblogger.add_scalar(
+ f"train/{k}", v.latest, self.progress_in_iter)
+ if self.args.logger == "wandb":
+ metrics = {"train/" + k: v.latest for k, v in loss_meter.items()}
+ metrics.update({
+ "train/lr": self.meter["lr"].latest
+ })
+ self.wandb_logger.log_metrics(metrics, step=self.progress_in_iter)
+
+ self.meter.clear_meters()
+
+ # random resizing
+ if (self.progress_in_iter + 1) % 10 == 0:
+ self.input_size = self.exp.random_resize(
+ self.train_loader, self.epoch, self.rank, self.is_distributed
+ )
+
+ @property
+ def progress_in_iter(self):
+ return self.epoch * self.max_iter + self.iter
+
+    def resume_train(self, model):
+        """Load weights into ``model`` (resume or fine-tune), set ``self.start_epoch``, return model."""
+        if self.args.resume:
+            logger.info("resume training")
+            if self.args.ckpt is None:
+                ckpt_file = os.path.join(self.file_name, "latest" + "_ckpt.pth")
+            else:
+                ckpt_file = self.args.ckpt
+
+            ckpt = torch.load(ckpt_file, map_location=self.device)
+            # resume the model/optimizer state dict
+            model.load_state_dict(ckpt["model"])
+            self.optimizer.load_state_dict(ckpt["optimizer"])
+            self.best_ap = ckpt.pop("best_ap", 0)
+            # resume the training states variables
+            start_epoch = (
+                self.args.start_epoch - 1
+                if self.args.start_epoch is not None
+                else ckpt["start_epoch"]
+            )
+            self.start_epoch = start_epoch
+            logger.info(
+                "loaded checkpoint '{}' (epoch {})".format(ckpt_file, self.start_epoch)
+            )  # report the file that was actually loaded, not the value of args.resume
+        else:
+            if self.args.ckpt is not None:
+                logger.info("loading checkpoint for fine tuning")
+                ckpt_file = self.args.ckpt
+                ckpt = torch.load(ckpt_file, map_location=self.device)["model"]
+                model = load_ckpt(model, ckpt)
+            # not resuming: training starts from the first epoch
+            self.start_epoch = 0
+
+        return model
+
+ def evaluate_and_save_model(self):
+ if self.use_model_ema:
+ evalmodel = self.ema_model.ema
+ else:
+ evalmodel = self.model
+ if is_parallel(evalmodel):
+ evalmodel = evalmodel.module
+
+ with adjust_status(evalmodel, training=False):
+ (ap50_95, ap50, summary), predictions = self.exp.eval(
+ evalmodel, self.evaluator, self.is_distributed, return_outputs=True
+ )
+
+ update_best_ckpt = ap50_95 > self.best_ap
+ self.best_ap = max(self.best_ap, ap50_95)
+
+ if self.rank == 0:
+ if self.args.logger == "tensorboard":
+ self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1)
+ self.tblogger.add_scalar("val/COCOAP50_95", ap50_95, self.epoch + 1)
+ if self.args.logger == "wandb":
+ self.wandb_logger.log_metrics({
+ "val/COCOAP50": ap50,
+ "val/COCOAP50_95": ap50_95,
+ "train/epoch": self.epoch + 1,
+ })
+ self.wandb_logger.log_images(predictions)
+ logger.info("\n" + summary)
+ synchronize()
+
+ self.save_ckpt("last_epoch", update_best_ckpt, ap=ap50_95)
+ if self.save_history_ckpt:
+ self.save_ckpt(f"epoch_{self.epoch + 1}", ap=ap50_95)
+
+ def save_ckpt(self, ckpt_name, update_best_ckpt=False, ap=None):
+ if self.rank == 0:
+ save_model = self.ema_model.ema if self.use_model_ema else self.model
+ logger.info("Save weights to {}".format(self.file_name))
+ ckpt_state = {
+ "start_epoch": self.epoch + 1,
+ "model": save_model.state_dict(),
+ "optimizer": self.optimizer.state_dict(),
+ "best_ap": self.best_ap,
+ "curr_ap": ap,
+ }
+ save_checkpoint(
+ ckpt_state,
+ update_best_ckpt,
+ self.file_name,
+ ckpt_name,
+ )
+
+ if self.args.logger == "wandb":
+ self.wandb_logger.save_checkpoint(
+ self.file_name,
+ ckpt_name,
+ update_best_ckpt,
+ metadata={
+ "epoch": self.epoch + 1,
+ "optimizer": self.optimizer.state_dict(),
+ "best_ap": self.best_ap,
+ "curr_ap": ap
+ }
+ )
\ No newline at end of file
diff --git a/yolort/utils/__init__.py b/yolort/utils/__init__.py
index c16127d2..cf4c00b0 100644
--- a/yolort/utils/__init__.py
+++ b/yolort/utils/__init__.py
@@ -14,6 +14,15 @@
from .hooks import FeatureExtractor
from .image_utils import cv2_imshow, get_image_from_url, read_image_to_tensor
from .visualizer import Visualizer
+from .allreduce_norm import *
+from .boxes import *
+from .checkpoint import load_ckpt, save_checkpoint
+from .dist import *
+from .ema import *
+from .logger import WandbLogger, setup_logger
+from .lr_scheduler import LRScheduler
+from .metric import *
+from .model_utils import *
__all__ = [
diff --git a/yolort/utils/allreduce_norm.py b/yolort/utils/allreduce_norm.py
new file mode 100644
index 00000000..142c76c7
--- /dev/null
+++ b/yolort/utils/allreduce_norm.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii Inc. All rights reserved.
+
+import pickle
+from collections import OrderedDict
+
+import torch
+from torch import distributed as dist
+from torch import nn
+
+from .dist import _get_global_gloo_group, get_world_size
+
+ASYNC_NORM = (
+ nn.BatchNorm1d,
+ nn.BatchNorm2d,
+ nn.BatchNorm3d,
+ nn.InstanceNorm1d,
+ nn.InstanceNorm2d,
+ nn.InstanceNorm3d,
+)
+
+__all__ = [
+ "get_async_norm_states",
+ "pyobj2tensor",
+ "tensor2pyobj",
+ "all_reduce",
+ "all_reduce_norm",
+]
+
+
+def get_async_norm_states(module):
+ async_norm_states = OrderedDict()
+ for name, child in module.named_modules():
+ if isinstance(child, ASYNC_NORM):
+ for k, v in child.state_dict().items():
+ async_norm_states[".".join([name, k])] = v
+ return async_norm_states
+
+
+def pyobj2tensor(pyobj, device="cuda"):
+ """serialize picklable python object to tensor"""
+ storage = torch.ByteStorage.from_buffer(pickle.dumps(pyobj))
+ return torch.ByteTensor(storage).to(device=device)
+
+
+def tensor2pyobj(tensor):
+ """deserialize tensor to picklable python object"""
+ return pickle.loads(tensor.cpu().numpy().tobytes())
+
+
+def _get_reduce_op(op_name):
+ return {
+ "sum": dist.ReduceOp.SUM,
+ "mean": dist.ReduceOp.SUM,
+ }[op_name.lower()]
+
+
+def all_reduce(py_dict, op="sum", group=None):
+ """
+ Apply all reduce function for python dict object.
+ NOTE: make sure that every py_dict has the same keys and values are in the same shape.
+
+ Args:
+ py_dict (dict): dict to apply all reduce op.
+ op (str): operator, could be "sum" or "mean".
+ """
+ world_size = get_world_size()
+ if world_size == 1:
+ return py_dict
+ if group is None:
+ group = _get_global_gloo_group()
+ if dist.get_world_size(group) == 1:
+ return py_dict
+
+ # all reduce logic across different devices.
+ py_key = list(py_dict.keys())
+ py_key_tensor = pyobj2tensor(py_key)
+ dist.broadcast(py_key_tensor, src=0)
+ py_key = tensor2pyobj(py_key_tensor)
+
+ tensor_shapes = [py_dict[k].shape for k in py_key]
+ tensor_numels = [py_dict[k].numel() for k in py_key]
+
+ flatten_tensor = torch.cat([py_dict[k].flatten() for k in py_key])
+ dist.all_reduce(flatten_tensor, op=_get_reduce_op(op))
+ if op == "mean":
+ flatten_tensor /= world_size
+
+ split_tensors = [
+ x.reshape(shape)
+ for x, shape in zip(torch.split(flatten_tensor, tensor_numels), tensor_shapes)
+ ]
+ return OrderedDict({k: v for k, v in zip(py_key, split_tensors)})
+
+
+def all_reduce_norm(module):
+ """
+ All reduce norm statistics in different devices.
+ """
+ states = get_async_norm_states(module)
+ states = all_reduce(states, op="mean")
+ module.load_state_dict(states, strict=False)
diff --git a/yolort/utils/boxes.py b/yolort/utils/boxes.py
new file mode 100644
index 00000000..a8eaf3f4
--- /dev/null
+++ b/yolort/utils/boxes.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+# Copyright (c) Megvii Inc. All rights reserved.
+
+import numpy as np
+
+import torch
+import torchvision
+
+__all__ = [
+ "filter_box",
+ "postprocess",
+ "bboxes_iou",
+ "matrix_iou",
+ "adjust_box_anns",
+ "xyxy2xywh",
+ "xyxy2cxcywh",
+ "cxcywh2xyxy",
+]
+
+
+def filter_box(output, scale_range):
+ """
+ output: (N, 5+class) shape
+ """
+ min_scale, max_scale = scale_range
+ w = output[:, 2] - output[:, 0]
+ h = output[:, 3] - output[:, 1]
+ keep = (w * h > min_scale * min_scale) & (w * h < max_scale * max_scale)
+ return output[keep]
+
+
+def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False):
+    """
+    Rewrite the first four columns of ``prediction`` in place from (cx, cy, w, h) to corner
+    form, then per image keep rows whose obj_conf * class_conf >= ``conf_thre`` and run NMS.
+    Returns a list with, per image, a 7-column detections tensor or None if nothing is kept.
+    """
+    box_corner = prediction.new(prediction.shape)
+    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
+    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
+    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
+    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
+    prediction[:, :, :4] = box_corner[:, :, :4]
+
+    output = [None for _ in range(len(prediction))]
+    for i, image_pred in enumerate(prediction):
+        # If none are remaining => process next image
+        if not image_pred.size(0):
+            continue
+        # Get score and class with highest confidence
+        class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True)
+
+        # Squeeze dim 1 only: a bare squeeze() turns a single-row mask into a 0-dim tensor.
+        conf_mask = image_pred[:, 4] * class_conf.squeeze(1) >= conf_thre
+        # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
+        detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1)
+        detections = detections[conf_mask]
+        if not detections.size(0):
+            continue
+
+        if class_agnostic:
+            nms_out_index = torchvision.ops.nms(
+                detections[:, :4],
+                detections[:, 4] * detections[:, 5],
+                nms_thre,
+            )
+        else:
+            nms_out_index = torchvision.ops.batched_nms(
+                detections[:, :4],
+                detections[:, 4] * detections[:, 5],
+                detections[:, 6],
+                nms_thre,
+            )
+        output[i] = detections[nms_out_index]  # each image index is visited exactly once
+
+    return output
+
+
+def bboxes_iou(bboxes_a, bboxes_b, xyxy=True):
+ if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:
+ raise IndexError
+
+ if xyxy:
+ tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2])
+ br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:])
+ area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1)
+ area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1)
+ else:
+ tl = torch.max(
+ (bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2),
+ (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2),
+ )
+ br = torch.min(
+ (bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2),
+ (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2),
+ )
+
+ area_a = torch.prod(bboxes_a[:, 2:], 1)
+ area_b = torch.prod(bboxes_b[:, 2:], 1)
+ en = (tl < br).type(tl.type()).prod(dim=2)
+ area_i = torch.prod(br - tl, 2) * en # * ((tl < br).all())
+ return area_i / (area_a[:, None] + area_b - area_i)
+
+
+def matrix_iou(a, b):
+ """
+    return iou of a and b, numpy version for data augmentation
+ """
+ lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
+ rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
+
+ area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
+ area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
+ area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
+ return area_i / (area_a[:, np.newaxis] + area_b - area_i + 1e-12)
+
+
+def adjust_box_anns(bbox, scale_ratio, padw, padh, w_max, h_max):
+ bbox[:, 0::2] = np.clip(bbox[:, 0::2] * scale_ratio + padw, 0, w_max)
+ bbox[:, 1::2] = np.clip(bbox[:, 1::2] * scale_ratio + padh, 0, h_max)
+ return bbox
+
+
+def xyxy2xywh(bboxes):
+ bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]
+ bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]
+ return bboxes
+
+
+def xyxy2cxcywh(bboxes):
+ bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]
+ bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]
+ bboxes[:, 0] = bboxes[:, 0] + bboxes[:, 2] * 0.5
+ bboxes[:, 1] = bboxes[:, 1] + bboxes[:, 3] * 0.5
+ return bboxes
+
+
+def cxcywh2xyxy(bboxes):
+ bboxes[:, 0] = bboxes[:, 0] - bboxes[:, 2] * 0.5
+ bboxes[:, 1] = bboxes[:, 1] - bboxes[:, 3] * 0.5
+ bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
+ bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]
+ return bboxes
\ No newline at end of file
diff --git a/yolort/utils/checkpoint.py b/yolort/utils/checkpoint.py
new file mode 100644
index 00000000..a0c200e4
--- /dev/null
+++ b/yolort/utils/checkpoint.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii Inc. All rights reserved.
+import os
+import shutil
+from loguru import logger
+
+import torch
+
+
+def load_ckpt(model, ckpt):
+ model_state_dict = model.state_dict()
+ load_dict = {}
+ for key_model, v in model_state_dict.items():
+ if key_model not in ckpt:
+ logger.warning(
+ "{} is not in the ckpt. Please double check and see if this is desired.".format(
+ key_model
+ )
+ )
+ continue
+ v_ckpt = ckpt[key_model]
+ if v.shape != v_ckpt.shape:
+ logger.warning(
+ "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format(
+ key_model, v_ckpt.shape, key_model, v.shape
+ )
+ )
+ continue
+ load_dict[key_model] = v_ckpt
+
+ model.load_state_dict(load_dict, strict=False)
+ return model
+
+
+def save_checkpoint(state, is_best, save_dir, model_name=""):
+    """Save ``state`` to ``save_dir``; when ``is_best``, also copy the file to ``best_ckpt.pth``."""
+    os.makedirs(save_dir, exist_ok=True)  # no exists()/makedirs() race between processes
+    filename = os.path.join(save_dir, model_name + "_ckpt.pth")
+    torch.save(state, filename)
+    if is_best:
+        best_filename = os.path.join(save_dir, "best_ckpt.pth")
+        shutil.copyfile(filename, best_filename)
diff --git a/yolort/utils/dist.py b/yolort/utils/dist.py
new file mode 100644
index 00000000..a4b46801
--- /dev/null
+++ b/yolort/utils/dist.py
@@ -0,0 +1,294 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# This file mainly comes from
+# https://github.com/facebookresearch/detectron2/blob/master/detectron2/utils/comm.py
+# Copyright (c) Facebook, Inc. and its affiliates.
+# Copyright (c) Megvii Inc. All rights reserved.
+"""
+This file contains primitives for multi-gpu communication.
+This is useful when doing distributed training.
+"""
+
+import functools
+import os
+import pickle
+import time
+from contextlib import contextmanager
+from loguru import logger
+
+import numpy as np
+
+import torch
+from torch import distributed as dist
+
+__all__ = [
+ "get_num_devices",
+ "wait_for_the_master",
+ "is_main_process",
+ "synchronize",
+ "get_world_size",
+ "get_rank",
+ "get_local_rank",
+ "get_local_size",
+ "time_synchronized",
+ "gather",
+ "all_gather",
+]
+
+_LOCAL_PROCESS_GROUP = None
+
+
def get_num_devices():
    """Return the number of GPU devices this process is expected to see.

    If ``CUDA_VISIBLE_DEVICES`` is set, the number of comma-separated entries
    is returned. Otherwise the number of lines printed by ``nvidia-smi -L``
    is returned.

    NOTE(review): an empty ``CUDA_VISIBLE_DEVICES`` or a missing
    ``nvidia-smi`` both yield 1 (one empty entry/line); kept as-is because
    callers may rely on a non-zero count.
    """
    gpu_list = os.getenv("CUDA_VISIBLE_DEVICES", None)
    if gpu_list is not None:
        return len(gpu_list.split(","))

    # Close the pipe deterministically instead of leaking it until GC.
    with os.popen("nvidia-smi -L") as pipe:
        devices_list_info = pipe.read().strip().split("\n")
    return len(devices_list_info)
+
+
@contextmanager
def wait_for_the_master(local_rank: int = None):
    """
    Make all processes wait for the master (local rank 0) to do some task.

    Non-master ranks block on a barrier before running the ``with`` body;
    the master runs the body first and then joins the barrier, releasing
    the others.

    Args:
        local_rank (int): the rank of the current process. Default to None.
            If None, it will use the rank of the current process.
    """
    if local_rank is None:
        local_rank = get_local_rank()

    if local_rank > 0:
        dist.barrier()
    try:
        yield
    finally:
        # Join the barrier even when the body raised on the master, otherwise
        # every other rank would stay blocked forever. No `return` here: a
        # return inside `finally` would swallow the in-flight exception.
        if local_rank == 0 and dist.is_available() and dist.is_initialized():
            dist.barrier()
+
+
def synchronize():
    """
    Barrier across all processes; a no-op when torch.distributed is
    unavailable, not initialized, or the world has a single process.
    """
    distributed_ready = dist.is_available() and dist.is_initialized()
    if distributed_ready and dist.get_world_size() != 1:
        dist.barrier()
+
+
def get_world_size() -> int:
    """Return the distributed world size, or 1 outside distributed runs."""
    if dist.is_available() and dist.is_initialized():
        return dist.get_world_size()
    return 1
+
+
def get_rank() -> int:
    """Return the global rank of this process, or 0 outside distributed runs."""
    if dist.is_available() and dist.is_initialized():
        return dist.get_rank()
    return 0
+
+
def get_local_rank() -> int:
    """
    Returns:
        The rank of the current process within the local (per-machine) process
        group. Falls back to the global rank when no local group is registered.
    """
    if _LOCAL_PROCESS_GROUP is None:
        return get_rank()

    if dist.is_available() and dist.is_initialized():
        return dist.get_rank(group=_LOCAL_PROCESS_GROUP)
    return 0
+
+
def get_local_size() -> int:
    """
    Returns:
        The size of the per-machine process group, i.e. the number of
        processes per machine; 1 outside distributed runs.
    """
    if dist.is_available() and dist.is_initialized():
        return dist.get_world_size(group=_LOCAL_PROCESS_GROUP)
    return 1
+
+
def is_main_process() -> bool:
    """Return True when this process has global rank 0."""
    rank = get_rank()
    return rank == 0
+
+
@functools.lru_cache()
def _get_global_gloo_group():
    """
    Return a process group usable for CPU tensors that contains all ranks.

    When the default backend is NCCL a new gloo group is created; otherwise
    the default WORLD group is returned. The result is cached.
    """
    if dist.get_backend() != "nccl":
        return dist.group.WORLD
    return dist.new_group(backend="gloo")
+
+
def _serialize_to_tensor(data, group):
    """Pickle ``data`` into a 1-D byte tensor on the device matching ``group``.

    The tensor lives on CPU for a gloo group and on CUDA otherwise; only the
    gloo and nccl backends are accepted.
    """
    backend = dist.get_backend(group)
    assert backend in ["gloo", "nccl"]
    if backend == "gloo":
        device = torch.device("cpu")
    else:
        device = torch.device("cuda")

    payload = pickle.dumps(data)
    payload_size = len(payload)
    if payload_size > 1024 ** 3:
        logger.warning(
            "Rank {} trying to all-gather {:.2f} GB of data on device {}".format(
                get_rank(), payload_size / (1024 ** 3), device
            )
        )
    byte_storage = torch.ByteStorage.from_buffer(payload)
    return torch.ByteTensor(byte_storage).to(device=device)
+
+
def _pad_to_largest_tensor(tensor, group):
    """
    Exchange tensor lengths across ``group`` and zero-pad the local tensor
    to the largest length.

    Returns:
        list[int]: size of the tensor, on each rank
        Tensor: padded tensor that has the max size
    """
    world_size = dist.get_world_size(group=group)
    assert (
        world_size >= 1
    ), "comm.gather/all_gather must be called from ranks within the given group!"

    own_length = tensor.numel()
    own_length_t = torch.tensor([own_length], dtype=torch.int64, device=tensor.device)
    gathered = [
        torch.zeros([1], dtype=torch.int64, device=tensor.device)
        for _ in range(world_size)
    ]
    dist.all_gather(gathered, own_length_t, group=group)
    size_list = [int(entry.item()) for entry in gathered]

    # torch all_gather cannot gather tensors of different shapes, so every
    # rank pads its payload up to the longest one.
    longest = max(size_list)
    if own_length != longest:
        filler = torch.zeros(
            (longest - own_length,), dtype=torch.uint8, device=tensor.device
        )
        tensor = torch.cat((tensor, filler), dim=0)
    return size_list, tensor
+
+
def all_gather(data, group=None):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors).

    Args:
        data: any picklable object
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.
    Returns:
        list[data]: list of data gathered from each rank
    """
    if get_world_size() == 1:
        return [data]
    if group is None:
        group = _get_global_gloo_group()
    if dist.get_world_size(group) == 1:
        return [data]

    payload = _serialize_to_tensor(data, group)
    size_list, payload = _pad_to_largest_tensor(payload, group)
    longest = max(size_list)

    # One receive buffer per rank, all of the padded length.
    receive_buffers = [
        torch.empty((longest,), dtype=torch.uint8, device=payload.device)
        for _ in size_list
    ]
    dist.all_gather(receive_buffers, payload, group=group)

    # NOTE: pickle.loads is only safe because the bytes come from peer ranks
    # of the same job; never feed it externally supplied data.
    return [
        pickle.loads(received.cpu().numpy().tobytes()[:size])
        for size, received in zip(size_list, receive_buffers)
    ]
+
+
def gather(data, dst=0, group=None):
    """
    Run gather on arbitrary picklable data (not necessarily tensors).

    Args:
        data: any picklable object
        dst (int): destination rank
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.

    Returns:
        list[data]: on dst, a list of data gathered from each rank. Otherwise,
            an empty list.
    """
    if get_world_size() == 1:
        return [data]
    if group is None:
        group = _get_global_gloo_group()
    if dist.get_world_size(group=group) == 1:
        return [data]
    rank = dist.get_rank(group=group)

    payload = _serialize_to_tensor(data, group)
    size_list, payload = _pad_to_largest_tensor(payload, group)

    # Non-destination ranks only contribute their payload.
    if rank != dst:
        dist.gather(payload, [], dst=dst, group=group)
        return []

    longest = max(size_list)
    receive_buffers = [
        torch.empty((longest,), dtype=torch.uint8, device=payload.device)
        for _ in size_list
    ]
    dist.gather(payload, receive_buffers, dst=dst, group=group)

    # NOTE: pickle.loads is only safe because the bytes come from peer ranks
    # of the same job; never feed it externally supplied data.
    return [
        pickle.loads(received.cpu().numpy().tobytes()[:size])
        for size, received in zip(size_list, receive_buffers)
    ]
+
+
def shared_random_seed():
    """
    Returns:
        int: a random number that is the same across all workers.
            If workers need a shared RNG, they can use this shared seed to
            create one.
    All workers must call this function, otherwise it will deadlock.
    """
    # Every rank draws its own candidate; the first gathered entry wins.
    candidate = np.random.randint(2 ** 31)
    return all_gather(candidate)[0]
+
+
def time_synchronized():
    """Return ``time.time()`` after waiting for pending CUDA work, if any."""
    cuda_present = torch.cuda.is_available()
    if cuda_present:
        # Flush queued kernels so the timestamp reflects finished GPU work.
        torch.cuda.synchronize()
    return time.time()
\ No newline at end of file
diff --git a/yolort/utils/ema.py b/yolort/utils/ema.py
new file mode 100644
index 00000000..364e8c87
--- /dev/null
+++ b/yolort/utils/ema.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii Inc. All rights reserved.
+import math
+from copy import deepcopy
+
+import torch
+import torch.nn as nn
+
+__all__ = ["ModelEMA", "is_parallel"]
+
+
def is_parallel(model):
    """Return True if ``model`` is wrapped in DataParallel or DistributedDataParallel."""
    return isinstance(
        model,
        (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel),
    )
+
+
class ModelEMA:
    """
    Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models

    Keeps a moving average of the floating-point entries of the model
    state_dict (parameters and buffers), similar to
    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
    A smoothed version of the weights is necessary for some training schemes
    to perform well. This class is sensitive to where it is initialized in the
    sequence of model init, GPU assignment and distributed training wrappers.
    """

    def __init__(self, model, decay=0.9999, updates=0):
        """
        Args:
            model (nn.Module): model to apply EMA.
            decay (float): ema decay rate.
            updates (int): counter of EMA updates.
        """
        # The EMA copy is taken from the unwrapped module and kept in eval mode.
        unwrapped = model.module if is_parallel(model) else model
        self.ema = deepcopy(unwrapped).eval()
        self.updates = updates
        # Exponential ramp of the decay (to help early epochs).
        self.decay = lambda x: decay * (1 - math.exp(-x / 2000))
        for param in self.ema.parameters():
            param.requires_grad_(False)

    def update(self, model):
        """Blend the current weights of ``model`` into the EMA copy."""
        with torch.no_grad():
            self.updates += 1
            momentum = self.decay(self.updates)

            unwrapped = model.module if is_parallel(model) else model
            live_state = unwrapped.state_dict()
            for key, averaged in self.ema.state_dict().items():
                # Integer buffers are left untouched.
                if not averaged.dtype.is_floating_point:
                    continue
                averaged *= momentum
                averaged += (1.0 - momentum) * live_state[key].detach()
\ No newline at end of file
diff --git a/yolort/utils/logger.py b/yolort/utils/logger.py
index 866c189c..00f1d125 100644
--- a/yolort/utils/logger.py
+++ b/yolort/utils/logger.py
@@ -1,5 +1,11 @@
-import datetime
+import os
+import sys
+import cv2
import time
+import datetime
+import inspect
+import numpy as np
+from loguru import logger
from collections import defaultdict, deque
import torch
@@ -197,3 +203,429 @@ def get_rank():
def is_main_process():
return get_rank() == 0
+
def get_caller_name(depth=0):
    """
    Args:
        depth (int): Depth of caller context, use 0 for caller depth.
            Default value: 0.

    Returns:
        str: module name of the caller
    """
    # Walking f_back by hand is a little bit faster than inspect.stack().
    frame = inspect.currentframe().f_back
    hops_left = depth
    while hops_left > 0:
        frame = frame.f_back
        hops_left -= 1

    return frame.f_globals["__name__"]
+
+
class StreamToLoguru:
    """
    Stream object that redirects writes from selected modules to loguru and
    passes everything else through to the real stdout.
    """

    def __init__(self, level="INFO", caller_names=("apex", "pycocotools")):
        """
        Args:
            level(str): log level string of loguru. Default value: "INFO".
            caller_names(tuple): caller names of redirected module.
                Default value: (apex, pycocotools).
        """
        self.level = level
        self.linebuf = ""
        self.caller_names = caller_names

    def write(self, buf):
        # Only the top-level package of the writing module is matched.
        top_level_package = get_caller_name(depth=1).split(".")[0]
        if top_level_package not in self.caller_names:
            sys.__stdout__.write(buf)
            return

        for text_line in buf.rstrip().splitlines():
            # depth=2 attributes the record to the module that wrote it.
            logger.opt(depth=2).log(self.level, text_line.rstrip())

    def flush(self):
        # flush is related with CPR (cursor position report) in terminal
        return sys.__stdout__.flush()

    def isatty(self):
        # Some libraries (e.g. jax, preinstalled on colab) probe `isatty` on
        # stdout; see https://github.com/Megvii-BaseDetection/YOLOX/issues/1437
        # and
        # https://github.com/google/jax/blob/10720258ea7fb5bde997dfa2f3f71135ab7a6733/jax/_src/pretty_printer.py#L54 # noqa
        return sys.__stdout__.isatty()

    def fileno(self):
        # Needed by debug tools like pdb.
        return sys.__stdout__.fileno()
+
+
def redirect_sys_output(log_level="INFO"):
    """Replace ``sys.stderr`` and ``sys.stdout`` with one shared StreamToLoguru."""
    sys.stderr = sys.stdout = StreamToLoguru(log_level)
+
+
def setup_logger(save_dir, distributed_rank=0, filename="log.txt", mode="a"):
    """Set up the loguru logger for training and testing.

    Args:
        save_dir(str): location to save log file
        distributed_rank(int): device rank when multi-gpu environment
        filename (string): log save name.
        mode(str): log file write mode: "a" appends to an existing file,
            "o" deletes it first. default is `a`.

    Returns nothing; the global loguru ``logger`` is configured in place.
    """
    log_path = os.path.join(save_dir, filename)
    line_format = (
        "{time:YYYY-MM-DD HH:mm:ss} | "
        "{level: <8} | "
        "{name}:{line} - {message}"
    )

    logger.remove()
    if mode == "o" and os.path.exists(log_path):
        os.remove(log_path)

    # only keep logger in rank0 process
    # NOTE(review): both sinks are assumed to belong to the rank-0 branch;
    # the collapsed indentation of the patch does not show it — confirm.
    if distributed_rank == 0:
        logger.add(
            sys.stderr,
            format=line_format,
            level="INFO",
            enqueue=True,
        )
        logger.add(log_path)

    # redirect stdout/stderr to loguru
    redirect_sys_output("INFO")
+
+
class WandbLogger(object):
    """
    Log training runs, datasets, models, and predictions to Weights & Biases.
    This logger sends information to W&B at wandb.ai.
    By default, this information includes hyperparameters,
    system configuration and metrics, model metrics,
    and basic data metrics and analyses.

    For more information, please refer to:
    https://docs.wandb.ai/guides/track
    https://docs.wandb.ai/guides/integrations/other/yolox
    """

    def __init__(
        self,
        project=None,
        name=None,
        id=None,
        entity=None,
        save_dir=None,
        config=None,
        val_dataset=None,
        num_eval_images=100,
        log_checkpoints=False,
        **kwargs
    ):
        """
        Args:
            project (str): wandb project name.
            name (str): wandb run name.
            id (str): wandb run id.
            entity (str): wandb entity name.
            save_dir (str): save directory.
            config (dict): config dict.
            val_dataset (Dataset): validation dataset; may be None, in which
                case no validation images are logged.
            num_eval_images (int): number of images from the validation set to
                log; -1 logs the whole set.
            log_checkpoints (bool | str): log checkpoints. Accepts a bool or
                the strings "True"/"true" (the form received from the
                command line).
            **kwargs: other kwargs forwarded to ``wandb.init``.

        Usage:
            Any arguments for wandb.init can be provided on the command line
            using the prefix `wandb-`, e.g.

                python tools/train.py .... --logger wandb wandb-project <p>
                    wandb-name <n> wandb-id <i> wandb-save_dir <d>
                    wandb-num_eval_images <k> wandb-log_checkpoints True

            The val_dataset argument is not open to the command line.
        """
        try:
            import wandb
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "wandb is not installed."
                "Please install wandb using pip install wandb"
            )
        self.wandb = wandb

        # VOCDetection is only needed to recognise VOC-style predictions in
        # log_images; its absence must not make the whole logger unusable.
        try:
            from yolox.data.datasets import VOCDetection
        except ImportError:
            VOCDetection = None

        self.project = project
        self.name = name
        self.id = id
        self.save_dir = save_dir
        self.config = config
        self.kwargs = kwargs
        self.entity = entity
        self._run = None
        self.val_artifact = None
        self.val_dataset = None
        self.cats = []
        self.id_to_class = {}

        dataset_size = len(val_dataset) if val_dataset is not None else 0
        if num_eval_images == -1:
            self.num_log_images = dataset_size
        else:
            self.num_log_images = min(num_eval_images, dataset_size)

        # A real bool must be honoured as well as the CLI string form.
        self.log_checkpoints = (
            log_checkpoints is True or str(log_checkpoints).lower() == "true"
        )
        self._wandb_init = dict(
            project=self.project,
            name=self.name,
            id=self.id,
            entity=self.entity,
            dir=self.save_dir,
            resume="allow",
        )
        self._wandb_init.update(**kwargs)

        # Touch the property so the run is created (or reused) right away.
        _ = self.run

        if self.config:
            self.run.config.update(self.config)
        self.run.define_metric("train/epoch")
        self.run.define_metric("val/*", step_metric="train/epoch")
        self.run.define_metric("train/step")
        self.run.define_metric("train/*", step_metric="train/step")

        self.voc_dataset = VOCDetection

        if val_dataset is not None and self.num_log_images != 0:
            self.val_dataset = val_dataset
            self.cats = val_dataset.cats
            self.id_to_class = {cls["id"]: cls["name"] for cls in self.cats}
            self._log_validation_set(val_dataset)

    @property
    def run(self):
        """The active wandb run; created lazily, reusing an existing one."""
        if self._run is None:
            if self.wandb.run is not None:
                logger.info(
                    "There is a wandb run already in progress "
                    "and newly created instances of `WandbLogger` will reuse"
                    " this run. If this is not desired, call `wandb.finish()`"
                    "before instantiating `WandbLogger`."
                )
                self._run = self.wandb.run
            else:
                self._run = self.wandb.init(**self._wandb_init)
        return self._run

    def _log_validation_set(self, val_dataset):
        """
        Log validation set to wandb.

        Args:
            val_dataset (Dataset): validation dataset.
        """
        if self.val_artifact is None:
            self.val_artifact = self.wandb.Artifact(name="validation_images", type="dataset")
            self.val_table = self.wandb.Table(columns=["id", "input"])

            for i in range(self.num_log_images):
                data_point = val_dataset[i]
                img = data_point[0]
                img_id = data_point[3]
                # assumes img is a channel-first BGR array — TODO confirm
                img = np.transpose(img, (1, 2, 0))
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                if isinstance(img_id, torch.Tensor):
                    img_id = img_id.item()

                self.val_table.add_data(img_id, self.wandb.Image(img))

            self.val_artifact.add(self.val_table, "validation_images_table")
            self.run.use_artifact(self.val_artifact)
            self.val_artifact.wait()

    def _convert_prediction_format(self, predictions):
        """Convert per-image predictions to plain-Python dicts keyed by int image id.

        Each value of ``predictions`` is either a ``(bboxes, cls, scores)``
        triple or a dict with keys "bboxes", "categories" and "scores".
        """
        image_wise_data = defaultdict(int)

        for img_id, val in predictions.items():
            # Unpacking a tuple never raises KeyError, so the dict form has
            # to be detected explicitly.
            if isinstance(val, dict):
                bboxes, cls, scores = val["bboxes"], val["categories"], val["scores"]
            else:
                bboxes, cls, scores = val

            # Keep only detections whose box, class and score are all present.
            kept = []
            if bboxes is not None:
                for box, category, score in zip(bboxes, cls, scores):
                    if box is None or score is None or category is None:
                        continue
                    kept.append((box, category, score))

            image_wise_data[int(img_id)] = {
                "bboxes": [box.numpy().tolist() for box, _, _ in kept],
                "scores": [score.numpy().item() for _, _, score in kept],
                "categories": [
                    self.val_dataset.class_ids[int(category)] for _, category, _ in kept
                ],
            }

        return image_wise_data

    def log_metrics(self, metrics, step=None):
        """
        Args:
            metrics (dict): metrics dict; left unmodified.
            step (int): step number, logged as "train/step" when given.
        """
        # Build a fresh payload so the caller's dict is not mutated.
        payload = {
            key: (value.item() if isinstance(value, torch.Tensor) else value)
            for key, value in metrics.items()
        }
        if step is not None:
            payload["train/step"] = step
        self.run.log(payload)

    def log_images(self, predictions):
        """Log a table of validation images overlaid with predicted boxes.

        Does nothing when there are no predictions or no validation images
        were logged at construction time.
        """
        if len(predictions) == 0 or self.val_artifact is None or self.num_log_images == 0:
            return

        table_ref = self.val_artifact.get("validation_images_table")

        columns = ["id", "predicted"]
        columns.extend(cls["name"] for cls in self.cats)

        if self.voc_dataset is not None and isinstance(self.val_dataset, self.voc_dataset):
            predictions = self._convert_prediction_format(predictions)

        result_table = self.wandb.Table(columns=columns)

        for idx, val in table_ref.iterrows():
            score_sums = defaultdict(int)
            num_occurrences = defaultdict(int)

            img_id = val[0]
            if isinstance(img_id, list):
                img_id = img_id[0]

            boxes = []
            if img_id in predictions:
                prediction = predictions[img_id]
                for bbox, category, score in zip(
                    prediction["bboxes"], prediction["categories"], prediction["scores"]
                ):
                    x0, y0, x1, y1 = bbox[0], bbox[1], bbox[2], bbox[3]
                    boxes.append(
                        {
                            "position": {
                                "minX": min(x0, x1),
                                "minY": min(y0, y1),
                                "maxX": max(x0, x1),
                                "maxY": max(y0, y1),
                            },
                            "class_id": category,
                            "domain": "pixel",
                        }
                    )
                    class_name = self.id_to_class[category]
                    score_sums[class_name] += score
                    num_occurrences[class_name] += 1

            # One column per class: mean score of its detections, 0 if none.
            average_class_score = []
            for cls in self.cats:
                if cls["name"] not in num_occurrences:
                    average_class_score.append(0)
                else:
                    average_class_score.append(
                        score_sums[cls["name"]] / num_occurrences[cls["name"]]
                    )

            result_table.add_data(
                idx,
                self.wandb.Image(
                    val[1],
                    boxes={
                        "prediction": {
                            "box_data": boxes,
                            "class_labels": self.id_to_class,
                        }
                    },
                ),
                *average_class_score
            )

        self.wandb.log({"val_results/result_table": result_table})

    def save_checkpoint(self, save_dir, model_name, is_best, metadata=None):
        """
        Args:
            save_dir (str): save directory.
            model_name (str): model name.
            is_best (bool): whether the model is the best model.
            metadata (dict): metadata to save corresponding to the checkpoint;
                may be None.
        """
        if not self.log_checkpoints:
            return

        # metadata is optional, so it cannot be probed with `in` directly.
        epoch = None
        if metadata is not None and "epoch" in metadata:
            epoch = metadata["epoch"]

        filename = os.path.join(save_dir, model_name + "_ckpt.pth")
        artifact = self.wandb.Artifact(
            name=f"run_{self.run.id}_model",
            type="model",
            metadata=metadata,
        )
        artifact.add_file(filename, name="model_ckpt.pth")

        aliases = ["latest"]
        if is_best:
            aliases.append("best")
        # `is not None` so that epoch 0 still gets its alias.
        if epoch is not None:
            aliases.append(f"epoch-{epoch}")

        self.run.log_artifact(artifact, aliases=aliases)

    def finish(self):
        """Finish the underlying wandb run."""
        self.run.finish()

    @classmethod
    def initialize_wandb_logger(cls, args, exp, val_dataset):
        """Build a logger from ``wandb-<key> <value>`` pairs in ``args.opts``.

        Values that parse as integers are passed as ``int``, all others as
        the original string.
        """
        wandb_params = dict()
        prefix = "wandb-"
        for key, value in zip(args.opts[0::2], args.opts[1::2]):
            if not key.startswith(prefix):
                continue
            try:
                wandb_params[key[len(prefix):]] = int(value)
            except ValueError:
                wandb_params[key[len(prefix):]] = value

        return cls(config=vars(exp), val_dataset=val_dataset, **wandb_params)
\ No newline at end of file
diff --git a/yolort/utils/lr_scheduler.py b/yolort/utils/lr_scheduler.py
new file mode 100644
index 00000000..42c00cf2
--- /dev/null
+++ b/yolort/utils/lr_scheduler.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii Inc. All rights reserved.
+
+import math
+from functools import partial
+
+
class LRScheduler:
    def __init__(self, name, lr, iters_per_epoch, total_epochs, **kwargs):
        """
        Supported lr schedulers:
        [cos, warmcos, yoloxwarmcos, yoloxsemiwarmcos, multistep]

        Args:
            name (str): scheduler name, one of the values above.
            lr (float): learning rate.
            iters_per_epoch (int): number of iterations in one epoch.
            total_epochs (int): number of epochs in training.
            kwargs (dict): scheduler-specific settings, stored as attributes:
                - cos: None
                - warmcos: [warmup_epochs, warmup_lr_start (default 1e-6)]
                - yoloxwarmcos: [warmup_epochs, no_aug_epochs,
                  warmup_lr_start (default 0), min_lr_ratio (default 0.2)]
                - yoloxsemiwarmcos: same as yoloxwarmcos plus
                  [semi_epoch, iters_per_epoch_semi]
                - multistep: [milestones (epochs), gamma (default 0.1)]
        """
        self.lr = lr
        self.iters_per_epoch = iters_per_epoch
        self.total_epochs = total_epochs
        self.total_iters = iters_per_epoch * total_epochs

        # Extra settings become plain attributes read by _get_lr_func.
        self.__dict__.update(kwargs)

        self.lr_func = self._get_lr_func(name)

    def update_lr(self, iters):
        """Return the learning rate for iteration ``iters``."""
        return self.lr_func(iters)

    def _get_lr_func(self, name):
        """Bind the schedule named ``name`` to this instance's settings."""
        if name == "cos":  # cosine lr schedule
            return partial(cos_lr, self.lr, self.total_iters)

        if name == "warmcos":
            warmup_total_iters = self.iters_per_epoch * self.warmup_epochs
            warmup_lr_start = getattr(self, "warmup_lr_start", 1e-6)
            return partial(
                warm_cos_lr,
                self.lr,
                self.total_iters,
                warmup_total_iters,
                warmup_lr_start,
            )

        if name == "yoloxwarmcos":
            warmup_total_iters = self.iters_per_epoch * self.warmup_epochs
            no_aug_iters = self.iters_per_epoch * self.no_aug_epochs
            warmup_lr_start = getattr(self, "warmup_lr_start", 0)
            min_lr_ratio = getattr(self, "min_lr_ratio", 0.2)
            return partial(
                yolox_warm_cos_lr,
                self.lr,
                min_lr_ratio,
                self.total_iters,
                warmup_total_iters,
                warmup_lr_start,
                no_aug_iters,
            )

        if name == "yoloxsemiwarmcos":
            warmup_lr_start = getattr(self, "warmup_lr_start", 0)
            min_lr_ratio = getattr(self, "min_lr_ratio", 0.2)
            warmup_total_iters = self.iters_per_epoch * self.warmup_epochs
            no_aug_iters = self.iters_per_epoch * self.no_aug_epochs
            normal_iters = self.iters_per_epoch * self.semi_epoch
            semi_epochs = self.total_epochs - self.semi_epoch - self.no_aug_epochs
            semi_iters = self.iters_per_epoch_semi * semi_epochs
            return partial(
                yolox_semi_warm_cos_lr,
                self.lr,
                min_lr_ratio,
                warmup_lr_start,
                self.total_iters,
                normal_iters,
                no_aug_iters,
                warmup_total_iters,
                semi_iters,
                self.iters_per_epoch,
                self.iters_per_epoch_semi,
            )

        if name == "multistep":  # stepwise lr schedule
            # Milestones are given in epochs; convert them to iterations.
            milestone_iters = [
                int(self.total_iters * epoch_mark / self.total_epochs)
                for epoch_mark in self.milestones
            ]
            gamma = getattr(self, "gamma", 0.1)
            return partial(multistep_lr, self.lr, milestone_iters, gamma)

        raise ValueError("Scheduler version {} not supported.".format(name))
+
+
def cos_lr(lr, total_iters, iters):
    """Cosine learning rate: ``lr`` at iteration 0, decaying to 0 at ``total_iters``."""
    cosine_factor = 0.5 * (1.0 + math.cos(math.pi * iters / total_iters))
    return lr * cosine_factor
+
+
def warm_cos_lr(lr, total_iters, warmup_total_iters, warmup_lr_start, iters):
    """Cosine learning rate with linear warm up.

    Args:
        lr (float): peak learning rate reached at the end of warm up.
        total_iters (int): total number of training iterations.
        warmup_total_iters (int): number of warm-up iterations; 0 disables
            warm up.
        warmup_lr_start (float): learning rate at iteration 0 of warm up.
        iters (int): current iteration.

    Returns:
        float: learning rate for ``iters``.
    """
    # The `> 0` guard keeps a zero-length warm up from dividing by zero at
    # iteration 0; the cosine branch then starts directly at `lr`.
    if warmup_total_iters > 0 and iters <= warmup_total_iters:
        lr = (lr - warmup_lr_start) * iters / float(
            warmup_total_iters
        ) + warmup_lr_start
    else:
        lr *= 0.5 * (
            1.0
            + math.cos(
                math.pi
                * (iters - warmup_total_iters)
                / (total_iters - warmup_total_iters)
            )
        )
    return lr
+
+
def yolox_warm_cos_lr(
    lr,
    min_lr_ratio,
    total_iters,
    warmup_total_iters,
    warmup_lr_start,
    no_aug_iter,
    iters,
):
    """Cosine learning rate with quadratic warm up and a constant tail.

    Args:
        lr (float): peak learning rate.
        min_lr_ratio (float): the final learning rate is ``lr * min_lr_ratio``.
        total_iters (int): total number of training iterations.
        warmup_total_iters (int): number of warm-up iterations; 0 disables
            warm up.
        warmup_lr_start (float): learning rate at iteration 0 of warm up.
        no_aug_iter (int): number of final iterations held at the minimum
            learning rate.
        iters (int): current iteration.

    Returns:
        float: learning rate for ``iters``.
    """
    min_lr = lr * min_lr_ratio
    # The `> 0` guard keeps a zero-length warm up from dividing by zero at
    # iteration 0.
    if warmup_total_iters > 0 and iters <= warmup_total_iters:
        # Quadratic (not linear) ramp from warmup_lr_start up to lr.
        lr = (lr - warmup_lr_start) * pow(
            iters / float(warmup_total_iters), 2
        ) + warmup_lr_start
    elif iters >= total_iters - no_aug_iter:
        lr = min_lr
    else:
        lr = min_lr + 0.5 * (lr - min_lr) * (
            1.0
            + math.cos(
                math.pi
                * (iters - warmup_total_iters)
                / (total_iters - warmup_total_iters - no_aug_iter)
            )
        )
    return lr
+
+
def yolox_semi_warm_cos_lr(
    lr,
    min_lr_ratio,
    warmup_lr_start,
    total_iters,
    normal_iters,
    no_aug_iters,
    warmup_total_iters,
    semi_iters,
    iters_per_epoch,
    iters_per_epoch_semi,
    iters,
):
    """Cosine learning rate with quadratic warm up for a two-phase schedule.

    Up to ``normal_iters`` the cosine advances once per iteration; after
    that each iteration advances it by
    ``iters_per_epoch / iters_per_epoch_semi``. From
    ``normal_iters + semi_iters`` on the minimum learning rate is returned.

    Args:
        lr (float): peak learning rate.
        min_lr_ratio (float): the final learning rate is ``lr * min_lr_ratio``.
        warmup_lr_start (float): learning rate at iteration 0 of warm up.
        total_iters (int): total number of iterations used by the cosine.
        normal_iters (int): iterations in the first phase.
        no_aug_iters (int): iterations excluded from the cosine period.
        warmup_total_iters (int): number of warm-up iterations; 0 disables
            warm up.
        semi_iters (int): iterations in the second phase.
        iters_per_epoch (int): iterations per epoch in the first phase.
        iters_per_epoch_semi (int): iterations per epoch in the second phase.
        iters (int): current iteration.

    Returns:
        float: learning rate for ``iters``.
    """
    min_lr = lr * min_lr_ratio
    # The `> 0` guard keeps a zero-length warm up from dividing by zero at
    # iteration 0.
    if warmup_total_iters > 0 and iters <= warmup_total_iters:
        # Quadratic (not linear) ramp from warmup_lr_start up to lr.
        lr = (lr - warmup_lr_start) * pow(
            iters / float(warmup_total_iters), 2
        ) + warmup_lr_start
    elif iters >= normal_iters + semi_iters:
        lr = min_lr
    elif iters <= normal_iters:
        lr = min_lr + 0.5 * (lr - min_lr) * (
            1.0
            + math.cos(
                math.pi
                * (iters - warmup_total_iters)
                / (total_iters - warmup_total_iters - no_aug_iters)
            )
        )
    else:
        lr = min_lr + 0.5 * (lr - min_lr) * (
            1.0
            + math.cos(
                math.pi
                * (
                    normal_iters
                    - warmup_total_iters
                    + (iters - normal_iters)
                    * iters_per_epoch
                    * 1.0
                    / iters_per_epoch_semi
                )
                / (total_iters - warmup_total_iters - no_aug_iters)
            )
        )
    return lr
+
+
def multistep_lr(lr, milestones, gamma, iters):
    """MultiStep learning rate: ``lr`` scaled by ``gamma`` once per milestone reached."""
    for boundary in milestones:
        scale = gamma if iters >= boundary else 1.0
        lr = lr * scale
    return lr
diff --git a/yolort/utils/metric.py b/yolort/utils/metric.py
new file mode 100644
index 00000000..f04013a3
--- /dev/null
+++ b/yolort/utils/metric.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright (c) Megvii Inc. All rights reserved.
+import functools
+import os
+import time
+from collections import defaultdict, deque
+import psutil
+
+import numpy as np
+
+import torch
+
+__all__ = [
+ "AverageMeter",
+ "MeterBuffer",
+ "get_total_and_free_memory_in_Mb",
+ "occupy_mem",
+ "gpu_mem_usage",
+ "mem_usage"
+]
+
+
def get_total_and_free_memory_in_Mb(cuda_device):
    """Query ``nvidia-smi`` for the memory of one GPU.

    Args:
        cuda_device (int): device index. When ``CUDA_VISIBLE_DEVICES`` is
            set, it is treated as an index into that list and mapped to the
            listed device id.

    Returns:
        tuple[int, int]: ``(total, used)`` as reported by ``nvidia-smi``
        with ``nounits``. Note that, despite the function name, the second
        value is the *used* memory, not the free memory.
    """
    # Close the pipe deterministically instead of leaking it until GC.
    with os.popen(
        "nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader"
    ) as pipe:
        devices_info = pipe.read().strip().split("\n")
    if "CUDA_VISIBLE_DEVICES" in os.environ:
        visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
        cuda_device = int(visible_devices[cuda_device])
    total, used = devices_info[int(cuda_device)].split(",")
    return int(total), int(used)
+
+
def occupy_mem(cuda_device, mem_ratio=0.9):
    """
    Pre-allocate GPU memory for training to avoid memory fragmentation.

    Allocates a float tensor of shape (256, 1024, block_mem), where block_mem
    is ``int(total * mem_ratio) - used``, deletes it, then sleeps 5 seconds.
    """
    total, used = get_total_and_free_memory_in_Mb(cuda_device)
    block_mem = int(total * mem_ratio) - used
    placeholder = torch.cuda.FloatTensor(256, 1024, block_mem)
    del placeholder
    time.sleep(5)
+
+
def gpu_mem_usage():
    """
    Return the peak GPU memory allocated on the current device, in MB
    (``torch.cuda.max_memory_allocated`` divided by 1024 * 1024).
    """
    bytes_per_mb = 1024 * 1024
    return torch.cuda.max_memory_allocated() / bytes_per_mb
+
+
def mem_usage():
    """
    Return the used memory of the current machine in GB, as reported by
    ``psutil.virtual_memory().used``.
    """
    bytes_per_gb = 1 << 30
    return psutil.virtual_memory().used / bytes_per_gb
+
+
class AverageMeter:
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.
    """

    def __init__(self, window_size=50):
        # The deque keeps only the most recent `window_size` values; the
        # running total/count cover every value since the last reset().
        self._deque = deque(maxlen=window_size)
        self._total = 0.0
        self._count = 0

    def update(self, value):
        """Record one new value."""
        self._deque.append(value)
        self._total += value
        self._count += 1

    @property
    def median(self):
        """Median of the values currently in the window."""
        window = np.array(list(self._deque))
        return np.median(window)

    @property
    def avg(self):
        """Mean of the values currently in the window (nan if empty)."""
        window = np.array(list(self._deque))
        return window.mean()

    @property
    def global_avg(self):
        """Mean over all values since the last reset (0.0 if none)."""
        # The 1e-5 floor avoids a division by zero before the first update.
        return self._total / max(self._count, 1e-5)

    @property
    def latest(self):
        """Most recent value, or None when the window is empty."""
        if len(self._deque) > 0:
            return self._deque[-1]
        return None

    @property
    def total(self):
        """Sum of all values since the last reset."""
        return self._total

    def reset(self):
        """Drop the window and the global statistics."""
        self._deque.clear()
        self._total = 0.0
        self._count = 0

    def clear(self):
        """Drop the window only; the global statistics are kept."""
        self._deque.clear()
+
+
class MeterBuffer(defaultdict):
    """Dictionary of named AverageMeter objects, created on first access."""

    def __init__(self, window_size=20):
        # Missing keys get a fresh meter with the shared window size.
        super().__init__(functools.partial(AverageMeter, window_size=window_size))

    def reset(self):
        """Reset every meter (window and global statistics)."""
        for meter in self.values():
            meter.reset()

    def get_filtered_meter(self, filter_key="time"):
        """Return the meters whose name contains ``filter_key``."""
        return {name: meter for name, meter in self.items() if filter_key in name}

    def update(self, values=None, **kwargs):
        """Feed each named value into its meter; tensors are detached first.

        Note: ``kwargs`` are merged into ``values`` in place when a dict is
        passed.
        """
        if values is None:
            values = {}
        values.update(kwargs)
        for name, value in values.items():
            if isinstance(value, torch.Tensor):
                value = value.detach()
            self[name].update(value)

    def clear_meters(self):
        """Clear the window of every meter, keeping global statistics."""
        for meter in self.values():
            meter.clear()
\ No newline at end of file
diff --git a/yolort/utils/model_utils.py b/yolort/utils/model_utils.py
new file mode 100644
index 00000000..0b848888
--- /dev/null
+++ b/yolort/utils/model_utils.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii Inc. All rights reserved.
+
+import contextlib
+from copy import deepcopy
+from typing import Sequence
+
+import torch
+import torch.nn as nn
+
+__all__ = [
+ "get_model_info",
+ "adjust_status",
+]
+
+
def get_model_info(model: nn.Module, tsize: Sequence[int]) -> str:
    """Return a "Params / Gflops" summary string for ``model``.

    The model copy is profiled with ``thop`` on a 64x64 zero image and the
    flops are rescaled to the ``tsize[0]`` x ``tsize[1]`` input size.
    """
    from thop import profile

    stride = 64
    device = next(model.parameters()).device
    probe = torch.zeros((1, 3, stride, stride), device=device)
    # Profile a deep copy so the original model is left untouched.
    flops, params = profile(deepcopy(model), inputs=(probe,), verbose=False)
    params /= 1e6
    flops /= 1e9
    flops *= tsize[0] * tsize[1] / stride / stride * 2  # Gflops
    return "Params: {:.2f}M, Gflops: {:.2f}".format(params, flops)
+
+
@contextlib.contextmanager
def adjust_status(module: nn.Module, training: bool = False) -> nn.Module:
    """Adjust module to training/eval mode temporarily.

    The ``training`` flag of ``module`` and all of its submodules is set to
    ``training`` for the duration of the ``with`` block and restored to each
    submodule's previous value afterwards, even if the block raises.

    Args:
        module (nn.Module): module to adjust status.
        training (bool): training mode to set. True for train mode, False for eval mode.

    Yields:
        The same ``module``.

    Examples:
        >>> with adjust_status(model, training=False):
        ...     model(data)
    """
    # Remember each submodule's own flag: they are not necessarily all equal.
    previous = {}
    for sub in module.modules():
        previous[sub] = sub.training
        sub.training = training

    try:
        yield module
    finally:
        # Restore from the saved mapping so that an exception in the body
        # cannot leave the model stuck in the temporary mode.
        for sub, was_training in previous.items():
            sub.training = was_training
\ No newline at end of file
From 9a245802fdfe70209c3fa50f7cdedd4cd2b36494 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
<66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 22 Sep 2023 04:05:06 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
---
exps/default/yolov5l.py | 6 +-
exps/default/yolov5m.py | 6 +-
exps/default/yolov5m6.py | 6 +-
exps/default/yolov5n.py | 6 +-
exps/default/yolov5n6.py | 6 +-
exps/default/yolov5s.py | 6 +-
exps/default/yolov5s6.py | 6 +-
exps/default/yolov5ts.py | 6 +-
requirements.txt | 2 +-
test/test_data_pipeline.py | 9 +-
test/test_trainer.py | 27 +++---
tools/eval_metric.py | 2 +-
yolort/data/__init__.py | 2 +-
yolort/data/data_augment.py | 12 ++-
yolort/data/data_module.py | 1 +
yolort/data/dataloading.py | 3 +-
yolort/data/datasets/coco.py | 10 +--
yolort/data/datasets/datasets_wrapper.py | 56 ++++++------
yolort/data/datasets/mosaicdetection.py | 54 +++++-------
yolort/data/samplers.py | 7 +-
yolort/evaluators/__init__.py | 2 +-
yolort/evaluators/coco_evaluator.py | 79 ++++++++---------
yolort/exp/__init__.py | 2 +-
yolort/exp/base_exp.py | 12 +--
yolort/exp/default/__init__.py | 1 -
yolort/exp/yolox_base.py | 67 ++++++---------
yolort/trainer/trainer.py | 71 ++++++---------
yolort/utils/__init__.py | 2 +-
yolort/utils/allreduce_norm.py | 6 +-
yolort/utils/boxes.py | 4 +-
yolort/utils/checkpoint.py | 6 +-
yolort/utils/dist.py | 37 +++-----
yolort/utils/ema.py | 6 +-
yolort/utils/logger.py | 105 +++++++++++------------
yolort/utils/lr_scheduler.py | 35 ++------
yolort/utils/metric.py | 8 +-
yolort/utils/model_utils.py | 2 +-
37 files changed, 292 insertions(+), 386 deletions(-)
diff --git a/exps/default/yolov5l.py b/exps/default/yolov5l.py
index b04d0f90..a838ae16 100644
--- a/exps/default/yolov5l.py
+++ b/exps/default/yolov5l.py
@@ -15,6 +15,8 @@ def __init__(self):
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
def get_model(self):
- self.model = models.__dict__['yolov5l'](upstream_version="r6.0",)
+ self.model = models.__dict__["yolov5l"](
+ upstream_version="r6.0",
+ )
self.model.train()
- return self.model
\ No newline at end of file
+ return self.model
diff --git a/exps/default/yolov5m.py b/exps/default/yolov5m.py
index e33c2771..cd4ab778 100644
--- a/exps/default/yolov5m.py
+++ b/exps/default/yolov5m.py
@@ -15,6 +15,8 @@ def __init__(self):
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
def get_model(self):
- self.model = models.__dict__['yolov5m'](upstream_version="r6.0",)
+ self.model = models.__dict__["yolov5m"](
+ upstream_version="r6.0",
+ )
self.model.train()
- return self.model
\ No newline at end of file
+ return self.model
diff --git a/exps/default/yolov5m6.py b/exps/default/yolov5m6.py
index 4ac71156..55c7e504 100644
--- a/exps/default/yolov5m6.py
+++ b/exps/default/yolov5m6.py
@@ -15,6 +15,8 @@ def __init__(self):
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
def get_model(self):
- self.model = models.__dict__['yolov5m6'](upstream_version="r6.0",)
+ self.model = models.__dict__["yolov5m6"](
+ upstream_version="r6.0",
+ )
self.model.train()
- return self.model
\ No newline at end of file
+ return self.model
diff --git a/exps/default/yolov5n.py b/exps/default/yolov5n.py
index 72bf63e8..a36cb8e4 100644
--- a/exps/default/yolov5n.py
+++ b/exps/default/yolov5n.py
@@ -15,6 +15,8 @@ def __init__(self):
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
def get_model(self):
- self.model = models.__dict__['yolov5n'](upstream_version="r6.0",)
+ self.model = models.__dict__["yolov5n"](
+ upstream_version="r6.0",
+ )
self.model.train()
- return self.model
\ No newline at end of file
+ return self.model
diff --git a/exps/default/yolov5n6.py b/exps/default/yolov5n6.py
index 3ac2cfd2..9cda7acc 100644
--- a/exps/default/yolov5n6.py
+++ b/exps/default/yolov5n6.py
@@ -15,6 +15,8 @@ def __init__(self):
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
def get_model(self):
- self.model = models.__dict__['yolov5n6'](upstream_version="r6.0",)
+ self.model = models.__dict__["yolov5n6"](
+ upstream_version="r6.0",
+ )
self.model.train()
- return self.model
\ No newline at end of file
+ return self.model
diff --git a/exps/default/yolov5s.py b/exps/default/yolov5s.py
index 61736d25..00512d04 100644
--- a/exps/default/yolov5s.py
+++ b/exps/default/yolov5s.py
@@ -15,6 +15,8 @@ def __init__(self):
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
def get_model(self):
- self.model = models.__dict__['yolov5s'](upstream_version="r6.0",)
+ self.model = models.__dict__["yolov5s"](
+ upstream_version="r6.0",
+ )
self.model.train()
- return self.model
\ No newline at end of file
+ return self.model
diff --git a/exps/default/yolov5s6.py b/exps/default/yolov5s6.py
index cda2a942..8b394afb 100644
--- a/exps/default/yolov5s6.py
+++ b/exps/default/yolov5s6.py
@@ -15,6 +15,8 @@ def __init__(self):
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
def get_model(self):
- self.model = models.__dict__['yolov5s6'](upstream_version="r6.0",)
+ self.model = models.__dict__["yolov5s6"](
+ upstream_version="r6.0",
+ )
self.model.train()
- return self.model
\ No newline at end of file
+ return self.model
diff --git a/exps/default/yolov5ts.py b/exps/default/yolov5ts.py
index 365eab09..b71185df 100644
--- a/exps/default/yolov5ts.py
+++ b/exps/default/yolov5ts.py
@@ -15,6 +15,8 @@ def __init__(self):
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
def get_model(self):
- self.model = models.__dict__['yolov5ts'](upstream_version="r6.0",)
+ self.model = models.__dict__["yolov5ts"](
+ upstream_version="r6.0",
+ )
self.model.train()
- return self.model
\ No newline at end of file
+ return self.model
diff --git a/requirements.txt b/requirements.txt
index af814771..8fa37f38 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -31,4 +31,4 @@ pandas
# pycocotools>=2.0.2 # corresponds to https://github.com/ppwwyyxx/cocoapi
thop # FLOPs computation
loguru # Python logging made (stupidly) simple
-Ninja # a small build system with a focus on speed
\ No newline at end of file
+Ninja # a small build system with a focus on speed
diff --git a/test/test_data_pipeline.py b/test/test_data_pipeline.py
index 4e626a81..7d61e9c4 100644
--- a/test/test_data_pipeline.py
+++ b/test/test_data_pipeline.py
@@ -1,17 +1,17 @@
# Copyright (c) 2021, Zhiqiang Wang. All Rights Reserved.
+import sys
from pathlib import Path
import numpy as np
import pytest
-import sys
+
sys.path.append("../yolort")
import torch
-from torch import Tensor
-from yolort.exp import Exp
+from torch import distributed as dist, Tensor
from yolort.data import DataPrefetcher
+from yolort.exp import Exp
from yolort.utils import contains_any_tensor
-from torch import distributed as dist
def get_world_size() -> int:
@@ -58,6 +58,7 @@ def test_get_dataloader():
assert len(targets) == batch_size
assert isinstance(targets[0], Tensor)
+
test_get_dataloader()
diff --git a/test/test_trainer.py b/test/test_trainer.py
index be1573c7..76251945 100644
--- a/test/test_trainer.py
+++ b/test/test_trainer.py
@@ -4,17 +4,17 @@
import importlib
import sys
+
sys.path.append("../yolort/")
+
def make_parser():
parser = argparse.ArgumentParser("YOLOX train parser")
parser.add_argument("-expn", "--experiment-name", type=str, default="yolov5n")
parser.add_argument("-n", "--name", type=str, default="yolov5n", help="model name")
# distributed
- parser.add_argument(
- "--dist-backend", default="nccl", type=str, help="distributed backend"
- )
+ parser.add_argument("--dist-backend", default="nccl", type=str, help="distributed backend")
parser.add_argument(
"--dist-url",
default=None,
@@ -22,9 +22,7 @@ def make_parser():
help="url used to set up distributed training",
)
parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size")
- parser.add_argument(
- "-d", "--devices", default=None, type=int, help="device for training"
- )
+ parser.add_argument("-d", "--devices", default=None, type=int, help="device for training")
parser.add_argument(
"-f",
"--exp_file",
@@ -32,9 +30,7 @@ def make_parser():
type=str,
help="plz input your experiment description file",
)
- parser.add_argument(
- "--resume", default=False, action="store_true", help="resume training"
- )
+ parser.add_argument("--resume", default=False, action="store_true", help="resume training")
parser.add_argument("-c", "--ckpt", default=None, type=str, help="checkpoint file")
parser.add_argument(
"-e",
@@ -43,12 +39,8 @@ def make_parser():
type=int,
help="resume training start epoch",
)
- parser.add_argument(
- "--num_machines", default=1, type=int, help="num of node for training"
- )
- parser.add_argument(
- "--machine_rank", default=0, type=int, help="node rank for multi-node training"
- )
+ parser.add_argument("--num_machines", default=1, type=int, help="num of node for training")
+ parser.add_argument("--machine_rank", default=0, type=int, help="node rank for multi-node training")
parser.add_argument(
"--fp16",
dest="fp16",
@@ -77,7 +69,7 @@ def make_parser():
type=str,
help="Logger to be used for metrics. \
Implemented loggers include `tensorboard` and `wandb`.",
- default="tensorboard"
+ default="tensorboard",
)
parser.add_argument(
"opts",
@@ -87,6 +79,7 @@ def make_parser():
)
return parser
+
def test_training_step():
args = make_parser().parse_args()
module_name = ".".join(["yolort", "exp", "default", args.name])
@@ -96,9 +89,11 @@ def test_training_step():
assert h % 32 == 0 and w % 32 == 0, "input size must be multiples of 32"
from yolort.trainer import Trainer
+
trainer = Trainer(exp, args)
trainer.train()
+
def test_test_epoch_end():
args = make_parser().parse_args()
module_name = ".".join(["yolort", "exp", "default", args.name])
diff --git a/tools/eval_metric.py b/tools/eval_metric.py
index 0538f0df..3b64632d 100644
--- a/tools/eval_metric.py
+++ b/tools/eval_metric.py
@@ -8,8 +8,8 @@
import torchvision
import yolort
from yolort.data import _helper as data_helper
-from yolort.data.datasets.coco import COCODetection
from yolort.data.coco_eval import COCOEvaluator
+from yolort.data.datasets.coco import COCODetection
from yolort.data.transforms import collate_fn, default_val_transforms
from yolort.utils.logger import MetricLogger
diff --git a/yolort/data/__init__.py b/yolort/data/__init__.py
index 5740093a..aeaf4f93 100644
--- a/yolort/data/__init__.py
+++ b/yolort/data/__init__.py
@@ -6,4 +6,4 @@
from .data_prefetcher import DataPrefetcher
from .dataloading import DataLoader, get_yolox_datadir, worker_init_reset_seed
from .datasets import *
-from .samplers import InfiniteSampler, YoloBatchSampler
\ No newline at end of file
+from .samplers import InfiniteSampler, YoloBatchSampler
diff --git a/yolort/data/data_augment.py b/yolort/data/data_augment.py
index 4e53f6c2..3c35f7fd 100644
--- a/yolort/data/data_augment.py
+++ b/yolort/data/data_augment.py
@@ -39,7 +39,9 @@ def get_aug_params(value, center=0):
else:
raise ValueError(
"Affine params should be either a sequence containing two values\
- or single float values. Got {}".format(value)
+ or single float values. Got {}".format(
+ value
+ )
)
@@ -95,9 +97,7 @@ def apply_affine_to_bboxes(targets, target_size, M, scale):
corner_xs = corner_points[:, 0::2]
corner_ys = corner_points[:, 1::2]
new_bboxes = (
- np.concatenate(
- (corner_xs.min(1), corner_ys.min(1), corner_xs.max(1), corner_ys.max(1))
- )
+ np.concatenate((corner_xs.min(1), corner_ys.min(1), corner_xs.max(1), corner_ys.max(1)))
.reshape(4, num_gts)
.T
)
@@ -203,9 +203,7 @@ def __call__(self, image, targets, input_dim):
targets_t = np.hstack((labels_t, boxes_t))
padded_labels = np.zeros((self.max_labels, 5))
- padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
- : self.max_labels
- ]
+ padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[: self.max_labels]
padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
return image_t, padded_labels
diff --git a/yolort/data/data_module.py b/yolort/data/data_module.py
index d17d7327..c2bd3134 100644
--- a/yolort/data/data_module.py
+++ b/yolort/data/data_module.py
@@ -11,6 +11,7 @@
from pytorch_lightning import LightningDataModule
from yolort.data.datasets.coco import COCODetection
+
from .transforms import collate_fn, default_train_transforms, default_val_transforms
from .voc import VOCDetection
diff --git a/yolort/data/dataloading.py b/yolort/data/dataloading.py
index 6fecf3f0..cf805b21 100644
--- a/yolort/data/dataloading.py
+++ b/yolort/data/dataloading.py
@@ -9,8 +9,7 @@
import numpy as np
import torch
-from torch.utils.data.dataloader import DataLoader as torchDataLoader
-from torch.utils.data.dataloader import default_collate
+from torch.utils.data.dataloader import DataLoader as torchDataLoader, default_collate
from .samplers import YoloBatchSampler
diff --git a/yolort/data/datasets/coco.py b/yolort/data/datasets/coco.py
index 5ac225a0..6aedce86 100644
--- a/yolort/data/datasets/coco.py
+++ b/yolort/data/datasets/coco.py
@@ -8,7 +8,7 @@
import numpy as np
from pycocotools.coco import COCO
-from .datasets_wrapper import CacheDataset, cache_read_img
+from .datasets_wrapper import cache_read_img, CacheDataset
def remove_useless_info(coco):
@@ -79,7 +79,7 @@ def __init__(
cache_dir_name=f"cache_{name}",
path_filename=path_filename,
cache=cache,
- cache_type=cache_type
+ cache_type=cache_type,
)
def __len__(self):
@@ -118,11 +118,7 @@ def load_anno_from_ids(self, id_):
img_info = (height, width)
resized_info = (int(height * r), int(width * r))
- file_name = (
- im_ann["file_name"]
- if "file_name" in im_ann
- else "{:012}".format(id_) + ".jpg"
- )
+ file_name = im_ann["file_name"] if "file_name" in im_ann else "{:012}".format(id_) + ".jpg"
return (res, img_info, resized_info, file_name)
diff --git a/yolort/data/datasets/datasets_wrapper.py b/yolort/data/datasets/datasets_wrapper.py
index c45fe380..32f9b92f 100644
--- a/yolort/data/datasets/datasets_wrapper.py
+++ b/yolort/data/datasets/datasets_wrapper.py
@@ -9,14 +9,13 @@
from abc import ABCMeta, abstractmethod
from functools import partial, wraps
from multiprocessing.pool import ThreadPool
-import psutil
-from loguru import logger
-from tqdm import tqdm
import numpy as np
+import psutil
+from loguru import logger
-from torch.utils.data.dataset import ConcatDataset as torchConcatDataset
-from torch.utils.data.dataset import Dataset as torchDataset
+from torch.utils.data.dataset import ConcatDataset as torchConcatDataset, Dataset as torchDataset
+from tqdm import tqdm
class ConcatDataset(torchConcatDataset):
@@ -29,9 +28,7 @@ def __init__(self, datasets):
def pull_item(self, idx):
if idx < 0:
if -idx > len(self):
- raise ValueError(
- "absolute value of index should not exceed dataset length"
- )
+ raise ValueError("absolute value of index should not exceed dataset length")
idx = len(self) + idx
dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
if dataset_idx == 0:
@@ -54,9 +51,7 @@ def __getitem__(self, index):
idx = index[1]
if idx < 0:
if -idx > len(self):
- raise ValueError(
- "absolute value of index should not exceed dataset length"
- )
+ raise ValueError("absolute value of index should not exceed dataset length")
idx = len(self) + idx
dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
if dataset_idx == 0:
@@ -70,7 +65,7 @@ def __getitem__(self, index):
class Dataset(torchDataset):
- """ This class is a subclass of the base :class:`torch.utils.data.Dataset`,
+ """This class is a subclass of the base :class:`torch.utils.data.Dataset`,
that enables on the fly resizing of the ``input_dim``.
Args:
@@ -125,7 +120,7 @@ def wrapper(self, index):
class CacheDataset(Dataset, metaclass=ABCMeta):
- """ This class is a subclass of the base :class:`yolox.data.datasets.Dataset`,
+ """This class is a subclass of the base :class:`yolox.data.datasets.Dataset`,
that enables cache images to ram or disk.
Args:
@@ -196,8 +191,9 @@ def cache_images(
):
assert num_imgs is not None, "num_imgs must be specified as the size of the dataset"
if self.cache_type == "disk":
- assert (data_dir and cache_dir_name and path_filename) is not None, \
- "data_dir, cache_name and path_filename must be specified if cache_type is disk"
+ assert (
+ data_dir and cache_dir_name and path_filename
+ ) is not None, "data_dir, cache_name and path_filename must be specified if cache_type is disk"
self.path_filename = path_filename
mem = psutil.virtual_memory()
@@ -216,10 +212,10 @@ def cache_images(
)
if self.cache and self.imgs is None:
- if self.cache_type == 'ram':
+ if self.cache_type == "ram":
self.imgs = [None] * num_imgs
logger.info("You are using cached images in RAM to accelerate training!")
- else: # 'disk'
+ else: # 'disk'
if not os.path.exists(self.cache_dir):
os.mkdir(self.cache_dir)
logger.warning(
@@ -234,29 +230,22 @@ def cache_images(
logger.info(f"Found disk cache at {self.cache_dir}")
return
- logger.info(
- "Caching images...\n"
- "This might take some time for your dataset"
- )
+ logger.info("Caching images...\n" "This might take some time for your dataset")
num_threads = min(8, max(1, os.cpu_count() - 1))
b = 0
- load_imgs = ThreadPool(num_threads).imap(
- partial(self.read_img, use_cache=False),
- range(num_imgs)
- )
+ load_imgs = ThreadPool(num_threads).imap(partial(self.read_img, use_cache=False), range(num_imgs))
pbar = tqdm(enumerate(load_imgs), total=num_imgs)
- for i, x in pbar: # x = self.read_img(self, i, use_cache=False)
- if self.cache_type == 'ram':
+ for i, x in pbar: # x = self.read_img(self, i, use_cache=False)
+ if self.cache_type == "ram":
self.imgs[i] = x
- else: # 'disk'
+ else: # 'disk'
cache_filename = f'{self.path_filename[i].split(".")[0]}.npy'
cache_path_filename = os.path.join(self.cache_dir, cache_filename)
os.makedirs(os.path.dirname(cache_path_filename), exist_ok=True)
np.save(cache_path_filename, x)
b += x.nbytes
- pbar.desc = \
- f'Caching images ({b / gb:.1f}/{mem_required / gb:.1f}GB {self.cache_type})'
+ pbar.desc = f"Caching images ({b / gb:.1f}/{mem_required / gb:.1f}GB {self.cache_type})"
pbar.close()
def cal_cache_occupy(self, num_imgs):
@@ -280,6 +269,7 @@ def decorator(read_img_fn):
whether to read the image from cache.
Defaults to True.
"""
+
@wraps(read_img_fn)
def wrapper(self, index, use_cache=use_cache):
cache = self.cache and use_cache
@@ -289,12 +279,14 @@ def wrapper(self, index, use_cache=use_cache):
img = copy.deepcopy(img)
elif self.cache_type == "disk":
img = np.load(
- os.path.join(
- self.cache_dir, f"{self.path_filename[index].split('.')[0]}.npy"))
+ os.path.join(self.cache_dir, f"{self.path_filename[index].split('.')[0]}.npy")
+ )
else:
raise ValueError(f"Unknown cache type: {self.cache_type}")
else:
img = read_img_fn(self, index)
return img
+
return wrapper
+
return decorator
diff --git a/yolort/data/datasets/mosaicdetection.py b/yolort/data/datasets/mosaicdetection.py
index ba11cfdc..7f3b5f75 100644
--- a/yolort/data/datasets/mosaicdetection.py
+++ b/yolort/data/datasets/mosaicdetection.py
@@ -38,10 +38,20 @@ class MosaicDetection(Dataset):
"""Detection dataset wrapper that performs mixup for normal dataset."""
def __init__(
- self, dataset, img_size, mosaic=True, preproc=None,
- degrees=10.0, translate=0.1, mosaic_scale=(0.5, 1.5),
- mixup_scale=(0.5, 1.5), shear=2.0, enable_mixup=True,
- mosaic_prob=1.0, mixup_prob=1.0, *args
+ self,
+ dataset,
+ img_size,
+ mosaic=True,
+ preproc=None,
+ degrees=10.0,
+ translate=0.1,
+ mosaic_scale=(0.5, 1.5),
+ mixup_scale=(0.5, 1.5),
+ shear=2.0,
+ enable_mixup=True,
+ mosaic_prob=1.0,
+ mixup_prob=1.0,
+ *args,
):
"""
@@ -92,10 +102,8 @@ def __getitem__(self, idx):
for i_mosaic, index in enumerate(indices):
img, _labels, _, img_id = self._dataset.pull_item(index)
h0, w0 = img.shape[:2] # orig hw
- scale = min(1. * input_h / h0, 1. * input_w / w0)
- img = cv2.resize(
- img, (int(w0 * scale), int(h0 * scale)), interpolation=cv2.INTER_LINEAR
- )
+ scale = min(1.0 * input_h / h0, 1.0 * input_w / w0)
+ img = cv2.resize(img, (int(w0 * scale), int(h0 * scale)), interpolation=cv2.INTER_LINEAR)
# generate output mosaic image
(h, w, c) = img.shape[:3]
if i_mosaic == 0:
@@ -138,11 +146,7 @@ def __getitem__(self, idx):
# -----------------------------------------------------------------
# CopyPaste: https://arxiv.org/abs/2012.07177
# -----------------------------------------------------------------
- if (
- self.enable_mixup
- and not len(mosaic_labels) == 0
- and random.random() < self.mixup_prob
- ):
+ if self.enable_mixup and not len(mosaic_labels) == 0 and random.random() < self.mixup_prob:
mosaic_img, mosaic_labels = self.mixup(mosaic_img, mosaic_labels, self.input_dim)
mix_img, padded_labels = self.preproc(mosaic_img, mosaic_labels, self.input_dim)
img_info = (mix_img.shape[1], mix_img.shape[0])
@@ -180,9 +184,7 @@ def mixup(self, origin_img, origin_labels, input_dim):
interpolation=cv2.INTER_LINEAR,
)
- cp_img[
- : int(img.shape[0] * cp_scale_ratio), : int(img.shape[1] * cp_scale_ratio)
- ] = resized_img
+ cp_img[: int(img.shape[0] * cp_scale_ratio), : int(img.shape[1] * cp_scale_ratio)] = resized_img
cp_img = cv2.resize(
cp_img,
@@ -195,9 +197,7 @@ def mixup(self, origin_img, origin_labels, input_dim):
origin_h, origin_w = cp_img.shape[:2]
target_h, target_w = origin_img.shape[:2]
- padded_img = np.zeros(
- (max(origin_h, target_h), max(origin_w, target_w), 3), dtype=np.uint8
- )
+ padded_img = np.zeros((max(origin_h, target_h), max(origin_w, target_w), 3), dtype=np.uint8)
padded_img[:origin_h, :origin_w] = cp_img
x_offset, y_offset = 0, 0
@@ -205,24 +205,16 @@ def mixup(self, origin_img, origin_labels, input_dim):
y_offset = random.randint(0, padded_img.shape[0] - target_h - 1)
if padded_img.shape[1] > target_w:
x_offset = random.randint(0, padded_img.shape[1] - target_w - 1)
- padded_cropped_img = padded_img[
- y_offset: y_offset + target_h, x_offset: x_offset + target_w
- ]
+ padded_cropped_img = padded_img[y_offset : y_offset + target_h, x_offset : x_offset + target_w]
cp_bboxes_origin_np = adjust_box_anns(
cp_labels[:, :4].copy(), cp_scale_ratio, 0, 0, origin_w, origin_h
)
if FLIP:
- cp_bboxes_origin_np[:, 0::2] = (
- origin_w - cp_bboxes_origin_np[:, 0::2][:, ::-1]
- )
+ cp_bboxes_origin_np[:, 0::2] = origin_w - cp_bboxes_origin_np[:, 0::2][:, ::-1]
cp_bboxes_transformed_np = cp_bboxes_origin_np.copy()
- cp_bboxes_transformed_np[:, 0::2] = np.clip(
- cp_bboxes_transformed_np[:, 0::2] - x_offset, 0, target_w
- )
- cp_bboxes_transformed_np[:, 1::2] = np.clip(
- cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h
- )
+ cp_bboxes_transformed_np[:, 0::2] = np.clip(cp_bboxes_transformed_np[:, 0::2] - x_offset, 0, target_w)
+ cp_bboxes_transformed_np[:, 1::2] = np.clip(cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h)
cls_labels = cp_labels[:, 4:5].copy()
box_labels = cp_bboxes_transformed_np
diff --git a/yolort/data/samplers.py b/yolort/data/samplers.py
index 6b7ea38d..b08b3d68 100644
--- a/yolort/data/samplers.py
+++ b/yolort/data/samplers.py
@@ -7,8 +7,7 @@
import torch
import torch.distributed as dist
-from torch.utils.data.sampler import BatchSampler as torchBatchSampler
-from torch.utils.data.sampler import Sampler
+from torch.utils.data.sampler import BatchSampler as torchBatchSampler, Sampler
class YoloBatchSampler(torchBatchSampler):
@@ -68,9 +67,7 @@ def __init__(
def __iter__(self):
start = self._rank
- yield from itertools.islice(
- self._infinite_indices(), start, None, self._world_size
- )
+ yield from itertools.islice(self._infinite_indices(), start, None, self._world_size)
def _infinite_indices(self):
g = torch.Generator()
diff --git a/yolort/evaluators/__init__.py b/yolort/evaluators/__init__.py
index fc0b6875..83b5a9f1 100644
--- a/yolort/evaluators/__init__.py
+++ b/yolort/evaluators/__init__.py
@@ -2,4 +2,4 @@
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
-from .coco_evaluator import COCOEvaluator
\ No newline at end of file
+from .coco_evaluator import COCOEvaluator
diff --git a/yolort/evaluators/coco_evaluator.py b/yolort/evaluators/coco_evaluator.py
index a97c6d41..75e79aa2 100644
--- a/yolort/evaluators/coco_evaluator.py
+++ b/yolort/evaluators/coco_evaluator.py
@@ -9,23 +9,16 @@
import tempfile
import time
from collections import ChainMap, defaultdict
-from loguru import logger
-from tabulate import tabulate
-from tqdm import tqdm
import numpy as np
import torch
+from loguru import logger
+from tabulate import tabulate
+from tqdm import tqdm
from yolort.data.datasets import COCO_CLASSES
-from yolort.utils import (
- gather,
- is_main_process,
- postprocess,
- synchronize,
- time_synchronized,
- xyxy2xywh
-)
+from yolort.utils import gather, is_main_process, postprocess, synchronize, time_synchronized, xyxy2xywh
def per_class_AR_table(coco_eval, class_names=COCO_CLASSES, headers=["class", "AR"], colums=6):
@@ -46,7 +39,11 @@ def per_class_AR_table(coco_eval, class_names=COCO_CLASSES, headers=["class", "A
row_pair = itertools.zip_longest(*[result_pair[i::num_cols] for i in range(num_cols)])
table_headers = headers * (num_cols // len(headers))
table = tabulate(
- row_pair, tablefmt="pipe", floatfmt=".3f", headers=table_headers, numalign="left",
+ row_pair,
+ tablefmt="pipe",
+ floatfmt=".3f",
+ headers=table_headers,
+ numalign="left",
)
return table
@@ -71,7 +68,11 @@ def per_class_AP_table(coco_eval, class_names=COCO_CLASSES, headers=["class", "A
row_pair = itertools.zip_longest(*[result_pair[i::num_cols] for i in range(num_cols)])
table_headers = headers * (num_cols // len(headers))
table = tabulate(
- row_pair, tablefmt="pipe", floatfmt=".3f", headers=table_headers, numalign="left",
+ row_pair,
+ tablefmt="pipe",
+ floatfmt=".3f",
+ headers=table_headers,
+ numalign="left",
)
return table
@@ -114,8 +115,14 @@ def __init__(
self.per_class_AR = per_class_AR
def evaluate(
- self, model, distributed=False, half=False, trt_file=None,
- decoder=None, test_size=None, return_outputs=False
+ self,
+ model,
+ distributed=False,
+ half=False,
+ trt_file=None,
+ decoder=None,
+ test_size=None,
+ return_outputs=False,
):
"""
COCO average precision (AP) Evaluation. Iterate inference on the test dataset
@@ -155,9 +162,7 @@ def evaluate(
model(x)
model = model_trt
- for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
- progress_bar(self.dataloader)
- ):
+ for cur_iter, (imgs, _, info_imgs, ids) in enumerate(progress_bar(self.dataloader)):
with torch.no_grad():
imgs = imgs.type(tensor_type)
@@ -174,15 +179,14 @@ def evaluate(
infer_end = time_synchronized()
inference_time += infer_end - start
- outputs = postprocess(
- outputs, self.num_classes, self.confthre, self.nmsthre
- )
+ outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)
if is_time_record:
nms_end = time_synchronized()
nms_time += nms_end - infer_end
data_list_elem, image_wise_data = self.convert_to_coco_format(
- outputs, info_imgs, ids, return_outputs=True)
+ outputs, info_imgs, ids, return_outputs=True
+ )
data_list.extend(data_list_elem)
output_data.update(image_wise_data)
@@ -207,9 +211,7 @@ def evaluate(
def convert_to_coco_format(self, outputs, info_imgs, ids, return_outputs=False):
data_list = []
image_wise_data = defaultdict(dict)
- for (output, img_h, img_w, img_id) in zip(
- outputs, info_imgs[0], info_imgs[1], ids
- ):
+ for (output, img_h, img_w, img_id) in zip(outputs, info_imgs[0], info_imgs[1], ids):
if output is None:
continue
output = output.cpu()
@@ -217,23 +219,22 @@ def convert_to_coco_format(self, outputs, info_imgs, ids, return_outputs=False):
bboxes = output[:, 0:4]
# preprocessing: resize
- scale = min(
- self.img_size[0] / float(img_h), self.img_size[1] / float(img_w)
- )
+ scale = min(self.img_size[0] / float(img_h), self.img_size[1] / float(img_w))
bboxes /= scale
cls = output[:, 6]
scores = output[:, 4] * output[:, 5]
- image_wise_data.update({
- int(img_id): {
- "bboxes": [box.numpy().tolist() for box in bboxes],
- "scores": [score.numpy().item() for score in scores],
- "categories": [
- self.dataloader.dataset.class_ids[int(cls[ind])]
- for ind in range(bboxes.shape[0])
- ],
+ image_wise_data.update(
+ {
+ int(img_id): {
+ "bboxes": [box.numpy().tolist() for box in bboxes],
+ "scores": [score.numpy().item() for score in scores],
+ "categories": [
+ self.dataloader.dataset.class_ids[int(cls[ind])] for ind in range(bboxes.shape[0])
+ ],
+ }
}
- })
+ )
bboxes = xyxy2xywh(bboxes)
@@ -305,7 +306,7 @@ def evaluate_prediction(self, data_dict, statistics):
cocoEval.summarize()
info += redirect_string.getvalue()
cat_ids = list(cocoGt.cats.keys())
- cat_names = [cocoGt.cats[catId]['name'] for catId in sorted(cat_ids)]
+ cat_names = [cocoGt.cats[catId]["name"] for catId in sorted(cat_ids)]
if self.per_class_AP:
AP_table = per_class_AP_table(cocoEval, class_names=cat_names)
info += "per class AP:\n" + AP_table + "\n"
@@ -314,4 +315,4 @@ def evaluate_prediction(self, data_dict, statistics):
info += "per class AR:\n" + AR_table + "\n"
return cocoEval.stats[0], cocoEval.stats[1], info
else:
- return 0, 0, info
\ No newline at end of file
+ return 0, 0, info
diff --git a/yolort/exp/__init__.py b/yolort/exp/__init__.py
index d7de27c8..94b059ce 100644
--- a/yolort/exp/__init__.py
+++ b/yolort/exp/__init__.py
@@ -2,4 +2,4 @@
# Copyright (c) Megvii Inc. All rights reserved.
from .base_exp import BaseExp
-from .yolox_base import Exp
\ No newline at end of file
+from .yolox_base import Exp
diff --git a/yolort/exp/base_exp.py b/yolort/exp/base_exp.py
index c0ae45fe..41506546 100644
--- a/yolort/exp/base_exp.py
+++ b/yolort/exp/base_exp.py
@@ -5,9 +5,9 @@
import pprint
from abc import ABCMeta, abstractmethod
from typing import Dict, List, Tuple
-from tabulate import tabulate
import torch
+from tabulate import tabulate
from torch.nn import Module
from yolort.utils import LRScheduler
@@ -42,9 +42,7 @@ def get_optimizer(self, batch_size: int) -> torch.optim.Optimizer:
pass
@abstractmethod
- def get_lr_scheduler(
- self, lr: float, iters_per_epoch: int, **kwargs
- ) -> LRScheduler:
+ def get_lr_scheduler(self, lr: float, iters_per_epoch: int, **kwargs) -> LRScheduler:
pass
@abstractmethod
@@ -57,11 +55,7 @@ def eval(self, model, evaluator, weights):
def __repr__(self):
table_header = ["keys", "values"]
- exp_table = [
- (str(k), pprint.pformat(v))
- for k, v in vars(self).items()
- if not k.startswith("_")
- ]
+ exp_table = [(str(k), pprint.pformat(v)) for k, v in vars(self).items() if not k.startswith("_")]
return tabulate(exp_table, headers=table_header, tablefmt="fancy_grid")
def merge(self, cfg_list):
diff --git a/yolort/exp/default/__init__.py b/yolort/exp/default/__init__.py
index 1f361d78..b439cbde 100644
--- a/yolort/exp/default/__init__.py
+++ b/yolort/exp/default/__init__.py
@@ -15,7 +15,6 @@
# where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230
class _ExpFinder(abc.MetaPathFinder):
-
def find_spec(self, name, path, target=None):
if not name.startswith("yolort.exp.default"):
return
diff --git a/yolort/exp/yolox_base.py b/yolort/exp/yolox_base.py
index f3147743..c46af070 100644
--- a/yolort/exp/yolox_base.py
+++ b/yolort/exp/yolox_base.py
@@ -1,11 +1,11 @@
#!/usr/bin/env python3
# Copyright (c) Megvii Inc. All rights reserved.
+import logging
import os
import random
-import logging
-from zipfile import ZipFile
from pathlib import Path, PosixPath
+from zipfile import ZipFile
import torch
import torch.distributed as dist
@@ -114,7 +114,9 @@ def __init__(self):
def get_model(self):
import yolort.models as models
- self.model = models.__dict__['yolov5n'](upstream_version="r6.0", )
+ self.model = models.__dict__["yolov5n"](
+ upstream_version="r6.0",
+ )
self.model.train()
return self.model
@@ -136,25 +138,17 @@ def get_dataset(self, data_root: str, mode: str = "val", cache: bool = False, ca
data_dir=self.data_dir,
json_file=self.train_ann,
img_size=self.input_size,
- preproc=TrainTransform(
- max_labels=50,
- flip_prob=self.flip_prob,
- hsv_prob=self.hsv_prob
- ),
+ preproc=TrainTransform(max_labels=50, flip_prob=self.flip_prob, hsv_prob=self.hsv_prob),
cache=cache,
cache_type=cache_type,
)
elif mode == "val":
- """ TODO """
+ """TODO"""
dataset = COCODataset(
data_dir=self.data_dir,
json_file=self.train_ann,
img_size=self.input_size,
- preproc=TrainTransform(
- max_labels=50,
- flip_prob=self.flip_prob,
- hsv_prob=self.hsv_prob
- ),
+ preproc=TrainTransform(max_labels=50, flip_prob=self.flip_prob, hsv_prob=self.hsv_prob),
cache=cache,
cache_type=cache_type,
)
@@ -174,12 +168,12 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img: s
None: Do not use cache, in this case cache_data is also None.
"""
from yolort.data import (
- TrainTransform,
- YoloBatchSampler,
DataLoader,
InfiniteSampler,
MosaicDetection,
+ TrainTransform,
worker_init_reset_seed,
+ YoloBatchSampler,
)
from yolort.utils import wait_for_the_master
@@ -187,18 +181,16 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img: s
# else we will create dataset after launch
if self.dataset is None:
with wait_for_the_master():
- assert cache_img is None, \
- "cache_img must be None if you didn't create dataset before launch"
- self.dataset = self.get_dataset(data_root="data-bin", mode="train", cache=False, cache_type=cache_img)
+ assert cache_img is None, "cache_img must be None if you didn't create dataset before launch"
+ self.dataset = self.get_dataset(
+ data_root="data-bin", mode="train", cache=False, cache_type=cache_img
+ )
self.dataset = MosaicDetection(
dataset=self.dataset,
mosaic=not no_aug,
img_size=self.input_size,
- preproc=TrainTransform(
- max_labels=120,
- flip_prob=self.flip_prob,
- hsv_prob=self.hsv_prob),
+ preproc=TrainTransform(max_labels=120, flip_prob=self.flip_prob, hsv_prob=self.hsv_prob),
degrees=self.degrees,
translate=self.translate,
mosaic_scale=self.mosaic_scale,
@@ -232,9 +224,10 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img: s
return train_loader
- def prepare_coco128(self,
- data_path: PosixPath,
- dirname: str = "coco128",
+ def prepare_coco128(
+ self,
+ data_path: PosixPath,
+ dirname: str = "coco128",
) -> None:
"""
Prepare coco128 dataset to test.
@@ -266,7 +259,7 @@ def random_resize(self, data_loader, epoch, rank, is_distributed):
if rank == 0:
size_factor = self.input_size[1] * 1.0 / self.input_size[0]
- if not hasattr(self, 'random_size'):
+ if not hasattr(self, "random_size"):
min_size = int(self.input_size[0] / 32) - self.multiscale_range
max_size = int(self.input_size[0] / 32) + self.multiscale_range
self.random_size = (min_size, max_size)
@@ -286,9 +279,7 @@ def preprocess(self, inputs, targets, tsize):
scale_y = tsize[0] / self.input_size[0]
scale_x = tsize[1] / self.input_size[1]
if scale_x != 1 or scale_y != 1:
- inputs = nn.functional.interpolate(
- inputs, size=tsize, mode="bilinear", align_corners=False
- )
+ inputs = nn.functional.interpolate(inputs, size=tsize, mode="bilinear", align_corners=False)
targets[..., 1::2] = targets[..., 1::2] * scale_x
targets[..., 2::2] = targets[..., 2::2] * scale_y
return inputs, targets
@@ -310,9 +301,7 @@ def get_optimizer(self, batch_size):
elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
pg1.append(v.weight) # apply decay
- optimizer = torch.optim.SGD(
- pg0, lr=lr, momentum=self.momentum, nesterov=True
- )
+ optimizer = torch.optim.SGD(pg0, lr=lr, momentum=self.momentum, nesterov=True)
optimizer.add_param_group(
{"params": pg1, "weight_decay": self.weight_decay}
) # add pg1 with weight_decay
@@ -338,12 +327,13 @@ def get_lr_scheduler(self, lr, iters_per_epoch):
def get_eval_dataset(self, **kwargs):
from yolort.data import COCODataset, ValTransform
+
testdev = kwargs.get("testdev", False)
legacy = kwargs.get("legacy", False)
return COCODataset(
data_dir=self.data_dir,
- json_file=self.train_ann, # 这里需要改为
+ json_file=self.train_ann, # 这里需要改为
name="train2017" if not testdev else "train2017", # 测试数据
img_size=self.test_size,
preproc=ValTransform(legacy=legacy),
@@ -354,9 +344,7 @@ def get_eval_loader(self, batch_size, is_distributed, **kwargs):
if is_distributed:
batch_size = batch_size // dist.get_world_size()
- sampler = torch.utils.data.distributed.DistributedSampler(
- valdataset, shuffle=False
- )
+ sampler = torch.utils.data.distributed.DistributedSampler(valdataset, shuffle=False)
else:
sampler = torch.utils.data.SequentialSampler(valdataset)
@@ -374,8 +362,7 @@ def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False)
from yolort.evaluators import COCOEvaluator
return COCOEvaluator(
- dataloader=self.get_eval_loader(batch_size, is_distributed,
- testdev=testdev, legacy=legacy),
+ dataloader=self.get_eval_loader(batch_size, is_distributed, testdev=testdev, legacy=legacy),
img_size=self.test_size,
confthre=self.test_conf,
nmsthre=self.nmsthre,
@@ -384,4 +371,4 @@ def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False)
)
def eval(self, model, evaluator, is_distributed, half=False, return_outputs=False):
- return evaluator.evaluate(model, is_distributed, half, return_outputs=return_outputs)
\ No newline at end of file
+ return evaluator.evaluate(model, is_distributed, half, return_outputs=return_outputs)
diff --git a/yolort/trainer/trainer.py b/yolort/trainer/trainer.py
index 28f1fbe1..aeb418db 100644
--- a/yolort/trainer/trainer.py
+++ b/yolort/trainer/trainer.py
@@ -4,18 +4,15 @@
import datetime
import os
import time
-from loguru import logger
import torch
+from loguru import logger
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
from yolort.data import DataPrefetcher
from yolort.exp import Exp
from yolort.utils import (
- MeterBuffer,
- ModelEMA,
- WandbLogger,
adjust_status,
all_reduce_norm,
get_local_rank,
@@ -26,14 +23,18 @@
is_parallel,
load_ckpt,
mem_usage,
+ MeterBuffer,
+ ModelEMA,
occupy_mem,
save_checkpoint,
setup_logger,
- synchronize
+ synchronize,
+ WandbLogger,
)
__all__ = ["Trainer"]
+
class Trainer:
def __init__(self, exp: Exp, args):
# init function only defines some basic attr, other attrs like model, optimizer are built in
@@ -48,7 +49,7 @@ def __init__(self, exp: Exp, args):
self.is_distributed = get_world_size() > 1
self.rank = get_rank()
self.local_rank = get_local_rank()
- self.device = "cuda:{}".format(self.local_rank) if torch.cuda.is_available() else 'cpu'
+ self.device = "cuda:{}".format(self.local_rank) if torch.cuda.is_available() else "cpu"
self.use_model_ema = exp.ema
self.save_history_ckpt = exp.save_history_ckpt
@@ -132,12 +133,10 @@ def before_train(self):
logger.info("exp value:\n{}".format(self.exp))
# model related init
- if self.device != 'cpu':
+ if self.device != "cpu":
torch.cuda.set_device(self.local_rank)
model = self.exp.get_model()
- logger.info(
- "Model Summary: {}".format(get_model_info(model, self.exp.test_size))
- )
+ logger.info("Model Summary: {}".format(get_model_info(model, self.exp.test_size)))
model.to(self.device)
# solver related init
@@ -183,9 +182,7 @@ def before_train(self):
self.tblogger = SummaryWriter(os.path.join(self.file_name, "tensorboard"))
elif self.args.logger == "wandb":
self.wandb_logger = WandbLogger.initialize_wandb_logger(
- self.args,
- self.exp,
- self.evaluator.dataloader.dataset
+ self.args, self.exp, self.evaluator.dataloader.dataset
)
else:
raise ValueError("logger must be either 'tensorboard' or 'wandb'")
@@ -194,9 +191,7 @@ def before_train(self):
logger.info("\n{}".format(model))
def after_train(self):
- logger.info(
- "Training of experiment is done and the best AP is {:.2f}".format(self.best_ap * 100)
- )
+ logger.info("Training of experiment is done and the best AP is {:.2f}".format(self.best_ap * 100))
if self.rank == 0:
if self.args.logger == "wandb":
self.wandb_logger.finish()
@@ -243,14 +238,10 @@ def after_iter(self):
self.epoch + 1, self.max_epoch, self.iter + 1, self.max_iter
)
loss_meter = self.meter.get_filtered_meter("loss")
- loss_str = ", ".join(
- ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()]
- )
+ loss_str = ", ".join(["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()])
time_meter = self.meter.get_filtered_meter("time")
- time_str = ", ".join(
- ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()]
- )
+ time_str = ", ".join(["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()])
mem_str = "gpu mem: {:.0f}Mb, mem: {:.1f}Gb".format(gpu_mem_usage(), mem_usage())
@@ -267,16 +258,12 @@ def after_iter(self):
if self.rank == 0:
if self.args.logger == "tensorboard":
- self.tblogger.add_scalar(
- "train/lr", self.meter["lr"].latest, self.progress_in_iter)
+ self.tblogger.add_scalar("train/lr", self.meter["lr"].latest, self.progress_in_iter)
for k, v in loss_meter.items():
- self.tblogger.add_scalar(
- f"train/{k}", v.latest, self.progress_in_iter)
+ self.tblogger.add_scalar(f"train/{k}", v.latest, self.progress_in_iter)
if self.args.logger == "wandb":
metrics = {"train/" + k: v.latest for k, v in loss_meter.items()}
- metrics.update({
- "train/lr": self.meter["lr"].latest
- })
+ metrics.update({"train/lr": self.meter["lr"].latest})
self.wandb_logger.log_metrics(metrics, step=self.progress_in_iter)
self.meter.clear_meters()
@@ -306,15 +293,11 @@ def resume_train(self, model):
self.best_ap = ckpt.pop("best_ap", 0)
# resume the training states variables
start_epoch = (
- self.args.start_epoch - 1
- if self.args.start_epoch is not None
- else ckpt["start_epoch"]
+ self.args.start_epoch - 1 if self.args.start_epoch is not None else ckpt["start_epoch"]
)
self.start_epoch = start_epoch
logger.info(
- "loaded checkpoint '{}' (epoch {})".format(
- self.args.resume, self.start_epoch
- )
+ "loaded checkpoint '{}' (epoch {})".format(self.args.resume, self.start_epoch)
) # noqa
else:
if self.args.ckpt is not None:
@@ -347,11 +330,13 @@ def evaluate_and_save_model(self):
self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1)
self.tblogger.add_scalar("val/COCOAP50_95", ap50_95, self.epoch + 1)
if self.args.logger == "wandb":
- self.wandb_logger.log_metrics({
- "val/COCOAP50": ap50,
- "val/COCOAP50_95": ap50_95,
- "train/epoch": self.epoch + 1,
- })
+ self.wandb_logger.log_metrics(
+ {
+ "val/COCOAP50": ap50,
+ "val/COCOAP50_95": ap50_95,
+ "train/epoch": self.epoch + 1,
+ }
+ )
self.wandb_logger.log_images(predictions)
logger.info("\n" + summary)
synchronize()
@@ -387,6 +372,6 @@ def save_ckpt(self, ckpt_name, update_best_ckpt=False, ap=None):
"epoch": self.epoch + 1,
"optimizer": self.optimizer.state_dict(),
"best_ap": self.best_ap,
- "curr_ap": ap
- }
- )
\ No newline at end of file
+ "curr_ap": ap,
+ },
+ )
diff --git a/yolort/utils/__init__.py b/yolort/utils/__init__.py
index cf4c00b0..ee0c373f 100644
--- a/yolort/utils/__init__.py
+++ b/yolort/utils/__init__.py
@@ -19,7 +19,7 @@
from .checkpoint import load_ckpt, save_checkpoint
from .dist import *
from .ema import *
-from .logger import WandbLogger, setup_logger
+from .logger import setup_logger, WandbLogger
from .lr_scheduler import LRScheduler
from .metric import *
from .model_utils import *
diff --git a/yolort/utils/allreduce_norm.py b/yolort/utils/allreduce_norm.py
index 142c76c7..71881952 100644
--- a/yolort/utils/allreduce_norm.py
+++ b/yolort/utils/allreduce_norm.py
@@ -6,8 +6,7 @@
from collections import OrderedDict
import torch
-from torch import distributed as dist
-from torch import nn
+from torch import distributed as dist, nn
from .dist import _get_global_gloo_group, get_world_size
@@ -88,8 +87,7 @@ def all_reduce(py_dict, op="sum", group=None):
flatten_tensor /= world_size
split_tensors = [
- x.reshape(shape)
- for x, shape in zip(torch.split(flatten_tensor, tensor_numels), tensor_shapes)
+ x.reshape(shape) for x, shape in zip(torch.split(flatten_tensor, tensor_numels), tensor_shapes)
]
return OrderedDict({k: v for k, v in zip(py_key, split_tensors)})
diff --git a/yolort/utils/boxes.py b/yolort/utils/boxes.py
index a8eaf3f4..7cffcd99 100644
--- a/yolort/utils/boxes.py
+++ b/yolort/utils/boxes.py
@@ -44,7 +44,7 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn
if not image_pred.size(0):
continue
# Get score and class with highest confidence
- class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True)
+ class_conf, class_pred = torch.max(image_pred[:, 5 : 5 + num_classes], 1, keepdim=True)
conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze()
# Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
@@ -140,4 +140,4 @@ def cxcywh2xyxy(bboxes):
bboxes[:, 1] = bboxes[:, 1] - bboxes[:, 3] * 0.5
bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]
- return bboxes
\ No newline at end of file
+ return bboxes
diff --git a/yolort/utils/checkpoint.py b/yolort/utils/checkpoint.py
index a0c200e4..d7dbe56e 100644
--- a/yolort/utils/checkpoint.py
+++ b/yolort/utils/checkpoint.py
@@ -3,9 +3,9 @@
# Copyright (c) Megvii Inc. All rights reserved.
import os
import shutil
-from loguru import logger
import torch
+from loguru import logger
def load_ckpt(model, ckpt):
@@ -14,9 +14,7 @@ def load_ckpt(model, ckpt):
for key_model, v in model_state_dict.items():
if key_model not in ckpt:
logger.warning(
- "{} is not in the ckpt. Please double check and see if this is desired.".format(
- key_model
- )
+ "{} is not in the ckpt. Please double check and see if this is desired.".format(key_model)
)
continue
v_ckpt = ckpt[key_model]
diff --git a/yolort/utils/dist.py b/yolort/utils/dist.py
index a4b46801..1485c88b 100644
--- a/yolort/utils/dist.py
+++ b/yolort/utils/dist.py
@@ -14,11 +14,11 @@
import pickle
import time
from contextlib import contextmanager
-from loguru import logger
import numpy as np
import torch
+from loguru import logger
from torch import distributed as dist
__all__ = [
@@ -39,9 +39,9 @@
def get_num_devices():
- gpu_list = os.getenv('CUDA_VISIBLE_DEVICES', None)
+ gpu_list = os.getenv("CUDA_VISIBLE_DEVICES", None)
if gpu_list is not None:
- return len(gpu_list.split(','))
+ return len(gpu_list.split(","))
else:
devices_list_info = os.popen("nvidia-smi -L")
devices_list_info = devices_list_info.read().strip().split("\n")
@@ -151,10 +151,10 @@ def _serialize_to_tensor(data, group):
device = torch.device("cpu" if backend == "gloo" else "cuda")
buffer = pickle.dumps(data)
- if len(buffer) > 1024 ** 3:
+ if len(buffer) > 1024**3:
logger.warning(
"Rank {} trying to all-gather {:.2f} GB of data on device {}".format(
- get_rank(), len(buffer) / (1024 ** 3), device
+ get_rank(), len(buffer) / (1024**3), device
)
)
storage = torch.ByteStorage.from_buffer(buffer)
@@ -169,14 +169,9 @@ def _pad_to_largest_tensor(tensor, group):
Tensor: padded tensor that has the max size
"""
world_size = dist.get_world_size(group=group)
- assert (
- world_size >= 1
- ), "comm.gather/all_gather must be called from ranks within the given group!"
+ assert world_size >= 1, "comm.gather/all_gather must be called from ranks within the given group!"
local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device)
- size_list = [
- torch.zeros([1], dtype=torch.int64, device=tensor.device)
- for _ in range(world_size)
- ]
+ size_list = [torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size)]
dist.all_gather(size_list, local_size, group=group)
size_list = [int(size.item()) for size in size_list]
@@ -185,9 +180,7 @@ def _pad_to_largest_tensor(tensor, group):
# we pad the tensor because torch all_gather does not support
# gathering tensors of different shapes
if local_size != max_size:
- padding = torch.zeros(
- (max_size - local_size,), dtype=torch.uint8, device=tensor.device
- )
+ padding = torch.zeros((max_size - local_size,), dtype=torch.uint8, device=tensor.device)
tensor = torch.cat((tensor, padding), dim=0)
return size_list, tensor
@@ -216,10 +209,7 @@ def all_gather(data, group=None):
max_size = max(size_list)
# receiving Tensor from all ranks
- tensor_list = [
- torch.empty((max_size,), dtype=torch.uint8, device=tensor.device)
- for _ in size_list
- ]
+ tensor_list = [torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list]
dist.all_gather(tensor_list, tensor, group=group)
data_list = []
@@ -258,10 +248,7 @@ def gather(data, dst=0, group=None):
# receiving Tensor from all ranks
if rank == dst:
max_size = max(size_list)
- tensor_list = [
- torch.empty((max_size,), dtype=torch.uint8, device=tensor.device)
- for _ in size_list
- ]
+ tensor_list = [torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list]
dist.gather(tensor, tensor_list, dst=dst, group=group)
data_list = []
@@ -282,7 +269,7 @@ def shared_random_seed():
create one.
All workers must call this function, otherwise it will deadlock.
"""
- ints = np.random.randint(2 ** 31)
+ ints = np.random.randint(2**31)
all_ints = all_gather(ints)
return all_ints[0]
@@ -291,4 +278,4 @@ def time_synchronized():
"""pytorch-accurate time"""
if torch.cuda.is_available():
torch.cuda.synchronize()
- return time.time()
\ No newline at end of file
+ return time.time()
diff --git a/yolort/utils/ema.py b/yolort/utils/ema.py
index 364e8c87..67734266 100644
--- a/yolort/utils/ema.py
+++ b/yolort/utils/ema.py
@@ -51,10 +51,8 @@ def update(self, model):
self.updates += 1
d = self.decay(self.updates)
- msd = (
- model.module.state_dict() if is_parallel(model) else model.state_dict()
- ) # model state_dict
+ msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict
for k, v in self.ema.state_dict().items():
if v.dtype.is_floating_point:
v *= d
- v += (1.0 - d) * msd[k].detach()
\ No newline at end of file
+ v += (1.0 - d) * msd[k].detach()
diff --git a/yolort/utils/logger.py b/yolort/utils/logger.py
index 00f1d125..ed78a4cb 100644
--- a/yolort/utils/logger.py
+++ b/yolort/utils/logger.py
@@ -1,15 +1,16 @@
+import datetime
+import inspect
import os
import sys
-import cv2
import time
-import datetime
-import inspect
-import numpy as np
-from loguru import logger
from collections import defaultdict, deque
+import cv2
+import numpy as np
+
import torch
import torch.distributed as dist
+from loguru import logger
from yolort.utils import is_module_available
@@ -204,6 +205,7 @@ def get_rank():
def is_main_process():
return get_rank() == 0
+
def get_caller_name(depth=0):
"""
Args:
@@ -317,17 +319,20 @@ class WandbLogger(object):
https://docs.wandb.ai/guides/track
https://docs.wandb.ai/guides/integrations/other/yolox
"""
- def __init__(self,
- project=None,
- name=None,
- id=None,
- entity=None,
- save_dir=None,
- config=None,
- val_dataset=None,
- num_eval_images=100,
- log_checkpoints=False,
- **kwargs):
+
+ def __init__(
+ self,
+ project=None,
+ name=None,
+ id=None,
+ entity=None,
+ save_dir=None,
+ config=None,
+ val_dataset=None,
+ num_eval_images=100,
+ log_checkpoints=False,
+ **kwargs,
+ ):
"""
Args:
project (str): wandb project name.
@@ -357,12 +362,12 @@ def __init__(self,
"""
try:
import wandb
+
self.wandb = wandb
except ModuleNotFoundError:
raise ModuleNotFoundError(
- "wandb is not installed."
- "Please install wandb using pip install wandb"
- )
+ "wandb is not installed." "Please install wandb using pip install wandb"
+ )
from yolox.data.datasets import VOCDetection
@@ -379,14 +384,14 @@ def __init__(self,
self.num_log_images = len(val_dataset)
else:
self.num_log_images = min(num_eval_images, len(val_dataset))
- self.log_checkpoints = (log_checkpoints == "True" or log_checkpoints == "true")
+ self.log_checkpoints = log_checkpoints == "True" or log_checkpoints == "true"
self._wandb_init = dict(
project=self.project,
name=self.name,
id=self.id,
entity=self.entity,
dir=self.save_dir,
- resume="allow"
+ resume="allow",
)
self._wandb_init.update(**kwargs)
@@ -404,9 +409,7 @@ def __init__(self,
if val_dataset and self.num_log_images != 0:
self.val_dataset = val_dataset
self.cats = val_dataset.cats
- self.id_to_class = {
- cls['id']: cls['name'] for cls in self.cats
- }
+ self.id_to_class = {cls["id"]: cls["name"] for cls in self.cats}
self._log_validation_set(val_dataset)
@property
@@ -445,10 +448,7 @@ def _log_validation_set(self, val_dataset):
if isinstance(id, torch.Tensor):
id = id.item()
- self.val_table.add_data(
- id,
- self.wandb.Image(img)
- )
+ self.val_table.add_data(id, self.wandb.Image(img))
self.val_artifact.add(self.val_table, "validation_images_table")
self.run.use_artifact(self.val_artifact)
@@ -478,16 +478,17 @@ def _convert_prediction_format(self, predictions):
act_scores.append(score)
act_cls.append(classes)
- image_wise_data.update({
- int(img_id): {
- "bboxes": [box.numpy().tolist() for box in act_box],
- "scores": [score.numpy().item() for score in act_scores],
- "categories": [
- self.val_dataset.class_ids[int(act_cls[ind])]
- for ind in range(len(act_box))
- ],
+ image_wise_data.update(
+ {
+ int(img_id): {
+ "bboxes": [box.numpy().tolist() for box in act_box],
+ "scores": [score.numpy().item() for score in act_scores],
+ "categories": [
+ self.val_dataset.class_ids[int(act_cls[ind])] for ind in range(len(act_box))
+ ],
+ }
}
- })
+ )
return image_wise_data
@@ -546,14 +547,12 @@ def log_images(self, predictions):
"minX": min(x0, x1),
"minY": min(y0, y1),
"maxX": max(x0, x1),
- "maxY": max(y0, y1)
+ "maxY": max(y0, y1),
},
"class_id": prediction["categories"][i],
- "domain": "pixel"
+ "domain": "pixel",
}
- avg_scores[
- self.id_to_class[prediction["categories"][i]]
- ] += prediction["scores"][i]
+ avg_scores[self.id_to_class[prediction["categories"][i]]] += prediction["scores"][i]
num_occurrences[self.id_to_class[prediction["categories"][i]]] += 1
boxes.append(box)
else:
@@ -567,14 +566,10 @@ def log_images(self, predictions):
average_class_score.append(score)
result_table.add_data(
idx,
- self.wandb.Image(val[1], boxes={
- "prediction": {
- "box_data": boxes,
- "class_labels": self.id_to_class
- }
- }
+ self.wandb.Image(
+ val[1], boxes={"prediction": {"box_data": boxes, "class_labels": self.id_to_class}}
),
- *average_class_score
+ *average_class_score,
)
self.wandb.log({"val_results/result_table": result_table})
@@ -597,11 +592,7 @@ def save_checkpoint(self, save_dir, model_name, is_best, metadata=None):
epoch = None
filename = os.path.join(save_dir, model_name + "_ckpt.pth")
- artifact = self.wandb.Artifact(
- name=f"run_{self.run.id}_model",
- type="model",
- metadata=metadata
- )
+ artifact = self.wandb.Artifact(name=f"run_{self.run.id}_model", type="model", metadata=metadata)
artifact.add_file(filename, name="model_ckpt.pth")
aliases = ["latest"]
@@ -624,8 +615,8 @@ def initialize_wandb_logger(cls, args, exp, val_dataset):
for k, v in zip(args.opts[0::2], args.opts[1::2]):
if k.startswith("wandb-"):
try:
- wandb_params.update({k[len(prefix):]: int(v)})
+ wandb_params.update({k[len(prefix) :]: int(v)})
except ValueError:
- wandb_params.update({k[len(prefix):]: v})
+ wandb_params.update({k[len(prefix) :]: v})
- return cls(config=vars(exp), val_dataset=val_dataset, **wandb_params)
\ No newline at end of file
+ return cls(config=vars(exp), val_dataset=val_dataset, **wandb_params)
diff --git a/yolort/utils/lr_scheduler.py b/yolort/utils/lr_scheduler.py
index 42c00cf2..777da407 100644
--- a/yolort/utils/lr_scheduler.py
+++ b/yolort/utils/lr_scheduler.py
@@ -84,8 +84,7 @@ def _get_lr_func(self, name):
)
elif name == "multistep": # stepwise lr schedule
milestones = [
- int(self.total_iters * milestone / self.total_epochs)
- for milestone in self.milestones
+ int(self.total_iters * milestone / self.total_epochs) for milestone in self.milestones
]
gamma = getattr(self, "gamma", 0.1)
lr_func = partial(multistep_lr, self.lr, milestones, gamma)
@@ -103,17 +102,10 @@ def cos_lr(lr, total_iters, iters):
def warm_cos_lr(lr, total_iters, warmup_total_iters, warmup_lr_start, iters):
"""Cosine learning rate with warm up."""
if iters <= warmup_total_iters:
- lr = (lr - warmup_lr_start) * iters / float(
- warmup_total_iters
- ) + warmup_lr_start
+ lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start
else:
lr *= 0.5 * (
- 1.0
- + math.cos(
- math.pi
- * (iters - warmup_total_iters)
- / (total_iters - warmup_total_iters)
- )
+ 1.0 + math.cos(math.pi * (iters - warmup_total_iters) / (total_iters - warmup_total_iters))
)
return lr
@@ -131,18 +123,14 @@ def yolox_warm_cos_lr(
min_lr = lr * min_lr_ratio
if iters <= warmup_total_iters:
# lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start
- lr = (lr - warmup_lr_start) * pow(
- iters / float(warmup_total_iters), 2
- ) + warmup_lr_start
+ lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2) + warmup_lr_start
elif iters >= total_iters - no_aug_iter:
lr = min_lr
else:
lr = min_lr + 0.5 * (lr - min_lr) * (
1.0
+ math.cos(
- math.pi
- * (iters - warmup_total_iters)
- / (total_iters - warmup_total_iters - no_aug_iter)
+ math.pi * (iters - warmup_total_iters) / (total_iters - warmup_total_iters - no_aug_iter)
)
)
return lr
@@ -165,18 +153,14 @@ def yolox_semi_warm_cos_lr(
min_lr = lr * min_lr_ratio
if iters <= warmup_total_iters:
# lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start
- lr = (lr - warmup_lr_start) * pow(
- iters / float(warmup_total_iters), 2
- ) + warmup_lr_start
+ lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2) + warmup_lr_start
elif iters >= normal_iters + semi_iters:
lr = min_lr
elif iters <= normal_iters:
lr = min_lr + 0.5 * (lr - min_lr) * (
1.0
+ math.cos(
- math.pi
- * (iters - warmup_total_iters)
- / (total_iters - warmup_total_iters - no_aug_iters)
+ math.pi * (iters - warmup_total_iters) / (total_iters - warmup_total_iters - no_aug_iters)
)
)
else:
@@ -187,10 +171,7 @@ def yolox_semi_warm_cos_lr(
* (
normal_iters
- warmup_total_iters
- + (iters - normal_iters)
- * iters_per_epoch
- * 1.0
- / iters_per_epoch_semi
+ + (iters - normal_iters) * iters_per_epoch * 1.0 / iters_per_epoch_semi
)
/ (total_iters - warmup_total_iters - no_aug_iters)
)
diff --git a/yolort/utils/metric.py b/yolort/utils/metric.py
index f04013a3..2cb79271 100644
--- a/yolort/utils/metric.py
+++ b/yolort/utils/metric.py
@@ -5,9 +5,9 @@
import os
import time
from collections import defaultdict, deque
-import psutil
import numpy as np
+import psutil
import torch
@@ -17,7 +17,7 @@
"get_total_and_free_memory_in_Mb",
"occupy_mem",
"gpu_mem_usage",
- "mem_usage"
+ "mem_usage",
]
@@ -27,7 +27,7 @@ def get_total_and_free_memory_in_Mb(cuda_device):
)
devices_info = devices_info_str.read().strip().split("\n")
if "CUDA_VISIBLE_DEVICES" in os.environ:
- visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(',')
+ visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
cuda_device = int(visible_devices[cuda_device])
total, used = devices_info[int(cuda_device)].split(",")
return int(total), int(used)
@@ -134,4 +134,4 @@ def update(self, values=None, **kwargs):
def clear_meters(self):
for v in self.values():
- v.clear()
\ No newline at end of file
+ v.clear()
diff --git a/yolort/utils/model_utils.py b/yolort/utils/model_utils.py
index 0b848888..228c3851 100644
--- a/yolort/utils/model_utils.py
+++ b/yolort/utils/model_utils.py
@@ -55,4 +55,4 @@ def recover_status(module):
backup_status(module)
yield module
- recover_status(module)
\ No newline at end of file
+ recover_status(module)