From 52fd8b0977fe569811a636276e571240b947833c Mon Sep 17 00:00:00 2001
From: JingxianKe <983231802@qq.com>
Date: Fri, 22 Sep 2023 12:02:28 +0800
Subject: [PATCH 1/2] Replace Lightning task with YOLOX-style exp, data pipeline and trainer

---
 exps/default/__init__.py                 |   3 +
 exps/default/yolov5l.py                  |  20 ++
 exps/default/yolov5m.py                  |  20 ++
 exps/default/yolov5m6.py                 |  20 ++
 exps/default/yolov5n.py                  |  20 ++
 exps/default/yolov5n6.py                 |  20 ++
 exps/default/yolov5s.py                  |  20 ++
 exps/default/yolov5s6.py                 |  20 ++
 exps/default/yolov5ts.py                 |  20 ++
 requirements.txt                         |   4 +-
 test/test_data_pipeline.py               |  52 +--
 test/test_trainer.py                     | 148 +++++---
 tools/eval_metric.py                     |   2 +-
 yolort/data/__init__.py                  |  10 +-
 yolort/data/_helper.py                   |  74 +---
 yolort/data/builtin_meta.py              | 154 --------
 yolort/data/coco.py                      | 115 ------
 yolort/data/data_augment.py              | 243 +++++++++++++
 yolort/data/data_module.py               |   2 +-
 yolort/data/data_prefetcher.py           |  51 +++
 yolort/data/dataloading.py               | 113 ++++++
 yolort/data/datasets/__init__.py         |   8 +
 yolort/data/datasets/coco.py             | 187 ++++++++++
 yolort/data/datasets/coco_classes.py     |  86 +++++
 yolort/data/datasets/datasets_wrapper.py | 300 ++++++++++++++++
 yolort/data/datasets/mosaicdetection.py  | 234 ++++++++++++
 yolort/data/samplers.py                  |  85 +++++
 yolort/evaluators/__init__.py            |   5 +
 yolort/evaluators/coco_evaluator.py      | 317 +++++++++++++++++
 yolort/exp/__init__.py                   |   5 +
 yolort/exp/base_exp.py                   |  90 +++++
 yolort/exp/default/__init__.py           |  28 ++
 yolort/exp/yolox_base.py                 | 387 ++++++++++++++++++++
 yolort/trainer/__init__.py               |   4 +-
 yolort/trainer/lightning_task.py         | 143 --------
 yolort/trainer/trainer.py                | 392 ++++++++++++++++++++
 yolort/utils/__init__.py                 |   9 +
 yolort/utils/allreduce_norm.py           | 103 ++++++
 yolort/utils/boxes.py                    | 143 ++++++++
 yolort/utils/checkpoint.py               |  43 +++
 yolort/utils/dist.py                     | 294 +++++++++++++++
 yolort/utils/ema.py                      |  60 ++++
 yolort/utils/logger.py                   | 434 ++++++++++++++++++++++-
 yolort/utils/lr_scheduler.py             | 205 +++++++++++
 yolort/utils/metric.py                   | 137 +++++++
 yolort/utils/model_utils.py              |  58 +++
 46 files changed, 4328 insertions(+), 560 deletions(-)
 create mode 100644 exps/default/__init__.py
 create mode 100644 exps/default/yolov5l.py
 create mode 100644 exps/default/yolov5m.py
 create mode 100644 exps/default/yolov5m6.py
 create mode 100644 exps/default/yolov5n.py
 create mode 100644 exps/default/yolov5n6.py
 create mode 100644 exps/default/yolov5s.py
 create mode 100644 exps/default/yolov5s6.py
 create mode 100644 exps/default/yolov5ts.py
 delete mode 100644 yolort/data/builtin_meta.py
 delete mode 100644 yolort/data/coco.py
 create mode 100644 yolort/data/data_augment.py
 create mode 100644 yolort/data/data_prefetcher.py
 create mode 100644 yolort/data/dataloading.py
 create mode 100644 yolort/data/datasets/__init__.py
 create mode 100644 yolort/data/datasets/coco.py
 create mode 100644 yolort/data/datasets/coco_classes.py
 create mode 100644 yolort/data/datasets/datasets_wrapper.py
 create mode 100644 yolort/data/datasets/mosaicdetection.py
 create mode 100644 yolort/data/samplers.py
 create mode 100644 yolort/evaluators/__init__.py
 create mode 100644 yolort/evaluators/coco_evaluator.py
 create mode 100644 yolort/exp/__init__.py
 create mode 100644 yolort/exp/base_exp.py
 create mode 100644 yolort/exp/default/__init__.py
 create mode 100644 yolort/exp/yolox_base.py
 delete mode 100644 yolort/trainer/lightning_task.py
 create mode 100644 yolort/trainer/trainer.py
 create mode 100644 yolort/utils/allreduce_norm.py
 create mode 100644 yolort/utils/boxes.py
 create mode 100644 yolort/utils/checkpoint.py
 create mode 100644 yolort/utils/dist.py
 create mode 100644 yolort/utils/ema.py
 create mode 100644 yolort/utils/lr_scheduler.py
 create mode 100644 yolort/utils/metric.py
 create mode 100644 yolort/utils/model_utils.py

diff --git a/exps/default/__init__.py b/exps/default/__init__.py
new file mode 100644
index 00000000..ce9fae06
--- /dev/null
+++ b/exps/default/__init__.py
@@ -0,0 +1,3 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
diff --git a/exps/default/yolov5l.py b/exps/default/yolov5l.py
new file mode 100644
index 00000000..b04d0f90
--- /dev/null
+++ b/exps/default/yolov5l.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+import yolort.models as models
+
+from yolort.exp import Exp as MyExp
+
+
+class Exp(MyExp):  # experiment description that builds the "yolov5l" model
+    def __init__(self):
+        super().__init__()
+        self.exp_name = os.path.basename(os.path.realpath(__file__)).split(".")[0]  # file name up to the first dot
+
+    def get_model(self):
+        self.model = vars(models)["yolov5l"](upstream_version="r6.0")  # look the factory up by name
+        self.model.train()  # hand the network back in training mode
+        return self.model
\ No newline at end of file
diff --git a/exps/default/yolov5m.py b/exps/default/yolov5m.py
new file mode 100644
index 00000000..e33c2771
--- /dev/null
+++ b/exps/default/yolov5m.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+import yolort.models as models
+
+from yolort.exp import Exp as MyExp
+
+
+class Exp(MyExp):  # experiment description that builds the "yolov5m" model
+    def __init__(self):
+        super().__init__()
+        self.exp_name = os.path.basename(os.path.realpath(__file__)).split(".")[0]  # file name up to the first dot
+
+    def get_model(self):
+        self.model = vars(models)["yolov5m"](upstream_version="r6.0")  # look the factory up by name
+        self.model.train()  # hand the network back in training mode
+        return self.model
\ No newline at end of file
diff --git a/exps/default/yolov5m6.py b/exps/default/yolov5m6.py
new file mode 100644
index 00000000..4ac71156
--- /dev/null
+++ b/exps/default/yolov5m6.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+import yolort.models as models
+
+from yolort.exp import Exp as MyExp
+
+
+class Exp(MyExp):  # experiment description that builds the "yolov5m6" model
+    def __init__(self):
+        super().__init__()
+        self.exp_name = os.path.basename(os.path.realpath(__file__)).split(".")[0]  # file name up to the first dot
+
+    def get_model(self):
+        self.model = vars(models)["yolov5m6"](upstream_version="r6.0")  # look the factory up by name
+        self.model.train()  # hand the network back in training mode
+        return self.model
\ No newline at end of file
diff --git a/exps/default/yolov5n.py b/exps/default/yolov5n.py
new file mode 100644
index 00000000..72bf63e8
--- /dev/null
+++ b/exps/default/yolov5n.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+import yolort.models as models
+
+from yolort.exp import Exp as MyExp
+
+
+class Exp(MyExp):  # experiment description that builds the "yolov5n" model
+    def __init__(self):
+        super().__init__()
+        self.exp_name = os.path.basename(os.path.realpath(__file__)).split(".")[0]  # file name up to the first dot
+
+    def get_model(self):
+        self.model = vars(models)["yolov5n"](upstream_version="r6.0")  # look the factory up by name
+        self.model.train()  # hand the network back in training mode
+        return self.model
\ No newline at end of file
diff --git a/exps/default/yolov5n6.py b/exps/default/yolov5n6.py
new file mode 100644
index 00000000..3ac2cfd2
--- /dev/null
+++ b/exps/default/yolov5n6.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+import yolort.models as models
+
+from yolort.exp import Exp as MyExp
+
+
+class Exp(MyExp):  # experiment description that builds the "yolov5n6" model
+    def __init__(self):
+        super().__init__()
+        self.exp_name = os.path.basename(os.path.realpath(__file__)).split(".")[0]  # file name up to the first dot
+
+    def get_model(self):
+        self.model = vars(models)["yolov5n6"](upstream_version="r6.0")  # look the factory up by name
+        self.model.train()  # hand the network back in training mode
+        return self.model
\ No newline at end of file
diff --git a/exps/default/yolov5s.py b/exps/default/yolov5s.py
new file mode 100644
index 00000000..61736d25
--- /dev/null
+++ b/exps/default/yolov5s.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+import yolort.models as models
+
+from yolort.exp import Exp as MyExp
+
+
+class Exp(MyExp):  # experiment description that builds the "yolov5s" model
+    def __init__(self):
+        super().__init__()
+        self.exp_name = os.path.basename(os.path.realpath(__file__)).split(".")[0]  # file name up to the first dot
+
+    def get_model(self):
+        self.model = vars(models)["yolov5s"](upstream_version="r6.0")  # look the factory up by name
+        self.model.train()  # hand the network back in training mode
+        return self.model
\ No newline at end of file
diff --git a/exps/default/yolov5s6.py b/exps/default/yolov5s6.py
new file mode 100644
index 00000000..cda2a942
--- /dev/null
+++ b/exps/default/yolov5s6.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+import yolort.models as models
+
+from yolort.exp import Exp as MyExp
+
+
+class Exp(MyExp):  # experiment description that builds the "yolov5s6" model
+    def __init__(self):
+        super().__init__()
+        self.exp_name = os.path.basename(os.path.realpath(__file__)).split(".")[0]  # file name up to the first dot
+
+    def get_model(self):
+        self.model = vars(models)["yolov5s6"](upstream_version="r6.0")  # look the factory up by name
+        self.model.train()  # hand the network back in training mode
+        return self.model
\ No newline at end of file
diff --git a/exps/default/yolov5ts.py b/exps/default/yolov5ts.py
new file mode 100644
index 00000000..365eab09
--- /dev/null
+++ b/exps/default/yolov5ts.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+import yolort.models as models
+
+from yolort.exp import Exp as MyExp
+
+
+class Exp(MyExp):  # experiment description that builds the "yolov5ts" model
+    def __init__(self):
+        super().__init__()
+        self.exp_name = os.path.basename(os.path.realpath(__file__)).split(".")[0]  # file name up to the first dot
+
+    def get_model(self):
+        self.model = vars(models)["yolov5ts"](upstream_version="r6.0")  # look the factory up by name
+        self.model.train()  # hand the network back in training mode
+        return self.model
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 8a349747..af814771 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -29,4 +29,6 @@ pandas
 # extras --------------------------------------
 # pycocotools on PyPI needs python3.7 as minimal
 # pycocotools>=2.0.2  # corresponds to https://github.com/ppwwyyxx/cocoapi
-thop  # FLOPs computation
+thop    # FLOPs computation
+loguru  # Python logging made (stupidly) simple
+Ninja   # a small build system with a focus on speed
\ No newline at end of file
diff --git a/test/test_data_pipeline.py b/test/test_data_pipeline.py
index 2a597eb7..4e626a81 100644
--- a/test/test_data_pipeline.py
+++ b/test/test_data_pipeline.py
@@ -3,11 +3,23 @@
 
 import numpy as np
 import pytest
-import torch
+import sys
+sys.path.append("../yolort")
 
+import torch
 from torch import Tensor
-from yolort.data import _helper as data_helper
+from yolort.exp import Exp
+from yolort.data import DataPrefetcher
 from yolort.utils import contains_any_tensor
+from torch import distributed as dist
+
+
+def get_world_size() -> int:
+    # Report 1 unless torch.distributed is both available and initialized.
+    usable = dist.is_available() and dist.is_initialized()
+    if usable:
+        return dist.get_world_size()
+    return 1
 
 
 def test_contains_any_tensor():
@@ -21,28 +33,32 @@ def test_contains_any_tensor():
 
 def test_get_dataset():
     # Acquire the images and labels from the coco128 dataset
-    train_dataset = data_helper.get_dataset(data_root="data-bin", mode="train")
+    train_dataset = Exp().get_dataset(data_root="data-bin", mode="train", cache_type=None)
     # Test the datasets
-    image, target = next(iter(train_dataset))
-    assert isinstance(image, Tensor)
-    assert isinstance(target, dict)
+    image, target, _, _ = next(iter(train_dataset))  # each sample unpacks into four items; the last two are unused here
+    assert image.shape == (3, 640, 640)  # presumably channels-first at the default input size - TODO confirm
+    assert target.shape == (50, 5)  # presumably 50 padded label rows of 5 values each - TODO confirm
 
 
 def test_get_dataloader():
     batch_size = 8
-    data_loader = data_helper.get_dataloader(data_root="data-bin", mode="train", batch_size=batch_size)
-    # Test the dataloader
-    images, targets = next(iter(data_loader))
+    # Ask for a distributed loader only when more than one process is active.
+    is_distributed = get_world_size() > 1
+    data_loader = Exp().get_data_loader(
+        batch_size=batch_size,
+        is_distributed=is_distributed,
+        no_aug=False,
+        cache_img=None,
+    )
+    # Pull a single batch through the prefetcher instead of iterating the loader.
+    prefetcher = DataPrefetcher(data_loader)
+    images, targets = prefetcher.next()
 
     assert len(images) == batch_size
     assert isinstance(images[0], Tensor)
     assert len(images[0]) == 3
     assert len(targets) == batch_size
-    assert isinstance(targets[0], dict)
-    assert isinstance(targets[0]["image_id"], Tensor)
-    assert isinstance(targets[0]["boxes"], Tensor)
-    assert isinstance(targets[0]["labels"], Tensor)
-    assert isinstance(targets[0]["orig_size"], Tensor)
+    assert isinstance(targets[0], Tensor)
 
 
 @pytest.mark.skip("Remove Lightning dependency")
@@ -65,11 +81,3 @@ def test_detection_data_module():
     assert isinstance(targets[0]["image_id"], Tensor)
     assert isinstance(targets[0]["boxes"], Tensor)
     assert isinstance(targets[0]["labels"], Tensor)
-
-
-def test_prepare_coco128():
-    data_path = Path("data-bin")
-    coco128_dirname = "coco128"
-    data_helper.prepare_coco128(data_path, dirname=coco128_dirname)
-    annotation_file = data_path / coco128_dirname / "annotations" / "instances_train2017.json"
-    assert annotation_file.is_file()
diff --git a/test/test_trainer.py b/test/test_trainer.py
index 9be94682..be1573c7 100644
--- a/test/test_trainer.py
+++ b/test/test_trainer.py
@@ -1,52 +1,108 @@
 # Copyright (c) 2021, yolort team. All rights reserved.
 
-from pathlib import Path
+import argparse
+import importlib
 
-import pytest
-from yolort.data import _helper as data_helper
+import sys
+sys.path.append("../yolort/")
 
+def make_parser():  # Build and return the argparse parser holding the training options used by the tests below.
+    parser = argparse.ArgumentParser("YOLOX train parser")
+    parser.add_argument("-expn", "--experiment-name", type=str, default="yolov5n")
+    parser.add_argument("-n", "--name", type=str, default="yolov5n", help="model name")
+
+    # distributed training options
+    parser.add_argument(
+        "--dist-backend", default="nccl", type=str, help="distributed backend"
+    )
+    parser.add_argument(
+        "--dist-url",
+        default=None,
+        type=str,
+        help="url used to set up distributed training",
+    )
+    parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size")
+    parser.add_argument(
+        "-d", "--devices", default=None, type=int, help="device for training"
+    )
+    parser.add_argument(
+        "-f",
+        "--exp_file",
+        default=None,
+        type=str,
+        help="plz input your experiment description file",
+    )
+    parser.add_argument(
+        "--resume", default=False, action="store_true", help="resume training"
+    )
+    parser.add_argument("-c", "--ckpt", default=None, type=str, help="checkpoint file")
+    parser.add_argument(
+        "-e",
+        "--start_epoch",
+        default=None,
+        type=int,
+        help="resume training start epoch",
+    )
+    parser.add_argument(
+        "--num_machines", default=1, type=int, help="num of node for training"
+    )
+    parser.add_argument(
+        "--machine_rank", default=0, type=int, help="node rank for multi-node training"
+    )
+    parser.add_argument(
+        "--fp16",
+        dest="fp16",
+        default=False,
+        action="store_true",
+        help="Adopting mix precision training.",
+    )
+    parser.add_argument(
+        "--cache",
+        type=str,
+        nargs="?",  # the value is optional
+        const="ram",  # a bare --cache with no value stores "ram"
+        help="Caching imgs to ram/disk for fast training.",
+    )
+    parser.add_argument(
+        "-o",
+        "--occupy",
+        dest="occupy",
+        default=False,
+        action="store_true",
+        help="occupy GPU memory first for training.",
+    )
+    parser.add_argument(
+        "-l",
+        "--logger",
+        type=str,
+        help="Logger to be used for metrics. \
+        Implemented loggers include `tensorboard` and `wandb`.",
+        default="tensorboard"
+    )
+    parser.add_argument(
+        "opts",
+        help="Modify config options using the command-line",
+        default=None,
+        nargs=argparse.REMAINDER,  # collect every remaining argument into opts
+    )
+    return parser
 
-@pytest.mark.skip("Remove Lightning dependency")
 def test_training_step():
-    import pytorch_lightning as pl
-    from yolort.data.data_module import DetectionDataModule
-    from yolort.trainer import DefaultTask
-
-    # Setup the DataModule
-    data_path = "data-bin"
-    train_dataset = data_helper.get_dataset(data_root=data_path, mode="train")
-    val_dataset = data_helper.get_dataset(data_root=data_path, mode="val")
-    data_module = DetectionDataModule(train_dataset, val_dataset, batch_size=8)
-    # Load model
-    model = DefaultTask(arch="yolov5n")
-    model = model.train()
-    # Trainer
-    trainer = pl.Trainer(max_epochs=1)
-    trainer.fit(model, data_module)
-
-
-@pytest.mark.skip("Remove Lightning dependency")
-@pytest.mark.parametrize("arch, version, map5095, map50", [("yolov5s", "r4.0", 42.5, 65.3)])
-def test_test_epoch_end(arch, version, map5095, map50):
-    import pytorch_lightning as pl
-    from yolort.trainer import DefaultTask
-
-    # Acquire the annotation file
-    data_path = Path("data-bin")
-    coco128_dirname = "coco128"
-    data_helper.prepare_coco128(data_path, dirname=coco128_dirname)
-    annotation_file = data_path / coco128_dirname / "annotations" / "instances_train2017.json"
-
-    # Get dataloader to test
-    val_dataloader = data_helper.get_dataloader(data_root=data_path, mode="val")
-
-    # Load model
-    model = DefaultTask(arch=arch, version=version, pretrained=True, annotation_path=annotation_file)
-
-    # test step
-    trainer = pl.Trainer(max_epochs=1)
-    trainer.test(model, dataloaders=val_dataloader)
-    # test epoch end
-    results = model.evaluator.compute()
-    assert results["AP"] > map5095
-    assert results["AP50"] > map50
+    args = make_parser().parse_args([])  # defaults only: sys.argv holds pytest's own arguments here
+    module_name = ".".join(["yolort", "exp", "default", args.name])
+    exp = importlib.import_module(module_name).Exp()  # load the experiment module named by --name
+    exp.merge(args.opts)
+    h, w = exp.input_size
+    assert h % 32 == 0 and w % 32 == 0, "input size must be multiples of 32"
+
+    from yolort.trainer import Trainer
+    trainer = Trainer(exp, args)
+    trainer.train()
+
+def test_test_epoch_end():
+    args = make_parser().parse_args([])  # defaults only: sys.argv holds pytest's own arguments here
+    module_name = ".".join(["yolort", "exp", "default", args.name])
+    exp = importlib.import_module(module_name).Exp()  # load the experiment module named by --name
+    exp.merge(args.opts)
+
+    main(exp, args)  # NOTE(review): `main` is neither defined nor imported in this file - confirm the intended entry point
diff --git a/tools/eval_metric.py b/tools/eval_metric.py
index 0ab6adae..0538f0df 100644
--- a/tools/eval_metric.py
+++ b/tools/eval_metric.py
@@ -8,7 +8,7 @@
 import torchvision
 import yolort
 from yolort.data import _helper as data_helper
-from yolort.data.coco import COCODetection
+from yolort.data.datasets.coco import COCODetection
 from yolort.data.coco_eval import COCOEvaluator
 from yolort.data.transforms import collate_fn, default_val_transforms
 from yolort.utils.logger import MetricLogger
diff --git a/yolort/data/__init__.py b/yolort/data/__init__.py
index efd93ced..5740093a 100644
--- a/yolort/data/__init__.py
+++ b/yolort/data/__init__.py
@@ -1 +1,9 @@
-# Copyright (c) 2021, yolort team. All rights reserved.
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+from .data_augment import TrainTransform, ValTransform
+from .data_prefetcher import DataPrefetcher
+from .dataloading import DataLoader, get_yolox_datadir, worker_init_reset_seed
+from .datasets import *
+from .samplers import InfiniteSampler, YoloBatchSampler
\ No newline at end of file
diff --git a/yolort/data/_helper.py b/yolort/data/_helper.py
index 2a95af9a..66fbf0cb 100644
--- a/yolort/data/_helper.py
+++ b/yolort/data/_helper.py
@@ -7,8 +7,7 @@
 import torch
 from tabulate import tabulate
 
-from .coco import COCODetection
-from .transforms import collate_fn, default_train_transforms, default_val_transforms
+from .transforms import collate_fn
 
 
 def create_small_table(small_dict):
@@ -45,74 +44,3 @@ def get_coco_api_from_dataset(dataset):
         return dataset.coco
     else:
         raise NotImplementedError("Currently only supports COCO datasets")
-
-
-def prepare_coco128(
-    data_path: PosixPath,
-    dirname: str = "coco128",
-) -> None:
-    """
-    Prepare coco128 dataset to test.
-
-    Args:
-        data_path (PosixPath): root path of coco128 dataset.
-        dirname (str): the directory name of coco128 dataset. Default: 'coco128'.
-    """
-    logger = logging.getLogger(__name__)
-
-    if not data_path.is_dir():
-        logger.info(f"Create a new directory: {data_path}")
-        data_path.mkdir(parents=True, exist_ok=True)
-
-    zip_path = data_path / "coco128.zip"
-    coco128_url = "https://github.com/zhiqwang/yolort/releases/download/v0.3.0/coco128.zip"
-    if not zip_path.is_file():
-        logger.info(f"Downloading coco128 datasets form {coco128_url}")
-        torch.hub.download_url_to_file(coco128_url, zip_path, hash_prefix="a67d2887")
-
-    coco128_path = data_path / dirname
-    if not coco128_path.is_dir():
-        logger.info(f"Unzipping dataset to {coco128_path}")
-        with ZipFile(zip_path, "r") as zip_obj:
-            zip_obj.extractall(data_path)
-
-
-def get_dataset(data_root: str, mode: str = "val"):
-    # Acquire the images and labels from the coco128 dataset
-    data_path = Path(data_root)
-    coco128_dirname = "coco128"
-    coco128_path = data_path / coco128_dirname
-    image_root = coco128_path / "images" / "train2017"
-    annotation_file = coco128_path / "annotations" / "instances_train2017.json"
-
-    if not annotation_file.is_file():
-        prepare_coco128(data_path, dirname=coco128_dirname)
-
-    if mode == "train":
-        dataset = COCODetection(image_root, annotation_file, default_train_transforms())
-    elif mode == "val":
-        dataset = COCODetection(image_root, annotation_file, default_val_transforms())
-    else:
-        raise NotImplementedError(f"Currently not supports mode {mode}")
-
-    return dataset
-
-
-def get_dataloader(data_root: str, mode: str = "val", batch_size: int = 4):
-    # Prepare the datasets for training
-    # Acquire the images and labels from the coco128 dataset
-    dataset = get_dataset(data_root=data_root, mode=mode)
-
-    # We adopt the sequential sampler in order to repeat the experiment
-    sampler = torch.utils.data.SequentialSampler(dataset)
-
-    loader = torch.utils.data.DataLoader(
-        dataset,
-        batch_size,
-        sampler=sampler,
-        drop_last=False,
-        collate_fn=collate_fn,
-        num_workers=0,
-    )
-
-    return loader
diff --git a/yolort/data/builtin_meta.py b/yolort/data/builtin_meta.py
deleted file mode 100644
index be2fc7ab..00000000
--- a/yolort/data/builtin_meta.py
+++ /dev/null
@@ -1,154 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (c) Facebook, Inc. and its affiliates.
-
-"""
-Note:
-For your custom dataset, there is no need to hard-code metadata anywhere in the code.
-For example, for COCO-format dataset, metadata will be obtained automatically
-when calling `load_coco_json`. For other dataset, metadata may also be obtained in other ways
-during loading.
-
-However, we hard-coded metadata for a few common dataset here.
-The only goal is to allow users who don't have these dataset to use pre-trained models.
-Users don't have to download a COCO json (which contains metadata), in order to visualize a
-COCO model (with correct class names and colors).
-"""
-
-
-# All coco categories, together with their nice-looking visualization colors
-# It's from https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json
-COCO_CATEGORIES = [
-    {"id": 1, "color": [220, 20, 60], "isthing": 1, "name": "person"},
-    {"id": 2, "color": [119, 11, 32], "isthing": 1, "name": "bicycle"},
-    {"id": 3, "color": [0, 0, 142], "isthing": 1, "name": "car"},
-    {"id": 4, "color": [0, 0, 230], "isthing": 1, "name": "motorcycle"},
-    {"id": 5, "color": [106, 0, 228], "isthing": 1, "name": "airplane"},
-    {"id": 6, "color": [0, 60, 100], "isthing": 1, "name": "bus"},
-    {"id": 7, "color": [0, 80, 100], "isthing": 1, "name": "train"},
-    {"id": 8, "color": [0, 0, 70], "isthing": 1, "name": "truck"},
-    {"id": 9, "color": [0, 0, 192], "isthing": 1, "name": "boat"},
-    {"id": 10, "color": [250, 170, 30], "isthing": 1, "name": "traffic light"},
-    {"id": 11, "color": [100, 170, 30], "isthing": 1, "name": "fire hydrant"},
-    {"id": 13, "color": [220, 220, 0], "isthing": 1, "name": "stop sign"},
-    {"id": 14, "color": [175, 116, 175], "isthing": 1, "name": "parking meter"},
-    {"id": 15, "color": [250, 0, 30], "isthing": 1, "name": "bench"},
-    {"id": 16, "color": [165, 42, 42], "isthing": 1, "name": "bird"},
-    {"id": 17, "color": [255, 77, 255], "isthing": 1, "name": "cat"},
-    {"id": 18, "color": [0, 226, 252], "isthing": 1, "name": "dog"},
-    {"id": 19, "color": [182, 182, 255], "isthing": 1, "name": "horse"},
-    {"id": 20, "color": [0, 82, 0], "isthing": 1, "name": "sheep"},
-    {"id": 21, "color": [120, 166, 157], "isthing": 1, "name": "cow"},
-    {"id": 22, "color": [110, 76, 0], "isthing": 1, "name": "elephant"},
-    {"id": 23, "color": [174, 57, 255], "isthing": 1, "name": "bear"},
-    {"id": 24, "color": [199, 100, 0], "isthing": 1, "name": "zebra"},
-    {"id": 25, "color": [72, 0, 118], "isthing": 1, "name": "giraffe"},
-    {"id": 27, "color": [255, 179, 240], "isthing": 1, "name": "backpack"},
-    {"id": 28, "color": [0, 125, 92], "isthing": 1, "name": "umbrella"},
-    {"id": 31, "color": [209, 0, 151], "isthing": 1, "name": "handbag"},
-    {"id": 32, "color": [188, 208, 182], "isthing": 1, "name": "tie"},
-    {"id": 33, "color": [0, 220, 176], "isthing": 1, "name": "suitcase"},
-    {"id": 34, "color": [255, 99, 164], "isthing": 1, "name": "frisbee"},
-    {"id": 35, "color": [92, 0, 73], "isthing": 1, "name": "skis"},
-    {"id": 36, "color": [133, 129, 255], "isthing": 1, "name": "snowboard"},
-    {"id": 37, "color": [78, 180, 255], "isthing": 1, "name": "sports ball"},
-    {"id": 38, "color": [0, 228, 0], "isthing": 1, "name": "kite"},
-    {"id": 39, "color": [174, 255, 243], "isthing": 1, "name": "baseball bat"},
-    {"id": 40, "color": [45, 89, 255], "isthing": 1, "name": "baseball glove"},
-    {"id": 41, "color": [134, 134, 103], "isthing": 1, "name": "skateboard"},
-    {"id": 42, "color": [145, 148, 174], "isthing": 1, "name": "surfboard"},
-    {"id": 43, "color": [255, 208, 186], "isthing": 1, "name": "tennis racket"},
-    {"id": 44, "color": [197, 226, 255], "isthing": 1, "name": "bottle"},
-    {"id": 46, "color": [171, 134, 1], "isthing": 1, "name": "wine glass"},
-    {"id": 47, "color": [109, 63, 54], "isthing": 1, "name": "cup"},
-    {"id": 48, "color": [207, 138, 255], "isthing": 1, "name": "fork"},
-    {"id": 49, "color": [151, 0, 95], "isthing": 1, "name": "knife"},
-    {"id": 50, "color": [9, 80, 61], "isthing": 1, "name": "spoon"},
-    {"id": 51, "color": [84, 105, 51], "isthing": 1, "name": "bowl"},
-    {"id": 52, "color": [74, 65, 105], "isthing": 1, "name": "banana"},
-    {"id": 53, "color": [166, 196, 102], "isthing": 1, "name": "apple"},
-    {"id": 54, "color": [208, 195, 210], "isthing": 1, "name": "sandwich"},
-    {"id": 55, "color": [255, 109, 65], "isthing": 1, "name": "orange"},
-    {"id": 56, "color": [0, 143, 149], "isthing": 1, "name": "broccoli"},
-    {"id": 57, "color": [179, 0, 194], "isthing": 1, "name": "carrot"},
-    {"id": 58, "color": [209, 99, 106], "isthing": 1, "name": "hot dog"},
-    {"id": 59, "color": [5, 121, 0], "isthing": 1, "name": "pizza"},
-    {"id": 60, "color": [227, 255, 205], "isthing": 1, "name": "donut"},
-    {"id": 61, "color": [147, 186, 208], "isthing": 1, "name": "cake"},
-    {"id": 62, "color": [153, 69, 1], "isthing": 1, "name": "chair"},
-    {"id": 63, "color": [3, 95, 161], "isthing": 1, "name": "couch"},
-    {"id": 64, "color": [163, 255, 0], "isthing": 1, "name": "potted plant"},
-    {"id": 65, "color": [119, 0, 170], "isthing": 1, "name": "bed"},
-    {"id": 67, "color": [0, 182, 199], "isthing": 1, "name": "dining table"},
-    {"id": 70, "color": [0, 165, 120], "isthing": 1, "name": "toilet"},
-    {"id": 72, "color": [183, 130, 88], "isthing": 1, "name": "tv"},
-    {"id": 73, "color": [95, 32, 0], "isthing": 1, "name": "laptop"},
-    {"id": 74, "color": [130, 114, 135], "isthing": 1, "name": "mouse"},
-    {"id": 75, "color": [110, 129, 133], "isthing": 1, "name": "remote"},
-    {"id": 76, "color": [166, 74, 118], "isthing": 1, "name": "keyboard"},
-    {"id": 77, "color": [219, 142, 185], "isthing": 1, "name": "cell phone"},
-    {"id": 78, "color": [79, 210, 114], "isthing": 1, "name": "microwave"},
-    {"id": 79, "color": [178, 90, 62], "isthing": 1, "name": "oven"},
-    {"id": 80, "color": [65, 70, 15], "isthing": 1, "name": "toaster"},
-    {"id": 81, "color": [127, 167, 115], "isthing": 1, "name": "sink"},
-    {"id": 82, "color": [59, 105, 106], "isthing": 1, "name": "refrigerator"},
-    {"id": 84, "color": [142, 108, 45], "isthing": 1, "name": "book"},
-    {"id": 85, "color": [196, 172, 0], "isthing": 1, "name": "clock"},
-    {"id": 86, "color": [95, 54, 80], "isthing": 1, "name": "vase"},
-    {"id": 87, "color": [128, 76, 255], "isthing": 1, "name": "scissors"},
-    {"id": 88, "color": [201, 57, 1], "isthing": 1, "name": "teddy bear"},
-    {"id": 89, "color": [246, 0, 122], "isthing": 1, "name": "hair drier"},
-    {"id": 90, "color": [191, 162, 208], "isthing": 1, "name": "toothbrush"},
-    {"id": 92, "color": [255, 255, 128], "isthing": 0, "name": "banner"},
-    {"id": 93, "color": [147, 211, 203], "isthing": 0, "name": "blanket"},
-    {"id": 95, "color": [150, 100, 100], "isthing": 0, "name": "bridge"},
-    {"id": 100, "color": [168, 171, 172], "isthing": 0, "name": "cardboard"},
-    {"id": 107, "color": [146, 112, 198], "isthing": 0, "name": "counter"},
-    {"id": 109, "color": [210, 170, 100], "isthing": 0, "name": "curtain"},
-    {"id": 112, "color": [92, 136, 89], "isthing": 0, "name": "door-stuff"},
-    {"id": 118, "color": [218, 88, 184], "isthing": 0, "name": "floor-wood"},
-    {"id": 119, "color": [241, 129, 0], "isthing": 0, "name": "flower"},
-    {"id": 122, "color": [217, 17, 255], "isthing": 0, "name": "fruit"},
-    {"id": 125, "color": [124, 74, 181], "isthing": 0, "name": "gravel"},
-    {"id": 128, "color": [70, 70, 70], "isthing": 0, "name": "house"},
-    {"id": 130, "color": [255, 228, 255], "isthing": 0, "name": "light"},
-    {"id": 133, "color": [154, 208, 0], "isthing": 0, "name": "mirror-stuff"},
-    {"id": 138, "color": [193, 0, 92], "isthing": 0, "name": "net"},
-    {"id": 141, "color": [76, 91, 113], "isthing": 0, "name": "pillow"},
-    {"id": 144, "color": [255, 180, 195], "isthing": 0, "name": "platform"},
-    {"id": 145, "color": [106, 154, 176], "isthing": 0, "name": "playingfield"},
-    {"id": 147, "color": [230, 150, 140], "isthing": 0, "name": "railroad"},
-    {"id": 148, "color": [60, 143, 255], "isthing": 0, "name": "river"},
-    {"id": 149, "color": [128, 64, 128], "isthing": 0, "name": "road"},
-    {"id": 151, "color": [92, 82, 55], "isthing": 0, "name": "roof"},
-    {"id": 154, "color": [254, 212, 124], "isthing": 0, "name": "sand"},
-    {"id": 155, "color": [73, 77, 174], "isthing": 0, "name": "sea"},
-    {"id": 156, "color": [255, 160, 98], "isthing": 0, "name": "shelf"},
-    {"id": 159, "color": [255, 255, 255], "isthing": 0, "name": "snow"},
-    {"id": 161, "color": [104, 84, 109], "isthing": 0, "name": "stairs"},
-    {"id": 166, "color": [169, 164, 131], "isthing": 0, "name": "tent"},
-    {"id": 168, "color": [225, 199, 255], "isthing": 0, "name": "towel"},
-    {"id": 171, "color": [137, 54, 74], "isthing": 0, "name": "wall-brick"},
-    {"id": 175, "color": [135, 158, 223], "isthing": 0, "name": "wall-stone"},
-    {"id": 176, "color": [7, 246, 231], "isthing": 0, "name": "wall-tile"},
-    {"id": 177, "color": [107, 255, 200], "isthing": 0, "name": "wall-wood"},
-    {"id": 178, "color": [58, 41, 149], "isthing": 0, "name": "water-other"},
-    {"id": 180, "color": [183, 121, 142], "isthing": 0, "name": "window-blind"},
-    {"id": 181, "color": [255, 73, 97], "isthing": 0, "name": "window-other"},
-    {"id": 184, "color": [107, 142, 35], "isthing": 0, "name": "tree-merged"},
-    {"id": 185, "color": [190, 153, 153], "isthing": 0, "name": "fence-merged"},
-    {"id": 186, "color": [146, 139, 141], "isthing": 0, "name": "ceiling-merged"},
-    {"id": 187, "color": [70, 130, 180], "isthing": 0, "name": "sky-other-merged"},
-    {"id": 188, "color": [134, 199, 156], "isthing": 0, "name": "cabinet-merged"},
-    {"id": 189, "color": [209, 226, 140], "isthing": 0, "name": "table-merged"},
-    {"id": 190, "color": [96, 36, 108], "isthing": 0, "name": "floor-other-merged"},
-    {"id": 191, "color": [96, 96, 96], "isthing": 0, "name": "pavement-merged"},
-    {"id": 192, "color": [64, 170, 64], "isthing": 0, "name": "mountain-merged"},
-    {"id": 193, "color": [152, 251, 152], "isthing": 0, "name": "grass-merged"},
-    {"id": 194, "color": [208, 229, 228], "isthing": 0, "name": "dirt-merged"},
-    {"id": 195, "color": [206, 186, 171], "isthing": 0, "name": "paper-merged"},
-    {"id": 196, "color": [152, 161, 64], "isthing": 0, "name": "food-other-merged"},
-    {"id": 197, "color": [116, 112, 0], "isthing": 0, "name": "building-other-merged"},
-    {"id": 198, "color": [0, 114, 143], "isthing": 0, "name": "rock-merged"},
-    {"id": 199, "color": [102, 102, 156], "isthing": 0, "name": "wall-other-merged"},
-    {"id": 200, "color": [250, 141, 255], "isthing": 0, "name": "rug-merged"},
-]
diff --git a/yolort/data/coco.py b/yolort/data/coco.py
deleted file mode 100644
index 3e693ad4..00000000
--- a/yolort/data/coco.py
+++ /dev/null
@@ -1,115 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-"""
-COCO dataset which returns image_id for evaluation.
-Mostly copy-paste from https://github.com/pytorch/vision/blob/13b35ff/references/detection/coco_utils.py
-"""
-import torch
-import torchvision
-from yolort.utils import is_module_available, requires_module
-
-if is_module_available("pycocotools"):
-    from pycocotools import mask as coco_mask
-
-
-class COCODetection(torchvision.datasets.CocoDetection):
-    def __init__(self, img_folder, ann_file, transforms, return_masks=False):
-        super().__init__(img_folder, ann_file)
-        self._transforms = transforms
-
-        json_category_id_to_contiguous_id = {v: i for i, v in enumerate(self.coco.getCatIds())}
-        self.prepare = ConvertCocoPolysToMask(json_category_id_to_contiguous_id, return_masks)
-
-    def __getitem__(self, idx):
-        img, target = super().__getitem__(idx)
-        image_id = self.ids[idx]
-        target = {"image_id": image_id, "annotations": target}
-        img, target = self.prepare(img, target)
-        if self._transforms is not None:
-            img, target = self._transforms(img, target)
-        return img, target
-
-
-class ConvertCocoPolysToMask:
-    def __init__(self, json_category_id_maps, return_masks=False):
-        self.json_category_id_to_contiguous_id = json_category_id_maps
-        self.return_masks = return_masks
-
-    def __call__(self, image, target):
-        w, h = image.size
-
-        image_id = target["image_id"]
-        image_id = torch.tensor([image_id])
-
-        anno = target["annotations"]
-
-        anno = [obj for obj in anno if "iscrowd" not in obj or obj["iscrowd"] == 0]
-
-        boxes = [obj["bbox"] for obj in anno]
-        # guard against no boxes via resizing
-        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
-        # BoxMode: convert from XYWH_ABS to XYXY_ABS
-        boxes[:, 2:] += boxes[:, :2]
-        boxes[:, 0::2].clamp_(min=0, max=w)
-        boxes[:, 1::2].clamp_(min=0, max=h)
-
-        classes = [obj["category_id"] for obj in anno]
-        classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
-        classes = torch.tensor(classes, dtype=torch.int64)
-
-        if self.return_masks:
-            segmentations = [obj["segmentation"] for obj in anno]
-            masks = convert_coco_poly_to_mask(segmentations, h, w)
-
-        keypoints = None
-        if anno and "keypoints" in anno[0]:
-            keypoints = [obj["keypoints"] for obj in anno]
-            keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
-            num_keypoints = keypoints.shape[0]
-            if num_keypoints:
-                keypoints = keypoints.view(num_keypoints, -1, 3)
-
-        keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
-        boxes = boxes[keep]
-        classes = classes[keep]
-        if self.return_masks:
-            masks = masks[keep]
-        if keypoints is not None:
-            keypoints = keypoints[keep]
-
-        target = {}
-        target["boxes"] = boxes
-        target["labels"] = classes
-        if self.return_masks:
-            target["masks"] = masks
-        target["image_id"] = image_id
-        if keypoints is not None:
-            target["keypoints"] = keypoints
-
-        # for conversion to coco api
-        area = torch.tensor([obj["area"] for obj in anno])
-        iscrowd = torch.tensor([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in anno])
-        target["area"] = area[keep]
-        target["iscrowd"] = iscrowd[keep]
-
-        target["orig_size"] = torch.as_tensor([int(h), int(w)])
-        target["size"] = torch.as_tensor([int(h), int(w)])
-
-        return image, target
-
-
-@requires_module("pycocotools")
-def convert_coco_poly_to_mask(segmentations, height, width):
-    masks = []
-    for polygons in segmentations:
-        rles = coco_mask.frPyObjects(polygons, height, width)
-        mask = coco_mask.decode(rles)
-        if len(mask.shape) < 3:
-            mask = mask[..., None]
-        mask = torch.as_tensor(mask, dtype=torch.uint8)
-        mask = mask.any(dim=2)
-        masks.append(mask)
-    if masks:
-        masks = torch.stack(masks, dim=0)
-    else:
-        masks = torch.zeros((0, height, width), dtype=torch.uint8)
-    return masks
diff --git a/yolort/data/data_augment.py b/yolort/data/data_augment.py
new file mode 100644
index 00000000..4e53f6c2
--- /dev/null
+++ b/yolort/data/data_augment.py
@@ -0,0 +1,243 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+"""
+Data augmentation functionality. Passed as callable transformations to
+Dataset classes.
+
+The data augmentation procedures were interpreted from @weiliu89's SSD paper
+http://arxiv.org/abs/1512.02325
+"""
+
+import math
+import random
+
+import cv2
+import numpy as np
+
+from yolort.utils import xyxy2cxcywh
+
+
+def augment_hsv(img, hgain=5, sgain=30, vgain=30):
+    """Jitter hue/saturation/value of a BGR image; the result is written back into ``img``."""
+    gains = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain]  # signed per-channel offsets
+    gains *= np.random.randint(0, 2, 3)  # each channel is switched on or off at random
+    dh, ds, dv = gains.astype(np.int16)
+    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int16)  # int16 leaves room for the offsets
+
+    hsv[..., 0] = (hsv[..., 0] + dh) % 180  # hue wraps around at 180
+    hsv[..., 1] = np.clip(hsv[..., 1] + ds, 0, 255)
+    hsv[..., 2] = np.clip(hsv[..., 2] + dv, 0, 255)
+    cv2.cvtColor(hsv.astype(img.dtype), cv2.COLOR_HSV2BGR, dst=img)  # in place, nothing returned
+
+
+def get_aug_params(value, center=0):
+    """Sample uniformly from ``center +/- value`` (int or float) or from a ``(low, high)`` pair."""
+    if isinstance(value, (int, float)):  # ints such as the default degrees=10 are scalars too
+        return random.uniform(center - value, center + value)
+    if len(value) == 2:
+        return random.uniform(value[0], value[1])
+    raise ValueError(
+        "Affine params should be either a sequence containing two values "
+        "or single float values. Got {}".format(value)
+    )
+
+
+def get_affine_matrix(
+    target_size,
+    degrees=10,
+    translate=0.1,
+    scales=0.1,
+    shear=10,
+):
+    """Sample a random 2x3 affine matrix (rotation, scale, shear, translation).
+
+    Each of ``degrees``, ``translate``, ``scales`` and ``shear`` is forwarded to
+    ``get_aug_params``; ``target_size`` is ``(width, height)`` and only scales
+    the translation. Returns ``(M, scale)`` and raises ``ValueError`` when the
+    sampled scale is not positive.
+    """
+    out_w, out_h = target_size
+
+    # Draws happen in a fixed order: angle, scale, two shears, two translations.
+    angle = get_aug_params(degrees)
+    scale = get_aug_params(scales, center=1.0)
+    if scale <= 0.0:
+        raise ValueError("Argument scale should be positive")
+    rot = cv2.getRotationMatrix2D(angle=angle, center=(0, 0), scale=scale)
+
+    shear_x = math.tan(get_aug_params(shear) * math.pi / 180)
+    shear_y = math.tan(get_aug_params(shear) * math.pi / 180)
+    affine = np.ones([2, 3])
+    affine[0] = rot[0] + shear_y * rot[1]
+    affine[1] = rot[1] + shear_x * rot[0]
+
+    # The translation column is overwritten last, in pixels of the target size.
+    affine[0, 2] = get_aug_params(translate) * out_w
+    affine[1, 2] = get_aug_params(translate) * out_h
+
+    return affine, scale
+
+
+def apply_affine_to_bboxes(targets, target_size, M, scale):
+    """Warp ``xyxy`` boxes with the 2x3 affine ``M`` and clip them to ``target_size``.
+
+    The first four columns of ``targets`` are overwritten in place with the
+    axis-aligned hull of each warped box, and the same array is returned.
+    ``target_size`` is ``(width, height)``. ``scale`` is accepted but is not
+    used by this function.
+    """
+    n_boxes = len(targets)
+    max_x, max_y = target_size
+
+    # Homogeneous corners, four per box: x1y1, x2y2, x1y2, x2y1.
+    corners = np.ones((4 * n_boxes, 3))
+    corners[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
+        4 * n_boxes, 2
+    )
+    warped = (corners @ M.T).reshape(n_boxes, 8)
+
+    # Even columns hold x coordinates, odd columns hold y coordinates.
+    xs = warped[:, 0::2]
+    ys = warped[:, 1::2]
+    hull = np.stack((xs.min(1), ys.min(1), xs.max(1), ys.max(1)), axis=1)
+
+    # Keep every box inside the output canvas.
+    hull[:, 0::2] = np.clip(hull[:, 0::2], 0, max_x)
+    hull[:, 1::2] = np.clip(hull[:, 1::2], 0, max_y)
+
+    targets[:, :4] = hull
+
+    return targets
+
+
+def random_affine(
+    img,
+    targets=(),
+    target_size=(640, 640),
+    degrees=10,
+    translate=0.1,
+    scales=0.1,
+    shear=10,
+):
+    """Warp ``img`` with a freshly sampled affine and move ``targets`` along with it.
+
+    Uncovered pixels are filled with grey (114); boxes are touched only when present.
+    """
+    matrix, scale = get_affine_matrix(target_size, degrees, translate, scales, shear)
+    warped = cv2.warpAffine(img, matrix, dsize=target_size, borderValue=(114, 114, 114))
+    if len(targets) > 0:
+        targets = apply_affine_to_bboxes(targets, target_size, matrix, scale)
+    return warped, targets
+
+
+def _mirror(image, boxes, prob=0.5):
+    """Flip left-right with probability ``prob``; ``boxes`` x-coords are mirrored in place."""
+    _, img_w, _ = image.shape
+    if random.random() < prob:
+        image, boxes[:, 0::2] = image[:, ::-1], img_w - boxes[:, 2::-2]
+    return image, boxes
+
+
+def preproc(img, input_size, swap=(2, 0, 1)):
+    """Resize ``img`` keeping its aspect ratio and paste it top-left on a grey (114) canvas.
+
+    Returns the canvas transposed by ``swap`` as contiguous float32, plus the resize ratio.
+    """
+    src_h, src_w = img.shape[0], img.shape[1]
+    canvas_shape = (input_size[0], input_size[1], 3) if len(img.shape) == 3 else input_size
+    canvas = np.ones(canvas_shape, dtype=np.uint8) * 114
+
+    ratio = min(input_size[0] / src_h, input_size[1] / src_w)
+    new_h, new_w = int(src_h * ratio), int(src_w * ratio)
+    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
+    canvas[:new_h, :new_w] = resized.astype(np.uint8)
+
+    canvas = canvas.transpose(swap)
+    canvas = np.ascontiguousarray(canvas, dtype=np.float32)
+    return canvas, ratio
+
+
+class TrainTransform:  # training-time transform: optional HSV jitter, random flip, resize/pad, label padding
+    def __init__(self, max_labels=50, flip_prob=0.5, hsv_prob=1.0):
+        self.max_labels = max_labels  # number of rows in the padded label array
+        self.flip_prob = flip_prob  # forwarded to _mirror as its flip probability
+        self.hsv_prob = hsv_prob  # chance of calling augment_hsv on the image
+
+    def __call__(self, image, targets, input_dim):
+        boxes = targets[:, :4].copy()
+        labels = targets[:, 4].copy()
+        if len(boxes) == 0:
+            targets = np.zeros((self.max_labels, 5), dtype=np.float32)  # all-zero labels when there are no boxes
+            image, r_o = preproc(image, input_dim)
+            return image, targets
+        # Snapshot the inputs before any augmentation; they are the fallback used further down.
+        image_o = image.copy()
+        targets_o = targets.copy()
+        height_o, width_o, _ = image_o.shape
+        boxes_o = targets_o[:, :4]
+        labels_o = targets_o[:, 4]
+        # fallback boxes: [x1, y1, x2, y2] -> [cx, cy, w, h]
+        boxes_o = xyxy2cxcywh(boxes_o)
+
+        if random.random() < self.hsv_prob:
+            augment_hsv(image)
+        image_t, boxes = _mirror(image, boxes, self.flip_prob)
+        height, width, _ = image_t.shape
+        image_t, r_ = preproc(image_t, input_dim)
+        # augmented boxes: [x1, y1, x2, y2] -> [cx, cy, w, h], then scaled by the resize ratio
+        boxes = xyxy2cxcywh(boxes)
+        boxes *= r_
+
+        mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1  # keep boxes whose shorter side exceeds 1
+        boxes_t = boxes[mask_b]
+        labels_t = labels[mask_b]
+
+        if len(boxes_t) == 0:  # nothing survived the filter: fall back to the snapshot taken above
+            image_t, r_o = preproc(image_o, input_dim)
+            boxes_o *= r_o
+            boxes_t = boxes_o
+            labels_t = labels_o
+
+        labels_t = np.expand_dims(labels_t, 1)
+
+        targets_t = np.hstack((labels_t, boxes_t))  # row layout: label first, then the four box values
+        padded_labels = np.zeros((self.max_labels, 5))  # rows that receive no target stay zero
+        padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
+            : self.max_labels
+        ]
+        padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
+        return image_t, padded_labels
+
+
+class ValTransform:
+    """
+    Evaluation-time transform: resize and pad the image with ``preproc`` and,
+    in legacy mode, normalise it.
+
+    preproc -> (legacy only) reverse first axis -> /255 -> mean/std
+
+    Arguments:
+        swap ((int,int,int)): final order of axes, forwarded to ``preproc``
+        legacy (bool): when true, the first axis of the transposed image is
+            reversed, values are divided by 255, and the fixed mean
+            (0.485, 0.456, 0.406) and std (0.229, 0.224, 0.225) are applied
+
+    Returns:
+        transform (transform) : callable transform to be applied to test/val
+        data
+    """
+
+    def __init__(self, swap=(2, 0, 1), legacy=False):
+        self.swap = swap
+        self.legacy = legacy
+
+    # assumes the input is a cv2-style image array for now; `res` is accepted but not used
+    def __call__(self, img, res, input_size):
+        img, _ = preproc(img, input_size, self.swap)  # the resize ratio is discarded
+        if self.legacy:
+            img = img[::-1, :, :].copy()  # with the default swap the first axis is the channel axis
+            img /= 255.0
+            img -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
+            img /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
+        return img, np.zeros((1, 5))  # the labels are always a zero (1, 5) placeholder
diff --git a/yolort/data/data_module.py b/yolort/data/data_module.py
index 55510da5..d17d7327 100644
--- a/yolort/data/data_module.py
+++ b/yolort/data/data_module.py
@@ -10,7 +10,7 @@
 if is_module_available("pytorch_lightning"):
     from pytorch_lightning import LightningDataModule
 
-from .coco import COCODetection
+from yolort.data.datasets.coco import COCODetection
 from .transforms import collate_fn, default_train_transforms, default_val_transforms
 from .voc import VOCDetection
 
diff --git a/yolort/data/data_prefetcher.py b/yolort/data/data_prefetcher.py
new file mode 100644
index 00000000..a118cf4e
--- /dev/null
+++ b/yolort/data/data_prefetcher.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import torch
+
+
+class DataPrefetcher:
+    """
+    DataPrefetcher is inspired by the code in the following file:
+    https://github.com/NVIDIA/apex/blob/master/examples/imagenet/main_amp.py
+    It can speed up your pytorch dataloader. For more information, please check
+    https://github.com/NVIDIA/apex/issues/304#issuecomment-493562789.
+    """
+
+    def __init__(self, loader):
+        self.loader = iter(loader)
+        self.stream = torch.cuda.Stream()  # side stream on which preload() issues its .cuda() copies
+        self.input_cuda = self._input_cuda_for_image
+        self.record_stream = DataPrefetcher._record_stream_for_image
+        self.preload()  # fetch the first batch immediately
+
+    def preload(self):
+        try:
+            self.next_input, self.next_target, _, _ = next(self.loader)  # loader items must unpack into four values
+        except StopIteration:
+            self.next_input = None  # exhausted: next() will hand back (None, None)
+            self.next_target = None
+            return
+
+        with torch.cuda.stream(self.stream):
+            self.input_cuda()
+            self.next_target = self.next_target.cuda(non_blocking=True)
+
+    def next(self):
+        torch.cuda.current_stream().wait_stream(self.stream)  # order the current stream after the queued copies
+        input = self.next_input
+        target = self.next_target
+        if input is not None:
+            self.record_stream(input)
+        if target is not None:
+            target.record_stream(torch.cuda.current_stream())
+        self.preload()  # start loading the following batch before returning this one
+        return input, target
+
+    def _input_cuda_for_image(self):
+        self.next_input = self.next_input.cuda(non_blocking=True)
+
+    @staticmethod
+    def _record_stream_for_image(input):
+        input.record_stream(torch.cuda.current_stream())  # register the tensor as in use on the current stream
diff --git a/yolort/data/dataloading.py b/yolort/data/dataloading.py
new file mode 100644
index 00000000..6fecf3f0
--- /dev/null
+++ b/yolort/data/dataloading.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+import random
+import uuid
+
+import numpy as np
+
+import torch
+from torch.utils.data.dataloader import DataLoader as torchDataLoader
+from torch.utils.data.dataloader import default_collate
+
+from .samplers import YoloBatchSampler
+
+
+def get_yolox_datadir():
+    """
+    Return the dataset directory. If the environment variable `YOLOX_DATADIR` is set,
+    its value is returned; otherwise the `datasets` folder beside the `yolox` package.
+    """
+    yolox_datadir = os.getenv("YOLOX_DATADIR", None)
+    if yolox_datadir is None:
+        import yolox  # NOTE(review): needs the external `yolox` package to be importable; confirm it is a dependency
+
+        yolox_path = os.path.dirname(os.path.dirname(yolox.__file__))  # parent of the package directory
+        yolox_datadir = os.path.join(yolox_path, "datasets")
+    return yolox_datadir
+
+
+class DataLoader(torchDataLoader):
+    """
+    Lightnet dataloader that enables on the fly resizing of the images.
+    See :class:`torch.utils.data.DataLoader` for more information on the arguments.
+    Check more on the following website:
+    https://gitlab.com/EAVISE/lightnet/-/blob/master/lightnet/data/_dataloading.py
+
+    Unless a ``batch_sampler`` is supplied, batches are produced by a
+    :class:`YoloBatchSampler` built with the dataset's ``input_dim``.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """
+        Accept the same positional and keyword arguments as the torch
+        ``DataLoader``; ``shuffle``, ``sampler`` and ``batch_sampler`` are
+        honoured whether they are passed positionally or by keyword.
+        """
+        super().__init__(*args, **kwargs)
+        # Inside a class named ``DataLoader`` this attribute mangles to
+        # ``_DataLoader__initialized``, the same name the torch class of that
+        # name would produce. NOTE(review): presumably this re-opens the
+        # parent's guard against replacing ``batch_sampler`` after
+        # construction; confirm against the installed torch version.
+        self.__initialized = False
+
+        # Positional layout of the torch signature:
+        #   args[0]=dataset, args[1]=batch_size, args[2]=shuffle,
+        #   args[3]=sampler, args[4]=batch_sampler
+        # Every name gets a value here, so the checks below never hit an
+        # unbound local, and an argument in the last position is not missed.
+        shuffle = args[2] if len(args) > 2 else kwargs.get("shuffle", False)
+        sampler = args[3] if len(args) > 3 else kwargs.get("sampler")
+        batch_sampler = args[4] if len(args) > 4 else kwargs.get("batch_sampler")
+
+        # Use custom BatchSampler
+        if batch_sampler is None:
+            if sampler is None:
+                if shuffle:
+                    sampler = torch.utils.data.sampler.RandomSampler(self.dataset)
+                else:
+                    sampler = torch.utils.data.sampler.SequentialSampler(self.dataset)
+            batch_sampler = YoloBatchSampler(
+                sampler,
+                self.batch_size,
+                self.drop_last,
+                input_dimension=self.dataset.input_dim,
+            )
+
+        self.batch_sampler = batch_sampler
+
+        self.__initialized = True
+
+    def close_mosaic(self):
+        """
+        Set ``mosaic = False`` on the batch sampler.
+        NOTE(review): presumably this turns mosaic augmentation off; confirm in the sampler.
+        """
+        self.batch_sampler.mosaic = False
+
+
+def list_collate(batch):
+    """
+    Collate a batch field by field and return one list entry per field.
+
+    A field whose first value is a list or tuple is returned as a plain list
+    of the per-sample values; any other field goes through ``default_collate``.
+    """
+    collated = []
+    for field in zip(*batch):
+        if isinstance(field[0], (list, tuple)):
+            collated.append(list(field))
+        else:
+            collated.append(default_collate(field))
+
+    return collated
+
+
+def worker_init_reset_seed(worker_id):  # reseeds the RNGs below; worker_id is accepted but not used
+    seed = uuid.uuid4().int % 2**32  # fresh value below 2**32 drawn from a random UUID on every call
+    random.seed(seed)  # Python's random module
+    torch.set_rng_state(torch.manual_seed(seed).get_state())  # torch default generator
+    np.random.seed(seed)  # NumPy global RNG
diff --git a/yolort/data/datasets/__init__.py b/yolort/data/datasets/__init__.py
new file mode 100644
index 00000000..8a02c7f0
--- /dev/null
+++ b/yolort/data/datasets/__init__.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+from .coco import COCODataset
+from .coco_classes import COCO_CLASSES
+from .datasets_wrapper import CacheDataset, ConcatDataset, Dataset, MixConcatDataset
+from .mosaicdetection import MosaicDetection
diff --git a/yolort/data/datasets/coco.py b/yolort/data/datasets/coco.py
new file mode 100644
index 00000000..5ac225a0
--- /dev/null
+++ b/yolort/data/datasets/coco.py
@@ -0,0 +1,187 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+import copy
+import os
+
+import cv2
+import numpy as np
+from pycocotools.coco import COCO
+
+from .datasets_wrapper import CacheDataset, cache_read_img
+
+
+def remove_useless_info(coco):
+    """
+    Strip, in place, entries of a ``COCO`` object that this module never reads,
+    in order to save memory. Objects of any other type are left untouched.
+    Removed: top-level info/licenses, per-image licence, URL and date fields,
+    and the per-annotation segmentation.
+    """
+    if not isinstance(coco, COCO):
+        return
+    dataset = coco.dataset
+    for top_key in ("info", "licenses"):
+        dataset.pop(top_key, None)
+    for img in dataset["images"]:
+        for img_key in ("license", "coco_url", "date_captured", "flickr_url"):
+            img.pop(img_key, None)
+    if "annotations" in dataset:
+        for anno in dataset["annotations"]:
+            anno.pop("segmentation", None)
+
+
+class COCODataset(CacheDataset):
+    """
+    COCO detection dataset whose annotations are loaded through the ``COCO`` API.
+    """
+
+    def __init__(
+        self,
+        data_dir=None,
+        json_file="instances_train2017.json",
+        name="train2017",
+        img_size=(416, 416),
+        preproc=None,
+        cache=False,
+        cache_type="ram",
+    ):
+        """
+        COCO dataset initialization. Annotation data are read into memory by COCO API.
+        Args:
+            data_dir (str): dataset root directory; None falls back to "data-bin"/"coco128"
+            json_file (str): COCO json file name, looked up under "<data_dir>/annotations"
+            name (str): COCO data name (e.g. 'train2017' or 'val2017')
+            img_size (tuple): target (height, width) used to scale images and labels
+            preproc: callable applied in ``__getitem__``; ``cache``/``cache_type`` go to CacheDataset
+        """
+        if data_dir is None:
+            data_dir = os.path.join("data-bin", "coco128")
+        self.data_dir = data_dir
+        self.json_file = json_file
+
+        self.coco = COCO(os.path.join(self.data_dir, "annotations", self.json_file))
+        remove_useless_info(self.coco)
+        self.ids = self.coco.getImgIds()
+        self.num_imgs = len(self.ids)
+        self.class_ids = sorted(self.coco.getCatIds())  # position in this list is the class index used in labels
+        self.cats = self.coco.loadCats(self.coco.getCatIds())
+        self._classes = tuple([c["name"] for c in self.cats])
+        self.name = name
+        self.img_size = img_size
+        self.preproc = preproc
+        self.annotations = self._load_coco_annotations()  # needs self.coco, self.class_ids and self.img_size set above
+
+        path_filename = [os.path.join(name, anno[3]) for anno in self.annotations]  # anno[3] is the file name
+        super().__init__(
+            input_dimension=img_size,
+            num_imgs=self.num_imgs,
+            data_dir=data_dir,
+            cache_dir_name=f"cache_{name}",
+            path_filename=path_filename,
+            cache=cache,
+            cache_type=cache_type
+        )
+
+    def __len__(self):
+        return self.num_imgs
+
+    def _load_coco_annotations(self):
+        return [self.load_anno_from_ids(_ids) for _ids in self.ids]  # one tuple per image, in self.ids order
+
+    def load_anno_from_ids(self, id_):
+        im_ann = self.coco.loadImgs(id_)[0]
+        width = im_ann["width"]
+        height = im_ann["height"]
+        anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=False)
+        annotations = self.coco.loadAnns(anno_ids)
+        objs = []
+        for obj in annotations:
+            x1 = np.max((0, obj["bbox"][0]))  # bbox is read as [x, y, w, h]; clamp the corner to >= 0
+            y1 = np.max((0, obj["bbox"][1]))
+            x2 = np.min((width, x1 + np.max((0, obj["bbox"][2]))))  # far corner, clamped to the image size
+            y2 = np.min((height, y1 + np.max((0, obj["bbox"][3]))))
+            if obj["area"] > 0 and x2 >= x1 and y2 >= y1:
+                obj["clean_bbox"] = [x1, y1, x2, y2]
+                objs.append(obj)
+
+        num_objs = len(objs)
+
+        res = np.zeros((num_objs, 5))  # rows are [x1, y1, x2, y2, class index]
+        for ix, obj in enumerate(objs):
+            cls = self.class_ids.index(obj["category_id"])  # NOTE(review): linear search per annotation
+            res[ix, 0:4] = obj["clean_bbox"]
+            res[ix, 4] = cls
+
+        r = min(self.img_size[0] / height, self.img_size[1] / width)  # same ratio as load_resized_img
+        res[:, :4] *= r
+
+        img_info = (height, width)
+        resized_info = (int(height * r), int(width * r))
+
+        file_name = (
+            im_ann["file_name"]
+            if "file_name" in im_ann
+            else "{:012}".format(id_) + ".jpg"
+        )
+
+        return (res, img_info, resized_info, file_name)
+
+    def load_anno(self, index):
+        return self.annotations[index][0]  # the label array only
+
+    def load_resized_img(self, index):
+        img = self.load_image(index)
+        r = min(self.img_size[0] / img.shape[0], self.img_size[1] / img.shape[1])
+        resized_img = cv2.resize(
+            img,
+            (int(img.shape[1] * r), int(img.shape[0] * r)),
+            interpolation=cv2.INTER_LINEAR,
+        ).astype(np.uint8)
+        return resized_img
+
+    def load_image(self, index):
+        file_name = self.annotations[index][3]
+        # NOTE(review): this path contains "images", while path_filename in __init__ is "<name>/<file>"; confirm both agree
+        img_file = os.path.join(self.data_dir, "images", self.name, file_name)
+
+        img = cv2.imread(img_file)
+        assert img is not None, f"file named {img_file} not found"
+
+        return img
+
+    @cache_read_img(use_cache=True)
+    def read_img(self, index):
+        return self.load_resized_img(index)
+
+    def pull_item(self, index):
+        id_ = self.ids[index]
+        label, origin_image_size, _, _ = self.annotations[index]
+        img = self.read_img(index)
+
+        return img, copy.deepcopy(label), origin_image_size, np.array([id_])  # label is copied so callers may edit it
+
+    @CacheDataset.mosaic_getitem
+    def __getitem__(self, index):
+        """
+        One image / label pair for the given index is picked up and pre-processed.
+
+        Args:
+            index (int): data index
+
+        Returns:
+            img (numpy.ndarray): the image from ``read_img``, or whatever
+                ``preproc`` returns for it when a ``preproc`` is configured.
+            target: label data. Without ``preproc`` this is a copy of the
+                ``[num_objs, 5]`` array built in ``load_anno_from_ids``, each
+                row being [x1, y1, x2, y2, class index] scaled by the resize
+                ratio; with ``preproc`` it is that callable's output.
+            img_info : tuple of h, w.
+                h, w (int): original shape of the image
+            img_id (numpy.ndarray): one-element array holding the COCO image
+                id. Used for evaluation.
+        """
+        img, target, img_info, img_id = self.pull_item(index)
+
+        if self.preproc is not None:
+            img, target = self.preproc(img, target, self.input_dim)
+        return img, target, img_info, img_id
diff --git a/yolort/data/datasets/coco_classes.py b/yolort/data/datasets/coco_classes.py
new file mode 100644
index 00000000..17f5cbe6
--- /dev/null
+++ b/yolort/data/datasets/coco_classes.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+COCO_CLASSES = (
+    "person",
+    "bicycle",
+    "car",
+    "motorcycle",
+    "airplane",
+    "bus",
+    "train",
+    "truck",
+    "boat",
+    "traffic light",
+    "fire hydrant",
+    "stop sign",
+    "parking meter",
+    "bench",
+    "bird",
+    "cat",
+    "dog",
+    "horse",
+    "sheep",
+    "cow",
+    "elephant",
+    "bear",
+    "zebra",
+    "giraffe",
+    "backpack",
+    "umbrella",
+    "handbag",
+    "tie",
+    "suitcase",
+    "frisbee",
+    "skis",
+    "snowboard",
+    "sports ball",
+    "kite",
+    "baseball bat",
+    "baseball glove",
+    "skateboard",
+    "surfboard",
+    "tennis racket",
+    "bottle",
+    "wine glass",
+    "cup",
+    "fork",
+    "knife",
+    "spoon",
+    "bowl",
+    "banana",
+    "apple",
+    "sandwich",
+    "orange",
+    "broccoli",
+    "carrot",
+    "hot dog",
+    "pizza",
+    "donut",
+    "cake",
+    "chair",
+    "couch",
+    "potted plant",
+    "bed",
+    "dining table",
+    "toilet",
+    "tv",
+    "laptop",
+    "mouse",
+    "remote",
+    "keyboard",
+    "cell phone",
+    "microwave",
+    "oven",
+    "toaster",
+    "sink",
+    "refrigerator",
+    "book",
+    "clock",
+    "vase",
+    "scissors",
+    "teddy bear",
+    "hair drier",
+    "toothbrush",
+)
diff --git a/yolort/data/datasets/datasets_wrapper.py b/yolort/data/datasets/datasets_wrapper.py
new file mode 100644
index 00000000..c45fe380
--- /dev/null
+++ b/yolort/data/datasets/datasets_wrapper.py
@@ -0,0 +1,300 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import bisect
+import copy
+import os
+import random
+from abc import ABCMeta, abstractmethod
+from functools import partial, wraps
+from multiprocessing.pool import ThreadPool
+import psutil
+from loguru import logger
+from tqdm import tqdm
+
+import numpy as np
+
+from torch.utils.data.dataset import ConcatDataset as torchConcatDataset
+from torch.utils.data.dataset import Dataset as torchDataset
+
+
+class ConcatDataset(torchConcatDataset):
+    def __init__(self, datasets):
+        super().__init__(datasets)
+        first = self.datasets[0]
+        if hasattr(first, "input_dim"):
+            self._input_dim = self.input_dim = first.input_dim
+
+    def pull_item(self, idx):
+        """Route ``pull_item`` to the sub-dataset that owns global index ``idx``."""
+        if idx < 0:
+            total = len(self)
+            if -idx > total:
+                raise ValueError(
+                    "absolute value of index should not exceed dataset length"
+                )
+            idx += total
+        which = bisect.bisect_right(self.cumulative_sizes, idx)
+        # Offset of the owning dataset inside the concatenation (0 for the first).
+        start = self.cumulative_sizes[which - 1] if which else 0
+        return self.datasets[which].pull_item(idx - start)
+
+
+class MixConcatDataset(torchConcatDataset):
+    """Concatenation whose ``__getitem__`` also accepts (mosaic, index, ...) tuples."""
+    def __init__(self, datasets):
+        super(MixConcatDataset, self).__init__(datasets)
+        if hasattr(self.datasets[0], "input_dim"):
+            self._input_dim = self.input_dim = self.datasets[0].input_dim
+
+    def __getitem__(self, index):
+        """Return the sample for a plain int or a tuple whose 2nd field is the index.
+        Raises ValueError when a negative index reaches past the dataset start.
+        """
+        is_plain = isinstance(index, int)
+        # Plain ints used to hit an unbound ``idx``; both index kinds are handled now.
+        idx = index if is_plain else index[1]
+        if idx < 0:
+            if -idx > len(self):
+                raise ValueError(
+                    "absolute value of index should not exceed dataset length"
+                )
+            idx = len(self) + idx
+        dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
+        sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] if dataset_idx else idx
+        if is_plain:
+            return self.datasets[dataset_idx][sample_idx]
+        # Keep the leading flag and any trailing fields; swap in the local index.
+        return self.datasets[dataset_idx][(index[0], sample_idx) + tuple(index[2:])]
+
+
+class Dataset(torchDataset):
+    """Base dataset whose ``input_dim`` can be overridden on the fly.
+
+    Args:
+        input_dimension (tuple): default network input dimensions; only the first two entries are kept.
+        mosaic (bool): initial value of ``enable_mosaic``. Defaults to True.
+    """
+
+    def __init__(self, input_dimension, mosaic=True):
+        super().__init__()
+        self.__input_dim = input_dimension[:2]
+        self.enable_mosaic = mosaic
+
+    @property
+    def input_dim(self):
+        """
+        Dimension that can be used by transforms to set the correct image size, etc.
+        This gives transforms a single source of truth for the network input dimension.
+
+        Return:
+            the ``_input_dim`` attribute when one has been set on the instance,
+            otherwise the first two entries of the constructor's ``input_dimension``.
+        """
+        if hasattr(self, "_input_dim"):
+            return self._input_dim
+        return self.__input_dim
+
+    @staticmethod
+    def mosaic_getitem(getitem_fn):
+        """
+        Decorator for ``__getitem__`` that understands ``(mosaic, index)`` style indices:
+        for a non-int index, ``index[0]`` is stored in ``self.enable_mosaic`` and ``index[1]`` is forwarded.
+
+        Example:
+            >>> class CustomSet(Dataset):
+            ...     def __len__(self):
+            ...         return 10
+            ...     @Dataset.mosaic_getitem
+            ...     def __getitem__(self, index):
+            ...         return self.enable_mosaic
+        """
+
+        @wraps(getitem_fn)
+        def wrapper(self, index):
+            if not isinstance(index, int):
+                self.enable_mosaic = index[0]
+                index = index[1]
+
+            ret_val = getitem_fn(self, index)
+
+            return ret_val
+
+        return wrapper
+
+
+class CacheDataset(Dataset, metaclass=ABCMeta):
+    """ This class is a subclass of the base :class:`Dataset` defined in this module,
+    that enables cache images to ram or disk.
+
+    Args:
+        input_dimension (tuple): (width,height) tuple with default dimensions of the network
+        num_imgs (int): dataset size
+        data_dir (str): the root directory of the dataset, e.g. `/path/to/COCO`.
+        cache_dir_name (str): the name of the directory to cache to disk,
+            e.g. `"custom_cache"`. The files cached to disk will be saved
+            under `/path/to/COCO/custom_cache`.
+        path_filename (list[str]): a list of paths to the data relative to the `data_dir`,
+            e.g. if you have data `/path/to/COCO/train/1.jpg`, `/path/to/COCO/train/2.jpg`,
+            then `path_filename = ['train/1.jpg', 'train/2.jpg']`.
+        cache (bool): whether to cache the images to ram or disk.
+        cache_type (str): the type of cache,
+            "ram" : Caching imgs to ram for fast training.
+            "disk": Caching imgs to disk for fast training.
+    """
+
+    def __init__(
+        self,
+        input_dimension,
+        num_imgs=None,
+        data_dir=None,
+        cache_dir_name=None,
+        path_filename=None,
+        cache=False,
+        cache_type="ram",
+    ):
+        super().__init__(input_dimension)
+        self.cache = cache
+        self.cache_type = cache_type
+
+        if self.cache and self.cache_type == "disk":
+            self.cache_dir = os.path.join(data_dir, cache_dir_name)
+            self.path_filename = path_filename
+
+        if self.cache and self.cache_type == "ram":
+            self.imgs = None
+
+        if self.cache:
+            self.cache_images(
+                num_imgs=num_imgs,
+                data_dir=data_dir,
+                cache_dir_name=cache_dir_name,
+                path_filename=path_filename,
+            )
+
+    def __del__(self):
+        if getattr(self, "cache", False) and getattr(self, "cache_type", None) == "ram":
+            del self.imgs  # getattr above tolerates an instance whose __init__ failed early
+
+    @abstractmethod
+    def read_img(self, index):
+        """
+        Return the image for ``index``. Caching calls this with ``use_cache=False``,
+        so implementations must accept that keyword (e.g. via ``cache_read_img``).
+        Args:
+            index (int): image index
+        """
+        raise NotImplementedError
+
+    def cache_images(
+        self,
+        num_imgs=None,
+        data_dir=None,
+        cache_dir_name=None,
+        path_filename=None,
+    ):
+        """Fill the RAM or disk cache by reading every image once (no-op if a disk cache exists)."""
+        assert num_imgs is not None, "num_imgs must be specified as the size of the dataset"
+        if self.cache_type == "disk":
+            # Each argument is checked on its own; a chained ``and`` let "" mask a later None.
+            assert all(v is not None for v in (data_dir, cache_dir_name, path_filename)), \
+                "data_dir, cache_name and path_filename must be specified if cache_type is disk"
+            self.path_filename = path_filename
+
+        mem = psutil.virtual_memory()
+        mem_required = self.cal_cache_occupy(num_imgs)
+        gb = 1 << 30
+
+        if self.cache_type == "ram":
+            if mem_required > mem.available:
+                logger.warning("Not enough available RAM to cache the dataset, caching is disabled")
+                self.cache = False
+            else:
+                logger.info(
+                    f"{mem_required / gb:.1f}GB RAM required, "
+                    f"{mem.available / gb:.1f}/{mem.total / gb:.1f}GB RAM available, "
+                    f"Since the first thing we do is cache, "
+                    f"there is no guarantee that the remaining memory space is sufficient"
+                )
+
+        # ``imgs`` is only pre-set for the RAM cache; getattr keeps the disk path working.
+        if self.cache and getattr(self, "imgs", None) is None:
+            if self.cache_type == 'ram':
+                self.imgs = [None] * num_imgs
+                logger.info("You are using cached images in RAM to accelerate training!")
+            else:   # 'disk'
+                if not os.path.exists(self.cache_dir):
+                    os.mkdir(self.cache_dir)
+                    logger.warning(
+                        f"\n*******************************************************************\n"
+                        f"You are using cached images in DISK to accelerate training.\n"
+                        f"This requires large DISK space.\n"
+                        f"Make sure you have {mem_required / gb:.1f} "
+                        f"available DISK space for training your dataset.\n"
+                        f"*******************************************************************\n"
+                    )
+                else:
+                    logger.info(f"Found disk cache at {self.cache_dir}")
+                    return
+
+            logger.info("Caching images...\nThis might take some time for your dataset")
+
+            num_threads = min(8, max(1, (os.cpu_count() or 1) - 1))  # cpu_count() may be None
+            b = 0
+            read_uncached = partial(self.read_img, use_cache=False)
+            # Context manager releases the worker threads, which previously leaked.
+            with ThreadPool(num_threads) as pool:
+                pbar = tqdm(enumerate(pool.imap(read_uncached, range(num_imgs))), total=num_imgs)
+                for i, x in pbar:
+                    if self.cache_type == 'ram':
+                        self.imgs[i] = x
+                    else:   # 'disk'
+                        cache_filename = f'{self.path_filename[i].split(".")[0]}.npy'
+                        cache_path_filename = os.path.join(self.cache_dir, cache_filename)
+                        os.makedirs(os.path.dirname(cache_path_filename), exist_ok=True)
+                        np.save(cache_path_filename, x)
+                    b += x.nbytes
+                    pbar.desc = \
+                        f'Caching images ({b / gb:.1f}/{mem_required / gb:.1f}GB {self.cache_type})'
+                pbar.close()
+
+    def cal_cache_occupy(self, num_imgs):
+        """Estimate the bytes needed to cache ``num_imgs`` images from up to 32 random reads."""
+        num_samples = min(num_imgs, 32)
+        if num_samples <= 0:
+            return 0
+        reads = (self.read_img(index=random.randint(0, num_imgs - 1), use_cache=False)
+                 for _ in range(num_samples))
+        return sum(img.nbytes for img in reads) * num_imgs / num_samples
+
+
+def cache_read_img(use_cache=True):
+    """
+    Build a decorator that lets a dataset's ``read_img`` method serve cached images.
+
+    Args:
+        use_cache (bool, optional): default of the wrapped method's ``use_cache``
+            keyword, i.e. whether it reads the image from cache.
+            Defaults to True.
+    """
+    def decorator(read_img_fn):
+        @wraps(read_img_fn)
+        def wrapper(self, index, use_cache=use_cache):
+            # The cache is bypassed when the dataset has it off or the caller opts out.
+            if not (self.cache and use_cache):
+                return read_img_fn(self, index)
+
+            if self.cache_type == "ram":
+                # deepcopy so callers never mutate the cached image itself
+                return copy.deepcopy(self.imgs[index])
+
+            if self.cache_type == "disk":
+                stem = self.path_filename[index].split('.')[0]
+                npy_path = os.path.join(self.cache_dir, f"{stem}.npy")
+                return np.load(npy_path)
+
+            raise ValueError(f"Unknown cache type: {self.cache_type}")
+
+        return wrapper
+    return decorator
diff --git a/yolort/data/datasets/mosaicdetection.py b/yolort/data/datasets/mosaicdetection.py
new file mode 100644
index 00000000..ba11cfdc
--- /dev/null
+++ b/yolort/data/datasets/mosaicdetection.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import random
+
+import cv2
+import numpy as np
+
+from yolort.utils import adjust_box_anns, get_local_rank
+
+from ..data_augment import random_affine
+from .datasets_wrapper import Dataset
+
+
+def get_mosaic_coordinate(mosaic_image, mosaic_index, xc, yc, w, h, input_h, input_w):
+    """Return the paste box on the 2x canvas and the matching crop box of the tile.
+    ``mosaic_index``: 0 top-left, 1 top-right, 2 bottom-left, 3 bottom-right of
+    centre (xc, yc); other values raise ValueError. ``mosaic_image`` is unused."""
+    if mosaic_index == 0:  # top left part of image
+        x1, y1, x2, y2 = max(xc - w, 0), max(yc - h, 0), xc, yc
+        small_coord = w - (x2 - x1), h - (y2 - y1), w, h
+    elif mosaic_index == 1:  # top right part of image
+        x1, y1, x2, y2 = xc, max(yc - h, 0), min(xc + w, input_w * 2), yc
+        small_coord = 0, h - (y2 - y1), min(w, x2 - x1), h
+    elif mosaic_index == 2:  # bottom left part of image
+        x1, y1, x2, y2 = max(xc - w, 0), yc, xc, min(input_h * 2, yc + h)
+        small_coord = w - (x2 - x1), 0, w, min(y2 - y1, h)
+    elif mosaic_index == 3:  # bottom right part of image
+        x1, y1, x2, y2 = xc, yc, min(xc + w, input_w * 2), min(input_h * 2, yc + h)  # noqa
+        small_coord = 0, 0, min(w, x2 - x1), min(y2 - y1, h)
+    else:  # previously fell through to an UnboundLocalError on the return line
+        raise ValueError(f"mosaic_index must be 0, 1, 2 or 3, got {mosaic_index!r}")
+    return (x1, y1, x2, y2), small_coord
+
+
+class MosaicDetection(Dataset):
+    """Detection dataset wrapper that applies mosaic, and optionally mixup, to a wrapped dataset."""
+
+    def __init__(
+        self, dataset, img_size, mosaic=True, preproc=None,
+        degrees=10.0, translate=0.1, mosaic_scale=(0.5, 1.5),
+        mixup_scale=(0.5, 1.5), shear=2.0, enable_mixup=True,
+        mosaic_prob=1.0, mixup_prob=1.0, *args
+    ):
+        """
+        Wrap ``dataset`` and store the augmentation settings.
+        Args:
+            dataset(Dataset) : wrapped dataset; ``pull_item``, ``load_anno`` and ``input_dim`` are used.
+            img_size (tuple): default input dimension, forwarded to ``Dataset.__init__``.
+            mosaic (bool): enable mosaic augmentation or not.
+            preproc (func): called as ``preproc(img, labels, input_dim)`` on every returned sample.
+            degrees (float), translate (float), shear (float): forwarded to
+                ``random_affine`` under the same keyword names.
+            mosaic_scale (tuple): forwarded to ``random_affine`` as ``scales``.
+            mixup_scale (tuple): range the mixup resize jitter factor is drawn from.
+            enable_mixup (bool): allow the mixup step after the mosaic step.
+            mosaic_prob (float), mixup_prob (float): per-sample probability of each step.
+            *args(tuple) : accepted but not used by this class.
+        """
+        super().__init__(img_size, mosaic=mosaic)
+        self._dataset = dataset
+        self.preproc = preproc
+        self.degrees = degrees
+        self.translate = translate
+        self.scale = mosaic_scale
+        self.shear = shear
+        self.mixup_scale = mixup_scale
+        self.enable_mosaic = mosaic
+        self.enable_mixup = enable_mixup
+        self.mosaic_prob = mosaic_prob
+        self.mixup_prob = mixup_prob
+        self.local_rank = get_local_rank()
+
+    def __len__(self):
+        return len(self._dataset)
+
+    @Dataset.mosaic_getitem
+    def __getitem__(self, idx):
+        if self.enable_mosaic and random.random() < self.mosaic_prob:
+            mosaic_labels = []
+            input_dim = self._dataset.input_dim
+            input_h, input_w = input_dim[0], input_dim[1]
+
+            # mosaic center, drawn uniformly from the middle half of the 2x canvas
+            yc = int(random.uniform(0.5 * input_h, 1.5 * input_h))
+            xc = int(random.uniform(0.5 * input_w, 1.5 * input_w))
+
+            # 3 additional image indices
+            indices = [idx] + [random.randint(0, len(self._dataset) - 1) for _ in range(3)]
+
+            for i_mosaic, index in enumerate(indices):
+                img, _labels, _, img_id = self._dataset.pull_item(index)
+                h0, w0 = img.shape[:2]  # orig hw
+                scale = min(1. * input_h / h0, 1. * input_w / w0)
+                img = cv2.resize(
+                    img, (int(w0 * scale), int(h0 * scale)), interpolation=cv2.INTER_LINEAR
+                )
+                # generate output mosaic image
+                (h, w, c) = img.shape[:3]
+                if i_mosaic == 0:
+                    mosaic_img = np.full((input_h * 2, input_w * 2, c), 114, dtype=np.uint8)
+
+                # suffix l means large image, while s means small image in mosaic aug.
+                (l_x1, l_y1, l_x2, l_y2), (s_x1, s_y1, s_x2, s_y2) = get_mosaic_coordinate(
+                    mosaic_img, i_mosaic, xc, yc, w, h, input_h, input_w
+                )
+
+                mosaic_img[l_y1:l_y2, l_x1:l_x2] = img[s_y1:s_y2, s_x1:s_x2]
+                padw, padh = l_x1 - s_x1, l_y1 - s_y1
+
+                labels = _labels.copy()
+                # Scale the first four label columns and shift them by the paste offset
+                if _labels.size > 0:
+                    labels[:, 0] = scale * _labels[:, 0] + padw
+                    labels[:, 1] = scale * _labels[:, 1] + padh
+                    labels[:, 2] = scale * _labels[:, 2] + padw
+                    labels[:, 3] = scale * _labels[:, 3] + padh
+                mosaic_labels.append(labels)
+
+            if len(mosaic_labels):
+                mosaic_labels = np.concatenate(mosaic_labels, 0)
+                np.clip(mosaic_labels[:, 0], 0, 2 * input_w, out=mosaic_labels[:, 0])
+                np.clip(mosaic_labels[:, 1], 0, 2 * input_h, out=mosaic_labels[:, 1])
+                np.clip(mosaic_labels[:, 2], 0, 2 * input_w, out=mosaic_labels[:, 2])
+                np.clip(mosaic_labels[:, 3], 0, 2 * input_h, out=mosaic_labels[:, 3])
+
+            mosaic_img, mosaic_labels = random_affine(
+                mosaic_img,
+                mosaic_labels,
+                target_size=(input_w, input_h),
+                degrees=self.degrees,
+                translate=self.translate,
+                scales=self.scale,
+                shear=self.shear,
+            )
+
+            # -----------------------------------------------------------------
+            # CopyPaste: https://arxiv.org/abs/2012.07177
+            # -----------------------------------------------------------------
+            if (
+                self.enable_mixup
+                and not len(mosaic_labels) == 0
+                and random.random() < self.mixup_prob
+            ):
+                mosaic_img, mosaic_labels = self.mixup(mosaic_img, mosaic_labels, self.input_dim)
+            mix_img, padded_labels = self.preproc(mosaic_img, mosaic_labels, self.input_dim)
+            img_info = (mix_img.shape[1], mix_img.shape[0])
+
+            # -----------------------------------------------------------------
+            # img_info and img_id are not used for training.
+            # They are also hard to be specified on a mosaic image.
+            # -----------------------------------------------------------------
+            return mix_img, padded_labels, img_info, img_id
+
+        else:
+            self._dataset._input_dim = self.input_dim
+            img, label, img_info, img_id = self._dataset.pull_item(idx)
+            img, label = self.preproc(img, label, self.input_dim)
+            return img, label, img_info, img_id
+
+    def mixup(self, origin_img, origin_labels, input_dim):  # blend a random sample into origin_img, 50/50
+        jit_factor = random.uniform(*self.mixup_scale)
+        FLIP = random.uniform(0, 1) > 0.5
+        cp_labels = []
+        while len(cp_labels) == 0:  # re-draw until ``load_anno`` returns a non-empty result
+            cp_index = random.randint(0, self.__len__() - 1)
+            cp_labels = self._dataset.load_anno(cp_index)
+        img, cp_labels, _, _ = self._dataset.pull_item(cp_index)
+
+        if len(img.shape) == 3:
+            cp_img = np.ones((input_dim[0], input_dim[1], 3), dtype=np.uint8) * 114
+        else:
+            cp_img = np.ones(input_dim, dtype=np.uint8) * 114
+
+        cp_scale_ratio = min(input_dim[0] / img.shape[0], input_dim[1] / img.shape[1])
+        resized_img = cv2.resize(
+            img,
+            (int(img.shape[1] * cp_scale_ratio), int(img.shape[0] * cp_scale_ratio)),
+            interpolation=cv2.INTER_LINEAR,
+        )
+
+        cp_img[
+            : int(img.shape[0] * cp_scale_ratio), : int(img.shape[1] * cp_scale_ratio)
+        ] = resized_img
+
+        cp_img = cv2.resize(
+            cp_img,
+            (int(cp_img.shape[1] * jit_factor), int(cp_img.shape[0] * jit_factor)),
+        )
+        cp_scale_ratio *= jit_factor
+
+        if FLIP:
+            cp_img = cp_img[:, ::-1, :]
+
+        origin_h, origin_w = cp_img.shape[:2]
+        target_h, target_w = origin_img.shape[:2]
+        padded_img = np.zeros(
+            (max(origin_h, target_h), max(origin_w, target_w), 3), dtype=np.uint8
+        )
+        padded_img[:origin_h, :origin_w] = cp_img
+
+        x_offset, y_offset = 0, 0
+        if padded_img.shape[0] > target_h:
+            y_offset = random.randint(0, padded_img.shape[0] - target_h - 1)
+        if padded_img.shape[1] > target_w:
+            x_offset = random.randint(0, padded_img.shape[1] - target_w - 1)
+        padded_cropped_img = padded_img[
+            y_offset: y_offset + target_h, x_offset: x_offset + target_w
+        ]
+
+        cp_bboxes_origin_np = adjust_box_anns(
+            cp_labels[:, :4].copy(), cp_scale_ratio, 0, 0, origin_w, origin_h
+        )
+        if FLIP:
+            cp_bboxes_origin_np[:, 0::2] = (
+                origin_w - cp_bboxes_origin_np[:, 0::2][:, ::-1]
+            )
+        cp_bboxes_transformed_np = cp_bboxes_origin_np.copy()
+        cp_bboxes_transformed_np[:, 0::2] = np.clip(
+            cp_bboxes_transformed_np[:, 0::2] - x_offset, 0, target_w
+        )
+        cp_bboxes_transformed_np[:, 1::2] = np.clip(
+            cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h
+        )
+
+        cls_labels = cp_labels[:, 4:5].copy()
+        box_labels = cp_bboxes_transformed_np
+        labels = np.hstack((box_labels, cls_labels))
+        origin_labels = np.vstack((origin_labels, labels))
+        origin_img = origin_img.astype(np.float32)
+        origin_img = 0.5 * origin_img + 0.5 * padded_cropped_img.astype(np.float32)
+
+        return origin_img.astype(np.uint8), origin_labels
diff --git a/yolort/data/samplers.py b/yolort/data/samplers.py
new file mode 100644
index 00000000..6b7ea38d
--- /dev/null
+++ b/yolort/data/samplers.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import itertools
+from typing import Optional
+
+import torch
+import torch.distributed as dist
+from torch.utils.data.sampler import BatchSampler as torchBatchSampler
+from torch.utils.data.sampler import Sampler
+
+
+class YoloBatchSampler(torchBatchSampler):
+    """
+    Batch sampler that tags every index with the current mosaic switch.
+    Batches are formed exactly as in :class:`torch.utils.data.sampler.BatchSampler`,
+    then each index ``idx`` is emitted as the tuple ``(self.mosaic, idx)``.
+    """
+
+    def __init__(self, *args, mosaic=True, **kwargs):
+        self.mosaic = mosaic
+        super().__init__(*args, **kwargs)
+
+    def __iter__(self):
+        for indices in super().__iter__():
+            yield list(zip(itertools.repeat(self.mosaic), indices))
+
+
+class InfiniteSampler(Sampler):
+    """
+    Endless index stream for training, sharded across ranks.
+    The full stream is `shuffle(range(size)) + shuffle(range(size)) + ...`
+    (or `range(size) + range(size) + ...` when shuffle is False) and the
+    sampler on rank `r` yields `stream[r::world_size]` as plain Python ints.
+    """
+
+    def __init__(
+        self,
+        size: int,
+        shuffle: bool = True,
+        seed: Optional[int] = 0,
+        rank=0,
+        world_size=1,
+    ):
+        """
+        Args:
+            size (int): the total number of data of the underlying dataset to sample from
+            shuffle (bool): whether to shuffle the indices or not
+            seed (int): the initial seed of the shuffle. Must be the same across all
+                ranks. If None, a random seed is drawn and, when torch.distributed is
+                initialized, rank 0's value is broadcast so that all ranks share it.
+            rank, world_size (int): shard position/count, used only when torch.distributed is not initialized.
+        """
+        assert size > 0
+        self._size = size
+        self._shuffle = shuffle
+        distributed = dist.is_available() and dist.is_initialized()
+        if distributed:
+            self._rank = dist.get_rank()
+            self._world_size = dist.get_world_size()
+        else:
+            self._rank = rank
+            self._world_size = world_size
+        if seed is None:  # int(None) used to raise TypeError despite the documented contract
+            shared = [int(torch.randint(0, 2 ** 31 - 1, (1,)))]
+            if distributed:
+                dist.broadcast_object_list(shared, src=0)
+            seed = shared[0]
+        self._seed = int(seed)
+
+    def __iter__(self):
+        yield from itertools.islice(self._infinite_indices(), self._rank, None, self._world_size)
+
+    def _infinite_indices(self):
+        g = torch.Generator()
+        g.manual_seed(self._seed)
+        while True:
+            if self._shuffle:
+                yield from torch.randperm(self._size, generator=g).tolist()
+            else:
+                yield from range(self._size)
+
+    def __len__(self):
+        return self._size // self._world_size
diff --git a/yolort/evaluators/__init__.py b/yolort/evaluators/__init__.py
new file mode 100644
index 00000000..fc0b6875
--- /dev/null
+++ b/yolort/evaluators/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+from .coco_evaluator import COCOEvaluator
\ No newline at end of file
diff --git a/yolort/evaluators/coco_evaluator.py b/yolort/evaluators/coco_evaluator.py
new file mode 100644
index 00000000..a97c6d41
--- /dev/null
+++ b/yolort/evaluators/coco_evaluator.py
@@ -0,0 +1,317 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import contextlib
+import io
+import itertools
+import json
+import tempfile
+import time
+from collections import ChainMap, defaultdict
+from loguru import logger
+from tabulate import tabulate
+from tqdm import tqdm
+
+import numpy as np
+
+import torch
+
+from yolort.data.datasets import COCO_CLASSES
+from yolort.utils import (
+    gather,
+    is_main_process,
+    postprocess,
+    synchronize,
+    time_synchronized,
+    xyxy2xywh
+)
+
+
+def per_class_AR_table(coco_eval, class_names=COCO_CLASSES, headers=["class", "AR"], colums=6):
+    """Render per-class average recall (in percent) as a pipe-format table string."""
+    # recall has dims (iou, cls, area range, max dets), i.e. [TxKxAxM]
+    recalls = coco_eval.eval["recall"]
+    assert len(class_names) == recalls.shape[1]
+
+    per_class_AR = {}
+    for cls_idx, cls_name in enumerate(class_names):
+        # first area-range entry, last max-dets entry; values of -1 are dropped
+        valid = recalls[:, cls_idx, 0, -1]
+        valid = valid[valid > -1]
+        per_class_AR[cls_name] = float((np.mean(valid) if valid.size else float("nan")) * 100)
+
+    num_cols = min(colums, len(per_class_AR) * len(headers))
+    # flatten to [name, value, name, value, ...] and deal it out column by column
+    flat = list(itertools.chain.from_iterable(per_class_AR.items()))
+    columns = [flat[start::num_cols] for start in range(num_cols)]
+    return tabulate(
+        itertools.zip_longest(*columns), tablefmt="pipe", floatfmt=".3f",
+        headers=headers * (num_cols // len(headers)), numalign="left",
+    )
+
+
+def per_class_AP_table(coco_eval, class_names=COCO_CLASSES, headers=["class", "AP"], colums=6):
+    """Render per-class average precision (in percent) as a pipe-format table string."""
+    # precision has dims (iou, recall, cls, area range, max dets), i.e. [TxRxKxAxM]
+    precisions = coco_eval.eval["precision"]
+    assert len(class_names) == precisions.shape[2]
+
+    per_class_AP = {}
+    for cls_idx, cls_name in enumerate(class_names):
+        # area range index 0: all area ranges
+        # max dets index -1: typically 100 per image
+        valid = precisions[:, :, cls_idx, 0, -1]
+        valid = valid[valid > -1]  # values of -1 are dropped before averaging
+        mean_ap = np.mean(valid) if valid.size else float("nan")
+        per_class_AP[cls_name] = float(mean_ap * 100)
+
+    num_cols = min(colums, len(per_class_AP) * len(headers))
+    # flatten to [name, value, name, value, ...] and deal it out column by column
+    flat = list(itertools.chain.from_iterable(per_class_AP.items()))
+    columns = [flat[start::num_cols] for start in range(num_cols)]
+    return tabulate(
+        itertools.zip_longest(*columns), tablefmt="pipe", floatfmt=".3f",
+        headers=headers * (num_cols // len(headers)), numalign="left",
+    )
+
+
+class COCOEvaluator:
+    """
+    COCO AP Evaluation class.  All the data in the val2017 dataset are processed
+    and evaluated by COCO API.
+    """
+
+    def __init__(
+        self,
+        dataloader,
+        img_size: int,
+        confthre: float,
+        nmsthre: float,
+        num_classes: int,
+        testdev: bool = False,
+        per_class_AP: bool = True,
+        per_class_AR: bool = True,
+    ):
+        """
+        Args:
+            dataloader (Dataloader): evaluate dataloader.
+            img_size: image size after preprocess; indexed as ``img_size[0]`` and
+                ``img_size[1]`` by ``convert_to_coco_format``, so a 2-element sequence.
+            confthre: confidence threshold ranging from 0 to 1, which
+                is defined in the config file.
+            nmsthre: IoU threshold of non-max suppression ranging from 0 to 1.
+            num_classes: number of classes, forwarded to ``postprocess``.
+            per_class_AP, per_class_AR: show per class AP / AR during evaluation. Default to True.
+        """
+        self.dataloader = dataloader
+        self.img_size = img_size
+        self.confthre = confthre
+        self.nmsthre = nmsthre
+        self.num_classes = num_classes
+        self.testdev = testdev
+        self.per_class_AP = per_class_AP
+        self.per_class_AR = per_class_AR
+
+    def evaluate(
+        self, model, distributed=False, half=False, trt_file=None,
+        decoder=None, test_size=None, return_outputs=False
+    ):
+        """
+        COCO average precision (AP) Evaluation. Iterate inference on the test dataset
+        and the results are evaluated by COCO API.
+
+        NOTE: This function will change training mode to False, please save states if needed.
+        NOTE: tensors are created as ``torch.cuda.*Tensor``, so a CUDA device is required.
+        Args:
+            model : model to evaluate; ``half`` switches it and the inputs to fp16.
+            trt_file, test_size: optional TensorRT state file and the size of the dummy forward input.
+            decoder: optional callable applied to the raw model outputs.
+        Returns:
+            The value of ``self.evaluate_prediction(...)``; when ``return_outputs`` is True,
+            a tuple of that value and a dict of per-image predictions keyed by image id.
+        """
+        # TODO half to amp_test
+        tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor
+        model = model.eval()
+        if half:
+            model = model.half()
+        ids = []
+        data_list = []
+        output_data = defaultdict()
+        progress_bar = tqdm if is_main_process() else iter
+
+        inference_time = 0
+        nms_time = 0
+        n_samples = max(len(self.dataloader) - 1, 1)  # the last batch is excluded from timing below
+
+        if trt_file is not None:
+            from torch2trt import TRTModule
+
+            model_trt = TRTModule()
+            model_trt.load_state_dict(torch.load(trt_file))
+
+            x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
+            model(x)  # one dummy forward through the original model before swapping in the TRT module
+            model = model_trt
+
+        for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
+            progress_bar(self.dataloader)
+        ):
+            with torch.no_grad():
+                imgs = imgs.type(tensor_type)
+
+                # skip the last iters since batchsize might be not enough for batch inference
+                is_time_record = cur_iter < len(self.dataloader) - 1
+                if is_time_record:
+                    start = time.time()
+
+                outputs = model(imgs)
+                if decoder is not None:
+                    outputs = decoder(outputs, dtype=outputs.type())
+
+                if is_time_record:
+                    infer_end = time_synchronized()
+                    inference_time += infer_end - start
+
+                outputs = postprocess(
+                    outputs, self.num_classes, self.confthre, self.nmsthre
+                )
+                if is_time_record:
+                    nms_end = time_synchronized()
+                    nms_time += nms_end - infer_end
+
+            data_list_elem, image_wise_data = self.convert_to_coco_format(
+                outputs, info_imgs, ids, return_outputs=True)
+            data_list.extend(data_list_elem)
+            output_data.update(image_wise_data)
+
+        statistics = torch.cuda.FloatTensor([inference_time, nms_time, n_samples])
+        if distributed:
+            # different process/device might have different speed,
+            # to make sure the process will not be stucked, sync func is used here.
+            synchronize()
+            data_list = gather(data_list, dst=0)
+            output_data = gather(output_data, dst=0)
+            data_list = list(itertools.chain(*data_list))
+            output_data = dict(ChainMap(*output_data))
+            torch.distributed.reduce(statistics, dst=0)
+
+        eval_results = self.evaluate_prediction(data_list, statistics)
+        synchronize()
+
+        if return_outputs:
+            return eval_results, output_data
+        return eval_results
+
+    def convert_to_coco_format(self, outputs, info_imgs, ids, return_outputs=False):
+        """Turn per-image detections into COCO result dicts (plus a per-image summary on request)."""
+        data_list = []
+        image_wise_data = defaultdict(dict)
+        for (output, img_h, img_w, img_id) in zip(
+            outputs, info_imgs[0], info_imgs[1], ids
+        ):
+            if output is None:  # images with a None output contribute nothing
+                continue
+            output = output.cpu()
+            bboxes = output[:, 0:4]
+
+            # preprocessing: resize -- boxes are divided by the min height/width ratio to undo it
+            scale = min(
+                self.img_size[0] / float(img_h), self.img_size[1] / float(img_w)
+            )
+            bboxes /= scale
+            cls = output[:, 6]  # column 6 is used as the index into dataset.class_ids
+            scores = output[:, 4] * output[:, 5]  # presumably objectness * class confidence -- confirm
+
+            image_wise_data.update({
+                int(img_id): {
+                    "bboxes": [box.numpy().tolist() for box in bboxes],
+                    "scores": [score.numpy().item() for score in scores],
+                    "categories": [
+                        self.dataloader.dataset.class_ids[int(cls[ind])]
+                        for ind in range(bboxes.shape[0])
+                    ],
+                }
+            })
+
+            bboxes = xyxy2xywh(bboxes)  # converted after the summary above was recorded
+
+            for ind in range(bboxes.shape[0]):
+                label = self.dataloader.dataset.class_ids[int(cls[ind])]
+                pred_data = {
+                    "image_id": int(img_id),
+                    "category_id": label,
+                    "bbox": bboxes[ind].numpy().tolist(),
+                    "score": scores[ind].numpy().item(),
+                    "segmentation": [],
+                }  # COCO json format
+                data_list.append(pred_data)
+
+        if return_outputs:
+            return data_list, image_wise_data
+        return data_list
+
+    def evaluate_prediction(self, data_dict, statistics):
+        """Run COCO bbox evaluation on the gathered detections (main process only).
+
+        Returns ``(cocoEval.stats[0], cocoEval.stats[1], info)``; ``(0, 0, info)`` when
+        ``data_dict`` is empty and ``(0, 0, None)`` on every non-main process.
+        """
+        if not is_main_process():
+            return 0, 0, None
+
+        logger.info("Evaluate in main process...")
+
+        inference_time = statistics[0].item()
+        nms_time = statistics[1].item()
+        n_samples = statistics[2].item()
+
+        a_infer_time = 1000 * inference_time / (n_samples * self.dataloader.batch_size)
+        a_nms_time = 1000 * nms_time / (n_samples * self.dataloader.batch_size)
+
+        time_info = ", ".join(
+            "Average {} time: {:.2f} ms".format(k, v)
+            for k, v in zip(
+                ["forward", "NMS", "inference"],
+                [a_infer_time, a_nms_time, (a_infer_time + a_nms_time)],
+            )
+        )
+        info = time_info + "\n"
+        if len(data_dict) == 0:
+            return 0, 0, info
+
+        # Evaluate the Dt (detection) json comparing with the ground truth
+        cocoGt = self.dataloader.dataset.coco
+        # TODO: since pycocotools can't process dict in py36, write data to json file.
+        if self.testdev:
+            result_path = "./yolox_testdev_2017.json"
+            result_file = open(result_path, "w")
+        else:
+            fd, result_path = tempfile.mkstemp()
+            result_file = open(fd, "w")  # owns fd: closing the file also closes the descriptor
+        with result_file:  # closed (and flushed) before loadRes reads the file back
+            json.dump(data_dict, result_file)
+        cocoDt = cocoGt.loadRes(result_path)
+        try:
+            from yolox.layers import COCOeval_opt as COCOeval
+        except ImportError:
+            from pycocotools.cocoeval import COCOeval
+
+            logger.warning("Use standard COCOeval.")
+
+        cocoEval = COCOeval(cocoGt, cocoDt, "bbox")
+        cocoEval.evaluate()
+        cocoEval.accumulate()
+        redirect_string = io.StringIO()
+        with contextlib.redirect_stdout(redirect_string):
+            cocoEval.summarize()  # stdout is captured so the summary ends up in ``info``
+        info += redirect_string.getvalue()
+        cat_names = [cocoGt.cats[catId]['name'] for catId in sorted(cocoGt.cats)]
+        if self.per_class_AP:
+            AP_table = per_class_AP_table(cocoEval, class_names=cat_names)
+            info += "per class AP:\n" + AP_table + "\n"
+        if self.per_class_AR:
+            AR_table = per_class_AR_table(cocoEval, class_names=cat_names)
+            info += "per class AR:\n" + AR_table + "\n"
+        return cocoEval.stats[0], cocoEval.stats[1], info
\ No newline at end of file
diff --git a/yolort/exp/__init__.py b/yolort/exp/__init__.py
new file mode 100644
index 00000000..d7de27c8
--- /dev/null
+++ b/yolort/exp/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+# Copyright (c) Megvii Inc. All rights reserved.
+
+from .base_exp import BaseExp
+from .yolox_base import Exp
\ No newline at end of file
diff --git a/yolort/exp/base_exp.py b/yolort/exp/base_exp.py
new file mode 100644
index 00000000..c0ae45fe
--- /dev/null
+++ b/yolort/exp/base_exp.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+# Copyright (c) Megvii Inc. All rights reserved.
+
+import ast
+import pprint
+from abc import ABCMeta, abstractmethod
+from typing import Dict, List, Tuple
+from tabulate import tabulate
+
+import torch
+from torch.nn import Module
+
+from yolort.utils import LRScheduler
+
+
+class BaseExp(metaclass=ABCMeta):
+    """Basic class for any experiment."""
+
+    def __init__(self):
+        self.seed = None
+        self.output_dir = "./"
+        self.print_interval = 100
+        self.eval_interval = 10
+        self.dataset = None
+
+    @abstractmethod
+    def get_model(self) -> Module:
+        """Build and return the model."""
+
+    @abstractmethod
+    def get_dataset(self, cache: bool = False, cache_type: str = "ram"):
+        """Build and return the dataset, optionally cached according to ``cache_type``."""
+
+    @abstractmethod
+    def get_data_loader(
+        self, batch_size: int, is_distributed: bool
+    ) -> Dict[str, torch.utils.data.DataLoader]:
+        """Build and return the data loader(s) for ``batch_size``."""
+
+    @abstractmethod
+    def get_optimizer(self, batch_size: int) -> torch.optim.Optimizer:
+        """Build and return the optimizer for ``batch_size``."""
+
+    @abstractmethod
+    def get_lr_scheduler(self, lr: float, iters_per_epoch: int, **kwargs) -> LRScheduler:
+        """Build and return the learning-rate scheduler."""
+
+    @abstractmethod
+    def get_evaluator(self):
+        """Build and return the evaluator."""
+
+    @abstractmethod
+    def eval(self, model, evaluator, weights):
+        """Evaluate ``model`` with ``evaluator``."""
+
+    def __repr__(self):  # renders every non-underscore attribute as a key/value table
+        table_header = ["keys", "values"]
+        exp_table = [
+            (str(k), pprint.pformat(v))
+            for k, v in vars(self).items()
+            if not k.startswith("_")
+        ]
+        return tabulate(exp_table, headers=table_header, tablefmt="fancy_grid")
+
+    def merge(self, cfg_list):
+        """Override existing attributes from a flat ``[key, value, ...]`` list, coercing value types."""
+        assert len(cfg_list) % 2 == 0, f"length must be even, check value here: {cfg_list}"
+        for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
+            # only update value with same key
+            if hasattr(self, k):
+                src_value = getattr(self, k)
+                src_type = type(src_value)
+                # pre-process input if source type is list or tuple
+                if isinstance(src_value, (List, Tuple)):
+                    v = v.strip("[]()")
+                    v = [t.strip() for t in v.split(",")]
+                    # find type of tuple
+                    if len(src_value) > 0:
+                        src_item_type = type(src_value[0])
+                        v = [src_item_type(t) for t in v]
+
+                if src_type is bool and isinstance(v, str):
+                    # bool("False") is True, so the usual "off" spellings are mapped explicitly
+                    v = v.strip().lower() not in ("", "0", "false", "no", "off")
+                elif src_value is not None and src_type != type(v):
+                    try:
+                        v = src_type(v)
+                    except Exception:
+                        v = ast.literal_eval(v)
+                setattr(self, k, v)
diff --git a/yolort/exp/default/__init__.py b/yolort/exp/default/__init__.py
new file mode 100644
index 00000000..1f361d78
--- /dev/null
+++ b/yolort/exp/default/__init__.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii Inc. All rights reserved.
+
+# This file is used for package installation and find default exp file
+
+import sys
+from importlib import abc, util
+from pathlib import Path
+
+_EXP_PATH = Path(__file__).resolve().parent.parent.parent.parent / "exps" / "default"
+
+if _EXP_PATH.is_dir():
+    # This is true only for in-place installation (pip install -e, setup.py develop),
+    # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230
+    class _ExpFinder(abc.MetaPathFinder):
+        """Meta path finder mapping ``yolort.exp.default.<name>`` to ``exps/default/<name>.py``."""
+
+        def find_spec(self, name, path, target=None):
+            if not name.startswith("yolort.exp.default"):
+                return  # None: this finder does not handle the module
+            project_name = name.split(".")[-1] + ".py"
+            target_file = _EXP_PATH / project_name
+            if not target_file.is_file():
+                return  # no matching exp file on disk
+            return util.spec_from_file_location(name, target_file)
+
+    sys.meta_path.append(_ExpFinder())
diff --git a/yolort/exp/yolox_base.py b/yolort/exp/yolox_base.py
new file mode 100644
index 00000000..f3147743
--- /dev/null
+++ b/yolort/exp/yolox_base.py
@@ -0,0 +1,387 @@
+#!/usr/bin/env python3
+# Copyright (c) Megvii Inc. All rights reserved.
+
+import os
+import random
+import logging
+from zipfile import ZipFile
+from pathlib import Path, PosixPath
+
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+
+from .base_exp import BaseExp
+
+__all__ = ["Exp"]
+
+
+class Exp(BaseExp):
+    def __init__(self):
+        """Set the default experiment configuration (model, data, augmentation, training, testing)."""
+        super().__init__()
+
+        # ---------------- model config ---------------- #
+        # number of classes the model detects
+        self.num_classes = 80
+        # factor of model depth
+        self.depth = 1.00
+        # factor of model width
+        self.width = 1.00
+        # activation name. For example, if using "relu", then "silu" will be replaced with "relu".
+        self.act = "silu"
+
+        # ---------------- dataloader config ---------------- #
+        # set workers to 4 for a shorter dataloader init time
+        # If training uses too much memory, reduce this value.
+        self.data_num_workers = 4
+        self.input_size = (640, 640)  # (height, width)
+        # Actual multiscale ranges: [640 - 5 * 32, 640 + 5 * 32].
+        # To disable multiscale training, set the value to 0.
+        self.multiscale_range = 5
+        # You can uncomment this line to specify a multiscale range
+        # self.random_size = (14, 26)
+        # dir of dataset images, if data_dir is None, this project will use `datasets` dir
+        self.data_dir = None
+        # name of annotation file for training
+        self.train_ann = "instances_train2017.json"
+        # name of annotation file for evaluation
+        self.val_ann = "instances_val2017.json"
+        # name of annotation file for testing
+        self.test_ann = "instances_test2017.json"
+
+        # --------------- transform config ----------------- #
+        # prob of applying mosaic aug
+        self.mosaic_prob = 1.0
+        # prob of applying mixup aug
+        self.mixup_prob = 1.0
+        # prob of applying hsv aug
+        self.hsv_prob = 1.0
+        # prob of applying flip aug
+        self.flip_prob = 0.5
+        # rotation angle range, for example, if set to 2, the true range is (-2, 2)
+        self.degrees = 10.0
+        # translate range, for example, if set to 0.1, the true range is (-0.1, 0.1)
+        self.translate = 0.1
+        self.mosaic_scale = (0.1, 2)
+        # apply mixup aug or not
+        self.enable_mixup = True
+        self.mixup_scale = (0.5, 1.5)
+        # shear angle range, for example, if set to 2, the true range is (-2, 2)
+        self.shear = 2.0
+
+        # --------------  training config --------------------- #
+        # number of epochs used for warmup
+        self.warmup_epochs = 5
+        # max training epoch
+        self.max_epoch = 300
+        # minimum learning rate during warmup
+        self.warmup_lr = 0
+        self.min_lr_ratio = 0.05
+        # learning rate for one image. During training, lr is multiplied by the batch size.
+        self.basic_lr_per_img = 0.01 / 64.0
+        # name of LRScheduler
+        self.scheduler = "yoloxwarmcos"
+        # number of final epochs with augmentations such as mosaic turned off
+        self.no_aug_epochs = 15
+        # apply EMA during training
+        self.ema = True
+        # weight decay of optimizer
+        self.weight_decay = 5e-4
+        # momentum of optimizer
+        self.momentum = 0.9
+        # log period in iter, for example,
+        # if set to 1, user could see log every iteration.
+        self.print_interval = 10
+        # eval period in epoch, for example,
+        # if set to 1, the model will be evaluated after every epoch.
+        self.eval_interval = 10
+        # save history checkpoint or not.
+        # If set to False, yolox will only save latest and best ckpt.
+        self.save_history_ckpt = True
+        # name of experiment (the stem of this source file's name)
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
+
+        # -----------------  testing config ------------------ #
+        # output image size during evaluation/test
+        self.test_size = (640, 640)
+        # confidence threshold during evaluation/test,
+        # boxes whose scores are less than test_conf will be filtered
+        self.test_conf = 0.01
+        # nms threshold
+        self.nmsthre = 0.65
+
+    def get_model(self):
+        """Build the ``yolov5n`` model (upstream ``r6.0``), store it on ``self.model`` in train mode."""
+        import yolort.models as models
+        # NOTE: the architecture is fixed here; self.depth / self.width / self.num_classes are not read.
+        self.model = models.__dict__['yolov5n'](upstream_version="r6.0", )
+        self.model.train()
+        return self.model
+
+    def get_dataset(self, data_root: str, mode: str = "val", cache: bool = False, cache_type: str = "ram"):
+        """Build the ``COCODataset`` for ``mode``, fetching coco128 under ``data_root`` if needed.
+
+        Args:
+            data_root: directory that holds (or will receive) the ``coco128`` folder.
+            mode: ``"train"`` or ``"val"``. Both currently build the same training-style
+                dataset (``self.train_ann`` with ``TrainTransform``).
+            cache: forwarded to ``COCODataset``.
+            cache_type: forwarded to ``COCODataset``.
+
+        Returns:
+            The constructed ``COCODataset``.
+
+        Raises:
+            NotImplementedError: if ``mode`` is neither ``"train"`` nor ``"val"``.
+        """
+        # Reject an unsupported mode up front, before any download is attempted.
+        if mode not in ("train", "val"):
+            raise NotImplementedError(f"Currently not supports mode {mode}")
+
+        from yolort.data import COCODataset, TrainTransform
+
+        # Acquire the images and labels from the coco128 dataset
+        data_path = Path(data_root)
+        coco128_dirname = "coco128"
+        annotation_file = data_path / coco128_dirname / "annotations" / "instances_train2017.json"
+        if not annotation_file.is_file():
+            self.prepare_coco128(data_path, dirname=coco128_dirname)
+
+        # NOTE: the dataset is read from self.data_dir; data_root only drives the download above.
+        # TODO: give "val" its own annotation file and transform.
+        dataset = COCODataset(
+            data_dir=self.data_dir,
+            json_file=self.train_ann,
+            img_size=self.input_size,
+            preproc=TrainTransform(
+                max_labels=50,
+                flip_prob=self.flip_prob,
+                hsv_prob=self.hsv_prob
+            ),
+            cache=cache,
+            cache_type=cache_type,
+        )
+        return dataset
+
+    def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img: str = None):
+        """
+        Build the training dataloader, wrapping the dataset in ``MosaicDetection``.
+        Args:
+            batch_size (int): total batch size; divided by the world size when distributed.
+            is_distributed (bool): whether ``torch.distributed`` is in use.
+            no_aug (bool, optional): Whether to turn off mosaic data enhancement. Defaults to False.
+            cache_img (str, optional): cache_img is equivalent to cache_type. Defaults to None.
+                "ram" : Caching imgs to ram for fast training.
+                "disk": Caching imgs to disk for fast training.
+                None: Do not use cache, in this case cache_data is also None.
+        """
+        from yolort.data import (
+            TrainTransform,
+            YoloBatchSampler,
+            DataLoader,
+            InfiniteSampler,
+            MosaicDetection,
+            worker_init_reset_seed,
+        )
+        from yolort.utils import wait_for_the_master
+
+        # if cache is True, we will create dataset before launch
+        # else we will create dataset after launch
+        if self.dataset is None:
+            with wait_for_the_master():
+                assert cache_img is None, \
+                    "cache_img must be None if you didn't create dataset before launch"
+                self.dataset = self.get_dataset(data_root="data-bin", mode="train", cache=False, cache_type=cache_img)
+        # NOTE: self.dataset is rebound to the wrapper below, so a second call wraps it again.
+        self.dataset = MosaicDetection(
+            dataset=self.dataset,
+            mosaic=not no_aug,
+            img_size=self.input_size,
+            preproc=TrainTransform(
+                max_labels=120,
+                flip_prob=self.flip_prob,
+                hsv_prob=self.hsv_prob),
+            degrees=self.degrees,
+            translate=self.translate,
+            mosaic_scale=self.mosaic_scale,
+            mixup_scale=self.mixup_scale,
+            shear=self.shear,
+            enable_mixup=self.enable_mixup,
+            mosaic_prob=self.mosaic_prob,
+            mixup_prob=self.mixup_prob,
+        )
+
+        if is_distributed:
+            batch_size = batch_size // dist.get_world_size()
+
+        sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)
+
+        batch_sampler = YoloBatchSampler(
+            sampler=sampler,
+            batch_size=batch_size,
+            drop_last=False,
+            mosaic=not no_aug,
+        )
+
+        dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
+        dataloader_kwargs["batch_sampler"] = batch_sampler
+
+        # Make sure each process has different random seed, especially for 'fork' method.
+        # Check https://github.com/pytorch/pytorch/issues/63311 for more details.
+        dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed
+        train_loader = DataLoader(self.dataset, **dataloader_kwargs)
+        return train_loader
+
+    def prepare_coco128(self,
+            data_path: PosixPath,
+            dirname: str = "coco128",
+    ) -> None:
+        """
+        Download (if missing) and unzip the coco128 dataset under ``data_path``.
+
+        Args:
+            data_path (PosixPath): root path of coco128 dataset; created if it does not exist.
+            dirname (str): the directory name of coco128 dataset. Default: 'coco128'.
+        """
+        logger = logging.getLogger(__name__)
+
+        if not data_path.is_dir():
+            logger.info(f"Create a new directory: {data_path}")
+            data_path.mkdir(parents=True, exist_ok=True)
+        # The archive is downloaded only when it is not already on disk.
+        zip_path = data_path / "coco128.zip"
+        coco128_url = "https://github.com/zhiqwang/yolort/releases/download/v0.3.0/coco128.zip"
+        if not zip_path.is_file():
+            logger.info(f"Downloading coco128 datasets form {coco128_url}")
+            torch.hub.download_url_to_file(coco128_url, zip_path, hash_prefix="a67d2887")
+        # Extraction is skipped when the target directory already exists.
+        coco128_path = data_path / dirname
+        if not coco128_path.is_dir():
+            logger.info(f"Unzipping dataset to {coco128_path}")
+            with ZipFile(zip_path, "r") as zip_obj:
+                zip_obj.extractall(data_path)
+
+    def random_resize(self, data_loader, epoch, rank, is_distributed):
+        """Draw a multiple-of-32 input size on rank 0, broadcast it if distributed, return it."""
+        tensor = torch.LongTensor(2).cuda()  # 2-element buffer for the size; requires CUDA
+        if rank == 0:
+            size_factor = self.input_size[1] * 1.0 / self.input_size[0]
+            if not hasattr(self, 'random_size'):
+                min_size = int(self.input_size[0] / 32) - self.multiscale_range
+                max_size = int(self.input_size[0] / 32) + self.multiscale_range
+                self.random_size = (min_size, max_size)  # computed once, then reused
+            size = random.randint(*self.random_size)  # drawn in units of 32 pixels
+            size = (int(32 * size), 32 * int(size * size_factor))
+            tensor[0] = size[0]
+            tensor[1] = size[1]
+
+        if is_distributed:
+            dist.barrier()
+            dist.broadcast(tensor, 0)  # every rank receives the size chosen on rank 0
+
+        input_size = (tensor[0].item(), tensor[1].item())
+        return input_size
+
+    def preprocess(self, inputs, targets, tsize):
+        """Resize ``inputs`` to ``tsize`` and rescale the matching ``targets`` columns."""
+        ratio_h, ratio_w = tsize[0] / self.input_size[0], tsize[1] / self.input_size[1]
+        if ratio_h == 1 and ratio_w == 1:
+            return inputs, targets  # already at the configured input size
+        inputs = nn.functional.interpolate(inputs, size=tsize, mode="bilinear", align_corners=False)
+        # Columns 1, 3, ... follow the width ratio; columns 2, 4, ... follow the height ratio.
+        targets[..., 1::2] = targets[..., 1::2] * ratio_w
+        targets[..., 2::2] = targets[..., 2::2] * ratio_h
+        return inputs, targets
+
+    def get_optimizer(self, batch_size):
+        """Build the SGD optimizer once (three parameter groups), cache it on ``self.optimizer``, return it."""
+        if "optimizer" not in self.__dict__:
+            if self.warmup_epochs > 0:
+                lr = self.warmup_lr  # warmup enabled: start from the warmup learning rate
+            else:
+                lr = self.basic_lr_per_img * batch_size
+            pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
+
+            for k, v in self.model.named_modules():  # requires self.model to be set already
+                if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter):
+                    pg2.append(v.bias)  # biases
+                if isinstance(v, nn.BatchNorm2d) or "bn" in k:
+                    pg0.append(v.weight)  # no decay
+                elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
+                    pg1.append(v.weight)  # apply decay
+
+            optimizer = torch.optim.SGD(
+                pg0, lr=lr, momentum=self.momentum, nesterov=True
+            )
+            optimizer.add_param_group(
+                {"params": pg1, "weight_decay": self.weight_decay}
+            )  # add pg1 with weight_decay
+            optimizer.add_param_group({"params": pg2})
+            self.optimizer = optimizer
+
+        return self.optimizer
+
+    def get_lr_scheduler(self, lr, iters_per_epoch):
+        """Create the ``LRScheduler`` selected by ``self.scheduler``."""
+        from yolort.utils import LRScheduler
+
+        # Warmup, no-augmentation and minimum-ratio settings all come from the experiment config.
+        schedule_options = {
+            "warmup_epochs": self.warmup_epochs,
+            "warmup_lr_start": self.warmup_lr,
+            "no_aug_epochs": self.no_aug_epochs,
+            "min_lr_ratio": self.min_lr_ratio,
+        }
+        return LRScheduler(
+            self.scheduler, lr, iters_per_epoch, self.max_epoch, **schedule_options
+        )
+
+    def get_eval_dataset(self, **kwargs):
+        """Build the evaluation ``COCODataset``; reads optional ``testdev`` / ``legacy`` kwargs."""
+        from yolort.data import COCODataset, ValTransform
+        testdev = kwargs.get("testdev", False)
+        legacy = kwargs.get("legacy", False)
+        return COCODataset(
+            data_dir=self.data_dir,
+            json_file=self.train_ann,                          # TODO: this should be changed to
+            name="train2017" if not testdev else "train2017",  # the test data
+            img_size=self.test_size,
+            preproc=ValTransform(legacy=legacy),
+        )
+
+    def get_eval_loader(self, batch_size, is_distributed, **kwargs):
+        """Build a non-shuffling ``DataLoader`` over the evaluation dataset."""
+        valdataset = self.get_eval_dataset(**kwargs)
+
+        if not is_distributed:
+            sampler = torch.utils.data.SequentialSampler(valdataset)
+        else:
+            # Each process takes its share of the requested batch size.
+            batch_size = batch_size // dist.get_world_size()
+            sampler = torch.utils.data.distributed.DistributedSampler(
+                valdataset, shuffle=False
+            )
+
+        return torch.utils.data.DataLoader(
+            valdataset,
+            num_workers=self.data_num_workers,
+            pin_memory=True,
+            sampler=sampler,
+            batch_size=batch_size,
+        )
+
+    def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False):
+        """Wrap the evaluation dataloader in a ``COCOEvaluator`` using the test-time settings."""
+        from yolort.evaluators import COCOEvaluator
+        eval_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev, legacy=legacy)
+        return COCOEvaluator(
+            dataloader=eval_loader,
+            img_size=self.test_size,
+            confthre=self.test_conf,
+            nmsthre=self.nmsthre,
+            num_classes=self.num_classes,
+            testdev=testdev,
+        )
+
+    def eval(self, model, evaluator, is_distributed, half=False, return_outputs=False):  # delegates to evaluator.evaluate
+        return evaluator.evaluate(model, is_distributed, half, return_outputs=return_outputs)
\ No newline at end of file
diff --git a/yolort/trainer/__init__.py b/yolort/trainer/__init__.py
index 34724c90..ba0e63aa 100644
--- a/yolort/trainer/__init__.py
+++ b/yolort/trainer/__init__.py
@@ -1,5 +1,5 @@
 # Copyright (c) 2021, yolort team. All rights reserved.
 
-from .lightning_task import DefaultTask
+from .trainer import Trainer
 
-__all__ = ["DefaultTask"]
+__all__ = ["Trainer"]
diff --git a/yolort/trainer/lightning_task.py b/yolort/trainer/lightning_task.py
deleted file mode 100644
index c8cec1e1..00000000
--- a/yolort/trainer/lightning_task.py
+++ /dev/null
@@ -1,143 +0,0 @@
-# Copyright (c) 2021, yolort team. All rights reserved.
-
-import argparse
-from pathlib import PosixPath
-from typing import Any, Dict, List, Optional, Tuple, Union
-
-import torch
-import yolort.models as models
-from pytorch_lightning import LightningModule
-from torch import Tensor
-from torchvision.ops import box_iou
-from yolort.data.coco_eval import COCOEvaluator
-
-
-__all__ = ["DefaultTask"]
-
-
-def _evaluate_iou(target, pred):
-    """
-    Evaluate intersection over union (IOU) for target from dataset and
-    output prediction from model
-    """
-    if pred["boxes"].shape[0] == 0:
-        # no box detected, 0 IOU
-        return torch.tensor(0.0, device=pred["boxes"].device)
-    return box_iou(target["boxes"], pred["boxes"]).diag().mean()
-
-
-class DefaultTask(LightningModule):
-    """
-    Wrapping the trainer into the YOLOv5 Module.
-
-    Args:
-        arch (string): YOLOv5 model architecture. Default: 'yolov5s'
-        version (str): model released by the upstream YOLOv5. Possible values
-            are ['r6.0']. Default: 'r6.0'.
-        lr (float): The initial learning rate
-        annotation_path (Optional[Union[string, PosixPath]]): Path of the COCO annotation file
-            Default: None.
-    """
-
-    def __init__(
-        self,
-        arch: str = "yolov5s",
-        version: str = "r6.0",
-        lr: float = 0.01,
-        annotation_path: Optional[Union[str, PosixPath]] = None,
-        **kwargs: Any,
-    ) -> None:
-
-        super().__init__()
-
-        self.model = models.__dict__[arch](upstream_version=version, **kwargs)
-        self.lr = lr
-
-        # evaluators for validation datasets
-        self.evaluator = None
-        if annotation_path is not None:
-            self.evaluator = COCOEvaluator(annotation_path, iou_type="bbox")
-
-        # used only on torchscript mode
-        self._has_warned = False
-
-    def forward(
-        self,
-        inputs: List[Tensor],
-        targets: Optional[List[Dict[str, Tensor]]] = None,
-    ) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]:
-        """
-        This exists since PyTorchLightning forward are used for inference only (separate from
-        ``training_step``). We keep ``targets`` here for Backward Compatible.
-        """
-        return self.model(inputs, targets)
-
-    def training_step(self, batch, batch_idx):
-        """
-        The training step.
-        """
-        loss_dict = self.model(*batch)
-        loss = sum(loss_dict.values())
-        self.log_dict(loss_dict, on_step=True, on_epoch=True, prog_bar=True)
-        return loss
-
-    def validation_step(self, batch, batch_idx):
-        images, targets = batch
-        # fasterrcnn takes only images for eval() mode
-        preds = self.model(images)
-        iou = torch.stack([_evaluate_iou(t, o) for t, o in zip(targets, preds)]).mean()
-        outs = {"val_iou": iou}
-        self.log_dict(outs, on_step=True, on_epoch=True, prog_bar=True)
-        return outs
-
-    def validation_epoch_end(self, outs):
-        avg_iou = torch.stack([o["val_iou"] for o in outs]).mean()
-        self.log("avg_val_iou", avg_iou)
-
-    def test_step(self, batch, batch_idx):
-        """
-        The test step.
-        """
-        images, targets = batch
-        images = list(image.to(next(self.parameters()).device) for image in images)
-        preds = self.model(images)
-        results = self.evaluator(preds, targets)
-        # log step metric
-        self.log("eval_step", results, prog_bar=True, on_step=True)
-
-    def test_epoch_end(self, outputs):
-        return self.log("coco_eval", self.evaluator.compute())
-
-    def configure_optimizers(self):
-        return torch.optim.SGD(
-            self.model.parameters(),
-            lr=self.lr,
-            momentum=0.9,
-            weight_decay=5e-4,
-        )
-
-    @staticmethod
-    def add_model_specific_args(parent_parser):
-        parser = argparse.ArgumentParser(parents=[parent_parser], add_help=False)
-        parser.add_argument("--arch", default="yolov5_darknet_pan_s_r40", help="model architecture")
-        parser.add_argument(
-            "--pretrained",
-            action="store_true",
-            help="Use pre-trained models from the modelzoo",
-        )
-        parser.add_argument(
-            "--lr",
-            default=0.01,
-            type=float,
-            help="initial learning rate, 0.01 is the default value for training "
-            "on 8 gpus and 2 images_per_gpu",
-        )
-        parser.add_argument("--momentum", default=0.9, type=float, metavar="M", help="momentum")
-        parser.add_argument(
-            "--weight-decay",
-            default=5e-4,
-            type=float,
-            metavar="W",
-            help="weight decay (default: 5e-4)",
-        )
-        return parser
diff --git a/yolort/trainer/trainer.py b/yolort/trainer/trainer.py
new file mode 100644
index 00000000..28f1fbe1
--- /dev/null
+++ b/yolort/trainer/trainer.py
@@ -0,0 +1,392 @@
+#!/usr/bin/env python3
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import datetime
+import os
+import time
+from loguru import logger
+
+import torch
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.utils.tensorboard import SummaryWriter
+
+from yolort.data import DataPrefetcher
+from yolort.exp import Exp
+from yolort.utils import (
+    MeterBuffer,
+    ModelEMA,
+    WandbLogger,
+    adjust_status,
+    all_reduce_norm,
+    get_local_rank,
+    get_model_info,
+    get_rank,
+    get_world_size,
+    gpu_mem_usage,
+    is_parallel,
+    load_ckpt,
+    mem_usage,
+    occupy_mem,
+    save_checkpoint,
+    setup_logger,
+    synchronize
+)
+
+__all__ = ["Trainer"]
+
+class Trainer:
+    def __init__(self, exp: Exp, args):
+        # init function only defines some basic attr, other attrs like model, optimizer are built in
+        # before_train methods.
+        self.exp = exp
+        self.args = args
+
+        # training related attr
+        self.max_epoch = exp.max_epoch
+        self.amp_training = args.fp16
+        self.scaler = torch.cuda.amp.GradScaler(enabled=args.fp16)
+        self.is_distributed = get_world_size() > 1
+        self.rank = get_rank()
+        self.local_rank = get_local_rank()
+        self.device = "cuda:{}".format(self.local_rank) if torch.cuda.is_available() else 'cpu'
+        self.use_model_ema = exp.ema
+        self.save_history_ckpt = exp.save_history_ckpt
+
+        # data/dataloader related attr
+        self.data_type = torch.float16 if args.fp16 else torch.float32
+        self.input_size = exp.input_size
+        self.best_ap = 0
+
+        # metric record
+        self.meter = MeterBuffer(window_size=exp.print_interval)
+        self.file_name = os.path.join(exp.output_dir, args.experiment_name)
+
+        if self.rank == 0:
+            os.makedirs(self.file_name, exist_ok=True)
+
+        setup_logger(
+            self.file_name,
+            distributed_rank=self.rank,
+            filename="train_log.txt",
+            mode="a",
+        )
+
+    def train(self):
+        """Run setup, the epoch loop and teardown; errors from the loop propagate unchanged."""
+        self.before_train()
+        try:
+            self.train_in_epoch()
+        finally:
+            # runs whether the loop finished or raised (but not if before_train itself failed)
+            self.after_train()
+
+    def train_in_epoch(self):
+        for self.epoch in range(self.start_epoch, self.max_epoch):
+            self.before_epoch()
+            self.train_in_iter()
+            self.after_epoch()
+
+    def train_in_iter(self):
+        for self.iter in range(self.max_iter):
+            self.before_iter()
+            self.train_one_iter()
+            self.after_iter()
+
+    def train_one_iter(self):
+        iter_start_time = time.time()
+
+        inps, targets = self.prefetcher.next()
+        inps = inps.to(self.data_type)
+        targets = targets.to(self.data_type)
+        targets.requires_grad = False
+        inps, targets = self.exp.preprocess(inps, targets, self.input_size)
+        data_end_time = time.time()
+
+        with torch.cuda.amp.autocast(enabled=self.amp_training):
+            outputs = self.model(inps, targets)
+
+        loss = outputs["total_loss"]
+
+        self.optimizer.zero_grad()
+        self.scaler.scale(loss).backward()
+        self.scaler.step(self.optimizer)
+        self.scaler.update()
+
+        if self.use_model_ema:
+            self.ema_model.update(self.model)
+
+        lr = self.lr_scheduler.update_lr(self.progress_in_iter + 1)
+        for param_group in self.optimizer.param_groups:
+            param_group["lr"] = lr
+
+        iter_end_time = time.time()
+        self.meter.update(
+            iter_time=iter_end_time - iter_start_time,
+            data_time=data_end_time - iter_start_time,
+            lr=lr,
+            **outputs,
+        )
+
+    def before_train(self):
+        logger.info("args: {}".format(self.args))
+        logger.info("exp value:\n{}".format(self.exp))
+
+        # model related init
+        if self.device != 'cpu':
+            torch.cuda.set_device(self.local_rank)
+        model = self.exp.get_model()
+        logger.info(
+            "Model Summary: {}".format(get_model_info(model, self.exp.test_size))
+        )
+        model.to(self.device)
+
+        # solver related init
+        self.optimizer = self.exp.get_optimizer(self.args.batch_size)
+
+        # value of epoch will be set in `resume_train`
+        model = self.resume_train(model)
+
+        # data related init
+        self.no_aug = self.start_epoch >= self.max_epoch - self.exp.no_aug_epochs
+        self.train_loader = self.exp.get_data_loader(
+            batch_size=self.args.batch_size,
+            is_distributed=self.is_distributed,
+            no_aug=self.no_aug,
+            cache_img=self.args.cache,
+        )
+        logger.info("init prefetcher, this might take one minute or less...")
+        self.prefetcher = DataPrefetcher(self.train_loader)
+        # max_iter means iters per epoch
+        self.max_iter = len(self.train_loader)
+
+        self.lr_scheduler = self.exp.get_lr_scheduler(
+            self.exp.basic_lr_per_img * self.args.batch_size, self.max_iter
+        )
+        if self.args.occupy:
+            occupy_mem(self.local_rank)
+
+        if self.is_distributed:
+            model = DDP(model, device_ids=[self.local_rank], broadcast_buffers=False)
+
+        if self.use_model_ema:
+            self.ema_model = ModelEMA(model, 0.9998)
+            self.ema_model.updates = self.max_iter * self.start_epoch
+
+        self.model = model
+
+        self.evaluator = self.exp.get_evaluator(
+            batch_size=self.args.batch_size, is_distributed=self.is_distributed
+        )
+        # Tensorboard and Wandb loggers
+        if self.rank == 0:
+            if self.args.logger == "tensorboard":
+                self.tblogger = SummaryWriter(os.path.join(self.file_name, "tensorboard"))
+            elif self.args.logger == "wandb":
+                self.wandb_logger = WandbLogger.initialize_wandb_logger(
+                    self.args,
+                    self.exp,
+                    self.evaluator.dataloader.dataset
+                )
+            else:
+                raise ValueError("logger must be either 'tensorboard' or 'wandb'")
+
+        logger.info("Training start...")
+        logger.info("\n{}".format(model))
+
+    def after_train(self):
+        logger.info(
+            "Training of experiment is done and the best AP is {:.2f}".format(self.best_ap * 100)
+        )
+        if self.rank == 0:
+            if self.args.logger == "wandb":
+                self.wandb_logger.finish()
+
+    def before_epoch(self):
+        logger.info("---> start train epoch{}".format(self.epoch + 1))
+
+        if self.epoch + 1 == self.max_epoch - self.exp.no_aug_epochs or self.no_aug:
+            logger.info("--->No mosaic aug now!")
+            self.train_loader.close_mosaic()
+            logger.info("--->Add additional L1 loss now!")
+            if self.is_distributed:
+                self.model.module.head.use_l1 = True
+            else:
+                self.model.head.use_l1 = True
+            self.exp.eval_interval = 1
+            if not self.no_aug:
+                self.save_ckpt(ckpt_name="last_mosaic_epoch")
+
+    def after_epoch(self):
+        self.save_ckpt(ckpt_name="latest")
+
+        if (self.epoch + 1) % self.exp.eval_interval == 0:
+            all_reduce_norm(self.model)
+            self.evaluate_and_save_model()
+
+    def before_iter(self):
+        pass
+
+    def after_iter(self):
+        """
+        `after_iter` contains two parts of logic:
+            * log information
+            * reset setting of resize
+        """
+        # log needed information
+        if (self.iter + 1) % self.exp.print_interval == 0:
+            # TODO check ETA logic
+            left_iters = self.max_iter * self.max_epoch - (self.progress_in_iter + 1)
+            eta_seconds = self.meter["iter_time"].global_avg * left_iters
+            eta_str = "ETA: {}".format(datetime.timedelta(seconds=int(eta_seconds)))
+
+            progress_str = "epoch: {}/{}, iter: {}/{}".format(
+                self.epoch + 1, self.max_epoch, self.iter + 1, self.max_iter
+            )
+            loss_meter = self.meter.get_filtered_meter("loss")
+            loss_str = ", ".join(
+                ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()]
+            )
+
+            time_meter = self.meter.get_filtered_meter("time")
+            time_str = ", ".join(
+                ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()]
+            )
+
+            mem_str = "gpu mem: {:.0f}Mb, mem: {:.1f}Gb".format(gpu_mem_usage(), mem_usage())
+
+            logger.info(
+                "{}, {}, {}, {}, lr: {:.3e}".format(
+                    progress_str,
+                    mem_str,
+                    time_str,
+                    loss_str,
+                    self.meter["lr"].latest,
+                )
+                + (", size: {:d}, {}".format(self.input_size[0], eta_str))
+            )
+
+            if self.rank == 0:
+                if self.args.logger == "tensorboard":
+                    self.tblogger.add_scalar(
+                        "train/lr", self.meter["lr"].latest, self.progress_in_iter)
+                    for k, v in loss_meter.items():
+                        self.tblogger.add_scalar(
+                            f"train/{k}", v.latest, self.progress_in_iter)
+                if self.args.logger == "wandb":
+                    metrics = {"train/" + k: v.latest for k, v in loss_meter.items()}
+                    metrics.update({
+                        "train/lr": self.meter["lr"].latest
+                    })
+                    self.wandb_logger.log_metrics(metrics, step=self.progress_in_iter)
+
+            self.meter.clear_meters()
+
+        # random resizing
+        if (self.progress_in_iter + 1) % 10 == 0:
+            self.input_size = self.exp.random_resize(
+                self.train_loader, self.epoch, self.rank, self.is_distributed
+            )
+
+    @property
+    def progress_in_iter(self):
+        return self.epoch * self.max_iter + self.iter
+
+    def resume_train(self, model):
+        """Restore training state from a checkpoint and set `self.start_epoch`.
+
+        With `args.resume`, loads model/optimizer state and `best_ap` from `args.ckpt`
+        (or `latest_ckpt.pth` under the experiment dir). Otherwise, when `args.ckpt` is
+        given, loads its "model" weights through `load_ckpt`. Returns the model.
+        """
+        if self.args.resume:
+            logger.info("resume training")
+            if self.args.ckpt is None:
+                ckpt_file = os.path.join(self.file_name, "latest" + "_ckpt.pth")
+            else:
+                ckpt_file = self.args.ckpt
+
+            ckpt = torch.load(ckpt_file, map_location=self.device)
+            # resume the model/optimizer state dict
+            model.load_state_dict(ckpt["model"])
+            self.optimizer.load_state_dict(ckpt["optimizer"])
+            self.best_ap = ckpt.pop("best_ap", 0)
+            if self.args.start_epoch is not None:
+                self.start_epoch = self.args.start_epoch - 1
+            else:
+                self.start_epoch = ckpt["start_epoch"]
+            # report the file that was loaded; `args.resume` is only used as a flag here
+            logger.info("loaded checkpoint '{}' (epoch {})".format(ckpt_file, self.start_epoch))
+        else:
+            if self.args.ckpt is not None:
+                logger.info("loading checkpoint for fine tuning")
+                ckpt_file = self.args.ckpt
+                ckpt = torch.load(ckpt_file, map_location=self.device)["model"]
+                model = load_ckpt(model, ckpt)
+            self.start_epoch = 0
+
+        return model
+
+    def evaluate_and_save_model(self):
+        if self.use_model_ema:
+            evalmodel = self.ema_model.ema
+        else:
+            evalmodel = self.model
+            if is_parallel(evalmodel):
+                evalmodel = evalmodel.module
+
+        with adjust_status(evalmodel, training=False):
+            (ap50_95, ap50, summary), predictions = self.exp.eval(
+                evalmodel, self.evaluator, self.is_distributed, return_outputs=True
+            )
+
+        update_best_ckpt = ap50_95 > self.best_ap
+        self.best_ap = max(self.best_ap, ap50_95)
+
+        if self.rank == 0:
+            if self.args.logger == "tensorboard":
+                self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1)
+                self.tblogger.add_scalar("val/COCOAP50_95", ap50_95, self.epoch + 1)
+            if self.args.logger == "wandb":
+                self.wandb_logger.log_metrics({
+                    "val/COCOAP50": ap50,
+                    "val/COCOAP50_95": ap50_95,
+                    "train/epoch": self.epoch + 1,
+                })
+                self.wandb_logger.log_images(predictions)
+            logger.info("\n" + summary)
+        synchronize()
+
+        self.save_ckpt("last_epoch", update_best_ckpt, ap=ap50_95)
+        if self.save_history_ckpt:
+            self.save_ckpt(f"epoch_{self.epoch + 1}", ap=ap50_95)
+
+    def save_ckpt(self, ckpt_name, update_best_ckpt=False, ap=None):
+        if self.rank == 0:
+            save_model = self.ema_model.ema if self.use_model_ema else self.model
+            logger.info("Save weights to {}".format(self.file_name))
+            ckpt_state = {
+                "start_epoch": self.epoch + 1,
+                "model": save_model.state_dict(),
+                "optimizer": self.optimizer.state_dict(),
+                "best_ap": self.best_ap,
+                "curr_ap": ap,
+            }
+            save_checkpoint(
+                ckpt_state,
+                update_best_ckpt,
+                self.file_name,
+                ckpt_name,
+            )
+
+            if self.args.logger == "wandb":
+                self.wandb_logger.save_checkpoint(
+                    self.file_name,
+                    ckpt_name,
+                    update_best_ckpt,
+                    metadata={
+                        "epoch": self.epoch + 1,
+                        "optimizer": self.optimizer.state_dict(),
+                        "best_ap": self.best_ap,
+                        "curr_ap": ap
+                    }
+                )
\ No newline at end of file
diff --git a/yolort/utils/__init__.py b/yolort/utils/__init__.py
index c16127d2..cf4c00b0 100644
--- a/yolort/utils/__init__.py
+++ b/yolort/utils/__init__.py
@@ -14,6 +14,15 @@
 from .hooks import FeatureExtractor
 from .image_utils import cv2_imshow, get_image_from_url, read_image_to_tensor
 from .visualizer import Visualizer
+from .allreduce_norm import *
+from .boxes import *
+from .checkpoint import load_ckpt, save_checkpoint
+from .dist import *
+from .ema import *
+from .logger import WandbLogger, setup_logger
+from .lr_scheduler import LRScheduler
+from .metric import *
+from .model_utils import *
 
 
 __all__ = [
diff --git a/yolort/utils/allreduce_norm.py b/yolort/utils/allreduce_norm.py
new file mode 100644
index 00000000..142c76c7
--- /dev/null
+++ b/yolort/utils/allreduce_norm.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii Inc. All rights reserved.
+
+import pickle
+from collections import OrderedDict
+
+import torch
+from torch import distributed as dist
+from torch import nn
+
+from .dist import _get_global_gloo_group, get_world_size
+
+ASYNC_NORM = (
+    nn.BatchNorm1d,
+    nn.BatchNorm2d,
+    nn.BatchNorm3d,
+    nn.InstanceNorm1d,
+    nn.InstanceNorm2d,
+    nn.InstanceNorm3d,
+)
+
+__all__ = [
+    "get_async_norm_states",
+    "pyobj2tensor",
+    "tensor2pyobj",
+    "all_reduce",
+    "all_reduce_norm",
+]
+
+
+def get_async_norm_states(module):
+    """Collect the state_dict entries of every ASYNC_NORM layer, keyed "<layer name>.<entry>"."""
+    collected = OrderedDict()
+    for prefix, layer in module.named_modules():
+        if isinstance(layer, ASYNC_NORM):
+            collected.update((".".join([prefix, k]), v) for k, v in layer.state_dict().items())
+    return collected
+
+
+def pyobj2tensor(pyobj, device="cuda"):
+    """serialize picklable python object to tensor"""
+    storage = torch.ByteStorage.from_buffer(pickle.dumps(pyobj))
+    return torch.ByteTensor(storage).to(device=device)
+
+
+def tensor2pyobj(tensor):
+    """deserialize tensor to picklable python object"""
+    return pickle.loads(tensor.cpu().numpy().tobytes())
+
+
+def _get_reduce_op(op_name):
+    return {
+        "sum": dist.ReduceOp.SUM,
+        "mean": dist.ReduceOp.SUM,  # mean is reduced as a sum; `all_reduce` divides afterwards
+    }[op_name.lower()]  # case-insensitive lookup; any other name raises KeyError
+
+
+def all_reduce(py_dict, op="sum", group=None):
+    """
+    Apply all reduce function for python dict object.
+    NOTE: make sure that every py_dict has the same keys and values are in the same shape.
+    Args:
+        py_dict (dict): dict of tensors to reduce; returned as-is when only one process runs.
+        op (str): operator, could be "sum" or "mean" (case-insensitive).
+        group: process group whose size is checked; defaults to the global gloo group.
+    """
+    world_size = get_world_size()
+    if world_size == 1:
+        return py_dict
+    if group is None:
+        group = _get_global_gloo_group()
+    if dist.get_world_size(group) == 1:
+        return py_dict
+
+    # all reduce logic across different devices.
+    py_key = list(py_dict.keys())
+    py_key_tensor = pyobj2tensor(py_key)
+    dist.broadcast(py_key_tensor, src=0)
+    py_key = tensor2pyobj(py_key_tensor)
+
+    tensor_shapes = [py_dict[k].shape for k in py_key]
+    tensor_numels = [py_dict[k].numel() for k in py_key]
+
+    flatten_tensor = torch.cat([py_dict[k].flatten() for k in py_key])
+    dist.all_reduce(flatten_tensor, op=_get_reduce_op(op))
+    if op.lower() == "mean":  # same case-insensitive match as _get_reduce_op
+        flatten_tensor /= world_size
+
+    split_tensors = [
+        x.reshape(shape)
+        for x, shape in zip(torch.split(flatten_tensor, tensor_numels), tensor_shapes)
+    ]
+    return OrderedDict({k: v for k, v in zip(py_key, split_tensors)})
+
+
+def all_reduce_norm(module):
+    """
+    All reduce norm statistics in different devices.
+    """
+    states = get_async_norm_states(module)
+    states = all_reduce(states, op="mean")
+    module.load_state_dict(states, strict=False)
diff --git a/yolort/utils/boxes.py b/yolort/utils/boxes.py
new file mode 100644
index 00000000..a8eaf3f4
--- /dev/null
+++ b/yolort/utils/boxes.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+# Copyright (c) Megvii Inc. All rights reserved.
+
+import numpy as np
+
+import torch
+import torchvision
+
+__all__ = [
+    "filter_box",
+    "postprocess",
+    "bboxes_iou",
+    "matrix_iou",
+    "adjust_box_anns",
+    "xyxy2xywh",
+    "xyxy2cxcywh",
+    "cxcywh2xyxy",
+]
+
+
+def filter_box(output, scale_range):
+    """
+    output: (N, 5+class) shape
+    """
+    min_scale, max_scale = scale_range
+    w = output[:, 2] - output[:, 0]
+    h = output[:, 3] - output[:, 1]
+    keep = (w * h > min_scale * min_scale) & (w * h < max_scale * max_scale)
+    return output[keep]
+
+
+def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False):
+    """Filter raw predictions by confidence and NMS, one image at a time.
+    `prediction` is indexed (image, box, field) with fields (cx, cy, w, h, obj_conf, class
+    scores...); its first four fields are overwritten in place with corner coordinates.
+    Returns a list with one entry per image: the kept detections, or None when nothing is kept.
+    """
+    box_corner = prediction.new(prediction.shape)
+    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
+    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
+    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
+    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
+    prediction[:, :, :4] = box_corner[:, :, :4]
+
+    output = [None for _ in range(len(prediction))]
+    for i, image_pred in enumerate(prediction):
+        # If none are remaining => process next image
+        if not image_pred.size(0):
+            continue
+        # Get score and class with highest confidence
+        class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True)
+        # squeeze only dim 1 so the mask stays 1-D even when the image has a single box
+        conf_mask = image_pred[:, 4] * class_conf.squeeze(1) >= conf_thre
+        # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
+        detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1)
+        detections = detections[conf_mask]
+        if not detections.size(0):
+            continue
+
+        if class_agnostic:
+            nms_out_index = torchvision.ops.nms(
+                detections[:, :4],
+                detections[:, 4] * detections[:, 5],
+                nms_thre,
+            )
+        else:
+            nms_out_index = torchvision.ops.batched_nms(
+                detections[:, :4],
+                detections[:, 4] * detections[:, 5],
+                detections[:, 6],
+                nms_thre,
+            )
+
+        output[i] = detections[nms_out_index]
+
+    return output
+
+
+def bboxes_iou(bboxes_a, bboxes_b, xyxy=True):
+    if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:
+        raise IndexError
+
+    if xyxy:
+        tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2])
+        br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:])
+        area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1)
+        area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1)
+    else:
+        tl = torch.max(
+            (bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2),
+            (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2),
+        )
+        br = torch.min(
+            (bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2),
+            (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2),
+        )
+
+        area_a = torch.prod(bboxes_a[:, 2:], 1)
+        area_b = torch.prod(bboxes_b[:, 2:], 1)
+    en = (tl < br).type(tl.type()).prod(dim=2)
+    area_i = torch.prod(br - tl, 2) * en  # * ((tl < br).all())
+    return area_i / (area_a[:, None] + area_b - area_i)
+
+
+def matrix_iou(a, b):
+    """
+    return pairwise iou of boxes a and b (xyxy rows), numpy version for data augmentation
+    """
+    lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
+    rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
+
+    area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)  # zero where boxes do not overlap
+    area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
+    area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
+    return area_i / (area_a[:, np.newaxis] + area_b - area_i + 1e-12)  # epsilon avoids 0/0
+
+
+def adjust_box_anns(bbox, scale_ratio, padw, padh, w_max, h_max):
+    bbox[:, 0::2] = np.clip(bbox[:, 0::2] * scale_ratio + padw, 0, w_max)
+    bbox[:, 1::2] = np.clip(bbox[:, 1::2] * scale_ratio + padh, 0, h_max)
+    return bbox
+
+
+def xyxy2xywh(bboxes):
+    """Convert (x1, y1, x2, y2) rows to (x1, y1, w, h) in place and return `bboxes`."""
+    bboxes[:, 2:4] = bboxes[:, 2:4] - bboxes[:, 0:2]
+    return bboxes
+
+
+def xyxy2cxcywh(bboxes):
+    """Convert (x1, y1, x2, y2) rows to (cx, cy, w, h) in place and return `bboxes`."""
+    # sizes first, then move the top-left corner to the centre by half a size
+    bboxes[:, 2:4] = bboxes[:, 2:4] - bboxes[:, 0:2]
+    bboxes[:, 0:2] = bboxes[:, 0:2] + bboxes[:, 2:4] * 0.5
+    return bboxes
+
+
+def cxcywh2xyxy(bboxes):
+    """Convert (cx, cy, w, h) rows to (x1, y1, x2, y2) in place and return `bboxes`."""
+    # top-left corner first; width/height are still intact in columns 2:4
+    bboxes[:, 0:2] = bboxes[:, 0:2] - bboxes[:, 2:4] * 0.5
+    bboxes[:, 2:4] = bboxes[:, 0:2] + bboxes[:, 2:4]
+    return bboxes
\ No newline at end of file
diff --git a/yolort/utils/checkpoint.py b/yolort/utils/checkpoint.py
new file mode 100644
index 00000000..a0c200e4
--- /dev/null
+++ b/yolort/utils/checkpoint.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii Inc. All rights reserved.
+import os
+import shutil
+from loguru import logger
+
+import torch
+
+
+def load_ckpt(model, ckpt):
+    """Load into `model` the entries of `ckpt` whose key and shape match the model's own.
+    Keys absent from `ckpt` or stored with another shape are skipped with a warning.
+    """
+    matched = {}
+    for name, param in model.state_dict().items():
+        if name not in ckpt:
+            logger.warning(
+                "{} is not in the ckpt. Please double check and see if this is desired.".format(name)
+            )
+            continue
+        candidate = ckpt[name]
+        if param.shape != candidate.shape:
+            logger.warning(
+                "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format(
+                    name, candidate.shape, name, param.shape
+                )
+            )
+            continue
+        matched[name] = candidate
+
+    model.load_state_dict(matched, strict=False)
+    return model
+
+
+def save_checkpoint(state, is_best, save_dir, model_name=""):
+    """Save `state` as `<model_name>_ckpt.pth` in `save_dir`; if `is_best`, copy it to `best_ckpt.pth`."""
+    os.makedirs(save_dir, exist_ok=True)  # no check-then-create race if the dir appears meanwhile
+    filename = os.path.join(save_dir, model_name + "_ckpt.pth")
+    torch.save(state, filename)
+    if is_best:
+        best_filename = os.path.join(save_dir, "best_ckpt.pth")
+        shutil.copyfile(filename, best_filename)
diff --git a/yolort/utils/dist.py b/yolort/utils/dist.py
new file mode 100644
index 00000000..a4b46801
--- /dev/null
+++ b/yolort/utils/dist.py
@@ -0,0 +1,294 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# This file mainly comes from
+# https://github.com/facebookresearch/detectron2/blob/master/detectron2/utils/comm.py
+# Copyright (c) Facebook, Inc. and its affiliates.
+# Copyright (c) Megvii Inc. All rights reserved.
+"""
+This file contains primitives for multi-gpu communication.
+This is useful when doing distributed training.
+"""
+
+import functools
+import os
+import pickle
+import time
+from contextlib import contextmanager
+from loguru import logger
+
+import numpy as np
+
+import torch
+from torch import distributed as dist
+
+__all__ = [
+    "get_num_devices",
+    "wait_for_the_master",
+    "is_main_process",
+    "synchronize",
+    "get_world_size",
+    "get_rank",
+    "get_local_rank",
+    "get_local_size",
+    "time_synchronized",
+    "gather",
+    "all_gather",
+]
+
+_LOCAL_PROCESS_GROUP = None
+
+
+def get_num_devices():
+    """Count entries of CUDA_VISIBLE_DEVICES if set, else output lines of `nvidia-smi -L`."""
+    gpu_list = os.getenv('CUDA_VISIBLE_DEVICES', None)
+    if gpu_list is not None:
+        return len(gpu_list.split(','))
+    with os.popen("nvidia-smi -L") as pipe:  # context manager closes the pipe
+        devices_list_info = pipe.read().strip().split("\n")
+    return len(devices_list_info)
+
+
+@contextmanager
+def wait_for_the_master(local_rank: int = None):
+    """
+    Make all processes waiting for the master to do some task.
+
+    Args:
+        local_rank (int): the rank of the current process. Default to None.
+            If None, it will use the rank of the current process.
+    """
+    if local_rank is None:
+        local_rank = get_local_rank()
+
+    if local_rank > 0:
+        dist.barrier()
+    yield
+    if local_rank == 0:
+        if not dist.is_available():
+            return
+        if not dist.is_initialized():
+            return
+        else:
+            dist.barrier()
+
+
+def synchronize():
+    """
+    Helper function to synchronize (barrier) among all processes when using distributed training
+    """
+    if not dist.is_available():
+        return
+    if not dist.is_initialized():
+        return
+    world_size = dist.get_world_size()
+    if world_size == 1:
+        return
+    dist.barrier()
+
+
+def get_world_size() -> int:
+    """Return the number of processes, or 1 when torch.distributed is not in use."""
+    if dist.is_available() and dist.is_initialized():
+        return dist.get_world_size()
+    # no usable process group: behave as a single process
+    return 1
+
+
+def get_rank() -> int:
+    """Return this process's global rank, or 0 when torch.distributed is not in use."""
+    if dist.is_available() and dist.is_initialized():
+        return dist.get_rank()
+    # no usable process group: behave as the single main process
+    return 0
+
+
+def get_local_rank() -> int:
+    """
+    Returns:
+        The rank of the current process within the local (per-machine) process group.
+    """
+    if _LOCAL_PROCESS_GROUP is None:
+        return get_rank()
+
+    if not dist.is_available():
+        return 0
+    if not dist.is_initialized():
+        return 0
+    return dist.get_rank(group=_LOCAL_PROCESS_GROUP)
+
+
+def get_local_size() -> int:
+    """
+    Returns:
+        The size of the per-machine process group, i.e. the number of processes per machine.
+    """
+    if not dist.is_available():
+        return 1
+    if not dist.is_initialized():
+        return 1
+    return dist.get_world_size(group=_LOCAL_PROCESS_GROUP)
+
+
+def is_main_process() -> bool:
+    return get_rank() == 0
+
+
+@functools.lru_cache()
+def _get_global_gloo_group():
+    """
+    Return a process group based on gloo backend, containing all the ranks
+    The result is cached.
+    """
+    if dist.get_backend() == "nccl":
+        return dist.new_group(backend="gloo")
+    else:
+        return dist.group.WORLD
+
+
+def _serialize_to_tensor(data, group):
+    backend = dist.get_backend(group)
+    assert backend in ["gloo", "nccl"]
+    device = torch.device("cpu" if backend == "gloo" else "cuda")
+
+    buffer = pickle.dumps(data)
+    if len(buffer) > 1024 ** 3:
+        logger.warning(
+            "Rank {} trying to all-gather {:.2f} GB of data on device {}".format(
+                get_rank(), len(buffer) / (1024 ** 3), device
+            )
+        )
+    storage = torch.ByteStorage.from_buffer(buffer)
+    tensor = torch.ByteTensor(storage).to(device=device)
+    return tensor
+
+
+def _pad_to_largest_tensor(tensor, group):
+    """
+    Returns:
+        list[int]: size of the tensor, on each rank
+        Tensor: padded tensor that has the max size
+    """
+    world_size = dist.get_world_size(group=group)
+    assert (
+        world_size >= 1
+    ), "comm.gather/all_gather must be called from ranks within the given group!"
+    local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device)
+    size_list = [
+        torch.zeros([1], dtype=torch.int64, device=tensor.device)
+        for _ in range(world_size)
+    ]
+    dist.all_gather(size_list, local_size, group=group)
+    size_list = [int(size.item()) for size in size_list]
+
+    max_size = max(size_list)
+
+    # we pad the tensor because torch all_gather does not support
+    # gathering tensors of different shapes
+    if local_size != max_size:
+        padding = torch.zeros(
+            (max_size - local_size,), dtype=torch.uint8, device=tensor.device
+        )
+        tensor = torch.cat((tensor, padding), dim=0)
+    return size_list, tensor
+
+
+def all_gather(data, group=None):
+    """
+    Run all_gather on arbitrary picklable data (not necessarily tensors).
+
+    Args:
+        data: any picklable object
+        group: a torch process group. By default, will use a group which
+            contains all ranks on gloo backend.
+    Returns:
+        list[data]: list of data gathered from each rank
+    """
+    if get_world_size() == 1:
+        return [data]
+    if group is None:
+        group = _get_global_gloo_group()
+    if dist.get_world_size(group) == 1:
+        return [data]
+
+    tensor = _serialize_to_tensor(data, group)
+
+    size_list, tensor = _pad_to_largest_tensor(tensor, group)
+    max_size = max(size_list)
+
+    # receiving Tensor from all ranks
+    tensor_list = [
+        torch.empty((max_size,), dtype=torch.uint8, device=tensor.device)
+        for _ in size_list
+    ]
+    dist.all_gather(tensor_list, tensor, group=group)
+
+    data_list = []
+    for size, tensor in zip(size_list, tensor_list):
+        buffer = tensor.cpu().numpy().tobytes()[:size]
+        data_list.append(pickle.loads(buffer))
+
+    return data_list
+
+
+def gather(data, dst=0, group=None):
+    """
+    Run gather on arbitrary picklable data (not necessarily tensors).
+
+    Args:
+        data: any picklable object
+        dst (int): destination rank
+        group: a torch process group. By default, will use a group which
+            contains all ranks on gloo backend.
+
+    Returns:
+        list[data]: on dst, a list of data gathered from each rank. Otherwise,
+            an empty list.
+    """
+    if get_world_size() == 1:
+        return [data]
+    if group is None:
+        group = _get_global_gloo_group()
+    if dist.get_world_size(group=group) == 1:
+        return [data]
+    rank = dist.get_rank(group=group)
+
+    tensor = _serialize_to_tensor(data, group)
+    size_list, tensor = _pad_to_largest_tensor(tensor, group)
+
+    # receiving Tensor from all ranks
+    if rank == dst:
+        max_size = max(size_list)
+        tensor_list = [
+            torch.empty((max_size,), dtype=torch.uint8, device=tensor.device)
+            for _ in size_list
+        ]
+        dist.gather(tensor, tensor_list, dst=dst, group=group)
+
+        data_list = []
+        for size, tensor in zip(size_list, tensor_list):
+            buffer = tensor.cpu().numpy().tobytes()[:size]
+            data_list.append(pickle.loads(buffer))
+        return data_list
+    else:
+        dist.gather(tensor, [], dst=dst, group=group)
+        return []
+
+
+def shared_random_seed():
+    """
+    Returns:
+        int: a random number that is the same across all workers.
+            If workers need a shared RNG, they can use this shared seed to
+            create one.
+    All workers must call this function, otherwise it will deadlock.
+    """
+    ints = np.random.randint(2 ** 31)
+    all_ints = all_gather(ints)
+    return all_ints[0]
+
+
+def time_synchronized():
+    """pytorch-accurate time"""
+    if torch.cuda.is_available():
+        torch.cuda.synchronize()
+    return time.time()
\ No newline at end of file
diff --git a/yolort/utils/ema.py b/yolort/utils/ema.py
new file mode 100644
index 00000000..364e8c87
--- /dev/null
+++ b/yolort/utils/ema.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii Inc. All rights reserved.
+import math
+from copy import deepcopy
+
+import torch
+import torch.nn as nn
+
+__all__ = ["ModelEMA", "is_parallel"]
+
+
+def is_parallel(model):
+    """check if model is in parallel mode."""
+    parallel_type = (
+        nn.parallel.DataParallel,
+        nn.parallel.DistributedDataParallel,
+    )
+    return isinstance(model, parallel_type)
+
+
+class ModelEMA:
+    """
+    Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
+    Keep a moving average of everything in the model state_dict (parameters and buffers).
+    This is intended to allow functionality like
+    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
+    A smoothed version of the weights is necessary for some training schemes to perform well.
+    This class is sensitive to where it is initialized in the sequence of model init,
+    GPU assignment and distributed training wrappers.
+    """
+
+    def __init__(self, model, decay=0.9999, updates=0):
+        """
+        Args:
+            model (nn.Module): model to apply EMA.
+            decay (float): ema decay rate.
+            updates (int): counter of EMA updates.
+        """
+        # Create EMA(FP32)
+        self.ema = deepcopy(model.module if is_parallel(model) else model).eval()
+        self.updates = updates
+        # decay exponential ramp (to help early epochs)
+        self.decay = lambda x: decay * (1 - math.exp(-x / 2000))
+        for p in self.ema.parameters():
+            p.requires_grad_(False)
+
+    def update(self, model):
+        # Update EMA parameters
+        with torch.no_grad():
+            self.updates += 1
+            d = self.decay(self.updates)
+
+            msd = (
+                model.module.state_dict() if is_parallel(model) else model.state_dict()
+            )  # model state_dict
+            for k, v in self.ema.state_dict().items():
+                if v.dtype.is_floating_point:
+                    v *= d
+                    v += (1.0 - d) * msd[k].detach()
\ No newline at end of file
diff --git a/yolort/utils/logger.py b/yolort/utils/logger.py
index 866c189c..00f1d125 100644
--- a/yolort/utils/logger.py
+++ b/yolort/utils/logger.py
@@ -1,5 +1,11 @@
-import datetime
+import os
+import sys
+import cv2
 import time
+import datetime
+import inspect
+import numpy as np
+from loguru import logger
 from collections import defaultdict, deque
 
 import torch
@@ -197,3 +203,429 @@ def get_rank():
 
 def is_main_process():
     return get_rank() == 0
+
+def get_caller_name(depth=0):
+    """
+    Args:
+        depth (int): Depth of the caller context; use 0 for the immediate caller.
+        Default value: 0.
+
+    Returns:
+        str: module name of the caller
+    """
+    # the following logic is a little bit faster than inspect.stack() logic
+    frame = inspect.currentframe().f_back
+    for _ in range(depth):
+        frame = frame.f_back
+
+    return frame.f_globals["__name__"]
+
+
+class StreamToLoguru:
+    """
+    stream object that redirects writes to a logger instance.
+    """
+
+    def __init__(self, level="INFO", caller_names=("apex", "pycocotools")):
+        """
+        Args:
+            level(str): log level string of loguru. Default value: "INFO".
+            caller_names(tuple): caller names of redirected module.
+                Default value: (apex, pycocotools).
+        """
+        self.level = level
+        self.linebuf = ""
+        self.caller_names = caller_names
+
+    def write(self, buf):
+        full_name = get_caller_name(depth=1)
+        module_name = full_name.rsplit(".", maxsplit=-1)[0]
+        if module_name in self.caller_names:
+            for line in buf.rstrip().splitlines():
+                # use caller level log
+                logger.opt(depth=2).log(self.level, line.rstrip())
+        else:
+            sys.__stdout__.write(buf)
+
+    def flush(self):
+        # flush is related with CPR(cursor position report) in terminal
+        return sys.__stdout__.flush()
+
+    def isatty(self):
+        # when using colab, jax is installed by default and issue like
+        # https://github.com/Megvii-BaseDetection/YOLOX/issues/1437 might be raised
+        # due to missing attribute like`isatty`.
+        # For more details, check the following link:
+        # https://github.com/google/jax/blob/10720258ea7fb5bde997dfa2f3f71135ab7a6733/jax/_src/pretty_printer.py#L54  # noqa
+        return sys.__stdout__.isatty()
+
+    def fileno(self):
+        # To solve the issue when using debug tools like pdb
+        return sys.__stdout__.fileno()
+
+
+def redirect_sys_output(log_level="INFO"):
+    redirect_logger = StreamToLoguru(log_level)
+    sys.stderr = redirect_logger
+    sys.stdout = redirect_logger
+
+
+def setup_logger(save_dir, distributed_rank=0, filename="log.txt", mode="a"):
+    """setup logger for training and testing.
+    Args:
+        save_dir(str): location to save log file
+        distributed_rank(int): device rank when multi-gpu environment
+        filename (string): log save name.
+        mode(str): log file write mode. "o" (override) deletes an existing log file first; default is "a".
+
+    Return:
+        logger instance.
+    """
+    loguru_format = (
+        "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | "
+        "<level>{level: <8}</level> | "
+        "<cyan>{name}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>"
+    )
+
+    logger.remove()
+    save_file = os.path.join(save_dir, filename)
+    if mode == "o" and os.path.exists(save_file):
+        os.remove(save_file)
+    # only keep logger in rank0 process
+    if distributed_rank == 0:
+        logger.add(
+            sys.stderr,
+            format=loguru_format,
+            level="INFO",
+            enqueue=True,
+        )
+        logger.add(save_file)
+
+    # redirect stdout/stderr to loguru
+    redirect_sys_output("INFO")
+
+
+class WandbLogger(object):
+    """
+    Log training runs, datasets, models, and predictions to Weights & Biases.
+    This logger sends information to W&B at wandb.ai.
+    By default, this information includes hyperparameters,
+    system configuration and metrics, model metrics,
+    and basic data metrics and analyses.
+
+    For more information, please refer to:
+    https://docs.wandb.ai/guides/track
+    https://docs.wandb.ai/guides/integrations/other/yolox
+    """
+    def __init__(self,
+                 project=None,
+                 name=None,
+                 id=None,
+                 entity=None,
+                 save_dir=None,
+                 config=None,
+                 val_dataset=None,
+                 num_eval_images=100,
+                 log_checkpoints=False,
+                 **kwargs):
+        """
+        Args:
+            project (str): wandb project name.
+            name (str): wandb run name.
+            id (str): wandb run id.
+            entity (str): wandb entity name.
+            save_dir (str): save directory.
+            config (dict): config dict.
+            val_dataset (Dataset): validation dataset.
+            num_eval_images (int): number of images from the validation set to log.
+            log_checkpoints (bool): log checkpoints
+            **kwargs: other kwargs.
+
+        Usage:
+            Any arguments for wandb.init can be provided on the command line using
+            the prefix `wandb-`.
+            Example
+            ```
+            python tools/train.py .... --logger wandb wandb-project <project-name> \
+                wandb-name <run-name> \
+                wandb-id <run-id> \
+                wandb-save_dir <save-dir> \
+                wandb-num_eval_images <num-images> \
+                wandb-log_checkpoints <bool>
+            ```
+            The val_dataset argument is not open to the command line.
+        """
+        try:
+            import wandb
+            self.wandb = wandb
+        except ModuleNotFoundError:
+            raise ModuleNotFoundError(
+                "wandb is not installed."
+                "Please install wandb using pip install wandb"
+                )
+
+        from yolox.data.datasets import VOCDetection
+
+        self.project = project
+        self.name = name
+        self.id = id
+        self.save_dir = save_dir
+        self.config = config
+        self.kwargs = kwargs
+        self.entity = entity
+        self._run = None
+        self.val_artifact = None
+        if num_eval_images == -1:
+            self.num_log_images = len(val_dataset)
+        else:
+            self.num_log_images = min(num_eval_images, len(val_dataset))
+        self.log_checkpoints = (log_checkpoints == "True" or log_checkpoints == "true")
+        self._wandb_init = dict(
+            project=self.project,
+            name=self.name,
+            id=self.id,
+            entity=self.entity,
+            dir=self.save_dir,
+            resume="allow"
+        )
+        self._wandb_init.update(**kwargs)
+
+        _ = self.run
+
+        if self.config:
+            self.run.config.update(self.config)
+        self.run.define_metric("train/epoch")
+        self.run.define_metric("val/*", step_metric="train/epoch")
+        self.run.define_metric("train/step")
+        self.run.define_metric("train/*", step_metric="train/step")
+
+        self.voc_dataset = VOCDetection
+
+        if val_dataset and self.num_log_images != 0:
+            self.val_dataset = val_dataset
+            self.cats = val_dataset.cats
+            self.id_to_class = {
+                cls['id']: cls['name'] for cls in self.cats
+            }
+            self._log_validation_set(val_dataset)
+
+    @property
+    def run(self):
+        if self._run is None:
+            if self.wandb.run is not None:
+                logger.info(
+                    "There is a wandb run already in progress "
+                    "and newly created instances of `WandbLogger` will reuse"
+                    " this run. If this is not desired, call `wandb.finish()`"
+                    "before instantiating `WandbLogger`."
+                )
+                self._run = self.wandb.run
+            else:
+                self._run = self.wandb.init(**self._wandb_init)
+        return self._run
+
+    def _log_validation_set(self, val_dataset):
+        """
+        Log validation set to wandb.
+
+        Args:
+            val_dataset (Dataset): validation dataset.
+        """
+        if self.val_artifact is None:
+            self.val_artifact = self.wandb.Artifact(name="validation_images", type="dataset")
+            self.val_table = self.wandb.Table(columns=["id", "input"])
+
+            for i in range(self.num_log_images):
+                data_point = val_dataset[i]
+                img = data_point[0]
+                id = data_point[3]
+                img = np.transpose(img, (1, 2, 0))
+                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+                if isinstance(id, torch.Tensor):
+                    id = id.item()
+
+                self.val_table.add_data(
+                    id,
+                    self.wandb.Image(img)
+                )
+
+            self.val_artifact.add(self.val_table, "validation_images_table")
+            self.run.use_artifact(self.val_artifact)
+            self.val_artifact.wait()
+
+    def _convert_prediction_format(self, predictions):
+        image_wise_data = defaultdict(int)
+
+        for key, val in predictions.items():
+            img_id = key
+
+            try:
+                bboxes, cls, scores = val
+            except KeyError:
+                bboxes, cls, scores = val["bboxes"], val["categories"], val["scores"]
+
+            # These store information of actual bounding boxes i.e. the ones which are not None
+            act_box = []
+            act_scores = []
+            act_cls = []
+
+            if bboxes is not None:
+                for box, classes, score in zip(bboxes, cls, scores):
+                    if box is None or score is None or classes is None:
+                        continue
+                    act_box.append(box)
+                    act_scores.append(score)
+                    act_cls.append(classes)
+
+            image_wise_data.update({
+                int(img_id): {
+                    "bboxes": [box.numpy().tolist() for box in act_box],
+                    "scores": [score.numpy().item() for score in act_scores],
+                    "categories": [
+                        self.val_dataset.class_ids[int(act_cls[ind])]
+                        for ind in range(len(act_box))
+                    ],
+                }
+            })
+
+        return image_wise_data
+
+    def log_metrics(self, metrics, step=None):
+        """
+        Args:
+            metrics (dict): metrics dict.
+            step (int): step number.
+        """
+
+        for k, v in metrics.items():
+            if isinstance(v, torch.Tensor):
+                metrics[k] = v.item()
+
+        if step is not None:
+            metrics.update({"train/step": step})
+            self.run.log(metrics)
+        else:
+            self.run.log(metrics)
+
+    def log_images(self, predictions):
+        if len(predictions) == 0 or self.val_artifact is None or self.num_log_images == 0:
+            return
+
+        table_ref = self.val_artifact.get("validation_images_table")
+
+        columns = ["id", "predicted"]
+        for cls in self.cats:
+            columns.append(cls["name"])
+
+        if isinstance(self.val_dataset, self.voc_dataset):
+            predictions = self._convert_prediction_format(predictions)
+
+        result_table = self.wandb.Table(columns=columns)
+
+        for idx, val in table_ref.iterrows():
+
+            avg_scores = defaultdict(int)
+            num_occurrences = defaultdict(int)
+
+            id = val[0]
+            if isinstance(id, list):
+                id = id[0]
+
+            if id in predictions:
+                prediction = predictions[id]
+                boxes = []
+                for i in range(len(prediction["bboxes"])):
+                    bbox = prediction["bboxes"][i]
+                    x0 = bbox[0]
+                    y0 = bbox[1]
+                    x1 = bbox[2]
+                    y1 = bbox[3]
+                    box = {
+                        "position": {
+                            "minX": min(x0, x1),
+                            "minY": min(y0, y1),
+                            "maxX": max(x0, x1),
+                            "maxY": max(y0, y1)
+                        },
+                        "class_id": prediction["categories"][i],
+                        "domain": "pixel"
+                    }
+                    avg_scores[
+                        self.id_to_class[prediction["categories"][i]]
+                    ] += prediction["scores"][i]
+                    num_occurrences[self.id_to_class[prediction["categories"][i]]] += 1
+                    boxes.append(box)
+            else:
+                boxes = []
+            average_class_score = []
+            for cls in self.cats:
+                if cls["name"] not in num_occurrences:
+                    score = 0
+                else:
+                    score = avg_scores[cls["name"]] / num_occurrences[cls["name"]]
+                average_class_score.append(score)
+            result_table.add_data(
+                idx,
+                self.wandb.Image(val[1], boxes={
+                        "prediction": {
+                            "box_data": boxes,
+                            "class_labels": self.id_to_class
+                        }
+                    }
+                ),
+                *average_class_score
+            )
+
+        self.wandb.log({"val_results/result_table": result_table})
+
+    def save_checkpoint(self, save_dir, model_name, is_best, metadata=None):
+        """
+        Args:
+            save_dir (str): save directory.
+            model_name (str): model name.
+            is_best (bool): whether the model is the best model.
+            metadata (dict): metadata to save corresponding to the checkpoint.
+        """
+
+        if not self.log_checkpoints:
+            return
+
+        if "epoch" in metadata:
+            epoch = metadata["epoch"]
+        else:
+            epoch = None
+
+        filename = os.path.join(save_dir, model_name + "_ckpt.pth")
+        artifact = self.wandb.Artifact(
+            name=f"run_{self.run.id}_model",
+            type="model",
+            metadata=metadata
+        )
+        artifact.add_file(filename, name="model_ckpt.pth")
+
+        aliases = ["latest"]
+
+        if is_best:
+            aliases.append("best")
+
+        if epoch:
+            aliases.append(f"epoch-{epoch}")
+
+        self.run.log_artifact(artifact, aliases=aliases)
+
+    def finish(self):
+        self.run.finish()
+
+    @classmethod
+    def initialize_wandb_logger(cls, args, exp, val_dataset):
+        wandb_params = dict()
+        prefix = "wandb-"
+        for k, v in zip(args.opts[0::2], args.opts[1::2]):
+            if k.startswith("wandb-"):
+                try:
+                    wandb_params.update({k[len(prefix):]: int(v)})
+                except ValueError:
+                    wandb_params.update({k[len(prefix):]: v})
+
+        return cls(config=vars(exp), val_dataset=val_dataset, **wandb_params)
\ No newline at end of file
diff --git a/yolort/utils/lr_scheduler.py b/yolort/utils/lr_scheduler.py
new file mode 100644
index 00000000..42c00cf2
--- /dev/null
+++ b/yolort/utils/lr_scheduler.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii Inc. All rights reserved.
+
+import math
+from functools import partial
+
+
+class LRScheduler:
+    def __init__(self, name, lr, iters_per_epoch, total_epochs, **kwargs):
+        """
+        Supported lr schedulers: [cos, warmcos, multistep]
+
+        Args:
+            lr (float): learning rate.
+            iters_per_epoch (int): number of iterations in one epoch.
+            total_epochs (int): number of epochs in training.
+            kwargs (dict):
+                - cos: None
+                - warmcos: [warmup_epochs, warmup_lr_start (default 1e-6)]
+                - multistep: [milestones (epochs), gamma (default 0.1)]
+        """
+
+        self.lr = lr
+        self.iters_per_epoch = iters_per_epoch
+        self.total_epochs = total_epochs
+        self.total_iters = iters_per_epoch * total_epochs
+
+        self.__dict__.update(kwargs)
+
+        self.lr_func = self._get_lr_func(name)
+
+    def update_lr(self, iters):
+        return self.lr_func(iters)
+
+    def _get_lr_func(self, name):
+        if name == "cos":  # cosine lr schedule
+            lr_func = partial(cos_lr, self.lr, self.total_iters)
+        elif name == "warmcos":
+            warmup_total_iters = self.iters_per_epoch * self.warmup_epochs
+            warmup_lr_start = getattr(self, "warmup_lr_start", 1e-6)
+            lr_func = partial(
+                warm_cos_lr,
+                self.lr,
+                self.total_iters,
+                warmup_total_iters,
+                warmup_lr_start,
+            )
+        elif name == "yoloxwarmcos":
+            warmup_total_iters = self.iters_per_epoch * self.warmup_epochs
+            no_aug_iters = self.iters_per_epoch * self.no_aug_epochs
+            warmup_lr_start = getattr(self, "warmup_lr_start", 0)
+            min_lr_ratio = getattr(self, "min_lr_ratio", 0.2)
+            lr_func = partial(
+                yolox_warm_cos_lr,
+                self.lr,
+                min_lr_ratio,
+                self.total_iters,
+                warmup_total_iters,
+                warmup_lr_start,
+                no_aug_iters,
+            )
+        elif name == "yoloxsemiwarmcos":
+            warmup_lr_start = getattr(self, "warmup_lr_start", 0)
+            min_lr_ratio = getattr(self, "min_lr_ratio", 0.2)
+            warmup_total_iters = self.iters_per_epoch * self.warmup_epochs
+            no_aug_iters = self.iters_per_epoch * self.no_aug_epochs
+            normal_iters = self.iters_per_epoch * self.semi_epoch
+            semi_iters = self.iters_per_epoch_semi * (
+                self.total_epochs - self.semi_epoch - self.no_aug_epochs
+            )
+            lr_func = partial(
+                yolox_semi_warm_cos_lr,
+                self.lr,
+                min_lr_ratio,
+                warmup_lr_start,
+                self.total_iters,
+                normal_iters,
+                no_aug_iters,
+                warmup_total_iters,
+                semi_iters,
+                self.iters_per_epoch,
+                self.iters_per_epoch_semi,
+            )
+        elif name == "multistep":  # stepwise lr schedule
+            milestones = [
+                int(self.total_iters * milestone / self.total_epochs)
+                for milestone in self.milestones
+            ]
+            gamma = getattr(self, "gamma", 0.1)
+            lr_func = partial(multistep_lr, self.lr, milestones, gamma)
+        else:
+            raise ValueError("Scheduler version {} not supported.".format(name))
+        return lr_func
+
+
+def cos_lr(lr, total_iters, iters):
+    """Cosine learning rate"""
+    lr *= 0.5 * (1.0 + math.cos(math.pi * iters / total_iters))
+    return lr
+
+
+def warm_cos_lr(lr, total_iters, warmup_total_iters, warmup_lr_start, iters):
+    """Cosine learning rate with warm up."""
+    if iters <= warmup_total_iters:
+        lr = (lr - warmup_lr_start) * iters / float(
+            warmup_total_iters
+        ) + warmup_lr_start
+    else:
+        lr *= 0.5 * (
+            1.0
+            + math.cos(
+                math.pi
+                * (iters - warmup_total_iters)
+                / (total_iters - warmup_total_iters)
+            )
+        )
+    return lr
+
+
+def yolox_warm_cos_lr(
+    lr,
+    min_lr_ratio,
+    total_iters,
+    warmup_total_iters,
+    warmup_lr_start,
+    no_aug_iter,
+    iters,
+):
+    """Cosine learning rate with warm up."""
+    min_lr = lr * min_lr_ratio
+    if iters <= warmup_total_iters:
+        # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start
+        lr = (lr - warmup_lr_start) * pow(
+            iters / float(warmup_total_iters), 2
+        ) + warmup_lr_start
+    elif iters >= total_iters - no_aug_iter:
+        lr = min_lr
+    else:
+        lr = min_lr + 0.5 * (lr - min_lr) * (
+            1.0
+            + math.cos(
+                math.pi
+                * (iters - warmup_total_iters)
+                / (total_iters - warmup_total_iters - no_aug_iter)
+            )
+        )
+    return lr
+
+
+def yolox_semi_warm_cos_lr(
+    lr,
+    min_lr_ratio,
+    warmup_lr_start,
+    total_iters,
+    normal_iters,
+    no_aug_iters,
+    warmup_total_iters,
+    semi_iters,
+    iters_per_epoch,
+    iters_per_epoch_semi,
+    iters,
+):
+    """Cosine learning rate with warm up."""
+    min_lr = lr * min_lr_ratio
+    if iters <= warmup_total_iters:
+        # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start
+        lr = (lr - warmup_lr_start) * pow(
+            iters / float(warmup_total_iters), 2
+        ) + warmup_lr_start
+    elif iters >= normal_iters + semi_iters:
+        lr = min_lr
+    elif iters <= normal_iters:
+        lr = min_lr + 0.5 * (lr - min_lr) * (
+            1.0
+            + math.cos(
+                math.pi
+                * (iters - warmup_total_iters)
+                / (total_iters - warmup_total_iters - no_aug_iters)
+            )
+        )
+    else:
+        lr = min_lr + 0.5 * (lr - min_lr) * (
+            1.0
+            + math.cos(
+                math.pi
+                * (
+                    normal_iters
+                    - warmup_total_iters
+                    + (iters - normal_iters)
+                    * iters_per_epoch
+                    * 1.0
+                    / iters_per_epoch_semi
+                )
+                / (total_iters - warmup_total_iters - no_aug_iters)
+            )
+        )
+    return lr
+
+
+def multistep_lr(lr, milestones, gamma, iters):
+    """MultiStep learning rate"""
+    for milestone in milestones:
+        lr *= gamma if iters >= milestone else 1.0
+    return lr
diff --git a/yolort/utils/metric.py b/yolort/utils/metric.py
new file mode 100644
index 00000000..f04013a3
--- /dev/null
+++ b/yolort/utils/metric.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright (c) Megvii Inc. All rights reserved.
+import functools
+import os
+import time
+from collections import defaultdict, deque
+import psutil
+
+import numpy as np
+
+import torch
+
+__all__ = [
+    "AverageMeter",
+    "MeterBuffer",
+    "get_total_and_free_memory_in_Mb",
+    "occupy_mem",
+    "gpu_mem_usage",
+    "mem_usage"
+]
+
+
+def get_total_and_free_memory_in_Mb(cuda_device):
+    devices_info_str = os.popen(
+        "nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader"
+    )
+    devices_info = devices_info_str.read().strip().split("\n")
+    if "CUDA_VISIBLE_DEVICES" in os.environ:
+        visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(',')
+        cuda_device = int(visible_devices[cuda_device])
+    total, used = devices_info[int(cuda_device)].split(",")
+    return int(total), int(used)
+
+
+def occupy_mem(cuda_device, mem_ratio=0.9):
+    """
+    pre-allocate gpu memory for training to avoid memory Fragmentation.
+    """
+    total, used = get_total_and_free_memory_in_Mb(cuda_device)
+    max_mem = int(total * mem_ratio)
+    block_mem = max_mem - used
+    x = torch.cuda.FloatTensor(256, 1024, block_mem)
+    del x
+    time.sleep(5)
+
+
+def gpu_mem_usage():
+    """
+    Compute the GPU memory usage for the current device (MB).
+    """
+    mem_usage_bytes = torch.cuda.max_memory_allocated()
+    return mem_usage_bytes / (1024 * 1024)
+
+
+def mem_usage():
+    """
+    Compute the memory usage for the current machine (GB).
+    """
+    gb = 1 << 30
+    mem = psutil.virtual_memory()
+    return mem.used / gb
+
+
+class AverageMeter:
+    """Track a series of values and provide access to smoothed values over a
+    window or the global series average.
+    """
+
+    def __init__(self, window_size=50):
+        self._deque = deque(maxlen=window_size)
+        self._total = 0.0
+        self._count = 0
+
+    def update(self, value):
+        self._deque.append(value)
+        self._count += 1
+        self._total += value
+
+    @property
+    def median(self):
+        d = np.array(list(self._deque))
+        return np.median(d)
+
+    @property
+    def avg(self):
+        # if deque is empty, nan will be returned.
+        d = np.array(list(self._deque))
+        return d.mean()
+
+    @property
+    def global_avg(self):
+        return self._total / max(self._count, 1e-5)
+
+    @property
+    def latest(self):
+        return self._deque[-1] if len(self._deque) > 0 else None
+
+    @property
+    def total(self):
+        return self._total
+
+    def reset(self):
+        self._deque.clear()
+        self._total = 0.0
+        self._count = 0
+
+    def clear(self):
+        self._deque.clear()
+
+
+class MeterBuffer(defaultdict):
+    """Computes and stores the average and current value"""
+
+    def __init__(self, window_size=20):
+        factory = functools.partial(AverageMeter, window_size=window_size)
+        super().__init__(factory)
+
+    def reset(self):
+        for v in self.values():
+            v.reset()
+
+    def get_filtered_meter(self, filter_key="time"):
+        return {k: v for k, v in self.items() if filter_key in k}
+
+    def update(self, values=None, **kwargs):
+        if values is None:
+            values = {}
+        values.update(kwargs)
+        for k, v in values.items():
+            if isinstance(v, torch.Tensor):
+                v = v.detach()
+            self[k].update(v)
+
+    def clear_meters(self):
+        for v in self.values():
+            v.clear()
\ No newline at end of file
diff --git a/yolort/utils/model_utils.py b/yolort/utils/model_utils.py
new file mode 100644
index 00000000..0b848888
--- /dev/null
+++ b/yolort/utils/model_utils.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii Inc. All rights reserved.
+
+import contextlib
+from copy import deepcopy
+from typing import Sequence
+
+import torch
+import torch.nn as nn
+
+__all__ = [
+    "get_model_info",
+    "adjust_status",
+]
+
+
+def get_model_info(model: nn.Module, tsize: Sequence[int]) -> str:
+    from thop import profile
+
+    stride = 64
+    img = torch.zeros((1, 3, stride, stride), device=next(model.parameters()).device)
+    flops, params = profile(deepcopy(model), inputs=(img,), verbose=False)
+    params /= 1e6
+    flops /= 1e9
+    flops *= tsize[0] * tsize[1] / stride / stride * 2  # Gflops
+    info = "Params: {:.2f}M, Gflops: {:.2f}".format(params, flops)
+    return info
+
+
+@contextlib.contextmanager
+def adjust_status(module: nn.Module, training: bool = False) -> nn.Module:
+    """Adjust module to training/eval mode temporarily.
+
+    Args:
+        module (nn.Module): module to adjust status.
+        training (bool): training mode to set. True for train mode, False for eval mode.
+
+    Examples:
+        >>> with adjust_status(model, training=False):
+        ...     model(data)
+    """
+    status = {}
+
+    def backup_status(module):
+        for m in module.modules():
+            # save prev status to dict
+            status[m] = m.training
+            m.training = training
+
+    def recover_status(module):
+        for m in module.modules():
+            # recover prev status from dict
+            m.training = status.pop(m)
+
+    backup_status(module)
+    yield module
+    recover_status(module)
\ No newline at end of file

From 9a245802fdfe70209c3fa50f7cdedd4cd2b36494 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 22 Sep 2023 04:05:06 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 exps/default/yolov5l.py                  |   6 +-
 exps/default/yolov5m.py                  |   6 +-
 exps/default/yolov5m6.py                 |   6 +-
 exps/default/yolov5n.py                  |   6 +-
 exps/default/yolov5n6.py                 |   6 +-
 exps/default/yolov5s.py                  |   6 +-
 exps/default/yolov5s6.py                 |   6 +-
 exps/default/yolov5ts.py                 |   6 +-
 requirements.txt                         |   2 +-
 test/test_data_pipeline.py               |   9 +-
 test/test_trainer.py                     |  27 +++---
 tools/eval_metric.py                     |   2 +-
 yolort/data/__init__.py                  |   2 +-
 yolort/data/data_augment.py              |  12 ++-
 yolort/data/data_module.py               |   1 +
 yolort/data/dataloading.py               |   3 +-
 yolort/data/datasets/coco.py             |  10 +--
 yolort/data/datasets/datasets_wrapper.py |  56 ++++++------
 yolort/data/datasets/mosaicdetection.py  |  54 +++++-------
 yolort/data/samplers.py                  |   7 +-
 yolort/evaluators/__init__.py            |   2 +-
 yolort/evaluators/coco_evaluator.py      |  79 ++++++++---------
 yolort/exp/__init__.py                   |   2 +-
 yolort/exp/base_exp.py                   |  12 +--
 yolort/exp/default/__init__.py           |   1 -
 yolort/exp/yolox_base.py                 |  67 ++++++---------
 yolort/trainer/trainer.py                |  71 ++++++---------
 yolort/utils/__init__.py                 |   2 +-
 yolort/utils/allreduce_norm.py           |   6 +-
 yolort/utils/boxes.py                    |   4 +-
 yolort/utils/checkpoint.py               |   6 +-
 yolort/utils/dist.py                     |  37 +++-----
 yolort/utils/ema.py                      |   6 +-
 yolort/utils/logger.py                   | 105 +++++++++++------------
 yolort/utils/lr_scheduler.py             |  35 ++------
 yolort/utils/metric.py                   |   8 +-
 yolort/utils/model_utils.py              |   2 +-
 37 files changed, 292 insertions(+), 386 deletions(-)

diff --git a/exps/default/yolov5l.py b/exps/default/yolov5l.py
index b04d0f90..a838ae16 100644
--- a/exps/default/yolov5l.py
+++ b/exps/default/yolov5l.py
@@ -15,6 +15,8 @@ def __init__(self):
         self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
 
     def get_model(self):
-        self.model = models.__dict__['yolov5l'](upstream_version="r6.0",)
+        self.model = models.__dict__["yolov5l"](
+            upstream_version="r6.0",
+        )
         self.model.train()
-        return self.model
\ No newline at end of file
+        return self.model
diff --git a/exps/default/yolov5m.py b/exps/default/yolov5m.py
index e33c2771..cd4ab778 100644
--- a/exps/default/yolov5m.py
+++ b/exps/default/yolov5m.py
@@ -15,6 +15,8 @@ def __init__(self):
         self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
 
     def get_model(self):
-        self.model = models.__dict__['yolov5m'](upstream_version="r6.0",)
+        self.model = models.__dict__["yolov5m"](
+            upstream_version="r6.0",
+        )
         self.model.train()
-        return self.model
\ No newline at end of file
+        return self.model
diff --git a/exps/default/yolov5m6.py b/exps/default/yolov5m6.py
index 4ac71156..55c7e504 100644
--- a/exps/default/yolov5m6.py
+++ b/exps/default/yolov5m6.py
@@ -15,6 +15,8 @@ def __init__(self):
         self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
 
     def get_model(self):
-        self.model = models.__dict__['yolov5m6'](upstream_version="r6.0",)
+        self.model = models.__dict__["yolov5m6"](
+            upstream_version="r6.0",
+        )
         self.model.train()
-        return self.model
\ No newline at end of file
+        return self.model
diff --git a/exps/default/yolov5n.py b/exps/default/yolov5n.py
index 72bf63e8..a36cb8e4 100644
--- a/exps/default/yolov5n.py
+++ b/exps/default/yolov5n.py
@@ -15,6 +15,8 @@ def __init__(self):
         self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
 
     def get_model(self):
-        self.model = models.__dict__['yolov5n'](upstream_version="r6.0",)
+        self.model = models.__dict__["yolov5n"](
+            upstream_version="r6.0",
+        )
         self.model.train()
-        return self.model
\ No newline at end of file
+        return self.model
diff --git a/exps/default/yolov5n6.py b/exps/default/yolov5n6.py
index 3ac2cfd2..9cda7acc 100644
--- a/exps/default/yolov5n6.py
+++ b/exps/default/yolov5n6.py
@@ -15,6 +15,8 @@ def __init__(self):
         self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
 
     def get_model(self):
-        self.model = models.__dict__['yolov5n6'](upstream_version="r6.0",)
+        self.model = models.__dict__["yolov5n6"](
+            upstream_version="r6.0",
+        )
         self.model.train()
-        return self.model
\ No newline at end of file
+        return self.model
diff --git a/exps/default/yolov5s.py b/exps/default/yolov5s.py
index 61736d25..00512d04 100644
--- a/exps/default/yolov5s.py
+++ b/exps/default/yolov5s.py
@@ -15,6 +15,8 @@ def __init__(self):
         self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
 
     def get_model(self):
-        self.model = models.__dict__['yolov5s'](upstream_version="r6.0",)
+        self.model = models.__dict__["yolov5s"](
+            upstream_version="r6.0",
+        )
         self.model.train()
-        return self.model
\ No newline at end of file
+        return self.model
diff --git a/exps/default/yolov5s6.py b/exps/default/yolov5s6.py
index cda2a942..8b394afb 100644
--- a/exps/default/yolov5s6.py
+++ b/exps/default/yolov5s6.py
@@ -15,6 +15,8 @@ def __init__(self):
         self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
 
     def get_model(self):
-        self.model = models.__dict__['yolov5s6'](upstream_version="r6.0",)
+        self.model = models.__dict__["yolov5s6"](
+            upstream_version="r6.0",
+        )
         self.model.train()
-        return self.model
\ No newline at end of file
+        return self.model
diff --git a/exps/default/yolov5ts.py b/exps/default/yolov5ts.py
index 365eab09..b71185df 100644
--- a/exps/default/yolov5ts.py
+++ b/exps/default/yolov5ts.py
@@ -15,6 +15,8 @@ def __init__(self):
         self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
 
     def get_model(self):
-        self.model = models.__dict__['yolov5ts'](upstream_version="r6.0",)
+        self.model = models.__dict__["yolov5ts"](
+            upstream_version="r6.0",
+        )
         self.model.train()
-        return self.model
\ No newline at end of file
+        return self.model
diff --git a/requirements.txt b/requirements.txt
index af814771..8fa37f38 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -31,4 +31,4 @@ pandas
 # pycocotools>=2.0.2  # corresponds to https://github.com/ppwwyyxx/cocoapi
 thop    # FLOPs computation
 loguru  # Python logging made (stupidly) simple
-Ninja   # a small build system with a focus on speed
\ No newline at end of file
+Ninja   # a small build system with a focus on speed
diff --git a/test/test_data_pipeline.py b/test/test_data_pipeline.py
index 4e626a81..7d61e9c4 100644
--- a/test/test_data_pipeline.py
+++ b/test/test_data_pipeline.py
@@ -1,17 +1,17 @@
 # Copyright (c) 2021, Zhiqiang Wang. All Rights Reserved.
+import sys
 from pathlib import Path
 
 import numpy as np
 import pytest
-import sys
+
 sys.path.append("../yolort")
 
 import torch
-from torch import Tensor
-from yolort.exp import Exp
+from torch import distributed as dist, Tensor
 from yolort.data import DataPrefetcher
+from yolort.exp import Exp
 from yolort.utils import contains_any_tensor
-from torch import distributed as dist
 
 
 def get_world_size() -> int:
@@ -58,6 +58,7 @@ def test_get_dataloader():
     assert len(targets) == batch_size
     assert isinstance(targets[0], Tensor)
 
+
 test_get_dataloader()
 
 
diff --git a/test/test_trainer.py b/test/test_trainer.py
index be1573c7..76251945 100644
--- a/test/test_trainer.py
+++ b/test/test_trainer.py
@@ -4,17 +4,17 @@
 import importlib
 
 import sys
+
 sys.path.append("../yolort/")
 
+
 def make_parser():
     parser = argparse.ArgumentParser("YOLOX train parser")
     parser.add_argument("-expn", "--experiment-name", type=str, default="yolov5n")
     parser.add_argument("-n", "--name", type=str, default="yolov5n", help="model name")
 
     # distributed
-    parser.add_argument(
-        "--dist-backend", default="nccl", type=str, help="distributed backend"
-    )
+    parser.add_argument("--dist-backend", default="nccl", type=str, help="distributed backend")
     parser.add_argument(
         "--dist-url",
         default=None,
@@ -22,9 +22,7 @@ def make_parser():
         help="url used to set up distributed training",
     )
     parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size")
-    parser.add_argument(
-        "-d", "--devices", default=None, type=int, help="device for training"
-    )
+    parser.add_argument("-d", "--devices", default=None, type=int, help="device for training")
     parser.add_argument(
         "-f",
         "--exp_file",
@@ -32,9 +30,7 @@ def make_parser():
         type=str,
         help="plz input your experiment description file",
     )
-    parser.add_argument(
-        "--resume", default=False, action="store_true", help="resume training"
-    )
+    parser.add_argument("--resume", default=False, action="store_true", help="resume training")
     parser.add_argument("-c", "--ckpt", default=None, type=str, help="checkpoint file")
     parser.add_argument(
         "-e",
@@ -43,12 +39,8 @@ def make_parser():
         type=int,
         help="resume training start epoch",
     )
-    parser.add_argument(
-        "--num_machines", default=1, type=int, help="num of node for training"
-    )
-    parser.add_argument(
-        "--machine_rank", default=0, type=int, help="node rank for multi-node training"
-    )
+    parser.add_argument("--num_machines", default=1, type=int, help="num of node for training")
+    parser.add_argument("--machine_rank", default=0, type=int, help="node rank for multi-node training")
     parser.add_argument(
         "--fp16",
         dest="fp16",
@@ -77,7 +69,7 @@ def make_parser():
         type=str,
         help="Logger to be used for metrics. \
         Implemented loggers include `tensorboard` and `wandb`.",
-        default="tensorboard"
+        default="tensorboard",
     )
     parser.add_argument(
         "opts",
@@ -87,6 +79,7 @@ def make_parser():
     )
     return parser
 
+
 def test_training_step():
     args = make_parser().parse_args()
     module_name = ".".join(["yolort", "exp", "default", args.name])
@@ -96,9 +89,11 @@ def test_training_step():
     assert h % 32 == 0 and w % 32 == 0, "input size must be multiples of 32"
 
     from yolort.trainer import Trainer
+
     trainer = Trainer(exp, args)
     trainer.train()
 
+
 def test_test_epoch_end():
     args = make_parser().parse_args()
     module_name = ".".join(["yolort", "exp", "default", args.name])
diff --git a/tools/eval_metric.py b/tools/eval_metric.py
index 0538f0df..3b64632d 100644
--- a/tools/eval_metric.py
+++ b/tools/eval_metric.py
@@ -8,8 +8,8 @@
 import torchvision
 import yolort
 from yolort.data import _helper as data_helper
-from yolort.data.datasets.coco import COCODetection
 from yolort.data.coco_eval import COCOEvaluator
+from yolort.data.datasets.coco import COCODetection
 from yolort.data.transforms import collate_fn, default_val_transforms
 from yolort.utils.logger import MetricLogger
 
diff --git a/yolort/data/__init__.py b/yolort/data/__init__.py
index 5740093a..aeaf4f93 100644
--- a/yolort/data/__init__.py
+++ b/yolort/data/__init__.py
@@ -6,4 +6,4 @@
 from .data_prefetcher import DataPrefetcher
 from .dataloading import DataLoader, get_yolox_datadir, worker_init_reset_seed
 from .datasets import *
-from .samplers import InfiniteSampler, YoloBatchSampler
\ No newline at end of file
+from .samplers import InfiniteSampler, YoloBatchSampler
diff --git a/yolort/data/data_augment.py b/yolort/data/data_augment.py
index 4e53f6c2..3c35f7fd 100644
--- a/yolort/data/data_augment.py
+++ b/yolort/data/data_augment.py
@@ -39,7 +39,9 @@ def get_aug_params(value, center=0):
     else:
         raise ValueError(
             "Affine params should be either a sequence containing two values\
-             or single float values. Got {}".format(value)
+             or single float values. Got {}".format(
+                value
+            )
         )
 
 
@@ -95,9 +97,7 @@ def apply_affine_to_bboxes(targets, target_size, M, scale):
     corner_xs = corner_points[:, 0::2]
     corner_ys = corner_points[:, 1::2]
     new_bboxes = (
-        np.concatenate(
-            (corner_xs.min(1), corner_ys.min(1), corner_xs.max(1), corner_ys.max(1))
-        )
+        np.concatenate((corner_xs.min(1), corner_ys.min(1), corner_xs.max(1), corner_ys.max(1)))
         .reshape(4, num_gts)
         .T
     )
@@ -203,9 +203,7 @@ def __call__(self, image, targets, input_dim):
 
         targets_t = np.hstack((labels_t, boxes_t))
         padded_labels = np.zeros((self.max_labels, 5))
-        padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
-            : self.max_labels
-        ]
+        padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[: self.max_labels]
         padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
         return image_t, padded_labels
 
diff --git a/yolort/data/data_module.py b/yolort/data/data_module.py
index d17d7327..c2bd3134 100644
--- a/yolort/data/data_module.py
+++ b/yolort/data/data_module.py
@@ -11,6 +11,7 @@
     from pytorch_lightning import LightningDataModule
 
 from yolort.data.datasets.coco import COCODetection
+
 from .transforms import collate_fn, default_train_transforms, default_val_transforms
 from .voc import VOCDetection
 
diff --git a/yolort/data/dataloading.py b/yolort/data/dataloading.py
index 6fecf3f0..cf805b21 100644
--- a/yolort/data/dataloading.py
+++ b/yolort/data/dataloading.py
@@ -9,8 +9,7 @@
 import numpy as np
 
 import torch
-from torch.utils.data.dataloader import DataLoader as torchDataLoader
-from torch.utils.data.dataloader import default_collate
+from torch.utils.data.dataloader import DataLoader as torchDataLoader, default_collate
 
 from .samplers import YoloBatchSampler
 
diff --git a/yolort/data/datasets/coco.py b/yolort/data/datasets/coco.py
index 5ac225a0..6aedce86 100644
--- a/yolort/data/datasets/coco.py
+++ b/yolort/data/datasets/coco.py
@@ -8,7 +8,7 @@
 import numpy as np
 from pycocotools.coco import COCO
 
-from .datasets_wrapper import CacheDataset, cache_read_img
+from .datasets_wrapper import cache_read_img, CacheDataset
 
 
 def remove_useless_info(coco):
@@ -79,7 +79,7 @@ def __init__(
             cache_dir_name=f"cache_{name}",
             path_filename=path_filename,
             cache=cache,
-            cache_type=cache_type
+            cache_type=cache_type,
         )
 
     def __len__(self):
@@ -118,11 +118,7 @@ def load_anno_from_ids(self, id_):
         img_info = (height, width)
         resized_info = (int(height * r), int(width * r))
 
-        file_name = (
-            im_ann["file_name"]
-            if "file_name" in im_ann
-            else "{:012}".format(id_) + ".jpg"
-        )
+        file_name = im_ann["file_name"] if "file_name" in im_ann else "{:012}".format(id_) + ".jpg"
 
         return (res, img_info, resized_info, file_name)
 
diff --git a/yolort/data/datasets/datasets_wrapper.py b/yolort/data/datasets/datasets_wrapper.py
index c45fe380..32f9b92f 100644
--- a/yolort/data/datasets/datasets_wrapper.py
+++ b/yolort/data/datasets/datasets_wrapper.py
@@ -9,14 +9,13 @@
 from abc import ABCMeta, abstractmethod
 from functools import partial, wraps
 from multiprocessing.pool import ThreadPool
-import psutil
-from loguru import logger
-from tqdm import tqdm
 
 import numpy as np
+import psutil
+from loguru import logger
 
-from torch.utils.data.dataset import ConcatDataset as torchConcatDataset
-from torch.utils.data.dataset import Dataset as torchDataset
+from torch.utils.data.dataset import ConcatDataset as torchConcatDataset, Dataset as torchDataset
+from tqdm import tqdm
 
 
 class ConcatDataset(torchConcatDataset):
@@ -29,9 +28,7 @@ def __init__(self, datasets):
     def pull_item(self, idx):
         if idx < 0:
             if -idx > len(self):
-                raise ValueError(
-                    "absolute value of index should not exceed dataset length"
-                )
+                raise ValueError("absolute value of index should not exceed dataset length")
             idx = len(self) + idx
         dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
         if dataset_idx == 0:
@@ -54,9 +51,7 @@ def __getitem__(self, index):
             idx = index[1]
         if idx < 0:
             if -idx > len(self):
-                raise ValueError(
-                    "absolute value of index should not exceed dataset length"
-                )
+                raise ValueError("absolute value of index should not exceed dataset length")
             idx = len(self) + idx
         dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
         if dataset_idx == 0:
@@ -70,7 +65,7 @@ def __getitem__(self, index):
 
 
 class Dataset(torchDataset):
-    """ This class is a subclass of the base :class:`torch.utils.data.Dataset`,
+    """This class is a subclass of the base :class:`torch.utils.data.Dataset`,
     that enables on the fly resizing of the ``input_dim``.
 
     Args:
@@ -125,7 +120,7 @@ def wrapper(self, index):
 
 
 class CacheDataset(Dataset, metaclass=ABCMeta):
-    """ This class is a subclass of the base :class:`yolox.data.datasets.Dataset`,
+    """This class is a subclass of the base :class:`yolox.data.datasets.Dataset`,
     that enables cache images to ram or disk.
 
     Args:
@@ -196,8 +191,9 @@ def cache_images(
     ):
         assert num_imgs is not None, "num_imgs must be specified as the size of the dataset"
         if self.cache_type == "disk":
-            assert (data_dir and cache_dir_name and path_filename) is not None, \
-                "data_dir, cache_name and path_filename must be specified if cache_type is disk"
+            assert (
+                data_dir and cache_dir_name and path_filename
+            ) is not None, "data_dir, cache_name and path_filename must be specified if cache_type is disk"
             self.path_filename = path_filename
 
         mem = psutil.virtual_memory()
@@ -216,10 +212,10 @@ def cache_images(
                 )
 
         if self.cache and self.imgs is None:
-            if self.cache_type == 'ram':
+            if self.cache_type == "ram":
                 self.imgs = [None] * num_imgs
                 logger.info("You are using cached images in RAM to accelerate training!")
-            else:   # 'disk'
+            else:  # 'disk'
                 if not os.path.exists(self.cache_dir):
                     os.mkdir(self.cache_dir)
                     logger.warning(
@@ -234,29 +230,22 @@ def cache_images(
                     logger.info(f"Found disk cache at {self.cache_dir}")
                     return
 
-            logger.info(
-                "Caching images...\n"
-                "This might take some time for your dataset"
-            )
+            logger.info("Caching images...\n" "This might take some time for your dataset")
 
             num_threads = min(8, max(1, os.cpu_count() - 1))
             b = 0
-            load_imgs = ThreadPool(num_threads).imap(
-                partial(self.read_img, use_cache=False),
-                range(num_imgs)
-            )
+            load_imgs = ThreadPool(num_threads).imap(partial(self.read_img, use_cache=False), range(num_imgs))
             pbar = tqdm(enumerate(load_imgs), total=num_imgs)
-            for i, x in pbar:   # x = self.read_img(self, i, use_cache=False)
-                if self.cache_type == 'ram':
+            for i, x in pbar:  # x = self.read_img(self, i, use_cache=False)
+                if self.cache_type == "ram":
                     self.imgs[i] = x
-                else:   # 'disk'
+                else:  # 'disk'
                     cache_filename = f'{self.path_filename[i].split(".")[0]}.npy'
                     cache_path_filename = os.path.join(self.cache_dir, cache_filename)
                     os.makedirs(os.path.dirname(cache_path_filename), exist_ok=True)
                     np.save(cache_path_filename, x)
                 b += x.nbytes
-                pbar.desc = \
-                    f'Caching images ({b / gb:.1f}/{mem_required / gb:.1f}GB {self.cache_type})'
+                pbar.desc = f"Caching images ({b / gb:.1f}/{mem_required / gb:.1f}GB {self.cache_type})"
             pbar.close()
 
     def cal_cache_occupy(self, num_imgs):
@@ -280,6 +269,7 @@ def decorator(read_img_fn):
                 whether to read the image from cache.
                 Defaults to True.
         """
+
         @wraps(read_img_fn)
         def wrapper(self, index, use_cache=use_cache):
             cache = self.cache and use_cache
@@ -289,12 +279,14 @@ def wrapper(self, index, use_cache=use_cache):
                     img = copy.deepcopy(img)
                 elif self.cache_type == "disk":
                     img = np.load(
-                        os.path.join(
-                            self.cache_dir, f"{self.path_filename[index].split('.')[0]}.npy"))
+                        os.path.join(self.cache_dir, f"{self.path_filename[index].split('.')[0]}.npy")
+                    )
                 else:
                     raise ValueError(f"Unknown cache type: {self.cache_type}")
             else:
                 img = read_img_fn(self, index)
             return img
+
         return wrapper
+
     return decorator
diff --git a/yolort/data/datasets/mosaicdetection.py b/yolort/data/datasets/mosaicdetection.py
index ba11cfdc..7f3b5f75 100644
--- a/yolort/data/datasets/mosaicdetection.py
+++ b/yolort/data/datasets/mosaicdetection.py
@@ -38,10 +38,20 @@ class MosaicDetection(Dataset):
     """Detection dataset wrapper that performs mixup for normal dataset."""
 
     def __init__(
-        self, dataset, img_size, mosaic=True, preproc=None,
-        degrees=10.0, translate=0.1, mosaic_scale=(0.5, 1.5),
-        mixup_scale=(0.5, 1.5), shear=2.0, enable_mixup=True,
-        mosaic_prob=1.0, mixup_prob=1.0, *args
+        self,
+        dataset,
+        img_size,
+        mosaic=True,
+        preproc=None,
+        degrees=10.0,
+        translate=0.1,
+        mosaic_scale=(0.5, 1.5),
+        mixup_scale=(0.5, 1.5),
+        shear=2.0,
+        enable_mixup=True,
+        mosaic_prob=1.0,
+        mixup_prob=1.0,
+        *args,
     ):
         """
 
@@ -92,10 +102,8 @@ def __getitem__(self, idx):
             for i_mosaic, index in enumerate(indices):
                 img, _labels, _, img_id = self._dataset.pull_item(index)
                 h0, w0 = img.shape[:2]  # orig hw
-                scale = min(1. * input_h / h0, 1. * input_w / w0)
-                img = cv2.resize(
-                    img, (int(w0 * scale), int(h0 * scale)), interpolation=cv2.INTER_LINEAR
-                )
+                scale = min(1.0 * input_h / h0, 1.0 * input_w / w0)
+                img = cv2.resize(img, (int(w0 * scale), int(h0 * scale)), interpolation=cv2.INTER_LINEAR)
                 # generate output mosaic image
                 (h, w, c) = img.shape[:3]
                 if i_mosaic == 0:
@@ -138,11 +146,7 @@ def __getitem__(self, idx):
             # -----------------------------------------------------------------
             # CopyPaste: https://arxiv.org/abs/2012.07177
             # -----------------------------------------------------------------
-            if (
-                self.enable_mixup
-                and not len(mosaic_labels) == 0
-                and random.random() < self.mixup_prob
-            ):
+            if self.enable_mixup and not len(mosaic_labels) == 0 and random.random() < self.mixup_prob:
                 mosaic_img, mosaic_labels = self.mixup(mosaic_img, mosaic_labels, self.input_dim)
             mix_img, padded_labels = self.preproc(mosaic_img, mosaic_labels, self.input_dim)
             img_info = (mix_img.shape[1], mix_img.shape[0])
@@ -180,9 +184,7 @@ def mixup(self, origin_img, origin_labels, input_dim):
             interpolation=cv2.INTER_LINEAR,
         )
 
-        cp_img[
-            : int(img.shape[0] * cp_scale_ratio), : int(img.shape[1] * cp_scale_ratio)
-        ] = resized_img
+        cp_img[: int(img.shape[0] * cp_scale_ratio), : int(img.shape[1] * cp_scale_ratio)] = resized_img
 
         cp_img = cv2.resize(
             cp_img,
@@ -195,9 +197,7 @@ def mixup(self, origin_img, origin_labels, input_dim):
 
         origin_h, origin_w = cp_img.shape[:2]
         target_h, target_w = origin_img.shape[:2]
-        padded_img = np.zeros(
-            (max(origin_h, target_h), max(origin_w, target_w), 3), dtype=np.uint8
-        )
+        padded_img = np.zeros((max(origin_h, target_h), max(origin_w, target_w), 3), dtype=np.uint8)
         padded_img[:origin_h, :origin_w] = cp_img
 
         x_offset, y_offset = 0, 0
@@ -205,24 +205,16 @@ def mixup(self, origin_img, origin_labels, input_dim):
             y_offset = random.randint(0, padded_img.shape[0] - target_h - 1)
         if padded_img.shape[1] > target_w:
             x_offset = random.randint(0, padded_img.shape[1] - target_w - 1)
-        padded_cropped_img = padded_img[
-            y_offset: y_offset + target_h, x_offset: x_offset + target_w
-        ]
+        padded_cropped_img = padded_img[y_offset : y_offset + target_h, x_offset : x_offset + target_w]
 
         cp_bboxes_origin_np = adjust_box_anns(
             cp_labels[:, :4].copy(), cp_scale_ratio, 0, 0, origin_w, origin_h
         )
         if FLIP:
-            cp_bboxes_origin_np[:, 0::2] = (
-                origin_w - cp_bboxes_origin_np[:, 0::2][:, ::-1]
-            )
+            cp_bboxes_origin_np[:, 0::2] = origin_w - cp_bboxes_origin_np[:, 0::2][:, ::-1]
         cp_bboxes_transformed_np = cp_bboxes_origin_np.copy()
-        cp_bboxes_transformed_np[:, 0::2] = np.clip(
-            cp_bboxes_transformed_np[:, 0::2] - x_offset, 0, target_w
-        )
-        cp_bboxes_transformed_np[:, 1::2] = np.clip(
-            cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h
-        )
+        cp_bboxes_transformed_np[:, 0::2] = np.clip(cp_bboxes_transformed_np[:, 0::2] - x_offset, 0, target_w)
+        cp_bboxes_transformed_np[:, 1::2] = np.clip(cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h)
 
         cls_labels = cp_labels[:, 4:5].copy()
         box_labels = cp_bboxes_transformed_np
diff --git a/yolort/data/samplers.py b/yolort/data/samplers.py
index 6b7ea38d..b08b3d68 100644
--- a/yolort/data/samplers.py
+++ b/yolort/data/samplers.py
@@ -7,8 +7,7 @@
 
 import torch
 import torch.distributed as dist
-from torch.utils.data.sampler import BatchSampler as torchBatchSampler
-from torch.utils.data.sampler import Sampler
+from torch.utils.data.sampler import BatchSampler as torchBatchSampler, Sampler
 
 
 class YoloBatchSampler(torchBatchSampler):
@@ -68,9 +67,7 @@ def __init__(
 
     def __iter__(self):
         start = self._rank
-        yield from itertools.islice(
-            self._infinite_indices(), start, None, self._world_size
-        )
+        yield from itertools.islice(self._infinite_indices(), start, None, self._world_size)
 
     def _infinite_indices(self):
         g = torch.Generator()
diff --git a/yolort/evaluators/__init__.py b/yolort/evaluators/__init__.py
index fc0b6875..83b5a9f1 100644
--- a/yolort/evaluators/__init__.py
+++ b/yolort/evaluators/__init__.py
@@ -2,4 +2,4 @@
 # -*- coding:utf-8 -*-
 # Copyright (c) Megvii, Inc. and its affiliates.
 
-from .coco_evaluator import COCOEvaluator
\ No newline at end of file
+from .coco_evaluator import COCOEvaluator
diff --git a/yolort/evaluators/coco_evaluator.py b/yolort/evaluators/coco_evaluator.py
index a97c6d41..75e79aa2 100644
--- a/yolort/evaluators/coco_evaluator.py
+++ b/yolort/evaluators/coco_evaluator.py
@@ -9,23 +9,16 @@
 import tempfile
 import time
 from collections import ChainMap, defaultdict
-from loguru import logger
-from tabulate import tabulate
-from tqdm import tqdm
 
 import numpy as np
 
 import torch
+from loguru import logger
+from tabulate import tabulate
+from tqdm import tqdm
 
 from yolort.data.datasets import COCO_CLASSES
-from yolort.utils import (
-    gather,
-    is_main_process,
-    postprocess,
-    synchronize,
-    time_synchronized,
-    xyxy2xywh
-)
+from yolort.utils import gather, is_main_process, postprocess, synchronize, time_synchronized, xyxy2xywh
 
 
 def per_class_AR_table(coco_eval, class_names=COCO_CLASSES, headers=["class", "AR"], colums=6):
@@ -46,7 +39,11 @@ def per_class_AR_table(coco_eval, class_names=COCO_CLASSES, headers=["class", "A
     row_pair = itertools.zip_longest(*[result_pair[i::num_cols] for i in range(num_cols)])
     table_headers = headers * (num_cols // len(headers))
     table = tabulate(
-        row_pair, tablefmt="pipe", floatfmt=".3f", headers=table_headers, numalign="left",
+        row_pair,
+        tablefmt="pipe",
+        floatfmt=".3f",
+        headers=table_headers,
+        numalign="left",
     )
     return table
 
@@ -71,7 +68,11 @@ def per_class_AP_table(coco_eval, class_names=COCO_CLASSES, headers=["class", "A
     row_pair = itertools.zip_longest(*[result_pair[i::num_cols] for i in range(num_cols)])
     table_headers = headers * (num_cols // len(headers))
     table = tabulate(
-        row_pair, tablefmt="pipe", floatfmt=".3f", headers=table_headers, numalign="left",
+        row_pair,
+        tablefmt="pipe",
+        floatfmt=".3f",
+        headers=table_headers,
+        numalign="left",
     )
     return table
 
@@ -114,8 +115,14 @@ def __init__(
         self.per_class_AR = per_class_AR
 
     def evaluate(
-        self, model, distributed=False, half=False, trt_file=None,
-        decoder=None, test_size=None, return_outputs=False
+        self,
+        model,
+        distributed=False,
+        half=False,
+        trt_file=None,
+        decoder=None,
+        test_size=None,
+        return_outputs=False,
     ):
         """
         COCO average precision (AP) Evaluation. Iterate inference on the test dataset
@@ -155,9 +162,7 @@ def evaluate(
             model(x)
             model = model_trt
 
-        for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
-            progress_bar(self.dataloader)
-        ):
+        for cur_iter, (imgs, _, info_imgs, ids) in enumerate(progress_bar(self.dataloader)):
             with torch.no_grad():
                 imgs = imgs.type(tensor_type)
 
@@ -174,15 +179,14 @@ def evaluate(
                     infer_end = time_synchronized()
                     inference_time += infer_end - start
 
-                outputs = postprocess(
-                    outputs, self.num_classes, self.confthre, self.nmsthre
-                )
+                outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)
                 if is_time_record:
                     nms_end = time_synchronized()
                     nms_time += nms_end - infer_end
 
             data_list_elem, image_wise_data = self.convert_to_coco_format(
-                outputs, info_imgs, ids, return_outputs=True)
+                outputs, info_imgs, ids, return_outputs=True
+            )
             data_list.extend(data_list_elem)
             output_data.update(image_wise_data)
 
@@ -207,9 +211,7 @@ def evaluate(
     def convert_to_coco_format(self, outputs, info_imgs, ids, return_outputs=False):
         data_list = []
         image_wise_data = defaultdict(dict)
-        for (output, img_h, img_w, img_id) in zip(
-            outputs, info_imgs[0], info_imgs[1], ids
-        ):
+        for (output, img_h, img_w, img_id) in zip(outputs, info_imgs[0], info_imgs[1], ids):
             if output is None:
                 continue
             output = output.cpu()
@@ -217,23 +219,22 @@ def convert_to_coco_format(self, outputs, info_imgs, ids, return_outputs=False):
             bboxes = output[:, 0:4]
 
             # preprocessing: resize
-            scale = min(
-                self.img_size[0] / float(img_h), self.img_size[1] / float(img_w)
-            )
+            scale = min(self.img_size[0] / float(img_h), self.img_size[1] / float(img_w))
             bboxes /= scale
             cls = output[:, 6]
             scores = output[:, 4] * output[:, 5]
 
-            image_wise_data.update({
-                int(img_id): {
-                    "bboxes": [box.numpy().tolist() for box in bboxes],
-                    "scores": [score.numpy().item() for score in scores],
-                    "categories": [
-                        self.dataloader.dataset.class_ids[int(cls[ind])]
-                        for ind in range(bboxes.shape[0])
-                    ],
+            image_wise_data.update(
+                {
+                    int(img_id): {
+                        "bboxes": [box.numpy().tolist() for box in bboxes],
+                        "scores": [score.numpy().item() for score in scores],
+                        "categories": [
+                            self.dataloader.dataset.class_ids[int(cls[ind])] for ind in range(bboxes.shape[0])
+                        ],
+                    }
                 }
-            })
+            )
 
             bboxes = xyxy2xywh(bboxes)
 
@@ -305,7 +306,7 @@ def evaluate_prediction(self, data_dict, statistics):
                 cocoEval.summarize()
             info += redirect_string.getvalue()
             cat_ids = list(cocoGt.cats.keys())
-            cat_names = [cocoGt.cats[catId]['name'] for catId in sorted(cat_ids)]
+            cat_names = [cocoGt.cats[catId]["name"] for catId in sorted(cat_ids)]
             if self.per_class_AP:
                 AP_table = per_class_AP_table(cocoEval, class_names=cat_names)
                 info += "per class AP:\n" + AP_table + "\n"
@@ -314,4 +315,4 @@ def evaluate_prediction(self, data_dict, statistics):
                 info += "per class AR:\n" + AR_table + "\n"
             return cocoEval.stats[0], cocoEval.stats[1], info
         else:
-            return 0, 0, info
\ No newline at end of file
+            return 0, 0, info
diff --git a/yolort/exp/__init__.py b/yolort/exp/__init__.py
index d7de27c8..94b059ce 100644
--- a/yolort/exp/__init__.py
+++ b/yolort/exp/__init__.py
@@ -2,4 +2,4 @@
 # Copyright (c) Megvii Inc. All rights reserved.
 
 from .base_exp import BaseExp
-from .yolox_base import Exp
\ No newline at end of file
+from .yolox_base import Exp
diff --git a/yolort/exp/base_exp.py b/yolort/exp/base_exp.py
index c0ae45fe..41506546 100644
--- a/yolort/exp/base_exp.py
+++ b/yolort/exp/base_exp.py
@@ -5,9 +5,9 @@
 import pprint
 from abc import ABCMeta, abstractmethod
 from typing import Dict, List, Tuple
-from tabulate import tabulate
 
 import torch
+from tabulate import tabulate
 from torch.nn import Module
 
 from yolort.utils import LRScheduler
@@ -42,9 +42,7 @@ def get_optimizer(self, batch_size: int) -> torch.optim.Optimizer:
         pass
 
     @abstractmethod
-    def get_lr_scheduler(
-        self, lr: float, iters_per_epoch: int, **kwargs
-    ) -> LRScheduler:
+    def get_lr_scheduler(self, lr: float, iters_per_epoch: int, **kwargs) -> LRScheduler:
         pass
 
     @abstractmethod
@@ -57,11 +55,7 @@ def eval(self, model, evaluator, weights):
 
     def __repr__(self):
         table_header = ["keys", "values"]
-        exp_table = [
-            (str(k), pprint.pformat(v))
-            for k, v in vars(self).items()
-            if not k.startswith("_")
-        ]
+        exp_table = [(str(k), pprint.pformat(v)) for k, v in vars(self).items() if not k.startswith("_")]
         return tabulate(exp_table, headers=table_header, tablefmt="fancy_grid")
 
     def merge(self, cfg_list):
diff --git a/yolort/exp/default/__init__.py b/yolort/exp/default/__init__.py
index 1f361d78..b439cbde 100644
--- a/yolort/exp/default/__init__.py
+++ b/yolort/exp/default/__init__.py
@@ -15,7 +15,6 @@
     # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230
 
     class _ExpFinder(abc.MetaPathFinder):
-        
         def find_spec(self, name, path, target=None):
             if not name.startswith("yolort.exp.default"):
                 return
diff --git a/yolort/exp/yolox_base.py b/yolort/exp/yolox_base.py
index f3147743..c46af070 100644
--- a/yolort/exp/yolox_base.py
+++ b/yolort/exp/yolox_base.py
@@ -1,11 +1,11 @@
 #!/usr/bin/env python3
 # Copyright (c) Megvii Inc. All rights reserved.
 
+import logging
 import os
 import random
-import logging
-from zipfile import ZipFile
 from pathlib import Path, PosixPath
+from zipfile import ZipFile
 
 import torch
 import torch.distributed as dist
@@ -114,7 +114,9 @@ def __init__(self):
     def get_model(self):
         import yolort.models as models
 
-        self.model = models.__dict__['yolov5n'](upstream_version="r6.0", )
+        self.model = models.__dict__["yolov5n"](
+            upstream_version="r6.0",
+        )
         self.model.train()
         return self.model
 
@@ -136,25 +138,17 @@ def get_dataset(self, data_root: str, mode: str = "val", cache: bool = False, ca
                 data_dir=self.data_dir,
                 json_file=self.train_ann,
                 img_size=self.input_size,
-                preproc=TrainTransform(
-                    max_labels=50,
-                    flip_prob=self.flip_prob,
-                    hsv_prob=self.hsv_prob
-                ),
+                preproc=TrainTransform(max_labels=50, flip_prob=self.flip_prob, hsv_prob=self.hsv_prob),
                 cache=cache,
                 cache_type=cache_type,
             )
         elif mode == "val":
-            """ TODO """
+            """TODO"""
             dataset = COCODataset(
                 data_dir=self.data_dir,
                 json_file=self.train_ann,
                 img_size=self.input_size,
-                preproc=TrainTransform(
-                    max_labels=50,
-                    flip_prob=self.flip_prob,
-                    hsv_prob=self.hsv_prob
-                ),
+                preproc=TrainTransform(max_labels=50, flip_prob=self.flip_prob, hsv_prob=self.hsv_prob),
                 cache=cache,
                 cache_type=cache_type,
             )
@@ -174,12 +168,12 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img: s
                 None: Do not use cache, in this case cache_data is also None.
         """
         from yolort.data import (
-            TrainTransform,
-            YoloBatchSampler,
             DataLoader,
             InfiniteSampler,
             MosaicDetection,
+            TrainTransform,
             worker_init_reset_seed,
+            YoloBatchSampler,
         )
         from yolort.utils import wait_for_the_master
 
@@ -187,18 +181,16 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img: s
         # else we will create dataset after launch
         if self.dataset is None:
             with wait_for_the_master():
-                assert cache_img is None, \
-                    "cache_img must be None if you didn't create dataset before launch"
-                self.dataset = self.get_dataset(data_root="data-bin", mode="train", cache=False, cache_type=cache_img)
+                assert cache_img is None, "cache_img must be None if you didn't create dataset before launch"
+                self.dataset = self.get_dataset(
+                    data_root="data-bin", mode="train", cache=False, cache_type=cache_img
+                )
 
         self.dataset = MosaicDetection(
             dataset=self.dataset,
             mosaic=not no_aug,
             img_size=self.input_size,
-            preproc=TrainTransform(
-                max_labels=120,
-                flip_prob=self.flip_prob,
-                hsv_prob=self.hsv_prob),
+            preproc=TrainTransform(max_labels=120, flip_prob=self.flip_prob, hsv_prob=self.hsv_prob),
             degrees=self.degrees,
             translate=self.translate,
             mosaic_scale=self.mosaic_scale,
@@ -232,9 +224,10 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img: s
 
         return train_loader
 
-    def prepare_coco128(self,
-            data_path: PosixPath,
-            dirname: str = "coco128",
+    def prepare_coco128(
+        self,
+        data_path: PosixPath,
+        dirname: str = "coco128",
     ) -> None:
         """
         Prepare coco128 dataset to test.
@@ -266,7 +259,7 @@ def random_resize(self, data_loader, epoch, rank, is_distributed):
 
         if rank == 0:
             size_factor = self.input_size[1] * 1.0 / self.input_size[0]
-            if not hasattr(self, 'random_size'):
+            if not hasattr(self, "random_size"):
                 min_size = int(self.input_size[0] / 32) - self.multiscale_range
                 max_size = int(self.input_size[0] / 32) + self.multiscale_range
                 self.random_size = (min_size, max_size)
@@ -286,9 +279,7 @@ def preprocess(self, inputs, targets, tsize):
         scale_y = tsize[0] / self.input_size[0]
         scale_x = tsize[1] / self.input_size[1]
         if scale_x != 1 or scale_y != 1:
-            inputs = nn.functional.interpolate(
-                inputs, size=tsize, mode="bilinear", align_corners=False
-            )
+            inputs = nn.functional.interpolate(inputs, size=tsize, mode="bilinear", align_corners=False)
             targets[..., 1::2] = targets[..., 1::2] * scale_x
             targets[..., 2::2] = targets[..., 2::2] * scale_y
         return inputs, targets
@@ -310,9 +301,7 @@ def get_optimizer(self, batch_size):
                 elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
                     pg1.append(v.weight)  # apply decay
 
-            optimizer = torch.optim.SGD(
-                pg0, lr=lr, momentum=self.momentum, nesterov=True
-            )
+            optimizer = torch.optim.SGD(pg0, lr=lr, momentum=self.momentum, nesterov=True)
             optimizer.add_param_group(
                 {"params": pg1, "weight_decay": self.weight_decay}
             )  # add pg1 with weight_decay
@@ -338,12 +327,13 @@ def get_lr_scheduler(self, lr, iters_per_epoch):
 
     def get_eval_dataset(self, **kwargs):
         from yolort.data import COCODataset, ValTransform
+
         testdev = kwargs.get("testdev", False)
         legacy = kwargs.get("legacy", False)
 
         return COCODataset(
             data_dir=self.data_dir,
-            json_file=self.train_ann,                          # 这里需要改为
+            json_file=self.train_ann,  # TODO: should be changed to test data
             name="train2017" if not testdev else "train2017",  # 测试数据
             img_size=self.test_size,
             preproc=ValTransform(legacy=legacy),
@@ -354,9 +344,7 @@ def get_eval_loader(self, batch_size, is_distributed, **kwargs):
 
         if is_distributed:
             batch_size = batch_size // dist.get_world_size()
-            sampler = torch.utils.data.distributed.DistributedSampler(
-                valdataset, shuffle=False
-            )
+            sampler = torch.utils.data.distributed.DistributedSampler(valdataset, shuffle=False)
         else:
             sampler = torch.utils.data.SequentialSampler(valdataset)
 
@@ -374,8 +362,7 @@ def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False)
         from yolort.evaluators import COCOEvaluator
 
         return COCOEvaluator(
-            dataloader=self.get_eval_loader(batch_size, is_distributed,
-                                            testdev=testdev, legacy=legacy),
+            dataloader=self.get_eval_loader(batch_size, is_distributed, testdev=testdev, legacy=legacy),
             img_size=self.test_size,
             confthre=self.test_conf,
             nmsthre=self.nmsthre,
@@ -384,4 +371,4 @@ def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False)
         )
 
     def eval(self, model, evaluator, is_distributed, half=False, return_outputs=False):
-        return evaluator.evaluate(model, is_distributed, half, return_outputs=return_outputs)
\ No newline at end of file
+        return evaluator.evaluate(model, is_distributed, half, return_outputs=return_outputs)
diff --git a/yolort/trainer/trainer.py b/yolort/trainer/trainer.py
index 28f1fbe1..aeb418db 100644
--- a/yolort/trainer/trainer.py
+++ b/yolort/trainer/trainer.py
@@ -4,18 +4,15 @@
 import datetime
 import os
 import time
-from loguru import logger
 
 import torch
+from loguru import logger
 from torch.nn.parallel import DistributedDataParallel as DDP
 from torch.utils.tensorboard import SummaryWriter
 
 from yolort.data import DataPrefetcher
 from yolort.exp import Exp
 from yolort.utils import (
-    MeterBuffer,
-    ModelEMA,
-    WandbLogger,
     adjust_status,
     all_reduce_norm,
     get_local_rank,
@@ -26,14 +23,18 @@
     is_parallel,
     load_ckpt,
     mem_usage,
+    MeterBuffer,
+    ModelEMA,
     occupy_mem,
     save_checkpoint,
     setup_logger,
-    synchronize
+    synchronize,
+    WandbLogger,
 )
 
 __all__ = ["Trainer"]
 
+
 class Trainer:
     def __init__(self, exp: Exp, args):
         # init function only defines some basic attr, other attrs like model, optimizer are built in
@@ -48,7 +49,7 @@ def __init__(self, exp: Exp, args):
         self.is_distributed = get_world_size() > 1
         self.rank = get_rank()
         self.local_rank = get_local_rank()
-        self.device = "cuda:{}".format(self.local_rank) if torch.cuda.is_available() else 'cpu'
+        self.device = "cuda:{}".format(self.local_rank) if torch.cuda.is_available() else "cpu"
         self.use_model_ema = exp.ema
         self.save_history_ckpt = exp.save_history_ckpt
 
@@ -132,12 +133,10 @@ def before_train(self):
         logger.info("exp value:\n{}".format(self.exp))
 
         # model related init
-        if self.device != 'cpu':
+        if self.device != "cpu":
             torch.cuda.set_device(self.local_rank)
         model = self.exp.get_model()
-        logger.info(
-            "Model Summary: {}".format(get_model_info(model, self.exp.test_size))
-        )
+        logger.info("Model Summary: {}".format(get_model_info(model, self.exp.test_size)))
         model.to(self.device)
 
         # solver related init
@@ -183,9 +182,7 @@ def before_train(self):
                 self.tblogger = SummaryWriter(os.path.join(self.file_name, "tensorboard"))
             elif self.args.logger == "wandb":
                 self.wandb_logger = WandbLogger.initialize_wandb_logger(
-                    self.args,
-                    self.exp,
-                    self.evaluator.dataloader.dataset
+                    self.args, self.exp, self.evaluator.dataloader.dataset
                 )
             else:
                 raise ValueError("logger must be either 'tensorboard' or 'wandb'")
@@ -194,9 +191,7 @@ def before_train(self):
         logger.info("\n{}".format(model))
 
     def after_train(self):
-        logger.info(
-            "Training of experiment is done and the best AP is {:.2f}".format(self.best_ap * 100)
-        )
+        logger.info("Training of experiment is done and the best AP is {:.2f}".format(self.best_ap * 100))
         if self.rank == 0:
             if self.args.logger == "wandb":
                 self.wandb_logger.finish()
@@ -243,14 +238,10 @@ def after_iter(self):
                 self.epoch + 1, self.max_epoch, self.iter + 1, self.max_iter
             )
             loss_meter = self.meter.get_filtered_meter("loss")
-            loss_str = ", ".join(
-                ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()]
-            )
+            loss_str = ", ".join(["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()])
 
             time_meter = self.meter.get_filtered_meter("time")
-            time_str = ", ".join(
-                ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()]
-            )
+            time_str = ", ".join(["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()])
 
             mem_str = "gpu mem: {:.0f}Mb, mem: {:.1f}Gb".format(gpu_mem_usage(), mem_usage())
 
@@ -267,16 +258,12 @@ def after_iter(self):
 
             if self.rank == 0:
                 if self.args.logger == "tensorboard":
-                    self.tblogger.add_scalar(
-                        "train/lr", self.meter["lr"].latest, self.progress_in_iter)
+                    self.tblogger.add_scalar("train/lr", self.meter["lr"].latest, self.progress_in_iter)
                     for k, v in loss_meter.items():
-                        self.tblogger.add_scalar(
-                            f"train/{k}", v.latest, self.progress_in_iter)
+                        self.tblogger.add_scalar(f"train/{k}", v.latest, self.progress_in_iter)
                 if self.args.logger == "wandb":
                     metrics = {"train/" + k: v.latest for k, v in loss_meter.items()}
-                    metrics.update({
-                        "train/lr": self.meter["lr"].latest
-                    })
+                    metrics.update({"train/lr": self.meter["lr"].latest})
                     self.wandb_logger.log_metrics(metrics, step=self.progress_in_iter)
 
             self.meter.clear_meters()
@@ -306,15 +293,11 @@ def resume_train(self, model):
             self.best_ap = ckpt.pop("best_ap", 0)
             # resume the training states variables
             start_epoch = (
-                self.args.start_epoch - 1
-                if self.args.start_epoch is not None
-                else ckpt["start_epoch"]
+                self.args.start_epoch - 1 if self.args.start_epoch is not None else ckpt["start_epoch"]
             )
             self.start_epoch = start_epoch
             logger.info(
-                "loaded checkpoint '{}' (epoch {})".format(
-                    self.args.resume, self.start_epoch
-                )
+                "loaded checkpoint '{}' (epoch {})".format(self.args.resume, self.start_epoch)
             )  # noqa
         else:
             if self.args.ckpt is not None:
@@ -347,11 +330,13 @@ def evaluate_and_save_model(self):
                 self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1)
                 self.tblogger.add_scalar("val/COCOAP50_95", ap50_95, self.epoch + 1)
             if self.args.logger == "wandb":
-                self.wandb_logger.log_metrics({
-                    "val/COCOAP50": ap50,
-                    "val/COCOAP50_95": ap50_95,
-                    "train/epoch": self.epoch + 1,
-                })
+                self.wandb_logger.log_metrics(
+                    {
+                        "val/COCOAP50": ap50,
+                        "val/COCOAP50_95": ap50_95,
+                        "train/epoch": self.epoch + 1,
+                    }
+                )
                 self.wandb_logger.log_images(predictions)
             logger.info("\n" + summary)
         synchronize()
@@ -387,6 +372,6 @@ def save_ckpt(self, ckpt_name, update_best_ckpt=False, ap=None):
                         "epoch": self.epoch + 1,
                         "optimizer": self.optimizer.state_dict(),
                         "best_ap": self.best_ap,
-                        "curr_ap": ap
-                    }
-                )
\ No newline at end of file
+                        "curr_ap": ap,
+                    },
+                )
diff --git a/yolort/utils/__init__.py b/yolort/utils/__init__.py
index cf4c00b0..ee0c373f 100644
--- a/yolort/utils/__init__.py
+++ b/yolort/utils/__init__.py
@@ -19,7 +19,7 @@
 from .checkpoint import load_ckpt, save_checkpoint
 from .dist import *
 from .ema import *
-from .logger import WandbLogger, setup_logger
+from .logger import setup_logger, WandbLogger
 from .lr_scheduler import LRScheduler
 from .metric import *
 from .model_utils import *
diff --git a/yolort/utils/allreduce_norm.py b/yolort/utils/allreduce_norm.py
index 142c76c7..71881952 100644
--- a/yolort/utils/allreduce_norm.py
+++ b/yolort/utils/allreduce_norm.py
@@ -6,8 +6,7 @@
 from collections import OrderedDict
 
 import torch
-from torch import distributed as dist
-from torch import nn
+from torch import distributed as dist, nn
 
 from .dist import _get_global_gloo_group, get_world_size
 
@@ -88,8 +87,7 @@ def all_reduce(py_dict, op="sum", group=None):
         flatten_tensor /= world_size
 
     split_tensors = [
-        x.reshape(shape)
-        for x, shape in zip(torch.split(flatten_tensor, tensor_numels), tensor_shapes)
+        x.reshape(shape) for x, shape in zip(torch.split(flatten_tensor, tensor_numels), tensor_shapes)
     ]
     return OrderedDict({k: v for k, v in zip(py_key, split_tensors)})
 
diff --git a/yolort/utils/boxes.py b/yolort/utils/boxes.py
index a8eaf3f4..7cffcd99 100644
--- a/yolort/utils/boxes.py
+++ b/yolort/utils/boxes.py
@@ -44,7 +44,7 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn
         if not image_pred.size(0):
             continue
         # Get score and class with highest confidence
-        class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True)
+        class_conf, class_pred = torch.max(image_pred[:, 5 : 5 + num_classes], 1, keepdim=True)
 
         conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze()
         # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
@@ -140,4 +140,4 @@ def cxcywh2xyxy(bboxes):
     bboxes[:, 1] = bboxes[:, 1] - bboxes[:, 3] * 0.5
     bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
     bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]
-    return bboxes
\ No newline at end of file
+    return bboxes
diff --git a/yolort/utils/checkpoint.py b/yolort/utils/checkpoint.py
index a0c200e4..d7dbe56e 100644
--- a/yolort/utils/checkpoint.py
+++ b/yolort/utils/checkpoint.py
@@ -3,9 +3,9 @@
 # Copyright (c) Megvii Inc. All rights reserved.
 import os
 import shutil
-from loguru import logger
 
 import torch
+from loguru import logger
 
 
 def load_ckpt(model, ckpt):
@@ -14,9 +14,7 @@ def load_ckpt(model, ckpt):
     for key_model, v in model_state_dict.items():
         if key_model not in ckpt:
             logger.warning(
-                "{} is not in the ckpt. Please double check and see if this is desired.".format(
-                    key_model
-                )
+                "{} is not in the ckpt. Please double check and see if this is desired.".format(key_model)
             )
             continue
         v_ckpt = ckpt[key_model]
diff --git a/yolort/utils/dist.py b/yolort/utils/dist.py
index a4b46801..1485c88b 100644
--- a/yolort/utils/dist.py
+++ b/yolort/utils/dist.py
@@ -14,11 +14,11 @@
 import pickle
 import time
 from contextlib import contextmanager
-from loguru import logger
 
 import numpy as np
 
 import torch
+from loguru import logger
 from torch import distributed as dist
 
 __all__ = [
@@ -39,9 +39,9 @@
 
 
 def get_num_devices():
-    gpu_list = os.getenv('CUDA_VISIBLE_DEVICES', None)
+    gpu_list = os.getenv("CUDA_VISIBLE_DEVICES", None)
     if gpu_list is not None:
-        return len(gpu_list.split(','))
+        return len(gpu_list.split(","))
     else:
         devices_list_info = os.popen("nvidia-smi -L")
         devices_list_info = devices_list_info.read().strip().split("\n")
@@ -151,10 +151,10 @@ def _serialize_to_tensor(data, group):
     device = torch.device("cpu" if backend == "gloo" else "cuda")
 
     buffer = pickle.dumps(data)
-    if len(buffer) > 1024 ** 3:
+    if len(buffer) > 1024**3:
         logger.warning(
             "Rank {} trying to all-gather {:.2f} GB of data on device {}".format(
-                get_rank(), len(buffer) / (1024 ** 3), device
+                get_rank(), len(buffer) / (1024**3), device
             )
         )
     storage = torch.ByteStorage.from_buffer(buffer)
@@ -169,14 +169,9 @@ def _pad_to_largest_tensor(tensor, group):
         Tensor: padded tensor that has the max size
     """
     world_size = dist.get_world_size(group=group)
-    assert (
-        world_size >= 1
-    ), "comm.gather/all_gather must be called from ranks within the given group!"
+    assert world_size >= 1, "comm.gather/all_gather must be called from ranks within the given group!"
     local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device)
-    size_list = [
-        torch.zeros([1], dtype=torch.int64, device=tensor.device)
-        for _ in range(world_size)
-    ]
+    size_list = [torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size)]
     dist.all_gather(size_list, local_size, group=group)
     size_list = [int(size.item()) for size in size_list]
 
@@ -185,9 +180,7 @@ def _pad_to_largest_tensor(tensor, group):
     # we pad the tensor because torch all_gather does not support
     # gathering tensors of different shapes
     if local_size != max_size:
-        padding = torch.zeros(
-            (max_size - local_size,), dtype=torch.uint8, device=tensor.device
-        )
+        padding = torch.zeros((max_size - local_size,), dtype=torch.uint8, device=tensor.device)
         tensor = torch.cat((tensor, padding), dim=0)
     return size_list, tensor
 
@@ -216,10 +209,7 @@ def all_gather(data, group=None):
     max_size = max(size_list)
 
     # receiving Tensor from all ranks
-    tensor_list = [
-        torch.empty((max_size,), dtype=torch.uint8, device=tensor.device)
-        for _ in size_list
-    ]
+    tensor_list = [torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list]
     dist.all_gather(tensor_list, tensor, group=group)
 
     data_list = []
@@ -258,10 +248,7 @@ def gather(data, dst=0, group=None):
     # receiving Tensor from all ranks
     if rank == dst:
         max_size = max(size_list)
-        tensor_list = [
-            torch.empty((max_size,), dtype=torch.uint8, device=tensor.device)
-            for _ in size_list
-        ]
+        tensor_list = [torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list]
         dist.gather(tensor, tensor_list, dst=dst, group=group)
 
         data_list = []
@@ -282,7 +269,7 @@ def shared_random_seed():
             create one.
     All workers must call this function, otherwise it will deadlock.
     """
-    ints = np.random.randint(2 ** 31)
+    ints = np.random.randint(2**31)
     all_ints = all_gather(ints)
     return all_ints[0]
 
@@ -291,4 +278,4 @@ def time_synchronized():
     """pytorch-accurate time"""
     if torch.cuda.is_available():
         torch.cuda.synchronize()
-    return time.time()
\ No newline at end of file
+    return time.time()
diff --git a/yolort/utils/ema.py b/yolort/utils/ema.py
index 364e8c87..67734266 100644
--- a/yolort/utils/ema.py
+++ b/yolort/utils/ema.py
@@ -51,10 +51,8 @@ def update(self, model):
             self.updates += 1
             d = self.decay(self.updates)
 
-            msd = (
-                model.module.state_dict() if is_parallel(model) else model.state_dict()
-            )  # model state_dict
+            msd = model.module.state_dict() if is_parallel(model) else model.state_dict()  # model state_dict
             for k, v in self.ema.state_dict().items():
                 if v.dtype.is_floating_point:
                     v *= d
-                    v += (1.0 - d) * msd[k].detach()
\ No newline at end of file
+                    v += (1.0 - d) * msd[k].detach()
diff --git a/yolort/utils/logger.py b/yolort/utils/logger.py
index 00f1d125..ed78a4cb 100644
--- a/yolort/utils/logger.py
+++ b/yolort/utils/logger.py
@@ -1,15 +1,16 @@
+import datetime
+import inspect
 import os
 import sys
-import cv2
 import time
-import datetime
-import inspect
-import numpy as np
-from loguru import logger
 from collections import defaultdict, deque
 
+import cv2
+import numpy as np
+
 import torch
 import torch.distributed as dist
+from loguru import logger
 
 from yolort.utils import is_module_available
 
@@ -204,6 +205,7 @@ def get_rank():
 def is_main_process():
     return get_rank() == 0
 
+
 def get_caller_name(depth=0):
     """
     Args:
@@ -317,17 +319,20 @@ class WandbLogger(object):
     https://docs.wandb.ai/guides/track
     https://docs.wandb.ai/guides/integrations/other/yolox
     """
-    def __init__(self,
-                 project=None,
-                 name=None,
-                 id=None,
-                 entity=None,
-                 save_dir=None,
-                 config=None,
-                 val_dataset=None,
-                 num_eval_images=100,
-                 log_checkpoints=False,
-                 **kwargs):
+
+    def __init__(
+        self,
+        project=None,
+        name=None,
+        id=None,
+        entity=None,
+        save_dir=None,
+        config=None,
+        val_dataset=None,
+        num_eval_images=100,
+        log_checkpoints=False,
+        **kwargs,
+    ):
         """
         Args:
             project (str): wandb project name.
@@ -357,12 +362,12 @@ def __init__(self,
         """
         try:
             import wandb
+
             self.wandb = wandb
         except ModuleNotFoundError:
             raise ModuleNotFoundError(
-                "wandb is not installed."
-                "Please install wandb using pip install wandb"
-                )
+                "wandb is not installed. Please install wandb using pip install wandb"
+            )
 
         from yolox.data.datasets import VOCDetection
 
@@ -379,14 +384,14 @@ def __init__(self,
             self.num_log_images = len(val_dataset)
         else:
             self.num_log_images = min(num_eval_images, len(val_dataset))
-        self.log_checkpoints = (log_checkpoints == "True" or log_checkpoints == "true")
+        self.log_checkpoints = log_checkpoints is True or log_checkpoints in ("True", "true")
         self._wandb_init = dict(
             project=self.project,
             name=self.name,
             id=self.id,
             entity=self.entity,
             dir=self.save_dir,
-            resume="allow"
+            resume="allow",
         )
         self._wandb_init.update(**kwargs)
 
@@ -404,9 +409,7 @@ def __init__(self,
         if val_dataset and self.num_log_images != 0:
             self.val_dataset = val_dataset
             self.cats = val_dataset.cats
-            self.id_to_class = {
-                cls['id']: cls['name'] for cls in self.cats
-            }
+            self.id_to_class = {cls["id"]: cls["name"] for cls in self.cats}
             self._log_validation_set(val_dataset)
 
     @property
@@ -445,10 +448,7 @@ def _log_validation_set(self, val_dataset):
                 if isinstance(id, torch.Tensor):
                     id = id.item()
 
-                self.val_table.add_data(
-                    id,
-                    self.wandb.Image(img)
-                )
+                self.val_table.add_data(id, self.wandb.Image(img))
 
             self.val_artifact.add(self.val_table, "validation_images_table")
             self.run.use_artifact(self.val_artifact)
@@ -478,16 +478,17 @@ def _convert_prediction_format(self, predictions):
                     act_scores.append(score)
                     act_cls.append(classes)
 
-            image_wise_data.update({
-                int(img_id): {
-                    "bboxes": [box.numpy().tolist() for box in act_box],
-                    "scores": [score.numpy().item() for score in act_scores],
-                    "categories": [
-                        self.val_dataset.class_ids[int(act_cls[ind])]
-                        for ind in range(len(act_box))
-                    ],
+            image_wise_data.update(
+                {
+                    int(img_id): {
+                        "bboxes": [box.numpy().tolist() for box in act_box],
+                        "scores": [score.numpy().item() for score in act_scores],
+                        "categories": [
+                            self.val_dataset.class_ids[int(act_cls[ind])] for ind in range(len(act_box))
+                        ],
+                    }
                 }
-            })
+            )
 
         return image_wise_data
 
@@ -546,14 +547,12 @@ def log_images(self, predictions):
                             "minX": min(x0, x1),
                             "minY": min(y0, y1),
                             "maxX": max(x0, x1),
-                            "maxY": max(y0, y1)
+                            "maxY": max(y0, y1),
                         },
                         "class_id": prediction["categories"][i],
-                        "domain": "pixel"
+                        "domain": "pixel",
                     }
-                    avg_scores[
-                        self.id_to_class[prediction["categories"][i]]
-                    ] += prediction["scores"][i]
+                    avg_scores[self.id_to_class[prediction["categories"][i]]] += prediction["scores"][i]
                     num_occurrences[self.id_to_class[prediction["categories"][i]]] += 1
                     boxes.append(box)
             else:
@@ -567,14 +566,10 @@ def log_images(self, predictions):
                 average_class_score.append(score)
             result_table.add_data(
                 idx,
-                self.wandb.Image(val[1], boxes={
-                        "prediction": {
-                            "box_data": boxes,
-                            "class_labels": self.id_to_class
-                        }
-                    }
+                self.wandb.Image(
+                    val[1], boxes={"prediction": {"box_data": boxes, "class_labels": self.id_to_class}}
                 ),
-                *average_class_score
+                *average_class_score,
             )
 
         self.wandb.log({"val_results/result_table": result_table})
@@ -597,11 +592,7 @@ def save_checkpoint(self, save_dir, model_name, is_best, metadata=None):
             epoch = None
 
         filename = os.path.join(save_dir, model_name + "_ckpt.pth")
-        artifact = self.wandb.Artifact(
-            name=f"run_{self.run.id}_model",
-            type="model",
-            metadata=metadata
-        )
+        artifact = self.wandb.Artifact(name=f"run_{self.run.id}_model", type="model", metadata=metadata)
         artifact.add_file(filename, name="model_ckpt.pth")
 
         aliases = ["latest"]
@@ -624,8 +615,8 @@ def initialize_wandb_logger(cls, args, exp, val_dataset):
         for k, v in zip(args.opts[0::2], args.opts[1::2]):
             if k.startswith("wandb-"):
                 try:
-                    wandb_params.update({k[len(prefix):]: int(v)})
+                    wandb_params.update({k[len(prefix) :]: int(v)})
                 except ValueError:
-                    wandb_params.update({k[len(prefix):]: v})
+                    wandb_params.update({k[len(prefix) :]: v})
 
-        return cls(config=vars(exp), val_dataset=val_dataset, **wandb_params)
\ No newline at end of file
+        return cls(config=vars(exp), val_dataset=val_dataset, **wandb_params)
diff --git a/yolort/utils/lr_scheduler.py b/yolort/utils/lr_scheduler.py
index 42c00cf2..777da407 100644
--- a/yolort/utils/lr_scheduler.py
+++ b/yolort/utils/lr_scheduler.py
@@ -84,8 +84,7 @@ def _get_lr_func(self, name):
             )
         elif name == "multistep":  # stepwise lr schedule
             milestones = [
-                int(self.total_iters * milestone / self.total_epochs)
-                for milestone in self.milestones
+                int(self.total_iters * milestone / self.total_epochs) for milestone in self.milestones
             ]
             gamma = getattr(self, "gamma", 0.1)
             lr_func = partial(multistep_lr, self.lr, milestones, gamma)
@@ -103,17 +102,10 @@ def cos_lr(lr, total_iters, iters):
 def warm_cos_lr(lr, total_iters, warmup_total_iters, warmup_lr_start, iters):
     """Cosine learning rate with warm up."""
     if iters <= warmup_total_iters:
-        lr = (lr - warmup_lr_start) * iters / float(
-            warmup_total_iters
-        ) + warmup_lr_start
+        lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start
     else:
         lr *= 0.5 * (
-            1.0
-            + math.cos(
-                math.pi
-                * (iters - warmup_total_iters)
-                / (total_iters - warmup_total_iters)
-            )
+            1.0 + math.cos(math.pi * (iters - warmup_total_iters) / (total_iters - warmup_total_iters))
         )
     return lr
 
@@ -131,18 +123,14 @@ def yolox_warm_cos_lr(
     min_lr = lr * min_lr_ratio
     if iters <= warmup_total_iters:
         # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start
-        lr = (lr - warmup_lr_start) * pow(
-            iters / float(warmup_total_iters), 2
-        ) + warmup_lr_start
+        lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2) + warmup_lr_start
     elif iters >= total_iters - no_aug_iter:
         lr = min_lr
     else:
         lr = min_lr + 0.5 * (lr - min_lr) * (
             1.0
             + math.cos(
-                math.pi
-                * (iters - warmup_total_iters)
-                / (total_iters - warmup_total_iters - no_aug_iter)
+                math.pi * (iters - warmup_total_iters) / (total_iters - warmup_total_iters - no_aug_iter)
             )
         )
     return lr
@@ -165,18 +153,14 @@ def yolox_semi_warm_cos_lr(
     min_lr = lr * min_lr_ratio
     if iters <= warmup_total_iters:
         # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start
-        lr = (lr - warmup_lr_start) * pow(
-            iters / float(warmup_total_iters), 2
-        ) + warmup_lr_start
+        lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2) + warmup_lr_start
     elif iters >= normal_iters + semi_iters:
         lr = min_lr
     elif iters <= normal_iters:
         lr = min_lr + 0.5 * (lr - min_lr) * (
             1.0
             + math.cos(
-                math.pi
-                * (iters - warmup_total_iters)
-                / (total_iters - warmup_total_iters - no_aug_iters)
+                math.pi * (iters - warmup_total_iters) / (total_iters - warmup_total_iters - no_aug_iters)
             )
         )
     else:
@@ -187,10 +171,7 @@ def yolox_semi_warm_cos_lr(
                 * (
                     normal_iters
                     - warmup_total_iters
-                    + (iters - normal_iters)
-                    * iters_per_epoch
-                    * 1.0
-                    / iters_per_epoch_semi
+                    + (iters - normal_iters) * iters_per_epoch * 1.0 / iters_per_epoch_semi
                 )
                 / (total_iters - warmup_total_iters - no_aug_iters)
             )
diff --git a/yolort/utils/metric.py b/yolort/utils/metric.py
index f04013a3..2cb79271 100644
--- a/yolort/utils/metric.py
+++ b/yolort/utils/metric.py
@@ -5,9 +5,9 @@
 import os
 import time
 from collections import defaultdict, deque
-import psutil
 
 import numpy as np
+import psutil
 
 import torch
 
@@ -17,7 +17,7 @@
     "get_total_and_free_memory_in_Mb",
     "occupy_mem",
     "gpu_mem_usage",
-    "mem_usage"
+    "mem_usage",
 ]
 
 
@@ -27,7 +27,7 @@ def get_total_and_free_memory_in_Mb(cuda_device):
     )
     devices_info = devices_info_str.read().strip().split("\n")
     if "CUDA_VISIBLE_DEVICES" in os.environ:
-        visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(',')
+        visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
         cuda_device = int(visible_devices[cuda_device])
     total, used = devices_info[int(cuda_device)].split(",")
     return int(total), int(used)
@@ -134,4 +134,4 @@ def update(self, values=None, **kwargs):
 
     def clear_meters(self):
         for v in self.values():
-            v.clear()
\ No newline at end of file
+            v.clear()
diff --git a/yolort/utils/model_utils.py b/yolort/utils/model_utils.py
index 0b848888..228c3851 100644
--- a/yolort/utils/model_utils.py
+++ b/yolort/utils/model_utils.py
@@ -55,4 +55,4 @@ def recover_status(module):
 
     backup_status(module)
     yield module
-    recover_status(module)
\ No newline at end of file
+    recover_status(module)