From 52fd8b0977fe569811a636276e571240b947833c Mon Sep 17 00:00:00 2001 From: JingxianKe <983231802@qq.com> Date: Fri, 22 Sep 2023 12:02:28 +0800 Subject: [PATCH 1/2] YOUR REVISION MESSAGE --- exps/default/__init__.py | 3 + exps/default/yolov5l.py | 20 ++ exps/default/yolov5m.py | 20 ++ exps/default/yolov5m6.py | 20 ++ exps/default/yolov5n.py | 20 ++ exps/default/yolov5n6.py | 20 ++ exps/default/yolov5s.py | 20 ++ exps/default/yolov5s6.py | 20 ++ exps/default/yolov5ts.py | 20 ++ requirements.txt | 4 +- test/test_data_pipeline.py | 52 +-- test/test_trainer.py | 148 +++++--- tools/eval_metric.py | 2 +- yolort/data/__init__.py | 10 +- yolort/data/_helper.py | 74 +--- yolort/data/builtin_meta.py | 154 -------- yolort/data/coco.py | 115 ------ yolort/data/data_augment.py | 243 +++++++++++++ yolort/data/data_module.py | 2 +- yolort/data/data_prefetcher.py | 51 +++ yolort/data/dataloading.py | 113 ++++++ yolort/data/datasets/__init__.py | 8 + yolort/data/datasets/coco.py | 187 ++++++++++ yolort/data/datasets/coco_classes.py | 86 +++++ yolort/data/datasets/datasets_wrapper.py | 300 ++++++++++++++++ yolort/data/datasets/mosaicdetection.py | 234 ++++++++++++ yolort/data/samplers.py | 85 +++++ yolort/evaluators/__init__.py | 5 + yolort/evaluators/coco_evaluator.py | 317 +++++++++++++++++ yolort/exp/__init__.py | 5 + yolort/exp/base_exp.py | 90 +++++ yolort/exp/default/__init__.py | 28 ++ yolort/exp/yolox_base.py | 387 ++++++++++++++++++++ yolort/trainer/__init__.py | 4 +- yolort/trainer/lightning_task.py | 143 -------- yolort/trainer/trainer.py | 392 ++++++++++++++++++++ yolort/utils/__init__.py | 9 + yolort/utils/allreduce_norm.py | 103 ++++++ yolort/utils/boxes.py | 143 ++++++++ yolort/utils/checkpoint.py | 43 +++ yolort/utils/dist.py | 294 +++++++++++++++ yolort/utils/ema.py | 60 ++++ yolort/utils/logger.py | 434 ++++++++++++++++++++++- yolort/utils/lr_scheduler.py | 205 +++++++++++ yolort/utils/metric.py | 137 +++++++ yolort/utils/model_utils.py | 58 +++ 46 files changed, 
4328 insertions(+), 560 deletions(-) create mode 100644 exps/default/__init__.py create mode 100644 exps/default/yolov5l.py create mode 100644 exps/default/yolov5m.py create mode 100644 exps/default/yolov5m6.py create mode 100644 exps/default/yolov5n.py create mode 100644 exps/default/yolov5n6.py create mode 100644 exps/default/yolov5s.py create mode 100644 exps/default/yolov5s6.py create mode 100644 exps/default/yolov5ts.py delete mode 100644 yolort/data/builtin_meta.py delete mode 100644 yolort/data/coco.py create mode 100644 yolort/data/data_augment.py create mode 100644 yolort/data/data_prefetcher.py create mode 100644 yolort/data/dataloading.py create mode 100644 yolort/data/datasets/__init__.py create mode 100644 yolort/data/datasets/coco.py create mode 100644 yolort/data/datasets/coco_classes.py create mode 100644 yolort/data/datasets/datasets_wrapper.py create mode 100644 yolort/data/datasets/mosaicdetection.py create mode 100644 yolort/data/samplers.py create mode 100644 yolort/evaluators/__init__.py create mode 100644 yolort/evaluators/coco_evaluator.py create mode 100644 yolort/exp/__init__.py create mode 100644 yolort/exp/base_exp.py create mode 100644 yolort/exp/default/__init__.py create mode 100644 yolort/exp/yolox_base.py delete mode 100644 yolort/trainer/lightning_task.py create mode 100644 yolort/trainer/trainer.py create mode 100644 yolort/utils/allreduce_norm.py create mode 100644 yolort/utils/boxes.py create mode 100644 yolort/utils/checkpoint.py create mode 100644 yolort/utils/dist.py create mode 100644 yolort/utils/ema.py create mode 100644 yolort/utils/lr_scheduler.py create mode 100644 yolort/utils/metric.py create mode 100644 yolort/utils/model_utils.py diff --git a/exps/default/__init__.py b/exps/default/__init__.py new file mode 100644 index 00000000..ce9fae06 --- /dev/null +++ b/exps/default/__init__.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. 
diff --git a/exps/default/yolov5l.py b/exps/default/yolov5l.py new file mode 100644 index 00000000..b04d0f90 --- /dev/null +++ b/exps/default/yolov5l.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +import yolort.models as models + +from yolort.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_model(self): + self.model = models.__dict__['yolov5l'](upstream_version="r6.0",) + self.model.train() + return self.model \ No newline at end of file diff --git a/exps/default/yolov5m.py b/exps/default/yolov5m.py new file mode 100644 index 00000000..e33c2771 --- /dev/null +++ b/exps/default/yolov5m.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +import yolort.models as models + +from yolort.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_model(self): + self.model = models.__dict__['yolov5m'](upstream_version="r6.0",) + self.model.train() + return self.model \ No newline at end of file diff --git a/exps/default/yolov5m6.py b/exps/default/yolov5m6.py new file mode 100644 index 00000000..4ac71156 --- /dev/null +++ b/exps/default/yolov5m6.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. 
+ +import os + +import yolort.models as models + +from yolort.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_model(self): + self.model = models.__dict__['yolov5m6'](upstream_version="r6.0",) + self.model.train() + return self.model \ No newline at end of file diff --git a/exps/default/yolov5n.py b/exps/default/yolov5n.py new file mode 100644 index 00000000..72bf63e8 --- /dev/null +++ b/exps/default/yolov5n.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +import yolort.models as models + +from yolort.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_model(self): + self.model = models.__dict__['yolov5n'](upstream_version="r6.0",) + self.model.train() + return self.model \ No newline at end of file diff --git a/exps/default/yolov5n6.py b/exps/default/yolov5n6.py new file mode 100644 index 00000000..3ac2cfd2 --- /dev/null +++ b/exps/default/yolov5n6.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +import yolort.models as models + +from yolort.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_model(self): + self.model = models.__dict__['yolov5n6'](upstream_version="r6.0",) + self.model.train() + return self.model \ No newline at end of file diff --git a/exps/default/yolov5s.py b/exps/default/yolov5s.py new file mode 100644 index 00000000..61736d25 --- /dev/null +++ b/exps/default/yolov5s.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. 
and its affiliates. + +import os + +import yolort.models as models + +from yolort.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_model(self): + self.model = models.__dict__['yolov5s'](upstream_version="r6.0",) + self.model.train() + return self.model \ No newline at end of file diff --git a/exps/default/yolov5s6.py b/exps/default/yolov5s6.py new file mode 100644 index 00000000..cda2a942 --- /dev/null +++ b/exps/default/yolov5s6.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +import yolort.models as models + +from yolort.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_model(self): + self.model = models.__dict__['yolov5s6'](upstream_version="r6.0",) + self.model.train() + return self.model \ No newline at end of file diff --git a/exps/default/yolov5ts.py b/exps/default/yolov5ts.py new file mode 100644 index 00000000..365eab09 --- /dev/null +++ b/exps/default/yolov5ts.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. 
+ +import os + +import yolort.models as models + +from yolort.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_model(self): + self.model = models.__dict__['yolov5ts'](upstream_version="r6.0",) + self.model.train() + return self.model \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 8a349747..af814771 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,4 +29,6 @@ pandas # extras -------------------------------------- # pycocotools on PyPI needs python3.7 as minimal # pycocotools>=2.0.2 # corresponds to https://github.com/ppwwyyxx/cocoapi -thop # FLOPs computation +thop # FLOPs computation +loguru # Python logging made (stupidly) simple +Ninja # a small build system with a focus on speed \ No newline at end of file diff --git a/test/test_data_pipeline.py b/test/test_data_pipeline.py index 2a597eb7..4e626a81 100644 --- a/test/test_data_pipeline.py +++ b/test/test_data_pipeline.py @@ -3,11 +3,23 @@ import numpy as np import pytest -import torch +import sys +sys.path.append("../yolort") +import torch from torch import Tensor -from yolort.data import _helper as data_helper +from yolort.exp import Exp +from yolort.data import DataPrefetcher from yolort.utils import contains_any_tensor +from torch import distributed as dist + + +def get_world_size() -> int: + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size() def test_contains_any_tensor(): @@ -21,28 +33,32 @@ def test_contains_any_tensor(): def test_get_dataset(): # Acquire the images and labels from the coco128 dataset - train_dataset = data_helper.get_dataset(data_root="data-bin", mode="train") + train_dataset = Exp().get_dataset(data_root="data-bin", mode="train", cache_type=None) # Test the datasets - image, target = next(iter(train_dataset)) - assert isinstance(image, Tensor) - 
assert isinstance(target, dict) + image, target, _, _ = next(iter(train_dataset)) + assert image.shape == (3, 640, 640) + assert target.shape == (50, 5) def test_get_dataloader(): batch_size = 8 - data_loader = data_helper.get_dataloader(data_root="data-bin", mode="train", batch_size=batch_size) - # Test the dataloader - images, targets = next(iter(data_loader)) + is_distributed = get_world_size() > 1 + data_loader = Exp().get_data_loader( + batch_size=batch_size, + is_distributed=is_distributed, + no_aug=False, + cache_img=None, + ) + prefetcher = DataPrefetcher(data_loader) + images, targets = prefetcher.next() assert len(images) == batch_size assert isinstance(images[0], Tensor) assert len(images[0]) == 3 assert len(targets) == batch_size - assert isinstance(targets[0], dict) - assert isinstance(targets[0]["image_id"], Tensor) - assert isinstance(targets[0]["boxes"], Tensor) - assert isinstance(targets[0]["labels"], Tensor) - assert isinstance(targets[0]["orig_size"], Tensor) + assert isinstance(targets[0], Tensor) + +test_get_dataloader() @pytest.mark.skip("Remove Lightning dependency") @@ -65,11 +81,3 @@ def test_detection_data_module(): assert isinstance(targets[0]["image_id"], Tensor) assert isinstance(targets[0]["boxes"], Tensor) assert isinstance(targets[0]["labels"], Tensor) - - -def test_prepare_coco128(): - data_path = Path("data-bin") - coco128_dirname = "coco128" - data_helper.prepare_coco128(data_path, dirname=coco128_dirname) - annotation_file = data_path / coco128_dirname / "annotations" / "instances_train2017.json" - assert annotation_file.is_file() diff --git a/test/test_trainer.py b/test/test_trainer.py index 9be94682..be1573c7 100644 --- a/test/test_trainer.py +++ b/test/test_trainer.py @@ -1,52 +1,108 @@ # Copyright (c) 2021, yolort team. All rights reserved. 
-from pathlib import Path +import argparse +import importlib -import pytest -from yolort.data import _helper as data_helper +import sys +sys.path.append("../yolort/") +def make_parser(): + parser = argparse.ArgumentParser("YOLOX train parser") + parser.add_argument("-expn", "--experiment-name", type=str, default="yolov5n") + parser.add_argument("-n", "--name", type=str, default="yolov5n", help="model name") + + # distributed + parser.add_argument( + "--dist-backend", default="nccl", type=str, help="distributed backend" + ) + parser.add_argument( + "--dist-url", + default=None, + type=str, + help="url used to set up distributed training", + ) + parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size") + parser.add_argument( + "-d", "--devices", default=None, type=int, help="device for training" + ) + parser.add_argument( + "-f", + "--exp_file", + default=None, + type=str, + help="plz input your experiment description file", + ) + parser.add_argument( + "--resume", default=False, action="store_true", help="resume training" + ) + parser.add_argument("-c", "--ckpt", default=None, type=str, help="checkpoint file") + parser.add_argument( + "-e", + "--start_epoch", + default=None, + type=int, + help="resume training start epoch", + ) + parser.add_argument( + "--num_machines", default=1, type=int, help="num of node for training" + ) + parser.add_argument( + "--machine_rank", default=0, type=int, help="node rank for multi-node training" + ) + parser.add_argument( + "--fp16", + dest="fp16", + default=False, + action="store_true", + help="Adopting mix precision training.", + ) + parser.add_argument( + "--cache", + type=str, + nargs="?", + const="ram", + help="Caching imgs to ram/disk for fast training.", + ) + parser.add_argument( + "-o", + "--occupy", + dest="occupy", + default=False, + action="store_true", + help="occupy GPU memory first for training.", + ) + parser.add_argument( + "-l", + "--logger", + type=str, + help="Logger to be used for 
metrics. \ + Implemented loggers include `tensorboard` and `wandb`.", + default="tensorboard" + ) + parser.add_argument( + "opts", + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER, + ) + return parser -@pytest.mark.skip("Remove Lightning dependency") def test_training_step(): - import pytorch_lightning as pl - from yolort.data.data_module import DetectionDataModule - from yolort.trainer import DefaultTask - - # Setup the DataModule - data_path = "data-bin" - train_dataset = data_helper.get_dataset(data_root=data_path, mode="train") - val_dataset = data_helper.get_dataset(data_root=data_path, mode="val") - data_module = DetectionDataModule(train_dataset, val_dataset, batch_size=8) - # Load model - model = DefaultTask(arch="yolov5n") - model = model.train() - # Trainer - trainer = pl.Trainer(max_epochs=1) - trainer.fit(model, data_module) - - -@pytest.mark.skip("Remove Lightning dependency") -@pytest.mark.parametrize("arch, version, map5095, map50", [("yolov5s", "r4.0", 42.5, 65.3)]) -def test_test_epoch_end(arch, version, map5095, map50): - import pytorch_lightning as pl - from yolort.trainer import DefaultTask - - # Acquire the annotation file - data_path = Path("data-bin") - coco128_dirname = "coco128" - data_helper.prepare_coco128(data_path, dirname=coco128_dirname) - annotation_file = data_path / coco128_dirname / "annotations" / "instances_train2017.json" - - # Get dataloader to test - val_dataloader = data_helper.get_dataloader(data_root=data_path, mode="val") - - # Load model - model = DefaultTask(arch=arch, version=version, pretrained=True, annotation_path=annotation_file) - - # test step - trainer = pl.Trainer(max_epochs=1) - trainer.test(model, dataloaders=val_dataloader) - # test epoch end - results = model.evaluator.compute() - assert results["AP"] > map5095 - assert results["AP50"] > map50 + args = make_parser().parse_args() + module_name = ".".join(["yolort", "exp", "default", args.name]) + exp = 
importlib.import_module(module_name).Exp() + exp.merge(args.opts) + h, w = exp.input_size + assert h % 32 == 0 and w % 32 == 0, "input size must be multiples of 32" + + from yolort.trainer import Trainer + trainer = Trainer(exp, args) + trainer.train() + +def test_test_epoch_end(): + args = make_parser().parse_args() + module_name = ".".join(["yolort", "exp", "default", args.name]) + exp = importlib.import_module(module_name).Exp() + exp.merge(args.opts) + + main(exp, args) diff --git a/tools/eval_metric.py b/tools/eval_metric.py index 0ab6adae..0538f0df 100644 --- a/tools/eval_metric.py +++ b/tools/eval_metric.py @@ -8,7 +8,7 @@ import torchvision import yolort from yolort.data import _helper as data_helper -from yolort.data.coco import COCODetection +from yolort.data.datasets.coco import COCODetection from yolort.data.coco_eval import COCOEvaluator from yolort.data.transforms import collate_fn, default_val_transforms from yolort.utils.logger import MetricLogger diff --git a/yolort/data/__init__.py b/yolort/data/__init__.py index efd93ced..5740093a 100644 --- a/yolort/data/__init__.py +++ b/yolort/data/__init__.py @@ -1 +1,9 @@ -# Copyright (c) 2021, yolort team. All rights reserved. +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. 
+ +from .data_augment import TrainTransform, ValTransform +from .data_prefetcher import DataPrefetcher +from .dataloading import DataLoader, get_yolox_datadir, worker_init_reset_seed +from .datasets import * +from .samplers import InfiniteSampler, YoloBatchSampler \ No newline at end of file diff --git a/yolort/data/_helper.py b/yolort/data/_helper.py index 2a95af9a..66fbf0cb 100644 --- a/yolort/data/_helper.py +++ b/yolort/data/_helper.py @@ -7,8 +7,7 @@ import torch from tabulate import tabulate -from .coco import COCODetection -from .transforms import collate_fn, default_train_transforms, default_val_transforms +from .transforms import collate_fn def create_small_table(small_dict): @@ -45,74 +44,3 @@ def get_coco_api_from_dataset(dataset): return dataset.coco else: raise NotImplementedError("Currently only supports COCO datasets") - - -def prepare_coco128( - data_path: PosixPath, - dirname: str = "coco128", -) -> None: - """ - Prepare coco128 dataset to test. - - Args: - data_path (PosixPath): root path of coco128 dataset. - dirname (str): the directory name of coco128 dataset. Default: 'coco128'. 
- """ - logger = logging.getLogger(__name__) - - if not data_path.is_dir(): - logger.info(f"Create a new directory: {data_path}") - data_path.mkdir(parents=True, exist_ok=True) - - zip_path = data_path / "coco128.zip" - coco128_url = "https://github.com/zhiqwang/yolort/releases/download/v0.3.0/coco128.zip" - if not zip_path.is_file(): - logger.info(f"Downloading coco128 datasets form {coco128_url}") - torch.hub.download_url_to_file(coco128_url, zip_path, hash_prefix="a67d2887") - - coco128_path = data_path / dirname - if not coco128_path.is_dir(): - logger.info(f"Unzipping dataset to {coco128_path}") - with ZipFile(zip_path, "r") as zip_obj: - zip_obj.extractall(data_path) - - -def get_dataset(data_root: str, mode: str = "val"): - # Acquire the images and labels from the coco128 dataset - data_path = Path(data_root) - coco128_dirname = "coco128" - coco128_path = data_path / coco128_dirname - image_root = coco128_path / "images" / "train2017" - annotation_file = coco128_path / "annotations" / "instances_train2017.json" - - if not annotation_file.is_file(): - prepare_coco128(data_path, dirname=coco128_dirname) - - if mode == "train": - dataset = COCODetection(image_root, annotation_file, default_train_transforms()) - elif mode == "val": - dataset = COCODetection(image_root, annotation_file, default_val_transforms()) - else: - raise NotImplementedError(f"Currently not supports mode {mode}") - - return dataset - - -def get_dataloader(data_root: str, mode: str = "val", batch_size: int = 4): - # Prepare the datasets for training - # Acquire the images and labels from the coco128 dataset - dataset = get_dataset(data_root=data_root, mode=mode) - - # We adopt the sequential sampler in order to repeat the experiment - sampler = torch.utils.data.SequentialSampler(dataset) - - loader = torch.utils.data.DataLoader( - dataset, - batch_size, - sampler=sampler, - drop_last=False, - collate_fn=collate_fn, - num_workers=0, - ) - - return loader diff --git 
a/yolort/data/builtin_meta.py b/yolort/data/builtin_meta.py deleted file mode 100644 index be2fc7ab..00000000 --- a/yolort/data/builtin_meta.py +++ /dev/null @@ -1,154 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -""" -Note: -For your custom dataset, there is no need to hard-code metadata anywhere in the code. -For example, for COCO-format dataset, metadata will be obtained automatically -when calling `load_coco_json`. For other dataset, metadata may also be obtained in other ways -during loading. - -However, we hard-coded metadata for a few common dataset here. -The only goal is to allow users who don't have these dataset to use pre-trained models. -Users don't have to download a COCO json (which contains metadata), in order to visualize a -COCO model (with correct class names and colors). -""" - - -# All coco categories, together with their nice-looking visualization colors -# It's from https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json -COCO_CATEGORIES = [ - {"id": 1, "color": [220, 20, 60], "isthing": 1, "name": "person"}, - {"id": 2, "color": [119, 11, 32], "isthing": 1, "name": "bicycle"}, - {"id": 3, "color": [0, 0, 142], "isthing": 1, "name": "car"}, - {"id": 4, "color": [0, 0, 230], "isthing": 1, "name": "motorcycle"}, - {"id": 5, "color": [106, 0, 228], "isthing": 1, "name": "airplane"}, - {"id": 6, "color": [0, 60, 100], "isthing": 1, "name": "bus"}, - {"id": 7, "color": [0, 80, 100], "isthing": 1, "name": "train"}, - {"id": 8, "color": [0, 0, 70], "isthing": 1, "name": "truck"}, - {"id": 9, "color": [0, 0, 192], "isthing": 1, "name": "boat"}, - {"id": 10, "color": [250, 170, 30], "isthing": 1, "name": "traffic light"}, - {"id": 11, "color": [100, 170, 30], "isthing": 1, "name": "fire hydrant"}, - {"id": 13, "color": [220, 220, 0], "isthing": 1, "name": "stop sign"}, - {"id": 14, "color": [175, 116, 175], "isthing": 1, "name": "parking meter"}, - {"id": 15, "color": [250, 0, 30], 
"isthing": 1, "name": "bench"}, - {"id": 16, "color": [165, 42, 42], "isthing": 1, "name": "bird"}, - {"id": 17, "color": [255, 77, 255], "isthing": 1, "name": "cat"}, - {"id": 18, "color": [0, 226, 252], "isthing": 1, "name": "dog"}, - {"id": 19, "color": [182, 182, 255], "isthing": 1, "name": "horse"}, - {"id": 20, "color": [0, 82, 0], "isthing": 1, "name": "sheep"}, - {"id": 21, "color": [120, 166, 157], "isthing": 1, "name": "cow"}, - {"id": 22, "color": [110, 76, 0], "isthing": 1, "name": "elephant"}, - {"id": 23, "color": [174, 57, 255], "isthing": 1, "name": "bear"}, - {"id": 24, "color": [199, 100, 0], "isthing": 1, "name": "zebra"}, - {"id": 25, "color": [72, 0, 118], "isthing": 1, "name": "giraffe"}, - {"id": 27, "color": [255, 179, 240], "isthing": 1, "name": "backpack"}, - {"id": 28, "color": [0, 125, 92], "isthing": 1, "name": "umbrella"}, - {"id": 31, "color": [209, 0, 151], "isthing": 1, "name": "handbag"}, - {"id": 32, "color": [188, 208, 182], "isthing": 1, "name": "tie"}, - {"id": 33, "color": [0, 220, 176], "isthing": 1, "name": "suitcase"}, - {"id": 34, "color": [255, 99, 164], "isthing": 1, "name": "frisbee"}, - {"id": 35, "color": [92, 0, 73], "isthing": 1, "name": "skis"}, - {"id": 36, "color": [133, 129, 255], "isthing": 1, "name": "snowboard"}, - {"id": 37, "color": [78, 180, 255], "isthing": 1, "name": "sports ball"}, - {"id": 38, "color": [0, 228, 0], "isthing": 1, "name": "kite"}, - {"id": 39, "color": [174, 255, 243], "isthing": 1, "name": "baseball bat"}, - {"id": 40, "color": [45, 89, 255], "isthing": 1, "name": "baseball glove"}, - {"id": 41, "color": [134, 134, 103], "isthing": 1, "name": "skateboard"}, - {"id": 42, "color": [145, 148, 174], "isthing": 1, "name": "surfboard"}, - {"id": 43, "color": [255, 208, 186], "isthing": 1, "name": "tennis racket"}, - {"id": 44, "color": [197, 226, 255], "isthing": 1, "name": "bottle"}, - {"id": 46, "color": [171, 134, 1], "isthing": 1, "name": "wine glass"}, - {"id": 47, "color": [109, 63, 
54], "isthing": 1, "name": "cup"}, - {"id": 48, "color": [207, 138, 255], "isthing": 1, "name": "fork"}, - {"id": 49, "color": [151, 0, 95], "isthing": 1, "name": "knife"}, - {"id": 50, "color": [9, 80, 61], "isthing": 1, "name": "spoon"}, - {"id": 51, "color": [84, 105, 51], "isthing": 1, "name": "bowl"}, - {"id": 52, "color": [74, 65, 105], "isthing": 1, "name": "banana"}, - {"id": 53, "color": [166, 196, 102], "isthing": 1, "name": "apple"}, - {"id": 54, "color": [208, 195, 210], "isthing": 1, "name": "sandwich"}, - {"id": 55, "color": [255, 109, 65], "isthing": 1, "name": "orange"}, - {"id": 56, "color": [0, 143, 149], "isthing": 1, "name": "broccoli"}, - {"id": 57, "color": [179, 0, 194], "isthing": 1, "name": "carrot"}, - {"id": 58, "color": [209, 99, 106], "isthing": 1, "name": "hot dog"}, - {"id": 59, "color": [5, 121, 0], "isthing": 1, "name": "pizza"}, - {"id": 60, "color": [227, 255, 205], "isthing": 1, "name": "donut"}, - {"id": 61, "color": [147, 186, 208], "isthing": 1, "name": "cake"}, - {"id": 62, "color": [153, 69, 1], "isthing": 1, "name": "chair"}, - {"id": 63, "color": [3, 95, 161], "isthing": 1, "name": "couch"}, - {"id": 64, "color": [163, 255, 0], "isthing": 1, "name": "potted plant"}, - {"id": 65, "color": [119, 0, 170], "isthing": 1, "name": "bed"}, - {"id": 67, "color": [0, 182, 199], "isthing": 1, "name": "dining table"}, - {"id": 70, "color": [0, 165, 120], "isthing": 1, "name": "toilet"}, - {"id": 72, "color": [183, 130, 88], "isthing": 1, "name": "tv"}, - {"id": 73, "color": [95, 32, 0], "isthing": 1, "name": "laptop"}, - {"id": 74, "color": [130, 114, 135], "isthing": 1, "name": "mouse"}, - {"id": 75, "color": [110, 129, 133], "isthing": 1, "name": "remote"}, - {"id": 76, "color": [166, 74, 118], "isthing": 1, "name": "keyboard"}, - {"id": 77, "color": [219, 142, 185], "isthing": 1, "name": "cell phone"}, - {"id": 78, "color": [79, 210, 114], "isthing": 1, "name": "microwave"}, - {"id": 79, "color": [178, 90, 62], "isthing": 1, 
"name": "oven"}, - {"id": 80, "color": [65, 70, 15], "isthing": 1, "name": "toaster"}, - {"id": 81, "color": [127, 167, 115], "isthing": 1, "name": "sink"}, - {"id": 82, "color": [59, 105, 106], "isthing": 1, "name": "refrigerator"}, - {"id": 84, "color": [142, 108, 45], "isthing": 1, "name": "book"}, - {"id": 85, "color": [196, 172, 0], "isthing": 1, "name": "clock"}, - {"id": 86, "color": [95, 54, 80], "isthing": 1, "name": "vase"}, - {"id": 87, "color": [128, 76, 255], "isthing": 1, "name": "scissors"}, - {"id": 88, "color": [201, 57, 1], "isthing": 1, "name": "teddy bear"}, - {"id": 89, "color": [246, 0, 122], "isthing": 1, "name": "hair drier"}, - {"id": 90, "color": [191, 162, 208], "isthing": 1, "name": "toothbrush"}, - {"id": 92, "color": [255, 255, 128], "isthing": 0, "name": "banner"}, - {"id": 93, "color": [147, 211, 203], "isthing": 0, "name": "blanket"}, - {"id": 95, "color": [150, 100, 100], "isthing": 0, "name": "bridge"}, - {"id": 100, "color": [168, 171, 172], "isthing": 0, "name": "cardboard"}, - {"id": 107, "color": [146, 112, 198], "isthing": 0, "name": "counter"}, - {"id": 109, "color": [210, 170, 100], "isthing": 0, "name": "curtain"}, - {"id": 112, "color": [92, 136, 89], "isthing": 0, "name": "door-stuff"}, - {"id": 118, "color": [218, 88, 184], "isthing": 0, "name": "floor-wood"}, - {"id": 119, "color": [241, 129, 0], "isthing": 0, "name": "flower"}, - {"id": 122, "color": [217, 17, 255], "isthing": 0, "name": "fruit"}, - {"id": 125, "color": [124, 74, 181], "isthing": 0, "name": "gravel"}, - {"id": 128, "color": [70, 70, 70], "isthing": 0, "name": "house"}, - {"id": 130, "color": [255, 228, 255], "isthing": 0, "name": "light"}, - {"id": 133, "color": [154, 208, 0], "isthing": 0, "name": "mirror-stuff"}, - {"id": 138, "color": [193, 0, 92], "isthing": 0, "name": "net"}, - {"id": 141, "color": [76, 91, 113], "isthing": 0, "name": "pillow"}, - {"id": 144, "color": [255, 180, 195], "isthing": 0, "name": "platform"}, - {"id": 145, "color": 
[106, 154, 176], "isthing": 0, "name": "playingfield"}, - {"id": 147, "color": [230, 150, 140], "isthing": 0, "name": "railroad"}, - {"id": 148, "color": [60, 143, 255], "isthing": 0, "name": "river"}, - {"id": 149, "color": [128, 64, 128], "isthing": 0, "name": "road"}, - {"id": 151, "color": [92, 82, 55], "isthing": 0, "name": "roof"}, - {"id": 154, "color": [254, 212, 124], "isthing": 0, "name": "sand"}, - {"id": 155, "color": [73, 77, 174], "isthing": 0, "name": "sea"}, - {"id": 156, "color": [255, 160, 98], "isthing": 0, "name": "shelf"}, - {"id": 159, "color": [255, 255, 255], "isthing": 0, "name": "snow"}, - {"id": 161, "color": [104, 84, 109], "isthing": 0, "name": "stairs"}, - {"id": 166, "color": [169, 164, 131], "isthing": 0, "name": "tent"}, - {"id": 168, "color": [225, 199, 255], "isthing": 0, "name": "towel"}, - {"id": 171, "color": [137, 54, 74], "isthing": 0, "name": "wall-brick"}, - {"id": 175, "color": [135, 158, 223], "isthing": 0, "name": "wall-stone"}, - {"id": 176, "color": [7, 246, 231], "isthing": 0, "name": "wall-tile"}, - {"id": 177, "color": [107, 255, 200], "isthing": 0, "name": "wall-wood"}, - {"id": 178, "color": [58, 41, 149], "isthing": 0, "name": "water-other"}, - {"id": 180, "color": [183, 121, 142], "isthing": 0, "name": "window-blind"}, - {"id": 181, "color": [255, 73, 97], "isthing": 0, "name": "window-other"}, - {"id": 184, "color": [107, 142, 35], "isthing": 0, "name": "tree-merged"}, - {"id": 185, "color": [190, 153, 153], "isthing": 0, "name": "fence-merged"}, - {"id": 186, "color": [146, 139, 141], "isthing": 0, "name": "ceiling-merged"}, - {"id": 187, "color": [70, 130, 180], "isthing": 0, "name": "sky-other-merged"}, - {"id": 188, "color": [134, 199, 156], "isthing": 0, "name": "cabinet-merged"}, - {"id": 189, "color": [209, 226, 140], "isthing": 0, "name": "table-merged"}, - {"id": 190, "color": [96, 36, 108], "isthing": 0, "name": "floor-other-merged"}, - {"id": 191, "color": [96, 96, 96], "isthing": 0, "name": 
"pavement-merged"}, - {"id": 192, "color": [64, 170, 64], "isthing": 0, "name": "mountain-merged"}, - {"id": 193, "color": [152, 251, 152], "isthing": 0, "name": "grass-merged"}, - {"id": 194, "color": [208, 229, 228], "isthing": 0, "name": "dirt-merged"}, - {"id": 195, "color": [206, 186, 171], "isthing": 0, "name": "paper-merged"}, - {"id": 196, "color": [152, 161, 64], "isthing": 0, "name": "food-other-merged"}, - {"id": 197, "color": [116, 112, 0], "isthing": 0, "name": "building-other-merged"}, - {"id": 198, "color": [0, 114, 143], "isthing": 0, "name": "rock-merged"}, - {"id": 199, "color": [102, 102, 156], "isthing": 0, "name": "wall-other-merged"}, - {"id": 200, "color": [250, 141, 255], "isthing": 0, "name": "rug-merged"}, -] diff --git a/yolort/data/coco.py b/yolort/data/coco.py deleted file mode 100644 index 3e693ad4..00000000 --- a/yolort/data/coco.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -""" -COCO dataset which returns image_id for evaluation. 
-Mostly copy-paste from https://github.com/pytorch/vision/blob/13b35ff/references/detection/coco_utils.py -""" -import torch -import torchvision -from yolort.utils import is_module_available, requires_module - -if is_module_available("pycocotools"): - from pycocotools import mask as coco_mask - - -class COCODetection(torchvision.datasets.CocoDetection): - def __init__(self, img_folder, ann_file, transforms, return_masks=False): - super().__init__(img_folder, ann_file) - self._transforms = transforms - - json_category_id_to_contiguous_id = {v: i for i, v in enumerate(self.coco.getCatIds())} - self.prepare = ConvertCocoPolysToMask(json_category_id_to_contiguous_id, return_masks) - - def __getitem__(self, idx): - img, target = super().__getitem__(idx) - image_id = self.ids[idx] - target = {"image_id": image_id, "annotations": target} - img, target = self.prepare(img, target) - if self._transforms is not None: - img, target = self._transforms(img, target) - return img, target - - -class ConvertCocoPolysToMask: - def __init__(self, json_category_id_maps, return_masks=False): - self.json_category_id_to_contiguous_id = json_category_id_maps - self.return_masks = return_masks - - def __call__(self, image, target): - w, h = image.size - - image_id = target["image_id"] - image_id = torch.tensor([image_id]) - - anno = target["annotations"] - - anno = [obj for obj in anno if "iscrowd" not in obj or obj["iscrowd"] == 0] - - boxes = [obj["bbox"] for obj in anno] - # guard against no boxes via resizing - boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) - # BoxMode: convert from XYWH_ABS to XYXY_ABS - boxes[:, 2:] += boxes[:, :2] - boxes[:, 0::2].clamp_(min=0, max=w) - boxes[:, 1::2].clamp_(min=0, max=h) - - classes = [obj["category_id"] for obj in anno] - classes = [self.json_category_id_to_contiguous_id[c] for c in classes] - classes = torch.tensor(classes, dtype=torch.int64) - - if self.return_masks: - segmentations = [obj["segmentation"] for obj in anno] - 
masks = convert_coco_poly_to_mask(segmentations, h, w) - - keypoints = None - if anno and "keypoints" in anno[0]: - keypoints = [obj["keypoints"] for obj in anno] - keypoints = torch.as_tensor(keypoints, dtype=torch.float32) - num_keypoints = keypoints.shape[0] - if num_keypoints: - keypoints = keypoints.view(num_keypoints, -1, 3) - - keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) - boxes = boxes[keep] - classes = classes[keep] - if self.return_masks: - masks = masks[keep] - if keypoints is not None: - keypoints = keypoints[keep] - - target = {} - target["boxes"] = boxes - target["labels"] = classes - if self.return_masks: - target["masks"] = masks - target["image_id"] = image_id - if keypoints is not None: - target["keypoints"] = keypoints - - # for conversion to coco api - area = torch.tensor([obj["area"] for obj in anno]) - iscrowd = torch.tensor([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in anno]) - target["area"] = area[keep] - target["iscrowd"] = iscrowd[keep] - - target["orig_size"] = torch.as_tensor([int(h), int(w)]) - target["size"] = torch.as_tensor([int(h), int(w)]) - - return image, target - - -@requires_module("pycocotools") -def convert_coco_poly_to_mask(segmentations, height, width): - masks = [] - for polygons in segmentations: - rles = coco_mask.frPyObjects(polygons, height, width) - mask = coco_mask.decode(rles) - if len(mask.shape) < 3: - mask = mask[..., None] - mask = torch.as_tensor(mask, dtype=torch.uint8) - mask = mask.any(dim=2) - masks.append(mask) - if masks: - masks = torch.stack(masks, dim=0) - else: - masks = torch.zeros((0, height, width), dtype=torch.uint8) - return masks diff --git a/yolort/data/data_augment.py b/yolort/data/data_augment.py new file mode 100644 index 00000000..4e53f6c2 --- /dev/null +++ b/yolort/data/data_augment.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. +""" +Data augmentation functionality. 
Passed as callable transformations to +Dataset classes. + +The data augmentation procedures were interpreted from @weiliu89's SSD paper +http://arxiv.org/abs/1512.02325 +""" + +import math +import random + +import cv2 +import numpy as np + +from yolort.utils import xyxy2cxcywh + + +def augment_hsv(img, hgain=5, sgain=30, vgain=30): + hsv_augs = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] # random gains + hsv_augs *= np.random.randint(0, 2, 3) # random selection of h, s, v + hsv_augs = hsv_augs.astype(np.int16) + img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int16) + + img_hsv[..., 0] = (img_hsv[..., 0] + hsv_augs[0]) % 180 + img_hsv[..., 1] = np.clip(img_hsv[..., 1] + hsv_augs[1], 0, 255) + img_hsv[..., 2] = np.clip(img_hsv[..., 2] + hsv_augs[2], 0, 255) + + cv2.cvtColor(img_hsv.astype(img.dtype), cv2.COLOR_HSV2BGR, dst=img) # no return needed + + +def get_aug_params(value, center=0): + if isinstance(value, float): + return random.uniform(center - value, center + value) + elif len(value) == 2: + return random.uniform(value[0], value[1]) + else: + raise ValueError( + "Affine params should be either a sequence containing two values\ + or single float values. 
Got {}".format(value) + ) + + +def get_affine_matrix( + target_size, + degrees=10, + translate=0.1, + scales=0.1, + shear=10, +): + twidth, theight = target_size + + # Rotation and Scale + angle = get_aug_params(degrees) + scale = get_aug_params(scales, center=1.0) + + if scale <= 0.0: + raise ValueError("Argument scale should be positive") + + R = cv2.getRotationMatrix2D(angle=angle, center=(0, 0), scale=scale) + + M = np.ones([2, 3]) + # Shear + shear_x = math.tan(get_aug_params(shear) * math.pi / 180) + shear_y = math.tan(get_aug_params(shear) * math.pi / 180) + + M[0] = R[0] + shear_y * R[1] + M[1] = R[1] + shear_x * R[0] + + # Translation + translation_x = get_aug_params(translate) * twidth # x translation (pixels) + translation_y = get_aug_params(translate) * theight # y translation (pixels) + + M[0, 2] = translation_x + M[1, 2] = translation_y + + return M, scale + + +def apply_affine_to_bboxes(targets, target_size, M, scale): + num_gts = len(targets) + + # warp corner points + twidth, theight = target_size + corner_points = np.ones((4 * num_gts, 3)) + corner_points[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape( + 4 * num_gts, 2 + ) # x1y1, x2y2, x1y2, x2y1 + corner_points = np.dot(corner_points, M.T) # apply affine transform + corner_points = corner_points.reshape(num_gts, 8) + + # create new boxes + corner_xs = corner_points[:, 0::2] + corner_ys = corner_points[:, 1::2] + new_bboxes = ( + np.concatenate( + (corner_xs.min(1), corner_ys.min(1), corner_xs.max(1), corner_ys.max(1)) + ) + .reshape(4, num_gts) + .T + ) + + # clip boxes + new_bboxes[:, 0::2] = new_bboxes[:, 0::2].clip(0, twidth) + new_bboxes[:, 1::2] = new_bboxes[:, 1::2].clip(0, theight) + + targets[:, :4] = new_bboxes + + return targets + + +def random_affine( + img, + targets=(), + target_size=(640, 640), + degrees=10, + translate=0.1, + scales=0.1, + shear=10, +): + M, scale = get_affine_matrix(target_size, degrees, translate, scales, shear) + + img = cv2.warpAffine(img, M, 
dsize=target_size, borderValue=(114, 114, 114)) + + # Transform label coordinates + if len(targets) > 0: + targets = apply_affine_to_bboxes(targets, target_size, M, scale) + + return img, targets + + +def _mirror(image, boxes, prob=0.5): + _, width, _ = image.shape + if random.random() < prob: + image = image[:, ::-1] + boxes[:, 0::2] = width - boxes[:, 2::-2] + return image, boxes + + +def preproc(img, input_size, swap=(2, 0, 1)): + if len(img.shape) == 3: + padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114 + else: + padded_img = np.ones(input_size, dtype=np.uint8) * 114 + + r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + return padded_img, r + + +class TrainTransform: + def __init__(self, max_labels=50, flip_prob=0.5, hsv_prob=1.0): + self.max_labels = max_labels + self.flip_prob = flip_prob + self.hsv_prob = hsv_prob + + def __call__(self, image, targets, input_dim): + boxes = targets[:, :4].copy() + labels = targets[:, 4].copy() + if len(boxes) == 0: + targets = np.zeros((self.max_labels, 5), dtype=np.float32) + image, r_o = preproc(image, input_dim) + return image, targets + + image_o = image.copy() + targets_o = targets.copy() + height_o, width_o, _ = image_o.shape + boxes_o = targets_o[:, :4] + labels_o = targets_o[:, 4] + # bbox_o: [xyxy] to [c_x,c_y,w,h] + boxes_o = xyxy2cxcywh(boxes_o) + + if random.random() < self.hsv_prob: + augment_hsv(image) + image_t, boxes = _mirror(image, boxes, self.flip_prob) + height, width, _ = image_t.shape + image_t, r_ = preproc(image_t, input_dim) + # boxes [xyxy] 2 [cx,cy,w,h] + boxes = xyxy2cxcywh(boxes) + boxes *= r_ + + mask_b = np.minimum(boxes[:, 
2], boxes[:, 3]) > 1 + boxes_t = boxes[mask_b] + labels_t = labels[mask_b] + + if len(boxes_t) == 0: + image_t, r_o = preproc(image_o, input_dim) + boxes_o *= r_o + boxes_t = boxes_o + labels_t = labels_o + + labels_t = np.expand_dims(labels_t, 1) + + targets_t = np.hstack((labels_t, boxes_t)) + padded_labels = np.zeros((self.max_labels, 5)) + padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[ + : self.max_labels + ] + padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32) + return image_t, padded_labels + + +class ValTransform: + """ + Defines the transformations that should be applied to test PIL image + for input into the network + + dimension -> tensorize -> color adj + + Arguments: + resize (int): input dimension to SSD + rgb_means ((int,int,int)): average RGB of the dataset + (104,117,123) + swap ((int,int,int)): final order of channels + + Returns: + transform (transform) : callable transform to be applied to test/val + data + """ + + def __init__(self, swap=(2, 0, 1), legacy=False): + self.swap = swap + self.legacy = legacy + + # assume input is cv2 img for now + def __call__(self, img, res, input_size): + img, _ = preproc(img, input_size, self.swap) + if self.legacy: + img = img[::-1, :, :].copy() + img /= 255.0 + img -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1) + img /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1) + return img, np.zeros((1, 5)) diff --git a/yolort/data/data_module.py b/yolort/data/data_module.py index 55510da5..d17d7327 100644 --- a/yolort/data/data_module.py +++ b/yolort/data/data_module.py @@ -10,7 +10,7 @@ if is_module_available("pytorch_lightning"): from pytorch_lightning import LightningDataModule -from .coco import COCODetection +from yolort.data.datasets.coco import COCODetection from .transforms import collate_fn, default_train_transforms, default_val_transforms from .voc import VOCDetection diff --git a/yolort/data/data_prefetcher.py b/yolort/data/data_prefetcher.py new file mode 100644 
index 00000000..a118cf4e --- /dev/null +++ b/yolort/data/data_prefetcher.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import torch + + +class DataPrefetcher: + """ + DataPrefetcher is inspired by code of following file: + https://github.com/NVIDIA/apex/blob/master/examples/imagenet/main_amp.py + It could speedup your pytorch dataloader. For more information, please check + https://github.com/NVIDIA/apex/issues/304#issuecomment-493562789. + """ + + def __init__(self, loader): + self.loader = iter(loader) + self.stream = torch.cuda.Stream() + self.input_cuda = self._input_cuda_for_image + self.record_stream = DataPrefetcher._record_stream_for_image + self.preload() + + def preload(self): + try: + self.next_input, self.next_target, _, _ = next(self.loader) + except StopIteration: + self.next_input = None + self.next_target = None + return + + with torch.cuda.stream(self.stream): + self.input_cuda() + self.next_target = self.next_target.cuda(non_blocking=True) + + def next(self): + torch.cuda.current_stream().wait_stream(self.stream) + input = self.next_input + target = self.next_target + if input is not None: + self.record_stream(input) + if target is not None: + target.record_stream(torch.cuda.current_stream()) + self.preload() + return input, target + + def _input_cuda_for_image(self): + self.next_input = self.next_input.cuda(non_blocking=True) + + @staticmethod + def _record_stream_for_image(input): + input.record_stream(torch.cuda.current_stream()) diff --git a/yolort/data/dataloading.py b/yolort/data/dataloading.py new file mode 100644 index 00000000..6fecf3f0 --- /dev/null +++ b/yolort/data/dataloading.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. 
+ +import os +import random +import uuid + +import numpy as np + +import torch +from torch.utils.data.dataloader import DataLoader as torchDataLoader +from torch.utils.data.dataloader import default_collate + +from .samplers import YoloBatchSampler + + +def get_yolox_datadir(): + """ + get dataset dir of YOLOX. If environment variable named `YOLOX_DATADIR` is set, + this function will return value of the environment variable. Otherwise, use data + """ + yolox_datadir = os.getenv("YOLOX_DATADIR", None) + if yolox_datadir is None: + import yolox + + yolox_path = os.path.dirname(os.path.dirname(yolox.__file__)) + yolox_datadir = os.path.join(yolox_path, "datasets") + return yolox_datadir + + +class DataLoader(torchDataLoader): + """ + Lightnet dataloader that enables on the fly resizing of the images. + See :class:`torch.utils.data.DataLoader` for more information on the arguments. + Check more on the following website: + https://gitlab.com/EAVISE/lightnet/-/blob/master/lightnet/data/_dataloading.py + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.__initialized = False + shuffle = False + batch_sampler = None + if len(args) > 5: + shuffle = args[2] + sampler = args[3] + batch_sampler = args[4] + elif len(args) > 4: + shuffle = args[2] + sampler = args[3] + if "batch_sampler" in kwargs: + batch_sampler = kwargs["batch_sampler"] + elif len(args) > 3: + shuffle = args[2] + if "sampler" in kwargs: + sampler = kwargs["sampler"] + if "batch_sampler" in kwargs: + batch_sampler = kwargs["batch_sampler"] + else: + if "shuffle" in kwargs: + shuffle = kwargs["shuffle"] + if "sampler" in kwargs: + sampler = kwargs["sampler"] + if "batch_sampler" in kwargs: + batch_sampler = kwargs["batch_sampler"] + + # Use custom BatchSampler + if batch_sampler is None: + if sampler is None: + if shuffle: + sampler = torch.utils.data.sampler.RandomSampler(self.dataset) + # sampler = torch.utils.data.DistributedSampler(self.dataset) + else: + sampler = 
torch.utils.data.sampler.SequentialSampler(self.dataset) + batch_sampler = YoloBatchSampler( + sampler, + self.batch_size, + self.drop_last, + input_dimension=self.dataset.input_dim, + ) + # batch_sampler = IterationBasedBatchSampler(batch_sampler, num_iterations = + + self.batch_sampler = batch_sampler + + self.__initialized = True + + def close_mosaic(self): + self.batch_sampler.mosaic = False + + +def list_collate(batch): + """ + Function that collates lists or tuples together into one list (of lists/tuples). + Use this as the collate function in a Dataloader, if you want to have a list of + items as an output, as opposed to tensors (eg. Brambox.boxes). + """ + items = list(zip(*batch)) + + for i in range(len(items)): + if isinstance(items[i][0], (list, tuple)): + items[i] = list(items[i]) + else: + items[i] = default_collate(items[i]) + + return items + + +def worker_init_reset_seed(worker_id): + seed = uuid.uuid4().int % 2**32 + random.seed(seed) + torch.set_rng_state(torch.manual_seed(seed).get_state()) + np.random.seed(seed) diff --git a/yolort/data/datasets/__init__.py b/yolort/data/datasets/__init__.py new file mode 100644 index 00000000..8a02c7f0 --- /dev/null +++ b/yolort/data/datasets/__init__.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +from .coco import COCODataset +from .coco_classes import COCO_CLASSES +from .datasets_wrapper import CacheDataset, ConcatDataset, Dataset, MixConcatDataset +from .mosaicdetection import MosaicDetection diff --git a/yolort/data/datasets/coco.py b/yolort/data/datasets/coco.py new file mode 100644 index 00000000..5ac225a0 --- /dev/null +++ b/yolort/data/datasets/coco.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. 
+import copy +import os + +import cv2 +import numpy as np +from pycocotools.coco import COCO + +from .datasets_wrapper import CacheDataset, cache_read_img + + +def remove_useless_info(coco): + """ + Remove useless info in coco dataset. COCO object is modified inplace. + This function is mainly used for saving memory (save about 30% mem). + """ + if isinstance(coco, COCO): + dataset = coco.dataset + dataset.pop("info", None) + dataset.pop("licenses", None) + for img in dataset["images"]: + img.pop("license", None) + img.pop("coco_url", None) + img.pop("date_captured", None) + img.pop("flickr_url", None) + if "annotations" in coco.dataset: + for anno in coco.dataset["annotations"]: + anno.pop("segmentation", None) + + +class COCODataset(CacheDataset): + """ + COCO dataset class. + """ + + def __init__( + self, + data_dir=None, + json_file="instances_train2017.json", + name="train2017", + img_size=(416, 416), + preproc=None, + cache=False, + cache_type="ram", + ): + """ + COCO dataset initialization. Annotation data are read into memory by COCO API. + Args: + data_dir (str): dataset root directory + json_file (str): COCO json file name + name (str): COCO data name (e.g. 
'train2017' or 'val2017') + img_size (int): target image size after pre-processing + preproc: data augmentation strategy + """ + if data_dir is None: + data_dir = os.path.join("data-bin", "coco128") + self.data_dir = data_dir + self.json_file = json_file + + self.coco = COCO(os.path.join(self.data_dir, "annotations", self.json_file)) + remove_useless_info(self.coco) + self.ids = self.coco.getImgIds() + self.num_imgs = len(self.ids) + self.class_ids = sorted(self.coco.getCatIds()) + self.cats = self.coco.loadCats(self.coco.getCatIds()) + self._classes = tuple([c["name"] for c in self.cats]) + self.name = name + self.img_size = img_size + self.preproc = preproc + self.annotations = self._load_coco_annotations() + + path_filename = [os.path.join(name, anno[3]) for anno in self.annotations] + super().__init__( + input_dimension=img_size, + num_imgs=self.num_imgs, + data_dir=data_dir, + cache_dir_name=f"cache_{name}", + path_filename=path_filename, + cache=cache, + cache_type=cache_type + ) + + def __len__(self): + return self.num_imgs + + def _load_coco_annotations(self): + return [self.load_anno_from_ids(_ids) for _ids in self.ids] + + def load_anno_from_ids(self, id_): + im_ann = self.coco.loadImgs(id_)[0] + width = im_ann["width"] + height = im_ann["height"] + anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=False) + annotations = self.coco.loadAnns(anno_ids) + objs = [] + for obj in annotations: + x1 = np.max((0, obj["bbox"][0])) + y1 = np.max((0, obj["bbox"][1])) + x2 = np.min((width, x1 + np.max((0, obj["bbox"][2])))) + y2 = np.min((height, y1 + np.max((0, obj["bbox"][3])))) + if obj["area"] > 0 and x2 >= x1 and y2 >= y1: + obj["clean_bbox"] = [x1, y1, x2, y2] + objs.append(obj) + + num_objs = len(objs) + + res = np.zeros((num_objs, 5)) + for ix, obj in enumerate(objs): + cls = self.class_ids.index(obj["category_id"]) + res[ix, 0:4] = obj["clean_bbox"] + res[ix, 4] = cls + + r = min(self.img_size[0] / height, self.img_size[1] / width) + res[:, :4] *= r + 
+ img_info = (height, width) + resized_info = (int(height * r), int(width * r)) + + file_name = ( + im_ann["file_name"] + if "file_name" in im_ann + else "{:012}".format(id_) + ".jpg" + ) + + return (res, img_info, resized_info, file_name) + + def load_anno(self, index): + return self.annotations[index][0] + + def load_resized_img(self, index): + img = self.load_image(index) + r = min(self.img_size[0] / img.shape[0], self.img_size[1] / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + return resized_img + + def load_image(self, index): + file_name = self.annotations[index][3] + + img_file = os.path.join(self.data_dir, "images", self.name, file_name) + + img = cv2.imread(img_file) + assert img is not None, f"file named {img_file} not found" + + return img + + @cache_read_img(use_cache=True) + def read_img(self, index): + return self.load_resized_img(index) + + def pull_item(self, index): + id_ = self.ids[index] + label, origin_image_size, _, _ = self.annotations[index] + img = self.read_img(index) + + return img, copy.deepcopy(label), origin_image_size, np.array([id_]) + + @CacheDataset.mosaic_getitem + def __getitem__(self, index): + """ + One image / label pair for the given index is picked up and pre-processed. + + Args: + index (int): data index + + Returns: + img (numpy.ndarray): pre-processed image + padded_labels (torch.Tensor): pre-processed label data. + The shape is :math:`[max_labels, 5]`. + each label consists of [class, xc, yc, w, h]: + class (float): class index. + xc, yc (float) : center of bbox whose values range from 0 to 1. + w, h (float) : size of bbox whose values range from 0 to 1. + info_img : tuple of h, w. + h, w (int): original shape of the image + img_id (int): same as the input index. Used for evaluation. 
+ """ + img, target, img_info, img_id = self.pull_item(index) + + if self.preproc is not None: + img, target = self.preproc(img, target, self.input_dim) + return img, target, img_info, img_id diff --git a/yolort/data/datasets/coco_classes.py b/yolort/data/datasets/coco_classes.py new file mode 100644 index 00000000..17f5cbe6 --- /dev/null +++ b/yolort/data/datasets/coco_classes.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +COCO_CLASSES = ( + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +) diff --git a/yolort/data/datasets/datasets_wrapper.py b/yolort/data/datasets/datasets_wrapper.py new file mode 100644 index 00000000..c45fe380 --- /dev/null +++ b/yolort/data/datasets/datasets_wrapper.py @@ -0,0 +1,300 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. 
+ +import bisect +import copy +import os +import random +from abc import ABCMeta, abstractmethod +from functools import partial, wraps +from multiprocessing.pool import ThreadPool +import psutil +from loguru import logger +from tqdm import tqdm + +import numpy as np + +from torch.utils.data.dataset import ConcatDataset as torchConcatDataset +from torch.utils.data.dataset import Dataset as torchDataset + + +class ConcatDataset(torchConcatDataset): + def __init__(self, datasets): + super(ConcatDataset, self).__init__(datasets) + if hasattr(self.datasets[0], "input_dim"): + self._input_dim = self.datasets[0].input_dim + self.input_dim = self.datasets[0].input_dim + + def pull_item(self, idx): + if idx < 0: + if -idx > len(self): + raise ValueError( + "absolute value of index should not exceed dataset length" + ) + idx = len(self) + idx + dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) + if dataset_idx == 0: + sample_idx = idx + else: + sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] + return self.datasets[dataset_idx].pull_item(sample_idx) + + +class MixConcatDataset(torchConcatDataset): + def __init__(self, datasets): + super(MixConcatDataset, self).__init__(datasets) + if hasattr(self.datasets[0], "input_dim"): + self._input_dim = self.datasets[0].input_dim + self.input_dim = self.datasets[0].input_dim + + def __getitem__(self, index): + + if not isinstance(index, int): + idx = index[1] + if idx < 0: + if -idx > len(self): + raise ValueError( + "absolute value of index should not exceed dataset length" + ) + idx = len(self) + idx + dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) + if dataset_idx == 0: + sample_idx = idx + else: + sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] + if not isinstance(index, int): + index = (index[0], sample_idx, index[2]) + + return self.datasets[dataset_idx][index] + + +class Dataset(torchDataset): + """ This class is a subclass of the base :class:`torch.utils.data.Dataset`, + that 
enables on the fly resizing of the ``input_dim``. + + Args: + input_dimension (tuple): (width,height) tuple with default dimensions of the network + """ + + def __init__(self, input_dimension, mosaic=True): + super().__init__() + self.__input_dim = input_dimension[:2] + self.enable_mosaic = mosaic + + @property + def input_dim(self): + """ + Dimension that can be used by transforms to set the correct image size, etc. + This allows transforms to have a single source of truth + for the input dimension of the network. + + Return: + list: Tuple containing the current width,height + """ + if hasattr(self, "_input_dim"): + return self._input_dim + return self.__input_dim + + @staticmethod + def mosaic_getitem(getitem_fn): + """ + Decorator method that needs to be used around the ``__getitem__`` method. |br| + This decorator enables the closing mosaic + + Example: + >>> class CustomSet(ln.data.Dataset): + ... def __len__(self): + ... return 10 + ... @ln.data.Dataset.mosaic_getitem + ... def __getitem__(self, index): + ... return self.enable_mosaic + """ + + @wraps(getitem_fn) + def wrapper(self, index): + if not isinstance(index, int): + self.enable_mosaic = index[0] + index = index[1] + + ret_val = getitem_fn(self, index) + + return ret_val + + return wrapper + + +class CacheDataset(Dataset, metaclass=ABCMeta): + """ This class is a subclass of the base :class:`yolox.data.datasets.Dataset`, + that enables cache images to ram or disk. + + Args: + input_dimension (tuple): (width,height) tuple with default dimensions of the network + num_imgs (int): datset size + data_dir (str): the root directory of the dataset, e.g. `/path/to/COCO`. + cache_dir_name (str): the name of the directory to cache to disk, + e.g. `"custom_cache"`. The files cached to disk will be saved + under `/path/to/COCO/custom_cache`. + path_filename (str): a list of paths to the data relative to the `data_dir`, + e.g. 
if you have data `/path/to/COCO/train/1.jpg`, `/path/to/COCO/train/2.jpg`, + then `path_filename = ['train/1.jpg', ' train/2.jpg']`. + cache (bool): whether to cache the images to ram or disk. + cache_type (str): the type of cache, + "ram" : Caching imgs to ram for fast training. + "disk": Caching imgs to disk for fast training. + """ + + def __init__( + self, + input_dimension, + num_imgs=None, + data_dir=None, + cache_dir_name=None, + path_filename=None, + cache=False, + cache_type="ram", + ): + super().__init__(input_dimension) + self.cache = cache + self.cache_type = cache_type + + if self.cache and self.cache_type == "disk": + self.cache_dir = os.path.join(data_dir, cache_dir_name) + self.path_filename = path_filename + + if self.cache and self.cache_type == "ram": + self.imgs = None + + if self.cache: + self.cache_images( + num_imgs=num_imgs, + data_dir=data_dir, + cache_dir_name=cache_dir_name, + path_filename=path_filename, + ) + + def __del__(self): + if self.cache and self.cache_type == "ram": + del self.imgs + + @abstractmethod + def read_img(self, index): + """ + Given index, return the corresponding image + + Args: + index (int): image index + """ + raise NotImplementedError + + def cache_images( + self, + num_imgs=None, + data_dir=None, + cache_dir_name=None, + path_filename=None, + ): + assert num_imgs is not None, "num_imgs must be specified as the size of the dataset" + if self.cache_type == "disk": + assert (data_dir and cache_dir_name and path_filename) is not None, \ + "data_dir, cache_name and path_filename must be specified if cache_type is disk" + self.path_filename = path_filename + + mem = psutil.virtual_memory() + mem_required = self.cal_cache_occupy(num_imgs) + gb = 1 << 30 + + if self.cache_type == "ram": + if mem_required > mem.available: + self.cache = False + else: + logger.info( + f"{mem_required / gb:.1f}GB RAM required, " + f"{mem.available / gb:.1f}/{mem.total / gb:.1f}GB RAM available, " + f"Since the first thing we do is cache, 
" + f"there is no guarantee that the remaining memory space is sufficient" + ) + + if self.cache and self.imgs is None: + if self.cache_type == 'ram': + self.imgs = [None] * num_imgs + logger.info("You are using cached images in RAM to accelerate training!") + else: # 'disk' + if not os.path.exists(self.cache_dir): + os.mkdir(self.cache_dir) + logger.warning( + f"\n*******************************************************************\n" + f"You are using cached images in DISK to accelerate training.\n" + f"This requires large DISK space.\n" + f"Make sure you have {mem_required / gb:.1f} " + f"available DISK space for training your dataset.\n" + f"*******************************************************************\\n" + ) + else: + logger.info(f"Found disk cache at {self.cache_dir}") + return + + logger.info( + "Caching images...\n" + "This might take some time for your dataset" + ) + + num_threads = min(8, max(1, os.cpu_count() - 1)) + b = 0 + load_imgs = ThreadPool(num_threads).imap( + partial(self.read_img, use_cache=False), + range(num_imgs) + ) + pbar = tqdm(enumerate(load_imgs), total=num_imgs) + for i, x in pbar: # x = self.read_img(self, i, use_cache=False) + if self.cache_type == 'ram': + self.imgs[i] = x + else: # 'disk' + cache_filename = f'{self.path_filename[i].split(".")[0]}.npy' + cache_path_filename = os.path.join(self.cache_dir, cache_filename) + os.makedirs(os.path.dirname(cache_path_filename), exist_ok=True) + np.save(cache_path_filename, x) + b += x.nbytes + pbar.desc = \ + f'Caching images ({b / gb:.1f}/{mem_required / gb:.1f}GB {self.cache_type})' + pbar.close() + + def cal_cache_occupy(self, num_imgs): + cache_bytes = 0 + num_samples = min(num_imgs, 32) + for _ in range(num_samples): + img = self.read_img(index=random.randint(0, num_imgs - 1), use_cache=False) + cache_bytes += img.nbytes + mem_required = cache_bytes * num_imgs / num_samples + return mem_required + + +def cache_read_img(use_cache=True): + def decorator(read_img_fn): + """ + 
Decorate the read_img function to cache the image + + Args: + read_img_fn: read_img function + use_cache (bool, optional): For the decorated read_img function, + whether to read the image from cache. + Defaults to True. + """ + @wraps(read_img_fn) + def wrapper(self, index, use_cache=use_cache): + cache = self.cache and use_cache + if cache: + if self.cache_type == "ram": + img = self.imgs[index] + img = copy.deepcopy(img) + elif self.cache_type == "disk": + img = np.load( + os.path.join( + self.cache_dir, f"{self.path_filename[index].split('.')[0]}.npy")) + else: + raise ValueError(f"Unknown cache type: {self.cache_type}") + else: + img = read_img_fn(self, index) + return img + return wrapper + return decorator diff --git a/yolort/data/datasets/mosaicdetection.py b/yolort/data/datasets/mosaicdetection.py new file mode 100644 index 00000000..ba11cfdc --- /dev/null +++ b/yolort/data/datasets/mosaicdetection.py @@ -0,0 +1,234 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. 
def get_mosaic_coordinate(mosaic_image, mosaic_index, xc, yc, w, h, input_h, input_w):
    """Locate one tile inside the 2x2 mosaic canvas and the tile region that is kept.

    The four tiles meet at the mosaic center ``(xc, yc)``. Destination
    coordinates are clipped to the canvas, whose extent is
    ``(2 * input_h, 2 * input_w)``; the source coordinates select the part of
    the ``h`` x ``w`` tile that still fits after clipping.

    Args:
        mosaic_image: Unused. Kept so existing positional callers keep working.
        mosaic_index (int): Tile position: 0 top-left, 1 top-right,
            2 bottom-left, 3 bottom-right.
        xc (int): x coordinate of the mosaic center on the canvas.
        yc (int): y coordinate of the mosaic center on the canvas.
        w (int): Width of the tile being placed.
        h (int): Height of the tile being placed.
        input_h (int): Half of the canvas height.
        input_w (int): Half of the canvas width.

    Returns:
        tuple: ``((x1, y1, x2, y2), (sx1, sy1, sx2, sy2))`` where the first
        box is the destination region on the canvas and the second box is the
        matching region inside the tile.

    Raises:
        ValueError: If ``mosaic_index`` is not one of 0, 1, 2, 3.
    """
    if mosaic_index == 0:
        # index0: top-left tile, anchored by its bottom-right corner at the center
        x1, y1, x2, y2 = max(xc - w, 0), max(yc - h, 0), xc, yc
        small_coord = w - (x2 - x1), h - (y2 - y1), w, h
    elif mosaic_index == 1:
        # index1: top-right tile, anchored by its bottom-left corner
        x1, y1, x2, y2 = xc, max(yc - h, 0), min(xc + w, input_w * 2), yc
        small_coord = 0, h - (y2 - y1), min(w, x2 - x1), h
    elif mosaic_index == 2:
        # index2: bottom-left tile, anchored by its top-right corner
        x1, y1, x2, y2 = max(xc - w, 0), yc, xc, min(input_h * 2, yc + h)
        small_coord = w - (x2 - x1), 0, w, min(y2 - y1, h)
    elif mosaic_index == 3:
        # index3: bottom-right tile, anchored by its top-left corner
        x1, y1, x2, y2 = xc, yc, min(xc + w, input_w * 2), min(input_h * 2, yc + h)
        small_coord = 0, 0, min(w, x2 - x1), min(y2 - y1, h)
    else:
        # Previously this fell through and failed with an UnboundLocalError.
        raise ValueError(f"mosaic_index must be 0, 1, 2 or 3, got {mosaic_index!r}")
    return (x1, y1, x2, y2), small_coord
+ """ + super().__init__(img_size, mosaic=mosaic) + self._dataset = dataset + self.preproc = preproc + self.degrees = degrees + self.translate = translate + self.scale = mosaic_scale + self.shear = shear + self.mixup_scale = mixup_scale + self.enable_mosaic = mosaic + self.enable_mixup = enable_mixup + self.mosaic_prob = mosaic_prob + self.mixup_prob = mixup_prob + self.local_rank = get_local_rank() + + def __len__(self): + return len(self._dataset) + + @Dataset.mosaic_getitem + def __getitem__(self, idx): + if self.enable_mosaic and random.random() < self.mosaic_prob: + mosaic_labels = [] + input_dim = self._dataset.input_dim + input_h, input_w = input_dim[0], input_dim[1] + + # yc, xc = s, s # mosaic center x, y + yc = int(random.uniform(0.5 * input_h, 1.5 * input_h)) + xc = int(random.uniform(0.5 * input_w, 1.5 * input_w)) + + # 3 additional image indices + indices = [idx] + [random.randint(0, len(self._dataset) - 1) for _ in range(3)] + + for i_mosaic, index in enumerate(indices): + img, _labels, _, img_id = self._dataset.pull_item(index) + h0, w0 = img.shape[:2] # orig hw + scale = min(1. * input_h / h0, 1. * input_w / w0) + img = cv2.resize( + img, (int(w0 * scale), int(h0 * scale)), interpolation=cv2.INTER_LINEAR + ) + # generate output mosaic image + (h, w, c) = img.shape[:3] + if i_mosaic == 0: + mosaic_img = np.full((input_h * 2, input_w * 2, c), 114, dtype=np.uint8) + + # suffix l means large image, while s means small image in mosaic aug. 
+ (l_x1, l_y1, l_x2, l_y2), (s_x1, s_y1, s_x2, s_y2) = get_mosaic_coordinate( + mosaic_img, i_mosaic, xc, yc, w, h, input_h, input_w + ) + + mosaic_img[l_y1:l_y2, l_x1:l_x2] = img[s_y1:s_y2, s_x1:s_x2] + padw, padh = l_x1 - s_x1, l_y1 - s_y1 + + labels = _labels.copy() + # Normalized xywh to pixel xyxy format + if _labels.size > 0: + labels[:, 0] = scale * _labels[:, 0] + padw + labels[:, 1] = scale * _labels[:, 1] + padh + labels[:, 2] = scale * _labels[:, 2] + padw + labels[:, 3] = scale * _labels[:, 3] + padh + mosaic_labels.append(labels) + + if len(mosaic_labels): + mosaic_labels = np.concatenate(mosaic_labels, 0) + np.clip(mosaic_labels[:, 0], 0, 2 * input_w, out=mosaic_labels[:, 0]) + np.clip(mosaic_labels[:, 1], 0, 2 * input_h, out=mosaic_labels[:, 1]) + np.clip(mosaic_labels[:, 2], 0, 2 * input_w, out=mosaic_labels[:, 2]) + np.clip(mosaic_labels[:, 3], 0, 2 * input_h, out=mosaic_labels[:, 3]) + + mosaic_img, mosaic_labels = random_affine( + mosaic_img, + mosaic_labels, + target_size=(input_w, input_h), + degrees=self.degrees, + translate=self.translate, + scales=self.scale, + shear=self.shear, + ) + + # ----------------------------------------------------------------- + # CopyPaste: https://arxiv.org/abs/2012.07177 + # ----------------------------------------------------------------- + if ( + self.enable_mixup + and not len(mosaic_labels) == 0 + and random.random() < self.mixup_prob + ): + mosaic_img, mosaic_labels = self.mixup(mosaic_img, mosaic_labels, self.input_dim) + mix_img, padded_labels = self.preproc(mosaic_img, mosaic_labels, self.input_dim) + img_info = (mix_img.shape[1], mix_img.shape[0]) + + # ----------------------------------------------------------------- + # img_info and img_id are not used for training. + # They are also hard to be specified on a mosaic image. 
+ # ----------------------------------------------------------------- + return mix_img, padded_labels, img_info, img_id + + else: + self._dataset._input_dim = self.input_dim + img, label, img_info, img_id = self._dataset.pull_item(idx) + img, label = self.preproc(img, label, self.input_dim) + return img, label, img_info, img_id + + def mixup(self, origin_img, origin_labels, input_dim): + jit_factor = random.uniform(*self.mixup_scale) + FLIP = random.uniform(0, 1) > 0.5 + cp_labels = [] + while len(cp_labels) == 0: + cp_index = random.randint(0, self.__len__() - 1) + cp_labels = self._dataset.load_anno(cp_index) + img, cp_labels, _, _ = self._dataset.pull_item(cp_index) + + if len(img.shape) == 3: + cp_img = np.ones((input_dim[0], input_dim[1], 3), dtype=np.uint8) * 114 + else: + cp_img = np.ones(input_dim, dtype=np.uint8) * 114 + + cp_scale_ratio = min(input_dim[0] / img.shape[0], input_dim[1] / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * cp_scale_ratio), int(img.shape[0] * cp_scale_ratio)), + interpolation=cv2.INTER_LINEAR, + ) + + cp_img[ + : int(img.shape[0] * cp_scale_ratio), : int(img.shape[1] * cp_scale_ratio) + ] = resized_img + + cp_img = cv2.resize( + cp_img, + (int(cp_img.shape[1] * jit_factor), int(cp_img.shape[0] * jit_factor)), + ) + cp_scale_ratio *= jit_factor + + if FLIP: + cp_img = cp_img[:, ::-1, :] + + origin_h, origin_w = cp_img.shape[:2] + target_h, target_w = origin_img.shape[:2] + padded_img = np.zeros( + (max(origin_h, target_h), max(origin_w, target_w), 3), dtype=np.uint8 + ) + padded_img[:origin_h, :origin_w] = cp_img + + x_offset, y_offset = 0, 0 + if padded_img.shape[0] > target_h: + y_offset = random.randint(0, padded_img.shape[0] - target_h - 1) + if padded_img.shape[1] > target_w: + x_offset = random.randint(0, padded_img.shape[1] - target_w - 1) + padded_cropped_img = padded_img[ + y_offset: y_offset + target_h, x_offset: x_offset + target_w + ] + + cp_bboxes_origin_np = adjust_box_anns( + cp_labels[:, 
class InfiniteSampler(Sampler):
    """
    In training, we only care about the "infinite stream" of training data.
    So this sampler produces an infinite stream of indices and
    all workers cooperate to correctly shuffle the indices and sample different indices.
    The samplers in each worker effectively produces `indices[worker_id::num_workers]`
    where `indices` is an infinite stream of indices consisting of
    `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True)
    or `range(size) + range(size) + ...` (if shuffle is False)
    """

    def __init__(
        self,
        size: int,
        shuffle: bool = True,
        seed: Optional[int] = 0,
        rank=0,
        world_size=1,
    ):
        """
        Args:
            size (int): the total number of data of the underlying dataset to sample from
            shuffle (bool): whether to shuffle the indices or not
            seed (int): the initial seed of the shuffle. Must be the same
                across all workers. If None, will use a random seed shared
                among workers (require synchronization among all workers).
            rank (int): index of this worker. Only used when torch.distributed
                is not initialized; otherwise the process-group rank is used.
            world_size (int): number of workers. Only used when
                torch.distributed is not initialized.
        """
        assert size > 0
        self._size = size
        self._shuffle = shuffle

        distributed = dist.is_available() and dist.is_initialized()
        if seed is None:
            # The signature and docstring allow None, but int(None) used to raise.
            seed = self._shared_random_seed(distributed)
        self._seed = int(seed)

        if distributed:
            self._rank = dist.get_rank()
            self._world_size = dist.get_world_size()
        else:
            self._rank = rank
            self._world_size = world_size

    @staticmethod
    def _shared_random_seed(distributed):
        """Draw a random seed; when distributed, every rank adopts rank 0's draw."""
        # Generator.seed() picks a non-deterministic seed without touching the
        # global RNG state.
        seed = torch.Generator().seed() % (2 ** 31)
        if distributed:
            holder = [seed]
            dist.broadcast_object_list(holder, src=0)
            seed = holder[0]
        return seed

    def __iter__(self):
        # Each worker takes every world_size-th index, offset by its rank.
        start = self._rank
        yield from itertools.islice(
            self._infinite_indices(), start, None, self._world_size
        )

    def _infinite_indices(self):
        """Yield dataset indices forever as plain Python ints."""
        g = torch.Generator()
        g.manual_seed(self._seed)
        while True:
            if self._shuffle:
                # tolist() hands out ints instead of 0-dim tensors.
                yield from torch.randperm(self._size, generator=g).tolist()
            else:
                yield from range(self._size)

    def __len__(self):
        return self._size // self._world_size
+ +from .coco_evaluator import COCOEvaluator \ No newline at end of file diff --git a/yolort/evaluators/coco_evaluator.py b/yolort/evaluators/coco_evaluator.py new file mode 100644 index 00000000..a97c6d41 --- /dev/null +++ b/yolort/evaluators/coco_evaluator.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import contextlib +import io +import itertools +import json +import tempfile +import time +from collections import ChainMap, defaultdict +from loguru import logger +from tabulate import tabulate +from tqdm import tqdm + +import numpy as np + +import torch + +from yolort.data.datasets import COCO_CLASSES +from yolort.utils import ( + gather, + is_main_process, + postprocess, + synchronize, + time_synchronized, + xyxy2xywh +) + + +def per_class_AR_table(coco_eval, class_names=COCO_CLASSES, headers=["class", "AR"], colums=6): + per_class_AR = {} + recalls = coco_eval.eval["recall"] + # dimension of recalls: [TxKxAxM] + # recall has dims (iou, cls, area range, max dets) + assert len(class_names) == recalls.shape[1] + + for idx, name in enumerate(class_names): + recall = recalls[:, idx, 0, -1] + recall = recall[recall > -1] + ar = np.mean(recall) if recall.size else float("nan") + per_class_AR[name] = float(ar * 100) + + num_cols = min(colums, len(per_class_AR) * len(headers)) + result_pair = [x for pair in per_class_AR.items() for x in pair] + row_pair = itertools.zip_longest(*[result_pair[i::num_cols] for i in range(num_cols)]) + table_headers = headers * (num_cols // len(headers)) + table = tabulate( + row_pair, tablefmt="pipe", floatfmt=".3f", headers=table_headers, numalign="left", + ) + return table + + +def per_class_AP_table(coco_eval, class_names=COCO_CLASSES, headers=["class", "AP"], colums=6): + per_class_AP = {} + precisions = coco_eval.eval["precision"] + # dimension of precisions: [TxRxKxAxM] + # precision has dims (iou, recall, cls, area range, max dets) + assert len(class_names) 
== precisions.shape[2] + + for idx, name in enumerate(class_names): + # area range index 0: all area ranges + # max dets index -1: typically 100 per image + precision = precisions[:, :, idx, 0, -1] + precision = precision[precision > -1] + ap = np.mean(precision) if precision.size else float("nan") + per_class_AP[name] = float(ap * 100) + + num_cols = min(colums, len(per_class_AP) * len(headers)) + result_pair = [x for pair in per_class_AP.items() for x in pair] + row_pair = itertools.zip_longest(*[result_pair[i::num_cols] for i in range(num_cols)]) + table_headers = headers * (num_cols // len(headers)) + table = tabulate( + row_pair, tablefmt="pipe", floatfmt=".3f", headers=table_headers, numalign="left", + ) + return table + + +class COCOEvaluator: + """ + COCO AP Evaluation class. All the data in the val2017 dataset are processed + and evaluated by COCO API. + """ + + def __init__( + self, + dataloader, + img_size: int, + confthre: float, + nmsthre: float, + num_classes: int, + testdev: bool = False, + per_class_AP: bool = True, + per_class_AR: bool = True, + ): + """ + Args: + dataloader (Dataloader): evaluate dataloader. + img_size: image size after preprocess. images are resized + to squares whose shape is (img_size, img_size). + confthre: confidence threshold ranging from 0 to 1, which + is defined in the config file. + nmsthre: IoU threshold of non-max supression ranging from 0 to 1. + per_class_AP: Show per class AP during evalution or not. Default to True. + per_class_AR: Show per class AR during evalution or not. Default to True. + """ + self.dataloader = dataloader + self.img_size = img_size + self.confthre = confthre + self.nmsthre = nmsthre + self.num_classes = num_classes + self.testdev = testdev + self.per_class_AP = per_class_AP + self.per_class_AR = per_class_AR + + def evaluate( + self, model, distributed=False, half=False, trt_file=None, + decoder=None, test_size=None, return_outputs=False + ): + """ + COCO average precision (AP) Evaluation. 
Iterate inference on the test dataset + and the results are evaluated by COCO API. + + NOTE: This function will change training mode to False, please save states if needed. + + Args: + model : model to evaluate. + + Returns: + ap50_95 (float) : COCO AP of IoU=50:95 + ap50 (float) : COCO AP of IoU=50 + summary (sr): summary info of evaluation. + """ + # TODO half to amp_test + tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor + model = model.eval() + if half: + model = model.half() + ids = [] + data_list = [] + output_data = defaultdict() + progress_bar = tqdm if is_main_process() else iter + + inference_time = 0 + nms_time = 0 + n_samples = max(len(self.dataloader) - 1, 1) + + if trt_file is not None: + from torch2trt import TRTModule + + model_trt = TRTModule() + model_trt.load_state_dict(torch.load(trt_file)) + + x = torch.ones(1, 3, test_size[0], test_size[1]).cuda() + model(x) + model = model_trt + + for cur_iter, (imgs, _, info_imgs, ids) in enumerate( + progress_bar(self.dataloader) + ): + with torch.no_grad(): + imgs = imgs.type(tensor_type) + + # skip the last iters since batchsize might be not enough for batch inference + is_time_record = cur_iter < len(self.dataloader) - 1 + if is_time_record: + start = time.time() + + outputs = model(imgs) + if decoder is not None: + outputs = decoder(outputs, dtype=outputs.type()) + + if is_time_record: + infer_end = time_synchronized() + inference_time += infer_end - start + + outputs = postprocess( + outputs, self.num_classes, self.confthre, self.nmsthre + ) + if is_time_record: + nms_end = time_synchronized() + nms_time += nms_end - infer_end + + data_list_elem, image_wise_data = self.convert_to_coco_format( + outputs, info_imgs, ids, return_outputs=True) + data_list.extend(data_list_elem) + output_data.update(image_wise_data) + + statistics = torch.cuda.FloatTensor([inference_time, nms_time, n_samples]) + if distributed: + # different process/device might have different speed, + # to make sure 
def evaluate_prediction(self, data_dict, statistics):
    """Score the collected detections with the COCO API and build a text report.

    Args:
        data_dict: Detections in COCO json format; it is dumped with
            ``json.dump`` and loaded back through ``cocoGt.loadRes``.
        statistics: Indexable of three scalars exposing ``.item()``:
            total inference time, total NMS time and the number of timed
            batches, in that order.

    Returns:
        tuple: ``(cocoEval.stats[0], cocoEval.stats[1], info)`` where ``info``
        is the timing line followed by the COCO summary and the optional
        per-class tables. Returns ``(0, 0, info)`` when there are no
        detections and ``(0, 0, None)`` on non-main processes.
    """
    if not is_main_process():
        return 0, 0, None

    logger.info("Evaluate in main process...")

    inference_time = statistics[0].item()
    nms_time = statistics[1].item()
    n_samples = statistics[2].item()

    # Average per-image cost in milliseconds.
    n_images = n_samples * self.dataloader.batch_size
    a_infer_time = 1000 * inference_time / n_images
    a_nms_time = 1000 * nms_time / n_images

    time_info = ", ".join(
        [
            "Average {} time: {:.2f} ms".format(k, v)
            for k, v in zip(
                ["forward", "NMS", "inference"],
                [a_infer_time, a_nms_time, (a_infer_time + a_nms_time)],
            )
        ]
    )

    info = time_info + "\n"

    if len(data_dict) == 0:
        return 0, 0, info

    # Evaluate the Dt (detection) json comparing with the ground truth
    cocoGt = self.dataloader.dataset.coco
    # TODO: since pycocotools can't process dict in py36, write data to json file.
    if self.testdev:
        # Close the file before loadRes reads it back.
        with open("./yolox_testdev_2017.json", "w") as f:
            json.dump(data_dict, f)
        cocoDt = cocoGt.loadRes("./yolox_testdev_2017.json")
    else:
        # The temporary directory removes the scratch file once the results
        # are loaded; mkstemp() leaked its descriptor and left the file behind.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = f"{tmp_dir}/predictions.json"
            with open(tmp_path, "w") as f:
                json.dump(data_dict, f)
            cocoDt = cocoGt.loadRes(tmp_path)

    # Try the optimized evaluator first and fall back to stock pycocotools.
    try:
        from yolox.layers import COCOeval_opt as COCOeval
    except ImportError:
        from pycocotools.cocoeval import COCOeval

        logger.warning("Use standard COCOeval.")

    cocoEval = COCOeval(cocoGt, cocoDt, "bbox")
    cocoEval.evaluate()
    cocoEval.accumulate()

    # summarize() prints to stdout; capture it for the report instead.
    redirect_string = io.StringIO()
    with contextlib.redirect_stdout(redirect_string):
        cocoEval.summarize()
    info += redirect_string.getvalue()

    cat_names = [cocoGt.cats[cat_id]['name'] for cat_id in sorted(cocoGt.cats.keys())]
    if self.per_class_AP:
        AP_table = per_class_AP_table(cocoEval, class_names=cat_names)
        info += "per class AP:\n" + AP_table + "\n"
    if self.per_class_AR:
        AR_table = per_class_AR_table(cocoEval, class_names=cat_names)
        info += "per class AR:\n" + AR_table + "\n"
    return cocoEval.stats[0], cocoEval.stats[1], info
class BaseExp(metaclass=ABCMeta):
    """Basic class for any experiment."""

    def __init__(self):
        self.seed = None
        self.output_dir = "./"
        self.print_interval = 100
        self.eval_interval = 10
        self.dataset = None

    @abstractmethod
    def get_model(self) -> Module:
        pass

    @abstractmethod
    def get_dataset(self, cache: bool = False, cache_type: str = "ram"):
        pass

    @abstractmethod
    def get_data_loader(
        self, batch_size: int, is_distributed: bool
    ) -> Dict[str, torch.utils.data.DataLoader]:
        pass

    @abstractmethod
    def get_optimizer(self, batch_size: int) -> torch.optim.Optimizer:
        pass

    @abstractmethod
    def get_lr_scheduler(
        self, lr: float, iters_per_epoch: int, **kwargs
    ) -> "LRScheduler":
        pass

    @abstractmethod
    def get_evaluator(self):
        pass

    @abstractmethod
    def eval(self, model, evaluator, weights):
        pass

    def __repr__(self):
        """Render every public attribute as a two-column table."""
        table_header = ["keys", "values"]
        exp_table = [
            (str(k), pprint.pformat(v))
            for k, v in vars(self).items()
            if not k.startswith("_")
        ]
        return tabulate(exp_table, headers=table_header, tablefmt="fancy_grid")

    @staticmethod
    def _cast(value, target_type):
        """Convert ``value`` to ``target_type``, reading booleans from their text form.

        ``bool("False")`` is ``True``, so a plain cast cannot turn a flag off.
        For booleans, the strings "", "0", "false", "no" and "off" (any case)
        map to False; every other string stays True, as before.
        """
        if target_type is bool and isinstance(value, str):
            return value.strip().lower() not in ("", "0", "false", "no", "off")
        return target_type(value)

    def merge(self, cfg_list):
        """Override existing attributes from a flat ``[key, value, key, value, ...]`` list.

        Keys that are not already attributes are ignored. Each value is
        converted to the type of the attribute it replaces; list and tuple
        values are parsed from text such as ``"(320, 416)"`` and their items
        take the type of the first existing item.

        Args:
            cfg_list: Alternating attribute names and string values.

        Raises:
            AssertionError: If ``cfg_list`` has an odd number of entries.
        """
        assert len(cfg_list) % 2 == 0, f"length must be even, check value here: {cfg_list}"
        for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
            # only update value with same key
            if not hasattr(self, k):
                continue
            src_value = getattr(self, k)
            src_type = type(src_value)

            # pre-process input if source type is list or tuple
            if isinstance(src_value, (list, tuple)):
                v = v.strip("[]()")
                v = [t.strip() for t in v.split(",")]

                # find type of tuple
                if len(src_value) > 0:
                    src_item_type = type(src_value[0])
                    v = [self._cast(t, src_item_type) for t in v]

            if src_value is not None and src_type != type(v):
                try:
                    v = self._cast(v, src_type)
                except Exception:
                    # e.g. int("1e3") fails; fall back to literal parsing
                    v = ast.literal_eval(v)
            setattr(self, k, v)
+ +import os +import random +import logging +from zipfile import ZipFile +from pathlib import Path, PosixPath + +import torch +import torch.distributed as dist +import torch.nn as nn + +from .base_exp import BaseExp + +__all__ = ["Exp"] + + +class Exp(BaseExp): + def __init__(self): + super().__init__() + + # ---------------- model config ---------------- # + # detect classes number of model + self.num_classes = 80 + # factor of model depth + self.depth = 1.00 + # factor of model width + self.width = 1.00 + # activation name. For example, if using "relu", then "silu" will be replaced to "relu". + self.act = "silu" + + # ---------------- dataloader config ---------------- # + # set worker to 4 for shorter dataloader init time + # If your training process cost many memory, reduce this value. + self.data_num_workers = 4 + self.input_size = (640, 640) # (height, width) + # Actual multiscale ranges: [640 - 5 * 32, 640 + 5 * 32]. + # To disable multiscale training, set the value to 0. + self.multiscale_range = 5 + # You can uncomment this line to specify a multiscale range + # self.random_size = (14, 26) + # dir of dataset images, if data_dir is None, this project will use `datasets` dir + self.data_dir = None + # name of annotation file for training + self.train_ann = "instances_train2017.json" + # name of annotation file for evaluation + self.val_ann = "instances_val2017.json" + # name of annotation file for testing + self.test_ann = "instances_test2017.json" + + # --------------- transform config ----------------- # + # prob of applying mosaic aug + self.mosaic_prob = 1.0 + # prob of applying mixup aug + self.mixup_prob = 1.0 + # prob of applying hsv aug + self.hsv_prob = 1.0 + # prob of applying flip aug + self.flip_prob = 0.5 + # rotation angle range, for example, if set to 2, the true range is (-2, 2) + self.degrees = 10.0 + # translate range, for example, if set to 0.1, the true range is (-0.1, 0.1) + self.translate = 0.1 + self.mosaic_scale = (0.1, 2) + # apply 
mixup aug or not + self.enable_mixup = True + self.mixup_scale = (0.5, 1.5) + # shear angle range, for example, if set to 2, the true range is (-2, 2) + self.shear = 2.0 + + # -------------- training config --------------------- # + # epoch number used for warmup + self.warmup_epochs = 5 + # max training epoch + self.max_epoch = 300 + # minimum learning rate during warmup + self.warmup_lr = 0 + self.min_lr_ratio = 0.05 + # learning rate for one image. During training, lr will multiply batchsize. + self.basic_lr_per_img = 0.01 / 64.0 + # name of LRScheduler + self.scheduler = "yoloxwarmcos" + # last #epoch to close augmention like mosaic + self.no_aug_epochs = 15 + # apply EMA during training + self.ema = True + + # weight decay of optimizer + self.weight_decay = 5e-4 + # momentum of optimizer + self.momentum = 0.9 + # log period in iter, for example, + # if set to 1, user could see log every iteration. + self.print_interval = 10 + # eval period in epoch, for example, + # if set to 1, model will be evaluate after every epoch. + self.eval_interval = 10 + # save history checkpoint or not. + # If set to False, yolox will only save latest and best ckpt. 
+ self.save_history_ckpt = True + # name of experiment + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + # ----------------- testing config ------------------ # + # output image size during evaluation/test + self.test_size = (640, 640) + # confidence threshold during evaluation/test, + # boxes whose scores are less than test_conf will be filtered + self.test_conf = 0.01 + # nms threshold + self.nmsthre = 0.65 + + def get_model(self): + import yolort.models as models + + self.model = models.__dict__['yolov5n'](upstream_version="r6.0", ) + self.model.train() + return self.model + + def get_dataset(self, data_root: str, mode: str = "val", cache: bool = False, cache_type: str = "ram"): + # Acquire the images and labels from the coco128 dataset + data_path = Path(data_root) + coco128_dirname = "coco128" + coco128_path = data_path / coco128_dirname + image_root = coco128_path / "images" / "train2017" + annotation_file = coco128_path / "annotations" / "instances_train2017.json" + + from yolort.data import COCODataset, TrainTransform + + if not annotation_file.is_file(): + self.prepare_coco128(data_path, dirname=coco128_dirname) + + if mode == "train": + dataset = COCODataset( + data_dir=self.data_dir, + json_file=self.train_ann, + img_size=self.input_size, + preproc=TrainTransform( + max_labels=50, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob + ), + cache=cache, + cache_type=cache_type, + ) + elif mode == "val": + """ TODO """ + dataset = COCODataset( + data_dir=self.data_dir, + json_file=self.train_ann, + img_size=self.input_size, + preproc=TrainTransform( + max_labels=50, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob + ), + cache=cache, + cache_type=cache_type, + ) + else: + raise NotImplementedError(f"Currently not supports mode {mode}") + + return dataset + + def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img: str = None): + """ + Get dataloader according to cache_img parameter. 
+ Args: + no_aug (bool, optional): Whether to turn off mosaic data enhancement. Defaults to False. + cache_img (str, optional): cache_img is equivalent to cache_type. Defaults to None. + "ram" : Caching imgs to ram for fast training. + "disk": Caching imgs to disk for fast training. + None: Do not use cache, in this case cache_data is also None. + """ + from yolort.data import ( + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + worker_init_reset_seed, + ) + from yolort.utils import wait_for_the_master + + # if cache is True, we will create dataset before launch + # else we will create dataset after launch + if self.dataset is None: + with wait_for_the_master(): + assert cache_img is None, \ + "cache_img must be None if you didn't create dataset before launch" + self.dataset = self.get_dataset(data_root="data-bin", mode="train", cache=False, cache_type=cache_img) + + self.dataset = MosaicDetection( + dataset=self.dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + max_labels=120, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + degrees=self.degrees, + translate=self.translate, + mosaic_scale=self.mosaic_scale, + mixup_scale=self.mixup_scale, + shear=self.shear, + enable_mixup=self.enable_mixup, + mosaic_prob=self.mosaic_prob, + mixup_prob=self.mixup_prob, + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + + # Make sure each process has different random seed, especially for 'fork' method. + # Check https://github.com/pytorch/pytorch/issues/63311 for more details. 
def prepare_coco128(self,
                    data_path: PosixPath,
                    dirname: str = "coco128",
                    ) -> None:
    """
    Prepare coco128 dataset to test.

    Creates ``data_path`` if needed, downloads ``coco128.zip`` into it when the
    archive is not already present, and extracts the archive when
    ``data_path / dirname`` does not exist yet.

    Args:
        data_path (PosixPath): root path of coco128 dataset.
        dirname (str): the directory name of coco128 dataset. Default: 'coco128'.
    """
    logger = logging.getLogger(__name__)

    if not data_path.is_dir():
        logger.info(f"Create a new directory: {data_path}")
        data_path.mkdir(parents=True, exist_ok=True)

    zip_path = data_path / "coco128.zip"
    coco128_url = "https://github.com/zhiqwang/yolort/releases/download/v0.3.0/coco128.zip"
    if not zip_path.is_file():
        logger.info(f"Downloading coco128 datasets from {coco128_url}")
        torch.hub.download_url_to_file(coco128_url, zip_path, hash_prefix="a67d2887")

    coco128_path = data_path / dirname
    if not coco128_path.is_dir():
        logger.info(f"Unzipping dataset to {coco128_path}")
        with ZipFile(zip_path, "r") as zip_obj:
            zip_obj.extractall(data_path)


def random_resize(self, data_loader, epoch, rank, is_distributed):
    """
    Draw a new multi-scale input size on rank 0 and share it with every rank.

    Args:
        data_loader: unused by this implementation.
        epoch: unused by this implementation.
        rank (int): rank of the calling process; only rank 0 draws the size.
        is_distributed (bool): when True the size is broadcast from rank 0.

    Returns:
        tuple[int, int]: the selected input size, both entries multiples of 32.
    """
    # The trainer falls back to CPU when CUDA is unavailable, so the exchange
    # buffer must not assume a GPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tensor = torch.zeros(2, dtype=torch.long, device=device)

    if rank == 0:
        size_factor = self.input_size[1] * 1.0 / self.input_size[0]
        if not hasattr(self, "random_size"):
            # Cache the range of size multipliers on first use.
            min_size = int(self.input_size[0] / 32) - self.multiscale_range
            max_size = int(self.input_size[0] / 32) + self.multiscale_range
            self.random_size = (min_size, max_size)
        size = random.randint(*self.random_size)
        size = (int(32 * size), 32 * int(size * size_factor))
        tensor[0] = size[0]
        tensor[1] = size[1]

    if is_distributed:
        dist.barrier()
        dist.broadcast(tensor, 0)

    input_size = (tensor[0].item(), tensor[1].item())
    return input_size


def preprocess(self, inputs, targets, tsize):
    """
    Resize a batch to ``tsize`` and rescale its targets accordingly.

    ``targets`` is modified in place: along the last axis, entries 1, 3, ... are
    multiplied by the x scale and entries 2, 4, ... by the y scale.

    Args:
        inputs (Tensor): image batch passed to ``nn.functional.interpolate``.
        targets (Tensor): labels; assumed layout is (class, x, y, w, h) — TODO confirm.
        tsize (tuple[int, int]): target size, compared against ``self.input_size``.

    Returns:
        tuple[Tensor, Tensor]: the (possibly resized) inputs and the targets.
    """
    scale_y = tsize[0] / self.input_size[0]
    scale_x = tsize[1] / self.input_size[1]
    if scale_x != 1 or scale_y != 1:
        inputs = nn.functional.interpolate(
            inputs, size=tsize, mode="bilinear", align_corners=False
        )
        targets[..., 1::2] = targets[..., 1::2] * scale_x
        targets[..., 2::2] = targets[..., 2::2] * scale_y
    return inputs, targets


def get_optimizer(self, batch_size):
    """
    Build (once) and return the SGD optimizer for ``self.model``.

    Three parameter groups are created: norm weights (no weight decay), other
    weights (with ``self.weight_decay``) and biases (no weight decay). The
    optimizer is cached on ``self.optimizer``.

    Args:
        batch_size (int): total batch size, used to scale ``basic_lr_per_img``
            when no warmup is configured.
    """
    if "optimizer" not in self.__dict__:
        if self.warmup_epochs > 0:
            lr = self.warmup_lr
        else:
            lr = self.basic_lr_per_img * batch_size

        pg0, pg1, pg2 = [], [], []  # optimizer parameter groups

        for k, v in self.model.named_modules():
            bias = getattr(v, "bias", None)
            if isinstance(bias, nn.Parameter):
                pg2.append(bias)  # biases

            # Modules without a learnable weight (containers, BatchNorm with
            # affine=False, ...) must not contribute None to a parameter group.
            weight = getattr(v, "weight", None)
            if not isinstance(weight, nn.Parameter):
                continue
            if isinstance(v, nn.BatchNorm2d) or "bn" in k:
                pg0.append(weight)  # no decay
            else:
                pg1.append(weight)  # apply decay

        optimizer = torch.optim.SGD(
            pg0, lr=lr, momentum=self.momentum, nesterov=True
        )
        optimizer.add_param_group(
            {"params": pg1, "weight_decay": self.weight_decay}
        )  # add pg1 with weight_decay
        optimizer.add_param_group({"params": pg2})
        self.optimizer = optimizer

    return self.optimizer


def get_lr_scheduler(self, lr, iters_per_epoch):
    """
    Create the learning-rate scheduler configured by this experiment.

    Args:
        lr (float): base learning rate handed to ``LRScheduler``.
        iters_per_epoch (int): number of iterations in one epoch.
    """
    from yolort.utils import LRScheduler

    scheduler = LRScheduler(
        self.scheduler,
        lr,
        iters_per_epoch,
        self.max_epoch,
        warmup_epochs=self.warmup_epochs,
        warmup_lr_start=self.warmup_lr,
        no_aug_epochs=self.no_aug_epochs,
        min_lr_ratio=self.min_lr_ratio,
    )
    return scheduler


def get_eval_dataset(self, **kwargs):
    """
    Build the dataset used for evaluation.

    Keyword Args:
        testdev (bool): read but currently without effect, both settings use
            the same split name. Default: False.
        legacy (bool): forwarded to ``ValTransform``. Default: False.
    """
    from yolort.data import COCODataset, ValTransform

    testdev = kwargs.get("testdev", False)
    legacy = kwargs.get("legacy", False)

    return COCODataset(
        data_dir=self.data_dir,
        # TODO: this needs to be changed; evaluation currently reuses the
        # training annotation file.
        json_file=self.train_ann,
        # Test data: both branches select the "train2017" split for now.
        name="train2017" if not testdev else "train2017",
        img_size=self.test_size,
        preproc=ValTransform(legacy=legacy),
    )


def get_eval_loader(self, batch_size, is_distributed, **kwargs):
    """
    Build the evaluation ``DataLoader``.

    Args:
        batch_size (int): total batch size; divided by the world size when
            ``is_distributed`` is True.
        is_distributed (bool): use a non-shuffling ``DistributedSampler``
            instead of a ``SequentialSampler``.
        **kwargs: forwarded to ``get_eval_dataset``.
    """
    valdataset = self.get_eval_dataset(**kwargs)

    if is_distributed:
        batch_size = batch_size // dist.get_world_size()
        sampler = torch.utils.data.distributed.DistributedSampler(
            valdataset, shuffle=False
        )
    else:
        sampler = torch.utils.data.SequentialSampler(valdataset)

    dataloader_kwargs = {
        "num_workers": self.data_num_workers,
        "pin_memory": True,
        "sampler": sampler,
    }
    dataloader_kwargs["batch_size"] = batch_size
    val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)

    return val_loader


def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False):
    """Create a ``COCOEvaluator`` wired to this experiment's evaluation loader."""
    from yolort.evaluators import COCOEvaluator

    return COCOEvaluator(
        dataloader=self.get_eval_loader(batch_size, is_distributed,
                                        testdev=testdev, legacy=legacy),
        img_size=self.test_size,
        confthre=self.test_conf,
        nmsthre=self.nmsthre,
        num_classes=self.num_classes,
        testdev=testdev,
    )


def eval(self, model, evaluator, is_distributed, half=False, return_outputs=False):
    """Run ``evaluator.evaluate`` on ``model`` and return its result unchanged."""
    return evaluator.evaluate(model, is_distributed, half, return_outputs=return_outputs)
- -import argparse -from pathlib import PosixPath -from typing import Any, Dict, List, Optional, Tuple, Union - -import torch -import yolort.models as models -from pytorch_lightning import LightningModule -from torch import Tensor -from torchvision.ops import box_iou -from yolort.data.coco_eval import COCOEvaluator - - -__all__ = ["DefaultTask"] - - -def _evaluate_iou(target, pred): - """ - Evaluate intersection over union (IOU) for target from dataset and - output prediction from model - """ - if pred["boxes"].shape[0] == 0: - # no box detected, 0 IOU - return torch.tensor(0.0, device=pred["boxes"].device) - return box_iou(target["boxes"], pred["boxes"]).diag().mean() - - -class DefaultTask(LightningModule): - """ - Wrapping the trainer into the YOLOv5 Module. - - Args: - arch (string): YOLOv5 model architecture. Default: 'yolov5s' - version (str): model released by the upstream YOLOv5. Possible values - are ['r6.0']. Default: 'r6.0'. - lr (float): The initial learning rate - annotation_path (Optional[Union[string, PosixPath]]): Path of the COCO annotation file - Default: None. - """ - - def __init__( - self, - arch: str = "yolov5s", - version: str = "r6.0", - lr: float = 0.01, - annotation_path: Optional[Union[str, PosixPath]] = None, - **kwargs: Any, - ) -> None: - - super().__init__() - - self.model = models.__dict__[arch](upstream_version=version, **kwargs) - self.lr = lr - - # evaluators for validation datasets - self.evaluator = None - if annotation_path is not None: - self.evaluator = COCOEvaluator(annotation_path, iou_type="bbox") - - # used only on torchscript mode - self._has_warned = False - - def forward( - self, - inputs: List[Tensor], - targets: Optional[List[Dict[str, Tensor]]] = None, - ) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]: - """ - This exists since PyTorchLightning forward are used for inference only (separate from - ``training_step``). We keep ``targets`` here for Backward Compatible. 
- """ - return self.model(inputs, targets) - - def training_step(self, batch, batch_idx): - """ - The training step. - """ - loss_dict = self.model(*batch) - loss = sum(loss_dict.values()) - self.log_dict(loss_dict, on_step=True, on_epoch=True, prog_bar=True) - return loss - - def validation_step(self, batch, batch_idx): - images, targets = batch - # fasterrcnn takes only images for eval() mode - preds = self.model(images) - iou = torch.stack([_evaluate_iou(t, o) for t, o in zip(targets, preds)]).mean() - outs = {"val_iou": iou} - self.log_dict(outs, on_step=True, on_epoch=True, prog_bar=True) - return outs - - def validation_epoch_end(self, outs): - avg_iou = torch.stack([o["val_iou"] for o in outs]).mean() - self.log("avg_val_iou", avg_iou) - - def test_step(self, batch, batch_idx): - """ - The test step. - """ - images, targets = batch - images = list(image.to(next(self.parameters()).device) for image in images) - preds = self.model(images) - results = self.evaluator(preds, targets) - # log step metric - self.log("eval_step", results, prog_bar=True, on_step=True) - - def test_epoch_end(self, outputs): - return self.log("coco_eval", self.evaluator.compute()) - - def configure_optimizers(self): - return torch.optim.SGD( - self.model.parameters(), - lr=self.lr, - momentum=0.9, - weight_decay=5e-4, - ) - - @staticmethod - def add_model_specific_args(parent_parser): - parser = argparse.ArgumentParser(parents=[parent_parser], add_help=False) - parser.add_argument("--arch", default="yolov5_darknet_pan_s_r40", help="model architecture") - parser.add_argument( - "--pretrained", - action="store_true", - help="Use pre-trained models from the modelzoo", - ) - parser.add_argument( - "--lr", - default=0.01, - type=float, - help="initial learning rate, 0.01 is the default value for training " - "on 8 gpus and 2 images_per_gpu", - ) - parser.add_argument("--momentum", default=0.9, type=float, metavar="M", help="momentum") - parser.add_argument( - "--weight-decay", - 
class Trainer:
    """
    Epoch/iteration training loop driven by an ``Exp`` description.

    ``__init__`` only records basic attributes; the model, optimizer, data
    loader, scheduler, EMA model and evaluator are created in ``before_train``.
    """

    def __init__(self, exp: Exp, args):
        # init function only defines some basic attr, other attrs like model, optimizer are built in
        # before_train methods.
        self.exp = exp
        self.args = args

        # training related attr
        self.max_epoch = exp.max_epoch
        self.amp_training = args.fp16
        self.scaler = torch.cuda.amp.GradScaler(enabled=args.fp16)
        self.is_distributed = get_world_size() > 1
        self.rank = get_rank()
        self.local_rank = get_local_rank()
        self.device = "cuda:{}".format(self.local_rank) if torch.cuda.is_available() else 'cpu'
        self.use_model_ema = exp.ema
        self.save_history_ckpt = exp.save_history_ckpt

        # data/dataloader related attr
        self.data_type = torch.float16 if args.fp16 else torch.float32
        self.input_size = exp.input_size
        self.best_ap = 0

        # metric record
        self.meter = MeterBuffer(window_size=exp.print_interval)
        self.file_name = os.path.join(exp.output_dir, args.experiment_name)

        if self.rank == 0:
            os.makedirs(self.file_name, exist_ok=True)

        setup_logger(
            self.file_name,
            distributed_rank=self.rank,
            filename="train_log.txt",
            mode="a",
        )

    def train(self):
        """Run the full training; ``after_train`` executes even if an epoch raises."""
        self.before_train()
        try:
            self.train_in_epoch()
        finally:
            self.after_train()

    def train_in_epoch(self):
        """Iterate over epochs from ``start_epoch`` up to ``max_epoch``."""
        for self.epoch in range(self.start_epoch, self.max_epoch):
            self.before_epoch()
            self.train_in_iter()
            self.after_epoch()

    def train_in_iter(self):
        """Iterate over the ``max_iter`` iterations of the current epoch."""
        for self.iter in range(self.max_iter):
            self.before_iter()
            self.train_one_iter()
            self.after_iter()

    def train_one_iter(self):
        """Fetch one batch, run forward/backward, step the optimizer and update meters."""
        iter_start_time = time.time()

        inps, targets = self.prefetcher.next()
        inps = inps.to(self.data_type)
        targets = targets.to(self.data_type)
        targets.requires_grad = False
        inps, targets = self.exp.preprocess(inps, targets, self.input_size)
        data_end_time = time.time()

        with torch.cuda.amp.autocast(enabled=self.amp_training):
            outputs = self.model(inps, targets)

        loss = outputs["total_loss"]

        self.optimizer.zero_grad()
        self.scaler.scale(loss).backward()
        self.scaler.step(self.optimizer)
        self.scaler.update()

        if self.use_model_ema:
            self.ema_model.update(self.model)

        # The learning rate for the next iteration is written into every group.
        lr = self.lr_scheduler.update_lr(self.progress_in_iter + 1)
        for param_group in self.optimizer.param_groups:
            param_group["lr"] = lr

        iter_end_time = time.time()
        self.meter.update(
            iter_time=iter_end_time - iter_start_time,
            data_time=data_end_time - iter_start_time,
            lr=lr,
            **outputs,
        )

    def before_train(self):
        """Build model, optimizer, data pipeline, scheduler, EMA, evaluator and loggers."""
        logger.info("args: {}".format(self.args))
        logger.info("exp value:\n{}".format(self.exp))

        # model related init
        if self.device != 'cpu':
            torch.cuda.set_device(self.local_rank)
        model = self.exp.get_model()
        logger.info(
            "Model Summary: {}".format(get_model_info(model, self.exp.test_size))
        )
        model.to(self.device)

        # solver related init
        self.optimizer = self.exp.get_optimizer(self.args.batch_size)

        # value of epoch will be set in `resume_train`
        model = self.resume_train(model)

        # data related init
        self.no_aug = self.start_epoch >= self.max_epoch - self.exp.no_aug_epochs
        self.train_loader = self.exp.get_data_loader(
            batch_size=self.args.batch_size,
            is_distributed=self.is_distributed,
            no_aug=self.no_aug,
            cache_img=self.args.cache,
        )
        logger.info("init prefetcher, this might take one minute or less...")
        self.prefetcher = DataPrefetcher(self.train_loader)
        # max_iter means iters per epoch
        self.max_iter = len(self.train_loader)

        self.lr_scheduler = self.exp.get_lr_scheduler(
            self.exp.basic_lr_per_img * self.args.batch_size, self.max_iter
        )
        if self.args.occupy:
            occupy_mem(self.local_rank)

        if self.is_distributed:
            model = DDP(model, device_ids=[self.local_rank], broadcast_buffers=False)

        if self.use_model_ema:
            self.ema_model = ModelEMA(model, 0.9998)
            self.ema_model.updates = self.max_iter * self.start_epoch

        self.model = model

        self.evaluator = self.exp.get_evaluator(
            batch_size=self.args.batch_size, is_distributed=self.is_distributed
        )
        # Tensorboard and Wandb loggers
        if self.rank == 0:
            if self.args.logger == "tensorboard":
                self.tblogger = SummaryWriter(os.path.join(self.file_name, "tensorboard"))
            elif self.args.logger == "wandb":
                self.wandb_logger = WandbLogger.initialize_wandb_logger(
                    self.args,
                    self.exp,
                    self.evaluator.dataloader.dataset
                )
            else:
                raise ValueError("logger must be either 'tensorboard' or 'wandb'")

        logger.info("Training start...")
        logger.info("\n{}".format(model))

    def after_train(self):
        """Log the best AP and close the wandb run on rank 0."""
        logger.info(
            "Training of experiment is done and the best AP is {:.2f}".format(self.best_ap * 100)
        )
        if self.rank == 0:
            if self.args.logger == "wandb":
                self.wandb_logger.finish()

    def before_epoch(self):
        """Switch off mosaic and enable the L1 loss once the no-aug phase starts."""
        logger.info("---> start train epoch{}".format(self.epoch + 1))

        if self.epoch + 1 == self.max_epoch - self.exp.no_aug_epochs or self.no_aug:
            logger.info("--->No mosaic aug now!")
            self.train_loader.close_mosaic()
            logger.info("--->Add additional L1 loss now!")
            if self.is_distributed:
                self.model.module.head.use_l1 = True
            else:
                self.model.head.use_l1 = True
            self.exp.eval_interval = 1
            if not self.no_aug:
                self.save_ckpt(ckpt_name="last_mosaic_epoch")

    def after_epoch(self):
        """Save the latest checkpoint and evaluate every ``eval_interval`` epochs."""
        self.save_ckpt(ckpt_name="latest")

        if (self.epoch + 1) % self.exp.eval_interval == 0:
            all_reduce_norm(self.model)
            self.evaluate_and_save_model()

    def before_iter(self):
        """Hook executed before every iteration; intentionally empty."""
        pass

    def after_iter(self):
        """
        `after_iter` contains two parts of logic:
            * log information
            * reset setting of resize
        """
        # log needed information
        if (self.iter + 1) % self.exp.print_interval == 0:
            # TODO check ETA logic
            left_iters = self.max_iter * self.max_epoch - (self.progress_in_iter + 1)
            eta_seconds = self.meter["iter_time"].global_avg * left_iters
            eta_str = "ETA: {}".format(datetime.timedelta(seconds=int(eta_seconds)))

            progress_str = "epoch: {}/{}, iter: {}/{}".format(
                self.epoch + 1, self.max_epoch, self.iter + 1, self.max_iter
            )
            loss_meter = self.meter.get_filtered_meter("loss")
            loss_str = ", ".join(
                ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()]
            )

            time_meter = self.meter.get_filtered_meter("time")
            time_str = ", ".join(
                ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()]
            )

            mem_str = "gpu mem: {:.0f}Mb, mem: {:.1f}Gb".format(gpu_mem_usage(), mem_usage())

            logger.info(
                "{}, {}, {}, {}, lr: {:.3e}".format(
                    progress_str,
                    mem_str,
                    time_str,
                    loss_str,
                    self.meter["lr"].latest,
                )
                + (", size: {:d}, {}".format(self.input_size[0], eta_str))
            )

            if self.rank == 0:
                if self.args.logger == "tensorboard":
                    self.tblogger.add_scalar(
                        "train/lr", self.meter["lr"].latest, self.progress_in_iter)
                    for k, v in loss_meter.items():
                        self.tblogger.add_scalar(
                            f"train/{k}", v.latest, self.progress_in_iter)
                if self.args.logger == "wandb":
                    metrics = {"train/" + k: v.latest for k, v in loss_meter.items()}
                    metrics.update({
                        "train/lr": self.meter["lr"].latest
                    })
                    self.wandb_logger.log_metrics(metrics, step=self.progress_in_iter)

            self.meter.clear_meters()

        # random resizing
        if (self.progress_in_iter + 1) % 10 == 0:
            self.input_size = self.exp.random_resize(
                self.train_loader, self.epoch, self.rank, self.is_distributed
            )

    @property
    def progress_in_iter(self):
        """Global iteration index: ``epoch * max_iter + iter``."""
        return self.epoch * self.max_iter + self.iter

    def resume_train(self, model):
        """
        Restore training state or load fine-tuning weights, and set ``start_epoch``.

        With ``args.resume`` the model, optimizer, best AP and start epoch are
        restored from ``args.ckpt`` (or ``latest_ckpt.pth`` in the output
        directory). Otherwise ``args.ckpt``, if given, is loaded through
        ``load_ckpt`` and training starts from epoch 0.

        Returns:
            The model with the loaded weights.
        """
        if self.args.resume:
            logger.info("resume training")
            if self.args.ckpt is None:
                ckpt_file = os.path.join(self.file_name, "latest" + "_ckpt.pth")
            else:
                ckpt_file = self.args.ckpt

            ckpt = torch.load(ckpt_file, map_location=self.device)
            # resume the model/optimizer state dict
            model.load_state_dict(ckpt["model"])
            self.optimizer.load_state_dict(ckpt["optimizer"])
            self.best_ap = ckpt.pop("best_ap", 0)
            # resume the training states variables
            start_epoch = (
                self.args.start_epoch - 1
                if self.args.start_epoch is not None
                else ckpt["start_epoch"]
            )
            self.start_epoch = start_epoch
            # Report the file that was actually read rather than the resume flag.
            logger.info(
                "loaded checkpoint '{}' (epoch {})".format(
                    ckpt_file, self.start_epoch
                )
            )  # noqa
        else:
            if self.args.ckpt is not None:
                logger.info("loading checkpoint for fine tuning")
                ckpt_file = self.args.ckpt
                ckpt = torch.load(ckpt_file, map_location=self.device)["model"]
                model = load_ckpt(model, ckpt)
            self.start_epoch = 0

        return model

    def evaluate_and_save_model(self):
        """Evaluate the (EMA) model, log the metrics and save the epoch checkpoints."""
        if self.use_model_ema:
            evalmodel = self.ema_model.ema
        else:
            evalmodel = self.model
            if is_parallel(evalmodel):
                evalmodel = evalmodel.module

        with adjust_status(evalmodel, training=False):
            (ap50_95, ap50, summary), predictions = self.exp.eval(
                evalmodel, self.evaluator, self.is_distributed, return_outputs=True
            )

        update_best_ckpt = ap50_95 > self.best_ap
        self.best_ap = max(self.best_ap, ap50_95)

        if self.rank == 0:
            if self.args.logger == "tensorboard":
                self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1)
                self.tblogger.add_scalar("val/COCOAP50_95", ap50_95, self.epoch + 1)
            if self.args.logger == "wandb":
                self.wandb_logger.log_metrics({
                    "val/COCOAP50": ap50,
                    "val/COCOAP50_95": ap50_95,
                    "train/epoch": self.epoch + 1,
                })
                self.wandb_logger.log_images(predictions)
            logger.info("\n" + summary)
        synchronize()

        self.save_ckpt("last_epoch", update_best_ckpt, ap=ap50_95)
        if self.save_history_ckpt:
            self.save_ckpt(f"epoch_{self.epoch + 1}", ap=ap50_95)

    def save_ckpt(self, ckpt_name, update_best_ckpt=False, ap=None):
        """
        Write a checkpoint on rank 0 (and mirror it to wandb when enabled).

        Args:
            ckpt_name (str): file stem; ``save_checkpoint`` appends ``_ckpt.pth``.
            update_best_ckpt (bool): also refresh the best checkpoint.
            ap: AP of the current evaluation, stored as ``curr_ap``.
        """
        if self.rank == 0:
            if self.use_model_ema:
                save_model = self.ema_model.ema
            else:
                # Unwrap the parallel wrapper so the stored keys carry no
                # "module." prefix and `resume_train` can load them into the
                # plain model.
                save_model = self.model.module if is_parallel(self.model) else self.model
            logger.info("Save weights to {}".format(self.file_name))
            ckpt_state = {
                "start_epoch": self.epoch + 1,
                "model": save_model.state_dict(),
                "optimizer": self.optimizer.state_dict(),
                "best_ap": self.best_ap,
                "curr_ap": ap,
            }
            save_checkpoint(
                ckpt_state,
                update_best_ckpt,
                self.file_name,
                ckpt_name,
            )

            if self.args.logger == "wandb":
                self.wandb_logger.save_checkpoint(
                    self.file_name,
                    ckpt_name,
                    update_best_ckpt,
                    metadata={
                        "epoch": self.epoch + 1,
                        "optimizer": self.optimizer.state_dict(),
                        "best_ap": self.best_ap,
                        "curr_ap": ap
                    }
                )
# Normalization layers whose statistics are averaged across processes.
ASYNC_NORM = (
    nn.BatchNorm1d,
    nn.BatchNorm2d,
    nn.BatchNorm3d,
    nn.InstanceNorm1d,
    nn.InstanceNorm2d,
    nn.InstanceNorm3d,
)


def get_async_norm_states(module):
    """
    Collect the state-dict entries of every ``ASYNC_NORM`` layer in ``module``.

    Returns:
        OrderedDict: entries keyed ``"<submodule name>.<state key>"``.
    """
    async_norm_states = OrderedDict()
    for name, child in module.named_modules():
        if isinstance(child, ASYNC_NORM):
            for k, v in child.state_dict().items():
                async_norm_states[".".join([name, k])] = v
    return async_norm_states


def pyobj2tensor(pyobj, device="cuda"):
    """serialize picklable python object to tensor"""
    storage = torch.ByteStorage.from_buffer(pickle.dumps(pyobj))
    return torch.ByteTensor(storage).to(device=device)


def tensor2pyobj(tensor):
    """deserialize tensor to picklable python object"""
    # NOTE: unpickling is only appropriate for data produced by trusted peers.
    return pickle.loads(tensor.cpu().numpy().tobytes())


def _get_reduce_op(op_name):
    """Map "sum"/"mean" to the reduce op; "mean" is a sum divided afterwards by the caller."""
    return {
        "sum": dist.ReduceOp.SUM,
        "mean": dist.ReduceOp.SUM,
    }[op_name.lower()]


def all_reduce(py_dict, op="sum", group=None):
    """
    Apply all reduce function for python dict object.
    NOTE: make sure that every py_dict has the same keys and values are in the same shape.

    Args:
        py_dict (dict): dict to apply all reduce op.
        op (str): operator, could be "sum" or "mean".
        group: process group used for the single-process check; when None a
            global gloo group is looked up.

    Returns:
        ``py_dict`` itself when there is nothing to reduce (empty dict or a
        single process), otherwise an ``OrderedDict`` of the reduced tensors.
    """
    # Nothing to exchange; also avoids `torch.cat` on an empty list below.
    if not py_dict:
        return py_dict

    world_size = get_world_size()
    if world_size == 1:
        return py_dict
    if group is None:
        group = _get_global_gloo_group()
    if dist.get_world_size(group) == 1:
        return py_dict

    # all reduce logic across different devices.
    py_key = list(py_dict.keys())
    # Keep the key tensor on the same device as the values so that CPU runs
    # do not require CUDA.
    py_key_tensor = pyobj2tensor(py_key, device=py_dict[py_key[0]].device)
    dist.broadcast(py_key_tensor, src=0)
    py_key = tensor2pyobj(py_key_tensor)

    tensor_shapes = [py_dict[k].shape for k in py_key]
    tensor_numels = [py_dict[k].numel() for k in py_key]

    flatten_tensor = torch.cat([py_dict[k].flatten() for k in py_key])
    dist.all_reduce(flatten_tensor, op=_get_reduce_op(op))
    if op == "mean":
        flatten_tensor /= world_size

    split_tensors = [
        x.reshape(shape)
        for x, shape in zip(torch.split(flatten_tensor, tensor_numels), tensor_shapes)
    ]
    return OrderedDict({k: v for k, v in zip(py_key, split_tensors)})


def all_reduce_norm(module):
    """
    All reduce norm statistics in different devices.
    """
    states = get_async_norm_states(module)
    states = all_reduce(states, op="mean")
    module.load_state_dict(states, strict=False)
def filter_box(output, scale_range):
    """
    Keep the boxes whose area lies strictly between the squared scale bounds.

    output: (N, 5+class) shape
    scale_range: (min_scale, max_scale) pair
    """
    min_scale, max_scale = scale_range
    w = output[:, 2] - output[:, 0]
    h = output[:, 3] - output[:, 1]
    keep = (w * h > min_scale * min_scale) & (w * h < max_scale * max_scale)
    return output[keep]


def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False):
    """
    Threshold and NMS-filter raw predictions, one image at a time.

    NOTE: the first four channels of ``prediction`` are converted from
    (cx, cy, w, h) to (x1, y1, x2, y2) in place.

    Args:
        prediction (Tensor): indexed as ``[image, candidate, channel]`` with
            channels (cx, cy, w, h, obj_conf, class scores...).
        num_classes (int): number of class-score channels to consider.
        conf_thre (float): minimum ``obj_conf * class_conf`` to keep a candidate.
        nms_thre (float): IoU threshold handed to torchvision NMS.
        class_agnostic (bool): run NMS across classes instead of per class.

    Returns:
        list: per image either ``None`` (nothing kept) or a tensor with rows
        (x1, y1, x2, y2, obj_conf, class_conf, class_pred).
    """
    half_wh = prediction[:, :, 2:4] / 2
    centers = prediction[:, :, :2].clone()
    prediction[:, :, :2] = centers - half_wh
    prediction[:, :, 2:4] = centers + half_wh

    output = [None for _ in range(len(prediction))]
    for i, image_pred in enumerate(prediction):

        # If none are remaining => process next image
        if not image_pred.size(0):
            continue
        # Get score and class with highest confidence
        class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True)

        # Squeeze only the kept dim so a single candidate still yields a 1-D
        # mask; a 0-d boolean mask would add a dimension when indexing.
        conf_mask = image_pred[:, 4] * class_conf.squeeze(1) >= conf_thre
        # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
        detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1)
        detections = detections[conf_mask]
        if not detections.size(0):
            continue

        if class_agnostic:
            nms_out_index = torchvision.ops.nms(
                detections[:, :4],
                detections[:, 4] * detections[:, 5],
                nms_thre,
            )
        else:
            nms_out_index = torchvision.ops.batched_nms(
                detections[:, :4],
                detections[:, 4] * detections[:, 5],
                detections[:, 6],
                nms_thre,
            )

        output[i] = detections[nms_out_index]

    return output


def bboxes_iou(bboxes_a, bboxes_b, xyxy=True):
    """
    Pairwise IoU between two sets of boxes (torch tensors).

    Args:
        bboxes_a (Tensor): shape (N, 4).
        bboxes_b (Tensor): shape (M, 4).
        xyxy (bool): True for (x1, y1, x2, y2) boxes, False for (cx, cy, w, h).

    Returns:
        Tensor: shape (N, M).

    Raises:
        IndexError: if either input does not have four columns.
    """
    if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:
        raise IndexError(
            "bboxes_iou expects boxes with 4 columns, got shapes {} and {}".format(
                tuple(bboxes_a.shape), tuple(bboxes_b.shape)
            )
        )

    if xyxy:
        tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2])
        br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:])
        area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1)
        area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1)
    else:
        tl = torch.max(
            (bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2),
            (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2),
        )
        br = torch.min(
            (bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2),
            (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2),
        )

        area_a = torch.prod(bboxes_a[:, 2:], 1)
        area_b = torch.prod(bboxes_b[:, 2:], 1)
    # Zero out pairs that do not overlap along both axes.
    en = (tl < br).type(tl.type()).prod(dim=2)
    area_i = torch.prod(br - tl, 2) * en  # * ((tl < br).all())
    return area_i / (area_a[:, None] + area_b - area_i)


def matrix_iou(a, b):
    """
    return iou of a and b, numpy version for data augmentation
    """
    lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])

    area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
    area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
    # The epsilon guards against division by zero for degenerate boxes.
    return area_i / (area_a[:, np.newaxis] + area_b - area_i + 1e-12)


def adjust_box_anns(bbox, scale_ratio, padw, padh, w_max, h_max):
    """Scale, shift and clip box coordinates in place; returns ``bbox``."""
    bbox[:, 0::2] = np.clip(bbox[:, 0::2] * scale_ratio + padw, 0, w_max)
    bbox[:, 1::2] = np.clip(bbox[:, 1::2] * scale_ratio + padh, 0, h_max)
    return bbox


def xyxy2xywh(bboxes):
    """Convert (x1, y1, x2, y2) to (x1, y1, w, h) in place; returns ``bboxes``."""
    bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]
    bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]
    return bboxes


def xyxy2cxcywh(bboxes):
    """Convert (x1, y1, x2, y2) to (cx, cy, w, h) in place; returns ``bboxes``."""
    bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]
    bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]
    bboxes[:, 0] = bboxes[:, 0] + bboxes[:, 2] * 0.5
    bboxes[:, 1] = bboxes[:, 1] + bboxes[:, 3] * 0.5
    return bboxes


def cxcywh2xyxy(bboxes):
    """Convert (cx, cy, w, h) to (x1, y1, x2, y2) in place; returns ``bboxes``."""
    bboxes[:, 0] = bboxes[:, 0] - bboxes[:, 2] * 0.5
    bboxes[:, 1] = bboxes[:, 1] - bboxes[:, 3] * 0.5
    bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
    bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]
    return bboxes


def load_ckpt(model, ckpt):
    """
    Load the entries of ``ckpt`` that match ``model`` by name and shape.

    Missing keys and shape mismatches are logged as warnings and skipped.

    Args:
        model: module whose ``state_dict`` defines the expected keys.
        ckpt (dict): mapping from parameter name to tensor.

    Returns:
        The same ``model`` instance.
    """
    model_state_dict = model.state_dict()
    load_dict = {}
    for key_model, v in model_state_dict.items():
        if key_model not in ckpt:
            logger.warning(
                "{} is not in the ckpt. Please double check and see if this is desired.".format(
                    key_model
                )
            )
            continue
        v_ckpt = ckpt[key_model]
        if v.shape != v_ckpt.shape:
            logger.warning(
                "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format(
                    key_model, v_ckpt.shape, key_model, v.shape
                )
            )
            continue
        load_dict[key_model] = v_ckpt

    model.load_state_dict(load_dict, strict=False)
    return model


def save_checkpoint(state, is_best, save_dir, model_name=""):
    """
    Save ``state`` to ``<save_dir>/<model_name>_ckpt.pth``.

    When ``is_best`` is true the file is additionally copied to
    ``<save_dir>/best_ckpt.pth``.
    """
    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(save_dir, exist_ok=True)
    filename = os.path.join(save_dir, model_name + "_ckpt.pth")
    torch.save(state, filename)
    if is_best:
        best_filename = os.path.join(save_dir, "best_ckpt.pth")
        shutil.copyfile(filename, best_filename)
+""" +This file contains primitives for multi-gpu communication. +This is useful when doing distributed training. +""" + +import functools +import os +import pickle +import time +from contextlib import contextmanager +from loguru import logger + +import numpy as np + +import torch +from torch import distributed as dist + +__all__ = [ + "get_num_devices", + "wait_for_the_master", + "is_main_process", + "synchronize", + "get_world_size", + "get_rank", + "get_local_rank", + "get_local_size", + "time_synchronized", + "gather", + "all_gather", +] + +_LOCAL_PROCESS_GROUP = None + + +def get_num_devices(): + gpu_list = os.getenv('CUDA_VISIBLE_DEVICES', None) + if gpu_list is not None: + return len(gpu_list.split(',')) + else: + devices_list_info = os.popen("nvidia-smi -L") + devices_list_info = devices_list_info.read().strip().split("\n") + return len(devices_list_info) + + +@contextmanager +def wait_for_the_master(local_rank: int = None): + """ + Make all processes waiting for the master to do some task. + + Args: + local_rank (int): the rank of the current process. Default to None. + If None, it will use the rank of the current process. 
+ """ + if local_rank is None: + local_rank = get_local_rank() + + if local_rank > 0: + dist.barrier() + yield + if local_rank == 0: + if not dist.is_available(): + return + if not dist.is_initialized(): + return + else: + dist.barrier() + + +def synchronize(): + """ + Helper function to synchronize (barrier) among all processes when using distributed training + """ + if not dist.is_available(): + return + if not dist.is_initialized(): + return + world_size = dist.get_world_size() + if world_size == 1: + return + dist.barrier() + + +def get_world_size() -> int: + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank() -> int: + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + return dist.get_rank() + + +def get_local_rank() -> int: + """ + Returns: + The rank of the current process within the local (per-machine) process group. + """ + if _LOCAL_PROCESS_GROUP is None: + return get_rank() + + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + return dist.get_rank(group=_LOCAL_PROCESS_GROUP) + + +def get_local_size() -> int: + """ + Returns: + The size of the per-machine process group, i.e. the number of processes per machine. + """ + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size(group=_LOCAL_PROCESS_GROUP) + + +def is_main_process() -> bool: + return get_rank() == 0 + + +@functools.lru_cache() +def _get_global_gloo_group(): + """ + Return a process group based on gloo backend, containing all the ranks + The result is cached. 
@functools.lru_cache()
def _get_global_gloo_group():
    """
    Return a process group based on gloo backend, containing all the ranks.
    The result is cached.
    """
    if dist.get_backend() != "nccl":
        return dist.group.WORLD
    return dist.new_group(backend="gloo")


def _serialize_to_tensor(data, group):
    """
    Pickle ``data`` into a 1-D uint8 tensor placed on the device that matches
    the backend of ``group`` (CPU for gloo, CUDA for nccl).
    """
    backend = dist.get_backend(group)
    assert backend in ["gloo", "nccl"]
    device = torch.device("cpu" if backend == "gloo" else "cuda")

    payload = pickle.dumps(data)
    one_gb = 1024 ** 3
    if len(payload) > one_gb:
        logger.warning(
            "Rank {} trying to all-gather {:.2f} GB of data on device {}".format(
                get_rank(), len(payload) / (1024 ** 3), device
            )
        )
    byte_storage = torch.ByteStorage.from_buffer(payload)
    return torch.ByteTensor(byte_storage).to(device=device)


def _pad_to_largest_tensor(tensor, group):
    """
    Returns:
        list[int]: size of the tensor, on each rank
        Tensor: padded tensor that has the max size
    """
    n_ranks = dist.get_world_size(group=group)
    assert (
        n_ranks >= 1
    ), "comm.gather/all_gather must be called from ranks within the given group!"

    own_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device)
    gathered_sizes = []
    for _ in range(n_ranks):
        gathered_sizes.append(torch.zeros([1], dtype=torch.int64, device=tensor.device))
    dist.all_gather(gathered_sizes, own_size, group=group)
    size_list = [int(entry.item()) for entry in gathered_sizes]

    largest = max(size_list)

    # torch all_gather cannot gather tensors of different shapes,
    # so shorter payloads are zero-padded up to the largest one
    if own_size != largest:
        filler = torch.zeros((largest - own_size,), dtype=torch.uint8, device=tensor.device)
        tensor = torch.cat((tensor, filler), dim=0)
    return size_list, tensor


def all_gather(data, group=None):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors).

    Args:
        data: any picklable object
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.

    Returns:
        list[data]: list of data gathered from each rank
    """
    if get_world_size() == 1:
        return [data]
    if group is None:
        group = _get_global_gloo_group()
    if dist.get_world_size(group) == 1:
        return [data]

    payload = _serialize_to_tensor(data, group)
    size_list, payload = _pad_to_largest_tensor(payload, group)
    max_size = max(size_list)

    # one receive buffer per rank, all of the padded size
    received = [
        torch.empty((max_size,), dtype=torch.uint8, device=payload.device) for _ in size_list
    ]
    dist.all_gather(received, payload, group=group)

    # strip the padding of each buffer before unpickling
    return [
        pickle.loads(chunk.cpu().numpy().tobytes()[:size])
        for size, chunk in zip(size_list, received)
    ]
def gather(data, dst=0, group=None):
    """
    Run gather on arbitrary picklable data (not necessarily tensors).

    Args:
        data: any picklable object
        dst (int): destination rank
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.

    Returns:
        list[data]: on dst, a list of data gathered from each rank. Otherwise,
            an empty list. With a single process, ``[data]``.
    """
    if get_world_size() == 1:
        return [data]
    if group is None:
        group = _get_global_gloo_group()
    if dist.get_world_size(group=group) == 1:
        return [data]
    rank = dist.get_rank(group=group)

    payload = _serialize_to_tensor(data, group)
    size_list, payload = _pad_to_largest_tensor(payload, group)

    if rank != dst:
        # non-destination ranks only send
        dist.gather(payload, [], dst=dst, group=group)
        return []

    # destination rank: receive one padded buffer per rank
    max_size = max(size_list)
    received = [
        torch.empty((max_size,), dtype=torch.uint8, device=payload.device) for _ in size_list
    ]
    dist.gather(payload, received, dst=dst, group=group)

    return [
        pickle.loads(chunk.cpu().numpy().tobytes()[:size])
        for size, chunk in zip(size_list, received)
    ]


def shared_random_seed():
    """
    Returns:
        int: a random number that is the same across all workers.
            If workers need a shared RNG, they can use this shared seed to
            create one.
    All workers must call this function, otherwise it will deadlock.
    """
    # every rank draws a candidate; the one drawn by the first rank is used everywhere
    candidate = np.random.randint(2 ** 31)
    return all_gather(candidate)[0]


def time_synchronized():
    """Return time.time() after waiting for pending CUDA work (when CUDA is available)."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()
def is_parallel(model):
    """Return True when ``model`` is wrapped in DataParallel or DistributedDataParallel."""
    return isinstance(
        model,
        (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel),
    )


class ModelEMA:
    """
    Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
    Keeps a moving average of the floating-point entries of the model state_dict
    (parameters and buffers).
    This is intended to allow functionality like
    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
    A smoothed version of the weights is necessary for some training schemes to perform well.
    This class is sensitive where it is initialized in the sequence of model init,
    GPU assignment and distributed training wrappers.
    """

    def __init__(self, model, decay=0.9999, updates=0):
        """
        Args:
            model (nn.Module): model to apply EMA.
            decay (float): ema decay rate.
            updates (int): counter of EMA updates.
        """
        # The EMA copy is built from the unwrapped module and kept in eval mode.
        unwrapped = model.module if is_parallel(model) else model
        self.ema = deepcopy(unwrapped).eval()
        self.updates = updates
        # decay exponential ramp (to help early epochs)
        self.decay = lambda x: decay * (1 - math.exp(-x / 2000))
        for param in self.ema.parameters():
            param.requires_grad_(False)

    def update(self, model):
        """Blend the current weights of ``model`` into the EMA copy, in place."""
        with torch.no_grad():
            self.updates += 1
            momentum = self.decay(self.updates)

            live_state = (
                model.module.state_dict() if is_parallel(model) else model.state_dict()
            )
            for key, averaged in self.ema.state_dict().items():
                # integer entries are left untouched
                if not averaged.dtype.is_floating_point:
                    continue
                averaged *= momentum
                averaged += (1.0 - momentum) * live_state[key].detach()
def get_caller_name(depth=0):
    """
    Args:
        depth (int): Depth of caller context, use 0 for caller depth.
            Default value: 0.

    Returns:
        str: module name of the caller
    """
    # walking f_back by hand is a little bit faster than inspect.stack()
    caller_frame = inspect.currentframe().f_back
    for _ in range(depth):
        caller_frame = caller_frame.f_back

    return caller_frame.f_globals["__name__"]


class StreamToLoguru:
    """
    Stream object that redirects writes to a logger instance.
    """

    def __init__(self, level="INFO", caller_names=("apex", "pycocotools")):
        """
        Args:
            level(str): log level string of loguru. Default value: "INFO".
            caller_names(tuple): caller names of redirected module.
                Default value: (apex, pycocotools).
        """
        self.level = level
        self.linebuf = ""
        self.caller_names = caller_names

    def write(self, buf):
        # NOTE: the frame lookup and logger.opt(depth=2) are depth-sensitive,
        # so both must stay directly in this method.
        top_level_package = get_caller_name(depth=1).partition(".")[0]
        if top_level_package not in self.caller_names:
            sys.__stdout__.write(buf)
            return
        for line in buf.rstrip().splitlines():
            # use caller level log
            logger.opt(depth=2).log(self.level, line.rstrip())

    def flush(self):
        # flush is related with CPR(cursor position report) in terminal
        return sys.__stdout__.flush()

    def isatty(self):
        # when using colab, jax is installed by default and issue like
        # https://github.com/Megvii-BaseDetection/YOLOX/issues/1437 might be raised
        # due to missing attribute like`isatty`.
        # For more details, checked the following link:
        # https://github.com/google/jax/blob/10720258ea7fb5bde997dfa2f3f71135ab7a6733/jax/_src/pretty_printer.py#L54  # noqa
        return sys.__stdout__.isatty()

    def fileno(self):
        # To solve the issue when using debug tools like pdb
        return sys.__stdout__.fileno()


def redirect_sys_output(log_level="INFO"):
    """Replace sys.stderr and sys.stdout with one shared StreamToLoguru instance."""
    stream = StreamToLoguru(log_level)
    sys.stderr = stream
    sys.stdout = stream
def setup_logger(save_dir, distributed_rank=0, filename="log.txt", mode="a"):
    """setup logger for training and testing.
    Args:
        save_dir(str): location to save log file
        distributed_rank(int): device rank when multi-gpu environment
        filename (string): log save name.
        mode(str): log file write mode. "o" removes an existing log file
            before logging starts; any other value keeps it. default is `a`.

    Return:
        None. The global loguru ``logger`` is configured in place.
    """
    line_format = "{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {name}:{line} - {message}"

    # start from a clean logger with no sinks
    logger.remove()
    log_path = os.path.join(save_dir, filename)
    if mode == "o":
        if os.path.exists(log_path):
            os.remove(log_path)

    # only keep logger in rank0 process
    # NOTE(review): the file sink is assumed to be rank-0 only as well;
    # confirm against the original indentation.
    if distributed_rank == 0:
        logger.add(
            sys.stderr,
            format=line_format,
            level="INFO",
            enqueue=True,
        )
        logger.add(log_path)

    # redirect stdout/stderr to loguru
    redirect_sys_output("INFO")
class WandbLogger(object):
    """
    Log training runs, datasets, models, and predictions to Weights & Biases.
    This logger sends information to W&B at wandb.ai.
    By default, this information includes hyperparameters,
    system configuration and metrics, model metrics,
    and basic data metrics and analyses.

    For more information, please refer to:
    https://docs.wandb.ai/guides/track
    https://docs.wandb.ai/guides/integrations/other/yolox
    """

    def __init__(
        self,
        project=None,
        name=None,
        id=None,
        entity=None,
        save_dir=None,
        config=None,
        val_dataset=None,
        num_eval_images=100,
        log_checkpoints=False,
        **kwargs,
    ):
        """
        Args:
            project (str): wandb project name.
            name (str): wandb run name.
            id (str): wandb run id.
            entity (str): wandb entity name.
            save_dir (str): save directory.
            config (dict): config dict.
            val_dataset (Dataset): validation dataset. May be None, in which
                case no validation images are logged.
            num_eval_images (int): number of images from the validation set to log
                (-1 logs the whole set).
            log_checkpoints (bool | str): log checkpoints. ``True`` and the
                strings "True"/"true" enable it.
            **kwargs: other kwargs, forwarded to ``wandb.init``.

        Usage:
            Any arguments for wandb.init can be provided on the command line using
            the prefix `wandb-`, as key/value pairs after ``--logger wandb``, e.g.
            ``wandb-project <p> wandb-name <n> wandb-id <i> wandb-save_dir <d>
            wandb-num_eval_images <k> wandb-log_checkpoints True``.
            The val_dataset argument is not open to the command line.

        Raises:
            ModuleNotFoundError: if wandb is not installed.
        """
        try:
            import wandb

            self.wandb = wandb
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "wandb is not installed."
                "Please install wandb using pip install wandb"
            )

        # yolox is only needed to recognise VOC-style datasets; it is not a
        # dependency of this package, so its absence must not break logging.
        try:
            from yolox.data.datasets import VOCDetection
        except ImportError:
            VOCDetection = None

        self.project = project
        self.name = name
        self.id = id
        self.save_dir = save_dir
        self.config = config
        self.kwargs = kwargs
        self.entity = entity
        self._run = None
        self.val_artifact = None

        # val_dataset defaults to None, which has no len()
        dataset_size = len(val_dataset) if val_dataset is not None else 0
        if num_eval_images == -1:
            self.num_log_images = dataset_size
        else:
            self.num_log_images = min(num_eval_images, dataset_size)

        # Command-line values arrive as strings; a real bool True must work too.
        self.log_checkpoints = log_checkpoints is True or log_checkpoints in ("True", "true")
        self._wandb_init = dict(
            project=self.project,
            name=self.name,
            id=self.id,
            entity=self.entity,
            dir=self.save_dir,
            resume="allow",
        )
        self._wandb_init.update(**kwargs)

        # touch the property once so the run is created (or reused) right away
        _ = self.run

        if self.config:
            self.run.config.update(self.config)
        self.run.define_metric("train/epoch")
        self.run.define_metric("val/*", step_metric="train/epoch")
        self.run.define_metric("train/step")
        self.run.define_metric("train/*", step_metric="train/step")

        self.voc_dataset = VOCDetection

        # defaults so later methods can rely on these attributes existing
        self.val_dataset = None
        self.cats = []
        self.id_to_class = {}

        if val_dataset and self.num_log_images != 0:
            self.val_dataset = val_dataset
            self.cats = val_dataset.cats
            self.id_to_class = {cat["id"]: cat["name"] for cat in self.cats}
            self._log_validation_set(val_dataset)

    @property
    def run(self):
        """The active wandb run; created lazily, reusing an already running one."""
        if self._run is None:
            if self.wandb.run is not None:
                logger.info(
                    "There is a wandb run already in progress "
                    "and newly created instances of `WandbLogger` will reuse"
                    " this run. If this is not desired, call `wandb.finish()`"
                    "before instantiating `WandbLogger`."
                )
                self._run = self.wandb.run
            else:
                self._run = self.wandb.init(**self._wandb_init)
        return self._run

    def _log_validation_set(self, val_dataset):
        """
        Log validation set to wandb.

        Args:
            val_dataset (Dataset): validation dataset. Each item is indexed as
                ``item[0]`` (image) and ``item[3]`` (image id).
        """
        if self.val_artifact is None:
            self.val_artifact = self.wandb.Artifact(name="validation_images", type="dataset")
            self.val_table = self.wandb.Table(columns=["id", "input"])

            for i in range(self.num_log_images):
                data_point = val_dataset[i]
                img = data_point[0]
                img_id = data_point[3]
                # channel-first BGR -> channel-last RGB
                img = np.transpose(img, (1, 2, 0))
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                if isinstance(img_id, torch.Tensor):
                    img_id = img_id.item()

                self.val_table.add_data(img_id, self.wandb.Image(img))

            self.val_artifact.add(self.val_table, "validation_images_table")
            self.run.use_artifact(self.val_artifact)
            self.val_artifact.wait()

    def _convert_prediction_format(self, predictions):
        """
        Convert per-image predictions to the dict layout used by ``log_images``.

        Args:
            predictions (dict): image id -> either a ``(bboxes, categories, scores)``
                triple or a dict with the keys "bboxes", "categories", "scores".

        Returns:
            dict: ``int(image id)`` -> {"bboxes", "scores", "categories"} lists,
                with entries containing a None box/score/class dropped.
        """
        image_wise_data = defaultdict(int)

        for img_id, val in predictions.items():
            # Unpacking a dict yields its keys, never a KeyError, so the two
            # layouts are told apart explicitly.
            if isinstance(val, dict):
                bboxes, categories, scores = val["bboxes"], val["categories"], val["scores"]
            else:
                bboxes, categories, scores = val

            # These store information of actual bounding boxes i.e. the ones which are not None
            act_box = []
            act_scores = []
            act_cls = []

            if bboxes is not None:
                for box, category, score in zip(bboxes, categories, scores):
                    if box is None or score is None or category is None:
                        continue
                    act_box.append(box)
                    act_scores.append(score)
                    act_cls.append(category)

            image_wise_data.update(
                {
                    int(img_id): {
                        "bboxes": [box.numpy().tolist() for box in act_box],
                        "scores": [score.numpy().item() for score in act_scores],
                        "categories": [
                            self.val_dataset.class_ids[int(category)] for category in act_cls
                        ],
                    }
                }
            )

        return image_wise_data

    def log_metrics(self, metrics, step=None):
        """
        Args:
            metrics (dict): metrics dict. Tensor values are converted with ``.item()``;
                the caller's dict is not modified.
            step (int): step number, logged as "train/step" when given.
        """
        payload = {
            key: (value.item() if isinstance(value, torch.Tensor) else value)
            for key, value in metrics.items()
        }
        if step is not None:
            payload["train/step"] = step
        self.run.log(payload)

    def log_images(self, predictions):
        """
        Log one table row per logged validation image with its predicted boxes
        and the mean score per class.

        Args:
            predictions (dict): image id -> prediction; does nothing when empty or
                when no validation images were logged.
        """
        if len(predictions) == 0 or self.val_artifact is None or self.num_log_images == 0:
            return

        table_ref = self.val_artifact.get("validation_images_table")

        columns = ["id", "predicted"] + [cat["name"] for cat in self.cats]

        # VOC-style predictions need converting; skipped when yolox is unavailable
        if self.voc_dataset is not None and isinstance(self.val_dataset, self.voc_dataset):
            predictions = self._convert_prediction_format(predictions)

        result_table = self.wandb.Table(columns=columns)

        for idx, val in table_ref.iterrows():
            score_sums = defaultdict(int)
            num_occurrences = defaultdict(int)

            img_id = val[0]
            if isinstance(img_id, list):
                img_id = img_id[0]

            boxes = []
            if img_id in predictions:
                prediction = predictions[img_id]
                for i, bbox in enumerate(prediction["bboxes"]):
                    x0, y0, x1, y1 = bbox[0], bbox[1], bbox[2], bbox[3]
                    category = prediction["categories"][i]
                    class_name = self.id_to_class[category]
                    boxes.append(
                        {
                            "position": {
                                "minX": min(x0, x1),
                                "minY": min(y0, y1),
                                "maxX": max(x0, x1),
                                "maxY": max(y0, y1),
                            },
                            "class_id": category,
                            "domain": "pixel",
                        }
                    )
                    score_sums[class_name] += prediction["scores"][i]
                    num_occurrences[class_name] += 1

            # classes without any detection in this image score 0
            average_class_score = [
                score_sums[cat["name"]] / num_occurrences[cat["name"]]
                if cat["name"] in num_occurrences
                else 0
                for cat in self.cats
            ]
            result_table.add_data(
                idx,
                self.wandb.Image(
                    val[1],
                    boxes={
                        "prediction": {
                            "box_data": boxes,
                            "class_labels": self.id_to_class,
                        }
                    },
                ),
                *average_class_score,
            )

        self.wandb.log({"val_results/result_table": result_table})

    def save_checkpoint(self, save_dir, model_name, is_best, metadata=None):
        """
        Args:
            save_dir (str): save directory.
            model_name (str): model name.
            is_best (bool): whether the model is the best model.
            metadata (dict): metadata to save corresponding to the checkpoint.
                An "epoch" entry, when present, adds an ``epoch-<n>`` alias.
        """
        if not self.log_checkpoints:
            return

        # metadata defaults to None; epoch 0 is a valid epoch
        epoch = metadata.get("epoch") if metadata else None

        filename = os.path.join(save_dir, model_name + "_ckpt.pth")
        artifact = self.wandb.Artifact(
            name=f"run_{self.run.id}_model",
            type="model",
            metadata=metadata,
        )
        artifact.add_file(filename, name="model_ckpt.pth")

        aliases = ["latest"]

        if is_best:
            aliases.append("best")

        if epoch is not None:
            aliases.append(f"epoch-{epoch}")

        self.run.log_artifact(artifact, aliases=aliases)

    def finish(self):
        """Finish the underlying wandb run."""
        self.run.finish()

    @classmethod
    def initialize_wandb_logger(cls, args, exp, val_dataset):
        """
        Build a logger from ``wandb-<key> <value>`` pairs found in ``args.opts``.

        Values that parse as int are passed as int, everything else as str;
        ``vars(exp)`` is used as the run config.
        """
        wandb_params = dict()
        prefix = "wandb-"
        for k, v in zip(args.opts[0::2], args.opts[1::2]):
            if k.startswith(prefix):
                key = k[len(prefix):]
                try:
                    wandb_params[key] = int(v)
                except ValueError:
                    wandb_params[key] = v

        return cls(config=vars(exp), val_dataset=val_dataset, **wandb_params)
class LRScheduler:
    """Maps an iteration index to a learning rate using one of several named schedules."""

    def __init__(self, name, lr, iters_per_epoch, total_epochs, **kwargs):
        """
        Supported lr schedulers: [cos, warmcos, yoloxwarmcos, yoloxsemiwarmcos, multistep]

        Args:
            name (str): name of the schedule.
            lr (float): learning rate.
            iters_per_epoch (int): number of iterations in one epoch.
            total_epochs (int): number of epochs in training.
            kwargs (dict): stored as attributes and read by the chosen schedule:
                - cos: None
                - warmcos: [warmup_epochs, warmup_lr_start (default 1e-6)]
                - yoloxwarmcos: [warmup_epochs, no_aug_epochs,
                  warmup_lr_start (default 0), min_lr_ratio (default 0.2)]
                - yoloxsemiwarmcos: as yoloxwarmcos plus [semi_epoch, iters_per_epoch_semi]
                - multistep: [milestones (epochs), gamma (default 0.1)]

        Raises:
            ValueError: if ``name`` is not a supported schedule.
        """
        self.lr = lr
        self.iters_per_epoch = iters_per_epoch
        self.total_epochs = total_epochs
        self.total_iters = iters_per_epoch * total_epochs

        # schedule-specific options become plain attributes
        vars(self).update(kwargs)

        self.lr_func = self._get_lr_func(name)

    def update_lr(self, iters):
        """Return the learning rate for iteration ``iters``."""
        return self.lr_func(iters)

    def _get_lr_func(self, name):
        """Bind the schedule called ``name`` to this scheduler's settings."""
        if name == "cos":  # cosine lr schedule
            return partial(cos_lr, self.lr, self.total_iters)

        if name == "warmcos":
            warmup_iters = self.iters_per_epoch * self.warmup_epochs
            start_lr = getattr(self, "warmup_lr_start", 1e-6)
            return partial(warm_cos_lr, self.lr, self.total_iters, warmup_iters, start_lr)

        if name == "yoloxwarmcos":
            warmup_iters = self.iters_per_epoch * self.warmup_epochs
            no_aug_iters = self.iters_per_epoch * self.no_aug_epochs
            start_lr = getattr(self, "warmup_lr_start", 0)
            floor_ratio = getattr(self, "min_lr_ratio", 0.2)
            return partial(
                yolox_warm_cos_lr,
                self.lr,
                floor_ratio,
                self.total_iters,
                warmup_iters,
                start_lr,
                no_aug_iters,
            )

        if name == "yoloxsemiwarmcos":
            start_lr = getattr(self, "warmup_lr_start", 0)
            floor_ratio = getattr(self, "min_lr_ratio", 0.2)
            warmup_iters = self.iters_per_epoch * self.warmup_epochs
            no_aug_iters = self.iters_per_epoch * self.no_aug_epochs
            normal_iters = self.iters_per_epoch * self.semi_epoch
            semi_epochs = self.total_epochs - self.semi_epoch - self.no_aug_epochs
            semi_iters = self.iters_per_epoch_semi * semi_epochs
            return partial(
                yolox_semi_warm_cos_lr,
                self.lr,
                floor_ratio,
                start_lr,
                self.total_iters,
                normal_iters,
                no_aug_iters,
                warmup_iters,
                semi_iters,
                self.iters_per_epoch,
                self.iters_per_epoch_semi,
            )

        if name == "multistep":  # stepwise lr schedule
            # milestones are given in epochs and converted to iterations
            iter_milestones = [
                int(self.total_iters * epoch / self.total_epochs) for epoch in self.milestones
            ]
            decay = getattr(self, "gamma", 0.1)
            return partial(multistep_lr, self.lr, iter_milestones, decay)

        raise ValueError("Scheduler version {} not supported.".format(name))
def cos_lr(lr, total_iters, iters):
    """Cosine learning rate: ``lr`` at iteration 0, decaying to 0 at ``total_iters``."""
    lr *= 0.5 * (1.0 + math.cos(math.pi * iters / total_iters))
    return lr


def warm_cos_lr(lr, total_iters, warmup_total_iters, warmup_lr_start, iters):
    """Cosine learning rate with linear warm up.

    Args:
        lr (float): peak learning rate, reached at the end of warm up.
        total_iters (int): total number of training iterations.
        warmup_total_iters (int): number of warm-up iterations; 0 disables warm up.
        warmup_lr_start (float): learning rate at iteration 0 of the warm up.
        iters (int): current iteration.

    Returns:
        float: learning rate for ``iters``.
    """
    # With no warm up there is nothing to interpolate over (and nothing to
    # divide by), so iteration 0 goes straight to the cosine branch.
    if warmup_total_iters > 0 and iters <= warmup_total_iters:
        lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start
    else:
        lr *= 0.5 * (
            1.0
            + math.cos(
                math.pi * (iters - warmup_total_iters) / (total_iters - warmup_total_iters)
            )
        )
    return lr


def yolox_warm_cos_lr(
    lr,
    min_lr_ratio,
    total_iters,
    warmup_total_iters,
    warmup_lr_start,
    no_aug_iter,
    iters,
):
    """Cosine learning rate with quadratic warm up and a constant final phase.

    Args:
        lr (float): peak learning rate.
        min_lr_ratio (float): the floor is ``lr * min_lr_ratio``.
        total_iters (int): total number of training iterations.
        warmup_total_iters (int): number of warm-up iterations; 0 disables warm up.
        warmup_lr_start (float): learning rate at iteration 0 of the warm up.
        no_aug_iter (int): number of final iterations held at the floor.
        iters (int): current iteration.

    Returns:
        float: learning rate for ``iters``.
    """
    min_lr = lr * min_lr_ratio
    # warmup_total_iters == 0 must not reach the division below
    if warmup_total_iters > 0 and iters <= warmup_total_iters:
        # quadratic ramp from warmup_lr_start to lr
        lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2) + warmup_lr_start
    elif iters >= total_iters - no_aug_iter:
        lr = min_lr
    else:
        lr = min_lr + 0.5 * (lr - min_lr) * (
            1.0
            + math.cos(
                math.pi
                * (iters - warmup_total_iters)
                / (total_iters - warmup_total_iters - no_aug_iter)
            )
        )
    return lr
def yolox_semi_warm_cos_lr(
    lr,
    min_lr_ratio,
    warmup_lr_start,
    total_iters,
    normal_iters,
    no_aug_iters,
    warmup_total_iters,
    semi_iters,
    iters_per_epoch,
    iters_per_epoch_semi,
    iters,
):
    """Cosine learning rate with quadratic warm up for a two-phase (normal + semi) run.

    Args:
        lr (float): peak learning rate.
        min_lr_ratio (float): the floor is ``lr * min_lr_ratio``.
        warmup_lr_start (float): learning rate at iteration 0 of the warm up.
        total_iters (int): total iterations used to scale the cosine.
        normal_iters (int): iterations in the normal phase.
        no_aug_iters (int): iterations excluded from the cosine span.
        warmup_total_iters (int): number of warm-up iterations; 0 disables warm up.
        semi_iters (int): iterations in the semi phase.
        iters_per_epoch (int): iterations per epoch in the normal phase.
        iters_per_epoch_semi (int): iterations per epoch in the semi phase.
        iters (int): current iteration.

    Returns:
        float: learning rate for ``iters``.
    """
    min_lr = lr * min_lr_ratio
    # warmup_total_iters == 0 must not reach the division below
    if warmup_total_iters > 0 and iters <= warmup_total_iters:
        # quadratic ramp from warmup_lr_start to lr
        lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2) + warmup_lr_start
    elif iters >= normal_iters + semi_iters:
        lr = min_lr
    elif iters <= normal_iters:
        lr = min_lr + 0.5 * (lr - min_lr) * (
            1.0
            + math.cos(
                math.pi
                * (iters - warmup_total_iters)
                / (total_iters - warmup_total_iters - no_aug_iters)
            )
        )
    else:
        # semi-phase iterations are rescaled to normal-phase iterations
        # before being placed on the same cosine curve
        rescaled = (iters - normal_iters) * iters_per_epoch * 1.0 / iters_per_epoch_semi
        lr = min_lr + 0.5 * (lr - min_lr) * (
            1.0
            + math.cos(
                math.pi
                * (normal_iters - warmup_total_iters + rescaled)
                / (total_iters - warmup_total_iters - no_aug_iters)
            )
        )
    return lr


def multistep_lr(lr, milestones, gamma, iters):
    """MultiStep learning rate: ``lr`` times ``gamma`` for every milestone already reached."""
    for milestone in milestones:
        lr *= gamma if iters >= milestone else 1.0
    return lr
def get_total_and_free_memory_in_Mb(cuda_device):
    """
    Query ``nvidia-smi`` for the memory of one GPU.

    Args:
        cuda_device (int): device index. When CUDA_VISIBLE_DEVICES is set it is
            first used as an index into that list.

    Returns:
        tuple[int, int]: ``(memory.total, memory.used)`` as reported by nvidia-smi.
            NOTE: despite the function name, the second value is the *used* memory.
    """
    # the pipe is closed as soon as the output has been read
    with os.popen(
        "nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader"
    ) as devices_info_str:
        devices_info = devices_info_str.read().strip().split("\n")
    if "CUDA_VISIBLE_DEVICES" in os.environ:
        visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
        cuda_device = int(visible_devices[cuda_device])
    total, used = devices_info[int(cuda_device)].split(",")
    return int(total), int(used)


def occupy_mem(cuda_device, mem_ratio=0.9):
    """
    pre-allocate gpu memory for training to avoid memory Fragmentation.

    Args:
        cuda_device (int): device index passed to ``get_total_and_free_memory_in_Mb``.
        mem_ratio (float): fraction of the total memory to reach, counting
            what is already in use.
    """
    total, used = get_total_and_free_memory_in_Mb(cuda_device)
    max_mem = int(total * mem_ratio)
    block_mem = max_mem - used
    # 256 * 1024 float32 values make one block; block_mem such blocks are allocated
    x = torch.cuda.FloatTensor(256, 1024, block_mem)
    del x
    time.sleep(5)


def gpu_mem_usage():
    """
    Compute the GPU memory usage for the current device (MB).
    """
    mem_usage_bytes = torch.cuda.max_memory_allocated()
    return mem_usage_bytes / (1024 * 1024)


def mem_usage():
    """
    Compute the memory usage for the current machine (GB).
    """
    gb = 1 << 30
    mem = psutil.virtual_memory()
    return mem.used / gb
class AverageMeter:
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.
    """

    def __init__(self, window_size=50):
        """
        Args:
            window_size (int): number of most recent values kept for the
                windowed statistics (median, avg, latest).
        """
        self._deque = deque(maxlen=window_size)
        self._total = 0.0
        self._count = 0

    def update(self, value):
        """Record ``value`` in the window and in the global total/count."""
        self._deque.append(value)
        self._count += 1
        self._total += value

    @property
    def median(self):
        """Median of the values currently in the window."""
        d = np.array(list(self._deque))
        return np.median(d)

    @property
    def avg(self):
        """Mean of the values currently in the window."""
        # if deque is empty, nan will be returned.
        d = np.array(list(self._deque))
        return d.mean()

    @property
    def global_avg(self):
        """Mean of every value seen since the last reset (0.0 when nothing was recorded)."""
        # the tiny lower bound avoids dividing by zero before the first update
        return self._total / max(self._count, 1e-5)

    @property
    def latest(self):
        """Most recent value in the window, or None when the window is empty."""
        return self._deque[-1] if len(self._deque) > 0 else None

    @property
    def total(self):
        """Sum of every value seen since the last reset."""
        return self._total

    def reset(self):
        """Forget everything: window, total and count."""
        self._deque.clear()
        self._total = 0.0
        self._count = 0

    def clear(self):
        """Empty the window only; total and count are kept."""
        self._deque.clear()


class MeterBuffer(defaultdict):
    """Dictionary of named AverageMeter objects, created on first access."""

    def __init__(self, window_size=20):
        """
        Args:
            window_size (int): window size of every meter created by this buffer.
        """
        factory = functools.partial(AverageMeter, window_size=window_size)
        super().__init__(factory)

    def reset(self):
        """Reset every meter."""
        for v in self.values():
            v.reset()

    def get_filtered_meter(self, filter_key="time"):
        """Return the meters whose name contains ``filter_key``."""
        return {k: v for k, v in self.items() if filter_key in k}

    def update(self, values=None, **kwargs):
        """
        Feed one value to each named meter.

        Args:
            values (dict): name -> value. Not modified by this call.
            **kwargs: further name/value pairs; they win over ``values`` on clashes.
        """
        # merge into a fresh dict so the caller's mapping is left untouched
        merged = dict(values) if values is not None else {}
        merged.update(kwargs)
        for k, v in merged.items():
            if isinstance(v, torch.Tensor):
                v = v.detach()
            self[k].update(v)

    def clear_meters(self):
        """Empty the window of every meter, keeping totals and counts."""
        for v in self.values():
            v.clear()
def get_model_info(model: nn.Module, tsize: Sequence[int]) -> str:
    """Return a one-line summary with the parameter count and GFLOPs of ``model``.

    The model is profiled with ``thop`` on a 64x64 dummy image and the FLOPs are
    scaled to the input size ``tsize``.

    Args:
        model (nn.Module): model to profile; a deep copy is profiled.
        tsize (Sequence[int]): two-element input size the FLOPs are scaled to.

    Returns:
        str: text of the form "Params: <x>M, Gflops: <y>".
    """
    from thop import profile

    stride = 64
    # the dummy input lives on the same device as the model's parameters
    img = torch.zeros((1, 3, stride, stride), device=next(model.parameters()).device)
    flops, params = profile(deepcopy(model), inputs=(img,), verbose=False)
    params /= 1e6
    flops /= 1e9
    flops *= tsize[0] * tsize[1] / stride / stride * 2  # Gflops
    info = "Params: {:.2f}M, Gflops: {:.2f}".format(params, flops)
    return info


@contextlib.contextmanager
def adjust_status(module: nn.Module, training: bool = False) -> nn.Module:
    """Adjust module to training/eval mode temporarily.

    The previous ``training`` flag of every sub-module is restored on exit,
    including when the body of the ``with`` block raises.

    Args:
        module (nn.Module): module to adjust status.
        training (bool): training mode to set. True for train mode, False for eval mode.

    Examples:
        >>> with adjust_status(model, training=False):
        ...     model(data)
    """
    status = {}

    def backup_status(module):
        for m in module.modules():
            # save prev status to dict
            status[m] = m.training
            m.training = training

    def recover_status(module):
        for m in module.modules():
            # recover prev status from dict
            m.training = status.pop(m)

    backup_status(module)
    try:
        yield module
    finally:
        # runs on normal exit and on exceptions alike
        recover_status(module)
+- test/test_trainer.py | 27 +++--- tools/eval_metric.py | 2 +- yolort/data/__init__.py | 2 +- yolort/data/data_augment.py | 12 ++- yolort/data/data_module.py | 1 + yolort/data/dataloading.py | 3 +- yolort/data/datasets/coco.py | 10 +-- yolort/data/datasets/datasets_wrapper.py | 56 ++++++------ yolort/data/datasets/mosaicdetection.py | 54 +++++------- yolort/data/samplers.py | 7 +- yolort/evaluators/__init__.py | 2 +- yolort/evaluators/coco_evaluator.py | 79 ++++++++--------- yolort/exp/__init__.py | 2 +- yolort/exp/base_exp.py | 12 +-- yolort/exp/default/__init__.py | 1 - yolort/exp/yolox_base.py | 67 ++++++--------- yolort/trainer/trainer.py | 71 ++++++--------- yolort/utils/__init__.py | 2 +- yolort/utils/allreduce_norm.py | 6 +- yolort/utils/boxes.py | 4 +- yolort/utils/checkpoint.py | 6 +- yolort/utils/dist.py | 37 +++----- yolort/utils/ema.py | 6 +- yolort/utils/logger.py | 105 +++++++++++------------ yolort/utils/lr_scheduler.py | 35 ++------ yolort/utils/metric.py | 8 +- yolort/utils/model_utils.py | 2 +- 37 files changed, 292 insertions(+), 386 deletions(-) diff --git a/exps/default/yolov5l.py b/exps/default/yolov5l.py index b04d0f90..a838ae16 100644 --- a/exps/default/yolov5l.py +++ b/exps/default/yolov5l.py @@ -15,6 +15,8 @@ def __init__(self): self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] def get_model(self): - self.model = models.__dict__['yolov5l'](upstream_version="r6.0",) + self.model = models.__dict__["yolov5l"]( + upstream_version="r6.0", + ) self.model.train() - return self.model \ No newline at end of file + return self.model diff --git a/exps/default/yolov5m.py b/exps/default/yolov5m.py index e33c2771..cd4ab778 100644 --- a/exps/default/yolov5m.py +++ b/exps/default/yolov5m.py @@ -15,6 +15,8 @@ def __init__(self): self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] def get_model(self): - self.model = models.__dict__['yolov5m'](upstream_version="r6.0",) + self.model = 
models.__dict__["yolov5m"]( + upstream_version="r6.0", + ) self.model.train() - return self.model \ No newline at end of file + return self.model diff --git a/exps/default/yolov5m6.py b/exps/default/yolov5m6.py index 4ac71156..55c7e504 100644 --- a/exps/default/yolov5m6.py +++ b/exps/default/yolov5m6.py @@ -15,6 +15,8 @@ def __init__(self): self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] def get_model(self): - self.model = models.__dict__['yolov5m6'](upstream_version="r6.0",) + self.model = models.__dict__["yolov5m6"]( + upstream_version="r6.0", + ) self.model.train() - return self.model \ No newline at end of file + return self.model diff --git a/exps/default/yolov5n.py b/exps/default/yolov5n.py index 72bf63e8..a36cb8e4 100644 --- a/exps/default/yolov5n.py +++ b/exps/default/yolov5n.py @@ -15,6 +15,8 @@ def __init__(self): self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] def get_model(self): - self.model = models.__dict__['yolov5n'](upstream_version="r6.0",) + self.model = models.__dict__["yolov5n"]( + upstream_version="r6.0", + ) self.model.train() - return self.model \ No newline at end of file + return self.model diff --git a/exps/default/yolov5n6.py b/exps/default/yolov5n6.py index 3ac2cfd2..9cda7acc 100644 --- a/exps/default/yolov5n6.py +++ b/exps/default/yolov5n6.py @@ -15,6 +15,8 @@ def __init__(self): self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] def get_model(self): - self.model = models.__dict__['yolov5n6'](upstream_version="r6.0",) + self.model = models.__dict__["yolov5n6"]( + upstream_version="r6.0", + ) self.model.train() - return self.model \ No newline at end of file + return self.model diff --git a/exps/default/yolov5s.py b/exps/default/yolov5s.py index 61736d25..00512d04 100644 --- a/exps/default/yolov5s.py +++ b/exps/default/yolov5s.py @@ -15,6 +15,8 @@ def __init__(self): self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] def 
get_model(self): - self.model = models.__dict__['yolov5s'](upstream_version="r6.0",) + self.model = models.__dict__["yolov5s"]( + upstream_version="r6.0", + ) self.model.train() - return self.model \ No newline at end of file + return self.model diff --git a/exps/default/yolov5s6.py b/exps/default/yolov5s6.py index cda2a942..8b394afb 100644 --- a/exps/default/yolov5s6.py +++ b/exps/default/yolov5s6.py @@ -15,6 +15,8 @@ def __init__(self): self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] def get_model(self): - self.model = models.__dict__['yolov5s6'](upstream_version="r6.0",) + self.model = models.__dict__["yolov5s6"]( + upstream_version="r6.0", + ) self.model.train() - return self.model \ No newline at end of file + return self.model diff --git a/exps/default/yolov5ts.py b/exps/default/yolov5ts.py index 365eab09..b71185df 100644 --- a/exps/default/yolov5ts.py +++ b/exps/default/yolov5ts.py @@ -15,6 +15,8 @@ def __init__(self): self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] def get_model(self): - self.model = models.__dict__['yolov5ts'](upstream_version="r6.0",) + self.model = models.__dict__["yolov5ts"]( + upstream_version="r6.0", + ) self.model.train() - return self.model \ No newline at end of file + return self.model diff --git a/requirements.txt b/requirements.txt index af814771..8fa37f38 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,4 +31,4 @@ pandas # pycocotools>=2.0.2 # corresponds to https://github.com/ppwwyyxx/cocoapi thop # FLOPs computation loguru # Python logging made (stupidly) simple -Ninja # a small build system with a focus on speed \ No newline at end of file +Ninja # a small build system with a focus on speed diff --git a/test/test_data_pipeline.py b/test/test_data_pipeline.py index 4e626a81..7d61e9c4 100644 --- a/test/test_data_pipeline.py +++ b/test/test_data_pipeline.py @@ -1,17 +1,17 @@ # Copyright (c) 2021, Zhiqiang Wang. All Rights Reserved. 
+import sys from pathlib import Path import numpy as np import pytest -import sys + sys.path.append("../yolort") import torch -from torch import Tensor -from yolort.exp import Exp +from torch import distributed as dist, Tensor from yolort.data import DataPrefetcher +from yolort.exp import Exp from yolort.utils import contains_any_tensor -from torch import distributed as dist def get_world_size() -> int: @@ -58,6 +58,7 @@ def test_get_dataloader(): assert len(targets) == batch_size assert isinstance(targets[0], Tensor) + test_get_dataloader() diff --git a/test/test_trainer.py b/test/test_trainer.py index be1573c7..76251945 100644 --- a/test/test_trainer.py +++ b/test/test_trainer.py @@ -4,17 +4,17 @@ import importlib import sys + sys.path.append("../yolort/") + def make_parser(): parser = argparse.ArgumentParser("YOLOX train parser") parser.add_argument("-expn", "--experiment-name", type=str, default="yolov5n") parser.add_argument("-n", "--name", type=str, default="yolov5n", help="model name") # distributed - parser.add_argument( - "--dist-backend", default="nccl", type=str, help="distributed backend" - ) + parser.add_argument("--dist-backend", default="nccl", type=str, help="distributed backend") parser.add_argument( "--dist-url", default=None, @@ -22,9 +22,7 @@ def make_parser(): help="url used to set up distributed training", ) parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size") - parser.add_argument( - "-d", "--devices", default=None, type=int, help="device for training" - ) + parser.add_argument("-d", "--devices", default=None, type=int, help="device for training") parser.add_argument( "-f", "--exp_file", @@ -32,9 +30,7 @@ def make_parser(): type=str, help="plz input your experiment description file", ) - parser.add_argument( - "--resume", default=False, action="store_true", help="resume training" - ) + parser.add_argument("--resume", default=False, action="store_true", help="resume training") parser.add_argument("-c", "--ckpt", 
default=None, type=str, help="checkpoint file") parser.add_argument( "-e", @@ -43,12 +39,8 @@ def make_parser(): type=int, help="resume training start epoch", ) - parser.add_argument( - "--num_machines", default=1, type=int, help="num of node for training" - ) - parser.add_argument( - "--machine_rank", default=0, type=int, help="node rank for multi-node training" - ) + parser.add_argument("--num_machines", default=1, type=int, help="num of node for training") + parser.add_argument("--machine_rank", default=0, type=int, help="node rank for multi-node training") parser.add_argument( "--fp16", dest="fp16", @@ -77,7 +69,7 @@ def make_parser(): type=str, help="Logger to be used for metrics. \ Implemented loggers include `tensorboard` and `wandb`.", - default="tensorboard" + default="tensorboard", ) parser.add_argument( "opts", @@ -87,6 +79,7 @@ def make_parser(): ) return parser + def test_training_step(): args = make_parser().parse_args() module_name = ".".join(["yolort", "exp", "default", args.name]) @@ -96,9 +89,11 @@ def test_training_step(): assert h % 32 == 0 and w % 32 == 0, "input size must be multiples of 32" from yolort.trainer import Trainer + trainer = Trainer(exp, args) trainer.train() + def test_test_epoch_end(): args = make_parser().parse_args() module_name = ".".join(["yolort", "exp", "default", args.name]) diff --git a/tools/eval_metric.py b/tools/eval_metric.py index 0538f0df..3b64632d 100644 --- a/tools/eval_metric.py +++ b/tools/eval_metric.py @@ -8,8 +8,8 @@ import torchvision import yolort from yolort.data import _helper as data_helper -from yolort.data.datasets.coco import COCODetection from yolort.data.coco_eval import COCOEvaluator +from yolort.data.datasets.coco import COCODetection from yolort.data.transforms import collate_fn, default_val_transforms from yolort.utils.logger import MetricLogger diff --git a/yolort/data/__init__.py b/yolort/data/__init__.py index 5740093a..aeaf4f93 100644 --- a/yolort/data/__init__.py +++ 
b/yolort/data/__init__.py @@ -6,4 +6,4 @@ from .data_prefetcher import DataPrefetcher from .dataloading import DataLoader, get_yolox_datadir, worker_init_reset_seed from .datasets import * -from .samplers import InfiniteSampler, YoloBatchSampler \ No newline at end of file +from .samplers import InfiniteSampler, YoloBatchSampler diff --git a/yolort/data/data_augment.py b/yolort/data/data_augment.py index 4e53f6c2..3c35f7fd 100644 --- a/yolort/data/data_augment.py +++ b/yolort/data/data_augment.py @@ -39,7 +39,9 @@ def get_aug_params(value, center=0): else: raise ValueError( "Affine params should be either a sequence containing two values\ - or single float values. Got {}".format(value) + or single float values. Got {}".format( + value + ) ) @@ -95,9 +97,7 @@ def apply_affine_to_bboxes(targets, target_size, M, scale): corner_xs = corner_points[:, 0::2] corner_ys = corner_points[:, 1::2] new_bboxes = ( - np.concatenate( - (corner_xs.min(1), corner_ys.min(1), corner_xs.max(1), corner_ys.max(1)) - ) + np.concatenate((corner_xs.min(1), corner_ys.min(1), corner_xs.max(1), corner_ys.max(1))) .reshape(4, num_gts) .T ) @@ -203,9 +203,7 @@ def __call__(self, image, targets, input_dim): targets_t = np.hstack((labels_t, boxes_t)) padded_labels = np.zeros((self.max_labels, 5)) - padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[ - : self.max_labels - ] + padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[: self.max_labels] padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32) return image_t, padded_labels diff --git a/yolort/data/data_module.py b/yolort/data/data_module.py index d17d7327..c2bd3134 100644 --- a/yolort/data/data_module.py +++ b/yolort/data/data_module.py @@ -11,6 +11,7 @@ from pytorch_lightning import LightningDataModule from yolort.data.datasets.coco import COCODetection + from .transforms import collate_fn, default_train_transforms, default_val_transforms from .voc import VOCDetection diff --git 
a/yolort/data/dataloading.py b/yolort/data/dataloading.py index 6fecf3f0..cf805b21 100644 --- a/yolort/data/dataloading.py +++ b/yolort/data/dataloading.py @@ -9,8 +9,7 @@ import numpy as np import torch -from torch.utils.data.dataloader import DataLoader as torchDataLoader -from torch.utils.data.dataloader import default_collate +from torch.utils.data.dataloader import DataLoader as torchDataLoader, default_collate from .samplers import YoloBatchSampler diff --git a/yolort/data/datasets/coco.py b/yolort/data/datasets/coco.py index 5ac225a0..6aedce86 100644 --- a/yolort/data/datasets/coco.py +++ b/yolort/data/datasets/coco.py @@ -8,7 +8,7 @@ import numpy as np from pycocotools.coco import COCO -from .datasets_wrapper import CacheDataset, cache_read_img +from .datasets_wrapper import cache_read_img, CacheDataset def remove_useless_info(coco): @@ -79,7 +79,7 @@ def __init__( cache_dir_name=f"cache_{name}", path_filename=path_filename, cache=cache, - cache_type=cache_type + cache_type=cache_type, ) def __len__(self): @@ -118,11 +118,7 @@ def load_anno_from_ids(self, id_): img_info = (height, width) resized_info = (int(height * r), int(width * r)) - file_name = ( - im_ann["file_name"] - if "file_name" in im_ann - else "{:012}".format(id_) + ".jpg" - ) + file_name = im_ann["file_name"] if "file_name" in im_ann else "{:012}".format(id_) + ".jpg" return (res, img_info, resized_info, file_name) diff --git a/yolort/data/datasets/datasets_wrapper.py b/yolort/data/datasets/datasets_wrapper.py index c45fe380..32f9b92f 100644 --- a/yolort/data/datasets/datasets_wrapper.py +++ b/yolort/data/datasets/datasets_wrapper.py @@ -9,14 +9,13 @@ from abc import ABCMeta, abstractmethod from functools import partial, wraps from multiprocessing.pool import ThreadPool -import psutil -from loguru import logger -from tqdm import tqdm import numpy as np +import psutil +from loguru import logger -from torch.utils.data.dataset import ConcatDataset as torchConcatDataset -from 
torch.utils.data.dataset import Dataset as torchDataset +from torch.utils.data.dataset import ConcatDataset as torchConcatDataset, Dataset as torchDataset +from tqdm import tqdm class ConcatDataset(torchConcatDataset): @@ -29,9 +28,7 @@ def __init__(self, datasets): def pull_item(self, idx): if idx < 0: if -idx > len(self): - raise ValueError( - "absolute value of index should not exceed dataset length" - ) + raise ValueError("absolute value of index should not exceed dataset length") idx = len(self) + idx dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) if dataset_idx == 0: @@ -54,9 +51,7 @@ def __getitem__(self, index): idx = index[1] if idx < 0: if -idx > len(self): - raise ValueError( - "absolute value of index should not exceed dataset length" - ) + raise ValueError("absolute value of index should not exceed dataset length") idx = len(self) + idx dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) if dataset_idx == 0: @@ -70,7 +65,7 @@ def __getitem__(self, index): class Dataset(torchDataset): - """ This class is a subclass of the base :class:`torch.utils.data.Dataset`, + """This class is a subclass of the base :class:`torch.utils.data.Dataset`, that enables on the fly resizing of the ``input_dim``. Args: @@ -125,7 +120,7 @@ def wrapper(self, index): class CacheDataset(Dataset, metaclass=ABCMeta): - """ This class is a subclass of the base :class:`yolox.data.datasets.Dataset`, + """This class is a subclass of the base :class:`yolox.data.datasets.Dataset`, that enables cache images to ram or disk. 
Args: @@ -196,8 +191,9 @@ def cache_images( ): assert num_imgs is not None, "num_imgs must be specified as the size of the dataset" if self.cache_type == "disk": - assert (data_dir and cache_dir_name and path_filename) is not None, \ - "data_dir, cache_name and path_filename must be specified if cache_type is disk" + assert ( + data_dir and cache_dir_name and path_filename + ) is not None, "data_dir, cache_name and path_filename must be specified if cache_type is disk" self.path_filename = path_filename mem = psutil.virtual_memory() @@ -216,10 +212,10 @@ def cache_images( ) if self.cache and self.imgs is None: - if self.cache_type == 'ram': + if self.cache_type == "ram": self.imgs = [None] * num_imgs logger.info("You are using cached images in RAM to accelerate training!") - else: # 'disk' + else: # 'disk' if not os.path.exists(self.cache_dir): os.mkdir(self.cache_dir) logger.warning( @@ -234,29 +230,22 @@ def cache_images( logger.info(f"Found disk cache at {self.cache_dir}") return - logger.info( - "Caching images...\n" - "This might take some time for your dataset" - ) + logger.info("Caching images...\n" "This might take some time for your dataset") num_threads = min(8, max(1, os.cpu_count() - 1)) b = 0 - load_imgs = ThreadPool(num_threads).imap( - partial(self.read_img, use_cache=False), - range(num_imgs) - ) + load_imgs = ThreadPool(num_threads).imap(partial(self.read_img, use_cache=False), range(num_imgs)) pbar = tqdm(enumerate(load_imgs), total=num_imgs) - for i, x in pbar: # x = self.read_img(self, i, use_cache=False) - if self.cache_type == 'ram': + for i, x in pbar: # x = self.read_img(self, i, use_cache=False) + if self.cache_type == "ram": self.imgs[i] = x - else: # 'disk' + else: # 'disk' cache_filename = f'{self.path_filename[i].split(".")[0]}.npy' cache_path_filename = os.path.join(self.cache_dir, cache_filename) os.makedirs(os.path.dirname(cache_path_filename), exist_ok=True) np.save(cache_path_filename, x) b += x.nbytes - pbar.desc = \ - f'Caching 
images ({b / gb:.1f}/{mem_required / gb:.1f}GB {self.cache_type})' + pbar.desc = f"Caching images ({b / gb:.1f}/{mem_required / gb:.1f}GB {self.cache_type})" pbar.close() def cal_cache_occupy(self, num_imgs): @@ -280,6 +269,7 @@ def decorator(read_img_fn): whether to read the image from cache. Defaults to True. """ + @wraps(read_img_fn) def wrapper(self, index, use_cache=use_cache): cache = self.cache and use_cache @@ -289,12 +279,14 @@ def wrapper(self, index, use_cache=use_cache): img = copy.deepcopy(img) elif self.cache_type == "disk": img = np.load( - os.path.join( - self.cache_dir, f"{self.path_filename[index].split('.')[0]}.npy")) + os.path.join(self.cache_dir, f"{self.path_filename[index].split('.')[0]}.npy") + ) else: raise ValueError(f"Unknown cache type: {self.cache_type}") else: img = read_img_fn(self, index) return img + return wrapper + return decorator diff --git a/yolort/data/datasets/mosaicdetection.py b/yolort/data/datasets/mosaicdetection.py index ba11cfdc..7f3b5f75 100644 --- a/yolort/data/datasets/mosaicdetection.py +++ b/yolort/data/datasets/mosaicdetection.py @@ -38,10 +38,20 @@ class MosaicDetection(Dataset): """Detection dataset wrapper that performs mixup for normal dataset.""" def __init__( - self, dataset, img_size, mosaic=True, preproc=None, - degrees=10.0, translate=0.1, mosaic_scale=(0.5, 1.5), - mixup_scale=(0.5, 1.5), shear=2.0, enable_mixup=True, - mosaic_prob=1.0, mixup_prob=1.0, *args + self, + dataset, + img_size, + mosaic=True, + preproc=None, + degrees=10.0, + translate=0.1, + mosaic_scale=(0.5, 1.5), + mixup_scale=(0.5, 1.5), + shear=2.0, + enable_mixup=True, + mosaic_prob=1.0, + mixup_prob=1.0, + *args, ): """ @@ -92,10 +102,8 @@ def __getitem__(self, idx): for i_mosaic, index in enumerate(indices): img, _labels, _, img_id = self._dataset.pull_item(index) h0, w0 = img.shape[:2] # orig hw - scale = min(1. * input_h / h0, 1. 
* input_w / w0) - img = cv2.resize( - img, (int(w0 * scale), int(h0 * scale)), interpolation=cv2.INTER_LINEAR - ) + scale = min(1.0 * input_h / h0, 1.0 * input_w / w0) + img = cv2.resize(img, (int(w0 * scale), int(h0 * scale)), interpolation=cv2.INTER_LINEAR) # generate output mosaic image (h, w, c) = img.shape[:3] if i_mosaic == 0: @@ -138,11 +146,7 @@ def __getitem__(self, idx): # ----------------------------------------------------------------- # CopyPaste: https://arxiv.org/abs/2012.07177 # ----------------------------------------------------------------- - if ( - self.enable_mixup - and not len(mosaic_labels) == 0 - and random.random() < self.mixup_prob - ): + if self.enable_mixup and not len(mosaic_labels) == 0 and random.random() < self.mixup_prob: mosaic_img, mosaic_labels = self.mixup(mosaic_img, mosaic_labels, self.input_dim) mix_img, padded_labels = self.preproc(mosaic_img, mosaic_labels, self.input_dim) img_info = (mix_img.shape[1], mix_img.shape[0]) @@ -180,9 +184,7 @@ def mixup(self, origin_img, origin_labels, input_dim): interpolation=cv2.INTER_LINEAR, ) - cp_img[ - : int(img.shape[0] * cp_scale_ratio), : int(img.shape[1] * cp_scale_ratio) - ] = resized_img + cp_img[: int(img.shape[0] * cp_scale_ratio), : int(img.shape[1] * cp_scale_ratio)] = resized_img cp_img = cv2.resize( cp_img, @@ -195,9 +197,7 @@ def mixup(self, origin_img, origin_labels, input_dim): origin_h, origin_w = cp_img.shape[:2] target_h, target_w = origin_img.shape[:2] - padded_img = np.zeros( - (max(origin_h, target_h), max(origin_w, target_w), 3), dtype=np.uint8 - ) + padded_img = np.zeros((max(origin_h, target_h), max(origin_w, target_w), 3), dtype=np.uint8) padded_img[:origin_h, :origin_w] = cp_img x_offset, y_offset = 0, 0 @@ -205,24 +205,16 @@ def mixup(self, origin_img, origin_labels, input_dim): y_offset = random.randint(0, padded_img.shape[0] - target_h - 1) if padded_img.shape[1] > target_w: x_offset = random.randint(0, padded_img.shape[1] - target_w - 1) - 
padded_cropped_img = padded_img[ - y_offset: y_offset + target_h, x_offset: x_offset + target_w - ] + padded_cropped_img = padded_img[y_offset : y_offset + target_h, x_offset : x_offset + target_w] cp_bboxes_origin_np = adjust_box_anns( cp_labels[:, :4].copy(), cp_scale_ratio, 0, 0, origin_w, origin_h ) if FLIP: - cp_bboxes_origin_np[:, 0::2] = ( - origin_w - cp_bboxes_origin_np[:, 0::2][:, ::-1] - ) + cp_bboxes_origin_np[:, 0::2] = origin_w - cp_bboxes_origin_np[:, 0::2][:, ::-1] cp_bboxes_transformed_np = cp_bboxes_origin_np.copy() - cp_bboxes_transformed_np[:, 0::2] = np.clip( - cp_bboxes_transformed_np[:, 0::2] - x_offset, 0, target_w - ) - cp_bboxes_transformed_np[:, 1::2] = np.clip( - cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h - ) + cp_bboxes_transformed_np[:, 0::2] = np.clip(cp_bboxes_transformed_np[:, 0::2] - x_offset, 0, target_w) + cp_bboxes_transformed_np[:, 1::2] = np.clip(cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h) cls_labels = cp_labels[:, 4:5].copy() box_labels = cp_bboxes_transformed_np diff --git a/yolort/data/samplers.py b/yolort/data/samplers.py index 6b7ea38d..b08b3d68 100644 --- a/yolort/data/samplers.py +++ b/yolort/data/samplers.py @@ -7,8 +7,7 @@ import torch import torch.distributed as dist -from torch.utils.data.sampler import BatchSampler as torchBatchSampler -from torch.utils.data.sampler import Sampler +from torch.utils.data.sampler import BatchSampler as torchBatchSampler, Sampler class YoloBatchSampler(torchBatchSampler): @@ -68,9 +67,7 @@ def __init__( def __iter__(self): start = self._rank - yield from itertools.islice( - self._infinite_indices(), start, None, self._world_size - ) + yield from itertools.islice(self._infinite_indices(), start, None, self._world_size) def _infinite_indices(self): g = torch.Generator() diff --git a/yolort/evaluators/__init__.py b/yolort/evaluators/__init__.py index fc0b6875..83b5a9f1 100644 --- a/yolort/evaluators/__init__.py +++ b/yolort/evaluators/__init__.py @@ -2,4 
+2,4 @@ # -*- coding:utf-8 -*- # Copyright (c) Megvii, Inc. and its affiliates. -from .coco_evaluator import COCOEvaluator \ No newline at end of file +from .coco_evaluator import COCOEvaluator diff --git a/yolort/evaluators/coco_evaluator.py b/yolort/evaluators/coco_evaluator.py index a97c6d41..75e79aa2 100644 --- a/yolort/evaluators/coco_evaluator.py +++ b/yolort/evaluators/coco_evaluator.py @@ -9,23 +9,16 @@ import tempfile import time from collections import ChainMap, defaultdict -from loguru import logger -from tabulate import tabulate -from tqdm import tqdm import numpy as np import torch +from loguru import logger +from tabulate import tabulate +from tqdm import tqdm from yolort.data.datasets import COCO_CLASSES -from yolort.utils import ( - gather, - is_main_process, - postprocess, - synchronize, - time_synchronized, - xyxy2xywh -) +from yolort.utils import gather, is_main_process, postprocess, synchronize, time_synchronized, xyxy2xywh def per_class_AR_table(coco_eval, class_names=COCO_CLASSES, headers=["class", "AR"], colums=6): @@ -46,7 +39,11 @@ def per_class_AR_table(coco_eval, class_names=COCO_CLASSES, headers=["class", "A row_pair = itertools.zip_longest(*[result_pair[i::num_cols] for i in range(num_cols)]) table_headers = headers * (num_cols // len(headers)) table = tabulate( - row_pair, tablefmt="pipe", floatfmt=".3f", headers=table_headers, numalign="left", + row_pair, + tablefmt="pipe", + floatfmt=".3f", + headers=table_headers, + numalign="left", ) return table @@ -71,7 +68,11 @@ def per_class_AP_table(coco_eval, class_names=COCO_CLASSES, headers=["class", "A row_pair = itertools.zip_longest(*[result_pair[i::num_cols] for i in range(num_cols)]) table_headers = headers * (num_cols // len(headers)) table = tabulate( - row_pair, tablefmt="pipe", floatfmt=".3f", headers=table_headers, numalign="left", + row_pair, + tablefmt="pipe", + floatfmt=".3f", + headers=table_headers, + numalign="left", ) return table @@ -114,8 +115,14 @@ def __init__( 
self.per_class_AR = per_class_AR def evaluate( - self, model, distributed=False, half=False, trt_file=None, - decoder=None, test_size=None, return_outputs=False + self, + model, + distributed=False, + half=False, + trt_file=None, + decoder=None, + test_size=None, + return_outputs=False, ): """ COCO average precision (AP) Evaluation. Iterate inference on the test dataset @@ -155,9 +162,7 @@ def evaluate( model(x) model = model_trt - for cur_iter, (imgs, _, info_imgs, ids) in enumerate( - progress_bar(self.dataloader) - ): + for cur_iter, (imgs, _, info_imgs, ids) in enumerate(progress_bar(self.dataloader)): with torch.no_grad(): imgs = imgs.type(tensor_type) @@ -174,15 +179,14 @@ def evaluate( infer_end = time_synchronized() inference_time += infer_end - start - outputs = postprocess( - outputs, self.num_classes, self.confthre, self.nmsthre - ) + outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre) if is_time_record: nms_end = time_synchronized() nms_time += nms_end - infer_end data_list_elem, image_wise_data = self.convert_to_coco_format( - outputs, info_imgs, ids, return_outputs=True) + outputs, info_imgs, ids, return_outputs=True + ) data_list.extend(data_list_elem) output_data.update(image_wise_data) @@ -207,9 +211,7 @@ def evaluate( def convert_to_coco_format(self, outputs, info_imgs, ids, return_outputs=False): data_list = [] image_wise_data = defaultdict(dict) - for (output, img_h, img_w, img_id) in zip( - outputs, info_imgs[0], info_imgs[1], ids - ): + for (output, img_h, img_w, img_id) in zip(outputs, info_imgs[0], info_imgs[1], ids): if output is None: continue output = output.cpu() @@ -217,23 +219,22 @@ def convert_to_coco_format(self, outputs, info_imgs, ids, return_outputs=False): bboxes = output[:, 0:4] # preprocessing: resize - scale = min( - self.img_size[0] / float(img_h), self.img_size[1] / float(img_w) - ) + scale = min(self.img_size[0] / float(img_h), self.img_size[1] / float(img_w)) bboxes /= scale cls = output[:, 6] 
scores = output[:, 4] * output[:, 5] - image_wise_data.update({ - int(img_id): { - "bboxes": [box.numpy().tolist() for box in bboxes], - "scores": [score.numpy().item() for score in scores], - "categories": [ - self.dataloader.dataset.class_ids[int(cls[ind])] - for ind in range(bboxes.shape[0]) - ], + image_wise_data.update( + { + int(img_id): { + "bboxes": [box.numpy().tolist() for box in bboxes], + "scores": [score.numpy().item() for score in scores], + "categories": [ + self.dataloader.dataset.class_ids[int(cls[ind])] for ind in range(bboxes.shape[0]) + ], + } } - }) + ) bboxes = xyxy2xywh(bboxes) @@ -305,7 +306,7 @@ def evaluate_prediction(self, data_dict, statistics): cocoEval.summarize() info += redirect_string.getvalue() cat_ids = list(cocoGt.cats.keys()) - cat_names = [cocoGt.cats[catId]['name'] for catId in sorted(cat_ids)] + cat_names = [cocoGt.cats[catId]["name"] for catId in sorted(cat_ids)] if self.per_class_AP: AP_table = per_class_AP_table(cocoEval, class_names=cat_names) info += "per class AP:\n" + AP_table + "\n" @@ -314,4 +315,4 @@ def evaluate_prediction(self, data_dict, statistics): info += "per class AR:\n" + AR_table + "\n" return cocoEval.stats[0], cocoEval.stats[1], info else: - return 0, 0, info \ No newline at end of file + return 0, 0, info diff --git a/yolort/exp/__init__.py b/yolort/exp/__init__.py index d7de27c8..94b059ce 100644 --- a/yolort/exp/__init__.py +++ b/yolort/exp/__init__.py @@ -2,4 +2,4 @@ # Copyright (c) Megvii Inc. All rights reserved. 
from .base_exp import BaseExp -from .yolox_base import Exp \ No newline at end of file +from .yolox_base import Exp diff --git a/yolort/exp/base_exp.py b/yolort/exp/base_exp.py index c0ae45fe..41506546 100644 --- a/yolort/exp/base_exp.py +++ b/yolort/exp/base_exp.py @@ -5,9 +5,9 @@ import pprint from abc import ABCMeta, abstractmethod from typing import Dict, List, Tuple -from tabulate import tabulate import torch +from tabulate import tabulate from torch.nn import Module from yolort.utils import LRScheduler @@ -42,9 +42,7 @@ def get_optimizer(self, batch_size: int) -> torch.optim.Optimizer: pass @abstractmethod - def get_lr_scheduler( - self, lr: float, iters_per_epoch: int, **kwargs - ) -> LRScheduler: + def get_lr_scheduler(self, lr: float, iters_per_epoch: int, **kwargs) -> LRScheduler: pass @abstractmethod @@ -57,11 +55,7 @@ def eval(self, model, evaluator, weights): def __repr__(self): table_header = ["keys", "values"] - exp_table = [ - (str(k), pprint.pformat(v)) - for k, v in vars(self).items() - if not k.startswith("_") - ] + exp_table = [(str(k), pprint.pformat(v)) for k, v in vars(self).items() if not k.startswith("_")] return tabulate(exp_table, headers=table_header, tablefmt="fancy_grid") def merge(self, cfg_list): diff --git a/yolort/exp/default/__init__.py b/yolort/exp/default/__init__.py index 1f361d78..b439cbde 100644 --- a/yolort/exp/default/__init__.py +++ b/yolort/exp/default/__init__.py @@ -15,7 +15,6 @@ # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230 class _ExpFinder(abc.MetaPathFinder): - def find_spec(self, name, path, target=None): if not name.startswith("yolort.exp.default"): return diff --git a/yolort/exp/yolox_base.py b/yolort/exp/yolox_base.py index f3147743..c46af070 100644 --- a/yolort/exp/yolox_base.py +++ b/yolort/exp/yolox_base.py @@ -1,11 +1,11 @@ #!/usr/bin/env python3 # Copyright (c) Megvii Inc. All rights reserved. 
+import logging import os import random -import logging -from zipfile import ZipFile from pathlib import Path, PosixPath +from zipfile import ZipFile import torch import torch.distributed as dist @@ -114,7 +114,9 @@ def __init__(self): def get_model(self): import yolort.models as models - self.model = models.__dict__['yolov5n'](upstream_version="r6.0", ) + self.model = models.__dict__["yolov5n"]( + upstream_version="r6.0", + ) self.model.train() return self.model @@ -136,25 +138,17 @@ def get_dataset(self, data_root: str, mode: str = "val", cache: bool = False, ca data_dir=self.data_dir, json_file=self.train_ann, img_size=self.input_size, - preproc=TrainTransform( - max_labels=50, - flip_prob=self.flip_prob, - hsv_prob=self.hsv_prob - ), + preproc=TrainTransform(max_labels=50, flip_prob=self.flip_prob, hsv_prob=self.hsv_prob), cache=cache, cache_type=cache_type, ) elif mode == "val": - """ TODO """ + """TODO""" dataset = COCODataset( data_dir=self.data_dir, json_file=self.train_ann, img_size=self.input_size, - preproc=TrainTransform( - max_labels=50, - flip_prob=self.flip_prob, - hsv_prob=self.hsv_prob - ), + preproc=TrainTransform(max_labels=50, flip_prob=self.flip_prob, hsv_prob=self.hsv_prob), cache=cache, cache_type=cache_type, ) @@ -174,12 +168,12 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img: s None: Do not use cache, in this case cache_data is also None. 
""" from yolort.data import ( - TrainTransform, - YoloBatchSampler, DataLoader, InfiniteSampler, MosaicDetection, + TrainTransform, worker_init_reset_seed, + YoloBatchSampler, ) from yolort.utils import wait_for_the_master @@ -187,18 +181,16 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img: s # else we will create dataset after launch if self.dataset is None: with wait_for_the_master(): - assert cache_img is None, \ - "cache_img must be None if you didn't create dataset before launch" - self.dataset = self.get_dataset(data_root="data-bin", mode="train", cache=False, cache_type=cache_img) + assert cache_img is None, "cache_img must be None if you didn't create dataset before launch" + self.dataset = self.get_dataset( + data_root="data-bin", mode="train", cache=False, cache_type=cache_img + ) self.dataset = MosaicDetection( dataset=self.dataset, mosaic=not no_aug, img_size=self.input_size, - preproc=TrainTransform( - max_labels=120, - flip_prob=self.flip_prob, - hsv_prob=self.hsv_prob), + preproc=TrainTransform(max_labels=120, flip_prob=self.flip_prob, hsv_prob=self.hsv_prob), degrees=self.degrees, translate=self.translate, mosaic_scale=self.mosaic_scale, @@ -232,9 +224,10 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img: s return train_loader - def prepare_coco128(self, - data_path: PosixPath, - dirname: str = "coco128", + def prepare_coco128( + self, + data_path: PosixPath, + dirname: str = "coco128", ) -> None: """ Prepare coco128 dataset to test. 
@@ -266,7 +259,7 @@ def random_resize(self, data_loader, epoch, rank, is_distributed): if rank == 0: size_factor = self.input_size[1] * 1.0 / self.input_size[0] - if not hasattr(self, 'random_size'): + if not hasattr(self, "random_size"): min_size = int(self.input_size[0] / 32) - self.multiscale_range max_size = int(self.input_size[0] / 32) + self.multiscale_range self.random_size = (min_size, max_size) @@ -286,9 +279,7 @@ def preprocess(self, inputs, targets, tsize): scale_y = tsize[0] / self.input_size[0] scale_x = tsize[1] / self.input_size[1] if scale_x != 1 or scale_y != 1: - inputs = nn.functional.interpolate( - inputs, size=tsize, mode="bilinear", align_corners=False - ) + inputs = nn.functional.interpolate(inputs, size=tsize, mode="bilinear", align_corners=False) targets[..., 1::2] = targets[..., 1::2] * scale_x targets[..., 2::2] = targets[..., 2::2] * scale_y return inputs, targets @@ -310,9 +301,7 @@ def get_optimizer(self, batch_size): elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter): pg1.append(v.weight) # apply decay - optimizer = torch.optim.SGD( - pg0, lr=lr, momentum=self.momentum, nesterov=True - ) + optimizer = torch.optim.SGD(pg0, lr=lr, momentum=self.momentum, nesterov=True) optimizer.add_param_group( {"params": pg1, "weight_decay": self.weight_decay} ) # add pg1 with weight_decay @@ -338,12 +327,13 @@ def get_lr_scheduler(self, lr, iters_per_epoch): def get_eval_dataset(self, **kwargs): from yolort.data import COCODataset, ValTransform + testdev = kwargs.get("testdev", False) legacy = kwargs.get("legacy", False) return COCODataset( data_dir=self.data_dir, - json_file=self.train_ann, # 这里需要改为 + json_file=self.train_ann, # 这里需要改为 name="train2017" if not testdev else "train2017", # 测试数据 img_size=self.test_size, preproc=ValTransform(legacy=legacy), @@ -354,9 +344,7 @@ def get_eval_loader(self, batch_size, is_distributed, **kwargs): if is_distributed: batch_size = batch_size // dist.get_world_size() - sampler = 
torch.utils.data.distributed.DistributedSampler( - valdataset, shuffle=False - ) + sampler = torch.utils.data.distributed.DistributedSampler(valdataset, shuffle=False) else: sampler = torch.utils.data.SequentialSampler(valdataset) @@ -374,8 +362,7 @@ def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False) from yolort.evaluators import COCOEvaluator return COCOEvaluator( - dataloader=self.get_eval_loader(batch_size, is_distributed, - testdev=testdev, legacy=legacy), + dataloader=self.get_eval_loader(batch_size, is_distributed, testdev=testdev, legacy=legacy), img_size=self.test_size, confthre=self.test_conf, nmsthre=self.nmsthre, @@ -384,4 +371,4 @@ def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False) ) def eval(self, model, evaluator, is_distributed, half=False, return_outputs=False): - return evaluator.evaluate(model, is_distributed, half, return_outputs=return_outputs) \ No newline at end of file + return evaluator.evaluate(model, is_distributed, half, return_outputs=return_outputs) diff --git a/yolort/trainer/trainer.py b/yolort/trainer/trainer.py index 28f1fbe1..aeb418db 100644 --- a/yolort/trainer/trainer.py +++ b/yolort/trainer/trainer.py @@ -4,18 +4,15 @@ import datetime import os import time -from loguru import logger import torch +from loguru import logger from torch.nn.parallel import DistributedDataParallel as DDP from torch.utils.tensorboard import SummaryWriter from yolort.data import DataPrefetcher from yolort.exp import Exp from yolort.utils import ( - MeterBuffer, - ModelEMA, - WandbLogger, adjust_status, all_reduce_norm, get_local_rank, @@ -26,14 +23,18 @@ is_parallel, load_ckpt, mem_usage, + MeterBuffer, + ModelEMA, occupy_mem, save_checkpoint, setup_logger, - synchronize + synchronize, + WandbLogger, ) __all__ = ["Trainer"] + class Trainer: def __init__(self, exp: Exp, args): # init function only defines some basic attr, other attrs like model, optimizer are built in @@ -48,7 +49,7 @@ def 
__init__(self, exp: Exp, args): self.is_distributed = get_world_size() > 1 self.rank = get_rank() self.local_rank = get_local_rank() - self.device = "cuda:{}".format(self.local_rank) if torch.cuda.is_available() else 'cpu' + self.device = "cuda:{}".format(self.local_rank) if torch.cuda.is_available() else "cpu" self.use_model_ema = exp.ema self.save_history_ckpt = exp.save_history_ckpt @@ -132,12 +133,10 @@ def before_train(self): logger.info("exp value:\n{}".format(self.exp)) # model related init - if self.device != 'cpu': + if self.device != "cpu": torch.cuda.set_device(self.local_rank) model = self.exp.get_model() - logger.info( - "Model Summary: {}".format(get_model_info(model, self.exp.test_size)) - ) + logger.info("Model Summary: {}".format(get_model_info(model, self.exp.test_size))) model.to(self.device) # solver related init @@ -183,9 +182,7 @@ def before_train(self): self.tblogger = SummaryWriter(os.path.join(self.file_name, "tensorboard")) elif self.args.logger == "wandb": self.wandb_logger = WandbLogger.initialize_wandb_logger( - self.args, - self.exp, - self.evaluator.dataloader.dataset + self.args, self.exp, self.evaluator.dataloader.dataset ) else: raise ValueError("logger must be either 'tensorboard' or 'wandb'") @@ -194,9 +191,7 @@ def before_train(self): logger.info("\n{}".format(model)) def after_train(self): - logger.info( - "Training of experiment is done and the best AP is {:.2f}".format(self.best_ap * 100) - ) + logger.info("Training of experiment is done and the best AP is {:.2f}".format(self.best_ap * 100)) if self.rank == 0: if self.args.logger == "wandb": self.wandb_logger.finish() @@ -243,14 +238,10 @@ def after_iter(self): self.epoch + 1, self.max_epoch, self.iter + 1, self.max_iter ) loss_meter = self.meter.get_filtered_meter("loss") - loss_str = ", ".join( - ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()] - ) + loss_str = ", ".join(["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()]) time_meter = 
self.meter.get_filtered_meter("time") - time_str = ", ".join( - ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()] - ) + time_str = ", ".join(["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()]) mem_str = "gpu mem: {:.0f}Mb, mem: {:.1f}Gb".format(gpu_mem_usage(), mem_usage()) @@ -267,16 +258,12 @@ def after_iter(self): if self.rank == 0: if self.args.logger == "tensorboard": - self.tblogger.add_scalar( - "train/lr", self.meter["lr"].latest, self.progress_in_iter) + self.tblogger.add_scalar("train/lr", self.meter["lr"].latest, self.progress_in_iter) for k, v in loss_meter.items(): - self.tblogger.add_scalar( - f"train/{k}", v.latest, self.progress_in_iter) + self.tblogger.add_scalar(f"train/{k}", v.latest, self.progress_in_iter) if self.args.logger == "wandb": metrics = {"train/" + k: v.latest for k, v in loss_meter.items()} - metrics.update({ - "train/lr": self.meter["lr"].latest - }) + metrics.update({"train/lr": self.meter["lr"].latest}) self.wandb_logger.log_metrics(metrics, step=self.progress_in_iter) self.meter.clear_meters() @@ -306,15 +293,11 @@ def resume_train(self, model): self.best_ap = ckpt.pop("best_ap", 0) # resume the training states variables start_epoch = ( - self.args.start_epoch - 1 - if self.args.start_epoch is not None - else ckpt["start_epoch"] + self.args.start_epoch - 1 if self.args.start_epoch is not None else ckpt["start_epoch"] ) self.start_epoch = start_epoch logger.info( - "loaded checkpoint '{}' (epoch {})".format( - self.args.resume, self.start_epoch - ) + "loaded checkpoint '{}' (epoch {})".format(self.args.resume, self.start_epoch) ) # noqa else: if self.args.ckpt is not None: @@ -347,11 +330,13 @@ def evaluate_and_save_model(self): self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1) self.tblogger.add_scalar("val/COCOAP50_95", ap50_95, self.epoch + 1) if self.args.logger == "wandb": - self.wandb_logger.log_metrics({ - "val/COCOAP50": ap50, - "val/COCOAP50_95": ap50_95, - "train/epoch": self.epoch 
+ 1, - }) + self.wandb_logger.log_metrics( + { + "val/COCOAP50": ap50, + "val/COCOAP50_95": ap50_95, + "train/epoch": self.epoch + 1, + } + ) self.wandb_logger.log_images(predictions) logger.info("\n" + summary) synchronize() @@ -387,6 +372,6 @@ def save_ckpt(self, ckpt_name, update_best_ckpt=False, ap=None): "epoch": self.epoch + 1, "optimizer": self.optimizer.state_dict(), "best_ap": self.best_ap, - "curr_ap": ap - } - ) \ No newline at end of file + "curr_ap": ap, + }, + ) diff --git a/yolort/utils/__init__.py b/yolort/utils/__init__.py index cf4c00b0..ee0c373f 100644 --- a/yolort/utils/__init__.py +++ b/yolort/utils/__init__.py @@ -19,7 +19,7 @@ from .checkpoint import load_ckpt, save_checkpoint from .dist import * from .ema import * -from .logger import WandbLogger, setup_logger +from .logger import setup_logger, WandbLogger from .lr_scheduler import LRScheduler from .metric import * from .model_utils import * diff --git a/yolort/utils/allreduce_norm.py b/yolort/utils/allreduce_norm.py index 142c76c7..71881952 100644 --- a/yolort/utils/allreduce_norm.py +++ b/yolort/utils/allreduce_norm.py @@ -6,8 +6,7 @@ from collections import OrderedDict import torch -from torch import distributed as dist -from torch import nn +from torch import distributed as dist, nn from .dist import _get_global_gloo_group, get_world_size @@ -88,8 +87,7 @@ def all_reduce(py_dict, op="sum", group=None): flatten_tensor /= world_size split_tensors = [ - x.reshape(shape) - for x, shape in zip(torch.split(flatten_tensor, tensor_numels), tensor_shapes) + x.reshape(shape) for x, shape in zip(torch.split(flatten_tensor, tensor_numels), tensor_shapes) ] return OrderedDict({k: v for k, v in zip(py_key, split_tensors)}) diff --git a/yolort/utils/boxes.py b/yolort/utils/boxes.py index a8eaf3f4..7cffcd99 100644 --- a/yolort/utils/boxes.py +++ b/yolort/utils/boxes.py @@ -44,7 +44,7 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn if not image_pred.size(0): continue # 
Get score and class with highest confidence - class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True) + class_conf, class_pred = torch.max(image_pred[:, 5 : 5 + num_classes], 1, keepdim=True) conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze() # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) @@ -140,4 +140,4 @@ def cxcywh2xyxy(bboxes): bboxes[:, 1] = bboxes[:, 1] - bboxes[:, 3] * 0.5 bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2] bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3] - return bboxes \ No newline at end of file + return bboxes diff --git a/yolort/utils/checkpoint.py b/yolort/utils/checkpoint.py index a0c200e4..d7dbe56e 100644 --- a/yolort/utils/checkpoint.py +++ b/yolort/utils/checkpoint.py @@ -3,9 +3,9 @@ # Copyright (c) Megvii Inc. All rights reserved. import os import shutil -from loguru import logger import torch +from loguru import logger def load_ckpt(model, ckpt): @@ -14,9 +14,7 @@ def load_ckpt(model, ckpt): for key_model, v in model_state_dict.items(): if key_model not in ckpt: logger.warning( - "{} is not in the ckpt. Please double check and see if this is desired.".format( - key_model - ) + "{} is not in the ckpt. 
Please double check and see if this is desired.".format(key_model) ) continue v_ckpt = ckpt[key_model] diff --git a/yolort/utils/dist.py b/yolort/utils/dist.py index a4b46801..1485c88b 100644 --- a/yolort/utils/dist.py +++ b/yolort/utils/dist.py @@ -14,11 +14,11 @@ import pickle import time from contextlib import contextmanager -from loguru import logger import numpy as np import torch +from loguru import logger from torch import distributed as dist __all__ = [ @@ -39,9 +39,9 @@ def get_num_devices(): - gpu_list = os.getenv('CUDA_VISIBLE_DEVICES', None) + gpu_list = os.getenv("CUDA_VISIBLE_DEVICES", None) if gpu_list is not None: - return len(gpu_list.split(',')) + return len(gpu_list.split(",")) else: devices_list_info = os.popen("nvidia-smi -L") devices_list_info = devices_list_info.read().strip().split("\n") @@ -151,10 +151,10 @@ def _serialize_to_tensor(data, group): device = torch.device("cpu" if backend == "gloo" else "cuda") buffer = pickle.dumps(data) - if len(buffer) > 1024 ** 3: + if len(buffer) > 1024**3: logger.warning( "Rank {} trying to all-gather {:.2f} GB of data on device {}".format( - get_rank(), len(buffer) / (1024 ** 3), device + get_rank(), len(buffer) / (1024**3), device ) ) storage = torch.ByteStorage.from_buffer(buffer) @@ -169,14 +169,9 @@ def _pad_to_largest_tensor(tensor, group): Tensor: padded tensor that has the max size """ world_size = dist.get_world_size(group=group) - assert ( - world_size >= 1 - ), "comm.gather/all_gather must be called from ranks within the given group!" + assert world_size >= 1, "comm.gather/all_gather must be called from ranks within the given group!" 
local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device) - size_list = [ - torch.zeros([1], dtype=torch.int64, device=tensor.device) - for _ in range(world_size) - ] + size_list = [torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size)] dist.all_gather(size_list, local_size, group=group) size_list = [int(size.item()) for size in size_list] @@ -185,9 +180,7 @@ def _pad_to_largest_tensor(tensor, group): # we pad the tensor because torch all_gather does not support # gathering tensors of different shapes if local_size != max_size: - padding = torch.zeros( - (max_size - local_size,), dtype=torch.uint8, device=tensor.device - ) + padding = torch.zeros((max_size - local_size,), dtype=torch.uint8, device=tensor.device) tensor = torch.cat((tensor, padding), dim=0) return size_list, tensor @@ -216,10 +209,7 @@ def all_gather(data, group=None): max_size = max(size_list) # receiving Tensor from all ranks - tensor_list = [ - torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) - for _ in size_list - ] + tensor_list = [torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list] dist.all_gather(tensor_list, tensor, group=group) data_list = [] @@ -258,10 +248,7 @@ def gather(data, dst=0, group=None): # receiving Tensor from all ranks if rank == dst: max_size = max(size_list) - tensor_list = [ - torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) - for _ in size_list - ] + tensor_list = [torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list] dist.gather(tensor, tensor_list, dst=dst, group=group) data_list = [] @@ -282,7 +269,7 @@ def shared_random_seed(): create one. All workers must call this function, otherwise it will deadlock. 
""" - ints = np.random.randint(2 ** 31) + ints = np.random.randint(2**31) all_ints = all_gather(ints) return all_ints[0] @@ -291,4 +278,4 @@ def time_synchronized(): """pytorch-accurate time""" if torch.cuda.is_available(): torch.cuda.synchronize() - return time.time() \ No newline at end of file + return time.time() diff --git a/yolort/utils/ema.py b/yolort/utils/ema.py index 364e8c87..67734266 100644 --- a/yolort/utils/ema.py +++ b/yolort/utils/ema.py @@ -51,10 +51,8 @@ def update(self, model): self.updates += 1 d = self.decay(self.updates) - msd = ( - model.module.state_dict() if is_parallel(model) else model.state_dict() - ) # model state_dict + msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict for k, v in self.ema.state_dict().items(): if v.dtype.is_floating_point: v *= d - v += (1.0 - d) * msd[k].detach() \ No newline at end of file + v += (1.0 - d) * msd[k].detach() diff --git a/yolort/utils/logger.py b/yolort/utils/logger.py index 00f1d125..ed78a4cb 100644 --- a/yolort/utils/logger.py +++ b/yolort/utils/logger.py @@ -1,15 +1,16 @@ +import datetime +import inspect import os import sys -import cv2 import time -import datetime -import inspect -import numpy as np -from loguru import logger from collections import defaultdict, deque +import cv2 +import numpy as np + import torch import torch.distributed as dist +from loguru import logger from yolort.utils import is_module_available @@ -204,6 +205,7 @@ def get_rank(): def is_main_process(): return get_rank() == 0 + def get_caller_name(depth=0): """ Args: @@ -317,17 +319,20 @@ class WandbLogger(object): https://docs.wandb.ai/guides/track https://docs.wandb.ai/guides/integrations/other/yolox """ - def __init__(self, - project=None, - name=None, - id=None, - entity=None, - save_dir=None, - config=None, - val_dataset=None, - num_eval_images=100, - log_checkpoints=False, - **kwargs): + + def __init__( + self, + project=None, + name=None, + id=None, + entity=None, + 
save_dir=None, + config=None, + val_dataset=None, + num_eval_images=100, + log_checkpoints=False, + **kwargs, + ): """ Args: project (str): wandb project name. @@ -357,12 +362,12 @@ def __init__(self, """ try: import wandb + self.wandb = wandb except ModuleNotFoundError: raise ModuleNotFoundError( - "wandb is not installed." - "Please install wandb using pip install wandb" - ) + "wandb is not installed." "Please install wandb using pip install wandb" + ) from yolox.data.datasets import VOCDetection @@ -379,14 +384,14 @@ def __init__(self, self.num_log_images = len(val_dataset) else: self.num_log_images = min(num_eval_images, len(val_dataset)) - self.log_checkpoints = (log_checkpoints == "True" or log_checkpoints == "true") + self.log_checkpoints = log_checkpoints == "True" or log_checkpoints == "true" self._wandb_init = dict( project=self.project, name=self.name, id=self.id, entity=self.entity, dir=self.save_dir, - resume="allow" + resume="allow", ) self._wandb_init.update(**kwargs) @@ -404,9 +409,7 @@ def __init__(self, if val_dataset and self.num_log_images != 0: self.val_dataset = val_dataset self.cats = val_dataset.cats - self.id_to_class = { - cls['id']: cls['name'] for cls in self.cats - } + self.id_to_class = {cls["id"]: cls["name"] for cls in self.cats} self._log_validation_set(val_dataset) @property @@ -445,10 +448,7 @@ def _log_validation_set(self, val_dataset): if isinstance(id, torch.Tensor): id = id.item() - self.val_table.add_data( - id, - self.wandb.Image(img) - ) + self.val_table.add_data(id, self.wandb.Image(img)) self.val_artifact.add(self.val_table, "validation_images_table") self.run.use_artifact(self.val_artifact) @@ -478,16 +478,17 @@ def _convert_prediction_format(self, predictions): act_scores.append(score) act_cls.append(classes) - image_wise_data.update({ - int(img_id): { - "bboxes": [box.numpy().tolist() for box in act_box], - "scores": [score.numpy().item() for score in act_scores], - "categories": [ - 
self.val_dataset.class_ids[int(act_cls[ind])] - for ind in range(len(act_box)) - ], + image_wise_data.update( + { + int(img_id): { + "bboxes": [box.numpy().tolist() for box in act_box], + "scores": [score.numpy().item() for score in act_scores], + "categories": [ + self.val_dataset.class_ids[int(act_cls[ind])] for ind in range(len(act_box)) + ], + } } - }) + ) return image_wise_data @@ -546,14 +547,12 @@ def log_images(self, predictions): "minX": min(x0, x1), "minY": min(y0, y1), "maxX": max(x0, x1), - "maxY": max(y0, y1) + "maxY": max(y0, y1), }, "class_id": prediction["categories"][i], - "domain": "pixel" + "domain": "pixel", } - avg_scores[ - self.id_to_class[prediction["categories"][i]] - ] += prediction["scores"][i] + avg_scores[self.id_to_class[prediction["categories"][i]]] += prediction["scores"][i] num_occurrences[self.id_to_class[prediction["categories"][i]]] += 1 boxes.append(box) else: @@ -567,14 +566,10 @@ def log_images(self, predictions): average_class_score.append(score) result_table.add_data( idx, - self.wandb.Image(val[1], boxes={ - "prediction": { - "box_data": boxes, - "class_labels": self.id_to_class - } - } + self.wandb.Image( + val[1], boxes={"prediction": {"box_data": boxes, "class_labels": self.id_to_class}} ), - *average_class_score + *average_class_score, ) self.wandb.log({"val_results/result_table": result_table}) @@ -597,11 +592,7 @@ def save_checkpoint(self, save_dir, model_name, is_best, metadata=None): epoch = None filename = os.path.join(save_dir, model_name + "_ckpt.pth") - artifact = self.wandb.Artifact( - name=f"run_{self.run.id}_model", - type="model", - metadata=metadata - ) + artifact = self.wandb.Artifact(name=f"run_{self.run.id}_model", type="model", metadata=metadata) artifact.add_file(filename, name="model_ckpt.pth") aliases = ["latest"] @@ -624,8 +615,8 @@ def initialize_wandb_logger(cls, args, exp, val_dataset): for k, v in zip(args.opts[0::2], args.opts[1::2]): if k.startswith("wandb-"): try: - 
wandb_params.update({k[len(prefix):]: int(v)}) + wandb_params.update({k[len(prefix) :]: int(v)}) except ValueError: - wandb_params.update({k[len(prefix):]: v}) + wandb_params.update({k[len(prefix) :]: v}) - return cls(config=vars(exp), val_dataset=val_dataset, **wandb_params) \ No newline at end of file + return cls(config=vars(exp), val_dataset=val_dataset, **wandb_params) diff --git a/yolort/utils/lr_scheduler.py b/yolort/utils/lr_scheduler.py index 42c00cf2..777da407 100644 --- a/yolort/utils/lr_scheduler.py +++ b/yolort/utils/lr_scheduler.py @@ -84,8 +84,7 @@ def _get_lr_func(self, name): ) elif name == "multistep": # stepwise lr schedule milestones = [ - int(self.total_iters * milestone / self.total_epochs) - for milestone in self.milestones + int(self.total_iters * milestone / self.total_epochs) for milestone in self.milestones ] gamma = getattr(self, "gamma", 0.1) lr_func = partial(multistep_lr, self.lr, milestones, gamma) @@ -103,17 +102,10 @@ def cos_lr(lr, total_iters, iters): def warm_cos_lr(lr, total_iters, warmup_total_iters, warmup_lr_start, iters): """Cosine learning rate with warm up.""" if iters <= warmup_total_iters: - lr = (lr - warmup_lr_start) * iters / float( - warmup_total_iters - ) + warmup_lr_start + lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start else: lr *= 0.5 * ( - 1.0 - + math.cos( - math.pi - * (iters - warmup_total_iters) - / (total_iters - warmup_total_iters) - ) + 1.0 + math.cos(math.pi * (iters - warmup_total_iters) / (total_iters - warmup_total_iters)) ) return lr @@ -131,18 +123,14 @@ def yolox_warm_cos_lr( min_lr = lr * min_lr_ratio if iters <= warmup_total_iters: # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start - lr = (lr - warmup_lr_start) * pow( - iters / float(warmup_total_iters), 2 - ) + warmup_lr_start + lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2) + warmup_lr_start elif iters >= total_iters - no_aug_iter: lr = min_lr else: lr 
= min_lr + 0.5 * (lr - min_lr) * ( 1.0 + math.cos( - math.pi - * (iters - warmup_total_iters) - / (total_iters - warmup_total_iters - no_aug_iter) + math.pi * (iters - warmup_total_iters) / (total_iters - warmup_total_iters - no_aug_iter) ) ) return lr @@ -165,18 +153,14 @@ def yolox_semi_warm_cos_lr( min_lr = lr * min_lr_ratio if iters <= warmup_total_iters: # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start - lr = (lr - warmup_lr_start) * pow( - iters / float(warmup_total_iters), 2 - ) + warmup_lr_start + lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2) + warmup_lr_start elif iters >= normal_iters + semi_iters: lr = min_lr elif iters <= normal_iters: lr = min_lr + 0.5 * (lr - min_lr) * ( 1.0 + math.cos( - math.pi - * (iters - warmup_total_iters) - / (total_iters - warmup_total_iters - no_aug_iters) + math.pi * (iters - warmup_total_iters) / (total_iters - warmup_total_iters - no_aug_iters) ) ) else: @@ -187,10 +171,7 @@ def yolox_semi_warm_cos_lr( * ( normal_iters - warmup_total_iters - + (iters - normal_iters) - * iters_per_epoch - * 1.0 - / iters_per_epoch_semi + + (iters - normal_iters) * iters_per_epoch * 1.0 / iters_per_epoch_semi ) / (total_iters - warmup_total_iters - no_aug_iters) ) diff --git a/yolort/utils/metric.py b/yolort/utils/metric.py index f04013a3..2cb79271 100644 --- a/yolort/utils/metric.py +++ b/yolort/utils/metric.py @@ -5,9 +5,9 @@ import os import time from collections import defaultdict, deque -import psutil import numpy as np +import psutil import torch @@ -17,7 +17,7 @@ "get_total_and_free_memory_in_Mb", "occupy_mem", "gpu_mem_usage", - "mem_usage" + "mem_usage", ] @@ -27,7 +27,7 @@ def get_total_and_free_memory_in_Mb(cuda_device): ) devices_info = devices_info_str.read().strip().split("\n") if "CUDA_VISIBLE_DEVICES" in os.environ: - visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(',') + visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(",") cuda_device = 
int(visible_devices[cuda_device]) total, used = devices_info[int(cuda_device)].split(",") return int(total), int(used) @@ -134,4 +134,4 @@ def update(self, values=None, **kwargs): def clear_meters(self): for v in self.values(): - v.clear() \ No newline at end of file + v.clear() diff --git a/yolort/utils/model_utils.py b/yolort/utils/model_utils.py index 0b848888..228c3851 100644 --- a/yolort/utils/model_utils.py +++ b/yolort/utils/model_utils.py @@ -55,4 +55,4 @@ def recover_status(module): backup_status(module) yield module - recover_status(module) \ No newline at end of file + recover_status(module)