Fixing trainer with lightning #128

Merged
6 commits merged on Jun 28, 2021
4 changes: 1 addition & 3 deletions test/common_utils.py
@@ -17,8 +17,6 @@
 import numpy as np
 from PIL import Image
 
-import logging
-logger = logging.getLogger(__name__)
 
 def set_rng_seed(seed):
     torch.manual_seed(seed)
@@ -132,7 +130,7 @@ def assertExpected(self, output, subname=None, prec=None, strip_suffix=None):
 
         if ACCEPT:
             filename = {os.path.basename(expected_file)}
-            logger.info("Accepting updated output for {}:\n\n{}".format(filename, output))
+            # logger.info("Accepting updated output for {}:\n\n{}".format(filename, output))
             torch.save(output, expected_file)
             MAX_PICKLE_SIZE = 50 * 1000 # 50 KB
             binary_size = os.path.getsize(expected_file)
106 changes: 54 additions & 52 deletions test/test_data_pipeline.py
@@ -1,6 +1,5 @@
 # Copyright (c) 2021, Zhiqiang Wang. All Rights Reserved.
 from pathlib import Path
-import unittest
 import numpy as np
 
 import torch
@@ -11,60 +10,63 @@
 from typing import Dict
 
 
-class DataPipelineTester(unittest.TestCase):
-    def test_contains_any_tensor(self):
-        dummy_numpy = np.random.randn(3, 6)
-        self.assertFalse(contains_any_tensor(dummy_numpy))
-        dummy_tensor = torch.randn(3, 6)
-        self.assertTrue(contains_any_tensor(dummy_tensor))
-        dummy_tensors = [torch.randn(3, 6), torch.randn(9, 5)]
-        self.assertTrue(contains_any_tensor(dummy_tensors))
+def test_contains_any_tensor():
+    dummy_numpy = np.random.randn(3, 6)
+    assert not contains_any_tensor(dummy_numpy)
+    dummy_tensor = torch.randn(3, 6)
+    assert contains_any_tensor(dummy_tensor)
+    dummy_tensors = [torch.randn(3, 6), torch.randn(9, 5)]
+    assert contains_any_tensor(dummy_tensors)
 
-    def test_get_dataset(self):
-        # Acquire the images and labels from the coco128 dataset
-        train_dataset = data_helper.get_dataset(data_root='data-bin', mode='train')
-        # Test the datasets
-        image, target = next(iter(train_dataset))
-        self.assertIsInstance(image, Tensor)
-        self.assertIsInstance(target, Dict)
-
-    def test_get_dataloader(self):
-        batch_size = 8
-        data_loader = data_helper.get_dataloader(data_root='data-bin', mode='train', batch_size=batch_size)
-        # Test the dataloader
-        images, targets = next(iter(data_loader))
+def test_get_dataset():
+    # Acquire the images and labels from the coco128 dataset
+    train_dataset = data_helper.get_dataset(data_root='data-bin', mode='train')
+    # Test the datasets
+    image, target = next(iter(train_dataset))
+    assert isinstance(image, Tensor)
+    assert isinstance(target, Dict)
 
-        self.assertEqual(len(images), batch_size)
-        self.assertIsInstance(images[0], Tensor)
-        self.assertEqual(len(images[0]), 3)
-        self.assertEqual(len(targets), batch_size)
-        self.assertIsInstance(targets[0], Dict)
-        self.assertIsInstance(targets[0]["image_id"], Tensor)
-        self.assertIsInstance(targets[0]["boxes"], Tensor)
-        self.assertIsInstance(targets[0]["labels"], Tensor)
-        self.assertIsInstance(targets[0]["orig_size"], Tensor)
-
-    def test_detection_data_module(self):
-        # Setup the DataModule
-        batch_size = 4
-        train_dataset = data_helper.get_dataset(data_root='data-bin', mode='train')
-        data_module = DetectionDataModule(train_dataset, batch_size=batch_size)
-        self.assertEqual(data_module.batch_size, batch_size)
+def test_get_dataloader():
+    batch_size = 8
+    data_loader = data_helper.get_dataloader(data_root='data-bin', mode='train', batch_size=batch_size)
+    # Test the dataloader
+    images, targets = next(iter(data_loader))
 
-        data_loader = data_module.train_dataloader(batch_size=batch_size)
-        images, targets = next(iter(data_loader))
-        self.assertEqual(len(images), batch_size)
-        self.assertIsInstance(images[0], Tensor)
-        self.assertEqual(len(images[0]), 3)
-        self.assertEqual(len(targets), batch_size)
-        self.assertIsInstance(targets[0], Dict)
-        self.assertIsInstance(targets[0]["image_id"], Tensor)
-        self.assertIsInstance(targets[0]["boxes"], Tensor)
-        self.assertIsInstance(targets[0]["labels"], Tensor)
+    assert len(images) == batch_size
+    assert isinstance(images[0], Tensor)
+    assert len(images[0]) == 3
+    assert len(targets) == batch_size
+    assert isinstance(targets[0], Dict)
+    assert isinstance(targets[0]["image_id"], Tensor)
+    assert isinstance(targets[0]["boxes"], Tensor)
+    assert isinstance(targets[0]["labels"], Tensor)
+    assert isinstance(targets[0]["orig_size"], Tensor)
 
-    def test_prepare_coco128(self):
-        data_path = Path('data-bin')
-        coco128_dirname = 'coco128'
-        data_helper.prepare_coco128(data_path, dirname=coco128_dirname)
-        annotation_file = data_path / coco128_dirname / 'annotations' / 'instances_train2017.json'
-        self.assertTrue(annotation_file.is_file())
 
+def test_detection_data_module():
+    # Setup the DataModule
+    batch_size = 4
+    train_dataset = data_helper.get_dataset(data_root='data-bin', mode='train')
+    data_module = DetectionDataModule(train_dataset, batch_size=batch_size)
+    assert data_module.batch_size == batch_size
+
+    data_loader = data_module.train_dataloader()
+    images, targets = next(iter(data_loader))
+    assert len(images) == batch_size
+    assert isinstance(images[0], Tensor)
+    assert len(images[0]) == 3
+    assert len(targets) == batch_size
+    assert isinstance(targets[0], Dict)
+    assert isinstance(targets[0]["image_id"], Tensor)
+    assert isinstance(targets[0]["boxes"], Tensor)
+    assert isinstance(targets[0]["labels"], Tensor)
+
+
+def test_prepare_coco128():
+    data_path = Path('data-bin')
+    coco128_dirname = 'coco128'
+    data_helper.prepare_coco128(data_path, dirname=coco128_dirname)
+    annotation_file = data_path / coco128_dirname / 'annotations' / 'instances_train2017.json'
+    assert annotation_file.is_file()
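
The test suite above moves from unittest.TestCase methods to plain pytest-style functions. A minimal sketch of the same pattern; the helper below is a hypothetical stand-in so the snippet is self-contained, not yolort's real implementation:

```python
import torch
from torch import Tensor


def contains_any_tensor(value):
    # Hypothetical stand-in for the helper exercised by the real test.
    if isinstance(value, Tensor):
        return True
    if isinstance(value, (list, tuple)):
        return any(contains_any_tensor(v) for v in value)
    return False


def test_contains_any_tensor():
    # self.assertTrue(...)        -> assert ...
    # self.assertFalse(...)       -> assert not ...
    # self.assertIsInstance(x, T) -> assert isinstance(x, T)
    assert contains_any_tensor(torch.randn(3, 6))
    assert not contains_any_tensor([1, 2, 3])
    assert isinstance(torch.randn(3, 6), Tensor)
```
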
2 changes: 1 addition & 1 deletion test/test_onnx.py
@@ -157,7 +157,7 @@ def test_yolotr(self):
             upstream_version='r4.0',
             export_friendly=True,
             pretrained=True,
-            size=(640, 640),
+            size=(640, 640),
             score_thresh=0.45,
         )
         model.eval()
42 changes: 22 additions & 20 deletions yolort/data/data_module.py
@@ -22,7 +22,7 @@ def __init__(
         train_dataset: Optional[Dataset] = None,
         val_dataset: Optional[Dataset] = None,
         test_dataset: Optional[Dataset] = None,
-        batch_size: int = 1,
+        batch_size: int = 16,
         num_workers: int = 0,
         *args: Any,
         **kwargs: Any,
@@ -36,16 +36,19 @@ def __init__(
         self.batch_size = batch_size
         self.num_workers = num_workers
 
-    def train_dataloader(self, batch_size: int = 16) -> None:
+    def train_dataloader(self) -> None:
         """
         VOCDetection and COCODetection
         Args:
             batch_size: size of batch
             transforms: custom transforms
         """
         # Creating data loaders
-        sampler = torch.utils.data.RandomSampler(self._train_dataset)
-        batch_sampler = torch.utils.data.BatchSampler(sampler, batch_size, drop_last=True)
+        batch_sampler = torch.utils.data.BatchSampler(
+            torch.utils.data.RandomSampler(self._train_dataset),
+            self.batch_size,
+            drop_last=True,
+        )
 
         loader = torch.utils.data.DataLoader(
             self._train_dataset,
@@ -56,7 +59,7 @@ def train_dataloader(self, batch_size: int = 16) -> None:
 
         return loader
 
-    def val_dataloader(self, batch_size: int = 16) -> None:
+    def val_dataloader(self) -> None:
         """
         VOCDetection and COCODetection
         Args:
@@ -68,7 +71,7 @@ def val_dataloader(self, batch_size: int = 16) -> None:
 
         loader = torch.utils.data.DataLoader(
             self._val_dataset,
-            batch_size,
+            self.batch_size,
             sampler=sampler,
             drop_last=False,
             collate_fn=collate_fn,
@@ -82,32 +85,31 @@ class COCODetectionDataModule(DetectionDataModule):
     def __init__(
         self,
         data_path: str,
-        annotations_path: Optional[str] = None,
-        year: str = "2017",
+        anno_path: Optional[str] = None,
+        num_classes: int = 80,
+        data_task: str = "instances",
+        train_set: str = "train2017",
+        val_set: str = "val2017",
+        skip_train_set: bool = False,
+        skip_val_set: bool = False,
         train_transform: Optional[Callable] = default_train_transforms,
         val_transform: Optional[Callable] = default_val_transforms,
         batch_size: int = 1,
         num_workers: int = 0,
         *args: Any,
         **kwargs: Any,
     ) -> None:
-        if annotations_path is None:
-            annotations_path = Path(data_path) / 'annotations'
-        self.annotations_path = annotations_path
+        anno_path = Path(anno_path) if anno_path else Path(data_path) / 'annotations'
+        train_ann_file = anno_path / f"{data_task}_{train_set}.json"
+        val_ann_file = anno_path / f"{data_task}_{val_set}.json"
 
-        train_dataset = self.build_datasets(
-            data_path, image_set='train', year=year, transforms=train_transform)
-        val_dataset = self.build_datasets(
-            data_path, image_set='val', year=year, transforms=val_transform)
+        train_dataset = None if skip_train_set else COCODetection(data_path, train_ann_file, train_transform())
+        val_dataset = None if skip_val_set else COCODetection(data_path, val_ann_file, val_transform())
 
         super().__init__(train_dataset=train_dataset, val_dataset=val_dataset,
                          batch_size=batch_size, num_workers=num_workers, *args, **kwargs)
 
-        self.num_classes = 80
-
-    def build_datasets(self, data_path, image_set, year, transforms):
-        ann_file = self.annotations_path / f"instances_{image_set}{year}.json"
-        return COCODetection(data_path, ann_file, transforms())
+        self.num_classes = num_classes
 
 
 class VOCDetectionDataModule(DetectionDataModule):
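
For orientation, a rough usage sketch against the reworked signatures shown above; the directory layout and file names here are placeholders, not something this PR guarantees:

```python
from yolort.data.data_module import COCODetectionDataModule

# Annotation files are now resolved by the datamodule itself as
# <anno_path or data_path/'annotations'>/<data_task>_<train_set>.json,
# so only the roots need to be passed in. Paths below are placeholders.
datamodule = COCODetectionDataModule(
    'data-bin/coco128/images',
    anno_path='data-bin/coco128/annotations',
    data_task='instances',
    train_set='train2017',
    skip_val_set=True,   # no val2017 annotations in this placeholder layout
    batch_size=16,
    num_workers=4,
)

# The dataloader helpers no longer take a batch_size argument;
# they read the batch_size stored on the datamodule in __init__.
train_loader = datamodule.train_dataloader()
```
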
12 changes: 5 additions & 7 deletions yolort/models/yolo_module.py
@@ -25,7 +25,7 @@ class YOLOModule(LightningModule):
     def __init__(
         self,
         lr: float = 0.01,
-        arch: str = 'yolov5_darknet_pan_s_r31',
+        arch: str = 'yolov5_darknet_pan_s_r40',
        pretrained: bool = False,
        progress: bool = True,
        size: Tuple[int, int] = (640, 640),
@@ -177,7 +177,7 @@ def configure_optimizers(self):
             self.model.parameters(),
             lr=self.lr,
             momentum=0.9,
-            weight_decay=0.005,
+            weight_decay=5e-4,
         )
 
     @torch.no_grad()
@@ -249,14 +249,12 @@ def collate_images(self, samples: Any, image_loader: Callable) -> List[Tensor]:
     @staticmethod
     def add_model_specific_args(parent_parser):
         parser = argparse.ArgumentParser(parents=[parent_parser], add_help=False)
-        parser.add_argument('--arch', default='yolov5_darknet_pan_s_r31',
+        parser.add_argument('--arch', default='yolov5_darknet_pan_s_r40',
                             help='model architecture')
-        parser.add_argument('--num_classes', default=80, type=int,
-                            help='number classes of datasets')
         parser.add_argument('--pretrained', action='store_true',
                             help='Use pre-trained models from the modelzoo')
-        parser.add_argument('--lr', default=0.02, type=float,
-                            help='initial learning rate, 0.02 is the default value for training '
+        parser.add_argument('--lr', default=0.01, type=float,
+                            help='initial learning rate, 0.01 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
         parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                             help='momentum')
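
A small sketch of how the new defaults surface through the model's argparse hook; this assumes add_model_specific_args returns the parser it builds, which the truncated hunk suggests but does not show:

```python
import argparse

from yolort.models.yolo_module import YOLOModule

parent_parser = argparse.ArgumentParser(add_help=False)
parser = YOLOModule.add_model_specific_args(parent_parser)
args = parser.parse_args([])

# Defaults after this PR: the r4.0 checkpoint layout and lr=0.01, which
# pairs with the SGD setup in configure_optimizers
# (momentum=0.9, weight_decay=5e-4).
assert args.arch == 'yolov5_darknet_pan_s_r40'
assert args.lr == 0.01
```
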
41 changes: 24 additions & 17 deletions yolort/train.py
@@ -12,36 +12,40 @@ def get_args_parser():
 
     parser.add_argument('--arch', default='yolov5s',
                         help='model structure to train')
-    parser.add_argument('--data_path', default='./data-bin',
-                        help='dataset')
-    parser.add_argument('--dataset_type', default='coco',
-                        help='dataset')
-    parser.add_argument('--dataset_mode', default='instances',
-                        help='dataset mode')
-    parser.add_argument('--years', default=['2017'], nargs='+',
-                        help='dataset year')
-    parser.add_argument('--train_set', default='train',
-                        help='set of train')
-    parser.add_argument('--val_set', default='val',
-                        help='set of val')
-    parser.add_argument('--batch_size', default=32, type=int,
-                        help='images per gpu, the total batch size is $NGPU x batch_size')
-    parser.add_argument('--max_epochs', default=1, type=int, metavar='N',
-                        help='number of total epochs to run')
-    parser.add_argument('--num_gpus', default=1, type=int, metavar='N',
-                        help='number of gpu utilizing (default: 1)')
 
+    parser.add_argument('--data_path', default='./data-bin',
+                        help='root path of the dataset')
+    parser.add_argument('--anno_path', default=None,
+                        help='root path of annotation files')
+    parser.add_argument('--num_classes', default=80, type=int,
+                        help='number of classes')
+    parser.add_argument('--data_task', default='instances',
+                        help='dataset mode')
+    parser.add_argument('--train_set', default='train2017',
+                        help='name of train dataset')
+    parser.add_argument('--val_set', default='val2017',
+                        help='name of val dataset')
+    parser.add_argument('--skip_train_set', action='store_true',
+                        help='Skip train set')
+    parser.add_argument('--skip_val_set', action='store_true',
+                        help='Skip val set')
+    parser.add_argument('--batch_size', default=32, type=int,
+                        help='images per gpu, the total batch size is $NGPU x batch_size')
+    parser.add_argument('--num_workers', default=4, type=int, metavar='N',
+                        help='number of data loading workers (default: 4)')
+    parser.add_argument('--print_freq', default=20, type=int,
+                        help='print frequency')
 
     parser.add_argument('--output_dir', default='.',
                         help='path where to save')
     return parser
 
 
 def main(args):
     # Load the data
-    datamodule = VOCDetectionDataModule.from_argparse_args(args)
+    datamodule = COCODetectionDataModule.from_argparse_args(args)
 
     # Build the model
     model = models.__dict__[args.arch](num_classes=datamodule.num_classes)
@@ -52,6 +56,9 @@ def main(args):
     # Train the model
     trainer.fit(model, datamodule=datamodule)
 
+    # Save it!
+    trainer.save_checkpoint("object_detection_model.pt")
+
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser('YOLOv5 training and evaluation script', parents=[get_args_parser()])
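
Taken together, main() now wires a COCO datamodule, a model picked by architecture name, and a Lightning Trainer, then checkpoints the result. A condensed sketch of that flow; the Trainer construction and the exact import of models are abridged here, the real script builds them from the parsed arguments:

```python
import argparse

from pytorch_lightning import Trainer

from yolort import models
from yolort.data.data_module import COCODetectionDataModule


def run_training(args: argparse.Namespace) -> None:
    # Load the data: the datamodule is created straight from the CLI namespace.
    datamodule = COCODetectionDataModule.from_argparse_args(args)

    # Build the model by architecture name, e.g. args.arch == 'yolov5s'.
    model = models.__dict__[args.arch](num_classes=datamodule.num_classes)

    # Train; a minimal Trainer stands in for the fully configured one.
    trainer = Trainer(max_epochs=1)
    trainer.fit(model, datamodule=datamodule)

    # Save the trained LightningModule, mirroring the end of main().
    trainer.save_checkpoint("object_detection_model.pt")
```
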