Skip to content

Commit

Permalink
init from yolov5_quant_sample
Browse files Browse the repository at this point in the history
  • Loading branch information
wjx052333 committed Apr 26, 2023
1 parent ff6a9ac commit 7a1a558
Show file tree
Hide file tree
Showing 26 changed files with 2,802 additions and 18 deletions.
33 changes: 33 additions & 0 deletions data/hyps/hyp.qat.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Hyperparameters for COCO training from scratch
# python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials


lr0: 0.0001 # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937 # SGD momentum/Adam beta1
weight_decay: 0.0005 # optimizer weight decay 5e-4
warmup_epochs: 0 # warmup epochs (fractions ok)
warmup_momentum: 0.8 # warmup initial momentum
warmup_bias_lr: 0.1 # warmup initial bias lr
box: 0.05 # box loss gain
cls: 0.5 # cls loss gain
cls_pw: 1.0 # cls BCELoss positive_weight
obj: 1.0 # obj loss gain (scale with pixels)
obj_pw: 1.0 # obj BCELoss positive_weight
iou_t: 0.20 # IoU training threshold
anchor_t: 4.0 # anchor-multiple threshold
# anchors: 3 # anchors per output layer (0 to ignore)
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4 # image HSV-Value augmentation (fraction)
degrees: 0.0 # image rotation (+/- deg)
translate: 0.1 # image translation (+/- fraction)
scale: 0.5 # image scale (+/- gain)
shear: 0.0 # image shear (+/- deg)
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
flipud: 0.0 # image flip up-down (probability)
fliplr: 0.5 # image flip left-right (probability)
mosaic: 1.0 # image mosaic (probability)
mixup: 0.0 # image mixup (probability)
2 changes: 2 additions & 0 deletions docker/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
docker build --network=host . --rm --pull --no-cache -t yolov5_quant
12 changes: 12 additions & 0 deletions docker/launch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash

CMD=${1:-/bin/bash}
NV_VISIBLE_DEVICES=${2:-"0"}
DOCKER_BRIDGE=${3:-"host"}

docker run -it --rm --name yolov5_quant -p 80:8888 \
--gpus device=$NV_VISIBLE_DEVICES \
--net=$DOCKER_BRIDGE \
--shm-size=16g \
-v $(dirname $(pwd)):/root/space/projects \
yolov5_quant $CMD
66 changes: 66 additions & 0 deletions export_qat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats
Usage:
$ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
"""

import argparse
import sys
import time
import warnings

sys.path.append('./') # to run '$ python *.py' files in subdirectories

import torch
import torch.nn as nn

import models
from models.experimental import attempt_load
from utils.activations import Hardswish, SiLU
from utils.general import set_logging
from utils.torch_utils import select_device

# To use Pytorch's own fake quantization functions
from pytorch_quantization import nn as quant_nn


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/
parser.add_argument('--img-size', type=int, default=640, help='image size') # height, width
parser.add_argument('--batch-size', type=int, default=1, help='batch size')
parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')
parser.add_argument('--grid', action='store_true', help='export Detect() layer grid')
parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
opt = parser.parse_args()
print(opt)
set_logging()
t = time.time()

# Load PyTorch model
device = select_device(opt.device)
model = attempt_load(opt.weights, map_location=device) # load FP32 model
model.eval()
quant_nn.TensorQuantizer.use_fb_fake_quant = True
model.model[-1].export = not opt.grid # set Detect() layer grid export


dummy_input = torch.rand(opt.batch_size, 3, opt.img_size, opt.img_size, device='cuda')

# ONNX export
try:
import onnx

print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
f = opt.weights.replace('.pt', '.onnx') # filename
torch.onnx.export(model, dummy_input, f, verbose=False, opset_version=13, input_names=['images'],
output_names= ['output_0', 'output_1', 'output_2'],
dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'}} if opt.dynamic else None)

# Checks
onnx_model = onnx.load(f) # load onnx model
onnx.checker.check_model(onnx_model) # check onnx model
# print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model
print('ONNX export success, saved as %s' % f)
except Exception as e:
print('ONNX export failure: %s' % e)
13 changes: 11 additions & 2 deletions models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import copy_attr, smart_inference_mode

try:
from pytorch_quantization import nn as quant_nn
except ImportError:
raise ImportError(
"pytorch-quantization is not installed. Install from "
"https://github.com/NVIDIA/TensorRT/tree/master/tools/pytorch-quantization."
)

def autopad(k, p=None, d=1): # kernel, padding, dilation
# Pad to 'same' shape outputs
Expand All @@ -48,7 +55,8 @@ class Conv(nn.Module):

def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
super().__init__()
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
#wjx self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
self.conv = quant_nn.QuantConv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
self.bn = nn.BatchNorm2d(c2)
self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

Expand Down Expand Up @@ -206,7 +214,8 @@ def __init__(self, c1, c2, k=(5, 9, 13)):
c_ = c1 // 2 # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
#wjx self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
self.m = nn.ModuleList([quant_nn.QuantMaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

def forward(self, x):
x = self.cv1(x)
Expand Down
6 changes: 5 additions & 1 deletion models/experimental.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,11 @@ def attempt_load(weights, device=None, inplace=True, fuse=True):
if hasattr(ckpt, 'names') and isinstance(ckpt.names, (list, tuple)):
ckpt.names = dict(enumerate(ckpt.names)) # convert to dict

model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval()) # model in eval mode
#model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval()) # model in eval mode
# Modified by maggie.
# 1. Since we benchmark the speed using TensorRT backend, so it is not necesary to fuse.
# 2. If fuse, the fuse_conv_and_bn function will be called, then the quant_nn.QuantConv2d will be replace by noraml Conv2d wjx
model.append(ckpt.eval())

# Module compatibility updates
for m in model.modules():
Expand Down
5 changes: 3 additions & 2 deletions models/yolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from utils.plots import feature_visualization
from utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device,
time_sync)

from pytorch_quantization import nn as quant_nn
try:
import thop # for FLOPs computation
except ImportError:
Expand All @@ -50,7 +50,8 @@ def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer
self.grid = [torch.empty(0) for _ in range(self.nl)] # init grid
self.anchor_grid = [torch.empty(0) for _ in range(self.nl)] # init anchor grid
self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2)
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
#self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.m = nn.ModuleList(quant_nn.QuantConv2d(x, self.no * self.na, 1) for x in ch) # output conv wjx
self.inplace = inplace # use inplace ops (e.g. slice assignment)

def forward(self, x):
Expand Down
29 changes: 17 additions & 12 deletions train.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,18 +118,21 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio
# Model
check_suffix(weights, '.pt') # check weights
pretrained = weights.endswith('.pt')
if pretrained:
with torch_distributed_zero_first(LOCAL_RANK):
weights = attempt_download(weights) # download if not found locally
ckpt = torch.load(weights, map_location='cpu') # load checkpoint to CPU to avoid CUDA memory leak
model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create
exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys
csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32
csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect
model.load_state_dict(csd, strict=False) # load
LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report
if model is None: # wjx
if pretrained:
with torch_distributed_zero_first(LOCAL_RANK):
weights = attempt_download(weights) # download if not found locally
ckpt = torch.load(weights, map_location='cpu') # load checkpoint to CPU to avoid CUDA memory leak
model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create
exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys
csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32
csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect
model.load_state_dict(csd, strict=False) # load
LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report
else:
model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create
else:
model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create
pretrained = False # For QAT finetuning wjx
amp = check_amp(model) # check AMP

# Freeze
Expand Down Expand Up @@ -415,7 +418,9 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio
data_dict,
batch_size=batch_size // WORLD_SIZE * 2,
imgsz=imgsz,
model=attempt_load(f, device).half(),
#model=attempt_load(f, device).half(),
# During QAT finetuning, close the half recision wjx
model=attempt_load(f, device),
iou_thres=0.65 if is_coco else 0.60, # best pycocotools at iou 0.65
single_cls=single_cls,
dataloader=val_loader,
Expand Down
Loading

0 comments on commit 7a1a558

Please sign in to comment.