Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add trtexec TensorRT export #6984

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions export.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,19 @@ def export_coreml(model, im, file, int8, half, prefix=colorstr('CoreML:')):
return None, None


def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):
def export_engine(model,
im,
file,
train,
half,
simplify,
nms=False,
topk_all=None,
iou_thres=None,
conf_thres=None,
workspace=4,
verbose=False,
prefix=colorstr('TensorRT:')):
# YOLOv5 TensorRT export https://developer.nvidia.com/tensorrt
try:
assert im.device.type != 'cpu', 'export running on CPU but must be on GPU, i.e. `python export.py --device 0`'
Expand All @@ -233,6 +245,11 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F
else: # TensorRT >= 8
check_version(trt.__version__, '8.0.0', hard=True) # require tensorrt>=8.0.0
export_onnx(model, im, file, 13, train, False, simplify) # opset 13
if nms:
from utils.general import nmsRegister
export_onnx(model, im, file, 12, train, False, simplify)
LOGGER.info(f'\n{prefix} EfficientNMS plugin only supprot TensorRT greater than 8.0.0 ...')
file = nmsRegister(file, train, topk_all=topk_all, iou_thres=iou_thres, conf_thres=conf_thres)
onnx = file.with_suffix('.onnx')

LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...')
Expand Down Expand Up @@ -261,8 +278,8 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F
for out in outputs:
LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')

LOGGER.info(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 else 32} engine in {f}')
if builder.platform_has_fast_fp16:
LOGGER.info(f'{prefix} building FP{16 if half and builder.platform_has_fast_fp16 else 32} engine in {f}')
if half and builder.platform_has_fast_fp16:
config.set_flag(trt.BuilderFlag.FP16)
with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
t.write(engine.serialize())
Expand Down Expand Up @@ -515,7 +532,8 @@ def run(
if jit:
f[0] = export_torchscript(model, im, file, optimize)
if engine: # TensorRT required before ONNX
f[1] = export_engine(model, im, file, train, half, simplify, workspace, verbose)
f[1] = export_engine(model, im, file, train, half, simplify, nms, topk_all, iou_thres, conf_thres, workspace,
verbose)
if onnx or xml: # OpenVINO requires ONNX
f[2] = export_onnx(model, im, file, opset, train, dynamic, simplify)
if xml: # OpenVINO
Expand Down
17 changes: 17 additions & 0 deletions models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,3 +719,20 @@ def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, str
def forward(self, x):
z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list
return self.flat(self.conv(z)) # flatten to x(b,c2)


class AdditionNet(nn.Module):
    # AdditionNet preprocess for registering EfficientNMS into TensorRT engine
    # Only supports static input shapes
    """Split raw YOLOv5 predictions into the (boxes, scores) pair expected by
    the EfficientNMS_TRT plugin.

    Input x is assumed to be (batch, anchors, 5 + num_classes) laid out as
    [cx, cy, w, h, obj_conf, cls_scores...] — TODO confirm against exporter.
    Returns:
        box:   (batch, anchors, 4) corner-format boxes (x1, y1, x2, y2)
        score: (batch, anchors, num_classes) class scores scaled by objectness
    """

    def __init__(self):
        super().__init__()
        # (cx, cy, w, h) -> (x1, y1, x2, y2) conversion expressed as a matmul.
        # Registered as a buffer (not a plain attribute) so it follows the
        # module across .to(device) / .half() calls.
        self.register_buffer(
            'convert_matrix',
            torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]],
                         dtype=torch.float32))

    def forward(self, x):
        box = x[:, :, :4]  # (cx, cy, w, h)
        conf = x[:, :, 4:5]  # objectness
        # Out-of-place ops: the previous in-place `score *= conf` / `box @= ...`
        # wrote back through slice views and mutated the caller's tensor.
        score = x[:, :, 5:] * conf
        box = box @ self.convert_matrix
        return box, score
367 changes: 367 additions & 0 deletions test-nms.ipynb

Large diffs are not rendered by default.

66 changes: 66 additions & 0 deletions utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import re
import shutil
import signal
import tempfile
import threading
import time
import urllib
Expand Down Expand Up @@ -989,3 +990,68 @@ def imshow(path, im):

# Variables ------------------------------------------------------------------------------------------------------------
NCOLS = 0 if is_docker() else shutil.get_terminal_size().columns # terminal window size for tqdm


# onnx-graphsurgeon ---------------------------------------------------------------------------------------------------
def nmsRegister(file, train, topk_all=100, iou_thres=0.45, conf_thres=0.25):
    """Graft a TensorRT EfficientNMS_TRT node onto an exported ONNX model.

    Loads ``file.onnx``, splices in an AdditionNet post-processing subgraph
    (box decode + objectness-scaled class scores), then appends a single
    EfficientNMS_TRT op so the built engine performs NMS on-device.

    Args:
        file (Path): model path; ``file.with_suffix('.onnx')`` is read and overwritten.
        train (bool): export the helper subgraph in training mode (disables constant folding).
        topk_all (int): maximum detections kept per image.
        iou_thres (float): NMS IoU threshold.
        conf_thres (float): score threshold.

    Returns:
        Path: ``file`` (its sibling ``.onnx`` now contains the NMS graph).
    """
    from models.common import AdditionNet
    try:
        import onnx_graphsurgeon as gs
    except Exception:
        # onnx-graphsurgeon wheels are hosted on NVIDIA's package index
        if platform.system() == 'Linux':
            check_requirements(('onnx-graphsurgeon',), cmds=('-U --index-url https://pypi.ngc.nvidia.com',))
        import onnx_graphsurgeon as gs

    check_requirements(('onnx',))
    import onnx

    # Resolve defaults per argument. Previously, any single falsy value reset
    # ALL three to defaults, silently discarding explicitly-passed thresholds.
    topk_all = topk_all or 100
    iou_thres = iou_thres or 0.45
    conf_thres = conf_thres or 0.25

    gs_graph = gs.import_onnx(onnx.load(file.with_suffix('.onnx')))

    # Export the AdditionNet helper to a temp ONNX file, sized to the main
    # graph's (static) output shape.
    # NOTE(review): NamedTemporaryFile is re-read by name while still open —
    # fine on POSIX, may fail on Windows; confirm target platforms.
    tmpFile = tempfile.NamedTemporaryFile(suffix=".onnx")
    torch.onnx.export(AdditionNet(),
                      torch.randn(gs_graph.outputs[0].shape),
                      tmpFile.name,
                      verbose=False,
                      opset_version=13,
                      training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL,
                      do_constant_folding=not train,
                      input_names=["tmpInput"],
                      output_names=["boxes", "scores"],
                      dynamic_axes=None)
    tmp_graph = gs.import_onnx(onnx.load(tmpFile.name))

    # Splice the helper nodes into the main graph, rewiring any node that reads
    # the helper's placeholder input onto the main graph's prediction output.
    for node in tmp_graph.nodes:
        node.name = "Addition_" + node.name  # avoid name clashes with the main graph
        gs_graph.nodes.append(node)
        if node.inputs and node.inputs[0].name == "tmpInput":
            node.inputs[0] = gs_graph.outputs[0]
    gs_graph.outputs = tmp_graph.outputs  # graph now ends at (boxes, scores)
    gs_graph.cleanup().toposort()

    # Register the EfficientNMS_TRT plugin node.
    op_inputs = gs_graph.outputs
    op = "EfficientNMS_TRT"
    attrs = {
        "plugin_version": "1",
        "background_class": -1,  # no background class
        "max_output_boxes": topk_all,
        "score_threshold": conf_thres,
        "iou_threshold": iou_thres,
        "score_activation": False,
        "box_coding": 0,}  # boxes arrive in corner (x1, y1, x2, y2) format
    # NMS outputs — shapes must track max_output_boxes. These were hard-coded
    # to 100, declaring wrong output shapes whenever topk_all != 100.
    batch = gs_graph.outputs[0].shape[0]
    output_num_detections = gs.Variable(name="num_detections", dtype=np.int32,
                                        shape=[batch, 1
                                               ])  # A scalar indicating the number of valid detections per batch image.
    output_boxes = gs.Variable(name="detection_boxes", dtype=np.float32, shape=[batch, topk_all, 4])
    output_scores = gs.Variable(name="detection_scores", dtype=np.float32, shape=[batch, topk_all])
    output_labels = gs.Variable(name="detection_classes", dtype=np.int32, shape=[batch, topk_all])
    op_outputs = [output_num_detections, output_boxes, output_scores, output_labels]
    gs_graph.layer(op=op, name="batched_nms", inputs=op_inputs, outputs=op_outputs, attrs=attrs)
    gs_graph.outputs = op_outputs
    gs_graph.cleanup().toposort()
    onnx.save(gs.export_onnx(gs_graph), str(file.with_suffix('.onnx')))
    return file