From 8506f107fe0c3f8ab652a554b62fcea9d0a7e629 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 28 Mar 2021 20:23:40 +0200
Subject: [PATCH] PyTorch Hub amp.autocast() inference (#2641)

I think this should help speed up CUDA inference, as currently models may be
running in FP32 inference mode on CUDA devices unnecessarily.
---
 models/common.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/models/common.py b/models/common.py
index 21a2ed5a2ca7..5c0e571b752f 100644
--- a/models/common.py
+++ b/models/common.py
@@ -8,6 +8,7 @@
 import torch
 import torch.nn as nn
 from PIL import Image
+from torch.cuda import amp
 
 from utils.datasets import letterbox
 from utils.general import non_max_suppression, make_divisible, scale_coords, xyxy2xywh
@@ -219,17 +220,17 @@ def forward(self, imgs, size=640, augment=False, profile=False):
         x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
         t.append(time_synchronized())
 
-        # Inference
-        with torch.no_grad():
+        with torch.no_grad(), amp.autocast(enabled=p.device.type != 'cpu'):
+            # Inference
             y = self.model(x, augment, profile)[0]  # forward
-        t.append(time_synchronized())
+            t.append(time_synchronized())
 
-        # Post-process
-        y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
-        for i in range(n):
-            scale_coords(shape1, y[i][:, :4], shape0[i])
-        t.append(time_synchronized())
+            # Post-process
+            y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
+            for i in range(n):
+                scale_coords(shape1, y[i][:, :4], shape0[i])
+            t.append(time_synchronized())
 
         return Detections(imgs, y, files, t, self.names, x.shape)
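
Reviewer note: the change above wraps the forward pass and post-processing in
torch.cuda.amp.autocast, enabled only when the model is not on CPU, so CUDA ops
with FP16 kernels run in half precision while the rest stays FP32. A minimal
standalone sketch of the same pattern follows; the Conv2d model and random
input are placeholders for illustration, not part of this patch:

    import torch
    from torch.cuda import amp

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = torch.nn.Conv2d(3, 16, 3).to(device).eval()  # stand-in for self.model
    x = torch.rand(1, 3, 640, 640, device=device)  # stand-in for the batched image tensor

    # Mirrors enabled=p.device.type != 'cpu': autocast is a no-op on CPU,
    # so one code path serves both device types without branching.
    with torch.no_grad(), amp.autocast(enabled=device.type != 'cpu'):
        y = model(x)  # autocast-eligible ops run in FP16 on CUDA, FP32 otherwise

Autocast applies per-op, so keeping NMS and scale_coords inside the block as
the patch does is safe: ops without FP16 support simply execute in FP32, and
results are unchanged up to mixed-precision rounding.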