diff --git a/models/common.py b/models/common.py
index a54050337d14..e543d129d907 100644
--- a/models/common.py
+++ b/models/common.py
@@ -116,22 +116,38 @@ def forward(self, x):
 
 
 class autoShape(nn.Module):
-    # auto-reshape image size model wrapper
+    # auto-reshape input image model wrapper
     img_size = 640  # inference size (pixels)
 
     def __init__(self, model):
        super(autoShape, self).__init__()
        self.model = model
 
-    def forward(self, x, shape=640, augment=False, profile=False):  # x = cv2.imread('img.jpg')
-        x0shape = x.shape[:2]
-        p = next(self.model.parameters())
-        x, ratio, (dw, dh) = letterbox(x, new_shape=make_divisible(shape or max(x0shape), int(self.stride.max())))
-        x1shape = x.shape[:2]
-        x = np.ascontiguousarray(x[:, :, ::-1].transpose(2, 0, 1))  # BGR to RGB, to 3x640x640
-        x = torch.from_numpy(x).to(p.device).type_as(p).unsqueeze(0) / 255.  # uint8 to fp16/32
+    def forward(self, x, shape=640, augment=False, profile=False):
+        # x is a cv2/np/PIL RGB image, or a list of images for batched inference, e.g. x = Image.open('image.jpg')
+        p = next(self.model.parameters())  # for device and type
+        if not isinstance(x, list):
+            x = [x]
+        batch = range(len(x))  # batch size
+
+        shape0, shape1 = [], []  # image and inference shapes
+        for i in batch:
+            x[i] = np.array(x[i])[:, :, :3]  # enforce 3 channels (drop alpha from RGBA PNGs)
+            s = x[i].shape[:2]  # (h, w)
+            shape0.append(s)  # image shape
+            g = shape / max(s)  # gain
+            shape1.append([y * g for y in s])
+        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
+
+        x = [letterbox(x[i], new_shape=shape1, auto=False)[0] for i in batch]  # pad
+        x = np.stack(x, 0) if batch[-1] else x[0][None]  # stack (bs > 1) or add batch dim (bs == 1)
+        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
+        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
+
         x = self.model(x, augment, profile)  # forward
-        x[0][:, :4] = scale_coords(x1shape, x[0][:, :4], x0shape)
+
+        for i in batch:
+            x[i][:, :4] = scale_coords(shape1, x[i][:, :4], shape0[i])  # postprocess: rescale boxes to original image shape
         return x
 
 
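For context, a rough usage sketch of the new batched `forward` follows. It is not part of the diff: `attempt_load`, the weight and image filenames, and the manual `stride` attachment (the wrapper reads `self.stride`, but the `__init__` shown here does not set it) are assumptions about the surrounding repo state.

```python
# Hypothetical usage sketch -- not part of this diff.
import torch
from PIL import Image

from models.common import autoShape
from models.experimental import attempt_load  # assumed loader in this checkout

model = attempt_load('yolov5s.pt', map_location='cpu')  # assumed weights file
wrapper = autoShape(model.eval())
wrapper.stride = model.stride  # forward() reads self.stride; __init__ does not set it

imgs = [Image.open('image1.jpg'), Image.open('image2.jpg')]  # hypothetical inputs
with torch.no_grad():
    out = wrapper(imgs, shape=640)
# per the postprocess loop above, out[i][:, :4] holds boxes rescaled to image i's
# original resolution
```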
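The common inference shape chosen for the batch may be easier to see with concrete numbers. The sketch below replays the `shape0`/`shape1` logic with hypothetical image sizes, using the same round-up-to-a-stride-multiple contract as the repo's `make_divisible` helper.

```python
# Standalone replay of the inference-shape logic above, with hypothetical sizes.
import math

import numpy as np

def make_divisible(v, divisor):  # same contract as the repo helper: round up to a multiple
    return math.ceil(v / divisor) * divisor

shape, stride = 640, 32  # requested size and max model stride
shape0 = [(720, 1280), (1080, 1920)]  # two source images, (h, w)

shape1 = []
for s in shape0:
    g = shape / max(s)  # gain: long side maps to `shape`
    shape1.append([y * g for y in s])  # both images scale to ~(360, 640)
shape1 = [make_divisible(v, stride) for v in np.stack(shape1, 0).max(0)]  # per-axis max
print(shape1)  # [384, 640] -- one common letterbox shape for the whole batch
```

Taking the per-axis maximum before rounding means every image in the batch fits inside the shared letterbox shape, so a single stacked BCHW tensor can be run in one forward pass.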