Skip to content

Commit

Permalink
Tensor initialization on device improvements (#6959)
Browse files (browse the repository at this point in the history)
* Update common.py with speed improvements

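Eliminate `.to()` ops where possible by creating tensors directly on the target device (passing `device=` and `dtype=` at construction), which reduces data-transfer overhead. This primarily affects warmup and PyTorch Hub inference.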

* Updates

* Updates

* Update detect.py

* Update val.py
  • Loading branch information
glenn-jocher committed Mar 12, 2022
1 parent 52c1399 commit 701e117
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ def warmup(self, imgsz=(1, 3, 640, 640)):
# Warmup model by running inference once
if self.pt or self.jit or self.onnx or self.engine: # warmup types
if isinstance(self.device, torch.device) and self.device.type != 'cpu': # only warmup GPU models
im = torch.zeros(*imgsz).to(self.device).type(torch.half if self.fp16 else torch.float) # input image
im = torch.zeros(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input
self.forward(im) # warmup

@staticmethod
Expand Down
6 changes: 3 additions & 3 deletions val.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def process_batch(detections, labels, iouv):
matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
# matches = matches[matches[:, 2].argsort()[::-1]]
matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
matches = torch.Tensor(matches).to(iouv.device)
matches = torch.from_numpy(matches).to(iouv.device)
correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
return correct

Expand Down Expand Up @@ -155,7 +155,7 @@ def run(data,
cuda = device.type != 'cpu'
is_coco = isinstance(data.get('val'), str) and data['val'].endswith('coco/val2017.txt') # COCO dataset
nc = 1 if single_cls else int(data['nc']) # number of classes
iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95
niou = iouv.numel()

# Dataloader
Expand Down Expand Up @@ -196,7 +196,7 @@ def run(data,
loss += compute_loss([x.float() for x in train_out], targets)[1] # box, obj, cls

# NMS
targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels
targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels
lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling
t3 = time_sync()
out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
Expand Down

0 comments on commit 701e117

Please sign in to comment.