Skip to content

Commit

Permalink
update video rotation when doing detection
Browse files (browse the repository at this point in the history)
  • Loading branch information
BruceWang94 committed Nov 3, 2020
1 parent 187f7c2 commit 28f966f
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 6 deletions.
6 changes: 3 additions & 3 deletions detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def detect(save_img=False):
t0 = time.time()
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
_ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
for path, img, im0s, vid_cap in dataset:
for path, img, im0s, vid_cap, rotation in dataset:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
Expand Down Expand Up @@ -133,8 +133,8 @@ def detect(save_img=False):

fourcc = 'mp4v' # output video codec
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) if not rotation else int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) if not rotation else int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
vid_writer.write(im0)

Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ tensorboard>=2.2
torch>=1.6.0
torchvision>=0.7.0
tqdm>=4.41.0
scikit-video
ffmpeg

# logging -------------------------------------
# wandb
Expand Down
22 changes: 19 additions & 3 deletions utils/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import cv2
import math
import skvideo.io
import numpy as np
import torch
from PIL import Image, ExifTags
Expand Down Expand Up @@ -122,9 +123,19 @@ def __init__(self, path, img_size=640):
images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
ni, nv = len(images), len(videos)
videos_rotation = [None for _ in videos]
for index in range(nv):
metadata = skvideo.io.ffprobe(videos[index])
if 'video' in metadata and 'tag' in metadata['video']:
tags = metadata['video']['tag']

for tag in tags:
if tag['@key'] == 'rotate':
videos_rotation[index] = tag['@value']

self.img_size = img_size
self.files = images + videos
self.rotation = [None for _ in images] + videos_rotation
self.nf = ni + nv # number of files
self.video_flag = [False] * ni + [True] * nv
self.mode = 'images'
Expand All @@ -143,6 +154,7 @@ def __next__(self):
if self.count == self.nf:
raise StopIteration
path = self.files[self.count]
rotation = self.rotation[self.count]

if self.video_flag[self.count]:
# Read video
Expand All @@ -168,6 +180,10 @@ def __next__(self):
assert img0 is not None, 'Image Not Found ' + path
print('image %g/%g %s: ' % (self.count, self.nf, path), end='')

# Rotate the frame when the video metadata carries a 'rotate' tag
if rotation:
img0 = cv2.rotate(img0, 0)

# Padded resize
img = letterbox(img0, new_shape=self.img_size)[0]

Expand All @@ -176,7 +192,7 @@ def __next__(self):
img = np.ascontiguousarray(img)

# cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image
return path, img, img0, self.cap
return path, img, img0, self.cap, rotation

def new_video(self, path):
self.frame = 0
Expand Down Expand Up @@ -246,7 +262,7 @@ def __next__(self):
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
img = np.ascontiguousarray(img)

return img_path, img, img0, None
return img_path, img, img0, None, None

def __len__(self):
return 0
Expand Down Expand Up @@ -319,7 +335,7 @@ def __next__(self):
img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
img = np.ascontiguousarray(img)

return self.sources, img, img0, None
return self.sources, img, img0, None, None

def __len__(self):
return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years
Expand Down

0 comments on commit 28f966f

Please sign in to comment.