-
Notifications
You must be signed in to change notification settings - Fork 0
/
data.py
200 lines (170 loc) · 7.76 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
import random
from PIL import Image
import numpy as np
import torch
from torch.utils.data import IterableDataset, Dataset
import torchvision
from torchvision.transforms import Normalize, Compose, Resize
import cv2
import matplotlib.pyplot as plt
from matplotlib.path import Path
import matplotlib.patches as patches
class SingleMaskedVideoTrainDataset(IterableDataset):
    """Infinite iterable dataset of random temporal crops from one masked video.

    Each sample is a pair ``(frames, masks)`` stacked along dim=1, i.e. shape
    ``(C, window, H, W)``; frames are normalized to [-1, 1] and masks are
    resized with nearest-neighbour interpolation to stay binary.
    """

    def __init__(self, image_path, mask_path, small=False, window=12):
        super().__init__()
        # read_video yields (T, H, W, C) tensors; audio/metadata are discarded.
        self.video, _, _ = torchvision.io.read_video(image_path)
        self.mask, _, _ = torchvision.io.read_video(mask_path)
        size = (256, 256) if small else (240, 432)
        self.transforms = Compose([
            Resize(size, antialias=True),
            Normalize(0.5, 0.5)  # Change range [0,1] -> [-1, 1]
        ])
        # Nearest-neighbour so resized mask values stay exact (no blending).
        self.mask_transforms = Resize(size, interpolation=torchvision.transforms.InterpolationMode.NEAREST)
        self.window = window

    def __iter__(self):
        half = self.window // 2
        # Use all frames or split train / test temporally.
        last = self.video.shape[0]

        def endless_crops():
            while True:
                # Draw a window center far enough from both ends so the whole
                # window fits inside the video.
                center = np.random.randint(half, last - half + 1)
                span = range(center - half, center + half)
                clip = [self.transforms(self.video[i].permute(2, 0, 1) / 255.0) for i in span]
                seg = [self.mask_transforms(self.mask[i].permute(2, 0, 1) / 255.0) for i in span]
                yield torch.stack(clip, dim=1), torch.stack(seg, dim=1)

        return endless_crops()
class SingleVideoTestDataset(Dataset):
    """Single-item dataset returning the first ``n_frames`` of a video/mask pair.

    ``n_frames`` caps how many frames are processed, useful when a full video
    does not fit in memory. The single item is ``(frames, masks)`` stacked
    along dim=1, with frames normalized to [-1, 1].
    """

    def __init__(self, image_path, mask_path, small=False, n_frames=24):
        """Test on video, n_frames can be used to process only the first n frames of a video if running into
        memory issues
        """
        super().__init__()
        # read_video yields (T, H, W, C) tensors; audio/metadata are discarded.
        self.video, _, _ = torchvision.io.read_video(image_path)
        self.mask, _, _ = torchvision.io.read_video(mask_path)
        size = (256, 256) if small else (240, 432)
        self.transforms = Compose([
            Resize(size, antialias=True),
            Normalize(0.5, 0.5)  # Change range [0,1] -> [-1, 1]
        ])
        # Nearest-neighbour so resized mask values stay exact (no blending).
        self.mask_transforms = Resize(size, interpolation=torchvision.transforms.InterpolationMode.NEAREST)
        self.n_frames = n_frames

    def __len__(self):
        # The whole (truncated) video is one sample.
        return 1

    def __getitem__(self, idx):
        count = min(self.n_frames, self.video.shape[0])
        clip, seg = [], []
        for i in range(count):
            clip.append(self.transforms(self.video[i].permute(2, 0, 1) / 255.0))
            seg.append(self.mask_transforms(self.mask[i].permute(2, 0, 1) / 255.0))
        return torch.stack(clip, dim=1), torch.stack(seg, dim=1)
"""
Copyright JingjingRenabc
Code for generating moving shapes in videos for synthetic masks
"""
def get_random_shape(edge_num=9, ratio=0.7, width=432, height=240):
    '''Rasterize a random closed blob and return it as a cropped PIL image.

    A unit circle is sampled at ``edge_num * 3 + 1`` angles; each point is
    radially perturbed, and points are joined with cubic Bezier curves, so
    only every third point can become a sharp edge (the other two control
    the curve shape).

    edge_num: number of possibly sharp edges.
    ratio: (0, 1) magnitude of the perturbation from the unit circle.
    width, height: size the rendered image is resized to before cropping.
    '''
    n_points = edge_num * 3 + 1
    angles = np.linspace(0, 2 * np.pi, n_points)
    codes = np.full(n_points, Path.CURVE4)
    codes[0] = Path.MOVETO
    # Random radial scale per point; closing by repeating the first vertex
    # instead of Path.CLOSEPOLY avoids an unnecessary straight line.
    radii = (2 * ratio * np.random.random(n_points) + 1 - ratio)[:, None]
    verts = np.stack((np.cos(angles), np.sin(angles))).T * radii
    verts[-1, :] = verts[0, :]
    blob = Path(verts, codes)
    # Render the path with matplotlib.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.add_patch(patches.PathPatch(blob, facecolor='black', lw=2))
    ax.set_xlim(np.min(verts) * 1.1, np.max(verts) * 1.1)
    ax.set_ylim(np.min(verts) * 1.1, np.max(verts) * 1.1)
    ax.axis('off')  # removes the axis to leave only the shape
    fig.canvas.draw()
    # Grab the rendered RGB buffer as an (H, W, 3) uint8 array.
    buf = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
    buf = buf.reshape(fig.canvas.get_width_height()[::-1] + (3,))
    plt.close(fig)
    # Binarize (shape -> 255, background -> 0) and crop to the bounding box.
    gray = cv2.resize(buf, (width, height))[:, :, 0]
    gray = (1 - np.array(gray > 0).astype(np.uint8)) * 255
    rows, cols = np.where(gray > 0)
    # PIL crop box is (left, upper, right, lower) = (col/row mins and maxes).
    box = (np.min(cols), np.min(rows), np.max(cols), np.max(rows))
    return Image.fromarray(gray).crop(box)
def create_random_shape_with_random_motion(video_length, imageHeight=240, imageWidth=432):
    """Build ``video_length`` single-channel PIL mask frames of a random blob.

    With probability 0.5 the blob stays fixed for the whole clip; otherwise it
    drifts under a randomly accelerating velocity and gets a fresh velocity
    whenever it would leave the frame.
    """
    # Random blob size and shape parameters.
    blob_h = random.randint(imageHeight // 2, imageHeight)
    blob_w = random.randint(imageWidth // 2, imageWidth)
    n_edges = random.randint(6, 8)
    perturb = random.randint(6, 8) / 10
    region = get_random_shape(
        edge_num=n_edges, ratio=perturb, height=blob_h, width=blob_w)
    region_width, region_height = region.size
    # Random top-left position; x indexes rows, y indexes columns.
    x = random.randint(0, imageHeight - region_height)
    y = random.randint(0, imageWidth - region_width)
    velocity = get_random_velocity(max_speed=2)

    def render(row, col):
        # Paste the blob onto a black canvas at (row, col).
        canvas = Image.fromarray(np.zeros((imageHeight, imageWidth)).astype(np.uint8))
        canvas.paste(region, (col, row, col + region.size[0], row + region.size[1]))
        return canvas.convert('L')

    masks = [render(x, y)]
    # Fixed masks: repeat the first frame for the whole clip.
    if random.uniform(0, 1) > 0.5:
        return masks * video_length
    # Moving masks: advance the blob one step per frame.
    for _ in range(video_length - 1):
        x, y, velocity = random_move_control_points(
            x, y, imageHeight, imageWidth, velocity, region.size,
            maxLineAcceleration=(2, 0.5), maxInitSpeed=2)
        masks.append(render(x, y))
    return masks
def random_accelerate(velocity, maxAcceleration, dist='uniform'):
    """Perturb a (speed, angle) velocity by a random acceleration.

    dist: 'uniform' draws deltas in [-max, max]; 'guassian' draws from
    N(0, max / 2). Any other name raises NotImplementedError.
    Returns the new (speed, angle) tuple.
    """
    speed, angle = velocity
    max_ds, max_da = maxAcceleration
    if dist == 'uniform':
        delta = (np.random.uniform(-max_ds, max_ds),
                 np.random.uniform(-max_da, max_da))
    elif dist == 'guassian':
        delta = (np.random.normal(0, max_ds / 2),
                 np.random.normal(0, max_da / 2))
    else:
        raise NotImplementedError(
            f'Distribution type {dist} is not supported.')
    return (speed + delta[0], angle + delta[1])
def get_random_velocity(max_speed=3, dist='uniform'):
    """Draw a random (speed, angle) velocity.

    max_speed: upper bound on the drawn speed.
    dist: 'uniform' draws speed uniformly from [0, max_speed);
          'guassian' draws |N(0, max_speed / 2)|.
    Raises NotImplementedError for any other distribution name.
    Returns a (speed, angle) tuple with angle uniform in [0, 2*pi).
    """
    if dist == 'uniform':
        # BUG FIX: the original called np.random.uniform(max_speed), which is
        # uniform(low=max_speed, high=1.0) — NOT a sample from [0, max_speed).
        speed = np.random.uniform(0, max_speed)
    elif dist == 'guassian':
        speed = np.abs(np.random.normal(0, max_speed / 2))
    else:
        raise NotImplementedError(
            f'Distribution type {dist} is not supported.')
    angle = np.random.uniform(0, 2 * np.pi)
    return (speed, angle)
def random_move_control_points(X, Y, imageHeight, imageWidth, lineVelocity, region_size, maxLineAcceleration=(3, 0.5), maxInitSpeed=3):
    """Advance position (X, Y) one step along lineVelocity, then accelerate it.

    X indexes rows (height axis), Y indexes columns (width axis), and
    region_size is the PIL-style (width, height) of the pasted blob. If the
    blob would leave the frame, a fresh random velocity is drawn and the
    position is clamped back inside. Returns (new_X, new_Y, lineVelocity).
    """
    region_width, region_height = region_size
    speed, angle = lineVelocity
    # Integer step along the current heading.
    X += int(speed * np.cos(angle))
    Y += int(speed * np.sin(angle))
    lineVelocity = random_accelerate(
        lineVelocity, maxLineAcceleration, dist='guassian')
    out_of_bounds = (
        X < 0 or X > imageHeight - region_height
        or Y < 0 or Y > imageWidth - region_width
    )
    if out_of_bounds:
        # Bounced off an edge: restart with a fresh random velocity.
        lineVelocity = get_random_velocity(maxInitSpeed, dist='guassian')
    new_X = np.clip(X, 0, imageHeight - region_height)
    new_Y = np.clip(Y, 0, imageWidth - region_width)
    return new_X, new_Y, lineVelocity
if __name__ == '__main__':
    # Smoke-test the mask generation: write each frame as a zero-padded PNG.
    frames = create_random_shape_with_random_motion(48)
    for idx, frame in enumerate(frames):
        frame.save(f"{idx:03d}.png")