-
Notifications
You must be signed in to change notification settings - Fork 0
/
create_data_files_folders.py
293 lines (247 loc) · 10.8 KB
/
create_data_files_folders.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# adapted from https://github.com/pitsios-s/SVHN-Thesis/blob/master/src/multi_digit/svhn.py
import h5py
import os
import numpy as np
import pandas as pd
import PIL.Image as Image
import joblib
import random
import tensorflow as tf
tf.enable_eager_execution()
class ImageProcessor:
def __init__(self, output_dir, max_labels=5, normalise=True, grey=True):
self.PIXEL_DEPTH = 255
self.NUM_LABELS = 11
self.OUT_HEIGHT = 64
self.OUT_WIDTH = 64
self.OUT_CHANNELS = 3
self.max_labels = max_labels
self.output_dir = output_dir
self.file = None
self.digit_struct_name = None
self.digit_struct_bbox = None
self.normalise = normalise
self.greyscale = grey
def get_image_name(self, n):
"""Return current image's name
Args:
n: The index of image
Returns:
The name of the image
"""
name = ''.join([chr(char)
for char in self.file[self.digit_struct_name[n][0]].value])
return name
def bounding_box_att_extract(self, att):
"""Returns the specified attribute from the .mat bounding box struct
Args:
att: The attribute to extract
Returns:
The value of the attribute
"""
if len(att) > 1:
attribute = [self.file[att.value[i].item()].value[0][0]
for i in range(len(att))]
else:
attribute = [att.value[0][0]]
return attribute
def get_bounding_box(self, n):
"""Extracts and returns the bounding box info, regarding the n-th image
Args:
n: index of the image
Returns:
Bounding box information about the n-th image
"""
bbox = {}
bb = self.digit_struct_bbox[n].item()
bbox['label'] = self.bounding_box_att_extract(self.file[bb]['label'])
bbox['top'] = self.bounding_box_att_extract(self.file[bb]['top'])
bbox['left'] = self.bounding_box_att_extract(self.file[bb]['left'])
bbox['height'] = self.bounding_box_att_extract(self.file[bb]['height'])
bbox['width'] = self.bounding_box_att_extract(self.file[bb]['width'])
return bbox
def get_digit_structure(self, n):
"""Creates and returns the whole structure of the image, based on bbox
Args:
n: The index of image
Returns:
Image structure
"""
structure = self.get_bounding_box(n)
structure['name'] = self.get_image_name(n)
return structure
def get_all_images_and_digit_structure(self):
"""Loops through images and returns array containing structure of each image
"""
structures = []
for i in range(len(self.digit_struct_name)):
structure = self.get_digit_structure(i)
if len(structure['label']) <= self.max_labels:
structures.append(structure)
return structures
def save_data(self, original, cropped, labels, name):
"""
Saves image data and labels in hdf5 format
"""
# only create files if not created already
if not os.path.isfile(name+"_original.h5"):
original_output_file = h5py.File(os.path.join(
self.output_dir, name + "_original.h5"), 'w')
original_output_file.create_dataset(
name + "_dataset", data=original)
original_output_file.create_dataset(name+'_labels', data=labels)
if not os.path.isfile(name+"_cropped.h5"):
cropped_output_file = h5py.File(os.path.join(
self.output_dir, name + "_cropped.h5"), 'w')
cropped_output_file.create_dataset(name + "_dataset", data=cropped)
cropped_output_file.create_dataset(name+"_labels", data=labels)
def save_image(self, data_dir, original_image, cropped_image, rand_crop, label, name, original=False):
"""
Saves image in hdf5 format in a folder for each label
"""
if original == True:
if not os.path.exists(data_dir+"/original/"+label):
os.makedirs(data_dir+"/original/"+label)
if os.path.exists(data_dir+"/original/"+label):
im = Image.fromarray(original_image)
im.save(data_dir +
'/original/'+'/'+name)
elif original == False:
if not os.path.exists(data_dir+"/cropped/"):
os.makedirs(data_dir+'/cropped/')
if os.path.exists(data_dir+'/cropped/'):
im = Image.fromarray(cropped_image)
im.save(data_dir +
'/cropped/'+name)
im_rand = Image.fromarray(rand_crop)
im_rand.save(data_dir+'/cropped/'+'rand_crop_'+name)
def load_data(self, name, dataset):
"""Loads an hdf5 file that contains the image data and labels
Args:
name: the name of the image file
Returns:
The data and labels arrays
"""
input_file = h5py.File(os.path.join(
self.output_dir, name+"_"+dataset+".h5"), 'r')
data = input_file[name+'_dataset'][:]
labels = input_file[name+'_labels'][:]
return data, labels
def read_digit_struct(self, data_dir):
"""Reads the digitStruct file and returns the name of
the image along with its corresponding bounding box
"""
struct_file = data_dir + '/digitStruct.mat'
self.file = h5py.File(struct_file, 'r')
self.digit_struct_name = self.file['digitStruct']['name']
self.digit_struct_bbox = self.file['digitStruct']['bbox']
structs = self.get_all_images_and_digit_structure()
return structs
def process_file(self, data_dir, save_data_dir, dataset='train'):
"""Process all images one by one and return them, together with their labels
Args:
data_dir: The directory that contains all the images and the structure file
Returns:
The processed images and their labels
"""
structs = self.read_digit_struct(data_dir)
data_count = len(structs)
image_data = np.zeros((1, self.OUT_HEIGHT,
self.OUT_WIDTH, self.OUT_CHANNELS), dtype=np.float32)
cropped_data = np.zeros((1, self.OUT_HEIGHT,
self.OUT_WIDTH, self.OUT_CHANNELS), dtype=np.float32)
df = pd.DataFrame(columns=['filename', 'label'])
for i in range(data_count):
label = structs[i]['label']
file_name = os.path.join(data_dir, structs[i]['name'])
top = structs[i]['top']
left = structs[i]['left']
height = structs[i]['height']
width = structs[i]['width']
image_data, cropped_data, random_crop = self.create_image_array(
file_name, top, left, height, width)
df = df.append(
{'filename': save_data_dir+"/cropped/"+structs[i]["name"], 'label': label}, ignore_index=True)
df = df.append({"filename": save_data_dir+"/cropped/" + "rand_crop_" +
structs[i]['name'], "label": label}, ignore_index=True)
self.save_image(save_data_dir, image_data,
cropped_data, random_crop, label, structs[i]['name'])
joblib.dump(df, './Data/processed/'+dataset+'/metadata_df.h5')
def create_label_array(self, labels):
"""Creates the label array
"""
num_digits = len(labels)
labels_array = np.ones([self.max_labels], dtype=np.int32)*10
one_hot_labels = np.zeros(
(self.max_labels, self.NUM_LABELS), dtype=np.int32)
for n in range(num_digits):
if labels[n] == 10:
labels[n] = 0
labels_array[n] = labels[n]
for n in range(len(labels_array)):
one_hot_labels[n] = self.one_hot_encode(labels_array[n])
return one_hot_labels
def one_hot_encode(self, number):
one_hot = np.zeros(shape=self.NUM_LABELS, dtype=np.int32)
one_hot[number] = 1
return one_hot
def create_image_array(self, file_name, top, left, height, width):
# Load image
image = Image.open(file_name)
# Find the top corner of bounding box
image_top = np.amin(top)
# Find left corner of bounding box
image_left = np.amin(left)
image_height = np.amax(top) + height[np.argmax(top)] - image_top
image_width = np.amax(left) + width[np.argmax(left)] - image_left
# Find smallest possible bounding box + expand by 30%
# following advice by Ian Goodfellow et al.
bbox_left = np.floor(image_left - 0.1 * image_width)
bbox_top = np.floor(image_top - 0.1 * image_height)
bbox_right = np.amin(
[np.ceil(bbox_left + 1.3 * image_width), image.size[0]])
bbox_bottom = np.amin([
np.ceil(image_top + 1.3 * image_height), image.size[1]])
# Create cropped image
cropped_image = image.crop((bbox_left, bbox_top, bbox_right, bbox_bottom)).resize(
[self.OUT_HEIGHT, self.OUT_WIDTH])
# Resize original
image = image.resize([self.OUT_HEIGHT, self.OUT_WIDTH])
# Convert original image and cropped image to np array
image_array = np.array(image)
cropped_array = np.array(cropped_image)
# Randomly crop cropped image
rand_crop = tf.image.random_crop(cropped_array, size=(54, 54, 3))
# Resize cropped
cropped_image = cropped_image.resize((54, 54))
cropped_array = np.array(cropped_image)
return image_array, cropped_array, rand_crop.numpy()
def main():
image_processor = ImageProcessor(
"./Data/processed", max_labels=5, normalise=True, grey=False)
if os.path.exists('./Data/processed/Done'):
return
# Training set
if not os.path.isdir("./Data/processed"):
print("Making directory: ./Data/processed")
os.makedirs("./Data/processed")
if not os.path.isdir("./Data/processed/train"):
os.mkdir('./Data/processed/train')
print('Making directory ./Data/processed/train')
image_processor.process_file(
'./Data/train', './Data/processed/train', dataset='train')
# Test set
if not os.path.isdir('./Data/processed/test'):
print("Making directory ./Data/processed/test")
os.mkdir('./Data/processed/test')
image_processor.process_file(
'./Data/test', './Data/processed/test', dataset='test')
# Extra Set
if not os.path.isdir('./Data/processed/extra'):
print("Making directory ./Data/processed/extra")
os.mkdir('./Data/processed/extra')
image_processor.process_file(
'./Data/extra', './Data/processed/extra', dataset='extra')
os.makedirs('./Data/processed/Done')
if __name__ == "__main__":
main()