-
Notifications
You must be signed in to change notification settings - Fork 13
/
bottleneck.py
346 lines (294 loc) · 13.5 KB
/
bottleneck.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os.path
import sys
import tarfile
from file_processing import get_all_images_path
import numpy as np
from six.moves import urllib
import tensorflow as tf
from tensorflow.python.platform import gfile
ALL_ARCHITECTURES = ['inception_v3', 'mobilenet_1.0_224', 'mobilenet_1.0_192', 'mobilenet_1.0_160',
'mobilenet_1.0_128',
'mobilenet_0.75_224', 'mobilenet_0.75_192', 'mobilenet_0.75_160', 'mobilenet_0.75_128',
'mobilenet_0.50_224', 'mobilenet_0.50_192', 'mobilenet_0.50_160', 'mobilenet_0.50_128',
'mobilenet_0.25_224', 'mobilenet_0.25_192', 'mobilenet_0.25_160', 'mobilenet_0.25_128']
def create_model_info(architecture):
"""Given the name of a model architecture, returns information about it.
There are different base image recognition pretrained models that can be
retrained using transfer learning, and this function translates from the name
of a model to the attributes that are needed to download and train with it.
Args:
architecture: Name of a model architecture.
Returns:
Dictionary of information about the model, or None if the name isn't
recognized
Raises:
ValueError: If architecture name is unknown.
"""
architecture = architecture.lower()
if architecture == 'inception_v3':
# pylint: disable=line-too-long
data_url = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'
# pylint: enable=line-too-long
bottleneck_tensor_name = 'pool_3/_reshape:0'
bottleneck_tensor_size = 2048
input_width = 299
input_height = 299
input_depth = 3
resized_input_tensor_name = 'Mul:0'
model_file_name = 'classify_image_graph_def.pb'
input_mean = 128
input_std = 128
elif architecture.startswith('mobilenet_'):
parts = architecture.split('_')
if len(parts) != 3 and len(parts) != 4:
tf.logging.error("Couldn't understand architecture name '%s'",
architecture)
return None
version_string = parts[1]
if (version_string != '1.0' and version_string != '0.75' and
version_string != '0.50' and version_string != '0.25'):
tf.logging.error(
""""The Mobilenet version should be '1.0', '0.75', '0.50', or '0.25',
but found '%s' for architecture '%s'""",
version_string, architecture)
return None
size_string = parts[2]
if (size_string != '224' and size_string != '192' and
size_string != '160' and size_string != '128'):
tf.logging.error(
"""The Mobilenet input size should be '224', '192', '160', or '128',
but found '%s' for architecture '%s'""",
size_string, architecture)
return None
if len(parts) == 3:
is_quantized = False
else:
if parts[3] != 'quantized':
tf.logging.error(
"Couldn't understand architecture suffix '%s' for '%s'", parts[3],
architecture)
return None
is_quantized = True
data_url = 'http://download.tensorflow.org/models/mobilenet_v1_'
data_url += version_string + '_' + size_string + '_frozen.tgz'
bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0'
bottleneck_tensor_size = 1001
input_width = int(size_string)
input_height = int(size_string)
input_depth = 3
resized_input_tensor_name = 'input:0'
if is_quantized:
model_base_name = 'quantized_graph.pb'
else:
model_base_name = 'frozen_graph.pb'
model_dir_name = 'mobilenet_v1_' + version_string + '_' + size_string
model_file_name = os.path.join(model_dir_name, model_base_name)
input_mean = 127.5
input_std = 127.5
else:
tf.logging.error("Couldn't understand architecture name '%s'", architecture)
raise ValueError('Unknown architecture', architecture)
return {
'data_url': data_url,
'bottleneck_tensor_name': bottleneck_tensor_name,
'bottleneck_tensor_size': bottleneck_tensor_size,
'input_width': input_width,
'input_height': input_height,
'input_depth': input_depth,
'resized_input_tensor_name': resized_input_tensor_name,
'model_file_name': model_file_name,
'input_mean': input_mean,
'input_std': input_std,
}
def maybe_download_and_extract(data_url, dest_directory):
"""Download and extract model tar file.
If the pretrained model we're using doesn't already exist, this function
downloads it from the TensorFlow.org website and unpacks it into a directory.
Args:
data_url: Web location of the tar file containing the pretrained model.
dest_directory: Where our model will be stored
"""
if not os.path.exists(dest_directory):
os.makedirs(dest_directory)
filename = data_url.split('/')[-1]
filepath = os.path.join(dest_directory, filename)
if not os.path.exists(filepath):
def _progress(count, block_size, total_size):
sys.stdout.write('\r>> Downloading %s %.1f%%' %
(filename,
float(count * block_size) / float(total_size) * 100.0))
sys.stdout.flush()
filepath, _ = urllib.request.urlretrieve(data_url, filepath, _progress)
print()
statinfo = os.stat(filepath)
tf.logging.info('Successfully downloaded', filename, statinfo.st_size,
'bytes.')
tarfile.open(filepath, 'r:gz').extractall(dest_directory)
def create_model_graph(model_info):
""""Creates a graph from saved GraphDef file and returns a Graph object.
Args:
model_info: Dictionary containing information about the model architecture.
Returns:
Graph holding the trained Inception network, and various tensors we'll be
manipulating.
"""
with tf.Graph().as_default() as graph:
model_path = os.path.join(FLAGS.model_dir, model_info['model_file_name'])
with gfile.FastGFile(model_path, 'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
bottleneck_tensor, resized_input_tensor = (tf.import_graph_def(
graph_def,
name='',
return_elements=[
model_info['bottleneck_tensor_name'],
model_info['resized_input_tensor_name'],
]))
return graph, bottleneck_tensor, resized_input_tensor
def create_model_graph_bis(model_info, model_dir):
""""Creates a graph from saved GraphDef file and returns a Graph object.
Args:
model_info: Dictionary containing information about the model architecture.
model_dir: String containing where the model is saved.
Returns:
Graph holding the trained Inception network, and various tensors we'll be
manipulating.
"""
with tf.Graph().as_default() as graph:
model_path = os.path.join(model_dir, model_info['model_file_name'])
with gfile.FastGFile(model_path, 'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
bottleneck_tensor, resized_input_tensor = (tf.import_graph_def(
graph_def,
name='',
return_elements=[
model_info['bottleneck_tensor_name'],
model_info['resized_input_tensor_name'],
]))
return graph, bottleneck_tensor, resized_input_tensor
def add_jpeg_decoding(input_width, input_height, input_depth, input_mean,
input_std):
"""Adds operations that perform JPEG decoding and resizing to the graph..
Args:
input_width: Desired width of the image fed into the recognizer graph.
input_height: Desired width of the image fed into the recognizer graph.
input_depth: Desired channels of the image fed into the recognizer graph.
input_mean: Pixel value that should be zero in the image for the graph.
input_std: How much to divide the pixel values by before recognition.
Returns:
Tensors for the node to feed JPEG data into, and the output of the
preprocessing steps.
"""
jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth)
decoded_image_as_float = tf.cast(decoded_image, dtype=tf.float32)
decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
resize_shape = tf.stack([input_height, input_width])
resize_shape_as_int = tf.cast(resize_shape, dtype=tf.int32)
resized_image = tf.image.resize_bilinear(decoded_image_4d,
resize_shape_as_int)
offset_image = tf.subtract(resized_image, input_mean)
mul_image = tf.multiply(offset_image, 1.0 / input_std)
return jpeg_data, mul_image
def run_bottleneck_on_image(sess, image_data, image_data_tensor,
decoded_image_tensor, resized_input_tensor,
bottleneck_tensor):
"""Runs inference on an image to extract the 'bottleneck' summary layer.
Args:
sess: Current active TensorFlow Session.
image_data: String of raw JPEG data.
image_data_tensor: Input data layer in the graph.
decoded_image_tensor: Output of initial image resizing and preprocessing.
resized_input_tensor: The input node of the recognition graph.
bottleneck_tensor: Layer before the final softmax.
Returns:
Numpy array of bottleneck values.
"""
# First decode the JPEG image, resize it, and rescale the pixel values.
resized_input_values = sess.run(decoded_image_tensor,
{image_data_tensor: image_data})
# Then run it through the recognition network.
bottleneck_values = sess.run(bottleneck_tensor,
{resized_input_tensor: resized_input_values})
bottleneck_values = np.squeeze(bottleneck_values)
return bottleneck_values
def get_bottlenecks_values(image_dir, architecture='mobilenet_1.0_224', model_dir='./model/'):
# Needed to make sure the logging output is visible.
# See https://github.com/tensorflow/tensorflow/issues/3047
tf.logging.set_verbosity(tf.logging.INFO)
# Gather information about the model architecture we'll be using.
model_info = create_model_info(architecture)
if not model_info:
tf.logging.error('Did not recognize architecture flag')
return -1
# Set up the pre-trained graph.
maybe_download_and_extract(model_info['data_url'], model_dir)
graph, bottleneck_tensor, resized_image_tensor = (
create_model_graph_bis(model_info, model_dir))
# Look at the folder structure, and create lists of all the images.
image_paths = get_all_images_path(image_dir)
bottlenecks = np.zeros((len(image_paths), model_info['bottleneck_tensor_size']))
with tf.Session(graph=graph) as sess:
# Set up the image decoding sub-graph.
jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding(
model_info['input_width'], model_info['input_height'],
model_info['input_depth'], model_info['input_mean'],
model_info['input_std'])
for idx, image_p in enumerate(image_paths):
image_data = gfile.FastGFile(image_p, 'rb').read()
bottlenecks[idx, :] = run_bottleneck_on_image(sess, image_data, jpeg_data_tensor, decoded_image_tensor,
resized_image_tensor, bottleneck_tensor)
return bottlenecks
def main(_):
get_bottlenecks_values(_)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--image_dir',
type=str,
default='',
help='Path to folders of labeled images.'
)
parser.add_argument(
'--summaries_dir',
type=str,
default='/tmp/retrain_logs',
help='Where to save summary logs for TensorBoard.'
)
parser.add_argument(
'--model_dir',
type=str,
default='/tmp/imagenet',
help="""\
Path to classify_image_graph_def.pb,
imagenet_synset_to_human_label_map.txt, and
imagenet_2012_challenge_label_map_proto.pbtxt.\
"""
)
parser.add_argument(
'--bottleneck_dir',
type=str,
default='/tmp/bottleneck',
help='Path to cache bottleneck layer values as files.'
)
parser.add_argument(
'--architecture',
type=str,
default='inception_v3',
help="""\
Which model architecture to use. 'inception_v3' is the most accurate, but
also the slowest. For faster or smaller models, chose a MobileNet with the
form 'mobilenet_<parameter size>_<input_size>[_quantized]'. For example,
'mobilenet_1.0_224' will pick a model that is 17 MB in size and takes 224
pixel input images, while 'mobilenet_0.25_128_quantized' will choose a much
less accurate, but smaller and faster network that's 920 KB on disk and
takes 128x128 images. See https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html
for more information on Mobilenet.\
""")
FLAGS, unparsed = parser.parse_known_args()
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)