# inception.py
########################################################################
#
# The Inception Model v3 for TensorFlow.
#
# This is a pre-trained Deep Neural Network for classifying images.
# You provide an image or filename for a jpeg-file which will be
# loaded and input to the Inception model, which will then output
# an array of numbers indicating how likely it is that the
# input-image is of each class.
#
# See the example code at the bottom of this file or in the
# accompanying Python Notebooks.
#
# Tutorial #07 shows how to use the Inception model.
# Tutorial #08 shows how to use it for Transfer Learning.
#
# What is Transfer Learning?
#
# Transfer Learning is the use of a Neural Network for classifying
# images from another data-set than it was trained on. For example,
# the Inception model was trained on the ImageNet data-set using
# a very powerful and expensive computer. But the Inception model
# can be re-used on data-sets it was not trained on without having
# to re-train the entire model, even though the number of classes
# is different for the two data-sets. This allows you to use the
# Inception model on your own data-sets without the need for a
# very powerful and expensive computer to train it.
#
# The last layer of the Inception model before the softmax-classifier
# is called the Transfer Layer because the output of that layer will
# be used as the input in your new softmax-classifier (or as the
# input for another neural network), which will then be trained on
# your own data-set.
#
# The output values of the Transfer Layer are called Transfer Values.
# These are the actual values that will be input to your new
# softmax-classifier or to another neural network that you create.
#
# The word 'bottleneck' is also sometimes used to refer to the
# Transfer Layer or Transfer Values, but it is a confusing word
# that is not used here.
#
# Implemented in Python 3.5 with TensorFlow v0.10.0rc0
#
########################################################################
#
# This file is part of the TensorFlow Tutorials available at:
#
# https://github.com/Hvass-Labs/TensorFlow-Tutorials
#
# Published under the MIT License. See the file LICENSE for details.
#
# Copyright 2016 by Magnus Erik Hvass Pedersen
#
########################################################################
import numpy as np
import tensorflow as tf
import download
from cache import cache
import os
import sys
########################################################################
# Various directories and file-names.
# Internet URL for the tar-file with the Inception model.
# Note that this might change in the future and will need to be updated.
data_url = "http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz"
# Directory to store the downloaded data.
data_dir = "inception/"
# File containing the mappings between class-number and uid. (Downloaded)
path_uid_to_cls = "imagenet_2012_challenge_label_map_proto.pbtxt"
# File containing the mappings between uid and string. (Downloaded)
path_uid_to_name = "imagenet_synset_to_human_label_map.txt"
# File containing the TensorFlow graph definition. (Downloaded)
path_graph_def = "classify_image_graph_def.pb"
########################################################################
def maybe_download():
    """
    Fetch the Inception v3 model from the internet unless it is
    already present in data_dir. The download is roughly 85 MB.

    The work is delegated to download.maybe_download_and_extract(),
    using the module-level data_url and data_dir.
    """

    # Status message; it is printed on every call.
    print("Downloading Inception v3 Model ...")

    download.maybe_download_and_extract(download_dir=data_dir, url=data_url)
########################################################################
class NameLookup:
    """
    Used for looking up the name associated with a class-number.
    This is used to print the name of a class instead of its number,
    e.g. "plant" or "horse".

    Maps between:
    - cls is the class-number as an integer between 1 and 1000 (inclusive).
    - uid is a class-id as a string from the ImageNet data-set, e.g. "n00017222".
    - name is the class-name as a string, e.g. "plant, flora, plant life"

    There are actually 1008 output classes of the Inception model
    but there are only 1000 named classes in these mapping-files.
    The remaining 8 output classes of the model should not be used.
    """

    def __init__(self, uid_to_name_path=None, uid_to_cls_path=None):
        """
        Read both mapping-files and build the lookup-dicts.

        :param uid_to_name_path:
            Path of the tab-separated file mapping uid to class-name.
            Defaults to path_uid_to_name inside data_dir.

        :param uid_to_cls_path:
            Path of the pbtxt-file mapping class-number to uid.
            Defaults to path_uid_to_cls inside data_dir.

        :raises ValueError:
            If a 'target_class_string' line occurs before any
            'target_class' line in the uid-to-cls file.
        """

        # Fall back to the files that are downloaded with the model.
        if uid_to_name_path is None:
            uid_to_name_path = os.path.join(data_dir, path_uid_to_name)
        if uid_to_cls_path is None:
            uid_to_cls_path = os.path.join(data_dir, path_uid_to_cls)

        # Mappings between uid, cls and name are dicts, where insertions and
        # lookup have O(1) time-usage on average, but may be O(n) in worst case.
        self._uid_to_cls = {}   # Map from uid to cls.
        self._uid_to_name = {}  # Map from uid to name.
        self._cls_to_uid = {}   # Map from cls to uid.

        # Read the uid-to-name mappings from file.
        # Each non-blank line is expected to be "<uid>\t<name>".
        with open(uid_to_name_path, mode='r', encoding='utf-8') as file:
            for line in file:
                # Remove the line-ending.
                line = line.rstrip("\r\n")

                # Blank lines (e.g. at the end of the file) carry no mapping.
                if not line:
                    continue

                # Split the line on tabs: first the uid, then the class-name.
                elements = line.split("\t")
                self._uid_to_name[elements[0]] = elements[1]

        # Read the uid-to-cls mappings from file.
        with open(uid_to_cls_path, mode='r', encoding='utf-8') as file:
            # Class-number from the most recent 'target_class' line.
            cls = None

            for line in file:
                # We assume the file is in the proper format, so the
                # relevant lines come in pairs. Other lines are ignored.
                # Surrounding whitespace is stripped before matching so the
                # parsing does not depend on the indentation of the file or
                # on whether the last line ends with a newline.
                key, _, value = line.strip().partition(":")
                value = value.strip()

                if key == "target_class":
                    # This line must be the class-number as an integer.
                    cls = int(value)
                elif key == "target_class_string":
                    # This line must be the uid as a string e.g. "n01494475"
                    if cls is None:
                        raise ValueError(
                            "target_class_string without a preceding "
                            "target_class in file: {0}".format(uid_to_cls_path))

                    # Remove the enclosing "" from the string.
                    uid = value.strip('"')

                    # Insert into the lookup-dicts for both ways between uid and cls.
                    self._uid_to_cls[uid] = cls
                    self._cls_to_uid[cls] = uid

    def uid_to_cls(self, uid):
        """
        Return the class-number as an integer for the given uid-string.

        Raises KeyError if the uid is unknown.
        """

        return self._uid_to_cls[uid]

    def uid_to_name(self, uid, only_first_name=False):
        """
        Return the class-name for the given uid string.

        Some class-names are lists of names, if you only want the first name,
        then set only_first_name=True.

        Raises KeyError if the uid is unknown.
        """

        # Lookup the name from the uid.
        name = self._uid_to_name[uid]

        # Only use the first name in the comma-separated list?
        if only_first_name:
            name = name.split(",")[0]

        return name

    def cls_to_name(self, cls, only_first_name=False):
        """
        Return the class-name from the integer class-number.

        Some class-names are lists of names, if you only want the first name,
        then set only_first_name=True.

        Raises KeyError if the class-number has no uid or name.
        """

        # Lookup the uid from the cls.
        uid = self._cls_to_uid[cls]

        # Lookup the name from the uid.
        return self.uid_to_name(uid=uid, only_first_name=only_first_name)
########################################################################
class Inception:
    """
    The Inception model is a Deep Neural Network which has already been
    trained for classifying images into 1000 different categories.

    When you create a new instance of this class, the Inception model
    will be loaded and can be used immediately without training.

    The Inception model can also be used for Transfer Learning.

    Call close() when you are done with the model so the TensorFlow
    session is closed.
    """

    # Name of the tensor for feeding the input image as jpeg.
    tensor_name_input_jpeg = "DecodeJpeg/contents:0"

    # Name of the tensor for feeding the decoded input image.
    # Use this for feeding images in other formats than jpeg.
    tensor_name_input_image = "DecodeJpeg:0"

    # Name of the tensor for the resized input image.
    # This is used to retrieve the image after it has been resized.
    tensor_name_resized_image = "ResizeBilinear:0"

    # Name of the tensor for the output of the softmax-classifier.
    # This is used for classifying images with the Inception model.
    tensor_name_softmax = "softmax:0"

    # Name of the tensor for the unscaled outputs of the softmax-classifier (aka. logits).
    tensor_name_softmax_logits = "softmax/logits:0"

    # Name of the tensor for the output of the Inception model.
    # This is used for Transfer Learning.
    tensor_name_transfer_layer = "pool_3:0"

    def __init__(self):
        """
        Load the class-name mappings and the Inception graph from the
        files in data_dir, and create a TensorFlow session for the graph.
        """

        # Mappings between class-numbers and class-names.
        # Used to print the class-name as a string e.g. "horse" or "plant".
        self.name_lookup = NameLookup()

        # Now load the Inception model from file. The way TensorFlow
        # does this is confusing and requires several steps.

        # Create a new TensorFlow computational graph.
        self.graph = tf.Graph()

        # Set the new graph as the default.
        with self.graph.as_default():

            # TensorFlow graphs are saved to disk as so-called Protocol Buffers
            # aka. proto-bufs which is a file-format that works on multiple
            # platforms. In this case it is saved as a binary file.

            # Open the graph-def file for binary reading.
            path = os.path.join(data_dir, path_graph_def)
            with tf.gfile.FastGFile(path, 'rb') as file:
                # The graph-def is a saved copy of a TensorFlow graph.
                # First we need to create an empty graph-def.
                graph_def = tf.GraphDef()

                # Then we load the proto-buf file into the graph-def.
                graph_def.ParseFromString(file.read())

                # Finally we import the graph-def to the default TensorFlow graph.
                tf.import_graph_def(graph_def, name='')

                # Now self.graph holds the Inception model from the proto-buf file.

        # Get the output of the Inception model by looking up the tensor
        # with the appropriate name for the output of the softmax-classifier.
        self.y_pred = self.graph.get_tensor_by_name(self.tensor_name_softmax)

        # Get the unscaled outputs for the Inception model (aka. softmax-logits).
        self.y_logits = self.graph.get_tensor_by_name(self.tensor_name_softmax_logits)

        # Get the tensor for the resized image that is input to the neural network.
        self.resized_image = self.graph.get_tensor_by_name(self.tensor_name_resized_image)

        # Get the tensor for the last layer of the graph, aka. the transfer-layer.
        self.transfer_layer = self.graph.get_tensor_by_name(self.tensor_name_transfer_layer)

        # Get the number of elements in the transfer-layer.
        self.transfer_len = self.transfer_layer.get_shape()[3]

        # Create a TensorFlow session for executing the graph.
        self.session = tf.Session(graph=self.graph)

    def close(self):
        """
        Call this function when you are done using the Inception model.
        It closes the TensorFlow session to release its resources.
        """

        self.session.close()

    def _write_summary(self, logdir='summary/'):
        """
        Write graph to summary-file so it can be shown in TensorBoard.

        This function is used for debugging and may be changed or removed in the future.

        :param logdir:
            Directory for writing the summary-files.

        :return:
            Nothing.
        """

        # tf.train.SummaryWriter was replaced by tf.summary.FileWriter in
        # later TensorFlow versions. Prefer the newer class when the
        # installed TensorFlow has it, otherwise use the old one.
        summary_module = getattr(tf, "summary", None)
        writer_class = getattr(summary_module, "FileWriter", None)
        if writer_class is None:
            writer_class = tf.train.SummaryWriter

        writer = writer_class(logdir=logdir, graph=self.graph)
        writer.close()

    def _create_feed_dict(self, image_path=None, image=None):
        """
        Create and return a feed-dict with an image.

        If both arguments are given then image takes precedence.

        :param image_path:
            The input image is a jpeg-file with this file-path.

        :param image:
            The input image is a 3-dim array which is already decoded.
            The pixels MUST be values between 0 and 255 (float or int).

        :return:
            Dict for feeding to the Inception graph in TensorFlow.

        :raises ValueError:
            If neither image nor image_path is given.
        """

        if image is not None:
            # Image is passed in as a 3-dim array that is already decoded.
            feed_dict = {self.tensor_name_input_image: image}

        elif image_path is not None:
            # Read the jpeg-image as an array of bytes.
            # The with-statement makes sure the file is closed again.
            with tf.gfile.FastGFile(image_path, 'rb') as file:
                image_data = file.read()

            # Image is passed in as a jpeg-encoded image.
            feed_dict = {self.tensor_name_input_jpeg: image_data}

        else:
            raise ValueError("Either image or image_path must be set.")

        return feed_dict

    def classify(self, image_path=None, image=None):
        """
        Use the Inception model to classify a single image.

        The image will be resized automatically to 299 x 299 pixels,
        see the discussion in the Python Notebook for Tutorial #07.

        :param image_path:
            The input image is a jpeg-file with this file-path.

        :param image:
            The input image is a 3-dim array which is already decoded.
            The pixels MUST be values between 0 and 255 (float or int).

        :return:
            Array of floats (aka. softmax-array) indicating how likely
            the Inception model thinks the image is of each given class.
        """

        # Create a feed-dict for the TensorFlow graph with the input image.
        feed_dict = self._create_feed_dict(image_path=image_path, image=image)

        # Execute the TensorFlow session to get the predicted labels.
        pred = self.session.run(self.y_pred, feed_dict=feed_dict)

        # Reduce the array to a single dimension.
        pred = np.squeeze(pred)

        return pred

    def get_resized_image(self, image_path=None, image=None):
        """
        Input an image to the Inception model and return
        the resized image. The resized image can be plotted so
        we can see what the neural network sees as its input.

        :param image_path:
            The input image is a jpeg-file with this file-path.

        :param image:
            The input image is a 3-dim array which is already decoded.
            The pixels MUST be values between 0 and 255 (float or int).

        :return:
            A 3-dim array holding the image, with pixel-values
            divided by 255.
        """

        # Create a feed-dict for the TensorFlow graph with the input image.
        feed_dict = self._create_feed_dict(image_path=image_path, image=image)

        # Execute the TensorFlow session to get the resized image.
        resized_image = self.session.run(self.resized_image, feed_dict=feed_dict)

        # Remove the 1st dimension of the 4-dim tensor.
        resized_image = resized_image.squeeze(axis=0)

        # Scale pixels to be between 0.0 and 1.0
        resized_image = resized_image.astype(float) / 255.0

        return resized_image

    def print_scores(self, pred, k=10, only_first_name=True):
        """
        Print the scores (or probabilities) for the top-k predicted classes.

        :param pred:
            Predicted class-labels returned from the classify() function.

        :param k:
            How many classes to print. Nothing is printed if k <= 0.

        :param only_first_name:
            Some class-names are lists of names, if you only want the first name,
            then set only_first_name=True.

        :return:
            Nothing.
        """

        # Slicing with [-k:] would select the whole array when k == 0,
        # so handle the case of nothing to print explicitly.
        if k <= 0:
            return

        # argsort() orders the indices lowest-to-highest by score.
        # Reverse that order and keep the first k, i.e. highest first.
        top_k = pred.argsort()[::-1][:k]

        for cls in top_k:
            # Lookup the class-name.
            name = self.name_lookup.cls_to_name(cls=cls, only_first_name=only_first_name)

            # Predicted score (or probability) for this class.
            score = pred[cls]

            # Print the score and class-name.
            print("{0:>6.2%} : {1}".format(score, name))

    def transfer_values(self, image_path=None, image=None):
        """
        Calculate the transfer-values for the given image.
        These are the values of the last layer of the Inception model before
        the softmax-layer, when inputting the image to the Inception model.

        The transfer-values allow us to use the Inception model in so-called
        Transfer Learning for other data-sets and different classifications.

        It may take several hours or more to calculate the transfer-values
        for all images in a data-set. It is therefore useful to cache the
        results using the function transfer_values_cache() below.

        :param image_path:
            The input image is a jpeg-file with this file-path.

        :param image:
            The input image is a 3-dim array which is already decoded.
            The pixels MUST be values between 0 and 255 (float or int).

        :return:
            The transfer-values for that image.
        """

        # Create a feed-dict for the TensorFlow graph with the input image.
        feed_dict = self._create_feed_dict(image_path=image_path, image=image)

        # Use TensorFlow to run the graph for the Inception model.
        # This calculates the values for the last layer of the Inception model
        # prior to the softmax-classification, which we call transfer-values.
        transfer_values = self.session.run(self.transfer_layer, feed_dict=feed_dict)

        # Remove all dimensions of size 1.
        transfer_values = np.squeeze(transfer_values)

        return transfer_values
########################################################################
# Batch-processing.
def process_images(fn, images=None, image_paths=None):
    """
    Call the function fn() for each image, e.g. transfer_values() from
    the Inception model above. All the results are collected in a
    numpy array which is returned.

    If both images and image_paths are given then images is used.
    A progress-message is written to stdout while processing.

    :param fn:
        Function to be called for each image. It is called with the
        keyword-argument image=... when images is given, and with
        image_path=... when image_paths is given.

    :param images:
        List of images to process.

    :param image_paths:
        List of file-paths for the images to process.

    :return:
        Numpy array with the results.

    :raises ValueError:
        If neither images nor image_paths is given.
    """

    # Are we using images or image_paths?
    # The keyword is the name of the argument passed to fn().
    if images is not None:
        items, keyword = images, "image"
    elif image_paths is not None:
        items, keyword = image_paths, "image_path"
    else:
        raise ValueError("Either images or image_paths must be set.")

    # Number of images.
    num_images = len(items)

    # The result of fn() for each image, in input order.
    result = []

    # For each input image.
    for i, item in enumerate(items, start=1):
        # Status-message. Note the \r which means the line should overwrite itself.
        msg = "\r- Processing image: {0:>6} / {1}".format(i, num_images)

        # Print the status message.
        sys.stdout.write(msg)
        sys.stdout.flush()

        # Process the image and store the result for later use.
        result.append(fn(**{keyword: item}))

    # Print newline.
    print()

    # Convert the result to a numpy array.
    return np.array(result)
########################################################################
def transfer_values_cache(cache_path, model, images=None, image_paths=None):
    """
    Return the transfer-values for the given images, going through
    the cache-file at cache_path.

    The transfer-values can be expensive to compute, so instead of
    calling transfer_values() directly on the Inception model, this
    function hands the computation to cache() together with the
    cache-file path. See Tutorial #08 for an example of its use.

    :param cache_path:
        File containing the cached transfer-values for the images.

    :param model:
        Instance of the Inception model.

    :param images:
        4-dim array with images. [image_number, height, width, colour_channel]

    :param image_paths:
        Array of file-paths for images (must be jpeg-format).

    :return:
        The transfer-values from the Inception model for those images.
    """

    # cache() takes a single fn-argument, so process_images() and its
    # own fn=model.transfer_values are bundled into one helper that
    # takes no arguments.
    def compute_transfer_values():
        return process_images(fn=model.transfer_values,
                              images=images,
                              image_paths=image_paths)

    # Hand the helper and the cache-file path to cache().
    return cache(cache_path=cache_path, fn=compute_transfer_values)
########################################################################
# Example usage.
if __name__ == '__main__':
    print(tf.__version__)

    # Download Inception model if not already done.
    maybe_download()

    # Load the Inception model so it is ready for classifying images.
    model = Inception()

    try:
        # Path for a jpeg-image that is included in the downloaded data.
        image_path = os.path.join(data_dir, 'cropped_panda.jpg')

        # Use the Inception model to classify the image.
        pred = model.classify(image_path=image_path)

        # Print the scores and names for the top-10 predictions.
        model.print_scores(pred=pred, k=10)
    finally:
        # Close the TensorFlow session, also if the classification failed.
        model.close()

    # Transfer Learning is demonstrated in Tutorial #08.
########################################################################