-
Notifications
You must be signed in to change notification settings - Fork 198
/
create_data_files.py
61 lines (47 loc) · 1.43 KB
/
create_data_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import glob
import os
import numpy as np
import skimage.io
# Discover the classes: each sub-directory of data/train is one class.
# Only real directories are kept, so a stray regular file placed directly in
# data/train is not counted as a class and cannot shift the label indices.
directories = [d for d in glob.glob("data/train/*") if os.path.isdir(d)]
class_names = sorted(os.path.basename(d) for d in directories)
num_classes = len(class_names)

# Paths are sorted so that file order (and therefore the order of the labels
# built below) does not depend on the order glob happens to return.
paths_train = sorted(glob.glob("data/train/*/*"))
paths_test = sorted(glob.glob("data/test/*"))
paths = {
    'train': paths_train,
    'test': paths_test,
}

# Class name -> position in the sorted class list, built once instead of
# doing a linear class_names.index() search for every training file.
_class_index = dict(zip(class_names, range(num_classes)))

# labels_train[k] is the class index of paths['train'][k]; the class of a
# file is the name of the directory that contains it.
labels_train = np.zeros(len(paths_train), dtype='int32')
for k, path in enumerate(paths_train):
    class_name = os.path.basename(os.path.dirname(path))
    labels_train[k] = _class_index[class_name]
def load(subset='train'):
    """
    Read every image of one subset from disk and keep them all in memory.

    subset: key into the module-level ``paths`` dict ('train' or 'test').

    Returns a 1-D numpy array of dtype object holding one entry per path,
    in the same order as ``paths[subset]``. Each entry is whatever
    ``skimage.io.imread`` returns for that file when asked for greyscale.
    """
    filenames = paths[subset]
    # One object slot per image; presumably because the images do not all
    # share one shape and so cannot be stacked -- TODO confirm.
    loaded = np.empty(len(filenames), dtype='object')
    for slot, filename in enumerate(filenames):
        # NOTE(review): newer scikit-image releases spell this keyword
        # `as_gray`; confirm against the installed version before upgrading.
        loaded[slot] = skimage.io.imread(filename, as_grey=True)
    return loaded
def _gzip_file(path):
    """
    Compress ``path`` in place by running the external ``gzip`` command.

    ``os.system`` reports failure only through its return value, so a
    non-zero status is printed as a warning instead of being dropped
    silently. The script carries on either way.
    """
    status = os.system("gzip " + path)
    if status != 0:
        print("Warning: gzip failed for %s (status %d)" % (path, status))


# Each print(...) call takes a single string, which prints the same text
# under both Python 2 and Python 3.
print("Saving train labels")
np.save("data/labels_train.npy", labels_train)
print("Gzipping train labels")
_gzip_file("data/labels_train.npy")

print("Loading train images")
images_train = load('train')
print("Saving train images")
np.save("data/images_train.npy", images_train)
# Drop the reference so the array can be freed before the next subset loads.
del images_train
print("Gzipping train images")
_gzip_file("data/images_train.npy")

print("Loading test images")
images_test = load('test')
# Progress message added to mirror the train subset above.
print("Saving test images")
np.save("data/images_test.npy", images_test)
del images_test
print("Gzipping test images")
_gzip_file("data/images_test.npy")

print("Done")