-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathconvnet_extract.py
More file actions
100 lines (78 loc) · 3.73 KB
/
convnet_extract.py
File metadata and controls
100 lines (78 loc) · 3.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
'''
Extract convolutional (bottleneck) features from the pre-trained VGG16 network.
Adapted from https://gist.github.com/fchollet/f35fbc80e066a49d65f1688a7e99f069
'''
import numpy as np
import os
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.utils import to_categorical
from data import DataGenerator
import utils
import argparse
import cPickle as pkl
from tqdm import tqdm
def get_create_results_dir(config_name, base_results_dir):
    '''
    Return the results directory for a config, creating it when missing.

    The directory is `base_results_dir/config_name`; intermediate
    directories are created as needed.

    Raises OSError if the directory cannot be created, or if the path
    already exists but is not a directory.
    '''
    import errno  # local import: only needed for the EEXIST check below

    results_dir = os.path.join(base_results_dir, config_name)
    try:
        # Create first and handle "already exists" afterwards: checking
        # os.path.exists() beforehand is racy when several extraction jobs
        # start at the same time.
        os.makedirs(results_dir)
    except OSError as exc:
        if exc.errno != errno.EEXIST or not os.path.isdir(results_dir):
            raise
    return results_dir
def save_representation(features, labels, results_dir, config):
    '''
    Pickle each feature entry to its own `.pkl` file under `results_dir`.

    `features[i]` is written for `labels[i]`. A label with more than one
    element is unpacked as `(category, source)` and stored at
    `results_dir/category/source.pkl`; a single-element label `(source,)`
    is stored at `results_dir/source.pkl`. Missing target directories are
    created on the fly.

    `config` is accepted for interface compatibility; it is not used here.
    '''
    for i, label in enumerate(labels):
        if len(label) > 1:
            category, source = label
            target_dir = os.path.join(results_dir, category)
        else:
            source = label[0]
            target_dir = results_dir
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        filename = os.path.join(target_dir, '{}.pkl'.format(source))
        # Pickle streams are bytes: binary mode is required on Python 3 and
        # avoids newline translation of the payload on Windows.
        with open(filename, 'wb') as f:
            pkl.dump(features[i], f)
def save_bottleneck_features(config_name, data_dir, base_results_dir,
                             batch_size, input_shape, sample_step,
                             max_per_class=None, index_start=0,
                             classes=None, **config):
    '''
    Run every batch from `data_dir` through VGG16 and pickle the outputs.

    A `DataGenerator` flow over `data_dir` yields `(X, y, sources)` batches.
    Each batch is passed through VGG16 (no top layers, ImageNet weights) and
    the predictions are handed to `save_representation`, labelled with
    `(category, source)` pairs parsed from the source paths. Results go to
    `base_results_dir/config_name/<data_split>/`.

    Source paths are read as `.../<data_split>/<category>/<file>`.
    Only the first two entries of `input_shape` are used (as the generator's
    target size). Extra keyword arguments are collected in `config` and
    forwarded to `save_representation`.
    '''
    flow = DataGenerator(batch_size=batch_size,
                         return_sources=True,
                         fn_preprocess=preprocess_input,
                         shuffle=False, sample_step=sample_step,
                         target_size=input_shape[:2],
                         classes=classes,
                         index_start=index_start,
                         max_per_class=max_per_class).flow_from_directory(data_dir)
    batches = iter(flow)

    # Convolutional base only: the classifier head is left out.
    model = VGG16(include_top=False, weights='imagenet',
                  input_shape=flow.data_shape)

    results_dir = get_create_results_dir(config_name, base_results_dir)

    n_batches = len(flow)
    print('Number of batches: {}'.format(n_batches))

    for _ in tqdm(range(n_batches)):
        batch_x, _batch_y, batch_sources = next(batches)
        batch_features = model.predict(batch_x, flow.batch_size)

        batch_labels = []
        for src_path in batch_sources:
            # Peel the path from the right: file name, category, data split.
            parent, file_name = os.path.split(src_path)
            parent, category = os.path.split(parent)
            data_split = os.path.split(parent)[1]
            # NOTE(review): only a literal '.jpg' is stripped from the name.
            batch_labels.append((category, file_name.replace('.jpg', '')))

        # NOTE(review): the split of the last source in the batch selects the
        # output directory for the whole batch.
        save_representation(batch_features, batch_labels,
                            os.path.join(results_dir, data_split), config)
if __name__ == '__main__':
    # Script entry point: resolve the named experiment config, report it,
    # then extract features for the training and validation directories.
    arg_parser = argparse.ArgumentParser(description='Extract VGG features.')
    arg_parser.add_argument(
        'config', help='experiment config name defined in settings.py')
    # parse_known_args: unrecognised command-line arguments are ignored.
    cli_args, _ignored = arg_parser.parse_known_args()

    config_name, config = utils.get_config(vars(cli_args))

    print('\n==> Starting feature extraction: {}'.format(config['description']))
    print('\n==> Using configuration:\n{}'.format(utils.get_config_str(config)))

    # Training data first, then validation data, with identical settings.
    for dir_key in ('training_data_dir', 'validation_data_dir'):
        save_bottleneck_features(config_name, config[dir_key], **config)