-
Notifications
You must be signed in to change notification settings - Fork 211
/
random_projection.py
203 lines (169 loc) · 6.47 KB
/
random_projection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# -*- coding: utf-8 -*-
from __future__ import print_function
from evaluate import evaluate_class
from DB import Database
from color import Color
from daisy import Daisy
from edge import Edge
from gabor import Gabor
from HOG import HOG
from vggnet import VGGNetFeat
from resnet import ResNetFeat
from sklearn.random_projection import johnson_lindenstrauss_min_dim
from sklearn import random_projection
import numpy as np
import itertools
import os
# Names of the feature extractors that may be fused; each name is mapped to
# its extractor class inside RandomProjection._get_feat.
feat_pools = ['color', 'daisy', 'edge', 'gabor', 'hog', 'vgg', 'res']

# Fraction of the concatenated feature dimensionality that the projected
# features are allowed to occupy (upper bound used when searching for eps).
keep_rate = 0.25

# Random projection flavour: 'sparse' or 'gaussian'.
project_type = 'sparse'

# result dir
result_dir = 'result'
try:
    # Create-then-tolerate instead of exists()-then-create: two runs started
    # at the same time can no longer race between the check and the mkdir.
    os.makedirs(result_dir)
except OSError:
    # Only swallow the error when the directory is really there; anything
    # else (permissions, a regular file with that name) is a genuine failure.
    if not os.path.isdir(result_dir):
        raise
class RandomProjection(object):
    ''' Fuse several feature descriptors and reduce them by random projection

      The per-image histograms of every requested feature are concatenated
      and then projected with scikit-learn's Gaussian or sparse random
      projection. The projected samples are cached on the instance.

      arguments
        features    : non-empty list of feature names ('color', 'daisy',
                      'edge', 'gabor', 'hog', 'vgg', 'res')
        keep_rate   : upper bound, as a fraction of the concatenated
                      dimensionality, for the projected dimensionality
        project_type: 'gaussian' or 'sparse'
    '''

    def __init__(self, features, keep_rate=keep_rate, project_type=project_type):
        assert len(features) > 0, "need to give at least one feature!"
        self.features = features
        self.keep_rate = keep_rate
        self.project_type = project_type
        self.samples = None     # cached (possibly projected) samples
        self._projected = None  # whether the cached samples were projected

    def make_samples(self, db, verbose=False):
        ''' build (or fetch from cache) the fused samples

          arguments
            db     : database object handed to every feature extractor
            verbose: print the fusion configuration when True

          return
            a list of sample dicts; 'hist' holds the projected vector, or the
            plain concatenation when no projection fits keep_rate
        '''
        if verbose:
            print("Use features {}, {} RandomProject, keep {}".format(" & ".join(self.features), self.project_type, self.keep_rate))
        if self.samples is None:
            self._build_samples(db)
        return self.samples

    def check_random_projection(self, db=None):
        ''' check if current sample can fit to random projection

          The samples are built and cached as a side effect, so a later
          make_samples() call reuses them.

          arguments
            db: database object used to build the samples when none are
                cached yet. When omitted, a module-level `db` is used (this
                is what the method historically relied on).

          return
            a boolean

          raise
            ValueError when samples must be built but no database is available
        '''
        if self.samples is None:
            if db is None:
                # Backward compatibility: callers that pass nothing used to
                # depend on a global named `db` created by the script entry.
                db = globals().get('db')
            if db is None:
                raise ValueError(
                    "check_random_projection() needs a database: pass `db` "
                    "or define a module-level `db`"
                )
            self._build_samples(db)
        # The outcome is cached with the samples, so repeated calls (or a call
        # after make_samples) return a result instead of failing.
        return bool(self._projected)

    def _build_samples(self, db):
        ''' extract, fuse and project the features, caching samples and outcome '''
        feats = [self._get_feat(db, f_class) for f_class in self.features]
        samples = self._concat_feat(db, feats)
        self.samples, self._projected = self._rp(samples)
        return self._projected

    def _get_feat(self, db, f_class):
        ''' run the extractor registered under name f_class and return its samples

          raise
            ValueError for an unknown feature name
        '''
        if f_class == 'color':
            f_c = Color()
        elif f_class == 'daisy':
            f_c = Daisy()
        elif f_class == 'edge':
            f_c = Edge()
        elif f_class == 'gabor':
            f_c = Gabor()
        elif f_class == 'hog':
            f_c = HOG()
        elif f_class == 'vgg':
            f_c = VGGNetFeat()
        elif f_class == 'res':
            f_c = ResNetFeat()
        else:
            # Previously an unknown name surfaced as an UnboundLocalError.
            raise ValueError("unknown feature {!r}".format(f_class))
        return f_c.make_samples(db, verbose=False)

    def _concat_feat(self, db, feats):
        ''' concatenate the histograms of all features image by image

          Samples of the first feature whose image is missing from any other
          feature are dropped. The inputs are not modified: every returned
          sample is a shallow copy with a new 'hist'.

          arguments
            db   : unused, kept for interface compatibility
            feats: list of sample lists, one per feature; each sample is a
                   dict with at least 'img', 'cls' and 'hist'

          return
            a list of fused sample dicts, in the order of feats[0]
        '''
        # Index every secondary feature once instead of once per sample.
        lookups = [self._to_dict(feat) for feat in feats[1:]]
        fused = []
        n_ignored = 0
        for sample in feats[0]:
            key = sample['img']
            if any(key not in lookup for lookup in lookups):
                n_ignored += 1
                continue
            hist = sample['hist']
            for lookup in lookups:
                other = lookup[key]
                assert other['cls'] == sample['cls']
                hist = np.append(hist, other['hist'])
            merged = dict(sample)
            merged['hist'] = hist
            fused.append(merged)
        if n_ignored:
            print("Ignore %d samples" % n_ignored)
        return fused

    def _to_dict(self, feat):
        ''' index a list of samples by image: img -> {'cls': ..., 'hist': ...} '''
        return {f['img']: {'cls': f['cls'], 'hist': f['hist']} for f in feat}

    def _rp(self, samples):
        ''' project the 'hist' of every sample in place

          return
            (samples, True) when the projection was applied,
            (samples, False) with a RuntimeWarning when it was not possible

          raise
            ValueError for an unknown project_type
        '''
        import warnings

        if len(samples) == 0:
            # Nothing to project; without this guard the shape lookup below
            # fails with an IndexError.
            warnings.warn("Can't fit to random projection without samples\n", RuntimeWarning)
            return samples, False

        feats = np.array([s['hist'] for s in samples])
        eps = self._get_eps(n_samples=feats.shape[0], n_dims=feats.shape[1])
        if eps == -1:
            warnings.warn(
                "Can't fit to random projection with keep_rate {}\n".format(self.keep_rate), RuntimeWarning
            )
            return samples, False

        # With only eps given, the transformers pick their own output
        # dimensionality from eps and the number of samples.
        if self.project_type == 'gaussian':
            transformer = random_projection.GaussianRandomProjection(eps=eps)
        elif self.project_type == 'sparse':
            transformer = random_projection.SparseRandomProjection(eps=eps)
        else:
            raise ValueError("unknown project_type {!r}".format(self.project_type))

        feats = transformer.fit_transform(feats)
        assert feats.shape[0] == len(samples)
        for sample, projected in zip(samples, feats):
            sample['hist'] = projected
        return samples, True

    def _get_eps(self, n_samples, n_dims, n_slice=int(1e4)):
        ''' find the smallest eps whose Johnson-Lindenstrauss dimension fits

          eps is scanned over i / n_slice for i in [1, n_slice) and the first
          value whose minimum safe dimension is at most n_dims * keep_rate is
          returned.

          return
            the eps found, or -1 when no scanned eps satisfies the bound
        '''
        new_dim = n_dims * self.keep_rate
        for i in range(1, n_slice):
            # float() keeps this a true division on Python 2 as well; integer
            # division would make eps 0 on every iteration.
            eps = float(i) / n_slice
            jl_dim = johnson_lindenstrauss_min_dim(n_samples=n_samples, eps=eps)
            if jl_dim <= new_dim:
                # The "new_dim" field of the message reports the accepted JL dimension.
                print("rate %.3f, n_dims %d, new_dim %d, dims error rate: %.4f" % (self.keep_rate, n_dims, jl_dim, (float(new_dim - jl_dim) / new_dim)))
                return eps
        return -1
def evaluate_feats(db, N, feat_pools=feat_pools, keep_rate=keep_rate, project_type=project_type, d_type='d1', depths=(None, 300, 200, 100, 50, 30, 10, 5, 3, 1)):
    ''' evaluate every N-feature combination and write the scores to a CSV file

      For each combination of N names from feat_pools a RandomProjection is
      built; combinations that cannot be projected with keep_rate are skipped.
      The remaining ones are evaluated at every depth, and one row per
      (combination, depth) is printed and written to
      <result_dir>/feature_reduction-<project_type>-keep<keep_rate>-<d_type>-<N>feats.csv

      arguments
        db          : database object forwarded to evaluate_class
        N           : number of features per combination
        feat_pools  : candidate feature names
        keep_rate   : forwarded to RandomProjection
        project_type: forwarded to RandomProjection
        d_type      : distance type forwarded to evaluate_class
        depths      : depth values forwarded one by one to evaluate_class
    '''
    out_name = 'feature_reduction-{}-keep{}-{}-{}feats.csv'.format(project_type, keep_rate, d_type, N)
    # The context manager closes the file even when an evaluation raises.
    with open(os.path.join(result_dir, out_name), 'w') as result:
        header = ["feat{}".format(i) for i in range(N)]
        header += ["depth", "distance", "MMAP"]
        result.write(",".join(header))

        for combination in itertools.combinations(feat_pools, N):
            fusion = RandomProjection(features=list(combination), keep_rate=keep_rate, project_type=project_type)
            if not fusion.check_random_projection():
                continue
            for d in depths:
                APs = evaluate_class(db, f_instance=fusion, d_type=d_type, depth=d)
                # mean per class first, then mean over the classes
                cls_MAPs = [np.mean(cls_APs) for cls_APs in APs.values()]
                r = "{},{},{},{}".format(",".join(combination), d, d_type, np.mean(cls_MAPs))
                print(r)
                result.write('\n'+r)
            print()
if __name__ == "__main__":
    # NOTE: `db` is deliberately bound at module level;
    # RandomProjection.check_random_projection looks it up as a global.
    db = Database()

    # evaluate features single-wise, double-wise, ... up to all features at
    # once; the range follows feat_pools so the two cannot drift apart
    for n_feats in range(1, len(feat_pools) + 1):
        evaluate_feats(db, N=n_feats, d_type='d1', keep_rate=keep_rate, project_type=project_type)

    # evaluate color feature
    d_type = 'd1'
    depth = 30
    fusion = RandomProjection(features=['color'], keep_rate=keep_rate, project_type=project_type)
    APs = evaluate_class(db, f_instance=fusion, d_type=d_type, depth=depth)
    cls_MAPs = []
    for cls, cls_APs in APs.items():
        MAP = np.mean(cls_APs)
        print("Class {}, MAP {}".format(cls, MAP))
        cls_MAPs.append(MAP)
    print("MMAP", np.mean(cls_MAPs))