forked from tsunghao-huang/SVM-Fruit-Image-Classifier
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
208 lines (200 loc) · 8.27 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
import random
from sklearn import svm
import numpy as np
import os
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV
from skimage import io, filters
from skimage.transform import resize
from skimage.feature import hog, ORB, CENSURE, corner_peaks, corner_harris, BRIEF
from sklearn.externals import joblib
# Fruit classes to train on. Choose from: apple, banana, pineapple, kiwi.
# Every entry needs a matching './fruit/<name>_train/' folder of images, and
# the saved model file is named after the first two entries.
# Example:
classes = ['banana', 'pineapple']
def main():
    """Extract features from the training images, train an SVM and report it.

    For every class in ``classes`` each file in ``./fruit/<class>_train/`` is
    read as a grayscale image and turned into one feature vector: the HOG
    descriptor, the intensity histogram and the optional features switched on
    in ``useList``.  The vectors are zero-padded to a common length, a grid
    search is run when the module is executed as a script, and an RBF-kernel
    SVC with fixed ``C``/``gamma`` is fitted, saved with ``joblib.dump`` and
    scored with 10-fold cross-validation.  The scores are printed.

    Raises:
        ValueError: if the training folders contain no files.
    """
    trainPaths = ['./fruit/' + c + '_train/' for c in classes]

    # Feature-selection switches, in order: CENSURE keypoints, ORB keypoints,
    # Sobel edge histograms, Harris corner peaks.  selectFeatures(useList)
    # scores a given combination via grid search (very long runtime).
    useList = [True, True, True, True]

    DataSet = []
    LabelSet = []
    for className, path in zip(classes, trainPaths):
        # The label is the position of the class name in `classes`.
        label = classes.index(className)
        # Feature detectors/extractors, created once per class.
        detector = CENSURE()
        detector2 = ORB(n_keypoints=50)

        for infile in os.listdir(path):
            # NOTE(review): `as_grey` / `visualise` are the keyword spellings
            # of older scikit-image releases (newer ones use `as_gray` /
            # `visualize`) -- confirm against the installed version.
            img = io.imread(path + infile, as_grey=True)
            # Intensity histogram is taken before the image is resized.
            hist = np.histogram(img, bins=256)
            img = resize(img, (400, 400))
            # ORB features are extracted up front and only appended below
            # when their switch is on.
            detector2.detect_and_extract(img)

            fd = hog(img, orientations=9, pixels_per_cell=(32, 32),
                     cells_per_block=(1, 1), visualise=False)
            # np.histogram returns (counts, bin_edges); both are appended.
            # NOTE(review): the bin edges are probably not meant to be
            # features -- confirm before changing, it alters the vector layout.
            for h in hist:
                fd = np.append(fd, h)

            # Feature selection happens here (np.append flattens its input).
            if useList[0]:
                detector.detect(img)
                fd = np.append(fd, detector.keypoints)
            if useList[1]:
                fd = np.append(fd, detector2.keypoints)
            if useList[2]:
                fd = np.append(fd, edgeExtract(img, 100))
            if useList[3]:
                corners = corner_peaks(corner_harris(img), min_distance=1)
                fd = np.append(fd, corners)

            DataSet.append(fd)
            LabelSet.append(label)

    if not DataSet:
        raise ValueError('no training images found in %s' % trainPaths)

    # The number of keypoints/corners differs per image, so pad every vector
    # with zeros up to the longest one; the SVM needs equal-length inputs.
    longest = max(len(fd) for fd in DataSet)
    DataSet = [np.pad(fd, (0, longest - len(fd)), 'constant') for fd in DataSet]

    # Grid search runs only when the module is executed as a script.
    if __name__ == '__main__':
        gridSearch(DataSet, LabelSet)

    # Train and examine an SVM with fixed values for comparison.
    clf = svm.SVC(kernel='rbf', C=10.0, gamma=1.0000000000000001e-09)
    clf.fit(DataSet, LabelSet)
    joblib.dump(clf, classes[0]+' '+ classes[1]+'.pk1')
    scores = cross_val_score(clf, DataSet, LabelSet, cv=10)
    print(scores)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
# Extract edge histograms (callers pass bins=100).
def edgeExtract(img, bins):
    """Return the Sobel edge histograms of ``img`` as one flat list.

    Two density-normalised histograms are computed: one of the vertical
    Sobel response over the range (0, 1) and one of the horizontal Sobel
    response over the range (-180, 180).

    Args:
        img: image array passed to the Sobel filters.
        bins: number of bins for each histogram.

    Returns:
        A list holding the vertical histogram followed by the horizontal
        one (``2 * bins`` values when ``bins`` is an integer).
    """
    vertical = filters.sobel_v(img)
    horizontal = filters.sobel_h(img)
    # `density=True` replaces the `normed=True` keyword, which current NumPy
    # no longer accepts; for equal-width bins the result is the same.
    vertical_hist, _ = np.histogram(vertical, bins, range=(0, 1),
                                    density=True)
    horizontal_hist, _ = np.histogram(horizontal, bins, range=(-180, 180),
                                      density=True)
    return vertical_hist.tolist() + horizontal_hist.tolist()
# Perform a grid search over C and gamma.
# Pass verbalize=False when searching for the optimal feature-selection list.
def gridSearch(DataSet, LabelSet, verbalize = True):
    """Grid-search the SVC hyper-parameters ``C`` and ``gamma``.

    ``C`` is taken from 1e-2 .. 1e10 and ``gamma`` from 1e-9 .. 1e3, thirteen
    log-spaced values each, evaluated on five stratified shuffle splits that
    hold out 20% of the data.

    Args:
        DataSet: feature vectors, one per sample.
        LabelSet: class labels matching ``DataSet``.
        verbalize: when true, search with ``n_jobs=-1`` and print the best
            parameters and score; when false, search with ``n_jobs=1`` and
            return the best score instead.

    Returns:
        The best cross-validation score when ``verbalize`` is false,
        otherwise ``None``.
    """
    search_space = {
        'gamma': np.logspace(-9, 3, 13),
        'C': np.logspace(-2, 10, 13),
    }
    splitter = StratifiedShuffleSplit(n_splits=5, test_size=0.2,
                                      random_state=42)
    # All cores for the reporting run, a single job for the quiet run.
    worker_count = -1 if verbalize else 1
    search = GridSearchCV(SVC(), param_grid=search_space, cv=splitter,
                          n_jobs=worker_count)
    search.fit(DataSet, LabelSet)

    if not verbalize:
        # Score used to compare different feature selections.
        return search.best_score_
    print("The best parameters are %s with a score of %0.2f"
          % (search.best_params_, search.best_score_))
# Same as the main function, but a random sample of 100 images is drawn for each class and the grid-search score is returned.
def selectFeatures(useList):
    """Score one feature selection with a grid search on sampled images.

    Builds feature vectors the same way ``main`` does, but from a random
    sample of up to 100 files per class, and hands them to ``gridSearch``.

    Args:
        useList: four flags switching on, in order, CENSURE keypoints, ORB
            keypoints, Sobel edge histograms and Harris corner peaks.

    Returns:
        The best grid-search score, or ``0`` when the module is not being
        run as a script (the search is skipped in that case).

    Raises:
        ValueError: if the training folders contain no files.
    """
    trainPaths = ['./fruit/' + c + '_train/' for c in classes]

    DataSet = []
    LabelSet = []
    for className, path in zip(classes, trainPaths):
        label = classes.index(className)
        detector = CENSURE()
        detector2 = ORB(n_keypoints=50)

        files = os.listdir(path)
        # Cap the sample size so folders with fewer than 100 files work
        # instead of making random.sample raise.
        files = random.sample(files, min(100, len(files)))

        for infile in files:
            # NOTE(review): `as_grey` / `visualise` are the keyword spellings
            # of older scikit-image releases (newer ones use `as_gray` /
            # `visualize`) -- confirm against the installed version.
            img = io.imread(path + infile, as_grey=True)
            # Intensity histogram is taken before the image is resized.
            hist = np.histogram(img, bins=256)
            img = resize(img, (400, 400))
            detector2.detect_and_extract(img)

            fd = hog(img, orientations=9, pixels_per_cell=(32, 32),
                     cells_per_block=(1, 1), visualise=False)
            # np.histogram returns (counts, bin_edges); both are appended.
            for h in hist:
                fd = np.append(fd, h)

            # Optional features; np.append flattens whatever it is given.
            if useList[0]:
                detector.detect(img)
                fd = np.append(fd, detector.keypoints)
            if useList[1]:
                fd = np.append(fd, detector2.keypoints)
            if useList[2]:
                fd = np.append(fd, edgeExtract(img, 100))
            if useList[3]:
                corners = corner_peaks(corner_harris(img), min_distance=1)
                fd = np.append(fd, corners)

            DataSet.append(fd)
            LabelSet.append(label)

    if not DataSet:
        raise ValueError('no training images found in %s' % trainPaths)

    # Zero-pad every vector to the longest one so all samples have the same
    # number of features.
    longest = max(len(fd) for fd in DataSet)
    DataSet = [np.pad(fd, (0, longest - len(fd)), 'constant') for fd in DataSet]

    # Single-threaded grid search, run only when executed as a script.
    if __name__ == '__main__':
        return gridSearch(DataSet, LabelSet, False)
    return 0
# Run the training pipeline only when this file is executed as a script, so
# that importing the module (e.g. to reuse edgeExtract) does not retrain.
if __name__ == '__main__':
    main()