# Script with Autogluon modules
import pandas as pd
import os, sys
import matplotlib
import matplotlib.pyplot as plt
import loaders_multilabel as mll
import time
from autogluon.tabular import TabularDataset
from autogluon.tabular import TabularPredictor as task
from autogluon.core.utils import infer_problem_type
from sklearn.metrics import precision_recall_curve, average_precision_score
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.metrics import confusion_matrix
## ------------------------------ Training ------------------------------ #
# Trains multiple models and ensembles them. Fits neural-network and tree ensembles.
# - Size of the validation set determines the VARIANCE of performance estimates
# - Bias: number of modeling decisions made based on validation performance
# K-fold CV works better for smaller datasets.
# AG ignores classes with < 10 instances (threshold is configurable).
# NOTE: You can supply the validation set manually: fit(..., tuning_data=validation_data)
#       (see the sketch after train_main below).
# dataf: train features with target column; targetcol: name of the target column
def train_main(dataf, targetcol, malinst, hostfts):
    agdir = os.getcwd() + '/AGmodels'
    modeldir = agdir + "/" + str(malinst) + "_" + str(hostfts) + "/"
    os.makedirs(modeldir, exist_ok=True)  # also creates AGmodels/ if it does not exist yet
    predictor = task(label=targetcol, path=modeldir, eval_metric='balanced_accuracy').fit(dataf, verbosity=4)
    return predictor
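
# A minimal sketch (not part of the original pipeline) of the manual-validation option
# mentioned in the NOTE above. traindf_part and validation_data are hypothetical
# placeholders for a user-provided split.
def train_with_manual_validation(traindf_part, validation_data, targetcol, modeldir):
    # With tuning_data supplied (and bagging disabled), AutoGluon skips its internal
    # train/validation split and uses validation_data for model selection instead.
    predictor = task(label=targetcol, path=modeldir,
                     eval_metric='balanced_accuracy').fit(
        train_data=traindf_part, tuning_data=validation_data, verbosity=2)
    return predictor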
# Multi-layer stacking feeds the predictions of the base models to stacker models.
# With auto_stack=True, AG automatically chooses the k-fold CV (k=10 here) and the
# number of bagging repeats (n=20).
# L: 2 layers of stacker models followed by a weighted ensemble (higher weight for the
# models that performed better); model predictions are aggregated by these weights to
# produce the final prediction.
def train_multilayerstacking(traindf, target, malinst, hostfts):
    stackdir = os.getcwd() + '/AGmodels/stacked/' + str(malinst) + "_" + str(hostfts) + "/"
    os.makedirs(stackdir, exist_ok=True)
    predstack = task(label=target, path=stackdir, eval_metric='balanced_accuracy').fit(
        train_data=traindf, auto_stack=True, verbosity=3)
    return predstack
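
# A minimal sketch (assumption, not used by this pipeline) of controlling bagging and
# stacking explicitly instead of relying on auto_stack=True. num_bag_folds, num_bag_sets
# and num_stack_levels are TabularPredictor.fit() arguments in recent AutoGluon releases
# (older versions used num_bagging_folds / stack_ensemble_levels).
def train_explicit_stacking(traindf, target, modeldir):
    predstack = task(label=target, path=modeldir,
                     eval_metric='balanced_accuracy').fit(
        train_data=traindf,
        num_bag_folds=10,    # 10-fold bagging of every base model
        num_bag_sets=2,      # repeat the bagging procedure twice
        num_stack_levels=1,  # one stacker layer on top of the bagged base models
        verbosity=3)
    return predstack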
# ------------------------- Test functions ------------------------------ #
# Prediction is done with the model that has the best validation performance.
# AG builds ensembles that maximize validation performance.
def test_main(Xtest, ytest, pred, testdf, traindf, calcftimpo=False):
    modelperf = pred.leaderboard(testdf, silent=True)
    print("[*]Model performance breakdown on Test data:")
    print(modelperf)
    ypred = pred.predict(Xtest)
    ypredproba = pred.predict_proba(Xtest)
    perf = pred.evaluate_predictions(y_true=ytest, y_pred=ypred, auxiliary_metrics=True)
    print("[*]Predictions: ", ypred)
    print("[*]Confidence in predictions:\n")
    print(pd.DataFrame(ypredproba, columns=pred.class_labels))
    # Score of each model
    print("Perf: ", perf)
    print("Getting confusion matrix.....")
    cmatrix = confusion_matrix(ytest, ypred).ravel().tolist()
    print(cmatrix)
    auc_score = roc_auc_score(ytest, ypredproba.iloc[:, 1])
    print("AUC score for best model: ", auc_score)
    ftimpo = None
    if calcftimpo:
        ftimpo = pred.feature_importance(traindf)
        print("Feature Importance on test data: ", ftimpo)
    bestmodel = pred.get_model_best()
    return modelperf, ftimpo, cmatrix, ytest, ypredproba, bestmodel, perf, auc_score
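
# A minimal sketch (assumption) showing how the probabilities returned by test_main could
# be turned into full ROC and precision-recall curves, using the sklearn and matplotlib
# imports already at the top of this script.
def plot_curves(ytest, ypredproba, outfile="curves.png"):
    probs = ypredproba.iloc[:, 1]                    # probability of the positive class
    fpr, tpr, _ = roc_curve(ytest, probs)
    prec, rec, _ = precision_recall_curve(ytest, probs)
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
    ax1.plot(fpr, tpr)
    ax1.set_xlabel("FPR"); ax1.set_ylabel("TPR"); ax1.set_title("ROC curve")
    ax2.plot(rec, prec)
    ax2.set_xlabel("Recall"); ax2.set_ylabel("Precision"); ax2.set_title("PR curve")
    fig.savefig(outfile)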
def test_NN(Xtest, ytest, pred):
    ypred = pred.predict(Xtest, model="NeuralNetClassifier")
    perf = pred.evaluate_predictions(y_true=ytest, y_pred=ypred, auxiliary_metrics=True)
    print("Predictions: ", ypred)
    print("NN performance: ", perf)
    return
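
# Model names vary across AutoGluon versions (e.g. "NeuralNetTorch" / "NeuralNetFastAI"
# in newer releases). A minimal sketch (assumption) of looking the name up via
# get_model_names() instead of hard-coding it:
def test_any_nn(Xtest, ytest, pred):
    nn_models = [m for m in pred.get_model_names() if "NeuralNet" in m]
    for name in nn_models:
        ypred = pred.predict(Xtest, model=name)
        perf = pred.evaluate_predictions(y_true=ytest, y_pred=ypred, auxiliary_metrics=True)
        print("Performance of", name, ":", perf)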
# Validation performance
def result_summary(pred):
    results = pred.fit_summary()
    print("[*]Summary of models fitting: ", results)
    return
def test_stack(Xtest, ytest, predstack, testdf, traindf, calcftimpo=False):
    ypred = predstack.predict(Xtest)
    ypredproba = predstack.predict_proba(Xtest)
    perf = predstack.evaluate_predictions(y_true=ytest, y_pred=ypred, auxiliary_metrics=True)
    print("[*]Predictions: ", ypred)
    test_perf = predstack.leaderboard(testdf, silent=True)
    print("$$$$$$$$ RESULT STACKING $$$$$$$$\n", test_perf)
    ftimpo = None
    if calcftimpo:
        ftimpo = predstack.feature_importance(traindf)
        print("Feature Importance on test data: ", ftimpo)
    auc_score = roc_auc_score(ytest, ypredproba.iloc[:, 1])
    cmatrix = confusion_matrix(ytest, ypred).ravel().tolist()
    print("Confusion matrix stacked: ", cmatrix)
    print("AUC using stacked model: ", auc_score)
    return test_perf, ftimpo, cmatrix, auc_score
def evaluate(Xtest, ytest, mpath):
    pred = task.load(mpath)
    allmodels = pred.get_model_names()
    bestmodel = pred.get_model_best()
    print("All model names: ", allmodels, "\nBest performing model (on validation set): ", bestmodel)
    print("Ytest: ", ytest[0:10])
    ypred = pred.predict(Xtest)
    ypredproba = pred.predict_proba(Xtest)
    print("Ypred: ", ypred[0:10])
    print("Ypredprob: ", ypredproba[0:10])
    perf = pred.evaluate_predictions(y_true=ytest, y_pred=ypredproba, auxiliary_metrics=True)
    tn, fp, fn, tp = confusion_matrix(ytest, ypred).ravel()
    fpr = fp / (fp + tn)
    tpr = tp / (tp + fn)
    print("tn:%d fp:%d fn:%d tp:%d" % (tn, fp, fn, tp))
    print("FPR: ", fpr * 100)
    print("TPR: ", tpr * 100)
    ap = average_precision_score(ytest, ypredproba.iloc[:, 1])
    print("Average Precision score: ", ap)
    print("Autogluon evaluation: ", perf)
    print("Len ytest/pred/predproba: ", ytest.size, ypred.size, ypredproba.size)
    return [ypred, ypredproba]
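
# The FPR/TPR above come from AutoGluon's default decision threshold. A minimal sketch
# (assumption) of recomputing them at a user-chosen probability threshold:
def rates_at_threshold(ytest, ypredproba, threshold=0.5):
    ypred_thr = (ypredproba.iloc[:, 1] >= threshold).astype(int)
    tn, fp, fn, tp = confusion_matrix(ytest, ypred_thr).ravel()
    return fp / (fp + tn), tp / (tp + fn)  # (FPR, TPR) at this threshold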
def zerodaytest(datadf, targetcol, malinst, hostfts, modelpath):
    print("Loading model trained with dataset: D", malinst, "HostFts?: ", hostfts)
    # separate features from the target column
    featdf = datadf.iloc[:, :-1].copy()  # drop target column
    labels = datadf.iloc[:, -1].copy()
    print("Zero day testing DF (after dropping labels): ", featdf)
    print("Model: ", modelpath)
    #print(featdf.size, labels.size)
    time.sleep(5)
    print(":ZERODAY TEST RESULTS:")
    [ypred, ypredproba] = evaluate(featdf, labels, modelpath)
    return
# Invokes the AutoGluon training and testing modules with the respective dataframes
def main_ag(traindf, testdf, targetcol, malinst, hostfts):
    # Display dataframe info
    Xtest = testdf.iloc[:, :-1].copy()
    ytest = testdf.iloc[:, -1].copy()
    maltrain = traindf[traindf[targetcol] == 1].shape
    maltest = testdf[testdf[targetcol] == 1].shape
    bentrain = traindf[traindf[targetcol] == 0].shape
    bentest = testdf[testdf[targetcol] == 0].shape
    print("Train df (w/ target): ", traindf, traindf.shape)
    print("Train mal: ", maltrain, "Train ben: ", bentrain)
    print("Test df (w/ target): ", testdf, testdf.shape)
    print("Test mal: ", maltest, "Test ben: ", bentest)
    time.sleep(2)
    # Training binary classifiers: 8 base models, 2 DL models
    predictor = train_main(traindf, targetcol, malinst, hostfts)
    predstack = train_multilayerstacking(traindf, targetcol, malinst, hostfts)
    # Testing binary classifiers
    print("###################~Testing Trained Models (30% PCAPs)~############################")
    res1, fimp1, cmatrix, ytest, ypred_proba, bestmodel, perf, auc_score = test_main(Xtest, ytest, predictor, testdf, traindf)
    # Uncomment for test results with feature importance (longer run time)
    ##res1, fimp1, cmatrix, ytest, ypred_proba, bestmodel, perf, auc_score = test_main(Xtest, ytest, predictor, testdf, traindf, True)
    print("####################Stacking & Weighted Ensemble Testing###########################")
    res2, fimp2, cmatrixstacked, aucstacked = test_stack(Xtest, ytest, predstack, testdf, traindf)
    # With feature importance
    ##res2, fimp2, cmatrixstacked, aucstacked = test_stack(Xtest, ytest, predstack, testdf, traindf, True)
    return [res1, res2, fimp1, fimp2, cmatrix, bestmodel, perf, auc_score]
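
# A hypothetical driver (assumption: the file paths, column name, and flag values below
# are placeholders, not from the original project) showing how main_ag and zerodaytest
# might be invoked:
if __name__ == "__main__":
    traindf = TabularDataset("train_features.csv")  # hypothetical training CSV
    testdf = TabularDataset("test_features.csv")    # hypothetical held-out test CSV
    results = main_ag(traindf, testdf, targetcol="target", malinst=100, hostfts=True)
    # Optional: evaluate a previously trained model on unseen ("zero-day") samples
    # zerodaytest(TabularDataset("zeroday.csv"), "target", 100, True, os.getcwd() + "/AGmodels/100_True/")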