Skip to content

Commit

Permalink
insights added
Browse files Browse the repository at this point in the history
  • Loading branch information
ms8909 committed Jul 3, 2020
1 parent ff95609 commit ff5c9cd
Show file tree
Hide file tree
Showing 31 changed files with 1,519 additions and 455 deletions.
Binary file modified __pycache__/explain.cpython-37.pyc
Binary file not shown.
47 changes: 42 additions & 5 deletions explain.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
class explain():
    def __init__(self):
        super(explain, self).__init__()
        # Keyword parameters handed to the dashboard later (e.g. "classes",
        # "expected_values", "is_classification").  Initialised to an empty
        # dict (not None) so keys can be assigned without a guard.
        self.param = {}

# is classification function?

Expand All @@ -47,8 +47,6 @@ def ai(self, df, y, model, model_name="xgboost", mode=None):
y_variable_predict= "y_prediction"


# is classification?
is_classification= self.is_classification_given_y_array(y)

# If yes, then different shap functions are required.
# get the shap value based on prediction and make a new dataframe.
Expand All @@ -66,10 +64,14 @@ def ai(self, df, y, model, model_name="xgboost", mode=None):
else:
prediction_col = model.predict(df.to_numpy())

# is classification?
is_classification = self.is_classification_given_y_array(prediction_col)



#shap
c = calculate_shap()
self.df_final = c.find(model, df, prediction_col, is_classification, model_name=model_name)
self.df_final, self.explainer = c.find(model, df, prediction_col, is_classification, model_name=model_name)

#prediction col
self.df_final[y_variable_predict] = prediction_col
Expand All @@ -78,8 +80,43 @@ def ai(self, df, y, model, model_name="xgboost", mode=None):

self.df_final[y_variable] = y


#additional inputs.
if is_classification==True:
# find and add probabilities in the dataset.
prediction_col_prob = model.predict_proba(df.to_numpy())
pd_prediction_col_prob = pd.DataFrame(prediction_col_prob)

for c in pd_prediction_col_prob.columns:
self.df_final["probability_of_predicting_class_" + str(c)] = list(pd_prediction_col_prob[c])

classes = []
for c in pd_prediction_col_prob.columns:
classes.append(str(c))
self.param["classes"]=classes

try:
expected_values_by_class = self.explainer.expected_value
except:
expected_values_by_class=[]
for c in range(len(classes)):
expected_values_by_class.append(1/len(classes))


self.param["expected_values"]= expected_values_by_class
else:
try:
expected_values = self.explainer.expected_value
self.param["expected_values"] = [expected_values]
except:
expected_value = [round(np.array(y).mean(),2)]
self.param["expected_values"] = expected_value


self.param["is_classification"]= is_classification

d= dashboard()
d.find(self.df_final, y_variable, y_variable_predict, mode)
d.find(self.df_final, y_variable, y_variable_predict, mode, self.param)

return True

Expand Down
Binary file modified lib/__pycache__/calculate_shap.cpython-37.pyc
Binary file not shown.
Binary file modified lib/__pycache__/dashboard.cpython-37.pyc
Binary file not shown.
Binary file modified lib/__pycache__/feature_impact.cpython-37.pyc
Binary file not shown.
Binary file modified lib/__pycache__/feature_importance.cpython-37.pyc
Binary file not shown.
Binary file modified lib/__pycache__/imports.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file added lib/__pycache__/insight_regression.cpython-37.pyc
Binary file not shown.
Binary file added lib/__pycache__/insights.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file added lib/__pycache__/plotly_css.cpython-37.pyc
Binary file not shown.
Binary file modified lib/__pycache__/plotly_graphs.cpython-37.pyc
Binary file not shown.
Binary file modified lib/__pycache__/protodash.cpython-37.pyc
Binary file not shown.
Binary file modified lib/__pycache__/rescale_numeric_feature.cpython-37.pyc
Binary file not shown.
Binary file modified lib/__pycache__/summary_plot.cpython-37.pyc
Binary file not shown.
86 changes: 86 additions & 0 deletions lib/calculate_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@



class calculate_metrics():
    """Build pandas DataFrames of evaluation metrics for classification and
    regression models.

    Relies on sklearn.metrics names (``metrics``, ``log_loss``,
    ``classification_report`` ...) being imported at module level, as the
    rest of this package does via its shared imports module.
    """

    def __init__(self):
        super(calculate_metrics, self).__init__()
        # Placeholder for shared parameters; not used by the methods below.
        self.param = None

    def classification_metrics(self, y_test, model_predict,
                               model=None, X_test=None, pos_label=2):
        """Return ``(metrics_dataframe, report_dataframe, matrix_dataframe)``.

        Parameters
        ----------
        y_test : array-like
            Ground-truth class labels.
        model_predict : array-like
            Hard class predictions for the same rows.
        model : fitted classifier, optional
            Needed only for log loss (must expose ``predict_proba`` and
            ``classes_``).  The original code referenced undefined globals
            ``model``/``X_test`` here, which always raised ``NameError``;
            they are now explicit optional parameters.
        X_test : array-like, optional
            Feature matrix matching ``y_test``; used only for log loss.
        pos_label : int, optional
            Positive label for the ROC curve.  Kept at the original
            hard-coded value of 2 for compatibility; callers with 0/1
            labels should pass ``pos_label=1``.
        """
        # Accuracy
        accuracy = metrics.accuracy_score(y_test, model_predict)

        # Log loss requires class probabilities; compute it only when the
        # caller supplies the model and features, otherwise record NaN.
        if model is not None and X_test is not None:
            y_probs = model.predict_proba(X_test)
            cross_entropy = log_loss(y_test, y_probs, labels=model.classes_)
        else:
            cross_entropy = float("nan")

        # ROC / AUC from the hard predictions.
        # NOTE(review): AUC on hard labels (not scores) is coarse — confirm
        # this is intentional before relying on it.
        fpr, tpr, thresholds = metrics.roc_curve(y_test, model_predict,
                                                 pos_label=pos_label)
        auc = metrics.auc(fpr, tpr)

        # Error metrics
        mae = mean_absolute_error(y_test, model_predict)
        mse = mean_squared_error(y_test, model_predict)
        rms = sqrt(mse)

        # Per-class precision / recall / F1 / support as a DataFrame.
        report = classification_report(y_test, model_predict, output_dict=True)
        report_dataframe = pd.DataFrame(report).transpose()

        # Confusion matrix as a DataFrame.
        matrix_dataframe = pd.DataFrame(confusion_matrix(y_test, model_predict))

        # Assemble the summary table, indexed by metric name.
        metric = ["Accuracy Score", 'Cross-Entropy Loss', 'Area Under Curve',
                  'MAE', 'MSE', 'RMS']
        values = [accuracy, cross_entropy, auc, mae, mse, rms]
        metrics_dataframe = pd.DataFrame({'metric': metric, 'values': values})
        metrics_dataframe.set_index('metric', inplace=True)

        return metrics_dataframe, report_dataframe, matrix_dataframe

    @staticmethod
    def _mean_absolute_percentage_error(y_true, y_pred):
        """MAPE in percent, ignoring rows whose true value is 0 to avoid
        division by zero; NaN when every true value is 0."""
        y_true, y_pred = np.asarray(y_true, dtype=float), np.asarray(y_pred, dtype=float)
        nonzero = y_true != 0
        if not nonzero.any():
            return float("nan")
        return np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100

    def regression_metrics(self, y_test, model_predict):
        """Return a single-column DataFrame of regression metrics, indexed
        by metric name.

        Parameters
        ----------
        y_test : array-like
            Ground-truth target values.
        model_predict : array-like
            Model predictions for the same rows.
        """
        # Explained variance regression score.
        exp_variance_score = explained_variance_score(y_test, model_predict)

        # Maximum residual error.
        maxerror = max_error(y_test, model_predict)

        # Mean absolute / squared error and its root.
        mae = mean_absolute_error(y_test, model_predict)
        mse = mean_squared_error(y_test, model_predict)
        rmse = sqrt(mse)

        # R^2 (coefficient of determination).
        r2 = r2_score(y_test, model_predict)

        # Mean absolute percentage error (see helper for zero handling).
        mape = self._mean_absolute_percentage_error(y_test, model_predict)

        metric = ["Explained Variance Score", 'Max Error', 'R squared',
                  'MAE', 'MSE', 'RMSE', 'MAPE']
        values = [exp_variance_score, maxerror, r2, mae, mse, rmse, mape]
        metrics_dataframe = pd.DataFrame({'metric': metric, 'values': values})
        metrics_dataframe.set_index('metric', inplace=True)

        return metrics_dataframe
71 changes: 36 additions & 35 deletions lib/calculate_shap.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def xgboost_shap(self, model, X):
for c in shap_columns:
Y[c] = list(pd_shap[c])

return Y
return Y, explainer

def catboost_shap(self, model, df, y_variable=None):
# explain the model's predictions using SHAP
Expand Down Expand Up @@ -96,7 +96,7 @@ def kernel_shap(self, model, X_train):
for c in shap_columns:
Y[c] = list(pd_shap[c])

return Y
return Y, explainer

def kernel_shap_classification(self, model, X_train,prediction_col):
# use Kernel SHAP to explain test set predictions
Expand All @@ -118,7 +118,7 @@ def kernel_shap_classification(self, model, X_train,prediction_col):
for c in shap_columns:
Y[c] = list(pd_shap[c])

return Y
return Y, explainer

def select_row_shap_values(self, shap_values,prediction_col):

Expand Down Expand Up @@ -156,7 +156,7 @@ def randomforest_shap_classification(self, model, X,prediction_col):
Y[c] = list(pd_shap[c])


return Y
return Y, explainer


def randomforest_shap(self, model, X):
Expand All @@ -177,7 +177,7 @@ def randomforest_shap(self, model, X):
Y[c] = list(pd_shap[c])


return Y
return Y, explainer


def get_shap_values(self, x_array, model, x_variable, cat_index):
Expand All @@ -195,73 +195,74 @@ def get_shap_values(self, x_array, model, x_variable, cat_index):
def find(self, model, df, prediction_col, is_classification, model_name="xgboost"):
    """Dispatch to the SHAP helper matching ``model_name``.

    Parameters
    ----------
    model : fitted estimator
    df : pandas.DataFrame
        Feature matrix to explain.
    prediction_col : array-like
        Model predictions; used by the classification helpers to pick the
        per-row shap values of the predicted class.
    is_classification : bool
        Selects the classification variant where one exists.
    model_name : str
        One of the supported model identifiers; anything unrecognised
        falls back to the model-agnostic kernel explainer.

    Returns
    -------
    tuple
        ``(shap_dataframe, explainer)``.  ``explainer`` is None for
        catboost, whose helper does not expose one.
    """
    # Tree/boosting models share the TreeExplainer-based helper.  The
    # "gradientboosting" substring test also covers
    # "gradientboostingregressor", as in the original chain.
    if model_name in ("xgboost", "lightgbm") or "gradientboosting" in model_name:
        return self.xgboost_shap(model, df)

    if model_name == "catboost":
        # catboost_shap returns only the shap frame; no explainer available.
        return self.catboost_shap(model, df), None

    if model_name == "randomforest":
        if is_classification:
            return self.randomforest_shap_classification(model, df, prediction_col)
        return self.randomforest_shap(model, df)

    # svm / knn / logisticregression / decisiontree / neuralnetwork and any
    # unknown model name all use the kernel explainer.
    if is_classification:
        return self.kernel_shap_classification(model, df, prediction_col)
    return self.kernel_shap(model, df)



Expand Down
Loading

0 comments on commit ff5c9cd

Please sign in to comment.