Skip to content

Commit

Permalink
Merge branch 'main' of github.com:SDM-TIB/InterpretME into main
Browse files Browse the repository at this point in the history
  • Loading branch information
yashrajchudasama26 committed Aug 25, 2022
2 parents 3d92d69 + beaf03c commit efd9339
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 68 deletions.
4 changes: 2 additions & 2 deletions InterpretME/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ def binary_classification(sampled_data, sampled_target, imp_features, cross_vali
for f in range(important_features_size):
important_features.add(X.columns.values[indices[f]])

data = plot_feature_importance(estimator.feature_importances_, X.columns, model, st)
data = plot_feature_importance(estimator.feature_importances_, X.columns)
results['feature_importance'] = data

# Taking important features
Expand Down Expand Up @@ -401,7 +401,7 @@ def multiclass(sampled_data, sampled_target, imp_features, cv, classes, st, lime
for f in range(important_features_size):
important_features.add(X.columns.values[indices[f]])

data = plot_feature_importance(estimator.feature_importances_, X.columns, model, st)
data = plot_feature_importance(estimator.feature_importances_, X.columns)
results['feature_importance'] = data

# Taking important features
Expand Down
54 changes: 13 additions & 41 deletions InterpretME/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from validating_models.visualizations.classification import confusion_matrix_decomposition


def sampling(results,path):
def sampling(results, path):
"""Sampling strategy plots.
Parameters
Expand All @@ -14,23 +14,17 @@ def sampling(results,path):
path : str
Path to save plot results.
Returns
-------
"""

print("########################################################################")
print("************************* Sampling strategy ****************************")
print("########################################################################")
autopct = "%.2f"
val = results['sampling']
run = results['run_id']
file = path + f"/sampling_{results['run_id']}.png"
print("Saving sampling strategy plot to", file)
val.plot.pie(autopct=autopct)
plt.title("Sampling Strategy")
plt.savefig(path+f'/sampling_{run}.png')
plt.savefig(file)


def feature_importance(results,path):
def feature_importance(results, path):
"""
Parameters
Expand All @@ -40,15 +34,10 @@ def feature_importance(results,path):
path : str
Path to save plot results.
Returns
-------
"""
print("#####################################################################")
print("******************* Feature Importance plot *************************")
print("#####################################################################")
fi_df = results['feature_importance']
run = results['run_id']
file = path + f"/Feature Importance_{results['run_id']}.png"
print("Saving feature importance plot to", file)
# Define size of bar plot
plt.figure(figsize=(20, 15))
# Plot Seaborn bar chart
Expand All @@ -57,7 +46,7 @@ def feature_importance(results,path):
plt.title('FEATURE IMPORTANCE')
plt.xlabel('FEATURE IMPORTANCE')
plt.ylabel('FEATURE NAMES')
plt.savefig(path +f'/Feature Importance_{run}.png')
plt.savefig(file)


def decision_trees(results, path):
Expand All @@ -70,16 +59,12 @@ def decision_trees(results, path):
path : str
Path to save plot results.
Returns
-------
"""
print("#####################################################################")
print("*********************** Decision Trees ******************************")
print("#####################################################################")
file = path + f"/Decision_trees_{results['run_id']}.svg"
print("Saving decision trees to", file)
vis = results['dtree']
run = results['run_id']
vis.save(path+f'/Decision_tree_{run}.svg')
vis.save(file)


def constraints_decision_trees(results, path, constraint_num):
"""
Expand All @@ -93,13 +78,8 @@ def constraints_decision_trees(results, path, constraint_num):
constraint_num : list
Number of constraints for saving plots.
Returns
-------
"""
print("#########################################################################")
print("*************************** Constraints Decision Trees ******************")
print("##########################################################################")
print("Saving constraints decision trees to", path)
run = results['run_id']
checker = results['checker']
shadow_tree = results['shadow_tree']
Expand All @@ -121,11 +101,3 @@ def constraints_decision_trees(results, path, constraint_num):
plot = constraint_viz.dtreeviz(shadow_tree, checker, constraints, coverage=True,
non_applicable_counts=non_applicable_counts)
plot.save(path + f'/constraints_validation_dtree_{run}.svg')








6 changes: 3 additions & 3 deletions LIBRARY.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ pipeline(path_config, sampling, cv, imp_features, test_split, model, lime_result

`pipeline()` executes the whole pipeline; including extracting data and metadata from the input KGs, validating SHACL constraints, preprocessing the data and running predictive models.
InterpretME aims at collecting metadata at each step of the pipeline.
The current version of InterpretME resorts to interpretable surrogate tools like `LIME` [1].
The current version of InterpretME resorts to interpretable surrogate tools like LIME [1].
The user can provide a path to store the LIME results.
Model performance metrics such as accuracy and precision are also recorded as metadata.
The RDF mapping language (`RML`) is used to define mappings for the metadata collected from the predictive pipeline in order to integrate them into the **InterpretME KG**.
The RDF mapping language (RML) is used to define mappings for the metadata collected from the predictive pipeline in order to integrate them into the **InterpretME KG**.
The RML mappings are used by the SDM-RDFizer [2], an efficient RML engine for creating knowledge graphs, to semantify the metadata.
The function `pipeline()` returns the results of the pipeline, which are later used for the traceability of a target entity.

Expand Down Expand Up @@ -131,4 +131,4 @@ A Python dictionary following the SPARQL protocol with the query result.

[2] E. Iglesias, S. Jozashoori, D. Chaves-Fraga, D. Collarana and M.-E. Vidal. SDM-RDFizer: An RML Interpreter for the Efficient Creation of RDF Knowledge Graphs. In: CIKM ’20: Proceedings of the 29th ACM International Conference on Information & Knowledge Management, ACM, New York, NY, USA, 2020. DOI: [10.1145/3340531.3412881](https://dl.acm.org/doi/pdf/10.1145/3340531.3412881).

[3] P.D. Rohde. DeTrusty v0.6.1, August 2022. DOI: [10.5281/zenodo.6998001](https://doi.org/10.5281/zenodo.6998001).
[3] P.D. Rohde. DeTrusty v0.6.1, August 2022. DOI: [10.5281/zenodo.6998001](https://doi.org/10.5281/zenodo.6998001).
4 changes: 2 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
include README.md
include InterpretME/mappings/*.ttl
include LIBRARY.md
include InterpretME/mappings/*.ttl
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
[![Latest Release](http://img.shields.io/github/release/SDM-TIB/InterpretME.svg?logo=github)](https://github.com/SDM-TIB/InterpretME/releases)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)

[![Python Versions](https://img.shields.io/pypi/pyversions/InterpretME)](https://pypi.org/project/InterpretME)
[![Package Format](https://img.shields.io/pypi/format/InterpretME)](https://pypi.org/project/InterpretME)
[![Package Status](https://img.shields.io/pypi/status/InterpretME)](https://pypi.org/project/InterpretME)
[![Package Version](https://img.shields.io/pypi/v/InterpretME)](https://pypi.org/project/InterpretME)

# InterpretME

![InterpretME Architecture](https://github.com/raw/SDM-TIB/InterpretME/main/images/architecture.png "InterpretME Architecture")
Expand Down
41 changes: 21 additions & 20 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from setuptools import find_packages, setup
from setuptools import setup

with open("README.md", "r", encoding="utf8") as fh:
with open("LIBRARY.md", "r", encoding="utf8") as fh:
long_description = fh.read()

setup(
name='InterpretME',
packages=['InterpretME'],
version='1.0.0',
version='1.1.0',
description='An interpretable machine learning pipeline over knowledge graphs',
long_description=long_description,
long_description_content_type="text/markdown",
Expand All @@ -26,21 +26,22 @@
'Operating System :: OS Independent'
],
python_requires='>=3.8, <3.10',
install_requires=['pandas>=1.4.1',
'imbalanced-learn>=0.9.0',
'lime>=0.2.0',
'pydotplus>=2.0.2',
'svglib>=1.2.1',
'colour>=0.1.5',
'matplotlib<=3.3.4',
'rdflib<=6.1.1',
'seaborn>=0.11.2',
'numpy>=1.21.6',
'dtreeviz>=1.3.0',
'python-slugify>=6.0.0',
'requests>=2.27.0',
'rdfizer>=4.5.4',
'Detrusty>=0.6.1',
'validating-models>=0.9.0'
]
install_requires=[
'pandas>=1.4.1',
'imbalanced-learn>=0.9.0',
'lime>=0.2.0',
'pydotplus>=2.0.2',
'svglib>=1.2.1',
'colour>=0.1.5',
'matplotlib<=3.3.4',
'rdflib<=6.1.1',
'seaborn>=0.11.2',
'numpy>=1.21.6',
'dtreeviz>=1.3.0',
'python-slugify>=6.0.0',
'requests>=2.27.0',
'rdfizer>=4.5.4',
'Detrusty>=0.6.1',
'validating-models>=0.9.0'
]
)

0 comments on commit efd9339

Please sign in to comment.