Skip to content

Commit

Permalink
Fix for #2. Added classification report
Browse files Browse the repository at this point in the history
  • Loading branch information
SanjeethKR committed Aug 15, 2020
1 parent b88d0f3 commit f98f21a
Showing 1 changed file with 13 additions and 4 deletions.
17 changes: 13 additions & 4 deletions src/playground/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import math
# Appending our src directory to sys path so that we can import modules.
sys.path.append('../..')
from src.tn.lib.sentimoji import get_emoji_sentiment_rank
#from src.tn.lib.sentimoji import get_emoji_sentiment_rank

nltk.download('movie_reviews')
#nltk_documents = [(list(movie_reviews.words(fileid)), category)
Expand Down Expand Up @@ -146,11 +146,17 @@ def document_ngram_feature(doc, features, n):
for ngram in doc_ngrams:
features['contains({})'.format("-".join(ngram))] = (True)

def get_classifier_metrics_report(classifier, inputset, features):
    """Return a classification report for `classifier` evaluated on `inputset`.

    Args:
        classifier: object exposing a ``classify(featureset)`` method.
        inputset: iterable of ``(document, label)`` pairs. Each document is
            passed through ``document_features`` here, so it must be in
            whatever form that function accepts as its first argument.
        features: feature selection forwarded unchanged to
            ``document_features``.

    Returns:
        Whatever ``classification_report`` returns for the reference labels
        versus the predicted labels, in input order.
    """
    # Single pass over inputset: keep each reference label beside its guess.
    labelled = [
        (label, classifier.classify(document_features(doc, features)))
        for (doc, label) in inputset
    ]
    expected = [gold for gold, _ in labelled]
    predicted = [guess for _, guess in labelled]
    return classification_report(expected, predicted)

# Load the documents from tamil_dev.tsv and shuffle them in place, so the
# slice-based train/test split taken later is over a random ordering.
documents = load_docs("../../resources/data/tamil_dev.tsv")
random.shuffle(documents)
# Number of held-out items: one twentieth of the documents, truncated to int.
test_size = int(len(documents)/20.0)


# Feature configurations to evaluate, one dict per experiment.
# NOTE(review): the keys ('length', 'bag_of_words', 'ngram', 'emojis') are
# presumably interpreted by document_features, with 'ngram' listing the n
# values to use -- confirm against that function.
feature_filters = [{'length': 1}, {'bag_of_words': 1}, {'ngram': [4]}, {'ngram': [5]}, {
'length': 1, 'ngram': [5]}, {'length': 1, 'ngram': [4]}, {'emojis': 1}, {'emojis': 1, 'ngram': [2, 3, 4]},
{'bag_of_words': 1, 'ngram': [2, 3, 4], 'length': 1, 'emojis': 1}]
Expand All @@ -160,9 +166,12 @@ def document_ngram_feature(doc, features, n):
(document_features(d, filter), c) for (d, c) in documents]
train_set, test_set = featuresets[test_size:], featuresets[:test_size]
classifier = nltk.NaiveBayesClassifier.train(train_set)
report = get_classifier_metrics_report(classifier, test_set, filter)
print("Classification report for classifier %s\n"
% (report))
# Test the classifier
print("{} -> {}". format(str(filter),
nltk.classify.accuracy(classifier, test_set)))
# print("{} -> {}". format(str(filter),
# nltk.classify.accuracy(classifier, test_set)))

# Classify a few docs and check
# for(d, c) in documents[:100]:
Expand Down

0 comments on commit f98f21a

Please sign in to comment.