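"""Evaluate a trained coreference model on the GAP dataset.

Usage (inferred from the sys.argv reads below):

    python evaluate_gap.py <experiment> <gap_tsv> <output_tsv> <output_jsonl> [adv]

Reads GAP examples from <gap_tsv>, writes per-example name_a_coref /
name_b_coref decisions to <output_tsv>, dumps predicted clusters as JSON lines
to <output_jsonl>, and prints the official GAP scorecard. The script targets
the Python 2 / TF1 e2e-coref codebase (note unicode() and tf.Session below).
"""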
import tensorflow as tf
import coref_model_original as cm
import coref_model_adv as cm_adv
import util
import spacy
import os
import json
from demo import make_predictions
from gap_coreference.gap_scorer import Annotation, read_annotations, calculate_scores, make_scorecard
import sys
if __name__ == '__main__':
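    # util.initialize_from_env() loads the experiment config named on the
    # command line; evaluation then points the context-embedding path at the
    # full pretrained embedding file, presumably swapping out a
    # vocabulary-filtered training-time subset as in the e2e-coref setup.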
    config = util.initialize_from_env()
    config['context_embeddings']['path'] = config['context_embeddings_full']['path']
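    # An optional trailing CLI argument 'adv' selects the adversarially trained
    # model variant (coref_model_adv); otherwise the original model is used.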
    adv = len(sys.argv) >= 6 and sys.argv[5].lower() == 'adv'
    if adv:
        model = cm_adv.CorefModel(config)
    else:
        model = cm.CorefModel(config)
    saver = tf.train.Saver()
    with open(sys.argv[2]) as f:
        lines = f.readlines()
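    # spaCy supplies sentence splits for the model input and token character
    # offsets (token.idx) for aligning GAP's span annotations.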
    nlp = spacy.load('en')
    system_annotations = {}
    output_file = open(sys.argv[3], 'w')
    output_json_file = open(sys.argv[4], 'w')
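    # Restore trained weights: the adversarial variant loads its 'model.max.ckpt'
    # checkpoint explicitly, while the original model restores via its own helper.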
    with tf.Session() as sess:
        if adv:
            saver.restore(sess, os.path.join(config['adv_log_root'], 'final', 'model.max.ckpt'))
        else:
            model.restore(sess)
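        # Each GAP example is tokenized with spaCy, wrapped in the minimal dict
        # the coref model expects, and run through make_predictions();
        # lines[1:] skips the TSV header row.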
        for line in lines[1:]:
            parts = line.split('\t')
            example_id = parts[0].strip()
            text = parts[1].strip()
            doc = nlp(unicode(text))
            sentences = [[unicode(str(w)) for w in sent] for sent in doc.sents]
            example = {'sentences': sentences,
                       'doc_key': 'nw',
                       'speakers': [['' for _ in sent] for sent in doc.sents],
                       'clusters': []}
            result = make_predictions(text, model, sess, example)
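            # GAP TSV columns: 0 ID, 1 Text, 2 Pronoun, 3 Pronoun-offset, 4 A,
            # 5 A-offset, 6 A-coref, 7 B, 8 B-offset, 9 B-coref, 10 URL.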
            nameA = parts[4].strip()
            nameA_offset = int(parts[5].strip())
            nameB = parts[7].strip()
            nameB_offset = int(parts[8].strip())
            pronoun_char_offset = int(parts[3].strip())
            pronoun_index = None
            nameA_index = None
            nameB_index = None
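            # Map each character offset to a token index: a token matches if it
            # starts exactly at the offset, and the fallback clause picks the
            # last token starting at or before the offset otherwise.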
            for k, token in enumerate(doc):
                if token.idx == pronoun_char_offset or (pronoun_index is None and k + 1 < len(doc) and doc[k + 1].idx > pronoun_char_offset):
                    pronoun_index = token.i
                elif token.idx == nameA_offset or (nameA_index is None and (k + 1 >= len(doc) or doc[k + 1].idx > nameA_offset)):
                    nameA_index = token.i
                elif token.idx == nameB_offset or (nameB_index is None and (k + 1 >= len(doc) or doc[k + 1].idx > nameB_offset)):
                    nameB_index = token.i
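            # GAP gives only each span's starting character offset, so the end
            # token is approximated from the name's whitespace word count.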
            nameA_end_index = nameA_index + len(nameA.split()) - 1
            nameB_end_index = nameB_index + len(nameB.split()) - 1
            clusterA = None
            clusterB = None
            annotation = Annotation()
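            # A predicted mention is matched to a name span when the two share
            # a syntactic root token, or the name's root attaches to the
            # mention's root, tolerating boundary mismatches between spaCy's
            # tokenization and the model's predicted spans.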
            spanA = doc[nameA_index:nameA_end_index + 1]
            spanB = doc[nameB_index:nameB_end_index + 1]
            for cluster in result['predicted_clusters']:
                for mention in cluster:
                    mention_root_i = doc[mention[0]:mention[1] + 1].root.i
                    if spanA.root.i == mention_root_i or spanA.root.head.i == mention_root_i:
                        clusterA = cluster
                    if spanB.root.i == mention_root_i or spanB.root.head.i == mention_root_i:
                        clusterB = cluster
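            # The answer is positive iff the pronoun's single-token span sits in
            # the same predicted cluster as the matched name; tuple() guards
            # against list-typed mentions in the model output.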
            pronoun_mention = (pronoun_index, pronoun_index)
            annotation.name_a_coref = clusterA is not None and pronoun_mention in set(tuple(m) for m in clusterA)
            annotation.name_b_coref = clusterB is not None and pronoun_mention in set(tuple(m) for m in clusterB)
            system_annotations[example_id] = annotation
            output_file.write('\t'.join([example_id, str(annotation.name_a_coref), str(annotation.name_b_coref)]) + '\n')
            json_output = {'example_id': example_id,
                           'sentences': sentences,
                           'predicted_clusters': result['predicted_clusters']}
            output_json_file.write(json.dumps(json_output) + '\n')
    output_file.close()
    output_json_file.close()
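    # Score system output against gold labels read from the same GAP TSV; the
    # second argument to read_annotations flags the file as gold, so its
    # A-coref and B-coref columns are parsed.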
    gold_annotations = read_annotations(sys.argv[2], True)
    print(make_scorecard(calculate_scores(gold_annotations, system_annotations)))