-
Notifications
You must be signed in to change notification settings - Fork 15
/
tools.py
71 lines (61 loc) · 1.91 KB
/
tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# -*- coding: utf-8 -*-
import random
def shuffle(lol, seed):
    '''
    Shuffle several lists in place using one common seed.

    lol  :: list of lists; every inner list is shuffled in place
    seed :: value passed to random.seed before each individual shuffle

    The module-level random generator is re-seeded with the same value
    ahead of every shuffle, so inner lists of equal length are permuted
    in the same order. Nothing is returned.
    '''
    for sequence in lol:
        # Re-seed each time so every list sees the same random stream.
        random.seed(seed)
        random.shuffle(sequence)
def contextwin(l, win):
    '''
    Build the context window surrounding each word of a sentence.

    l   :: iterable of word indexes composing a sentence
    win :: int, size of the window; must be odd and >= 1

    Returns a list holding one window per word, in sentence order. Each
    window is a list of `win` indexes centred on its word; positions that
    fall outside the sentence are filled with the padding index 0.

    Raises AssertionError when `win` is even or smaller than 1.
    '''
    assert (win % 2) == 1
    assert win >= 1

    l = list(l)
    # Floor division: `win / 2` is a float on Python 3 and cannot be used
    # to repeat a list; `//` gives the same int result on Python 2 and 3.
    padding = (win // 2) * [0]
    lpadded = padding + l + padding
    out = [lpadded[i:i + win] for i in range(len(l))]

    assert len(out) == len(l)
    return out
def contextwin_2(ls, win):
    '''
    Compute context windows for a collection of sentences.

    ls  :: iterable of sentences, each one handed to contextwin
    win :: int, size of the window; must be odd and >= 1

    Returns a list with the contextwin result of every sentence, in the
    same order as `ls`.
    '''
    assert win % 2 == 1
    assert win >= 1
    return [contextwin(sentence, win) for sentence in ls]
def getKeyphraseList(l):
    '''
    Collect the maximal runs of non-zero labels in a label sequence.

    l :: sequence of labels, where 0 marks a position outside any phrase

    Returns a set of strings. Each string describes one run of consecutive
    non-zero labels as its positions joined by single spaces, e.g.
    [0, 1, 1, 0, 2] gives {'1 2', '4'}.
    '''
    res, now = set(), []
    # enumerate replaces xrange(len(l)), which does not exist on Python 3.
    for i, label in enumerate(l):
        if label != 0:
            now.append(str(i))
        elif now:
            # A zero label closes the run that was being collected.
            res.add(' '.join(now))
            now = []
    # A run reaching the end of the sequence is closed here.
    if now:
        res.add(' '.join(now))
    return res
def conlleval(predictions, groundtruth, file):
    '''
    Score predicted label sequences against the reference sequences.

    predictions :: sequence of predicted label sequences, one per sentence
    groundtruth :: sequence of reference label sequences, same length
    file        :: unused; kept so existing callers keep working

    Each prediction is truncated to the length of its reference before it
    is compared. Keyphrases are extracted with getKeyphraseList.

    Returns a dict with:
      'a' :: fraction of sentences whose labels all match the reference
      'p' :: matched keyphrases / predicted keyphrases
      'r' :: matched keyphrases / reference keyphrases
      'f' :: harmonic mean of 'p' and 'r'
    A ratio whose denominator is zero is reported as 0.0.

    Raises AssertionError when the two inputs differ in length.
    '''
    assert len(predictions) == len(groundtruth)

    all_cnt, good_cnt = len(predictions), 0
    p_cnt, r_cnt, pr_cnt = 0, 0, 0
    for predicted, gold in zip(predictions, groundtruth):
        predicted = predicted[0:len(gold)]
        # Element-wise comparison works for plain lists as well as arrays;
        # a prediction shorter than its reference can never be a full match.
        if len(predicted) == len(gold) and all(
                p == g for p, g in zip(predicted, gold)):
            good_cnt += 1
        pKeyphraseList = getKeyphraseList(predicted)
        gKeyphraseList = getKeyphraseList(gold)
        p_cnt += len(pKeyphraseList)
        r_cnt += len(gKeyphraseList)
        pr_cnt += len(pKeyphraseList & gKeyphraseList)

    res = {}
    res['a'] = 1.0 * good_cnt / all_cnt if all_cnt else 0.0
    # Precision and recall are phrase-level: they use the number of matched
    # keyphrases (pr_cnt), not the number of fully correct sentences.
    res['p'] = 1.0 * pr_cnt / p_cnt if p_cnt else 0.0
    res['r'] = 1.0 * pr_cnt / r_cnt if r_cnt else 0.0
    pr_sum = res['p'] + res['r']
    res['f'] = 2.0 * res['p'] * res['r'] / pr_sum if pr_sum else 0.0
    return res