-
Notifications
You must be signed in to change notification settings - Fork 6
/
eisner.py
104 lines (89 loc) · 4.47 KB
/
eisner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
"""
Source: https://github.com/LxMLS/lxmls-toolkit/blob/master/lxmls/parsing/dependency_decoder.py
Edited slightly by Daan van Stigt.
"""
import numpy as np
def eisner(scores):
"""Parse using Eisner's algorithm.
The matrix follows the following convention:
scores[i][j] = p(i=head, j=dep) = p(i --> j)
"""
rows, collumns = scores.shape
assert rows == collumns, 'scores matrix must be square'
num_words = rows - 1 # Number of words (excluding root).
# Initialize CKY table.
complete = np.zeros([num_words+1, num_words+1, 2]) # s, t, direction (right=1).
incomplete = np.zeros([num_words+1, num_words+1, 2]) # s, t, direction (right=1).
complete_backtrack = -np.ones([num_words+1, num_words+1, 2], dtype=int) # s, t, direction (right=1).
incomplete_backtrack = -np.ones([num_words+1, num_words+1, 2], dtype=int) # s, t, direction (right=1).
incomplete[0, :, 0] -= np.inf
# Loop from smaller items to larger items.
for k in range(1, num_words+1):
for s in range(num_words-k+1):
t = s + k
# First, create incomplete items.
# left tree
incomplete_vals0 = complete[s, s:t, 1] + complete[(s+1):(t+1), t, 0] + scores[t, s]
incomplete[s, t, 0] = np.max(incomplete_vals0)
incomplete_backtrack[s, t, 0] = s + np.argmax(incomplete_vals0)
# right tree
incomplete_vals1 = complete[s, s:t, 1] + complete[(s+1):(t+1), t, 0] + scores[s, t]
incomplete[s, t, 1] = np.max(incomplete_vals1)
incomplete_backtrack[s, t, 1] = s + np.argmax(incomplete_vals1)
# Second, create complete items.
# left tree
complete_vals0 = complete[s, s:t, 0] + incomplete[s:t, t, 0]
complete[s, t, 0] = np.max(complete_vals0)
complete_backtrack[s, t, 0] = s + np.argmax(complete_vals0)
# right tree
complete_vals1 = incomplete[s, (s+1):(t+1), 1] + complete[(s+1):(t+1), t, 1]
complete[s, t, 1] = np.max(complete_vals1)
complete_backtrack[s, t, 1] = s + 1 + np.argmax(complete_vals1)
value = complete[0][num_words][1]
heads = -np.ones(num_words + 1, dtype=int)
backtrack_eisner(incomplete_backtrack, complete_backtrack, 0, num_words, 1, 1, heads)
value_proj = 0.0
for m in range(1, num_words+1):
h = heads[m]
value_proj += scores[h, m]
return heads
def backtrack_eisner(incomplete_backtrack, complete_backtrack, s, t, direction, complete, heads):
"""
Backtracking step in Eisner's algorithm.
- incomplete_backtrack is a (NW+1)-by-(NW+1) numpy array indexed by a start position,
an end position, and a direction flag (0 means left, 1 means right). This array contains
the arg-maxes of each step in the Eisner algorithm when building *incomplete* spans.
- complete_backtrack is a (NW+1)-by-(NW+1) numpy array indexed by a start position,
an end position, and a direction flag (0 means left, 1 means right). This array contains
the arg-maxes of each step in the Eisner algorithm when building *complete* spans.
- s is the current start of the span
- t is the current end of the span
- direction is 0 (left attachment) or 1 (right attachment)
- complete is 1 if the current span is complete, and 0 otherwise
- heads is a (NW+1)-sized numpy array of integers which is a placeholder for storing the
head of each word.
"""
if s == t:
return
if complete:
r = complete_backtrack[s][t][direction]
if direction == 0:
backtrack_eisner(incomplete_backtrack, complete_backtrack, s, r, 0, 1, heads)
backtrack_eisner(incomplete_backtrack, complete_backtrack, r, t, 0, 0, heads)
return
else:
backtrack_eisner(incomplete_backtrack, complete_backtrack, s, r, 1, 0, heads)
backtrack_eisner(incomplete_backtrack, complete_backtrack, r, t, 1, 1, heads)
return
else:
r = incomplete_backtrack[s][t][direction]
if direction == 0:
heads[s] = t
backtrack_eisner(incomplete_backtrack, complete_backtrack, s, r, 1, 1, heads)
backtrack_eisner(incomplete_backtrack, complete_backtrack, r+1, t, 0, 1, heads)
return
else:
heads[t] = s
backtrack_eisner(incomplete_backtrack, complete_backtrack, s, r, 1, 1, heads)
backtrack_eisner(incomplete_backtrack, complete_backtrack, r+1, t, 0, 1, heads)
return