-
Notifications
You must be signed in to change notification settings - Fork 41
/
model.py
164 lines (136 loc) · 7.83 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import numpy as np
import tensorflow as tf
def length(sequence):
    """Compute the true (unpadded) length of every sequence in a batch.

    A time step counts as "used" when at least one feature at that step is
    non-zero; trailing all-zero padding steps therefore contribute nothing.

    :param sequence: batch tensor of shape [batch_size, num_steps, feature_size]
    :return: int32 tensor of shape [batch_size] holding each sequence's length
    """
    # 1.0 where the step has any non-zero feature, 0.0 for pure padding.
    step_is_used = tf.sign(tf.reduce_max(tf.abs(sequence), 2))
    # Summing the indicator over time yields the number of real steps.
    return tf.cast(tf.reduce_sum(step_is_used, 1), tf.int32)
# reference:
# https://github.com/tensorflow/models/blob/master/tutorials/rnn/translate/seq2seq_model.py
# https://github.com/davidoj/deepknowledgetracingTF/blob/master/model.py
class Model(object):
    """Deep Knowledge Tracing model (TF1 static graph).

    Pipeline built by ``build_graph``: placeholders -> stacked RNN layers ->
    per-problem sigmoid output layer -> masked cross-entropy loss with three
    optional regularizers (current-step objective ``lambda_o``, and L1/L2
    "waviness" penalties ``lambda_w1``/``lambda_w2`` that smooth prediction
    changes between consecutive time steps) -> Adam with gradient clipping.
    """

    def __init__(self, num_problems,
                 hidden_layer_structure=(200,),
                 batch_size=32,
                 rnn_cell=tf.contrib.rnn.LSTMCell,
                 learning_rate=0.01,
                 max_grad_norm=5.0,
                 lambda_w1=0.0,
                 lambda_w2=0.0,
                 lambda_o=0.0,
                 **kwargs):
        """Store hyper-parameters; no graph ops are created here.

        :param num_problems: number of distinct problems (output width).
        :param hidden_layer_structure: tuple of hidden-state sizes, one per
            stacked RNN layer.
        :param batch_size: mini-batch size (stored only; placeholders use None).
        :param rnn_cell: RNN cell class, e.g. ``tf.contrib.rnn.LSTMCell``.
        :param learning_rate: Adam learning rate.
        :param max_grad_norm: per-gradient clipping norm.
        :param lambda_w1: weight of the L1 waviness regularizer.
        :param lambda_w2: weight of the L2 waviness regularizer.
        :param lambda_o: weight of the current-step objective term.
        """
        # dataset-dependent attributes
        self.num_problems = num_problems
        self.hidden_layer_structure = hidden_layer_structure
        self.batch_size = batch_size
        self.rnn_cell = rnn_cell
        self.learning_rate = learning_rate
        self.max_grad_norm = max_grad_norm
        self.lambda_w1 = lambda_w1  # regularization parameter for waviness, l1-norm
        self.lambda_w2 = lambda_w2  # regularization parameter for waviness, l2-norm
        self.lambda_o = lambda_o    # regularization parameter for objective function

    def _create_placeholder(self):
        """Create input placeholders and derive per-sequence lengths."""
        print("Creating placeholder...")
        num_problems = self.num_problems
        # X is the one-hot (problem, correctness) encoding: first num_problems
        # columns mark the attempted problem, last num_problems mark a correct
        # attempt -- so the feature size is 2 * num_problems.
        self.X = tf.placeholder(tf.float32, [None, None, 2 * num_problems], name='X')
        # y_seq marks which problem is answered at the NEXT step (target mask);
        # y_corr marks whether that answer was correct (target label).
        self.y_seq = tf.placeholder(tf.float32, [None, None, num_problems], name='y_seq')
        self.y_corr = tf.placeholder(tf.float32, [None, None, num_problems], name='y_corr')
        self.keep_prob = tf.placeholder(tf.float32)

        self.hidden_layer_input = self.X
        # True lengths let dynamic_rnn skip the zero-padded tail of each batch row.
        self.seq_length = length(self.X)

    def _influence(self):
        """Build the stacked RNN hidden layers (the inference pass).

        NOTE(review): the name looks like a typo for ``_inference``; kept
        as-is because external code may call it by this name.
        """
        # Fixed copy-paste bug: the original printed "Creating Loss..." here.
        print("Creating Inference...")
        hidden_layer_structure = self.hidden_layer_structure

        # Hidden Layer Construction: each layer feeds the next one's input.
        self.hidden_layers_outputs = []
        self.hidden_layers_state = []
        hidden_layer_input = self.hidden_layer_input
        for i, layer_state_size in enumerate(hidden_layer_structure):
            variable_scope_name = "hidden_layer_{}".format(i)
            with tf.variable_scope(variable_scope_name, reuse=tf.get_variable_scope().reuse):
                cell = self.rnn_cell(num_units=layer_state_size)
                # Dropout on outputs only; keep_prob is fed at run time so the
                # same graph serves both training and evaluation.
                cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.keep_prob)
                outputs, state = tf.nn.dynamic_rnn(
                    cell,
                    hidden_layer_input,
                    dtype=tf.float32,
                    sequence_length=self.seq_length
                )
            self.hidden_layers_outputs.append(outputs)
            self.hidden_layers_state.append(state)
            hidden_layer_input = outputs

    def _create_loss(self):
        """Build the output layer, masked cross-entropy loss and regularizers."""
        print("Creating Loss...")
        last_layer_size = self.hidden_layer_structure[-1]
        last_layer_outputs = self.hidden_layers_outputs[-1]

        # Output Layer Construction
        with tf.variable_scope("output_layer", reuse=tf.get_variable_scope().reuse):
            W_yh = tf.get_variable("weights", shape=[last_layer_size, self.num_problems],
                                   initializer=tf.random_normal_initializer(stddev=1.0 / np.sqrt(self.num_problems)))
            b_yh = tf.get_variable("biases", shape=[self.num_problems, ],
                                   initializer=tf.random_normal_initializer(stddev=1.0 / np.sqrt(self.num_problems)))

        # Flatten time+batch so a single matmul projects every step at once,
        # then restore [batch, num_steps, num_problems].
        num_steps = tf.shape(last_layer_outputs)[1]
        self.outputs_flat = tf.reshape(last_layer_outputs, shape=[-1, last_layer_size])
        self.logits_flat = tf.matmul(self.outputs_flat, W_yh) + b_yh
        self.logits = tf.reshape(self.logits_flat, shape=[-1, num_steps, self.num_problems])
        self.preds = tf.sigmoid(self.logits)

        # Filter out the target indices as follow:
        # Get the indices where y_seq is not equal to 0, where the indices
        # imply that a student has answered the question in the time step and
        # thereby exclude those time steps that the student hasn't answered.
        target_indices = tf.where(tf.not_equal(self.y_seq, 0))
        self.target_logits = tf.gather_nd(self.logits, target_indices)
        self.target_preds = tf.gather_nd(self.preds, target_indices)  # needed to return AUC
        self.target_labels = tf.gather_nd(self.y_corr, target_indices)

        self.cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=self.target_logits,
                                                                     labels=self.target_labels)
        self.loss = tf.reduce_mean(self.cross_entropy)

        # Current-step objective (lambda_o): additionally ask the model to
        # reconstruct the CURRENT answer, sliced directly out of X.
        current_seq = self.X[:, :, :self.num_problems]   # slice out the answered exercise
        current_corr = self.X[:, :, self.num_problems:]  # slice out its correctness
        self.target_indices_current = tf.where(tf.not_equal(current_seq, 0))
        self.target_logits_current = tf.gather_nd(self.logits, self.target_indices_current)
        self.target_preds_current = tf.gather_nd(self.preds, self.target_indices_current)  # needed to return AUC
        self.target_labels_current = tf.gather_nd(current_corr, self.target_indices_current)
        self.cross_entropy_current = tf.nn.sigmoid_cross_entropy_with_logits(logits=self.target_logits_current,
                                                                             labels=self.target_labels_current)
        self.loss += self.lambda_o * tf.reduce_mean(self.cross_entropy_current)

        # Waviness regularization: penalize prediction changes between
        # consecutive steps, normalized by total real steps and problem count.
        mask = length(self.y_seq)
        self.total_num_steps = tf.reduce_sum(tf.cast(mask, tf.float32))

        # l1-norm
        waviness_norm_l1 = tf.abs(self.preds[:, 1:, :] - self.preds[:, :-1, :])
        self.waviness_l1 = tf.reduce_sum(waviness_norm_l1) / self.total_num_steps / self.num_problems
        self.loss += self.lambda_w1 * self.waviness_l1

        # l2-norm
        waviness_norm_l2 = tf.square(self.preds[:, 1:, :] - self.preds[:, :-1, :])
        self.waviness_l2 = tf.reduce_sum(waviness_norm_l2) / self.total_num_steps / self.num_problems
        self.loss += self.lambda_w2 * self.waviness_l2

    def _create_optimizer(self):
        """Build the Adam train op with per-gradient norm clipping."""
        print('Create optimizer...')
        with tf.variable_scope('Optimizer'):
            self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
            gvs = self.optimizer.compute_gradients(self.loss)
            # Fix: compute_gradients returns None for variables that do not
            # reach the loss; tf.clip_by_norm(None, ...) would raise, so such
            # pairs are skipped instead of crashing graph construction.
            clipped_gvs = [(tf.clip_by_norm(grad, self.max_grad_norm), var)
                           for grad, var in gvs if grad is not None]
            self.train_op = self.optimizer.apply_gradients(clipped_gvs)

    def _add_summary(self):
        # No TensorBoard summaries yet; placeholder kept for the build pipeline.
        pass

    def build_graph(self):
        """Assemble the full graph: placeholders, RNN, loss, optimizer, summaries."""
        self._create_placeholder()
        self._influence()
        self._create_loss()
        self._create_optimizer()
        self._add_summary()
        tf.get_variable_scope().reuse_variables()  # better to add this to reuse the variable name.