Commit

Bring cumulative reward in training script, release pretrained checkpoint.
Karan Desai committed Nov 10, 2017
1 parent 3e7c7a6 commit d86ca77
Showing 3 changed files with 6 additions and 6 deletions.
README.md (1 change: 0 additions & 1 deletion)
@@ -173,7 +173,6 @@ Im: ['purple', 'triangle', 'filled'] - Task: ['shape', 'color']
 ```
 
 **TODO: Visualizing evolution chart - showing emergence of grounded language.**
-**TODO: Release the checkpoint with 80.23% validation accuracy on 11 nov 2017.**
 
 References
 ----------
train.py (10 changes: 6 additions & 4 deletions)
@@ -73,9 +73,11 @@
 questioner = Questioner(OPT)
 answerer = Answerer(OPT)
 # this reward tensor is re-used every iteration
-reward = torch.Tensor(OPT['batch_size'], 1).fill_(- 10 * OPT['rl_scale'])
+reward = torch.Tensor(OPT['batch_size'], 1).fill_(-10 * OPT['rl_scale'])
+cumulative_reward = None
 if OPT.get('use_gpu'):
     questioner, answerer, reward = questioner.cuda(), answerer.cuda(), reward.cuda()
+
 print('Questioner and Answerer Bots: ')
 print(questioner)
 print(answerer)

@@ -123,9 +125,9 @@

     # record cumulative reward in world
     batch_reward = torch.mean(reward) / OPT['rl_scale']
-    if not world.cumulative_reward:
-        world.cumulative_reward = batch_reward
-    world.cumulative_reward = 0.95 * world.cumulative_reward + 0.05 * batch_reward
+    if not cumulative_reward:
+        cumulative_reward = batch_reward
+    cumulative_reward = 0.95 * cumulative_reward + 0.05 * batch_reward
 
     # qbot and abot observe rewards at end of episode
     world.qbot.observe({'reward': reward, 'episode_done': True})
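
For context, the smoothing this commit moves into the training script is a plain exponential moving average over per-batch rewards. Below is a minimal, self-contained sketch of that technique; the `batch_size` and `rl_scale` values are illustrative (train.py reads them from `OPT`), and it checks `is None` explicitly instead of truthiness, which is equivalent here since the first batch overwrites the running value either way.

```python
import torch

# illustrative values; train.py takes these from OPT
batch_size, rl_scale = 32, 100.0

# the reward tensor is filled once and re-used every iteration
reward = torch.Tensor(batch_size, 1).fill_(-10 * rl_scale)

cumulative_reward = None
for _ in range(3):
    # undo the scaling that was applied when the tensor was filled
    batch_reward = torch.mean(reward).item() / rl_scale
    if cumulative_reward is None:
        cumulative_reward = batch_reward
    # exponential moving average: 95% history, 5% current batch
    cumulative_reward = 0.95 * cumulative_reward + 0.05 * batch_reward
    print(cumulative_reward)  # -10.0 each time for this constant reward

```

Because the toy reward is constant, the average stays pinned at -10.0; during actual training the batch rewards vary, and the EMA gives a stable quantity to monitor instead of a noisy per-batch value.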
world.py (1 change: 0 additions & 1 deletion)
@@ -31,7 +31,6 @@ def __init__(self, opt, questioner, answerer, shared=None):
         self.abot = answerer
         self.acts = []
         self.episode_batch = None  # episode specific batch
-        self.cumulative_reward = 0
         super(QAWorld, self).__init__(opt, [self.qbot, self.abot], shared)
 
     def parley(self):
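
With the deleted attribute gone, `QAWorld` carries only per-episode state and the running reward lives entirely in the training loop. For reference, a sketch of how the trimmed constructor reads after this commit, assembled from the hunk's context lines; the `self.qbot` assignment sits above the visible hunk and is assumed from the `super()` call:

```python
def __init__(self, opt, questioner, answerer, shared=None):
    self.qbot = questioner  # assumed: not visible in the hunk shown above
    self.abot = answerer
    self.acts = []
    self.episode_batch = None  # episode specific batch
    super(QAWorld, self).__init__(opt, [self.qbot, self.abot], shared)
```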
