Commit

Bring cumulative reward in training script, release pretrained checkpoint.
Karan Desai committed Nov 10, 2017
1 parent 3e7c7a6 commit d86ca77
Showing 3 changed files with 6 additions and 6 deletions.
README.md (1 change: 0 additions & 1 deletion)
@@ -173,7 +173,6 @@ Im: ['purple', 'triangle', 'filled'] - Task: ['shape', 'color']
 ```
 
 **TODO: Visualizing evolution chart - showing emergence of grounded language.**
-**TODO: Release the checkpoint with 80.23% validation accuracy on 11 nov 2017.**
 
 References
 ----------
train.py (10 changes: 6 additions & 4 deletions)
@@ -73,9 +73,11 @@
 questioner = Questioner(OPT)
 answerer = Answerer(OPT)
 # this reward tensor is re-used every iteration
-reward = torch.Tensor(OPT['batch_size'], 1).fill_(- 10 * OPT['rl_scale'])
+reward = torch.Tensor(OPT['batch_size'], 1).fill_(-10 * OPT['rl_scale'])
+cumulative_reward = None
 if OPT.get('use_gpu'):
     questioner, answerer, reward = questioner.cuda(), answerer.cuda(), reward.cuda()
+
 print('Questioner and Answerer Bots: ')
 print(questioner)
 print(answerer)

@@ -123,9 +125,9 @@

     # record cumulative reward in world
     batch_reward = torch.mean(reward) / OPT['rl_scale']
-    if not world.cumulative_reward:
-        world.cumulative_reward = batch_reward
-    world.cumulative_reward = 0.95 * world.cumulative_reward + 0.05 * batch_reward
+    if not cumulative_reward:
+        cumulative_reward = batch_reward
+    cumulative_reward = 0.95 * cumulative_reward + 0.05 * batch_reward
 
     # qbot and abot observe rewards at end of episode
     world.qbot.observe({'reward': reward, 'episode_done': True})
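
For context, the smoothing this commit moves into the training script is a plain exponential moving average over per-batch rewards. Below is a minimal, self-contained sketch of that technique; the `batch_size` and `rl_scale` values are illustrative (train.py reads them from `OPT`), and it checks `is None` explicitly instead of truthiness, which is equivalent here since the first batch overwrites the running value either way.

```python
import torch

# illustrative values; train.py takes these from OPT
batch_size, rl_scale = 32, 100.0

# the reward tensor is filled once and re-used every iteration
reward = torch.Tensor(batch_size, 1).fill_(-10 * rl_scale)

cumulative_reward = None
for _ in range(3):
    # undo the scaling that was applied when the tensor was filled
    batch_reward = torch.mean(reward).item() / rl_scale
    if cumulative_reward is None:
        cumulative_reward = batch_reward
    # exponential moving average: 95% history, 5% current batch
    cumulative_reward = 0.95 * cumulative_reward + 0.05 * batch_reward
    print(cumulative_reward)  # -10.0 each time for this constant reward

```

Because the toy reward is constant, the average stays pinned at -10.0; during actual training the batch rewards vary, and the EMA gives a stable quantity to monitor instead of a noisy per-batch value.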
world.py (1 change: 0 additions & 1 deletion)
@@ -31,7 +31,6 @@ def __init__(self, opt, questioner, answerer, shared=None):
         self.abot = answerer
         self.acts = []
         self.episode_batch = None  # episode specific batch
-        self.cumulative_reward = 0
         super(QAWorld, self).__init__(opt, [self.qbot, self.abot], shared)
 
     def parley(self):
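
With the deleted attribute gone, `QAWorld` carries only per-episode state and the running reward lives entirely in the training loop. For reference, a sketch of how the trimmed constructor reads after this commit, assembled from the hunk's context lines; the `self.qbot` assignment sits above the visible hunk and is assumed from the `super()` call:

```python
def __init__(self, opt, questioner, answerer, shared=None):
    self.qbot = questioner  # assumed: not visible in the hunk shown above
    self.abot = answerer
    self.acts = []
    self.episode_batch = None  # episode specific batch
    super(QAWorld, self).__init__(opt, [self.qbot, self.abot], shared)
```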
