Skip to content

Commit

Permalink
Attempt to fix Reinforce inf problem
Browse files (browse the repository at this point in the history)
  • Loading branch information
masus04 committed Aug 18, 2018
1 parent 7c1f72c commit 5c83c8b
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions Othello/players/reinforcePlayer.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ def update(self):
rewards = config.make_variable(torch.FloatTensor(rewards))
# rewards = self.normalize_rewards(rewards) # For now nothing to normalize, standard deviation = 0

policy_losses = [(-log_prob * reward) for log_prob, reward in zip(self.log_probs, rewards)]
policy_losses = [(-log_prob * reward / len(self.log_probs)) for log_prob, reward in zip(self.log_probs, rewards)]

self.optimizer.zero_grad()
policy_loss = torch.mean(policy_losses)
policy_loss = torch.cat(policy_losses).sum()
policy_loss.backward()
self.optimizer.step()

Expand Down

0 comments on commit 5c83c8b

Please sign in to comment.