fictitious_play.py
import numpy as np
import random
from plots import *  # provides the policy_iter plotting helper used at the end

"""
We are modelling the two-player zero-sum game below, where player 1 picks a row,
player 2 picks a column, and each cell shows (player 1 payoff, player 2 payoff):
####################
#  (3, -3) (1, -1) #
#  (2, -2) (4, -4) #
####################
"""

# Player 1 is the row player, player 2 is the column player.
player1_payoffs = [[3, 1],
                   [2, 4]]
player2_payoffs = [[-3, -1],
                   [-2, -4]]
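
# For reference (a sketch, not part of the fictitious-play loop itself): this 2x2
# zero-sum game has a unique mixed Nash equilibrium, obtained from the standard
# indifference conditions. The names _A, p_star and q_star are introduced here only
# for this reference computation; the empirical beliefs below should approach it.
_A = np.array(player1_payoffs, dtype=float)
# p_star = probability that player 1 plays row 0, chosen so that player 2 is
# indifferent between the two columns.
p_star = (_A[1, 1] - _A[1, 0]) / (_A[0, 0] - _A[0, 1] - _A[1, 0] + _A[1, 1])
# q_star = probability that player 2 plays column 0, chosen so that player 1 is
# indifferent between the two rows.
q_star = (_A[1, 1] - _A[0, 1]) / (_A[0, 0] - _A[0, 1] - _A[1, 0] + _A[1, 1])
# For this payoff matrix: p_star = 0.5, q_star = 0.75, and the value to player 1 is 2.5.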
def best_response(player, belief):
    """Return the action (0 or 1) that maximises the player's expected payoff
    against `belief`, the player's current belief (empirical distribution)
    over the opponent's actions."""
    if player == 1:
        expected_util_action_1 = player1_payoffs[0][0] * belief[0] + player1_payoffs[0][1] * belief[1]
        expected_util_action_2 = player1_payoffs[1][0] * belief[0] + player1_payoffs[1][1] * belief[1]
    else:
        expected_util_action_1 = player2_payoffs[0][0] * belief[0] + player2_payoffs[1][0] * belief[1]
        expected_util_action_2 = player2_payoffs[0][1] * belief[0] + player2_payoffs[1][1] * belief[1]
    if expected_util_action_1 == expected_util_action_2:
        # Break exact ties at random so neither action is systematically favoured.
        action = random.choice([0, 1])
    else:
        action = np.argmax([expected_util_action_1, expected_util_action_2])
    return action
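
# Usage illustration (the values follow directly from the payoff tables above):
#   best_response(1, (1.0, 0.0)) -> 0, since against a sure column 0, row 0 pays 3 vs. row 1's 2;
#   best_response(2, (1.0, 0.0)) -> 1, since against a sure row 0, column 1 pays -1 vs. column 0's -3.
# When the two expected utilities tie exactly, the returned action is random.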
def observe_and_update(action, player_counts):
    """Record the opponent's observed action and return the updated belief
    (empirical action frequencies) together with the raw counts."""
    if action == 0:
        player_counts[0] += 1
    else:
        player_counts[1] += 1
    total = player_counts[0] + player_counts[1]
    return (player_counts[0] / total, player_counts[1] / total), player_counts
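
# Usage illustration: observe_and_update(1, [1, 0]) mutates the counts to [1, 1]
# and returns ((0.5, 0.5), [1, 1]); beliefs are just normalised observation counts.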
def final_expected_payoff(belief1, belief2):
    """Expected payoff of each player when both play their empirical mixed
    strategies: belief1 is player 1's belief about player 2's actions,
    belief2 is player 2's belief about player 1's actions."""
    expected_util_action_11 = player1_payoffs[0][0] * belief1[0] + player1_payoffs[0][1] * belief1[1]
    expected_util_action_12 = player1_payoffs[1][0] * belief1[0] + player1_payoffs[1][1] * belief1[1]
    expected_util_action_21 = player2_payoffs[0][0] * belief2[0] + player2_payoffs[1][0] * belief2[1]
    expected_util_action_22 = player2_payoffs[0][1] * belief2[0] + player2_payoffs[1][1] * belief2[1]
    return (expected_util_action_11 * belief2[0] + expected_util_action_12 * belief2[1],
            expected_util_action_21 * belief1[0] + expected_util_action_22 * belief1[1])
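
# Because the game is zero-sum, the two values returned above are exact negatives
# of each other; at the mixed equilibrium they evaluate to (2.5, -2.5).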
max_iter = 1000
iteration = 0

# Start each player with a single fictitious observation of the opponent's
# action 0, so the initial beliefs are well defined (no division by zero).
player1_counts = [1, 0]
player2_counts = [1, 0]
player1_belief = (player1_counts[0] / (player1_counts[0] + player1_counts[1]),
                  player1_counts[1] / (player1_counts[0] + player1_counts[1]))
player2_belief = (player2_counts[0] / (player2_counts[0] + player2_counts[1]),
                  player2_counts[1] / (player2_counts[0] + player2_counts[1]))
policies = [[player2_belief, player1_belief]]
while iteration < max_iter:
    # Each player best-responds to their current belief about the opponent,
    # then updates that belief with the action the opponent actually played.
    action1 = best_response(1, player1_belief)
    action2 = best_response(2, player2_belief)
    player1_belief, player1_counts = observe_and_update(action2, player1_counts)
    player2_belief, player2_counts = observe_and_update(action1, player2_counts)
    policies.append([player2_belief, player1_belief])
    iteration += 1
print('Approximate Nash equilibrium (empirical strategies)')
print(player2_belief)  # player 1's empirical strategy (player 2's belief about player 1)
print(player1_belief)  # player 2's empirical strategy (player 1's belief about player 2)
print('Expected payoffs')
print(final_expected_payoff(player1_belief, player2_belief))

# Trim to the first 1000 belief pairs and pass them to the plotting helper.
policies = policies[:1000]
total_num_of_episodes = 1000
policy_iter(policies, total_num_of_episodes)
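
# Sanity check (a sketch using the p_star / q_star reference values computed near the
# top of the file): in two-player zero-sum games the empirical action frequencies
# generated by fictitious play converge to a Nash equilibrium, so these distances
# should shrink towards zero as max_iter grows (convergence can be slow).
print('Distance from the analytic equilibrium')
print(abs(player2_belief[0] - p_star), abs(player1_belief[0] - q_star))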