-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_this.py
56 lines (46 loc) · 2.02 KB
/
run_this.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
"""
Reinforcement learning maze example.
Red rectangle: explorer.
Black rectangles: hells [reward = -1].
Yellow circle: paradise [reward = +1].
All other states: ground [reward = 0].
This script is the main part which controls the update method of this example.
The RL is in RL_brain.py.
View more on my tutorial page: https://morvanzhou.github.io/tutorials/
"""
from maze_env import Maze
from RL_brain import QLearningTable
# Iterative update section.
def update(n_episodes=100):
    """Run the training loop, then close the environment.

    Each episode resets the environment and then repeatedly renders it,
    asks the learner for an action, applies that action, and lets the
    learner update from the resulting transition, until the environment
    reports that the episode is done.

    Reads the module-level ``env`` and ``RL`` objects that the
    ``__main__`` block creates before this function is scheduled.

    Args:
        n_episodes: Number of episodes to run. Defaults to 100, which
            preserves the previously fixed episode count.
    """
    for _ in range(n_episodes):
        # Initial observation for this episode.
        observation = env.reset()

        # Keep stepping until env.step() flags the episode as finished
        # (the explorer fell into a hell or reached paradise).
        while True:
            # Refresh the visualised environment.
            env.render()

            # Observations are handed to the learner as strings
            # (presumably used as Q-table keys -- confirm in RL_brain.py).
            action = RL.choose_action(str(observation))

            # Apply the action; get the next observation, the reward and
            # the end-of-episode flag.
            observation_, reward, done = env.step(action)

            # Learn from the (state, action, reward, next state) transition.
            RL.learn(str(observation), action, reward, str(observation_))

            # The next observation becomes the current one.
            observation = observation_

            if done:
                break

    # End of game: report it and close the window.
    print('game over')
    env.destroy()
if __name__ == "__main__":
    # Create the environment and the learner as module-level names,
    # because update() looks both of them up as globals.
    env = Maze()
    RL = QLearningTable(actions=[*range(env.n_actions)])

    # Hand update() to the environment's scheduler with a delay argument
    # of 100, then enter its main loop. Presumably Maze is a Tk window,
    # so update() fires ~100 ms after the loop starts -- confirm in
    # maze_env.py.
    env.after(100, update)
    env.mainloop()