-
Notifications
You must be signed in to change notification settings - Fork 0
/
Ant Bot.py
88 lines (70 loc) · 2.41 KB
/
Ant Bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import gym
import numpy as np
import pybullet as p
import pybulletgym.envs
import time
def relu(x):
    """Return the element-wise maximum of *x* and zero (rectified linear unit)."""
    floor = 0
    return np.maximum(x, floor)
class SmallReactivePolicy:
    """Stateless feed-forward policy: two ReLU hidden layers, then a linear output.

    The parameters are not stored on the instance; they are read from the
    module-level ``weights_*`` arrays every time ``act`` is called.
    """

    def __init__(self, observation_space, action_space):
        """Check that the module-level weights fit the given spaces.

        Asserts that the first layer maps ``observation_space.shape[0]``
        inputs to 128 units, the second maps 128 to 64, and the final layer
        maps 64 to ``action_space.shape[0]`` outputs.
        """
        obs_dim = observation_space.shape[0]
        assert weights_dense1_w.shape == (obs_dim, 128)
        assert weights_dense2_w.shape == (128, 64)
        act_dim = action_space.shape[0]
        assert weights_final_w.shape == (64, act_dim)

    @staticmethod
    def act(ob):
        """Run one forward pass on observation *ob* and return the raw output."""
        hidden_layers = (
            (weights_dense1_w, weights_dense1_b),
            (weights_dense2_w, weights_dense2_b),
        )
        activation = ob
        for kernel, bias in hidden_layers:
            activation = relu(np.dot(activation, kernel) + bias)
        # The output layer is purely linear: no activation is applied.
        return np.dot(activation, weights_final_w) + weights_final_b
def main():
    """Run the policy in a rendered ``AntPyBulletEnv-v0`` environment.

    Creates the environment, looks for the body named ``"torso"`` so the
    debug camera can follow it, then plays episodes back to back, printing
    the reward of every step and the total score once an episode is done.

    The function returns only when ``env.render("human")`` returns ``None``;
    otherwise it loops forever.
    """
    print("create env")
    env = gym.make("AntPyBulletEnv-v0")
    env.render(mode="human")
    pi = SmallReactivePolicy(env.observation_space, env.action_space)

    # NOTE(review): presumably the simulation bodies only exist after the
    # first reset, which is why the environment is reset before the bodies
    # are enumerated -- confirm against the environment implementation.
    env.reset()
    torsoId = -1
    for i in range(p.getNumBodies()):
        body_info = p.getBodyInfo(i)  # query once, use for both print and match
        print(body_info)
        if body_info[0].decode() == "torso":
            torsoId = i
            print("found torso")

    # Camera parameters never change, so they are set up once.
    distance = 5
    yaw = 0
    pitch = -20

    while True:
        frame = 0
        score = 0
        restart_delay = 0
        obs = env.reset()
        print("frame")
        while True:
            time.sleep(0.02)
            a = pi.act(obs)
            obs, r, done, _ = env.step(a)
            score += r
            frame += 1
            print("reward")
            print(r)

            # Follow the torso only if one was actually found; -1 is the
            # "not found" sentinel and is not a body index taken from the
            # enumeration above.
            if torsoId >= 0:
                humanPos = p.getBasePositionAndOrientation(torsoId)[0]
                p.resetDebugVisualizerCamera(distance, yaw, pitch, humanPos)

            # NOTE(review): the loop exits only when render() returns None;
            # confirm what this environment's render() returns in "human" mode.
            still_open = env.render("human")
            if still_open is None:
                return
            if not done:
                continue

            # Episode finished: report the score once, then keep stepping for
            # a short while before starting the next episode.
            if restart_delay == 0:
                print("score=%0.2f in %i frames" % (score, frame))
                # 120 more steps; with the 0.02 s sleep above that is at
                # least ~2.4 s before the restart.
                restart_delay = 60 * 2
            else:
                restart_delay -= 1
                if restart_delay == 0:
                    break
# Policy parameters, drawn from a standard normal distribution at import time
# (randomly initialised, i.e. not loaded from a trained model).
# Layer sizes: 28 inputs -> 128 -> 64 -> 8 outputs.
weights_dense1_w, weights_dense1_b = np.random.randn(28, 128), np.random.randn(128)
weights_dense2_w, weights_dense2_b = np.random.randn(128, 64), np.random.randn(64)
weights_final_w, weights_final_b = np.random.randn(64, 8), np.random.randn(8)
# Start the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()