-
Notifications
You must be signed in to change notification settings - Fork 0
/
pendulum_v2.py
85 lines (66 loc) · 2.52 KB
/
pendulum_v2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import numpy as np
from os import path
class PendulumEnv:
    """Torque-controlled pendulum environment with a Gym-style interface.

    The internal state is ``[theta, theta_dot]``. Observations returned by
    :meth:`reset` and :meth:`step` are ``[cos(theta), sin(theta), theta_dot]``.
    An episode is reported as done once ``step_limit`` steps have been taken
    since the last :meth:`reset`.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }

    def __init__(self, g=10.0):
        """Set up the environment constants.

        Args:
            g: Gravity constant used in the angular-acceleration term.
        """
        self.max_speed = 8
        self.max_torque = 2.
        self.dt = .05
        self.g = g
        self.step_counter = 0
        # Bounds of the observation vector [cos(theta), sin(theta), theta_dot].
        high = np.array([1., 1., self.max_speed])
        self.min_state = -high
        self.max_state = high
        self.min_action = [-self.max_torque]
        self.max_action = [self.max_torque]
        self.step_limit = 200
        self.viewer = None

    def step(self, u):
        """Advance the simulation by one time step of length ``self.dt``.

        Args:
            u: Torque action. Either a scalar or an array-like whose first
                element is the torque; it is clipped to
                ``[-max_torque, max_torque]`` before use.

        Returns:
            A tuple ``(observation, reward, done, info)`` where ``reward`` is
            the negated cost, ``done`` is True once ``step_limit`` steps have
            been taken since the last reset, and ``info`` is an empty dict.
        """
        th, thdot = self.state  # th := theta
        g = self.g
        m = 1.
        l = 1.
        dt = self.dt

        # atleast_1d lets a bare scalar action through the same indexing path
        # as the usual one-element array (a 0-d clip result cannot be indexed).
        u = np.clip(np.atleast_1d(u), -self.max_torque, self.max_torque)[0]

        # The cost is evaluated on the state *before* the update.
        costs = angle_normalize(th)**2 + .1*thdot**2 + .001*(u**2)

        # Velocity is updated first, then the angle is advanced with it.
        newthdot = thdot + (-3*g/(2*l) * np.sin(th + np.pi) + 3./(m*l**2)*u) * dt
        # NOTE(review): the angle is integrated with the *unclipped* velocity;
        # the speed limit is applied only afterwards. Kept as in the original
        # so existing trajectories do not change.
        newth = th + newthdot*dt
        newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)  # pylint: disable=E1111

        self.state = np.array([newth, newthdot])
        self.step_counter += 1
        # ">=" keeps the episode flagged as done if the caller steps past the
        # limit instead of silently reporting False again.
        done = (self.step_counter >= self.step_limit)
        return self._get_obs(), -costs, done, {}

    def reset(self):
        """Start a new episode from a random state and return its observation.

        ``theta`` is drawn uniformly from ``[-pi, pi)`` and ``theta_dot`` from
        ``[-1, 1)``; the step counter is set back to zero.
        """
        high = np.array([np.pi, 1])
        self.state = np.random.uniform(low=-high, high=high)
        self.last_u = None
        self.step_counter = 0
        return self._get_obs()

    def _get_obs(self):
        """Return the observation ``[cos(theta), sin(theta), theta_dot]``."""
        theta, thetadot = self.state
        return np.array([np.cos(theta), np.sin(theta), thetadot])

    def render(self, mode='human'):
        """Draw the pendulum, creating the viewer on first use.

        Args:
            mode: ``'rgb_array'`` asks the viewer to return an RGB array;
                any other value renders without requesting one.

        Returns:
            Whatever ``viewer.render`` returns for the requested mode.
        """
        if self.viewer is None:
            # Imported lazily so the environment works without the rendering
            # module unless render() is actually called.
            import rendering
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
            rod = rendering.make_capsule(1, .2)
            rod.set_color(.8, .3, .3)
            self.pole_transform = rendering.Transform()
            rod.add_attr(self.pole_transform)
            self.viewer.add_geom(rod)
            axle = rendering.make_circle(.05)
            axle.set_color(0, 0, 0)
            self.viewer.add_geom(axle)

        self.pole_transform.set_rotation(self.state[0] + np.pi/2)
        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        """Close the viewer, if one was created, and forget it."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None
def angle_normalize(x):
    """Wrap an angle given in radians into the interval ``[-pi, pi)``."""
    full_turn = 2*np.pi
    # Shift by pi so the modulo maps into [0, 2*pi), then shift back.
    shifted = x + np.pi
    wrapped = shifted % full_turn
    return wrapped - np.pi