forked from muupan/async-rl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dqn_head.py
52 lines (40 loc) · 1.6 KB
/
dqn_head.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import chainer
from chainer import functions as F
from chainer import links as L
class NatureDQNHead(chainer.ChainList):
    """Feature-extracting head of DQN, Nature version.

    Holds three convolution links followed by one linear link. Calling the
    head feeds the input through the links in order, applying
    ``activation`` to the output of every link.

    Args:
        n_input_channels: Input channel count of the first convolution.
        n_output_channels: Output size of the final linear link.
        activation: Callable applied to each link's output.
        bias: Value forwarded as ``bias`` to every link constructor.
    """

    def __init__(self, n_input_channels=4, n_output_channels=512,
                 activation=F.relu, bias=0.1):
        # Record the configuration as plain attributes before the links
        # are registered with the ChainList base class.
        self.n_input_channels = n_input_channels
        self.activation = activation
        self.n_output_channels = n_output_channels

        conv1 = L.Convolution2D(n_input_channels, 32, 8, stride=4, bias=bias)
        conv2 = L.Convolution2D(32, 64, 4, stride=2, bias=bias)
        conv3 = L.Convolution2D(64, 64, 3, stride=1, bias=bias)
        # 3136 == 64 * 7 * 7; presumably sized for 84x84 inputs with
        # unpadded convolutions -- confirm against the caller's frames.
        fc = L.Linear(3136, n_output_channels, bias=bias)

        super(NatureDQNHead, self).__init__(conv1, conv2, conv3, fc)

    def __call__(self, state):
        """Pass ``state`` through every link, activating after each one.

        Args:
            state: Input handed to the first link.

        Returns:
            The activated output of the last link.
        """
        out = state
        for link in self:
            pre_activation = link(out)
            out = self.activation(pre_activation)
        return out
class NIPSDQNHead(chainer.ChainList):
    """Feature-extracting head of DQN, NIPS workshop version.

    Holds two convolution links followed by one linear link. Calling the
    head feeds the input through the links in order, applying
    ``activation`` to the output of every link.

    Args:
        n_input_channels: Input channel count of the first convolution.
        n_output_channels: Output size of the final linear link.
        activation: Callable applied to each link's output.
        bias: Value forwarded as ``bias`` to every link constructor.
    """

    def __init__(self, n_input_channels=4, n_output_channels=256,
                 activation=F.relu, bias=0.1):
        # Record the configuration as plain attributes before the links
        # are registered with the ChainList base class.
        self.n_input_channels = n_input_channels
        self.activation = activation
        self.n_output_channels = n_output_channels

        conv1 = L.Convolution2D(n_input_channels, 16, 8, stride=4, bias=bias)
        conv2 = L.Convolution2D(16, 32, 4, stride=2, bias=bias)
        # 2592 == 32 * 9 * 9; presumably sized for 84x84 inputs with
        # unpadded convolutions -- confirm against the caller's frames.
        fc = L.Linear(2592, n_output_channels, bias=bias)

        super(NIPSDQNHead, self).__init__(conv1, conv2, fc)

    def __call__(self, state):
        """Pass ``state`` through every link, activating after each one.

        Args:
            state: Input handed to the first link.

        Returns:
            The activated output of the last link.
        """
        out = state
        for link in self:
            pre_activation = link(out)
            out = self.activation(pre_activation)
        return out