agent.py
import os

# Suppress TensorFlow's C++ logging; must be set before Keras/TensorFlow is imported.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam


class Agent:
    """Small policy network: maps a state vector to a probability distribution over actions."""

    def __init__(self, input_dim, output_dim, learning_rate=1e-4):
        self.learning_rate = learning_rate
        self.input_dimension = input_dim
        self.output_dimension = output_dim
        self.model = self._build_policy()

    def _build_policy(self):
        # Hidden ReLU layers followed by a softmax over the action space.
        model = Sequential()
        model.add(Dense(20, input_shape=(self.input_dimension,), activation='relu'))
        model.add(Dense(64, activation='relu', kernel_initializer='he_uniform'))
        model.add(Dense(32, activation='relu', kernel_initializer='he_uniform'))
        model.add(Dense(self.output_dimension, activation='softmax'))
        opt = Adam(learning_rate=self.learning_rate)
        model.compile(loss='kullback_leibler_divergence', optimizer=opt)
        return model

    def decide(self, state):
        # Run a single state through the policy and return the action probabilities.
        X = state.reshape(1, state.shape[0])
        return self.model.predict(X, batch_size=1)

    def train(self, decision, reward):
        # Stack inputs and targets into 2-D batches and take one gradient step.
        decision = np.vstack([decision])
        reward = np.vstack([reward])
        self.model.fit(decision, reward, verbose=0)
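

# --- Usage sketch (illustrative, not part of the original file) ---
# Assumes a 4-dimensional state and 2 discrete actions; the observation and the
# target action distribution below are made-up values for demonstration only.
if __name__ == '__main__':
    agent = Agent(input_dim=4, output_dim=2)
    state = np.array([0.1, -0.2, 0.05, 0.3])   # hypothetical observation
    probs = agent.decide(state)                 # shape (1, 2) action probabilities
    print('action probabilities:', probs)
    # One training step toward a hypothetical target distribution for this state.
    target = np.array([0.0, 1.0])
    agent.train(state, target)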