forked from SergeOlivierP/agentF
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel.py
39 lines (30 loc) · 1.14 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from policy_keras import Policy
from agent import Agent
from simulation import Simulation
from market import Market
from datetime import datetime
import numpy as np
# --- Training configuration -------------------------------------------
# Everything here except num_iterations is passed to Policy(...) below.
H = 200               # hidden-layer width (number of neurons)
batch_size = 10       # episodes between parameter updates
learning_rate = 1e-4
gamma = 0.99          # reward discount factor
decay_rate = 0.99     # RMSProp decay for the leaky sum of grad^2
num_iterations = 5    # how many simulations the training loop runs

# --- Environment and policy --------------------------------------------
market = Market('IntelDataSet.csv')

# Policy input width: the second dimension of market.indices plus two
# extra inputs (presumably agent state -- TODO confirm against Policy).
D = np.shape(market.indices)[1] + 2
policy = Policy(H, D, gamma, batch_size, decay_rate, learning_rate)

# Asset values gathered since the last progress report.
running_reward = []

# Optional results file for later analysis (disabled); it should also
# record the model parameters.
# NOTE(review): the '/' characters in this timestamp would be treated as
# directory separators by open(); pick a different format before enabling.
# output = open("{}".format(datetime.now().strftime('%Y/%m/%d %H:%M:%S')), "w")
# Number of rounds between progress reports.
report_every = 100

# Training loop: run one simulation per round, carrying the policy returned
# by each run into the next, and periodically print the mean final asset
# value of the rounds completed since the previous report.
for j in range(num_iterations):
    # A fresh agent is built every round; only the policy persists.
    # (c and q are presumably starting cash and quantity -- TODO confirm.)
    agent = Agent(c=5000, q=0)
    sim = Simulation(agent, policy, market)
    policy, assets = sim.run()
    running_reward.append(assets)

    # Also report on the final round: otherwise any rounds after the last
    # multiple of report_every are collected but never shown (with
    # num_iterations < report_every only round 0 would be reported).
    is_last_round = j == num_iterations - 1
    if j % report_every == 0 or is_last_round:
        mean = np.mean(running_reward)
        running_reward = []
        print("Average cumulated asset value (round {}): {:10.2f}".format(j, mean))
        # output.write("{:10.2f}\n".format(mean))
# output.close()