a3c_lstm_tune_hps.txt
# IMPORTANT: These files list the old grids of HPs that were tuned over, along with other static HPs (from Ray 0.7.3), for the discrete toy experiments; they would need to be brought into a form compatible with the new run_experiments.py file.
# Grids of values over which the hyperparameters were tuned:
num_layerss = [1, 2, 3, 4]
layer_widths = [64, 128, 256]
learning_rates = [1e-2, 1e-3, 1e-4, 1e-5, 1e-6]
fcnet_activations = ["tanh", "relu", "sigmoid"]
lambdas = [0, 0.5, 0.95, 1.0]
grad_clips = [10, 30, 100]
vf_loss_coeffs = [0.1, 0.5, 2.5]
entropy_coeffs = [0.001, 0.01, 0.1, 1]
lstm_cell_sizes = [64, 256, 512]
lstm_use_prev_action_rewards = [False, True]
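# A minimal sketch (not part of the original file) of how these grid lists could be fed
# to Ray Tune's grid search when porting this to the new run_experiments.py setup.
# The exact wiring below is an assumption; the key names mirror the static config further down.
from ray import tune

grid_search_config = {
    "lr": tune.grid_search(learning_rates),
    "lambda": tune.grid_search(lambdas),
    "grad_clip": tune.grid_search(grad_clips),
    "vf_loss_coeff": tune.grid_search(vf_loss_coeffs),
    "entropy_coeff": tune.grid_search(entropy_coeffs),
    "model": {
        # fcnet_hiddens combinations built from the num_layerss x layer_widths grids
        "fcnet_hiddens": tune.grid_search(
            [[width] * num_layers for num_layers in num_layerss for width in layer_widths]
        ),
        "fcnet_activation": tune.grid_search(fcnet_activations),
        "lstm_cell_size": tune.grid_search(lstm_cell_sizes),
        "lstm_use_prev_action_reward": tune.grid_search(lstm_use_prev_action_rewards),
    },
}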
tune.run(
    "A3C",
    stop={
        "timesteps_total": 150000,
    },
    config={
        "sample_batch_size": 10,
        "train_batch_size": 100,
        "use_pytorch": False,
        "lambda": 0.0,
        "grad_clip": 10.0,
        "lr": 0.0001,
        "lr_schedule": None,
        "vf_loss_coeff": 0.1,
        "entropy_coeff": 0.1,
        "min_iter_time_s": 0,
        "sample_async": True,
        "timesteps_per_iteration": 5000,
        "num_workers": 3,
        "num_envs_per_worker": 5,
        "optimizer": {
            "grads_per_step": 10
        },
        "env": "RLToy-v0",
        "env_config": {
            'dummy_seed': dummy_seed,
            'seed': 0,
            'state_space_type': 'discrete',
            'action_space_type': 'discrete',
            'state_space_size': state_space_size,
            'action_space_size': action_space_size,
            'generate_random_mdp': True,
            'delay': delay,
            'sequence_length': sequence_length,
            'reward_density': reward_density,
            'terminal_state_density': terminal_state_density,
            'repeats_in_sequences': False,
            'reward_unit': 1.0,
            'make_denser': False,
            'completely_connected': True
        },
        "model": {
            "fcnet_hiddens": [128, 128, 128],
            "custom_preprocessor": "ohe",
            "custom_options": {},
            "fcnet_activation": "tanh",
            "use_lstm": True,
            "max_seq_len": delay + sequence_length,
            "lstm_cell_size": 64,
            "lstm_use_prev_action_reward": True,
        },
        "callbacks": {
            "on_episode_end": tune.function(on_episode_end),
            "on_train_result": tune.function(on_train_result),
        },
        "evaluation_interval": 1,
        "evaluation_config": {
            "exploration_fraction": 0,
            "exploration_final_eps": 0,
            "batch_mode": "complete_episodes",
            'horizon': 100,
            "env_config": {
                "dummy_eval": True,
            }
        },
    },
)
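# The snippet above assumes several names are defined by the enclosing experiment script
# and are not part of this file: dummy_seed, state_space_size, action_space_size, delay,
# sequence_length, reward_density, terminal_state_density, the callbacks on_episode_end /
# on_train_result, and the "ohe" custom preprocessor. Purely illustrative placeholders
# (assumptions, using the old Ray 0.7.x function-based callback interface and
# ModelCatalog registration) might look like:
import numpy as np
from ray.rllib.models import ModelCatalog
from ray.rllib.models.preprocessors import Preprocessor

class OneHotPreprocessor(Preprocessor):
    # Hypothetical one-hot encoder for the discrete observation space of RLToy-v0.
    def _init_shape(self, obs_space, options):
        self._size = obs_space.n
        return (self._size,)

    def transform(self, observation):
        one_hot = np.zeros(self._size, dtype=np.float32)
        one_hot[observation] = 1.0
        return one_hot

ModelCatalog.register_custom_preprocessor("ohe", OneHotPreprocessor)

def on_episode_end(info):
    # info["episode"] is the episode object; recording a custom metric is just an example.
    info["episode"].custom_metrics["dummy_metric"] = 0.0

def on_train_result(info):
    # info["result"] is the training result dict reported back to Tune.
    info["result"]["callback_ok"] = True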