forked from YangRui2015/Sparse-Reward-Algorithms
agent_env_params.py
import numpy as np


def design_agent_and_env(FLAGS):
    # Environment params
    env_params = {}
    if FLAGS.env == "reach":
        env_params["env_name"] = "FetchReach-v1"
        env_params["has_object"] = False
        FLAGS.total_steps = 20
    else:
        raise ValueError("Unsupported environment: {}".format(FLAGS.env))

    # Number of actions each HDDPG layer gets to reach its subgoal:
    # the smallest integer whose layers-th power is at least total_steps,
    # i.e. ceil(total_steps ** (1 / layers)).
    x = pow(FLAGS.total_steps, 1 / FLAGS.layers)
    if x - int(x) == 0:
        FLAGS.time_scale = int(x)
    else:
        FLAGS.time_scale = int(x) + 1

    FLAGS.num_exploration_episodes = 100
    FLAGS.num_test_episodes = 100
    FLAGS.num_epochs = FLAGS.episodes // FLAGS.num_exploration_episodes

    env_params["obj_range"] = 0.15
    env_params["target_range"] = 0.15
    env_params["max_actions"] = FLAGS.total_steps

    distance_threshold = 0.05  # 5 cm
    env_params["end_goal_thresholds"] = distance_threshold
    env_params["subgoal_thresholds"] = distance_threshold

    # Curriculum: list of goal ranges from 0.05 up to 0.15, one entry per stage.
    if FLAGS.curriculum >= 2:
        env_params["curriculum_list"] = list(np.linspace(0.05, 0.15, FLAGS.curriculum))

    # Agent params
    agent_params = {}
    agent_params["subgoal_test_perc"] = 0.3
    agent_params["subgoal_penalty"] = -FLAGS.time_scale  # penalty for missing a subgoal
    agent_params["atomic_noise"] = 0.1
    agent_params["subgoal_noise"] = 0.03
    agent_params["epsilon"] = 0.1  # probability of choosing a random action
    agent_params["episodes_to_store"] = 1000
    agent_params["update_times"] = 40
    agent_params["batch_size"] = 64
    agent_params["imit_batch_size"] = 32
    agent_params["imit_ratio"] = FLAGS.imit_ratio

    return agent_params, env_params
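
Usage sketch (not part of the original file): FLAGS is expected to be an argparse-style namespace; only the attributes read by design_agent_and_env are set below, and the concrete values (layers=2, episodes=2000, curriculum=0, imit_ratio=0.5) are assumptions for illustration, not defaults taken from the repository.

from argparse import Namespace

# Hypothetical FLAGS object for illustration; values are assumed, not from the repo.
FLAGS = Namespace(
    env="reach",      # selects FetchReach-v1 above
    layers=2,         # number of levels in the HDDPG hierarchy
    episodes=2000,    # total training episodes
    curriculum=0,     # < 2 disables the curriculum list
    imit_ratio=0.5,   # passed through to agent_params["imit_ratio"]
)

agent_params, env_params = design_agent_and_env(FLAGS)
# With total_steps = 20 and layers = 2: time_scale = ceil(20 ** 0.5) = 5,
# and num_epochs = 2000 // 100 = 20.
print(env_params["env_name"], FLAGS.time_scale, FLAGS.num_epochs)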