main.cpp
#include <assert.h>
#include <iostream>
#include <cstdlib>
#include <cmath>
#include <time.h>
#include <algorithm>
#include <AIToolbox/POMDP/Algorithms/POMCP.hpp>
#include <AIToolbox/POMDP/Types.hpp>
#include <AIToolbox/POMDP/Utils.hpp>
#include "BWCPOMCP.hpp"
#include "pomdp.h"
int main (int argc, char *argv[]) {
    if (argc < 4) {
        std::cerr << "Expected three input parameters: the input POMDP file name, "
                  << "the worst-case lower bound "
                  << "and the horizon to plan for" << std::endl;
        std::cerr << "Call format:" << std::endl;
        std::cerr << argv[0] << " filename bound horizon" << std::endl;
        exit(1);
    }
    // total time computing next action
    double temp_time;
    double max_time = 0;
    double total_time = 0;
    // recover the POMDP from file and parse horizon
    const float threshold = std::atof(argv[2]);
    const long max_timestep = std::atoi(argv[3]);
    BWC::POMDP M(argv[1]);
    // check some of its properties to assert it is a valid MDP
    M.print(std::cout);
    assert(M.isValidMdp());
    assert(M.hasObsWeights());
    // obtain the initial belief
    AIToolbox::POMDP::Belief new_belief, current_belief;
    current_belief = M.getInitialBelief();
    // obtain the worst-case value of the belief game
    time_t now;
    std::cout << "Solving the game" << std::endl;
    BWC::POMDP N(M);
    time(&now);
    N.solveGameBeliefConstruction();
    temp_time = difftime(time(NULL), now);
    std::cout << "Done solving the game in "
              << temp_time << "s" << std::endl;
    // make the model in which we will simulate playing
    std::cout << "Start building AI-Toolbox model" << std::endl;
    auto model = M.makeModel();
    // create the model solver
    AIToolbox::POMDP::BWCPOMCP<decltype(model)> solver(
        model,
        1000,          // size of initial particle belief
        10000,         // number of episodes to run before completion
        100000000.0,   // the exploration constant
        threshold,     // the worst-case threshold
        &M,            // reference to original BWC POMDP
        &N);           // reference to belief-based game
    // simulate the game
    size_t current_state, new_state, action, current_obs, new_obs;
    float reward;
    float total_reward = 0;
    float disc = M.getDiscFactor();
    current_state = M.sampleInitialState();
    current_obs = -1;
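    // plan the first action from the initial belief, over the full horizon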
    time(&now);
    action = solver.sampleAction(current_belief,
                                 max_timestep);  // horizon to plan for
    temp_time = difftime(time(NULL), now);
    max_time = std::max(temp_time, max_time);
    total_time += temp_time;
    for (unsigned timestep = 0; timestep < max_timestep; ++timestep) {
        std::tie(new_state, new_obs, reward) =
            model.sampleSOR(current_state, action);
        std::cout << "played action " << M.getActionName(action)
                  << " from state " << M.getStateName(current_state)
                  << " and received state, obs, reward = "
                  << M.getStateName(new_state)
                  << ", " << M.getObsName(new_obs) << ", "
                  << reward << std::endl;
        new_belief = AIToolbox::POMDP::updateBeliefUnnormalized(model,
                                                                current_belief,
                                                                action, new_obs);
        total_reward += std::pow(disc, timestep) * reward;
        current_state = new_state;
        current_obs = new_obs;
        current_belief = new_belief;
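        // plan the next action from the action just played and the observation
        // received, with the remaining horizon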
        time(&now);
        action = solver.sampleAction(action, current_obs,
                                     max_timestep - (timestep + 1));
        temp_time = difftime(time(NULL), now);
        max_time = std::max(temp_time, max_time);
        total_time += temp_time;
    }
std::cout << "Obtained an accum reward of: " << total_reward << std::endl;
std::cout << "Max time waited for an action: " << max_time << "s" << std::endl;
std::cout << "Average time waited for actions: " << total_time / max_timestep << "s"
<< std::endl;
exit(EXIT_SUCCESS);
}