Skip to content

Commit

Permalink
Return best Q value in action_info (#105)
Browse files Browse the repository at this point in the history
The second element (`info`) of the value returned by `action_info`
now includes the highest approximate Q value, stored under the key `best_Q`;
this is the value associated with the returned ("optimal") action.

Co-authored-by: Zachary Sunberg <[email protected]>
  • Loading branch information
thevolatilebit and zsunberg authored Oct 24, 2023
1 parent 39a3cdb commit f2e8fdb
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/dpw.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ function POMDPTools.action_info(p::DPWPlanner, s; tree_in_info=false)

sanode = best_sanode(tree, snode)
a = tree.a_labels[sanode] # choose action with highest approximate value
info[:best_Q] = tree.q[sanode] # export the approximate value for the action
catch ex
a = convert(actiontype(p.mdp), default_action(p.solver.default_action, p.mdp, s, ex))
info[:exception] = ex
Expand Down
2 changes: 1 addition & 1 deletion src/vanilla.jl
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ end
function POMDPTools.action_info(p::AbstractMCTSPlanner, s)
tree = plan!(p, s)
best = best_sanode_Q(get_state_node(tree, s))
return action(best), (tree=tree,)
return action(best), (tree=tree, best_Q=q(best))
end

POMDPs.action(p::AbstractMCTSPlanner, s) = first(action_info(p, s))
Expand Down

0 comments on commit f2e8fdb

Please sign in to comment.