Return best Q value in action_info (#105)

The second element (`info`) of the value returned by `action_info` now also contains the highest approximate Q value, i.e. the value associated with the "optimal" action that is returned. It is stored as `best_Q`.

Co-authored-by: Zachary Sunberg <[email protected]>
JuliaPOMDP · Oct 24, 2023 · f2e8fdb · f2e8fdb
1 parent 39a3cdb
commit f2e8fdb
Show file tree

Hide file tree

Showing 2 changed files with 2 additions and 1 deletion.
diff --git a/src/dpw.jl b/src/dpw.jl
@@ -64,6 +64,7 @@ function POMDPTools.action_info(p::DPWPlanner, s; tree_in_info=false)
 
         sanode = best_sanode(tree, snode)
         a = tree.a_labels[sanode] # choose action with highest approximate value
+        info[:best_Q] = tree.q[sanode] # export the approximate value for the action
     catch ex
         a = convert(actiontype(p.mdp), default_action(p.solver.default_action, p.mdp, s, ex))
         info[:exception] = ex

diff --git a/src/vanilla.jl b/src/vanilla.jl
@@ -188,7 +188,7 @@ end
 function POMDPTools.action_info(p::AbstractMCTSPlanner, s)
     tree = plan!(p, s)
     best = best_sanode_Q(get_state_node(tree, s))
-    return action(best), (tree=tree,)
+    return action(best), (tree=tree, best_Q=q(best))
 end
 
 POMDPs.action(p::AbstractMCTSPlanner, s) = first(action_info(p, s))