Commit 8639f1a

added x_prob() and x_val() functions to access individual parts of the matrices.

We now use NA internally to represent * in the POMDP definition.

actions, states and observations are now factors in most places.
mhahsler committed May 16, 2022
1 parent a1513af commit 8639f1a
Showing 30 changed files with 587 additions and 233 deletions.
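The accessors named in the commit message, observation_prob(), transition_prob() and reward_val() (see the NAMESPACE diff below), return individual entries of the model matrices instead of whole matrices. A minimal usage sketch on the Tiger problem shipped with the package; the argument names (action, start.state, end.state, observation) are assumptions for illustration and are not confirmed by this commit:

library(pomdp)
data("Tiger")

## single transition probability P(end.state | start.state, action)
## (argument names are assumed for illustration)
transition_prob(Tiger, action = "listen",
  start.state = "tiger-left", end.state = "tiger-left")

## single observation probability P(observation | end.state, action)
observation_prob(Tiger, action = "listen",
  end.state = "tiger-left", observation = "tiger-left")

## single reward value for an action taken in a start state
reward_val(Tiger, action = "listen", start.state = "tiger-left")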
1 change: 1 addition & 0 deletions DESCRIPTION
@@ -48,6 +48,7 @@ Collate:
 'reward.R'
 'round_stochchastic.R'
 'sample_belief_space.R'
+'simulate_MDP.R'
 'simulate_POMDP.R'
 'solve_MDP.R'
 'solve_POMDP.R'
7 changes: 4 additions & 3 deletions NAMESPACE
@@ -11,8 +11,8 @@ export(R_)
 export(T_)
 export(approx_MDP_policy_evaluation)
 export(estimate_belief_for_nodes)
-export(observation_function)
 export(observation_matrix)
+export(observation_prob)
 export(optimal_action)
 export(plot_belief_space)
 export(plot_policy_graph)
@@ -23,18 +23,19 @@ export(q_values_MDP)
 export(random_MDP_policy)
 export(read_POMDP)
 export(reward)
-export(reward_function)
 export(reward_matrix)
 export(reward_node_action)
+export(reward_val)
 export(round_stochastic)
 export(sample_belief_space)
+export(simulate_MDP)
 export(simulate_POMDP)
 export(solve_MDP)
 export(solve_POMDP)
 export(solve_POMDP_parameter)
 export(solve_SARSOP)
-export(transition_function)
 export(transition_matrix)
+export(transition_prob)
 export(update_belief)
 export(write_POMDP)
 import(graphics)
4 changes: 3 additions & 1 deletion NEWS.md
@@ -5,8 +5,10 @@
* reward() and reward_node_action() have now been separated.
* sample_belief_space() gained method 'trajectories'.
* simulate_POMDP(): now supports epsilon-greedy policies.
* observation_matrix() et al. functions are now created with a separate function ending in _function.
* added x_prob() and x_val() functions to access individual parts of the matrices.
* fixed converged finite-horizon case. It now only returns the converged graph/alpha.
* we now use NA internally to represent * in the POMDP definition.
* actions, states and observations are now factors in most places.

# pomdp 1.0.1 (03/25/2022)

60 changes: 55 additions & 5 deletions R/MDP.R
@@ -13,6 +13,9 @@
 #'
 #' More details on specifying the model components can be found in the documentation
 #' for [POMDP].
+#'
+#' @family MDP
+#'
 #' @include POMDP.R
 #' @param states a character vector specifying the names of the states.
 #' @param actions a character vector specifying the names of the available
@@ -103,7 +106,35 @@ MDP <- function(states,
 }
 
 #' @export
-print.MDP <- print.POMDP
+print.MDP <- function(x, ...) {
+  writeLines(paste(paste(class(x), collapse = ", "),
+    "-",
+    x$name))
+
+  if (!is.null(x$discount))
+    writeLines(sprintf(" Discount factor: %s",
+      paste(x$discount, collapse = "+")))
+
+  if (!is.null(x$horizon))
+    writeLines(sprintf(" Horizon: %s epochs",
+      paste(x$horizon, collapse = " + ")))
+
+  if (.solved_MDP(x))
+    writeLines(c(
+      " Solved:",
+      sprintf(" Solution converged: %s",
+        x$solution$converged)
+      )
+    )
+
+  writeLines(strwrap(
+    paste("List components:", paste(sQuote(names(
+      x
+    )), collapse = ", "), "\n"),
+    indent = 2,
+    exdent = 4
+  ))
+}
 
 #' @rdname MDP
 #' @export
@@ -117,14 +148,33 @@ MDP2POMDP <- function(x) {
   ident_matrix <- diag(length(x$states))
   dimnames(ident_matrix) <- list(x$states, x$observations)
 
-  x$observation_prob <- list('*' = ident_matrix)
+  x$observation_prob <- sapply(x$actions, FUN = function(x) ident_matrix, simplify = FALSE)
   class(x) <- c("MDP", "POMDP", "list")
   x
 }
 
-.solved_MDP <- function(x) {
+.solved_MDP <- function(x, stop = FALSE) {
   if (!inherits(x, "MDP"))
-    stop("x needs to be a POMDP object!")
-  if (is.null(x$solution))
     stop("x needs to be a MDP object!")
+  solved <- !is.null(x$solution)
+  if (stop && !solved)
+    stop("x needs to be a solved MDP. Use solve_MDP() first.")
+
+  solved
 }
+
+## this is .get_pg_index for MDPs
+.get_pol_index <- function(model, epoch) {
+
+  epoch <- as.integer(epoch)
+  if (epoch < 1L) stop("Epoch has to be >= 1")
+
+  ### (converged) infinite horizon POMDPs. We ignore epoch.
+  if (length(model$solution$policy) == 1L) return(1L)
+
+  ### regular epoch for finite/infinite horizon case
+  if (epoch > length(model$solution$policy))
+    stop("MDP model has only a policy up to epoch ", length(model$solution$policy))
+
+  return(epoch)
+}
2 changes: 1 addition & 1 deletion R/Maze.R
@@ -18,7 +18,7 @@
 #' The # (state `s_5`) in the middle of the maze is an obstruction and not reachable.
 #' Rewards are associated with transitions. The default reward (penalty) is -0.04.
 #' Transitioning to + (state `s_12`) gives a reward of 1.0, transitioning to - (state `s_11`)
-#' has a reward of -1.0. States `s_11` and `s_12` are terminal states.
+#' has a reward of -1.0. States `s_11` and `s_12` are terminal (absorbing) states.
 #'
 #' Actions are movements (`north`, `south`, `east`, `west`). The actions are unreliable with a .8 chance
 #' to move in the correct direction and a 0.1 chance to instead to move in a
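The R/Maze.R documentation above describes the classic 4x3 gridworld MDP. A minimal sketch of loading and solving it, assuming the Maze model ships as a data set and that solve_MDP() and policy() behave as in the released package (a sketch, not confirmed by this commit):

library(pomdp)
data("Maze")

## solve the maze MDP with the default solver settings and inspect the policy
sol <- solve_MDP(Maze)
policy(sol)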
