diff --git a/paynt/parser/sketch.py b/paynt/parser/sketch.py index 2712bdead..f26f88b2d 100644 --- a/paynt/parser/sketch.py +++ b/paynt/parser/sketch.py @@ -46,7 +46,7 @@ def make_rewards_action_based(model): for action in range(tm.get_row_group_start(state),tm.get_row_group_end(state)): action_reward[action] += state_reward - model.remove_reward_model(name) + payntbind.synthesis.remove_reward_model(model,name) new_reward_model = stormpy.storage.SparseRewardModel(optional_state_action_reward_vector=action_reward) model.add_reward_model(name, new_reward_model) diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index 73d2a2b5b..4c936e903 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -235,12 +235,14 @@ def node_id(self): def add_nodes_to_graphviz_tree(self, graphviz_tree): node_label = "" if self.sat is False: - node_label = "X" + node_label = "∅" + # node_label = "X" elif self.sat is True: # node_label = "✓" node_label = f"p{self.policy_index}" graphviz_tree.node(self.node_id, label=node_label, shape="ellipse", width="0.15", height="0.15") - for child in self.child_nodes: + # enumerating in reverse to print policies in ascending order, from left to right + for child in reversed(self.child_nodes): child.add_nodes_to_graphviz_tree(graphviz_tree) def add_edges_to_graphviz_tree(self, graphviz_tree): diff --git a/payntbind/src/synthesis/helpers.cpp b/payntbind/src/synthesis/helpers.cpp index 00b6413ee..8040944c3 100644 --- a/payntbind/src/synthesis/helpers.cpp +++ b/payntbind/src/synthesis/helpers.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace synthesis { @@ -36,6 +37,11 @@ std::shared_ptr transformUntilToEventually( return modified_formula; } +template +void removeRewardModel(storm::models::sparse::Model & model, std::string const& reward_name) { + model.removeRewardModel(reward_name); +} + } @@ -51,6 +57,7 @@ void define_helpers(py::module& m) { }); m.def("transform_until_to_eventually", &synthesis::transformUntilToEventually, py::arg("formula")); + m.def("remove_reward_model", &synthesis::removeRewardModel, py::arg("model"), py::arg("reward_name")); m.def("multiply_with_vector", [] (storm::storage::SparseMatrix matrix,std::vector vector) { std::vector result(matrix.getRowCount()); diff --git a/payntbind/src/synthesis/pomdp/PomdpManager.cpp b/payntbind/src/synthesis/pomdp/PomdpManager.cpp index 63e35be8e..1e381dbab 100644 --- a/payntbind/src/synthesis/pomdp/PomdpManager.cpp +++ b/payntbind/src/synthesis/pomdp/PomdpManager.cpp @@ -6,316 +6,313 @@ #include "storm/storage/SparseMatrix.h" #include "storm/models/sparse/StandardRewardModel.h" -namespace storm { - namespace synthesis { - +namespace synthesis { + + template + PomdpManager::PomdpManager(storm::models::sparse::Pomdp const& pomdp) + : pomdp(pomdp) { + STORM_LOG_THROW(pomdp.isCanonic(), storm::exceptions::InvalidArgumentException, "POMDP must be canonic"); + + auto num_prototype_states = pomdp.getNumberOfStates(); + auto num_prototype_rows = pomdp.getNumberOfChoices(); + auto num_observations = pomdp.getNrObservations(); + this->observation_actions.resize(num_observations,0); + this->observation_successors.resize(num_observations); + this->prototype_row_index.resize(num_prototype_rows,0); + + std::vector> observation_successor_sets; + observation_successor_sets.resize(num_observations); + + for(uint64_t prototype_state = 0; prototype_state < num_prototype_states; prototype_state++) { + auto observation = pomdp.getObservation(prototype_state); - template - PomdpManager::PomdpManager(storm::models::sparse::Pomdp const& pomdp) - : pomdp(pomdp) { - STORM_LOG_THROW(pomdp.isCanonic(), storm::exceptions::InvalidArgumentException, "POMDP must be canonic"); - - auto num_prototype_states = pomdp.getNumberOfStates(); - auto num_prototype_rows = pomdp.getNumberOfChoices(); - auto num_observations = pomdp.getNrObservations(); - this->observation_actions.resize(num_observations,0); - this->observation_successors.resize(num_observations); - this->prototype_row_index.resize(num_prototype_rows,0); - - std::vector> observation_successor_sets; - observation_successor_sets.resize(num_observations); - - for(uint64_t prototype_state = 0; prototype_state < num_prototype_states; prototype_state++) { - auto observation = pomdp.getObservation(prototype_state); - - auto const& row_group_indices = pomdp.getTransitionMatrix().getRowGroupIndices(); - uint64_t row_index = 0; - for ( - uint64_t prototype_row = row_group_indices[prototype_state]; - prototype_row < row_group_indices[prototype_state + 1]; - prototype_row++ - ) { - this->prototype_row_index[prototype_row] = row_index; - row_index++; - - for(auto const &entry: this->pomdp.getTransitionMatrix().getRow(prototype_row)) { - auto dst = entry.getColumn(); - auto dst_obs = this->pomdp.getObservation(dst); - observation_successor_sets[observation].insert(dst_obs); - } - } - - if(this->observation_actions[observation] != 0) { - continue; - } - this->observation_actions[observation] = pomdp.getTransitionMatrix().getRowGroupSize(prototype_state); - } - for(uint64_t obs = 0; obs < num_observations; obs++) { - this->observation_successors[obs] = std::vector( - observation_successor_sets[obs].begin(), - observation_successor_sets[obs].end() - ); - } + auto const& row_group_indices = pomdp.getTransitionMatrix().getRowGroupIndices(); + uint64_t row_index = 0; + for ( + uint64_t prototype_row = row_group_indices[prototype_state]; + prototype_row < row_group_indices[prototype_state + 1]; + prototype_row++ + ) { + this->prototype_row_index[prototype_row] = row_index; + row_index++; + + for(auto const &entry: this->pomdp.getTransitionMatrix().getRow(prototype_row)) { + auto dst = entry.getColumn(); + auto dst_obs = this->pomdp.getObservation(dst); + observation_successor_sets[observation].insert(dst_obs); + } + } + + if(this->observation_actions[observation] != 0) { + continue; + } + this->observation_actions[observation] = pomdp.getTransitionMatrix().getRowGroupSize(prototype_state); + } + for(uint64_t obs = 0; obs < num_observations; obs++) { + this->observation_successors[obs] = std::vector( + observation_successor_sets[obs].begin(), + observation_successor_sets[obs].end() + ); + } - this->observation_memory_size.resize(num_observations, 1); - this->prototype_duplicates.resize(num_prototype_states); - - this->max_successor_memory_size.resize(num_observations); - } + this->observation_memory_size.resize(num_observations, 1); + this->prototype_duplicates.resize(num_prototype_states); + + this->max_successor_memory_size.resize(num_observations); + } - - template - void PomdpManager::buildStateSpace() { - this->num_states = 0; - this->state_prototype.clear(); - this->state_memory.clear(); - for(uint64_t prototype = 0; prototype < this->pomdp.getNumberOfStates(); prototype++) { - auto obs = this->pomdp.getObservation(prototype); - auto memory_size = this->observation_memory_size[obs]; - this->prototype_duplicates[prototype].clear(); - this->prototype_duplicates[prototype].reserve(memory_size); - for(uint64_t memory = 0; memory < memory_size; memory++) { - this->prototype_duplicates[prototype].push_back(this->num_states); - this->state_prototype.push_back(prototype); - this->state_memory.push_back(memory); - this->num_states++; - } - } - } - - template - uint64_t PomdpManager::translateState(uint64_t prototype, uint64_t memory) { - if(memory >= this->prototype_duplicates[prototype].size()) { - memory = 0; - } - return this->prototype_duplicates[prototype][memory]; + template + void PomdpManager::buildStateSpace() { + this->num_states = 0; + this->state_prototype.clear(); + this->state_memory.clear(); + for(uint64_t prototype = 0; prototype < this->pomdp.getNumberOfStates(); prototype++) { + auto obs = this->pomdp.getObservation(prototype); + auto memory_size = this->observation_memory_size[obs]; + this->prototype_duplicates[prototype].clear(); + this->prototype_duplicates[prototype].reserve(memory_size); + for(uint64_t memory = 0; memory < memory_size; memory++) { + this->prototype_duplicates[prototype].push_back(this->num_states); + this->state_prototype.push_back(prototype); + this->state_memory.push_back(memory); + this->num_states++; } + } + } - - template - void PomdpManager::buildTransitionMatrixSpurious() { - // for each observation, define the maximum successor memory size - // this will define the number of copies we need to make of each row - for(uint64_t obs = 0; obs < this->pomdp.getNrObservations(); obs++) { - uint64_t max_mem_size = 0; - for(auto dst_obs: this->observation_successors[obs]) { - if(max_mem_size < this->observation_memory_size[dst_obs]) { - max_mem_size = this->observation_memory_size[dst_obs]; - } - } - this->max_successor_memory_size[obs] = max_mem_size; - } - this->row_groups.resize(this->num_states+1); - this->row_prototype.clear(); - this->row_memory.clear(); - - // TODO can simplify this: state (s,x) will have the same rows as state (s,0) - for(uint64_t state = 0; state < this->num_states; state++) { - this->row_groups[state] = this->row_prototype.size(); - auto prototype_state = this->state_prototype[state]; - auto observation = this->pomdp.getObservation(prototype_state); - auto const& row_group_indices = this->pomdp.getTransitionMatrix().getRowGroupIndices(); - for ( - uint64_t prototype_row = row_group_indices[prototype_state]; - prototype_row < row_group_indices[prototype_state + 1]; - prototype_row++ - ) { - // create the required number of copies of this row - // each transition will be associated with its own memory update - for(uint64_t dst_mem = 0; dst_mem < max_successor_memory_size[observation]; dst_mem++) { - this->row_prototype.push_back(prototype_row); - this->row_memory.push_back(dst_mem); - } - } + template + uint64_t PomdpManager::translateState(uint64_t prototype, uint64_t memory) { + if(memory >= this->prototype_duplicates[prototype].size()) { + memory = 0; + } + return this->prototype_duplicates[prototype][memory]; + } + + + template + void PomdpManager::buildTransitionMatrixSpurious() { + // for each observation, define the maximum successor memory size + // this will define the number of copies we need to make of each row + for(uint64_t obs = 0; obs < this->pomdp.getNrObservations(); obs++) { + uint64_t max_mem_size = 0; + for(auto dst_obs: this->observation_successors[obs]) { + if(max_mem_size < this->observation_memory_size[dst_obs]) { + max_mem_size = this->observation_memory_size[dst_obs]; } - this->num_rows = this->row_prototype.size(); - this->row_groups[this->num_states] = this->num_rows; } + this->max_successor_memory_size[obs] = max_mem_size; + } + + this->row_groups.resize(this->num_states+1); + this->row_prototype.clear(); + this->row_memory.clear(); + + // TODO can simplify this: state (s,x) will have the same rows as state (s,0) + for(uint64_t state = 0; state < this->num_states; state++) { + this->row_groups[state] = this->row_prototype.size(); + auto prototype_state = this->state_prototype[state]; + auto observation = this->pomdp.getObservation(prototype_state); + auto const& row_group_indices = this->pomdp.getTransitionMatrix().getRowGroupIndices(); + for ( + uint64_t prototype_row = row_group_indices[prototype_state]; + prototype_row < row_group_indices[prototype_state + 1]; + prototype_row++ + ) { + // create the required number of copies of this row + // each transition will be associated with its own memory update + for(uint64_t dst_mem = 0; dst_mem < max_successor_memory_size[observation]; dst_mem++) { + this->row_prototype.push_back(prototype_row); + this->row_memory.push_back(dst_mem); + } + } + } + this->num_rows = this->row_prototype.size(); + this->row_groups[this->num_states] = this->num_rows; + } - template - void PomdpManager::resetDesignSpace() { - auto num_observations = this->pomdp.getNrObservations(); - this->num_holes = 0; - this->action_holes.clear(); - this->action_holes.resize(num_observations); - this->memory_holes.clear(); - this->memory_holes.resize(num_observations); - this->hole_options.clear(); - - this->row_action_hole.clear(); - this->row_action_hole.resize(this->num_rows); - this->row_action_option.clear(); - this->row_action_option.resize(this->num_rows); - this->row_memory_hole.clear(); - this->row_memory_hole.resize(this->num_rows); - this->row_memory_option.clear(); - this->row_memory_option.resize(this->num_rows); - } + template + void PomdpManager::resetDesignSpace() { + auto num_observations = this->pomdp.getNrObservations(); + this->num_holes = 0; + this->action_holes.clear(); + this->action_holes.resize(num_observations); + this->memory_holes.clear(); + this->memory_holes.resize(num_observations); + this->hole_options.clear(); + + this->row_action_hole.clear(); + this->row_action_hole.resize(this->num_rows); + this->row_action_option.clear(); + this->row_action_option.resize(this->num_rows); + this->row_memory_hole.clear(); + this->row_memory_hole.resize(this->num_rows); + this->row_memory_option.clear(); + this->row_memory_option.resize(this->num_rows); + } - template - void PomdpManager::buildDesignSpaceSpurious() { - this->resetDesignSpace(); - - // for each (z,n) create an action and a memory hole (if necessary) - // store hole range - // ? inverse mapping ? - for(uint64_t obs = 0; obs < this->pomdp.getNrObservations(); obs++) { - if(this->observation_actions[obs] > 1) { - for(uint64_t mem = 0; mem < this->observation_memory_size[obs]; mem++) { - this->action_holes[obs].push_back(this->num_holes); - this->hole_options.push_back(this->observation_actions[obs]); - // std::cout << "created A(" << obs << "," << mem << ") = " << this->num_holes << " in {} of size " << this->observation_actions[obs] << std::endl; - this->num_holes++; - } - } - if(this->max_successor_memory_size[obs] > 1) { - for(uint64_t mem = 0; mem < this->observation_memory_size[obs]; mem++) { - this->memory_holes[obs].push_back(this->num_holes); - this->hole_options.push_back(this->max_successor_memory_size[obs]); - // std::cout << "created N(" << obs << "," << mem << ") = " << this->num_holes << " in {} of size " << this->max_successor_memory_size[obs] << std::endl; - this->num_holes++; - } - } + template + void PomdpManager::buildDesignSpaceSpurious() { + this->resetDesignSpace(); + + // for each (z,n) create an action and a memory hole (if necessary) + // store hole range + // ? inverse mapping ? + for(uint64_t obs = 0; obs < this->pomdp.getNrObservations(); obs++) { + if(this->observation_actions[obs] > 1) { + for(uint64_t mem = 0; mem < this->observation_memory_size[obs]; mem++) { + this->action_holes[obs].push_back(this->num_holes); + this->hole_options.push_back(this->observation_actions[obs]); + // std::cout << "created A(" << obs << "," << mem << ") = " << this->num_holes << " in {} of size " << this->observation_actions[obs] << std::endl; + this->num_holes++; } - - // map each row to some action (memory) hole (if applicable) and its value - for(uint64_t state = 0; state < this->num_states; state++) { - auto prototype = this->state_prototype[state]; - auto obs = this->pomdp.getObservation(prototype); - auto mem = this->state_memory[state]; - for (uint64_t row = this->row_groups[state]; row < this->row_groups[state+1]; row++) { - auto prototype_row = this->row_prototype[row]; - auto row_index = this->prototype_row_index[prototype_row]; - auto row_mem = this->row_memory[row]; - if(this->observation_actions[obs] > 1) { - // there is an action hole that corresponds to this state - auto action_hole = this->action_holes[obs][mem]; - this->row_action_hole[row] = action_hole; - this->row_action_option[row] = row_index; - } else { - // no corresponding action hole - this->row_action_hole[row] = this->num_holes; - } - if(this->max_successor_memory_size[obs] > 1) { - // there is a memory hole that corresponds to this state - auto memory_hole = this->memory_holes[obs][mem]; - this->row_memory_hole[row] = memory_hole; - this->row_memory_option[row] = row_mem; - } else { - this->row_memory_hole[row] = this->num_holes; - } - // std::cout << "row " << row << ": A[" << row_action_hole[row] << "]=" << row_action_option[row] << ", N[" << row_memory_hole[row] << "]=" << row_memory_option[row] << std::endl; - } + } + if(this->max_successor_memory_size[obs] > 1) { + for(uint64_t mem = 0; mem < this->observation_memory_size[obs]; mem++) { + this->memory_holes[obs].push_back(this->num_holes); + this->hole_options.push_back(this->max_successor_memory_size[obs]); + // std::cout << "created N(" << obs << "," << mem << ") = " << this->num_holes << " in {} of size " << this->max_successor_memory_size[obs] << std::endl; + this->num_holes++; + } + } + } + + // map each row to some action (memory) hole (if applicable) and its value + for(uint64_t state = 0; state < this->num_states; state++) { + auto prototype = this->state_prototype[state]; + auto obs = this->pomdp.getObservation(prototype); + auto mem = this->state_memory[state]; + for (uint64_t row = this->row_groups[state]; row < this->row_groups[state+1]; row++) { + auto prototype_row = this->row_prototype[row]; + auto row_index = this->prototype_row_index[prototype_row]; + auto row_mem = this->row_memory[row]; + if(this->observation_actions[obs] > 1) { + // there is an action hole that corresponds to this state + auto action_hole = this->action_holes[obs][mem]; + this->row_action_hole[row] = action_hole; + this->row_action_option[row] = row_index; + } else { + // no corresponding action hole + this->row_action_hole[row] = this->num_holes; } - } + if(this->max_successor_memory_size[obs] > 1) { + // there is a memory hole that corresponds to this state + auto memory_hole = this->memory_holes[obs][mem]; + this->row_memory_hole[row] = memory_hole; + this->row_memory_option[row] = row_mem; + } else { + this->row_memory_hole[row] = this->num_holes; + } + // std::cout << "row " << row << ": A[" << row_action_hole[row] << "]=" << row_action_option[row] << ", N[" << row_memory_hole[row] << "]=" << row_memory_option[row] << std::endl; + } + } + } + + + template + std::shared_ptr> PomdpManager::constructMdp() { + this->buildStateSpace(); + this->buildTransitionMatrixSpurious(); + + storm::storage::sparse::ModelComponents components; + components.transitionMatrix = this->constructTransitionMatrix(); + // TODO remove unreachable states + components.stateLabeling = this->constructStateLabeling(); + for (auto const& reward_model : this->pomdp.getRewardModels()) { + auto constructed = this->constructRewardModel(reward_model.second); + components.rewardModels.emplace(reward_model.first, constructed); + } + this->mdp = std::make_shared>(std::move(components)); + this->buildDesignSpaceSpurious(); + + return this->mdp; + } + template + storm::models::sparse::StateLabeling PomdpManager::constructStateLabeling() { + storm::models::sparse::StateLabeling labeling(this->num_states); + for (auto const& label : pomdp.getStateLabeling().getLabels()) { + storm::storage::BitVector label_flags(this->num_states, false); - template - std::shared_ptr> PomdpManager::constructMdp() { - this->buildStateSpace(); - this->buildTransitionMatrixSpurious(); - - storm::storage::sparse::ModelComponents components; - components.transitionMatrix = this->constructTransitionMatrix(); - // TODO remove unreachable states - components.stateLabeling = this->constructStateLabeling(); - for (auto const& reward_model : this->pomdp.getRewardModels()) { - auto constructed = this->constructRewardModel(reward_model.second); - components.rewardModels.emplace(reward_model.first, constructed); + if (label == "init") { + // init label is only assigned to states with the initial memory state + for (auto const& prototype : pomdp.getStateLabeling().getStates(label)) { + label_flags.set(translateState(prototype, 0)); } - this->mdp = std::make_shared>(std::move(components)); - this->buildDesignSpaceSpurious(); - - return this->mdp; - } - - template - storm::models::sparse::StateLabeling PomdpManager::constructStateLabeling() { - storm::models::sparse::StateLabeling labeling(this->num_states); - for (auto const& label : pomdp.getStateLabeling().getLabels()) { - storm::storage::BitVector label_flags(this->num_states, false); - - if (label == "init") { - // init label is only assigned to states with the initial memory state - for (auto const& prototype : pomdp.getStateLabeling().getStates(label)) { - label_flags.set(translateState(prototype, 0)); - } - } else { - for (auto const& prototype : pomdp.getStateLabeling().getStates(label)) { - for(auto duplicate: this->prototype_duplicates[prototype]) { - label_flags.set(duplicate); - } - } + } else { + for (auto const& prototype : pomdp.getStateLabeling().getStates(label)) { + for(auto duplicate: this->prototype_duplicates[prototype]) { + label_flags.set(duplicate); } - labeling.addLabel(label, std::move(label_flags)); } - return labeling; } + labeling.addLabel(label, std::move(label_flags)); + } + return labeling; + } - template - storm::storage::SparseMatrix PomdpManager::constructTransitionMatrix() { - storm::storage::SparseMatrixBuilder builder( - this->num_rows, this->num_states, 0, true, true, this->num_states - ); - for(uint64_t state = 0; state < this->num_states; state++) { - builder.newRowGroup(this->row_groups[state]); - for (uint64_t row = this->row_groups[state]; row < this->row_groups[state+1]; row++) { - auto prototype_row = this->row_prototype[row]; - auto dst_mem = this->row_memory[row]; - for(auto const &entry: this->pomdp.getTransitionMatrix().getRow(prototype_row)) { - auto dst = this->translateState(entry.getColumn(),dst_mem); - builder.addNextValue(row, dst, entry.getValue()); - } - } + template + storm::storage::SparseMatrix PomdpManager::constructTransitionMatrix() { + storm::storage::SparseMatrixBuilder builder( + this->num_rows, this->num_states, 0, true, true, this->num_states + ); + for(uint64_t state = 0; state < this->num_states; state++) { + builder.newRowGroup(this->row_groups[state]); + for (uint64_t row = this->row_groups[state]; row < this->row_groups[state+1]; row++) { + auto prototype_row = this->row_prototype[row]; + auto dst_mem = this->row_memory[row]; + for(auto const &entry: this->pomdp.getTransitionMatrix().getRow(prototype_row)) { + auto dst = this->translateState(entry.getColumn(),dst_mem); + builder.addNextValue(row, dst, entry.getValue()); } - - return builder.build(); } + } + return builder.build(); + } - template - storm::models::sparse::StandardRewardModel PomdpManager::constructRewardModel( - storm::models::sparse::StandardRewardModel const& reward_model - ) { - std::optional> state_rewards, action_rewards; - STORM_LOG_THROW(!reward_model.hasStateRewards(), storm::exceptions::NotSupportedException, "state rewards are currently not supported."); - STORM_LOG_THROW(!reward_model.hasTransitionRewards(), storm::exceptions::NotSupportedException, "transition rewards are currently not supported."); - if(not reward_model.hasStateActionRewards()) { - STORM_LOG_WARN("Reward model exists but has no state-action value vector associated with it."); - return storm::models::sparse::StandardRewardModel(std::move(state_rewards), std::move(action_rewards)); - } - action_rewards = std::vector(); - for(uint64_t row = 0; row < this->num_rows; row++) { - auto prototype = this->row_prototype[row]; - auto reward = reward_model.getStateActionReward(prototype); - action_rewards->push_back(reward); - } - return storm::models::sparse::StandardRewardModel(std::move(state_rewards), std::move(action_rewards)); - } + template + storm::models::sparse::StandardRewardModel PomdpManager::constructRewardModel( + storm::models::sparse::StandardRewardModel const& reward_model + ) { + std::optional> state_rewards, action_rewards; + STORM_LOG_THROW(!reward_model.hasStateRewards(), storm::exceptions::NotSupportedException, "state rewards are currently not supported."); + STORM_LOG_THROW(!reward_model.hasTransitionRewards(), storm::exceptions::NotSupportedException, "transition rewards are currently not supported."); + if(not reward_model.hasStateActionRewards()) { + STORM_LOG_WARN("Reward model exists but has no state-action value vector associated with it."); + return storm::models::sparse::StandardRewardModel(std::move(state_rewards), std::move(action_rewards)); + } + + action_rewards = std::vector(); + for(uint64_t row = 0; row < this->num_rows; row++) { + auto prototype = this->row_prototype[row]; + auto reward = reward_model.getStateActionReward(prototype); + action_rewards->push_back(reward); + } + return storm::models::sparse::StandardRewardModel(std::move(state_rewards), std::move(action_rewards)); + } - template - void PomdpManager::setObservationMemorySize(uint64_t obs, uint64_t memory_size) { - assert(obs < this->pomdp.getNrObservations()); - this->observation_memory_size[obs] = memory_size; - } + template + void PomdpManager::setObservationMemorySize(uint64_t obs, uint64_t memory_size) { + assert(obs < this->pomdp.getNrObservations()); + this->observation_memory_size[obs] = memory_size; + } - template - void PomdpManager::setGlobalMemorySize(uint64_t memory_size) { - for(uint64_t obs = 0; obs < this->pomdp.getNrObservations(); obs++) { - this->observation_memory_size[obs] = memory_size; - } - } + template + void PomdpManager::setGlobalMemorySize(uint64_t memory_size) { + for(uint64_t obs = 0; obs < this->pomdp.getNrObservations(); obs++) { + this->observation_memory_size[obs] = memory_size; + } + } - template class PomdpManager; + template class PomdpManager; - } } \ No newline at end of file diff --git a/payntbind/src/synthesis/pomdp/PomdpManager.h b/payntbind/src/synthesis/pomdp/PomdpManager.h index f9fb94a78..9ed59ad17 100644 --- a/payntbind/src/synthesis/pomdp/PomdpManager.h +++ b/payntbind/src/synthesis/pomdp/PomdpManager.h @@ -3,126 +3,124 @@ #include "storm/models/sparse/Mdp.h" #include "storm/models/sparse/Pomdp.h" -namespace storm { - namespace synthesis { - - template - class PomdpManager { - - public: - - PomdpManager(storm::models::sparse::Pomdp const& pomdp); - - // number of actions available at this observation - std::vector observation_actions; - // for each observation, a list of successor observations - std::vector> observation_successors; - - /** Memory manipulation . */ - - // for each observation contains the number of allocated memory states (initially 1) - std::vector observation_memory_size; - - // set memory size to a selected observation - void setObservationMemorySize(uint64_t obs, uint64_t memory_size); - // set memory size to all observations - void setGlobalMemorySize(uint64_t memory_size); - - // unfold memory model (a priori memory update) into the POMDP - std::shared_ptr> constructMdp(); - - /** Design space associated with this POMDP. */ - - // total number of holes - uint64_t num_holes; - // for each observation, a list of action holes - std::vector> action_holes; - // for each observation, a list of memory holes - std::vector> memory_holes; - // for each hole, its size - std::vector hole_options; - - /** Unfolded MDP stuff. */ - - // MDP obtained after last injection (initially contains MDP-ized POMDP) - std::shared_ptr> mdp; - - // for each state contains its prototype state (reverse of prototype_duplicates) - std::vector state_prototype; - // for each state contains its memory index - std::vector state_memory; - - // for each row, the corresponding action hole - std::vector row_action_hole; - // for each row, the corresponding option of the action hole - std::vector row_action_option; - // for each row, the corresponding memory hole - std::vector row_memory_hole; - // for each row, the corresponding option of the memory hole - std::vector row_memory_option; - - // for each observation contains the maximum memory size of a destination - // across all rows of a prototype state having this observation - std::vector max_successor_memory_size; - - - private: - - /** - * Build the state space: - * - compute total number of states (@num_states) - * - associate prototype states with their duplicates (@prototype_duplicates) - * - for each state, remember its prototype (@state_prototype) - * - for each state, remember its memory (@state_memory) - */ - void buildStateSpace(); - - /** - * Get index of the @memory equivalent of the @prototype. - * If the prototype does not have the corresponding memory - * equivalent, default to @memory=0. - */ - uint64_t translateState(uint64_t prototype, uint64_t memory); - - // compute max memory size among all destinations of a prototype row - uint64_t maxSuccessorMemorySize(uint64_t prototype_row); - - /** - * Build the shape of the transition matrix: - * - for each row store its prototype (@row_prototype) - * - for each row store its memory index (@row_memory) - * - deduce row groups of the resulting transition matrix (@row_groups) - * - deduce the overall number of rows (@num_rows) - */ - void buildTransitionMatrix(); - - void buildTransitionMatrixSpurious(); - - void resetDesignSpace(); - void buildDesignSpaceSpurious(); - - storm::models::sparse::StateLabeling constructStateLabeling(); - storm::storage::SparseMatrix constructTransitionMatrix(); - storm::models::sparse::StandardRewardModel constructRewardModel(storm::models::sparse::StandardRewardModel const& reward_model); - - // original POMDP - storm::models::sparse::Pomdp const& pomdp; - // for each row of a POMDP contains its index in its row group - std::vector prototype_row_index; - - // number of states in an unfolded MDP - uint64_t num_states; - // for each prototype state contains a list of its duplicates (including itself) - std::vector> prototype_duplicates; - - // number of rows in an unfolded MDP - uint64_t num_rows; - // row groups of the resulting transition matrix - std::vector row_groups; - // for each row contains index of the prototype row - std::vector row_prototype; - // for each row contains a memory update associated with it - std::vector row_memory; - }; - } +namespace synthesis { + + template + class PomdpManager { + + public: + + PomdpManager(storm::models::sparse::Pomdp const& pomdp); + + // number of actions available at this observation + std::vector observation_actions; + // for each observation, a list of successor observations + std::vector> observation_successors; + + /** Memory manipulation . */ + + // for each observation contains the number of allocated memory states (initially 1) + std::vector observation_memory_size; + + // set memory size to a selected observation + void setObservationMemorySize(uint64_t obs, uint64_t memory_size); + // set memory size to all observations + void setGlobalMemorySize(uint64_t memory_size); + + // unfold memory model (a priori memory update) into the POMDP + std::shared_ptr> constructMdp(); + + /** Design space associated with this POMDP. */ + + // total number of holes + uint64_t num_holes; + // for each observation, a list of action holes + std::vector> action_holes; + // for each observation, a list of memory holes + std::vector> memory_holes; + // for each hole, its size + std::vector hole_options; + + /** Unfolded MDP stuff. */ + + // MDP obtained after last injection (initially contains MDP-ized POMDP) + std::shared_ptr> mdp; + + // for each state contains its prototype state (reverse of prototype_duplicates) + std::vector state_prototype; + // for each state contains its memory index + std::vector state_memory; + + // for each row, the corresponding action hole + std::vector row_action_hole; + // for each row, the corresponding option of the action hole + std::vector row_action_option; + // for each row, the corresponding memory hole + std::vector row_memory_hole; + // for each row, the corresponding option of the memory hole + std::vector row_memory_option; + + // for each observation contains the maximum memory size of a destination + // across all rows of a prototype state having this observation + std::vector max_successor_memory_size; + + + private: + + /** + * Build the state space: + * - compute total number of states (@num_states) + * - associate prototype states with their duplicates (@prototype_duplicates) + * - for each state, remember its prototype (@state_prototype) + * - for each state, remember its memory (@state_memory) + */ + void buildStateSpace(); + + /** + * Get index of the @memory equivalent of the @prototype. + * If the prototype does not have the corresponding memory + * equivalent, default to @memory=0. + */ + uint64_t translateState(uint64_t prototype, uint64_t memory); + + // compute max memory size among all destinations of a prototype row + uint64_t maxSuccessorMemorySize(uint64_t prototype_row); + + /** + * Build the shape of the transition matrix: + * - for each row store its prototype (@row_prototype) + * - for each row store its memory index (@row_memory) + * - deduce row groups of the resulting transition matrix (@row_groups) + * - deduce the overall number of rows (@num_rows) + */ + void buildTransitionMatrix(); + + void buildTransitionMatrixSpurious(); + + void resetDesignSpace(); + void buildDesignSpaceSpurious(); + + storm::models::sparse::StateLabeling constructStateLabeling(); + storm::storage::SparseMatrix constructTransitionMatrix(); + storm::models::sparse::StandardRewardModel constructRewardModel(storm::models::sparse::StandardRewardModel const& reward_model); + + // original POMDP + storm::models::sparse::Pomdp const& pomdp; + // for each row of a POMDP contains its index in its row group + std::vector prototype_row_index; + + // number of states in an unfolded MDP + uint64_t num_states; + // for each prototype state contains a list of its duplicates (including itself) + std::vector> prototype_duplicates; + + // number of rows in an unfolded MDP + uint64_t num_rows; + // row groups of the resulting transition matrix + std::vector row_groups; + // for each row contains index of the prototype row + std::vector row_prototype; + // for each row contains a memory update associated with it + std::vector row_memory; + }; } \ No newline at end of file diff --git a/payntbind/src/synthesis/pomdp/PomdpManagerAposteriori.cpp b/payntbind/src/synthesis/pomdp/PomdpManagerAposteriori.cpp index 2c22d93a6..f7af7da63 100644 --- a/payntbind/src/synthesis/pomdp/PomdpManagerAposteriori.cpp +++ b/payntbind/src/synthesis/pomdp/PomdpManagerAposteriori.cpp @@ -6,258 +6,255 @@ #include "storm/storage/SparseMatrix.h" #include "storm/models/sparse/StandardRewardModel.h" -namespace storm { - namespace synthesis { - - - template - PomdpManagerAposteriori::PomdpManagerAposteriori(storm::models::sparse::Pomdp const& pomdp) - : pomdp(pomdp) { - - auto num_observations = this->pomdp.getNrObservations(); - auto const& tm = this->pomdp.getTransitionMatrix(); - auto const& row_group_indices = tm.getRowGroupIndices(); - - this->observation_actions.resize(num_observations,0); - for(uint64_t state = 0; state < this->pomdp.getNumberOfStates(); state++) { - auto obs = this->pomdp.getObservation(state); - if(this->observation_actions[obs] == 0) { - this->observation_actions[obs] = tm.getRowGroupSize(state); - } - } - - this->row_posteriors.resize(tm.getRowCount()); - this->observation_posteriors.resize(num_observations); - for(uint64_t state = 0; state < this->pomdp.getNumberOfStates(); state++) { - auto prior = this->pomdp.getObservation(state); - for(auto row = row_group_indices[state]; row < row_group_indices[state+1]; row++) { - std::set posteriors; - for(auto const &entry: tm.getRow(row)) { - auto successor_state = entry.getColumn(); - auto posterior = this->pomdp.getObservation(successor_state); - posteriors.insert(posterior); - this->observation_posteriors[prior].insert(posterior); - } - this->row_posteriors[row] = std::vector(posteriors.begin(),posteriors.end()); - } - } - - this->observation_memory_size.resize(num_observations, 1); +namespace synthesis { + + template + PomdpManagerAposteriori::PomdpManagerAposteriori(storm::models::sparse::Pomdp const& pomdp) + : pomdp(pomdp) { + + auto num_observations = this->pomdp.getNrObservations(); + auto const& tm = this->pomdp.getTransitionMatrix(); + auto const& row_group_indices = tm.getRowGroupIndices(); + + this->observation_actions.resize(num_observations,0); + for(uint64_t state = 0; state < this->pomdp.getNumberOfStates(); state++) { + auto obs = this->pomdp.getObservation(state); + if(this->observation_actions[obs] == 0) { + this->observation_actions[obs] = tm.getRowGroupSize(state); } - - - template - void PomdpManagerAposteriori::setObservationMemorySize(uint64_t obs, uint64_t memory_size) { - assert(obs < this->pomdp.getNrObservations()); - this->observation_memory_size[obs] = memory_size; - } - - template - void PomdpManagerAposteriori::setGlobalMemorySize(uint64_t memory_size) { - for(uint64_t obs = 0; obs < this->pomdp.getNrObservations(); obs++) { - this->observation_memory_size[obs] = memory_size; + } + + this->row_posteriors.resize(tm.getRowCount()); + this->observation_posteriors.resize(num_observations); + for(uint64_t state = 0; state < this->pomdp.getNumberOfStates(); state++) { + auto prior = this->pomdp.getObservation(state); + for(auto row = row_group_indices[state]; row < row_group_indices[state+1]; row++) { + std::set posteriors; + for(auto const &entry: tm.getRow(row)) { + auto successor_state = entry.getColumn(); + auto posterior = this->pomdp.getObservation(successor_state); + posteriors.insert(posterior); + this->observation_posteriors[prior].insert(posterior); } + this->row_posteriors[row] = std::vector(posteriors.begin(),posteriors.end()); } + } + this->observation_memory_size.resize(num_observations, 1); + } + + + template + void PomdpManagerAposteriori::setObservationMemorySize(uint64_t obs, uint64_t memory_size) { + assert(obs < this->pomdp.getNrObservations()); + this->observation_memory_size[obs] = memory_size; + } + template + void PomdpManagerAposteriori::setGlobalMemorySize(uint64_t memory_size) { + for(uint64_t obs = 0; obs < this->pomdp.getNrObservations(); obs++) { + this->observation_memory_size[obs] = memory_size; + } + } - template - void PomdpManagerAposteriori::clear_before_unfolding() { - - this->num_states = 0; - this->prototype_state_copies.clear(); - this->state_prototype.clear(); - this->state_memory.clear(); - this->action_holes.clear(); - this->update_holes.clear(); - this->hole_num_options.clear(); - this->row_prototype.clear(); - this->coloring.clear(); - } + template + void PomdpManagerAposteriori::clear_before_unfolding() { + + this->num_states = 0; + this->prototype_state_copies.clear(); + this->state_prototype.clear(); + this->state_memory.clear(); - template - void PomdpManagerAposteriori::clear_after_unfolding() { - - this->num_states = 0; - this->prototype_state_copies.clear(); + this->action_holes.clear(); + this->update_holes.clear(); + this->hole_num_options.clear(); - this->row_prototype.clear(); - } + this->row_prototype.clear(); + this->coloring.clear(); + } + template + void PomdpManagerAposteriori::clear_after_unfolding() { + + this->num_states = 0; + this->prototype_state_copies.clear(); - template - void PomdpManagerAposteriori::buildStateSpace() { - this->prototype_state_copies.resize(this->pomdp.getNumberOfStates()); - for(uint64_t prototype = 0; prototype < this->pomdp.getNumberOfStates(); prototype++) { - auto obs = this->pomdp.getObservation(prototype); - auto memory_size = this->observation_memory_size[obs]; - this->prototype_state_copies[prototype].resize(memory_size); - for(uint64_t memory = 0; memory < memory_size; memory++) { - this->prototype_state_copies[prototype][memory] = this->num_states++; - this->state_prototype.push_back(prototype); - this->state_memory.push_back(memory); - } - } - } + this->row_prototype.clear(); + } - template - void PomdpManagerAposteriori::buildDesignSpace() { - uint64_t num_holes = 0; - for(uint64_t prior = 0; prior < this->pomdp.getNrObservations(); prior++) { - auto num_actions = this->observation_actions[prior]; - for(uint64_t mem = 0; mem < this->observation_memory_size[prior]; mem++) { - // gamma(n,z) in Act - this->action_holes[std::make_pair(mem,prior)] = num_holes++; - this->hole_num_options.push_back(num_actions); - - for(auto posterior: this->observation_posteriors[prior]) { - // delta(n,z,z') in mu(z') - auto num_updates = this->observation_memory_size[posterior]; - this->update_holes[std::make_tuple(mem,prior,posterior)] = num_holes++; - this->hole_num_options.push_back(num_updates); - } - } - } + template + void PomdpManagerAposteriori::buildStateSpace() { + this->prototype_state_copies.resize(this->pomdp.getNumberOfStates()); + for(uint64_t prototype = 0; prototype < this->pomdp.getNumberOfStates(); prototype++) { + auto obs = this->pomdp.getObservation(prototype); + auto memory_size = this->observation_memory_size[obs]; + this->prototype_state_copies[prototype].resize(memory_size); + for(uint64_t memory = 0; memory < memory_size; memory++) { + this->prototype_state_copies[prototype][memory] = this->num_states++; + this->state_prototype.push_back(prototype); + this->state_memory.push_back(memory); } + } + } - template - storm::models::sparse::StateLabeling PomdpManagerAposteriori::constructStateLabeling() { - storm::models::sparse::StateLabeling labeling(this->num_states); - for (auto const& label : this->pomdp.getStateLabeling().getLabels()) { - storm::storage::BitVector label_flags(this->num_states, false); - for (auto const& prototype : this->pomdp.getStateLabeling().getStates(label)) { - for(auto state: this->prototype_state_copies[prototype]) { - label_flags.set(state); - if (label == "init") { - break; - } - } - } - labeling.addLabel(label, std::move(label_flags)); + template + void PomdpManagerAposteriori::buildDesignSpace() { + uint64_t num_holes = 0; + for(uint64_t prior = 0; prior < this->pomdp.getNrObservations(); prior++) { + auto num_actions = this->observation_actions[prior]; + for(uint64_t mem = 0; mem < this->observation_memory_size[prior]; mem++) { + // gamma(n,z) in Act + this->action_holes[std::make_pair(mem,prior)] = num_holes++; + this->hole_num_options.push_back(num_actions); + + for(auto posterior: this->observation_posteriors[prior]) { + // delta(n,z,z') in mu(z') + auto num_updates = this->observation_memory_size[posterior]; + this->update_holes[std::make_tuple(mem,prior,posterior)] = num_holes++; + this->hole_num_options.push_back(num_updates); } - return labeling; } + } + } - template - void PomdpManagerAposteriori::unfoldRow( - storm::storage::SparseMatrixBuilder & builder, - uint64_t pomdp_state, uint64_t memory, uint64_t action - ) { - auto prior = this->pomdp.getObservation(pomdp_state); - auto prototype_row = this->pomdp.getTransitionMatrix().getRowGroupIndices()[pomdp_state] + action; - auto const& posteriors = this->row_posteriors[prototype_row]; - auto action_hole = this->action_holes[std::make_pair(memory,prior)]; - - // iterate over all combinations of memory updates - uint64_t num_combinations = 1; - for(auto posterior: posteriors) { - num_combinations *= this->observation_memory_size[posterior]; - } - std::map combination; - for(uint64_t c=0 ; c=0; i--) { - auto posterior = posteriors[i]; - auto posterior_size = this->observation_memory_size[posterior]; - combination[posterior] = index % posterior_size; - index = index / posterior_size; - } - - // add row - for(auto const &entry: this->pomdp.getTransitionMatrix().getRow(prototype_row)) { - auto successor_pomdp_state = entry.getColumn(); - auto posterior = this->pomdp.getObservation(successor_pomdp_state); - auto successor_memory = combination[posterior]; - auto successor_state = this->prototype_state_copies[successor_pomdp_state][successor_memory]; - builder.addNextValue(this->num_unfolded_rows(),successor_state,entry.getValue()); - } - // add row coloring - std::map coloring; - coloring[action_hole] = action; - for(uint64_t index = 0; index < posteriors.size(); index++) { - auto posterior = posteriors[index]; - auto update_hole = this->update_holes[std::make_tuple(memory,prior,posterior)]; - coloring[update_hole] = combination[posterior]; + template + storm::models::sparse::StateLabeling PomdpManagerAposteriori::constructStateLabeling() { + storm::models::sparse::StateLabeling labeling(this->num_states); + for (auto const& label : this->pomdp.getStateLabeling().getLabels()) { + storm::storage::BitVector label_flags(this->num_states, false); + for (auto const& prototype : this->pomdp.getStateLabeling().getStates(label)) { + for(auto state: this->prototype_state_copies[prototype]) { + label_flags.set(state); + if (label == "init") { + break; } - this->coloring.push_back(std::move(coloring)); - - // register prototype - this->row_prototype.push_back(prototype_row); } } + labeling.addLabel(label, std::move(label_flags)); + } + return labeling; + } + template + void PomdpManagerAposteriori::unfoldRow( + storm::storage::SparseMatrixBuilder & builder, + uint64_t pomdp_state, uint64_t memory, uint64_t action + ) { + auto prior = this->pomdp.getObservation(pomdp_state); + auto prototype_row = this->pomdp.getTransitionMatrix().getRowGroupIndices()[pomdp_state] + action; + auto const& posteriors = this->row_posteriors[prototype_row]; + auto action_hole = this->action_holes[std::make_pair(memory,prior)]; + + // iterate over all combinations of memory updates + uint64_t num_combinations = 1; + for(auto posterior: posteriors) { + num_combinations *= this->observation_memory_size[posterior]; + } + std::map combination; + for(uint64_t c=0 ; c=0; i--) { + auto posterior = posteriors[i]; + auto posterior_size = this->observation_memory_size[posterior]; + combination[posterior] = index % posterior_size; + index = index / posterior_size; + } - template - storm::storage::SparseMatrix PomdpManagerAposteriori::constructTransitionMatrix() { + // add row + for(auto const &entry: this->pomdp.getTransitionMatrix().getRow(prototype_row)) { + auto successor_pomdp_state = entry.getColumn(); + auto posterior = this->pomdp.getObservation(successor_pomdp_state); + auto successor_memory = combination[posterior]; + auto successor_state = this->prototype_state_copies[successor_pomdp_state][successor_memory]; + builder.addNextValue(this->num_unfolded_rows(),successor_state,entry.getValue()); + } - storm::storage::SparseMatrixBuilder builder( - 0, this->num_states, 0, true, true, this->num_states - ); - for(uint64_t pomdp_state = 0; pomdp_state < this->pomdp.getNumberOfStates(); pomdp_state++) { - auto prior = this->pomdp.getObservation(pomdp_state); - for(uint64_t memory = 0; memory < this->observation_memory_size[prior]; memory++) { - builder.newRowGroup(this->num_unfolded_rows()); - for(uint64_t action = 0; action < this->pomdp.getTransitionMatrix().getRowGroupSize(pomdp_state); action++) { - this->unfoldRow(builder,pomdp_state,memory,action); - } - } - } - return builder.build(); + // add row coloring + std::map coloring; + coloring[action_hole] = action; + for(uint64_t index = 0; index < posteriors.size(); index++) { + auto posterior = posteriors[index]; + auto update_hole = this->update_holes[std::make_tuple(memory,prior,posterior)]; + coloring[update_hole] = combination[posterior]; } + this->coloring.push_back(std::move(coloring)); + // register prototype + this->row_prototype.push_back(prototype_row); + } + } - template - storm::models::sparse::StandardRewardModel PomdpManagerAposteriori::constructRewardModel( - storm::models::sparse::StandardRewardModel const& reward_model - ) { - std::optional> state_rewards, action_rewards; - STORM_LOG_THROW(!reward_model.hasStateRewards(), storm::exceptions::NotSupportedException, "state rewards are currently not supported."); - STORM_LOG_THROW(!reward_model.hasTransitionRewards(), storm::exceptions::NotSupportedException, "transition rewards are currently not supported."); - if(not reward_model.hasStateActionRewards()) { - STORM_LOG_WARN("Reward model exists but has no state-action value vector associated with it."); - return storm::models::sparse::StandardRewardModel(std::move(state_rewards), std::move(action_rewards)); - } - action_rewards = std::vector(); - for(uint64_t row = 0; row < this->row_prototype.size(); row++) { - auto prototype = this->row_prototype[row]; - auto reward = reward_model.getStateActionReward(prototype); - action_rewards->push_back(reward); + template + storm::storage::SparseMatrix PomdpManagerAposteriori::constructTransitionMatrix() { + + storm::storage::SparseMatrixBuilder builder( + 0, this->num_states, 0, true, true, this->num_states + ); + for(uint64_t pomdp_state = 0; pomdp_state < this->pomdp.getNumberOfStates(); pomdp_state++) { + auto prior = this->pomdp.getObservation(pomdp_state); + for(uint64_t memory = 0; memory < this->observation_memory_size[prior]; memory++) { + builder.newRowGroup(this->num_unfolded_rows()); + for(uint64_t action = 0; action < this->pomdp.getTransitionMatrix().getRowGroupSize(pomdp_state); action++) { + this->unfoldRow(builder,pomdp_state,memory,action); } - return storm::models::sparse::StandardRewardModel(std::move(state_rewards), std::move(action_rewards)); } - - template - std::shared_ptr> PomdpManagerAposteriori::constructMdp() { - - this->clear_before_unfolding(); - - this->buildStateSpace(); - this->buildDesignSpace(); - - storm::storage::sparse::ModelComponents components; - components.transitionMatrix = this->constructTransitionMatrix(); - assert(components.transitionMatrix.isProbabilistic()); - components.stateLabeling = this->constructStateLabeling(); - for (auto const& reward_model : this->pomdp.getRewardModels()) { - auto constructed = this->constructRewardModel(reward_model.second); - components.rewardModels.emplace(reward_model.first, constructed); - } - - this->mdp = std::make_shared>(std::move(components)); - - this->clear_after_unfolding(); + } + return builder.build(); + } - return this->mdp; - } + template + storm::models::sparse::StandardRewardModel PomdpManagerAposteriori::constructRewardModel( + storm::models::sparse::StandardRewardModel const& reward_model + ) { + std::optional> state_rewards, action_rewards; + STORM_LOG_THROW(!reward_model.hasStateRewards(), storm::exceptions::NotSupportedException, "state rewards are currently not supported."); + STORM_LOG_THROW(!reward_model.hasTransitionRewards(), storm::exceptions::NotSupportedException, "transition rewards are currently not supported."); + if(not reward_model.hasStateActionRewards()) { + STORM_LOG_WARN("Reward model exists but has no state-action value vector associated with it."); + return storm::models::sparse::StandardRewardModel(std::move(state_rewards), std::move(action_rewards)); + } + + action_rewards = std::vector(); + for(uint64_t row = 0; row < this->row_prototype.size(); row++) { + auto prototype = this->row_prototype[row]; + auto reward = reward_model.getStateActionReward(prototype); + action_rewards->push_back(reward); + } + return storm::models::sparse::StandardRewardModel(std::move(state_rewards), std::move(action_rewards)); + } + + template + std::shared_ptr> PomdpManagerAposteriori::constructMdp() { + + this->clear_before_unfolding(); + + this->buildStateSpace(); + this->buildDesignSpace(); + + storm::storage::sparse::ModelComponents components; + components.transitionMatrix = this->constructTransitionMatrix(); + assert(components.transitionMatrix.isProbabilistic()); + components.stateLabeling = this->constructStateLabeling(); + for (auto const& reward_model : this->pomdp.getRewardModels()) { + auto constructed = this->constructRewardModel(reward_model.second); + components.rewardModels.emplace(reward_model.first, constructed); + } + + this->mdp = std::make_shared>(std::move(components)); + + this->clear_after_unfolding(); + + return this->mdp; - template class PomdpManagerAposteriori; } + + template class PomdpManagerAposteriori; } \ No newline at end of file diff --git a/payntbind/src/synthesis/pomdp/PomdpManagerAposteriori.h b/payntbind/src/synthesis/pomdp/PomdpManagerAposteriori.h index 94811f593..2c624201f 100644 --- a/payntbind/src/synthesis/pomdp/PomdpManagerAposteriori.h +++ b/payntbind/src/synthesis/pomdp/PomdpManagerAposteriori.h @@ -6,91 +6,88 @@ #include -namespace storm { - namespace synthesis { +namespace synthesis { + + template + class PomdpManagerAposteriori { + + public: + + PomdpManagerAposteriori(storm::models::sparse::Pomdp const& pomdp); + + // unfold memory model (a aposteriori memory update) into the POMDP + std::shared_ptr> constructMdp(); + + // for each observation contains the number of allocated memory states (initially 1) + std::vector observation_memory_size; + // set memory size to a selected observation + void setObservationMemorySize(uint64_t obs, uint64_t memory_size); + // set memory size to all observations + void setGlobalMemorySize(uint64_t memory_size); + + // MDP obtained after last unfolding + std::shared_ptr> mdp; + // for each state contains its prototype state (reverse of prototype_duplicates) + std::vector state_prototype; + // for each state contains its memory index + std::vector state_memory; + + // for each unfolded row, its coloring + std::vector> coloring; + // for each hole, the number of its options + std::vector hole_num_options; + // hole identifier for each (memory,prior) combination + std::map,uint64_t> action_holes; + // hole identifier for each (memory,prior,posterior) combination + std::map,uint64_t> update_holes; + + private: + + // original POMDP + storm::models::sparse::Pomdp const& pomdp; + + // for each observation, number of available actions + std::vector observation_actions; + // for each POMDP row, a set of successor observations + std::vector> row_posteriors; + // for each observation, a set of successor observations + std::vector> observation_posteriors; + + // clear auxiliary data structures before unfolding + void clear_before_unfolding(); + // clear auxiliary data structures after unfolding + void clear_after_unfolding(); + + // current number of unfolded states + uint64_t num_states; + // for each POMDP state, a list of identifiers of unfolded states + std::vector> prototype_state_copies; + // establish the state space: to each state s, create mu(O(s)) of its copies + void buildStateSpace(); + + // establish the design space: create action and update holes + void buildDesignSpace(); + + // for each unfolded row, its prototype + std::vector row_prototype; + // get current number of unfolded rows + uint64_t num_unfolded_rows() { return this->coloring.size(); } + + + // unfold a given state-action pair + void unfoldRow( + storm::storage::SparseMatrixBuilder & builder, + uint64_t pomdp_state, uint64_t memory, uint64_t action + ); + storm::storage::SparseMatrix constructTransitionMatrix(); + + // translate state labeling for the unfolded MDP + storm::models::sparse::StateLabeling constructStateLabeling(); + // translate reward models for the unfolded MDP + storm::models::sparse::StandardRewardModel constructRewardModel( + storm::models::sparse::StandardRewardModel const& reward_model + ); - template - class PomdpManagerAposteriori { - - public: - - PomdpManagerAposteriori(storm::models::sparse::Pomdp const& pomdp); - - // unfold memory model (a aposteriori memory update) into the POMDP - std::shared_ptr> constructMdp(); - - // for each observation contains the number of allocated memory states (initially 1) - std::vector observation_memory_size; - // set memory size to a selected observation - void setObservationMemorySize(uint64_t obs, uint64_t memory_size); - // set memory size to all observations - void setGlobalMemorySize(uint64_t memory_size); - - // MDP obtained after last unfolding - std::shared_ptr> mdp; - // for each state contains its prototype state (reverse of prototype_duplicates) - std::vector state_prototype; - // for each state contains its memory index - std::vector state_memory; - - // for each unfolded row, its coloring - std::vector> coloring; - // for each hole, the number of its options - std::vector hole_num_options; - // hole identifier for each (memory,prior) combination - std::map,uint64_t> action_holes; - // hole identifier for each (memory,prior,posterior) combination - std::map,uint64_t> update_holes; - - private: - - // original POMDP - storm::models::sparse::Pomdp const& pomdp; - - // for each observation, number of available actions - std::vector observation_actions; - // for each POMDP row, a set of successor observations - std::vector> row_posteriors; - // for each observation, a set of successor observations - std::vector> observation_posteriors; - - // clear auxiliary data structures before unfolding - void clear_before_unfolding(); - // clear auxiliary data structures after unfolding - void clear_after_unfolding(); - - // current number of unfolded states - uint64_t num_states; - // for each POMDP state, a list of identifiers of unfolded states - std::vector> prototype_state_copies; - // establish the state space: to each state s, create mu(O(s)) of its copies - void buildStateSpace(); - - // establish the design space: create action and update holes - void buildDesignSpace(); - - // for each unfolded row, its prototype - std::vector row_prototype; - // get current number of unfolded rows - uint64_t num_unfolded_rows() { return this->coloring.size(); } - - - // unfold a given state-action pair - void unfoldRow( - storm::storage::SparseMatrixBuilder & builder, - uint64_t pomdp_state, uint64_t memory, uint64_t action - ); - storm::storage::SparseMatrix constructTransitionMatrix(); - - // translate state labeling for the unfolded MDP - storm::models::sparse::StateLabeling constructStateLabeling(); - // translate reward models for the unfolded MDP - storm::models::sparse::StandardRewardModel constructRewardModel( - storm::models::sparse::StandardRewardModel const& reward_model - ); - - - }; - } -} \ No newline at end of file + }; +} diff --git a/payntbind/src/synthesis/pomdp/SubPomdpBuilder.cpp b/payntbind/src/synthesis/pomdp/SubPomdpBuilder.cpp index d623315c0..f58629553 100644 --- a/payntbind/src/synthesis/pomdp/SubPomdpBuilder.cpp +++ b/payntbind/src/synthesis/pomdp/SubPomdpBuilder.cpp @@ -9,7 +9,6 @@ #include namespace synthesis { - SubPomdpBuilder::SubPomdpBuilder( storm::models::sparse::Pomdp const& pomdp, diff --git a/payntbind/src/synthesis/pomdp/bindings.cpp b/payntbind/src/synthesis/pomdp/bindings.cpp index 5b47ad073..b7ee4a45f 100644 --- a/payntbind/src/synthesis/pomdp/bindings.cpp +++ b/payntbind/src/synthesis/pomdp/bindings.cpp @@ -6,38 +6,38 @@ void bindings_pomdp(py::module& m) { - py::class_>(m, "PomdpManager", "POMDP manager") + py::class_>(m, "PomdpManager", "POMDP manager") .def(py::init const&>(), "Constructor.", py::arg("pomdp")) - .def("set_observation_memory_size", &storm::synthesis::PomdpManager::setObservationMemorySize, "Set memory size to a selected observation.", py::arg("observation"), py::arg("memory_size")) - .def("set_global_memory_size", &storm::synthesis::PomdpManager::setGlobalMemorySize, "Set memory size to all observations.", py::arg("memory_size")) - .def("construct_mdp", &storm::synthesis::PomdpManager::constructMdp, "Unfold memory model (a priori memory update) into the POMDP.") - .def_property_readonly("state_prototype", [](storm::synthesis::PomdpManager& manager) {return manager.state_prototype;}) - .def_property_readonly("state_memory", [](storm::synthesis::PomdpManager& manager) {return manager.state_memory;}) - .def_property_readonly("observation_memory_size", [](storm::synthesis::PomdpManager& manager) {return manager.observation_memory_size;}) - .def_property_readonly("observation_actions", [](storm::synthesis::PomdpManager& manager) {return manager.observation_actions;}) - .def_property_readonly("observation_successors", [](storm::synthesis::PomdpManager& manager) {return manager.observation_successors;}) - .def_property_readonly("max_successor_memory_size", [](storm::synthesis::PomdpManager& manager) {return manager.max_successor_memory_size;}) - .def_property_readonly("num_holes", [](storm::synthesis::PomdpManager& manager) {return manager.num_holes;}) - .def_property_readonly("action_holes", [](storm::synthesis::PomdpManager& manager) {return manager.action_holes;}) - .def_property_readonly("memory_holes", [](storm::synthesis::PomdpManager& manager) {return manager.memory_holes;}) - .def_property_readonly("hole_options", [](storm::synthesis::PomdpManager& manager) {return manager.hole_options;}) - .def_property_readonly("row_action_hole", [](storm::synthesis::PomdpManager& manager) {return manager.row_action_hole;}) - .def_property_readonly("row_action_option", [](storm::synthesis::PomdpManager& manager) {return manager.row_action_option;}) - .def_property_readonly("row_memory_hole", [](storm::synthesis::PomdpManager& manager) {return manager.row_memory_hole;}) - .def_property_readonly("row_memory_option", [](storm::synthesis::PomdpManager& manager) {return manager.row_memory_option;}) + .def("set_observation_memory_size", &synthesis::PomdpManager::setObservationMemorySize, "Set memory size to a selected observation.", py::arg("observation"), py::arg("memory_size")) + .def("set_global_memory_size", &synthesis::PomdpManager::setGlobalMemorySize, "Set memory size to all observations.", py::arg("memory_size")) + .def("construct_mdp", &synthesis::PomdpManager::constructMdp, "Unfold memory model (a priori memory update) into the POMDP.") + .def_property_readonly("state_prototype", [](synthesis::PomdpManager& manager) {return manager.state_prototype;}) + .def_property_readonly("state_memory", [](synthesis::PomdpManager& manager) {return manager.state_memory;}) + .def_property_readonly("observation_memory_size", [](synthesis::PomdpManager& manager) {return manager.observation_memory_size;}) + .def_property_readonly("observation_actions", [](synthesis::PomdpManager& manager) {return manager.observation_actions;}) + .def_property_readonly("observation_successors", [](synthesis::PomdpManager& manager) {return manager.observation_successors;}) + .def_property_readonly("max_successor_memory_size", [](synthesis::PomdpManager& manager) {return manager.max_successor_memory_size;}) + .def_property_readonly("num_holes", [](synthesis::PomdpManager& manager) {return manager.num_holes;}) + .def_property_readonly("action_holes", [](synthesis::PomdpManager& manager) {return manager.action_holes;}) + .def_property_readonly("memory_holes", [](synthesis::PomdpManager& manager) {return manager.memory_holes;}) + .def_property_readonly("hole_options", [](synthesis::PomdpManager& manager) {return manager.hole_options;}) + .def_property_readonly("row_action_hole", [](synthesis::PomdpManager& manager) {return manager.row_action_hole;}) + .def_property_readonly("row_action_option", [](synthesis::PomdpManager& manager) {return manager.row_action_option;}) + .def_property_readonly("row_memory_hole", [](synthesis::PomdpManager& manager) {return manager.row_memory_hole;}) + .def_property_readonly("row_memory_option", [](synthesis::PomdpManager& manager) {return manager.row_memory_option;}) ; - py::class_>(m, "PomdpManagerAposteriori", "POMDP manager (a posteriori)") + py::class_>(m, "PomdpManagerAposteriori", "POMDP manager (a posteriori)") .def(py::init const&>(), "Constructor.") - .def("set_observation_memory_size", &storm::synthesis::PomdpManagerAposteriori::setObservationMemorySize) - .def("set_global_memory_size", &storm::synthesis::PomdpManagerAposteriori::setGlobalMemorySize) - .def("construct_mdp", &storm::synthesis::PomdpManagerAposteriori::constructMdp) - .def_property_readonly("state_prototype", [](storm::synthesis::PomdpManagerAposteriori& manager) {return manager.state_prototype;}) - .def_property_readonly("state_memory", [](storm::synthesis::PomdpManagerAposteriori& manager) {return manager.state_memory;}) - .def_property_readonly("coloring", [](storm::synthesis::PomdpManagerAposteriori& manager) {return manager.coloring;}) - .def_property_readonly("hole_num_options", [](storm::synthesis::PomdpManagerAposteriori& manager) {return manager.hole_num_options;}) - .def_property_readonly("action_holes", [](storm::synthesis::PomdpManagerAposteriori& manager) {return manager.action_holes;}) - .def_property_readonly("update_holes", [](storm::synthesis::PomdpManagerAposteriori& manager) {return manager.update_holes;}) + .def("set_observation_memory_size", &synthesis::PomdpManagerAposteriori::setObservationMemorySize) + .def("set_global_memory_size", &synthesis::PomdpManagerAposteriori::setGlobalMemorySize) + .def("construct_mdp", &synthesis::PomdpManagerAposteriori::constructMdp) + .def_property_readonly("state_prototype", [](synthesis::PomdpManagerAposteriori& manager) {return manager.state_prototype;}) + .def_property_readonly("state_memory", [](synthesis::PomdpManagerAposteriori& manager) {return manager.state_memory;}) + .def_property_readonly("coloring", [](synthesis::PomdpManagerAposteriori& manager) {return manager.coloring;}) + .def_property_readonly("hole_num_options", [](synthesis::PomdpManagerAposteriori& manager) {return manager.hole_num_options;}) + .def_property_readonly("action_holes", [](synthesis::PomdpManagerAposteriori& manager) {return manager.action_holes;}) + .def_property_readonly("update_holes", [](synthesis::PomdpManagerAposteriori& manager) {return manager.update_holes;}) ; py::class_>(m, "SubPomdpBuilder")