From 5f13de7c91195a4ac290a98bb13577fc2d05bbe8 Mon Sep 17 00:00:00 2001 From: Roman Andriushchenko Date: Fri, 22 Nov 2024 15:49:58 +0100 Subject: [PATCH 1/2] SMG abstraction for families of MDPs --- paynt/cli.py | 10 +- paynt/parser/drn_parser.py | 2 +- paynt/quotient/mdp_family.py | 2 +- paynt/synthesizer/policy_tree.py | 75 +------ .../pomdp_family/GameAbstractionSolver.cpp | 209 ++++++++---------- .../pomdp_family/GameAbstractionSolver.h | 30 +-- .../pomdp_family/ObservationEvaluator.cpp | 3 +- .../synthesis/pomdp_family/SmgAbstraction.cpp | 136 ++++++++++++ .../synthesis/pomdp_family/SmgAbstraction.h | 45 ++++ .../src/synthesis/pomdp_family/bindings.cpp | 28 ++- payntbind/src/synthesis/posmg/Posmg.cpp | 193 ++++++++-------- payntbind/src/synthesis/posmg/Posmg.h | 52 ++--- .../src/synthesis/posmg/PosmgManager.cpp | 68 ++++-- payntbind/src/synthesis/posmg/PosmgManager.h | 11 +- payntbind/src/synthesis/posmg/bindings.cpp | 63 +++--- payntbind/src/synthesis/quotient/Family.cpp | 9 +- payntbind/src/synthesis/smg/bindings.cpp | 36 +-- payntbind/src/synthesis/smg/smgModelChecker.h | 36 +++ .../synthesis/translation/ItemKeyTranslator.h | 1 - .../translation/componentTranslations.cpp | 6 + 20 files changed, 576 insertions(+), 439 deletions(-) create mode 100644 payntbind/src/synthesis/pomdp_family/SmgAbstraction.cpp create mode 100644 payntbind/src/synthesis/pomdp_family/SmgAbstraction.h create mode 100644 payntbind/src/synthesis/smg/smgModelChecker.h diff --git a/paynt/cli.py b/paynt/cli.py index 219486142..f43234109 100644 --- a/paynt/cli.py +++ b/paynt/cli.py @@ -114,14 +114,8 @@ def setup_logger(log_path = None): @click.option("--export-synthesis", type=click.Path(), default=None, help="base filename to output synthesis result") -@click.option("--mdp-split-wrt-mdp", is_flag=True, default=False, - help="if set, MDP abstraction scheduler will be used for splitting, otherwise game abstraction scheduler will be used") @click.option("--mdp-discard-unreachable-choices", is_flag=True, default=False, help="if set, unreachable choices will be discarded from the splitting scheduler") -@click.option("--mdp-use-randomized-abstraction", is_flag=True, default=False, - help="if set, randomized abstraction guess-and-verify will be used instead of game abstraction;" + - " MDP abstraction scheduler will be used for splitting" -) @click.option("--tree-depth", default=0, type=int, help="decision tree synthesis: tree depth") @@ -152,7 +146,7 @@ def paynt_run( storm_pomdp, iterative_storm, get_storm_result, storm_options, prune_storm, use_storm_cutoffs, unfold_strategy_storm, export_fsc_storm, export_fsc_paynt, export_synthesis, - mdp_split_wrt_mdp, mdp_discard_unreachable_choices, mdp_use_randomized_abstraction, + mdp_discard_unreachable_choices, tree_depth, tree_enumeration, tree_map_scheduler, add_dont_care_action, constraint_bound, ce_generator, @@ -176,9 +170,7 @@ def paynt_run( paynt.quotient.decpomdp.DecPomdpQuotient.initial_memory_size = fsc_memory_size paynt.quotient.posmg.PosmgQuotient.initial_memory_size = fsc_memory_size - paynt.synthesizer.policy_tree.SynthesizerPolicyTree.split_wrt_mdp_scheduler = mdp_split_wrt_mdp paynt.synthesizer.policy_tree.SynthesizerPolicyTree.discard_unreachable_choices = mdp_discard_unreachable_choices - paynt.synthesizer.policy_tree.SynthesizerPolicyTree.use_randomized_abstraction = mdp_use_randomized_abstraction paynt.synthesizer.decision_tree.SynthesizerDecisionTree.tree_depth = tree_depth paynt.synthesizer.decision_tree.SynthesizerDecisionTree.tree_enumeration = 
tree_enumeration diff --git a/paynt/parser/drn_parser.py b/paynt/parser/drn_parser.py index 4f42a4f92..49461258e 100644 --- a/paynt/parser/drn_parser.py +++ b/paynt/parser/drn_parser.py @@ -27,7 +27,7 @@ def parse_drn(cls, sketch_path): pomdp_path = sketch_path + '.tmp' state_player_indications = DrnParser.pomdp_from_posmg(sketch_path, pomdp_path) pomdp = DrnParser.read_drn(pomdp_path) - explicit_model = payntbind.synthesis.create_posmg(pomdp, state_player_indications) + explicit_model = payntbind.synthesis.posmgFromPomdp(pomdp, state_player_indications) os.remove(pomdp_path) else: explicit_model = DrnParser.read_drn(sketch_path) diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py index 497921045..504c14fe9 100644 --- a/paynt/quotient/mdp_family.py +++ b/paynt/quotient/mdp_family.py @@ -186,7 +186,7 @@ def build_game_abstraction_solver(self, prop): target_label = prop.get_target_label() precision = paynt.verification.property.Property.model_checking_precision solver = payntbind.synthesis.GameAbstractionSolver( - self.quotient_mdp, len(self.action_labels), self.choice_to_action, target_label, precision + self.quotient_mdp, len(self.action_labels), self.choice_to_action, prop.formula, prop.maximizing, target_label, precision ) return solver diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index a249a2fee..84cc9bbf5 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -483,12 +483,8 @@ class SynthesizerPolicyTree(paynt.synthesizer.synthesizer.Synthesizer): # if True, tree leaves will be double-checked after synthesis double_check_policy_tree_leaves = False - # if True, MDP abstraction scheduler will be used for splitting, otherwise game abstraction scheduler will be used - split_wrt_mdp_scheduler = False # if True, unreachable choices will be discarded from the splitting scheduler discard_unreachable_choices = False - # if True, randomized abstraction guess-and-verify will be used instead of game abstraction - use_randomized_abstraction = False @property def method_name(self): @@ -532,9 +528,12 @@ def solve_singleton(self, family, prop): def solve_game_abstraction(self, family, prop, game_solver): # construct and solve the game abstraction # logger.debug("solving game abstraction...") - game_solver.solve(family.selected_choices, prop.maximizing, prop.minimizing) - self.stat.iteration_game(family.mdp.states) + + # game_solver.solve_sg(family.selected_choices) + game_solver.solve_smg(family.selected_choices) + game_value = game_solver.solution_value + self.stat.iteration_game(family.mdp.states) game_sat = prop.satisfies_threshold_within_precision(game_value) # logger.debug("game solved, value is {}".format(game_value)) game_policy = game_solver.solution_state_to_player1_action @@ -546,35 +545,6 @@ def solve_game_abstraction(self, family, prop, game_solver): game_policy = game_policy_fixed return game_policy,game_sat - def try_randomized_abstraction(self, family, prop): - # build randomized abstraction - choice_to_action = [] - for choice in range(family.mdp.model.nr_choices): - action = self.quotient.choice_to_action[family.mdp.quotient_choice_map[choice]] - choice_to_action.append(action) - state_action_choices = self.quotient.map_state_action_to_choices(family.mdp.model,self.quotient.num_actions,choice_to_action) - model,choice_to_action = payntbind.synthesis.randomize_action_variant(family.mdp.model, state_action_choices) - - # model check - result = Property.model_check(model, prop.formula) - 
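
For orientation, the revised Python calling convention around the game abstraction (mirroring `build_game_abstraction_solver` in `mdp_family.py` and `solve_game_abstraction` above) is sketched below; `quotient`, `family` and a reachability property `prop` are assumed to be in scope:

```python
# Sketch: constructing and solving the SMG-based game abstraction
# (constructor arguments and properties as bound in this patch).
solver = payntbind.synthesis.GameAbstractionSolver(
    quotient.quotient_mdp, len(quotient.action_labels), quotient.choice_to_action,
    prop.formula, prop.maximizing, prop.get_target_label(),
    paynt.verification.property.Property.model_checking_precision
)
solver.solve_smg(family.selected_choices)   # the previous SG-based path remains available as solve_sg()
game_value = solver.solution_value
game_policy = solver.solution_state_to_player1_action
```
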
self.stat.iteration(model) - value = result.at(model.initial_states[0]) - policy_sat = prop.satisfies_threshold(value) # does this value matter? - - # extract policy for the quotient - scheduler = result.scheduler - policy = self.quotient.empty_policy() - for state in range(model.nr_states): - state_choice = scheduler.get_choice(state).get_deterministic_choice() - choice = model.transition_matrix.get_row_group_start(state) + state_choice - action = choice_to_action[choice] - quotient_state = family.mdp.quotient_state_map[state] - policy[quotient_state] = action - - # apply policy and check if it is SAT for all MDPs in the family - policy_sat = self.verify_policy(family, prop, policy) - return policy,policy_sat - def state_to_choice_to_hole_selection(self, state_to_choice): if SynthesizerPolicyTree.discard_unreachable_choices: state_to_choice = self.quotient.discard_unreachable_choices(state_to_choice) @@ -588,18 +558,6 @@ def parse_game_scheduler(self, game_solver): state_values = game_solver.solution_state_values return scheduler_choices,hole_selection,state_values - def parse_mdp_scheduler(self, family, mdp_result): - state_to_choice = self.quotient.scheduler_to_state_to_choice( - family.mdp, mdp_result.result.scheduler, discard_unreachable_choices=False - ) - scheduler_choices,hole_selection = self.state_to_choice_to_hole_selection(state_to_choice) - state_values = [0] * self.quotient.quotient_mdp.nr_states - for state in range(family.mdp.states): - quotient_state = family.mdp.quotient_state_map[state] - state_values[quotient_state] = mdp_result.result.at(state) - return scheduler_choices,hole_selection,state_values - - def verify_family(self, family, game_solver, prop): # logger.info("investigating family of size {}".format(family.size)) self.quotient.build(family) @@ -609,18 +567,10 @@ def verify_family(self, family, game_solver, prop): mdp_family_result.policy = self.solve_singleton(family,prop) return mdp_family_result - if not SynthesizerPolicyTree.use_randomized_abstraction: - if family.candidate_policy is None: - game_policy,game_sat = self.solve_game_abstraction(family,prop,game_solver) - else: - game_policy = family.candidate_policy - game_sat = False + if family.candidate_policy is None: + game_policy,game_sat = self.solve_game_abstraction(family,prop,game_solver) else: - randomization_policy,policy_sat = self.try_randomized_abstraction(family,prop) - if policy_sat: - mdp_family_result.policy = randomization_policy - return mdp_family_result - game_policy = None + game_policy = family.candidate_policy game_sat = False mdp_family_result.game_policy = game_policy @@ -638,10 +588,7 @@ def verify_family(self, family, game_solver, prop): return mdp_family_result # undecided: choose scheduler choices to be used for splitting - if not (SynthesizerPolicyTree.use_randomized_abstraction or SynthesizerPolicyTree.split_wrt_mdp_scheduler): - scheduler_choices,hole_selection,state_values = self.parse_game_scheduler(game_solver) - else: - scheduler_choices,hole_selection,state_values = self.parse_mdp_scheduler(family, mdp_result) + scheduler_choices,hole_selection,state_values = self.parse_game_scheduler(game_solver) splitter = self.choose_splitter(family,prop,scheduler_choices,state_values,hole_selection) mdp_family_result.splitter = splitter @@ -723,7 +670,7 @@ def split(self, family, prop, hole_selection, splitter, policy): subfamily.candidate_policy = None subfamilies.append(subfamily) - if not (SynthesizerPolicyTree.use_randomized_abstraction or 
SynthesizerPolicyTree.split_wrt_mdp_scheduler) and not SynthesizerPolicyTree.discard_unreachable_choices: + if not SynthesizerPolicyTree.discard_unreachable_choices: self.assign_candidate_policy(subfamilies, hole_selection, splitter, policy) return suboptions,subfamilies @@ -732,6 +679,7 @@ def split(self, family, prop, hole_selection, splitter, policy): def evaluate_all(self, family, prop, keep_value_only=False): assert not prop.reward, "expecting reachability probability propery" game_solver = self.quotient.build_game_abstraction_solver(prop) + # game_solver.enable_profiling(True) family.candidate_policy = None policy_tree = PolicyTree(family) @@ -781,6 +729,7 @@ def evaluate_all(self, family, prop, keep_value_only=False): self.stat.num_leaves_merged = len(policy_tree.collect_leaves()) self.stat.num_policies_merged = len(policy_tree.policies) self.policy_tree = policy_tree + # game_solver.print_profiling() # convert policy tree to family evaluation evaluations = [] diff --git a/payntbind/src/synthesis/pomdp_family/GameAbstractionSolver.cpp b/payntbind/src/synthesis/pomdp_family/GameAbstractionSolver.cpp index 2b23f83ef..2c62e4701 100644 --- a/payntbind/src/synthesis/pomdp_family/GameAbstractionSolver.cpp +++ b/payntbind/src/synthesis/pomdp_family/GameAbstractionSolver.cpp @@ -1,108 +1,33 @@ #include "GameAbstractionSolver.h" - +#include "src/synthesis/pomdp_family/SmgAbstraction.h" +#include "src/synthesis/smg/smgModelChecker.h" +#include "src/synthesis/translation/componentTranslations.h" +#include "src/synthesis/translation/ItemKeyTranslator.h" +#include "src/synthesis/translation/ItemTranslator.h" + +#include +#include +#include +#include +#include +#include #include namespace synthesis { - - /*template - std::pair>,std::vector> randomizeActionVariant( - storm::models::sparse::Model const& model, - std::vector>> const& state_action_choices - ) { - storm::storage::sparse::ModelComponents components; - - // copy state labeling - storm::models::sparse::StateLabeling state_labeling(model.getNumberOfStates()); - for (auto const& label : model.getStateLabeling().getLabels()) { - state_labeling.addLabel(label, storm::storage::BitVector(model.getStates(label))); - } - components.stateLabeling = state_labeling; - - // build transition matrix and reward models - storm::storage::SparseMatrixBuilder builder(0, 0, 0, false, true, 0); - std::map> reward_vectors; - for(auto const& reward_model : model.getRewardModels()) { - reward_vectors.emplace(reward_model.first, std::vector()); - } - - uint64_t num_rows = 0; - std::vector choice_to_action; - auto num_actions = state_action_choices[0].size(); - for(uint64_t state=0; state dst_prob; - for(auto choice: choices) { - for(auto const &entry: model.getTransitionMatrix().getRow(choice)) { - dst_prob[entry.getColumn()] += entry.getValue() / num_choices; - } - } - for(auto const& [dst,prob] : dst_prob) { - builder.addNextValue(num_rows,dst,prob); - } - num_rows++; - - // handle reward models - for(auto const& reward_model : model.getRewardModels()) { - ValueType reward_value = 0; - for(auto choice: choices) { - reward_value += reward_model.second.getStateActionReward(choice) / num_choices; - } - reward_vectors[reward_model.first].push_back(reward_value); - } - } - } - components.transitionMatrix = builder.build(); - - for(auto const& [name,choice_rewards]: reward_vectors) { - std::optional> state_rewards; - components.rewardModels.emplace(name, storm::models::sparse::StandardRewardModel(std::move(state_rewards), std::move(choice_rewards))); - } - - auto 
randomized_model = std::make_shared>(std::move(components)); - return std::make_pair(randomized_model,choice_to_action); - } - template std::pair>,std::vector> randomizeActionVariant(storm::models::sparse::Model const& model, std::vector>> const& state_action_choices);*/ - - - template - void print_matrix(storm::storage::SparseMatrix matrix) { - auto const& row_group_indices = matrix.getRowGroupIndices(); - for(uint64_t state=0; state < matrix.getRowGroupCount(); state++) { - std::cout << "state " << state << ": " << std::endl; - for(uint64_t row=row_group_indices[state]; row " << entry.getColumn() << " [" << entry.getValue() << "];"; - } - std::cout << std::endl; - } - } - std::cout << "-----" << std::endl; - } - - template void print_matrix(storm::storage::SparseMatrix matrix); - template void print_matrix(storm::storage::SparseMatrix matrix); template GameAbstractionSolver::GameAbstractionSolver( storm::models::sparse::Model const& quotient, uint64_t quotient_num_actions, std::vector const& choice_to_action, + std::shared_ptr formula, + bool player1_maximizing, std::string const& target_label, double precision - ) : quotient(quotient), quotient_num_actions(quotient_num_actions), choice_to_action(choice_to_action) { + ) : quotient(quotient), quotient_num_actions(quotient_num_actions), choice_to_action(choice_to_action), + player1_maximizing(player1_maximizing) { auto quotient_num_states = quotient.getNumberOfStates(); auto quotient_num_choices = quotient.getNumberOfChoices(); @@ -115,6 +40,11 @@ namespace synthesis { } this->setupSolverEnvironment(precision); + std::vector> coalition_vector; + coalition_vector.emplace_back((storm::storage::PlayerIndex)0); + storm::logic::PlayerCoalition coalition(coalition_vector); + this->game_formula = std::make_shared(coalition,formula); + // identify target states this->state_is_target = storm::storage::BitVector(quotient_num_states,false); for(auto state: quotient.getStateLabeling().getStates(target_label)) { @@ -149,17 +79,14 @@ namespace synthesis { template - void GameAbstractionSolver::solve( - storm::storage::BitVector quotient_choice_mask, - bool player1_maximizing, bool player2_maximizing - ) { + void GameAbstractionSolver::solveSg(storm::storage::BitVector const& quotient_choice_mask) { if(profiling_enabled) { this->timer_total.start(); } - auto quotient_num_states = this->quotient.getNumberOfStates(); - auto quotient_num_choices = this->quotient.getNumberOfChoices(); - auto quotient_initial_state = *(this->quotient.getInitialStates().begin()); + uint64_t quotient_num_states = this->quotient.getNumberOfStates(); + uint64_t quotient_num_choices = this->quotient.getNumberOfChoices(); + uint64_t quotient_initial_state = *(this->quotient.getInitialStates().begin()); ItemTranslator state_to_player1_state(quotient_num_states); ItemKeyTranslator state_action_to_player2_state(quotient_num_states); @@ -172,20 +99,20 @@ namespace synthesis { state_is_encountered.set(quotient_initial_state,true); auto const& quotient_row_group_indices = this->quotient.getTransitionMatrix().getRowGroupIndices(); while(not unexplored_states.empty()) { - auto state = unexplored_states.front(); + uint64_t state = unexplored_states.front(); unexplored_states.pop(); - auto player1_state = state_to_player1_state.translate(state); + uint64_t player1_state = state_to_player1_state.translate(state); player1_state_to_actions.resize(state_to_player1_state.numTranslations()); - for(auto choice = quotient_row_group_indices[state]; choice < quotient_row_group_indices[state+1]; 
choice++) { + for(uint64_t choice = quotient_row_group_indices[state]; choice < quotient_row_group_indices[state+1]; choice++) { if(not quotient_choice_mask[choice]) { continue; } - auto action = choice_to_action[choice]; + uint64_t action = choice_to_action[choice]; player1_state_to_actions[player1_state].insert(action); - auto player2_state = state_action_to_player2_state.translate(state,action); + uint64_t player2_state = state_action_to_player2_state.translate(state,action); player2_state_to_choices.resize(state_action_to_player2_state.numTranslations()); player2_state_to_choices[player2_state].push_back(choice); - for(auto state_dst: this->choice_to_destinations[choice]) { + for(uint64_t state_dst: this->choice_to_destinations[choice]) { if(state_is_encountered[state_dst]) { continue; } @@ -194,12 +121,12 @@ namespace synthesis { } } } - auto player1_num_states = state_to_player1_state.numTranslations(); - auto player2_num_states = state_action_to_player2_state.numTranslations(); + uint64_t player1_num_states = state_to_player1_state.numTranslations(); + uint64_t player2_num_states = state_action_to_player2_state.numTranslations(); // add fresh target states - auto player1_target_state = player1_num_states++; - auto player2_target_state = player2_num_states++; + uint64_t player1_target_state = player1_num_states++; + uint64_t player2_target_state = player2_num_states++; // build the matrix of Player 1 std::vector player1_choice_to_action; @@ -207,10 +134,10 @@ namespace synthesis { uint64_t player1_num_rows = 0; for(uint64_t player1_state=0; player1_state().create(env, player1_matrix, player2_matrix); solver->setTrackSchedulers(true); - auto player1_direction = this->getOptimizationDirection(player1_maximizing); - auto player2_direction = this->getOptimizationDirection(player2_maximizing); + auto player1_direction = this->getOptimizationDirection(this->player1_maximizing); + auto player2_direction = this->getOptimizationDirection(not this->player1_maximizing); std::vector player1_state_values(player1_num_states,0); if(profiling_enabled) { this->timer_game_solving.start(); @@ -311,7 +238,63 @@ namespace synthesis { } this->solution_value = this->solution_state_values[quotient_initial_state]; + } + + template + void GameAbstractionSolver::solveSmg(storm::storage::BitVector const& quotient_choice_mask) { + if(profiling_enabled) { + this->timer_total.start(); + } + + SmgAbstraction abstraction = synthesis::SmgAbstraction( + quotient,quotient_num_actions,choice_to_action,quotient_choice_mask + ); + + // solve the game + if(profiling_enabled) { + this->timer_game_solving.start(); + } + std::shared_ptr result = synthesis::modelCheckSmg(*(abstraction.smg),this->game_formula,false,true,this->env); + if(profiling_enabled) { + this->timer_game_solving.stop(); + } + storm::modelchecker::ExplicitQuantitativeCheckResult const& result_quan = result->asExplicitQuantitativeCheckResult(); + storm::storage::Scheduler const& scheduler = result_quan.getScheduler(); + this->solution_state_values = result_quan.getValueVector(); + + std::fill(this->solution_state_to_player1_action.begin(),this->solution_state_to_player1_action.end(),this->quotient_num_actions); + std::fill(this->solution_state_to_quotient_choice.begin(),this->solution_state_to_quotient_choice.end(),this->quotient.getNumberOfChoices()); + std::vector const& game_row_groups = abstraction.smg->getTransitionMatrix().getRowGroupIndices(); + // get actions selected by Player 0 + for(uint64_t game_state = 0; game_state < 
abstraction.smg->getNumberOfStates(); ++game_state) { + if(abstraction.smg->getPlayerOfState(game_state) != 0) { + continue; + } + auto [state,_] = abstraction.state_to_quotient_state_action[game_state]; + uint64_t game_choice = game_row_groups[game_state]+scheduler.getChoice(game_state).getDeterministicChoice(); + uint64_t player1_action = choice_to_action[abstraction.choice_to_quotient_choice[game_choice]]; + this->solution_state_to_player1_action[state] = player1_action; + } + // get action variants selected by Player 1 in corresponding states + for(uint64_t game_state = 0; game_state < abstraction.smg->getNumberOfStates(); ++game_state) { + if(abstraction.smg->getPlayerOfState(game_state) != 1) { + continue; + } + auto [state,action] = abstraction.state_to_quotient_state_action[game_state]; + if(action != this->solution_state_to_player1_action[state]) { + continue; + } + uint64_t game_choice = game_row_groups[game_state]+scheduler.getChoice(game_state).getDeterministicChoice(); + this->solution_state_to_quotient_choice[state] = abstraction.choice_to_quotient_choice[game_choice]; + } + + if(profiling_enabled) { + this->timer_total.stop(); + } + + uint64_t quotient_initial_state = *(this->quotient.getInitialStates().begin()); + this->solution_value = this->solution_state_values[quotient_initial_state]; } diff --git a/payntbind/src/synthesis/pomdp_family/GameAbstractionSolver.h b/payntbind/src/synthesis/pomdp_family/GameAbstractionSolver.h index 241f63b6c..9b812749d 100644 --- a/payntbind/src/synthesis/pomdp_family/GameAbstractionSolver.h +++ b/payntbind/src/synthesis/pomdp_family/GameAbstractionSolver.h @@ -1,28 +1,12 @@ #pragma once #include -#include -#include #include -#include -#include #include - -#include "src/synthesis/translation/ItemTranslator.h" -#include "src/synthesis/translation/ItemKeyTranslator.h" +#include namespace synthesis { - - /** - * Given an MDP having multiple variants of actions, create an MDP in which this variant selection is randomized. - */ - template - std::pair>,std::vector> randomizeActionVariant( - storm::models::sparse::Model const& model, - std::vector>> const& state_action_choices - ); - template class GameAbstractionSolver { @@ -43,6 +27,8 @@ namespace synthesis { * @param quotient The quotient MDP. Sub-MDPs from the quotient will be used to construct sub-games. * @param quotient_num_action The total number of distinct actions in the quotient. * @param choice_to_action For each row of the quotient, the associated action. + * @param formula Game formula to model check. + * @param player1_maximizing Whether Player 1 maximizes. * @param target_label Label of the target states. * @param precision Game solving precision. */ @@ -50,6 +36,8 @@ namespace synthesis { storm::models::sparse::Model const& quotient, uint64_t quotient_num_actions, std::vector const& choice_to_action, + std::shared_ptr formula, + bool player1_maximizing, std::string const& target_label, double precision ); @@ -58,10 +46,8 @@ namespace synthesis { * Solve the game induced by the sub-MDP. * @param quotient_choice_mask Choices of the quotient that remained in the sub-MDP. */ - void solve( - storm::storage::BitVector quotient_choice_mask, - bool player1_maximizing, bool player2_maximizing - ); + void solveSg(storm::storage::BitVector const& quotient_choice_mask); + void solveSmg(storm::storage::BitVector const& quotient_choice_mask); /** State values for the solution. 
*/ std::vector solution_state_values; @@ -88,6 +74,8 @@ namespace synthesis { storm::models::sparse::Model const& quotient; uint64_t quotient_num_actions; std::vector choice_to_action; + std::shared_ptr game_formula; + bool player1_maximizing; /** Identification of target states. */ storm::storage::BitVector state_is_target; diff --git a/payntbind/src/synthesis/pomdp_family/ObservationEvaluator.cpp b/payntbind/src/synthesis/pomdp_family/ObservationEvaluator.cpp index 1fd91e2a9..8506aaaf5 100644 --- a/payntbind/src/synthesis/pomdp_family/ObservationEvaluator.cpp +++ b/payntbind/src/synthesis/pomdp_family/ObservationEvaluator.cpp @@ -12,7 +12,7 @@ namespace synthesis { storm::prism::Program & prism, storm::models::sparse::Model const& model ) { - // substitute constanst and simplify formulas in the program + // substitute constants and simplify formulas in the program prism = prism.substituteConstantsFormulas(true,true); // identify names and types of observation labels @@ -79,7 +79,6 @@ namespace synthesis { return this->obs_class_to_evaluation[obs_class].getAsInt(OBS_EXPR_VALUE_SIZE*obs_expr,OBS_EXPR_VALUE_SIZE); } - template std::shared_ptr> ObservationEvaluator::addObservationsToSubMdp( storm::models::sparse::Mdp const& sub_mdp, diff --git a/payntbind/src/synthesis/pomdp_family/SmgAbstraction.cpp b/payntbind/src/synthesis/pomdp_family/SmgAbstraction.cpp new file mode 100644 index 000000000..8a8fc3f7a --- /dev/null +++ b/payntbind/src/synthesis/pomdp_family/SmgAbstraction.cpp @@ -0,0 +1,136 @@ +#include "SmgAbstraction.h" + +#include "src/synthesis/translation/componentTranslations.h" +#include "src/synthesis/translation/ItemKeyTranslator.h" +#include "src/synthesis/posmg/Posmg.h" + +#include +#include + +#include + +namespace synthesis { + + template + SmgAbstraction::SmgAbstraction( + storm::models::sparse::Model const& quotient, + uint64_t quotient_num_actions, + std::vector const& choice_to_action, + storm::storage::BitVector const& quotient_choice_mask + ) { + uint64_t quotient_num_states = quotient.getNumberOfStates(); + uint64_t quotient_initial_state = *(quotient.getInitialStates().begin()); + uint64_t quotient_num_choices = quotient.getNumberOfChoices(); + auto const& quotient_row_groups = quotient.getTransitionMatrix().getRowGroupIndices(); + + ItemKeyTranslator state_action_to_game_state(quotient_num_states); + // for Player 1 states, contains available actions; for Player 2 states, contains enabled choices + std::vector> game_state_to_choices; + + std::queue unexplored_states; + storm::storage::BitVector state_is_encountered(quotient_num_states,false); + unexplored_states.push(quotient_initial_state); + state_is_encountered.set(quotient_initial_state,true); + std::vector> state_enabled_actions(quotient_num_states); + std::vector> state_action_choice(quotient_num_states); + while(not unexplored_states.empty()) { + uint64_t state = unexplored_states.front(); + unexplored_states.pop(); + std::set enabled_actions; + state_action_choice[state] = std::vector(quotient_num_actions,quotient_num_choices); + uint64_t player1_state = state_action_to_game_state.translate(state,quotient_num_actions); + game_state_to_choices.resize(state_action_to_game_state.numTranslations()); + for(uint64_t choice = quotient_row_groups[state]; choice < quotient_row_groups[state+1]; ++choice) { + if(not quotient_choice_mask[choice]) { + continue; + } + uint64_t action = choice_to_action[choice]; + enabled_actions.insert(action); + state_action_choice[state][action] = choice; + uint64_t player2_state = 
state_action_to_game_state.translate(state,action); + game_state_to_choices.resize(state_action_to_game_state.numTranslations()); + game_state_to_choices[player2_state].push_back(choice); + for(auto const &entry: quotient.getTransitionMatrix().getRow(choice)) { + uint64_t state_dst = entry.getColumn(); + if(state_is_encountered[state_dst]) { + continue; + } + unexplored_states.push(state_dst); + state_is_encountered.set(state_dst,true); + } + } + state_enabled_actions[state] = std::vector(enabled_actions.begin(),enabled_actions.end()); + } + + uint64_t game_num_states = state_action_to_game_state.numTranslations(); + storm::storage::SparseMatrixBuilder game_matrix_builder(0,0,0,false,true); + uint64_t game_num_choices = 0; + // for Player 1, contains a representative choice with the appropriate label; for Player 2, contains the corresponding choice + std::vector game_choice_to_quotient_choice; + std::vector game_choice_is_player1; + + for(uint64_t game_state = 0; game_state < game_num_states; ++game_state) { + auto [state,state_action] = state_action_to_game_state.retrieve(game_state); + game_matrix_builder.newRowGroup(game_num_choices); + if(state_action == quotient_num_actions) { + // Player 1 state + for(uint64_t action: state_enabled_actions[state]) { + uint64_t player2_state = state_action_to_game_state.translate(state,action); + game_matrix_builder.addNextValue(game_num_choices,player2_state,1); + // game_state_to_choices[player1_state].push_back(action); + game_choice_to_quotient_choice.push_back(state_action_choice[state][action]); + game_choice_is_player1.push_back(true); + game_num_choices++; + } + } else { + // Player 2 state + uint64_t player2_state = game_state; + for(uint64_t choice: game_state_to_choices[game_state]) { + for(auto const& entry: quotient.getTransitionMatrix().getRow(choice)) { + uint64_t state_dst = entry.getColumn(); + uint64_t game_state_dst = state_action_to_game_state.translate(state_dst,quotient_num_actions); + game_matrix_builder.addNextValue(game_num_choices,game_state_dst,entry.getValue()); + } + game_choice_to_quotient_choice.push_back(choice); + game_choice_is_player1.push_back(false); + game_num_choices++; + } + } + } + + storm::storage::sparse::ModelComponents components; + components.transitionMatrix = game_matrix_builder.build(); + uint64_t game_initial_state = state_action_to_game_state.translate(quotient_initial_state,quotient_num_actions); + + components.stateLabeling = synthesis::translateStateLabeling(quotient,state_action_to_game_state.translationToItem(),game_initial_state); + std::vector game_state_to_player; + for(auto [_,action]: state_action_to_game_state.translationToItemKey()) { + uint64_t player = action == quotient_num_actions ? 
0 : 1;
+        game_state_to_player.push_back(player);
+    }
+    components.statePlayerIndications = std::move(game_state_to_player);
+    // skipping state valuations
+
+    storm::storage::BitVector game_choice_mask(game_num_choices,true);
+    storm::storage::BitVector player1_choice_mask(game_num_choices);
+    for(uint64_t game_choice = 0; game_choice < game_num_choices; ++game_choice) {
+        player1_choice_mask.set(game_choice, game_choice_is_player1[game_choice]);
+    }
+    components.choiceLabeling = synthesis::translateChoiceLabeling(quotient,game_choice_to_quotient_choice,game_choice_mask);
+    components.rewardModels = synthesis::translateRewardModels(quotient,game_choice_to_quotient_choice,player1_choice_mask);
+    // skipping choice origins
+
+    if (quotient.getType() == storm::models::ModelType::Pomdp) {
+        components.observabilityClasses = synthesis::translateObservabilityClasses(quotient,state_action_to_game_state.translationToItem());
+        // skipping observation valuations
+        this->smg = std::make_shared<Posmg<ValueType>>(std::move(components));
+    } else {
+        this->smg = std::make_shared<storm::models::sparse::Smg<ValueType>>(std::move(components));
+    }
+
+    this->state_to_quotient_state_action = state_action_to_game_state.translationToItemKey();
+    this->choice_to_quotient_choice = std::move(game_choice_to_quotient_choice);
+}
+
+template class SmgAbstraction<double>;
+}
\ No newline at end of file
diff --git a/payntbind/src/synthesis/pomdp_family/SmgAbstraction.h b/payntbind/src/synthesis/pomdp_family/SmgAbstraction.h
new file mode 100644
index 000000000..db442c8f1
--- /dev/null
+++ b/payntbind/src/synthesis/pomdp_family/SmgAbstraction.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <storm/models/sparse/Smg.h>
+#include <storm/storage/BitVector.h>
+
+namespace synthesis {
+
+/**
+ * SMG abstraction for an MDP sketch. States of Player 0 are the states of the quotient. In each state s, Player 0
+ * picks an action a, which leads to state (s,a) of Player 1. In state (s,a), Player 1 chooses the color (variant)
+ * of a to be executed.
+ */
+template<typename ValueType>
+class SmgAbstraction {
+public:
+
+    /**
+     * Create game abstraction for the sub-MDP.
+     * @param quotient The quotient MDP. If the quotient is a POMDP, then @ref smg can be recast to a Posmg.
+     * @param quotient_num_actions The total number of distinct actions in the quotient.
+     * @param choice_to_action For each row of the quotient, the associated action.
+     * @param quotient_choice_mask Choices of the quotient that remained in the sub-MDP.
+     */
+    SmgAbstraction(
+        storm::models::sparse::Model<ValueType> const& quotient,
+        uint64_t quotient_num_actions,
+        std::vector<uint64_t> const& choice_to_action,
+        storm::storage::BitVector const& quotient_choice_mask
+    );
+
+    /** The game. */
+    std::shared_ptr<storm::models::sparse::Smg<ValueType>> smg;
+    /**
+     * For each state s of the game, the corresponding (s,a) pair.
+     * @note States of Player 0 are encoded as (s,num_actions).
+     */
+    std::vector<std::pair<uint64_t,uint64_t>> state_to_quotient_state_action;
+    /**
+     * For each choice of the game, the corresponding choice in the quotient.
+     * @note Choice of Player 0 executing an action is mapped to its arbitrary variant.
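+     *
+     * Illustrative usage from Python via the payntbind bindings (a sketch; `model`,
+     * `num_actions`, `choice_to_action`, `choice_mask` and the game state/choice
+     * indices are assumed to be in scope):
+     *
+     *     abstraction = payntbind.synthesis.SmgAbstraction(model, num_actions, choice_to_action, choice_mask)
+     *     state, action = abstraction.state_to_quotient_state_action[game_state]
+     *     quotient_choice = abstraction.choice_to_quotient_choice[game_choice]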
+ */ + std::vector choice_to_quotient_choice; +}; + +} diff --git a/payntbind/src/synthesis/pomdp_family/bindings.cpp b/payntbind/src/synthesis/pomdp_family/bindings.cpp index 97b837718..5cb2ccd4c 100644 --- a/payntbind/src/synthesis/pomdp_family/bindings.cpp +++ b/payntbind/src/synthesis/pomdp_family/bindings.cpp @@ -3,6 +3,7 @@ #include "ObservationEvaluator.h" #include "FscUnfolder.h" #include "GameAbstractionSolver.h" +#include "SmgAbstraction.h" void bindings_pomdp_family(py::module& m) { @@ -34,10 +35,19 @@ void bindings_pomdp_family(py::module& m) { // m.def("randomize_action_variant", &synthesis::randomizeActionVariant); py::class_>(m, "GameAbstractionSolver") .def( - py::init const&, uint64_t, std::vector const&, std::string const&, double>(), - py::arg("quotient"), py::arg("quoitent_num_actions"), py::arg("choice_to_action"), py::arg("target_label"), py::arg("precision") + py::init< + storm::models::sparse::Model const&, + uint64_t, + std::vector const&, + std::shared_ptr, + bool, + std::string const&, + double + >(), + py::arg("quotient"), py::arg("num_actions"), py::arg("choice_to_action"), py::arg("formula"), py::arg("player1_maximizing"), py::arg("target_label"), py::arg("precision") ) - .def("solve", &synthesis::GameAbstractionSolver::solve) + .def("solve_sg", &synthesis::GameAbstractionSolver::solveSg) + .def("solve_smg", &synthesis::GameAbstractionSolver::solveSmg) .def_property_readonly("solution_state_values", [](synthesis::GameAbstractionSolver& solver) {return solver.solution_state_values;}) .def_property_readonly("solution_value", [](synthesis::GameAbstractionSolver& solver) {return solver.solution_value;}) .def_property_readonly("solution_state_to_player1_action", [](synthesis::GameAbstractionSolver& solver) {return solver.solution_state_to_player1_action;}) @@ -45,4 +55,16 @@ void bindings_pomdp_family(py::module& m) { .def("enable_profiling", &synthesis::GameAbstractionSolver::enableProfiling) .def("print_profiling", &synthesis::GameAbstractionSolver::printProfiling) ; + + py::class_, std::shared_ptr>>(m, "SmgAbstraction") + .def(py::init< + storm::models::sparse::Model const&, + uint64_t, + std::vector const&, + storm::storage::BitVector const& + >(), py::arg("model"), py::arg("num_actions"), py::arg("choice_to_action"), py::arg("choice_mask")) + .def_readonly("smg", &synthesis::SmgAbstraction::smg) + .def_readonly("state_to_quotient_state_action", &synthesis::SmgAbstraction::state_to_quotient_state_action) + .def_readonly("choice_to_quotient_choice", &synthesis::SmgAbstraction::choice_to_quotient_choice) + ; } diff --git a/payntbind/src/synthesis/posmg/Posmg.cpp b/payntbind/src/synthesis/posmg/Posmg.cpp index 197a12978..9865a85ad 100644 --- a/payntbind/src/synthesis/posmg/Posmg.cpp +++ b/payntbind/src/synthesis/posmg/Posmg.cpp @@ -1,110 +1,105 @@ #include "Posmg.h" -namespace synthesis { - - Posmg::Posmg(storm::storage::sparse::ModelComponents const& components) - : Smg(components), observations(components.observabilityClasses.value()) - { - calculateP0ObservationCount(); - } - Posmg::Posmg(storm::storage::sparse::ModelComponents &&components) - : Smg(std::move(components)), observations(components.observabilityClasses.value()) - { - calculateP0ObservationCount(); - } +#include "src/synthesis/translation/componentTranslations.h" - std::vector const &Posmg::getObservations() const - { - return observations; - } - - uint32_t Posmg::getObservation(uint64_t state) const - { - return observations.at(state); - } - - uint64_t Posmg::getP0ObservationCount() const - { - 
return p0ObservationCount; - } - - - storm::models::sparse::Mdp Posmg::getMdp() - { - auto components = createComponents(*this); - - return storm::models::sparse::Mdp(std::move(components)); - } - - storm::models::sparse::Pomdp Posmg::getPomdp() - { - auto components = createComponents(*this); - components.observabilityClasses = this->observations; - - return storm::models::sparse::Pomdp(std::move(components)); - } - - void Posmg::calculateP0ObservationCount() - { - // For now, optimizing player always has index 0. - // If we need to optimize for another player or for more than one player, - // we can for example add a parameter to this method or add a 'optimizingPlayer' property. - uint64_t optimizingPlayer = 0; - - auto stateCount = this->getNumberOfStates(); - auto statePlayerIndications = this->getStatePlayerIndications(); - std::set p0Observations; - - for (uint64_t state = 0; state < stateCount; state++) - { - if (statePlayerIndications[state] == optimizingPlayer) - { - auto observation = observations[state]; - p0Observations.insert(observation); - } - } - - p0ObservationCount = p0Observations.size(); - } +namespace synthesis { - Posmg createPosmg(storm::models::sparse::Pomdp pomdp, - std::vector statePlayerIndications) +template +Posmg::Posmg(storm::storage::sparse::ModelComponents const& components) + : storm::models::sparse::Smg(components), observations(components.observabilityClasses.value()) +{ + calculateP0ObservationCount(); +} +template +Posmg::Posmg(storm::storage::sparse::ModelComponents &&components) + : storm::models::sparse::Smg(std::move(components)), observations(components.observabilityClasses.value()) +{ + calculateP0ObservationCount(); +} + +template +std::vector const &Posmg::getObservations() const +{ + return observations; +} + +template +uint32_t Posmg::getObservation(uint64_t state) const +{ + return observations.at(state); +} + +template +uint64_t Posmg::getP0ObservationCount() const +{ + return p0ObservationCount; +} + +template +storm::models::sparse::Mdp Posmg::getMdp() +{ + auto components = synthesis::componentsFromModel(*this); + return storm::models::sparse::Mdp(std::move(components)); +} + +template +storm::models::sparse::Pomdp Posmg::getPomdp() +{ + auto components = synthesis::componentsFromModel(*this); + components.observabilityClasses = this->observations; + return storm::models::sparse::Pomdp(std::move(components)); +} + +template +void Posmg::calculateP0ObservationCount() +{ + // For now, optimizing player always has index 0. + // If we need to optimize for another player or for more than one player, + // we can for example add a parameter to this method or add a 'optimizingPlayer' property. + uint64_t optimizingPlayer = 0; + + auto stateCount = this->getNumberOfStates(); + auto statePlayerIndications = this->getStatePlayerIndications(); + std::set p0Observations; + + for (uint64_t state = 0; state < stateCount; state++) { - auto components = createComponents(pomdp); - - // Smg components - components.statePlayerIndications = statePlayerIndications; - // todo playerNameToIndexMap ?? 
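
The legacy `createPosmg` helper removed here is superseded by the templated `posmgFromPomdp` below; from Python, the renamed entry point (cf. the `drn_parser.py` hunk above) is driven roughly as follows — a sketch, assuming a POMDP and per-state player indications are in scope:

```python
# Sketch: build a POSMG and project it back to simpler models
# (method names as registered by posmg/bindings.cpp in this patch).
posmg = payntbind.synthesis.posmgFromPomdp(pomdp, state_player_indications)
mdp = posmg.get_mdp()       # underlying MDP
pomdp2 = posmg.get_pomdp()  # underlying POMDP, observations restored
p0_obs = posmg.get_p0_observation_count()
```
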
- - // Pomdp components - components.observabilityClasses = pomdp.getObservations(); - if (pomdp.hasObservationValuations()) + if (statePlayerIndications[state] == optimizingPlayer) { - components.observationValuations = pomdp.getObservationValuations(); + auto observation = observations[state]; + p0Observations.insert(observation); } - - return Posmg(components); } - storm::storage::sparse::ModelComponents createComponents( - storm::models::sparse::NondeterministicModel const& model) - { - storm::storage::sparse::ModelComponents components( - model.getTransitionMatrix(), - model.getStateLabeling(), - model.getRewardModels() - ); - if (model.hasChoiceLabeling()) { - components.choiceLabeling = model.getChoiceLabeling(); - } - if (model.hasStateValuations()) { - components.stateValuations = model.getStateValuations(); - } - if (model.hasChoiceOrigins()) { - components.choiceOrigins = model.getChoiceOrigins(); - } - - return components; - } + p0ObservationCount = p0Observations.size(); +} + +template class Posmg; + + +template +Posmg posmgFromPomdp( + storm::models::sparse::Pomdp pomdp, + std::vector statePlayerIndications) { + auto components = synthesis::componentsFromModel(pomdp); + components.statePlayerIndications = statePlayerIndications; + return Posmg(components); +} + +template +Posmg posmgFromSmg( + storm::models::sparse::Smg smg, + std::optional> observabilityClasses) { + auto components = synthesis::componentsFromModel(smg); + components.observabilityClasses = observabilityClasses; + return Posmg(components); +} + +template Posmg posmgFromPomdp( + storm::models::sparse::Pomdp pomdp, + std::vector statePlayerIndications); +template Posmg posmgFromSmg( + storm::models::sparse::Smg, + std::optional> observabilityClasses); } // namespace synthesis \ No newline at end of file diff --git a/payntbind/src/synthesis/posmg/Posmg.h b/payntbind/src/synthesis/posmg/Posmg.h index 9e5ef81e5..c3de1536a 100644 --- a/payntbind/src/synthesis/posmg/Posmg.h +++ b/payntbind/src/synthesis/posmg/Posmg.h @@ -1,29 +1,28 @@ #pragma once -#include "storm/models/sparse/Smg.h" -#include "storm/models/sparse/Pomdp.h" +#include +#include +#include #include namespace synthesis { /** - * @brief A class representing Partially observable multiplayer game - * @todo make generic with template + * @brief A class representing partially observable stochastic multiplayer game. */ -class Posmg : public storm::models::sparse::Smg { +template> +class Posmg : public storm::models::sparse::Smg { public: /** * @brief Construct a new Posmg object from model components * * @param components Both statePlayerIndications and observabilityClasses have to be filled */ - Posmg(storm::storage::sparse::ModelComponents const& components); - Posmg(storm::storage::sparse::ModelComponents &&components); + Posmg(storm::storage::sparse::ModelComponents const& components); + Posmg(storm::storage::sparse::ModelComponents&& components); /** * @brief Return a vector of observatinos - * - * @return std::vector const& */ std::vector const &getObservations() const; @@ -37,24 +36,18 @@ class Posmg : public storm::models::sparse::Smg { /** * @brief Return number of observations corresponding to player 0 states. 
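+     * (counts distinct observations over Player-0 states; computed once at
+     * construction by calculateP0ObservationCount())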
- * - * @return uint64_t */ uint64_t getP0ObservationCount() const; /** * @brief Get the underlying MDP - * - * @return storm::models::sparse::Mdp */ - storm::models::sparse::Mdp getMdp(); + storm::models::sparse::Mdp getMdp(); /** * @brief Get the underlying POMDP - * - * @return storm::models::sparse::Pomdp */ - storm::models::sparse::Pomdp getPomdp(); + storm::models::sparse::Pomdp getPomdp(); private: /** @@ -70,22 +63,23 @@ class Posmg : public storm::models::sparse::Smg { }; /** - * @brief Create and return a Posmg object from pomdp and state indications - * - * @param pomdp Model information from this pomdp will be used to create the game + * @brief Create a POSMG from a POMDP and state indications. + * @param pomdp Base POMDP * @param statePlayerIndications Vector indicating which states belong to which player - * @return Posmg */ -Posmg createPosmg(storm::models::sparse::Pomdp pomdp, - std::vector statePlayerIndications); +template> +Posmg posmgFromPomdp( + storm::models::sparse::Pomdp pomdp, + std::vector statePlayerIndications); /** - * @brief Create and return a Components object based on the provided model - * - * @param model Properites to create the ModelComponents are taken from this model. - * @return storm::storage::sparse::ModelComponents + * @brief Create a POSMG from an SMG and observability classes. + * @param smg Base SMG + * @param observabilityClasses for each state an observability class */ -storm::storage::sparse::ModelComponents createComponents( - storm::models::sparse::NondeterministicModel const& model); +template> +Posmg posmgFromSmg( + storm::models::sparse::Smg smg, + std::optional> observabilityClasses); } // namespace synthesis \ No newline at end of file diff --git a/payntbind/src/synthesis/posmg/PosmgManager.cpp b/payntbind/src/synthesis/posmg/PosmgManager.cpp index e4891c6fe..2a80a0912 100644 --- a/payntbind/src/synthesis/posmg/PosmgManager.cpp +++ b/payntbind/src/synthesis/posmg/PosmgManager.cpp @@ -2,7 +2,8 @@ namespace synthesis { - PosmgManager::PosmgManager(Posmg const& posmg, uint64_t optimizingPlayer) + template + PosmgManager::PosmgManager(Posmg const& posmg, uint64_t optimizingPlayer) : posmg(posmg), optimizingPlayer(optimizingPlayer) { this->calculateObservationMap(); @@ -16,17 +17,20 @@ namespace synthesis { this->prototypeDuplicates.resize(posmg.getNumberOfStates()); } - std::vector PosmgManager::getObservationMapping() + template + std::vector PosmgManager::getObservationMapping() { return this->optPlayerObservationMap; } - void PosmgManager::setObservationMemorySize(uint64_t observation, uint64_t memorySize) + template + void PosmgManager::setObservationMemorySize(uint64_t observation, uint64_t memorySize) { this->optPlayerObservationMemorySize[observation] = memorySize; } - std::vector PosmgManager::getStatePlayerIndications() + template + std::vector PosmgManager::getStatePlayerIndications() { std::vector statePlayerIndications(this->stateCount); for (uint64_t state = 0; state < this->stateCount; state++) @@ -38,8 +42,8 @@ namespace synthesis { return statePlayerIndications; } - - void PosmgManager::calculateObservationMap() + template + void PosmgManager::calculateObservationMap() { this->optPlayerObservationMap.clear(); @@ -59,7 +63,8 @@ namespace synthesis { } } - void PosmgManager::calculateObservationSuccesors() + template + void PosmgManager::calculateObservationSuccesors() { auto transitionMat = this->posmg.getTransitionMatrix(); auto stateCount = this->posmg.getNumberOfStates(); @@ -83,7 +88,8 @@ namespace synthesis { } } - 
void PosmgManager::calculatePrototypeActionCount() + template + void PosmgManager::calculatePrototypeActionCount() { auto prototypeCount = this->posmg.getNumberOfStates(); this->prototypeActionCount.resize(prototypeCount); @@ -95,7 +101,8 @@ namespace synthesis { } } - void PosmgManager::calculateObservationActions() + template + void PosmgManager::calculateObservationActions() { for (uint64_t prototype = 0; prototype < this->posmg.getNumberOfStates(); prototype++) { @@ -108,7 +115,8 @@ namespace synthesis { } } - void PosmgManager::calculatePrototypeRowIndex() + template + void PosmgManager::calculatePrototypeRowIndex() { this->prototypeRowIndex.resize(this->posmg.getTransitionMatrix().getRowCount()); @@ -125,8 +133,8 @@ namespace synthesis { } } - - void PosmgManager::buildStateSpace() + template + void PosmgManager::buildStateSpace() { auto prototypeCount = this->posmg.getNumberOfStates(); this->stateDuplicateCount.resize(prototypeCount, 1); // je potreba resize??? @@ -183,7 +191,8 @@ namespace synthesis { } } - void PosmgManager::buildTransitionMatrixSpurious() { + template + void PosmgManager::buildTransitionMatrixSpurious() { // calculate maxSuccesorDuplicateCount for (auto entry : this->succesors) { @@ -236,14 +245,16 @@ namespace synthesis { this->rowGroups[this->stateCount] = this->rowCount; } - uint64_t PosmgManager::translateState(uint64_t prototype, uint64_t memory) { + template + uint64_t PosmgManager::translateState(uint64_t prototype, uint64_t memory) { if(memory >= this->prototypeDuplicates[prototype].size()) { memory = 0; } return this->prototypeDuplicates[prototype][memory]; } - storm::storage::SparseMatrix PosmgManager::constructTransitionMatrix() + template + storm::storage::SparseMatrix PosmgManager::constructTransitionMatrix() { storm::storage::SparseMatrixBuilder builder( this->rowCount, this->stateCount, 0, true, true, this->stateCount @@ -266,7 +277,8 @@ namespace synthesis { return builder.build(); } - storm::models::sparse::StateLabeling PosmgManager::constructStateLabeling() + template + storm::models::sparse::StateLabeling PosmgManager::constructStateLabeling() { storm::models::sparse::StateLabeling labeling(this->stateCount); for (auto const& label : this->posmg.getStateLabeling().getLabels()) @@ -297,7 +309,8 @@ namespace synthesis { return labeling; } - void PosmgManager::resetDesignSpace() + template + void PosmgManager::resetDesignSpace() { this->holeCount = 0; this->actionHoles.clear(); @@ -316,7 +329,8 @@ namespace synthesis { } // version where each state of non optimising players has it's own action hole - void PosmgManager::buildDesignSpaceSpurious() + template + void PosmgManager::buildDesignSpaceSpurious() { this->resetDesignSpace(); @@ -403,34 +417,40 @@ namespace synthesis { } - std::shared_ptr> PosmgManager::constructMdp() + template + std::shared_ptr> PosmgManager::constructMdp() { this->buildStateSpace(); this->buildTransitionMatrixSpurious(); - storm::storage::sparse::ModelComponents components; + storm::storage::sparse::ModelComponents components; components.transitionMatrix = this->constructTransitionMatrix(); components.stateLabeling = this->constructStateLabeling(); - this->mdp = std::make_shared>(std::move(components)); + this->mdp = std::make_shared>(std::move(components)); this->buildDesignSpaceSpurious(); return this->mdp; } - bool PosmgManager::isOptPlayerState(uint64_t state){ + template + bool PosmgManager::isOptPlayerState(uint64_t state){ return this->posmg.getStatePlayerIndications()[state] == this->optimizingPlayer; } - uint64_t 
PosmgManager::getActionCount(uint64_t state) + template + uint64_t PosmgManager::getActionCount(uint64_t state) { auto prototype = this->statePrototype[state]; return this->prototypeActionCount[prototype]; } - bool PosmgManager::contains(std::vector v, uint64_t elem) + template + bool PosmgManager::contains(std::vector v, uint64_t elem) { return std::find(v.begin(), v.end(), elem) != v.end(); } + template class PosmgManager; + } // namespace synthesis \ No newline at end of file diff --git a/payntbind/src/synthesis/posmg/PosmgManager.h b/payntbind/src/synthesis/posmg/PosmgManager.h index f6b9cceda..27c347b7b 100644 --- a/payntbind/src/synthesis/posmg/PosmgManager.h +++ b/payntbind/src/synthesis/posmg/PosmgManager.h @@ -5,17 +5,18 @@ namespace synthesis { +template class PosmgManager { public: - PosmgManager(Posmg const& posmg, uint64_t optimizingPlayer); + PosmgManager(Posmg const& posmg, uint64_t optimizingPlayer); /** * @brief unfold memory */ std::shared_ptr> constructMdp(); - std::vector getObservationMapping(); + std::vector getObservationMapping(); void setObservationMemorySize(uint64_t observation, uint64_t memorySize); @@ -164,7 +165,7 @@ class PosmgManager { bool contains(std::vector v, uint64_t elem); /** original POSMG */ - Posmg const& posmg; + Posmg const& posmg; /** index of optimizing player */ uint64_t optimizingPlayer; @@ -174,7 +175,7 @@ class PosmgManager { * observability and the other player has complete observability, we keep a vector of * optimizing player's observations, which also serves as a mapping. */ - std::vector optPlayerObservationMap; + std::vector optPlayerObservationMap; // For each row in original posmg contains its index withing its row group std::vector prototypeRowIndex; @@ -201,7 +202,7 @@ class PosmgManager { std::vector rowMemory; // Unfolded mdp created from posmg - std::shared_ptr> mdp; + std::shared_ptr> mdp; }; // class PosmgManager diff --git a/payntbind/src/synthesis/posmg/bindings.cpp b/payntbind/src/synthesis/posmg/bindings.cpp index 93687bec4..e4acb36d1 100644 --- a/payntbind/src/synthesis/posmg/bindings.cpp +++ b/payntbind/src/synthesis/posmg/bindings.cpp @@ -2,51 +2,52 @@ #include "Posmg.h" #include "PosmgManager.h" + #include "storm/models/sparse/Smg.h" void bindings_posmg(py::module &m) { - py::class_, storm::models::sparse::Smg>(m, "Posmg") + py::class_, std::shared_ptr>, storm::models::sparse::Smg>(m, "Posmg") // .def(py::init const&>(), py::arg("components")) // .def(py::init &&>(), py::arg("components")); - .def("get_observations", &synthesis::Posmg::getObservations) - .def("get_p0_observation_count", &synthesis::Posmg::getP0ObservationCount) - .def("get_mdp", &synthesis::Posmg::getMdp) - .def_property_readonly("nondeterministic_choice_indices", [](synthesis::Posmg const& m) { return m.getNondeterministicChoiceIndices(); }) - .def("get_pomdp", &synthesis::Posmg::getPomdp) + .def("get_observations", &synthesis::Posmg::getObservations) + .def("get_p0_observation_count", &synthesis::Posmg::getP0ObservationCount) + .def("get_mdp", &synthesis::Posmg::getMdp) + .def_property_readonly("nondeterministic_choice_indices", [](synthesis::Posmg const& m) { return m.getNondeterministicChoiceIndices(); }) + .def("get_pomdp", &synthesis::Posmg::getPomdp) // this binding (calculation) is done in stormpy for mdp, but posmg doesn't inherit from mdp, so it is also copied here - .def("get_nr_available_actions", [](synthesis::Posmg const& posmg, uint64_t stateIndex) + .def("get_nr_available_actions", [](synthesis::Posmg const& posmg, uint64_t 
stateIndex) { return posmg.getNondeterministicChoiceIndices()[stateIndex+1] - posmg.getNondeterministicChoiceIndices()[stateIndex] ; }, py::arg("state")) // same as ^ - .def("get_choice_index", [](synthesis::Posmg const& posmg, uint64_t state, uint64_t actOff) + .def("get_choice_index", [](synthesis::Posmg const& posmg, uint64_t state, uint64_t actOff) { return posmg.getNondeterministicChoiceIndices()[state]+actOff; }, py::arg("state"), py::arg("action_offset"), "gets the choice index for the offset action from the given state."); - m.def("create_posmg", &synthesis::createPosmg, py::arg("pomdp"), py::arg("state_player_indications")); + m.def("posmgFromPomdp", &synthesis::posmgFromPomdp, py::arg("pomdp"), py::arg("state_player_indications")); + m.def("posmgFromSmg", &synthesis::posmgFromSmg, py::arg("smg"), py::arg("observability_classes")); - py::class_>(m, "PosmgManager") - .def(py::init(), py::arg("posmg"), py::arg("optimizing_player")) - .def("construct_mdp", &synthesis::PosmgManager::constructMdp) - .def("get_observation_mapping", &synthesis::PosmgManager::getObservationMapping) - .def("set_observation_memory_size", &synthesis::PosmgManager::setObservationMemorySize, + py::class_, std::shared_ptr>>(m, "PosmgManager") + .def(py::init const&, uint64_t>(), py::arg("posmg"), py::arg("optimizing_player")) + .def("construct_mdp", &synthesis::PosmgManager::constructMdp) + .def("get_observation_mapping", &synthesis::PosmgManager::getObservationMapping) + .def("set_observation_memory_size", &synthesis::PosmgManager::setObservationMemorySize, py::arg("observation"), py::arg("memory_size")) - .def("get_state_player_indications", &synthesis::PosmgManager::getStatePlayerIndications) - .def("get_action_count", &synthesis::PosmgManager::getActionCount, py::arg("state")) - .def_property_readonly("state_prototype", [](synthesis::PosmgManager& manager) {return manager.statePrototype;}) - .def_property_readonly("state_memory", [](synthesis::PosmgManager& manager) {return manager.stateMemory;}) - .def_property_readonly("observation_memory_size", [](synthesis::PosmgManager& manager) {return manager.optPlayerObservationMemorySize;}) - .def_property_readonly("observation_actions", [](synthesis::PosmgManager& manager) {return manager.optPlayerObservationActions;}) - .def_property_readonly("observation_successors", [](synthesis::PosmgManager& manager) {return manager.succesors;}) - .def_property_readonly("max_successor_memory_size", [](synthesis::PosmgManager& manager) {return manager.maxSuccesorDuplicateCount;}) - .def_property_readonly("num_holes", [](synthesis::PosmgManager& manager) {return manager.holeCount;}) - .def_property_readonly("action_holes", [](synthesis::PosmgManager& manager) {return manager.actionHoles;}) - .def_property_readonly("memory_holes", [](synthesis::PosmgManager& manager) {return manager.memoryHoles;}) - .def_property_readonly("hole_options", [](synthesis::PosmgManager& manager) {return manager.holeOptionCount;}) - .def_property_readonly("row_action_hole", [](synthesis::PosmgManager& manager) {return manager.rowActionHole;}) - .def_property_readonly("row_action_option", [](synthesis::PosmgManager& manager) {return manager.rowActionOption;}) - .def_property_readonly("row_memory_hole", [](synthesis::PosmgManager& manager) {return manager.rowMemoryHole;}) - .def_property_readonly("row_memory_option", [](synthesis::PosmgManager& manager) {return manager.rowMemoryOption;}) + .def("get_state_player_indications", &synthesis::PosmgManager::getStatePlayerIndications) + 
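
(The `PosmgManager` binding chain continues below; for context, here is a sketch of how the manager is driven from Python, assuming `posmg` was built by `posmgFromPomdp` and player 0 is the optimizing player:)

```python
# Sketch: memory unfolding for the optimizing player.
manager = payntbind.synthesis.PosmgManager(posmg, optimizing_player=0)
for obs in manager.get_observation_mapping():            # player-0 observations
    manager.set_observation_memory_size(obs, memory_size=2)  # e.g. 2 memory nodes each
unfolded_mdp = manager.construct_mdp()
indications = manager.get_state_player_indications()
```
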
.def("get_action_count", &synthesis::PosmgManager::getActionCount, py::arg("state")) + .def_property_readonly("state_prototype", [](synthesis::PosmgManager& manager) {return manager.statePrototype;}) + .def_property_readonly("state_memory", [](synthesis::PosmgManager& manager) {return manager.stateMemory;}) + .def_property_readonly("observation_memory_size", [](synthesis::PosmgManager& manager) {return manager.optPlayerObservationMemorySize;}) + .def_property_readonly("observation_actions", [](synthesis::PosmgManager& manager) {return manager.optPlayerObservationActions;}) + .def_property_readonly("observation_successors", [](synthesis::PosmgManager& manager) {return manager.succesors;}) + .def_property_readonly("max_successor_memory_size", [](synthesis::PosmgManager& manager) {return manager.maxSuccesorDuplicateCount;}) + .def_property_readonly("num_holes", [](synthesis::PosmgManager& manager) {return manager.holeCount;}) + .def_property_readonly("action_holes", [](synthesis::PosmgManager& manager) {return manager.actionHoles;}) + .def_property_readonly("memory_holes", [](synthesis::PosmgManager& manager) {return manager.memoryHoles;}) + .def_property_readonly("hole_options", [](synthesis::PosmgManager& manager) {return manager.holeOptionCount;}) + .def_property_readonly("row_action_hole", [](synthesis::PosmgManager& manager) {return manager.rowActionHole;}) + .def_property_readonly("row_action_option", [](synthesis::PosmgManager& manager) {return manager.rowActionOption;}) + .def_property_readonly("row_memory_hole", [](synthesis::PosmgManager& manager) {return manager.rowMemoryHole;}) + .def_property_readonly("row_memory_option", [](synthesis::PosmgManager& manager) {return manager.rowMemoryOption;}) ; - } diff --git a/payntbind/src/synthesis/quotient/Family.cpp b/payntbind/src/synthesis/quotient/Family.cpp index bdef7f5cc..47aeee7fb 100644 --- a/payntbind/src/synthesis/quotient/Family.cpp +++ b/payntbind/src/synthesis/quotient/Family.cpp @@ -30,15 +30,18 @@ uint64_t Family::addHole(uint64_t num_options) { } void Family::holeSetOptions(uint64_t hole, std::vector const& options) { - hole_options[hole] = options; hole_options_mask[hole].clear(); - for(auto option: options) { + for(uint64_t option: options) { hole_options_mask[hole].set(option); } + hole_options[hole].clear(); + for(uint64_t option: hole_options_mask[hole]) { + hole_options[hole].push_back(option); + } } void Family::holeSetOptions(uint64_t hole, BitVector const& options) { hole_options[hole].clear(); - for(auto option: options) { + for(uint64_t option: options) { hole_options[hole].push_back(option); } hole_options_mask[hole] = options; diff --git a/payntbind/src/synthesis/smg/bindings.cpp b/payntbind/src/synthesis/smg/bindings.cpp index 12aba5fbb..d1b07bb57 100644 --- a/payntbind/src/synthesis/smg/bindings.cpp +++ b/payntbind/src/synthesis/smg/bindings.cpp @@ -1,39 +1,7 @@ #include "../synthesis.h" -#include -#include - -#include "modelchecker/SparseSmgRpatlModelChecker.h" - - -namespace synthesis { - - template - std::shared_ptr smgModelChecking ( - storm::models::sparse::Smg const& smg, - std::shared_ptr formula, - bool only_initial_states = false, - bool set_produce_schedulers = true, - storm::Environment const& env = storm::Environment() - ) { - //prepare model checking task - auto task = storm::api::createTask(formula, only_initial_states); - task.setProduceSchedulers(set_produce_schedulers); - - // call model checker - std::shared_ptr result; - synthesis::SparseSmgRpatlModelChecker> modelchecker(smg); - if 
diff --git a/payntbind/src/synthesis/smg/smgModelChecker.h b/payntbind/src/synthesis/smg/smgModelChecker.h
new file mode 100644
index 000000000..66603178b
--- /dev/null
+++ b/payntbind/src/synthesis/smg/smgModelChecker.h
@@ -0,0 +1,36 @@
+#include "../synthesis.h"
+
+#include
+#include
+
+#include "modelchecker/SparseSmgRpatlModelChecker.h"
+
+
+namespace synthesis {
+
+    template<typename ValueType>
+    std::shared_ptr<storm::modelchecker::CheckResult> modelCheckSmg (
+        storm::models::sparse::Smg<ValueType> const& smg,
+        std::shared_ptr<storm::logic::Formula const> formula,
+        bool only_initial_states = false,
+        bool set_produce_schedulers = true,
+        storm::Environment const& env = storm::Environment()
+    ) {
+        auto task = storm::api::createTask<ValueType>(formula, only_initial_states);
+        task.setProduceSchedulers(set_produce_schedulers);
+        std::shared_ptr<storm::modelchecker::CheckResult> result;
+        synthesis::SparseSmgRpatlModelChecker<storm::models::sparse::Smg<ValueType>> modelchecker(smg);
+        if (modelchecker.canHandle(task)) {
+            result = modelchecker.check(env, task);
+        }
+        return result;
+    }
+
+    template std::shared_ptr<storm::modelchecker::CheckResult> modelCheckSmg<double> (
+        storm::models::sparse::Smg<double> const& smg,
+        std::shared_ptr<storm::logic::Formula const> formula,
+        bool only_initial_states,
+        bool set_produce_schedulers,
+        storm::Environment const& env
+    );
+}
diff --git a/payntbind/src/synthesis/translation/ItemKeyTranslator.h b/payntbind/src/synthesis/translation/ItemKeyTranslator.h
index ad38f4847..7aba64dcb 100644
--- a/payntbind/src/synthesis/translation/ItemKeyTranslator.h
+++ b/payntbind/src/synthesis/translation/ItemKeyTranslator.h
@@ -36,7 +36,6 @@ namespace synthesis {
 
     private:
         uint64_t num_items;
-        std::vector> item_key_to_translation;
         std::vector> translation_to_item_key;
 
     };
diff --git a/payntbind/src/synthesis/translation/componentTranslations.cpp b/payntbind/src/synthesis/translation/componentTranslations.cpp
index 1d9deb817..a359497be 100644
--- a/payntbind/src/synthesis/translation/componentTranslations.cpp
+++ b/payntbind/src/synthesis/translation/componentTranslations.cpp
@@ -1,6 +1,7 @@
 #include "componentTranslations.h"
 
 #include
+#include <storm/models/sparse/Smg.h>
 #include
 #include
 #include
@@ -23,6 +24,11 @@ storm::storage::sparse::ModelComponents<ValueType> componentsFromModel(
         components.observabilityClasses = pomdp.getObservations();
         components.observationValuations = pomdp.getOptionalObservationValuations();
     }
+    if (model.getType() == storm::models::ModelType::Smg) {
+        auto smg = static_cast<storm::models::sparse::Smg<ValueType> const&>(model);
+        components.statePlayerIndications = smg.getStatePlayerIndications();
+        // skipping playerNameToIndexMap since Smg does not directly expose it
+    }
     return components;
 }

From 3aa98e2ff01672e6bd40fcc21f512990df946cb9 Mon Sep 17 00:00:00 2001
From: Roman Andriushchenko
Date: Mon, 25 Nov 2024 09:28:37 +0100
Subject: [PATCH 2/2] fix payntbind method names

---
 paynt/parser/drn_parser.py                 | 2 +-
 paynt/synthesizer/policy_tree.py           | 2 --
 payntbind/src/synthesis/posmg/bindings.cpp | 4 ++--
 3 files changed, 3 insertions(+), 5 deletions(-)
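The second patch is purely cosmetic: the Python-facing names gain paynt's usual snake_case spelling while the C++ symbols stay camelCase. The net call-site change, as it appears in the diffs that follow:

```python
# before: posmg = payntbind.synthesis.posmgFromPomdp(pomdp, state_player_indications)
posmg = payntbind.synthesis.posmg_from_pomdp(pomdp, state_player_indications)
```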
diff --git a/paynt/parser/drn_parser.py b/paynt/parser/drn_parser.py
index 49461258e..360de3bfb 100644
--- a/paynt/parser/drn_parser.py
+++ b/paynt/parser/drn_parser.py
@@ -27,7 +27,7 @@ def parse_drn(cls, sketch_path):
             pomdp_path = sketch_path + '.tmp'
             state_player_indications = DrnParser.pomdp_from_posmg(sketch_path, pomdp_path)
             pomdp = DrnParser.read_drn(pomdp_path)
-            explicit_model = payntbind.synthesis.posmgFromPomdp(pomdp, state_player_indications)
+            explicit_model = payntbind.synthesis.posmg_from_pomdp(pomdp, state_player_indications)
             os.remove(pomdp_path)
         else:
             explicit_model = DrnParser.read_drn(sketch_path)
diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py
index 84cc9bbf5..e02ec5cb4 100644
--- a/paynt/synthesizer/policy_tree.py
+++ b/paynt/synthesizer/policy_tree.py
@@ -679,7 +679,6 @@ def split(self, family, prop, hole_selection, splitter, policy):
     def evaluate_all(self, family, prop, keep_value_only=False):
         assert not prop.reward, "expecting reachability probability propery"
         game_solver = self.quotient.build_game_abstraction_solver(prop)
-        # game_solver.enable_profiling(True)
 
         family.candidate_policy = None
         policy_tree = PolicyTree(family)
@@ -729,7 +728,6 @@ def evaluate_all(self, family, prop, keep_value_only=False):
         self.stat.num_leaves_merged = len(policy_tree.collect_leaves())
         self.stat.num_policies_merged = len(policy_tree.policies)
         self.policy_tree = policy_tree
-        # game_solver.print_profiling()
 
         # convert policy tree to family evaluation
         evaluations = []
diff --git a/payntbind/src/synthesis/posmg/bindings.cpp b/payntbind/src/synthesis/posmg/bindings.cpp
index e4acb36d1..d5eea1d02 100644
--- a/payntbind/src/synthesis/posmg/bindings.cpp
+++ b/payntbind/src/synthesis/posmg/bindings.cpp
@@ -24,8 +24,8 @@ void bindings_posmg(py::module &m) {
            py::arg("state"), py::arg("action_offset"),
            "gets the choice index for the offset action from the given state.");
 
-    m.def("posmgFromPomdp", &synthesis::posmgFromPomdp<double>, py::arg("pomdp"), py::arg("state_player_indications"));
-    m.def("posmgFromSmg", &synthesis::posmgFromSmg<double>, py::arg("smg"), py::arg("observability_classes"));
+    m.def("posmg_from_pomdp", &synthesis::posmgFromPomdp<double>, py::arg("pomdp"), py::arg("state_player_indications"));
+    m.def("posmg_from_smg", &synthesis::posmgFromSmg<double>, py::arg("smg"), py::arg("observability_classes"));
 
     py::class_<synthesis::PosmgManager<double>, std::shared_ptr<synthesis::PosmgManager<double>>>(m, "PosmgManager")
         .def(py::init<synthesis::Posmg<double> const&, uint64_t>(), py::arg("posmg"), py::arg("optimizing_player"))
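For completeness, the other renamed constructor, posmg_from_smg, builds a POSMG from an explicit SMG by attaching an observability class to each state. A hedged sketch, assuming `smg` is a storm SMG; assigning every state its own class, done here purely for illustration, would model full observability for both players:

```python
import payntbind

observability_classes = list(range(smg.nr_states))  # illustrative only
posmg = payntbind.synthesis.posmg_from_smg(smg, observability_classes)
manager = payntbind.synthesis.PosmgManager(posmg, optimizing_player=0)
```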