From fefb966cc9dbc942fd24c523bb13c5187082b14b Mon Sep 17 00:00:00 2001
From: Roman Andriushchenko
Date: Wed, 6 Dec 2023 16:25:16 +0100
Subject: [PATCH] move randomized abstraction building to Stormpy

---
 paynt/quotient/mdp_family.py     | 62 +-------------------------------
 paynt/synthesizer/policy_tree.py | 39 ++++++++++----------
 2 files changed, 20 insertions(+), 81 deletions(-)

diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py
index a652d503d..d2d2b953e 100644
--- a/paynt/quotient/mdp_family.py
+++ b/paynt/quotient/mdp_family.py
@@ -66,64 +66,7 @@ def compute_choice_destinations(mdp):
             choice_destinations.append(destinations)
         return choice_destinations
 
-    @staticmethod
-    def randomize_action_variant(mdp, state_action_choices):
-        '''
-        Given an MDP having multiple variants of actions, create an MDP in which this variant selection is randomized.
-        '''
-
-        components = stormpy.storage.SparseModelComponents()
-        # copy state labeling
-        state_labeling = stormpy.storage.StateLabeling(mdp.nr_states)
-        for label in mdp.labeling.get_labels():
-            state_labeling.add_label(label)
-            state_labeling.set_states(label, mdp.labeling.get_states(label))
-        components.state_labeling = state_labeling
-
-        # build transition matrix and reward models
-        reward_vectors = {name:[] for name in mdp.reward_models}
-        builder = stormpy.storage.SparseMatrixBuilder(has_custom_row_grouping=True)
-        tm = mdp.transition_matrix
-        num_rows = 0
-        choice_to_action = []
-        for state in range(mdp.nr_states):
-            builder.new_row_group(num_rows)
-            for action,choices in enumerate(state_action_choices[state]):
-                if not choices:
-                    continue
-
-                # new choice
-                choice_to_action.append(action)
-
-                # handle transition matrix
-                num_choices = len(choices)
-                dst_prob = collections.defaultdict(int)
-                for choice in choices:
-                    for entry in tm.get_row(choice):
-                        dst_prob[entry.column] += entry.value()/num_choices
-
-                for dst,prob in dst_prob.items():
-                    builder.add_next_value(num_rows,dst,prob)
-                num_rows += 1
-
-                # handle reward models
-                for name,reward_model in mdp.reward_models.items():
-                    reward_value = 0
-                    for choice in choices:
-                        reward_value += reward_model.get_state_action_reward(choice)
-                    reward_value /= num_choices
-                    reward_vectors[name].append(reward_value)
-
-        components.transition_matrix = builder.build()
-        components.reward_models = {}
-        for name,state_action_reward in reward_vectors.items():
-            components.reward_models[name] = stormpy.SparseRewardModel(optional_state_action_reward_vector=state_action_reward)
-        model = stormpy.storage.SparseMdp(components)
-
-        return model,choice_to_action
-
-
     def __init__(self, quotient_mdp, coloring, specification):
         super().__init__(quotient_mdp = quotient_mdp, coloring = coloring, specification = specification)
 
 
@@ -134,10 +77,7 @@ def __init__(self, quotient_mdp, coloring, specification):
             self.quotient_mdp, self.num_actions, self.choice_to_action)
         self.state_to_actions = MdpFamilyQuotientContainer.map_state_to_available_actions(self.state_action_choices)
         self.choice_destinations = MdpFamilyQuotientContainer.compute_choice_destinations(self.quotient_mdp)
-
-        self.randomized_abstraction = MdpFamilyQuotientContainer.randomize_action_variant(
-            self.quotient_mdp, self.state_action_choices)
-
+
 
     @property
     def num_actions(self):
diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py
index c1174b295..0930c9baa 100644
--- a/paynt/synthesizer/policy_tree.py
+++ b/paynt/synthesizer/policy_tree.py
@@ -333,6 +333,11 @@ def postprocess(self, quotient, prop):
 
 class SynthesizerPolicyTree(paynt.synthesizer.synthesizer.Synthesizer):
 
+    # if True (False), then the game (randomized) abstraction will be used
+    use_game_abstraction = True
+    # if True, then the game scheduler will be used for splitting (incompatible with randomized abstraction)
+    use_optimistic_splitting = True
+
     @property
     def method_name(self):
         return "AR (policy tree)"
@@ -400,7 +405,7 @@ def solve_randomized_abstraction(self, family, prop):
             action = self.quotient.choice_to_action[family.mdp.quotient_choice_map[choice]]
             choice_to_action.append(action)
         state_action_choices = self.quotient.map_state_action_to_choices(family.mdp.model,self.quotient.num_actions,choice_to_action)
-        model,choice_to_action = self.quotient.randomize_action_variant(family.mdp.model, state_action_choices)
+        model,choice_to_action, = stormpy.synthesis.randomize_action_variant(family.mdp.model, state_action_choices)
         assert family.mdp.model.nr_states == model.nr_states
 
         result = stormpy.synthesis.verify_mdp(paynt.quotient.models.MarkovChain.environment,model,prop.formula,True)
@@ -421,7 +426,7 @@ def solve_randomized_abstraction(self, family, prop):
 
         return policy,policy_sat
 
-    
+
     def verify_family(self, family, game_solver, prop, reference_policy=None):
         self.quotient.build(family)
         mdp_family_result = MdpFamilyResult()
@@ -440,17 +445,7 @@ def verify_family(self, family, game_solver, prop, reference_policy=None):
             mdp_family_result.policy_source = "singleton"
             return mdp_family_result
 
-        if False:
-            randomized_policy,randomized_sat = self.solve_randomized_abstraction(family,prop)
-            if randomized_sat:
-                randomized_policy_sat = self.verify_policy(family,prop,randomized_policy)
-                if randomized_policy_sat:
-                    mdp_family_result.policy = randomized_policy
-                    mdp_family_result.policy_source = "randomized abstraction"
-                    return mdp_family_result
-
-        if True:
-            # game abstraction
+        if SynthesizerPolicyTree.use_game_abstraction:
             game_policy,game_sat = self.solve_game_abstraction(family,prop,game_solver)
             if game_sat:
                 game_policy_sat = self.verify_policy(family,prop,game_policy)
@@ -460,8 +455,14 @@ def verify_family(self, family, game_solver, prop, reference_policy=None):
                     return mdp_family_result
                 else:
                     print("game YES but nor forall family of size ", family.size)
-
-
+        else:
+            randomized_policy,randomized_sat = self.solve_randomized_abstraction(family,prop)
+            if randomized_sat:
+                randomized_policy_sat = self.verify_policy(family,prop,randomized_policy)
+                if randomized_policy_sat:
+                    mdp_family_result.policy = randomized_policy
+                    mdp_family_result.policy_source = "randomized abstraction"
+                    return mdp_family_result
 
         # solve primary-primary direction for the family
         # logger.debug("solving primary-primary direction...")
@@ -474,12 +475,10 @@ def verify_family(self, family, game_solver, prop, reference_policy=None):
 
 
         # undecided: choose scheduler choices to be used for splitting
-        if False:
-            # optimistic splitting
+        if SynthesizerPolicyTree.use_optimistic_splitting:
             scheduler_choices = game_solver.solution_reachable_choices
             state_values = game_solver.solution_state_values
         else:
-            # pessimistic splitting
             scheduler_choices = stormpy.BitVector(self.quotient.quotient_mdp.nr_choices,False)
             state_values = [0] * self.quotient.quotient_mdp.nr_states
             for state in range(family.mdp.states):
@@ -579,7 +578,7 @@ def synthesize_policy_tree(self, family):
         self.last_splitter = -1
         prop = self.quotient.specification.constraints[0]
         game_solver = self.quotient.build_game_abstraction_solver(prop)
-        game_solver.enable_profiling(True)
+        # game_solver.enable_profiling(True)
 
         policy_tree = PolicyTree(family)
         reference_policy = None
@@ -610,7 +609,7 @@ def synthesize_policy_tree(self, family):
                 policy_tree_node.split(result.splitter,suboptions,subfamilies)
                 policy_tree_leaves = policy_tree_leaves + policy_tree_node.child_nodes
 
-        game_solver.print_profiling()
+        # game_solver.print_profiling()
         policy_tree.double_check_all_families(self.quotient, prop)
         policy_tree.print_stats()
         policy_tree.postprocess(self.quotient, prop)