From 6520ddbab1ca28db65a33593071dafc9523ed281 Mon Sep 17 00:00:00 2001 From: Roman Andriushchenko Date: Fri, 8 Dec 2023 15:03:36 +0100 Subject: [PATCH] force investigating the reachable part of a scheduler --- paynt/quotient/mdp_family.py | 65 +++------------------ paynt/quotient/quotient.py | 97 +++++++++++++++++++++----------- paynt/synthesizer/policy_tree.py | 12 ++-- 3 files changed, 77 insertions(+), 97 deletions(-) diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py index d861996d8..6068e1d05 100644 --- a/paynt/quotient/mdp_family.py +++ b/paynt/quotient/mdp_family.py @@ -56,17 +56,7 @@ def map_state_to_available_actions(state_action_choices): state_to_actions.append(available_actions) return state_to_actions - @staticmethod - def compute_choice_destinations(mdp): - choice_destinations = [] - for choice in range(mdp.nr_choices): - destinations = [] - for entry in mdp.transition_matrix.get_row(choice): - destinations.append(entry.column) - choice_destinations.append(destinations) - return choice_destinations - - + def __init__(self, quotient_mdp, coloring, specification): super().__init__(quotient_mdp = quotient_mdp, coloring = coloring, specification = specification) @@ -80,68 +70,27 @@ def __init__(self, quotient_mdp, coloring, specification): self.state_action_choices = None # for each state of the quotient, a list of available actions self.state_to_actions = None - # for each choice of the quotient, a list of its state-destinations - self.choice_destinations = None self.action_labels,self.choice_to_action = MdpFamilyQuotientContainer.extract_choice_labels(self.quotient_mdp) self.state_action_choices = MdpFamilyQuotientContainer.map_state_action_to_choices( self.quotient_mdp, self.num_actions, self.choice_to_action) self.state_to_actions = MdpFamilyQuotientContainer.map_state_to_available_actions(self.state_action_choices) - self.choice_destinations = MdpFamilyQuotientContainer.compute_choice_destinations(self.quotient_mdp) @property def num_actions(self): return len(self.action_labels) - - def keep_reachable_choices(self, choice_mask, mdp=None, choice_destinations=None): - if mdp is None: - mdp = self.quotient_mdp - if choice_destinations is None: - choice_destinations = self.choice_destinations - state_visited = [False]*mdp.nr_states - initial_state = list(mdp.initial_states)[0] - state_visited[initial_state] = True - state_queue = [initial_state] - tm = mdp.transition_matrix - choice_mask_reachable = stormpy.BitVector(mdp.nr_choices,False) - while state_queue: - state = state_queue.pop() - for choice in mdp.transition_matrix.get_rows_for_group(state): - if not choice_mask[choice]: - continue - choice_mask_reachable.set(choice,True) - for dst in choice_destinations[choice]: - if not state_visited[dst]: - state_visited[dst] = True - state_queue.append(dst) - return choice_mask_reachable - - - - def choices_to_hole_selection(self, choice_mask): - hole_selection = [set() for hole_index in self.design_space.hole_indices] - for choice in choice_mask: - choice_options = self.coloring.action_to_hole_options[choice] - for hole_index,option in choice_options.items(): - hole_selection[hole_index].add(option) - hole_selection = [list(options) for options in hole_selection] - return hole_selection - def empty_policy(self): - return [None] * self.quotient_mdp.nr_states + return self.empty_scheduler() def scheduler_to_policy(self, scheduler, mdp): + state_to_choice = self.scheduler_to_state_to_choice(mdp,scheduler) policy = self.empty_policy() - nci = mdp.model.nondeterministic_choice_indices.copy() - for state in range(mdp.model.nr_states): - state_choice = scheduler.get_choice(state).get_deterministic_choice() - choice = nci[state] + state_choice - quotient_choice = mdp.quotient_choice_map[choice] - action = self.choice_to_action[quotient_choice] - quotient_state = mdp.quotient_state_map[state] - policy[quotient_state] = action + for state in range(self.quotient_mdp.nr_states): + choice = state_to_choice[state] + if choice is not None: + policy[state] = self.choice_to_action[choice] return policy diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py index 43bfdf642..073993115 100644 --- a/paynt/quotient/quotient.py +++ b/paynt/quotient/quotient.py @@ -21,6 +21,16 @@ class QuotientContainer: # if True, hole scores in the state will be multiplied with the number of expected visits of this state compute_expected_visits = True + @staticmethod + def compute_choice_destinations(mdp): + choice_destinations = [] + for choice in range(mdp.nr_choices): + destinations = [] + for entry in mdp.transition_matrix.get_row(choice): + destinations.append(entry.column) + choice_destinations.append(destinations) + return choice_destinations + def __init__(self, quotient_mdp = None, coloring = None, specification = None): @@ -36,6 +46,9 @@ def __init__(self, quotient_mdp = None, coloring = None, self.subsystem_builder_options.build_state_mapping = True self.subsystem_builder_options.build_action_mapping = True + # for each choice of the quotient, a list of its state-destinations + self.choice_destinations = QuotientContainer.compute_choice_destinations(self.quotient_mdp) + # (optional) counter of discarded assignments self.discarded = None @@ -133,45 +146,65 @@ def build_chain(self, family): return DTMC(dtmc,self,state_map,choice_map) - def scheduler_selection(self, mdp, scheduler): - ''' Get hole options involved in the scheduler selection. ''' + def empty_scheduler(self): + return [None] * self.quotient_mdp.nr_states + + def keep_reachable_choices_of_scheduler(self, state_to_choice): + state_to_choice_reachable = self.empty_scheduler() + state_visited = [False]*self.quotient_mdp.nr_states + initial_state = list(self.quotient_mdp.initial_states)[0] + state_visited[initial_state] = True + state_queue = [initial_state] + while state_queue: + state = state_queue.pop() + choice = state_to_choice[state] + state_to_choice_reachable[state] = choice + for dst in self.choice_destinations[choice]: + if not state_visited[dst]: + state_visited[dst] = True + state_queue.append(dst) + return state_to_choice_reachable + + def scheduler_to_state_to_choice(self, mdp, scheduler, keep_reachable_choices=True): assert scheduler.memoryless and scheduler.deterministic - - # construct DTMC that corresponds to this scheduler and filter reachable states/choices - choices = scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices) - dtmc,_,choice_map = self.restrict_mdp(mdp.model, choices) - choices = [ choice_map[state] for state in range(dtmc.nr_states) ] - - # map relevant choices to hole options - selection = [set() for hole_index in mdp.design_space.hole_indices] + state_to_choice = self.empty_scheduler() + nci = mdp.model.nondeterministic_choice_indices.copy() + for state in range(mdp.model.nr_states): + choice = nci[state] + scheduler.get_choice(state).get_deterministic_choice() + quotient_choice = mdp.quotient_choice_map[choice] + quotient_state = mdp.quotient_state_map[state] + state_to_choice[quotient_state] = quotient_choice + if keep_reachable_choices: + state_to_choice = self.keep_reachable_choices_of_scheduler(state_to_choice) + return state_to_choice + + def state_to_choice_to_choices(self, state_to_choice): + choices = stormpy.BitVector(self.quotient_mdp.nr_choices,False) + for choice in state_to_choice: + if choice is not None: + choices.set(choice,True) + return choices + + + def choices_to_hole_selection(self, choices, coloring=None): + if coloring is None: + coloring = self.coloring + hole_selection = [set() for hole_index in self.design_space.hole_indices] for choice in choices: - global_choice = mdp.quotient_choice_map[choice] - choice_options = self.coloring.action_to_hole_options[global_choice] + choice_options = self.coloring.action_to_hole_options[choice] for hole_index,option in choice_options.items(): - selection[hole_index].add(option) - selection = [list(options) for options in selection] + hole_selection[hole_index].add(option) + hole_selection = [list(options) for options in hole_selection] + return hole_selection - return selection - def scheduler_selection_with_coloring(self, mdp, scheduler, coloring): + def scheduler_selection(self, mdp, scheduler, coloring=None): ''' Get hole options involved in the scheduler selection. ''' assert scheduler.memoryless and scheduler.deterministic - - # construct DTMC that corresponds to this scheduler and filter reachable states/choices - choices = scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices) - dtmc,_,choice_map = self.restrict_mdp(mdp.model, choices) - choices = [ choice_map[state] for state in range(dtmc.nr_states) ] - - # map relevant choices to hole options - selection = [set() for hole_index in mdp.design_space.hole_indices] - for choice in choices: - global_choice = mdp.quotient_choice_map[choice] - choice_options = coloring.action_to_hole_options[global_choice] - for hole_index,option in choice_options.items(): - selection[hole_index].add(option) - selection = [list(options) for options in selection] - - return selection + state_to_choice = self.scheduler_to_state_to_choice(mdp, scheduler, keep_reachable=True) + choices = self.state_to_choice_to_choices(state_to_choice) + hole_selection = self.choices_to_hole_selection(choices,coloring) + return hole_selection @staticmethod diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index 4e3423b29..8945e366e 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -481,20 +481,18 @@ def verify_family(self, family, game_solver, prop, reference_policy=None): # undecided: choose scheduler choices to be used for splitting if SynthesizerPolicyTree.use_optimistic_splitting: - scheduler_choices = game_solver.solution_reachable_choices state_values = game_solver.solution_state_values + state_to_choice = game_solver.solution_state_to_quotient_choice + state_to_choice = self.quotient.keep_reachable_choices_of_scheduler(state_to_choice) else: - scheduler_choices = stormpy.BitVector(self.quotient.quotient_mdp.nr_choices,False) - choices = primary_primary_result.result.scheduler.compute_action_support(family.mdp.model.nondeterministic_choice_indices) - for choice in choices: - scheduler_choices.set(family.mdp.quotient_choice_map[choice],True) + state_to_choice = self.quotient.scheduler_to_state_to_choice(family.mdp, primary_primary_result.result.scheduler) state_values = [0] * self.quotient.quotient_mdp.nr_states for state in range(family.mdp.states): quotient_state = family.mdp.quotient_state_map[state] state_values[quotient_state] = primary_primary_result.result.at(state) # map reachable scheduler choices to hole options - scheduler_choices = self.quotient.keep_reachable_choices(scheduler_choices) + scheduler_choices = self.quotient.state_to_choice_to_choices(state_to_choice) hole_selection = self.quotient.choices_to_hole_selection(scheduler_choices) if False: @@ -691,7 +689,7 @@ def synthesize_policy_for_family(self, family, prop, all_sat=False, iteration_li sat_mdp_policies[index] = policy current_results.append(primary_result) - selection = self.quotient.scheduler_selection_with_coloring(current_action_family.mdp, primary_result.result.scheduler, self.action_coloring) + selection = self.quotient.scheduler_selection(current_action_family.mdp, primary_result.result.scheduler, self.action_coloring) self.update_scores(score_lists, selection) scores = {hole:len(score_list) for hole, score_list in score_lists.items()}