From 16a86254108fa2b4a88909a20e6bb78f8489f6b4 Mon Sep 17 00:00:00 2001
From: Roman Andriushchenko
Date: Thu, 7 Dec 2023 12:14:14 +0100
Subject: [PATCH] more robust policy double-checking

---
 paynt/parser/sketch.py                 |  2 +
 paynt/quotient/mdp_family.py           | 11 ++++
 paynt/quotient/models.py               | 28 ++------
 paynt/quotient/pomdp_family.py         |  3 +-
 paynt/quotient/quotient.py             |  2 +-
 paynt/synthesizer/policy_tree.py       | 72 ++++++++-------------
 paynt/synthesizer/synthesizer_pomdp.py |  4 +-
 paynt/verification/property.py         | 88 +++++++++++++++-----------
 8 files changed, 101 insertions(+), 109 deletions(-)

diff --git a/paynt/parser/sketch.py b/paynt/parser/sketch.py
index bc1d76030..b6fec6577 100644
--- a/paynt/parser/sketch.py
+++ b/paynt/parser/sketch.py
@@ -9,6 +9,7 @@
 import paynt.quotient.quotient
 import paynt.quotient.mdp_family
 import paynt.quotient.pomdp_family
+import paynt.verification.property
 
 import logging
 logger = logging.getLogger(__name__)
@@ -129,6 +130,7 @@ def load_sketch(cls, sketch_path, properties_path,
             logger.debug('WARNING: discount factor transformation has not been properly tested')
 
         paynt.quotient.models.MarkovChain.initialize(specification)
+        paynt.verification.property.Property.initialize()
 
         make_rewards_action_based(explicit_quotient)
         logger.debug("constructed explicit quotient having {} states and {} actions".format(
diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py
index d2d2b953e..31f84b95e 100644
--- a/paynt/quotient/mdp_family.py
+++ b/paynt/quotient/mdp_family.py
@@ -121,6 +121,17 @@ def choices_to_hole_selection(self, choice_mask):
     def empty_policy(self):
         return [self.num_actions] * self.quotient_mdp.nr_states
 
+    def scheduler_to_policy(self, scheduler, mdp):
+        policy = self.empty_policy()
+        for state in range(mdp.model.nr_states):
+            state_choice = scheduler.get_choice(state).get_deterministic_choice()
+            choice = mdp.model.nondeterministic_choice_indices[state] + state_choice
+            quotient_choice = mdp.quotient_choice_map[choice]
+            action = self.choice_to_action[quotient_choice]
+            quotient_state = mdp.quotient_state_map[state]
+            policy[quotient_state] = action
+        return policy
+
 
     def fix_policy_for_family(self, family, policy):
         '''
diff --git a/paynt/quotient/models.py b/paynt/quotient/models.py
index 45cf72f77..92b82f1ba 100644
--- a/paynt/quotient/models.py
+++ b/paynt/quotient/models.py
@@ -10,8 +10,6 @@ class MarkovChain:
 
     # options for the construction of chains
     builder_options = None
-    # model checking environment (method & precision)
-    environment = None
 
     @classmethod
     def initialize(cls, specification):
@@ -23,24 +21,6 @@ def initialize(cls, specification):
         cls.builder_options.set_add_overlapping_guards_label()
         cls.builder_options.set_build_observation_valuations(True)
         # cls.builder_options.set_exploration_checks(True)
-
-        # model checking environment
-        cls.environment = stormpy.Environment()
-        se = cls.environment.solver_environment
-
-        stormpy.synthesis.set_precision_native(se.native_solver_environment, Property.mc_precision)
-        stormpy.synthesis.set_precision_minmax(se.minmax_solver_environment, Property.mc_precision)
-
-        se.set_linear_equation_solver_type(stormpy.EquationSolverType.native)
-        # se.set_linear_equation_solver_type(stormpy.EquationSolverType.gmmxx)
-        # se.set_linear_equation_solver_type(stormpy.EquationSolverType.eigen)
-
-        # se.minmax_solver_environment.method = stormpy.MinMaxMethod.policy_iteration
-        se.minmax_solver_environment.method = stormpy.MinMaxMethod.value_iteration
-        # se.minmax_solver_environment.method = stormpy.MinMaxMethod.sound_value_iteration
-        # se.minmax_solver_environment.method = stormpy.MinMaxMethod.interval_iteration
-        # se.minmax_solver_environment.method = stormpy.MinMaxMethod.optimistic_value_iteration
-        # se.minmax_solver_environment.method = stormpy.MinMaxMethod.topological
 
     @classmethod
     def assert_no_overlapping_guards(cls, model):
@@ -97,11 +77,11 @@ def initial_state(self):
 
     def model_check_formula(self, formula):
         if not self.is_deterministic:
-            return stormpy.synthesis.verify_mdp(self.environment,self.model,formula,True)
+            return stormpy.synthesis.verify_mdp(Property.environment,self.model,formula,True)
         return stormpy.model_checking(
-            self.model, formula, only_initial_states=False,
+            self.model, formula,
             extract_scheduler=(not self.is_deterministic),
-            environment=self.environment
+            environment=Property.environment
         )
 
     def model_check_formula_hint(self, formula, hint):
@@ -109,7 +89,7 @@ def model_check_formula_hint(self, formula, hint):
         stormpy.synthesis.set_loglevel_off()
         task = stormpy.core.CheckTask(formula, only_initial_states=False)
         task.set_produce_schedulers(produce_schedulers=True)
-        result = stormpy.synthesis.model_check_with_hint(self.model, task, self.environment, hint)
+        result = stormpy.synthesis.model_check_with_hint(self.model, task, Property.environment, hint)
         return result
 
     def model_check_property(self, prop, alt = False):
diff --git a/paynt/quotient/pomdp_family.py b/paynt/quotient/pomdp_family.py
index cddc66442..53fff4e6f 100644
--- a/paynt/quotient/pomdp_family.py
+++ b/paynt/quotient/pomdp_family.py
@@ -7,6 +7,7 @@
 import paynt.quotient.quotient
 import paynt.quotient.coloring
 import paynt.quotient.mdp_family
+import paynt.verification.property
 
 import json
 
@@ -237,7 +238,7 @@ def compute_qvalues_for_fsc(self, dtmc_sketch):
         # model check
         product = dtmc_sketch.quotient_mdp
         formula = dtmc_sketch.get_property().formula
-        result = stormpy.model_checking(product, formula, environment=paynt.quotient.models.MarkovChain.environment)
+        result = stormpy.model_checking(product, formula, environment=paynt.verification.property.Property.environment)
         product_state_to_value = result.get_values()
 
         # map state values to the resulting map
diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py
index dd6a46c02..43bfdf642 100644
--- a/paynt/quotient/quotient.py
+++ b/paynt/quotient/quotient.py
@@ -225,7 +225,7 @@ def expected_visits(self, mdp, prop, choices):
         dtmc = QuotientContainer.mdp_to_dtmc(sub_mdp)
 
         # compute visits
-        dtmc_visits = stormpy.compute_expected_number_of_visits(MarkovChain.environment, dtmc).get_values()
+        dtmc_visits = stormpy.compute_expected_number_of_visits(paynt.verification.property.Property.environment, dtmc).get_values()
         dtmc_visits = list(dtmc_visits)
 
         # handle infinity- and zero-visits
diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py
index ddfcb19dc..fec4ce811 100644
--- a/paynt/synthesizer/policy_tree.py
+++ b/paynt/synthesizer/policy_tree.py
@@ -5,6 +5,8 @@
 import paynt.quotient.models
 import paynt.synthesizer.synthesizer
 
+from paynt.verification.property import Property
+
 import logging
 logger = logging.getLogger(__name__)
 
@@ -107,9 +109,7 @@ def double_check(self, quotient, prop):
             result = self.family.mdp.model_check_property(prop)
             assert not result.sat
         else:
-            mdp = quotient.apply_policy_to_family(self.family, self.policy)
-            result = mdp.model_check_property(prop, alt=True)
-            assert result.sat, self.family.size
+            SynthesizerPolicyTree.double_check_policy(quotient, self.family, prop, self.policy)
 
 
     def merge_if_single_child(self):
@@ -338,41 +338,42 @@ class SynthesizerPolicyTree(paynt.synthesizer.synthesizer.Synthesizer):
     use_game_abstraction = True
     # if True, then the game scheduler will be used for splitting (incompatible with randomized abstraction)
     use_optimistic_splitting = True
-    
+
     @property
     def method_name(self):
         return "AR (policy tree)"
 
+    @staticmethod
+    def double_check_policy(quotient, family, prop, policy):
+        mdp = quotient.apply_policy_to_family(family, policy)
+        if family.size == 1:
+            quotient.assert_mdp_is_deterministic(mdp, family)
+
+        DOUBLE_CHECK_PRECISION = 1e-8
+        default_precision = Property.model_checking_precision
+        Property.set_model_checking_precision(DOUBLE_CHECK_PRECISION)
+        policy_result = mdp.model_check_property(prop, alt=True)
+        Property.set_model_checking_precision(default_precision)
+        if not policy_result.sat:
+            logger.warning("policy should be SAT but (most likely due to model checking precision) has value {}".format(policy_result.value))
+        return
+
+
     def verify_policy(self, family, prop, policy):
         mdp = self.quotient.apply_policy_to_family(family, policy)
-        if family.size == 1:
-            self.quotient.assert_mdp_is_deterministic(mdp, family)
         policy_result = mdp.model_check_property(prop, alt=True)
         self.stat.iteration_mdp(mdp.states)
         return policy_result.sat
 
 
     def solve_singleton(self, family, prop):
-
         result = family.mdp.model_check_property(prop)
         self.stat.iteration_mdp(family.mdp.states)
         if not result.sat:
             return False
-
-        policy = self.quotient.empty_policy()
-        for state in range(family.mdp.states):
-            quotient_state = family.mdp.quotient_state_map[state]
-
-            state_choice = result.result.scheduler.get_choice(state).get_deterministic_choice()
-            choice = family.mdp.model.nondeterministic_choice_indices[state] + state_choice
-            quotient_choice = family.mdp.quotient_choice_map[choice]
-            action = self.quotient.choice_to_action[quotient_choice]
-
-            policy[quotient_state] = action
-
-        # TODO uncomment this to check policy before double_check()
-        # assert self.verify_policy(family,prop,policy)
-
+        policy = self.quotient.scheduler_to_policy(result.result.scheduler, family.mdp)
+        # uncomment below to preemptively double-check the policy
+        # SynthesizerPolicyTree.double_check_policy(self.quotient, family, prop, policy)
         return policy
 
 
@@ -409,7 +410,7 @@ def solve_randomized_abstraction(self, family, prop):
         model,choice_to_action, = stormpy.synthesis.randomize_action_variant(family.mdp.model, state_action_choices)
         assert family.mdp.model.nr_states == model.nr_states
 
-        result = stormpy.synthesis.verify_mdp(paynt.quotient.models.MarkovChain.environment,model,prop.formula,True)
+        result = stormpy.synthesis.verify_mdp(Property.environment, model, prop.formula, True)
         self.stat.iteration_mdp(model.nr_states)
         value = result.at(model.initial_states[0])
         policy_sat = prop.satisfies_threshold(value)
@@ -613,23 +614,6 @@ def update_scores(self, score_lists, selection):
                 for choice in selection[hole]:
                     if choice not in score_list:
                         score_list.append(choice)
-
-
-    def create_policy(self, scheduler, family):
-        choice_to_action = []
-        for choice in range(family.mdp.choices):
-            action = self.quotient.choice_to_action[family.mdp.quotient_choice_map[choice]]
-            choice_to_action.append(action)
-
-        policy = self.quotient.empty_policy()
-        for state in range(family.mdp.model.nr_states):
-            state_choice = scheduler.get_choice(state).get_deterministic_choice()
-            choice = family.mdp.model.transition_matrix.get_row_group_start(state) + state_choice
-            action = choice_to_action[choice]
-            quotient_state = family.mdp.quotient_state_map[state]
-            policy[quotient_state] = action
-
-        return policy
 
 
     # synthesize one policy for family of MDPs (if such policy exists)
@@ -663,7 +647,7 @@ def synthesize_policy_for_tree_node(self, family, prop, all_sat=False, iteration
                     continue
 
                 sat_mdp_families.append(subfamily)
-                policy = self.create_policy(primary_result.result.scheduler, subfamily)
+                policy = self.quotient.scheduler_to_policy(primary_result.result.scheduler, subfamily.mdp)
                 sat_mdp_policies.append(policy)
             else:
                 sat_mdp_families.append(subfamily)
@@ -705,7 +689,7 @@ def synthesize_policy_for_tree_node(self, family, prop, all_sat=False, iteration
                 # add policy if current mdp doesn't have one yet
                 # TODO maybe this can be done after some number of controllers are consistent?
                 if sat_mdp_policies[index] == None:
-                    policy = self.create_policy(primary_result.result.scheduler, mdp_subfamily)
+                    policy = self.quotient.scheduler_to_policy(primary_result.result.scheduler, mdp_subfamily.mdp)
                     sat_mdp_policies[index] = policy
 
                 current_results.append(primary_result)
@@ -722,7 +706,7 @@ def synthesize_policy_for_tree_node(self, family, prop, all_sat=False, iteration
                     break
             else:
                 for index, (result, family) in enumerate(zip(current_results, sat_mdp_families)):
-                    policy = self.create_policy(result.result.scheduler, family)
+                    policy = self.quotient.scheduler_to_policy(result.result.scheduler, family.mdp)
                     sat_mdp_policies[index] = policy
                 return True, unsat_mdp_families, sat_mdp_families, sat_mdp_policies
 
@@ -757,7 +741,7 @@ def synthesize_policy_for_tree_node(self, family, prop, all_sat=False, iteration
                 self.quotient.build(sat_mdp_families[mdp_index])
                 primary_result = sat_mdp_families[mdp_index].mdp.model_check_property(prop)
                 self.stat.iteration_mdp(sat_mdp_families[mdp_index].mdp.states)
-                policy = self.create_policy(primary_result.result.scheduler, sat_mdp_families[mdp_index])
+                policy = self.quotient.scheduler_to_policy(primary_result.result.scheduler, sat_mdp_families[mdp_index].mdp)
                 sat_mdp_policies[mdp_index] = policy
 
         return False, unsat_mdp_families, sat_mdp_families, sat_mdp_policies
diff --git a/paynt/synthesizer/synthesizer_pomdp.py b/paynt/synthesizer/synthesizer_pomdp.py
index 6a3c62961..f0cb31001 100644
--- a/paynt/synthesizer/synthesizer_pomdp.py
+++ b/paynt/synthesizer/synthesizer_pomdp.py
@@ -12,6 +12,8 @@
 from ..quotient.quotient_pomdp import POMDPQuotientContainer
 from ..utils.profiler import Timer
 
+import paynt.verification.property
+
 import math
 from collections import defaultdict
 
@@ -821,7 +823,7 @@ def strategy_expected_uai(self):
             dtmc = self.quotient.build_chain(synthesized_assignment)
 
             # compute expected visits for this dtmc
-            dtmc_visits = stormpy.compute_expected_number_of_visits(MarkovChain.environment, dtmc.model).get_values()
+            dtmc_visits = stormpy.compute_expected_number_of_visits(paynt.verification.property.Property.environment, dtmc.model).get_values()
             dtmc_visits = list(dtmc_visits)
 
             # handle infinity- and zero-visits
diff --git a/paynt/verification/property.py b/paynt/verification/property.py
index f9db8a02a..3d1f3dfc0 100644
--- a/paynt/verification/property.py
+++ b/paynt/verification/property.py
@@ -1,4 +1,5 @@
 import stormpy
+import stormpy.synthesis
 import math
 import operator
 
@@ -16,13 +17,41 @@ def construct_reward_property(reward_name, minimizing, target_label):
 
 
 class Property:
-    
+    ''' Wrapper over a stormpy property. '''
+
+    # model checking environment (method & precision)
+    environment = None
     # model checking precision
-    mc_precision = 1e-4
-    # precision for comparing floats
-    float_precision = 1e-10
+    model_checking_precision = 1e-4
+
+    @classmethod
+    def set_model_checking_precision(cls, precision):
+        cls.model_checking_precision = precision
+        stormpy.synthesis.set_precision_native(cls.environment.solver_environment.native_solver_environment, precision)
+        stormpy.synthesis.set_precision_minmax(cls.environment.solver_environment.minmax_solver_environment, precision)
+
+    @classmethod
+    def initialize(cls):
+        cls.environment = stormpy.Environment()
+        cls.set_model_checking_precision(cls.model_checking_precision)
+
+        se = cls.environment.solver_environment
+        se.set_linear_equation_solver_type(stormpy.EquationSolverType.native)
+        # se.set_linear_equation_solver_type(stormpy.EquationSolverType.gmmxx)
+        # se.set_linear_equation_solver_type(stormpy.EquationSolverType.eigen)
+
+        # se.minmax_solver_environment.method = stormpy.MinMaxMethod.policy_iteration
+        se.minmax_solver_environment.method = stormpy.MinMaxMethod.value_iteration
+        # se.minmax_solver_environment.method = stormpy.MinMaxMethod.sound_value_iteration
+        # se.minmax_solver_environment.method = stormpy.MinMaxMethod.interval_iteration
+        # se.minmax_solver_environment.method = stormpy.MinMaxMethod.optimistic_value_iteration
+        # se.minmax_solver_environment.method = stormpy.MinMaxMethod.topological
+
+    @staticmethod
+    def above_model_checking_precision(a, b):
+        return abs(a-b) > Property.model_checking_precision
+
 
-    ''' Wrapper over a stormpy property. '''
     def __init__(self, prop, discount_factor=1):
         self.property = prop
         self.discount_factor = discount_factor
@@ -76,6 +105,18 @@ def reward(self):
     def maximizing(self):
         return not self.minimizing
 
+    @property
+    def is_until(self):
+        return self.formula.subformula.is_until_formula
+
+    def transform_until_to_eventually(self):
+        if not self.is_until:
+            return
+        logger.info("converting until formula to eventually...")
+        formula = stormpy.synthesis.transform_until_to_eventually(self.property.raw_formula)
+        prop = stormpy.core.Property("", formula)
+        self.__init__(prop, self.discount_factor)
+
     def property_copy(self):
         formula_copy = self.property.raw_formula.clone()
         property_copy = stormpy.core.Property(self.property.name, formula_copy)
@@ -84,40 +125,11 @@ def property_copy(self):
     def copy(self):
         return Property(self.property_copy(), self.discount_factor)
 
-    @staticmethod
-    def above_mc_precision(a, b):
-        return abs(a-b) > Property.mc_precision
-
-    @staticmethod
-    def above_float_precision(a, b):
-        return abs(a-b) > Property.float_precision
-
-    def meets_op(self, a, b):
-        ''' For constraints, we do not want to distinguish between small differences. '''
-        # return not Property.above_float_precision(a,b) or self.op(a,b)
-        return self.op(a,b)
-
-    def meets_threshold(self, value):
-        return self.meets_op(value, self.threshold)
-
     def result_valid(self, value):
         return not self.reward or value != math.inf
 
     def satisfies_threshold(self, value):
-        ''' check if DTMC model checking result satisfies the property '''
-        return self.result_valid(value) and self.meets_threshold(value)
-
-    @property
-    def is_until(self):
-        return self.formula.subformula.is_until_formula
-
-    def transform_until_to_eventually(self):
-        if not self.is_until:
-            return
-        logger.info("converting until formula to eventually...")
-        formula = stormpy.synthesis.transform_until_to_eventually(self.property.raw_formula)
-        prop = stormpy.core.Property("", formula)
-        self.__init__(prop, self.discount_factor)
+        return self.result_valid(value) and self.op(value, self.threshold)
 
     @property
     def can_be_improved(self):
@@ -192,7 +204,7 @@ def reset(self):
 
     def meets_op(self, a, b):
         ''' For optimality objective, we want to accept improvements above model checking precision. '''
-        return b is None or (Property.above_mc_precision(a,b) and self.op(a,b))
+        return b is None or (Property.above_model_checking_precision(a,b) and self.op(a,b))
 
     def satisfies_threshold(self, value):
         return self.result_valid(value) and self.meets_op(value, self.threshold)
@@ -212,9 +224,9 @@ def update_optimum(self, optimum):
     def suboptimal_value(self):
         assert self.optimum is not None
         if self.minimizing:
-            return self.optimum * (1 + self.mc_precision)
+            return self.optimum * (1 + self.model_checking_precision)
         else:
-            return self.optimum * (1 - self.mc_precision)
+            return self.optimum * (1 - self.model_checking_precision)
 
     def transform_until_to_eventually(self):
         if not self.is_until: