From a2ea99734ca3f79c04f8a31e9c428a323a2d1a1e Mon Sep 17 00:00:00 2001 From: Roman Andriushchenko Date: Wed, 24 Jul 2024 14:59:19 +0200 Subject: [PATCH] MDP controller synthesis via SMT coloring --- models/mdp/maze/sketch.templ | 10 + models/mdp/simple/sketch.templ | 2 +- paynt/family/family.py | 7 +- paynt/models/model_builder.py | 40 ++ paynt/{quotient => models}/models.py | 27 +- paynt/parser/jani.py | 7 +- paynt/parser/prism_parser.py | 6 +- paynt/parser/sketch.py | 17 +- paynt/quotient/mdp.py | 370 ++++++++++++++++++ paynt/quotient/mdp_family.py | 29 +- paynt/quotient/pomdp.py | 2 +- paynt/quotient/pomdp_family.py | 4 +- paynt/quotient/quotient.py | 77 +--- paynt/synthesizer/policy_tree.py | 1 - paynt/synthesizer/statistic.py | 15 +- paynt/synthesizer/synthesizer_ar.py | 4 +- payntbind/src/synthesis/quotient/Coloring.cpp | 11 - payntbind/src/synthesis/quotient/Coloring.h | 4 - .../src/synthesis/quotient/ColoringSmt.cpp | 297 ++++++++++++++ .../src/synthesis/quotient/ColoringSmt.h | 109 ++++++ payntbind/src/synthesis/quotient/bindings.cpp | 30 +- .../synthesis/translation/SubPomdpBuilder.cpp | 6 +- .../src/synthesis/translation/bindings.cpp | 189 ++++++++- .../translation/componentTranslations.cpp | 96 ++++- .../translation/componentTranslations.h | 16 + 25 files changed, 1182 insertions(+), 194 deletions(-) create mode 100644 paynt/models/model_builder.py rename paynt/{quotient => models}/models.py (56%) create mode 100644 paynt/quotient/mdp.py create mode 100644 payntbind/src/synthesis/quotient/ColoringSmt.cpp create mode 100644 payntbind/src/synthesis/quotient/ColoringSmt.h diff --git a/models/mdp/maze/sketch.templ b/models/mdp/maze/sketch.templ index 7ca759980..97ef72f42 100644 --- a/models/mdp/maze/sketch.templ +++ b/models/mdp/maze/sketch.templ @@ -68,8 +68,18 @@ module maze endmodule +module test + var : bool init false; + [up] true -> (var'=true); + [right] true -> (var'=false); + [down] true -> (var'=false); + [left] true -> (var'=false); +endmodule + // rewards +label "what" = mod(x,2)=0; + rewards "steps" clk=1: 1; endrewards diff --git a/models/mdp/simple/sketch.templ b/models/mdp/simple/sketch.templ index 4a71e1233..f3d252fc0 100644 --- a/models/mdp/simple/sketch.templ +++ b/models/mdp/simple/sketch.templ @@ -1,7 +1,7 @@ mdp module m - s : [0..4] init 0; + s : [0..3] init 0; [up] s=0 -> (s'=1); [down] s=0 -> (s'=2); diff --git a/paynt/family/family.py b/paynt/family/family.py index 6e6978e04..0d7ee9501 100644 --- a/paynt/family/family.py +++ b/paynt/family/family.py @@ -44,9 +44,7 @@ def hole_num_options_total(self, hole): return self.family.holeNumOptionsTotal(hole) def hole_set_options(self, hole, options): - assert len(options)>0 self.family.holeSetOptions(hole,options) - assert self.family.holeNumOptions(hole) == len(options) @property def size(self): @@ -63,7 +61,7 @@ def size_or_order(self): def hole_options_to_string(self, hole, options): name = self.hole_name(hole) - labels = [self.hole_to_option_labels[hole][option] for option in options] + labels = [str(self.hole_to_option_labels[hole][option]) for option in options] if len(labels) == 1: return f"{name}={labels[0]}" else: @@ -159,6 +157,7 @@ def __init__(self, family = None, parent_info = None): super().__init__(family) self.mdp = None + self.selected_choices = None # SMT encoding self.encoding = None @@ -179,11 +178,11 @@ def copy(self): def collect_parent_info(self, specification): pi = ParentInfo() + pi.selected_choices = self.selected_choices pi.refinement_depth = self.refinement_depth cr = self.analysis_result.constraints_result pi.constraint_indices = cr.undecided_constraints if cr is not None else [] pi.splitter = self.splitter - pi.mdp = self.mdp return pi def encode(self, smt_solver): diff --git a/paynt/models/model_builder.py b/paynt/models/model_builder.py new file mode 100644 index 000000000..15998d17d --- /dev/null +++ b/paynt/models/model_builder.py @@ -0,0 +1,40 @@ +import stormpy + +class ModelBuilder: + + @classmethod + def default_builder_options(cls, specification = None): + # builder options + if specification is not None: + formulae = specification.stormpy_formulae() + builder_options = stormpy.BuilderOptions(formulae) + else: + builder_options = stormpy.BuilderOptions() + builder_options.set_build_state_valuations(True) + builder_options.set_build_with_choice_origins(True) + builder_options.set_build_all_labels(True) + builder_options.set_build_choice_labels(True) + builder_options.set_add_overlapping_guards_label(True) + builder_options.set_build_observation_valuations(True) + # builder_options.set_exploration_checks(True) + return builder_options + + @classmethod + def from_jani(cls, program, specification = None): + builder_options = cls.default_builder_options(specification) + builder_options.set_build_choice_labels(False) + model = stormpy.build_sparse_model_with_options(program, builder_options) + return model + + @classmethod + def from_prism(cls, program, specification = None): + assert program.model_type in [stormpy.storage.PrismModelType.MDP, stormpy.storage.PrismModelType.POMDP] + builder_options = cls.default_builder_options(specification) + model = stormpy.build_sparse_model_with_options(program, builder_options) + return model + + @classmethod + def from_drn(cls, drn_path): + builder_options = stormpy.core.DirectEncodingParserOptions() + builder_options.build_choice_labels = True + return stormpy.build_model_from_drn(drn_path, builder_options) diff --git a/paynt/quotient/models.py b/paynt/models/models.py similarity index 56% rename from paynt/quotient/models.py rename to paynt/models/models.py index 1a6b75aa7..5895687e0 100644 --- a/paynt/quotient/models.py +++ b/paynt/models/models.py @@ -8,41 +8,16 @@ class Mdp: - # options for the construction of chains - builder_options = None - - @classmethod - def initialize(cls, specification): - # builder options - formulae = specification.stormpy_formulae() - cls.builder_options = stormpy.BuilderOptions(formulae) - cls.builder_options.set_build_with_choice_origins(True) - cls.builder_options.set_build_state_valuations(True) - cls.builder_options.set_add_overlapping_guards_label() - cls.builder_options.set_build_observation_valuations(True) - cls.builder_options.set_build_all_labels(True) - # cls.builder_options.set_exploration_checks(True) - @classmethod def assert_no_overlapping_guards(cls, model): if model.labeling.contains_label("overlap_guards"): assert model.labeling.get_states("overlap_guards").number_of_set_bits() == 0 - @classmethod - def from_prism(cls, prism): - assert prism.model_type in [stormpy.storage.PrismModelType.MDP, stormpy.storage.PrismModelType.POMDP] - # TODO why do we disable choice labels here? - Mdp.builder_options.set_build_choice_labels(True) - model = stormpy.build_sparse_model_with_options(prism, Mdp.builder_options) - Mdp.builder_options.set_build_choice_labels(False) - # Mdp.assert_no_overlapping_guards(model) - return model - def __init__(self, model): # Mdp.assert_no_overlapping_guards(model) + self.model = model if len(model.initial_states) > 1: logger.warning("WARNING: obtained model with multiple initial states") - self.model = model @property def states(self): diff --git a/paynt/parser/jani.py b/paynt/parser/jani.py index c3d575858..86cd8600d 100644 --- a/paynt/parser/jani.py +++ b/paynt/parser/jani.py @@ -2,7 +2,7 @@ import payntbind import paynt.verification.property -import paynt.quotient.models +import paynt.models.model_builder import itertools from collections import defaultdict @@ -56,7 +56,6 @@ def __init__(self, prism, hole_expressions, specification, family): p = paynt.verification.property.OptimalityProperty(prop_new,epsilon) properties_unpacked.append(p) self.specification = paynt.verification.property.Specification(properties_unpacked) - paynt.quotient.models.Mdp.initialize(self.specification) # unfold holes in the program self.hole_expressions = hole_expressions @@ -66,8 +65,8 @@ def __init__(self, prism, hole_expressions, specification, family): logger.debug("constructing the quotient...") # construct the explicit quotient - quotient_mdp = stormpy.build_sparse_model_with_options(self.jani_unfolded, paynt.quotient.models.Mdp.builder_options) - + quotient_mdp = paynt.models.model_builder.ModelBuilder.from_jani(self.jani_unfolded, self.specification) + # associate each action of a quotient MDP with hole options # reconstruct choice labels from choice origins logger.debug("associating choices of the quotient with hole assignments...") diff --git a/paynt/parser/prism_parser.py b/paynt/parser/prism_parser.py index 8e40058d0..5283aa4b3 100644 --- a/paynt/parser/prism_parser.py +++ b/paynt/parser/prism_parser.py @@ -4,7 +4,7 @@ import paynt.family.family import paynt.verification.property import paynt.parser.jani -import paynt.quotient.models +import paynt.models.model_builder import os import re @@ -53,13 +53,11 @@ def read_prism(cls, sketch_path, properties_path, relative_error): specification = jani_unfolder.specification quotient_mdp = jani_unfolder.quotient_mdp coloring = payntbind.synthesis.Coloring(family.family, quotient_mdp.nondeterministic_choice_indices, jani_unfolder.choice_to_hole_options) - paynt.quotient.models.Mdp.initialize(specification) if prism.model_type == stormpy.storage.PrismModelType.POMDP: obs_evaluator = payntbind.synthesis.ObservationEvaluator(prism, quotient_mdp) quotient_mdp = payntbind.synthesis.addChoiceLabelsFromJani(quotient_mdp) else: - paynt.quotient.models.Mdp.initialize(specification) - quotient_mdp = paynt.quotient.models.Mdp.from_prism(prism) + quotient_mdp = paynt.models.model_builder.ModelBuilder.from_prism(prism, specification) return prism, quotient_mdp, specification, family, coloring, jani_unfolder, obs_evaluator diff --git a/paynt/parser/sketch.py b/paynt/parser/sketch.py index 0088d4589..269fa9671 100644 --- a/paynt/parser/sketch.py +++ b/paynt/parser/sketch.py @@ -1,10 +1,10 @@ import stormpy import payntbind - -import paynt.quotient.models +import paynt.models.model_builder import paynt.quotient.quotient +import paynt.quotient.mdp import paynt.quotient.pomdp import paynt.quotient.decpomdp import paynt.quotient.posg @@ -54,12 +54,6 @@ def make_rewards_action_based(model): class Sketch: - @classmethod - def read_drn(cls, sketch_path): - builder_options = stormpy.core.DirectEncodingParserOptions() - builder_options.build_choice_labels = True - return stormpy.build_model_from_drn(sketch_path, builder_options) - @classmethod def load_sketch(cls, sketch_path, properties_path, export=None, relative_error=0, precision=1e-4, constraint_bound=None): @@ -91,7 +85,7 @@ def load_sketch(cls, sketch_path, properties_path, if filetype is None: try: logger.info(f"assuming sketch in DRN format...") - explicit_quotient = Sketch.read_drn(sketch_path) + explicit_quotient = paynt.models.model_builder.ModelBuilder.from_drn(sketch_path) specification = PrismParser.parse_specification(properties_path, relative_error) filetype = "drn" except: @@ -122,7 +116,6 @@ def load_sketch(cls, sketch_path, properties_path, assert filetype is not None, "unknow format of input file" logger.info("sketch parsing OK") - paynt.quotient.models.Mdp.initialize(specification) paynt.verification.property.Property.initialize() make_rewards_action_based(explicit_quotient) @@ -223,11 +216,13 @@ def build_quotient_container(cls, prism, jani_unfolder, explicit_quotient, famil elif prism.model_type == stormpy.storage.PrismModelType.POMDP: quotient_container = paynt.quotient.pomdp_family.PomdpFamilyQuotient(explicit_quotient, family, coloring, specification, obs_evaluator) else: - assert explicit_quotient.is_nondeterministic_model + assert explicit_quotient.is_nondeterministic_model, "expected nondeterministic model" if decpomdp_manager is not None and decpomdp_manager.num_agents > 1: quotient_container = paynt.quotient.decpomdp.DecPomdpQuotient(decpomdp_manager, specification) elif explicit_quotient.labeling.contains_label(paynt.quotient.posg.PosgQuotient.PLAYER_1_STATE_LABEL): quotient_container = paynt.quotient.posg.PosgQuotient(explicit_quotient, specification) + elif not explicit_quotient.is_partially_observable: + quotient_container = paynt.quotient.mdp.MdpQuotient(explicit_quotient, specification) else: quotient_container = paynt.quotient.pomdp.PomdpQuotient(explicit_quotient, specification, decpomdp_manager) return quotient_container diff --git a/paynt/quotient/mdp.py b/paynt/quotient/mdp.py new file mode 100644 index 000000000..ae8f14d2d --- /dev/null +++ b/paynt/quotient/mdp.py @@ -0,0 +1,370 @@ +import paynt.quotient.quotient + +import stormpy +import payntbind + +import logging +logger = logging.getLogger(__name__) + + +class Variable: + + def __init__(self, variable, model): + assert variable.has_boolean_type() or variable.has_integer_type(), \ + f"variable {variable.name} is neither integer nor boolean" + self.variable = variable + + assert model.has_state_valuations(), "model has no state valuations" + if self.has_integer_type: + value_getter = model.state_valuations.get_integer_value + else: + value_getter = model.state_valuations.get_boolean_value + domain = set() + for state in range(model.nr_states): + value = value_getter(state,self.variable) + domain.add(value) + domain = list(domain) + # conversion of boolean variables to integers + if self.has_boolean_type: + domain = [1 if value else 0 for value in domain] + domain = sorted(domain) + self.domain = domain + + + @property + def name(self): + return self.variable.name + + @property + def has_integer_type(self): + return self.variable.has_integer_type() + + @property + def has_boolean_type(self): + return self.variable.has_boolean_type() + + @property + def domain_min(self): + return self.domain[0] + + @property + def domain_max(self): + return self.domain[-1] + + @property + def hole_domain(self): + ''' + Hole domain does not include the maximum value. + ''' + return self.domain[:-1] + + def __str__(self): + # domain = "bool" if self.has_boolean_type else f"[{self.domain_min}..{self.domain_max}]" + domain = f"[{self.domain_min}..{self.domain_max}]" + return f"{self.name}:{domain}" + + @classmethod + def from_model(cls, model, program_variables): + variables = [Variable(v,model) for v in program_variables] + variables = [v for v in variables if len(v.domain) > 1] + return variables + + + +class DecisionTreeNode: + + def __init__(self, parent): + self.parent = parent + self.is_terminal = True + self.variable_index = None + self.hole = None + + @property + def child_nodes(self): + return [] if self.is_terminal else [self.child_true,self.child_false] + + @property + def is_true_child(self): + return self is self.parent.child_true + + def set_variable(self, variable_index:int): + ''' + Associate (an index of) a variable with the node. + ''' + assert self.is_terminal, "redefining existing variable for a tree node" + self.is_terminal = False + self.variable_index = variable_index + self.child_false = DecisionTreeNode(self) + self.child_true = DecisionTreeNode(self) + + def set_variable_by_name(self, variable_name:str, decision_tree): + name_to_variable_index = {var.name:index for index,var in enumerate(decision_tree.variables)} + variable_index = name_to_variable_index[variable_name] + self.set_variable(variable_index) + + def create_hole(self, family, action_labels, variables): + ''' + Create a unique hole associated with this node. Terminal nodes are associated with actions selection, where + additional "don't care" action is added. + ''' + # create a unique hole index based on the current number of holes + self.hole = family.num_holes + if self.is_terminal: + prefix = "A" + option_labels = action_labels #+ ["__dont_care__"] + else: + var = variables[self.variable_index] + prefix = variables[self.variable_index].name + option_labels = variables[self.variable_index].hole_domain + hole_name = f"{prefix}_{self.hole}" + family.add_hole(hole_name, option_labels) + + + def collect_bounds(self): + lower_bounds = [] + upper_bounds = [] + node = self + while node.parent is not None: + if node.is_true_child: + bounds = upper_bounds + else: + bounds = lower_bounds + bounds.append(node.parent.hole) + node = node.parent + return lower_bounds,upper_bounds + + + +class DecisionTree: + + def __init__(self, model, program_variables): + + self.model = model + self.variables = Variable.from_model(model,program_variables) + logger.debug(f"found the following variables: {[str(v) for v in self.variables]}") + self.num_nodes = 0 + self.root = DecisionTreeNode(None) + + def build_coloring(self): + coloring = None + return coloring + + def collect_nodes(self, node_condition=None): + node_queue = [self.root] + output_nodes = [] + while node_queue: + node = node_queue.pop(0) + if node_condition is None or node_condition(node): + output_nodes.append(node) + node_queue += node.child_nodes + return output_nodes + + def collect_terminals(self): + node_condition = lambda node : node.is_terminal + return self.collect_nodes(node_condition) + + def collect_nonterminals(self): + node_condition = lambda node : not node.is_terminal + return self.collect_nodes(node_condition) + + def create_family(self, action_labels): + family = paynt.family.family.Family() + for node in self.collect_nodes(): + node.create_hole(family, action_labels, self.variables) + return family + + +def custom_decision_tree(mdp, program_variables): + dt = DecisionTree(mdp, program_variables) + + decide = lambda node,var_name : node.set_variable_by_name(var_name,dt) + + decide(dt.root,"clk") + main = dt.root.child_false + + decide(dt.root.child_false, "y") + decide(dt.root.child_false.child_true, "x") + decide(dt.root.child_false.child_true.child_true, "x") + + # decide(main,"y") + # decide(main.child_false,"x") + # decide(main.child_true,"x") + # decide(main.child_true.child_true,"x") + + # decide(main, "y") + # decide(main.child_false, "x") + # decide(main.child_false.child_true, "x") + # decide(main.child_true, "x") + # decide(main.child_true.child_true, "x") + return dt + + + +class MdpQuotient(paynt.quotient.quotient.Quotient): + + def __init__(self, mdp, specification): + super().__init__(specification=specification) + + # get variables before choice origins are lost + assert mdp.has_choice_origins(), "model has no choice origins" + program_variables = mdp.choice_origins.program.variables + + target_states = self.identify_target_states(mdp,self.get_property()) + mdp = payntbind.synthesis.restoreActionsInTargetStates(mdp,target_states) + self.quotient_mdp = mdp + self.choice_destinations = payntbind.synthesis.computeChoiceDestinations(self.quotient_mdp) + self.action_labels,self.choice_to_action = payntbind.synthesis.extractActionLabels(mdp) + + decision_tree = custom_decision_tree(mdp, program_variables) + family = decision_tree.create_family(self.action_labels) + print("family = ", family) + + hole_bounds = [None for hole in range(family.num_holes)] + for node in decision_tree.collect_nodes(): + hole_bounds[node.hole] = node.collect_bounds() + # print("hole bounds = ", hole_bounds) + + hole_variable = [len(decision_tree.variables) for _ in range(family.num_holes)] + hole_domain = [[] for h in range(family.num_holes)] + for node in decision_tree.collect_nonterminals(): + hole_variable[node.hole] = node.variable_index + hole_domain[node.hole] = family.hole_to_option_labels[node.hole] + # print("hole variables = ", hole_variable) + # print("hole domain = ", hole_domain) + stormpy_variables = [v.variable for v in decision_tree.variables] + + self.decision_tree = decision_tree + self.hole_variable = hole_variable + self.is_action_hole = [var == len(self.decision_tree.variables) for var in self.hole_variable] + self.coloring = payntbind.synthesis.ColoringSmt( + mdp.nondeterministic_choice_indices, self.choice_to_action, + mdp.state_valuations, stormpy_variables, + hole_variable, hole_bounds, + family.family, hole_domain + ) + self.design_space = paynt.family.family.DesignSpace(family) + + + def build_unsat_result(self): + constraints_result = paynt.verification.property_result.ConstraintsResult([]) + optimality_result = paynt.verification.property_result.MdpOptimalityResult(None) + optimality_result.can_improve = False + analysis_result = paynt.verification.property_result.MdpSpecificationResult(constraints_result,optimality_result) + return analysis_result + + def build(self, family): + # family.parent_info = None + if family.parent_info is None: + choices = self.coloring.selectCompatibleChoices(family.family) + else: + choices = self.coloring.selectCompatibleChoices(family.family, family.parent_info.selected_choices) + if choices.number_of_set_bits() == 0: + family.mdp = None + family.analysis_result = self.build_unsat_result() + return + + # proceed as before + family.selected_choices = choices + family.mdp = self.build_from_choice_mask(choices) + # assert family.mdp.model.nr_choices == self.quotient_mdp.nr_choices + family.mdp.design_space = family + + + def areChoicesConsistent(self, choices, mdp): + return self.coloring.areChoicesConsistent(choices, mdp.design_space.family) + + def scheduler_is_consistent(self, mdp, prop, result): + ''' Get hole options involved in the scheduler selection. ''' + + scheduler = result.scheduler + assert scheduler.memoryless and scheduler.deterministic + state_to_choice = self.scheduler_to_state_to_choice(mdp, scheduler) + choices = self.state_to_choice_to_choices(state_to_choice) + consistent,hole_selection = self.areChoicesConsistent(choices, mdp) + if mdp.is_deterministic: + assert consistent, "obtained a DTMC, but the scheduler is not consistent" + + # convert selection to actual hole options + for hole,values in enumerate(hole_selection): + if self.is_action_hole[hole]: + continue + hole_selection[hole] = [self.design_space.hole_to_option_labels[hole].index(value) for value in values] + for hole,options in enumerate(hole_selection): + for option in options: + assert option in mdp.design_space.hole_options(hole), f"option {option} is not in the family" + + return hole_selection, consistent + + + def scheduler_get_quantitative_values(self, mdp, prop, result, selection): + ''' + :return choice values + :return expected visits + :return hole scores + ''' + + inconsistent_assignments = {hole:options for hole,options in enumerate(selection) if len(options) > 0 } + inconsistent_action_holes = [(hole,options) for hole,options in inconsistent_assignments.items() if self.is_action_hole[hole]] + inconsistent_variable_holes = [(hole,options) for hole,options in inconsistent_assignments.items() if not self.is_action_hole[hole]] + + # choose splitter and force its score + splitter = None + + # try action holes first + for hole,options in inconsistent_action_holes: + if len(options) > 1: + splitter = hole + break + else: + for hole,values in inconsistent_variable_holes: + # pick an arbitrary value and find the corresponding hole option + value = values[0] + for option in mdp.design_space.hole_options(hole): + if mdp.design_space.hole_to_option_labels[hole][option] == value: + splitter = hole + selection[splitter] = [option] + # TODO make sure this selection will split this hole in half + break + else: + assert False, "this should not occur..." + assert splitter is not None, "inconsistent action hole with exactly 1 option?" + + inconsistent_differences = {splitter:10} + return None, None, inconsistent_differences + + def split(self, family, incomplete_search): + + mdp = family.mdp + assert not mdp.is_deterministic + + # split family wrt last undecided result + result = family.analysis_result.undecided_result() + hole_assignments = result.primary_selection + scores = result.primary_scores + + splitters = self.holes_with_max_score(scores) + splitter = splitters[0] + if self.is_action_hole[splitter]: + assert len(hole_assignments[splitter]) > 1 + core_suboptions,other_suboptions = self.suboptions_enumerate(mdp, splitter, hole_assignments[splitter]) + else: + assert len(hole_assignments[splitter]) == 1 + splitter_option = hole_assignments[splitter][0] + index = family.hole_options(splitter).index(splitter_option) + options = mdp.design_space.hole_options(splitter) + core_suboptions = [options[:index], options[index:]] + other_suboptions = [] + + new_design_space, suboptions = self.discard(mdp, hole_assignments, core_suboptions, other_suboptions, incomplete_search) + + # construct corresponding design subspaces + design_subspaces = [] + family.splitter = splitter + parent_info = family.collect_parent_info(self.specification) + for suboption in suboptions: + subholes = new_design_space.subholes(splitter, suboption) + design_subspace = paynt.family.family.DesignSpace(subholes, parent_info) + design_subspace.hole_set_options(splitter, suboption) + design_subspaces.append(design_subspace) + + return design_subspaces diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py index 72646f7ea..9a9cb4ef2 100644 --- a/paynt/quotient/mdp_family.py +++ b/paynt/quotient/mdp_family.py @@ -3,7 +3,7 @@ import paynt.family.family import paynt.quotient.quotient -import paynt.quotient.models +import paynt.models.models import collections import json @@ -14,29 +14,6 @@ class MdpFamilyQuotient(paynt.quotient.quotient.Quotient): - @staticmethod - def extract_choice_labels(mdp): - ''' - :param mdp having a canonic choice labeling (exactly 1 label for each choice) - :returns a list of action labels - :returns for each choice, the corresponding action - ''' - assert mdp.has_choice_labeling, "MDP does not have a choice labeling" - action_labels = list(mdp.choice_labeling.get_labels()) - # sorting because get_labels() is not deterministic when passing through pybind - action_labels = sorted(action_labels) - label_to_action = {label:index for index,label in enumerate(action_labels)} - - logger.debug("associating choices with action labels...") - labeling = mdp.choice_labeling - choice_to_action = [None] * mdp.nr_choices - for choice in range(mdp.nr_choices): - label = list(labeling.get_labels_of_choice(choice))[0] - action = label_to_action[label] - choice_to_action[choice] = action - - return action_labels,choice_to_action - @staticmethod def map_state_action_to_choices(mdp, num_actions, choice_to_action): state_action_choices = [] @@ -76,7 +53,7 @@ def __init__(self, quotient_mdp, family, coloring, specification): # for each state of the quotient, a list of available actions self.state_to_actions = None - self.action_labels,self.choice_to_action = MdpFamilyQuotient.extract_choice_labels(self.quotient_mdp) + self.action_labels,self.choice_to_action = payntbind.synthesis.extractActionLabels(mdp) self.num_actions = len(self.action_labels) self.state_action_choices = MdpFamilyQuotient.map_state_action_to_choices( self.quotient_mdp, self.num_actions, self.choice_to_action) @@ -219,4 +196,4 @@ def build_assignment(self, family): assert family.size == 1, "expecting family of size 1" choices = self.coloring.selectCompatibleChoices(family.family) model,state_map,choice_map = self.restrict_quotient(choices) - return paynt.quotient.models.SubMdp(model,state_map,choice_map) + return paynt.models.models.SubMdp(model,state_map,choice_map) diff --git a/paynt/quotient/pomdp.py b/paynt/quotient/pomdp.py index 545ddbcff..f4895ae15 100644 --- a/paynt/quotient/pomdp.py +++ b/paynt/quotient/pomdp.py @@ -25,7 +25,7 @@ class PomdpQuotient(paynt.quotient.quotient.Quotient): def __init__(self, pomdp, specification, decpomdp_manager=None): - super().__init__(specification = specification) + super().__init__(specification=specification) # unfolded POMDP self.quotient_mdp = None diff --git a/paynt/quotient/pomdp_family.py b/paynt/quotient/pomdp_family.py index f6eab789e..289b21b53 100644 --- a/paynt/quotient/pomdp_family.py +++ b/paynt/quotient/pomdp_family.py @@ -1,7 +1,7 @@ import stormpy import payntbind -import paynt.quotient.models +import paynt.models.models import paynt.quotient.quotient import paynt.quotient.mdp_family import paynt.verification.property @@ -126,7 +126,7 @@ def build_dtmc_sketch(self, fsc): return dtmc_sketch - def compute_qvalues_for_product_submdp(self, product_submdp : paynt.quotient.models.SubMdp): + def compute_qvalues_for_product_submdp(self, product_submdp : paynt.models.models.SubMdp): ''' Given an MDP obtained after applying FSC to a family of POMDPs, compute for each state s, (reachable) memory node n, and action a, the Q-value Q((s,n),a). diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py index 73f461dc3..c9a4d6b6c 100644 --- a/paynt/quotient/quotient.py +++ b/paynt/quotient/quotient.py @@ -2,7 +2,7 @@ import payntbind import paynt.family.family -import paynt.quotient.models +import paynt.models.models import math import itertools @@ -23,7 +23,6 @@ def make_vector_defined(vector): vector_valid = [ value if value != math.inf else default_value for value in vector] return vector_valid - def __init__(self, quotient_mdp = None, family = None, coloring = None, specification = None): # colored qoutient MDP for the super-family @@ -75,7 +74,7 @@ def restrict_quotient(self, choices): def build_from_choice_mask(self, choices): mdp,state_map,choice_map = self.restrict_quotient(choices) - return paynt.quotient.models.SubMdp(mdp, state_map, choice_map) + return paynt.models.models.SubMdp(mdp, state_map, choice_map) def build(self, family): ''' Construct the quotient MDP for the family. ''' @@ -86,20 +85,6 @@ def build(self, family): family.mdp.design_space = family - def build_with_second_coloring(self, family, main_coloring, main_family): - ''' Construct the quotient MDP for the family. ''' - - # select actions compatible with the family and restrict the quotient - choices_alt = self.coloring.selectCompatibleChoices(family.family) - choices_main = main_coloring.selectCompatibleChoices(main_family.family) - - choices = choices_main.__and__(choices_alt) - main_family.mdp = self.build_from_choice_mask(choices) - main_family.mdp.design_space = main_family - family.mdp = self.build_from_choice_mask(choices) - family.mdp.design_space = family - - @staticmethod def mdp_to_dtmc(mdp): tm = mdp.transition_matrix @@ -114,7 +99,7 @@ def build_assignment(self, family): choices = self.coloring.selectCompatibleChoices(family.family) mdp,state_map,choice_map = self.restrict_quotient(choices) model = Quotient.mdp_to_dtmc(mdp) - return paynt.quotient.models.SubMdp(model,state_map,choice_map) + return paynt.models.models.SubMdp(model,state_map,choice_map) def empty_scheduler(self): return [None] * self.quotient_mdp.nr_states @@ -154,14 +139,12 @@ def state_to_choice_to_choices(self, state_to_choice): choices.set(choice,True) return choices - def scheduler_selection(self, mdp, scheduler, coloring=None): + def scheduler_selection(self, mdp, scheduler): ''' Get hole options involved in the scheduler selection. ''' assert scheduler.memoryless and scheduler.deterministic state_to_choice = self.scheduler_to_state_to_choice(mdp, scheduler) choices = self.state_to_choice_to_choices(state_to_choice) - if coloring is None: - coloring = self.coloring - hole_selection = coloring.collectHoleOptions(choices) + hole_selection = self.coloring.collectHoleOptions(choices) return hole_selection @@ -235,7 +218,6 @@ def scheduler_is_consistent(self, mdp, prop, result): :return hole assignment :return whether the scheduler is consistent ''' - # selection = self.scheduler_selection(mdp, result.scheduler) if mdp.is_deterministic: selection = [[mdp.design_space.hole_options(hole)[0]] for hole in range(mdp.design_space.num_holes)] return selection, True @@ -386,46 +368,10 @@ def double_check_assignment(self, assignment): else: return None, None - - def sample(self): - - # parameters - path_length = 1000 - num_paths = 100 - output_path = 'samples.txt' - - import json - - # assuming optimization of reward property - assert len(self.specification.constraints) == 0 - opt = self.specification.optimality - assert opt.reward - reward_name = opt.formula.reward_name - - # build the mdp - self.build(self.design_space) - mdp = self.design_space.mdp - state_row_group = mdp.prepare_sampling() - - paths = [] - for _ in range(num_paths): - path = mdp.random_path(path_length,state_row_group) - path_reward = mdp.evaluate_path(path,reward_name) - paths.append( {"path":path,"reward":path_reward} ) - - path_json = [json.dumps(path) for path in paths] - - output_json = "[\n" + ",\n".join(path_json) + "\n]\n" - - # logger.debug("attempting to reconstruct samples from JSON ...") - # json.loads(output_json) - # logger.debug("OK") - - logger.info("writing generated samples to {} ...".format(output_path)) - with open(output_path, 'w') as f: - print(output_json, end="", file=f) - logger.info("done") + def get_property(self): + assert self.specification.num_properties == 1, "expecting a single property" + return self.specification.all_properties()[0] def identify_absorbing_states(self, model): state_is_absorbing = [True] * model.nr_states @@ -440,10 +386,9 @@ def identify_absorbing_states(self, model): break return state_is_absorbing - def get_property(self): - assert self.specification.num_properties == 1, "expecting a single property" - return self.specification.all_properties()[0] - + def identify_target_states(self, model, prop): + target_label = prop.get_target_label() + return model.labeling.get_states(target_label) def check_specification_for_dtmc(self, dtmc, constraint_indices=None, short_evaluation=False): diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index 113522687..efcde10a9 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -2,7 +2,6 @@ import payntbind import paynt.family.family -import paynt.quotient.models import paynt.synthesizer.synthesizer import paynt.quotient.quotient diff --git a/paynt/synthesizer/statistic.py b/paynt/synthesizer/statistic.py index 89090e5b9..5f74109b9 100644 --- a/paynt/synthesizer/statistic.py +++ b/paynt/synthesizer/statistic.py @@ -2,6 +2,7 @@ import paynt.utils.profiler import paynt.synthesizer.synthesizer +import paynt.models.models import math @@ -42,6 +43,8 @@ def __init__(self, synthesizer): self.acc_size_mdp = 0 self.avg_size_mdp = 0 + self.iterations_smt = None + self.iterations_game = None self.acc_size_game = 0 self.avg_size_game = 0 @@ -70,7 +73,7 @@ def start(self, family): def iteration(self, model): ''' Identify the type of the model and count corresponding iteration. ''' - if isinstance(model, paynt.quotient.models.Mdp): + if isinstance(model, paynt.models.models.Mdp): model = model.model if type(model) == stormpy.storage.SparseDtmc: self.iteration_dtmc(model.nr_states) @@ -86,6 +89,11 @@ def iteration_dtmc(self, size_dtmc): self.acc_size_dtmc += size_dtmc self.print_status() + def iteration_smt(self): + if self.iterations_smt is None: + self.iterations_smt = 0 + self.iterations_smt += 1 + def iteration_mdp(self, size_mdp): if self.iterations_mdp is None: self.iterations_mdp = 0 @@ -188,6 +196,10 @@ def get_summary_iterations(self): type_stats = f"MDP stats: avg MDP size: {avg_size}, iterations: {self.iterations_mdp}" iterations += f"{type_stats}\n" + if self.iterations_smt is not None: + type_stats = f"SMT stats: iterations: {self.iterations_smt}" + iterations += f"{type_stats}\n" + if self.iterations_dtmc is not None: avg_size = round(safe_division(self.acc_size_dtmc, self.iterations_dtmc)) type_stats = f"DTMC stats: avg DTMC size: {avg_size}, iterations: {self.iterations_dtmc}" @@ -240,6 +252,7 @@ def get_summary(self): def print(self): print(self.get_summary(),end="") + print("payntbind::selectCompatibleChoices = ", self.quotient.coloring.selectCompatibleChoicesTime()) # self.print_mdp_family_table_entries() diff --git a/paynt/synthesizer/synthesizer_ar.py b/paynt/synthesizer/synthesizer_ar.py index 79790ac04..e708fa550 100644 --- a/paynt/synthesizer/synthesizer_ar.py +++ b/paynt/synthesizer/synthesizer_ar.py @@ -12,7 +12,10 @@ def method_name(self): return "AR" def verify_family(self, family): + self.stat.iteration_smt() self.quotient.build(family) + if family.mdp is None: + return self.stat.iteration_mdp(family.mdp.states) res = self.quotient.check_specification_for_mdp(family.mdp, family.constraint_indices) if res.improving_assignment == "any": @@ -28,7 +31,6 @@ def update_optimum(self, family): print(ia) dtmc = self.quotient.build_assignment(ia) mc_result = self.quotient.check_specification_for_dtmc(dtmc) - print(mc_result) if isinstance(self.quotient, paynt.quotient.pomdp.PomdpQuotient): self.stat.new_fsc_found(family.analysis_result.improving_value, ia, self.quotient.policy_size(ia)) diff --git a/payntbind/src/synthesis/quotient/Coloring.cpp b/payntbind/src/synthesis/quotient/Coloring.cpp index 624f2c486..c96178585 100644 --- a/payntbind/src/synthesis/quotient/Coloring.cpp +++ b/payntbind/src/synthesis/quotient/Coloring.cpp @@ -1,10 +1,7 @@ #include "Coloring.h" -#include - namespace synthesis { - Coloring::Coloring( Family const& family, std::vector const& row_groups, std::vector>> choice_to_assignment @@ -34,15 +31,11 @@ Coloring::Coloring( auto num_states = row_groups.size()-1; state_to_holes.resize(num_states); - is_simple = true; for(uint64_t state = 0; state 1) { - is_simple = false; - } } } @@ -58,10 +51,6 @@ std::vector const& Coloring::getStateToHoles() const { return state_to_holes; } -BitVector const& Coloring::getUncoloredChoices() const { - return uncolored_choices; -} - BitVector Coloring::selectCompatibleChoices(Family const& subfamily) const { auto selection = BitVector(uncolored_choices); for(auto choice: colored_choices) { diff --git a/payntbind/src/synthesis/quotient/Coloring.h b/payntbind/src/synthesis/quotient/Coloring.h index 66670ca47..63999fd2e 100644 --- a/payntbind/src/synthesis/quotient/Coloring.h +++ b/payntbind/src/synthesis/quotient/Coloring.h @@ -25,8 +25,6 @@ class Coloring { std::vector>> const& getChoiceToAssignment() const; /** Get a mapping from states to holes involved in its choices. */ std::vector const& getStateToHoles() const; - /** Get mask of uncolored choices. */ - BitVector const& getUncoloredChoices() const; /** Get a mask of choices compatible with the family. */ BitVector selectCompatibleChoices(Family const& subfamily) const; @@ -47,8 +45,6 @@ class Coloring { std::vector choice_to_holes; /** For each state, identification of holes associated with its choices. */ std::vector state_to_holes; - /** Whether all states have at most one hole associated with its choices. */ - bool is_simple; /** Choices not labeled by any hole. */ BitVector uncolored_choices; diff --git a/payntbind/src/synthesis/quotient/ColoringSmt.cpp b/payntbind/src/synthesis/quotient/ColoringSmt.cpp new file mode 100644 index 000000000..cd94d676f --- /dev/null +++ b/payntbind/src/synthesis/quotient/ColoringSmt.cpp @@ -0,0 +1,297 @@ +#include "ColoringSmt.h" + +#include + +#include + +namespace synthesis { + + +ColoringSmt::ColoringSmt( + std::vector const& row_groups, + std::vector const& choice_to_action, + storm::storage::sparse::StateValuations const& state_valuations, + std::vector const& variables, + std::vector hole_variable, + std::vector,std::vector>> hole_bounds, + synthesis::Family const& family, + std::vector> hole_domain +) : choice_to_action(choice_to_action), row_groups(row_groups), family(family), hole_domain(hole_domain), solver(context) { + + num_actions = 1 + *max_element(choice_to_action.begin(),choice_to_action.end()); + hole_corresponds_to_program_variable = storm::storage::BitVector(family.numHoles()); + + // create solver variables for each hole + for(uint64_t hole = 0; hole < family.numHoles(); ++hole) { + uint64_t var = hole_variable[hole]; + bool corresponds_to_program_variable = (var < variables.size()); + hole_corresponds_to_program_variable.set(hole,corresponds_to_program_variable); + std::string name; + if(corresponds_to_program_variable) { + name = variables[var].getName() + "_" + std::to_string(hole); + } else { + name = "A_" + std::to_string(hole); + } + z3::expr v = context.int_const(name.c_str()); + hole_to_solver_variable.push_back(v); + } + + // create formula describing hole relationships + for(uint64_t hole = 0; hole < family.numHoles(); ++hole) { + uint64_t var = hole_variable[hole]; + z3::expr_vector restriction_clauses(context); + if(hole_corresponds_to_program_variable[hole]) { + // see if any of its bounds is a hole associated with the same variable + auto solver_variable = hole_to_solver_variable[hole]; + auto const& [lower_bounds,upper_bounds] = hole_bounds[hole]; + for(auto bound: lower_bounds) { + if(hole_variable[bound] == var) { + restriction_clauses.push_back(hole_to_solver_variable[bound] < solver_variable); + } + } + for(auto bound: upper_bounds) { + if(hole_variable[bound] == var) { + restriction_clauses.push_back(solver_variable <= hole_to_solver_variable[bound]); + } + } + } + hole_restriction.push_back(z3::mk_and(restriction_clauses)); + } + + // create choice colors + for(uint64_t state = 0; state < numStates(); ++state) { + for(uint64_t choice = row_groups[state]; choice < row_groups[state+1]; ++choice) { + uint64_t action = choice_to_action[choice]; + + std::vector terminals; + std::vector>> terminal_evaluation; + z3::expr_vector clauses(context); + std::vector clause_label; + // for each action hole, create clauses that describe this choice selection + for(uint64_t hole: ~hole_corresponds_to_program_variable) { + auto const& [lower_bounds,upper_bounds] = hole_bounds[hole]; + z3::expr_vector terminal_atoms(context); + std::vector> evaluation; + terminal_atoms.push_back((hole_to_solver_variable[hole] == (int)action)); + evaluation.emplace_back(hole,(int)action); + + // add atoms describing lower and upper bounds + // negate each atom due to boolean conversion + // translate variable values to hole options + for(auto bound: lower_bounds) { + auto program_variable = variables[hole_variable[bound]]; + auto solver_variable = hole_to_solver_variable[bound]; + int value = getVariableValueAtState(state_valuations,state,program_variable); + terminal_atoms.push_back(value <= solver_variable); + evaluation.emplace_back(bound,value); + } + for(auto bound: upper_bounds) { + auto program_variable = variables[hole_variable[bound]]; + auto solver_variable = hole_to_solver_variable[bound]; + int value = getVariableValueAtState(state_valuations,state,program_variable); + terminal_atoms.push_back(value > solver_variable); + evaluation.emplace_back(bound,value); + } + uint64_t clause_index = clauses.size(); + std::string label = "c" + std::to_string(choice) + "_" + std::to_string(clause_index); + + terminals.push_back(terminal_atoms); + terminal_evaluation.push_back(evaluation); + clauses.push_back(z3::mk_or(terminal_atoms)); + clause_label.push_back(label); + } + choice_terminal.push_back(terminals); + choice_clause.push_back(clauses); + choice_terminal_evaluation.push_back(terminal_evaluation); + choice_clause_label.push_back(clause_label); + } + } +} + + +const uint64_t ColoringSmt::numStates() const { + return row_groups.size()-1; +} + +const uint64_t ColoringSmt::numChoices() const { + return choice_to_action.size(); +} + +int ColoringSmt::getVariableValueAtState( + storm::storage::sparse::StateValuations const& state_valuations, uint64_t state, storm::expressions::Variable variable +) const { + if(variable.hasBooleanType()) { + return (int)state_valuations.getBooleanValue(state,variable); + } else { + return state_valuations.getIntegerValue(state,variable); + } + +} + +void ColoringSmt::addHoleEncoding(Family const& family, uint64_t hole) { + auto solver_variable = hole_to_solver_variable[hole]; + auto const& family_options = family.holeOptions(hole); + std::string label = "h" + std::to_string(hole); + const char* label_str = label.c_str(); + if(hole_corresponds_to_program_variable[hole]) { + // variable hole + auto const& domain = hole_domain[hole]; + // convention: hole options in the family are ordered + int domain_min = domain[family_options.front()]; + int domain_max = domain[family_options.back()]; + // z3::expr encoding = (domain_min <= solver_variable) and (solver_variable <= domain_max) and hole_restriction[hole]; + z3::expr encoding = (domain_min <= solver_variable) and (solver_variable <= domain_max); + solver.add(encoding, label_str); + } else { + // action hole + z3::expr_vector action_selection_clauses(context); + for(auto option: family_options) { + action_selection_clauses.push_back(solver_variable == (int)option); + } + z3::expr encoding = z3::mk_or(action_selection_clauses); + solver.add(encoding, label_str); + } +} + +void ColoringSmt::addFamilyEncoding(Family const& family) { + for(uint64_t hole = 0; hole < family.numHoles(); ++hole) { + addHoleEncoding(family,hole); + } +} + +void ColoringSmt::addChoiceEncoding(uint64_t choice, bool add_label) { + if(not add_label) { + solver.add(choice_clause[choice]); + return; + } + auto const& clauses = choice_clause[choice]; + for(uint64_t index = 0; index < clauses.size(); ++index) { + solver.add(clauses[index], choice_clause_label[choice][index].c_str()); + } +} + +BitVector ColoringSmt::selectCompatibleChoices(Family const& subfamily) { + return selectCompatibleChoices(subfamily,BitVector(numChoices(),true)); +} + +BitVector ColoringSmt::selectCompatibleChoices(Family const& subfamily, BitVector const& base_choices) { + BitVector selection(numChoices(),false); + + // check if sub-family itself satisfies hole restrictions + solver.push(); + addFamilyEncoding(subfamily); + if(solver.check() == z3::unsat) { + solver.pop(); + return selection; + } + + // check individual choices + std::vector> state_enabled_choices(numStates()); + for(uint64_t state = 0; state < numStates(); ++state) { + bool all_choices_disabled = true; + for(uint64_t choice = row_groups[state]; choice < row_groups[state+1]; ++choice) { + if(not base_choices[choice]) { + continue; + } + solver.push(); + selectCompatibleChoicesTimer.start(); + addChoiceEncoding(choice,false); + z3::check_result result = solver.check(); + selectCompatibleChoicesTimer.stop(); + if(result == z3::sat) { + all_choices_disabled = false; + selection.set(choice,true); + state_enabled_choices[state].push_back(choice); + } + solver.pop(); + } + if(all_choices_disabled) { + solver.pop(); + selection.clear(); + return selection; + } + } + + // check selected choices simultaneously + solver.push(); + for(std::vector const& choices: state_enabled_choices) { + z3::expr_vector enabled_choices(context); + for(uint64_t choice: choices) { + enabled_choices.push_back(z3::mk_and(choice_clause[choice])); + } + solver.add(z3::mk_or(enabled_choices)); + } + if(solver.check() == z3::unsat) { + selection.clear(); + } + solver.pop(); + + solver.pop(); + return selection; +} + + +std::pair>> ColoringSmt::areChoicesConsistent(BitVector const& choices, Family const& subfamily) { + + solver.push(); + addFamilyEncoding(subfamily); + for(auto choice: choices) { + addChoiceEncoding(choice); + } + z3::check_result result = solver.check(); + + std::vector> hole_options(family.numHoles()); + std::vector> hole_options_vector(family.numHoles()); + if(result == z3::sat) { + z3::model model = solver.get_model(); + solver.pop(); + for(uint64_t hole = 0; hole < family.numHoles(); ++hole) { + z3::expr solver_variable = hole_to_solver_variable[hole]; + uint64_t value = model.eval(solver_variable).get_numeral_int64(); + hole_options_vector[hole].push_back(value); + } + return std::make_pair(true,hole_options_vector); + } + + z3::expr_vector unsat_core = solver.unsat_core(); + solver.pop(); + for(auto expr: unsat_core) { + std::istringstream iss(expr.decl().name().str()); + char prefix; iss >> prefix; + if(prefix == 'h') { + continue; + } + + // choice clause + uint64_t choice,clause; iss >> choice; iss >> prefix; iss >> clause; + uint64_t action = choice_to_action[choice]; + uint64_t action_hole; + // filter clauses that are unsat within the subfamily + // std::cout << "checking: " << choice_clause[choice][clause] << std::endl; + solver.push(); + addFamilyEncoding(subfamily); + for(uint64_t terminal = 0; terminal < choice_terminal[choice][clause].size(); ++terminal) { + solver.push(); + solver.add(choice_terminal[choice][clause][terminal]); + auto result = solver.check(); + solver.pop(); + if(result == z3::unsat) { + continue; + } + // std::cout << choice_terminal[choice][clause][terminal] << std::endl; + auto const& [hole,value] = choice_terminal_evaluation[choice][clause][terminal]; + hole_options[hole].insert(value); + } + solver.pop(); + } + + for(uint64_t hole = 0; hole < family.numHoles(); ++hole) { + hole_options_vector[hole].assign(hole_options[hole].begin(),hole_options[hole].end()); + } + return std::make_pair(false,hole_options_vector); +} + + + + +} \ No newline at end of file diff --git a/payntbind/src/synthesis/quotient/ColoringSmt.h b/payntbind/src/synthesis/quotient/ColoringSmt.h new file mode 100644 index 000000000..f4e22ad86 --- /dev/null +++ b/payntbind/src/synthesis/quotient/ColoringSmt.h @@ -0,0 +1,109 @@ +#pragma once + +#include "src/synthesis/quotient/Family.h" + +#include +#include +#include + +#include +#include +#include + +#include + +namespace synthesis { + +using BitVector = storm::storage::BitVector; + + +class ColoringSmt { +public: + + ColoringSmt( + std::vector const& row_groups, + std::vector const& choice_to_action, + storm::storage::sparse::StateValuations const& state_valuations, + std::vector const& variables, + std::vector hole_variable, + std::vector,std::vector>> hole_bounds, + Family const& family, + std::vector> hole_domain + ); + + /** Get a mask of choices compatible with the family. */ + BitVector selectCompatibleChoices(Family const& subfamily); + /** Get a mask of sub-choices of \p base_choices compatible with the family. */ + BitVector selectCompatibleChoices(Family const& subfamily, BitVector const& base_choices); + + storm::utility::Stopwatch selectCompatibleChoicesTimer; + uint64_t selectCompatibleChoicesTime() { + return (uint64_t) selectCompatibleChoicesTimer.getTimeInMilliseconds(); + } + /** + * Verify whether the given choices have an associated non-conficting hole assignment (within the given family). + * @return a pair (A,B) where A is true iff the choice selection is consistent; B is either satisfying hole assignment + * if A holds, or a counterexample to be used for splitting. + * */ + std::pair>> areChoicesConsistent(BitVector const& choices, Family const& subfamily); + +protected: + + /** Number of MDP actions. */ + uint64_t num_actions; + /** For each choice, its unique action. */ + const std::vector choice_to_action; + /** Row groups of the quotient. */ + std::vector row_groups; + /** Number of states in the quotient. */ + const uint64_t numStates() const; + /** Number of choices in the quotient. */ + const uint64_t numChoices() const; + + /** SMT solver context. */ + z3::context context; + /** SMT solver. */ + z3::solver solver; + /** For each hole, an SMT variable. */ + std::vector hole_to_solver_variable; + /** For each hole, an expression describing its relation to other holes. */ + std::vector hole_restriction; + + /** + * For each choice, its color expressed as a list of conjuncts (clauses) of + * disjuncts of atomic clauses (terminals). + * */ + std::vector> choice_terminal; + /** For each terminal, a (unique) hole assignment involved. */ + std::vector>>> choice_terminal_evaluation; + + /** For each choice, its color expressed as a list of clauses. */ + std::vector choice_clause; + /** For each clause, a predefined label. */ + std::vector> choice_clause_label; + + + + + /** Reference to the unrefined family. */ + const Family family; + /** For each hole associated with an integer variable, a list of its integer options. */ + std::vector> hole_domain; + /** For each hole, whether it corresponds to a variable (and not to action selection). */ + storm::storage::BitVector hole_corresponds_to_program_variable; + + int getVariableValueAtState( + storm::storage::sparse::StateValuations const& state_valuations, uint64_t state, storm::expressions::Variable variable + ) const; + + /** Create SMT encoding for the given family and the given hole. */ + void addHoleEncoding(Family const& family, uint64_t hole); + /** Create SMT encoding for the given family. */ + void addFamilyEncoding(Family const& family); + + /** Create SMT encoding for the given choice. */ + void addChoiceEncoding(uint64_t choice, bool add_label=true); + +}; + +} \ No newline at end of file diff --git a/payntbind/src/synthesis/quotient/bindings.cpp b/payntbind/src/synthesis/quotient/bindings.cpp index 6036fc59d..5d388ac7b 100644 --- a/payntbind/src/synthesis/quotient/bindings.cpp +++ b/payntbind/src/synthesis/quotient/bindings.cpp @@ -3,11 +3,13 @@ #include "JaniChoices.h" #include "Family.h" #include "Coloring.h" +#include "ColoringSmt.h" #include #include #include #include +#include #include @@ -59,7 +61,6 @@ std::pair std::vector> computeChoiceDestinations(storm::models::sparse::Mdp const& mdp) { uint64_t num_choices = mdp.getNumberOfChoices(); @@ -90,7 +91,8 @@ std::vector schedulerToStateToGlobalChoice( std::map computeInconsistentHoleVariance( Family const& family, - std::vector const& row_groups, std::vector const& choice_to_global_choice, std::vector const& choice_to_value, + std::vector const& row_groups, std::vector const& choice_to_global_choice, + std::vector const& choice_to_value, Coloring const& coloring, std::map> const& hole_to_inconsistent_options, std::vector const& state_to_expected_visits ) { @@ -272,8 +274,8 @@ void bindings_coloring(py::module& m) { py::class_(m, "Family") - .def(py::init<>(), "Constructor.") - .def(py::init(), "Constructor.", py::arg("other")) + .def(py::init<>()) + .def(py::init()) .def("numHoles", &synthesis::Family::numHoles) .def("addHole", &synthesis::Family::addHole) @@ -287,12 +289,28 @@ void bindings_coloring(py::module& m) { ; py::class_(m, "Coloring") - .def(py::init const&, std::vector>> >(), "Constructor.") + .def(py::init const&, std::vector>> >()) .def("getChoiceToAssignment", &synthesis::Coloring::getChoiceToAssignment) .def("getStateToHoles", &synthesis::Coloring::getStateToHoles) - .def("getUncoloredChoices", &synthesis::Coloring::getUncoloredChoices) .def("selectCompatibleChoices", &synthesis::Coloring::selectCompatibleChoices) .def("collectHoleOptions", &synthesis::Coloring::collectHoleOptions) ; + py::class_(m, "ColoringSmt") + .def(py::init< + std::vector const&, + std::vector const&, + storm::storage::sparse::StateValuations const&, + std::vector const&, + std::vector, + std::vector,std::vector>>, + synthesis::Family const&, + std::vector> + >()) + .def("selectCompatibleChoices", py::overload_cast(&synthesis::ColoringSmt::selectCompatibleChoices)) + .def("selectCompatibleChoices", py::overload_cast(&synthesis::ColoringSmt::selectCompatibleChoices)) + .def("selectCompatibleChoicesTime", &synthesis::ColoringSmt::selectCompatibleChoicesTime) + .def("areChoicesConsistent", &synthesis::ColoringSmt::areChoicesConsistent) + ; + } diff --git a/payntbind/src/synthesis/translation/SubPomdpBuilder.cpp b/payntbind/src/synthesis/translation/SubPomdpBuilder.cpp index 652cca8e3..ca5abb0ed 100644 --- a/payntbind/src/synthesis/translation/SubPomdpBuilder.cpp +++ b/payntbind/src/synthesis/translation/SubPomdpBuilder.cpp @@ -83,12 +83,8 @@ namespace synthesis { } components.transitionMatrix = builder.build(); - components.choiceLabeling = synthesis::translateChoiceLabeling(pomdp,choice_translator.translationToItem(),translated_choice_mask); - for (auto const& reward_model : pomdp.getRewardModels()) { - auto new_reward_model = synthesis::translateRewardModel(reward_model.second,choice_translator.translationToItem(),translated_choice_mask); - components.rewardModels.emplace(reward_model.first, new_reward_model); - } + components.rewardModels = synthesis::translateRewardModels(pomdp,choice_translator.translationToItem(),translated_choice_mask); // build state observations auto observability_classes = synthesis::translateObservabilityClasses( diff --git a/payntbind/src/synthesis/translation/bindings.cpp b/payntbind/src/synthesis/translation/bindings.cpp index 586c6fc84..0f5df6ec2 100644 --- a/payntbind/src/synthesis/translation/bindings.cpp +++ b/payntbind/src/synthesis/translation/bindings.cpp @@ -1,9 +1,196 @@ #include "../synthesis.h" - #include "SubPomdpBuilder.h" +#include "src/synthesis/translation/componentTranslations.h" + +#include +#include +#include + +namespace synthesis { + +/** + * Add an explicit label to the choices that do not have any. + */ +template +std::shared_ptr> addMissingChoiceLabels( + storm::models::sparse::Model const& model +) { + STORM_LOG_THROW(model.hasChoiceLabeling(), storm::exceptions::InvalidModelException, "model does not have the choice labeling"); + + storm::storage::sparse::ModelComponents components; + components.stateLabeling = model.getStateLabeling(); + components.stateValuations = model.getOptionalStateValuations(); + components.transitionMatrix = model.getTransitionMatrix(); + components.choiceLabeling = model.getChoiceLabeling(); + components.choiceOrigins = model.getOptionalChoiceOrigins(); + components.rewardModels = model.getRewardModels(); + + auto const& labeling = model.getChoiceLabeling(); + storm::storage::BitVector choices_without_label(model.getNumberOfChoices(),false); + for(uint64_t choice = 0; choice < model.getNumberOfChoices(); ++choice) { + if(labeling.getLabelsOfChoice(choice).empty()) { + choices_without_label.set(choice,true); + } + } + const std::string NO_ACTION_LABEL = "__no_label__"; + if(not choices_without_label.empty()) { + components.choiceLabeling->addUniqueLabel(NO_ACTION_LABEL,choices_without_label); + } + if (model.getType() == storm::models::ModelType::Pomdp) { + auto pomdp = static_cast const&>(model); + components.observabilityClasses = pomdp.getObservations(); + components.observationValuations = pomdp.getOptionalObservationValuations(); + } + + return storm::utility::builder::buildModelFromComponents(model.getType(),std::move(components)); +} + + +/** + * Given an MDP with canonic choice labeling, make sure that in each state from the provided \p state_maks all actions + * are available. If an action is not available in a state, add it to this state with the behavior of the first + * existing action. + */ +template +std::shared_ptr> enableAllActions( + storm::models::sparse::Model const& model, + storm::storage::BitVector const& state_mask +) { + auto [action_labels,choice_to_action] = synthesis::extractActionLabels(model); + uint64_t num_actions = action_labels.size(); + uint64_t num_states = model.getNumberOfStates(); + uint64_t num_choices = model.getNumberOfChoices(); + + // for each action, find some choice that corresponds to this action + std::vector action_reference_choice(num_actions); + for(uint64_t choice = 0; choice < num_choices; ++choice) { + action_reference_choice[choice_to_action[choice]] = choice; + } + + // for each state-action pair, find the corresponding choice + // translate choices + auto const& row_groups_old = model.getTransitionMatrix().getRowGroupIndices(); + std::vector translated_to_original_choice; + std::vector translated_to_original_choice_label; + std::vector row_groups_new; + for(uint64_t state = 0; state < num_states; ++state) { + row_groups_new.push_back(translated_to_original_choice.size()); + if(not state_mask[state]) { + for(uint64_t choice: model.getTransitionMatrix().getRowGroupIndices(state)) { + translated_to_original_choice.push_back(choice); + translated_to_original_choice_label.push_back(choice); + } + } else { + std::vector action_exists(num_actions,false); + for(uint64_t choice: model.getTransitionMatrix().getRowGroupIndices(state)) { + uint64_t action = choice_to_action[choice]; + action_exists[action] = true; + translated_to_original_choice.push_back(choice); + translated_to_original_choice_label.push_back(choice); + } + for(uint64_t action = 0; action < num_actions; ++action) { + if(action_exists[action]) { + continue; + } + uint64_t choice = row_groups_old[state]; + translated_to_original_choice.push_back(choice); + translated_to_original_choice_label.push_back(action_reference_choice[action]); + } + } + } + row_groups_new.push_back(translated_to_original_choice.size()); + uint64_t num_translated_choices = translated_to_original_choice.size(); + + // build components + storm::storage::sparse::ModelComponents components; + components.stateLabeling = model.getStateLabeling(); + components.stateValuations = model.getOptionalStateValuations(); + storm::storage::BitVector translated_choice_mask(num_translated_choices,true); + components.choiceLabeling = synthesis::translateChoiceLabeling(model,translated_to_original_choice_label,translated_choice_mask); + + storm::storage::SparseMatrixBuilder builder(num_translated_choices, num_states, 0, true, true, num_states); + for(uint64_t state = 0; state < num_states; ++state) { + builder.newRowGroup(row_groups_new[state]); + for(uint64_t translated_choice = row_groups_new[state]; translated_choice < row_groups_new[state+1]; ++translated_choice) { + uint64_t choice = translated_to_original_choice[translated_choice]; + for(auto entry: model.getTransitionMatrix().getRow(choice)) { + builder.addNextValue(translated_choice, entry.getColumn(), entry.getValue()); + } + } + } + components.transitionMatrix = builder.build(); + + components.rewardModels = synthesis::translateRewardModels(model,translated_to_original_choice,translated_choice_mask); + + if (model.getType() == storm::models::ModelType::Pomdp) { + auto pomdp = static_cast const&>(model); + components.observabilityClasses = pomdp.getObservations(); + components.observationValuations = pomdp.getOptionalObservationValuations(); + } + + return storm::utility::builder::buildModelFromComponents>(model.getType(),std::move(components)); +} + +/** + * Given an MDP with canonic choice labeling, remove an action from the given set \p state_mask of states. + */ +template +std::shared_ptr> removeAction( + storm::models::sparse::Model const& model, + std::string const& action_to_remove_label, + storm::storage::BitVector const& state_mask +) { + auto [action_labels,choice_to_action] = synthesis::extractActionLabels(model); + uint64_t num_actions = action_labels.size(); + uint64_t num_states = model.getNumberOfStates(); + uint64_t num_choices = model.getNumberOfChoices(); + uint64_t action_to_remove = num_actions; + for(action_to_remove = 0; action_to_remove < num_actions; ++action_to_remove) { + if(action_labels[action_to_remove] == action_to_remove_label) { + break; + } + } + + storm::storage::BitVector choice_enabled(num_choices,true); + for(uint64_t state: state_mask) { + for(uint64_t choice: model.getTransitionMatrix().getRowGroupIndices(state)) { + uint64_t action = choice_to_action[choice]; + if(action == action_to_remove) { + choice_enabled.set(choice,false); + } + } + } + storm::storage::BitVector all_states(num_states,true); + storm::transformer::SubsystemBuilderReturnType build_result = storm::transformer::buildSubsystem(model, all_states, choice_enabled); + return build_result.model; +} + +/** + * Given an MDP, for any state in the set \p target_states, mark any unlabeled action, explicitly add all availabled + * actions and subsequently removed unlabeled actions. + */ +template +std::shared_ptr> restoreActionsInTargetStates( + storm::models::sparse::Model const& model, + storm::storage::BitVector const& target_states +) { + auto model_canonic = synthesis::addMissingChoiceLabels(model); + auto model_target_enabled = synthesis::enableAllActions(*model_canonic, target_states); + const std::string NO_ACTION_LABEL = "__no_label__"; + auto model_target_fixed = synthesis::removeAction(*model_target_enabled, NO_ACTION_LABEL, target_states); + return model_target_fixed; +} + +} + void bindings_translation(py::module& m) { + m.def("addMissingChoiceLabels", &synthesis::addMissingChoiceLabels); + m.def("extractActionLabels", &synthesis::extractActionLabels); + m.def("enableAllActions", &synthesis::enableAllActions); + m.def("restoreActionsInTargetStates", &synthesis::restoreActionsInTargetStates); + py::class_, std::shared_ptr>>(m, "SubPomdpBuilder") .def(py::init const&>()) .def("start_from_belief", &synthesis::SubPomdpBuilder::startFromBelief) diff --git a/payntbind/src/synthesis/translation/componentTranslations.cpp b/payntbind/src/synthesis/translation/componentTranslations.cpp index 32b1297a8..27a6cab43 100644 --- a/payntbind/src/synthesis/translation/componentTranslations.cpp +++ b/payntbind/src/synthesis/translation/componentTranslations.cpp @@ -1,9 +1,46 @@ #include "componentTranslations.h" #include +#include namespace synthesis { + template + std::pair,std::vector> extractActionLabels( + storm::models::sparse::Model const& model + ) { + // collect action labels + STORM_LOG_THROW(model.hasChoiceLabeling(), storm::exceptions::InvalidModelException, + "model does not have the choice labeling"); + storm::models::sparse::ChoiceLabeling const& choice_labeling = model.getChoiceLabeling(); + std::vector action_labels; + std::map action_label_index; + std::set action_labels_set; + for(uint64_t choice = 0; choice < model.getNumberOfChoices(); ++choice) { + for(std::string const& label: choice_labeling.getLabelsOfChoice(choice)) { + action_labels_set.insert(label); + } + } + for(std::string const& label: action_labels_set) { + action_label_index[label] = action_labels.size(); + action_labels.push_back(label); + } + auto num_choices = model.getNumberOfChoices(); + + // assert that each choice has exactly one label + std::vector choice_to_action(num_choices); + for(uint64_t choice = 0; choice < model.getNumberOfChoices(); ++choice) { + auto const& labels = choice_labeling.getLabelsOfChoice(choice); + STORM_LOG_THROW(labels.size()==1, storm::exceptions::InvalidModelException, + "expected exactly 1 label for choice " << choice); + std::string const& label = *(labels.begin()); + uint64_t action = action_label_index[label]; + choice_to_action[choice] = action; + } + return std::make_pair(action_labels,choice_to_action); + } + + template storm::models::sparse::StateLabeling translateStateLabeling( storm::models::sparse::Model const& model, @@ -36,10 +73,10 @@ namespace synthesis { storm::models::sparse::Pomdp const& model, std::vector const& translated_to_original_state ) { - auto translated_num_states = translated_to_original_state.size(); + uint64_t translated_num_states = translated_to_original_state.size(); std::vector observation_classes(translated_num_states); for(uint64_t translated_state=0; translated_state= model.getNumberOfStates()) { continue; } @@ -55,17 +92,17 @@ namespace synthesis { std::vector const& translated_to_original_choice, storm::storage::BitVector const& translated_choice_mask ) { - auto translated_num_choices = translated_to_original_choice.size(); + uint64_t translated_num_choices = translated_to_original_choice.size(); storm::models::sparse::ChoiceLabeling translated_labeling(translated_num_choices); - for (auto const& label : model.getChoiceLabeling().getLabels()) { - translated_labeling.addLabel(label, storm::storage::BitVector(translated_num_choices,false)); - } - for(auto translated_choice: translated_choice_mask) { - auto choice = translated_to_original_choice[translated_choice]; + for(uint64_t translated_choice: translated_choice_mask) { + uint64_t choice = translated_to_original_choice[translated_choice]; if(choice >= model.getNumberOfChoices()) { continue; } - for (auto const& label : model.getChoiceLabeling().getLabelsOfChoice(choice)) { + for (std::string const& label : model.getChoiceLabeling().getLabelsOfChoice(choice)) { + if(not translated_labeling.containsLabel(label)) { + translated_labeling.addLabel(label); + } translated_labeling.addLabelToChoice(label,translated_choice); } } @@ -79,21 +116,35 @@ namespace synthesis { storm::storage::BitVector const& translated_choice_mask ) { std::optional> state_rewards; - STORM_LOG_THROW(!reward_model.hasStateRewards(), storm::exceptions::NotSupportedException, "state rewards are currently not supported."); - STORM_LOG_THROW(!reward_model.hasTransitionRewards(), storm::exceptions::NotSupportedException, "transition rewards are currently not supported."); + STORM_LOG_THROW(!reward_model.hasStateRewards(), storm::exceptions::NotSupportedException, + "state rewards are currently not supported."); + STORM_LOG_THROW(!reward_model.hasTransitionRewards(), storm::exceptions::NotSupportedException, + "transition rewards are currently not supported."); - auto num_choices = reward_model.getStateActionRewardVector().size(); + uint64_t num_choices = reward_model.getStateActionRewardVector().size(); std::vector action_rewards(translated_to_original_choice.size()); - for(auto translated_choice: translated_choice_mask) { - auto choice = translated_to_original_choice[translated_choice]; + for(uint64_t translated_choice: translated_choice_mask) { + uint64_t choice = translated_to_original_choice[translated_choice]; if(choice >= num_choices) { continue; } - auto reward = reward_model.getStateActionReward(choice); - action_rewards[translated_choice] = reward; + action_rewards[translated_choice] = reward_model.getStateActionReward(choice); } return storm::models::sparse::StandardRewardModel(std::move(state_rewards), std::move(action_rewards)); } + template + std::unordered_map> translateRewardModels( + storm::models::sparse::Model const& model, + std::vector const& translated_to_original_choice, + storm::storage::BitVector const& translated_choice_mask + ) { + std::unordered_map> reward_models; + for(auto const& reward_model : model.getRewardModels()) { + auto new_reward_model = translateRewardModel(reward_model.second,translated_to_original_choice,translated_choice_mask); + reward_models.emplace(reward_model.first, new_reward_model); + } + return reward_models; + } template void translateTransitionMatrixRow( @@ -103,9 +154,9 @@ namespace synthesis { std::vector const& original_to_translated_choice, uint64_t choice ) { - auto translated_choice = original_to_translated_choice[choice]; + uint64_t translated_choice = original_to_translated_choice[choice]; for(auto entry: model.getTransitionMatrix().getRow(choice)) { - auto translated_dst = original_to_translated_state[entry.getColumn()]; + uint64_t translated_dst = original_to_translated_state[entry.getColumn()]; builder.addNextValue(translated_choice, translated_dst, entry.getValue()); } } @@ -118,7 +169,7 @@ namespace synthesis { std::vector const& original_to_translated_choice, uint64_t state ) { - for(auto const& choice: model.getTransitionMatrix().getRowGroupIndices(state)) { + for(uint64_t const& choice: model.getTransitionMatrix().getRowGroupIndices(state)) { synthesis::translateTransitionMatrixRow( model, builder, original_to_translated_state, original_to_translated_choice, choice ); @@ -126,6 +177,9 @@ namespace synthesis { } + template std::pair,std::vector> extractActionLabels( + storm::models::sparse::Model const& model); + template storm::models::sparse::StateLabeling translateStateLabeling( storm::models::sparse::Model const& model, std::vector const& translated_to_original_state, @@ -152,6 +206,10 @@ namespace synthesis { storm::models::sparse::StandardRewardModel const& reward_model, std::vector const& translated_to_original_choice, storm::storage::BitVector const& translated_choice_mask); + template std::unordered_map> translateRewardModels( + storm::models::sparse::Model const& model, + std::vector const& translated_to_original_choice, + storm::storage::BitVector const& translated_choice_mask); template std::vector translateObservabilityClasses( storm::models::sparse::Pomdp const& model, diff --git a/payntbind/src/synthesis/translation/componentTranslations.h b/payntbind/src/synthesis/translation/componentTranslations.h index eccc9827a..0badb9b93 100644 --- a/payntbind/src/synthesis/translation/componentTranslations.h +++ b/payntbind/src/synthesis/translation/componentTranslations.h @@ -11,6 +11,16 @@ namespace synthesis { + /** + * Given a (nondeterministic) model with canonic choice labeling (exactly 1 label per choice), return a list of action + * labels and a choice-to-action mapping. + */ + template + std::pair,std::vector> extractActionLabels( + storm::models::sparse::Model const& model + ); + + template storm::models::sparse::StateLabeling translateStateLabeling( storm::models::sparse::Model const& model, @@ -37,6 +47,12 @@ namespace synthesis { std::vector const& translated_to_original_choice, storm::storage::BitVector const& translated_choice_mask ); + template + std::unordered_map> translateRewardModels( + storm::models::sparse::Model const& model, + std::vector const& translated_to_original_choice, + storm::storage::BitVector const& translated_choice_mask + ); template void translateTransitionMatrixRow(