Merge pull request #52 from randriu/smg-abstraction
SMG abstraction for families of MDPs
randriu authored Nov 25, 2024
2 parents 5acbbc5 + 3aa98e2 commit b7ecfd3
Showing 20 changed files with 574 additions and 439 deletions.
10 changes: 1 addition & 9 deletions paynt/cli.py
@@ -114,14 +114,8 @@ def setup_logger(log_path = None):
 @click.option("--export-synthesis", type=click.Path(), default=None,
     help="base filename to output synthesis result")
 
-@click.option("--mdp-split-wrt-mdp", is_flag=True, default=False,
-    help="if set, MDP abstraction scheduler will be used for splitting, otherwise game abstraction scheduler will be used")
 @click.option("--mdp-discard-unreachable-choices", is_flag=True, default=False,
     help="if set, unreachable choices will be discarded from the splitting scheduler")
-@click.option("--mdp-use-randomized-abstraction", is_flag=True, default=False,
-    help="if set, randomized abstraction guess-and-verify will be used instead of game abstraction;" +
-    " MDP abstraction scheduler will be used for splitting"
-)
 
 @click.option("--tree-depth", default=0, type=int,
     help="decision tree synthesis: tree depth")
@@ -152,7 +146,7 @@ def paynt_run(
     storm_pomdp, iterative_storm, get_storm_result, storm_options, prune_storm,
     use_storm_cutoffs, unfold_strategy_storm,
     export_fsc_storm, export_fsc_paynt, export_synthesis,
-    mdp_split_wrt_mdp, mdp_discard_unreachable_choices, mdp_use_randomized_abstraction,
+    mdp_discard_unreachable_choices,
     tree_depth, tree_enumeration, tree_map_scheduler, add_dont_care_action,
     constraint_bound,
     ce_generator,
@@ -176,9 +170,7 @@ def paynt_run(
     paynt.quotient.decpomdp.DecPomdpQuotient.initial_memory_size = fsc_memory_size
     paynt.quotient.posmg.PosmgQuotient.initial_memory_size = fsc_memory_size
 
-    paynt.synthesizer.policy_tree.SynthesizerPolicyTree.split_wrt_mdp_scheduler = mdp_split_wrt_mdp
     paynt.synthesizer.policy_tree.SynthesizerPolicyTree.discard_unreachable_choices = mdp_discard_unreachable_choices
-    paynt.synthesizer.policy_tree.SynthesizerPolicyTree.use_randomized_abstraction = mdp_use_randomized_abstraction
 
     paynt.synthesizer.decision_tree.SynthesizerDecisionTree.tree_depth = tree_depth
     paynt.synthesizer.decision_tree.SynthesizerDecisionTree.tree_enumeration = tree_enumeration
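For reference, a minimal sketch of the wiring pattern cli.py uses after this change: a click flag is mirrored onto a class-level attribute of the synthesizer. The flag name and attribute are taken from the hunks above; the surrounding PAYNT options and imports are elided, so this is illustrative rather than the actual cli.py.

    import click

    class SynthesizerPolicyTree:
        # class-level switch consulted during splitting (see policy_tree.py below)
        discard_unreachable_choices = False

    @click.command()
    @click.option("--mdp-discard-unreachable-choices", is_flag=True, default=False,
        help="if set, unreachable choices will be discarded from the splitting scheduler")
    def paynt_run(mdp_discard_unreachable_choices):
        # mirror the CLI flag onto the class attribute, as paynt_run does above
        SynthesizerPolicyTree.discard_unreachable_choices = mdp_discard_unreachable_choices

    if __name__ == "__main__":
        paynt_run()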
2 changes: 1 addition & 1 deletion paynt/parser/drn_parser.py
@@ -27,7 +27,7 @@ def parse_drn(cls, sketch_path):
             pomdp_path = sketch_path + '.tmp'
             state_player_indications = DrnParser.pomdp_from_posmg(sketch_path, pomdp_path)
             pomdp = DrnParser.read_drn(pomdp_path)
-            explicit_model = payntbind.synthesis.create_posmg(pomdp, state_player_indications)
+            explicit_model = payntbind.synthesis.posmg_from_pomdp(pomdp, state_player_indications)
             os.remove(pomdp_path)
         else:
             explicit_model = DrnParser.read_drn(sketch_path)
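The only change here is the rename of the binding that lifts a POMDP to a POSMG. A sketch of the calling convention implied by this hunk, assuming payntbind is built and using a hypothetical POSMG sketch file model.drn:

    import os
    import payntbind
    from paynt.parser.drn_parser import DrnParser

    # split the POSMG .drn into a plain POMDP plus per-state player indications
    state_player_indications = DrnParser.pomdp_from_posmg("model.drn", "model.drn.tmp")
    pomdp = DrnParser.read_drn("model.drn.tmp")
    # binding renamed in this commit: create_posmg -> posmg_from_pomdp
    posmg = payntbind.synthesis.posmg_from_pomdp(pomdp, state_player_indications)
    os.remove("model.drn.tmp")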
2 changes: 1 addition & 1 deletion paynt/quotient/mdp_family.py
@@ -186,7 +186,7 @@ def build_game_abstraction_solver(self, prop):
         target_label = prop.get_target_label()
         precision = paynt.verification.property.Property.model_checking_precision
         solver = payntbind.synthesis.GameAbstractionSolver(
-            self.quotient_mdp, len(self.action_labels), self.choice_to_action, target_label, precision
+            self.quotient_mdp, len(self.action_labels), self.choice_to_action, prop.formula, prop.maximizing, target_label, precision
         )
         return solver
 
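GameAbstractionSolver now receives the property's formula and optimization direction at construction time, which is what lets solve_smg below take only the choice mask. A sketch of the new call with argument names taken from this hunk; the quotient object is assumed to expose quotient_mdp, action_labels and choice_to_action, as the MDP-family quotient does here.

    import payntbind
    import paynt.verification.property

    def build_game_abstraction_solver(quotient, prop):
        target_label = prop.get_target_label()
        precision = paynt.verification.property.Property.model_checking_precision
        # prop.formula and prop.maximizing are new constructor arguments in this commit
        return payntbind.synthesis.GameAbstractionSolver(
            quotient.quotient_mdp, len(quotient.action_labels), quotient.choice_to_action,
            prop.formula, prop.maximizing, target_label, precision
        )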
73 changes: 10 additions & 63 deletions paynt/synthesizer/policy_tree.py
@@ -483,12 +483,8 @@ class SynthesizerPolicyTree(paynt.synthesizer.synthesizer.Synthesizer):
 
     # if True, tree leaves will be double-checked after synthesis
     double_check_policy_tree_leaves = False
-    # if True, MDP abstraction scheduler will be used for splitting, otherwise game abstraction scheduler will be used
-    split_wrt_mdp_scheduler = False
     # if True, unreachable choices will be discarded from the splitting scheduler
     discard_unreachable_choices = False
-    # if True, randomized abstraction guess-and-verify will be used instead of game abstraction
-    use_randomized_abstraction = False
 
     @property
     def method_name(self):
@@ -532,9 +528,12 @@ def solve_singleton(self, family, prop):
     def solve_game_abstraction(self, family, prop, game_solver):
         # construct and solve the game abstraction
         # logger.debug("solving game abstraction...")
-        game_solver.solve(family.selected_choices, prop.maximizing, prop.minimizing)
-        self.stat.iteration_game(family.mdp.states)
+
+        # game_solver.solve_sg(family.selected_choices)
+        game_solver.solve_smg(family.selected_choices)
+
         game_value = game_solver.solution_value
+        self.stat.iteration_game(family.mdp.states)
         game_sat = prop.satisfies_threshold_within_precision(game_value)
         # logger.debug("game solved, value is {}".format(game_value))
         game_policy = game_solver.solution_state_to_player1_action
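Since the optimization direction is now fixed when the solver is built, callers pass only the family's selected choices. A sketch of the resulting interaction, restricted to members visible in this diff; the game_solver, family and prop arguments stand in for PAYNT's actual objects.

    def solve_family_via_game(game_solver, family, prop):
        # solve the SMG abstraction restricted to the family's choices;
        # maximizing/minimizing no longer needs to be passed here
        game_solver.solve_smg(family.selected_choices)
        game_value = game_solver.solution_value
        game_policy = game_solver.solution_state_to_player1_action
        game_sat = prop.satisfies_threshold_within_precision(game_value)
        return game_policy, game_sat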
@@ -546,35 +545,6 @@ def solve_game_abstraction(self, family, prop, game_solver):
             game_policy = game_policy_fixed
         return game_policy,game_sat
 
-    def try_randomized_abstraction(self, family, prop):
-        # build randomized abstraction
-        choice_to_action = []
-        for choice in range(family.mdp.model.nr_choices):
-            action = self.quotient.choice_to_action[family.mdp.quotient_choice_map[choice]]
-            choice_to_action.append(action)
-        state_action_choices = self.quotient.map_state_action_to_choices(family.mdp.model,self.quotient.num_actions,choice_to_action)
-        model,choice_to_action = payntbind.synthesis.randomize_action_variant(family.mdp.model, state_action_choices)
-
-        # model check
-        result = Property.model_check(model, prop.formula)
-        self.stat.iteration(model)
-        value = result.at(model.initial_states[0])
-        policy_sat = prop.satisfies_threshold(value) # does this value matter?
-
-        # extract policy for the quotient
-        scheduler = result.scheduler
-        policy = self.quotient.empty_policy()
-        for state in range(model.nr_states):
-            state_choice = scheduler.get_choice(state).get_deterministic_choice()
-            choice = model.transition_matrix.get_row_group_start(state) + state_choice
-            action = choice_to_action[choice]
-            quotient_state = family.mdp.quotient_state_map[state]
-            policy[quotient_state] = action
-
-        # apply policy and check if it is SAT for all MDPs in the family
-        policy_sat = self.verify_policy(family, prop, policy)
-        return policy,policy_sat
-
     def state_to_choice_to_hole_selection(self, state_to_choice):
         if SynthesizerPolicyTree.discard_unreachable_choices:
             state_to_choice = self.quotient.discard_unreachable_choices(state_to_choice)
@@ -588,18 +558,6 @@ def parse_game_scheduler(self, game_solver):
         state_values = game_solver.solution_state_values
         return scheduler_choices,hole_selection,state_values
 
-    def parse_mdp_scheduler(self, family, mdp_result):
-        state_to_choice = self.quotient.scheduler_to_state_to_choice(
-            family.mdp, mdp_result.result.scheduler, discard_unreachable_choices=False
-        )
-        scheduler_choices,hole_selection = self.state_to_choice_to_hole_selection(state_to_choice)
-        state_values = [0] * self.quotient.quotient_mdp.nr_states
-        for state in range(family.mdp.states):
-            quotient_state = family.mdp.quotient_state_map[state]
-            state_values[quotient_state] = mdp_result.result.at(state)
-        return scheduler_choices,hole_selection,state_values
-
-
     def verify_family(self, family, game_solver, prop):
         # logger.info("investigating family of size {}".format(family.size))
         self.quotient.build(family)
@@ -609,18 +567,10 @@ def verify_family(self, family, game_solver, prop):
             mdp_family_result.policy = self.solve_singleton(family,prop)
             return mdp_family_result
 
-        if not SynthesizerPolicyTree.use_randomized_abstraction:
-            if family.candidate_policy is None:
-                game_policy,game_sat = self.solve_game_abstraction(family,prop,game_solver)
-            else:
-                game_policy = family.candidate_policy
-                game_sat = False
+        if family.candidate_policy is None:
+            game_policy,game_sat = self.solve_game_abstraction(family,prop,game_solver)
         else:
-            randomization_policy,policy_sat = self.try_randomized_abstraction(family,prop)
-            if policy_sat:
-                mdp_family_result.policy = randomization_policy
-                return mdp_family_result
-            game_policy = None
+            game_policy = family.candidate_policy
             game_sat = False
 
         mdp_family_result.game_policy = game_policy
@@ -638,10 +588,7 @@
             return mdp_family_result
 
         # undecided: choose scheduler choices to be used for splitting
-        if not (SynthesizerPolicyTree.use_randomized_abstraction or SynthesizerPolicyTree.split_wrt_mdp_scheduler):
-            scheduler_choices,hole_selection,state_values = self.parse_game_scheduler(game_solver)
-        else:
-            scheduler_choices,hole_selection,state_values = self.parse_mdp_scheduler(family, mdp_result)
+        scheduler_choices,hole_selection,state_values = self.parse_game_scheduler(game_solver)
 
         splitter = self.choose_splitter(family,prop,scheduler_choices,state_values,hole_selection)
         mdp_family_result.splitter = splitter
@@ -723,7 +670,7 @@ def split(self, family, prop, hole_selection, splitter, policy):
             subfamily.candidate_policy = None
             subfamilies.append(subfamily)
 
-        if not (SynthesizerPolicyTree.use_randomized_abstraction or SynthesizerPolicyTree.split_wrt_mdp_scheduler) and not SynthesizerPolicyTree.discard_unreachable_choices:
+        if not SynthesizerPolicyTree.discard_unreachable_choices:
            self.assign_candidate_policy(subfamilies, hole_selection, splitter, policy)
 
         return suboptions,subfamilies
(Diffs for the remaining 16 changed files are not shown.)
