SMG abstraction for families of MDPs #52

Merged · 2 commits · Nov 25, 2024
10 changes: 1 addition & 9 deletions paynt/cli.py
@@ -114,14 +114,8 @@ def setup_logger(log_path = None):
 @click.option("--export-synthesis", type=click.Path(), default=None,
     help="base filename to output synthesis result")

-@click.option("--mdp-split-wrt-mdp", is_flag=True, default=False,
-    help="if set, MDP abstraction scheduler will be used for splitting, otherwise game abstraction scheduler will be used")
 @click.option("--mdp-discard-unreachable-choices", is_flag=True, default=False,
     help="if set, unreachable choices will be discarded from the splitting scheduler")
-@click.option("--mdp-use-randomized-abstraction", is_flag=True, default=False,
-    help="if set, randomized abstraction guess-and-verify will be used instead of game abstraction;" +
-        " MDP abstraction scheduler will be used for splitting"
-)

 @click.option("--tree-depth", default=0, type=int,
     help="decision tree synthesis: tree depth")
@@ -152,7 +146,7 @@ def paynt_run(
     storm_pomdp, iterative_storm, get_storm_result, storm_options, prune_storm,
     use_storm_cutoffs, unfold_strategy_storm,
     export_fsc_storm, export_fsc_paynt, export_synthesis,
-    mdp_split_wrt_mdp, mdp_discard_unreachable_choices, mdp_use_randomized_abstraction,
+    mdp_discard_unreachable_choices,
     tree_depth, tree_enumeration, tree_map_scheduler, add_dont_care_action,
     constraint_bound,
     ce_generator,
@@ -176,9 +170,7 @@ def paynt_run(
     paynt.quotient.decpomdp.DecPomdpQuotient.initial_memory_size = fsc_memory_size
     paynt.quotient.posmg.PosmgQuotient.initial_memory_size = fsc_memory_size

-    paynt.synthesizer.policy_tree.SynthesizerPolicyTree.split_wrt_mdp_scheduler = mdp_split_wrt_mdp
     paynt.synthesizer.policy_tree.SynthesizerPolicyTree.discard_unreachable_choices = mdp_discard_unreachable_choices
-    paynt.synthesizer.policy_tree.SynthesizerPolicyTree.use_randomized_abstraction = mdp_use_randomized_abstraction

     paynt.synthesizer.decision_tree.SynthesizerDecisionTree.tree_depth = tree_depth
     paynt.synthesizer.decision_tree.SynthesizerDecisionTree.tree_enumeration = tree_enumeration
2 changes: 1 addition & 1 deletion paynt/parser/drn_parser.py
@@ -27,7 +27,7 @@ def parse_drn(cls, sketch_path):
             pomdp_path = sketch_path + '.tmp'
             state_player_indications = DrnParser.pomdp_from_posmg(sketch_path, pomdp_path)
             pomdp = DrnParser.read_drn(pomdp_path)
-            explicit_model = payntbind.synthesis.create_posmg(pomdp, state_player_indications)
+            explicit_model = payntbind.synthesis.posmgFromPomdp(pomdp, state_player_indications)
             os.remove(pomdp_path)
         else:
             explicit_model = DrnParser.read_drn(sketch_path)

Collaborator (marked as resolved by TheGreatfpmK): I think we mostly use the Python naming conventions for bound C++ functions? So I would change this to posmg_from_pomdp.
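A minimal sketch of the reviewer's suggestion, assuming the binding keeps its current camelCase name: the snake_case spelling could be provided as a Python-side alias. The alias below is illustrative, not the actual payntbind API.

    import payntbind.synthesis

    # hypothetical snake_case alias for the bound C++ function
    posmg_from_pomdp = payntbind.synthesis.posmgFromPomdp
    explicit_model = posmg_from_pomdp(pomdp, state_player_indications)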
2 changes: 1 addition & 1 deletion paynt/quotient/mdp_family.py
@@ -186,7 +186,7 @@ def build_game_abstraction_solver(self, prop):
         target_label = prop.get_target_label()
         precision = paynt.verification.property.Property.model_checking_precision
         solver = payntbind.synthesis.GameAbstractionSolver(
-            self.quotient_mdp, len(self.action_labels), self.choice_to_action, target_label, precision
+            self.quotient_mdp, len(self.action_labels), self.choice_to_action, prop.formula, prop.maximizing, target_label, precision
         )
         return solver
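For context, a hedged sketch of how the extended constructor is used. The argument order follows the changed line above; the surrounding calls (solve_smg, solution_value) are taken from the policy_tree.py diff below and may differ in detail.

    # construct the SMG-based game abstraction solver for a reachability property
    solver = payntbind.synthesis.GameAbstractionSolver(
        self.quotient_mdp,          # quotient MDP of the family
        len(self.action_labels),    # number of distinct action labels
        self.choice_to_action,      # choice-to-action map of the quotient
        prop.formula,               # property formula, newly passed in
        prop.maximizing,            # optimization direction, newly passed in
        target_label,               # target label of the property
        precision,                  # model-checking precision
    )
    solver.solve_smg(family.selected_choices)   # solve restricted to the family's choices
    value = solver.solution_value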
75 changes: 12 additions & 63 deletions paynt/synthesizer/policy_tree.py
@@ -483,12 +483,8 @@ class SynthesizerPolicyTree(paynt.synthesizer.synthesizer.Synthesizer):

     # if True, tree leaves will be double-checked after synthesis
     double_check_policy_tree_leaves = False
-    # if True, MDP abstraction scheduler will be used for splitting, otherwise game abstraction scheduler will be used
-    split_wrt_mdp_scheduler = False
     # if True, unreachable choices will be discarded from the splitting scheduler
     discard_unreachable_choices = False
-    # if True, randomized abstraction guess-and-verify will be used instead of game abstraction
-    use_randomized_abstraction = False

     @property
     def method_name(self):
@@ -532,9 +528,12 @@ def solve_singleton(self, family, prop):
     def solve_game_abstraction(self, family, prop, game_solver):
         # construct and solve the game abstraction
         # logger.debug("solving game abstraction...")
-        game_solver.solve(family.selected_choices, prop.maximizing, prop.minimizing)
-        self.stat.iteration_game(family.mdp.states)
+
+        # game_solver.solve_sg(family.selected_choices)
+        game_solver.solve_smg(family.selected_choices)
+
         game_value = game_solver.solution_value
+        self.stat.iteration_game(family.mdp.states)
         game_sat = prop.satisfies_threshold_within_precision(game_value)
         # logger.debug("game solved, value is {}".format(game_value))
         game_policy = game_solver.solution_state_to_player1_action
...
             game_policy = game_policy_fixed
         return game_policy,game_sat

-    def try_randomized_abstraction(self, family, prop):
-        # build randomized abstraction
-        choice_to_action = []
-        for choice in range(family.mdp.model.nr_choices):
-            action = self.quotient.choice_to_action[family.mdp.quotient_choice_map[choice]]
-            choice_to_action.append(action)
-        state_action_choices = self.quotient.map_state_action_to_choices(family.mdp.model,self.quotient.num_actions,choice_to_action)
-        model,choice_to_action = payntbind.synthesis.randomize_action_variant(family.mdp.model, state_action_choices)
-
-        # model check
-        result = Property.model_check(model, prop.formula)
-        self.stat.iteration(model)
-        value = result.at(model.initial_states[0])
-        policy_sat = prop.satisfies_threshold(value) # does this value matter?
-
-        # extract policy for the quotient
-        scheduler = result.scheduler
-        policy = self.quotient.empty_policy()
-        for state in range(model.nr_states):
-            state_choice = scheduler.get_choice(state).get_deterministic_choice()
-            choice = model.transition_matrix.get_row_group_start(state) + state_choice
-            action = choice_to_action[choice]
-            quotient_state = family.mdp.quotient_state_map[state]
-            policy[quotient_state] = action
-
-        # apply policy and check if it is SAT for all MDPs in the family
-        policy_sat = self.verify_policy(family, prop, policy)
-        return policy,policy_sat
-
     def state_to_choice_to_hole_selection(self, state_to_choice):
         if SynthesizerPolicyTree.discard_unreachable_choices:
             state_to_choice = self.quotient.discard_unreachable_choices(state_to_choice)
@@ -588,18 +558,6 @@ def parse_game_scheduler(self, game_solver):
         state_values = game_solver.solution_state_values
         return scheduler_choices,hole_selection,state_values

-    def parse_mdp_scheduler(self, family, mdp_result):
-        state_to_choice = self.quotient.scheduler_to_state_to_choice(
-            family.mdp, mdp_result.result.scheduler, discard_unreachable_choices=False
-        )
-        scheduler_choices,hole_selection = self.state_to_choice_to_hole_selection(state_to_choice)
-        state_values = [0] * self.quotient.quotient_mdp.nr_states
-        for state in range(family.mdp.states):
-            quotient_state = family.mdp.quotient_state_map[state]
-            state_values[quotient_state] = mdp_result.result.at(state)
-        return scheduler_choices,hole_selection,state_values
-
-
     def verify_family(self, family, game_solver, prop):
         # logger.info("investigating family of size {}".format(family.size))
         self.quotient.build(family)
...
             mdp_family_result.policy = self.solve_singleton(family,prop)
             return mdp_family_result

-        if not SynthesizerPolicyTree.use_randomized_abstraction:
-            if family.candidate_policy is None:
-                game_policy,game_sat = self.solve_game_abstraction(family,prop,game_solver)
-            else:
-                game_policy = family.candidate_policy
-                game_sat = False
+        if family.candidate_policy is None:
+            game_policy,game_sat = self.solve_game_abstraction(family,prop,game_solver)
         else:
-            randomization_policy,policy_sat = self.try_randomized_abstraction(family,prop)
-            if policy_sat:
-                mdp_family_result.policy = randomization_policy
-                return mdp_family_result
-            game_policy = None
+            game_policy = family.candidate_policy
+            game_sat = False

         mdp_family_result.game_policy = game_policy
...
                 return mdp_family_result

         # undecided: choose scheduler choices to be used for splitting
-        if not (SynthesizerPolicyTree.use_randomized_abstraction or SynthesizerPolicyTree.split_wrt_mdp_scheduler):
-            scheduler_choices,hole_selection,state_values = self.parse_game_scheduler(game_solver)
-        else:
-            scheduler_choices,hole_selection,state_values = self.parse_mdp_scheduler(family, mdp_result)
+        scheduler_choices,hole_selection,state_values = self.parse_game_scheduler(game_solver)

         splitter = self.choose_splitter(family,prop,scheduler_choices,state_values,hole_selection)
         mdp_family_result.splitter = splitter
@@ -723,7 +670,7 @@ def split(self, family, prop, hole_selection, splitter, policy):
             subfamily.candidate_policy = None
             subfamilies.append(subfamily)

-        if not (SynthesizerPolicyTree.use_randomized_abstraction or SynthesizerPolicyTree.split_wrt_mdp_scheduler) and not SynthesizerPolicyTree.discard_unreachable_choices:
+        if not SynthesizerPolicyTree.discard_unreachable_choices:
             self.assign_candidate_policy(subfamilies, hole_selection, splitter, policy)

         return suboptions,subfamilies
@@ -732,6 +679,7 @@ def split(self, family, prop, hole_selection, splitter, policy):
     def evaluate_all(self, family, prop, keep_value_only=False):
         assert not prop.reward, "expecting reachability probability propery"
         game_solver = self.quotient.build_game_abstraction_solver(prop)
+        # game_solver.enable_profiling(True)
         family.candidate_policy = None
         policy_tree = PolicyTree(family)

Collaborator: Remove profiling comments unless we want to do more performance testing.
@@ -781,6 +729,7 @@ def evaluate_all(self, family, prop, keep_value_only=False):
         self.stat.num_leaves_merged = len(policy_tree.collect_leaves())
         self.stat.num_policies_merged = len(policy_tree.policies)
         self.policy_tree = policy_tree
+        # game_solver.print_profiling()

         # convert policy tree to family evaluation
         evaluations = []