From fefb966cc9dbc942fd24c523bb13c5187082b14b Mon Sep 17 00:00:00 2001
From: Roman Andriushchenko
Date: Wed, 6 Dec 2023 16:25:16 +0100
Subject: [PATCH] move randomized abstraction building to Stormpy

---
 paynt/quotient/mdp_family.py     | 62 +-------------------------------
 paynt/synthesizer/policy_tree.py | 39 ++++++++++----------
 2 files changed, 20 insertions(+), 81 deletions(-)

diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py
index a652d503d..d2d2b953e 100644
--- a/paynt/quotient/mdp_family.py
+++ b/paynt/quotient/mdp_family.py
@@ -66,64 +66,7 @@ def compute_choice_destinations(mdp):
             choice_destinations.append(destinations)
         return choice_destinations
 
-    @staticmethod
-    def randomize_action_variant(mdp, state_action_choices):
-        '''
-        Given an MDP having multiple variants of actions, create an MDP in which this variant selection is randomized.
-        '''
-
-        components = stormpy.storage.SparseModelComponents()
-        # copy state labeling
-        state_labeling = stormpy.storage.StateLabeling(mdp.nr_states)
-        for label in mdp.labeling.get_labels():
-            state_labeling.add_label(label)
-            state_labeling.set_states(label, mdp.labeling.get_states(label))
-        components.state_labeling = state_labeling
-
-        # build transition matrix and reward models
-        reward_vectors = {name:[] for name in mdp.reward_models}
-        builder = stormpy.storage.SparseMatrixBuilder(has_custom_row_grouping=True)
-        tm = mdp.transition_matrix
-        num_rows = 0
-        choice_to_action = []
-        for state in range(mdp.nr_states):
-            builder.new_row_group(num_rows)
-            for action,choices in enumerate(state_action_choices[state]):
-                if not choices:
-                    continue
-
-                # new choice
-                choice_to_action.append(action)
-
-                # handle transition matrix
-                num_choices = len(choices)
-                dst_prob = collections.defaultdict(int)
-                for choice in choices:
-                    for entry in tm.get_row(choice):
-                        dst_prob[entry.column] += entry.value()/num_choices
-
-                for dst,prob in dst_prob.items():
-                    builder.add_next_value(num_rows,dst,prob)
-                num_rows += 1
-
-                # handle reward models
-                for name,reward_model in mdp.reward_models.items():
-                    reward_value = 0
-                    for choice in choices:
-                        reward_value += reward_model.get_state_action_reward(choice)
-                    reward_value /= num_choices
-                    reward_vectors[name].append(reward_value)
-
-        components.transition_matrix = builder.build()
-        components.reward_models = {}
-        for name,state_action_reward in reward_vectors.items():
-            components.reward_models[name] = stormpy.SparseRewardModel(optional_state_action_reward_vector=state_action_reward)
-        model = stormpy.storage.SparseMdp(components)
-
-        return model,choice_to_action
-
-
     def __init__(self, quotient_mdp, coloring, specification):
         super().__init__(quotient_mdp = quotient_mdp, coloring = coloring, specification = specification)
 
 
@@ -134,10 +77,7 @@ def __init__(self, quotient_mdp, coloring, specification):
             self.quotient_mdp, self.num_actions, self.choice_to_action)
         self.state_to_actions = MdpFamilyQuotientContainer.map_state_to_available_actions(self.state_action_choices)
         self.choice_destinations = MdpFamilyQuotientContainer.compute_choice_destinations(self.quotient_mdp)
-
-        self.randomized_abstraction = MdpFamilyQuotientContainer.randomize_action_variant(
-            self.quotient_mdp, self.state_action_choices)
-
+
 
     @property
     def num_actions(self):
diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py
index c1174b295..0930c9baa 100644
--- a/paynt/synthesizer/policy_tree.py
+++ b/paynt/synthesizer/policy_tree.py
@@ -333,6 +333,11 @@ def postprocess(self, quotient, prop):
 
 class SynthesizerPolicyTree(paynt.synthesizer.synthesizer.Synthesizer):
 
+    # if True (False), then the game (randomized) abstraction will be used
+    use_game_abstraction = True
+    # if True, then the game scheduler will be used for splitting (incompatible with randomized abstraction)
+    use_optimistic_splitting = True
+
     @property
     def method_name(self):
         return "AR (policy tree)"
@@ -400,7 +405,7 @@ def solve_randomized_abstraction(self, family, prop):
             action = self.quotient.choice_to_action[family.mdp.quotient_choice_map[choice]]
             choice_to_action.append(action)
         state_action_choices = self.quotient.map_state_action_to_choices(family.mdp.model,self.quotient.num_actions,choice_to_action)
-        model,choice_to_action = self.quotient.randomize_action_variant(family.mdp.model, state_action_choices)
+        model,choice_to_action, = stormpy.synthesis.randomize_action_variant(family.mdp.model, state_action_choices)
         assert family.mdp.model.nr_states == model.nr_states
 
         result = stormpy.synthesis.verify_mdp(paynt.quotient.models.MarkovChain.environment,model,prop.formula,True)
@@ -421,7 +426,7 @@ def solve_randomized_abstraction(self, family, prop):
 
         return policy,policy_sat
 
-    
+
     def verify_family(self, family, game_solver, prop, reference_policy=None):
         self.quotient.build(family)
         mdp_family_result = MdpFamilyResult()
@@ -440,17 +445,7 @@ def verify_family(self, family, game_solver, prop, reference_policy=None):
             mdp_family_result.policy_source = "singleton"
             return mdp_family_result
 
-        if False:
-            randomized_policy,randomized_sat = self.solve_randomized_abstraction(family,prop)
-            if randomized_sat:
-                randomized_policy_sat = self.verify_policy(family,prop,randomized_policy)
-                if randomized_policy_sat:
-                    mdp_family_result.policy = randomized_policy
-                    mdp_family_result.policy_source = "randomized abstraction"
-                    return mdp_family_result
-
-        if True:
-            # game abstraction
+        if SynthesizerPolicyTree.use_game_abstraction:
             game_policy,game_sat = self.solve_game_abstraction(family,prop,game_solver)
             if game_sat:
                 game_policy_sat = self.verify_policy(family,prop,game_policy)
@@ -460,8 +455,14 @@ def verify_family(self, family, game_solver, prop, reference_policy=None):
                     return mdp_family_result
                 else:
                     print("game YES but nor forall family of size ", family.size)
-
-
+        else:
+            randomized_policy,randomized_sat = self.solve_randomized_abstraction(family,prop)
+            if randomized_sat:
+                randomized_policy_sat = self.verify_policy(family,prop,randomized_policy)
+                if randomized_policy_sat:
+                    mdp_family_result.policy = randomized_policy
+                    mdp_family_result.policy_source = "randomized abstraction"
+                    return mdp_family_result
 
         # solve primary-primary direction for the family
         # logger.debug("solving primary-primary direction...")
@@ -474,12 +475,10 @@ def verify_family(self, family, game_solver, prop, reference_policy=None):
 
 
         # undecided: choose scheduler choices to be used for splitting
-        if False:
-            # optimistic splitting
+        if SynthesizerPolicyTree.use_optimistic_splitting:
             scheduler_choices = game_solver.solution_reachable_choices
             state_values = game_solver.solution_state_values
         else:
-            # pessimistic splitting
             scheduler_choices = stormpy.BitVector(self.quotient.quotient_mdp.nr_choices,False)
             state_values = [0] * self.quotient.quotient_mdp.nr_states
             for state in range(family.mdp.states):
@@ -579,7 +578,7 @@ def synthesize_policy_tree(self, family):
         self.last_splitter = -1
         prop = self.quotient.specification.constraints[0]
         game_solver = self.quotient.build_game_abstraction_solver(prop)
-        game_solver.enable_profiling(True)
+        # game_solver.enable_profiling(True)
 
         policy_tree = PolicyTree(family)
         reference_policy = None
@@ -610,7 +609,7 @@ def synthesize_policy_tree(self, family):
                 policy_tree_node.split(result.splitter,suboptions,subfamilies)
                 policy_tree_leaves = policy_tree_leaves + policy_tree_node.child_nodes
 
-        game_solver.print_profiling()
+        # game_solver.print_profiling()
         policy_tree.double_check_all_families(self.quotient, prop)
         policy_tree.print_stats()
         policy_tree.postprocess(self.quotient, prop)