From ec49e06f337e36ab545c4bc63c1879b101525648 Mon Sep 17 00:00:00 2001
From: Roman Andriushchenko <iandri@fit.vut.cz>
Date: Wed, 20 Dec 2023 14:33:15 +0100
Subject: [PATCH] policy export and visualization

---
 alias-paynt.sh                            |   4 +-
 paynt-specific-storm.dockerfile           |   3 +-
 paynt/cli.py                              |  11 ++-
 paynt/family/family.py                    |  15 ++--
 paynt/quotient/mdp_family.py              |  14 +--
 paynt/synthesizer/policy_tree.py          | 105 +++++++++++++++++++---
 paynt/synthesizer/statistic.py            |  10 +--
 paynt/synthesizer/synthesizer.py          |  24 +++--
 paynt/synthesizer/synthesizer_onebyone.py |  25 +++++-
 paynt/synthesizer/synthesizer_pomdp.py    |   2 +-
 10 files changed, 164 insertions(+), 49 deletions(-)

diff --git a/alias-paynt.sh b/alias-paynt.sh
index 0364e378f..195365ef9 100755
--- a/alias-paynt.sh
+++ b/alias-paynt.sh
@@ -15,7 +15,7 @@ alias envd='deactivate'
 
 storm-dependencies() {
     sudo apt update
-    sudo apt -y install build-essential git automake cmake libboost-all-dev libcln-dev libgmp-dev libginac-dev libglpk-dev libhwloc-dev libz3-dev libxerces-c-dev libeigen3-dev
+    sudo apt -y install build-essential git automake cmake libboost-all-dev libcln-dev libgmp-dev libginac-dev libglpk-dev libhwloc-dev libz3-dev libxerces-c-dev libeigen3-dev graphviz
     sudo apt -y install maven uuid-dev python3-dev libffi-dev libssl-dev python3-pip python3-venv
     # apt -y install texlive-latex-extra
     # update-alternatives --install /usr/bin/python python /usr/bin/python3 10
@@ -37,7 +37,7 @@ download-prerequisites() {
 python-environment() {
     python3 -m venv $PAYNT_ROOT/env
     enva
-    pip3 install pytest pytest-runner pytest-cov numpy scipy pysmt z3-solver click toml Cython scikit-build
+    pip3 install pytest pytest-runner pytest-cov numpy scipy pysmt z3-solver click toml Cython scikit-build graphviz
     envd
 }
 
diff --git a/paynt-specific-storm.dockerfile b/paynt-specific-storm.dockerfile
index 54d7ca644..7b7ae5806 100644
--- a/paynt-specific-storm.dockerfile
+++ b/paynt-specific-storm.dockerfile
@@ -75,7 +75,8 @@ RUN python setup.py build_ext $setup_args -j $no_threads develop
 RUN pip install -U pip setuptools wheel numpy
 
 # Paynt / extra dependencies
-RUN pip install pysmt z3-solver click
+RUN apt-get install -y graphviz
+RUN pip install click z3-solver pysmt graphviz
 
 # Build paynt
 #############
diff --git a/paynt/cli.py b/paynt/cli.py
index aaddda654..159b0f024 100644
--- a/paynt/cli.py
+++ b/paynt/cli.py
@@ -106,12 +106,11 @@ def setup_logger(log_path = None):
     help="path to output file for SAYNT belief FSC")
 @click.option("--export-fsc-paynt", type=click.Path(), default=None,
     help="path to output file for SAYNT inductive FSC")
+@click.option("--export-evaluation", type=click.Path(), default=None,
+    help="base filename to output evaluation result")
 
 @click.option(
-    "--ce-generator",
-    default="storm",
-    type=click.Choice(["storm", "mdp"]),
-    show_default=True,
+    "--ce-generator", type=click.Choice(["dtmc", "mdp"]), default="dtmc", show_default=True,
     help="counterexample generator",
 )
 @click.option("--profiling", is_flag=True, default=False,
@@ -125,7 +124,7 @@ def paynt_run(
     fsc_synthesis, pomdp_memory_size, posterior_aware,
     storm_pomdp, iterative_storm, get_storm_result, storm_options, prune_storm,
     use_storm_cutoffs, unfold_strategy_storm,
-    export_fsc_storm, export_fsc_paynt,
+    export_fsc_storm, export_fsc_paynt, export_evaluation,
     ce_generator,
     profiling
 ):
@@ -155,7 +154,7 @@ def paynt_run(
     properties_path = os.path.join(project, props)
     quotient = paynt.parser.sketch.Sketch.load_sketch(sketch_path, properties_path, export, relative_error, discount_factor)
     synthesizer = paynt.synthesizer.synthesizer.Synthesizer.choose_synthesizer(quotient, method, fsc_synthesis, storm_control)
-    synthesizer.run(optimum_threshold)
+    synthesizer.run(optimum_threshold, export_evaluation)
 
     if profiling:
         profiler.disable()
diff --git a/paynt/family/family.py b/paynt/family/family.py
index 9340f1af3..95fcf88dc 100644
--- a/paynt/family/family.py
+++ b/paynt/family/family.py
@@ -52,16 +52,19 @@ def hole_set_options(self, hole, options):
     def size(self):
         return math.prod([self.family.holeNumOptions(hole) for hole in range(self.num_holes)])
 
+    def hole_options_to_string(self, hole, options):
+        name = self.hole_name(hole)
+        labels = [self.hole_to_option_labels[hole][option] for option in options]
+        if len(labels) == 1:
+            return f"{name}={labels[0]}"
+        else:
+            return name + ": {" + ",".join(labels) + "}"
+
     def __str__(self):
         hole_strings = []
         for hole in range(self.num_holes):
-            name = self.hole_name(hole)
             options = self.hole_options(hole)
-            labels = [self.hole_to_option_labels[hole][option] for option in options]
-            if len(labels) == 1:
-                hole_str = "{}={}".format(name,labels[0]) 
-            else:
-                hole_str =  name + ": {" + ",".join(labels) + "}"
+            hole_str = self.hole_options_to_string(hole,options)
             hole_strings.append(hole_str)
         return ", ".join(hole_strings)
 
diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py
index 7b717e660..85e49f47f 100644
--- a/paynt/quotient/mdp_family.py
+++ b/paynt/quotient/mdp_family.py
@@ -94,13 +94,18 @@ def scheduler_to_policy(self, scheduler, mdp):
                 policy[state] = self.choice_to_action[choice]
         return policy
 
-    def policy_to_json(self, policy):
-        '''Create a json representation for a policy. '''
+    def policy_to_state_valuation_actions(self, policy):
+        '''
+        Create a representation for a policy that associated action labels with state valuations. States with only
+        one available action are omitted.
+        '''
         sv = self.quotient_mdp.state_valuations
         state_valuation_to_action = []
         for state,action in enumerate(policy):
             if action is None:
                 continue
+            if len(self.state_to_actions[state])==1:
+                continue
             # get action label
             action = self.action_labels[action]
             if action == "empty_label":
@@ -115,10 +120,7 @@ def policy_to_json(self, policy):
                 valuation[variable] = value
 
             state_valuation_to_action.append( (valuation,action) )
-        string = json.dumps(state_valuation_to_action, indent=2)
-        return string
-
-
+        return state_valuation_to_action
 
     
     def fix_and_apply_policy_to_family(self, family, policy):
diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py
index 1b41fe3c5..a5c65d9d8 100644
--- a/paynt/synthesizer/policy_tree.py
+++ b/paynt/synthesizer/policy_tree.py
@@ -4,17 +4,23 @@
 import paynt.quotient.models
 import paynt.synthesizer.synthesizer
 
+import paynt.quotient.quotient
+import paynt.verification.property_result
+from paynt.verification.property import Property
+
+import paynt.family.smt
 import paynt.synthesizer.conflict_generator.dtmc
 import paynt.synthesizer.conflict_generator.mdp
-import paynt.family.smt
 
-import paynt.verification.property_result
-from paynt.verification.property import Property
-import paynt.quotient.quotient
 
 import logging
 logger = logging.getLogger(__name__)
 
+# disable logging when importing graphviz to suppress warnings
+logging.disable(logging.CRITICAL)
+import graphviz
+logging.disable(logging.NOTSET)
+
 
 class MdpFamilyResult:
     def __init__(self):
@@ -119,6 +125,8 @@ def __init__(self, family):
         self.suboptions = []
         self.child_nodes = []
 
+        self.policy_number = None
+
     @property
     def is_leaf(self):
         return len(self.child_nodes) == 0
@@ -213,6 +221,50 @@ def merge_children_compatible(self, quotient, prop):
         # only 1 child node that can be moved to this node
         PolicyTreeNode.merged += 1
 
+    def assign_policy_number(self, num_policies):
+        if self.is_leaf:
+            if self.solved:
+                self.policy_number = num_policies
+                num_policies += 1
+            return num_policies
+        for child in self.child_nodes:
+            num_policies = child.assign_policy_number(num_policies)
+        return num_policies
+
+    def extract_policies(self, quotient):
+        if self.solved:
+            return { f"p{self.policy_number}" : quotient.policy_to_state_valuation_actions(self.policy) }
+        policies = {}
+        for child in self.child_nodes:
+            policies.update(child.extract_policies(quotient))
+        return policies
+
+    @property
+    def node_id(self):
+        return str(self.family).replace(' ','').replace(':','=')
+
+    def add_nodes_to_graphviz_tree(self, graphviz_tree):
+        node_label = ""
+        if self.policy is not None:
+            if self.policy is False:
+                node_label = "X"
+            else:
+                # node_label = "✓"
+                node_label = f"p{self.policy_number}"
+        graphviz_tree.node(self.node_id, label=node_label, shape="ellipse", width="0.15", height="0.15")
+        for child in self.child_nodes:
+            child.add_nodes_to_graphviz_tree(graphviz_tree)
+
+    def add_edges_to_graphviz_tree(self, graphviz_tree):
+        if self.splitter is None:
+            return
+        splitter_name = self.family.hole_name(self.splitter)
+        for index,child in enumerate(self.child_nodes):
+            edge_label = self.family.hole_options_to_string(self.splitter,self.suboptions[index])
+            graphviz_tree.edge(self.node_id,child.node_id,label=edge_label)
+            child.add_edges_to_graphviz_tree(graphviz_tree)
+
+
 
 class PolicyTree:
 
@@ -335,9 +387,6 @@ def print_stats(self):
         #     print(key, " - ", number)
         # print("--------------------")
 
-
-    
-
     
     def postprocess(self, quotient, prop, stat):
 
@@ -372,6 +421,20 @@ def postprocess(self, quotient, prop, stat):
         # stat.num_policies_yes = len(self.collect_solved())
         # self.print_stats()
 
+    def extract_policies(self, quotient):
+        self.root.assign_policy_number(0)
+        policies = self.root.extract_policies(quotient)
+        return policies
+
+    def extract_policy_tree(self, quotient):
+        logging.getLogger("graphviz").setLevel(logging.WARNING)
+        logging.getLogger("graphviz.sources").setLevel(logging.ERROR)
+        graphviz_tree = graphviz.Digraph(comment="policy_tree")
+        self.root.add_nodes_to_graphviz_tree(graphviz_tree)
+        self.root.add_edges_to_graphviz_tree(graphviz_tree)
+        return graphviz_tree
+
+
 
 
 class SynthesizerPolicyTree(paynt.synthesizer.synthesizer.Synthesizer):
@@ -934,10 +997,30 @@ def evaluate_all(self, family, prop, keep_value_only=False):
             policy_tree.double_check(self.quotient, prop)
         policy_tree.print_stats()
         policy_tree.postprocess(self.quotient, prop, self.stat)
+        self.policy_tree = policy_tree
 
         # convert policy tree to family evaluation
-        family_to_evaluation = []
+        evaluations = []
         for node in policy_tree.collect_leaves():
-            evaluation = paynt.synthesizer.synthesizer.FamilyEvaluation(None,node.solved,policy=node.policy)
-            family_to_evaluation.append( (node.family,evaluation) )
-        return family_to_evaluation
+            evaluation = paynt.synthesizer.synthesizer.FamilyEvaluation(node.family,None,node.solved,policy=node.policy)
+            evaluations.append(evaluation)
+        return evaluations
+
+    def export_evaluation_result(self, evaluations, export_filename_base):
+        import json
+        policies = self.policy_tree.extract_policies(self.quotient)
+        policies_string = json.dumps(policies, indent=2)
+        policies_filename = export_filename_base + ".json"
+        with open(policies_filename, 'w') as file:
+            file.write(policies_string)
+        logger.info(f"exported policies to {policies_filename}")
+
+        tree = self.policy_tree.extract_policy_tree(self.quotient)
+        tree_filename = export_filename_base + ".dot"
+        with open(tree_filename, 'w') as file:
+            file.write(tree.source)
+        logger.info(f"exported policy tree to {tree_filename}")
+
+        tree_visualization_filename = export_filename_base + ".png"
+        tree.render(export_filename_base, format="png", cleanup=True) # using export_filename_base since graphviz appends .png by default
+        logger.info(f"exported policy tree visualization to {tree_visualization_filename}")
diff --git a/paynt/synthesizer/statistic.py b/paynt/synthesizer/statistic.py
index 716c310fa..b6c432596 100644
--- a/paynt/synthesizer/statistic.py
+++ b/paynt/synthesizer/statistic.py
@@ -160,10 +160,10 @@ def finished_synthesis(self, assignment):
         self.synthesis_timer.stop()
         self.synthesized_assignment = assignment
 
-    def finished_evaluation(self, family_to_evaluation):
+    def finished_evaluation(self, evaluations):
         self.job_type = "evaluation"
         self.synthesis_timer.stop()
-        self.family_to_evaluation = family_to_evaluation
+        self.evaluations = evaluations
         
 
     def get_summary_specification(self):
@@ -203,9 +203,9 @@ def get_summary_synthesis(self):
             return f"feasible: {feasible}"
 
     def get_summary_evaluation(self):
-        if not type(self.family_to_evaluation[0]) == paynt.synthesizer.synthesizer.FamilyEvaluation:
+        if not isinstance(self.evaluations[0], paynt.synthesizer.synthesizer.FamilyEvaluation):
             return ""
-        members_sat = sum( [family.size for family,evaluation in self.family_to_evaluation if evaluation.sat ])
+        members_sat = sum( [evaluation.family.size for evaluation in self.evaluations if evaluation.sat ])
         members_total = self.quotient.design_space.size
         members_sat_percentage = int(round(members_sat/members_total*100,0))
         return f"satisfied {members_sat}/{members_total} members ({members_sat_percentage}%)"
@@ -238,7 +238,7 @@ def get_summary(self):
         return summary
     
     def print(self):    
-        print(self.get_summary())
+        print(self.get_summary(),end="")
 
 
     def print_mdp_family_table_entries(self):
diff --git a/paynt/synthesizer/synthesizer.py b/paynt/synthesizer/synthesizer.py
index dded1fafd..73eee2fed 100644
--- a/paynt/synthesizer/synthesizer.py
+++ b/paynt/synthesizer/synthesizer.py
@@ -6,7 +6,8 @@
 
 class FamilyEvaluation:
     '''Result associated with a family after its evaluation. '''
-    def __init__(self, value, sat, policy):
+    def __init__(self, family, value, sat, policy):
+        self.family = family
         self.value = value
         self.sat = sat
         self.policy = policy
@@ -72,7 +73,11 @@ def evaluate_all(self, family, prop, keep_value_only=False):
         ''' to be overridden '''
         pass
 
-    def evaluate(self, family=None, prop=None, keep_value_only=False, print_stats=True):
+    def export_evaluation_result(self, evaluations, export_filename_base):
+        ''' to be overridden '''
+        pass
+
+    def evaluate(self, family=None, prop=None, keep_value_only=False, print_stats=True, export_filename_base=None):
         '''
         Evaluate each member of the family wrt the given property.
         :param family if None, then the design space of the quotient will be used
@@ -80,6 +85,7 @@ def evaluate(self, family=None, prop=None, keep_value_only=False, print_stats=Tr
             (assuming single-property specification)
         :param keep_value_only if True, only value will be associated with the family
         :param print_stats if True, synthesis statistic will be printed
+        :param export_filename_base base filename used to export the evaluation results
         :returns a list of (family,evaluation) pairs
         '''
         if family is None:
@@ -89,13 +95,17 @@ def evaluate(self, family=None, prop=None, keep_value_only=False, print_stats=Tr
 
         logger.info("evaluation initiated, design space: {}".format(family.size))
         self.stat.start(family)
-        family_to_evaluation = self.evaluate_all(family, prop, keep_value_only)
-        self.stat.finished_evaluation(family_to_evaluation)
+        evaluations = self.evaluate_all(family, prop, keep_value_only)
+        self.stat.finished_evaluation(evaluations)
         logger.info("evaluation finished")
 
+        if export_filename_base is not None:
+            self.export_evaluation_result(evaluations, export_filename_base)
+
         if print_stats:
             self.stat.print()
-        return family_to_evaluation
+        
+        return evaluations
 
     
     def synthesize_one(self, family):
@@ -137,8 +147,8 @@ def synthesize(self, family=None, optimum_threshold=None, return_all=False, prin
         return assignment
 
     
-    def run(self, optimum_threshold=None):
+    def run(self, optimum_threshold=None, export_evaluation=None):
         if isinstance(self.quotient, paynt.quotient.mdp_family.MdpFamilyQuotient):
-            self.evaluate()
+            self.evaluate(export_filename_base=export_evaluation)
         else:
             self.synthesize(optimum_threshold=optimum_threshold)
diff --git a/paynt/synthesizer/synthesizer_onebyone.py b/paynt/synthesizer/synthesizer_onebyone.py
index 37b7ca189..1c271278c 100644
--- a/paynt/synthesizer/synthesizer_onebyone.py
+++ b/paynt/synthesizer/synthesizer_onebyone.py
@@ -32,7 +32,7 @@ def synthesize_one(self, family):
 
     def evaluate_all(self, family, prop, keep_value_only=False):
 
-        family_to_evaluation = []
+        evaluations = []
         for hole_combination in family.all_combinations():
             assignment = family.construct_assignment(hole_combination)
             model = self.quotient.build_assignment(assignment)
@@ -44,7 +44,24 @@ def evaluate_all(self, family, prop, keep_value_only=False):
                 policy = None
                 if result.sat:
                     policy = self.quotient.scheduler_to_policy(result.result.scheduler, model)
-                evaluation = paynt.synthesizer.synthesizer.FamilyEvaluation(result.value, result.sat, policy)
-            family_to_evaluation.append( (assignment,evaluation) )
+                evaluation = paynt.synthesizer.synthesizer.FamilyEvaluation(assignment, result.value, result.sat, policy)
+            evaluations.append(evaluation)
             self.explore(assignment)
-        return family_to_evaluation
+        return evaluations
+
+    def export_evaluation_result(self, evaluations, export_filename_base):
+        import json
+        family_to_evaluation_parsed = []
+        for evaluation in evaluations:
+            family = evaluation.family
+            policy = evaluation.policy
+            if policy is None:
+                policy = "UNSAT"
+            else:
+                policy = self.quotient.policy_to_state_valuation_actions(policy)
+            family_to_evaluation_parsed.append( (str(family),policy) )
+        policies_string = json.dumps(family_to_evaluation_parsed, indent=2)
+        policies_filename = export_filename_base + ".json"
+        with open(policies_filename, 'w') as file:
+            file.write(policies_string)
+        logger.info(f"exported satisfied members and correponding policies to {policies_filename}")
diff --git a/paynt/synthesizer/synthesizer_pomdp.py b/paynt/synthesizer/synthesizer_pomdp.py
index 5a0b3a143..90456c4d9 100644
--- a/paynt/synthesizer/synthesizer_pomdp.py
+++ b/paynt/synthesizer/synthesizer_pomdp.py
@@ -622,7 +622,7 @@ def strategy_expected_uai(self):
                 
 
 
-    def run(self, optimum_threshold=None):
+    def run(self, optimum_threshold=None, export_evaluation=None):
         # choose the synthesis strategy:
         if self.use_storm:
             logger.info("Storm POMDP option enabled")