unfold randomized posterior-aware FSC into a POMDP sketch

randriu · Sep 5, 2024 · e555499 · e555499
1 parent 56f6a0f
commit e555499
Show file tree

Hide file tree

Showing 12 changed files with 352 additions and 425 deletions.
diff --git a/models/pomdp/sketches/avoid-discounted/sketch.props b/models/pomdp/sketches/avoid-discounted/sketch.props
@@ -0,0 +1 @@
+R{"penalty"}min=? [C{0.99}]
diff --git a/models/pomdp/sketches/avoid-discounted/sketch.templ b/models/pomdp/sketches/avoid-discounted/sketch.templ
@@ -0,0 +1,114 @@
+pomdp
+
+// grid dimensions
+const int N=5;
+const int xMIN = 0;
+const int yMIN = 0;
+const int xMAX = N-1;
+const int yMAX = N-1;
+
+formula goal = (x=xMAX) & (y=yMAX);
+
+observable "clk" = clk;
+observable "goal" = goal;
+observable "see" = see;
+
+// synchronization
+formula clk_next = mod(clk+1,4);
+module clk
+    clk : [-1..3] init -1;
+
+    [place] !goal & clk=-1 -> (clk'=clk_next);
+
+    [left]  !goal & clk=0 -> (clk'=clk_next);
+    [right] !goal & clk=0 -> (clk'=clk_next);
+    [down]  !goal & clk=0 -> (clk'=clk_next);
+    [up]    !goal & clk=0 -> (clk'=clk_next);
+    [wait]  !goal & clk=0 -> (clk'=clk_next);
+
+    [o]     !goal & clk=1 -> (clk'=clk_next);
+    
+    [detect1] !goal & clk=2 -> (clk'=clk_next);
+    [detect2] !goal & clk=3 -> (clk'=clk_next);
+endmodule
+
+
+// agent moving towards the exit
+const double slip = 0.0;
+module agent
+    x : [xMIN..xMAX] init xMIN;
+    y : [yMIN..yMAX] init yMIN;
+
+    [left]  true -> (1-slip): (x'=max(x-1,xMIN)) + slip: true;
+    [right] true -> (1-slip): (x'=min(x+1,xMAX)) + slip: true;
+    [down]  true -> (1-slip): (y'=max(y-1,yMIN)) + slip: true;
+    [up]    true -> (1-slip): (y'=min(y+1,yMAX)) + slip: true;
+    [wait]  true -> true;
+endmodule
+
+
+// obstacles oscillating on the x-axis
+hole int o1x_init in {0,1,2,3,4};
+hole int o2x_init in {0,1,2,3,4};
+
+hole int goright1_init in {0,1};
+hole int goright2_init in {0,1};
+
+hole int o1y in {1,2,3,4};
+hole int o2y in {1,2,3,4};
+
+module obstacle1
+    o1x : [xMIN..xMAX] init xMIN;
+    goright1 : bool init true;
+
+    [place] true -> (o1x'=o1x_init) & (goright1'=goright1_init=1);
+
+    [o]  goright1 & o1x < xMAX -> 1/2: (o1x'=min(xMAX,o1x+1)) + 1/2: true;
+    [o]  goright1 & o1x = xMAX -> (goright1'=false);
+
+    [o] !goright1 & o1x > xMIN -> 1/2: (o1x'=max(xMIN,o1x-1)) + 1/2: true;
+    [o] !goright1 & o1x = xMIN -> (goright1'=true);
+endmodule
+
+module obstacle2=obstacle1[o1x=o2x,goright1=goright2,o1x_init=o2x_init,goright1_init=goright2_init] endmodule
+
+
+// obstacle detection
+const int RADIUS = 1;
+formula see1 = (x-o1x<=RADIUS & o1x-x<=RADIUS) & (y-o1y<=RADIUS & o1y-y<=RADIUS);
+formula see2 = (x-o2x<=RADIUS & o2x-x<=RADIUS) & (y-o2y<=RADIUS & o2y-y<=RADIUS);
+module scanner
+    see: bool init false;
+    [detect1]   true -> (see'=see1);
+    [detect2]   true -> (see'=see2);
+endmodule
+
+// crash detection
+formula at1 = x=o1x & y=o1y;
+formula at2 = x=o2x & y=o2y;
+
+module crash1
+    crash1 : bool init false;
+    [detect1] true -> (crash1'=at1);
+
+    [up]    true -> (crash1'=false);
+    [down]  true -> (crash1'=false);
+    [left]  true -> (crash1'=false);
+    [right] true -> (crash1'=false);
+    [wait]  true -> (crash1'=false);
+endmodule
+
+module crash2=crash1[crash1=crash2,detect1=detect2,at1=at2] endmodule
+
+formula step_penalty = 1;
+formula crash_penalty = 100;
+formula num_crashes = (crash1?1:0)+(crash2?1:0);
+formula penalty = step_penalty + num_crashes*crash_penalty;
+
+rewards "penalty"
+    [up]    true : penalty;
+    [down]  true : penalty;
+    [left]  true : penalty;
+    [right] true : penalty;
+    [wait]  true : penalty;
+endrewards
diff --git a/paynt/quotient/fsc.py b/paynt/quotient/fsc.py
@@ -112,7 +112,7 @@ def fill_zero_updates(self):
             self.update_function[node] = [0] * self.num_observations
 
     # this fixes FSCs contructed from not fully unfolded quotients
-    # careful this can be used only when the current state of the FSC is correct
+    # this can only be used when the current state of the FSC is correct
     def fill_implicit_actions_and_updates(self):
         for node in range(self.num_nodes):
             for obs in range(self.num_observations):

diff --git a/paynt/quotient/pomdp_family.py b/paynt/quotient/pomdp_family.py
@@ -47,7 +47,7 @@ def __init__(self, quotient_mdp, family, coloring, specification, obs_evaluator)
         # for each observation, a list of actions (indices) available
         self.observation_to_actions = None
         # POMDP manager used for unfolding the memory model into the quotient POMDP
-        self.product_pomdp_fsc = None
+        self.fsc_unfolder = None
 
         # identify actions available at each observation
         self.observation_to_actions = [None] * self.num_observations
@@ -73,15 +73,6 @@ def observation_is_trivial(self, obs):
         return len(self.observation_to_actions[obs])==1
 
 
-    def initialize_fsc_unfolder(self, fsc_is_deterministic=False):
-        if fsc_is_deterministic and not isinstance(self.product_pomdp_fsc, payntbind.synthesis.ProductPomdpFsc):
-            self.product_pomdp_fsc = payntbind.synthesis.ProductPomdpFsc(
-                self.quotient_mdp, self.state_to_observation, self.num_actions, self.choice_to_action)
-        if not fsc_is_deterministic and not isinstance(self.product_pomdp_fsc, payntbind.synthesis.ProductPomdpRandomizedFsc):
-            self.product_pomdp_fsc = payntbind.synthesis.ProductPomdpRandomizedFsc(
-                self.quotient_mdp, self.state_to_observation, self.num_actions, self.choice_to_action)
-
-
     def build_pomdp(self, family):
         ''' Construct the sub-POMDP from the given hole assignment. '''
         assert family.size == 1, "expecting family of size 1"
@@ -92,17 +83,21 @@ def build_pomdp(self, family):
         return SubPomdp(pomdp,self,state_map,choice_map)
 
 
-    def build_dtmc_sketch(self, fsc):
+    def build_dtmc_sketch(self, fsc, negate_specification=True):
         '''
         Construct the family of DTMCs representing the execution of the given FSC in different environments.
         '''
 
         # create the product
-        fsc.check(self.observation_to_actions)
-        self.initialize_fsc_unfolder(fsc.is_deterministic)
-        self.product_pomdp_fsc.apply_fsc(fsc.action_function, fsc.update_function)
-        product = self.product_pomdp_fsc.product
-        product_choice_to_choice = self.product_pomdp_fsc.product_choice_to_choice
+        fsc.check_action_function(self.observation_to_actions)
+
+
+        self.fsc_unfolder = payntbind.synthesis.FscUnfolder(
+            self.quotient_mdp, self.state_to_observation, self.num_actions, self.choice_to_action
+        )
+        self.fsc_unfolder.apply_fsc(fsc.action_function, fsc.update_function)
+        product = self.fsc_unfolder.product
+        product_choice_to_choice = self.fsc_unfolder.product_choice_to_choice
 
         # the product inherits the design space
         product_family = self.family.copy()
@@ -120,12 +115,20 @@ def build_dtmc_sketch(self, fsc):
             product_choice_to_hole_options.append(hole_options)
         product_coloring = payntbind.synthesis.Coloring(product_family.family, product.nondeterministic_choice_indices, product_choice_to_hole_options)
 
-        # handle specification
+        # copy the specification
         product_specification = self.specification.copy()
+        if negate_specification:
+            product_specification = product_specification.negate()
+
         dtmc_sketch = paynt.quotient.quotient.Quotient(product, product_family, product_coloring, product_specification)
         return dtmc_sketch
 
 
+
+
+
+### LEGACY CODE, NOT UP-TO-DATE ###
+
     def compute_qvalues_for_product_submdp(self, product_submdp : paynt.models.models.SubMdp):
         '''
         Given an MDP obtained after applying FSC to a family of POMDPs, compute for each state s, (reachable)

diff --git a/payntbind/src/synthesis/pomdp_family/FscUnfolder.cpp b/payntbind/src/synthesis/pomdp_family/FscUnfolder.cpp
@@ -0,0 +1,180 @@
+#include "FscUnfolder.h"
+
+#include "src/synthesis/translation/componentTranslations.h"
+
+namespace synthesis {
+
+
+    template<typename ValueType>
+    FscUnfolder<ValueType>::FscUnfolder(
+        storm::models::sparse::Model<ValueType> const& quotient,
+        std::vector<uint32_t> const& state_to_obs_class,
+        uint64_t num_actions,
+        std::vector<uint64_t> const& choice_to_action
+    ) : quotient(quotient), state_to_obs_class(state_to_obs_class),
+        num_actions(num_actions), choice_to_action(choice_to_action) {
+
+        this->state_translator = ItemKeyTranslator<std::tuple<uint64_t,uint64_t,bool>>();
+        this->state_action_choices.resize(this->quotient.getNumberOfStates());
+        std::vector<uint64_t> const& row_groups = this->quotient.getTransitionMatrix().getRowGroupIndices();
+        for(uint64_t state = 0; state < this->quotient.getNumberOfStates(); ++state) {
+            this->state_action_choices[state].resize(this->num_actions);
+            for (uint64_t choice = row_groups[state]; choice < row_groups[state+1]; ++choice) {
+                uint64_t action = this->choice_to_action[choice];
+                this->state_action_choices[state][action].insert(choice);
+            }
+        }
+    }
+
+    template<typename ValueType>
+    uint64_t FscUnfolder<ValueType>::invalidAction() {
+        return this->num_actions;
+    }
+
+    template<typename ValueType>
+    uint64_t FscUnfolder<ValueType>::invalidChoice() {
+        return this->quotient.getNumberOfChoices();
+    }
+
+    template<typename ValueType>
+    uint64_t FscUnfolder<ValueType>::numberOfTranslatedStates() {
+        return this->state_translator.numTranslations();
+    }
+
+    template<typename ValueType>
+    uint64_t FscUnfolder<ValueType>::numberOfTranslatedChoices() {
+        return this->product_choice_to_choice.size();
+    }
+
+    template<typename ValueType>
+    uint64_t FscUnfolder<ValueType>::translateInitialState() {
+        uint64_t initial_state = *(this->quotient.getInitialStates().begin());
+        uint64_t initial_memory = 0;
+        return this->state_translator.translate(initial_state,std::make_tuple(initial_memory,invalidAction(),false));
+    }
+
+
+    template<typename ValueType>
+    void FscUnfolder<ValueType>::buildStateSpace(
+        std::vector<std::vector<std::map<uint64_t,double>>> action_function,
+        std::vector<std::vector<std::map<uint64_t,double>>> update_function
+    ) {
+        this->state_translator.resize(this->quotient.getNumberOfStates());
+        uint64_t translated_state = this->translateInitialState();
+        while(true) {
+            auto[state,memory_action_transitioned] = this->state_translator.retrieve(translated_state);
+            auto[memory,action,transitioned] = memory_action_transitioned;
+            uint64_t observation = this->state_to_obs_class[state];
+            if(action == invalidAction() and not transitioned) {
+                // random choice of an action
+                for(auto [action,prob] : action_function[memory][observation]) {
+                    this->state_translator.translate(state,std::make_tuple(memory,action,false));
+                }
+            } else if(action != invalidAction()) {
+                // executing variants of the selected actions
+                for(uint64_t choice: this->state_action_choices[state][action]) {
+                    for(auto const &entry: this->quotient.getTransitionMatrix().getRow(choice)) {
+                        uint64_t state_dst = entry.getColumn();
+                        this->state_translator.translate(state_dst,std::make_tuple(memory,invalidAction(),true));
+                    }
+                }
+            } else { // action == invalidAction() and transitioned
+                // executing memory update
+                for(auto [memory_dst,prob] : update_function[memory][observation]) {
+                    this->state_translator.translate(state,std::make_tuple(memory_dst,invalidAction(),false));
+                }
+            }
+            translated_state++;
+            if(translated_state >= numberOfTranslatedStates()) {
+                break;
+            }
+        }
+
+        this->product_state_to_state = this->state_translator.translationToItem();
+        this->product_state_to_state_memory_action_transitioned = this->state_translator.translationToItemKey();
+    }
+
+    template<typename ValueType>
+    storm::storage::SparseMatrix<ValueType> FscUnfolder<ValueType>::buildTransitionMatrix(
+        std::vector<std::vector<std::map<uint64_t,double>>> action_function,
+        std::vector<std::vector<std::map<uint64_t,double>>> update_function
+    ) {
+        this->product_choice_to_choice.clear();
+        storm::storage::SparseMatrixBuilder<ValueType> builder(0, 0, 0, false, true, 0);
+        for(uint64_t translated_state = 0; translated_state < numberOfTranslatedStates(); ++translated_state) {
+            builder.newRowGroup(numberOfTranslatedChoices());
+            auto[state,memory_action_transitioned] = this->state_translator.retrieve(translated_state);
+            auto[memory,action,transitioned] = memory_action_transitioned;
+            uint64_t observation = this->state_to_obs_class[state];
+            if(action == invalidAction() and not transitioned) {
+                // random choice of an action
+                uint64_t product_choice = numberOfTranslatedChoices();
+                this->product_choice_to_choice.push_back(invalidChoice());
+                for(auto [action,prob] : action_function[memory][observation]) {
+                    uint64_t translated_dst = this->state_translator.translate(state,std::make_tuple(memory,action,false));
+                    builder.addNextValue(product_choice, translated_dst, prob);
+                }
+            } else if(action != invalidAction()) {
+                // executing variants of the selected actions
+                for(uint64_t choice: this->state_action_choices[state][action]) {
+                    uint64_t product_choice = numberOfTranslatedChoices();
+                    this->product_choice_to_choice.push_back(choice);
+                    for(auto const &entry: this->quotient.getTransitionMatrix().getRow(choice)) {
+                        uint64_t state_dst = entry.getColumn();
+                        uint64_t translated_dst = this->state_translator.translate(state_dst,std::make_tuple(memory,invalidAction(),true));
+                        builder.addNextValue(product_choice, translated_dst, entry.getValue());
+                    }
+                }
+            } else { // action == invalidAction() and transitioned
+                // executing memory update
+                uint64_t product_choice = numberOfTranslatedChoices();
+                this->product_choice_to_choice.push_back(invalidChoice());
+                for(auto [memory_dst,prob] : update_function[memory][observation]) {
+                    uint64_t translated_dst = this->state_translator.translate(state,std::make_tuple(memory_dst,invalidAction(),false));
+                    builder.addNextValue(product_choice, translated_dst, prob);
+                }
+            }
+        }
+
+        return builder.build();
+    }
+
+
+    template<typename ValueType>
+    void FscUnfolder<ValueType>::applyFsc(
+        std::vector<std::vector<std::map<uint64_t,double>>> action_function,
+        std::vector<std::vector<std::map<uint64_t,double>>> update_function
+    ) {
+        this->buildStateSpace(action_function,update_function);
+        storm::storage::sparse::ModelComponents<ValueType> components;
+        auto translated_initial_state = this->translateInitialState();
+        components.stateLabeling = synthesis::translateStateLabeling(
+            this->quotient,this->state_translator.translationToItem(),translated_initial_state
+        );
+
+        components.transitionMatrix = this->buildTransitionMatrix(action_function,update_function);
+        storm::storage::BitVector translated_choice_mask(numberOfTranslatedChoices(),true);
+        auto quotient_num_choices = this->quotient.getNumberOfChoices();
+        for(uint64_t translated_choice = 0; translated_choice<numberOfTranslatedChoices(); ++translated_choice) {
+            if(this->product_choice_to_choice[translated_choice]==quotient_num_choices) {
+                translated_choice_mask.set(translated_choice,false);
+            }
+        }
+        components.choiceLabeling = synthesis::translateChoiceLabeling(this->quotient,this->product_choice_to_choice,translated_choice_mask);
+        for (auto const& reward_model : this->quotient.getRewardModels()) {
+            auto new_reward_model = synthesis::translateRewardModel(reward_model.second,this->product_choice_to_choice,translated_choice_mask);
+            components.rewardModels.emplace(reward_model.first, new_reward_model);
+        }
+
+        this->clearMemory();
+        this->product = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(components));
+    }
+
+    template<typename ValueType>
+    void FscUnfolder<ValueType>::clearMemory() {
+        this->state_translator.clear();
+    }
+
+
+    template class FscUnfolder<double>;
+}