Skip to content

Commit

Permalink
unfold randomized posterior-aware FSC into a POMDP sketch
Browse files Browse the repository at this point in the history
  • Loading branch information
Roman Andriushchenko committed Sep 5, 2024
1 parent 56f6a0f commit e555499
Show file tree
Hide file tree
Showing 12 changed files with 352 additions and 425 deletions.
1 change: 1 addition & 0 deletions models/pomdp/sketches/avoid-discounted/sketch.props
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
R{"penalty"}min=? [C{0.99}]
114 changes: 114 additions & 0 deletions models/pomdp/sketches/avoid-discounted/sketch.templ
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
pomdp

// grid dimensions
const int N=5;
const int xMIN = 0;
const int yMIN = 0;
const int xMAX = N-1;
const int yMAX = N-1;

formula goal = (x=xMAX) & (y=yMAX);

observable "clk" = clk;
observable "goal" = goal;
observable "see" = see;

// synchronization
formula clk_next = mod(clk+1,4);
module clk
clk : [-1..3] init -1;

[place] !goal & clk=-1 -> (clk'=clk_next);
[left] !goal & clk=0 -> (clk'=clk_next);
[right] !goal & clk=0 -> (clk'=clk_next);
[down] !goal & clk=0 -> (clk'=clk_next);
[up] !goal & clk=0 -> (clk'=clk_next);
[wait] !goal & clk=0 -> (clk'=clk_next);

[o] !goal & clk=1 -> (clk'=clk_next);
[detect1] !goal & clk=2 -> (clk'=clk_next);
[detect2] !goal & clk=3 -> (clk'=clk_next);
endmodule
// agent moving towards the exit
const double slip = 0.0;
module agent
x : [xMIN..xMAX] init xMIN;
y : [yMIN..yMAX] init yMIN;
[left] true -> (1-slip): (x'=max(x-1,xMIN)) + slip: true;
[right] true -> (1-slip): (x'=min(x+1,xMAX)) + slip: true;
[down] true -> (1-slip): (y'=max(y-1,yMIN)) + slip: true;
[up] true -> (1-slip): (y'=min(y+1,yMAX)) + slip: true;
[wait] true -> true;
endmodule
// obstacles oscillating on the x-axis
hole int o1x_init in {0,1,2,3,4};
hole int o2x_init in {0,1,2,3,4};
hole int goright1_init in {0,1};
hole int goright2_init in {0,1};
hole int o1y in {1,2,3,4};
hole int o2y in {1,2,3,4};
module obstacle1
o1x : [xMIN..xMAX] init xMIN;
goright1 : bool init true;
[place] true -> (o1x'=o1x_init) & (goright1'=goright1_init=1);
[o] goright1 & o1x < xMAX -> 1/2: (o1x'=min(xMAX,o1x+1)) + 1/2: true;
[o] goright1 & o1x = xMAX -> (goright1'=false);
[o] !goright1 & o1x > xMIN -> 1/2: (o1x'=max(xMIN,o1x-1)) + 1/2: true;
[o] !goright1 & o1x = xMIN -> (goright1'=true);
endmodule
module obstacle2=obstacle1[o1x=o2x,goright1=goright2,o1x_init=o2x_init,goright1_init=goright2_init] endmodule
// obstacle detection
const int RADIUS = 1;
formula see1 = (x-o1x<=RADIUS & o1x-x<=RADIUS) & (y-o1y<=RADIUS & o1y-y<=RADIUS);
formula see2 = (x-o2x<=RADIUS & o2x-x<=RADIUS) & (y-o2y<=RADIUS & o2y-y<=RADIUS);
module scanner
see: bool init false;
[detect1] true -> (see'=see1);
[detect2] true -> (see'=see2);
endmodule
// crash detection
formula at1 = x=o1x & y=o1y;
formula at2 = x=o2x & y=o2y;
module crash1
crash1 : bool init false;
[detect1] true -> (crash1'=at1);

[up] true -> (crash1'=false);
[down] true -> (crash1'=false);
[left] true -> (crash1'=false);
[right] true -> (crash1'=false);
[wait] true -> (crash1'=false);
endmodule
module crash2=crash1[crash1=crash2,detect1=detect2,at1=at2] endmodule
formula step_penalty = 1;
formula crash_penalty = 100;
formula num_crashes = (crash1?1:0)+(crash2?1:0);
formula penalty = step_penalty + num_crashes*crash_penalty;
rewards "penalty"
[up] true : penalty;
[down] true : penalty;
[left] true : penalty;
[right] true : penalty;
[wait] true : penalty;
endrewards
2 changes: 1 addition & 1 deletion paynt/quotient/fsc.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def fill_zero_updates(self):
self.update_function[node] = [0] * self.num_observations

# this fixes FSCs contructed from not fully unfolded quotients
# careful this can be used only when the current state of the FSC is correct
# this can only be used when the current state of the FSC is correct
def fill_implicit_actions_and_updates(self):
for node in range(self.num_nodes):
for obs in range(self.num_observations):
Expand Down
37 changes: 20 additions & 17 deletions paynt/quotient/pomdp_family.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def __init__(self, quotient_mdp, family, coloring, specification, obs_evaluator)
# for each observation, a list of actions (indices) available
self.observation_to_actions = None
# POMDP manager used for unfolding the memory model into the quotient POMDP
self.product_pomdp_fsc = None
self.fsc_unfolder = None

# identify actions available at each observation
self.observation_to_actions = [None] * self.num_observations
Expand All @@ -73,15 +73,6 @@ def observation_is_trivial(self, obs):
return len(self.observation_to_actions[obs])==1


def initialize_fsc_unfolder(self, fsc_is_deterministic=False):
if fsc_is_deterministic and not isinstance(self.product_pomdp_fsc, payntbind.synthesis.ProductPomdpFsc):
self.product_pomdp_fsc = payntbind.synthesis.ProductPomdpFsc(
self.quotient_mdp, self.state_to_observation, self.num_actions, self.choice_to_action)
if not fsc_is_deterministic and not isinstance(self.product_pomdp_fsc, payntbind.synthesis.ProductPomdpRandomizedFsc):
self.product_pomdp_fsc = payntbind.synthesis.ProductPomdpRandomizedFsc(
self.quotient_mdp, self.state_to_observation, self.num_actions, self.choice_to_action)


def build_pomdp(self, family):
''' Construct the sub-POMDP from the given hole assignment. '''
assert family.size == 1, "expecting family of size 1"
Expand All @@ -92,17 +83,21 @@ def build_pomdp(self, family):
return SubPomdp(pomdp,self,state_map,choice_map)


def build_dtmc_sketch(self, fsc):
def build_dtmc_sketch(self, fsc, negate_specification=True):
'''
Construct the family of DTMCs representing the execution of the given FSC in different environments.
'''

# create the product
fsc.check(self.observation_to_actions)
self.initialize_fsc_unfolder(fsc.is_deterministic)
self.product_pomdp_fsc.apply_fsc(fsc.action_function, fsc.update_function)
product = self.product_pomdp_fsc.product
product_choice_to_choice = self.product_pomdp_fsc.product_choice_to_choice
fsc.check_action_function(self.observation_to_actions)


self.fsc_unfolder = payntbind.synthesis.FscUnfolder(
self.quotient_mdp, self.state_to_observation, self.num_actions, self.choice_to_action
)
self.fsc_unfolder.apply_fsc(fsc.action_function, fsc.update_function)
product = self.fsc_unfolder.product
product_choice_to_choice = self.fsc_unfolder.product_choice_to_choice

# the product inherits the design space
product_family = self.family.copy()
Expand All @@ -120,12 +115,20 @@ def build_dtmc_sketch(self, fsc):
product_choice_to_hole_options.append(hole_options)
product_coloring = payntbind.synthesis.Coloring(product_family.family, product.nondeterministic_choice_indices, product_choice_to_hole_options)

# handle specification
# copy the specification
product_specification = self.specification.copy()
if negate_specification:
product_specification = product_specification.negate()

dtmc_sketch = paynt.quotient.quotient.Quotient(product, product_family, product_coloring, product_specification)
return dtmc_sketch





### LEGACY CODE, NOT UP-TO-DATE ###

def compute_qvalues_for_product_submdp(self, product_submdp : paynt.models.models.SubMdp):
'''
Given an MDP obtained after applying FSC to a family of POMDPs, compute for each state s, (reachable)
Expand Down
180 changes: 180 additions & 0 deletions payntbind/src/synthesis/pomdp_family/FscUnfolder.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
#include "FscUnfolder.h"

#include "src/synthesis/translation/componentTranslations.h"

namespace synthesis {


template<typename ValueType>
FscUnfolder<ValueType>::FscUnfolder(
storm::models::sparse::Model<ValueType> const& quotient,
std::vector<uint32_t> const& state_to_obs_class,
uint64_t num_actions,
std::vector<uint64_t> const& choice_to_action
) : quotient(quotient), state_to_obs_class(state_to_obs_class),
num_actions(num_actions), choice_to_action(choice_to_action) {

this->state_translator = ItemKeyTranslator<std::tuple<uint64_t,uint64_t,bool>>();
this->state_action_choices.resize(this->quotient.getNumberOfStates());
std::vector<uint64_t> const& row_groups = this->quotient.getTransitionMatrix().getRowGroupIndices();
for(uint64_t state = 0; state < this->quotient.getNumberOfStates(); ++state) {
this->state_action_choices[state].resize(this->num_actions);
for (uint64_t choice = row_groups[state]; choice < row_groups[state+1]; ++choice) {
uint64_t action = this->choice_to_action[choice];
this->state_action_choices[state][action].insert(choice);
}
}
}

template<typename ValueType>
uint64_t FscUnfolder<ValueType>::invalidAction() {
return this->num_actions;
}

template<typename ValueType>
uint64_t FscUnfolder<ValueType>::invalidChoice() {
return this->quotient.getNumberOfChoices();
}

template<typename ValueType>
uint64_t FscUnfolder<ValueType>::numberOfTranslatedStates() {
return this->state_translator.numTranslations();
}

template<typename ValueType>
uint64_t FscUnfolder<ValueType>::numberOfTranslatedChoices() {
return this->product_choice_to_choice.size();
}

template<typename ValueType>
uint64_t FscUnfolder<ValueType>::translateInitialState() {
uint64_t initial_state = *(this->quotient.getInitialStates().begin());
uint64_t initial_memory = 0;
return this->state_translator.translate(initial_state,std::make_tuple(initial_memory,invalidAction(),false));
}


template<typename ValueType>
void FscUnfolder<ValueType>::buildStateSpace(
std::vector<std::vector<std::map<uint64_t,double>>> action_function,
std::vector<std::vector<std::map<uint64_t,double>>> update_function
) {
this->state_translator.resize(this->quotient.getNumberOfStates());
uint64_t translated_state = this->translateInitialState();
while(true) {
auto[state,memory_action_transitioned] = this->state_translator.retrieve(translated_state);
auto[memory,action,transitioned] = memory_action_transitioned;
uint64_t observation = this->state_to_obs_class[state];
if(action == invalidAction() and not transitioned) {
// random choice of an action
for(auto [action,prob] : action_function[memory][observation]) {
this->state_translator.translate(state,std::make_tuple(memory,action,false));
}
} else if(action != invalidAction()) {
// executing variants of the selected actions
for(uint64_t choice: this->state_action_choices[state][action]) {
for(auto const &entry: this->quotient.getTransitionMatrix().getRow(choice)) {
uint64_t state_dst = entry.getColumn();
this->state_translator.translate(state_dst,std::make_tuple(memory,invalidAction(),true));
}
}
} else { // action == invalidAction() and transitioned
// executing memory update
for(auto [memory_dst,prob] : update_function[memory][observation]) {
this->state_translator.translate(state,std::make_tuple(memory_dst,invalidAction(),false));
}
}
translated_state++;
if(translated_state >= numberOfTranslatedStates()) {
break;
}
}

this->product_state_to_state = this->state_translator.translationToItem();
this->product_state_to_state_memory_action_transitioned = this->state_translator.translationToItemKey();
}

template<typename ValueType>
storm::storage::SparseMatrix<ValueType> FscUnfolder<ValueType>::buildTransitionMatrix(
std::vector<std::vector<std::map<uint64_t,double>>> action_function,
std::vector<std::vector<std::map<uint64_t,double>>> update_function
) {
this->product_choice_to_choice.clear();
storm::storage::SparseMatrixBuilder<ValueType> builder(0, 0, 0, false, true, 0);
for(uint64_t translated_state = 0; translated_state < numberOfTranslatedStates(); ++translated_state) {
builder.newRowGroup(numberOfTranslatedChoices());
auto[state,memory_action_transitioned] = this->state_translator.retrieve(translated_state);
auto[memory,action,transitioned] = memory_action_transitioned;
uint64_t observation = this->state_to_obs_class[state];
if(action == invalidAction() and not transitioned) {
// random choice of an action
uint64_t product_choice = numberOfTranslatedChoices();
this->product_choice_to_choice.push_back(invalidChoice());
for(auto [action,prob] : action_function[memory][observation]) {
uint64_t translated_dst = this->state_translator.translate(state,std::make_tuple(memory,action,false));
builder.addNextValue(product_choice, translated_dst, prob);
}
} else if(action != invalidAction()) {
// executing variants of the selected actions
for(uint64_t choice: this->state_action_choices[state][action]) {
uint64_t product_choice = numberOfTranslatedChoices();
this->product_choice_to_choice.push_back(choice);
for(auto const &entry: this->quotient.getTransitionMatrix().getRow(choice)) {
uint64_t state_dst = entry.getColumn();
uint64_t translated_dst = this->state_translator.translate(state_dst,std::make_tuple(memory,invalidAction(),true));
builder.addNextValue(product_choice, translated_dst, entry.getValue());
}
}
} else { // action == invalidAction() and transitioned
// executing memory update
uint64_t product_choice = numberOfTranslatedChoices();
this->product_choice_to_choice.push_back(invalidChoice());
for(auto [memory_dst,prob] : update_function[memory][observation]) {
uint64_t translated_dst = this->state_translator.translate(state,std::make_tuple(memory_dst,invalidAction(),false));
builder.addNextValue(product_choice, translated_dst, prob);
}
}
}

return builder.build();
}


template<typename ValueType>
void FscUnfolder<ValueType>::applyFsc(
std::vector<std::vector<std::map<uint64_t,double>>> action_function,
std::vector<std::vector<std::map<uint64_t,double>>> update_function
) {
this->buildStateSpace(action_function,update_function);
storm::storage::sparse::ModelComponents<ValueType> components;
auto translated_initial_state = this->translateInitialState();
components.stateLabeling = synthesis::translateStateLabeling(
this->quotient,this->state_translator.translationToItem(),translated_initial_state
);

components.transitionMatrix = this->buildTransitionMatrix(action_function,update_function);
storm::storage::BitVector translated_choice_mask(numberOfTranslatedChoices(),true);
auto quotient_num_choices = this->quotient.getNumberOfChoices();
for(uint64_t translated_choice = 0; translated_choice<numberOfTranslatedChoices(); ++translated_choice) {
if(this->product_choice_to_choice[translated_choice]==quotient_num_choices) {
translated_choice_mask.set(translated_choice,false);
}
}
components.choiceLabeling = synthesis::translateChoiceLabeling(this->quotient,this->product_choice_to_choice,translated_choice_mask);
for (auto const& reward_model : this->quotient.getRewardModels()) {
auto new_reward_model = synthesis::translateRewardModel(reward_model.second,this->product_choice_to_choice,translated_choice_mask);
components.rewardModels.emplace(reward_model.first, new_reward_model);
}

this->clearMemory();
this->product = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(components));
}

template<typename ValueType>
void FscUnfolder<ValueType>::clearMemory() {
this->state_translator.clear();
}


template class FscUnfolder<double>;
}
Loading

0 comments on commit e555499

Please sign in to comment.