-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
unfold randomized posterior-aware FSC into a POMDP sketch
- Loading branch information
Roman Andriushchenko
committed
Sep 5, 2024
1 parent
56f6a0f
commit e555499
Showing
12 changed files
with
352 additions
and
425 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
R{"penalty"}min=? [C{0.99}] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
pomdp | ||
|
||
// grid dimensions | ||
const int N=5; | ||
const int xMIN = 0; | ||
const int yMIN = 0; | ||
const int xMAX = N-1; | ||
const int yMAX = N-1; | ||
|
||
formula goal = (x=xMAX) & (y=yMAX); | ||
|
||
observable "clk" = clk; | ||
observable "goal" = goal; | ||
observable "see" = see; | ||
|
||
// synchronization | ||
// The clk module sequences one round of the model through action synchronisation: | ||
//   clk=-1 : [place]                      (fires once; clk starts at -1 and mod(0,4)=0) | ||
//   clk=0  : [left]/[right]/[down]/[up]/[wait] | ||
//   clk=1  : [o] | ||
//   clk=2  : [detect1] | ||
//   clk=3  : [detect2], after which mod(4,4)=0 starts the next round | ||
// Every command is guarded by !goal, so this module enables no action once goal holds. | ||
formula clk_next = mod(clk+1,4); | ||
module clk | ||
clk : [-1..3] init -1; | ||
|
||
[place] !goal & clk=-1 -> (clk'=clk_next); | ||
[left] !goal & clk=0 -> (clk'=clk_next); | ||
[right] !goal & clk=0 -> (clk'=clk_next); | ||
[down] !goal & clk=0 -> (clk'=clk_next); | ||
[up] !goal & clk=0 -> (clk'=clk_next); | ||
[wait] !goal & clk=0 -> (clk'=clk_next); | ||
|
||
[o] !goal & clk=1 -> (clk'=clk_next); | ||
[detect1] !goal & clk=2 -> (clk'=clk_next); | ||
[detect2] !goal & clk=3 -> (clk'=clk_next); | ||
endmodule | ||
// agent moving towards the exit | ||
// Each movement command shifts one coordinate by one cell with probability 1-slip, | ||
// clamped to [xMIN..xMAX] / [yMIN..yMAX] via max/min, and leaves the position | ||
// unchanged with probability slip. With slip = 0.0 the moves are deterministic. | ||
// [wait] never changes the position. The guards here are all true; when each | ||
// command can fire is restricted by the synchronising commands of other modules. | ||
const double slip = 0.0; | ||
module agent | ||
x : [xMIN..xMAX] init xMIN; | ||
y : [yMIN..yMAX] init yMIN; | ||
[left] true -> (1-slip): (x'=max(x-1,xMIN)) + slip: true; | ||
[right] true -> (1-slip): (x'=min(x+1,xMAX)) + slip: true; | ||
[down] true -> (1-slip): (y'=max(y-1,yMIN)) + slip: true; | ||
[up] true -> (1-slip): (y'=min(y+1,yMAX)) + slip: true; | ||
[wait] true -> true; | ||
endmodule | ||
// obstacles oscillating on the x-axis | ||
// Holes (sketch parameters; presumably fixed by the synthesis tool - confirm): | ||
//   oNx_init      initial column of obstacle N | ||
//   gorightN_init initial direction of obstacle N (1 means moving right) | ||
//   oNy           row of obstacle N, chosen from 1..4; no command below changes it | ||
hole int o1x_init in {0,1,2,3,4}; | ||
hole int o2x_init in {0,1,2,3,4}; | ||
hole int goright1_init in {0,1}; | ||
hole int goright2_init in {0,1}; | ||
hole int o1y in {1,2,3,4}; | ||
hole int o2y in {1,2,3,4}; | ||
module obstacle1 | ||
o1x : [xMIN..xMAX] init xMIN; | ||
goright1 : bool init true; | ||
// [place] overwrites the declared initial values with the hole values; | ||
// goright1 becomes the truth value of (goright1_init=1) | ||
[place] true -> (o1x'=o1x_init) & (goright1'=goright1_init=1); | ||
// away from the border: advance one cell in the current direction with | ||
// probability 1/2, otherwise stay | ||
// at the border: only flip the direction, the column is left unchanged | ||
[o] goright1 & o1x < xMAX -> 1/2: (o1x'=min(xMAX,o1x+1)) + 1/2: true; | ||
[o] goright1 & o1x = xMAX -> (goright1'=false); | ||
[o] !goright1 & o1x > xMIN -> 1/2: (o1x'=max(xMIN,o1x-1)) + 1/2: true; | ||
[o] !goright1 & o1x = xMIN -> (goright1'=true); | ||
endmodule | ||
// obstacle2 is a renamed copy of obstacle1 using the second set of variables and holes | ||
module obstacle2=obstacle1[o1x=o2x,goright1=goright2,o1x_init=o2x_init,goright1_init=goright2_init] endmodule | ||
// obstacle detection | ||
// seeN holds when the agent is within RADIUS cells of obstacle N in both the x and | ||
// the y direction (each pair of inequalities bounds the absolute difference). | ||
const int RADIUS = 1; | ||
formula see1 = (x-o1x<=RADIUS & o1x-x<=RADIUS) & (y-o1y<=RADIUS & o1y-y<=RADIUS); | ||
formula see2 = (x-o2x<=RADIUS & o2x-x<=RADIUS) & (y-o2y<=RADIUS & o2y-y<=RADIUS); | ||
module scanner | ||
see: bool init false; | ||
// A single flag is shared by both detections: [detect1] stores see1 and | ||
// [detect2] then overwrites it with see2. | ||
// NOTE(review): after [detect2] the flag therefore reflects obstacle 2 only; | ||
// confirm this is intended. | ||
[detect1] true -> (see'=see1); | ||
[detect2] true -> (see'=see2); | ||
endmodule | ||
// crash detection | ||
// atN holds when the agent occupies the same cell as obstacle N. | ||
formula at1 = x=o1x & y=o1y; | ||
formula at2 = x=o2x & y=o2y; | ||
module crash1 | ||
crash1 : bool init false; | ||
// [detect1] records whether the agent currently shares a cell with obstacle 1 | ||
[detect1] true -> (crash1'=at1); | ||
|
||
// every agent action clears the flag again | ||
[up] true -> (crash1'=false); | ||
[down] true -> (crash1'=false); | ||
[left] true -> (crash1'=false); | ||
[right] true -> (crash1'=false); | ||
[wait] true -> (crash1'=false); | ||
endmodule | ||
// crash2 is a renamed copy of crash1 that listens to [detect2] and uses at2 | ||
module crash2=crash1[crash1=crash2,detect1=detect2,at1=at2] endmodule | ||
// Cost structure: every agent action costs step_penalty, plus crash_penalty for | ||
// each crash flag (crash1, crash2) that is set, i.e. 1, 101 or 201 per action. | ||
// NOTE(review): the agent actions also reset the crash flags, so this relies on | ||
// the reward being evaluated in the state the action is taken from - confirm | ||
// against the PRISM reward semantics. | ||
formula step_penalty = 1; | ||
formula crash_penalty = 100; | ||
formula num_crashes = (crash1?1:0)+(crash2?1:0); | ||
formula penalty = step_penalty + num_crashes*crash_penalty; | ||
rewards "penalty" | ||
[up] true : penalty; | ||
[down] true : penalty; | ||
[left] true : penalty; | ||
[right] true : penalty; | ||
[wait] true : penalty; | ||
endrewards |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
#include "FscUnfolder.h" | ||
|
||
#include "src/synthesis/translation/componentTranslations.h" | ||
|
||
namespace synthesis { | ||
|
||
|
||
template<typename ValueType> | ||
FscUnfolder<ValueType>::FscUnfolder( | ||
storm::models::sparse::Model<ValueType> const& quotient, | ||
std::vector<uint32_t> const& state_to_obs_class, | ||
uint64_t num_actions, | ||
std::vector<uint64_t> const& choice_to_action | ||
) : quotient(quotient), state_to_obs_class(state_to_obs_class), | ||
num_actions(num_actions), choice_to_action(choice_to_action) { | ||
|
||
this->state_translator = ItemKeyTranslator<std::tuple<uint64_t,uint64_t,bool>>(); | ||
this->state_action_choices.resize(this->quotient.getNumberOfStates()); | ||
std::vector<uint64_t> const& row_groups = this->quotient.getTransitionMatrix().getRowGroupIndices(); | ||
for(uint64_t state = 0; state < this->quotient.getNumberOfStates(); ++state) { | ||
this->state_action_choices[state].resize(this->num_actions); | ||
for (uint64_t choice = row_groups[state]; choice < row_groups[state+1]; ++choice) { | ||
uint64_t action = this->choice_to_action[choice]; | ||
this->state_action_choices[state][action].insert(choice); | ||
} | ||
} | ||
} | ||
|
||
template<typename ValueType> | ||
uint64_t FscUnfolder<ValueType>::invalidAction() { | ||
return this->num_actions; | ||
} | ||
|
||
template<typename ValueType> | ||
uint64_t FscUnfolder<ValueType>::invalidChoice() { | ||
return this->quotient.getNumberOfChoices(); | ||
} | ||
|
||
template<typename ValueType> | ||
uint64_t FscUnfolder<ValueType>::numberOfTranslatedStates() { | ||
return this->state_translator.numTranslations(); | ||
} | ||
|
||
template<typename ValueType> | ||
uint64_t FscUnfolder<ValueType>::numberOfTranslatedChoices() { | ||
return this->product_choice_to_choice.size(); | ||
} | ||
|
||
template<typename ValueType> | ||
uint64_t FscUnfolder<ValueType>::translateInitialState() { | ||
uint64_t initial_state = *(this->quotient.getInitialStates().begin()); | ||
uint64_t initial_memory = 0; | ||
return this->state_translator.translate(initial_state,std::make_tuple(initial_memory,invalidAction(),false)); | ||
} | ||
|
||
|
||
template<typename ValueType> | ||
void FscUnfolder<ValueType>::buildStateSpace( | ||
std::vector<std::vector<std::map<uint64_t,double>>> action_function, | ||
std::vector<std::vector<std::map<uint64_t,double>>> update_function | ||
) { | ||
this->state_translator.resize(this->quotient.getNumberOfStates()); | ||
uint64_t translated_state = this->translateInitialState(); | ||
while(true) { | ||
auto[state,memory_action_transitioned] = this->state_translator.retrieve(translated_state); | ||
auto[memory,action,transitioned] = memory_action_transitioned; | ||
uint64_t observation = this->state_to_obs_class[state]; | ||
if(action == invalidAction() and not transitioned) { | ||
// random choice of an action | ||
for(auto [action,prob] : action_function[memory][observation]) { | ||
this->state_translator.translate(state,std::make_tuple(memory,action,false)); | ||
} | ||
} else if(action != invalidAction()) { | ||
// executing variants of the selected actions | ||
for(uint64_t choice: this->state_action_choices[state][action]) { | ||
for(auto const &entry: this->quotient.getTransitionMatrix().getRow(choice)) { | ||
uint64_t state_dst = entry.getColumn(); | ||
this->state_translator.translate(state_dst,std::make_tuple(memory,invalidAction(),true)); | ||
} | ||
} | ||
} else { // action == invalidAction() and transitioned | ||
// executing memory update | ||
for(auto [memory_dst,prob] : update_function[memory][observation]) { | ||
this->state_translator.translate(state,std::make_tuple(memory_dst,invalidAction(),false)); | ||
} | ||
} | ||
translated_state++; | ||
if(translated_state >= numberOfTranslatedStates()) { | ||
break; | ||
} | ||
} | ||
|
||
this->product_state_to_state = this->state_translator.translationToItem(); | ||
this->product_state_to_state_memory_action_transitioned = this->state_translator.translationToItemKey(); | ||
} | ||
|
||
template<typename ValueType> | ||
storm::storage::SparseMatrix<ValueType> FscUnfolder<ValueType>::buildTransitionMatrix( | ||
std::vector<std::vector<std::map<uint64_t,double>>> action_function, | ||
std::vector<std::vector<std::map<uint64_t,double>>> update_function | ||
) { | ||
this->product_choice_to_choice.clear(); | ||
storm::storage::SparseMatrixBuilder<ValueType> builder(0, 0, 0, false, true, 0); | ||
for(uint64_t translated_state = 0; translated_state < numberOfTranslatedStates(); ++translated_state) { | ||
builder.newRowGroup(numberOfTranslatedChoices()); | ||
auto[state,memory_action_transitioned] = this->state_translator.retrieve(translated_state); | ||
auto[memory,action,transitioned] = memory_action_transitioned; | ||
uint64_t observation = this->state_to_obs_class[state]; | ||
if(action == invalidAction() and not transitioned) { | ||
// random choice of an action | ||
uint64_t product_choice = numberOfTranslatedChoices(); | ||
this->product_choice_to_choice.push_back(invalidChoice()); | ||
for(auto [action,prob] : action_function[memory][observation]) { | ||
uint64_t translated_dst = this->state_translator.translate(state,std::make_tuple(memory,action,false)); | ||
builder.addNextValue(product_choice, translated_dst, prob); | ||
} | ||
} else if(action != invalidAction()) { | ||
// executing variants of the selected actions | ||
for(uint64_t choice: this->state_action_choices[state][action]) { | ||
uint64_t product_choice = numberOfTranslatedChoices(); | ||
this->product_choice_to_choice.push_back(choice); | ||
for(auto const &entry: this->quotient.getTransitionMatrix().getRow(choice)) { | ||
uint64_t state_dst = entry.getColumn(); | ||
uint64_t translated_dst = this->state_translator.translate(state_dst,std::make_tuple(memory,invalidAction(),true)); | ||
builder.addNextValue(product_choice, translated_dst, entry.getValue()); | ||
} | ||
} | ||
} else { // action == invalidAction() and transitioned | ||
// executing memory update | ||
uint64_t product_choice = numberOfTranslatedChoices(); | ||
this->product_choice_to_choice.push_back(invalidChoice()); | ||
for(auto [memory_dst,prob] : update_function[memory][observation]) { | ||
uint64_t translated_dst = this->state_translator.translate(state,std::make_tuple(memory_dst,invalidAction(),false)); | ||
builder.addNextValue(product_choice, translated_dst, prob); | ||
} | ||
} | ||
} | ||
|
||
return builder.build(); | ||
} | ||
|
||
|
||
template<typename ValueType> | ||
void FscUnfolder<ValueType>::applyFsc( | ||
std::vector<std::vector<std::map<uint64_t,double>>> action_function, | ||
std::vector<std::vector<std::map<uint64_t,double>>> update_function | ||
) { | ||
this->buildStateSpace(action_function,update_function); | ||
storm::storage::sparse::ModelComponents<ValueType> components; | ||
auto translated_initial_state = this->translateInitialState(); | ||
components.stateLabeling = synthesis::translateStateLabeling( | ||
this->quotient,this->state_translator.translationToItem(),translated_initial_state | ||
); | ||
|
||
components.transitionMatrix = this->buildTransitionMatrix(action_function,update_function); | ||
storm::storage::BitVector translated_choice_mask(numberOfTranslatedChoices(),true); | ||
auto quotient_num_choices = this->quotient.getNumberOfChoices(); | ||
for(uint64_t translated_choice = 0; translated_choice<numberOfTranslatedChoices(); ++translated_choice) { | ||
if(this->product_choice_to_choice[translated_choice]==quotient_num_choices) { | ||
translated_choice_mask.set(translated_choice,false); | ||
} | ||
} | ||
components.choiceLabeling = synthesis::translateChoiceLabeling(this->quotient,this->product_choice_to_choice,translated_choice_mask); | ||
for (auto const& reward_model : this->quotient.getRewardModels()) { | ||
auto new_reward_model = synthesis::translateRewardModel(reward_model.second,this->product_choice_to_choice,translated_choice_mask); | ||
components.rewardModels.emplace(reward_model.first, new_reward_model); | ||
} | ||
|
||
this->clearMemory(); | ||
this->product = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(components)); | ||
} | ||
|
||
template<typename ValueType> | ||
void FscUnfolder<ValueType>::clearMemory() { | ||
this->state_translator.clear(); | ||
} | ||
|
||
|
||
template class FscUnfolder<double>; | ||
} |
Oops, something went wrong.