From 2df20111017c380b02c0ed804d41405cb2744fac Mon Sep 17 00:00:00 2001 From: Roman Andriushchenko Date: Thu, 21 Dec 2023 10:01:17 +0100 Subject: [PATCH] build Paynt wrt. vanilla Stormpy --- .github/workflows/buildtest.yml | 36 +- .gitignore | 6 - Dockerfile | 72 +- alias-paynt.sh | 96 -- install.sh | 67 + paynt-specific-storm.dockerfile | 86 -- paynt/family/family.py | 6 +- paynt/family/smt.py | 4 +- paynt/parser/jani.py | 3 +- paynt/parser/prism_parser.py | 9 +- paynt/parser/sketch.py | 8 +- paynt/quotient/decpomdp.py | 3 +- paynt/quotient/mdp_family.py | 8 +- paynt/quotient/pomdp.py | 10 +- paynt/quotient/pomdp_family.py | 17 +- paynt/quotient/quotient.py | 10 +- paynt/quotient/storm_pomdp_control.py | 2 +- paynt/simulation/pomcp.py | 5 +- paynt/simulation/simulation.py | 2 - paynt/synthesizer/conflict_generator/dtmc.py | 4 +- paynt/synthesizer/conflict_generator/mdp.py | 4 +- paynt/synthesizer/policy_tree.py | 7 +- paynt/synthesizer/synthesizer_cegis.py | 2 - paynt/verification/property.py | 10 +- payntbind/.dockerignore | 20 + payntbind/.gitignore | 15 + payntbind/CMakeLists.txt | 94 ++ payntbind/README.md | 25 + payntbind/cmake/CMakeLists.txt | 38 + payntbind/cmake/config.py.in | 12 + payntbind/cmake/macros.cmake | 45 + payntbind/lib/payntbind/__init__.py | 13 + payntbind/lib/payntbind/_version.py | 1 + payntbind/lib/payntbind/synthesis/__init__.py | 2 + payntbind/resources/include_pybind11.cmake | 17 + payntbind/resources/pybind11_2.10.0.patch | 22 + payntbind/resources/pybind11_2.11.1.patch | 22 + payntbind/setup.cfg | 8 + payntbind/setup.py | 185 +++ payntbind/setup/__init__.py | 1 + payntbind/setup/config.py | 89 ++ payntbind/setup/helper.py | 74 + payntbind/src/common.h | 22 + payntbind/src/config.h.in | 1 + payntbind/src/helpers.h | 40 + payntbind/src/mod_synthesis.cpp | 14 + .../counterexamples/Counterexample.cpp | 501 +++++++ .../counterexamples/Counterexample.h | 162 +++ .../counterexamples/CounterexampleMdp.cpp | 535 +++++++ 
.../counterexamples/CounterexampleMdp.h | 191 +++ .../synthesis/counterexamples/bindings.cpp | 39 + payntbind/src/synthesis/decpomdp/DecPomdp.cpp | 436 ++++++ payntbind/src/synthesis/decpomdp/DecPomdp.h | 141 ++ payntbind/src/synthesis/decpomdp/bindings.cpp | 25 + payntbind/src/synthesis/decpomdp/madp/AUTHORS | 49 + payntbind/src/synthesis/decpomdp/madp/COPYING | 720 +++++++++ payntbind/src/synthesis/decpomdp/madp/LICENSE | 675 +++++++++ .../synthesis/decpomdp/madp/src/base/Action.h | 46 + .../decpomdp/madp/src/base/ActionDiscrete.h | 55 + .../synthesis/decpomdp/madp/src/base/Agent.h | 52 + .../madp/src/base/CPDDiscreteInterface.h | 65 + .../madp/src/base/CPDKroneckerDelta.h | 56 + .../synthesis/decpomdp/madp/src/base/CPT.cpp | 111 ++ .../synthesis/decpomdp/madp/src/base/CPT.h | 96 ++ .../decpomdp/madp/src/base/DecPOMDP.cpp | 47 + .../decpomdp/madp/src/base/DecPOMDP.h | 91 ++ .../madp/src/base/DecPOMDPDiscrete.cpp | 268 ++++ .../decpomdp/madp/src/base/DecPOMDPDiscrete.h | 174 +++ .../madp/src/base/DecPOMDPDiscreteInterface.h | 87 ++ .../madp/src/base/DecPOMDPInterface.h | 78 + .../decpomdp/madp/src/base/DiscreteEntity.h | 63 + .../src/synthesis/decpomdp/madp/src/base/E.h | 62 + .../decpomdp/madp/src/base/EDeadline.h | 62 + .../decpomdp/madp/src/base/EInvalidIndex.h | 49 + .../decpomdp/madp/src/base/ENoSubScope.h | 49 + .../decpomdp/madp/src/base/ENotCached.h | 49 + .../decpomdp/madp/src/base/EOverflow.h | 49 + .../synthesis/decpomdp/madp/src/base/EParse.h | 48 + .../src/base/EventObservationModelMapping.cpp | 65 + .../src/base/EventObservationModelMapping.h | 85 ++ .../EventObservationModelMappingSparse.cpp | 65 + .../base/EventObservationModelMappingSparse.h | 98 ++ .../decpomdp/madp/src/base/FSDist_COF.cpp | 242 ++++ .../decpomdp/madp/src/base/FSDist_COF.h | 108 ++ .../src/base/FactoredDecPOMDPDiscrete.cpp | 705 +++++++++ .../madp/src/base/FactoredDecPOMDPDiscrete.h | 416 ++++++ .../base/FactoredDecPOMDPDiscreteInterface.h | 165 +++ 
.../madp/src/base/FactoredMMDPDiscrete.cpp | 89 ++ .../madp/src/base/FactoredMMDPDiscrete.h | 93 ++ .../base/FactoredQFunctionScopeForStage.cpp | 54 + .../src/base/FactoredQFunctionScopeForStage.h | 64 + .../src/base/FactoredStateDistribution.cpp | 40 + .../madp/src/base/FactoredStateDistribution.h | 73 + .../decpomdp/madp/src/base/Globals.cpp | 62 + .../decpomdp/madp/src/base/Globals.h | 99 ++ .../decpomdp/madp/src/base/IndexTools.cpp | 632 ++++++++ .../decpomdp/madp/src/base/IndexTools.h | 257 ++++ .../decpomdp/madp/src/base/JointAction.h | 53 + .../madp/src/base/JointActionDiscrete.cpp | 135 ++ .../madp/src/base/JointActionDiscrete.h | 95 ++ .../decpomdp/madp/src/base/JointObservation.h | 50 + .../src/base/JointObservationDiscrete.cpp | 129 ++ .../madp/src/base/JointObservationDiscrete.h | 92 ++ .../src/base/MADPComponentDiscreteActions.cpp | 588 ++++++++ .../src/base/MADPComponentDiscreteActions.h | 196 +++ .../MADPComponentDiscreteObservations.cpp | 581 ++++++++ .../base/MADPComponentDiscreteObservations.h | 193 +++ .../src/base/MADPComponentDiscreteStates.cpp | 300 ++++ .../src/base/MADPComponentDiscreteStates.h | 135 ++ .../src/base/MADPComponentFactoredStates.cpp | 335 +++++ .../src/base/MADPComponentFactoredStates.h | 222 +++ .../src/base/MultiAgentDecisionProcess.cpp | 116 ++ .../madp/src/base/MultiAgentDecisionProcess.h | 131 ++ .../MultiAgentDecisionProcessDiscrete.cpp | 362 +++++ .../base/MultiAgentDecisionProcessDiscrete.h | 343 +++++ ...tDecisionProcessDiscreteFactoredStates.cpp | 1115 ++++++++++++++ ...entDecisionProcessDiscreteFactoredStates.h | 573 ++++++++ ...onProcessDiscreteFactoredStatesInterface.h | 151 ++ ...ltiAgentDecisionProcessDiscreteInterface.h | 231 +++ .../base/MultiAgentDecisionProcessInterface.h | 66 + .../madp/src/base/NamedDescribedEntity.cpp | 39 + .../madp/src/base/NamedDescribedEntity.h | 75 + .../synthesis/decpomdp/madp/src/base/OGet.h | 121 ++ .../decpomdp/madp/src/base/Observation.h | 46 + 
.../madp/src/base/ObservationDiscrete.h | 54 + .../decpomdp/madp/src/base/ObservationModel.h | 51 + .../src/base/ObservationModelDiscrete.cpp | 94 ++ .../madp/src/base/ObservationModelDiscrete.h | 64 + .../base/ObservationModelDiscreteInterface.h | 65 + .../madp/src/base/ObservationModelMapping.cpp | 51 + .../madp/src/base/ObservationModelMapping.h | 82 ++ .../base/ObservationModelMappingSparse.cpp | 51 + .../src/base/ObservationModelMappingSparse.h | 93 ++ .../madp/src/base/PDDiscreteInterface.h | 69 + .../decpomdp/madp/src/base/POMDPDiscrete.cpp | 43 + .../decpomdp/madp/src/base/POMDPDiscrete.h | 74 + .../synthesis/decpomdp/madp/src/base/POSG.cpp | 84 ++ .../synthesis/decpomdp/madp/src/base/POSG.h | 87 ++ .../decpomdp/madp/src/base/POSGDiscrete.cpp | 107 ++ .../decpomdp/madp/src/base/POSGDiscrete.h | 113 ++ .../madp/src/base/POSGDiscreteInterface.h | 79 + .../decpomdp/madp/src/base/POSGInterface.h | 75 + .../decpomdp/madp/src/base/PrintTools.h | 334 +++++ .../decpomdp/madp/src/base/QTableInterface.h | 55 + .../synthesis/decpomdp/madp/src/base/RGet.h | 85 ++ .../decpomdp/madp/src/base/RewardModel.h | 56 + .../src/base/RewardModelDiscreteInterface.h | 68 + .../madp/src/base/RewardModelMapping.cpp | 58 + .../madp/src/base/RewardModelMapping.h | 84 ++ .../src/base/RewardModelMappingSparse.cpp | 50 + .../madp/src/base/RewardModelMappingSparse.h | 99 ++ .../base/RewardModelMappingSparseMapped.cpp | 50 + .../src/base/RewardModelMappingSparseMapped.h | 101 ++ .../madp/src/base/RewardModelTOISparse.cpp | 67 + .../madp/src/base/RewardModelTOISparse.h | 88 ++ .../decpomdp/madp/src/base/Scope.cpp | 254 ++++ .../synthesis/decpomdp/madp/src/base/Scope.h | 121 ++ .../synthesis/decpomdp/madp/src/base/State.h | 44 + .../decpomdp/madp/src/base/StateDiscrete.h | 55 + .../madp/src/base/StateDistribution.h | 54 + .../madp/src/base/StateDistributionVector.h | 98 ++ .../madp/src/base/StateFactorDiscrete.cpp | 76 + .../madp/src/base/StateFactorDiscrete.h | 97 ++ 
.../decpomdp/madp/src/base/StringTools.cpp | 39 + .../decpomdp/madp/src/base/StringTools.h | 38 + .../synthesis/decpomdp/madp/src/base/TGet.h | 79 + .../base/TOICompactRewardDecPOMDPDiscrete.cpp | 231 +++ .../base/TOICompactRewardDecPOMDPDiscrete.h | 91 ++ .../madp/src/base/TOIDecMDPDiscrete.cpp | 63 + .../madp/src/base/TOIDecMDPDiscrete.h | 64 + .../madp/src/base/TOIDecPOMDPDiscrete.cpp | 126 ++ .../madp/src/base/TOIDecPOMDPDiscrete.h | 208 +++ .../TOIFactoredRewardDecPOMDPDiscrete.cpp | 158 ++ .../base/TOIFactoredRewardDecPOMDPDiscrete.h | 84 ++ .../decpomdp/madp/src/base/TimeTools.cpp | 34 + .../decpomdp/madp/src/base/TimeTools.h | 32 + .../decpomdp/madp/src/base/TransitionModel.h | 54 + .../madp/src/base/TransitionModelDiscrete.cpp | 64 + .../madp/src/base/TransitionModelDiscrete.h | 59 + .../base/TransitionModelDiscreteInterface.h | 59 + .../madp/src/base/TransitionModelMapping.cpp | 52 + .../madp/src/base/TransitionModelMapping.h | 82 ++ .../src/base/TransitionModelMappingSparse.cpp | 49 + .../src/base/TransitionModelMappingSparse.h | 95 ++ ...tionObservationIndependentMADPDiscrete.cpp | 1283 +++++++++++++++++ ...sitionObservationIndependentMADPDiscrete.h | 586 ++++++++ .../base/TwoStageDynamicBayesianNetwork.cpp | 1236 ++++++++++++++++ .../src/base/TwoStageDynamicBayesianNetwork.h | 495 +++++++ .../decpomdp/madp/src/base/VectorTools.cpp | 22 + .../decpomdp/madp/src/base/VectorTools.h | 104 ++ .../decpomdp/madp/src/include/configuration.h | 31 + .../decpomdp/madp/src/include/versions.h | 37 + .../src/parser/CommentBlankOrNewlineParser.h | 105 ++ .../madp/src/parser/CommentOrBlankParser.h | 146 ++ .../decpomdp/madp/src/parser/MADPParser.cpp | 32 + .../decpomdp/madp/src/parser/MADPParser.h | 69 + .../src/parser/ParserDPOMDPFormat_Spirit.cpp | 1216 ++++++++++++++++ .../src/parser/ParserDPOMDPFormat_Spirit.h | 1044 ++++++++++++++ .../madp/src/parser/ParserInterface.h | 46 + .../madp/src/parser/ParserPOMDPDiscrete.cpp | 102 ++ 
.../madp/src/parser/ParserPOMDPDiscrete.h | 53 + .../src/parser/ParserPOMDPFormat_Spirit.cpp | 1261 ++++++++++++++++ .../src/parser/ParserPOMDPFormat_Spirit.h | 1094 ++++++++++++++ .../decpomdp/madp/src/parser/dpomdp.spirit | 480 ++++++ .../decpomdp/madp/src/parser/pomdp.spirit | 596 ++++++++ payntbind/src/synthesis/helpers.cpp | 62 + .../src/synthesis/pomdp/PomdpManager.cpp | 318 ++++ payntbind/src/synthesis/pomdp/PomdpManager.h | 128 ++ .../pomdp/PomdpManagerAposteriori.cpp | 259 ++++ .../synthesis/pomdp/PomdpManagerAposteriori.h | 96 ++ .../src/synthesis/pomdp/SubPomdpBuilder.cpp | 263 ++++ .../src/synthesis/pomdp/SubPomdpBuilder.h | 110 ++ payntbind/src/synthesis/pomdp/bindings.cpp | 54 + .../pomdp_family/GameAbstractionSolver.cpp | 332 +++++ .../pomdp_family/GameAbstractionSolver.h | 112 ++ .../pomdp_family/ObservationEvaluator.cpp | 109 ++ .../pomdp_family/ObservationEvaluator.h | 58 + .../pomdp_family/ProductPomdpFsc.cpp | 130 ++ .../synthesis/pomdp_family/ProductPomdpFsc.h | 77 + .../ProductPomdpRandomizedFsc.cpp | 157 ++ .../pomdp_family/ProductPomdpRandomizedFsc.h | 80 + .../src/synthesis/pomdp_family/bindings.cpp | 59 + payntbind/src/synthesis/quotient/Coloring.cpp | 103 ++ payntbind/src/synthesis/quotient/Coloring.h | 62 + payntbind/src/synthesis/quotient/Family.cpp | 133 ++ payntbind/src/synthesis/quotient/Family.h | 63 + .../src/synthesis/quotient/JaniChoices.cpp | 80 + .../src/synthesis/quotient/JaniChoices.h | 14 + payntbind/src/synthesis/quotient/bindings.cpp | 298 ++++ payntbind/src/synthesis/synthesis.cpp | 13 + payntbind/src/synthesis/synthesis.h | 13 + .../translation/ItemKeyTranslator.cpp | 67 + .../synthesis/translation/ItemKeyTranslator.h | 44 + .../synthesis/translation/ItemTranslator.cpp | 48 + .../synthesis/translation/ItemTranslator.h | 45 + .../translation/componentTranslations.cpp | 85 ++ .../translation/componentTranslations.h | 35 + .../verification/MdpModelChecker.cpp | 27 + .../synthesis/verification/MdpModelChecker.h | 18 + 239 
files changed, 34593 insertions(+), 341 deletions(-) delete mode 100755 alias-paynt.sh create mode 100755 install.sh delete mode 100644 paynt-specific-storm.dockerfile create mode 100644 payntbind/.dockerignore create mode 100644 payntbind/.gitignore create mode 100644 payntbind/CMakeLists.txt create mode 100644 payntbind/README.md create mode 100644 payntbind/cmake/CMakeLists.txt create mode 100644 payntbind/cmake/config.py.in create mode 100644 payntbind/cmake/macros.cmake create mode 100644 payntbind/lib/payntbind/__init__.py create mode 100644 payntbind/lib/payntbind/_version.py create mode 100644 payntbind/lib/payntbind/synthesis/__init__.py create mode 100644 payntbind/resources/include_pybind11.cmake create mode 100644 payntbind/resources/pybind11_2.10.0.patch create mode 100644 payntbind/resources/pybind11_2.11.1.patch create mode 100644 payntbind/setup.cfg create mode 100755 payntbind/setup.py create mode 100644 payntbind/setup/__init__.py create mode 100644 payntbind/setup/config.py create mode 100755 payntbind/setup/helper.py create mode 100644 payntbind/src/common.h create mode 100644 payntbind/src/config.h.in create mode 100644 payntbind/src/helpers.h create mode 100644 payntbind/src/mod_synthesis.cpp create mode 100644 payntbind/src/synthesis/counterexamples/Counterexample.cpp create mode 100644 payntbind/src/synthesis/counterexamples/Counterexample.h create mode 100644 payntbind/src/synthesis/counterexamples/CounterexampleMdp.cpp create mode 100644 payntbind/src/synthesis/counterexamples/CounterexampleMdp.h create mode 100644 payntbind/src/synthesis/counterexamples/bindings.cpp create mode 100644 payntbind/src/synthesis/decpomdp/DecPomdp.cpp create mode 100644 payntbind/src/synthesis/decpomdp/DecPomdp.h create mode 100644 payntbind/src/synthesis/decpomdp/bindings.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/AUTHORS create mode 100644 payntbind/src/synthesis/decpomdp/madp/COPYING create mode 100644 
payntbind/src/synthesis/decpomdp/madp/LICENSE create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/Action.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/ActionDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/Agent.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/CPDDiscreteInterface.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/CPDKroneckerDelta.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/CPT.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/CPT.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDP.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDP.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDPDiscrete.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDPDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDPDiscreteInterface.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDPInterface.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/DiscreteEntity.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/E.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/EDeadline.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/EInvalidIndex.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/ENoSubScope.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/ENotCached.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/EOverflow.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/EParse.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/EventObservationModelMapping.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/EventObservationModelMapping.h create mode 100644 
payntbind/src/synthesis/decpomdp/madp/src/base/EventObservationModelMappingSparse.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/EventObservationModelMappingSparse.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/FSDist_COF.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/FSDist_COF.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/FactoredDecPOMDPDiscrete.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/FactoredDecPOMDPDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/FactoredDecPOMDPDiscreteInterface.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/FactoredMMDPDiscrete.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/FactoredMMDPDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/FactoredQFunctionScopeForStage.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/FactoredQFunctionScopeForStage.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/FactoredStateDistribution.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/FactoredStateDistribution.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/Globals.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/Globals.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/IndexTools.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/IndexTools.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/JointAction.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/JointActionDiscrete.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/JointActionDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/JointObservation.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/JointObservationDiscrete.cpp create mode 100644 
payntbind/src/synthesis/decpomdp/madp/src/base/JointObservationDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteActions.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteActions.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteObservations.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteObservations.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteStates.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteStates.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentFactoredStates.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentFactoredStates.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcess.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcess.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscrete.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscreteFactoredStates.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscreteFactoredStates.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscreteFactoredStatesInterface.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscreteInterface.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessInterface.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/NamedDescribedEntity.cpp create mode 100644 
payntbind/src/synthesis/decpomdp/madp/src/base/NamedDescribedEntity.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/OGet.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/Observation.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/ObservationDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModel.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelDiscrete.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelDiscreteInterface.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelMapping.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelMapping.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelMappingSparse.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelMappingSparse.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/PDDiscreteInterface.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/POMDPDiscrete.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/POMDPDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/POSG.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/POSG.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/POSGDiscrete.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/POSGDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/POSGDiscreteInterface.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/POSGInterface.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/PrintTools.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/QTableInterface.h create mode 100644 
payntbind/src/synthesis/decpomdp/madp/src/base/RGet.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/RewardModel.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelDiscreteInterface.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMapping.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMapping.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMappingSparse.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMappingSparse.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMappingSparseMapped.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMappingSparseMapped.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelTOISparse.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelTOISparse.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/Scope.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/Scope.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/State.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/StateDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/StateDistribution.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/StateDistributionVector.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/StateFactorDiscrete.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/StateFactorDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/StringTools.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/StringTools.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TGet.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TOICompactRewardDecPOMDPDiscrete.cpp create mode 100644 
payntbind/src/synthesis/decpomdp/madp/src/base/TOICompactRewardDecPOMDPDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TOIDecMDPDiscrete.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TOIDecMDPDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TOIDecPOMDPDiscrete.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TOIDecPOMDPDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TOIFactoredRewardDecPOMDPDiscrete.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TOIFactoredRewardDecPOMDPDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TimeTools.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TimeTools.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModel.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelDiscrete.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelDiscreteInterface.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelMapping.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelMapping.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelMappingSparse.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelMappingSparse.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TransitionObservationIndependentMADPDiscrete.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TransitionObservationIndependentMADPDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TwoStageDynamicBayesianNetwork.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/TwoStageDynamicBayesianNetwork.h create mode 100644 
payntbind/src/synthesis/decpomdp/madp/src/base/VectorTools.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/base/VectorTools.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/include/configuration.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/include/versions.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/parser/CommentBlankOrNewlineParser.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/parser/CommentOrBlankParser.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/parser/MADPParser.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/parser/MADPParser.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/parser/ParserDPOMDPFormat_Spirit.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/parser/ParserDPOMDPFormat_Spirit.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/parser/ParserInterface.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/parser/ParserPOMDPDiscrete.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/parser/ParserPOMDPDiscrete.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/parser/ParserPOMDPFormat_Spirit.cpp create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/parser/ParserPOMDPFormat_Spirit.h create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/parser/dpomdp.spirit create mode 100644 payntbind/src/synthesis/decpomdp/madp/src/parser/pomdp.spirit create mode 100644 payntbind/src/synthesis/helpers.cpp create mode 100644 payntbind/src/synthesis/pomdp/PomdpManager.cpp create mode 100644 payntbind/src/synthesis/pomdp/PomdpManager.h create mode 100644 payntbind/src/synthesis/pomdp/PomdpManagerAposteriori.cpp create mode 100644 payntbind/src/synthesis/pomdp/PomdpManagerAposteriori.h create mode 100644 payntbind/src/synthesis/pomdp/SubPomdpBuilder.cpp create mode 100644 payntbind/src/synthesis/pomdp/SubPomdpBuilder.h create mode 100644 
payntbind/src/synthesis/pomdp/bindings.cpp create mode 100644 payntbind/src/synthesis/pomdp_family/GameAbstractionSolver.cpp create mode 100644 payntbind/src/synthesis/pomdp_family/GameAbstractionSolver.h create mode 100644 payntbind/src/synthesis/pomdp_family/ObservationEvaluator.cpp create mode 100644 payntbind/src/synthesis/pomdp_family/ObservationEvaluator.h create mode 100644 payntbind/src/synthesis/pomdp_family/ProductPomdpFsc.cpp create mode 100644 payntbind/src/synthesis/pomdp_family/ProductPomdpFsc.h create mode 100644 payntbind/src/synthesis/pomdp_family/ProductPomdpRandomizedFsc.cpp create mode 100644 payntbind/src/synthesis/pomdp_family/ProductPomdpRandomizedFsc.h create mode 100644 payntbind/src/synthesis/pomdp_family/bindings.cpp create mode 100644 payntbind/src/synthesis/quotient/Coloring.cpp create mode 100644 payntbind/src/synthesis/quotient/Coloring.h create mode 100644 payntbind/src/synthesis/quotient/Family.cpp create mode 100644 payntbind/src/synthesis/quotient/Family.h create mode 100644 payntbind/src/synthesis/quotient/JaniChoices.cpp create mode 100644 payntbind/src/synthesis/quotient/JaniChoices.h create mode 100644 payntbind/src/synthesis/quotient/bindings.cpp create mode 100644 payntbind/src/synthesis/synthesis.cpp create mode 100644 payntbind/src/synthesis/synthesis.h create mode 100644 payntbind/src/synthesis/translation/ItemKeyTranslator.cpp create mode 100644 payntbind/src/synthesis/translation/ItemKeyTranslator.h create mode 100644 payntbind/src/synthesis/translation/ItemTranslator.cpp create mode 100644 payntbind/src/synthesis/translation/ItemTranslator.h create mode 100644 payntbind/src/synthesis/translation/componentTranslations.cpp create mode 100644 payntbind/src/synthesis/translation/componentTranslations.h create mode 100644 payntbind/src/synthesis/verification/MdpModelChecker.cpp create mode 100644 payntbind/src/synthesis/verification/MdpModelChecker.h diff --git a/.github/workflows/buildtest.yml 
b/.github/workflows/buildtest.yml index b4359c457..ff2a229d6 100644 --- a/.github/workflows/buildtest.yml +++ b/.github/workflows/buildtest.yml @@ -1,6 +1,5 @@ name: Build Test -# Builds and tests stormpy with different versions of Storm -# also deploys images to Dockerhub +# Builds and tests paynt and deploys images to Dockerhub on: push: @@ -20,33 +19,6 @@ env: NR_JOBS: "2" jobs: - deploytemp: - name: Test and Deploy on latest using specific Storm commit (${{ matrix.buildType.name }}) - runs-on: ubuntu-latest - strategy: - matrix: - buildType: - - {name: "Release", imageName : "randriu/paynt", dockerTag: "temp", buildArgs: "BUILD_TYPE=Release", setupArgs: "", stormCommit : "de718be210dc6bef4b4e4507436a46786b812145"} - fail-fast: false - steps: - - name: Git clone - uses: actions/checkout@v4 - - name: Build paynt from Dockerfile - run: docker build -t ${{ matrix.buildType.imageName }}:${{ matrix.buildType.dockerTag }} . -f paynt-specific-storm.dockerfile --build-arg storm_sha=${{ matrix.buildType.stormCommit }} --build-arg build_type=${{ matrix.buildType.buildArgs }} --build-arg setup_args=${{ matrix.buildType.setupArgs }} --build-arg setup_args_pycarl=${{ matrix.buildType.setupArgs }} --build-arg no_threads=${NR_JOBS} - - name: Build image for learner - run: docker build -t ${{ matrix.buildType.imageName }}-learner:${{ matrix.buildType.dockerTag }} . 
-f paynt-learner.dockerfile --build-arg paynt_base=${{ matrix.buildType.imageName }}:${{ matrix.buildType.dockerTag }} - - name: Login into docker - # Only login if using master on original repo (and not for pull requests or forks) - if: github.repository_owner == 'randriu' && github.ref == 'refs/heads/master' - run: echo '${{ secrets.STORMPY_CI_DOCKER_PASSWORD }}' | docker login -u randriu --password-stdin - - name: Deploy paynt with specific Storm version - # Only deploy if using master on original repo (and not for pull requests or forks) - if: github.repository_owner == 'randriu' && github.ref == 'refs/heads/master' - run: docker push ${{ matrix.buildType.imageName }}:${{ matrix.buildType.dockerTag }} - - name: Deploy paynt with specific Storm version and learner dependencies - # Only deploy if using master on original repo (and not for pull requests or forks) - if: github.repository_owner == 'randriu' && github.ref == 'refs/heads/master' - run: docker push ${{ matrix.buildType.imageName }}-learner:${{ matrix.buildType.dockerTag }} deploy: name: Deploy on latest (${{ matrix.buildType.name }}) @@ -54,13 +26,13 @@ jobs: strategy: matrix: buildType: - - {name: "Release", imageName : "randriu/paynt", dockerTag: "latest", stormTag: "ci", buildArgs: "BUILD_TYPE=Release", setupArgs: ""} + - {name: "Release", imageName : "randriu/paynt", dockerTag: "latest", setupArgs: ""} fail-fast: false steps: - name: Git clone uses: actions/checkout@v4 - name: Build stormpy from Dockerfile - run: docker build -t ${{ matrix.buildType.imageName }}:${{ matrix.buildType.dockerTag }} . --build-arg STORM_BASE=movesrwth/storm:${{ matrix.buildType.stormTag }} --build-arg build_type=${{ matrix.buildType.buildArgs }} --build-arg setup_args=${{ matrix.buildType.setupArgs }} --build-arg setup_args_pycarl=${{ matrix.buildType.setupArgs }} --build-arg no_threads=${NR_JOBS} + run: docker build -t ${{ matrix.buildType.imageName }}:${{ matrix.buildType.dockerTag }} . 
--build-arg setup_args=${{ matrix.buildType.setupArgs }} --build-arg setup_args_pycarl=${{ matrix.buildType.setupArgs }} --build-arg no_threads=${NR_JOBS} - name: Build image for learner run: docker build -t ${{ matrix.buildType.imageName }}-learner:${{ matrix.buildType.dockerTag }} . -f paynt-learner.dockerfile --build-arg paynt_base=${{ matrix.buildType.imageName }}:${{ matrix.buildType.dockerTag }} - name: Login into docker @@ -74,4 +46,4 @@ jobs: - name: Deploy paynt with learner dependencies # Only deploy if using master on original repo (and not for pull requests or forks) if: github.repository_owner == 'randriu' && github.ref == 'refs/heads/master' - run: docker push ${{ matrix.buildType.imageName }}-learner:${{ matrix.buildType.dockerTag }} \ No newline at end of file + run: docker push ${{ matrix.buildType.imageName }}-learner:${{ matrix.buildType.dockerTag }} diff --git a/.gitignore b/.gitignore index 6ef243426..9174f3971 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,2 @@ __pycache__/ - -env/ prerequisites/ -storm/ -stormpy/ -experiments/ - diff --git a/Dockerfile b/Dockerfile index 3c6e38a52..b476d7a9d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,72 +1,22 @@ -FROM movesrwth/storm:ci +FROM movesrwth/stormpy:ci -# Install dependencies -###################### -# Uncomment to update packages beforehand -RUN apt-get update -qq -RUN apt-get install -y --no-install-recommends \ - uuid-dev \ - python3 \ - python3-venv - -# CMake build type -ARG build_type=Release -# Additional arguments for compiling stormpy +# Additional arguments for compiling payntbind ARG setup_args="" -# Additional arguments for compiling pycarl -ARG setup_args_pycarl="" # Number of threads to use for parallel compilation ARG no_threads=2 -# WORKDIR /opt/ - -# Obtain carl-parser from public repository -# RUN git clone https://github.com/moves-rwth/carl-parser.git - -# # Switch to build directory -# RUN mkdir -p /opt/carl-parser/build -# WORKDIR /opt/carl-parser/build - -# # 
Configure carl-parser -# RUN cmake .. -DCMAKE_BUILD_TYPE=$build_type - -# # Build carl-parser -# RUN make carl-parser -j $no_threads - -# Set-up virtual environment -############################ -ENV VIRTUAL_ENV=/opt/venv -RUN python3 -m venv $VIRTUAL_ENV -ENV PATH="$VIRTUAL_ENV/bin:$PATH" -RUN pip install -U pip setuptools wheel - -# Build pycarl -############## -WORKDIR /opt/pycarl - -# Obtain latest version of pycarl from public repository -RUN git clone --depth 1 https://github.com/moves-rwth/pycarl.git . +WORKDIR /opt/ -# Build pycarl -RUN python setup.py build_ext $setup_args_pycarl -j $no_threads develop - -# Build stormpy -############### -WORKDIR /opt/stormpy - -# Copy the content of the current local stormpy repository into the Docker image -RUN git clone --depth 1 --branch synthesis https://github.com/randriu/stormpy.git . - -# Build stormpy -RUN python setup.py build_ext $setup_args -j $no_threads develop - -# Paynt dependencies -RUN pip install pysmt z3-solver click numpy +# install dependencies +RUN apt-get update -qq +RUN apt-get install -y graphviz +RUN pip install click z3-solver graphviz -# Build paynt -############# +# build payntbind WORKDIR /opt/paynt - COPY . . +WORKDIR /opt/paynt/payntbind +RUN python setup.py build_ext $setup_args -j $no_threads develop +# install paynt RUN pip install -e . 
diff --git a/alias-paynt.sh b/alias-paynt.sh deleted file mode 100755 index 195365ef9..000000000 --- a/alias-paynt.sh +++ /dev/null @@ -1,96 +0,0 @@ -#!/bin/bash -# usage: source alias-storm.sh - -# multi-core compilation -export COMPILE_JOBS=$(nproc) -# single-core compilation: -# export COMPILE_JOBS=1 - -# environment variables -export PAYNT_ROOT=`pwd` - -# environment aliases -alias enva='source $PAYNT_ROOT/env/bin/activate' -alias envd='deactivate' - -storm-dependencies() { - sudo apt update - sudo apt -y install build-essential git automake cmake libboost-all-dev libcln-dev libgmp-dev libginac-dev libglpk-dev libhwloc-dev libz3-dev libxerces-c-dev libeigen3-dev graphviz - sudo apt -y install maven uuid-dev python3-dev libffi-dev libssl-dev python3-pip python3-venv - # apt -y install texlive-latex-extra - # update-alternatives --install /usr/bin/python python /usr/bin/python3 10 -} - -download-prerequisites() { - mkdir -p $PAYNT_ROOT/prerequisites - cd $PAYNT_ROOT/prerequisites - git clone --depth 1 https://github.com/moves-rwth/pycarl.git pycarl - git clone --depth 1 --branch cvc5-1.0.0 https://github.com/cvc5/cvc5.git cvc5 - cd - - cd $PAYNT_ROOT - git clone https://github.com/moves-rwth/storm.git storm - # git clone --branch stable https://github.com/moves-rwth/storm.git storm - git clone --branch synthesis git@github.com:randriu/stormpy.git stormpy - cd - -} - -python-environment() { - python3 -m venv $PAYNT_ROOT/env - enva - pip3 install pytest pytest-runner pytest-cov numpy scipy pysmt z3-solver click toml Cython scikit-build graphviz - envd -} - -pycarl-build() { - cd $PAYNT_ROOT/prerequisites/pycarl - enva - python3 setup.py build_ext --jobs $COMPILE_JOBS develop - #[TEST] python3 setup.py test - envd - cd - -} - -cvc5-build() { - cd $PAYNT_ROOT/prerequisites/cvc5 - enva - ./configure.sh --prefix="." 
--auto-download --python-bindings - cd build - make --jobs $COMPILE_JOBS - make install - envd - cd - -} - -storm-build() { - mkdir -p $PAYNT_ROOT/storm/build - cd $PAYNT_ROOT/storm/build - cmake .. - make storm-main storm-pomdp --jobs $COMPILE_JOBS - # make check --jobs $COMPILE_JOBS - cd - -} - -stormpy-build() { - cd $PAYNT_ROOT/stormpy - enva - python3 setup.py build_ext --jobs $COMPILE_JOBS develop - # python3 setup.py build_ext --storm-dir $PAYNT_ROOT/storm/build --jobs $COMPILE_JOBS develop - # python3 setup.py test - envd - cd - -} - -synthesis-install() { - storm-dependencies - download-prerequisites - python-environment - - # building cvc5 is optional - # cvc5-build - - # optional unless you don't have Storm installed - storm-build - - pycarl-build - stormpy-build -} diff --git a/install.sh b/install.sh new file mode 100755 index 000000000..9a36d9d2c --- /dev/null +++ b/install.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +# multi-core compilation +COMPILE_JOBS=$(nproc) +# single-core compilation: +# export COMPILE_JOBS=1 + +# environment variables +PAYNT_ROOT=`pwd` + +# storm-dependencies +sudo apt update +sudo apt -y install build-essential git automake cmake libboost-all-dev libcln-dev libgmp-dev libginac-dev libglpk-dev libhwloc-dev libz3-dev libxerces-c-dev libeigen3-dev graphviz +sudo apt -y install maven uuid-dev python3-dev libffi-dev libssl-dev python3-pip python3-venv +# apt -y install texlive-latex-extra +# update-alternatives --install /usr/bin/python python /usr/bin/python3 10 + +# prerequisites +mkdir -p ${PAYNT_ROOT}/prerequisites + +# cvc5-build (optional) +# cd ${PAYNT_ROOT}/prerequisites +# git clone --depth 1 --branch cvc5-1.0.0 https://github.com/cvc5/cvc5.git cvc5 +# cd ${PAYNT_ROOT}/prerequisites/cvc5 +# source ${PAYNT_ROOT}/env/bin/activate +# ./configure.sh --prefix="." 
--auto-download --python-bindings +# cd build +# make --jobs ${COMPILE_JOBS} +# make install +# deactivate + +# storm +cd ${PAYNT_ROOT}/prerequisites +git clone https://github.com/moves-rwth/storm.git storm +# git clone --branch stable https://github.com/moves-rwth/storm.git storm +mkdir -p ${PAYNT_ROOT}/prerequisites/storm/build +cd ${PAYNT_ROOT}/prerequisites/storm/build +cmake .. +make storm-main storm-pomdp --jobs ${COMPILE_JOBS} +# make check --jobs ${COMPILE_JOBS} + + +# python-environment +python3 -m venv ${PAYNT_ROOT}/env +source ${PAYNT_ROOT}/env/bin/activate +pip3 install pytest pytest-runner pytest-cov numpy scipy toml Cython scikit-build +pip3 install graphviz pysmt z3-solver click + +# pycarl-build +cd ${PAYNT_ROOT}/prerequisites +git clone https://github.com/moves-rwth/pycarl.git pycarl +cd ${PAYNT_ROOT}/prerequisites/pycarl +python3 setup.py build_ext --jobs ${COMPILE_JOBS} develop +#[TEST] python3 setup.py test + +# stormpy-build +cd ${PAYNT_ROOT}/prerequisites +git clone https://github.com/moves-rwth/stormpy.git stormpy +# git clone --branch stable https://github.com/moves-rwth/stormpy.git stormpy +cd ${PAYNT_ROOT}/prerequisites/stormpy +python3 setup.py build_ext --jobs ${COMPILE_JOBS} develop +# python3 setup.py build_ext --storm-dir ${PAYNT_ROOT}/prerequisites/storm/build --jobs ${COMPILE_JOBS} develop +# python3 setup.py test +deactivate + +# done +cd ${PAYNT_ROOT} diff --git a/paynt-specific-storm.dockerfile b/paynt-specific-storm.dockerfile deleted file mode 100644 index 7b7ae5806..000000000 --- a/paynt-specific-storm.dockerfile +++ /dev/null @@ -1,86 +0,0 @@ -FROM movesrwth/storm-basesystem:latest - -# Install dependencies -###################### -# Uncomment to update packages beforehand -RUN apt-get update -qq -RUN apt-get install -y --no-install-recommends \ - uuid-dev \ - python3 \ - python3-venv - -# CMake build type -ARG build_type=Release -# Additional arguments for compiling stormpy -ARG setup_args="" -# Additional arguments for compiling pycarl -ARG
setup_args_pycarl="" -# Number of threads to use for parallel compilation -ARG no_threads=2 -# Specific storm git commit revision SHA -ARG storm_sha=dc7960b8f0222793b591f3d6489e2f6c7da1278f - -WORKDIR /opt/ -RUN git clone https://github.com/moves-rwth/storm.git storm -WORKDIR /opt/storm/build -RUN git reset --hard $storm_sha -RUN cmake .. -DCMAKE_BUILD_TYPE=$build_type -RUN make storm-main storm-pomdp --jobs $no_threads -ENV PATH="/opt/storm/build/bin:$PATH" - -# Obtain carl-parser from public repository -# RUN git clone https://github.com/moves-rwth/carl-parser.git - -# # Switch to build directory -# RUN mkdir -p /opt/carl-parser/build -# WORKDIR /opt/carl-parser/build - -# # Configure carl-parser -# RUN cmake .. -DCMAKE_BUILD_TYPE=$build_type - -# # Build carl-parser -# RUN make carl-parser -j $no_threads - - -# Set-up virtual environment -############################ -ENV VIRTUAL_ENV=/opt/venv -RUN python3 -m venv $VIRTUAL_ENV -ENV PATH="$VIRTUAL_ENV/bin:$PATH" -RUN pip install -U pip setuptools wheel - -# Build pycarl -############## -WORKDIR /opt/pycarl - -# Obtain latest version of pycarl from public repository -RUN git clone --depth 1 https://github.com/moves-rwth/pycarl.git . - -# Build pycarl -RUN python setup.py build_ext $setup_args_pycarl -j $no_threads develop - -# Build stormpy -############### -WORKDIR /opt/stormpy - -# Copy the content of the current local stormpy repository into the Docker image -RUN git clone --depth 1 --branch synthesis https://github.com/randriu/stormpy.git . - -# Build stormpy -RUN python setup.py build_ext $setup_args -j $no_threads develop - -# Additional dependencies -########################## - -RUN pip install -U pip setuptools wheel numpy - -# Paynt / extra dependencies -RUN apt-get install -y graphviz -RUN pip install click z3-solver pysmt graphviz - -# Build paynt -############# -WORKDIR /opt/paynt -COPY . . - -RUN pip install -e . 
diff --git a/paynt/family/family.py b/paynt/family/family.py index 95fcf88dc..c38b57c04 100644 --- a/paynt/family/family.py +++ b/paynt/family/family.py @@ -1,4 +1,4 @@ -import stormpy.synthesis +import payntbind.synthesis import paynt.family.smt @@ -14,11 +14,11 @@ class Family: def __init__(self, other=None): if other is None: - self.family = stormpy.synthesis.Family() + self.family = payntbind.synthesis.Family() self.hole_to_name = [] self.hole_to_option_labels = [] else: - self.family = stormpy.synthesis.Family(other.family) + self.family = payntbind.synthesis.Family(other.family) self.hole_to_name = other.hole_to_name self.hole_to_option_labels = other.hole_to_option_labels diff --git a/paynt/family/smt.py b/paynt/family/smt.py index 120ebd34a..e1feacae5 100644 --- a/paynt/family/smt.py +++ b/paynt/family/smt.py @@ -1,7 +1,5 @@ -import stormpy.synthesis - -import z3 import sys +import z3 # import pycvc5 if installed import importlib diff --git a/paynt/parser/jani.py b/paynt/parser/jani.py index dd8e975f7..58bdacea8 100644 --- a/paynt/parser/jani.py +++ b/paynt/parser/jani.py @@ -1,4 +1,5 @@ import stormpy +import payntbind import paynt.verification.property from ..quotient.models import MarkovChain @@ -75,7 +76,7 @@ def __init__(self, prism, hole_expressions, specification, family): # associate each action of a quotient MDP with hole options # reconstruct choice labels from choice origins logger.debug("associating choices of the quotient with hole assignments...") - choice_is_valid,choice_to_hole_options = stormpy.synthesis.janiMapChoicesToHoleAssignments( + choice_is_valid,choice_to_hole_options = payntbind.synthesis.janiMapChoicesToHoleAssignments( quotient_mdp,family.family,self.edge_to_hole_options ) diff --git a/paynt/parser/prism_parser.py b/paynt/parser/prism_parser.py index d482b8935..7cf3984be 100644 --- a/paynt/parser/prism_parser.py +++ b/paynt/parser/prism_parser.py @@ -1,4 +1,5 @@ -import stormpy.synthesis +import stormpy +import payntbind import 
paynt.family.family import paynt.verification.property @@ -53,11 +54,11 @@ def read_prism(cls, sketch_path, properties_path, relative_error, discount_facto jani_unfolder = paynt.parser.jani.JaniUnfolder(prism, hole_expressions, specification, family) specification = jani_unfolder.specification quotient_mdp = jani_unfolder.quotient_mdp - coloring = stormpy.synthesis.Coloring(family.family, quotient_mdp.nondeterministic_choice_indices, jani_unfolder.choice_to_hole_options) + coloring = payntbind.synthesis.Coloring(family.family, quotient_mdp.nondeterministic_choice_indices, jani_unfolder.choice_to_hole_options) MarkovChain.initialize(specification) if prism.model_type == stormpy.storage.PrismModelType.POMDP: - obs_evaluator = stormpy.synthesis.ObservationEvaluator(prism, quotient_mdp) - quotient_mdp = stormpy.synthesis.addChoiceLabelsFromJani(quotient_mdp) + obs_evaluator = payntbind.synthesis.ObservationEvaluator(prism, quotient_mdp) + quotient_mdp = payntbind.synthesis.addChoiceLabelsFromJani(quotient_mdp) else: MarkovChain.initialize(specification) quotient_mdp = MarkovChain.from_prism(prism) diff --git a/paynt/parser/sketch.py b/paynt/parser/sketch.py index e5edf68bf..d9af14a28 100644 --- a/paynt/parser/sketch.py +++ b/paynt/parser/sketch.py @@ -1,4 +1,5 @@ import stormpy +import payntbind from paynt.parser.prism_parser import PrismParser from paynt.parser.pomdp_parser import PomdpParser @@ -26,9 +27,6 @@ def substitute_suffix(string, delimiter, replacer): return output_string def make_rewards_action_based(model): - - import stormpy - tm = model.transition_matrix for name,reward_model in model.reward_models.items(): assert not reward_model.has_transition_rewards, "Paynt does not support transition rewards" @@ -94,7 +92,7 @@ def load_sketch(cls, sketch_path, properties_path, if filetype is None: try: logger.info(f"assuming sketch in Cassandra format...") - decpomdp_manager = stormpy.synthesis.parse_decpomdp(sketch_path) + decpomdp_manager = 
payntbind.synthesis.parse_decpomdp(sketch_path) if decpomdp_manager is None: raise SyntaxError logger.info("applying discount factor transformation...") @@ -123,7 +121,7 @@ def load_sketch(cls, sketch_path, properties_path, prop = specification.all_properties()[0] reward_name = prop.formula.reward_name target_label = str(prop.formula.subformula.subformula) - subpomdp_builder = stormpy.synthesis.SubPomdpBuilder(explicit_quotient, reward_name, target_label) + subpomdp_builder = payntbind.synthesis.SubPomdpBuilder(explicit_quotient, reward_name, target_label) subpomdp_builder.set_discount_factor(discount_factor) initial_distribution = {explicit_quotient.initial_states[0] : 1} relevant_observations = stormpy.storage.BitVector(explicit_quotient.nr_observations,True) diff --git a/paynt/quotient/decpomdp.py b/paynt/quotient/decpomdp.py index 5151e2b47..c280a0c89 100644 --- a/paynt/quotient/decpomdp.py +++ b/paynt/quotient/decpomdp.py @@ -1,6 +1,5 @@ import stormpy -import stormpy.synthesis -import stormpy.pomdp +import payntbind import paynt import paynt.quotient.quotient diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py index 037fa63f9..6d15309ea 100644 --- a/paynt/quotient/mdp_family.py +++ b/paynt/quotient/mdp_family.py @@ -1,5 +1,5 @@ import stormpy -import stormpy.synthesis +import payntbind import paynt.family.family import paynt.quotient.quotient @@ -158,7 +158,7 @@ def fix_and_apply_policy_to_family(self, family, policy): policy_choices = [] for state,action in enumerate(policy): policy_choices += self.state_action_choices[state][action] - choices = stormpy.synthesis.policyToChoicesForFamily(policy_choices, family.selected_choices) + choices = payntbind.synthesis.policyToChoicesForFamily(policy_choices, family.selected_choices) # build MDP and keep only reachable states in policy mdp = self.build_from_choice_mask(choices) @@ -177,7 +177,7 @@ def apply_policy_to_family(self, family, policy): policy_choices += choice else: policy_choices += 
self.state_action_choices[state][action] - choices = stormpy.synthesis.policyToChoicesForFamily(policy_choices, family.selected_choices) + choices = payntbind.synthesis.policyToChoicesForFamily(policy_choices, family.selected_choices) mdp = self.build_from_choice_mask(choices) @@ -207,7 +207,7 @@ def assert_mdp_is_deterministic(self, mdp, family): def build_game_abstraction_solver(self, prop): target_label = prop.get_target_label() precision = paynt.verification.property.Property.model_checking_precision - solver = stormpy.synthesis.GameAbstractionSolver( + solver = payntbind.synthesis.GameAbstractionSolver( self.quotient_mdp, len(self.action_labels), self.choice_to_action, target_label, precision ) return solver diff --git a/paynt/quotient/pomdp.py b/paynt/quotient/pomdp.py index 7b9bd8a3e..19bbc15fb 100644 --- a/paynt/quotient/pomdp.py +++ b/paynt/quotient/pomdp.py @@ -1,6 +1,6 @@ import stormpy -import stormpy.synthesis import stormpy.pomdp +import payntbind import paynt.family.family import paynt.quotient.quotient @@ -112,9 +112,9 @@ def __init__(self, pomdp, specification, decpomdp_manager=None): # initialize POMDP manager if not self.posterior_aware: - self.pomdp_manager = stormpy.synthesis.PomdpManager(self.pomdp) + self.pomdp_manager = payntbind.synthesis.PomdpManager(self.pomdp) else: - self.pomdp_manager = stormpy.synthesis.PomdpManagerAposteriori(self.pomdp) + self.pomdp_manager = payntbind.synthesis.PomdpManagerAposteriori(self.pomdp) # do initial unfolding self.set_imperfect_memory_size(PomdpQuotient.initial_memory_size) @@ -349,7 +349,7 @@ def unfold_memory(self): "unfolding POMDP using the following memory allocation vector: {} ..." 
.format(self.observation_memory_size)) self.quotient_mdp = self.pomdp_manager.construct_mdp() - self.choice_destinations = stormpy.synthesis.computeChoiceDestinations(self.quotient_mdp) + self.choice_destinations = payntbind.synthesis.computeChoiceDestinations(self.quotient_mdp) logger.debug(f"constructed quotient MDP having {self.quotient_mdp.nr_states} states and {self.quotient_mdp.nr_choices} actions.") if not PomdpQuotient.posterior_aware: @@ -357,7 +357,7 @@ def unfold_memory(self): else: family, choice_to_hole_options = self.create_coloring_aposteriori() - self.coloring = stormpy.synthesis.Coloring(family.family, self.quotient_mdp.nondeterministic_choice_indices, choice_to_hole_options) + self.coloring = payntbind.synthesis.Coloring(family.family, self.quotient_mdp.nondeterministic_choice_indices, choice_to_hole_options) self.state_to_holes = self.coloring.getStateToHoles().copy() # to each hole-option pair a list of actions colored by this combination diff --git a/paynt/quotient/pomdp_family.py b/paynt/quotient/pomdp_family.py index 0b4d28040..a3b5baaae 100644 --- a/paynt/quotient/pomdp_family.py +++ b/paynt/quotient/pomdp_family.py @@ -1,6 +1,5 @@ import stormpy -import stormpy.utility -import stormpy.synthesis +import payntbind import paynt.quotient.models import paynt.quotient.quotient @@ -171,11 +170,11 @@ def observation_is_trivial(self, obs): def initialize_fsc_unfolder(self, fsc_is_deterministic=False): - if fsc_is_deterministic and not isinstance(self.product_pomdp_fsc, stormpy.synthesis.ProductPomdpFsc): - self.product_pomdp_fsc = stormpy.synthesis.ProductPomdpFsc( + if fsc_is_deterministic and not isinstance(self.product_pomdp_fsc, payntbind.synthesis.ProductPomdpFsc): + self.product_pomdp_fsc = payntbind.synthesis.ProductPomdpFsc( self.quotient_mdp, self.state_to_observation, self.num_actions, self.choice_to_action) - if not fsc_is_deterministic and not isinstance(self.product_pomdp_fsc, stormpy.synthesis.ProductPomdpRandomizedFsc): - 
self.product_pomdp_fsc = stormpy.synthesis.ProductPomdpRandomizedFsc( + if not fsc_is_deterministic and not isinstance(self.product_pomdp_fsc, payntbind.synthesis.ProductPomdpRandomizedFsc): + self.product_pomdp_fsc = payntbind.synthesis.ProductPomdpRandomizedFsc( self.quotient_mdp, self.state_to_observation, self.num_actions, self.choice_to_action) @@ -215,7 +214,7 @@ def build_dtmc_sketch(self, fsc): else: hole_options = [(hole,option) for hole,option in choice_to_hole_assignment[choice]] product_choice_to_hole_options.append(hole_options) - product_coloring = stormpy.synthesis.Coloring(product_family.family, product.nondeterministic_choice_indices, product_choice_to_hole_options) + product_coloring = payntbind.synthesis.Coloring(product_family.family, product.nondeterministic_choice_indices, product_choice_to_hole_options) # handle specification product_specification = self.specification.copy() @@ -232,7 +231,7 @@ def compute_qvalues_for_product_submdp(self, product_submdp : paynt.quotient.mod sketch was constructed afterwards :return a dictionary mapping (s,n,a) to Q((s,n),a) ''' - assert isinstance(self.product_pomdp_fsc, stormpy.synthesis.ProductPomdpRandomizedFsc), \ + assert isinstance(self.product_pomdp_fsc, payntbind.synthesis.ProductPomdpRandomizedFsc), \ "to compute Q-values, unfolder for randomized FSC must have been used" # model check @@ -289,7 +288,7 @@ def compute_witnessing_traces(self, dtmc_sketch, satisfying_assignment, num_trac :return a list of state-action pairs :note the method assumes that the DTMC sketch is the one that was last constructed using build_dtmc_sketch() ''' - fsc_is_randomized = isinstance(self.product_pomdp_fsc, stormpy.synthesis.ProductPomdpRandomizedFsc) + fsc_is_randomized = isinstance(self.product_pomdp_fsc, payntbind.synthesis.ProductPomdpRandomizedFsc) if fsc_is_randomized: # double the trace length to account for intermediate states trace_max_length *= 2 diff --git a/paynt/quotient/quotient.py 
b/paynt/quotient/quotient.py index 08b5b506e..a5c35e85c 100644 --- a/paynt/quotient/quotient.py +++ b/paynt/quotient/quotient.py @@ -1,5 +1,5 @@ import stormpy -import stormpy.synthesis +import payntbind import paynt.family.family import paynt.quotient.models @@ -41,7 +41,7 @@ def __init__(self, quotient_mdp = None, family = None, coloring = None, specific # for each choice of the quotient, a list of its state-destinations self.choice_destinations = None if self.quotient_mdp is not None: - self.choice_destinations = stormpy.synthesis.computeChoiceDestinations(self.quotient_mdp) + self.choice_destinations = payntbind.synthesis.computeChoiceDestinations(self.quotient_mdp) if coloring is not None: self.state_to_holes = coloring.getStateToHoles().copy() @@ -144,7 +144,7 @@ def keep_reachable_choices_of_scheduler(self, state_to_choice): return state_to_choice_reachable def scheduler_to_state_to_choice(self, mdp, scheduler, keep_reachable_choices=True): - state_to_quotient_choice = stormpy.synthesis.schedulerToStateToGlobalChoice(scheduler, mdp.model, mdp.quotient_choice_map) + state_to_quotient_choice = payntbind.synthesis.schedulerToStateToGlobalChoice(scheduler, mdp.model, mdp.quotient_choice_map) state_to_choice = self.empty_scheduler() for state in range(mdp.model.nr_states): quotient_choice = state_to_quotient_choice[state] @@ -185,7 +185,7 @@ def choice_values(self, mdp, prop, state_values): ''' # multiply probability with model checking results - choice_values = stormpy.synthesis.multiply_with_vector(mdp.transition_matrix, state_values) + choice_values = payntbind.synthesis.multiply_with_vector(mdp.transition_matrix, state_values) choice_values = Quotient.make_vector_defined(choice_values) # if the associated reward model has state-action rewards, then these must be added to choice values @@ -235,7 +235,7 @@ def expected_visits(self, mdp, prop, choices): def estimate_scheduler_difference(self, mdp, inconsistent_assignments, choice_values, expected_visits=None): 
if expected_visits is None: expected_visits = [1] * mdp.model.nr_states - hole_variance = stormpy.synthesis.computeInconsistentHoleVariance( + hole_variance = payntbind.synthesis.computeInconsistentHoleVariance( self.design_space.family, mdp.model.nondeterministic_choice_indices, mdp.quotient_choice_map, choice_values, self.coloring, inconsistent_assignments, expected_visits) return hole_variance diff --git a/paynt/quotient/storm_pomdp_control.py b/paynt/quotient/storm_pomdp_control.py index 4ea4d925c..a3f2e2710 100644 --- a/paynt/quotient/storm_pomdp_control.py +++ b/paynt/quotient/storm_pomdp_control.py @@ -1,6 +1,6 @@ import stormpy -import stormpy.synthesis import stormpy.pomdp +import payntbind from ..quotient.models import MarkovChain from ..utils.profiler import Timer diff --git a/paynt/simulation/pomcp.py b/paynt/simulation/pomcp.py index 67757cc0e..63614c059 100644 --- a/paynt/simulation/pomcp.py +++ b/paynt/simulation/pomcp.py @@ -1,4 +1,5 @@ import stormpy +import payntbind import paynt from paynt.utils.profiler import Timer @@ -422,7 +423,7 @@ def __init__(self, pomcp): self.last_synthesized = None # FIXME - self.subpomdp_builder = stormpy.synthesis.SubPomdpBuilder(self.pomdp, self.pomcp.reward_name, self.pomcp.target_label) + self.subpomdp_builder = payntbind.synthesis.SubPomdpBuilder(self.pomdp, self.pomcp.reward_name, self.pomcp.target_label) self.subpomdp_builder.set_discount_factor(self.discount_factor) self.fsc = None @@ -864,7 +865,7 @@ def __init__(self, quotient): )) if only_synthesis: - subpomdp_builder = stormpy.synthesis.SubPomdpBuilder(self.pomdp, self.reward_name, self.target_label) + subpomdp_builder = payntbind.synthesis.SubPomdpBuilder(self.pomdp, self.reward_name, self.target_label) subpomdp_builder.set_discount_factor(discount_factor) relevant_observations = stormpy.storage.BitVector(self.pomdp.nr_observations,True) initial_distribution = {self.simulated_model.initial_state : 1} diff --git a/paynt/simulation/simulation.py 
b/paynt/simulation/simulation.py index eea16e54d..3259c8765 100644 --- a/paynt/simulation/simulation.py +++ b/paynt/simulation/simulation.py @@ -1,5 +1,3 @@ -import stormpy.synthesis - import random import numpy import json diff --git a/paynt/synthesizer/conflict_generator/dtmc.py b/paynt/synthesizer/conflict_generator/dtmc.py index f9b31a7a9..350291945 100644 --- a/paynt/synthesizer/conflict_generator/dtmc.py +++ b/paynt/synthesizer/conflict_generator/dtmc.py @@ -1,4 +1,4 @@ -import stormpy.synthesis +import payntbind import logging logger = logging.getLogger(__name__) @@ -21,7 +21,7 @@ def initialize(self): holes = set([hole for hole in holes_bv]) state_to_holes.append(holes) formulae = self.quotient.specification.stormpy_formulae() - self.counterexample_generator = stormpy.synthesis.CounterexampleGenerator( + self.counterexample_generator = payntbind.synthesis.CounterexampleGenerator( self.quotient.quotient_mdp, self.quotient.design_space.num_holes, state_to_holes, formulae) diff --git a/paynt/synthesizer/conflict_generator/mdp.py b/paynt/synthesizer/conflict_generator/mdp.py index 025f421ca..b95a7947e 100644 --- a/paynt/synthesizer/conflict_generator/mdp.py +++ b/paynt/synthesizer/conflict_generator/mdp.py @@ -1,4 +1,4 @@ -import stormpy.synthesis +import payntbind import paynt.synthesizer.conflict_generator.dtmc import paynt.verification.property @@ -16,7 +16,7 @@ def initialize(self): holes = set([hole for hole in holes_bv]) state_to_holes.append(holes) formulae = self.quotient.specification.stormpy_formulae() - self.counterexample_generator = stormpy.synthesis.CounterexampleGeneratorMdp( + self.counterexample_generator = payntbind.synthesis.CounterexampleGeneratorMdp( self.quotient.quotient_mdp, self.quotient.design_space.num_holes, state_to_holes, formulae) diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index bb904ba26..6773938ca 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -1,4 
+1,5 @@ -import stormpy.synthesis +import stormpy +import payntbind import paynt.family.family import paynt.quotient.models @@ -647,7 +648,7 @@ def create_action_coloring(self): for choice in self.quotient.state_action_choices[state][action]: choice_to_hole_options[choice] = color - coloring = stormpy.synthesis.Coloring(family.family, quotient_mdp.nondeterministic_choice_indices, choice_to_hole_options) + coloring = payntbind.synthesis.Coloring(family.family, quotient_mdp.nondeterministic_choice_indices, choice_to_hole_options) self.action_coloring_family = family self.action_coloring = coloring return @@ -927,7 +928,7 @@ def synthesize_policy_for_family_using_ceg(self, family, prop): quotient_choice = policy_quotient_mdp.quotient_choice_map[choice] choice_to_hole_options.append(quotient_assignment[quotient_choice]) - coloring = stormpy.synthesis.Coloring(family.family, policy_quotient_mdp.model.nondeterministic_choice_indices, choice_to_hole_options) + coloring = payntbind.synthesis.Coloring(family.family, policy_quotient_mdp.model.nondeterministic_choice_indices, choice_to_hole_options) quotient_container = paynt.quotient.quotient.DtmcFamilyQuotient(policy_quotient_mdp.model, family, coloring, self.quotient.specification.negate()) conflict_generator = paynt.synthesizer.conflict_generator.dtmc.ConflictGeneratorDtmc(quotient_container) # DTMC CE # conflict_generator = paynt.synthesizer.conflict_generator.mdp.ConflictGeneratorMdp(quotient_container) # MDP SAT CE diff --git a/paynt/synthesizer/synthesizer_cegis.py b/paynt/synthesizer/synthesizer_cegis.py index 9d7df2853..9ca9ff852 100644 --- a/paynt/synthesizer/synthesizer_cegis.py +++ b/paynt/synthesizer/synthesizer_cegis.py @@ -1,5 +1,3 @@ -import stormpy.synthesis - import paynt.synthesizer.synthesizer import paynt.synthesizer.conflict_generator.dtmc import paynt.synthesizer.conflict_generator.mdp diff --git a/paynt/verification/property.py b/paynt/verification/property.py index 0eaedb42f..14628d03a 100644 --- 
a/paynt/verification/property.py +++ b/paynt/verification/property.py @@ -1,5 +1,5 @@ import stormpy -import stormpy.synthesis +import payntbind import math import operator @@ -27,8 +27,8 @@ class Property: @classmethod def set_model_checking_precision(cls, precision): cls.model_checking_precision = precision - stormpy.synthesis.set_precision_native(cls.environment.solver_environment.native_solver_environment, precision) - stormpy.synthesis.set_precision_minmax(cls.environment.solver_environment.minmax_solver_environment, precision) + payntbind.synthesis.set_precision_native(cls.environment.solver_environment.native_solver_environment, precision) + payntbind.synthesis.set_precision_minmax(cls.environment.solver_environment.minmax_solver_environment, precision) @classmethod def initialize(cls): @@ -113,7 +113,7 @@ def transform_until_to_eventually(self): if not self.is_until: return logger.info("converting until formula to eventually...") - formula = stormpy.synthesis.transform_until_to_eventually(self.property.raw_formula) + formula = payntbind.synthesis.transform_until_to_eventually(self.property.raw_formula) prop = stormpy.core.Property("", formula) self.__init__(prop, self.discount_factor) @@ -231,7 +231,7 @@ def transform_until_to_eventually(self): if not self.is_until: return logger.info("converting until formula to eventually...") - formula = stormpy.synthesis.transform_until_to_eventually(self.property.raw_formula) + formula = payntbind.synthesis.transform_until_to_eventually(self.property.raw_formula) prop = stormpy.core.Property("", formula) self.__init__(prop, self.discount_factor, self.epsilon) diff --git a/payntbind/.dockerignore b/payntbind/.dockerignore new file mode 100644 index 000000000..9f1f9b8ab --- /dev/null +++ b/payntbind/.dockerignore @@ -0,0 +1,20 @@ +#Git files +.git +.github +.gitignore +#The remainder is based on .gitignore +**/*.so +**/*.py[cod] +lib/**/_config.py +**/.eggs/ +**/*.egg-info/ +**/build/ +**/dist/ +**/.idea/ 
+**/__pycache__/ +**/_build/ +**/.pytest_cache/ +**/.idea/ +**/cmake-build-debug/ + +**/.DS_Store diff --git a/payntbind/.gitignore b/payntbind/.gitignore new file mode 100644 index 000000000..b37085814 --- /dev/null +++ b/payntbind/.gitignore @@ -0,0 +1,15 @@ +*.so +*.py[cod] +lib/**/_config.py +.eggs/ +*.egg-info/ +build/ +dist/ +.idea/ +__pycache__/ +_build/ +.pytest_cache/ +.idea/ +cmake-build-debug/ + +.DS_Store diff --git a/payntbind/CMakeLists.txt b/payntbind/CMakeLists.txt new file mode 100644 index 000000000..fae8e16b3 --- /dev/null +++ b/payntbind/CMakeLists.txt @@ -0,0 +1,94 @@ +cmake_minimum_required(VERSION 3.15) + +project(payntbind) + +option(STORMPY_DISABLE_SIGNATURE_DOC "Disable the signature in the documentation" OFF) +MARK_AS_ADVANCED(STORMPY_DISABLE_SIGNATURE_DOC) +set(PYBIND_VERSION "" CACHE STRING "Pybind11 version to use") +MARK_AS_ADVANCED(PYBIND_VERSION) +set(STORM_DIR_HINT "" CACHE STRING "A hint where the Storm library can be found.") + +find_package(storm REQUIRED HINTS ${STORM_DIR_HINT}) + +find_package(Python COMPONENTS Interpreter Development REQUIRED) +include(resources/include_pybind11.cmake) + +include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/macros.cmake) + +set(CMAKE_CXX_STANDARD 17) + +# This sets interprocedural optimization off as this leads to some problems on some systems +set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF) +# This sets the default visibility from hidden to default, +# which is recommended *not* to do, but leads to errors otherwise. 
+set(CMAKE_CXX_VISIBILITY_PRESET "default") + + +# Workaround for issue with Boost >= 1.81 +find_package(Boost 1.65.1 QUIET REQUIRED COMPONENTS filesystem system) +if (Boost_FOUND) + if (${Boost_VERSION} VERSION_GREATER_EQUAL "1.81.0") + message(STATUS "Stormpy - Using workaround for Boost >= 1.81") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBOOST_PHOENIX_STL_TUPLE_H_") + endif() +endif () + + +# Set configurations +set(STORM_VERSION ${storm_VERSION}) +# Set number types from Carl +set_variable_string(STORM_USE_CLN_EA_BOOL ${STORM_USE_CLN_EA}) +set_variable_string(STORM_USE_CLN_RF_BOOL ${STORM_USE_CLN_RF}) +if (STORM_USE_CLN_EA) + set(PYCARL_EA_PACKAGE "cln") +else() + set(PYCARL_EA_PACKAGE "gmp") +endif() +if (STORM_USE_CLN_RF) + set(PYCARL_RF_PACKAGE "cln") +else() + set(PYCARL_RF_PACKAGE "gmp") +endif() +set(PYCARL_IMPORTS "import pycarl") +if (STORM_USE_CLN_EA OR STORM_USE_CLN_RF) + set(PYCARL_IMPORTS "${PYCARL_IMPORTS}\nimport pycarl.cln") +endif() +if (NOT STORM_USE_CLN_EA OR NOT STORM_USE_CLN_RF) + set(PYCARL_IMPORTS "${PYCARL_IMPORTS}\nimport pycarl.gmp") +endif() + +# Set dependency variables +set_dependency_var(SPOT) +set_dependency_var(XERCES) + +# Helper functions +function(payntbind_module NAME) + file(GLOB_RECURSE "STORM_${NAME}_SOURCES" "${CMAKE_CURRENT_SOURCE_DIR}/src/${NAME}/*.cpp") + pybind11_add_module(${NAME} "${CMAKE_CURRENT_SOURCE_DIR}/src/mod_${NAME}.cpp" ${STORM_${NAME}_SOURCES}) + target_include_directories(${NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${storm_INCLUDE_DIR} ${storm-parsers_INCLUDE_DIR} ${storm-counterexamples_INCLUDE_DIR} ${storm-version-info_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/src) + target_link_libraries(${NAME} PRIVATE storm storm-parsers storm-counterexamples storm-version-info ) + if (NOT (${NAME} STREQUAL "core")) + set_target_properties(${NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${NAME}") + endif() +endfunction(payntbind_module) + +function(stormpy_optional_module NAME 
ADDITIONAL_LIBS ADDITIONAL_INCLUDES) + file(GLOB_RECURSE "STORM_${NAME}_SOURCES" "${CMAKE_CURRENT_SOURCE_DIR}/src/${NAME}/*.cpp") + pybind11_add_module(${NAME} "${CMAKE_CURRENT_SOURCE_DIR}/src/mod_${NAME}.cpp" ${STORM_${NAME}_SOURCES}) + target_include_directories(${NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${storm_INCLUDE_DIR} ${storm-parsers_INCLUDE_DIR} ${storm-counterexamples_INCLUDE_DIR} ${storm-version-info_INCLUDE_DIR} ${ADDITIONAL_INCLUDES} ${CMAKE_CURRENT_BINARY_DIR}/src) + target_link_libraries(${NAME} PRIVATE storm storm-parsers storm-counterexamples storm-version-info ${ADDITIONAL_LIBS}) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/${NAME}_config.py.in ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${NAME}/_config.py @ONLY) + set_target_properties(${NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${NAME}") +endfunction(stormpy_optional_module) + +# Generate definitions used during compilation +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/src/config.h) + +payntbind_module(synthesis) +target_include_directories(synthesis + PRIVATE + src/synthesis/decpomdp/madp/src/base/ + src/synthesis/decpomdp/madp/src/include/ + src/synthesis/decpomdp/madp/src/parser/ +) +target_link_libraries(synthesis PUBLIC storm storm-pomdp) diff --git a/payntbind/README.md b/payntbind/README.md new file mode 100644 index 000000000..718d40bba --- /dev/null +++ b/payntbind/README.md @@ -0,0 +1,25 @@ +Stormpy - Python bindings for Storm +=================================== + +[![Build Status](https://github.com/moves-rwth/stormpy/workflows/Build%20Test/badge.svg)](https://github.com/moves-rwth/stormpy/actions) +[![GitHub release](https://img.shields.io/github/release/moves-rwth/stormpy.svg)](https://github.com/moves-rwth/stormpy/releases/) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7763208.svg)](https://doi.org/10.5281/zenodo.7763208) + +Python bindings for [Storm](https://www.stormchecker.org/), created using 
[PyBind11](https://pybind11.readthedocs.io/). + +### Get started +Get started with stormpy directly in the browser via JupyterLab: + +[![badge](https://img.shields.io/badge/try%20out-stormpy-579ACA.svg?logo=)](https://mybinder.org/v2/gh/moves-rwth/stormpy/master?urlpath=lab/tree/notebooks/getting_started.ipynb) + +For more information on stormpy, please check the [documentation](https://moves-rwth.github.io/stormpy/). + + +### Dependencies + +Stormpy depends on [pycarl](https://github.com/moves-rwth/pycarl/). + +### Authors + +- Sebastian Junges +- Matthias Volk diff --git a/payntbind/cmake/CMakeLists.txt b/payntbind/cmake/CMakeLists.txt new file mode 100644 index 000000000..6ceeeef12 --- /dev/null +++ b/payntbind/cmake/CMakeLists.txt @@ -0,0 +1,38 @@ +cmake_minimum_required(VERSION 3.15) + +project(storm-version) +set(STORM_DIR_HINT "" CACHE STRING "A hint where the Storm library can be found.") + +find_package(storm REQUIRED HINTS ${STORM_DIR_HINT}) + +include(${CMAKE_CURRENT_SOURCE_DIR}/macros.cmake) + + +# Set configuration +set(STORM_DIR ${storm_DIR}) +set(STORM_VERSION ${storm_VERSION}) +set(STORM_LIBS ${storm_LIBRARIES}) + + +# Check for optional Storm libraries +storm_with_lib(DFT) +storm_with_lib(GSPN) +storm_with_lib(PARS) +storm_with_lib(POMDP) + + +# Set variables +set_variable_string(STORM_CLN_EA_BOOL ${STORM_USE_CLN_EA}) +set_variable_string(STORM_CLN_RF_BOOL ${STORM_USE_CLN_RF}) + +set_variable_string(STORM_XERCES_BOOL ${STORM_HAVE_XERCES}) +set_variable_string(STORM_SPOT_BOOL ${STORM_HAVE_SPOT}) + +set_variable_string(HAVE_STORM_DFT_BOOL ${HAVE_STORM_DFT}) +set_variable_string(HAVE_STORM_GSPN_BOOL ${HAVE_STORM_GSPN}) +set_variable_string(HAVE_STORM_PARS_BOOL ${HAVE_STORM_PARS}) +set_variable_string(HAVE_STORM_POMDP_BOOL ${HAVE_STORM_POMDP}) + + +# Write configuration file +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.py.in ${CMAKE_CURRENT_BINARY_DIR}/generated/config.py @ONLY) diff --git a/payntbind/cmake/config.py.in 
b/payntbind/cmake/config.py.in new file mode 100644 index 000000000..1ea28fc2c --- /dev/null +++ b/payntbind/cmake/config.py.in @@ -0,0 +1,12 @@ +# Auto-generated by CMake. + +STORM_DIR = "@STORM_DIR@" +STORM_VERSION = "@STORM_VERSION@" +STORM_CLN_EA = @STORM_CLN_EA_BOOL@ +STORM_CLN_RF = @STORM_CLN_RF_BOOL@ +STORM_XERCES = @STORM_XERCES_BOOL@ +STORM_SPOT = @STORM_SPOT_BOOL@ +HAVE_STORM_DFT = @HAVE_STORM_DFT_BOOL@ +HAVE_STORM_GSPN = @HAVE_STORM_GSPN_BOOL@ +HAVE_STORM_PARS = @HAVE_STORM_PARS_BOOL@ +HAVE_STORM_POMDP = @HAVE_STORM_POMDP_BOOL@ diff --git a/payntbind/cmake/macros.cmake b/payntbind/cmake/macros.cmake new file mode 100644 index 000000000..53636816b --- /dev/null +++ b/payntbind/cmake/macros.cmake @@ -0,0 +1,45 @@ +# Set variable to strings "True"/"False" according to condition +function(set_variable_string NAME CONDITION) + if(CONDITION) + set(${NAME} "True" PARENT_SCOPE) + else() + set(${NAME} "False" PARENT_SCOPE) + endif() +endfunction(set_variable_string) + +# Check whether Storm library is available +# Sets variable HAVE_STORM_XYZ +function(storm_with_lib NAME) + string(TOLOWER ${NAME} NAME_LOWER) + find_library(STORM_${NAME} NAMES storm-${NAME_LOWER} HINTS "${storm_DIR}/lib/") + if(STORM_${NAME}) + set(HAVE_STORM_${NAME} TRUE PARENT_SCOPE) + else() + set(HAVE_STORM_${NAME} FALSE PARENT_SCOPE) + endif() +endfunction(storm_with_lib) + +# Note that the following functions cannot simply call set_variable_string because the scope would change. +# Using 'macro' instead of 'function' is also not viable because arguments are then not handled as variables any more. 
+ +# Set variable for Storm dependencies +# which can be checked with STORM_HAVE_XYZ +# Sets variable STORM_WITH_XYZ_BOOL +function(set_dependency_var NAME) + if (STORM_HAVE_${NAME}) + set(STORM_WITH_${NAME}_BOOL "True" PARENT_SCOPE) + else() + set(STORM_WITH_${NAME}_BOOL "False" PARENT_SCOPE) + endif() +endfunction(set_dependency_var) + +# Set variable for optional Storm libraries (if used) +# which can be checked with HAVE_STORM_XYZ and optionally USE_STORM_XYZ +# Sets variable STORM_WITH_XYZ_BOOL +function(set_optional_lib_var NAME) + if ((USE_STORM_${NAME}) AND (HAVE_STORM_${NAME})) + set(STORM_WITH_${NAME}_BOOL "True" PARENT_SCOPE) + else() + set(STORM_WITH_${NAME}_BOOL "False" PARENT_SCOPE) + endif() +endfunction(set_optional_lib_var) diff --git a/payntbind/lib/payntbind/__init__.py b/payntbind/lib/payntbind/__init__.py new file mode 100644 index 000000000..fb2aef545 --- /dev/null +++ b/payntbind/lib/payntbind/__init__.py @@ -0,0 +1,13 @@ +import sys + +if sys.version_info[0] == 2: + raise ImportError('Python 2.x is not supported for stormpy.') + +from .synthesis import * + +__version__ = "unknown" +try: + from ._version import __version__ +except ImportError: + # We're running in a tree that doesn't have a _version.py, so we don't know what our version is. + pass diff --git a/payntbind/lib/payntbind/_version.py b/payntbind/lib/payntbind/_version.py new file mode 100644 index 000000000..3dc1f76bc --- /dev/null +++ b/payntbind/lib/payntbind/_version.py @@ -0,0 +1 @@ +__version__ = "0.1.0" diff --git a/payntbind/lib/payntbind/synthesis/__init__.py b/payntbind/lib/payntbind/synthesis/__init__.py new file mode 100644 index 000000000..4010b85b1 --- /dev/null +++ b/payntbind/lib/payntbind/synthesis/__init__.py @@ -0,0 +1,2 @@ +from . 
import synthesis +from .synthesis import * diff --git a/payntbind/resources/include_pybind11.cmake b/payntbind/resources/include_pybind11.cmake new file mode 100644 index 000000000..56099e9c0 --- /dev/null +++ b/payntbind/resources/include_pybind11.cmake @@ -0,0 +1,17 @@ +#find_package(pybind11 ${PYBIND_VERSION} CONFIG QUIET) +#if(pybind11_FOUND) +# message(STATUS "Stormpy - Using system version of pybind11 (version ${PYBIND_VERSION}).") +#else() + message(STATUS "Stormpy - Using shipped version of pybind11 (version ${PYBIND_VERSION}).") + + include(FetchContent) + FetchContent_Declare( + pybind11 + GIT_REPOSITORY https://github.com/pybind/pybind11 + GIT_TAG "v${PYBIND_VERSION}" + PATCH_COMMAND git apply ${CMAKE_CURRENT_SOURCE_DIR}/resources/pybind11_${PYBIND_VERSION}.patch || git apply ${CMAKE_CURRENT_SOURCE_DIR}/resources/pybind11_${PYBIND_VERSION}.patch --reverse --check + ) + + FetchContent_MakeAvailable(pybind11) +#endif() + diff --git a/payntbind/resources/pybind11_2.10.0.patch b/payntbind/resources/pybind11_2.10.0.patch new file mode 100644 index 000000000..547aad6f8 --- /dev/null +++ b/payntbind/resources/pybind11_2.10.0.patch @@ -0,0 +1,22 @@ +diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h +index a0e32281..f132d096 100644 +--- a/include/pybind11/cast.h ++++ b/include/pybind11/cast.h +@@ -657,7 +657,7 @@ public: + } + + static constexpr auto name +- = const_name("Tuple[") + concat(make_caster::name...) + const_name("]"); ++ = const_name("Tuple[") + pybind11::detail::concat(make_caster::name...) 
+ const_name("]"); + + template + using cast_op_type = type; +@@ -1399,7 +1399,7 @@ public: + static_assert(args_pos == -1 || args_pos == constexpr_first(), + "py::args cannot be specified more than once"); + +- static constexpr auto arg_names = concat(type_descr(make_caster::name)...); ++ static constexpr auto arg_names = pybind11::detail::concat(type_descr(make_caster::name)...); + + bool load_args(function_call &call) { return load_impl_sequence(call, indices{}); } + diff --git a/payntbind/resources/pybind11_2.11.1.patch b/payntbind/resources/pybind11_2.11.1.patch new file mode 100644 index 000000000..a7f01313e --- /dev/null +++ b/payntbind/resources/pybind11_2.11.1.patch @@ -0,0 +1,22 @@ +diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h +index db393411..117680a1 100644 +--- a/include/pybind11/cast.h ++++ b/include/pybind11/cast.h +@@ -661,7 +661,7 @@ public: + } + + static constexpr auto name +- = const_name("Tuple[") + concat(make_caster::name...) + const_name("]"); ++ = const_name("Tuple[") + pybind11::detail::concat(make_caster::name...) 
+ const_name("]"); + + template + using cast_op_type = type; +@@ -1438,7 +1438,7 @@ public: + static_assert(args_pos == -1 || args_pos == constexpr_first(), + "py::args cannot be specified more than once"); + +- static constexpr auto arg_names = concat(type_descr(make_caster::name)...); ++ static constexpr auto arg_names = pybind11::detail::concat(type_descr(make_caster::name)...); + + bool load_args(function_call &call) { return load_impl_sequence(call, indices{}); } + diff --git a/payntbind/setup.cfg b/payntbind/setup.cfg new file mode 100644 index 000000000..98f90431b --- /dev/null +++ b/payntbind/setup.cfg @@ -0,0 +1,8 @@ +[aliases] +test=pytest + +[tool:pytest] +addopts = --doctest-glob='*.rst' --nbval +testpaths = tests/ examples/ doc/ +python_files = test*.py examples/*.py +python_functions = *_test test_* example_* diff --git a/payntbind/setup.py b/payntbind/setup.py new file mode 100755 index 000000000..40dae6cfb --- /dev/null +++ b/payntbind/setup.py @@ -0,0 +1,185 @@ +import os +import sys +import subprocess +import datetime + +from setuptools import setup, Extension, find_packages +from setuptools.command.build_ext import build_ext + +import setup.helper as setup_helper +from setup.config import SetupConfig + +if sys.version_info[0] == 2: + sys.exit('Sorry, Python 2.x is not supported') + +# Minimal storm version required +storm_min_version = "1.8.2" + +# Get the long description from the README file +with open(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'README.md'), encoding='utf-8') as f: + long_description = f.read() + + +class CMakeExtension(Extension): + def __init__(self, name, sourcedir=''): + print(name) + Extension.__init__(self, name, sources=[]) + self.sourcedir = os.path.abspath(sourcedir) + + +class CMakeBuild(build_ext): + user_options = build_ext.user_options + [ + ('storm-dir=', None, 'Path to storm root (binary) location'), + ('debug', None, 'Build in Debug mode'), + ('jobs=', 'j', 'Number of jobs to use for compiling'), + 
('pybind-version=', None, 'Pybind11 version to use'), + ] + + config = SetupConfig() + + def _extdir(self, extname): + return os.path.abspath(os.path.dirname(self.get_ext_fullpath(extname))) + + def run(self): + try: + _ = subprocess.check_output(['cmake', '--version']) + except OSError: + raise RuntimeError("CMake must be installed to build the following extensions: " + + ", ".join(e.name for e in self.extensions)) + + # Build cmake version info + print("Stormpy - Building into {}".format(self.build_temp)) + build_temp_version = self.build_temp + "-version" + setup_helper.ensure_dir_exists(build_temp_version) + + # Write config + setup_helper.ensure_dir_exists(self.build_temp) + self.config.write_config(os.path.join(self.build_temp, "build_config.cfg")) + + cmake_args = [] + storm_dir = os.path.expanduser(self.config.get_as_string("storm_dir")) + if storm_dir: + cmake_args += ['-DSTORM_DIR_HINT=' + storm_dir] + _ = subprocess.check_output(['cmake', os.path.abspath("cmake")] + cmake_args, cwd=build_temp_version) + cmake_conf = setup_helper.load_cmake_config(os.path.join(build_temp_version, 'generated/config.py')) + + # Set storm directory + if storm_dir == "": + storm_dir = cmake_conf.STORM_DIR + if storm_dir != cmake_conf.STORM_DIR: + print("Stormpy - WARNING: Using different storm directory {} instead of given {}!".format( + cmake_conf.STORM_DIR, + storm_dir)) + storm_dir = cmake_conf.STORM_DIR + + # Check version + from packaging.version import Version # Need to import here because otherwise packaging cannot be automatically installed as required dependency + storm_version, storm_commit = setup_helper.parse_storm_version(cmake_conf.STORM_VERSION) + if Version(storm_version) < Version(storm_min_version): + print('Stormpy - Error: Storm version {} from \'{}\' is not supported anymore!'.format(storm_version, storm_dir)) + print(" For more information, see https://moves-rwth.github.io/payntbind/installation.html#compatibility-of-stormpy-and-storm") + sys.exit(42) # 
Custom exit code which can be used for incompatible checks + + # Set pybind version + from pycarl._config import PYBIND_VERSION as pycarl_pybind_version + pybind_version = self.config.get_as_string("pybind_version") + if pybind_version == "": + pybind_version = pycarl_pybind_version + elif Version(pybind_version) != Version(pycarl_pybind_version): + print("Stormpy - WARNING: Given pybind11 version {} differs from pycarl pybind11 version {}!".format(pybind_version, pycarl_pybind_version)) + + # Print build info + print("Stormpy - Using storm {} from {}".format(storm_version, storm_dir)) + print("Stormpy - Using pybind11 version {}".format(pybind_version)) + + build_type = 'Debug' if self.config.get_as_bool("debug") else 'Release' + # Set cmake build options + cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + self._extdir("core")] + cmake_args += ['-DPython_EXECUTABLE=' + sys.executable] + cmake_args += ['-DCMAKE_BUILD_TYPE=' + build_type] + cmake_args += ['-DPYBIND_VERSION=' + pybind_version] + if storm_dir is not None: + cmake_args += ['-DSTORM_DIR_HINT=' + storm_dir] + + # Configure extensions + env = os.environ.copy() + env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''), self.distribution.get_version()) + setup_helper.ensure_dir_exists(self.build_temp) + print("Stormpy - CMake args={}".format(cmake_args)) + # Call cmake + subprocess.check_call(['cmake', os.path.abspath("")] + cmake_args, cwd=self.build_temp, env=env) + + # Set build args + build_args = ['--config', build_type] + build_args += ['--', '-j{}'.format(self.config.get_as_int("jobs"))] + + # Build extensions + for ext in self.extensions: + # Call make + subprocess.check_call(['cmake', '--build', '.', '--target', ext.name] + build_args, cwd=self.build_temp) + + def initialize_options(self): + build_ext.initialize_options(self) + # Set default values for custom cmdline flags + self.storm_dir = None + self.debug = None + self.jobs = None + self.pybind_version = None + + def 
finalize_options(self): + build_ext.finalize_options(self) + # Load setup config + # This can only be done after the finalization step, because otherwise build_temp is not initialized yet. + self.config.load_from_file(os.path.join(self.build_temp, "build_config.cfg")) + # Update setup config + self.config.update("storm_dir", self.storm_dir) + self.config.update("debug", self.debug) + self.config.update("jobs", self.jobs) + self.config.update("pybind_version", self.pybind_version) + + +setup( + name="payntbind", + version=setup_helper.obtain_version(), + author="M. Volk", + author_email="matthias.volk@cs.rwth-aachen.de", + maintainer="S. Junges", + maintainer_email="sebastian.junges@cs.rwth-aachen.de", + url="https://github.com/moves-rwth/stormpy/", + description="stormpy - Python Bindings for Storm", + long_description=long_description, + long_description_content_type='text/markdown', + project_urls={ + 'Documentation': 'https://moves-rwth.github.io/stormpy/', + 'Source': 'https://github.com/moves-rwth/stormpy/', + 'Bug reports': 'https://github.com/moves-rwth/stormpy/issues', + }, + classifiers=[ + 'Intended Audience :: Science/Research', + 'Topic :: Scientific/Engineering', + 'Topic :: Software Development :: Libraries :: Python Modules', + ], + + packages=find_packages('lib'), + package_dir={'': 'lib'}, + include_package_data=True, + package_data={'stormpy.examples': ['examples/files/*']}, + ext_package='payntbind', + ext_modules=[ + CMakeExtension('synthesis') + ], + cmdclass={'build_ext': CMakeBuild}, + zip_safe=False, + install_requires=['pycarl>=2.2.0'], + setup_requires=['pycarl>=2.2.0', # required to check pybind version used for pycarl + 'pytest-runner', + 'packaging' + ], + tests_require=['pytest', 'nbval', 'numpy'], + extras_require={ + "numpy": ["numpy"], + "plot": ["matplotlib","numpy","scipy"], + "doc": ["Sphinx", "sphinx-bootstrap-theme", "nbsphinx", "ipython", "ipykernel"], # also requires pandoc to be installed + }, + python_requires='>=3.7', # 
required by packaging +) diff --git a/payntbind/setup/__init__.py b/payntbind/setup/__init__.py new file mode 100644 index 000000000..e17edbb67 --- /dev/null +++ b/payntbind/setup/__init__.py @@ -0,0 +1 @@ +# Intentionally left empty \ No newline at end of file diff --git a/payntbind/setup/config.py b/payntbind/setup/config.py new file mode 100644 index 000000000..80896b1dc --- /dev/null +++ b/payntbind/setup/config.py @@ -0,0 +1,89 @@ +import configparser +import os +import multiprocessing + + +class SetupConfig: + """ + Configuration for setup. + """ + + def __init__(self): + """ + Create config with default values + """ + self.config = configparser.ConfigParser() + self.config["build_ext"] = self._default_values() + + @staticmethod + def _default_values(): + """ + Return default values for config. + + :return: Dict with default values for build settings. + """ + try: + no_jobs = multiprocessing.cpu_count() if multiprocessing.cpu_count() is not None else 1 + except NotImplementedError: + no_jobs = 1 + return { + "storm_dir": "", + "debug": False, + "jobs": str(no_jobs), + "pybind_version": "" + } + + def load_from_file(self, path): + """ + Load config from file. + :param path Path to config file. + """ + if os.path.isfile(path): + self.config.read(path) + if not self.config.has_section("build_ext"): + self.config["build_ext"] = self._default_values() + + def write_config(self, path): + """ + Save config with build settings. + :param path Path to config file. + """ + with open(path, 'w') as configfile: + self.config.write(configfile) + + def get_as_bool(self, name): + """ + Get the boolean value for option name. + :param name: Name of option. + + :return Value as bool. + """ + return self.config.getboolean("build_ext", name) + + def get_as_int(self, name): + """ + Get the int value for option name. + :param name: Name of option. + + :return Value as integer. 
+ """ + return self.config.getint("build_ext", name) + + def get_as_string(self, name): + """ + Get the string value for option name. + :param name: Name of option. + + :return: Value as string. + """ + return self.config.get("build_ext", name) + + def update(self, name, value): + """ + Update name with given value if value is not None. + :param name: Name of option. + :param value: New value or None + """ + if value is not None: + assert self.config.has_option("build_ext", name) + self.config.set("build_ext", name, str(value)) diff --git a/payntbind/setup/helper.py b/payntbind/setup/helper.py new file mode 100755 index 000000000..8f81de895 --- /dev/null +++ b/payntbind/setup/helper.py @@ -0,0 +1,74 @@ +import importlib +import os +import re +import sys + + +def ensure_dir_exists(path): + """ + Check whether the directory exists and creates it if not. + """ + assert path is not None + try: + os.makedirs(path) + except FileExistsError: + pass + except OSError as exception: + if exception.errno != errno.EEXIST: + raise IOError("Cannot create directory: " + path) + except BaseException: + raise IOError("Path " + path + " seems not valid") + + +def parse_storm_version(version_string): + """ + Parses the version of storm. + + :param version_string: String containing version information. + :return: Tuple (version, commit) + """ + split = version_string.split('-') + version = split[0] + commit = "" + if len(split) > 1: + commit = split[1] + return version, commit + + +def obtain_version(): + """ + Obtains the version as specified in payntbind. + + :return: Version + """ + verstr = "unknown" + try: + verstrline = open('lib/payntbind/_version.py', "rt").read() + except EnvironmentError: + pass # Okay, there is no version file. 
+ else: + VSRE = r"^__version__ = ['\"]([^'\"]*)['\"]" + mo = re.search(VSRE, verstrline, re.M) + if mo: + verstr = mo.group(1) + else: + raise RuntimeError("unable to find version in payntbind/_version.py") + return verstr + + +def load_cmake_config(path): + """ + Load cmake config. + :param path: Path. + :return: Configuration. + """ + if sys.version_info[1] >= 5: + # Method for Python >= 3.5 + spec = importlib.util.spec_from_file_location("genconfig", path) + conf = importlib.util.module_from_spec(spec) + spec.loader.exec_module(conf) + return conf + else: + # Deprecated method for Python <= 3.4 + from importlib.machinery import SourceFileLoader + return SourceFileLoader("genconfig", path).load_module() diff --git a/payntbind/src/common.h b/payntbind/src/common.h new file mode 100644 index 000000000..7fbfb275e --- /dev/null +++ b/payntbind/src/common.h @@ -0,0 +1,22 @@ +#pragma once + +#include "config.h" + +#include +#include +#include +#include + +namespace py = pybind11; +using namespace pybind11::literals; + +#if PY_MAJOR_VERSION >= 3 +#define PY_DIV "__truediv__" +#define PY_RDIV "__rtruediv__" +#else +#define PY_DIV "__div__" +#define PY_RDIV "__rdiv__" +#endif + +PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr) +PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr) diff --git a/payntbind/src/config.h.in b/payntbind/src/config.h.in new file mode 100644 index 000000000..02c4716f4 --- /dev/null +++ b/payntbind/src/config.h.in @@ -0,0 +1 @@ +#cmakedefine STORMPY_DISABLE_SIGNATURE_DOC diff --git a/payntbind/src/helpers.h b/payntbind/src/helpers.h new file mode 100644 index 000000000..0635ef649 --- /dev/null +++ b/payntbind/src/helpers.h @@ -0,0 +1,40 @@ +/* + * helpers.h + * + * Created on: 16 Apr 2016 + * Author: harold + */ + +#ifndef PYTHON_HELPERS_H_ +#define PYTHON_HELPERS_H_ + +#include +#include + +/** + * Helper function to get a string out of the stream operator. + * Used for __str__ functions. 
+ */ +template +std::string streamToString(T const& t) { + std::stringstream ss; + ss << t; + return ss.str(); +} + +template +std::string containerToString(T& t) { + // is there a way to make ^this const&? + // I guess not all containers have const iterators + std::stringstream ss; + for (auto const& e : t) { + ss << e << ", "; + } + return ss.str(); +} + +// Be warned: Enabling something like this will break everything about Monomial, +// as to Python the shared_ptr (Arg) IS the Monomial +// //PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr); + +#endif /* PYTHON_HELPERS_H_ */ diff --git a/payntbind/src/mod_synthesis.cpp b/payntbind/src/mod_synthesis.cpp new file mode 100644 index 000000000..206bc3ab2 --- /dev/null +++ b/payntbind/src/mod_synthesis.cpp @@ -0,0 +1,14 @@ +#include "common.h" + +#include "synthesis/synthesis.h" + +PYBIND11_MODULE(synthesis, m) { + m.doc() = "Synthesis extension of Stormpy."; + +#ifdef STORMPY_DISABLE_SIGNATURE_DOC + py::options options; + options.disable_function_signatures(); +#endif + + define_synthesis(m); +} diff --git a/payntbind/src/synthesis/counterexamples/Counterexample.cpp b/payntbind/src/synthesis/counterexamples/Counterexample.cpp new file mode 100644 index 000000000..b7ec7304c --- /dev/null +++ b/payntbind/src/synthesis/counterexamples/Counterexample.cpp @@ -0,0 +1,501 @@ +#include "Counterexample.h" + +#include "storm/storage/BitVector.h" +#include "storm/exceptions/UnexpectedException.h" + +#include "storm/storage/sparse/JaniChoiceOrigins.h" +#include "storm/storage/sparse/StateValuations.h" + +#include "storm/utility/builder.h" +#include "storm/storage/SparseMatrix.h" +#include "storm/storage/sparse/ModelComponents.h" +#include "storm/models/sparse/StateLabeling.h" + +#include "storm/solver/OptimizationDirection.h" + +#include "storm/api/verification.h" +#include "storm/logic/Bound.h" +#include "storm/modelchecker/CheckTask.h" +#include "storm/modelchecker/hints/ExplicitModelCheckerHint.h" + +#include 
"storm/environment/Environment.h" +#include "storm/environment/solver/SolverEnvironment.h" + +#include + +namespace synthesis { + + template + std::shared_ptr CounterexampleGenerator::labelStates( + storm::models::sparse::Mdp const& mdp, + storm::logic::Formula const& label + ) { + std::shared_ptr> mdp_shared = std::make_shared>(mdp); + bool onlyInitialStatesRelevant = false; + storm::modelchecker::CheckTask task(label, onlyInitialStatesRelevant); + std::unique_ptr result_ptr = storm::api::verifyWithSparseEngine(mdp_shared, task); + std::shared_ptr mdp_target = std::make_shared(result_ptr->asExplicitQualitativeCheckResult()); + return mdp_target; + } + + template + CounterexampleGenerator::CounterexampleGenerator ( + storm::models::sparse::Mdp const& quotient_mdp, + uint64_t hole_count, + std::vector> const& mdp_holes, + std::vector> const& formulae + ) : quotient_mdp(quotient_mdp), hole_count(hole_count), mdp_holes(mdp_holes) { + + // create label formulae for our own labels + std::shared_ptr const& target_label_formula = std::make_shared(this->target_label); + std::shared_ptr const& until_label_formula = std::make_shared(this->until_label); + + // process all formulae + for(auto formula: formulae) { + + // store formula type and optimality type + assert(formula->isOperatorFormula()); + storm::logic::OperatorFormula const& of = formula->asOperatorFormula(); + + assert(of.hasOptimalityType()); + storm::solver::OptimizationDirection ot = of.getOptimalityType(); + bool is_safety = ot == storm::solver::OptimizationDirection::Minimize; + this->formula_safety.push_back(is_safety); + + bool is_reward = formula->isRewardOperatorFormula(); + this->formula_reward.push_back(is_reward); + if(!is_reward) { + this->formula_reward_name.push_back(""); + } else { + STORM_LOG_THROW(formula->asRewardOperatorFormula().hasRewardModelName(), storm::exceptions::InvalidArgumentException, "Name of the reward model must be specified."); + 
this->formula_reward_name.push_back(formula->asRewardOperatorFormula().getRewardModelName()); + } + + // extract predicate for until and target states and identify such states + storm::logic::Formula const& osf = of.getSubformula(); + if(!osf.isUntilFormula() && !osf.isEventuallyFormula()) { + throw storm::exceptions::NotImplementedException() << "Only until or reachability formulae supported."; + } + + std::shared_ptr modified_subformula; + if(osf.isUntilFormula()) { + storm::logic::UntilFormula const& uf = osf.asUntilFormula(); + + auto mdp_until = this->labelStates(this->quotient_mdp,uf.getLeftSubformula()); + this->mdp_untils.push_back(mdp_until); + + auto mdp_target = this->labelStates(this->quotient_mdp, uf.getRightSubformula()); + this->mdp_targets.push_back(mdp_target); + + modified_subformula = std::make_shared(until_label_formula, target_label_formula); + } else if(osf.isEventuallyFormula()) { + storm::logic::EventuallyFormula const& ef = osf.asEventuallyFormula(); + + this->mdp_untils.push_back(NULL); + + auto mdp_target = this->labelStates(this->quotient_mdp,ef.getSubformula()); + this->mdp_targets.push_back(mdp_target); + + modified_subformula = std::make_shared(target_label_formula, ef.getContext()); + } + + // integrate formula into original context + std::shared_ptr modified_formula; + if(!is_reward) { + modified_formula = std::make_shared(modified_subformula, of.getOperatorInformation()); + } else { + modified_formula = std::make_shared(modified_subformula, this->formula_reward_name.back(), of.getOperatorInformation()); + } + this->formula_modified.push_back(modified_formula); + } + } + + template + void CounterexampleGenerator::prepareDtmc( + storm::models::sparse::Dtmc const& dtmc, + std::vector const& state_map + ) { + + // Clear up previous DTMC metadata + this->hole_wave.clear(); + this->wave_states.clear(); + + // Get DTMC info + this->dtmc = std::make_shared>(dtmc); + this->state_map = state_map; + uint64_t dtmc_states = 
this->dtmc->getNumberOfStates(); + StateType initial_state = *(this->dtmc->getInitialStates().begin()); + storm::storage::SparseMatrix const& transition_matrix = this->dtmc->getTransitionMatrix(); + + // Mark all holes as unregistered + this->hole_wave.resize(this->hole_count,0); + + // Associate states of a DTMC with relevant holes and store their count + std::vector> dtmc_holes(dtmc_states); + std::vector unregistered_holes_count(dtmc_states, 0); + for(StateType state = 0; state < dtmc_states; state++) { + dtmc_holes[state] = this->mdp_holes[state_map[state]]; + unregistered_holes_count[state] = dtmc_holes[state].size(); + } + + // Prepare to explore + // wave increases by one when new holes of a blocking candidate are registered + uint64_t current_wave = 0; + // true if the state was reached during exploration (expanded states + both horizons) + storm::storage::BitVector reachable_flag(dtmc_states, false); + // non-blocking horizon + std::stack state_horizon; + // horizon containing, for a current wave, only blocking states + std::vector state_horizon_blocking; + // blocking state containing currently the least number of unregistered holes + flag if the value was set + bool blocking_candidate_set = false; + StateType blocking_candidate; + + // Round 0: encounter initial state first (important) + this->wave_states.push_back(std::vector()); + reachable_flag.set(initial_state); + if(unregistered_holes_count[initial_state] == 0) { + // non-blocking + state_horizon.push(initial_state); + } else { + // blocking + state_horizon_blocking.push_back(initial_state); + blocking_candidate_set = true; + blocking_candidate = initial_state; + } + + // Explore the state space + while(true) { + // Expand the non-blocking horizon + while(!state_horizon.empty()) { + StateType state = state_horizon.top(); + state_horizon.pop(); + this->wave_states.back().push_back(state); + + // Reach successors + for(auto entry: transition_matrix.getRow(state)) { + StateType successor = 
entry.getColumn(); + if(reachable_flag[successor]) { + // already reached + continue; + } + // new state reached + reachable_flag.set(successor); + if(unregistered_holes_count[successor] == 0) { + // non-blocking + state_horizon.push(successor); + } else { + // blocking + state_horizon_blocking.push_back(successor); + if(!blocking_candidate_set || unregistered_holes_count[successor] < unregistered_holes_count[blocking_candidate]) { + // new blocking candidate + blocking_candidate_set = true; + blocking_candidate = successor; + } + } + } + } + + // Non-blocking horizon exhausted + if(!blocking_candidate_set) { + // Nothing more to expand + break; + } + + // Start a new wave + current_wave++; + this->wave_states.push_back(std::vector()); + blocking_candidate_set = false; + + // Register all unregistered holes of this blocking state + for(uint64_t hole: dtmc_holes[blocking_candidate]) { + if(this->hole_wave[hole] == 0) { + hole_wave[hole] = current_wave; + // std::cout << "[storm] hole " << hole << " expanded in wave " << current_wave << std::endl; + } + } + + // Recompute number of unregistered holes in each state + for(StateType state = 0; state < dtmc_states; state++) { + unregistered_holes_count[state] = 0; + for(uint64_t hole: dtmc_holes[state]) { + if(this->hole_wave[hole] == 0) { + unregistered_holes_count[state]++; + } + } + } + + // Unblock the states from the blocking horizon + std::vector old_blocking_horizon; + old_blocking_horizon.swap(state_horizon_blocking); + for(StateType state: old_blocking_horizon) { + if(unregistered_holes_count[state] == 0) { + // state unblocked + state_horizon.push(state); + } else { + // still blocking + state_horizon_blocking.push_back(state); + if(!blocking_candidate_set || unregistered_holes_count[state] < unregistered_holes_count[blocking_candidate]) { + // new blocking candidate + blocking_candidate_set = true; + blocking_candidate = state; + } + } + } + } + } + + template + void CounterexampleGenerator::prepareSubdtmc ( + 
uint64_t formula_index, + std::shared_ptr const> mdp_bounds, + std::vector const& mdp_quotient_state_map, + std::vector>> & matrix_subdtmc, + storm::models::sparse::StateLabeling & labeling_subdtmc, + std::unordered_map> & reward_models_subdtmc + ) { + + // Get DTMC info + StateType dtmc_states = dtmc->getNumberOfStates(); + + // Introduce expanded state space + uint64_t sink_state_false = dtmc_states; + uint64_t sink_state_true = dtmc_states+1; + + // Label target states of a DTMC + std::shared_ptr mdp_target = this->mdp_targets[formula_index]; + std::shared_ptr mdp_until = this->mdp_untils[formula_index]; + labeling_subdtmc.addLabel(this->target_label); + labeling_subdtmc.addLabel(this->until_label); + for(StateType state = 0; state < dtmc_states; state++) { + StateType mdp_state = this->state_map[state]; + if((*mdp_target)[mdp_state]) { + labeling_subdtmc.addLabelToState(this->target_label, state); + } + if(mdp_until != NULL && (*mdp_until)[mdp_state]) { + labeling_subdtmc.addLabelToState(this->until_label, state); + } + } + // Associate true sink with the target label + labeling_subdtmc.addLabelToState(this->target_label, sink_state_true); + + // Map MDP bounds onto the state space of a quotient MDP + bool have_bounds = mdp_bounds != NULL; + std::vector quotient_mdp_bounds; + if(have_bounds) { + auto const& mdp_values = mdp_bounds->getValueVector(); + quotient_mdp_bounds.resize(this->quotient_mdp.getNumberOfStates()); + uint64_t mdp_states = mdp_values.size(); + for(StateType state = 0; state < mdp_states; state++) { + quotient_mdp_bounds[mdp_quotient_state_map[state]] = mdp_values[state]; + } + } + + + + // Construct transition matrix (as well as the reward model) for the subdtmc + if(!this->formula_reward[formula_index]) { + // Probability formula: no reward models + double default_bound = this->formula_safety[formula_index] ? 
0 : 1; + for(StateType state = 0; state < dtmc_states; state++) { + StateType mdp_state = this->state_map[state]; + std::vector> r; + double probability = have_bounds ? quotient_mdp_bounds[mdp_state] : default_bound; + r.emplace_back(sink_state_false, 1-probability); + r.emplace_back(sink_state_true, probability); + matrix_subdtmc.push_back(r); + } + } else { + // Reward formula: one reward model + assert(dtmc->hasRewardModel(this->formula_reward_name[formula_index])); + + std::vector state_rewards_subdtmc(dtmc_states+2); + double default_reward = 0; + for(StateType state = 0; state < dtmc_states; state++) { + StateType mdp_state = this->state_map[state]; + double reward = have_bounds ? quotient_mdp_bounds[mdp_state] : default_reward; + state_rewards_subdtmc[state] = reward; + + std::vector> r; + r.emplace_back(sink_state_true, 1); + matrix_subdtmc.push_back(r); + } + storm::models::sparse::StandardRewardModel reward_model_subdtmc(state_rewards_subdtmc); + reward_models_subdtmc.emplace(this->formula_reward_name[formula_index], reward_model_subdtmc); + } + + // Add self-loops to sink states + for(StateType state = sink_state_false; state <= sink_state_true; state++) { + std::vector> r; + r.emplace_back(state, 1); + matrix_subdtmc.push_back(r); + } + } + + template + bool CounterexampleGenerator::expandAndCheck ( + uint64_t index, + ValueType formula_bound, + std::vector>> & matrix_subdtmc, + storm::models::sparse::StateLabeling const& labeling_subdtmc, + std::unordered_map> & reward_models_subdtmc, + std::vector const& to_expand + ) { + + // Get DTMC info + uint64_t dtmc_states = this->dtmc->getNumberOfStates(); + storm::storage::SparseMatrix const& transition_matrix = this->dtmc->getTransitionMatrix(); + StateType initial_state = *(this->dtmc->getInitialStates().begin()); + + // Expand states from the new wave: + // - expand transition probabilities + // std::cout << "expanded " << to_expand.size() << " states in this wave " << std::endl; + for(StateType state : 
to_expand) { + // std::cout << "holes in state " << state << " : "; + /*for(auto hole: this->mdp_holes[this->state_map[state]]) { + std::cout << hole << ","; + }*/ + // std::cout << std::endl; + matrix_subdtmc[state].clear(); + for(auto entry: transition_matrix.getRow(state)) { + matrix_subdtmc[state].emplace_back(entry.getColumn(), entry.getValue()); + } + } + // std::cout << std::endl; + + if(this->formula_reward[index]) { + // - expand state rewards + storm::models::sparse::StandardRewardModel const& reward_model_dtmc = dtmc->getRewardModel(this->formula_reward_name[index]); + assert(reward_model_dtmc.hasStateRewards() or reward_model_dtmc.hasStateActionRewards()); + storm::models::sparse::StandardRewardModel & reward_model_subdtmc = (reward_models_subdtmc.find(this->formula_reward_name[index]))->second; + for(StateType state : to_expand) { + ValueType reward; + if(reward_model_dtmc.hasStateRewards()) { + reward = reward_model_dtmc.getStateReward(state); + } else { + reward = reward_model_dtmc.getStateActionReward(state); + } + reward_model_subdtmc.setStateReward(state, reward); + } + } + + // Construct sub-DTMC + storm::storage::SparseMatrixBuilder transitionMatrixBuilder(0, 0, 0, false, false, 0); + for(StateType state = 0; state < dtmc_states+2; state++) { + for(auto row_entry: matrix_subdtmc[state]) { + transitionMatrixBuilder.addNextValue(state, row_entry.first, row_entry.second); + } + } + storm::storage::SparseMatrix sub_matrix = transitionMatrixBuilder.build(); + assert(sub_matrix.isProbabilistic()); + storm::storage::sparse::ModelComponents components(sub_matrix, labeling_subdtmc, reward_models_subdtmc); + std::shared_ptr> subdtmc = storm::utility::builder::buildModelFromComponents(storm::models::ModelType::Dtmc, std::move(components)); + // std::cout << "[storm] sub-dtmc has " << subdtmc->getNumberOfStates() << " states" << std::endl; + + + // Construct MC task + bool onlyInitialStatesRelevant = false; + storm::modelchecker::CheckTask 
task(*(this->formula_modified[index]), onlyInitialStatesRelevant); + if(this->hint_result != NULL) { + // Add hints from previous wave + storm::modelchecker::ExplicitModelCheckerHint hint; + hint.setComputeOnlyMaybeStates(false); + hint.setResultHint(this->hint_result->template asExplicitQuantitativeCheckResult().getValueVector()); + task.setHint(std::make_shared>(hint)); + } + storm::Environment env; + // storm::SolverEnvironment & solver = env.solver(); + // std::cout << solver.getLinearEquationSolverType() << std::endl; + // std::cout << solver.getPrecisionOfLinearEquationSolver() << std::endl; + + + // Model check + // std::unique_ptr result_ptr = storm::api::verifyWithSparseEngine(subdtmc, task); + // storm::modelchecker::ExplicitQuantitativeCheckResult& result = result_ptr->asExplicitQuantitativeCheckResult(); + this->timer_model_check.start(); + this->hint_result = storm::api::verifyWithSparseEngine(env, subdtmc, task); + this->timer_model_check.stop(); + storm::modelchecker::ExplicitQuantitativeCheckResult& result = this->hint_result->template asExplicitQuantitativeCheckResult(); + bool satisfied; + if(this->formula_safety[index]) { + satisfied = result[initial_state] < formula_bound; + } else { + satisfied = result[initial_state] > formula_bound; + } + + return satisfied; + } + + template + std::vector CounterexampleGenerator::constructConflict ( + uint64_t formula_index, + ValueType formula_bound, + std::shared_ptr const> mdp_bounds, + std::vector const& mdp_quotient_state_map + ) { + this->timer_conflict.start(); + + // Clear hint result + this->hint_result = NULL; + + // Get DTMC info + StateType dtmc_states = this->dtmc->getNumberOfStates(); + + // Prepare to construct sub-DTMCs + std::vector>> matrix_subdtmc; + storm::models::sparse::StateLabeling labeling_subdtmc(dtmc_states+2); + std::unordered_map> reward_models_subdtmc; + this->prepareSubdtmc( + formula_index, mdp_bounds, mdp_quotient_state_map, matrix_subdtmc, labeling_subdtmc, 
reward_models_subdtmc + ); + + // Explore subDTMCs wave by wave + uint64_t wave_last = this->wave_states.size()-1; + uint64_t wave = 0; + + /*std::cout << "[storm] hole-wave: "; + for(uint64_t hole = 0; hole < this->hole_count; hole++) { + std::cout << this->hole_wave[hole] << ","; + } + std::cout << std::endl;*/ + while(true) { + bool satisfied = this->expandAndCheck( + formula_index, formula_bound, matrix_subdtmc, labeling_subdtmc, + reward_models_subdtmc, this->wave_states[wave] + ); + // std::cout << "[storm] wave " << wave << "/" << wave_last << " : " << satisfied << std::endl; + if(!satisfied) { + break; + } + if(wave == wave_last) { + break; + } + wave++; + } + + // Return a set of critical holes + std::vector critical_holes; + for(uint64_t hole = 0; hole < this->hole_count; hole++) { + uint64_t wave_registered = this->hole_wave[hole]; + if(wave_registered > 0 && wave_registered <= wave) { + critical_holes.push_back(hole); + } + } + this->timer_conflict.stop(); + + return critical_holes; + } + + template + void CounterexampleGenerator::printProfiling() { + std::cout << "[s] conflict: " << this->timer_conflict << std::endl; + std::cout << "[s] model checking: " << this->timer_model_check << std::endl; + } + + + + + // Explicitly instantiate functions and classes. 
+ template class CounterexampleGenerator; + +} diff --git a/payntbind/src/synthesis/counterexamples/Counterexample.h b/payntbind/src/synthesis/counterexamples/Counterexample.h new file mode 100644 index 000000000..31b2a2bce --- /dev/null +++ b/payntbind/src/synthesis/counterexamples/Counterexample.h @@ -0,0 +1,162 @@ +#pragma once + +#include "storm/storage/jani/Model.h" +#include "storm/logic/Formula.h" +#include "storm/models/sparse/Mdp.h" +#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" +#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" + +#include "storm/models/sparse/Dtmc.h" +#include "storm/utility/Stopwatch.h" + +namespace synthesis { + + template + class CounterexampleGenerator { + public: + + /*! + * Preprocess the quotient MDP and its bound on the reachability + * probability before constructing counterexamples from various + * deterministic sub-MDPs (DTMCs). + * @param quotient_mdp The quotient MDP. + * @param hole_count Total number of holes. + * @param mdp_holes For each state of a quotient MDP, a set of + * indices of significant holes. + * @param formulae Formulae to check, can be both + * probabilistic and reward-based. + */ + CounterexampleGenerator( + storm::models::sparse::Mdp const& quotient_mdp, + uint64_t hole_count, + std::vector> const& mdp_holes, + std::vector> const& formulae + ); + + /*! + * Preprocess the DTMC by establishing the state expansion order (waves): + * - explore the reachable state space wave by wave + * - during each wave, expand only 'non-blocking' states (states with registered holes) + * - if no non-blocking state remains, pick a blocking candidate with the least amount of unregistered holes + * - register all holes in this blocking candidate, thus unblocking this state (and possibly many others) + * - rinse and repeat + * @param dtmc A deterministic MDP (DTMC). + * @param state_map DTMC-MDP state mapping. 
+ + */ + void prepareDtmc( + storm::models::sparse::Dtmc const& dtmc, + std::vector const& state_map + ); + + /*! + * Construct a counterexample to a prepared DTMC and a formula with + * the given index. + * @param formula_index Formula index. + * @param formula_bound Formula threshold for CE construction. + * @param mdp_bounds MDP model checking result in the primary direction (NULL if not used). + * @param mdp_quotient_state_map A mapping of MDP states to the states of a quotient MDP. + * @return A list of holes relevant in the CE. + */ + std::vector constructConflict( + uint64_t formula_index, + ValueType formula_bound, + std::shared_ptr const> mdp_bounds, + std::vector const& mdp_quotient_state_map + ); + + /*! + * Print the accumulated conflict-construction and model-checking timers to standard output. + */ + void printProfiling(); + + protected: + + /** Identify states of an MDP having some label. */ + std::shared_ptr labelStates( + storm::models::sparse::Mdp const& mdp, + storm::logic::Formula const& label + ); + + /** + * Prepare data structures for sub-DTMC construction. + * @param formula_index Formula index. + * @param mdp_bounds MDP model checking result in the primary direction. + * @param mdp_quotient_state_map A mapping of MDP states to the states of a quotient MDP. + * @param matrix_subdtmc (output) Matrix of shortcuts. + * @param labeling_subdtmc (output) Labeling marking target states. + * @param reward_models_subdtmc (output) If the reward property is + * investigated, this map will contain exactly one reward model + * for the initial sub-DTMC. + */ + void prepareSubdtmc( + uint64_t formula_index, + std::shared_ptr const> mdp_bounds, + std::vector const& mdp_quotient_state_map, + std::vector>> & matrix_subdtmc, + storm::models::sparse::StateLabeling & labeling_subdtmc, + std::unordered_map> & reward_models_subdtmc + ); + + /** + * Expand new wave and model check resulting rerouting of a DTMC. + * @param index Formula index. + * @param formula_bound Formula threshold for CE construction. + * @param labeling_subdtmc Prototype labeling. 
+ * @param matrix_subdtmc Rerouting of the transition matrix wrt. unexpanded states. + * @param reward_models_subdtmc Reward models for the initial sub-DTMC. + * @param to_expand States expanded during this wave. + * @return true if the rerouting still satisfies the formula + */ + bool expandAndCheck( + uint64_t index, + ValueType formula_bound, + std::vector>> & matrix_subdtmc, + storm::models::sparse::StateLabeling const& labeling_subdtmc, + std::unordered_map> & reward_models_subdtmc, + std::vector const& to_expand + ); + + // Quotient MDP + storm::models::sparse::Mdp const& quotient_mdp; + // Number of significant holes + uint64_t hole_count; + // Significant holes in MDP states + std::vector> mdp_holes; + + // Formula bounds: safety (<,<=) or liveness (>,>=) + std::vector formula_safety; + // Formula types: probability (false) or reward-based (true) + std::vector formula_reward; + // Reward model names for reward formulae + std::vector formula_reward_name; + + // Until label for sub-dtmcs + const std::string until_label = "__until__"; + // Target label for sub-dtmcs + const std::string target_label = "__target__"; + // Modified operator formulae to apply to sub-dtmcs: P~?["__until__" U "__target__"] or P~?[F "__target__"] + std::vector> formula_modified; + // Flags for until states + std::vector> mdp_untils; + // Flags for target states + std::vector> mdp_targets; + + // DTMC under investigation + std::shared_ptr> dtmc; + // DTMC to MDP state mapping + std::vector state_map; + // For each hole, a wave when it was registered (0 = unregistered). + std::vector hole_wave; + // For each wave, a set of states that were expanded. + std::vector> wave_states; + + // Hint for future model checking. 
+ std::unique_ptr hint_result; + + // Profiling + storm::utility::Stopwatch timer_conflict; + storm::utility::Stopwatch timer_model_check; + + }; +} diff --git a/payntbind/src/synthesis/counterexamples/CounterexampleMdp.cpp b/payntbind/src/synthesis/counterexamples/CounterexampleMdp.cpp new file mode 100644 index 000000000..746187d20 --- /dev/null +++ b/payntbind/src/synthesis/counterexamples/CounterexampleMdp.cpp @@ -0,0 +1,535 @@ +#include "CounterexampleMdp.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace synthesis { + + + +// labelStates +template +std::shared_ptr CounterexampleGeneratorMdp::labelStates( + storm::models::sparse::Mdp const& mdp, + storm::logic::Formula const& label +) { + std::shared_ptr> mdp_shared = std::make_shared>(mdp); + bool onlyInitialStatesRelevant = false; + storm::modelchecker::CheckTask task(label, onlyInitialStatesRelevant); + std::unique_ptr result_ptr = storm::api::verifyWithSparseEngine(mdp_shared, task); + std::shared_ptr mdp_target = std::make_shared(result_ptr->asExplicitQualitativeCheckResult()); + return mdp_target; +} + + +// constructor +template +CounterexampleGeneratorMdp::CounterexampleGeneratorMdp ( + storm::models::sparse::Mdp const& quotient_mdp, + uint64_t hole_count, + std::vector> const& quotient_holes, + std::vector> const& formulae + ) : quotient_mdp(quotient_mdp), hole_count(hole_count), quotient_holes(quotient_holes) { + + // create label formulae for our own labels + std::shared_ptr const& target_label_formula = std::make_shared(this->target_label); + std::shared_ptr const& until_label_formula = std::make_shared(this->until_label); + + // process all formulae + for(auto formula: formulae) { + + // store formula type and optimality type + assert(formula->isOperatorFormula()); + storm::logic::OperatorFormula const& of = formula->asOperatorFormula(); + + assert(of.hasOptimalityType()); + 
storm::solver::OptimizationDirection ot = of.getOptimalityType(); + bool is_safety = ot == storm::solver::OptimizationDirection::Minimize; + this->formula_safety.push_back(is_safety); + + bool is_reward = formula->isRewardOperatorFormula(); + this->formula_reward.push_back(is_reward); + if(!is_reward) { + this->formula_reward_name.push_back(""); + } else { + STORM_LOG_THROW(formula->asRewardOperatorFormula().hasRewardModelName(), storm::exceptions::InvalidArgumentException, "Name of the reward model must be specified."); + this->formula_reward_name.push_back(formula->asRewardOperatorFormula().getRewardModelName()); + } + + // extract predicate for until and target states and identify such states + storm::logic::Formula const& osf = of.getSubformula(); + if(!osf.isUntilFormula() && !osf.isEventuallyFormula()) { + throw storm::exceptions::NotImplementedException() << "Only until or reachability formulae supported."; + } + + std::shared_ptr modified_subformula; + if(osf.isUntilFormula()) { + storm::logic::UntilFormula const& uf = osf.asUntilFormula(); + + auto mdp_until = this->labelStates(this->quotient_mdp,uf.getLeftSubformula()); + this->mdp_untils.push_back(mdp_until); + + auto mdp_target = this->labelStates(this->quotient_mdp, uf.getRightSubformula()); + this->mdp_targets.push_back(mdp_target); + + modified_subformula = std::make_shared(until_label_formula, target_label_formula); + } else if(osf.isEventuallyFormula()) { + storm::logic::EventuallyFormula const& ef = osf.asEventuallyFormula(); + + this->mdp_untils.push_back(NULL); + + auto mdp_target = this->labelStates(this->quotient_mdp,ef.getSubformula()); + // std::cout << (*mdp_target).getTruthValuesVector() << std::endl; + this->mdp_targets.push_back(mdp_target); + + modified_subformula = std::make_shared(target_label_formula, ef.getContext()); + // std::cout << (*modified_subformula).asEventuallyFormula().getSubformula() << std::endl; + } + + // integrate formula into original context + std::shared_ptr 
modified_formula; + if(!is_reward) { + modified_formula = std::make_shared(modified_subformula, of.getOperatorInformation()); + } else { + modified_formula = std::make_shared(modified_subformula, this->formula_reward_name.back(), of.getOperatorInformation()); + } + this->formula_modified.push_back(modified_formula); + } +} + +// prepareMdp +template +void CounterexampleGeneratorMdp::prepareMdp( + storm::models::sparse::Mdp const& mdp, + std::vector const& state_map + // storm::storage::BitVector initial_expand + ) { + + // Get MDP info + this->mdp = std::make_shared>(mdp); + this->state_map = state_map; + uint64_t mdp_states = this->mdp->getNumberOfStates(); + StateType initial_state = *(this->mdp->getInitialStates().begin()); + + // Clear up previous MDP exploration metadata + while(!this->state_horizon.empty()) { + this->state_horizon.pop(); + } + this->hole_wave.clear(); + this->wave_states.clear(); + this->state_horizon_blocking.clear(); + this->unregistered_holes_count = std::vector(mdp_states); + this->mdp_holes = std::vector>(mdp_states); + this->current_wave = 0; + this->reachable_flag = storm::storage::BitVector(mdp_states, false); + this->blocking_candidate_set = false; + + + // Mark all simple holes registered and rest unregistered + this->hole_wave.resize(this->hole_count,0); + + // Associate states of a MDP with relevant holes and store their count + for(StateType state = 0; state < mdp_states; state++) { + this->mdp_holes[state] = this->quotient_holes[state_map[state]]; + for(uint64_t hole : this->mdp_holes[state]) { + // Hole is unregistered + if(this->hole_wave[hole] == 0) { + unregistered_holes_count[state]++; + } + } + } + + // Round 0: encounter initial state first (important) + this->wave_states.push_back(std::vector()); + reachable_flag.set(initial_state); + if(unregistered_holes_count[initial_state] == 0) { + // non-blocking + state_horizon.push(initial_state); + } else { + // blocking + state_horizon_blocking.push_back(initial_state); + 
blocking_candidate_set = true; + blocking_candidate = initial_state; + } +} + +template +bool CounterexampleGeneratorMdp::exploreWave () { + + storm::storage::SparseMatrix const& transition_matrix = this->mdp->getTransitionMatrix(); + std::vector row_group_indices = transition_matrix.getRowGroupIndices(); + uint64_t mdp_states = this->mdp->getNumberOfStates(); + + // Expand the non-blocking horizon + while(!state_horizon.empty()) { + StateType state = state_horizon.top(); + state_horizon.pop(); + this->wave_states.back().push_back(state); + + // Reach successors + for(uint64_t row_index = row_group_indices[state]; row_index < row_group_indices[state+1]; row_index++) { + for(auto entry : transition_matrix.getRow(row_index)) { + // TODO CHECK + StateType successor = entry.getColumn(); + if(reachable_flag[successor]) { + // already reached + continue; + } + // new state reached + reachable_flag.set(successor); + if(unregistered_holes_count[successor] == 0) { + // non-blocking + state_horizon.push(successor); + } else { + // blocking + state_horizon_blocking.push_back(successor); + if(!blocking_candidate_set || unregistered_holes_count[successor] < unregistered_holes_count[blocking_candidate]) { + // new blocking candidate + blocking_candidate_set = true; + blocking_candidate = successor; + } + } + } + } + } + + // Non-blocking horizon exhausted + if(!blocking_candidate_set) { + // fully explored - nothing more to expand + return true; + } + + // Start a new wave + this->current_wave++; + this->wave_states.push_back(std::vector()); + blocking_candidate_set = false; + + // Register all unregistered holes of this blocking state + for(uint64_t hole: mdp_holes[blocking_candidate]) { + if(this->hole_wave[hole] == 0) { + hole_wave[hole] = current_wave; + // std::cout << "[storm] hole " << hole << " expanded in wave " << current_wave << std::endl; + } + } + + // Recompute number of unregistered holes in each state + for(StateType state = 0; state < mdp_states; state++) { + 
unregistered_holes_count[state] = 0; + for(uint64_t hole: mdp_holes[state]) { + if(this->hole_wave[hole] == 0) { + unregistered_holes_count[state]++; + } + } + } + + // Unblock the states from the blocking horizon + std::vector old_blocking_horizon; + old_blocking_horizon.swap(state_horizon_blocking); + for(StateType state: old_blocking_horizon) { + if(unregistered_holes_count[state] == 0) { + // state unblocked + state_horizon.push(state); + } else { + // still blocking + state_horizon_blocking.push_back(state); + if(!blocking_candidate_set || unregistered_holes_count[state] < unregistered_holes_count[blocking_candidate]) { + // new blocking candidate + blocking_candidate_set = true; + blocking_candidate = state; + } + } + } + + // not fully explored + return false; +} + +// prepareSubmdp +template +void CounterexampleGeneratorMdp::prepareSubmdp ( + uint64_t formula_index, + std::shared_ptr const> mdp_bounds, + std::vector const& mdp_quotient_state_map, + std::vector> & matrix_submdp, + storm::models::sparse::StateLabeling & labeling_submdp, + std::unordered_map> & reward_models_submdp + ) { + + // Get MDP info + StateType mdp_states = mdp->getNumberOfStates(); + storm::storage::SparseMatrix const& transition_matrix = this->mdp->getTransitionMatrix(); + + // Introduce expanded state space + uint64_t sink_state_false = mdp_states; + uint64_t sink_state_true = mdp_states+1; + + // Label target states of a MDP + std::shared_ptr mdp_target = this->mdp_targets[formula_index]; + std::shared_ptr mdp_until = this->mdp_untils[formula_index]; + labeling_submdp.addLabel(this->target_label); + labeling_submdp.addLabel(this->until_label); + for(StateType state = 0; state < mdp_states; state++) { + StateType mdp_state = this->state_map[state]; + if((*mdp_target)[mdp_state]) { + labeling_submdp.addLabelToState(this->target_label, state); + } + if(mdp_until != NULL && (*mdp_until)[mdp_state]) { + labeling_submdp.addLabelToState(this->until_label, state); + } + } + // Associate 
true sink with the target label + labeling_submdp.addLabelToState(this->target_label, sink_state_true); + + // Map MDP bounds onto the state space of a quotient MDP + bool have_bounds = mdp_bounds != NULL; + std::vector quotient_mdp_bounds; + if(have_bounds) { + auto const& mdp_values = mdp_bounds->getValueVector(); + quotient_mdp_bounds.resize(this->quotient_mdp.getNumberOfStates()); + uint64_t mdp_states = mdp_values.size(); + for(StateType state = 0; state < mdp_states; state++) { + quotient_mdp_bounds[mdp_quotient_state_map[state]] = mdp_values[state]; + } + } + + // Construct transition matrix (as well as the reward model) for the submdp + if(!this->formula_reward[formula_index]) { + // Probability formula: no reward models + double default_bound = this->formula_safety[formula_index] ? 0 : 1; + for(StateType state = 0; state < mdp_states; state++) { + // matrix_submdp.push_back(std::vector()); + StateType mdp_state = this->state_map[state]; + + // FIXME later? - multiple same actions + uint64_t state_actions = transition_matrix.getRowGroupSize(state); + for(uint64_t action = 0; action < state_actions; action++) { + StormRow r; + double probability = have_bounds ? quotient_mdp_bounds[mdp_state] : default_bound; + r.emplace_back(sink_state_false, 1-probability); + r.emplace_back(sink_state_true, probability); + matrix_submdp[state].push_back(r); + } + } + } else { + // Reward formula: one reward model + assert(mdp->hasRewardModel(this->formula_reward_name[formula_index])); + + std::optional> state_rewards; + std::vector state_action_rewards_submdp(transition_matrix.getRowCount()+2); + double default_reward = 0; + uint64_t row_index = 0; + for(StateType state = 0; state < mdp_states; state++) { + StateType mdp_state = this->state_map[state]; + // matrix_submdp.push_back(std::vector()); + // FIXME - quotient_mdp_bounds + double reward = have_bounds ? quotient_mdp_bounds[mdp_state] : default_reward; + + // FIXME later? 
- multiple same actions + uint64_t state_actions = transition_matrix.getRowGroupSize(state); + for(uint64_t action = 0; action < state_actions; action++) { + state_action_rewards_submdp[row_index] = reward; + row_index++; + StormRow r; + r.emplace_back(sink_state_true, 1); + matrix_submdp[state].push_back(r); + } + + + } + storm::models::sparse::StandardRewardModel reward_model_submdp(state_rewards, state_action_rewards_submdp); + reward_models_submdp.emplace(this->formula_reward_name[formula_index], reward_model_submdp); + } + + // Add self-loops to sink states + for(StateType state = sink_state_false; state <= sink_state_true; state++) { + StormRow r; + r.emplace_back(state, 1); + matrix_submdp[state].push_back(r); + } +} + +// expandAndCheck +template +std::pair CounterexampleGeneratorMdp::expandAndCheck ( + uint64_t formula_index, + ValueType formula_bound, + // std::vector>> & matrix_submdp, + std::vector> & matrix_submdp, + storm::models::sparse::StateLabeling const& labeling_submdp, + std::unordered_map> & reward_models_submdp +) { + // result.first - wave exploration finish status + // result.second - formula satisfied + std::pair result(false, true); + + // explore one wave + // FIXME later? 
- we don't need to explore last wave + bool fully_explored = exploreWave(); + result.first = fully_explored; + + // all waves explored + if(fully_explored) { + return result; + } + + + // Get MDP info + uint64_t mdp_states = this->mdp->getNumberOfStates(); + storm::storage::SparseMatrix const& transition_matrix = this->mdp->getTransitionMatrix(); + std::vector row_group_indices = transition_matrix.getRowGroupIndices(); + StateType initial_state = *(this->mdp->getInitialStates().begin()); + std::vector to_expand = this->wave_states[current_wave-1]; + // Expand states from the new wave: + // - expand transition probabilities + for(StateType state : to_expand) { + matrix_submdp[state].clear(); + + for(uint64_t row_index = row_group_indices[state]; row_index < row_group_indices[state+1]; row_index++) { + StormRow r; + for(auto entry : transition_matrix.getRow(row_index)) { + r.emplace_back(entry.getColumn(), entry.getValue()); + } + matrix_submdp[state].push_back(r); + } + } + + if(this->formula_reward[formula_index]) { + // - expand state rewards + storm::models::sparse::StandardRewardModel const& reward_model_mdp = mdp->getRewardModel(this->formula_reward_name[formula_index]); + assert(reward_model_mdp.hasStateActionRewards()); + storm::models::sparse::StandardRewardModel & reward_model_submdp = (reward_models_submdp.find(this->formula_reward_name[formula_index]))->second; + + for(StateType state : to_expand) { + for(uint64_t row_index = row_group_indices[state]; row_index < row_group_indices[state+1]; row_index++) { + ValueType reward = reward_model_mdp.getStateActionReward(row_index); + reward_model_submdp.setStateActionReward(row_index, reward); + } + } + } + + // Construct sub-MDP + storm::storage::SparseMatrixBuilder transitionMatrixBuilder(0, 0, 0, false, true, row_group_indices.size()); + uint64_t row_index = 0; + for(StateType state = 0; state < mdp_states+2; state++) { + transitionMatrixBuilder.newRowGroup(row_index); + for(auto row: matrix_submdp[state]) { 
+ for(auto row_entry: row) { + transitionMatrixBuilder.addNextValue(row_index, row_entry.first, row_entry.second); + } + row_index++; + } + } + storm::storage::SparseMatrix sub_matrix = transitionMatrixBuilder.build(); + // std::cout << sub_matrix << std::endl; + assert(sub_matrix.isProbabilistic()); + storm::storage::sparse::ModelComponents components(sub_matrix, labeling_submdp, reward_models_submdp); + std::shared_ptr> submdp = storm::utility::builder::buildModelFromComponents(storm::models::ModelType::Mdp, std::move(components)); + // std::cout << "[storm] sub-mdp has " << submdp->getNumberOfStates() << " states" << std::endl; + + + // Construct MC task + bool onlyInitialStatesRelevant = false; + storm::modelchecker::CheckTask task(*(this->formula_modified[formula_index]), onlyInitialStatesRelevant); + if(this->hint_result != NULL) { + // Add hints from previous wave + storm::modelchecker::ExplicitModelCheckerHint hint; + hint.setComputeOnlyMaybeStates(false); + hint.setResultHint(boost::make_optional(this->hint_result->template asExplicitQuantitativeCheckResult().getValueVector())); + task.setHint(std::make_shared>(hint)); + } + storm::Environment env; + // storm::SolverEnvironment & solver = env.solver(); + // std::cout << solver.getLinearEquationSolverType() << std::endl; + // std::cout << solver.getPrecisionOfLinearEquationSolver() << std::endl; + + + // Model check + // std::unique_ptr result_ptr = storm::api::verifyWithSparseEngine(submdp, task); + // storm::modelchecker::ExplicitQuantitativeCheckResult& result = result_ptr->asExplicitQuantitativeCheckResult(); + this->timer_model_check.start(); + this->hint_result = storm::api::verifyWithSparseEngine(env, submdp, task); + this->timer_model_check.stop(); + storm::modelchecker::ExplicitQuantitativeCheckResult& model_check_result = this->hint_result->template asExplicitQuantitativeCheckResult(); + bool satisfied; + if(this->formula_safety[formula_index]) { + satisfied = model_check_result[initial_state] < 
formula_bound; + // std::cout << model_check_result[initial_state] << " < " << formula_bound << std::endl; + } else { + satisfied = model_check_result[initial_state] > formula_bound; + // std::cout << model_check_result[initial_state] << " > " << formula_bound << std::endl; + } + result.second = satisfied; + + return result; +} + + +// constructConflict +template +std::vector CounterexampleGeneratorMdp::constructConflict ( + uint64_t formula_index, + ValueType formula_bound, + // TODO: revise rest of the args + std::shared_ptr const> mdp_bounds, // placeholders for Hybrid + std::vector const& mdp_quotient_state_map // maybe not needed as we don't need to quotient MDP +) { + this->timer_conflict.start(); + StateType mdp_states = this->mdp->getNumberOfStates(); + + // Prepare to construct sub-MDPs + std::vector> matrix_submdp(mdp_states+2); + storm::models::sparse::StateLabeling labeling_submdp(mdp_states+2); + std::unordered_map> reward_models_submdp; + this->prepareSubmdp( + formula_index, mdp_bounds, mdp_quotient_state_map, matrix_submdp, labeling_submdp, reward_models_submdp + ); + + while(true) { + std::pair result = this->expandAndCheck( + formula_index, formula_bound, matrix_submdp, labeling_submdp, reward_models_submdp + ); + bool last_wave = result.first; + bool satisfied = result.second; + + if(!satisfied || last_wave) { + break; + } + } + + // Return a set of critical holes + std::vector critical_holes; + for(uint64_t hole = 0; hole < this->hole_count; hole++) { + uint64_t wave_registered = this->hole_wave[hole]; + if(wave_registered > 0 && wave_registered <= current_wave) { + critical_holes.push_back(hole); + } + } + this->timer_conflict.stop(); + + return critical_holes; +} + +// printProfiling +template +void CounterexampleGeneratorMdp::printProfiling() { + std::cout << "[s] conflict: " << this->timer_conflict << std::endl; + std::cout << "[s] model checking: " << this->timer_model_check << std::endl; +} + + // Explicitly instantiate functions and 
classes. +template class CounterexampleGeneratorMdp; + +} diff --git a/payntbind/src/synthesis/counterexamples/CounterexampleMdp.h b/payntbind/src/synthesis/counterexamples/CounterexampleMdp.h new file mode 100644 index 000000000..f1d01e8b4 --- /dev/null +++ b/payntbind/src/synthesis/counterexamples/CounterexampleMdp.h @@ -0,0 +1,191 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace synthesis { + +template +class CounterexampleGeneratorMdp { + +using StormRow = std::vector>; + +public: + + /*! + * Preprocess the quotient MDP and its bound on the reachability + * probability before constructing counterexamples from various + * deterministic sub-MDPs (DTMCs). + * @param quotient_mdp The quotient MDP. + * @param hole_count Total number of holes. + * @param mdp_holes For each state of a quotient MDP, a set of + * indices of significant holes. + * @param formulae Formulae to check, can be both + * probabilistic and reward-based. + */ + CounterexampleGeneratorMdp( + storm::models::sparse::Mdp const& quotient_mdp, + uint64_t hole_count, + std::vector> const& quotient_holes, + std::vector> const& formulae + ); + + /*! + * Preprocess the DTMC by establishing the state expansion order (waves): + * - explore the reachable state space wave by wave + * - during each wave, expand only 'non-blocking' states (states with registered holes) + * - if no non-blocking state remains, pick a blocking candidate with the least amount of unregistered holes + * - register all holes in this blocking candidate, thus unblocking this state (and possibly many others) + * - rinse and repeat + * @param dtmc A deterministic MDP (DTMC). + * @param state_map DTMC-MDP state mapping. + + */ + void prepareMdp( + storm::models::sparse::Mdp const& Mdp, + std::vector const& state_map + // std::set initial_expand + // storm::storage::BitVector initial_expand + ); + + /*! + * TODO + */ + bool exploreWave (); + + /*! 
+ * Construct a counterexample to a prepared DTMC and a formula with + * the given index. + * @param formula_index Formula index. + * @param formula_bound Formula threshold for CE construction. + * @param mdp_bounds MDP model checking result in the primary direction (NULL if not used). + * @param mdp_quotient_state_map A mapping of MDP states to the states of a quotient MDP. + * @return A list of holes relevant in the CE. + */ + std::vector constructConflict( + uint64_t formula_index, + ValueType formula_bound, + std::shared_ptr const> mdp_bounds, + std::vector const& mdp_quotient_state_map + ); + + /*! + * Print the accumulated conflict-construction and model-checking timers to standard output. + */ + void printProfiling(); + +protected: + + /** Identify states of an MDP having some label. */ + std::shared_ptr labelStates( + storm::models::sparse::Mdp const& mdp, + storm::logic::Formula const& label + ); + + /** + * Prepare data structures for sub-DTMC construction. + * @param formula_index Formula index. + * @param mdp_bounds MDP model checking result in the primary direction. + * @param mdp_quotient_state_map A mapping of MDP states to the states of a quotient MDP. + * @param matrix_subdtmc (output) Matrix of shortcuts. + * @param labeling_subdtmc (output) Labeling marking target states. + * @param reward_models_subdtmc (output) If the reward property is + * investigated, this map will contain exactly one reward model + * for the initial sub-DTMC. + */ + void prepareSubmdp( + uint64_t formula_index, + std::shared_ptr const> mdp_bounds, + std::vector const& mdp_quotient_state_map, + std::vector> & matrix_subdtmc, + storm::models::sparse::StateLabeling & labeling_subdtmc, + std::unordered_map> & reward_models_subdtmc + ); + + /** + * Expand new wave and model check resulting rerouting of a DTMC. + * @param dtmc A DTMC. + * @param labeling Prototype labeling. + * @param matrix_dtmc Original transition matrix. + * @param matrix_subdtmc Rerouting of the transition matrix wrt. unexpanded states. + * @param reward_models_subdtmc Reward models for the initial sub-DTMC. 
+ * @param to_expand States expanded during this wave. + * @return first: true if all waves have been explored (this was the last wave), second: true if the rerouting still satisfies the formula + */ + std::pair expandAndCheck( + uint64_t formula_index, + ValueType formula_bound, + // std::vector>> & matrix_submdp, + std::vector> & matrix_submdp, + storm::models::sparse::StateLabeling const& labeling_submdp, + std::unordered_map> & reward_models_submdp + ); + + // Quotient MDP + storm::models::sparse::Mdp const& quotient_mdp; + // Number of significant holes + uint64_t hole_count; + // Significant holes in Quotient states + std::vector> quotient_holes; + + + // Formula bounds: safety (<,<=) or liveness (>,>=) + std::vector formula_safety; + // Formula types: probability (false) or reward-based (true) + std::vector formula_reward; + // Reward model names for reward formulae + std::vector formula_reward_name; + + // Until label for sub-dtmcs + const std::string until_label = "__until__"; + // Target label for sub-dtmcs + const std::string target_label = "__target__"; + // Modified operator formulae to apply to sub-dtmcs: P~?["__until__" U "__target__"] or P~?[F "__target__"] + std::vector> formula_modified; + // Flags for until states + std::vector> mdp_untils; + // Flags for target states + std::vector> mdp_targets; + + // MDP under investigation + std::shared_ptr> mdp; + // std::shared_ptr> dtmc; + // MDP to quotient MDP state mapping + std::vector state_map; + // For each hole, a wave when it was registered (0 = unregistered). + std::vector hole_wave; + // For each wave, a set of states that were expanded. 
+ std::vector> wave_states; + // non-blocking horizon + std::stack state_horizon; + // horizon containing, for a current wave, only blocking states + std::vector state_horizon_blocking; + // relevant holes + std::vector> mdp_holes; + // relevant holes count + std::vector unregistered_holes_count; + // true if the state was reached during exploration (expanded states + both horizons) + storm::storage::BitVector reachable_flag; + // blocking state containing currently the least number of unregistered holes + flag if the value was set + StateType blocking_candidate; + bool blocking_candidate_set; + // wave increases by one when new holes of a blocking candidate are registered + uint64_t current_wave; + + // Hint for future model checking. + std::unique_ptr hint_result; + + // Profiling + storm::utility::Stopwatch timer_conflict; + storm::utility::Stopwatch timer_model_check; + +}; + +} diff --git a/payntbind/src/synthesis/counterexamples/bindings.cpp b/payntbind/src/synthesis/counterexamples/bindings.cpp new file mode 100644 index 000000000..b03271370 --- /dev/null +++ b/payntbind/src/synthesis/counterexamples/bindings.cpp @@ -0,0 +1,39 @@ +#include "../synthesis.h" + +#include "Counterexample.h" +#include "CounterexampleMdp.h" + +void bindings_counterexamples(py::module& m) { + + py::class_>(m, "CounterexampleGenerator", "Counterexample generation") + .def( + py::init< + storm::models::sparse::Mdp const&, uint64_t, + std::vector> const&, + std::vector> const& + >(), + py::arg("quotient_mdp"), py::arg("hole_count"), py::arg("mdp_holes"), py::arg("formulae") + ) + .def("prepare_dtmc", &synthesis::CounterexampleGenerator<>::prepareDtmc, py::arg("dtmc"), py::arg("quotient_state_map")) + .def( + "construct_conflict", &synthesis::CounterexampleGenerator<>::constructConflict, + py::arg("formula_index"), py::arg("formula_bound"), py::arg("mdp_bounds"), py::arg("mdp_quotient_state_map") + ) + .def("print_profiling", &synthesis::CounterexampleGenerator<>::printProfiling) + ; + 
+ + // MDP Counterexample generation + py::class_>(m, "CounterexampleGeneratorMdp", "Counterexample generation") + .def( + py::init< + storm::models::sparse::Mdp const&, uint64_t, + std::vector> const&, + std::vector> const& + >(), + py::arg("quotient_mdp"), py::arg("hole_count"), py::arg("mdp_holes"), py::arg("formulae") + ) + .def("prepare_mdp", &synthesis::CounterexampleGeneratorMdp<>::prepareMdp) + .def("construct_conflict", &synthesis::CounterexampleGeneratorMdp<>::constructConflict) + ; +} diff --git a/payntbind/src/synthesis/decpomdp/DecPomdp.cpp b/payntbind/src/synthesis/decpomdp/DecPomdp.cpp new file mode 100644 index 000000000..8723ce883 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/DecPomdp.cpp @@ -0,0 +1,436 @@ +#include "DecPomdp.h" + +#include "madp/src/base/Globals.h" +#include "madp/src/base/E.h" +#include "madp/src/parser/MADPParser.h" + +#include + +namespace synthesis { + + + void DecPomdp::collectActions(DecPOMDPDiscrete *model) { + + // individual actions + this->agent_action_labels.resize(this->num_agents); + for(uint_fast64_t agent = 0; agent < this->num_agents; agent++) { + uint_fast64_t num_actions = model->GetNrActions(agent); + this->agent_action_labels[agent].resize(num_actions); + std::vector action_labels(num_actions); + for(uint_fast64_t action = 0; action < num_actions; action++) { + this->agent_action_labels[agent][action] = model->GetAction(agent,action)->GetName(); + } + } + + // joint actions + this->joint_actions.resize(model->GetNrJointActions()); + for(uint_fast64_t joint_action_index = 0; joint_action_index < model->GetNrJointActions(); joint_action_index++) { + for(auto action: model->JointToIndividualActionIndices(joint_action_index)) { + this->joint_actions[joint_action_index].push_back(action); + } + } + } + + void DecPomdp::collectObservations(DecPOMDPDiscrete *model) { + + // individual observations + this->agent_observation_labels.resize(this->num_agents); + for(uint_fast64_t agent = 0; agent < this->num_agents; 
agent++) { + for(uint_fast64_t obs = 0; obs < model->GetNrObservations(agent); obs++) { + this->agent_observation_labels[agent].push_back(model->GetObservation(agent,obs)->GetName()); + } + } + + // joint observations + uint_fast64_t num_joint_observations = model->GetNrJointObservations(); + this->joint_observations.resize(num_joint_observations); + for(uint_fast64_t joint_observation_index = 0; joint_observation_index < num_joint_observations; joint_observation_index++) { + for(auto observation: model->JointToIndividualObservationIndices(joint_observation_index)) { + this->joint_observations[joint_observation_index].push_back(observation); + } + } + } + + + bool DecPomdp::haveMadpState(MadpState madp_state) { + return this->madp_to_storm_states.find(madp_state) != this->madp_to_storm_states.end(); + } + + uint_fast64_t DecPomdp::mapMadpState(MadpState madp_state) { + uint_fast64_t new_state = this->num_states(); + auto const result = this->madp_to_storm_states.insert(std::make_pair(madp_state, new_state)); + if (result.second) { + // new state + this->storm_to_madp_states.push_back(madp_state); + this->transition_matrix.resize(this->num_states()); + this->row_joint_action.resize(this->num_states()); + this->row_reward.resize(this->num_states()); + + this->state_joint_observation.resize(this->num_states()); + this->state_joint_observation[new_state] = madp_state.second; + } + return result.first->second; + } + + + uint_fast64_t DecPomdp::freshJointAction(std::string action_label) { + std::vector action_tuple(this->num_agents); + for(uint_fast64_t agent = 0; agent < this->num_agents; agent++) { + action_tuple[agent] = this->agent_num_actions(agent); + this->agent_action_labels[agent].push_back(action_label); + } + uint_fast64_t joint_action = this->num_joint_actions(); + this->joint_actions.push_back(std::move(action_tuple)); + return joint_action; + } + + uint_fast64_t DecPomdp::freshJointObservation(std::string observation_label) { + std::vector 
observation_tuple(this->num_agents); + for(uint_fast64_t agent = 0; agent < this->num_agents; agent++) { + observation_tuple[agent] = this->agent_num_observations(agent); + this->agent_observation_labels[agent].push_back(observation_label); + } + uint_fast64_t joint_observation = this->num_joint_observations(); + this->joint_observations.push_back(std::move(observation_tuple)); + return joint_observation; + } + + uint_fast64_t DecPomdp::freshSink(std::string label) { + + uint_fast64_t joint_observation = this->freshJointObservation(label); + MadpState madp_new_state = std::make_pair(0,joint_observation); + uint_fast64_t new_state = this->mapMadpState(madp_new_state); + + uint_fast64_t sink_action = this->freshJointAction(label); + this->row_joint_action[new_state] = std::vector(1, sink_action); + this->row_reward[new_state] = std::vector(1, 0); + this->transition_matrix[new_state] = std::vector(1, StormRow(1, std::make_pair(new_state,1))); + + return new_state; + } + + + + DecPomdp::DecPomdp(DecPOMDPDiscrete *model) { + + // agents + this->num_agents = model->GetNrAgents(); + this->discount_factor = model->GetDiscount(); + this->reward_minimizing = model->GetRewardType() == COST; + + this->collectActions(model); + this->collectObservations(model); + + // multiply transition and observation probabilities + std::vector>>> madp_transition_matrix; + for(uint_fast64_t src = 0; src < model->GetNrStates(); src++) { + std::vector>> row_group; + + for(uint_fast64_t joint_action = 0; joint_action < model->GetNrJointActions(); joint_action++) { + std::vector> row; + + for(uint_fast64_t dst = 0; dst < model->GetNrStates(); dst++) { + double transition_prob = model->GetTransitionProbability(src, joint_action, dst); + if(transition_prob == 0) { + continue; + } + + for(uint_fast64_t obs = 0; obs < model->GetNrJointObservations(); obs++) { + double observation_prob = model->GetObservationProbability(joint_action, dst, obs); + if(observation_prob == 0) { + continue; + } + 
row.push_back(std::make_pair(std::make_pair(dst,obs), transition_prob*observation_prob)); + } + } + row_group.push_back(row); + } + madp_transition_matrix.push_back(row_group); + } + + // create initial observation for the (unique) initial state + uint_fast64_t init_joint_observation = this->freshJointObservation(this->init_label); + // create action that corresponds to the execution of the initial distribution + uint_fast64_t init_joint_action = this->freshJointAction(this->init_label); + // create empty observation for states in the initial distribution + uint_fast64_t empty_joint_observation = this->freshJointObservation(this->no_obs_label); + + // collect initial distribution + std::vector initial_distribution_row_group(1); + uint_fast64_t state = 0; + for(auto prob: model->GetISD()->ToVectorOfDoubles()) { + if(prob > 0) { + initial_distribution_row_group[0].push_back(std::make_pair(std::make_pair(state,empty_joint_observation),prob)); + } + state++; + } + + // explore the reachable state space from the initial state + std::stack reachable_states; + MadpState madp_initial = std::make_pair(0,init_joint_observation); + this->initial_state = this->mapMadpState(madp_initial); + reachable_states.push(madp_initial); + while(!reachable_states.empty()) { + MadpState madp_src = reachable_states.top(); + reachable_states.pop(); + uint_fast64_t storm_src = this->mapMadpState(madp_src); + + std::vector>> *row_group; + if(storm_src == this->initial_state) { + row_group = &initial_distribution_row_group; + } else { + row_group = &madp_transition_matrix[madp_src.first]; + } + + std::vector storm_row_group; + for(auto &row : *row_group) { + StormRow storm_row; + for(auto &madp_state_prob: row) { + MadpState madp_dst = madp_state_prob.first; + if(!this->haveMadpState(madp_dst)) { + reachable_states.push(madp_dst); + } + uint_fast64_t storm_dst = this->mapMadpState(madp_dst); + storm_row.push_back(std::make_pair(storm_dst, madp_state_prob.second)); + } + 
storm_row_group.push_back(std::move(storm_row)); + } + this->transition_matrix[storm_src] = std::move(storm_row_group); + } + + // map rows to joint actions and rewards + std::vector madp_row_group; + for(uint_fast64_t joint_action = 0; joint_action < model->GetNrJointActions(); joint_action++) { + madp_row_group.push_back(joint_action); + } + assert(this->row_joint_action.size() == this->num_states()); + assert(this->row_reward.size() == this->num_states()); + for(uint_fast64_t storm_state = 0; storm_state < this->num_states(); storm_state++) { + MadpState madp_state = this->storm_to_madp_states[storm_state]; + if(storm_state == this->initial_state) { + this->row_joint_action[storm_state] = std::vector(1,init_joint_action); + this->row_reward[storm_state] = std::vector(1,0); + } else { + this->row_joint_action[storm_state] = madp_row_group; + std::vector rewards; + for(uint_fast64_t joint_action = 0; joint_action < model->GetNrJointActions(); joint_action++) { + rewards.push_back(model->GetReward(madp_state.first, joint_action)); + } + this->row_reward[storm_state] = std::move(rewards); + } + } + } + + uint_fast64_t DecPomdp::num_rows() { + uint_fast64_t count = 0; + for(auto row_group: this->transition_matrix) { + count += row_group.size(); + } + return count; + } + + + + storm::models::sparse::StateLabeling DecPomdp::constructStateLabeling() { + storm::models::sparse::StateLabeling labeling(this->num_states()); + + storm::storage::BitVector init_flags(this->num_states(), false); + init_flags.set(this->initial_state); + labeling.addLabel(this->init_label, std::move(init_flags)); + + if(this->discounted) { + storm::storage::BitVector discount_sink_flags(this->num_states(), false); + discount_sink_flags.set(this->discount_sink_state); + labeling.addLabel(this->discount_sink_label, std::move(discount_sink_flags)); + } + + return labeling; + } + + storm::models::sparse::ChoiceLabeling DecPomdp::constructChoiceLabeling() { + uint_fast64_t num_rows = this->num_rows(); 
+ + storm::models::sparse::ChoiceLabeling labeling(num_rows); + uint_fast64_t current_row = 0; + std::vector row_label(num_rows); + std::set all_labels; + for(auto row_group: this->row_joint_action) { + for(auto joint_action_index: row_group) { + std::ostringstream sb; + sb << "("; + auto joint_action = this->joint_actions[joint_action_index]; + for(uint32_t agent = 0; agent < this->num_agents; agent++) { + auto agent_action = joint_action[agent]; + auto agent_action_label = this->agent_action_labels[agent][agent_action]; + sb << agent_action_label; + if(agent < this->num_agents-1) { + sb << ","; + } + } + sb << ")"; + std::string label = sb.str(); + all_labels.insert(label); + row_label[current_row] = label; + current_row++; + } + } + for(auto label: all_labels) { + storm::storage::BitVector flags(num_rows, false); + labeling.addLabel(label, flags); + } + for(uint64_t row = 0; row < num_rows; row++) { + labeling.addLabelToChoice(row_label[row], row); + } + + return labeling; + } + + storm::storage::SparseMatrix DecPomdp::constructTransitionMatrix() { + + + storm::storage::SparseMatrixBuilder builder( + this->num_rows(), this->num_states(), 0, true, true, this->num_states() + ); + uint64_t current_row = 0; + for(uint64_t state = 0; state < this->num_states(); state++) { + builder.newRowGroup(current_row); + for(auto row: this->transition_matrix[state]) { + for(auto entry: row) { + builder.addNextValue(current_row, entry.first, entry.second); + } + current_row++; + } + } + return builder.build(); + } + + storm::models::sparse::StandardRewardModel DecPomdp::constructRewardModel() { + std::optional> state_rewards; + std::vector action_rewards; + for(uint64_t state = 0; state < this->num_states(); state++) { + for(uint64_t row = 0; row < this->transition_matrix[state].size(); row++) { + auto reward = this->row_reward[state][row]; + action_rewards.push_back(reward); + } + } + return storm::models::sparse::StandardRewardModel(std::move(state_rewards), 
std::move(action_rewards)); + } + + + + std::vector DecPomdp::constructObservabilityClasses() { + std::vector observation_classes(this->num_states()); + for(uint64_t state = 0; state < this->num_states(); state++) { + observation_classes[state] = this->state_joint_observation[state]; + } + return observation_classes; + } + + + std::shared_ptr> DecPomdp::constructMdp() { + storm::storage::sparse::ModelComponents components; + components.stateLabeling = this->constructStateLabeling(); + components.choiceLabeling = this->constructChoiceLabeling(); + components.transitionMatrix = this->constructTransitionMatrix(); + components.rewardModels.emplace(this->reward_model_name, this->constructRewardModel()); + return std::make_shared>(std::move(components)); + } + + std::shared_ptr> DecPomdp::constructPomdp() { + storm::storage::sparse::ModelComponents components; + components.stateLabeling = this->constructStateLabeling(); + components.choiceLabeling = this->constructChoiceLabeling(); + components.transitionMatrix = this->constructTransitionMatrix(); + components.rewardModels.emplace(this->reward_model_name, this->constructRewardModel()); + components.observabilityClasses = this->constructObservabilityClasses(); + return std::make_shared>(std::move(components)); + } + + + POMDPDiscrete *parse_as_pomdp(std::string filename) { + try { + POMDPDiscrete *model = new POMDPDiscrete("","",filename); + model->SetSparse(true); + MADPParser parser(model); + return model; + } catch(E& e) { + e.Print(); + return NULL; + } + } + + DecPOMDPDiscrete *parse_as_decpomdp(std::string filename) { + try { + DecPOMDPDiscrete *model = new DecPOMDPDiscrete("","",filename); + model->SetSparse(true); + MADPParser parser(model); + return model; + } catch(E& e) { + e.Print(); + return NULL; + } + } + + DecPOMDPDiscrete *parseMadp(std::string filename) { + + DecPOMDPDiscrete *model; + + STORM_PRINT_AND_LOG("MADP: trying to parse as POMDP...\n"); + model = parse_as_pomdp(filename); + if(model != NULL) { 
+ STORM_PRINT_AND_LOG("MADP: parsing success\n"); + return model; + } + + STORM_PRINT_AND_LOG("MADP: parsing failure\n"); + STORM_PRINT_AND_LOG("MADP: trying to parse as dec-POMDP...\n"); + model = parse_as_decpomdp(filename); + if(model != NULL) { + STORM_PRINT_AND_LOG("MADP: parsing success\n"); + return model; + } + + if(model == NULL) { + STORM_PRINT_AND_LOG("MADP: parsing failure\n"); + } + return model; + + } + + std::unique_ptr parseDecPomdp(std::string filename) { + DecPOMDPDiscrete *madp_decpomdp = parseMadp(filename); + if(madp_decpomdp == NULL) { + return NULL; + } + // debug: MADP info + // std::cerr << madp_decpomdp->SoftPrint() << std::endl; + std::unique_ptr decpomdp = std::make_unique(madp_decpomdp); + delete madp_decpomdp; // the model was allocated with new; free() would skip its destructor + return decpomdp; + } + + + void DecPomdp::applyDiscountFactorTransformation() { + + if(this->discounted || this->discount_factor == 1) { + return; + } + + this->discount_sink_state = this->freshSink(this->discount_sink_label); + for(uint_fast64_t state = 0; state < this->num_states(); state++) { + if(state == this->initial_state || state == this->discount_sink_state) { + // no discounting in the initial state because it selects the actual initial state + continue; + } + for(StormRow &row: this->transition_matrix[state]) { + for(auto &entry: row) { + entry.second *= this->discount_factor; + } + row.push_back(std::make_pair(this->discount_sink_state,1-this->discount_factor)); + } + } + this->discounted = true; + } + + +} diff --git a/payntbind/src/synthesis/decpomdp/DecPomdp.h b/payntbind/src/synthesis/decpomdp/DecPomdp.h new file mode 100644 index 000000000..2a9f645f5 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/DecPomdp.h @@ -0,0 +1,141 @@ +#pragma once + +#include "madp/src/base/POMDPDiscrete.h" +#include "madp/src/base/DecPOMDPDiscrete.h" + +#include "storm/models/sparse/Mdp.h" +#include "storm/models/sparse/Pomdp.h" +#include "storm/models/sparse/StandardRewardModel.h" + +#include + +namespace synthesis { + + using 
MadpState = std::pair; // state + observation + using MadpRow = std::vector>; + using StormRow = std::vector>; + + + class DecPomdp { + + public: + DecPomdp(DecPOMDPDiscrete *model); + + /** Number of agents. */ + uint_fast64_t num_agents; + + /** For each agent, a list of its action labels. */ + std::vector> agent_action_labels; + /** A list of tuples of actions. */ + std::vector> joint_actions; + + /** For each agent, a list of its observation labels. */ + std::vector> agent_observation_labels; + /** A list of tuples of observations. */ + std::vector> joint_observations; + + /** The unique initial state. */ + uint_fast64_t initial_state; + /** Storm-esque transition matrix: for each state, a row group. */ + std::vector> transition_matrix; + /** For each state (row group), a mapping of a row to a joint action. */ + std::vector> row_joint_action; + /** State to joint observation map. */ + std::vector state_joint_observation; + /** For each state (row group), a mapping of a row to its reward. */ + std::vector> row_reward; + + + + uint_fast64_t agent_num_actions(uint_fast64_t agent) { + return this->agent_action_labels[agent].size(); + } + uint_fast64_t num_joint_actions() { + return this->joint_actions.size(); + } + uint_fast64_t agent_num_observations(uint_fast64_t agent) { + return this->agent_observation_labels[agent].size(); + } + uint_fast64_t num_joint_observations() { + return this->joint_observations.size(); + } + + uint_fast64_t num_states() { + return this->storm_to_madp_states.size(); + } + + uint_fast64_t num_rows(); + + /** Retrieve the underlying MDP. */ + std::shared_ptr> constructMdp(); + /** Retrieve the underlying POMDP. */ + std::shared_ptr> constructPomdp(); + + /** If true, the rewards are interpreted as costs. */ + bool reward_minimizing; + /** Label associated with the reward model. 
*/ + std::string reward_model_name = "reward"; + + double discount_factor; + + void applyDiscountFactorTransformation(); + + /** Label for the state that simulates initial distribution. */ + std::string init_label = "init"; + /** Label for the states in the initial distribution. */ + std::string no_obs_label = "__no_obs__"; + + /** Whether discounting transformation took place. */ + bool discounted = false; + /** Index of the sink state. */ + uint_fast64_t discount_sink_state; + /** Label associated with the sink. */ + std::string discount_sink_label = "discount_sink"; + + private: + + /** Madp to Storm state map. */ + std::map madp_to_storm_states; + /** Storm to Madp state map. */ + std::vector storm_to_madp_states; + + void collectActions(DecPOMDPDiscrete *model); + void collectObservations(DecPOMDPDiscrete *model); + + bool haveMadpState(MadpState madp_state); + /** + * Return the Storm state index of a MADP state, registering a fresh state (and growing the per-state tables) if it is not mapped yet. + */ + uint_fast64_t mapMadpState(MadpState madp_state); + + uint_fast64_t freshJointAction(std::string action_label); + uint_fast64_t freshJointObservation(std::string observation_label); + /** + * Add new state having fresh observation with its self-loop denoted + * by a fresh joint action with zero reward. + * @return index of the created state + */ + uint_fast64_t freshSink(std::string label); + + storm::models::sparse::StateLabeling constructStateLabeling(); + storm::models::sparse::ChoiceLabeling constructChoiceLabeling(); + storm::storage::SparseMatrix constructTransitionMatrix(); + storm::models::sparse::StandardRewardModel constructRewardModel(); + std::vector constructObservabilityClasses(); + + + + + }; + + + /** + * Parse MADP file and convert transition matrix as well as + * probabilistic observations of the resulting dec-POMDP to a + * Storm-friendly representation. 
+ * @return NULL on parsing error + */ + std::unique_ptr parseDecPomdp(std::string filename); + +} // namespace synthesis + diff --git a/payntbind/src/synthesis/decpomdp/bindings.cpp b/payntbind/src/synthesis/decpomdp/bindings.cpp new file mode 100644 index 000000000..78e054c02 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/bindings.cpp @@ -0,0 +1,25 @@ +#include "../synthesis.h" + +#include "DecPomdp.h" + +void bindings_decpomdp(py::module& m) { + + py::class_(m, "DecPomdp", "dec-POMDP") + // .def(py::init(), "constructor.", py::arg("filename")); + .def("construct_mdp", &synthesis::DecPomdp::constructMdp) + .def("construct_pomdp", &synthesis::DecPomdp::constructPomdp) + .def_property_readonly("num_agents", [](synthesis::DecPomdp& decpomdp) {return decpomdp.num_agents;}) + .def_property_readonly("joint_observations", [](synthesis::DecPomdp& decpomdp) {return decpomdp.joint_observations;}) + .def_property_readonly("agent_observation_labels", [](synthesis::DecPomdp& decpomdp) {return decpomdp.agent_observation_labels;}) + + .def_property_readonly("reward_model_name", [](synthesis::DecPomdp& decpomdp) {return decpomdp.reward_model_name;}) + .def_property_readonly("reward_minimizing", [](synthesis::DecPomdp& decpomdp) {return decpomdp.reward_minimizing;}) + .def_property_readonly("discount_factor", [](synthesis::DecPomdp& decpomdp) {return decpomdp.discount_factor;}) + + .def("apply_discount_factor_transformation", &synthesis::DecPomdp::applyDiscountFactorTransformation) + .def_property_readonly("discount_sink_label", [](synthesis::DecPomdp& decpomdp) {return decpomdp.discount_sink_label;}) + ; + + m.def("parse_decpomdp", &synthesis::parseDecPomdp, py::arg("filename")); + +} diff --git a/payntbind/src/synthesis/decpomdp/madp/AUTHORS b/payntbind/src/synthesis/decpomdp/madp/AUTHORS new file mode 100644 index 000000000..8dcc2e748 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/AUTHORS @@ -0,0 +1,49 @@ +=Main Authors= + +Frans Oliehoek +Department of 
Computer Science, University of Liverpool +Liverpool, United Kingdom +AMLab, University of Amsterdam +Amsterdam, The Netherlands + +Matthijs Spaan +Delft University of Technology +Delft, The Netherlands + +Bas Terwijn +AMLab, University of Amsterdam +Amsterdam, The Netherlands + +João Messias +Institute for Systems and Robotics (ISR), Instituto Superior Técnico (IST) +Lisbon, Portugal + +Philipp Robbel +Media Lab, Massachusetts Institute of Technology +Cambridge, MA, USA + +F.O. is funded by NWO Innovational Research Incentives Scheme Veni +\#639.021.336. Previous development efforts were funded (in part) by AFOSR +MURI project \#FA9550-09-1-0538, and the Interactive Collaborative Information +Systems (ICIS) project, supported by the Dutch Ministry of Economic Affairs, +grant nr: BSIK03024. + +M.S. was funded by the FP7 Marie Curie Actions Individual Fellowship #275217 +(FP7-PEOPLE-2010-IEF) and was previously supported by Fundacao para a Ciencia +e a Tecnologia (ISR/IST pluriannual funding) through the POS_Conhecimento +Program that includes FEDER funds and through grant PTDC/EEA-ACR/73266/2006. + +=Other Contributors= +Philipp Beau +Abdeslam Boularias +Timon Kanters +Francisco Melo +Julian Kooij +Tiago Veiga +Erwin Walraven +Xuanjie Liu + + + + + diff --git a/payntbind/src/synthesis/decpomdp/madp/COPYING b/payntbind/src/synthesis/decpomdp/madp/COPYING new file mode 100644 index 000000000..da15dea85 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/COPYING @@ -0,0 +1,720 @@ +This module uses MADP Toolbox for parsing .pomdp and .dpomdp files. Below is a +list of used MADP Toolbox components (all under GNU GPL v3 license found below): + +src/base +src/include +src/parser + +The following are the contents of the original COPYING file. 
+ +---------------------------------------------------------------------- + +The Multiagent Decision Process Toolbox +Copyright (C) 2007,2008,2014,2015,2016 + +The MADP Toolbox is released under a GNU GPL v3 license, which is +found below. Note that the MADP distribution includes other software +which is distributed under its own license. To avoid any confusion, we +explicitly list the components included in this distribution here. + +MADP Toolbox components (all under GPL v3): +src/base +src/example +src/include +src/parser +src/planning +src/solvers +src/support +src/utils +src/tests + +Included other software: + +src/libDAI - libDAI +http://staff.science.uva.nl/~jmooij1/libDAI/ +libDAI is licensed under the BSD 2-clause license (also known as the FreeBSD license). + +src/boost - boost libraries +http://www.boost.org/ +Licensed under the Boost Software License. + + + + + +---------------------------------------------------------------------- + + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. 
If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. 
Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. 
+ + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. 
Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. 
+ + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. 
If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. 
+ + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. 
+ + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the 
material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. 
If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. 
+ + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/payntbind/src/synthesis/decpomdp/madp/LICENSE b/payntbind/src/synthesis/decpomdp/madp/LICENSE new file mode 100644 index 000000000..6b156fe1d --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/LICENSE @@ -0,0 +1,675 @@ +GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received.
You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. 
+ + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. 
+ + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. 
Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. 
+ + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + {one line to give the program's name and a brief idea of what it does.} + Copyright (C) {year} {name of author} + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + {project} Copyright (C) {year} {fullname} + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>.
+ diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/Action.h b/payntbind/src/synthesis/decpomdp/madp/src/base/Action.h new file mode 100644 index 000000000..130df8917 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/Action.h @@ -0,0 +1,46 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNU GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _ACTION_H_ +#define _ACTION_H_ 1 + +/* the include directives */ +#include +#include + +#include "NamedDescribedEntity.h" + +/// Action is a class that represents actions. +class Action : public NamedDescribedEntity +{ + private: + + protected: + + public: + // Constructor, destructor and copy assignment. + /// (default) Constructor; forwards name and description (both default to "undefined") to NamedDescribedEntity. + Action(const std::string &name=std::string("undefined"), + const std::string &description=std::string("undefined")) : + NamedDescribedEntity(name, description){}; + +}; + +#endif /* !_ACTION_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/ActionDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/ActionDiscrete.h new file mode 100644 index 000000000..2d28bf632 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/ActionDiscrete.h @@ -0,0 +1,55 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNU GPL v.3. However, + * some of the included libraries are released under a different license.
For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _ACTIONDISCRETE_H_ +#define _ACTIONDISCRETE_H_ 1 + +/* the include directives */ +#include +#include + +#include "Globals.h" +#include "Action.h" +#include "DiscreteEntity.h" + +/// ActionDiscrete represents discrete actions. + +/** + * ActionDiscrete is a class that represents actions in a discrete + * action set, which are identified by their index. */ +class ActionDiscrete : public Action, + public DiscreteEntity +{ +private: + +protected: + +public: + // Constructor, destructor and copy assignment. + /// (default) Constructor; the index defaults to INDEX_MAX, name and description to "undefined". + ActionDiscrete(Index i=INDEX_MAX, + const std::string &name=std::string("undefined"), + const std::string &description=std::string("undefined")) : + Action(name, description), + DiscreteEntity(i){}; + +}; + +#endif /* !_ACTIONDISCRETE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/Agent.h b/payntbind/src/synthesis/decpomdp/madp/src/base/Agent.h new file mode 100644 index 000000000..f8439c5c7 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/Agent.h @@ -0,0 +1,52 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNU GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file.
+ * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _AGENT_H_ +#define _AGENT_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "NamedDescribedEntity.h" +#include "DiscreteEntity.h" + + +/// Agent represents an agent. +/**Agent is a class that represents an agent, which can be identified + * by its index. */ +class Agent : public NamedDescribedEntity, + public DiscreteEntity +{ +private: + +protected: + +public: + // Constructor, destructor and copy assignment. + /// (default) Constructor; the index defaults to INDEX_MAX, name and description to "undefined". + Agent(Index i=INDEX_MAX, + const std::string &name=std::string("undefined"), + const std::string &description=std::string("undefined")) : + NamedDescribedEntity(name, description), + DiscreteEntity(i){}; + +}; + +#endif /* !_AGENT_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/CPDDiscreteInterface.h b/payntbind/src/synthesis/decpomdp/madp/src/base/CPDDiscreteInterface.h new file mode 100644 index 000000000..5dcec8587 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/CPDDiscreteInterface.h @@ -0,0 +1,65 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNU GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once.
*/ +#ifndef _CPDDISCRETEINTERFACE_H_ +#define _CPDDISCRETEINTERFACE_H_ 1 + +/* the include directives */ +#include "Globals.h" + +/** \brief CPDDiscreteInterface is an abstract base class that represents + * a conditional probability distribution \f$ Pr(x|y) \f$. + * + * The interface declares Get, Set, Sample, SanityCheck, Clone and SoftPrint (all pure virtual). + * In the future + * \li we may want to add a function that allows multiplication of + * CPDDiscreteInterface's ? + * \li I think it might not be convenient to add Set() functions (each + * implementation might use different set functions? E.g. a CPD based on rules + * may use quite a different mechanism to set probabilities than a CPT) -- note that a generic Set(x, y, p) is nevertheless declared below. + * + * */ +class CPDDiscreteInterface +{ +private: + +protected: + +public: + /// Destructor. + virtual ~CPDDiscreteInterface(){}; + + ///return the probability \f$ Pr(x|y) \f$ of x given y + virtual double Get(Index x, Index y) const = 0; + ///set the probability \f$ Pr(x|y) \f$ of x given y + virtual void Set(Index x, Index y, double p) = 0; + + /// Returns an (index of a) x drawn according to \f$ P(x|y) \f$ + virtual Index Sample(Index y) const = 0; + + virtual void SanityCheck() const = 0; + + /// Returns a pointer to a copy of this class. + virtual CPDDiscreteInterface* Clone() const = 0; + + virtual std::string SoftPrint() const = 0; +}; + +#endif /* !_CPDDISCRETEINTERFACE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/CPDKroneckerDelta.h b/payntbind/src/synthesis/decpomdp/madp/src/base/CPDKroneckerDelta.h new file mode 100644 index 000000000..0c19dfc01 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/CPDKroneckerDelta.h @@ -0,0 +1,56 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNU GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file.
For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Philipp Robbel + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _CPDKRONECKERDELTA_H_ +#define _CPDKRONECKERDELTA_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include "CPDDiscreteInterface.h" + +/// CPDKroneckerDelta implements a Kronecker delta-style CPD. +/**Centers probability mass of 1 on x == y.*/ +class CPDKroneckerDelta : public CPDDiscreteInterface +{ +public: + /// Constructor. + CPDKroneckerDelta() {} + + /// Returns \f$ P(x|y) \f$: 1.0 if x == y, 0.0 otherwise + double Get(Index x, Index y) const + { return x == y ? 1.0 : 0.0; } + + /// Returns an (index of a) x drawn according to \f$ P(x|y) \f$ (always y itself) + Index Sample (Index y) const + { return y; } + + ///Sets P(x|y) + /**Doesn't apply to Kronecker delta function: the arguments are ignored and a notice is written to std::cerr.*/ + void Set(Index x, Index y, double prob) + { std::cerr << "CPDKroneckerDelta::Set is unnecessary." << std::endl; } + + void SanityCheck() const + { std::cerr << "CPDKroneckerDelta::SanityCheck is unnecessary." << std::endl; } + + /// Returns a pointer to a copy of this class. + CPDKroneckerDelta* Clone() const + { return new CPDKroneckerDelta(*this); } + + std::string SoftPrint() const + { return "Kronecker delta-style CPD. Returns 1 iff x==y"; } + +}; + +#endif /* !_CPDKRONECKERDELTA_H_ */ diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/CPT.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/CPT.cpp new file mode 100644 index 000000000..f704723ba --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/CPT.cpp @@ -0,0 +1,111 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNU GPL v.3. However, + * some of the included libraries are released under a different license.
For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "CPT.h" + +using namespace std; + +CPT::CPT() : + _m_probTable(0,0) +{ +} + +CPT::CPT(size_t X, size_t Y) + : + _m_probTable(X,Y) +{ + for(size_t x=0; x < X; x++) + for(size_t y=0; y < Y; y++) + _m_probTable(x,y)=0; +} + +CPT:: +CPT(const CPT& cpt) + : + _m_probTable( cpt._m_probTable) +{ +} + +CPT::~CPT() +{ + _m_probTable.clear(); +} + +void CPT::SanityCheck() const +{ + //cout << "Starting CPT::SanityCheck()"< Matrix; + +private: + + Matrix _m_probTable; + +protected: + size_t nrX() const + { return _m_probTable.size1(); } + size_t nrY() const + { return _m_probTable.size2(); } + +public: + // Constructor, destructor and copy assignment. + /// Constructor without arguments, needed for serialization. + CPT(); + + /// Constructor with sizes of sets + CPT(size_t X, size_t Y); + + /// Copy constructor. + CPT(const CPT& cpt); + + /// Destructor. + ~CPT(); + + /// Returns \f$ P(x|y) \f$ + double Get(Index x, Index y) const + { return( _m_probTable(x,y)); } + + /// Returns an (index of a) x drawn according to \f$ P(x|y) \f$ + Index Sample (Index y) const; + + //data manipulation functions: + ///Sets P(x|y) + /**x and y are indices: e.g. x is an index to the x-th element + * in X (the set of values x can take), + * and y likewise indexes the conditioning value. prob is + * the probability stored in the table entry (x,y), i.e. the value Get(x,y) returns. + */ + void Set(Index x, Index y, double prob) + { _m_probTable(x,y)=prob; } + + /// Get a pointer to the CPT. + const Matrix* GetMatrixPtr() const + { return(&_m_probTable); } + + virtual void SanityCheck() const; + + /// Returns a pointer to a copy of this class.
+ virtual CPT* Clone() const + { return new CPT(*this); } + + std::string SoftPrint() const; + + ///This will randomize the CPT + void SetRandom(); + +}; + +#endif /* !_CPT_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDP.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDP.cpp new file mode 100644 index 000000000..3462cf40d --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDP.cpp @@ -0,0 +1,47 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNU GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file.
+ */ + +#include "DecPOMDP.h" + +using namespace std; + +DecPOMDP::DecPOMDP() /* defaults: reward type REWARD, discount 1.0 */ +{ + _m_rewardType = REWARD; + _m_discount = 1.0; +} + +void DecPOMDP::SetDiscount(double d) /* stores d when 0 <= d <= 1, otherwise throws E */ +{ + if(d>=0 && d<=1) + _m_discount=d; + else + throw(E("DecPOMDP::SetDiscount() discount not valid, should be >=0 and <=1")); +} + +string DecPOMDP::SoftPrint() const /* returns the discount factor and reward type, one per line */ +{ + stringstream ss; + ss << "Discount factor: " << _m_discount << endl; + ss << "Reward type: " << _m_rewardType << endl; + return ss.str(); +} + +void DecPOMDP::SetRewardType(reward_t r) /* throws E for any reward type other than REWARD */ +{ + if(r!=REWARD) + throw(E("DecPOMDP::SetRewardType only reward type REWARD is supported")); + _m_rewardType = r; +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDP.h b/payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDP.h new file mode 100644 index 000000000..08b5a7426 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDP.h @@ -0,0 +1,91 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNU GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#ifndef _DECPOMDP_H_ +#define _DECPOMDP_H_ 1 + +/* the include directives */ + +#include +#include "Globals.h" +#include "DecPOMDPInterface.h" + + +/**\brief DecPOMDP is a simple implementation of DecPOMDPInterface. + * + * It defines a couple of functions that relate to the (types of) + * rewards and discount factor. + * + * Conceptually an MultiAgentDecisionProcess that implements this interface, is + * a Dec-POMDP: the system is cooperative and there is only 1 reward function.
+ */ +class DecPOMDP : + virtual public DecPOMDPInterface +{ + private: + + /// The discount parameter. + /** When agents have different interests (the POSG setting), + * they may also have different discount factors. For a + * Dec-POMDP, however, we have one global discount factor + * (which typically is 1.0 in the finite horizon case).*/ + double _m_discount; + /// Do the agents get rewards or costs? + reward_t _m_rewardType; + protected: + + public: + + // constructors etc. + /// Default constructor. Sets RewardType to REWARD and discount to 1.0. + DecPOMDP(); + + /// Sets the discount parameter to d (throws E unless 0 <= d <= 1). + void SetDiscount(double d); + /// Returns the discount parameter. + double GetDiscount() const {return _m_discount;} + /// Sets the reward type to reward_t r. + /** At the moment only REWARD is supported; any other value throws E. */ + void SetRewardType(reward_t r); + /// Returns the reward type. + reward_t GetRewardType() const {return _m_rewardType;} + + /// SoftPrints some information on the DecPOMDP. + std::string SoftPrint() const; + + ///Functions needed for POSGInterface (agentI is ignored: they forward to the single shared discount / reward type): + void SetDiscountForAgent(Index agentI, double d) + {SetDiscount(d);} + + /// Returns the discount parameter. + double GetDiscountForAgent(Index agentI) const + {return GetDiscount();} + + /// Sets the reward type to reward_t r. + void SetRewardTypeForAgent(Index agentI, reward_t r) + {SetRewardType(r);} + + /// Returns the reward type. + reward_t GetRewardTypeForAgent(Index agentI) const + {return GetRewardType();} + + +}; + +#endif //! _DECPOMDP_H_ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDPDiscrete.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDPDiscrete.cpp new file mode 100644 index 000000000..d0463a191 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDPDiscrete.cpp @@ -0,0 +1,268 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox.
+ * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "DecPOMDPDiscrete.h" +#include "RGet.h" +#include "RewardModelMapping.h" +#include "RewardModelMappingSparse.h" + +using namespace std; + +#define DEBUG_DPOMDPD 0 +//Debug Fill DecPOMDP Discrete - functions to initialize the DecPOMDPDiscrete +//(typically from a file) +#define DEBUG_FILLDPD 0 + +DecPOMDPDiscrete::DecPOMDPDiscrete(const string &name, + const string &descr, + const string &pf) : + MultiAgentDecisionProcessDiscrete(name, descr, pf) +{ + _m_initialized = false; + _m_p_rModel = 0; +} + +DecPOMDPDiscrete::~DecPOMDPDiscrete() +{ + if(DEBUG_DPOMDPD) + cout << "deleting DecPOMDPDiscrete (deleting rewards)"<SoftPrint(); + } + else + throw E("DecPOMDPDiscrete components (reward model) not initialized"); + + return(ss.str()); +} + +void DecPOMDPDiscrete::SetReward(Index sI, Index jaI, Index sucSI, double r) +{ + //this adds the fraction for this s' to R(s,a)= \sum_{s'} P(s'|s,a) R(s,a,s'), so each call accumulates onto the stored R(s,a) + double rOld=GetReward(sI,jaI), + rExp=GetTransitionProbability(sI,jaI,sucSI)*r; + SetReward(sI,jaI,rOld+rExp); +} + +void DecPOMDPDiscrete::SetReward(Index sI, Index jaI, Index sucSI, + Index joI, double r) +{ + //this adds the fraction for this s', o to R(s,a)= \sum_s' \sum_o P(s',o|s,a) R(s,a,s',o) + double rOld=GetReward(sI,jaI), + rExp=GetObservationProbability(sI, jaI, sucSI, joI) * GetTransitionProbability(sI,jaI,sucSI) * r; + SetReward(sI,jaI,rOld+rExp); +} + +void +DecPOMDPDiscrete::ExtractMADPDiscrete(MultiAgentDecisionProcessDiscrete *madp) +{ + madp->SetNrAgents(GetNrAgents()); +
madp->SetName(GetName()); + madp->SetDescription(GetDescription()); + + // transition model + madp->SetTransitionModelPtr( + const_cast(GetTransitionModelDiscretePtr())); + + // observation model + madp->SetObservationModelPtr( + const_cast(GetObservationModelDiscretePtr())); + + // MADPComponentDiscreteStates + for(Index s=0;s!=GetNrStates();++s) + madp->AddState(GetState(s)->GetName()); + + madp->SetISD(GetISD()); + + // MADPComponentDiscreteObservations + for(Index id=0;id!=GetNrAgents();++id) + for(Index o=0;o!=GetNrObservations(id);++o) + madp->AddObservation(id,GetObservation(id,o)->GetName()); + madp->ConstructJointObservations(); + + // MADPComponentDiscreteActions + for(Index id=0;id!=GetNrAgents();++id) + for(Index o=0;o!=GetNrActions(id);++o) + madp->AddAction(id,GetAction(id,o)->GetName()); + madp->ConstructJointActions(); + + madp->Initialize(); +} + + +void DecPOMDPDiscrete::CompareModels( + const DecPOMDPDiscreteInterface& d1, + const DecPOMDPDiscreteInterface& d2) +{ + + //MultiAgentDecisionProcessInterface level + //size_t GetNrAgents () + size_t nrAg = d1.GetNrAgents(); + if(nrAg != d2.GetNrAgents()) + { + cout << "number of agents not the same, stopping comparison"<Set(sI, jaI, r);} + + /// Set the reward for state, joint action, suc. state indices (adds the transition-probability-weighted r onto the stored R(s,a)) + void SetReward(Index sI, Index jaI, Index sucSI, double r); + + /// Set the reward for state, joint action, suc.state, joint obs indices (adds the probability-weighted r onto the stored R(s,a)) + void SetReward(Index sI, Index jaI, Index sucSI, Index joI, + double r); + + // 'get' functions: + /// Return the reward for state, joint action indices + double GetReward(Index sI, Index jaI) const + { return(_m_p_rModel->Get(sI, jaI));} + + /// Prints some information on the DecPOMDPDiscrete. + std::string SoftPrint() const; + + /// Get the MADPDiscrete components from this DecPOMDPDiscrete. + void ExtractMADPDiscrete(MultiAgentDecisionProcessDiscrete *madp); + + /// Get a pointer to the reward model.
+ RewardModel* GetRewardModelPtr() const + { return(_m_p_rModel); } + + //functions to implement the DecPOMDPInterface: + ///implements the DecPOMDPInterface; s and ja are cast (unchecked) to StateDiscrete / JointActionDiscrete to read their indices + double GetReward(State* s, JointAction* ja) const + { + return GetReward( + ((StateDiscrete*)s)->GetIndex(), + ((JointActionDiscrete*)ja)->GetIndex()); + } + ///implements the DecPOMDPInterface; s and ja are cast (unchecked) to StateDiscrete / JointActionDiscrete to read their indices + void SetReward(State* s, JointAction* ja, double r) + { + return SetReward( + ((StateDiscrete*)s)->GetIndex(), + ((JointActionDiscrete*)ja)->GetIndex(), r); + } + + + + //functions to implement the POSGInterface: + + /// Function that sets the reward for an agent, state and joint action. + /** This should be very generic. agentI is ignored.*/ + void SetRewardForAgent(Index agentI, State* s, JointAction* ja, double r) + {SetReward(s, ja, r); } + /// Function that returns the reward for a state and joint action. + /** This should be very generic. agentI is ignored.*/ + double GetRewardForAgent(Index agentI, State* s, JointAction* ja) const + {return GetReward(s, ja); } + + //functions to implement the POSGDiscreteInterface: + + ///implementation of POSGDiscreteInterface + void CreateNewRewardModelForAgent(Index agentI) + {CreateNewRewardModel();} + + void SetRewardForAgent(Index agentI, Index sI, Index jaI, double r) + {SetReward(sI, jaI, r);} + + /// Set the reward for state, joint action , suc.
state indices + void SetRewardForAgent(Index agentI, Index sI, Index jaI, + Index sucSI, double r) + {SetReward(sI, jaI, sucSI, r);} + + /// Set the reward for state, joint action, suc.state, joint obs indices + void SetRewardForAgent(Index agentI, Index sI, Index jaI, + Index sucSI, Index joI, double r) + {SetReward(sI, jaI, sucSI, joI, r);} + + /// Return the reward for state, joint action indices (agentI is ignored) + double GetRewardForAgent(Index agentI, Index sI, Index jaI) const + {return GetReward(sI, jaI);} + + static void CompareModels( + const DecPOMDPDiscreteInterface& d1, + const DecPOMDPDiscreteInterface& d2 + ); + + RGet* GetRGet() const; + + /// Returns a pointer to a copy of this class (allocated with new via the copy constructor). + virtual DecPOMDPDiscrete* Clone() const + { return new DecPOMDPDiscrete(*this); } + +}; +#endif /* !_DECPOMDPDISCRETE_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDPDiscreteInterface.h b/payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDPDiscreteInterface.h new file mode 100644 index 000000000..bb2123fc2 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDPDiscreteInterface.h @@ -0,0 +1,87 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNU GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once.
*/ +#ifndef _DECPOMDPDISCRETEINTERFACE_H_ +#define _DECPOMDPDISCRETEINTERFACE_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include "DecPOMDPInterface.h" +#include "MultiAgentDecisionProcessDiscreteInterface.h" + +class RGet; + +/**\brief DecPOMDPDiscreteInterface is the interface for + * a discrete DEC-POMDP model: it defines the set/get reward functions. + * + * DecPOMDPDiscreteInterface is an interface (i.e. pure abstract class) for + * a discrete DEC-POMDP model. This means that there is a single reward function + * and that states, actions and observations are discrete. + * + * Classes that implement this interface are, for instance, DecPOMDPDiscrete + * and TransitionObservationIndependentDecPOMDPDiscrete. + **/ +class DecPOMDPDiscreteInterface : + virtual public DecPOMDPInterface, + virtual public MultiAgentDecisionProcessDiscreteInterface +{ + private: + + protected: + + public: + ///import the GetReward function from the base class in current scope (the using-declarations below are currently commented out). +/* + using POSGDiscreteInterface::GetReward; + using POSGDiscreteInterface::SetReward; + using DecPOMDPInterface::GetReward;*/ + + /// Destructor. + virtual ~DecPOMDPDiscreteInterface() {}; + + //data manipulation (set) functions: + + /// Creates a new reward model mapping. + virtual void CreateNewRewardModel() = 0; + + + /// Set the reward for state, joint action indices + virtual void SetReward(Index sI, Index jaI, double r) = 0; + + /// Set the reward for state, joint action, suc. state indices + virtual void SetReward(Index sI, Index jaI, + Index sucSI, double r) = 0; + + /// Set the reward for state, joint action, suc.state, joint obs indices + virtual void SetReward(Index sI, Index jaI, Index sucSI, Index joI, + double r) = 0; + + // 'get' functions: + /// Return the reward for state, joint action indices + virtual double GetReward(Index sI, Index jaI) const = 0; + virtual RGet * GetRGet() const = 0; + + /// Returns a pointer to a copy of this class.
+ virtual DecPOMDPDiscreteInterface* Clone() const = 0; + +}; + +#endif /* !_DECPOMDPDISCRETEINTERFACE_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDPInterface.h b/payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDPInterface.h new file mode 100644 index 000000000..fa325c670 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/DecPOMDPInterface.h @@ -0,0 +1,78 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#ifndef _DECPOMDPINTERFACE_H_ +#define _DECPOMDPINTERFACE_H_ 1 + +/* the include directives */ + +#include +#include "Globals.h" +#include "POSGInterface.h" +class State; +class JointAction; + +/**\brief DecPOMDPInterface is an interface for DecPOMDPs. It declares a couple + * of function that relate to the (types of) rewards and discount factor. + * + * Conceptually an MultiAgentDecisionProcess that implements this interface, is + * a Dec-POMDP: the system is cooperative and there is only 1 reward function. + */ +class DecPOMDPInterface : virtual public POSGInterface +{ + private: + + protected: + + public: + /*using POSGInterface::SetDiscount; + using POSGInterface::GetDiscount; + using POSGInterface::GetRewardType; + using POSGInterface::SetRewardType; + using POSGInterface::GetReward; + using POSGInterface::SetReward;*/ + + /// Virtual destructor. + virtual ~DecPOMDPInterface() {}; + + /// Sets the discount parameter to 0 < d <= 1. 
+ virtual void SetDiscount(double d) = 0; + + /// Returns the discount parameter. + virtual double GetDiscount() const = 0; + + /// Sets the reward type to reward_t r. + virtual void SetRewardType(reward_t r) = 0; + + /// Returns the reward type. + virtual reward_t GetRewardType() const = 0; + + /// Function that returns the reward for a state and joint action. + /** This should be very generic.*/ + virtual double GetReward(State* s, JointAction* ja) const = 0; + + /// Function that sets the reward for a state and joint action. + /** This should be very generic.*/ + virtual void SetReward(State* s, JointAction* ja, double r) = 0; + + /// Returns a pointer to a copy of this class. + virtual DecPOMDPInterface* Clone() const = 0; +}; + +#endif //! _DECPOMDPINTERFACE_H_ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/DiscreteEntity.h b/payntbind/src/synthesis/decpomdp/madp/src/base/DiscreteEntity.h new file mode 100644 index 000000000..7572d639d --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/DiscreteEntity.h @@ -0,0 +1,63 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _DISCRETEENTITY_H_ +#define _DISCRETEENTITY_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" + +/// DiscreteEntity is a general class for tracking discrete entities. 
+/**DiscreteEntity represents entities in discrete spaces, that hence + * can be represented by an index. For example, actions in a finite + * action space. */ +class DiscreteEntity +{ +private: + + + /// The index of this discrete entity. + Index _m_index; + +protected: + +public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + DiscreteEntity(Index i=INDEX_MAX) : _m_index(i){} + + /// Destructor. + virtual ~DiscreteEntity(){} + + /// Return this DiscreteEntity's index. + Index GetIndex() const { return(_m_index); } + + /// Set this DiscreteEntity's index. + void SetIndex(Index i) { _m_index=i; } + + /// The less (<) operator. This is needed to put DiscreteEntities in a set. + bool operator< (const DiscreteEntity& a) const { + return( _m_index < a._m_index );} + +}; + + +#endif /* !_DISCRETEENTITY_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/E.h b/payntbind/src/synthesis/decpomdp/madp/src/base/E.h new file mode 100644 index 000000000..20c51b9ca --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/E.h @@ -0,0 +1,62 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _E_H_ +#define _E_H_ 1 + +/* the include directives */ +#include +#include +#include + +/// E is a class that represents a basic exception. 
+class E +{ + private: + + protected: + + public: + + /// The error message for this particular exception. + std::string _m_error; + + // Constructor, destructor and copy assignment. + + /// Constructor with a C-style string + E(const char* arg): _m_error(arg) {} + /// Constructor with an STL string + E(std::string arg) : _m_error(arg) {} + /// Constructor with an STL stringstream + E(const std::stringstream& arg) : _m_error(arg.str()) {} + + /// Destructor + virtual ~E(){}; + + /// Returns the error message. + std::string SoftPrint() const {return _m_error;} + /// Prints the error message to cout. + void Print() const + { std::cout << "ERROR: "<< _m_error << std::endl;} +}; + + +#endif /* !_E_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/EDeadline.h b/payntbind/src/synthesis/decpomdp/madp/src/base/EDeadline.h new file mode 100644 index 000000000..08b76d66d --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/EDeadline.h @@ -0,0 +1,62 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _EDEADLINE_H_ +#define _EDEADLINE_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "E.h" + +/// EDeadline represents a deadline exceeded expection. +class EDeadline : public E +{ + private: + + protected: + + public: + + double _m_expectedTimeForCompletion; + + + // Constructor, destructor and copy assignment. 
+ /// Constructor with a C-style string + EDeadline(const char* arg, double expectedTimeForCompletion=0): + E(arg), + _m_expectedTimeForCompletion(expectedTimeForCompletion) + {} + /// Constructor with an STL string + EDeadline(std::string arg, double expectedTimeForCompletion=0): + E(arg), + _m_expectedTimeForCompletion(expectedTimeForCompletion) + {} + /// Constructor with an STL stringstream + EDeadline(const std::stringstream& arg, double expectedTimeForCompletion=0) : + E(arg), + _m_expectedTimeForCompletion(expectedTimeForCompletion) + {} + +}; + + +#endif /* !_EDEADLINE_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/EInvalidIndex.h b/payntbind/src/synthesis/decpomdp/madp/src/base/EInvalidIndex.h new file mode 100644 index 000000000..a6370cbf2 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/EInvalidIndex.h @@ -0,0 +1,49 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _EINVALIDINDEX_H_ +#define _EINVALIDINDEX_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "E.h" + +/// EInvalidIndex represents an invalid index exception. +class EInvalidIndex : public E +{ + private: + + protected: + + public: + // Constructor, destructor and copy assignment. 
+ /// Constructor with a C-style string + EInvalidIndex(const char* arg):E(arg) {} + /// Constructor with an STL string + EInvalidIndex(std::string arg):E(arg) {} + /// Constructor with an STL stringstream + EInvalidIndex(const std::stringstream& arg) : E(arg) {} + +}; + + +#endif /* !_EINVALIDINDEX_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/ENoSubScope.h b/payntbind/src/synthesis/decpomdp/madp/src/base/ENoSubScope.h new file mode 100644 index 000000000..309f5058b --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/ENoSubScope.h @@ -0,0 +1,49 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _ENOSUBSCOPE_H_ +#define _ENOSUBSCOPE_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "E.h" + +/// ENoSubScope represents an invalid index exception. +class ENoSubScope : public E +{ + private: + + protected: + + public: + // Constructor, destructor and copy assignment. 
+ /// Constructor with a C-style string + ENoSubScope(const char* arg):E(arg) {} + /// Constructor with an STL string + ENoSubScope(std::string arg):E(arg) {} + /// Constructor with an STL stringstream + ENoSubScope(const std::stringstream& arg) : E(arg) {} + +}; + + +#endif /* !_ENOSUBSCOPE_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/ENotCached.h b/payntbind/src/synthesis/decpomdp/madp/src/base/ENotCached.h new file mode 100644 index 000000000..d892d3caf --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/ENotCached.h @@ -0,0 +1,49 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _ENOTCACHED_H_ +#define _ENOTCACHED_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "E.h" + +/// ENotCached represents an invalid index exception. +class ENotCached : public E +{ + private: + + protected: + + public: + // Constructor, destructor and copy assignment. 
+ /// Constructor with a C-style string + ENotCached(const char* arg):E(arg) {} + /// Constructor with an STL string + ENotCached(std::string arg):E(arg) {} + /// Constructor with an STL stringstream + ENotCached(const std::stringstream& arg) : E(arg) {} + +}; + + +#endif /* !_ENOTCACHED_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/EOverflow.h b/payntbind/src/synthesis/decpomdp/madp/src/base/EOverflow.h new file mode 100644 index 000000000..3b7c201b1 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/EOverflow.h @@ -0,0 +1,49 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _EOVERFLOW_H_ +#define _EOVERFLOW_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "E.h" + +/// EOverflow represents an integer overflow exception. +class EOverflow : public E +{ + private: + + protected: + + public: + // Constructor, destructor and copy assignment. 
+ /// Constructor with a C-style string + EOverflow(const char* arg):E(arg) {} + /// Constructor with an STL string + EOverflow(std::string arg):E(arg) {} + /// Constructor with an STL stringstream + EOverflow(const std::stringstream& arg) : E(arg) {} + +}; + + +#endif /* !_EOVERFLOW_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/EParse.h b/payntbind/src/synthesis/decpomdp/madp/src/base/EParse.h new file mode 100644 index 000000000..876fe3efb --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/EParse.h @@ -0,0 +1,48 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _EPARSE_H_ +#define _EPARSE_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "E.h" + +///EParse represents a parser exception. +class EParse : public E +{ + private: + + protected: + + public: + // Constructor, destructor and copy assignment. 
+ /// Constructor with a C-style string + EParse(const char* arg):E(arg) {} + /// Constructor with an STL string + EParse(std::string arg):E(arg) {} + /// Constructor with an STL stringstream + EParse(const std::stringstream& arg) : E(arg) {} +}; + + +#endif /* !_EPARSE_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/EventObservationModelMapping.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/EventObservationModelMapping.cpp new file mode 100644 index 000000000..d1d91ee90 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/EventObservationModelMapping.cpp @@ -0,0 +1,65 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * João Messias + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "EventObservationModelMapping.h" + +using namespace std; + +EventObservationModelMapping::EventObservationModelMapping(int nrS, int nrJA, + int nrJO) : + ObservationModelDiscrete(nrS, nrJA, nrJO) +{ + Matrix *O; + for(int a=0;a!=nrJA;++a) + { + std::vector S; + for(int joI=0;joI!=nrJO;++joI) + { + O=new Matrix(nrS,nrS); + O->clear(); + S.push_back(O); + } + _m_O.push_back(S); + } +} + +EventObservationModelMapping:: +EventObservationModelMapping(const EventObservationModelMapping& OM) : + ObservationModelDiscrete(OM) +{ + Matrix *O; + for(unsigned int a=0;a!=OM._m_O.size();++a) + { + std::vector S; + for(unsigned int joI=0;joI!=OM._m_O.at(0).size();++joI) + { + O=new Matrix(*OM._m_O[a][joI]); + S.push_back(O); + } + _m_O.push_back(S); + } +} + +EventObservationModelMapping::~EventObservationModelMapping() +{ + for(size_t i = 0; i < _m_O.size(); i++) + { + for(vector::iterator it=_m_O.at(i).begin(); it!=_m_O.at(i).end(); ++it) + delete(*it); + _m_O.at(i).clear(); + } + _m_O.clear(); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/EventObservationModelMapping.h b/payntbind/src/synthesis/decpomdp/madp/src/base/EventObservationModelMapping.h new file mode 100644 index 000000000..a5e4065fb --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/EventObservationModelMapping.h @@ -0,0 +1,85 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * João Messias + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. 
*/ +#ifndef _EVENTOBSERVATIONMODELMAPPING_H_ +#define _EVENTOBSERVATIONMODELMAPPING_H_ 1 + +/* the include directives */ +#include "boost/numeric/ublas/matrix.hpp" +#include "Globals.h" +#include "ObservationModelDiscrete.h" +class OGet; +class OGet_EventObservationModelMapping; + +/// EventObservationModelMapping implements an ObservationModelDiscrete which +/// depends not only on the resulting state but also on the current state of +/// the system, i.e. P(o(k+1) | s(k), ja(k), s(k+1)) +class EventObservationModelMapping : + public ObservationModelDiscrete +{ +public: + + typedef boost::numeric::ublas::matrix Matrix; + +private: + + std::vector< std::vector > _m_O; + +protected: + +public: + // Constructor, destructor and copy assignment. + /// default Constructor + EventObservationModelMapping(int nrS = 1, int nrJA = 1, int nrJO = 1); + + /// Copy constructor. + EventObservationModelMapping(const EventObservationModelMapping& OM); + /// Destructor. + ~EventObservationModelMapping(); + + double Get(Index ja_i, Index suc_s_i, Index jo_i) const + { throw E("Cannot refer to an Event Observation Model with (o,s',a). Use Get(s,a,s',o) instead."); return(0); } + double Get(Index s_i, Index ja_i, Index suc_s_i, Index jo_i) const + { return (*_m_O[ja_i][jo_i])(s_i,suc_s_i); } + //data manipulation funtions: + /// Sets P(o|s,ja,s') + /** jo_i, Index s_i, Index ja_i, Index suc_s_i, are indices of the joint + * observation, taken joint action and resulting successor + * state. prob is the probability. The order of events is s, ja, s', + * o, so is the arg. list + */ + void Set(Index ja_i, Index suc_s_i, Index jo_i, double prob) + { throw E("Cannot refer to an Event Observation Model with (o,s',a,p). 
Use Set(s,a,s',o,p) instead."); } + void Set(Index s_i, Index ja_i, Index suc_s_i, Index jo_i, double prob) + { (*_m_O[ja_i][jo_i])(s_i,suc_s_i)=prob; } + + const Matrix* GetMatrixPtr(Index a, Index jo_i) const + { return(_m_O.at(a).at(jo_i)); } + + /// Returns a pointer to a copy of this class. + virtual EventObservationModelMapping* Clone() const + { return new EventObservationModelMapping(*this); } + + friend class OGet_EventObservationModelMapping; +}; + +#endif /* !_EVENTOBSERVATIONMODELMAPPING_H_*/ + +// Local Variables: *** +// mode:c++ *** +// End: *** + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/EventObservationModelMappingSparse.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/EventObservationModelMappingSparse.cpp new file mode 100644 index 000000000..0eba1d857 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/EventObservationModelMappingSparse.cpp @@ -0,0 +1,65 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * João Messias + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "EventObservationModelMappingSparse.h" + +using namespace std; + +EventObservationModelMappingSparse::EventObservationModelMappingSparse(int nrS, + int nrJA, + int nrJO) : + ObservationModelDiscrete(nrS, nrJA, nrJO) +{ + SparseMatrix *O; + for(int a=0;a!=nrJA;++a) + { + std::vector S; + for(int joI=0;joI!=nrJO;++joI) + { + O=new SparseMatrix(nrS,nrS); + O->clear(); + S.push_back(O); + } + _m_O.push_back(S); + } +} + +EventObservationModelMappingSparse:: +EventObservationModelMappingSparse(const EventObservationModelMappingSparse& OM) : + ObservationModelDiscrete(OM) +{ + SparseMatrix *O; + for(unsigned int a=0;a!=OM._m_O.size();++a) + { + std::vector S; + for(unsigned int joI=0;joI!=OM._m_O.at(0).size();++joI) + { + O=new SparseMatrix(*OM._m_O[a][joI]); + S.push_back(O); + } + _m_O.push_back(S); + } +} + +EventObservationModelMappingSparse::~EventObservationModelMappingSparse() +{ + for(size_t i = 0; i < _m_O.size(); i++) + { + for(vector::iterator it=_m_O.at(i).begin(); it!=_m_O.at(i).end(); ++it) + delete(*it); + _m_O.at(i).clear(); + } +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/EventObservationModelMappingSparse.h b/payntbind/src/synthesis/decpomdp/madp/src/base/EventObservationModelMappingSparse.h new file mode 100644 index 000000000..19ffd3493 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/EventObservationModelMappingSparse.h @@ -0,0 +1,98 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * João Messias + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +/* Only include this header file once. */ +#ifndef _EVENTOBSERVATIONMODELMAPPINGSPARSE_H_ +#define _EVENTOBSERVATIONMODELMAPPINGSPARSE_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include "ObservationModelDiscrete.h" +#include "boost/numeric/ublas/matrix_sparse.hpp" +class OGet; +class OGet_ObservationModelMapping; + +/// EventObservationModelMappingSparse implements an ObservationModelDiscrete. +/** Uses sparse matrices. */ +class EventObservationModelMappingSparse : + public ObservationModelDiscrete +{ +public: +#if BOOST_1_32_OR_LOWER // they renamed sparse_vector to mapped_vector + typedef boost::numeric::ublas::sparse_matrix SparseMatrix; +#else + typedef boost::numeric::ublas::compressed_matrix SparseMatrix; +#endif + + +private: + + std::vector< std::vector > _m_O; + +protected: + +public: + // Constructor, destructor and copy assignment. + /// default Constructor + EventObservationModelMappingSparse(int nrS = 1, int nrJA = 1, int nrJO = 1); + + /// Copy constructor. + EventObservationModelMappingSparse(const EventObservationModelMappingSparse& OM); + /// Destructor. + ~EventObservationModelMappingSparse(); + + /// Returns P(jo|ja,s') + double Get(Index ja_i, Index suc_s_i, Index jo_i) const + { throw E("Cannot refer to an Event Observation Model with (o,s',a). Use Get(s,a,s',o) instead."); return(0); } + double Get(Index s_i, Index ja_i, Index suc_s_i, Index jo_i) const + { return (*_m_O[ja_i][jo_i])(s_i,suc_s_i); } + + //data manipulation funtions: + /// Sets P(o|s,ja,s') + /** jo_i, Index s_i, Index ja_i, Index suc_s_i, are indices of the joint + * observation, taken joint action and resulting successor + * state. prob is the probability. The order of events is s, ja, s', + * o, so is the arg. list + */ + void Set(Index ja_i, Index suc_s_i, Index jo_i, double prob) + { throw E("Cannot refer to an Event Observation Model with (o,s',a,p). 
Use Set(s,a,s',o,p) instead."); } + void Set(Index s_i, Index ja_i, Index suc_s_i, Index jo_i, double prob) + { + // make sure probability is not 0 + if(prob > PROB_PRECISION) + (*_m_O[ja_i][jo_i])(s_i,suc_s_i)=prob; + // check if we already defined this element, if so remove it + else if((*_m_O[ja_i][jo_i])(s_i,suc_s_i)>PROB_PRECISION) + (*_m_O[ja_i][jo_i]).erase_element(s_i,suc_s_i); + } + + /// Get a pointer to a transition matrix for a particular action / observation pair. + const SparseMatrix* GetMatrixPtr(Index a, Index jo_i) const + { return(_m_O.at(a).at(jo_i)); } + + /// Returns a pointer to a copy of this class. + virtual EventObservationModelMappingSparse* Clone() const + { return new EventObservationModelMappingSparse(*this); } + + friend class OGet_EventObservationModelMappingSparse; +}; + +#endif /* !_EVENTOBSERVATIONMODELMAPPINGSPARSE_H_*/ + +// Local Variables: *** +// mode:c++ *** +// End: *** + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/FSDist_COF.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/FSDist_COF.cpp new file mode 100644 index 000000000..735e2fcbc --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/FSDist_COF.cpp @@ -0,0 +1,242 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include +#include "FSDist_COF.h" +#include "StateFactorDiscrete.h" +#include "MADPComponentFactoredStates.h" +#include "MultiAgentDecisionProcessDiscreteFactoredStatesInterface.h" +using namespace std; + +//Default constructor +//FSDist_COF::FSDist_COF() {} + +FSDist_COF::FSDist_COF() : + _m_nrStateFactors(0), + _m_sfacDomainSizes(0), + _m_stepSize(0), + _m_probs(0) +{ +} + +FSDist_COF::FSDist_COF(const MADPComponentFactoredStates& a) : + _m_nrStateFactors(a.GetNrStateFactors()), + _m_sfacDomainSizes(a.GetNrStateFactors()), + _m_probs(a.GetNrStateFactors()) +{ + for(Index i=0; i < a.GetNrStateFactors(); i++) + { + _m_sfacDomainSizes[i] = a.GetNrValuesForFactor(i); + _m_probs[i] = vector( a.GetNrValuesForFactor( i ), 0.0 ); + } + _m_stepSize = IndexTools::CalculateStepSize(_m_sfacDomainSizes); +} + +FSDist_COF::FSDist_COF(const MultiAgentDecisionProcessDiscreteFactoredStatesInterface& a) : + _m_nrStateFactors(a.GetNrStateFactors()), + _m_sfacDomainSizes(a.GetNrStateFactors()), + _m_probs(a.GetNrStateFactors()) +{ + for(Index i=0; i < a.GetNrStateFactors(); i++) + { + _m_sfacDomainSizes[i] = a.GetNrValuesForFactor(i); + _m_probs[i] = vector( a.GetNrValuesForFactor( i ), 0.0 ); + } + _m_stepSize = IndexTools::CalculateStepSize(_m_sfacDomainSizes); +} + +//Copy constructor. +FSDist_COF::FSDist_COF(const FSDist_COF& o) : + _m_nrStateFactors(o._m_nrStateFactors), + _m_sfacDomainSizes(o._m_sfacDomainSizes), + _m_probs(o._m_probs) +{ + _m_stepSize = IndexTools::CalculateStepSize(_m_sfacDomainSizes); +} + +//Destructor +FSDist_COF::~FSDist_COF() +{ + delete [] _m_stepSize; +} + +//Copy assignment operator +FSDist_COF& FSDist_COF::operator= (const FSDist_COF& o) +{ + if (this == &o) return *this; // Gracefully handle self assignment + // Put the normal assignment duties here... 
+ + _m_nrStateFactors = o._m_nrStateFactors; + _m_sfacDomainSizes = o._m_sfacDomainSizes; + _m_probs = o._m_probs; + _m_stepSize = IndexTools::CalculateStepSize(_m_sfacDomainSizes); + + return *this; +} + +void FSDist_COF::SetUniform() +{ + for(Index i=0; i < _m_nrStateFactors; i++) + { + size_t nrVals = _m_sfacDomainSizes[i]; + double p = 1.0 / (double) nrVals; + for(Index j=0; j < nrVals; j++) + _m_probs[i][j] = p; + } +} + +void FSDist_COF::SetZero() +{ + for(Index i=0; i < _m_nrStateFactors; ++i) + { + size_t nrVals = _m_sfacDomainSizes[i]; + double p = 0.0;// / (double) nrVals; + for(Index j=0; j < nrVals; j++) + _m_probs[i][j] = p; + } +} + +double FSDist_COF::GetProbability(Index sI) const +{ + vector sfacValues=IndexTools::JointToIndividualIndicesStepSize( + sI, _m_stepSize, _m_nrStateFactors); + return GetProbability(sfacValues); +} + +double FSDist_COF::GetProbability(const vector& sfacValues) const +{ + double p = 1.0; + for(Index i=0; i < _m_nrStateFactors; i++) + p *= _m_probs.at(i).at( sfacValues.at(i) ); + return p; +} + +double FSDist_COF::GetProbability(const Scope& sfSc, + const std::vector& sfacValues) const +{ + double p = 1.0; + for(Index i=0; i < sfSc.size(); i++) + { + Index sfI = sfSc.at(i); + Index sfValI_sfI = sfacValues.at(i); + double p_this_sf = _m_probs.at(sfI).at(sfValI_sfI); + p *= p_this_sf; + } + + return p; +} + +vector FSDist_COF::ToVectorOfDoubles() const +{ + size_t nrStates=GetNrStates(); + vector sfacValues(_m_nrStateFactors, 0); + Index sI = 0; + vector flatDist( nrStates, 0.0 ); + do { + double p = GetProbability(sfacValues); + flatDist.at(sI) = p; + sI++; + }while ( ! 
IndexTools::Increment(sfacValues, _m_sfacDomainSizes) ); + return flatDist; +} + +size_t FSDist_COF::GetNrStates() const +{ + size_t nrStates = 1; + for(Index i=0; i!=_m_nrStateFactors; ++i) + nrStates*=_m_sfacDomainSizes.at(i); + return(nrStates); +} + +vector FSDist_COF::SampleState() const +{ + vector state(_m_nrStateFactors); + + for(Index i=0; i < _m_nrStateFactors; i++) + { + double randNr=rand() / (RAND_MAX + 1.0); + double sum=0; + for(Index valI = 0; valI < _m_sfacDomainSizes[i]; valI++) + { + sum+=_m_probs[i][valI]; + if(randNr<=sum) + { + state.at(i)=valI; + break; + } + } + } + return state; +} + +string FSDist_COF::SoftPrint() const +{ + stringstream ss; + for(Index i=0; i < _m_probs.size(); i++) + { + ss << "SF" << i; + ss << " - "; + ss << SoftPrintVector(_m_probs.at(i) ) << endl; + } + return (ss.str()); +} + +void FSDist_COF::SanityCheck() +{ + //cout << "Starting FSDist_COF::SanityCheck()"< 0) + { + for(Index valI=0; valI < _m_probs.at(sfacI).size(); valI++) + _m_probs[sfacI][valI] /= sum; + } + else + { + for(Index valI=0; valI < _m_probs.at(sfacI).size(); valI++) + _m_probs[sfacI][valI] = 1.0/_m_probs[sfacI].size(); + } +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/FSDist_COF.h b/payntbind/src/synthesis/decpomdp/madp/src/base/FSDist_COF.h new file mode 100644 index 000000000..79b210853 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/FSDist_COF.h @@ -0,0 +1,108 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +/* Only include this header file once. */ +#ifndef _FSDIST_COF_H_ +#define _FSDIST_COF_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include "FactoredStateDistribution.h" + +class MADPComponentFactoredStates; +class MultiAgentDecisionProcessDiscreteFactoredStatesInterface; +class Scope; + +/** \brief FSDist_COF is a class that represents a completely factored state + * distribution. I.e., a distribution represented as the product of marginal + * state factor probabilities. + * */ +class FSDist_COF : public FactoredStateDistribution +{ +private: + + ///The number of state factors + size_t _m_nrStateFactors; + + ///Vector with size of the domain of each state factor (the nr. values) + /**This is used to compute state indices.*/ + std::vector _m_sfacDomainSizes; + ///Array caching the stepsize - used for computing indices. + /**Computed during initialization.*/ + size_t* _m_stepSize; + + ///_m_probs[sfacI][valI] contains probability of valI for SF sfacI. + std::vector< std::vector > _m_probs; + +protected: + +public: + // Constructor, destructor and copy assignment. + /// Constructor without arguments, needed for serialization. + FSDist_COF(); + FSDist_COF(const MADPComponentFactoredStates& a); + FSDist_COF(const MultiAgentDecisionProcessDiscreteFactoredStatesInterface& a); + /// Copy constructor. + FSDist_COF(const FSDist_COF& a); + /// Destructor. 
+ virtual ~FSDist_COF(); + + /// Copy assignment operator + FSDist_COF& operator= (const FSDist_COF& o); + + //operators: + + //data manipulation (set) functions: + virtual void SetZero(); + virtual void SetUniform(); + + //get (data) functions: + double& GetReferrence(Index sfacI, Index sfacValueI) + { return _m_probs[sfacI][sfacValueI]; } + + virtual double GetProbability( Index sI) const; + double GetProbability(const std::vector& sfacValues) const; + double GetProbability(const Scope& sfSc, + const std::vector& sfacValues) const; + std::vector SampleState() const; + + void SetProbability(Index sfacI, Index valI, double prob) + { + _m_probs.at(sfacI).at(valI)=prob; + } + + virtual std::string SoftPrint() const; + + virtual std::vector ToVectorOfDoubles() const; + + virtual size_t GetNrStates() const; + + /// Returns a pointer to a copy of this class. + virtual FSDist_COF* Clone() const + { return new FSDist_COF(*this); } + + void SanityCheck(); + + //Normalization of the distribution of a given state factor. The normalization constant can be an input, if known. + virtual void Normalize(Index sfacI); + void Normalize(Index sfacI, double sum); +}; + + +#endif /* !_FSDIST_COF_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredDecPOMDPDiscrete.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredDecPOMDPDiscrete.cpp new file mode 100644 index 000000000..498df0f95 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredDecPOMDPDiscrete.cpp @@ -0,0 +1,705 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. 
+ * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "FactoredDecPOMDPDiscrete.h" +#include "DecPOMDPDiscrete.h" +#include "StateFactorDiscrete.h" +#include +#include + +#include + +#define DEBUG_SETR 0 + +using namespace std; + +bool FactoredDecPOMDPDiscrete:: +ConsistentVectorsOnSpecifiedScopes( + const std::vector& v1, + const Scope& scope1, + const std::vector& v2, + const Scope& scope2) +{ + Scope::const_iterator s2_it = scope2.begin(); + Scope::const_iterator s2_last = scope2.end(); + Index v2_I = 0; + while(s2_it != s2_last) + { + //check that v1 specifies the same value for variable *s2_it + Index varI = *s2_it; + Index pos_in_v1 = scope1.GetPositionForIndex(varI); + if(v1[pos_in_v1] != v2[v2_I] ) + return false; + + s2_it++; + v2_I++; + } + return true; + +} + +FactoredDecPOMDPDiscrete::FactoredDecPOMDPDiscrete(string name, string descr, string pf) : + FactoredDecPOMDPDiscreteInterface() + ,MultiAgentDecisionProcessDiscreteFactoredStates(name, descr, pf) + ,_m_p_rModel() + ,_m_cached_FlatRM(false) + ,_m_sparse_FlatRM(false) +{ + } + +//Destructor +FactoredDecPOMDPDiscrete::~FactoredDecPOMDPDiscrete() +{ + for(Index i=0;i!=_m_LRFs.size();++i) + delete _m_LRFs.at(i); + delete _m_p_rModel; +} + +string FactoredDecPOMDPDiscrete::SoftPrint() const +{ + stringstream ss; + ss << MultiAgentDecisionProcessDiscreteFactoredStates::SoftPrint(); + ss << DecPOMDP::SoftPrint(); + ss << "FactoredDecPOMDPDiscrete contains " << _m_nrLRFs + << " local reward functions." 
<< endl; + for(Index i=0;i!=_m_nrLRFs;++i) + { + ss << "LRF " << i << " statefactor scope " << _m_sfScopes.at(i) + << " agent scope " << _m_agScopes.at(i) << endl; + ss << _m_LRFs.at(i)->SoftPrint() << endl; + } + + //print components specific to FactoredDecPOMDPDiscrete: + return(ss.str()); +} + + +void FactoredDecPOMDPDiscrete::InitializeStorage() +{ + _m_sfScopes.clear(); + _m_agScopes.clear(); + _m_LRFs.clear(); + _m_sfScopes.resize(_m_nrLRFs); + _m_agScopes.resize(_m_nrLRFs); + _m_LRFs.resize(_m_nrLRFs, 0); + + //allocate storage for the instantiation information + _m_nrXIs.resize(_m_nrLRFs); + _m_nrAIs.resize(_m_nrLRFs); + _m_nrSFVals.resize(_m_nrLRFs); + _m_nrActionVals.resize(_m_nrLRFs); +} + +void FactoredDecPOMDPDiscrete::SetScopeForLRF(Index LRF, + const Scope& X, //the X scope + const Scope& A, //the A scope + const Scope& Y, + const Scope& O + ) +{ + Scope Xbackedup = StateScopeBackup( Y, O); + Scope Abackedup = AgentScopeBackup( Y, O); + // X'' = X' + X + Xbackedup.Insert(X); + Xbackedup.Sort(); + // A'' = A' + A + Abackedup.Insert(A); + Abackedup.Sort(); + SetScopeForLRF(LRF, Xbackedup, Abackedup); +} + +const FactoredQFunctionScopeForStage +FactoredDecPOMDPDiscrete::GetImmediateRewardScopes() const +{ + FactoredQFunctionScopeForStage immRewScope; + //add scopes: + for(Index i=0; i < _m_nrLRFs; i++) + { + immRewScope.AddLocalQ( _m_sfScopes.at(i), _m_agScopes.at(i) ); + } + return immRewScope; +} + +void FactoredDecPOMDPDiscrete::InitializeInstantiationInformation() +{ + const vector< size_t>& nrVals = GetNrValuesPerFactor(); + const vector< size_t>& nrActions = GetNrActions(); + for(Index e = 0; e < GetNrLRFs(); e++) + { + const Scope& X = GetStateFactorScopeForLRF(e); + vector< size_t> restrXVals(X.size()); + IndexTools::RestrictIndividualIndicesToScope(nrVals, X, restrXVals); + _m_nrSFVals.at(e) = restrXVals; + size_t nrXIs = 1; + for( vector< size_t >::const_iterator it = restrXVals.begin(); + it != restrXVals.end(); + it++) + nrXIs *= *it; + 
_m_nrXIs.at(e) = nrXIs; + + const Scope& A = GetAgentScopeForLRF(e); + vector< size_t> restrAVals(A.size()); + IndexTools::RestrictIndividualIndicesToScope(nrActions, A, restrAVals); + _m_nrActionVals.at(e) = restrAVals; + size_t nrAIs = 1; + for( vector< size_t >::const_iterator it = restrAVals.begin(); + it != restrAVals.end(); + it++) + nrAIs *= *it; + _m_nrAIs.at(e) = nrAIs; + + } + + +} + +size_t FactoredDecPOMDPDiscrete::GetNrXIs(Index lrf) const +{ + return(_m_nrXIs.at(lrf)); +} + +size_t FactoredDecPOMDPDiscrete::GetNrAIs(Index lrf) const +{ + return(_m_nrAIs.at(lrf)); +} + + +void FactoredDecPOMDPDiscrete:: +SetRewardForLRF(Index LRF, + const vector& Xs, + const vector& As, + double reward + ) +{ +#if DEBUG_SETR + string indent = "\t\t"; + cerr< local joint index + Index jointXIndex = RestrictedStateVectorToJointIndex(LRF, Xs); + Index jointAIndex = RestrictedActionVectorToJointIndex(LRF, As); + + _m_LRFs.at(LRF)->Set(jointXIndex, jointAIndex, reward); +} + +void FactoredDecPOMDPDiscrete::SetRewardForLRF(Index LRF, + const vector& Xs, + const vector& As, + const Scope& Y, + const vector& Ys, + const Scope& O, + const vector& Os, + double reward + ) +{ +#if DEBUG_SETR + string indent = "\t"; + cerr<Get(sI,jaI); + + //sum over local reward functions + double r = 0.0; + for(Index e=0; e < GetNrLRFs(); e++) + { + double this_e_r = GetLRFRewardFlat(e, sI, jaI); + r += this_e_r; + } + return(r); +} + +double FactoredDecPOMDPDiscrete::GetReward(const vector &sIs, + const vector &aIs) const +{ + //sum over local reward functions + double r = 0.0; + for(Index e=0; e < GetNrLRFs(); e++) + { + double this_e_r = GetLRFRewardFlat(e, sIs, aIs); + r += this_e_r; + } + return(r); +} + +RGet * FactoredDecPOMDPDiscrete::GetRGet() const +{ + if(!_m_cached_FlatRM) + return 0; + //throw E("FactoredDecPOMDPDiscrete: can't get RGet if flat reward model not chached!"); + + if(_m_sparse_FlatRM) + return new RGet_RewardModelMappingSparse( + ((RewardModelMappingSparse*)_m_p_rModel) 
); + else + return new RGet_RewardModelMapping( + ((RewardModelMapping*)_m_p_rModel) ); +} + + + +double FactoredDecPOMDPDiscrete:: +GetLRFRewardFlat(Index lrf, Index flat_s, Index full_ja) const +{ + vector sfacs = StateIndexToFactorValueIndices(flat_s); + const vector& ja = JointToIndividualActionIndices(full_ja); + return(GetLRFRewardFlat(lrf,sfacs,ja)); +} + +double FactoredDecPOMDPDiscrete:: +GetLRFRewardFlat(Index lrf, + const vector& sfacs, + const vector& as) const +{ + const Scope &Xsc = GetStateFactorScopeForLRF(lrf); + const Scope &Asc = GetAgentScopeForLRF(lrf); + + vector restr_X(Xsc.size()); + IndexTools::RestrictIndividualIndicesToScope( + sfacs, Xsc, restr_X); + vector restr_A(Asc.size()); + IndexTools::RestrictIndividualIndicesToScope( + as, Asc, restr_A); + + double r = GetLRFReward(lrf, restr_X, restr_A); + return r; +} + +double FactoredDecPOMDPDiscrete:: +GetLRFReward(Index lrf, + const vector& s_e_vec, + const vector& a_e_vec)const +{ + Index s_e = RestrictedStateVectorToJointIndex(lrf, s_e_vec); + Index a_e = RestrictedActionVectorToJointIndex(lrf, a_e_vec); + double r = GetLRFReward(lrf, s_e, a_e); + return r; +} +/* inline +double FactoredDecPOMDPDiscrete:: +GetLRFReward(Index lrf, Index sI_e, Index jaI_e) const +{ + return( _m_LRFs[lrf]->Get(sI_e, jaI_e) ); +} +*/ + + + +Index FactoredDecPOMDPDiscrete::RestrictedStateVectorToJointIndex( + Index LRF, const vector& stateVec_e) const +{ + const vector& nrSFVals = _m_nrSFVals.at(LRF); + Index jointXI = IndexTools::IndividualToJointIndices(stateVec_e, nrSFVals); + return jointXI; +} + +Index FactoredDecPOMDPDiscrete::RestrictedActionVectorToJointIndex( + Index LRF, const vector& actionVec_e) const +{ + const vector& nrActionVals = _m_nrActionVals.at(LRF); + Index jointAI = + IndexTools::IndividualToJointIndices(actionVec_e, nrActionVals); + return jointAI; +} + + + +void FactoredDecPOMDPDiscrete::CacheFlatRewardModel(bool sparse) +{ + if(_m_cached_FlatRM) + { + _m_cached_FlatRM=false; // set to 
false, otherwise GetReward() + // call below will wrongly assume the + // rewards have already been cached + // and return only zeroes + delete(_m_p_rModel); + } + _m_sparse_FlatRM = sparse; + if(sparse) + _m_p_rModel=new RewardModelMappingSparse(GetNrStates(), + GetNrJointActions()); + else + _m_p_rModel=new RewardModelMapping(GetNrStates(), + GetNrJointActions()); + + + //cout << "caching rewards"<Get(sI, jaI) << endl; + + } + + _m_cached_FlatRM = true; +} + +void FactoredDecPOMDPDiscrete::CacheFlatModels(bool sparse) +{ + // if we are generating the full models anyway, also create + // the joint actions/observations + ConstructJointActions(); + ConstructJointObservations(); + + CacheFlatTransitionModel(sparse); + CacheFlatObservationModel(sparse); + CacheFlatRewardModel(sparse); +} + +void FactoredDecPOMDPDiscrete::ExportSpuddFile(const string& filename) const +{ + ofstream fp(filename.c_str()); + if(!fp.is_open()) { + cerr << "FactoredDecPOMDPDiscrete::ExportSpuddFile: failed to " + << "open file " << filename << endl; + return; + } + + // write header + fp << "// Automatically produced by FactoredDecPOMDPDiscrete::ExportSpuddFile" + << endl << "// SPUDD / Symbolic Perseus Format for '" << GetName() << "'" + << endl << endl; + + // write variables + fp << "(variables" << endl; + for(Index yI = 0; yI < GetNrStateFactors(); yI++) { + const StateFactorDiscrete* sfac = GetStateFactorDiscrete(yI); + fp << " (" << sfac->GetName(); + for(Index valI=0; valI < GetNrValuesForFactor(yI); valI++) + fp << " " << sfac->GetStateFactorValue(valI); + fp << ")" << endl; + } + fp << ")" << endl << endl; + + // write actions + const Scope& jAsc = GetAllAgentScope(); + for(Index jaI = 0; jaI < GetNrJointActions(); jaI++) { + vector A = JointToIndividualActionIndices(jaI); + + // construct and print joint action name + stringstream ss; + for(Index agentI = 0; agentI < GetNrAgents(); agentI++) + ss << GetAgentNameByIndex(agentI) << "_" + << GetAction(agentI, A[agentI])->GetName() << 
"__"; + string aname = ss.str(); + fp << "action " << aname.substr(0, aname.length()-2) << endl; + + // write out CPT for each state factor + for(Index y = 0; y < GetNrStateFactors(); y++) { + fp << GetStateFactorDiscrete(y)->GetName() << endl; + + // figure out action subset for ii + const Scope& ASoI_y = Get2DBN()->GetASoI_Y(y); + size_t ASoI_y_size = ASoI_y.size(); + vector As_restr(ASoI_y_size); + IndexTools::RestrictIndividualIndicesToNarrowerScope(A, jAsc, ASoI_y, As_restr); + + // loop over X instantiations + const Scope& XSoI_y = Get2DBN()->GetXSoI_Y(y); + size_t XSoI_y_size = XSoI_y.size(); // number of variables X in y's scope + vector r_nrX = Get2DBN()->GetNrVals_XSoI_Y(y); // number of values for X + vector Xs(XSoI_y_size, 0 ); // instantiation for X variables in XSoI_y + vector prevXs(XSoI_y_size, 1 ); // previous iteration + const Scope emptySc; // for Y variables (no within-stage influences now) + bool firstIter = true; + + do { // for each permutation + //cout << SoftPrintVector(Xs) << endl; + // close previously opened variable blocks + size_t nrXchanges = 0; + for(Index scI=0; scI < XSoI_y_size; scI++) { // for all state factors in ii + if(prevXs[scI] != Xs[scI]) + nrXchanges++; + } + if(!firstIter) fp << string(2*nrXchanges,')') << endl; else firstIter = false; + + // check where indices changed from previous iteration + for(Index scI=0; scI < XSoI_y_size; scI++) { // for all state factors in ii + Index sfI = XSoI_y.at(scI); + const StateFactorDiscrete* sfac = GetStateFactorDiscrete(sfI); + + if(prevXs[scI] != Xs[scI]) { + if(Xs[scI] == 0) { + // write out next variable name + string name = sfac->GetName(); + fp << " (" << name; + } + // print variable value + string value = sfac->GetStateFactorValue(Xs[scI]); + fp << " (" << value; + } + } + + // write distribution as vector + vector dist = Get2DBN()->GetYProbabilitiesExactScopes(Xs, As_restr, emptySc, y); + // if(p > Globals::PROB_PRECISION) + fp << " ("; + for(vector::const_iterator pI = 
dist.begin(); pI != dist.end(); ++pI) + fp << *pI << " "; + + prevXs = Xs; + + } while(! IndexTools::Increment( Xs, r_nrX ) ); + // write out last closing braces + fp << string(XSoI_y_size*2+1,')') << endl << endl; + } + + // write generic cost term + fp << "cost [+" << endl; + for(Index rI=0; rI < GetNrLRFs(); rI++) { + const Scope& agSC = GetAgentScopeForLRF(rI); + size_t agSC_size = agSC.size(); + vector As_restr(agSC_size); + IndexTools::RestrictIndividualIndicesToNarrowerScope(A, jAsc, agSC, As_restr); + + //XXX this lookup can be replaced with cached values + // in _m_nrSFVals.at(LRF), cf. line 286 + const vector< size_t>& nrVals = GetNrValuesPerFactor(); //XXX move out here + const Scope& sfSC = GetStateFactorScopeForLRF(rI); + size_t sfSC_size = sfSC.size(); + vector< size_t> restrXVals(sfSC_size); + IndexTools::RestrictIndividualIndicesToScope(nrVals, sfSC, restrXVals); + + vector Xs2(sfSC_size, 0 ); + vector prevXs2(sfSC_size, 1 ); // previous iteration + const Scope emptySc; + bool firstIter = true; + + do { // for each permutation + // close previously opened variable blocks + size_t nrXchanges = 0; + for(Index scI=0; scI < sfSC_size; scI++) { // for all ii state factors + if(prevXs2[scI] != Xs2[scI]) + nrXchanges++; + } + if(!firstIter) fp << string(2*nrXchanges,')') << endl; else firstIter = false; + + // check where indices changed from previous iteration + for(Index scI=0; scI < sfSC_size; scI++) { // for all ii state factors + Index sfI = sfSC.at(scI); + const StateFactorDiscrete* sfac = GetStateFactorDiscrete(sfI); + + if(prevXs2[scI] != Xs2[scI]) { + if(Xs2[scI] == 0) { + // write out next variable name + string name = sfac->GetName(); + fp << " (" << name; + } + // print variable value + string value = sfac->GetStateFactorValue(Xs2[scI]); + fp << " (" << value; + } + } + + // write reward as cost for this ii instantiation + double reward = GetLRFReward(rI, Xs2, As_restr); + fp << " (" << -reward; + + prevXs2 = Xs2; + + } while(! 
IndexTools::Increment( Xs2, restrXVals ) ); + // write out last closing braces + fp << string(sfSC_size*2+1,')') << endl; + } + fp << " ]" << endl + << "endaction" << endl << endl; + } + + // write reward function (note: subsumed as costs inside each individual action) + fp << "reward (0.0)" << endl << endl; + + // write footer + fp << "discount " << GetDiscount() << endl //XXX add warning + << "//horizon 10" << endl + << "tolerance 0.1" << endl; +} + +void FactoredDecPOMDPDiscrete::ClipRewardModel(Index sf, bool sparse) +{ + if(_m_cached_FlatRM){ + delete(_m_p_rModel); + _m_cached_FlatRM = false; + } + + _m_sparse_FlatRM = sparse; + + size_t nrS = 1; + for(size_t i = 0; i < GetNrStateFactors(); i++){ + if(i == sf) + continue; + size_t nrX = GetNrValuesForFactor(i); + nrS *= nrX; + } + + if(sparse) + _m_p_rModel=new RewardModelMappingSparse(nrS, + GetNrJointActions()); + else + _m_p_rModel=new RewardModelMapping(nrS, + GetNrJointActions()); + + for(Index sI=0; sI= REWARD_PRECISION ) + _m_p_rModel->Set(sI, jaI, r); + } + + _m_cached_FlatRM = true; +} + +void FactoredDecPOMDPDiscrete::MarginalizeISD(Index sf, vector& factor_sizes, const FactoredStateDistribution* fsd) +{ + vector X(factor_sizes.size(),0); + vector new_factor_sizes = factor_sizes; + new_factor_sizes[sf] = 1; + + vector > f_isd; + for(size_t i = 0; i < X.size(); i++){ + if(i == sf) + continue; + new_factor_sizes = factor_sizes; + new_factor_sizes[i] = 1; //Going to iterate over every factor except i (there's no easier way). + X.assign(X.size(),0); + vector dist; + for(size_t j = 0; j < factor_sizes[i]; j++){ + double p = 0; + do{ + X[i] = j; + p += fsd->GetProbability(X); + }while(!IndexTools::Increment( X, new_factor_sizes)); + if(p > 1 + PROB_PRECISION) + { + cout << "FactoredDecPOMDPDiscrete::MarginalizeISD - probability does not sum to 1 but to " << p << " instead. Correcting." 
<< endl; + p = 1; + } + dist.push_back(p); + } + f_isd.push_back(dist); + } + + FSDist_COF* new_isd = new FSDist_COF(*this); + + for(size_t i = 0; i < f_isd.size(); i++) + for(size_t j = 0; j < f_isd[i].size(); j++) + new_isd->SetProbability(i,j,f_isd[i][j]); + + new_isd->SanityCheck(); + + delete(GetISD()); + SetISD(new_isd); +} + +void FactoredDecPOMDPDiscrete::MarginalizeStateFactor(Index sf, bool sparse) +{ + vector old_factor_sizes = GetNrValuesPerFactor(); + const FactoredStateDistribution* old_isd = GetFactoredISD(); + + ClipRewardModel(sf, sparse); + MarginalizeTransitionObservationModel(sf, sparse); + MarginalizeISD(sf, old_factor_sizes, old_isd); + + delete(old_isd); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredDecPOMDPDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredDecPOMDPDiscrete.h new file mode 100644 index 000000000..e14cfba7e --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredDecPOMDPDiscrete.h @@ -0,0 +1,416 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. 
*/ +#ifndef _FACTOREDDECPOMDPDISCRETE_H_ +#define _FACTOREDDECPOMDPDISCRETE_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "FactoredDecPOMDPDiscreteInterface.h" +#include "MultiAgentDecisionProcessDiscreteFactoredStates.h" +#include "DecPOMDP.h" + +//class DecPOMDPDiscrete; +//class JointBeliefInterface; +#include "RewardModel.h" +#include "StateDiscrete.h" +#include "FactoredQFunctionScopeForStage.h" + +/**FactoredDecPOMDPDiscrete is implements a factored DecPOMDPDiscrete. + * + * This class implements FactoredDecPOMDPDiscreteInterface is the interface for + * a Dec-POMDP with factored states. It defines the get/set reward functions. + * + * This implementation maintains a vector of Local reward functions (LRFs), + * each of which is a RewardModel. The implementation of each RewardModel + * (which subclass of RewardModel is actually used) can differ per LRF. + * + * Each reward model is defined over a subset of state factors and states: + * the state factor and agent scope of the particular LRF. This class is + * responsible of all index conversions. (so the referred RewardModels do + * not know anything about scopes etc.) + * + * The `edges' in this code refer to edges of the graph constructed from + * the factored immediate reward function. 
(i.e., each `edge' corresponds + * to a local reward function) + * + * + * */ +class FactoredDecPOMDPDiscrete : + virtual public FactoredDecPOMDPDiscreteInterface, + public MultiAgentDecisionProcessDiscreteFactoredStates, + public DecPOMDP +{ +private: + //maintain the number of local reward functions + size_t _m_nrLRFs; + //maintain the state factor scopes + std::vector< Scope > _m_sfScopes; + //maintain the agent scopes + std::vector< Scope > _m_agScopes; + //maintaint the actual reward models + std::vector< RewardModel* > _m_LRFs; + + ///GetImmediateRewardScopes returns a reference to the following: + FactoredQFunctionScopeForStage _m_immRewScope; + +//We maintain additional instantiation information for each LRF +//this information is computed by the function +//InitializeInstantiationInformation, which should be called after the scopes +//of all LRFs are specified. + + ///for each LRF we maintain the nr of X instantiations + /**I.e., the size of its local state space. + */ + std::vector< size_t> _m_nrXIs; + ///for each LRF we maintain the nr of action instantiations + /**I.e., the number of local joint actions (group actions). + */ + std::vector< size_t> _m_nrAIs; + ///We maintain the nr of values for each state factor in the scope of lrf + /**these vectors are used in the conversion from/to joint indices + **/ + std::vector< std::vector > _m_nrSFVals; + ///For each LRF, we maintain the nr of actions for each agent in its scope + std::vector< std::vector > _m_nrActionVals; + + //variables for caching flat rewards: + /// Pointer to model + RewardModel* _m_p_rModel; + bool _m_cached_FlatRM; + bool _m_sparse_FlatRM; + + +protected: + ///Auxiliary function that returns whether v1 is consistent with v2. + /**The scope of v2 (scope 2) should be a subset of scope 1 + */ + static bool ConsistentVectorsOnSpecifiedScopes( + const std::vector& v1, + const Scope& scope1, + const std::vector& v2, + const Scope& scope2); +public: + /**\brief Default constructor. 
+ * Constructor that sets the name, description, and problem file, + * and subsequently loads this problem file. */ + FactoredDecPOMDPDiscrete(std::string + name="received unspec. by FactoredDecPOMDPDiscrete", + std::string descr="received unspec. by FactoredDecPOMDPDiscrete", + std::string pf="received unspec. by FactoredDecPOMDPDiscrete"); + + /// Destructor. + virtual ~FactoredDecPOMDPDiscrete(); + + + virtual std::string SoftPrint() const; + void Print() const { std::cout << SoftPrint() << std::endl; } + +//functions to create the reward model + void SetNrLRFs(size_t nr) + { + _m_nrLRFs = nr; + InitializeStorage(); + } + ///makes sure that _m_sfScopes, _m_agScopes, _m_LRFs are set to proper size + void InitializeStorage(); + virtual void SetYScopes(){} + virtual void SetOScopes(){} + + virtual double ComputeTransitionProb( + Index y, + Index yVal, + const std::vector< Index>& Xs, + const std::vector< Index>& As, + const std::vector< Index>& Ys + ) const + {return 0;} + virtual double ComputeObservationProb( + Index o, + Index oVal, + const std::vector< Index>& As, + const std::vector< Index>& Ys, + const std::vector< Index>& Os + ) const + {return 0;} + + ///Sets the scope for LRF. + /**It is in the desired form, so can copied directly in to _m_sfScopes and + * _m_agScopes. + */ + void SetScopeForLRF(Index LRF, + const Scope& X, //the X scope + const Scope& A //the A scope + ) + { + _m_sfScopes.at(LRF) = X; + _m_agScopes.at(LRF) = A; + } + ///Sets the scope for LRF. + /**Sets the scopes that will be used to set the reward with + * SetRewardForLRF(). + * + * The scopes Y and O are back-projected to obtain augmented scopes + * X' and A', through: + * \f[ X' = X \cup \Gamma_x( Y \cup O ) \f] + * \f[ A' = A \cup \Gamma_a( Y \cup O ) \f] + * where \f$ \Gamma_x \f$ is the state factor scope backup and + * \f$ \Gamma_a \f$ is the agent scope backup. 
+ * \sa + * StateScopeBackup + * AgentScopeBackup + */ + void SetScopeForLRF(Index LRF, + const Scope& X, //the X scope + const Scope& A, //the A scope + const Scope& Y, + const Scope& O + ); + + ///Initializes some meta information computed from the scopes. + /**This function computes the number of X and A instantiations, + * the vectors with the number of values for sfacs and step-size + * arrays used in conversion from individual <-> joint indices. + */ + void InitializeInstantiationInformation(); + + ///Get the number of X instantiations for LRF. + /**Each LRF has a local state space X. This function returns the size + * of this local state space. + * Should be called after InitializeInstantiationInformation() + */ + size_t GetNrXIs(Index LRF) const; + ///Get the number of action instantiations. I + /**I.e., the number of local joint (group) actions + * Should be called after InitializeInstantiationInformation() + */ + size_t GetNrAIs(Index LRF) const; + + ///Add the LRF-th reward model + /**It can already contain all rewards, or the reward model can now be + * filled using the SetRewardForLRF functions. + */ + void SetRM(Index LRF, RewardModel* rm) + { _m_LRFs.at(LRF) = rm; } + + ///Set reward. + /**The reward model should already be added. This function does not require + * any back-projection or whatsoever. + */ + void SetRewardForLRF(Index LRF, + const std::vector& Xs, + const std::vector& As, + double reward + ); + + ///Set reward. + /**The reward model should already be added. Because we maintain reward + * functions of the form R(s,a), this function distributes the reward + * according the the expectation of y, o. + * I.e., Let X, A be the registered scope for LRF, and x,a be instantiations + * of those scopes. 
Then + * + * \f[ R(x,a) = \sum_{y,o} R(x,a,y,o) \Pr(y,o|x,a) \f] + * + * Therefore this function performs + * \f[ R(x,a) += R(x,a,y,o) \Pr(y,o|x,a) \f] + * (by calling SetRewardForLRF(Index LRF, const std::vector& Xs,const std::vector& As, double reward) ) + * + * Repeatedly calling this function (for all y, o) should result in a + * valid R(x,a). + * + * This means that + * \li Xs should be a vector of SFValue indices for registered scope + * _m_sfScopes[LRF]. + * \li As should be a vector specifying an action for each agent specified + * by agent scope _m_agScopes[LRF]. + * \li \f$ \Pr(y,o|x,a) \f$ should be defined. (i.e., the scope backup + * of o and y should not involve variables not in X,A). + */ + void SetRewardForLRF(Index LRF, + const std::vector& Xs, + const std::vector& As, + const Scope& Y, + const std::vector& Ys, + const Scope& O, + const std::vector& Os, + double reward + ); + + ///Set Reward. + /**This set reward function is best illustrated by an example of the + * factored firefighting problem [Oliehoek08AAMAS]. There we have that + * the reward function for the first house can be expressed as. + * \f[ \forall_{f_1,f_2,a_1} \quad R(f_1,f_2,a_1) = \sum_{f_1'} + * \Pr(f_1'|f_1,f_2,a_1) R(f_1') \f] + * + * That is, if the scope is underspecified (X and A only are subsets of the + * registered scopes) than it holds \em{for all} non-specified factors and + * actions. + * + * This version of SetRewardForLRF should be called with + * \li X a subset of the registered state factor scope for LRF + * \li A a subset of the registered agent scope for LRF + * + * Consequently it will call + * void SetRewardForLRF(Index LRF, const vector& Xs, const vector& As, const Scope& Y, const vector& Ys, const Scope& O, const vector& Os, double reward ) + * for all non specified state factors and actions. 
+ * + */ + void SetRewardForLRF(Index LRF, + const Scope& X, + const std::vector& Xs, + const Scope& A, + const std::vector& As, + const Scope& Y, + const std::vector& Ys, + const Scope& O, + const std::vector& Os, + double reward + ); + + + + + +//implement the FactoredDecPOMDPDiscreteInterface + RewardModel* GetLRF(Index LRF) const + { return _m_LRFs.at(LRF); } + double GetLRFReward(Index LRF, Index sI_e, Index jaI_e) const + { return _m_LRFs.at(LRF)->Get(sI_e, jaI_e); } + /**might be necessary? + * Returns reward for LRF, given a flat state index, and a full joint + * action.*/ + double GetLRFRewardFlat(Index LRF, Index flat_s, Index full_ja) const; + double GetLRFRewardFlat(Index lrf, + const std::vector& sfacs, + const std::vector& as) const; + double GetLRFReward(Index LRF, + const std::vector& sI_e, const std::vector& jaI_e)const; + + size_t GetNrLRFs() const + {return _m_nrLRFs;} + const Scope& GetStateFactorScopeForLRF(Index LRF) const + {return _m_sfScopes.at(LRF); } + const Scope& GetAgentScopeForLRF(Index LRF) const + {return _m_agScopes.at(LRF); } + const FactoredQFunctionScopeForStage GetImmediateRewardScopes() const; + Index RestrictedStateVectorToJointIndex( + Index LRF, const std::vector& stateVec_e) const; + Index RestrictedActionVectorToJointIndex(Index LRF, + const std::vector& actionVec_e) const; + + double GetReward(const std::vector &sIs, + const std::vector &aIs) const; + +//implement the POSGDiscreteInterface + void CreateNewRewardModelForAgent(Globals::Index) + {CreateNewRewardModel();} + void SetRewardForAgent(Index agentI, Index sI, Index jaI, double r) + {SetReward(sI, jaI, r);} + /// Set the reward for state, joint action , suc. 
state indices + void SetRewardForAgent(Index agentI, Index sI, Index jaI, + Index sucSI, double r) + {SetReward(sI, jaI, sucSI, r);} + /// Set the reward for state, joint action, suc.state, joint obs indices + void SetRewardForAgent(Index agentI, Index sI, Index jaI, + Index sucSI, Index joI, double r) + {SetReward(sI, jaI, sucSI, joI, r);} + /// Return the reward for state, joint action indices + double GetRewardForAgent(Index agentI, Index sI, Index jaI) const + {return GetReward(sI, jaI);} + +//implement the DecPOMDPDiscreteInterface + void CreateNewRewardModel() + { throw E("FactoredDecPOMDPDiscrete::CreateNewRewardModelForAgent check if this is necessary!");} + + /// Set the reward for state, joint action indices + void SetReward(Index sI, Index jaI, double r) + { throw E("FactoredDecPOMDPDiscrete:: SetReward(Index sI, Index jaI, double r) - do we want to divide the reward r equally over the local reward functions?!");} + + /// Set the reward for state, joint action , suc. state indices + void SetReward(Index sI, Index jaI, Index sucSI, double r); + + /// Set the reward for state, joint action, suc.state, joint obs indices + void SetReward(Index sI, Index jaI, Index sucSI, Index joI, + double r); + + //double GetReward(Globals::Index, Globals::Index) const; + double GetReward(Index sI, Index jaI) const; + RGet * GetRGet() const; + +//implement the DecPOMDPInterface + double GetReward(State*s, JointAction* ja) const + { + return GetReward( + ((StateDiscrete*)s)->GetIndex(), + ((JointActionDiscrete*)ja)->GetIndex()); + } + + void SetReward(State* s, JointAction* ja, double r) + { + SetReward( + ((StateDiscrete*)s)->GetIndex(), + ((JointActionDiscrete*)ja)->GetIndex(), r); + } + +//implement the POSGInterface + void SetRewardForAgent(Index agI, State* s, JointAction* ja,double r) + { SetReward( s, ja, r); } + double GetRewardForAgent(Index agI, State* s, JointAction* ja) const + { return GetReward( s, ja); } + +//var + void CacheFlatRewardModel(bool 
sparse=false); + virtual void CacheFlatModels(bool sparse); + + /// Returns a pointer to a copy of this class. + virtual FactoredDecPOMDPDiscrete* Clone() const + { return new FactoredDecPOMDPDiscrete(*this); } + + void ConvertFiniteToInfiniteHorizon(size_t horizon) { + throw(E("ConvertFiniteToInfiniteHorizon not implemented for FactoredDecPOMDPDiscrete")); + } + + ///Export fully-observable subset of this FactoredDecPOMDPDiscrete to spudd file. + /**This function is virtual since some specific problem instances may + * want to override variable names or other elements to conform with the spudd + * file format. + */ + virtual void ExportSpuddFile(const std::string& filename) const; + + /**\brief Attempts to remove an extraneous state factor from the problem. + * Such state factors are typically useful when modeling large systems graphically, + * but hinder performance when solving with flat algorithms (such as Perseus). + * Currently, it only supports the marginalization of nodes without + * NS dependencies, and which do not directly influence any LRF + * */ + void MarginalizeStateFactor(Index sf, bool sparse); + + ///Adjusts the reward model by ignoring a state factor + void ClipRewardModel(Index sf, bool sparse); + + ///Marginalizes the ISD over a given state factor. + void MarginalizeISD(Index sf, std::vector& factor_sizes, const FactoredStateDistribution* fsd); +}; + + +#endif /* !_FACTOREDDECPOMDPDISCRETE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredDecPOMDPDiscreteInterface.h b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredDecPOMDPDiscreteInterface.h new file mode 100644 index 000000000..1cd0bc06f --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredDecPOMDPDiscreteInterface.h @@ -0,0 +1,165 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. 
However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _FACTOREDDECPOMDPDISCRETEINTERFACE_H_ +#define _FACTOREDDECPOMDPDISCRETEINTERFACE_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "DecPOMDPDiscreteInterface.h" +#include "MultiAgentDecisionProcessDiscreteFactoredStatesInterface.h" +#include "Scope.h" + +class RewardModel; +//class DecPOMDPDiscrete; +class JointBeliefInterface; +class FactoredQFunctionScopeForStage; + +/**\brief FactoredDecPOMDPDiscreteInterface is the interface for a Dec-POMDP + * with factored states. It declares the get/set reward functions. + * + * FactoredDecPOMDPDiscrete represents a factored DecPOMDPDiscrete + * FactoredDecPOMDPDiscreteInterface is an interface (i.e. pure abstract class) + * for a discrete factored DEC-POMDP model. Classes that implement this + * interface are, for instance, FactoredDecPOMDPDiscrete. + * + * Because a factored Dec-POMDP can also be accessed as a regular Dec-POMDP + * (i.e., by indexing 'flat' states), this interface also derives from + * DecPOMDPDiscreteInterface (which defines the get/set-reward functions for + * a regular (non-factored) discrete Dec-POMDP). + * + * The `LRFs' in this code refer to edges of the graph constructed from + * the factored immediate reward function. (i.e., each `edge' corresponds + * to a local reward function) + * + * a `flat state' - a state s= + * (which specifies a value for each state factor). + * + * a `full joint action' - a joint action that specifies an action for each + * individual agent. 
+ * + * a `joint/group action' - an action for a subset of agents. + * + * + * */ +class FactoredDecPOMDPDiscreteInterface : + virtual public DecPOMDPDiscreteInterface, + virtual public MultiAgentDecisionProcessDiscreteFactoredStatesInterface +{ +private: + +protected: + +public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + + /// Destructor.Can't make a virt.destr. pure abstract! + virtual ~FactoredDecPOMDPDiscreteInterface() {}; + + ///Returns a pointer to the LRF-th reward model component. + virtual RewardModel* GetLRF(Index LRF) const = 0; + /**Returns reward for LRF, given + * ja_e - the joint index for a group action for the subset of agents + * as specified by the agent scope of LRF. + * s_e - the (joint) index of the subset of factors specified by the + * state factor scope of LRF. + * + * For instance, let the agents scope of LRF be <1,3>, then + * group action <3,5> means that agent 1 select action 3, while agent + * 3 performs its 5th action. + * Using indextools we can find agSc_jaI. E.g. + * agSc_jaI = IndividualToJointIndices( <3,5>, <6,6> ) + * (where <6,6> is a vector which specifies the number of actions per + * agent, see IndexTools.h for more info). + * + * */ + virtual double GetLRFReward(Index LRF, Index s_e, Index ja_e) const=0; + virtual double GetLRFReward(Index LRF, + const std::vector& sI_e, const std::vector& jaI_e)const=0; + /**might be necessary? + * Returns reward for LRF, given a flat state index, and a full joint + * action.*/ + virtual double GetLRFRewardFlat(Index LRF, Index flat_s, + Index full_ja) const=0; + + + + + + /// Get the number of LRFs in the factored representation. + virtual size_t GetNrLRFs() const = 0; + + /// Get all the state factors in the scope of an LRF. + virtual const Scope& GetStateFactorScopeForLRF(Index LRF) const = 0; + /// Get all the agents in the scope of an LRF. 
+ virtual const Scope& GetAgentScopeForLRF(Index LRF) const = 0; + /// Returns all scopes of the immediate reward function in one object + virtual const FactoredQFunctionScopeForStage GetImmediateRewardScopes() + const = 0; + /// Get the vector of Factor indices corresponding to stateI + // this function doesn't belong here... + // this functionality should be provided by MultiAgentDecisionProcessDiscreteFactoredStates + //virtual std::vector GetStateFactorValuesForLRF(Index e, Index stateI) const = 0; + + /**\brief convert a state vector of restricted scope to a joint index s_e. + * + * This is a convenience function that performs indiv->joint state index + * conversion for a specific edge e (LRF). + * + * \li stateVec_e is an assignment of all state factors in the state factor + * scope of e. + * \li the funtion returns a joint (group) index s_e. + */ + virtual Index RestrictedStateVectorToJointIndex(Index LRF, + const std::vector& stateVec_e) const = 0; + /**\brief convert an action vector of restricted scope to a joint index a_e. + * + * This is a convenience function that performs indiv->joint action index + * conversion for a specific edge e (LRF). (i.e., this function is + * typically called when requesting the immediate reward) + * + * \li actionVec_e is an assignment of all actions in the agent scope + * scope of e. + * \li the funtion returns a joint (group) index a_e. + */ + virtual Index RestrictedActionVectorToJointIndex(Index LRF, + const std::vector& actionVec_e) const = 0; + +//rewards: + + // Get the reward for a state factor given its value. + //this function doesn't make sense?! + //virtual double GetFactorReward(Index factor, Index sx) const = 0; + + using DecPOMDPDiscreteInterface::GetReward; + virtual double GetReward(const std::vector &sIs, + const std::vector &aIs) const = 0; + + /// Returns a pointer to a copy of this class. 
+ virtual FactoredDecPOMDPDiscreteInterface* Clone() const = 0; + + virtual std::string SoftPrint() const = 0; + virtual void Print() const = 0; +}; + + +#endif /* !_FACTOREDDECPOMDPDISCRETEINTERFACE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredMMDPDiscrete.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredMMDPDiscrete.cpp new file mode 100644 index 000000000..7534cabb4 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredMMDPDiscrete.cpp @@ -0,0 +1,89 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Philipp Robbel + * + * For contact information please see the included AUTHORS file. + */ + +#include "FactoredMMDPDiscrete.h" +#include "CPDKroneckerDelta.h" + +using namespace std; + +string FactoredMMDPDiscrete::SoftPrint() const { + stringstream ss; + ss << "Fully-observable MMDP" << endl; + ss << FactoredDecPOMDPDiscrete::SoftPrint(); + return ss.str(); +} + +// Don't support flattening of observations by default +void FactoredMMDPDiscrete::CacheFlatModels(bool sparse) { +#if MADP_DFS_WARNINGS + cout << "FactoredMMDPDiscrete::CacheFlatModels() does not flatten observation model" << endl; +#endif + ConstructJointActions(); + + CacheFlatTransitionModel(sparse); + CacheFlatRewardModel(sparse); +} + +/// Initialize a fully-observable transition and observation DBN. 
+void FactoredMMDPDiscrete::Initialize2DBN() +{ + // construct observations + ConstructObservations(); + SetObservationsInitialized(true); // Note: joint indices are likely to break + + BoundScopeFunctor sf(this,&FactoredMMDPDiscrete::SetScopes); + BoundTransitionProbFunctor tf(this,&FactoredMMDPDiscrete::ComputeTransitionProb); + EmptyObservationProbFunctor of; + + MultiAgentDecisionProcessDiscreteFactoredStates::Initialize2DBN(sf, tf, of); + + // above calls SetOScopes and initializes CPD vector for observation variables + Initialize2DBNObservations(); // set actual CPDs +} + +void FactoredMMDPDiscrete::ConstructObservations() { + size_t nrAgents = GetNrAgents(); + size_t nrStateFactors = GetNrStateFactors(); + if(nrAgents == 0 || nrStateFactors == 0) + throw(E("FactoredMMDPDiscrete::ConstructObservations() no agents specified or state space empty")); + + size_t nrStates = GetNrStates(); + for(Index i=0; i"), asfS, Scope("<>") ); + } +} + +void FactoredMMDPDiscrete::Initialize2DBNObservations() { + size_t nrAgents = GetNrAgents(); + if(nrAgents == 0) + throw(E("FactoredMMDPDiscrete::Initialize2DBNObservations() no agents specified")); + + for(Index i=0; iSetCPD_O(i, new CPDKroneckerDelta()); + } +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredMMDPDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredMMDPDiscrete.h new file mode 100644 index 000000000..0c54c431e --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredMMDPDiscrete.h @@ -0,0 +1,93 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. 
+ * + * This file has been written and/or modified by the following people: + * + * Philipp Robbel + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _FACTOREDMMDPDISCRETE_H_ +#define _FACTOREDMMDPDISCRETE_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "FactoredDecPOMDPDiscrete.h" + +class FactoredMMDPDiscrete : public FactoredDecPOMDPDiscrete +{ +public: + /**\brief Default constructor. + * Constructor that sets the name, description, and problem file, + * and subsequently loads this problem file. */ +FactoredMMDPDiscrete( + std::string name="received unspec. by FactoredMMDPDiscrete", + std::string descr="received unspec. by FactoredMMDPDiscrete", + std::string pf="received unspec. by FactoredMMDPDiscrete") + : FactoredDecPOMDPDiscrete(name, descr, pf) + { + //SetFlatObservationModel(new ObservationModelIdentityMapping()); + + //MADPComponentDiscreteObservations::SetInitialized(true) can + //overflow _m_nrJointObservations (indicated by _m_jointIndicesValid). + //(likely) + + //Problem occurs if single observation variable's stepsize + //calculations already overflow (happens in 2DBN::InitializeIIs()). + //(unlikely) + } + + virtual std::string SoftPrint() const; + + /// Returns a pointer to a copy of this class. + virtual FactoredMMDPDiscrete* Clone() const + { return new FactoredMMDPDiscrete(*this); } + + void CacheFlatModels(bool sparse); + + void Initialize2DBN(); + + // Called in MultiAgentDecisionProcessDiscreteFactoredStates::Initialize2DBN CPT initialization but not utilized in FactoredMMDPDiscrete. 
+ // Intentionally breaks inheritance chain (no fully-observable subclass + // can re-implement this and hope to get this executed) + double ComputeObservationProb( + Index o, + Index oVal, + const std::vector< Index>& As, + const std::vector< Index>& Ys, + const std::vector< Index>& Os ) const + { + // could first check whether GetYSoI_O(o) is full state space scope + return oVal == IndexTools::IndividualToJointIndices(Ys, GetNrValuesPerFactor()) ? 1.0 : 0.0; + } + + // Intentionally breaks inheritance chain + void SetScopes() //reimplemented from base class + {SetYScopes(); SetOScopes();} + void SetOScopes(); + +protected: + +private: + /**\brief Check whether models appear valid probability + * distributions. + * + * Different from the version in the parent class, observations + * are not exhaustively checked here.*/ + bool SanityCheckObservations() const + {return true;} + + /// Construct all the observations for fully-observable case. + void ConstructObservations(); + void Initialize2DBNObservations(); + +}; + +#endif diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredQFunctionScopeForStage.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredQFunctionScopeForStage.cpp new file mode 100644 index 000000000..8d1e4ab2f --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredQFunctionScopeForStage.cpp @@ -0,0 +1,54 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "FactoredQFunctionScopeForStage.h" + +using namespace std; + +//Default constructor +FactoredQFunctionScopeForStage::FactoredQFunctionScopeForStage() + : + _m_agentScopes(0), + _m_sfacScopes(0) +{ +} + +void +FactoredQFunctionScopeForStage::AddLocalQ( const Scope& sfacS, const Scope& agS) +{ + _m_sfacScopes.push_back(sfacS); + _m_agentScopes.push_back(agS); +} + +void +FactoredQFunctionScopeForStage::RemoveLocalQ( Index j) +{ + if(j >= _m_sfacScopes.size()) + throw E("FactoredQFunctionScopeForStage::RemoveLocalQ( Index j), index j out of bounds"); + + _m_sfacScopes.erase(_m_sfacScopes.begin() + j ); + _m_agentScopes.erase(_m_agentScopes.begin() + j ); +} + +string FactoredQFunctionScopeForStage::SoftPrint() const +{ + stringstream ss; + for(Index q=0; q < _m_sfacScopes.size(); q++) + { + ss << q << "-th local Q function, agentScope="<< _m_agentScopes.at(q) << + ", sfacScope=" << _m_sfacScopes.at(q) << endl; + } + return (ss.str()); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredQFunctionScopeForStage.h b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredQFunctionScopeForStage.h new file mode 100644 index 000000000..5888ccaf3 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredQFunctionScopeForStage.h @@ -0,0 +1,64 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. 
*/ +#ifndef _FACTOREDQFUNCTIONSCOPEFORSTAGE_H_ +#define _FACTOREDQFUNCTIONSCOPEFORSTAGE_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include "Scope.h" + +/** \brief FactoredQFunctionScopeForStage represents a Scope for one + * stage of a factored QFunction. */ +class FactoredQFunctionScopeForStage +{ + private: + + std::vector< Scope > _m_agentScopes; + std::vector< Scope > _m_sfacScopes; + + protected: + + public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + FactoredQFunctionScopeForStage(); + + //operators: + + //data manipulation (set) functions: + ///Add a local Q function scope component + void AddLocalQ( const Scope& sfacS, const Scope& agS); + ///Remove the j-th local Q function scope component + void RemoveLocalQ( Index j); + + //get (data) functions: + const Scope& GetStateFactorScope(Index lqf) const + {return _m_sfacScopes.at(lqf);} + const Scope& GetAgentScope(Index lqf) const + {return _m_agentScopes.at(lqf);} + size_t GetNrLQFs() const + {return _m_sfacScopes.size();} + std::string SoftPrint() const; + +}; + + +#endif /* !_FACTOREDQFUNCTIONSCOPEFORSTAGE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredStateDistribution.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredStateDistribution.cpp new file mode 100644 index 000000000..573535061 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredStateDistribution.cpp @@ -0,0 +1,40 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. 
+ * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "FactoredStateDistribution.h" + +using namespace std; +/* +//Default constructor +FactoredStateDistribution::FactoredStateDistribution() +{ +} +//Copy constructor. +FactoredStateDistribution::FactoredStateDistribution(const FactoredStateDistribution& o) +{ +} +//Destructor +FactoredStateDistribution::~FactoredStateDistribution() +{ +} +//Copy assignment operator +FactoredStateDistribution& FactoredStateDistribution::operator= (const FactoredStateDistribution& o) +{ + if (this == &o) return *this; // Gracefully handle self assignment + // Put the normal assignment duties here... + + return *this; +} +*/ diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredStateDistribution.h b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredStateDistribution.h new file mode 100644 index 000000000..1d5fb9fb1 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/FactoredStateDistribution.h @@ -0,0 +1,73 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. 
*/ +#ifndef _FACTOREDSTATEDISTRIBUTION_H_ +#define _FACTOREDSTATEDISTRIBUTION_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include "StateDistribution.h" + +/** \brief FactoredStateDistribution is a class that represents + * a base class for factored state distributions. + * + * A FactoredStateDistribution is a distribution over factored states. + * Such a distribution might be flat (i.e. any distribution can be represented), + * but typically this is infeasible, and therefore we use some other + * approximate representation. + * + * The actual representation is determined by the derived class. + * */ +class FactoredStateDistribution + : public StateDistribution +{ + private: + + protected: + + public: + // Constructor, destructor and copy assignment. + /// Destructor. + virtual ~FactoredStateDistribution(){}; +/* + /// (default) Constructor + FactoredStateDistribution(); + /// Copy constructor. + FactoredStateDistribution(const FactoredStateDistribution& a); + /// Copy assignment operator + FactoredStateDistribution& operator= (const FactoredStateDistribution& o); +*/ + //operators: + + //data manipulation (set) functions: + virtual void SetUniform() = 0; + + //get (data) functions: + virtual double GetProbability(const std::vector& sfacValues) const = 0; + + /// Returns a pointer to a copy of this class. + virtual FactoredStateDistribution* Clone() const = 0; + + virtual std::vector SampleState() const = 0; + virtual std::string SoftPrint() const = 0; +}; + + +#endif /* !_FACTOREDSTATEDISTRIBUTION_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/Globals.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/Globals.cpp new file mode 100644 index 000000000..de08d00b0 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/Globals.cpp @@ -0,0 +1,62 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. 
+ * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "Globals.h" +#include "EOverflow.h" +#include + +using namespace std; + +bool Globals::EqualProbability(double p1, double p2) +{ + return ( abs(p1-p2) < PROB_PRECISION ) ; +} + +bool Globals::EqualReward(double r1, double r2) +{ + return ( abs(r1-r2) < REWARD_PRECISION ) ; +} + +#if USE_ARBITRARY_PRECISION_INDEX +Index Globals::CastLIndexToIndex(LIndex i) +{ + Index j=0; + if(i.fits_ulong_p()) + j=i.get_ui(); + else + { + stringstream ss; + ss << "LIndex with value " + << i + << " does not fit in an Index"; + throw(EOverflow(ss)); + } + return(j); +} +#else +Index Globals::CastLIndexToIndex(LIndex i) { return(i); } +#endif + +#if USE_ARBITRARY_PRECISION_INDEX +double Globals::CastLIndexToDouble(LIndex i) +{ + mpf_t y; + mpf_init(y); + mpf_set_z(y,i.get_mpz_t()); + return mpf_get_d(y); +} +#else +double Globals::CastLIndexToDouble(LIndex i) { return(static_cast(i)); } +#endif diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/Globals.h b/payntbind/src/synthesis/decpomdp/madp/src/base/Globals.h new file mode 100644 index 000000000..f0c1eb621 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/Globals.h @@ -0,0 +1,99 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. 
For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _GLOBALS_H_ +#define _GLOBALS_H_ 1 + +#include +#include +#include +#include "versions.h" +#include "configuration.h" +#include + +#if USE_ARBITRARY_PRECISION_INDEX +#include +#endif + +/// Globals contains several definitions global to the MADP toolbox. +namespace Globals { + +#define INDEX_MAX UINT_MAX + +/// A general index. +typedef unsigned int Index; +/// A long long index. +#if USE_ARBITRARY_PRECISION_INDEX +typedef mpz_class LIndex; +#else +typedef unsigned long long int LIndex; +#endif + +/* constants */ + +/// The highest horizon we will consider. +/** When the horizon of a problem is set to this value, we consider it + * an infinite-horizon problem. */ +const unsigned int MAXHORIZON=999999; + +///constant to denote *all* solutions (e.g., nrDesiredSolutions = ALL_SOLUTIONS ) +const size_t ALL_SOLUTIONS=0; + + +/// The precision for probabilities. + +/** Used to determine when two probabilities are considered equal, for + * instance when converting full beliefs to sparse beliefs. */ +const double PROB_PRECISION=1e-12; +/** Used to determine when two (immediate) rewards are considered equal */ +const double REWARD_PRECISION=1e-12; + +bool EqualProbability(double p1, double p2); +bool EqualReward(double r1, double r2); +Index CastLIndexToIndex(LIndex i); +double CastLIndexToDouble(LIndex i); + +/// The initial (=empty) joint observation history index. +const Index INITIAL_JOHI=0; +/// The initial (=empty) joint action-observation history index. +const Index INITIAL_JAOHI=0; + +/// Inherited from Tony's POMDP file format. +enum reward_t {REWARD, COST}; + +} + +using namespace Globals; + +//Frans: should not matter if NDEBUG is defined? 
+//http://lists.boost.org/MailArchives/ublas/2007/02/1764.php +// Tell Boost Ublas to not use exceptions for speed reasons. +//#define BOOST_UBLAS_NO_EXCEPTIONS 1 + + +#include "PrintTools.h" +using namespace PrintTools; + +#include "E.h" +#include "ENoSubScope.h" +#include "EInvalidIndex.h" + +#endif /* !_GLOBALS_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/IndexTools.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/IndexTools.cpp new file mode 100644 index 000000000..6bb48e226 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/IndexTools.cpp @@ -0,0 +1,632 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "IndexTools.h" +#include "Scope.h" + +using namespace std; + +bool IndexTools::Increment(Index& index, size_t nrElems) +{ + index = (index+1) % nrElems; + //carry_over: + return(index == 0); +} + +/** + * takes 2 vectors of size vec_size: + * indexVec gives for each vector index(i) an element index( indexVec[i] ) + * e.g. < 2, 4, 0 > + * nrElems gives for each vector index(i) the number of elements + * e.g. 
< 3, 5, 2 > + * Meaning that (in this example) the highest indexVec vector is + * < 2, 4, 1 > + * + * Staying with this example, Incrementing < 1, 3, 1> will yield < 1, 4, 0> + * + * returns a bool indicating carryover signal (true = on) + */ +bool IndexTools::Increment(vector& indexVec, + const vector& nrElems ) +{ + size_t vec_size = indexVec.size(); + if(nrElems.size() != indexVec.size()) + throw E("IndexTools::Increment - nrElems.size() != indexVec.size()"); + if(vec_size == 0) + { + return true; + //return true is sufficient + //throw E("IndexTools::Increment - vec_size == 0"); + + } + + bool carry_over = true; + Index i = vec_size - 1; + while(carry_over /*&& i >= 0* - i is unsigned!*/ ) + { + //start towards templatization: + carry_over = Increment( indexVec[i] , nrElems[i] ); + //old code + //indexVec[i] = (indexVec[i] + 1) % nrElems[i]; + //carry_over = (indexVec[i] == 0); + if(i==0) + break;//we just incremented the first element + else + i--;//proceed with previous elem (if carry_over, of course) + } + + return(carry_over); + +} + +// ind -> joint + +/**Calculate the joint index from individual indices i each taken from + * sets with nrElems[i] elems (i.e., index i ranges from + * 0...nrElems[i]-1 + * + * Note: this only works if all joint indices occur, so you can use this + * for joint actions, but not joint observation histories (this would + * assume there is a joint observation history index corresponding to + * indiv. observation histories of different lengths). 
+ */ +Index IndexTools::IndividualToJointIndices(const vector& indices, + const vector& nrElems) +{ + size_t vec_size = nrElems.size(); + if(vec_size == 0) + return 0; + size_t* step_size=CalculateStepSize(nrElems); + + Index jointI = 0; + for(Index i=0; i < vec_size ; i++) + jointI += indices[i] * step_size[i]; + + delete [] step_size; + + return(jointI); + +} +/**Calculate the joint index from individual indices i each taken from + * sets with nrElems[i] elems (i.e., index i ranges from + * 0...nrElems[i]-1 + * + * Note: this only works if all joint indices occur, so you can use this + * for joint actions, but not joint observation histories (this would + * assume there is a joint observation history index corresponding to + * indiv. observation histories of different lengths). + */ +Index IndexTools::IndividualToJointIndices(const vector& indices, + const vector& nrElems, size_t n) +{ + size_t vec_size = n; //let's assume compiler optimizes this away...(?) + if(vec_size == 0) + return 0; + size_t* step_size=CalculateStepSize(nrElems, n); + + Index jointI = 0; + for(Index i=0; i < vec_size ; i++) + jointI += indices[i] * step_size[i]; + + delete [] step_size; + + return(jointI); + +} +Index IndexTools::IndividualToJointIndicesArray(const Index* indices, + const vector& nrElems) +{ + size_t vec_size = nrElems.size(); + size_t* step_size=CalculateStepSize(nrElems); + + Index jointI = 0; + for(Index i=0; i < vec_size ; i++) + jointI += indices[i] * step_size[i]; + + delete [] step_size; + + return(jointI); +} +Index IndexTools::IndividualToJointIndicesStepSize(const vector& indices, + const vector& step_size) +{ + size_t vec_size = indices.size(); + + Index jointI = 0; + for(Index i=0; i < vec_size ; i++) + jointI += indices[i] * step_size[i]; + + return(jointI); +} + +Index IndexTools::IndividualToJointIndicesStepSize(const vector& indices, + const size_t * step_size) +{ + size_t vec_size = indices.size(); + + Index jointI = 0; + for(Index i=0; i < vec_size ; i++) 
+ jointI += indices[i] * step_size[i]; + + return(jointI); +} +Index IndexTools::IndividualToJointIndicesArrayStepSize(const Index* indices, + const size_t* step_size, size_t vec_size) +{ + Index jointI = 0; + for(Index i=0; i < vec_size ; i++) + jointI += indices[i] * step_size[i]; + + return(jointI); +} + +Index IndexTools::IndividualToJointIndicesArrayStepSize(const Index* indices, + const vector &step_size, size_t vec_size) +{ + Index jointI = 0; + for(Index i=0; i < vec_size ; i++) + jointI += indices[i] * step_size[i]; + return(jointI); +} + +//joint->ind + +vector IndexTools::JointToIndividualIndices(Index jointI, + const vector& nrElems) +{ + size_t vec_size = nrElems.size(); + vector result(vec_size); + JointToIndividualIndices(jointI, nrElems, result); + return result; +} +void IndexTools::JointToIndividualIndices(Index jointI, + const vector& nrElems, vector& result ) +{ + size_t* step_size=0; + size_t vec_size = nrElems.size(); + if(result.size() != vec_size) + result.resize(vec_size); + + if(vec_size > 0) + { + step_size=CalculateStepSize(nrElems); + + Index remainder = jointI; + for(Index i=0; i < vec_size ; i++) + { + Index aI = remainder / step_size[i]; + result[i]= aI;//store this indiv. index + remainder = remainder % step_size[i]; + } + delete [] step_size; + } +} + +vector IndexTools::JointToIndividualIndicesStepSize(Index jointI, + const size_t * step_size, size_t vec_size ) +{ + Index remainder = jointI; + + Index resultArr[vec_size]; + for(Index i=0; i < vec_size ; i++) + { + Index aI = remainder / step_size[i]; + resultArr[i] = aI;//store this indiv. 
index + //remainder = remainder % step_size[i]; + remainder -= step_size[i] * aI; + } + vector result(&resultArr[0], &resultArr[vec_size]); + return(result); +} + +vector IndexTools::JointToIndividualIndicesStepSize( + Index jointI, + const vector &step_size, + size_t vec_size + ) +{ + Index remainder = jointI; + + Index resultArr[vec_size]; + for(Index i=0; i < vec_size ; i++) + { + Index aI = remainder / step_size[i]; + resultArr[i] = aI;//store this indiv. index + //remainder = remainder % step_size[i]; + remainder -= step_size[i] * aI; + } + vector result(&resultArr[0], &resultArr[vec_size]); + return(result); +} +std::vector IndexTools::JointToIndividualIndicesStepSize( + Index jointI, + const std::vector &stepSize + ) +{ + return JointToIndividualIndicesStepSize( + jointI, stepSize, stepSize.size() + ); +} + +const Index* IndexTools::JointToIndividualIndicesArrayStepSize(Index jointI, + const size_t * step_size, size_t vec_size ) +{ + Index remainder = jointI; + + Index* resultArr = new Index[vec_size]; + for(Index i=0; i < vec_size ; i++) + { + Index aI = remainder / step_size[i]; + resultArr[i] = aI;//store this indiv. index + //remainder = remainder % step_size[i]; + remainder -= step_size[i] * aI; + } + return(resultArr); +} +size_t * IndexTools::CalculateStepSize(const vector& nrElems) +{ + size_t vec_size = nrElems.size(); + //increment indicates for each agent how many the joint index is + //incremented to get the next individual action... + size_t *step_size = new size_t[vec_size]; + if (vec_size == 0) + return(step_size); + + //the step_size for the last agent is 1 + step_size[vec_size-1] = 1; + if(vec_size != 1) //i.e. 
vec_size > 1 + { + Index i = vec_size-2; + while(1) + { + if(i>0) + { + step_size[i] = nrElems[i+1] * step_size[i+1]; + i--; + } + else if(i==0) + { + step_size[i] = nrElems[i+1] * step_size[i+1]; + break; + } + } + } + return(step_size); +} +std::vector IndexTools::CalculateStepSizeVector(const std::vector& nrElems) +{ + size_t vec_size = nrElems.size(); + //increment indicates for each agent how many the joint index is + //incremented to get the next individual action... + vector step_size(vec_size); + if (vec_size == 0) + return(step_size); + //the step_size for the last agent is 1 + step_size[vec_size-1] = 1; + if(vec_size != 1) + { + Index i = vec_size-2; + while(1) + { + if(i>0) + { + step_size[i] = nrElems[i+1] * step_size[i+1]; + i--; + } + else if(i==0) + { + step_size[i] = nrElems[i+1] * step_size[i+1]; + break; + } + } + } + return(step_size); +} + +size_t * IndexTools::CalculateStepSize(const vector& nrElems, size_t n) +{ + size_t vec_size = n; + //increment indicates for each agent how many the joint index is + //incremented to get the next individual action... 
+ size_t *step_size = new size_t[vec_size]; + if (vec_size == 0) + return(step_size); + //the step_size for the last agent is 1 + step_size[vec_size-1] = 1; + if(vec_size != 1) + { + Index i = vec_size-2; + while(1) + { + if(i>0) + { + step_size[i] = nrElems[i+1] * step_size[i+1]; + i--; + } + else if(i==0) + { + step_size[i] = nrElems[i+1] * step_size[i+1]; + break; + } + } + } + return(step_size); +} + +size_t IndexTools::CalculateNumberOfSequences(size_t o, size_t seqLength) +{ + // sequences have length seqLength, + // if the number of options(the branching factor) each time-step is o + // then we get: + // sum_t=0...seqLength o^t == (o^(seqLength+1) - 1) / (o - 1) + // or + // sum_t=0...h-1 o^t == (o^h - 1) / (o - 1) (NOTE(review): this closed form divides by o - 1, which is zero for o == 1 - confirm callers never pass o == 1) + return( (size_t) + ( + ( pow((double)o,(double)(seqLength + 1)) - 1 ) + / + ( o - 1 ) + ) + ); +} + + +// LIndex versions + + + +bool IndexTools::Increment(LIndex& index, LIndex nrElems ) +{ + index = (index+1) % nrElems; + //carry_over: + return(index == 0); +} + +bool IndexTools::Increment(vector& indexVec, const vector& nrElems ) +{ + if(nrElems.size() != indexVec.size()) + throw E("IndexTools::Increment - nrElems.size() != indexVec.size()"); + if(indexVec.size() == 0) + { + return true; + //return true is sufficient + //throw E("IndexTools::Increment - vec_size == 0"); + + } + + bool carry_over = true; + Index i = indexVec.size() - 1; + while(carry_over /*&& i >= 0* - i is unsigned!*/ ) + { + //start towards templatization: + carry_over = Increment( indexVec[i] , nrElems[i] ); + //old code + //indexVec[i] = (indexVec[i] + 1) % nrElems[i]; + //carry_over = (indexVec[i] == 0); + if(i==0) + break;//we just incremented the first element + else + i--;//proceed with previous elem (if carry_over, of course) + } + + return(carry_over); + +} +LIndex IndexTools::IndividualToJointIndices(const vector& indices, + const vector& nrElems) +{ + LIndex* step_size=CalculateStepSize(nrElems); + size_t vec_size = nrElems.size(); + + LIndex jointI = 0; + 
for(Index i=0; i < vec_size ; i++) + jointI += indices[i] * step_size[i]; + + delete [] step_size; + + return(jointI); + +} +LIndex IndexTools::IndividualToJointIndicesArray(LIndex* indices, + const vector& nrElems) +{ + size_t vec_size = nrElems.size(); + LIndex* step_size=CalculateStepSize(nrElems); + + LIndex jointI = 0; + for(Index i=0; i < vec_size ; i++) + jointI += indices[i] * step_size[i]; + + delete [] step_size; + + return(jointI); +} +LIndex IndexTools::IndividualToJointIndicesStepSize(const vector& indices, + const vector& step_size) +{ + size_t vec_size = indices.size(); + + LIndex jointI = 0; + for(Index i=0; i < vec_size ; i++) + jointI += indices[i] * step_size[i]; + + return(jointI); +} + +LIndex IndexTools::IndividualToJointIndicesStepSize(const vector& indices, + const LIndex * step_size) +{ + size_t vec_size = indices.size(); + + LIndex jointI = 0; + for(Index i=0; i < vec_size ; i++) + jointI += indices[i] * step_size[i]; + + return(jointI); +} +LIndex IndexTools::IndividualToJointIndicesArrayStepSize(LIndex* indices, + const LIndex* step_size, size_t vec_size) +{ + LIndex jointI = 0; + for(Index i=0; i < vec_size ; i++) + jointI += indices[i] * step_size[i]; + + return(jointI); +} +LIndex IndexTools::IndividualToJointIndicesArrayStepSize(LIndex* indices, + const vector &step_size, size_t vec_size) +{ + LIndex jointI = 0; + for(Index i=0; i < vec_size ; i++) + jointI += indices[i] * step_size[i]; + return(jointI); +} +vector IndexTools::JointToIndividualIndices(LIndex jointI, + const vector& nrElems) +{ + LIndex* step_size=0; + Index vec_size = nrElems.size(); + vector result(vec_size); + if(vec_size > 0) + { + step_size=CalculateStepSize(nrElems); + LIndex remainder = jointI; + for(Index i=0; i < vec_size ; i++) + { + LIndex aI = remainder / step_size[i]; + result[i]= aI;//store this indiv. 
index + remainder = remainder % step_size[i]; + } + delete [] step_size; + } + return(result); +} +vector IndexTools::JointToIndividualIndicesStepSize(LIndex jointI, + const LIndex * step_size, size_t vec_size ) +{ + LIndex remainder = jointI; + + LIndex resultArr[vec_size]; + for(Index i=0; i < vec_size ; i++) + { + LIndex aI = remainder / step_size[i]; + resultArr[i] = aI;//store this indiv. index + //remainder = remainder % step_size[i]; + remainder -= step_size[i] * aI; + } + vector result(&resultArr[0], &resultArr[vec_size]); + return(result); +} +vector IndexTools::JointToIndividualIndicesStepSize(LIndex jointI, + const vector &step_size, size_t vec_size ) +{ + LIndex remainder = jointI; + + LIndex resultArr[vec_size]; + for(Index i=0; i < vec_size ; i++) + { + LIndex aI = remainder / step_size[i]; + resultArr[i] = aI;//store this indiv. index + //remainder = remainder % step_size[i]; + remainder -= step_size[i] * aI; + } + vector result(&resultArr[0], &resultArr[vec_size]); + return(result); +} + +std::vector IndexTools::JointToIndividualIndicesStepSize(LIndex jointI, + const std::vector &stepSize) +{ + return JointToIndividualIndicesStepSize (jointI, stepSize, + stepSize.size()); +} +LIndex * IndexTools::CalculateStepSize(const vector& nrElems) +{ + size_t vec_size = nrElems.size(); + //increment indicates for each agent how many the joint index is + //incremented to get the next individual action... 
+ LIndex * step_size = new LIndex[vec_size]; + //the step_size for the last agent is 1 (NOTE(review): unlike the size_t overload, no vec_size == 0 early return precedes this write, so vec_size-1 wraps for an empty nrElems - confirm callers never pass one) + step_size[vec_size-1] = 1; + if(vec_size != 1) + { + Index i = vec_size-2; + while(1) + { + if(i>0) + { + step_size[i] = nrElems[i+1] * step_size[i+1]; + i--; + } + else if(i==0) + { + step_size[i] = nrElems[i+1] * step_size[i+1]; + break; + } + } + } + return(step_size); +} +vector IndexTools::CalculateStepSizeVector(const vector& nrElems) +{ + size_t vec_size = nrElems.size(); + //increment indicates for each agent how many the joint index is + //incremented to get the next individual action... + vector step_size(vec_size); + //the step_size for the last agent is 1 (NOTE(review): unlike the size_t overload, no vec_size == 0 early return precedes this write, so vec_size-1 wraps for an empty nrElems - confirm callers never pass one) + step_size[vec_size-1] = 1; + if(vec_size != 1) + { + Index i = vec_size-2; + while(1) + { + if(i>0) + { + step_size[i] = nrElems[i+1] * step_size[i+1]; + i--; + } + else if(i==0) + { + step_size[i] = nrElems[i+1] * step_size[i+1]; + break; + } + } + } + return(step_size); +} + +Index +IndexTools::ActionAndObservation_to_ActionObservationIndex(Index aI, + Index oI, size_t nrA, size_t nrO) +{ + return ( aI * nrO + oI ); +} +Index +IndexTools::ActionObservation_to_ActionIndex(Index aoI, size_t nrA, size_t nrO) +{ + return ( (size_t) (aoI / nrO) ); +} +Index +IndexTools::ActionObservation_to_ObservationIndex(Index aoI, size_t nrA, size_t nrO) +{ + return ( (size_t) (aoI % nrO) ); +} + + + + + + + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/IndexTools.h b/payntbind/src/synthesis/decpomdp/madp/src/base/IndexTools.h new file mode 100644 index 000000000..8eebbc524 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/IndexTools.h @@ -0,0 +1,257 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. 
For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _INDEXTOOLS_H_ +#define _INDEXTOOLS_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "Scope.h" + +template +std::vector convertVector(std::vector v) +{ + std::vector r; + for (typename std::vector::iterator it=v.begin();it!=v.end();it++) + r.push_back(*it); + return r; +} + +/// IndexTools contains functionality for manipulating indices. +/** A detailed description of how joint indices etc are constructed, + * see doc/manually_maintained/MADPToolbox-Histories+indices.ps.gz . + */ +namespace IndexTools { + /// Increments index which ranges over nrElems + bool Increment(Index& index, size_t nrElems); + + /// Increments vector of indices that range over nrElems + bool Increment(std::vector& indexVec, const std::vector& nrElems ); + + /// Convert individual to joint indices. + Index IndividualToJointIndices(const std::vector& indices, + const std::vector& nrElems); + /// Convert individual to joint indices. Only uses first n entries of vecs. + Index IndividualToJointIndices(const std::vector& indices, + const std::vector& nrElems, size_t n); + /// A variant that takes an array instead of a vector for extra speed. + Index IndividualToJointIndicesArray(const Index* indices, + const std::vector& nrElems); + ///A variant that takes a cached step_size vector for extra speed. + Index IndividualToJointIndicesStepSize(const std::vector& indices, + const std::vector& step_size); + ///A variant that takes a cached step_size array for extra speed. 
+ Index IndividualToJointIndicesStepSize(const std::vector& indices, + const size_t * step_size); + /** \brief A variant that 1) takes an array instead of a vector + * and 2) takes a cached step_size array for extra speed.*/ + Index IndividualToJointIndicesArrayStepSize(const Index* indices, + const size_t * step_size, size_t vec_size); + + /// A variant with a step_size vector. + Index IndividualToJointIndicesArrayStepSize(const Index* indices, + const std::vector &step_size, size_t vec_size); + + ///Convert a joint index to individual indices. + std::vector JointToIndividualIndices(Index jointI, + const std::vector& nrElems ); + ///Convert a joint index to individual indices, stored in result (resized to nrElems.size() if needed). + void JointToIndividualIndices(Index jointI, + const std::vector& nrElems, std::vector& result ); + /** \brief Convert a joint index to individual indices - taking the + * stepSize array as an argument. */ + std::vector JointToIndividualIndicesStepSize(Index jointI, + const size_t * stepSize, size_t vec_size ) ; + /** Convert a joint index to individual indices, taking the + * stepSize array as an argument. Avoids return by value. */ + void JointToIndividualIndicesStepSize(Index jointI, + const size_t * stepSize, size_t vec_size, std::vector& result ) ; + /** \brief Convert a joint index to individual indices - taking the + * stepSize vector as an argument. */ + std::vector JointToIndividualIndicesStepSize( + Index jointI, + const std::vector &stepSize, + size_t vec_size + ) ; + + std::vector JointToIndividualIndicesStepSize( + Index jointI, + const std::vector &stepSize + ); + /** \brief Convert a joint index to individual indices - taking the + * stepSize array as an argument and returning a pointer to an + * array. + * + * Note: the returned array is allocated with new[], so it must be + * released with delete[].*/ + const Index * JointToIndividualIndicesArrayStepSize(Index jointI, + const size_t * stepSize, size_t vec_size ) ; + + /** \brief Calculates the step size array for nrElems. (so is of + * the same size as nrElems). 
+ */ + size_t * CalculateStepSize(const std::vector& nrElems); + /** \brief Calculates the step size vector for nrElems. (so is of + * the same size as nrElems). + */ + std::vector CalculateStepSizeVector(const std::vector& nrElems); + /**\brief Calculates the step size array from the first n entries + * of nrElems. + * (so the array's size is n). + */ + size_t * CalculateStepSize(const std::vector& nrElems, size_t n); + + // LIndex versions of all functions + + /// LIndex equivalent function. + bool Increment(LIndex& index, LIndex nrElems ); + /// LIndex equivalent function. + bool Increment(std::vector& indexVec, const std::vector& nrElems ); + + /// LIndex equivalent function. + LIndex IndividualToJointIndices(const std::vector& indices, + const std::vector& nrElems) ; + /// LIndex equivalent function. + LIndex IndividualToJointIndicesArray(LIndex* indices, + const std::vector& nrElems); + /// LIndex equivalent function. + LIndex IndividualToJointIndicesStepSize(const std::vector& indices, + const std::vector& step_size) ; + /// LIndex equivalent function. + LIndex IndividualToJointIndicesStepSize(const std::vector& indices, + const LIndex * step_size) ; + /// LIndex equivalent function. + LIndex IndividualToJointIndicesArrayStepSize(LIndex* indices, + const LIndex * step_size, size_t vec_size); + /// LIndex equivalent function. + LIndex IndividualToJointIndicesArrayStepSize(LIndex* indices, + const std::vector &step_size, size_t vec_size); + /// LIndex equivalent function. + const LIndex * JointToIndividualIndicesArrayStepSize( + LIndex jointI, + const LIndex * stepSize, size_t vec_size ) ; + /// LIndex equivalent function. + std::vector JointToIndividualIndicesStepSize(LIndex jointI, + const LIndex * stepSize, size_t vec_size ) ; + /// LIndex equivalent function. 
+ std::vector JointToIndividualIndicesStepSize(LIndex jointI, + const std::vector &stepSize, size_t vec_size ) ; + std::vector JointToIndividualIndicesStepSize(LIndex jointI, + const std::vector &stepSize); + /// LIndex equivalent function. + std::vector JointToIndividualIndices(LIndex jointI, + const std::vector& nrElems ) ; + /// LIndex equivalent function. + LIndex * CalculateStepSize(const std::vector& nrElems); + /// LIndex equivalent function. + std::vector CalculateStepSizeVector(const std::vector& nrElems); + + /** \brief Computation of an index for (joint) actionObservations + * + * ActionObservation indices (aoI's) are used as the basis for indexing + * (Joint)ActionObservationHistories. + * This function computes them. + * + * \sa manually maintained documentation + */ + Index ActionAndObservation_to_ActionObservationIndex(Index aI, + Index oI, size_t nrA, size_t nrO); + + /// Convert (joint) ActionObservation indices to (joint) Action indices. + Index ActionObservation_to_ActionIndex(Index aoI, size_t nrA, size_t nrO); + + /** \brief Convert (joint) ActionObservation indices to (joint) + * Observation indices. + */ + Index ActionObservation_to_ObservationIndex(Index aoI, size_t nrA,size_t nrO); + + ///Restrict a vector of indices to a scope. + /** + * sc is a Scope (also a vector of indices), each index i in sc identifies + * an element of indivIndices: namely indivIndices[i] + * + * this function changes the vector restrictedIndivIndices to contain + * < indivIndices[i] > + * for all i in scope + * + * restrictedIndivIndices should already have the correct size: + * I.e., it should be at least as big as the number of elements in the + * scope. However, restrictedIndivIndices may be larger than the number + * of elements in the scope. In this case, the superfluous elements + * remain untouched. (this is actually used in some places to + * increase performance). 
+ */ + template + void RestrictIndividualIndicesToScope( + const std::vector& indivIndices, + const Scope& sc, + std::vector &restrictedIndivIndices) + { + size_t scSize = sc.size(); + for(Index s_I = 0; s_I < scSize; s_I++) + { + Index indivIndicesI=sc[s_I]; + Index indivIndicesIVal = indivIndices[indivIndicesI]; + restrictedIndivIndices[s_I]=indivIndicesIVal; + } + } + + ///Restricts a vector of indices with a current scope to a narrower scope + template + void RestrictIndividualIndicesToNarrowerScope( + const std::vector& indivIndices, + const Scope& old_sc, + const Scope& new_sc, + std::vector &restrictedIndivIndices) + { + size_t scSize = new_sc.size(); +// std::vector restrictedIndivIndices; + for(Index s_I = 0; s_I < scSize; s_I++) + { + //find which element is next according to new scope + Index indivIndicesI=new_sc[s_I]; + //find location of that element according to old scope + Index old_scope_i = 0; + try { + old_scope_i = old_sc.GetPositionForIndex(indivIndicesI); + } catch( E& ) { + throw ENoSubScope("IndexTools::RestrictIndividualIndicesToNarrowerScope -the new scope is no sub-scope of the old scope!"); + } + Index indivIndicesIVal = indivIndices[old_scope_i]; +// restrictedIndivIndices.push_back(indivIndicesIVal); + restrictedIndivIndices[s_I]=indivIndicesIVal; + } +// return(restrictedIndivIndices); + } + + + /** \brief Calculate the number of sequences of length up to + * seqLength, for which at every time step o options are + * available. + * + * Calculation includes 1 empty sequence (of length 0). 
* + * + */ + size_t CalculateNumberOfSequences(size_t o, size_t seqLength); + +} + +#endif /* !_INDEXTOOLS_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/JointAction.h b/payntbind/src/synthesis/decpomdp/madp/src/base/JointAction.h new file mode 100644 index 000000000..cdc400b01 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/JointAction.h @@ -0,0 +1,53 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _JOINTACTION_H_ +#define _JOINTACTION_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" + +/// JointAction represents a joint action. +class JointAction +{ + private: + + protected: + + public: + // Constructor, destructor and copy assignment. + + /// Destructor. + virtual ~JointAction(){} + + /// Returns a pointer to a copy of this class. 
+ virtual JointAction* Clone() const = 0; + + virtual std::string SoftPrint() const = 0; + virtual std::string SoftPrintBrief() const = 0; + virtual void Print() const { std::cout << SoftPrint();} + virtual void PrintBrief() const{ std::cout << SoftPrintBrief();} + +}; + + +#endif /* !_JOINTACTION_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/JointActionDiscrete.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/JointActionDiscrete.cpp new file mode 100644 index 000000000..9c3e23777 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/JointActionDiscrete.cpp @@ -0,0 +1,135 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "JointActionDiscrete.h" +#include "ActionDiscrete.h" + +using namespace std; + +#define DEBUG_JAD 0 + +//Default constructor +JointActionDiscrete::JointActionDiscrete(Index index) : + DiscreteEntity(index) +{ +} + +JointActionDiscrete::JointActionDiscrete(Index index, + vector a) : + DiscreteEntity(index), + _m_apVector(a) +{ + _m_aIndexVector = ConstructIndividualActionDiscretesIndices(); +} + +//Copy constructor. 
+JointActionDiscrete::JointActionDiscrete(const JointActionDiscrete& o) : + DiscreteEntity(o) +{ +if(DEBUG_JAD) cout << " cloning joint action "; + + vector::const_iterator itp = o._m_apVector.begin(); + vector::const_iterator lastp = o._m_apVector.end(); + while(itp != lastp) + { + const ActionDiscrete* t = *itp; + _m_apVector.push_back(t); + itp++; + } + _m_aIndexVector = o._m_aIndexVector; +} + +//Destructor +JointActionDiscrete::~JointActionDiscrete() +{ +if(DEBUG_JAD) cout << "deleting joint action"; + +/* Do not delete the individual actions that are pointed to (there is only + * one copy of those, so that will lead to segfaults) + for(Index i=0; i<_m_apVector.size(); i++) + delete _m_apVector[i];*/ + + _m_apVector.clear(); +} + +void JointActionDiscrete::DeleteIndividualActions() +{ + for(vector::size_type i=0; i<_m_apVector.size(); i++) + delete _m_apVector[i]; +} + +void JointActionDiscrete::AddIndividualAction(const ActionDiscrete* a, + Index agentI) +{ + if( static_cast< vector::size_type >(agentI) != + _m_apVector.size() ) + { + stringstream ss; + ss << "WARNING! 
AddIndividualAction: size of _m_apVector does not match index of agent!\n _m_apVector.size()=" + << _m_apVector.size()<<" - agentI="<GetIndex()); +} + +string JointActionDiscrete::SoftPrint() const +{ + stringstream ss; + vector::const_iterator it = _m_apVector.begin(); + vector::const_iterator last = _m_apVector.end(); + + ss << "JA" << GetIndex(); + + while(it != last) + { + if(*it != 0) + ss << "_" << (*it)->SoftPrintBrief(); + it++; + } + return(ss.str()); +} + +string JointActionDiscrete::SoftPrintBrief() const +{ + stringstream ss; + vector::const_iterator it = _m_apVector.begin(); + vector::const_iterator last = _m_apVector.end(); + + while(it != last) + { + if(*it != 0) + ss << (*it)->SoftPrintBrief(); + if(it != last-1) + ss << "_"; + it++; + } + return(ss.str()); +} + +vector JointActionDiscrete::ConstructIndividualActionDiscretesIndices() const +{ + vector iv; + vector::const_iterator it = _m_apVector.begin(); + vector::const_iterator last = _m_apVector.end(); + while(it != last) + { + + Index index = (*it)->GetIndex(); + iv.push_back( index ); + it++; + } + return(iv); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/JointActionDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/JointActionDiscrete.h new file mode 100644 index 000000000..d20178bfe --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/JointActionDiscrete.h @@ -0,0 +1,95 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +/* Only include this header file once. */ +#ifndef _JOINTACTIONINTEGER_H_ +#define _JOINTACTIONINTEGER_H_ 1 + +/* the include directives */ + +#include +#include "Globals.h" +#include "JointAction.h" +#include "DiscreteEntity.h" + +class ActionDiscrete; + +/// JointActionDiscrete represents discrete joint actions. +class JointActionDiscrete : public JointAction, + public DiscreteEntity +{ + private: + + /// Indices of the individual actions that make up this joint action. + std::vector _m_aIndexVector; + /// Pointers to the individual actions that make up this joint action. + std::vector _m_apVector; + + /// Constructs the vector of individual Action indices from _m_apVector + std::vector ConstructIndividualActionDiscretesIndices() const; + + protected: + + + + public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + JointActionDiscrete(Index index = INDEX_MAX); + /// Constructor with an index and a vector of individual actions. + JointActionDiscrete(Index index, std::vector a); + /// Copy constructor. + JointActionDiscrete(const JointActionDiscrete& a); + /// Destructor. + virtual ~JointActionDiscrete(); + + //operators: + + //data manipulation (set) functions + /// Deletes the individual actions to which this joint action points. + /** This function will typically *NOT* be used: normally multiple + * joint actions share their individual actions...*/ + void DeleteIndividualActions(); + + /// Adds an individual action for agentI to this joint action. + /** This has to be called ordered: i.e., first for agent 0, then + * for agent 1, etc. up to nrAgents. This function is also + * typically only used to construct the joint actions.*/ + void AddIndividualAction(const ActionDiscrete* a, Index agentI); + + //get (data) functions: + /// Get the ActionDiscretes for this joint action. + const std::vector& GetIndividualActionDiscretes() const + { return(_m_apVector);} + /// Get the Action indices for this joint action. 
+ const std::vector& GetIndividualActionDiscretesIndices() const + { return(_m_aIndexVector);} + + /// Returns a pointer to a copy of this class. + virtual JointActionDiscrete* Clone() const + { return new JointActionDiscrete(*this); } + + //other + std::string SoftPrint() const; + std::string SoftPrintBrief() const; +}; + + +#endif /* !_JOINTACTIONINTEGER_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/JointObservation.h b/payntbind/src/synthesis/decpomdp/madp/src/base/JointObservation.h new file mode 100644 index 000000000..f4e0020c3 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/JointObservation.h @@ -0,0 +1,50 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _JOINTOBSERVATION_H_ +#define _JOINTOBSERVATION_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" + +/// JointObservation represents joint observations. +class JointObservation +{ + private: + + protected: + + public: + + /// Destructor. + virtual ~JointObservation() {}; + + /// Returns a pointer to a copy of this class. 
+ virtual JointObservation* Clone() const = 0; + + virtual std::string SoftPrint() const = 0; + virtual std::string SoftPrintBrief() const = 0; + virtual void Print() const { std::cout << SoftPrint();} + virtual void PrintBrief() const { std::cout << SoftPrintBrief();} +}; + + +#endif /* !_JOINTOBSERVATION_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/JointObservationDiscrete.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/JointObservationDiscrete.cpp new file mode 100644 index 000000000..c2fe0a786 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/JointObservationDiscrete.cpp @@ -0,0 +1,129 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "JointObservationDiscrete.h" +#include "ObservationDiscrete.h" + +using namespace std; + +#define DEBUG_JOD 0 + +//Default constructor +JointObservationDiscrete::JointObservationDiscrete(Index index) : + DiscreteEntity(index) +{ +} + +JointObservationDiscrete::JointObservationDiscrete(Index index, + vector a) : + DiscreteEntity(index), + _m_opVector(a) + +{ + _m_oIndexVector = ConstructIndividualObservationDiscretesIndices(); +} + +//Copy constructor. 
+JointObservationDiscrete::JointObservationDiscrete(const + JointObservationDiscrete& o) : + DiscreteEntity(o) +{ +if(DEBUG_JOD) cout << " cloning JointObservationDiscrete "; + vector::const_iterator itp = o._m_opVector.begin(); + vector::const_iterator lastp = o._m_opVector.end(); + while(itp != lastp) + { + const ObservationDiscrete* t = *itp; + _m_opVector.push_back(t); + itp++; + } + _m_oIndexVector = o._m_oIndexVector; +} + +//Destructor +JointObservationDiscrete::~JointObservationDiscrete() +{ +if(DEBUG_JOD) cout << "deleting JointObservationDiscrete"; +/* Do not delete the individual observations - only one copy of these, so + * would lead to segfaults. + for(Index i=0; i<_m_opVector.size(); i++) + delete _m_opVector[i];*/ + _m_opVector.clear(); +} + +void JointObservationDiscrete::AddIndividualObservation(const ObservationDiscrete* a, Index agentI) +{ + if (agentI != _m_opVector.size() ) + cout << "WARNING! AddIndividualAction: size of _m_opVector does not match index of agent!\n _m_opVector.size()="<< _m_opVector.size()<<" - agentI="<GetIndex()); +} + +string JointObservationDiscrete::SoftPrint() const +{ + stringstream ss; + vector::const_iterator it = _m_opVector.begin(); + vector::const_iterator last = _m_opVector.end(); + + ss << "JO" << GetIndex(); + + while(it != last) + { + if(*it != 0) + ss << "_" << (*it)->SoftPrintBrief(); + it++; + } + //ss << "(vector="<< PrintTools::SoftPrintVector(_m_oIndexVector) <<", size=" + //<<_m_oIndexVector.size()<<")"; + return(ss.str()); +} + +string JointObservationDiscrete::SoftPrintBrief() const +{ + stringstream ss; + vector::const_iterator it = _m_opVector.begin(); + vector::const_iterator last = _m_opVector.end(); + + while(it != last) + { + if(*it != 0) + ss << (*it)->SoftPrintBrief(); + if(it != last-1) + ss << "_"; + + it++; + } + return(ss.str()); +} + + +vector JointObservationDiscrete::ConstructIndividualObservationDiscretesIndices() const +{ + vector iv; + vector::const_iterator it = _m_opVector.begin(); 
+ vector::const_iterator last = _m_opVector.end(); +if(DEBUG_JOD){ cout << " JointObservationDiscrete::" << + "GetIndividualObservationDiscretesIndices() _m_opVector size =" + << _m_opVector.size() << endl; +} + while(it != last) + { + + Index index = (*it)->GetIndex(); + iv.push_back( index ); + it++; + } + return(iv); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/JointObservationDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/JointObservationDiscrete.h new file mode 100644 index 000000000..682e992ea --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/JointObservationDiscrete.h @@ -0,0 +1,92 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _JOINTOBSERVATIONINTEGER_H_ +#define _JOINTOBSERVATIONINTEGER_H_ 1 + +/* the include directives */ + +#include +#include +#include "Globals.h" +#include "JointObservation.h" +#include "DiscreteEntity.h" + +class ObservationDiscrete; + +/// JointObservationDiscrete represents discrete joint observations. +class JointObservationDiscrete : public JointObservation, + public DiscreteEntity +{ + private: + + + protected: + + ///Indices of individual observations that make up this joint observation. + std::vector _m_oIndexVector; + ///Pointers to individual observations that make up this joint observation. 
+ std::vector _m_opVector; + + /// Constructs the vector of individual Observation indices from _m_apVector + std::vector ConstructIndividualObservationDiscretesIndices() const; + +public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + JointObservationDiscrete(Index index = INDEX_MAX); + /// Constructor with an index and a vector of individual observations. + JointObservationDiscrete(Index index, std::vector a); + /// Copy constructor. + JointObservationDiscrete(const JointObservationDiscrete& a); + /// Destructor. + virtual ~JointObservationDiscrete(); + + //operators: + + /// Adds an individual observation for agentI to this joint observation. + /** This has to be called ordered: i.e., first for agent 0, + * then for agent 1, etc. up to nrAgents. This function is + * also typically only used to construct the joint + * observations.*/ + void AddIndividualObservation(const ObservationDiscrete* a, Index agentI); + + //get (data) functions: + + /// Get the ObservationDiscretes for this joint action. + const std::vector& + GetIndividualObservationDiscretes() const + { return _m_opVector;} + /// Get the Observation indices for this joint action. + const std::vector& GetIndividualObservationDiscretesIndices() const + { return _m_oIndexVector; } + + /// Returns a pointer to a copy of this class. 
+ virtual JointObservationDiscrete* Clone() const + { return new JointObservationDiscrete(*this); } + + //other + std::string SoftPrint() const; + std::string SoftPrintBrief() const; +}; + + +#endif /* !_JOINTOBSERVATIONINTEGER_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteActions.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteActions.cpp new file mode 100644 index 000000000..35598202e --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteActions.cpp @@ -0,0 +1,588 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "MADPComponentDiscreteActions.h" +#include "VectorTools.h" + +using namespace std; + +#define DEBUG_GETJA_COPYVEC 0 +#define DEBUG_CJA 0 +#define DEBUG_ADD_DA 0 +#define DEBUG_MADP_DA 0 + +//the following variable enables some runtime checks - enable it for debugging +#define RUNTIME_CHECKS 0 + + + +//Default constructor +MADPComponentDiscreteActions::MADPComponentDiscreteActions() +{ + _m_initialized = false; + _m_cachedAllJointActions = false; + _m_actionStepSize = 0; + _m_jointActionIndices=0; + _m_jointIndicesValid=true; +} + +MADPComponentDiscreteActions::MADPComponentDiscreteActions( + const MADPComponentDiscreteActions& a) +{ + _m_initialized=a._m_initialized; + _m_cachedAllJointActions=a._m_cachedAllJointActions; + _m_jointIndicesValid=a._m_jointIndicesValid; + _m_nrJointActions=a._m_nrJointActions; + _m_nrActions=a._m_nrActions; + _m_actionStepSize=IndexTools::CalculateStepSize(_m_nrActions); + _m_actionVecs=a._m_actionVecs; + + _m_jointActionVec.clear(); + for(Index ja=0;ja!=a._m_jointActionVec.size();++ja) + _m_jointActionVec.push_back(new JointActionDiscrete(*a._m_jointActionVec.at(ja))); + + _m_jointActionIndices=new map *>(); + if(a._m_jointActionIndices) + { + map *>::const_iterator iter; + for(iter = a._m_jointActionIndices->begin(); + iter != a._m_jointActionIndices->end(); + ++iter) + _m_jointActionIndices->insert(make_pair( iter->first, new vector(*iter->second ))); + } +} + +//Destructor +MADPComponentDiscreteActions::~MADPComponentDiscreteActions() +{ + _m_nrActions.clear(); + vector >::iterator it = _m_actionVecs.begin(); + vector >::iterator last = _m_actionVecs.end(); + while(it != last) + { + // (*it) isa vector + (*it).clear(); + it++; + } + _m_actionVecs.clear(); + vector::iterator it2 = _m_jointActionVec.begin(); + vector::iterator last2 = _m_jointActionVec.end(); + while(it2 != last2) + { + delete *it2; //removes the joint action pointed to... 
+ it2++; + } + _m_jointActionVec.clear(); + if(_m_jointActionIndices) + { + while(!_m_jointActionIndices->empty()) + { + delete (*_m_jointActionIndices->begin()).second; + _m_jointActionIndices->erase( _m_jointActionIndices->begin() ); + } + delete _m_jointActionIndices; + } +// if(_m_jointActionIndices) +// { +// vector*>::iterator it3 = _m_jointActionIndices->begin(); +// vector*>::iterator last3 = _m_jointActionIndices->end(); +// while(it3 != last3) +// { +// delete *it3; +// it3++; +// } +// } + + delete[] _m_actionStepSize; +} + +//data manipulation (set) functions: + +/** This creates nrA unnamed actions.*/ +void MADPComponentDiscreteActions::SetNrActions(Index AI, size_t nrA) +{ + if(_m_nrActions.size() != AI) + { + stringstream ss; + ss << "MADPComponentDiscreteActions::SetNrAction("< thisAgentsActions; + for(Index i=0;i thisAgentsActions; + ActionDiscrete ad(0, name, description); + thisAgentsActions.push_back(ad); + _m_actionVecs.push_back(thisAgentsActions); + } + else + { + //we add an action for this agent - increment his nr_actions + Index newActionIndex = _m_nrActions[AI]++; + ActionDiscrete ad(newActionIndex, name, description); + _m_actionVecs[AI].push_back(ad); + } +} + +/** Calls ConstructJointActionsRecursively() on a new (empty) joint + * action.*/ +size_t MADPComponentDiscreteActions::ConstructJointActions() +{ + JointActionDiscrete* ja = new JointActionDiscrete(); + size_t NRJA = ConstructJointActionsRecursively(0, *ja, 0); + _m_cachedAllJointActions=true; + return NRJA; +} + +/** Creates (_m_jointActionVec) using _m_actionVecs (which need to be + * initialized before calling this function...) 
*/ +size_t MADPComponentDiscreteActions::ConstructJointActionsRecursively( + Index curAgentI, JointActionDiscrete& ja, Index jaI) +{ + + bool lastAgent=false; + if(curAgentI == _m_nrActions.size()-1) + { + lastAgent = true; + } + if(curAgentI >= _m_actionVecs.size()) + { + stringstream ss; + ss << "ConstructJointActionsRecursively - current Agent index ("<< + curAgentI<<") out of bounds! (_m_actionVecs contains actions for "<< + _m_actionVecs.size() << " agents...)\n"; + throw E(ss); + } + + ActionDVec::iterator first = _m_actionVecs[curAgentI].begin(); + ActionDVec::iterator it = _m_actionVecs[curAgentI].begin(); + ActionDVec::iterator last = _m_actionVecs[curAgentI].end(); + ActionDVec::iterator beforelast = _m_actionVecs[curAgentI].end(); + beforelast--; + + if(it == last) + { + stringstream ss; ss << "ERROR empty action set for agent " << curAgentI; + throw E(ss); + } + //first action extends the received ja + JointActionDiscrete* p_jaReceivedArgCopy = new JointActionDiscrete(ja); + JointActionDiscrete* p_ja; + + while( it != last) // other actions extend duplicates of ja + { + if(DEBUG_CJA) cerr << "\nnext action"; + if(it == first) // + { + if(DEBUG_CJA) + cerr << "(first action - not making copy)\n"; + p_ja = &ja; + } + else if (it == beforelast)//this is the last valid it -> last action + { + if(DEBUG_CJA) cerr << "(last action - not making copy)\n"; + p_ja = p_jaReceivedArgCopy; //don't make a new copy + } + else //make a new copy + { + if(DEBUG_CJA) cerr << "(intermed. action - making copy)\n"; + p_ja = new JointActionDiscrete(*p_jaReceivedArgCopy); + } + if(lastAgent) + { + p_ja->SetIndex(jaI); + if(DEBUG_CJA)cerr << "setting index of this joint action to: " + << jaI <AddIndividualAction(ai, curAgentI); + + if(lastAgent) //jointAction is now completed: add it to jointAction set. 
+ { + if(DEBUG_CJA){cerr << "INSERTING the joint action:"; + p_ja->Print();cerr<>ProblemDecTiger::ConstructJointActionsRecursively(Index "<< + curAgentI<<", JointActionDiscrete& ja, Index "<< jaI<<") FINISHED" + < *>(); + } + else + _m_nrJointActions=_m_jointActionVec.size(); + _m_initialized = b; + } + return(true); +} + +size_t MADPComponentDiscreteActions::GetNrActions(Index agentI) const +{ + if(agentI < _m_nrActions.size()) + return _m_nrActions[agentI]; + else + { + stringstream ss; + ss << "Warning: MADPComponentDiscreteActions::GetNrActions(Index agentI) - index out of bounds"<0) + { + const vector& nrActions = GetNrActions(); + vector restr_nrAs(agScope.size()); + IndexTools::RestrictIndividualIndicesToScope( + nrActions, agScope, restr_nrAs); + size_t restr_nrJA = VectorTools::VectorProduct(restr_nrAs); + return restr_nrJA; + } + else + return(0); +} + +/** Throws an exception if there is no action with name s.*/ +Index MADPComponentDiscreteActions::GetActionIndexByName(const string &s, + Index agentI) const +{ + if(!_m_initialized) + throw E("MADPComponentDiscreteActions::GetActionIndexByName - not initialized!"); + + if(agentI >= _m_actionVecs.size()) + { + stringstream ss; + ss << "GetActionIndexByName - Agent index ("<< + agentI<<") out of bounds! (_m_actionVecs contains actions for "<< + _m_actionVecs.size() << " agents...)\n"; + throw E(ss); + } + vector::const_iterator it = _m_actionVecs[agentI].begin(); + vector::const_iterator last = _m_actionVecs[agentI].end(); + while(it != last) + { + string s2 = (*it).GetName(); + if(s == s2) + //if(strcmp(s,s2) == 0)//match + return( (*it).GetIndex() ); + it++; + } + //not found + stringstream ss; + ss << "GetActionIndexByName - action \"" << s << "\" of agent " << agentI << + " not found." 
<< endl; + throw E(ss.str().c_str()); + +} + +const Action* MADPComponentDiscreteActions::GetAction(Index agentI, Index a) const +{ + return ((Action*) GetActionDiscrete( agentI, a) ); +} +const ActionDiscrete* MADPComponentDiscreteActions::GetActionDiscrete(Index agentI, Index a) const +{ + if(!_m_initialized) + { + stringstream ss; + ss << "MADPComponentDiscreteActions::GetAction("<< + agentI<<") - Error: not initialized. "<& indivActionIndices)const +{ +#if RUNTIME_CHECKS + if(!_m_initialized) + { + stringstream ss; + ss << "MADPComponentDiscreteActions::GetJointActionIndex("<< + "vector& indivActionIndices) -Error: not initialized."<& ja_e, const Scope& agSC) const +{ + vector nr_A_e(agSC.size()); + IndexTools::RestrictIndividualIndicesToScope( + GetNrActions(), agSC, nr_A_e); + Index jaI = IndexTools::IndividualToJointIndices( ja_e, nr_A_e); + return(jaI); +} +std::vector MADPComponentDiscreteActions:: +JointToIndividualActionIndices( + Index ja_e, const Scope& agSC) const +{ + vector nr_A_e(agSC.size()); + IndexTools::RestrictIndividualIndicesToScope( + GetNrActions(), agSC, nr_A_e); + vector ja_e_vec = IndexTools::JointToIndividualIndices(ja_e, nr_A_e); + return(ja_e_vec); +} +Index MADPComponentDiscreteActions:: +JointToRestrictedJointActionIndex( + Index jaI, const Scope& agSc_e ) const +{ + const vector& ja_vec = JointToIndividualActionIndices(jaI); + vector ja_vec_e(agSc_e.size()); + IndexTools::RestrictIndividualIndicesToScope(ja_vec, agSc_e, ja_vec_e); + Index ja_e = IndividualToJointActionIndices(ja_vec_e, agSc_e); + return(ja_e); + +} +string MADPComponentDiscreteActions::SoftPrint() const +{ + stringstream ss; + if(DEBUG_MADP_DA){ss << "MADPComponentDiscreteActions::Print()" << endl;} + if(!_m_initialized) + { + stringstream ss; + ss << "MADPComponentDiscreteActions::Print("<< + ") - Error: not initialized. 
"<::const_iterator f = + _m_actionVecs[agentIndex].begin(); + vector::const_iterator l = + _m_actionVecs[agentIndex].end(); + while(f != l) + { + ss << (*f).SoftPrint() << endl; + // (*f).GetName() << " - " << (*f).GetDescription()<::const_iterator ja_it = + _m_jointActionVec.begin(); + vector::const_iterator ja_last = + _m_jointActionVec.end(); + while(ja_it != ja_last) + { + ss << (*ja_it)->SoftPrint()< +#include +#include "Globals.h" +#include "ActionDiscrete.h" +#include "JointActionDiscrete.h" +#include "IndexTools.h" +#include "EOverflow.h" + +#include + + +namespace { + typedef std::vector ActionDVec; +} + +/** \brief MADPComponentDiscreteActions contains functionality for + * discrete action spaces. + * + * It implements a part of the + * MultiAgentDecisionProcessDiscreteInterface. */ +class MADPComponentDiscreteActions +{ + private: + + bool _m_initialized; + bool _m_cachedAllJointActions; + bool _m_jointIndicesValid; + size_t _m_nrJointActions; + + /// The stepsize array - used for indiv->joint index calculation. + size_t * _m_actionStepSize; + + ///The vector storing pointers to joint actions. + /** To use this, ConstructJointActions() should be called */ + std::vector _m_jointActionVec; + + /// When not all joint actions have been created, here we cache + /// the individual indices created by + /// JointToIndividualActionIndices() + std::map *> *_m_jointActionIndices; + + /// Recursively creates the joint actions. + size_t ConstructJointActionsRecursively( Index curAgentI, + JointActionDiscrete& ja, Index jaI); + + std::string SoftPrintActionSets() const; + std::string SoftPrintJointActionSet() const; + + /// The number of actions for each agent. + std::vector _m_nrActions; + /// The vectors of actions (vectors of ActionDiscrete) for each agent. + std::vector > _m_actionVecs; + + protected: + public: + + /// Default constructor. + MADPComponentDiscreteActions(); + + /// Copy constructor. 
+ MADPComponentDiscreteActions(const MADPComponentDiscreteActions& a); + + /// Destructor. + virtual ~MADPComponentDiscreteActions(); + + //data manipulation (set) functions: + + /// Sets _m_initialized to b. + bool SetInitialized(bool b); + + /// Sets the number of actions for agent AI. + void SetNrActions(Index AI, size_t nrA); + + /// Add a new action with name "name" to the actions of agent AI. + void AddAction(Index AI, const std::string &name, + const std::string &description=""); + + /// Recursively constructs all the joint actions. + size_t ConstructJointActions(); + + //get (data) functions: + + /// Return the number of actions vector. + const std::vector& GetNrActions() const + {return _m_nrActions;} + + /// Return the number of actions of agent agentI. + size_t GetNrActions(Index AgentI) const; + + /// Return the number of joiny actions. + size_t GetNrJointActions() const; + ///Get the number of joint actions the agents in agScope can form + size_t GetNrJointActions(const Scope& agScope) const; + + /// Find out if there is an overflow in the joint indices variable. + bool JointIndicesValid() const + {return _m_jointIndicesValid;} + + /// Returns the action index of the agent I's action s. + Index GetActionIndexByName(const std::string &s, Index agentI) const; + + /// Returns the name of a particular action a of agent i. + std::string GetActionName(Index a, Index i) const { + return(_m_actionVecs.at(i).at(a).GetName()); } + + /// Returns the name of a particular joint action a. + std::string GetJointActionName(Index a) const { + return(_m_jointActionVec.at(a)->SoftPrint()); } + + /// Return a ref to the a-th action of agent agentI. + const Action* GetAction(Index agentI, Index a) const; + + /// Return a ref to the a-th action of agent agentI. + const ActionDiscrete* GetActionDiscrete(Index agentI, Index a) const; + + /// Return a ref to the i-th joint action. 
+ const JointAction* GetJointAction(Index i) const; + + /// Return a ref to the i-th joint action (a JointActionDiscrete). + const JointActionDiscrete* GetJointActionDiscrete(Index i) const; + + /** \brief Returns the joint action index that corresponds to + * the vector of specified individual action indices.*/ + Index IndividualToJointActionIndices(const std::vector& + indivActionIndices) const; + + /** \brief Returns the joint action index that corresponds to + * the array of specified individual action indices.*/ + Index IndividualToJointActionIndices(const Index* IndexArray) const + {return IndexTools::IndividualToJointIndicesArrayStepSize( + IndexArray, _m_actionStepSize, _m_nrActions.size());} + + /** \brief Returns a vector of indices to indiv. action + * indicies corr. to joint action index jaI.*/ + const std::vector& JointToIndividualActionIndices(Index jaI)const + { + if(!_m_jointIndicesValid) + { + throw(EOverflow("MADPComponentDiscreteActions::JointToIndividualActionIndices() joint indices are not available, overflow detected")); + } + if(_m_cachedAllJointActions) + return GetJointActionDiscrete(jaI)-> + GetIndividualActionDiscretesIndices(); + else if(_m_jointActionIndices->find(jaI)!= + _m_jointActionIndices->end()) + return(*_m_jointActionIndices->find(jaI)->second); + else // create new + { + std::vector *indices=new std::vector(); + *indices=IndexTools::JointToIndividualIndices(jaI, + GetNrActions()); + _m_jointActionIndices->insert(make_pair(jaI,indices)); + return(*indices); + } + } + + Index IndividualToJointActionIndices( + const std::vector& ja_e, const Scope& agSC) const; + std::vector JointToIndividualActionIndices( + Index ja_e, const Scope& agSC) const; + Index JointToRestrictedJointActionIndex( + Index jaI, const Scope& agSc_e ) const; + + std::string SoftPrint() const; + void Print() const + {std::cout << MADPComponentDiscreteActions::SoftPrint();} +}; + + +#endif /* !_MADPCOMPONENTDISCRETEACTIONS_H_ */ + + +// Local Variables: *** +// 
mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteObservations.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteObservations.cpp new file mode 100644 index 000000000..9245fdc66 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteObservations.cpp @@ -0,0 +1,581 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "MADPComponentDiscreteObservations.h" +#include "IndexTools.h" + +using namespace std; + +#define DEBUG_GETJO_COPYVEC 0 +#define DEBUG_CJO 0 +#define DEBUG_ADD_DO 0 + +//Default constructor +MADPComponentDiscreteObservations::MADPComponentDiscreteObservations() +{ + _m_initialized = false; + _m_observationStepSize = 0; + _m_cachedAllJointObservations=false; + _m_jointObservationIndices=0; + _m_jointIndicesValid=true; + _m_nrJointObservations=0; +} + +MADPComponentDiscreteObservations::MADPComponentDiscreteObservations( + const MADPComponentDiscreteObservations& a) +{ + _m_initialized=a._m_initialized; + _m_cachedAllJointObservations=a._m_cachedAllJointObservations; + _m_jointIndicesValid=a._m_jointIndicesValid; + _m_nrJointObservations=a._m_nrJointObservations; + _m_nrObservations=a._m_nrObservations; + _m_observationStepSize=IndexTools::CalculateStepSize(_m_nrObservations); + _m_observationVecs=a._m_observationVecs; + + _m_jointObservationVec.clear(); + for(Index ja=0;ja!=a._m_jointObservationVec.size();++ja) + 
_m_jointObservationVec.push_back(new JointObservationDiscrete(*a._m_jointObservationVec.at(ja))); + + _m_jointObservationIndices=new map *>(); + if(a._m_jointObservationIndices) + { + map *>::const_iterator iter; + for(iter = a._m_jointObservationIndices->begin(); + iter != a._m_jointObservationIndices->end(); + ++iter) + _m_jointObservationIndices->insert(make_pair( iter->first, new vector(*iter->second ))); + } +} + +//Destructor +MADPComponentDiscreteObservations::~MADPComponentDiscreteObservations() +{ + _m_nrObservations.clear(); + vector >::iterator it = + _m_observationVecs.begin(); + vector >::iterator last = + _m_observationVecs.end(); + while(it != last) + { + // (*it) isa vector + (*it).clear(); + it++; + } + _m_observationVecs.clear(); + vector::iterator it2 = + _m_jointObservationVec.begin(); + vector::iterator last2 = + _m_jointObservationVec.end(); + while(it2 != last2) + { + delete *it2; //removes the joint observation pointed to... + it2++; + } + + _m_jointObservationVec.clear(); + + if(_m_jointObservationIndices) + { + while(!_m_jointObservationIndices->empty()) + { + delete (*_m_jointObservationIndices->begin()).second; + _m_jointObservationIndices->erase( _m_jointObservationIndices->begin() ); + } + delete _m_jointObservationIndices; + } +#if 0 + if(_m_jointObservationIndices) + { + vector*>::iterator it3 = _m_jointObservationIndices->begin(); + vector*>::iterator last3 = _m_jointObservationIndices->end(); + while(it3 != last3) + { + delete *it3; //removes the joint observation pointed to... + it3++; + } + } +#endif + delete[] _m_observationStepSize; +} + +//data manipulation (set) functions: + +/** Calls ConstructJointObservationsRecursively on a new (empty) joint + * observation. 
this function is typically called from the parser + * (parser/ParserDecPOMDPDiscrete.h) */ +size_t MADPComponentDiscreteObservations::ConstructJointObservations() +{ +if(DEBUG_CJO) cerr << "ConstructJointObservations() - called"; + JointObservationDiscrete* jo = new JointObservationDiscrete(); + size_t NRJO = ConstructJointObservationsRecursively(0, *jo, 0); + _m_cachedAllJointObservations=true; + return NRJO; +} + +/** Creates (_m_jointObservationVec) using _m_observationVecs (which + * need to be initialized before calling this function...) */ +size_t MADPComponentDiscreteObservations::ConstructJointObservationsRecursively( + Index curAgentI, JointObservationDiscrete& jo, Index joI) +{ +if(DEBUG_CJO) cerr << ">> MADPComponentDiscreteObservations::ConstructJointObservations(Index "<= _m_observationVecs.size()) + { + stringstream ss; + ss << "ConstructJointObservationsRecursively - current Agent index ("<< + curAgentI<<") out of bounds! (_m_observationVecs contains "<< + "observations for "<<_m_observationVecs.size() << " agents...)\n"; + throw E(ss); + } + ObservationDVec::iterator first = _m_observationVecs[curAgentI].begin(); + ObservationDVec::iterator it = _m_observationVecs[curAgentI].begin(); + ObservationDVec::iterator last = _m_observationVecs[curAgentI].end(); + ObservationDVec::iterator beforelast = _m_observationVecs[curAgentI].end(); + beforelast--; + + if(it == last) + { + stringstream ss; + ss << " empty observation set for agent " << curAgentI; + throw E(ss); + } + //first observation extends the received jo + JointObservationDiscrete* p_joReceivedArgCopy = new + JointObservationDiscrete(jo); + JointObservationDiscrete* p_jo; + + while( it != last) // other observations extend duplicates of jo + { + if(DEBUG_CJO) + cerr << "\nnext observation"; + if(it == first) // + { + if(DEBUG_CJO) cerr << "(first observation - not making copy)\n"; + p_jo = &jo; + } + else if(it == beforelast)//this is the last valid it ->last observation + { + if(DEBUG_CJO) cerr 
<< "(last observation - not making copy)\n"; + p_jo = p_joReceivedArgCopy; //don't make a new copy + } + else //make a new copy + { + if(DEBUG_CJO) cerr << "(intermed. observation - making copy)\n"; + p_jo = new JointObservationDiscrete(*p_joReceivedArgCopy); + } + if(lastAgent) + { + p_jo->SetIndex(joI); + if(DEBUG_CJO) + cerr << "setting index of this joint observation to: "<< joI + <AddIndividualObservation(ai, curAgentI); + + if(lastAgent)//jointObservation is now completed:add it to the jointObservation set. + { + if(DEBUG_CJO) + {cerr<<"INSERTING the joint observation:"; p_jo->Print();cerr<> MADPComponentDiscreteObservations::ConstructJointObservationsRecursively(Index "< thisAgentsObservations; + for(Index i=0;i thisAgentsObservations; + ObservationDiscrete ad(0, name, description); + thisAgentsObservations.push_back(ad); + _m_observationVecs.push_back(thisAgentsObservations); + } + else + { + //we add an observation for this agent - increment his nr_observations + Index newObservationIndex = _m_nrObservations[AI]++; + ObservationDiscrete ad(newObservationIndex, name, description); + _m_observationVecs[AI].push_back(ad); + } +} + +/** When setting to true, a verification of member elements is + * performed. (i.e. 
a check whether all vectors have the correct size + * and non-zero entries) */ +bool MADPComponentDiscreteObservations::SetInitialized(bool b) +{ + if(b == false) + { + if(_m_initialized == true) + delete [] _m_observationStepSize; + _m_initialized = b; + return true; + } + if(_m_initialized == true && b == true) + { + //first free mem before re-initialize: + delete [] _m_observationStepSize; + } + if(b == true) + { + if(_m_nrObservations.size() == 0) + throw(E("MADPComponentDiscreteObservations::SetInitialized() no observations specified")); + _m_observationStepSize= + IndexTools::CalculateStepSize(_m_nrObservations); + + if(!_m_cachedAllJointObservations) + { + size_t nrJO=1; + size_t prevNrJO=nrJO; + for(Index i=0;i!=_m_nrObservations.size();++i) + { + nrJO*=_m_nrObservations[i]; + // detect overflow + if(nrJO *>(); + } + else + _m_nrJointObservations=_m_jointObservationVec.size(); + _m_initialized = b; + } + return(true); +} + +size_t MADPComponentDiscreteObservations::GetNrObservations(Index agentI) const +{ + if(!_m_initialized) + { + stringstream ss; + ss << "MADPComponentDiscreteObservations::GetNrObservations("<< + agentI<<") - Error: not initialized. "<= _m_observationVecs.size()) + { + stringstream ss; + ss << "GetObservationIndexByName - Agent index ("<< + agentI<<") out of bounds! (_m_observationVecs contains observations" + <<" for " << _m_observationVecs.size() << " agents...)\n"; + throw E(ss); + } + vector::const_iterator it = + _m_observationVecs[agentI].begin(); + vector::const_iterator last = + _m_observationVecs[agentI].end(); + while(it != last) + { + string s2 = (*it).GetName(); + if(s == s2) + return( (*it).GetIndex() ); + it++; + } + //not found + //return(-1); + stringstream ss; + ss << "GetObservationIndexByName - observation \"" << s << "\" of agent " << + agentI << " not found." 
<< endl; + throw E(ss); +} + + +const ObservationDiscrete* MADPComponentDiscreteObservations::GetObservationDiscrete(Index agentI, Index a) const +{ + if(!_m_initialized) + { + stringstream ss; + ss << "MADPComponentDiscreteObservations::GetObservationDiscrete("<< + agentI<<","<< a <<") - Error: not initialized. "<& indivObservationIndices)const +{ + if(!_m_initialized) + { + stringstream ss; + ss << "MADPComponentDiscreteObservations::GetJointObservationIndex("<< + "vector& indivObservationIndices) - Error: not initialized." + <& jo_e, const Scope& agSC) const +{ + vector nr_A_e(agSC.size()); + IndexTools::RestrictIndividualIndicesToScope( + GetNrObservations(), agSC, nr_A_e); + Index joI = IndexTools::IndividualToJointIndices( jo_e, nr_A_e); + return(joI); +} +std::vector MADPComponentDiscreteObservations:: +JointToIndividualObservationIndices( + Index jo_e, const Scope& agSC) const +{ + vector nr_A_e(agSC.size()); + IndexTools::RestrictIndividualIndicesToScope( + GetNrObservations(), agSC, nr_A_e); + vector jo_e_vec = IndexTools::JointToIndividualIndices(jo_e, nr_A_e); + return(jo_e_vec); +} +Index MADPComponentDiscreteObservations:: +JointToRestrictedJointObservationIndex( + Index joI, const Scope& agSc_e ) const +{ + const vector& jo_vec = JointToIndividualObservationIndices(joI); + vector jo_vec_e(agSc_e.size()); + IndexTools::RestrictIndividualIndicesToScope(jo_vec, agSc_e, jo_vec_e); + Index jo_e = IndividualToJointObservationIndices(jo_vec_e, agSc_e); + return(jo_e); + +} + + + + + +string MADPComponentDiscreteObservations::SoftPrint() const +{ + stringstream ss; + ss << "MADPComponentDiscreteObservations::Print()" << endl; + ss << "#joint observations="<::const_iterator f = + _m_observationVecs[agentIndex].begin(); + vector::const_iterator l = + _m_observationVecs[agentIndex].end(); + while(f != l) + { + ss << (*f).SoftPrint() << endl; +// GetName() << " - " << (*f).GetDescription()<::const_iterator jo_it = + _m_jointObservationVec.begin(); + 
vector::const_iterator jo_last = + _m_jointObservationVec.end(); + while(jo_it != jo_last) + { + ss << (*jo_it)->SoftPrint()< +#include +#include "Globals.h" +#include "JointObservationDiscrete.h" +#include "ObservationDiscrete.h" +#include "IndexTools.h" +#include "EOverflow.h" + +#include + +namespace { + typedef std::vector ObservationDVec; +} + +/** + * \brief MADPComponentDiscreteObservations contains functionality for + * discrete observation spaces. + * + * It implements a part of the + * MultiAgentDecisionProcessDiscreteInterface. */ +class MADPComponentDiscreteObservations +{ + private: + + bool _m_initialized; + bool _m_cachedAllJointObservations; + bool _m_jointIndicesValid; + size_t _m_nrJointObservations; + + /// The stepsize array - used for indiv->joint index calculation. + size_t * _m_observationStepSize; + + /// The vector storing the joint observations + /** To use this, ConstructJointObservations() should be called */ + std::vector _m_jointObservationVec; + + /// When not all joint observations have been created, here we cache + /// the individual indices created by + /// JointToIndividualObservationIndices() + std::map *> *_m_jointObservationIndices; + + /// Recursively creates the joint observations. + size_t ConstructJointObservationsRecursively( Index curAgentI, + JointObservationDiscrete& jo, Index joI); + + std::string SoftPrintObservationSets() const; + std::string SoftPrintJointObservationSet() const; + + /// The number of observations for each agent. + std::vector _m_nrObservations; + /** \brief The vectors of observations (vectors of + * ObservationDiscrete) for each agent.*/ + std::vector > _m_observationVecs; + + public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + MADPComponentDiscreteObservations(); + + /// Copy constructor. + MADPComponentDiscreteObservations(const MADPComponentDiscreteObservations& a); + + /// Destructor. 
+ virtual ~MADPComponentDiscreteObservations(); + + //operators: + + //data manipulation (set) functions: + /// Sets _m_initialized to b. + bool SetInitialized(bool b); + + /// Sets the number of observations for agent AI. + void SetNrObservations(Index AI, size_t nrO); + + /// Add a new observation with name "name" to the observ. of agent AI. + void AddObservation(Index AI, const std::string &name, + const std::string &description=""); + + /// Recursively constructs all the joint observations. + size_t ConstructJointObservations(); + + //get (data) functions: + + /// Return the number of observations vector. + const std::vector& GetNrObservations() const + {return _m_nrObservations;} + + /// Return the number of observations of agent agentI + size_t GetNrObservations(Index AgentI) const; + + /// Return the number of joint observations. + size_t GetNrJointObservations() const; + + /// Find out if there is an overflow in the joint indices variable. + bool JointIndicesValid() const + {return _m_jointIndicesValid;} + + /// Returns the observation index of the agent I's observation s. + Index GetObservationIndexByName(const std::string &s, Index agentI) const; + + /// Returns the name of a particular observation o of agent i. + std::string GetObservationName(Index o, Index i) const { + return(_m_observationVecs.at(i).at(o).GetName()); } + + /// Returns the name of a particular joint observation o. + std::string GetJointObservationName(Index o) const { + return(_m_jointObservationVec.at(o)->SoftPrint()); } + + /// Return a ref to the a-th observation of agent agentI. + const ObservationDiscrete* GetObservationDiscrete(Index agentI, + Index a) const; + + /// Return a ref to the a-th observation of agent agentI. + const Observation* GetObservation(Index agentI, Index a) const + {return( (Observation*) GetObservationDiscrete(agentI,a) );} + + /// Return a ref to the i-th joint observation. 
+ const JointObservation* GetJointObservation(Index i) const; + + /// Return a ref to the i-th joint observation integer. + const JointObservationDiscrete* GetJointObservationDiscrete(Index i) + const; + + /** \brief Returns the joint observation index that + * corresponds to the vector of specified individual + * observation indices.*/ + Index IndividualToJointObservationIndices(const std::vector& + indivObservationIndices)const; + + /** \brief Returns a vector of indices to indiv. observation + * indicies corr. to joint observation index joI.*/ + const std::vector& + JointToIndividualObservationIndices(Index joI) const + { + if(!_m_jointIndicesValid) + { + throw(EOverflow("MADPComponentDiscreteObservations::JointToIndividualObservationIndices() joint indices are not available, overflow detected")); + } + if(_m_cachedAllJointObservations) + return GetJointObservationDiscrete(joI)-> + GetIndividualObservationDiscretesIndices(); + else if(_m_jointObservationIndices->find(joI)!= + _m_jointObservationIndices->end()) + return(*_m_jointObservationIndices->find(joI)->second); + else // create new + { + std::vector *indices=new std::vector(); + *indices=IndexTools::JointToIndividualIndices(joI, + GetNrObservations()); + _m_jointObservationIndices->insert(make_pair(joI,indices)); + return(*indices); // deleted in dtor + } + } + + Index IndividualToJointObservationIndices( + const std::vector& jo_e, const Scope& agSC) const; + std::vector JointToIndividualObservationIndices( + Index jo_e, const Scope& agSC) const; + Index JointToRestrictedJointObservationIndex( + Index joI, const Scope& agSc_e ) const; + + std::string SoftPrint() const; + void Print() const + {std::cout << SoftPrint();} +}; + + +#endif /* !_MADPCOMPONENTDISCRETEOBSERVATIONS_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteStates.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteStates.cpp 
new file mode 100644 index 000000000..261285755 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteStates.cpp @@ -0,0 +1,300 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "MADPComponentDiscreteStates.h" +#include + +using namespace std; + +#define DEBUG_MADP_DSTATES 0 + +//Default constructor +MADPComponentDiscreteStates::MADPComponentDiscreteStates() + : + _m_initialized(false) + ,_m_nrStates(0) + ,_m_initialStateDistribution(0) +{ +} + +MADPComponentDiscreteStates::MADPComponentDiscreteStates(size_t nrS) + : + _m_initialized(false) + ,_m_nrStates(nrS) + ,_m_initialStateDistribution(0) +{ + SetNrStates(nrS); + _m_initialized = true; +} + +MADPComponentDiscreteStates::MADPComponentDiscreteStates( + const MADPComponentDiscreteStates& a) +{ + _m_initialized=a._m_initialized; + _m_nrStates=a._m_nrStates; + _m_stateVec=a._m_stateVec; + _m_initialStateDistribution= + new StateDistributionVector(*a._m_initialStateDistribution); +} + + +//Destructor +MADPComponentDiscreteStates::~MADPComponentDiscreteStates() +{ + delete _m_initialStateDistribution; +} + +bool MADPComponentDiscreteStates::SetInitialized(bool b) +{ + if(b) + { + if(_m_nrStates==0) + throw(E("MADPComponentDiscreteStates::SetInitialized problem has 0 states")); + + // alike to Belief::SanityCheck() + + bool validISD=true; + + // check for negative and entries>1 + double sum=0; + for(vector::const_iterator it= + _m_initialStateDistribution->begin(); + 
it!=_m_initialStateDistribution->end(); ++it) + { + if(*it<0) + validISD=false; + if(*it>1) + validISD=false; + if(std::isnan(*it)) + validISD=false; + sum+=*it; + } + + // check if sums to 1 + if(abs(sum-1)>PROB_PRECISION) + validISD=false; + + // but if it sums to 0, this means the ISD simply has not been set. + // in this case, we set to uniform: + if(abs(sum) < PROB_PRECISION) + { + cerr << "Warning, initial state distribution was not set, assuming uniform..." << endl; + this->SetUniformISD(); + validISD = true; + } + + // check whether the size is correct + if(_m_initialStateDistribution->size()!=_m_nrStates) + validISD=false; + + if(!validISD) + throw(E("MADPComponentDiscreteStates::SetInitialized initial state distribution is not a valid probability distribution")); + } + + _m_initialized = b; + return(b); +} + +void MADPComponentDiscreteStates::AddState(const string &StateName) +{ + if(_m_initialStateDistribution == 0) + _m_initialStateDistribution = new StateDistributionVector(); + + if(!_m_initialized) + { + _m_nrStates=0; + _m_initialStateDistribution->clear(); + _m_stateVec.clear(); + + _m_initialized = true; + } + _m_stateVec.push_back(StateDiscrete( _m_nrStates++, StateName)); + _m_initialStateDistribution->push_back(0.0); +// _m_nrStates++; - already done... 
+} +void MADPComponentDiscreteStates::SetNrStates(size_t nrS) +{ + _m_nrStates = nrS; + _m_stateVec.clear(); + if(_m_initialStateDistribution == 0) + _m_initialStateDistribution = new StateDistributionVector(); + + _m_initialStateDistribution->clear(); + for(Index i=0; i < nrS; i++) + { + _m_stateVec.push_back(StateDiscrete(i)); + _m_initialStateDistribution->push_back(0.0); + } + _m_initialized = true; +} + +void MADPComponentDiscreteStates::SetUniformISD() +{ + //cout << " MADPComponentDiscreteStates::SetUniformISD called"<::iterator it = _m_initialStateDistribution->begin(); + vector::iterator last = _m_initialStateDistribution->end(); + while(it!=last) + { + *it = uprob; + it++; + } +} + +void MADPComponentDiscreteStates::SetISD(StateDistribution* p) +{ + StateDistributionVector* p2 = dynamic_cast(p); + if (p2 == 0) + throw E("MADPComponentDiscreteStates::SetISD(StateDistribution* p) dynamic cast failed"); + SetISD(p2); +} + +void MADPComponentDiscreteStates::SetISD(vector v) +{ + if(v.size() != _m_nrStates) + throw E("MADPComponentDiscreteStates::SetISD - ERROR: nrStates don't match!"); + + _m_initialStateDistribution->clear(); + *_m_initialStateDistribution = v; +} + +Index MADPComponentDiscreteStates::GetStateIndex(StateDiscrete s) const +{ + return(s.GetIndex()); +} + +/** Throws an exception if there is no state with name s.*/ +Index MADPComponentDiscreteStates::GetStateIndexByName(const string &s) const +{ + if(!_m_initialized) + throw E("MADPComponentDiscreteStates::GetStateIndexByName - not initialized!"); + + vector::const_iterator it = _m_stateVec.begin(); + vector::const_iterator last = _m_stateVec.end(); + while(it != last) + { + string s2 = (*it).GetName(); + if(s == s2) + //if(strcmp(s,s2) == 0)//match + return( (*it).GetIndex() ); + it++; + } + //not found + //return(-1); + stringstream ss; + ss << "GetStateIndexByName - state \"" << s << "\" not found." 
<< endl; + throw E(ss); + +} + + + +double MADPComponentDiscreteStates::GetInitialStateProbability(Index sI) const +{ + if(!_m_initialized) + { + stringstream ss; + ss << "MADPComponentDiscreteStates::GetInitialStateProbability("<< + sI<<") - Error: not initialized. " <size() ) + { + if(DEBUG_MADP_DSTATES) cout << "MultiAgentDecisionProcess::GetInitialStateProbability - index "<size() <at(sI)); + } + //else + stringstream ss; + ss << "MultiAgentDecisionProcess::GetInitialStateProbability - ERROR GetInitialStateProbability: index out of bounds"; + throw E(ss); +} + + +string MADPComponentDiscreteStates::SoftPrintInitialStateDistribution() const +{ + if(!_m_initialized) + { + stringstream ss; + ss << "MADPComponentDiscreteStates::PrintInitialStateDistribution("<< + ") - Error: not initialized. " <SoftPrint()); +} + +string MADPComponentDiscreteStates::SoftPrintStates() const +{ + stringstream ss; + vector::const_iterator it = _m_stateVec.begin(); + vector::const_iterator last = _m_stateVec.end(); + while(it!=last) + { + ss << (*it).SoftPrint(); + ss << endl; + it++; + } + return(ss.str()); +} +string MADPComponentDiscreteStates::SoftPrint() const +{ + stringstream ss; + if(!_m_initialized) + { + stringstream ss2; + ss2 << "MADPComponentDiscreteStates::SoftPrint("<< + ") - Error: not initialized. " <size();i++) + { + sum+= _m_initialStateDistribution->at(i); + if(randNr<=sum) + { + state=i; + break; + } + } + + return(state); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteStates.h b/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteStates.h new file mode 100644 index 000000000..af457e320 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentDiscreteStates.h @@ -0,0 +1,135 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. 
However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _MADPCOMPONENTDISCRETESTATES_H_ +#define _MADPCOMPONENTDISCRETESTATES_H_ 1 + +/* the include directives */ +#include +#include +#include "Globals.h" + +#include "StateDiscrete.h" +#include "StateDistributionVector.h" + +/** + * \brief MADPComponentDiscreteStates is a class that represents a discrete + * state space. + * + * It implements a part of the + * MultiAgentDecisionProcessDiscreteInterface. */ +class MADPComponentDiscreteStates +{ + private: + bool _m_initialized; + size_t _m_nrStates; + + ///Returns a string with the states + std::string SoftPrintStates() const; + ///Returns a string with the initial state distribution. + std::string SoftPrintInitialStateDistribution() const; + + /// A vector that contains all the states. + std::vector _m_stateVec; + /// A StateDistributionVector that represents the initial state distribution. + StateDistributionVector* _m_initialStateDistribution; + + protected: + public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + MADPComponentDiscreteStates(); + /// Constructor that sets the number of states. + MADPComponentDiscreteStates(size_t nrS); + + /// Copy constructor. + MADPComponentDiscreteStates(const MADPComponentDiscreteStates& a); + + /// Destructor. + virtual ~MADPComponentDiscreteStates(); + + //operators: + + //data manipulation (set) functions: + /// + /** \brief Sets _m_initialized to b. + * + * If no initial state distribution had been set, this function + * will call SetUniformISD(). 
(To accomodate .pomdp files where + * specification of ISD is optional.) + */ + bool SetInitialized(bool b); + + /// Adds a new state. + void AddState(const std::string &StateName); + + /// Sets the number of states to nrS. + void SetNrStates(size_t nrS); + + /// Sets the initial state distribution to a uniform one. + void SetUniformISD(); + + /// Sets the initial state distribution to v. + void SetISD(StateDistribution* p); + void SetISD(StateDistributionVector* p) + { _m_initialStateDistribution = p;} + void SetISD(std::vector v); + + //get (data) functions: + /// Return the number of states. + size_t GetNrStates() const {return _m_nrStates;}; + + /// Returns the state index of state s. + Index GetStateIndex(StateDiscrete s) const; + + /// Returns the state index of state s. + Index GetStateIndexByName(const std::string &s) const; + + /// Returns a pointer to state i. */ + const State* GetState(Index i) const + {return(&_m_stateVec.at(i)); } + + virtual std::string SoftPrintState(Index sI) const + { return GetStateName(sI);} + /// Returns the name of a particular state i. + std::string GetStateName(Index i) const { + return(_m_stateVec.at(i).SoftPrintBrief()); } + + /// Return the initial state distribution. + double GetInitialStateProbability(Index sI) const; + + /// Returns the complete initial state distribution. + //std::vector GetISD() const { return(_m_initialStateDistribution); } + virtual StateDistribution* GetISD() + { return(_m_initialStateDistribution); } + virtual StateDistribution* GetISD() const + { return(_m_initialStateDistribution); } + + /// Sample a state according to the initial state PDF. 
+ Index SampleInitialState() const; + + std::string SoftPrint() const; + void Print() const + {std::cout << SoftPrint();} +}; + + +#endif /* !_MADPCOMPONENTDISCRETESTATES_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentFactoredStates.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentFactoredStates.cpp new file mode 100644 index 000000000..fd9d5133f --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentFactoredStates.cpp @@ -0,0 +1,335 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "MADPComponentFactoredStates.h" +#include "StateFactorDiscrete.h" +#include +#include "FSDist_COF.h" +#include "State.h" + +using namespace std; + +#define DEBUG_MCFS 1 +//Default constructor +MADPComponentFactoredStates::MADPComponentFactoredStates() + : + _m_initialized(false) + , _m_nrStates(0) + , _m_nrStateFactors(0) + , _m_stepSize(0) + , _m_initialStateDistribution(0) +{ + _m_jointStatesMap=new map, State*>; +} +//Copy constructor. 
+MADPComponentFactoredStates::MADPComponentFactoredStates(const MADPComponentFactoredStates& o) +{ +} +//Destructor +MADPComponentFactoredStates::~MADPComponentFactoredStates() +{ + for(Index i=0;i!=_m_stateFactors.size();++i) + delete _m_stateFactors.at(i); + delete _m_initialStateDistribution; + + while( !_m_jointStatesMap->empty() ) + { + delete (*_m_jointStatesMap->begin()).second; + _m_jointStatesMap->erase( _m_jointStatesMap->begin() ); + } + delete _m_jointStatesMap; + delete [] _m_stepSize; +} +//Copy assignment operator +MADPComponentFactoredStates& MADPComponentFactoredStates::operator= (const MADPComponentFactoredStates& o) +{ + if (this == &o) return *this; // Gracefully handle self assignment + // Put the normal assignment duties here... + + return *this; +} + +string MADPComponentFactoredStates::SoftPrintInitialStateDistribution() const +{ + AssureInitialized(":SoftPrintInitialStateDistribution()"); + if(_m_initialStateDistribution == 0) + throw E("MADPComponentFactoredStates::SoftPrintInitialStateDistribution No ISD set yet (use SetISD first) !"); + return( _m_initialStateDistribution->SoftPrint() ); +} + +string MADPComponentFactoredStates::SoftPrintStates() const +{ + AssureInitialized("SoftPrintStates()"); + stringstream ss; + ss << "Number of state factors: " << _m_nrStateFactors <::const_iterator it = _m_stateFactors.begin(); + vector::const_iterator last = _m_stateFactors.end(); + Index i=0; + while(it!=last) + { + ss << "SF index "<SoftPrint(); + ss << endl; + it++; + } + return(ss.str()); +} +string MADPComponentFactoredStates::SoftPrintState(Index sI) const +{ + stringstream ss; + vector s_vec = StateIndexToFactorValueIndices(sI); + ss << SoftPrintVector(s_vec); + return(ss.str()); + +} +string MADPComponentFactoredStates::SoftPrint() const +{ + stringstream ss; + AssureInitialized("SoftPrint()"); + ss << "- nr. 
states="<< GetNrStates()<& sfValues) const +{ + if(sfSc.size() != sfValues.size()) + throw E("MADPComponentFactoredStates::SoftPrintPartialState: sfSc.size() != sfValues.size()"); + + stringstream ss; + ss << "<"; + + for(Index scI=0; scI < sfSc.size(); scI++) + { + Index sfI = sfSc.at(scI); + Index sfValI = sfValues.at(scI); + StateFactorDiscrete* sfac = _m_stateFactors.at(sfI); + string name = sfac->GetName(); + string value = sfac->GetStateFactorValue(sfValI); + if(scI > 0) + ss << ","; + ss << name << "=" << value; + } + ss << ">"; + return(ss.str()); +} + +void MADPComponentFactoredStates::AssureInitialized(string caller) const +{ + if(!_m_initialized) + { + stringstream ss; + ss << "MADPComponentFactoredStates::"<SetUniform(); +/* + size_t nrS = GetNrStates(); + double uprob = 1.0 / ((double)nrS); +#if 0 && DEBUG_MCFS + cout << "uprob="<::iterator it = _m_initialStateDistribution->begin(); + vector::iterator last = _m_initialStateDistribution->end(); + while(it!=last) + { + *it = uprob; + it++; + } +#if 0 && DEBUG_MCFS + PrintDebugStuff(); +#endif +*/ +} + +/* +void MADPComponentFactoredStates::SetISD(const vector& v) +{ + AssureInitialized("SetISD(vector v)"); +// _m_initialStateDistribution->clear(); + _m_initialStateDistribution = v; +} +*/ + +double MADPComponentFactoredStates::GetInitialStateProbability(Index sI) const +{ + AssureInitialized("GetInitialStateProbability(Index sI)"); + vector sfacValues = StateIndexToFactorValueIndices(sI); + return(_m_initialStateDistribution->GetProbability(sfacValues) ); +} + + +Index MADPComponentFactoredStates::SampleInitialState() const +{ + if(!_m_initialized) + { + stringstream ss; + ss << "MADPComponentFactoredStates::SampleInitialState()" << + " - Error: not initialized. 
" << endl; + throw E(ss); + } + + vector sfacValues; + SampleInitialState(sfacValues); + Index state = FactorValueIndicesToStateIndex(sfacValues); + return(state); +} + +void MADPComponentFactoredStates::SampleInitialState(vector &sIs) const +{ + sIs=_m_initialStateDistribution->SampleState(); +} + +Index MADPComponentFactoredStates:: +AddStateFactor(const string &n, const string &d) +{ + if(_m_initialized) + throw E("Can't add state factor to initialized MADPComponentFactoredStates!"); + + _m_stateFactors.push_back( new StateFactorDiscrete(n,d) ); + _m_sfacDomainSizes.push_back(0); + Index i = _m_nrStateFactors++; + _m_allStateFactorScope.Insert(i); + return i; + +} + +Index MADPComponentFactoredStates:: +AddStateFactorValue(Index sf, const string &v) +{ + Index i = _m_stateFactors.at(sf)->AddStateFactorValue(v); + _m_sfacDomainSizes.at(sf)++; + return(i); + +} + +void MADPComponentFactoredStates:: +RemoveStateFactor(const Index sf) +{ + delete(_m_stateFactors.at(sf)); + _m_stateFactors.erase(_m_stateFactors.begin()+sf); + _m_sfacDomainSizes.erase(_m_sfacDomainSizes.begin()+sf); + _m_nrStateFactors--; + _m_allStateFactorScope.Remove(sf); + +} + + +Index MADPComponentFactoredStates:: +FactorValueIndicesToStateIndex(const vector& s_e_vec, + const Scope& sfSC) const +{ + vector nr_sf_e(sfSC.size()); + IndexTools::RestrictIndividualIndicesToScope( + _m_sfacDomainSizes, sfSC, nr_sf_e); + Index sI_e = IndexTools::IndividualToJointIndices( s_e_vec, nr_sf_e); + return(sI_e); +} + +vector MADPComponentFactoredStates:: +StateIndexToFactorValueIndices(Index s_e, + const Scope& sfSC) const +{ + vector nr_sf_e(sfSC.size()); + IndexTools::RestrictIndividualIndicesToScope( + _m_sfacDomainSizes, sfSC, nr_sf_e); + vector s_e_vec = IndexTools::JointToIndividualIndices(s_e, nr_sf_e); + return(s_e_vec); +} + +const State* MADPComponentFactoredStates::GetState(Index i) const +{ + vector sIs=StateIndexToFactorValueIndices(i); + + // we cached the ones already asked for + 
if(_m_jointStatesMap->find(sIs)!=_m_jointStatesMap->end()) + return(_m_jointStatesMap->find(sIs)->second); + else // create new joint state and add it to cache + { + State *state=new State; // not a StateDiscrete, since the + // index might overflow + string name=""; + for(Index y = 0; y < _m_nrStateFactors; y++) + { + if(y>0) + name+="_"; + name+=GetStateFactorDiscrete(y)->GetStateFactorValue(sIs[y]); + } + state->SetName(name); + state->SetDescription(""); + _m_jointStatesMap->insert(make_pair(sIs,state)); + return(state); + } +} + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentFactoredStates.h b/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentFactoredStates.h new file mode 100644 index 000000000..b5ca14d0e --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/MADPComponentFactoredStates.h @@ -0,0 +1,222 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _MADPCOMPONENTFACTOREDSTATES_H_ +#define _MADPCOMPONENTFACTOREDSTATES_H_ 1 +/* the include directives */ +#include +#include "Globals.h" +#include "IndexTools.h" +#include +#include "State.h" + +//need to include (need to know that it is a StateDistribution): +#include "FactoredStateDistribution.h" + +class StateFactorDiscrete; + +/**\brief MADPComponentFactoredStates is a class that represents a factored + * states space. 
+ * + * It implements part of the MultiAgentDecisionProcessDiscreteInterface + * (at the moment it implements the same part as MADPComponentDiscreteStates, + * although this class offers more functions. Eventually a + * MultiAgentDecisionProcessFactoredStatesInterface might be made.) + * + * A factored state is one that is defined as an assignment to a set of + * state variables, or factors. + * A factored state space is defined as the cross-product of the set of + * state factors. This is in contrast to a 'flat' state space that explicitly + * enumarates all possible states. + * + * */ +class MADPComponentFactoredStates +// : +// virtual public MultiAgentDecisionProcessDiscreteFactoredStatesInterface + //which is partly implemented by this class +{ + private: + + + + ///Is the state space initialized? (is it completely specified?) + bool _m_initialized; + ///Var that keeps track of the nr. of 'flat' states. + size_t _m_nrStates; + + ///The number of state factors + size_t _m_nrStateFactors; + + ///Vector with size of the domain of each state factor (the nr. values) + /**This is used to compute state indices.*/ + std::vector _m_sfacDomainSizes; + ///Array caching the stepsize - used for computing indices. + /**Computed during initialization.*/ + size_t* _m_stepSize; + /// A std::vector with a pointers to the different state factors. + /**The state factors themself specify their name, description, + * (size of) their domain, etc. + */ + std::vector _m_stateFactors; + + //store the scope containing all state factors for convenience + Scope _m_allStateFactorScope; + + FactoredStateDistribution* _m_initialStateDistribution; + + ///Assure that \em this is inititialed. (throw E otherwise) + void AssureInitialized(std::string caller="unspecified caller") const; + + std::map, State*> *_m_jointStatesMap; + + protected: + + public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + MADPComponentFactoredStates(); + /// Copy constructor. 
+ MADPComponentFactoredStates(const MADPComponentFactoredStates& a); + /// Destructor. + virtual ~MADPComponentFactoredStates(); + /// Copy assignment operator + MADPComponentFactoredStates& operator= (const + MADPComponentFactoredStates& o); + + //operators: + + //data manipulation (set) functions: + /**\brief Initializes the states space. Should be called when all + * factors are added. + * + * From _m_stateFactors, this function initializes all the other + * member variables. + */ + bool SetInitialized(bool b); + //void Initialize(); + + ///Adds a factor to the statespace. + /**Can only be used when not \em this is initialized yet. + * returns the index of the state factor. + */ + Index AddStateFactor(const std::string &n="undef. name", + const std::string &d="undef. descr."); + + ///Adds a state factor value \a to the \a sf -th state factor. + /**Can only be used when not \em this is initialized yet. + * returns the index of the state factor value. * + */ + Index AddStateFactorValue(Index sf, const std::string &v="undef"); + + //get (data) functions: + + const StateFactorDiscrete* GetStateFactorDiscrete(Index sfacI) const + { return _m_stateFactors.at(sfacI); } + + /// Removes state factor sf from the problem description, and fixes + /// indices accordingly. + void RemoveStateFactor(Index sf); + + + //implementing MultiAgentDecisionProcessDiscreteFactoredStatesInterface + + + + ///SoftPrints the factored state space. + std::string SoftPrintStates() const; + ///SoftPrints a particular state + std::string SoftPrintState(Index sI) const; + ///SoftPrints the ISD + std::string SoftPrintInitialStateDistribution() const; + ///SoftPrints the factored state space and ISD. + std::string SoftPrint() const; + std::string SoftPrintPartialState(const Scope& sfSc, + const std::vector& sfValues) const; + + +//implement the MultiAgentDecisionProcessDiscreteFactoredStatesInterface! 
+ + /// return the number of state factors + size_t GetNrStateFactors () const + { return _m_nrStateFactors;} + const Scope& GetAllStateFactorScope() const + { return _m_allStateFactorScope; } + /// return the std::vector with the number of values per factor + const std::vector< size_t >& GetNrValuesPerFactor() const + { return _m_sfacDomainSizes;} + /// return the std::vector with the number of values per factor + const size_t GetNrValuesForFactor(Index sf) const + { return _m_sfacDomainSizes.at(sf);} + /// convert a state index to the std::vector of factor value indices. + std::vector StateIndexToFactorValueIndices(Index stateI) const + { return IndexTools::JointToIndividualIndicesStepSize( + stateI, _m_stepSize, _m_nrStateFactors);} + /// returns the index of the value of factor fI + Index StateIndexToFactorValueIndex(Index fI, Index s) const + { return StateIndexToFactorValueIndices(s).at(fI); } + ///convert std::vector of (indices of) factor values to (flat) state index. + Index FactorValueIndicesToStateIndex(const std::vector &fv) const + { return IndexTools::IndividualToJointIndicesStepSize(fv, _m_stepSize);} + + //functions with explicitly specified scope + Index FactorValueIndicesToStateIndex(const std::vector& s_e_vec, + const Scope& sfSC) const; + std::vector StateIndexToFactorValueIndices(Index s_e, + const Scope& sfSC) const; + + +//implement the MultiAgentDecisionProcessDiscreteInterface + /// Return the number of states. + size_t GetNrStates() const; + + ///Returns a pointer to state i. + /**This function is defined in + * MultiAgentDecisionProcessDiscreteInterface. + * We cache any requests made on the fly. 
+ */ + const State* GetState(Index i) const; + + Index SampleInitialState () const; + void SampleInitialState(std::vector &sIs) const; + double GetInitialStateProbability (Index sI) const; + virtual const FactoredStateDistribution* GetFactoredISD () const + {return _m_initialStateDistribution;} + virtual FactoredStateDistribution* GetFactoredISD () + {return _m_initialStateDistribution;} + virtual StateDistribution* GetISD () const + {return _m_initialStateDistribution;} + +//convenience, but not require by interface + /// Sets the initial state distribution to a uniform one. + void SetUniformISD(); + //void SetISD(const std::vector& v); + void SetISD( FactoredStateDistribution* p) + { _m_initialStateDistribution = p;} + + + + void PrintDebugStuff() const; +}; + +inline +size_t MADPComponentFactoredStates::GetNrStates() const +{return _m_nrStates;}; + +#endif /* !_MADPCOMPONENTFACTOREDSTATES_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcess.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcess.cpp new file mode 100644 index 000000000..3ab96adbe --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcess.cpp @@ -0,0 +1,116 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "MultiAgentDecisionProcess.h" +#include "Scope.h" + +using namespace std; + +#define DEBUG_MADP 0 + +MultiAgentDecisionProcess::MultiAgentDecisionProcess(size_t nrAgents, + const string &name, const string &description, const string &pf) : + _m_name(name, description), + _m_nrAgents(nrAgents) + ,_m_allAgentsScope("<>") + ,_m_problemFile(pf) +{ + InitializeUnixName(); + for(Index i=0; i") + ,_m_problemFile(pf) +{ + InitializeUnixName(); +} + +void MultiAgentDecisionProcess::InitializeUnixName() +{ + // strip everything before and including the last / + string unixName=_m_problemFile.substr(_m_problemFile.find_last_of('/') + 1); + + // and after the last . + _m_unixName=unixName.substr(0,unixName.find_last_of('.')); +} + +MultiAgentDecisionProcess::~MultiAgentDecisionProcess() +{ + _m_problemFile.clear(); +} + +size_t MultiAgentDecisionProcess::GetNrAgents() const +{ + return(_m_nrAgents); +} + +void MultiAgentDecisionProcess::SetNrAgents(size_t nrAgents) +{ + _m_nrAgents = nrAgents; + _m_agents.clear(); + for(Index i = 0; i < nrAgents; i++) + { + _m_agents.push_back(Agent(i)); + _m_allAgentsScope.Insert(i); + } +} + +void MultiAgentDecisionProcess::AddAgent(string name) +{ + Index agI = _m_nrAgents++; + _m_agents.push_back( Agent(agI, name) ); + _m_allAgentsScope.Insert(agI); +} + +Index MultiAgentDecisionProcess::GetAgentIndexByName(const string &s) const +{ + vector::const_iterator it; + for(it = _m_agents.begin(); it != _m_agents.end(); it++) + { + string s2 = (*it).GetName(); + if(s == s2) + return( (*it).GetIndex() ); + } + //not found + stringstream ss; + ss << "GetAgentIndexByName - agent \"" << s << " not found." 
<< endl; + throw E(ss.str().c_str()); +} + +string MultiAgentDecisionProcess::GetAgentNameByIndex(Index i) const +{ + return _m_agents[i].GetName(); +} + +string MultiAgentDecisionProcess::GetProblemFile() const +{ + return(_m_problemFile); +} + +string MultiAgentDecisionProcess::SoftPrint() const +{ + stringstream ss; + ss << "Problem:"<< _m_name.GetName()<< endl; + ss << "descr.:"<< _m_name.GetDescription() << endl; + ss << "nrAgents=" << _m_nrAgents << endl; + ss << "problem file=" << _m_problemFile << endl; + ss << "unixName=" << _m_unixName << endl; + return(ss.str()); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcess.h b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcess.h new file mode 100644 index 000000000..d8f3e962b --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcess.h @@ -0,0 +1,131 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _MULTIAGENTDECISIONPROCESS_H_ +#define _MULTIAGENTDECISIONPROCESS_H_ 1 + +/* the include directives */ +#include +#include + +#include "NamedDescribedEntity.h" +#include "MultiAgentDecisionProcessInterface.h" +#include "Agent.h" +#include "Globals.h" +#include "Scope.h" + +/**\brief MultiAgentDecisionProcess is an class that defines the primary + * properties of a decision process. 
+ * + * These primary properties are: + * -the number of agents + * -possibly, a vector of (named) agents + * -the filename to be parsed, if applicable. + * + * It is the only current implementation of MultiAgentDecisionProcessInterface + * */ +class MultiAgentDecisionProcess : + virtual public MultiAgentDecisionProcessInterface + //,public NamedDescribedEntity +{ + private: + NamedDescribedEntity _m_name; + + void InitializeUnixName(); + + protected: + /**The number of agents participating in the MADP.*/ + size_t _m_nrAgents; + /**Vector containing Agent objects, which are indexed named entities.*/ + std::vector _m_agents; + /**Scope containing all agents - useful sometimes*/ + Scope _m_allAgentsScope; + + /**String holding the filename of the problem file to be parsed - if + * applicable.*/ + std::string _m_problemFile; + + /**String for the unix name of the MADP.*/ + std::string _m_unixName; + + public: + // Constructor, destructor and copy assignment. + /// Constructor. + MultiAgentDecisionProcess( + size_t nrAgents, + const std::string &name="received unspec. by MultiAgentDecisionProcess", + const std::string &description="received unspec. by MultiAgentDecisionProcess", + const std::string &pf="received unspec. by MultiAgentDecisionProcess"); + + /// Default Constructor without specifying the number of agents. + MultiAgentDecisionProcess( + const std::string &name="received unspec. by MultiAgentDecisionProcess", + const std::string &description="received unspec. by MultiAgentDecisionProcess", + const std::string &pf="received unspec. by MultiAgentDecisionProcess"); + + ///Destructor. + virtual ~MultiAgentDecisionProcess(); + + /**Sets the number of agents. This creates nrAgents unnamed agents.*/ + void SetNrAgents(size_t nrAgents); + /**Add a new agent with name "name" to the MADP. + * NOTE: This increments the number of agents as well!*/ + void AddAgent(std::string name); + + /**Returns the index of an agent given its name, if it exists.
*/ + Index GetAgentIndexByName(const std::string &s) const; + + /**Returns the name of the agent at the given index. */ + std::string GetAgentNameByIndex(Index i) const; + + /**Returns the number of agents in this MultiAgentDecisionProcess. */ + size_t GetNrAgents() const; + + const Scope& GetAllAgentScope() const + {return _m_allAgentsScope;} + + /**Returns the name of the problem file. */ + std::string GetProblemFile() const; + + /// \brief Returns a name which can be used in unix paths, by + /// default the base part of the problem filename. + std::string GetUnixName() const { return(_m_unixName); } + + /// Sets the name which can be used in unix paths. + void SetUnixName(std::string unixName) { _m_unixName=unixName; } + + /** Prints some information on the MultiAgentDecisionProcess.*/ + std::string SoftPrint() const; + void Print() const + {std::cout << SoftPrint();} + + // forwards to _m_name - remove these as much as possible! + std::string GetName() const {return _m_name.GetName();}; + std::string GetDescription() const {return _m_name.GetDescription();}; + void SetName(const std::string &name) {_m_name.SetName(name);} + void SetDescription(const std::string &description){_m_name.SetDescription(description);} + //virtual std::string SoftPrint() const; + //virtual std::string SoftPrintBrief() const; + //void Print() const {std::cout << SoftPrint() << std::endl; } + //void PrintBrief() const {std::cout << SoftPrintBrief() << std::endl; } +}; + +#endif /* !_MULTIAGENTDECISIONPROCESS_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscrete.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscrete.cpp new file mode 100644 index 000000000..255892669 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscrete.cpp @@ -0,0 +1,362 @@ +/* This file is part of the Multiagent Decision Process (MADP)
Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "MultiAgentDecisionProcessDiscrete.h" +#include "ObservationModelMapping.h" +#include "ObservationModelMappingSparse.h" +#include "EventObservationModelMapping.h" +#include "EventObservationModelMappingSparse.h" +#include "TransitionModelMapping.h" +#include "TransitionModelMappingSparse.h" +#include + +#include "TGet.h" +#include "OGet.h" + + +using namespace std; + +#define DEBUG_MADPD 0 + +MultiAgentDecisionProcessDiscrete::MultiAgentDecisionProcessDiscrete( + const string &name, const string &descr, const string &pf) : + MultiAgentDecisionProcessDiscreteInterface(), + MultiAgentDecisionProcess(name, descr, pf) +{ + _m_initialized = false; + _m_sparse = false; + _m_eventObservability = false; + _m_p_tModel = 0; + _m_p_oModel = 0; + _m_falseNegativeObs = -1; +} + +MultiAgentDecisionProcessDiscrete::MultiAgentDecisionProcessDiscrete( + size_t nrAgents, size_t nrS, + const string &name, const string &descr, const string &pf) : + MultiAgentDecisionProcessDiscreteInterface(), + MultiAgentDecisionProcess(nrAgents,name, descr, pf), + _m_S(nrS) +{ + _m_initialized = false; + _m_sparse = false; + _m_eventObservability = false; + _m_p_tModel = 0; + _m_p_oModel = 0; + _m_falseNegativeObs = -1; +} + +MultiAgentDecisionProcessDiscrete::MultiAgentDecisionProcessDiscrete( + const MultiAgentDecisionProcessDiscrete& a) : + MultiAgentDecisionProcessDiscreteInterface(a), + MultiAgentDecisionProcess(a) +{ + _m_S=a._m_S; + _m_A=a._m_A; + _m_O=a._m_O; + _m_initialized=a._m_initialized; + 
_m_sparse=a._m_sparse; + _m_eventObservability=a._m_eventObservability; + _m_p_tModel=a._m_p_tModel->Clone(); + _m_p_oModel=a._m_p_oModel->Clone(); + _m_falseNegativeObs = a._m_falseNegativeObs; +} + +MultiAgentDecisionProcessDiscrete::~MultiAgentDecisionProcessDiscrete() +{ + if(DEBUG_MADPD) + cout << "deleting MultiAgentDecisionProcessDiscrete " + << "(deleting T and O model )"<SoftPrint(); + ss << "Observation model: " << endl; + ss << _m_p_oModel->SoftPrint(); + } + return(ss.str()); +} + +bool MultiAgentDecisionProcessDiscrete::SanityCheck() +{ + size_t nrJA=GetNrJointActions(), + nrS=GetNrStates(), + nrJO=GetNrJointObservations(); + + double sum,p; + bool sane=true; + + // check transition model + for(Index a=0;a(1.0 + PROB_PRECISION/2)) || + (sum < (1.0 - PROB_PRECISION/2))) + { + sane=false; + stringstream ss; + //string float_str; + char float_str[30]; + sprintf(float_str, "%10.10f", sum); + ss << "MultiAgentDecisionProcessDiscrete::SanityCheck failed:"<< + " transition does not sum to 1 but to:\n" << float_str << + "\n for (s,a)==(" << _m_S.GetStateName(from) << "[" << from << + "]," << _m_A.GetJointActionName(a) << "[" << a << "])"; + throw E(ss); + } + } + } + + // check observation model + + for(Index from=0;from(1.0 + PROB_PRECISION/2)) || + (sum < (1.0 - PROB_PRECISION/2))) + { + char float_str[30]; + sprintf(float_str, "%10.10f", sum); + sane=false; + stringstream ss; + ss << "MultiAgentDecisionProcessDiscrete::SanityCheck " + << "failed: observation does not sum to 1 but to \n" + << float_str << "\n for (s',a)==(" << _m_S.GetStateName(to) + << "[" << to << "]," + << _m_A.GetJointActionName(a) + << "[" << a << "])"; + throw E(ss); + } + } + } + if(!_m_eventObservability) break; + } + + return(sane); +} + + +void MultiAgentDecisionProcessDiscrete::SetSparse(bool sparse) +{ + _m_sparse=sparse; +} + +void MultiAgentDecisionProcessDiscrete::SetEventObservability(bool eventO) +{ + _m_eventObservability=eventO; +} diff --git 
a/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscrete.h new file mode 100644 index 000000000..8f7f86de9 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscrete.h @@ -0,0 +1,343 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _MULTIAGENTDECISIONPROCESSDISCRETE_H_ +#define _MULTIAGENTDECISIONPROCESSDISCRETE_H_ 1 + +/* the include directives */ +#include +#include + +#include "MultiAgentDecisionProcess.h" +#include "MultiAgentDecisionProcessDiscreteInterface.h" +#include "MADPComponentDiscreteActions.h" +#include "MADPComponentDiscreteObservations.h" +#include "MADPComponentDiscreteStates.h" +#include "StateDistributionVector.h" +#include "StateDistribution.h" +class TGet; + +/**\brief MultiAgentDecisionProcessDiscrete defines the primary properties + * of a discrete decision process. + * + * MultiAgentDecisionProcessDiscrete defines the primary properties + * of a discrete decision process. + * + * It extends MultiAgentDecisionProcess and holds MADPComponentDiscreteActions and + * MADPComponentDiscreteObservations members, + * such that, for each of the MultiAgentDecisionProcess::GetNrAgents() it stores + * -the discrete action sets + * -the discrete observation sets + * + * It also holds a MADPComponentDiscreteStates member such that a discrete state space + * is implemented.
+ * + * Finally, this class itself stores (pointers to) + * -the transition model + * -the observation model */ +class MultiAgentDecisionProcessDiscrete : + virtual public MultiAgentDecisionProcessDiscreteInterface, + virtual public MultiAgentDecisionProcessInterface, + public MultiAgentDecisionProcess +{ +private: + + MADPComponentDiscreteStates _m_S; + MADPComponentDiscreteActions _m_A; + MADPComponentDiscreteObservations _m_O; + + ///Boolean to indicate whether this MADPDiscrete has been initialized. + bool _m_initialized; + + /**\brief Boolean that controls whether sparse transition and + * observation models are used. + */ + bool _m_sparse; + + /**\brief Boolean that controls whether the observation model is defined over events. + */ + bool _m_eventObservability; + + /**\brief Check whether models appear valid probability + * distributions. + * + * This is a private function as to avoid + * doubts as to what is and is not checked. (when called from + * a derived class, no derived features are checked).*/ + bool SanityCheck(void); + + /// Pointer to transition model + TransitionModelDiscrete* _m_p_tModel; + + /// Pointer to observation model + ObservationModelDiscrete* _m_p_oModel; + + /**\brief An index representing false negative observations, which are seen by + * planners, but not by agents. This allows us to model and simulate unobservable transitions. + * A negative value indicates that this property has not been set. + */ + int _m_falseNegativeObs; + +protected: + + //data manipulation (set) functions: + /**\brief Sets _m_initialized to b. + * + * When setting to true, a verification of + * member elements is performed. (i.e. a check whether all vectors + * have the correct size and non-zero entries) */ + bool SetInitialized(bool b); +public: + // Constructor, destructor and copy assignment. 
+ + /**\brief Constructor that sets the + * \li name + * \li description + * \li problem file for the MADP.*/ + MultiAgentDecisionProcessDiscrete( + const std::string &name="received unspec. by MultiAgentDecisionProcessDiscrete", + const std::string &descr="received unspec.by MultiAgentDecisionProcessDiscrete", + const std::string &pf="received unspec. by MultiAgentDecisionProcessDiscrete"); + /**\brief Constructor that sets the + * \li nrAgents + * \li nrStates + * \li name + * \li description + * \li problem file for the MADP.*/ + MultiAgentDecisionProcessDiscrete( + size_t nrAgents, size_t nrS, + const std::string &name="received unspec. by MultiAgentDecisionProcessDiscrete", + const std::string &descr="received unspec.by MultiAgentDecisionProcessDiscrete", + const std::string &pf="received unspec. by MultiAgentDecisionProcessDiscrete"); + + /// Copy constructor. + MultiAgentDecisionProcessDiscrete(const MultiAgentDecisionProcessDiscrete& a); + + ///Destructor. + ~MultiAgentDecisionProcessDiscrete(); + + size_t GetNrStates() const { return(_m_S.GetNrStates()); } + const State* GetState(Index i) const { return(_m_S.GetState(i)); } + std::string SoftPrintState(Index sI) const { return(_m_S.SoftPrintState(sI)); } + double GetInitialStateProbability(Index sI) const { return(_m_S.GetInitialStateProbability(sI)); } + StateDistribution* GetISD() const { return(_m_S.GetISD()); } + Index SampleInitialState() const { return(_m_S.SampleInitialState()); } + + const std::vector& GetNrActions() const { return(_m_A.GetNrActions()); } + size_t GetNrActions(Index AgentI) const { return(_m_A.GetNrActions(AgentI)); } + size_t GetNrJointActions() const { return(_m_A.GetNrJointActions()); } + size_t GetNrJointActions(const Scope& agScope) const { return(_m_A.GetNrJointActions(agScope)); } + const Action* GetAction(Index agentI, Index a) const { return(_m_A.GetAction(agentI,a)); } + const JointAction* GetJointAction(Index i) const { return(_m_A.GetJointAction(i)); } + Index 
IndividualToJointActionIndices(const Index* AI_ar) const + { return(_m_A.IndividualToJointActionIndices(AI_ar)); } + Index IndividualToJointActionIndices(const std::vector& indivActionIndices) const + { return(_m_A.IndividualToJointActionIndices(indivActionIndices)); } + const std::vector& JointToIndividualActionIndices(Index jaI) const + { return(_m_A.JointToIndividualActionIndices(jaI)); } + Index IndividualToJointActionIndices(const std::vector& ja_e, const Scope& agSC) const + { return(_m_A.IndividualToJointActionIndices(ja_e, agSC)); } + std::vector JointToIndividualActionIndices(Index ja_e, const Scope& agSC) const + { return(_m_A.JointToIndividualActionIndices(ja_e, agSC)); } + Index JointToRestrictedJointActionIndex(Index jaI, const Scope& agSc_e ) const + { return(_m_A.JointToRestrictedJointActionIndex(jaI, agSc_e)); } + + const std::vector& GetNrObservations() const { return(_m_O.GetNrObservations()); } + size_t GetNrObservations(Index AgentI) const { return(_m_O.GetNrObservations(AgentI)); } + size_t GetNrJointObservations() const { return(_m_O.GetNrJointObservations()); } + const Observation* GetObservation(Index agentI, Index a) const + { return(_m_O.GetObservation(agentI,a)); } + const JointObservation* GetJointObservation(Index i) const + { return(_m_O.GetJointObservation(i)); } + Index IndividualToJointObservationIndices(const std::vector& indivObservationIndices) const + { return(_m_O.IndividualToJointObservationIndices(indivObservationIndices)); } + const std::vector& JointToIndividualObservationIndices(Index joI) const + { return(_m_O.JointToIndividualObservationIndices(joI)); } + Index IndividualToJointObservationIndices( + const std::vector& jo_e, const Scope& agSC) const + { return(_m_O.IndividualToJointObservationIndices(jo_e,agSC)); } + std::vector JointToIndividualObservationIndices(Index jo_e, const Scope& agSC) const + { return(_m_O.JointToIndividualObservationIndices(jo_e,agSC)); } + Index JointToRestrictedJointObservationIndex(Index 
joI, const Scope& agSc_e ) const + { return(_m_O.JointToRestrictedJointObservationIndex(joI,agSc_e)); } + + + void SetNrStates(size_t nrS) { _m_S.SetNrStates(nrS); } + void AddState(const std::string &StateName) { _m_S.AddState(StateName); } + void SetISD(StateDistribution* p) { _m_S.SetISD(p); } +// void SetISD(StateDistributionVector* p) { _m_S.SetISD(p); } + void SetUniformISD() { _m_S.SetUniformISD(); } + Index GetStateIndexByName(const std::string &s) const + { return(_m_S.GetStateIndexByName(s)); } + bool SetStatesInitialized(bool b) { return(_m_S.SetInitialized(b)); } + + void SetNrObservations(Index AI, size_t nrO) { _m_O.SetNrObservations(AI,nrO); } + void AddObservation(Index AI, const std::string &name, const std::string &description="") + { _m_O.AddObservation(AI,name, description); } + const ObservationDiscrete* GetObservationDiscrete(Index agentI, + Index a) const + { return(_m_O.GetObservationDiscrete(agentI,a)); } + size_t ConstructJointObservations() { return(_m_O.ConstructJointObservations()); } + Index GetObservationIndexByName(const std::string &o, Index agentI) const + { return(_m_O.GetObservationIndexByName(o,agentI)); } + bool SetObservationsInitialized(bool b) { return(_m_O.SetInitialized(b)); } + + void SetNrActions(Index AI, size_t nrA) { _m_A.SetNrActions(AI,nrA); } + void AddAction(Index AI, const std::string &name, const std::string &description="") + { _m_A.AddAction(AI,name, description); } + const ActionDiscrete* GetActionDiscrete(Index agentI, Index a) const + { return(_m_A.GetActionDiscrete(agentI,a)); } + size_t ConstructJointActions() { return(_m_A.ConstructJointActions()); } + Index GetActionIndexByName(const std::string &a, Index agentI) const + { return(_m_A.GetActionIndexByName(a,agentI)); } + bool SetActionsInitialized(bool b) { return(_m_A.SetInitialized(b)); } + + /**\brief A function that can be called by other classes in order to + * request a MultiAgentDecisionProcessDiscrete to (try to) + * initialize.*/ + bool 
Initialize() + {return SetInitialized(true);} + + ///Creates a new transition model mapping. + void CreateNewTransitionModel(); + ///Creates a new observation model mapping. + void CreateNewObservationModel(); + + ///Set the probability of successor state sucSI: P(sucSI|sI,jaI). + void SetTransitionProbability(Index sI, Index jaI, Index sucSI, + double p); + + ///Set the probability of joint observation joI: P(joI|jaI,sucSI). + void SetObservationProbability(Index jaI, Index sucSI, Index joI, + double p); + void SetObservationProbability(Index sI, Index jaI, Index sucSI, Index joI, + double p); + // 'get' functions: + ///Return the probability of successor state sucSI: P(sucSI|sI,jaI). + double GetTransitionProbability(Index sI, Index jaI, Index sucSI) + const; + + TGet* GetTGet() const; + OGet* GetOGet() const; + + ///Return the probability of joint observation joI: P(joI|jaI,sucSI). + double GetObservationProbability(Index jaI, Index sucSI, Index joI) + const; + double GetObservationProbability(Index sI, Index jaI, Index sucSI, Index joI) + const; + + /// Sample a successor state. + Index SampleSuccessorState(Index sI, Index jaI) const; + + /// Sample an observation. + Index SampleJointObservation(Index jaI, Index sucI) const; + Index SampleJointObservation(Index sI, Index jaI, Index sucI) const; + + + ///SoftPrints information on the MultiAgentDecisionProcessDiscrete. + std::string SoftPrint() const; + ///Prints some information on the MultiAgentDecisionProcessDiscrete. + void Print() const + { std::cout << SoftPrint();} + + /**\brief Indicate whether sparse transition and observation models + * should be used. + * + * Default is to not use sparse models. Only + * has effect before the class has been initialized. */ + void SetSparse(bool sparse); + + /// Are we using sparse transition and observation models? 
+ bool GetSparse() const { return(_m_sparse); } + + /**\brief Indicate whether the observation model + * is defined over (s',a,s) (an event-driven model) + * or the standard (s',a) + * + * Default is to not use event-driven models.*/ + void SetEventObservability(bool eventO); + + ///Sets the index for false negative observations (see above) + void SetFalseNegativeObs(Index falseNegativeObs) + {_m_falseNegativeObs = falseNegativeObs;} + + /// Are we using an event observation model? + bool GetEventObservability() const { return(_m_eventObservability); } + + ///Gets the index for false negative observations (if any). + ///A negative value means that there are none (which is the default case). + int GetFalseNegativeObs() const { return(_m_falseNegativeObs); } + + const TransitionModelDiscrete* GetTransitionModelDiscretePtr() const + { return(_m_p_tModel); } + + const ObservationModelDiscrete* GetObservationModelDiscretePtr() const + { return(_m_p_oModel); } + + /// Set the transition model. + void SetTransitionModelPtr(TransitionModelDiscrete* ptr) + { _m_p_tModel=ptr; } + + /// Set the observation model. + void SetObservationModelPtr(ObservationModelDiscrete* ptr) + { _m_p_oModel=ptr; } + + /// Returns a pointer to a copy of this class.
+ virtual MultiAgentDecisionProcessDiscrete* Clone() const + { return new MultiAgentDecisionProcessDiscrete(*this); } + +}; + +#include "TransitionModelDiscrete.h" +#include "ObservationModelDiscrete.h" + +inline void MultiAgentDecisionProcessDiscrete::SetTransitionProbability(Index + sI, Index jaI, Index sucSI, double p) +{ _m_p_tModel->Set(sI, jaI, sucSI, p);} +inline void MultiAgentDecisionProcessDiscrete::SetObservationProbability(Index + jaI, Index sucSI, Index joI, double p) +{ _m_p_oModel->Set(jaI, sucSI, joI,p);} +inline void MultiAgentDecisionProcessDiscrete::SetObservationProbability(Index sI, Index + jaI, Index sucSI, Index joI, double p) +{ _m_p_oModel->Set(sI, jaI, sucSI, joI,p);} +inline double +MultiAgentDecisionProcessDiscrete::GetTransitionProbability(Index sI, Index + jaI, Index sucSI) const +{ return(_m_p_tModel->Get(sI, jaI, sucSI));} +inline double +MultiAgentDecisionProcessDiscrete::GetObservationProbability(Index jaI, Index + sucSI, Index joI) const +{ return(_m_p_oModel->Get(jaI, sucSI, joI));} +inline double +MultiAgentDecisionProcessDiscrete::GetObservationProbability(Index sI, Index jaI, Index + sucSI, Index joI) const +{ return(_m_p_oModel->Get(sI, jaI, sucSI, joI));} +inline Index MultiAgentDecisionProcessDiscrete::SampleSuccessorState(Index + sI, Index jaI) const +{ return(_m_p_tModel->SampleSuccessorState(sI,jaI));} +inline Index MultiAgentDecisionProcessDiscrete::SampleJointObservation(Index + jaI, Index sucI) const +{ return(_m_p_oModel->SampleJointObservation(jaI,sucI)); } +inline Index MultiAgentDecisionProcessDiscrete::SampleJointObservation(Index sI, Index + jaI, Index sucI) const +{ return(_m_p_oModel->SampleJointObservation(sI,jaI,sucI)); } + +#endif /* !_MULTIAGENTDECISIONPROCESSDISCRETE_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscreteFactoredStates.cpp
b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscreteFactoredStates.cpp new file mode 100644 index 000000000..ec5f20c25 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscreteFactoredStates.cpp @@ -0,0 +1,1115 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "MultiAgentDecisionProcessDiscreteFactoredStates.h" +#include "TransitionModelMappingSparse.h" +#include "TransitionModelMapping.h" +#include "ObservationModelMappingSparse.h" +#include "ObservationModelMapping.h" +#include "EventObservationModelMappingSparse.h" +#include "EventObservationModelMapping.h" +#include "TGet.h" +#include "OGet.h" +#include "VectorTools.h" +#include "CPT.h" +#include "StateFactorDiscrete.h" + +using namespace std; + +#define DEBUG_2DBN 0 +#define DEBUG_DOSANITYCHECK 0 +#define SKIP_IMPOSSIBLE_EVENTS 0 + +//Default constructor +MultiAgentDecisionProcessDiscreteFactoredStates:: +MultiAgentDecisionProcessDiscreteFactoredStates( + const string &name, const string &descr, const string &pf) : + MultiAgentDecisionProcess(name, descr, pf) + ,_m_p_tModel(0) + ,_m_p_oModel(0) + ,_m_cached_FlatTM(false) + ,_m_sparse_FlatTM(false) + ,_m_cached_FlatOM(false) + ,_m_sparse_FlatOM(false) + ,_m_eventObservability(false) + ,_m_2dbn(*this) +{ +} +//Copy constructor. 
+MultiAgentDecisionProcessDiscreteFactoredStates::MultiAgentDecisionProcessDiscreteFactoredStates(const MultiAgentDecisionProcessDiscreteFactoredStates& o) + : + _m_2dbn(o._m_2dbn) +{ +} +//Destructor +MultiAgentDecisionProcessDiscreteFactoredStates::~MultiAgentDecisionProcessDiscreteFactoredStates() +{ + delete _m_p_tModel; + delete _m_p_oModel; +} +//Copy assignment operator +MultiAgentDecisionProcessDiscreteFactoredStates& MultiAgentDecisionProcessDiscreteFactoredStates::operator= (const MultiAgentDecisionProcessDiscreteFactoredStates& o) +{ + if (this == &o) return *this; // Gracefully handle self assignment + // Put the normal assignment duties here... + _m_2dbn = o._m_2dbn; + throw(E("MultiAgentDecisionProcessDiscreteFactoredStates: ctor not yet implemented")); + return *this; +} + +string MultiAgentDecisionProcessDiscreteFactoredStates::SoftPrint() const +{ + stringstream ss; + + ss << MultiAgentDecisionProcess::SoftPrint(); + ss << _m_S.SoftPrint(); + ss << _m_A.SoftPrint(); + ss << _m_O.SoftPrint(); + ss << _m_2dbn.SoftPrint(); + if(_m_initialized) + { + ss << "Transition model: " << + "(not yet implemented)" + << endl; + //ss << _m_p_tModel->SoftPrint(); + ss << "Observation model: " << + "(not yet implemented)" + << endl; + //ss << _m_p_oModel->SoftPrint(); + } + return(ss.str()); +} + +bool MultiAgentDecisionProcessDiscreteFactoredStates::SetInitialized(bool b) +{ + + if(b == true) + { + if( !_m_A.SetInitialized(b) + || !_m_O.SetInitialized(b) + || !_m_S.SetInitialized(b) ) + { + //error in initialization of sub-components. + _m_initialized = false; + return(false); + } + ////check if transition- and observation model are present... + //if(_m_p_tModel == 0) + //{ + //throw E("MultiAgentDecisionProcessDiscrete::SetInitialized() -initializing a MultiAgentDecisionProcessDiscrete which has no transition model! 
- make sure that CreateNewObservationModel() has been called before SetInitialized()"); + //} + //if(_m_p_oModel == 0) + //{ + //throw E("MultiAgentDecisionProcessDiscrete::SetInitialized() -initializing a MultiAgentDecisionProcessDiscrete which has no observation model! - make sure that CreateNewObservationModel() has been called before SetInitialized()"); + + //} + + if( SanityCheck() ) + { + _m_initialized = true; + return(true); + } + else + { + _m_initialized = false; + return(false); + } + } + else + { + _m_A.SetInitialized(b); + _m_O.SetInitialized(b); + _m_S.SetInitialized(b); + _m_initialized = false; + return(true); + } + +} + +bool MultiAgentDecisionProcessDiscreteFactoredStates::SanityCheckTransitions(void) const +{ + // mainly a copy from + // MultiAgentDecisionProcessDiscrete::SanityCheck(), should + // perhaps be more customized for the factored version + + bool sane=true; + +#if DEBUG_DOSANITYCHECK + size_t nrJA=GetNrJointActions(), + nrS=GetNrStates(); + + double sum,p; + + // check transition model + for(Index a=0;a(1.0 + PROB_PRECISION/2)) || + (sum < (1.0 - PROB_PRECISION/2))) + { + sane=false; + stringstream ss; + //string float_str; + char float_str[30]; + sprintf(float_str, "%10.10f", sum); + ss << "MultiAgentDecisionProcessDiscreteFactoredStates::SanityCheck failed:"<< + " transition does not sum to 1 but to:\n" << float_str << + "\n for (s,a)==(" << SoftPrintVector(StateIndexToFactorValueIndices(from)) << "," << SoftPrintVector(JointToIndividualActionIndices(a)) << ")"; + throw E(ss); + } + } + } +#else +#if MADP_DFS_WARNINGS + cout << "MultiAgentDecisionProcessDiscreteFactoredStates::SanityCheckTransitions() not implemented, no check performed" << endl; +#endif +#endif + return(sane); +} + +bool MultiAgentDecisionProcessDiscreteFactoredStates::SanityCheckObservations(void) const +{ + bool sane=true; + +#if DEBUG_DOSANITYCHECK + size_t nrJA=GetNrJointActions(), + nrS=GetNrStates(), + nrJO=GetNrJointObservations(); + + double sum,p; + + // 
check observation model + for(Index from=0;from(1.0 + PROB_PRECISION/2)) || + (sum < (1.0 - PROB_PRECISION/2))) + { + char float_str[30]; + sprintf(float_str, "%10.10f", sum); + sane=false; + stringstream ss; + ss << "MultiAgentDecisionProcessDiscreteFactoredStates::SanityCheck " + << "failed: observation does not sum to 1 but to \n" + << float_str << "\n for (s',a)==(" + << SoftPrintVector(StateIndexToFactorValueIndices(to)) + << "," + << SoftPrintVector(JointToIndividualActionIndices(a)) + << ")"; + throw E(ss); + } + } + } + } +#else +#if MADP_DFS_WARNINGS + cout << "MultiAgentDecisionProcessDiscreteFactoredStates::SanityCheckObservations() not implemented, no check performed" << endl; +#endif +#endif + return(sane); +} + +double MultiAgentDecisionProcessDiscreteFactoredStates:: +GetTransitionProbability (Index sI, Index jaI, Index sucSI) const +{ + if(_m_cached_FlatTM) + return _m_p_tModel->Get(sI, jaI, sucSI); + + vector X = StateIndexToFactorValueIndices(sI); + vector Y = StateIndexToFactorValueIndices(sucSI); + vector A = JointToIndividualActionIndices(jaI); + return(_m_2dbn.GetYProbability(X,A,Y)); + +} + +TGet* MultiAgentDecisionProcessDiscreteFactoredStates:: +GetTGet() const +{ + if(!_m_cached_FlatTM) + return 0; + + if(_m_sparse_FlatTM) + return new TGet_TransitionModelMappingSparse( + ((TransitionModelMappingSparse*)_m_p_tModel) ); + else + return new TGet_TransitionModelMapping( + ((TransitionModelMapping*)_m_p_tModel) ); + + +} + +double MultiAgentDecisionProcessDiscreteFactoredStates:: +GetObservationProbability (Index jaI, Index sucSI, Index joI) const +{ + if(_m_cached_FlatOM) + return _m_p_oModel->Get(jaI, sucSI, joI); + vector O = JointToIndividualObservationIndices(joI); + vector Y = StateIndexToFactorValueIndices(sucSI); + vector A = JointToIndividualActionIndices(jaI); + return(_m_2dbn.GetOProbability(A,Y,O)); +} + +double MultiAgentDecisionProcessDiscreteFactoredStates:: +GetObservationProbability (Index sI, Index jaI, Index sucSI, Index 
joI) const +{ + if(_m_cached_FlatOM) + return _m_p_oModel->Get(sI, jaI, sucSI, joI); + vector X = StateIndexToFactorValueIndices(sI); + vector O = JointToIndividualObservationIndices(joI); + vector Y = StateIndexToFactorValueIndices(sucSI); + vector A = JointToIndividualActionIndices(jaI); + return(_m_2dbn.GetOProbability(X,A,Y,O)); +} + +// hack: base class shouldn't need to know about +// derived observation models +OGet* MultiAgentDecisionProcessDiscreteFactoredStates:: +GetOGet() const +{ + if(!_m_cached_FlatOM) + return 0; + + if(!_m_eventObservability) //default + { + if(_m_sparse_FlatOM) + return new OGet_ObservationModelMappingSparse( + ((ObservationModelMappingSparse*)_m_p_oModel) ); + else + return new OGet_ObservationModelMapping( + ((ObservationModelMapping*)_m_p_oModel) ); + } + else + { + if(_m_sparse_FlatOM) + return new OGet_EventObservationModelMappingSparse( + ((EventObservationModelMappingSparse*)_m_p_oModel) ); + else + return new OGet_EventObservationModelMapping( + ((EventObservationModelMapping*)_m_p_oModel) ); + } +} + +Index MultiAgentDecisionProcessDiscreteFactoredStates:: +SampleSuccessorState (Index sI, Index jaI) const +{ + if(_m_cached_FlatTM && _m_eventObservability) + return _m_p_tModel->SampleSuccessorState(sI, jaI); + + vector X = StateIndexToFactorValueIndices(sI); + vector A = JointToIndividualActionIndices(jaI); + vector Y; + SampleSuccessorState(X,A,Y); + return( FactorValueIndicesToStateIndex(Y) ); +} + +void MultiAgentDecisionProcessDiscreteFactoredStates:: +SampleSuccessorState(const std::vector &sIs, + const std::vector &aIs, + std::vector &sucIs) const +{ + sucIs=_m_2dbn.SampleY(sIs,aIs); +} + +Index MultiAgentDecisionProcessDiscreteFactoredStates:: +SampleJointObservation(Index jaI, Index sucSI) const +{ + if(_m_cached_FlatOM && _m_eventObservability) + return _m_p_oModel->SampleJointObservation(jaI, sucSI); + + vector X; + vector Y = StateIndexToFactorValueIndices(sucSI); + vector A = JointToIndividualActionIndices(jaI); 
+ vector O; + SampleJointObservation(X,A,Y,O); + return( IndividualToJointObservationIndices(O) ); +} + +Index MultiAgentDecisionProcessDiscreteFactoredStates:: +SampleJointObservation(Index sI, Index jaI, Index sucSI) const +{ + if(_m_cached_FlatOM && _m_eventObservability) + return _m_p_oModel->SampleJointObservation(sI, jaI, sucSI); + + vector X = StateIndexToFactorValueIndices(sI); + vector Y = StateIndexToFactorValueIndices(sucSI); + vector A = JointToIndividualActionIndices(jaI); + vector O; + SampleJointObservation(X,A,Y,O); + return( IndividualToJointObservationIndices(O) ); +} + +void MultiAgentDecisionProcessDiscreteFactoredStates:: +SampleJointObservation(const std::vector &sIs, + const std::vector &aIs, + const std::vector &sucIs, + std::vector &oIs) const +{ + oIs=_m_2dbn.SampleO(sIs, aIs,sucIs); +} + +void MultiAgentDecisionProcessDiscreteFactoredStates:: +CreateNewTransitionModel() +{ + if(!_m_connectionsSpecified) + throw E("CreateNewTransitionModel connections are not yet specified"); + + for(Index y=0; y < GetNrStateFactors(); y++) + _m_2dbn.AddCPDForY(y); +} + +/* Calls AddCPDForO on the 2DBN once per agent (one observation node per agent). + * Throws E when the connections have not been specified yet; the message names + * this function so the failure is not mistaken for CreateNewTransitionModel. */ +void MultiAgentDecisionProcessDiscreteFactoredStates:: +CreateNewObservationModel() +{ + if(!_m_connectionsSpecified) + throw E("CreateNewObservationModel connections are not yet specified"); + + for(Index o=0; o < GetNrAgents(); o++) + _m_2dbn.AddCPDForO(o); + +} + +///Get the number of joint instantiations for the factors in sfScope +size_t MultiAgentDecisionProcessDiscreteFactoredStates::GetNrStateFactorInstantiations(const Scope& sfScope) const +{ + if(sfScope.size()>0) + { + const vector& nrSFvals = GetNrValuesPerFactor(); + vector restr_nrSFvals(sfScope.size()); + IndexTools::RestrictIndividualIndicesToScope( nrSFvals, sfScope, restr_nrSFvals ); + size_t restr_nrJSFvals = VectorTools::VectorProduct(restr_nrSFvals); + return restr_nrJSFvals; + } + else + return(0); +} + + +void MultiAgentDecisionProcessDiscreteFactoredStates::CacheFlatTransitionModel(bool sparse) +{ +
if(!JointAIndicesValid()) + throw EOverflow("MultiAgentDecisionProcessDiscreteFactoredStates::CacheFlatTransitionModel() joint action indices are not available, overflow detected"); + + if(_m_cached_FlatTM) + delete(_m_p_tModel); + + _m_sparse_FlatTM = sparse; + if(sparse) + _m_p_tModel=new TransitionModelMappingSparse(GetNrStates(), + GetNrJointActions()); + else + _m_p_tModel=new TransitionModelMapping(GetNrStates(), + GetNrJointActions()); + + + for(Index sI=0; sISet(sI, jaI, sucsI, p); + } + + _m_cached_FlatTM = true; +} + +void MultiAgentDecisionProcessDiscreteFactoredStates::CacheFlatObservationModel(bool sparse) +{ + if(!JointIndicesValid()) + throw EOverflow("MultiAgentDecisionProcessDiscreteFactoredStates::CacheFlatObservationModel() joint action and/or observation indices are not available, overflow detected"); + + if(_m_cached_FlatOM) + delete(_m_p_oModel); + + _m_sparse_FlatOM = sparse; + if(!_m_eventObservability) + { + if(sparse) + _m_p_oModel = new + ObservationModelMappingSparse(GetNrStates(), + GetNrJointActions(), + GetNrJointObservations()); + else + _m_p_oModel = new + ObservationModelMapping(GetNrStates(), + GetNrJointActions(), + GetNrJointObservations()); + } + else + { + if(sparse) + _m_p_oModel = new + EventObservationModelMappingSparse(GetNrStates(), + GetNrJointActions(), + GetNrJointObservations()); + else + _m_p_oModel = new + EventObservationModelMapping(GetNrStates(), + GetNrJointActions(), + GetNrJointObservations()); + } + + for(Index sI=0; sISet(sI, jaI, sucsI, joI, p); + } + else + { + double p = GetObservationProbability(jaI, sucsI, joI); + if(! 
Globals::EqualProbability(p, 0) ) + _m_p_oModel->Set(jaI, sucsI, joI, p); + } + } + if(!_m_eventObservability) + break; + } + _m_cached_FlatOM = true; +} + +void MultiAgentDecisionProcessDiscreteFactoredStates::Initialize2DBN() +{ + BoundScopeFunctor sf(this, &MultiAgentDecisionProcessDiscreteFactoredStates::SetScopes); + BoundTransitionProbFunctor tf(this, &MultiAgentDecisionProcessDiscreteFactoredStates::ComputeTransitionProb); + BoundObservationProbFunctor of(this, &MultiAgentDecisionProcessDiscreteFactoredStates::ComputeObservationProb); + Initialize2DBN(sf,tf,of); +} + +void MultiAgentDecisionProcessDiscreteFactoredStates::Initialize2DBN(ScopeFunctor& SetScopes, + TransitionProbFunctor& ComputeTransitionProb, + ObservationProbFunctor& ComputeObservationProb) +{ +//Initialize storage in the 2DBN + _m_2dbn.InitializeStorage(); +#if DEBUG_2DBN + cout << "MultiAgentDecisionProcessDiscreteFactoredStates: 2DBN storage initialized"<& nryVals = GetNrValuesPerFactor(); +//Create Transition model +#if DEBUG_2DBN + if(DEBUG_2DBN) cout << ">>>Adding Transition model..."<) : + double p = ComputeTransitionProb(y, yVal, Xs,As,Ys); + + if(p > Globals::PROB_PRECISION) + { +#if DEBUG_2DBN + cout << "P("<GetName()<<":"<GetStateFactorValue(yVal)<<"|...)="; + printf("%.3f, ", p); +#endif + Index iiI = _m_2dbn.IndividualToJointYiiIndices( + y, Xs, As, Ys); + cpt->Set(yVal, iiI, p); + } + } +#if DEBUG_2DBN + cout << endl; +#endif + + } while(! IndexTools::Increment( Ys, r_nrY ) ); + + } while(! IndexTools::Increment( As, r_nrA ) ); + + } while(! 
IndexTools::Increment( Xs, r_nrX ) ); + //cout << "ii_size:" << ii_size << ", ii_size2:" << ii_size2 << endl; + if (ii_size != ii_size2 ) + throw E("ii_size != ii_size2 "); + + //4)attach it to 2BDN + _m_2dbn.SetCPD_Y(y, cpt); + }//end (for y) + + //Only continue if an initialization function for observation probabilities is given + if(!ComputeObservationProb.isEmpty()) + { + const vector& nroVals = GetNrObservations(); +//Create the observation model +#if DEBUG_2DBN + cout << ">>>Adding Observation model..."< yVal(1,Ys[YSoI_o[i]]); + if(_m_2dbn.GetYProbabilityGeneral(XSoI_o,Xs, + ASoI_o,As, + YSoI_o,Ys, + Y,yVal) <= Globals::PROB_PRECISION){ + skipY = true; + break; + } + } + } + } +#endif +#if DEBUG_2DBN + // cout << "\t" << SoftPrintVector(Ys) << " - NS state is " << + // SoftPrintPartialState(YSoI_o, Ys) << endl; +#endif + do{ +#if DEBUG_2DBN + cout << "\t\t";// << SoftPrintVector(Os) //<-empty +#endif + ii_size2++; + for(Index oVal=0; oVal < nrVals_o; oVal++) + { + //compute P(o=oVal | ii=) : + double p; + if(!skipY){ + p = ComputeObservationProb(o, oVal, Xs, As, Ys, Os); + }else{ + p = 1.0/(float) nrVals_o; + } +#if DEBUG_2DBN + cout << "P(o="< Globals::PROB_PRECISION) + cpt->Set(oVal, iiI, p); + } +#if DEBUG_2DBN + cout << endl; +#endif + } while(! IndexTools::Increment( Os, r_nrO ) ); + + } while(! IndexTools::Increment( Ys, r_nrY ) ); + + } while(_m_eventObservability && !IndexTools::Increment( Xs, r_nrX ) ); + + } while(! IndexTools::Increment( As, r_nrA ) ); + + //cout << "ii_size:" << ii_size << ", ii_size2:" << ii_size2 << endl; + if (ii_size != ii_size2 ) + throw E("ii_size != ii_size2 "); + + //4)attach it to 2BDN + _m_2dbn.SetCPD_O(o, cpt); + }//end (for o) + + } else { +#if DEBUG_2DBN + cout << ">>>Skipping addition of Observation model CPTs." 
<< endl; +#endif + } + + SetInitialized(true); +} + +void MultiAgentDecisionProcessDiscreteFactoredStates::MarginalizeTransitionObservationModel(const Index sf, bool sparse) +{ + const Scope& YSoI_sf = _m_2dbn.GetYSoI_Y(sf); + + if(!YSoI_sf.empty()){ + throw E("Cannot marginalize a state factor with NS dependencies. NYI."); + } + + cout << "Marginalizing State Factor " << sf << endl; + + size_t nrS = 1; + vector new_factor_sizes; + for(size_t i = 0; i < GetNrStateFactors(); i++){ + if(i == sf) + continue; + size_t nrX = GetNrValuesForFactor(i); + nrS *= nrX; + new_factor_sizes.push_back(nrX); + } + vector input_factor_sizes = GetNrValuesPerFactor(); + input_factor_sizes[sf] = 1; + vector output_factor_sizes = GetNrValuesPerFactor(); + const vector& actions = GetNrActions(); + + if(!_m_cached_FlatTM) + ConstructJointActions(); + if(!_m_cached_FlatOM) + ConstructJointObservations(); + + TransitionModelDiscrete* marginalized_tm; + ObservationModelDiscrete* marginalized_om; + + _m_sparse_FlatTM = sparse; + _m_sparse_FlatOM = sparse; + + if(_m_eventObservability) + { + if(_m_sparse_FlatTM) + marginalized_tm = new + TransitionModelMappingSparse(nrS, + GetNrJointActions()); + else + marginalized_tm = new + TransitionModelMapping(nrS, + GetNrJointActions()); + if(_m_sparse_FlatOM) + marginalized_om = new + EventObservationModelMappingSparse(nrS, + GetNrJointActions(), + GetNrJointObservations()); + else + marginalized_om = new + EventObservationModelMapping(nrS, + GetNrJointActions(), + GetNrJointObservations()); + } + else + { + if(_m_sparse_FlatTM) + marginalized_tm = new + TransitionModelMappingSparse(nrS, + GetNrJointActions()); + else + marginalized_tm = new + TransitionModelMapping(nrS, + GetNrJointActions()); + if(_m_sparse_FlatOM) + marginalized_om = new + ObservationModelMappingSparse(nrS, + GetNrJointActions(), + GetNrJointObservations()); + else + marginalized_om = new + ObservationModelMapping(nrS, + GetNrJointActions(), + GetNrJointObservations()); + } + + 
Scope XScope = GetAllStateFactorScope(); + Scope YScope(XScope); + Scope AScope; + for(size_t i = 0; i < GetNrAgents(); i++) + AScope.Insert(i); + Scope sfScope; + sfScope.Insert(sf); + + vector X(GetNrStateFactors(),0); + vector A(GetNrAgents(),0); + vector Y(GetNrStateFactors(),0); + do{ + do{ + do{ + Index jaI = IndividualToJointActionIndices(A); + vector post_X; + vector post_Y; + Scope post_Y_Sc; + for(size_t i = 0; i < Y.size(); i++) + if(i != sf){ + post_X.push_back(X[i]); + post_Y.push_back(Y[i]); + post_Y_Sc.Insert(i); + } + + Index post_sI = IndexTools::IndividualToJointIndices(post_X, new_factor_sizes); + Index post_sucsI = IndexTools::IndividualToJointIndices(post_Y, new_factor_sizes); + + double p_t = _m_2dbn.GetYProbability(X,A,Y); + if(! Globals::EqualProbability(p_t, 0) ) + { + double post_p_t = marginalized_tm->Get(post_sI, jaI, post_sucsI); + double p_t_sum = post_p_t+p_t; + if(p_t_sum > Globals::PROB_PRECISION) + marginalized_tm->Set(post_sI, jaI, post_sucsI, p_t_sum); //marginalization + } + double post_p_given_sf = _m_2dbn.GetYProbabilityGeneral(XScope, + X, + AScope, + A, + YScope, + Y, + post_Y_Sc, + post_Y); + + for(Index joI = 0; joI < GetNrJointObservations(); joI++){ + vector O = JointToIndividualObservationIndices(joI); + + double p_o = _m_2dbn.GetOProbability(X,A,Y,O); + if(! Globals::EqualProbability(p_o, 0) ) + { + double post_p_o = marginalized_om->Get(post_sI, jaI, post_sucsI, joI); + double p_o_sum = post_p_o + p_o*post_p_given_sf*p_t; + if(p_o_sum > Globals::PROB_PRECISION) + marginalized_om->Set(post_sI, jaI, post_sucsI, joI, p_o_sum); //marginalization + } + } + }while(!IndexTools::Increment( Y, output_factor_sizes )); + }while(!IndexTools::Increment( A, actions )); + }while(!IndexTools::Increment( X, input_factor_sizes )); + + A.assign(GetNrAgents(),0); + //now we need to sanitize the observation model. 
+ for(Index post_sI = 0; post_sI < nrS; post_sI++){ + for(Index jaI = 0; jaI < GetNrJointActions(); jaI++){ + for(Index post_sucsI = 0; post_sucsI < nrS; post_sucsI++){ + double p_total = 0; + for(Index joI = 0; joI < GetNrJointObservations(); joI++){ + p_total += marginalized_om->Get(post_sI, jaI, post_sucsI, joI); + } + for(Index joI = 0; joI < GetNrJointObservations(); joI++){ + if(p_total > 0){ + double post_p_o = marginalized_om->Get(post_sI, jaI, post_sucsI, joI); + if(post_p_o > Globals::PROB_PRECISION) + marginalized_om->Set(post_sI, jaI, post_sucsI, joI, post_p_o/p_total); //normalization + } + else + marginalized_om->Set(post_sI, jaI, post_sucsI, joI, 1.0/GetNrJointObservations()); //impossible transition + } + } + } + } + + if(_m_cached_FlatTM) + delete(_m_p_tModel); + if(_m_cached_FlatOM) + delete(_m_p_oModel); + + _m_p_tModel = marginalized_tm; + _m_p_oModel = marginalized_om; + + _m_cached_FlatTM = true; + _m_cached_FlatOM = true; + + RemoveStateFactor(sf); + + SanityCheck(); +} + +void MultiAgentDecisionProcessDiscreteFactoredStates::RemoveStateFactor(Index sf) +{ + Scope XScSf = _m_2dbn.GetXSoI_Y(sf); + vector Y_cpts; + vector O_cpts(GetNrAgents()); + vector XSoIY, ASoIY, YSoIY; + vector XSoIO, ASoIO, YSoIO, OSoIO; + + for(size_t i = 0; i < GetNrAgents(); i++) + { + O_cpts[i] = _m_2dbn.GetCPD_O(i)->Clone(); + Scope XSc(_m_2dbn.GetXSoI_O(i)), YSc(_m_2dbn.GetYSoI_O(i)); + for(Index x = sf+1; x < GetNrStateFactors(); x++){ + //This corrects the scope indices, which will be changed after removing sf + Scope sc; + sc.Insert(x); + if(XSc.Contains(x)){ + XSc.Remove(sc); + XSc.Insert(x-1); + } + if(YSc.Contains(x)){ + YSc.Remove(sc); + YSc.Insert(x-1); + } + } + XSoIO.push_back(XSc); + ASoIO.push_back(_m_2dbn.GetASoI_O(i)); + YSoIO.push_back(YSc); + OSoIO.push_back(_m_2dbn.GetOSoI_O(i)); + } + + for(size_t i = 0; i < GetNrStateFactors(); i++) + { + if(i == sf) + continue; + Scope YScI = _m_2dbn.GetYSoI_Y(i); + if(YScI.Contains(sf)) + { + Scope sfSc; + 
sfSc.Insert(sf); + Scope iSc; + iSc.Insert(i); + Scope XSc_i_sf = _m_2dbn.GetXSoI_Y(i); + XSc_i_sf.Insert(XScSf); + Scope ASc_i_sf = _m_2dbn.GetASoI_Y(i); + ASc_i_sf.Insert(_m_2dbn.GetASoI_Y(sf)); + Scope YScI_r(YScI); + YScI_r.Remove(sfSc); + + size_t ii_size = 1; + vector size_XSc_i_sf(XSc_i_sf.size(),0); + vector size_ASc_i_sf(ASc_i_sf.size(),0); + vector size_YScI_r(YScI_r.size(),0); + for(size_t j = 0; j < XSc_i_sf.size(); j++){ + size_XSc_i_sf[j] = GetNrValuesForFactor(XSc_i_sf[j]); + ii_size *= size_XSc_i_sf[j]; + } + for(size_t j = 0; j < ASc_i_sf.size(); j++){ + size_ASc_i_sf[j] = GetNrActions(ASc_i_sf[j]); + ii_size *= size_ASc_i_sf[j]; + } + for(size_t j = 0; j < YScI_r.size(); j++){ + size_YScI_r[j] = GetNrValuesForFactor(YScI_r[j]); + ii_size *= size_YScI_r[j]; + } + size_t nrVals_i = GetNrValuesForFactor(i); + CPT* cpt = new CPT(nrVals_i, ii_size); + + //NOTE: This already fixes the scopes and CPT sizes in the DBN, but it does not yet marginalize + //each CPT. This will be necessary for factored event-driven algorithms. 
+ + Y_cpts.push_back(cpt); + Scope XSc(XSc_i_sf), YSc(YScI_r); + for(Index x = sf+1; x < GetNrStateFactors(); x++){ + Scope sc; + sc.Insert(x); + if(XSc.Contains(x)){ + XSc.Remove(sc); + XSc.Insert(x-1); + } + if(YSc.Contains(x)){ + YSc.Remove(sc); + YSc.Insert(x-1); + } + } + XSoIY.push_back(XSc); + ASoIY.push_back(ASc_i_sf); + YSoIY.push_back(YSc); + } + else + { + Y_cpts.push_back(_m_2dbn.GetCPD_Y(i)->Clone()); + Scope XSc(_m_2dbn.GetXSoI_Y(i)), YSc(_m_2dbn.GetYSoI_Y(i)); + for(Index x = sf+1; x < GetNrStateFactors(); x++){ + Scope sc; + sc.Insert(x); + if(XSc.Contains(x)){ + XSc.Remove(sc); + XSc.Insert(x-1); + } + if(YSc.Contains(x)){ + YSc.Remove(sc); + YSc.Insert(x-1); + } + } + XSoIY.push_back(XSc); + ASoIY.push_back(_m_2dbn.GetASoI_Y(i)); + YSoIY.push_back(YSc); + } + } + + _m_S.RemoveStateFactor(sf); + _m_S.SetInitialized(false); + _m_S.SetInitialized(true); + + //now we need to fix the DBN + _m_2dbn.InitializeStorage(); + for(size_t i = 0; i < GetNrAgents(); i++) + { + _m_2dbn.SetSoI_O(i, XSoIO[i], ASoIO[i], YSoIO[i], OSoIO[i]); + _m_2dbn.SetCPD_O(i, O_cpts[i]); + } + for(size_t i = 0; i < GetNrStateFactors(); i++) + { + _m_2dbn.SetSoI_Y(i, XSoIY[i], ASoIY[i], YSoIY[i]); + _m_2dbn.SetCPD_Y(i, Y_cpts[i]); + } + _m_2dbn.InitializeIIs(); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscreteFactoredStates.h b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscreteFactoredStates.h new file mode 100644 index 000000000..8b47289c5 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscreteFactoredStates.h @@ -0,0 +1,573 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. 
For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _MULTIAGENTDECISIONPROCESSDISCRETEFACTOREDSTATES_H_ +#define _MULTIAGENTDECISIONPROCESSDISCRETEFACTOREDSTATES_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +//#include "MultiAgentDecisionProcessDiscreteInterface.h" +#include "MultiAgentDecisionProcess.h" +#include "MultiAgentDecisionProcessDiscreteFactoredStatesInterface.h" +#include "MADPComponentFactoredStates.h" +#include "MADPComponentDiscreteActions.h" +#include "MADPComponentDiscreteObservations.h" +#include "TwoStageDynamicBayesianNetwork.h" +#include "FSDist_COF.h" + +#define MADP_DFS_WARNINGS 0 + +/**\brief MultiAgentDecisionProcessDiscreteFactoredStates is a class that + * represents the dynamics of a MAS with a factored state space. + * + * The agents are inherited from the MultiAgentDecisionProcess. + * Their actions and observations are discrete and defined and implemented in + * MADPComponentXXX + * + * The state space is factored and defined and implemented in + * MADPComponentFactoredStates. + * + * This class implements/maintains the factored transition and observation + * models through means of a TwoStageDynamicBayesianNetwork. + * */ +class MultiAgentDecisionProcessDiscreteFactoredStates + : + virtual public MultiAgentDecisionProcessDiscreteFactoredStatesInterface, + public MultiAgentDecisionProcess +{ +private: + + MADPComponentFactoredStates _m_S; + MADPComponentDiscreteActions _m_A; + MADPComponentDiscreteObservations _m_O; + + ///Boolean to indicate whether this MADPDiscrete has been initialized. + bool _m_initialized; + + //to add implementation of factored transition and observation model + //... 
+ ///Check whether models appear valid probability distributions. + bool SanityCheck() const + {return(SanityCheckTransitions() && SanityCheckObservations());} + + /** \brief Pointer to *the flat (cached)* transition model + */ + TransitionModelDiscrete* _m_p_tModel; + + /** \brief Pointer to *the flat (cached)* observation model + */ + ObservationModelDiscrete* _m_p_oModel; + + /// True once a flat transition model has been stored in _m_p_tModel. + bool _m_cached_FlatTM; + /// Whether the flat transition model was created with the sparse mapping. + bool _m_sparse_FlatTM; + /// True once a flat observation model has been stored in _m_p_oModel. + bool _m_cached_FlatOM; + /// Whether the flat observation model was created with the sparse mapping. + bool _m_sparse_FlatOM; + + /**\brief Boolean that controls whether the observation model is defined over events. + */ + bool _m_eventObservability; + + TwoStageDynamicBayesianNetwork _m_2dbn; + + virtual void SetYScopes() = 0; + virtual void SetOScopes() = 0; + virtual void SetScopes() + {SetYScopes(); SetOScopes();} + + virtual double ComputeTransitionProb( + Index y, + Index yVal, + const std::vector< Index>& Xs, + const std::vector< Index>& As, + const std::vector< Index>& Ys + ) const = 0; + virtual double ComputeObservationProb( + Index o, + Index oVal, + const std::vector< Index>& As, + const std::vector< Index>& Ys, + const std::vector< Index>& Os + ) const = 0; + /// Overload that additionally receives Xs; this default implementation + /// ignores Xs and forwards to the overload without it. + virtual double ComputeObservationProb( + Index o, + Index oVal, + const std::vector< Index>& Xs, + const std::vector< Index>& As, + const std::vector< Index>& Ys, + const std::vector< Index>& Os + ) const + {return ComputeObservationProb(o,oVal,As,Ys,Os);} + + ///Boolean to indicate whether all connections in the 2TBN are specified + /**If this is the case, then we can allocate space for CPDs. So this var + * is referenced by CreateNewTransitionModel and CreateNewObservationModel. + */ + bool _m_connectionsSpecified; + +protected: + // subclasses have direct access to 2DBN + TwoStageDynamicBayesianNetwork* Get2DBN() + {return &_m_2dbn;} + + virtual bool SanityCheckTransitions() const; + virtual bool SanityCheckObservations() const; + +public: + // Constructor, destructor and copy assignment. 
+ /// (default) Constructor + MultiAgentDecisionProcessDiscreteFactoredStates( + const std::string &name="received unspec. by MultiAgentDecisionProcessDiscreteFactoredStates", + const std::string &descr="received unspec. by MultiAgentDecisionProcessDiscreteFactoredStates", + const std::string &pf="received unspec. by MultiAgentDecisionProcessDiscreteFactoredStates"); + /// Copy constructor. + MultiAgentDecisionProcessDiscreteFactoredStates(const MultiAgentDecisionProcessDiscreteFactoredStates& a); + /// Destructor. + ~MultiAgentDecisionProcessDiscreteFactoredStates(); + /// Copy assignment operator + MultiAgentDecisionProcessDiscreteFactoredStates& operator= (const MultiAgentDecisionProcessDiscreteFactoredStates& o); + + + size_t GetNrStates() const { return(_m_S.GetNrStates()); } + const State* GetState(Index i) const { return(_m_S.GetState(i)); } + std::string SoftPrintState(Index sI) const { return(_m_S.SoftPrintState(sI)); } + double GetInitialStateProbability(Index sI) const { return(_m_S.GetInitialStateProbability(sI)); } + StateDistribution* GetISD() const { return(_m_S.GetISD()); } + Index SampleInitialState() const { return(_m_S.SampleInitialState()); } + void SampleInitialState(std::vector &sIs) const { _m_S.SampleInitialState(sIs); } + + size_t GetNrStateFactors() const { return(_m_S.GetNrStateFactors()); } + const Scope& GetAllStateFactorScope() const { return(_m_S.GetAllStateFactorScope()); } + const std::vector& GetNrValuesPerFactor() const { return(_m_S.GetNrValuesPerFactor()); } + const size_t GetNrValuesForFactor(Index sf) const { return(_m_S.GetNrValuesForFactor(sf)); } + const StateFactorDiscrete* GetStateFactorDiscrete(Index sfacI) const { return(_m_S.GetStateFactorDiscrete(sfacI)); } + const FactoredStateDistribution* GetFactoredISD() const { return(_m_S.GetFactoredISD()); } + std::vector StateIndexToFactorValueIndices(Index stateI) const + { return(_m_S.StateIndexToFactorValueIndices(stateI)); } + Index StateIndexToFactorValueIndex(Index 
factor, Index s) const + { return(_m_S.StateIndexToFactorValueIndex(factor,s)); } + Index FactorValueIndicesToStateIndex(const std::vector &fv) const + { return(_m_S.FactorValueIndicesToStateIndex(fv)); } + Index FactorValueIndicesToStateIndex(const std::vector& s_e_vec, + const Scope& sfSC) const + { return(_m_S.FactorValueIndicesToStateIndex(s_e_vec,sfSC)); } + std::vector StateIndexToFactorValueIndices(Index s_e, + const Scope& sfSC) const + { return(_m_S.StateIndexToFactorValueIndices(s_e,sfSC)); } + + const std::vector& GetNrActions() const { return(_m_A.GetNrActions()); } + size_t GetNrActions(Index AgentI) const { return(_m_A.GetNrActions(AgentI)); } + size_t GetNrJointActions() const { return(_m_A.GetNrJointActions()); } + size_t GetNrJointActions(const Scope& agScope) const { return(_m_A.GetNrJointActions(agScope)); } + bool JointAIndicesValid() const { return(_m_A.JointIndicesValid()); } + const Action* GetAction(Index agentI, Index a) const { return(_m_A.GetAction(agentI,a)); } + const JointAction* GetJointAction(Index i) const { return(_m_A.GetJointAction(i)); } + size_t ConstructJointActions() { return(_m_A.ConstructJointActions()); } + Index IndividualToJointActionIndices(const Index* AI_ar) const + { return(_m_A.IndividualToJointActionIndices(AI_ar)); } + Index IndividualToJointActionIndices(const std::vector& indivActionIndices) const + { return(_m_A.IndividualToJointActionIndices(indivActionIndices)); } + const std::vector& JointToIndividualActionIndices(Index jaI) const + { return(_m_A.JointToIndividualActionIndices(jaI)); } + Index IndividualToJointActionIndices(const std::vector& ja_e, const Scope& agSC) const + { return(_m_A.IndividualToJointActionIndices(ja_e, agSC)); } + std::vector JointToIndividualActionIndices(Index ja_e, const Scope& agSC) const + { return(_m_A.JointToIndividualActionIndices(ja_e, agSC)); } + Index JointToRestrictedJointActionIndex(Index jaI, const Scope& agSc_e ) const + { 
return(_m_A.JointToRestrictedJointActionIndex(jaI, agSc_e)); } + + const std::vector& GetNrObservations() const { return(_m_O.GetNrObservations()); } + size_t GetNrObservations(Index AgentI) const { return(_m_O.GetNrObservations(AgentI)); } + size_t GetNrJointObservations() const { return(_m_O.GetNrJointObservations()); } + bool JointOIndicesValid() const { return(_m_O.JointIndicesValid()); } + size_t ConstructJointObservations() { return(_m_O.ConstructJointObservations()); } + const Observation* GetObservation(Index agentI, Index a) const + { return(_m_O.GetObservation(agentI,a)); } + const JointObservation* GetJointObservation(Index i) const + { return(_m_O.GetJointObservation(i)); } + Index IndividualToJointObservationIndices(const std::vector& indivObservationIndices) const + { return(_m_O.IndividualToJointObservationIndices(indivObservationIndices)); } + const std::vector& JointToIndividualObservationIndices(Index joI) const + { return(_m_O.JointToIndividualObservationIndices(joI)); } + Index IndividualToJointObservationIndices( + const std::vector& jo_e, const Scope& agSC) const + { return(_m_O.IndividualToJointObservationIndices(jo_e,agSC)); } + std::vector JointToIndividualObservationIndices(Index jo_e, const Scope& agSC) const + { return(_m_O.JointToIndividualObservationIndices(jo_e,agSC)); } + Index JointToRestrictedJointObservationIndex(Index joI, const Scope& agSc_e ) const + { return(_m_O.JointToRestrictedJointObservationIndex(joI,agSc_e)); } + + bool JointIndicesValid() const + { return (JointAIndicesValid() && JointOIndicesValid()); } + void SetISD(FactoredStateDistribution* p) { _m_S.SetISD(p); } +// void SetISD(StateDistributionVector* p) { _m_S.SetISD(p); } + void SetUniformISD() { _m_S.SetUniformISD(); } + Index AddStateFactor(const std::string &n="undef. name", + const std::string &d="undef. 
descr.") + { return(_m_S.AddStateFactor(n,d)); } + Index AddStateFactorValue(Index sf, const std::string &v="undef") + { return(_m_S.AddStateFactorValue(sf,v)); } + + /**\brief This function removes a state factor from the model's + * MADPComponentFactoredStates, fixes the factor indices, and + * adjusts the 2DBN accordingly. + * + * NOTE: for nodes which depend on the removed state factor, the + * implementation fixes the scopes and allocates CPTs of the new size, + * but (per the NOTE in its body) it does not yet marginalize each CPT, + * i.e. those CPTs are not recalculated from their clique joints. + * */ + void RemoveStateFactor(Index sf); + + bool SetStatesInitialized(bool b) { return(_m_S.SetInitialized(b)); } + + void SetNrActions(Index AI, size_t nrA) { _m_A.SetNrActions(AI,nrA); } + void AddAction(Index AI, const std::string &name, + const std::string &description="") { _m_A.AddAction(AI,name, description); } + bool SetActionsInitialized(bool b) { return(_m_A.SetInitialized(b)); } + + void SetNrObservations(Index AI, size_t nrO) { _m_O.SetNrObservations(AI,nrO); } + void AddObservation(Index AI, const std::string &name, + const std::string &description="") { _m_O.AddObservation(AI,name, description); } + bool SetObservationsInitialized(bool b) { return(_m_O.SetInitialized(b)); } + + + Scope StateScopeBackup( const Scope & stateScope, + const Scope & agentScope) const + { return(_m_2dbn.StateScopeBackup(stateScope,agentScope)); } + Scope AgentScopeBackup( const Scope & stateScope, + const Scope & agentScope) const + { return(_m_2dbn.AgentScopeBackup(stateScope,agentScope)); } + double GetYOProbability(const Scope& X, const std::vector& Xs, + const Scope& A, const std::vector& As, + const Scope& Y, const std::vector& Ys, + const Scope& O, const std::vector& Os) const + { return(_m_2dbn.GetYOProbability(X,Xs,A,As,Y,Ys,O,Os)); } + void SetSoI_Y( Index y, + const Scope& XSoI, + const Scope& ASoI, + const Scope& YSoI) + { _m_2dbn.SetSoI_Y(y,XSoI,ASoI,YSoI);} + void SetSoI_O( Index o, + const Scope& ASoI, + const Scope& YSoI, + const Scope& OSoI) + { _m_2dbn.SetSoI_O(o,ASoI,YSoI,OSoI); } + void SetSoI_O( 
Index o, + const Scope& XSoI, + const Scope& ASoI, + const Scope& YSoI, + const Scope& OSoI) + { _m_2dbn.SetSoI_O(o,XSoI,ASoI,YSoI,OSoI); } + + const Scope& GetXSoI_Y(Index y) const + { return(_m_2dbn.GetXSoI_Y(y)); } + const Scope& GetASoI_Y(Index y) const + { return(_m_2dbn.GetASoI_Y(y)); } + const Scope& GetYSoI_Y(Index y) const + { return(_m_2dbn.GetYSoI_Y(y)); } + + const Scope& GetXSoI_O(Index o) const + { return(_m_2dbn.GetXSoI_O(o)); } + const Scope& GetASoI_O(Index o) const + { return(_m_2dbn.GetASoI_O(o)); } + const Scope& GetYSoI_O(Index o) const + { return(_m_2dbn.GetYSoI_O(o)); } + const Scope& GetOSoI_O(Index o) const + { return(_m_2dbn.GetOSoI_O(o)); } + + //operators: + + //data manipulation (set) functions: + bool SetInitialized(bool b); + void SetConnectionsSpecified(bool b) + { _m_connectionsSpecified = b; } + + + void SetSparse(bool b) + { +#if MADP_DFS_WARNINGS + std::cerr<< "Warning MultiAgentDecisionProcessDiscreteFactoredStates SetSparse not yet implemented" << std::endl; +#endif + } + void SetEventObservability(bool eventO) + {_m_eventObservability=eventO;} + + ///Creates a new transition model: initializes new CPDs for the 2BDN + /**This first checks whether all connections are specified + * (_m_connectionsSpecified) and consequently allocates CPDs for + * the 2DBN. + * + * In the future it should be possible to specify what kind of CPDs + * are used (e.g. CPT, sparse CPT, ADD, rules etc.) + * + */ + void CreateNewTransitionModel(); + ///Creates a new observation model mapping: initializes new CPDs + /**This first checks whether all connections are specified + * (_m_connectionsSpecified) and consequently allocates CPDs for + * the 2DBN. + * + * In the future it should be possible to specify what kind of CPDs + * are used (e.g. CPT, sparse CPT, ADD, rules etc.) + */ + void CreateNewObservationModel(); + + + +/* perhaps some forwarding functions here to set probabilities? + ///Set the probability of successor state sucSI: P(sucSI|sI,jaI). 
+ void SetTransitionProbability(Index sI, Index jaI, Index sucSI, + double p); + ///Set the probability of joint observation joI: P(joI|jaI,sucSI). + void SetObservationProbability(Index jaI, Index sucSI, Index joI, + double p); +*/ + + //get (data) functions: + // + const TwoStageDynamicBayesianNetwork* Get2DBN() const + {return &_m_2dbn;} + +//implement the MultiAgentDecisionProcessDiscreteFactoredStatesInterface +//(i.e., the functions not handled by MADPComponentFactoredStates ) + + ///Get the number of joint instantiations for the factors in sfScope + size_t GetNrStateFactorInstantiations(const Scope& sfScope) const; + + /// Are we using an event observation model? + bool GetEventObservability() const { return(_m_eventObservability); } + + ///SoftPrints information on the MultiAgentDecisionProcessDiscrete. + std::string SoftPrint() const; + + +//implement the MultiAgentDecisionProcessDiscreteInterface.h: + + + double GetTransitionProbability (Index sI, Index jaI, Index sucSI) const; + TGet* GetTGet() const; + + double GetObservationProbability (Index jaI, Index sucSI, Index joI) const; + /// O(s,ja,s',jo) version. You can access a standard O(ja,s',jo) model both ways + /// (the PS index is simply ignored in the latter case). 
+ double GetObservationProbability (Index sI, Index jaI, Index sucSI, Index joI) const; + OGet* GetOGet() const; + + Index SampleSuccessorState (Index sI, Index jaI) const; + void SampleSuccessorState(const std::vector &sIs, + const std::vector &aIs, + std::vector &sucIs) const; + Index SampleJointObservation(Index jaI, Index sucI) const; + Index SampleJointObservation(Index sI, Index jaI, Index sucI) const; + void SampleJointObservation(const std::vector &aIs, + const std::vector &sucIs, + std::vector &oIs) const + { SampleJointObservation(std::vector(),aIs,sucIs,oIs); } + void SampleJointObservation(const std::vector &sIs, + const std::vector &aIs, + const std::vector &sucIs, + std::vector &oIs) const; + + //the following are implemented by MADPComponentFactoredStates + //double GetInitialStateProbability(Globals::Index) const; + //std::vector GetISD() const; + //Globals::Index SampleInitialState() const; + + void CacheFlatTransitionModel(bool sparse=false); + void CacheFlatObservationModel(bool sparse=false); + + //the observation and transition model are represented by the + //TwoStageDynamicBayesianNetwork so the following functions are + //problematic... + //However, they can simply return 0 + const TransitionModelDiscrete* GetTransitionModelDiscretePtr() const + { + if(_m_cached_FlatTM) + return _m_p_tModel; + else + return(0); + } + const ObservationModelDiscrete* GetObservationModelDiscretePtr() const + { + if(_m_cached_FlatOM) + return _m_p_oModel; + else + return(0); + } + + /**\brief This function marginalizes a state factor out of the flat + * joint transition and observation models of the system. The function then + * removes that factor from the process model altogether (through RemoveStateFactor). 
+ * Currently, it only supports the marginalization of nodes without + * NS dependencies, and which do not directly influence any LRF + * */ + void MarginalizeTransitionObservationModel(const Index sf, bool sparse); + + /** + * \brief This is the base class for functors that set the scopes of the 2-DBN. + */ + class ScopeFunctor + { + public: + virtual void operator()(void) = 0; + }; + + /** + * \brief This is the base class for functors that return the transition probability for a given (s,a,s') tuple. + */ + class TransitionProbFunctor + { + public: + virtual double operator()(Index y, + Index yVal, + const std::vector< Index>& Xs, + const std::vector< Index>& As, + const std::vector< Index>& Ys) const = 0; + }; + + /** + * \brief This is the base class for functors that return the observation probability for a given (s,a,s',o) tuple. + */ + class ObservationProbFunctor + { + public: + ObservationProbFunctor(bool isEmpty = false) : + _m_isEmpty(isEmpty){} + + virtual double operator()(Index o, + Index oVal, + const std::vector< Index>& Xs, + const std::vector< Index>& As, + const std::vector< Index>& Ys, + const std::vector< Index>& Os) const + {return 0;} + + bool isEmpty() + {return _m_isEmpty;} + + private: + bool _m_isEmpty; + }; + + /** + * \brief The BoundScopeFunctor class binds the "SetScopes" function to a templated object. + */ + template class BoundScopeFunctor : public ScopeFunctor + { + private: + SF* _m_sf; + void (SF::*_m_func)(); + public: + BoundScopeFunctor(SF* sf_ptr, void (SF::*func_ptr) (void)): + _m_sf(sf_ptr), + _m_func(func_ptr){}; + + void operator()(void) + {(*_m_sf.*_m_func) ();}; + }; + + /** + * \brief The BoundTransitionProbFunctor class binds the "ComputeTransitionProb" function to a templated object. 
+ */ + template class BoundTransitionProbFunctor : public TransitionProbFunctor + { + private: + TF* _m_tf; + double (TF::*_m_func)(Index, + Index, + const std::vector< Index>&, + const std::vector< Index>&, + const std::vector< Index>&) const; + public: + BoundTransitionProbFunctor(TF* tf_ptr, double (TF::*func_ptr) (Index, + Index, + const std::vector< Index>&, + const std::vector< Index>&, + const std::vector< Index>&) const): + _m_tf(tf_ptr), + _m_func(func_ptr){}; + + double operator()(Index y, + Index yVal, + const std::vector< Index>& Xs, + const std::vector< Index>& As, + const std::vector< Index>& Ys) const + {return (*_m_tf.*_m_func) (y,yVal,Xs,As,Ys);}; + }; + + /** + * \brief The BoundObservationProbFunctor class binds the "ComputeObservationProb" function to a templated object. + */ + template class BoundObservationProbFunctor : public ObservationProbFunctor + { + private: + OF* _m_of; + double (OF::*_m_func)(Index, + Index, + const std::vector< Index>&, + const std::vector< Index>&, + const std::vector< Index>&, + const std::vector< Index>&) const; + public: + BoundObservationProbFunctor(OF* of_ptr, double (OF::*func_ptr) (Index, + Index, + const std::vector< Index>&, + const std::vector< Index>&, + const std::vector< Index>&, + const std::vector< Index>&) const): + _m_of(of_ptr), + _m_func(func_ptr){}; + + double operator()(Index o, + Index oVal, + const std::vector< Index>& Xs, + const std::vector< Index>& As, + const std::vector< Index>& Ys, + const std::vector< Index>& Os) const + {return (*_m_of.*_m_func) (o,oVal,Xs,As,Ys,Os);}; + }; + + /** + * The EmptyObservationProbFunctor class can be used by fully-observable subclasses of + * MultiAgentDecisionProcessDiscreteFactoredStates, in order to initialize the 2DBN without + * requiring an actual observation function. 
+ */ + class EmptyObservationProbFunctor : public ObservationProbFunctor + { + public: + EmptyObservationProbFunctor() : + ObservationProbFunctor(true){}; + }; + + virtual void Initialize2DBN(); + /** + * This signature allows us to initialize the 2DBN using externally supplied functors to + * set the scopes, and compute transition and observation probabilities in a discrete factored + * model. This is useful, for example, if we want to read these from a file + * (e.g. as done by ParserProbModelXML) instead of creating ad-hoc implementations of each + * of these functions for each specific planning problem. + */ + virtual void Initialize2DBN(ScopeFunctor& SetScopes, + TransitionProbFunctor& ComputeTransitionProb, + ObservationProbFunctor& ComputeObservationProb); +}; + + + + +#endif /* !_MULTIAGENTDECISIONPROCESSDISCRETEFACTOREDSTATES_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscreteFactoredStatesInterface.h b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscreteFactoredStatesInterface.h new file mode 100644 index 000000000..cae958c7e --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscreteFactoredStatesInterface.h @@ -0,0 +1,151 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. 
*/ +#ifndef _MULTIAGENTDECISIONPROCESSDISCRETEFACTOREDSTATESINTERFACE_H_ +#define _MULTIAGENTDECISIONPROCESSDISCRETEFACTOREDSTATESINTERFACE_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "MultiAgentDecisionProcessDiscreteInterface.h" +#include "IndexTools.h" +#include "Scope.h" +#include "FactoredStateDistribution.h" + +class StateFactorDiscrete; +class TwoStageDynamicBayesianNetwork; + +/**\brief MultiAgentDecisionProcessDiscreteFactoredStatesInterface is the + * interface for factored state problems. + * + * Currently it has one implementation: + * MultiAgentDecisionProcessDiscreteFactoredStates + * + * + * This class defines the functions that implement/maintain the factored + * transition and observation models. + * */ +class MultiAgentDecisionProcessDiscreteFactoredStatesInterface + : + virtual public MultiAgentDecisionProcessDiscreteInterface +{ + private: + + protected: + + public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + //MultiAgentDecisionProcessDiscreteFactoredStatesInterface(); + /// Copy constructor. + //MultiAgentDecisionProcessDiscreteFactoredStatesInterface(const MultiAgentDecisionProcessDiscreteFactoredStatesInterface& a); + /// Destructor. + virtual ~MultiAgentDecisionProcessDiscreteFactoredStatesInterface(){}; + /// Copy assignment operator + //MultiAgentDecisionProcessDiscreteFactoredStatesInterface& operator= (const MultiAgentDecisionProcessDiscreteFactoredStatesInterface& o); + + //operators: + + //data manipulation (set) functions: + + //get (data) functions: + virtual const TwoStageDynamicBayesianNetwork* Get2DBN() const + { throw E("Get2DBN should be overriden by the implementation of MultiAgentDecisionProcessDiscreteFactoredStatesInterface. (actually, we may want to define a TwoStageDynamicBayesianNetworkInterface or something similar, such the derived class does not need to use TwoStageDynamicBayesianNetwork per se. 
This, however, is work for the future.)"); } + + virtual const StateFactorDiscrete* GetStateFactorDiscrete(Index sfacI) const = 0; + +//stuff that is implemented by a factored state component: +//(i.e., the only current implementation is MADPComponentFactoredStates) + virtual const FactoredStateDistribution* GetFactoredISD() const = 0; + + /// Get the number of state components. -no is inherited from + // MultiAgentDecisionProcessDiscreteInterface! + //virtual size_t GetNrStates() const + //{ return (IndexTools::VectorProduct( GetNrValuesPerFactor() ) ); } + + /// Get the number of state components. + virtual size_t GetNrStateFactors() const = 0; + /// Convenience function to quickly get the full state scope + virtual const Scope& GetAllStateFactorScope() const=0; + /// Get the number of possible assignments or values to each factor. + virtual const std::vector& GetNrValuesPerFactor() const = 0; + /// Get the number of possible values for a particular factor. + virtual const size_t GetNrValuesForFactor(Index sf) const = 0; + /**\brief Get the vector of FactorValue indices corresponding to stateI + * used to be called + * virtual vector GetStateFactorValues(Index stateI) const + */ + virtual std::vector StateIndexToFactorValueIndices(Index stateI) + const = 0; + /// Get the value of a particular state factor given a joint flat state + virtual Index StateIndexToFactorValueIndex(Index factor, Index s) + const = 0; + ///convert std::vector of (indices of) factor values to (flat) state index. + virtual Index FactorValueIndicesToStateIndex(const std::vector &fv) + const = 0; + //functions with explicitly specified scope + /**\brief convert an local state vector \a s_e_vec of scope \a sfScope + * to a joint index. + */ + virtual Index FactorValueIndicesToStateIndex(const std::vector& + s_e_vec, const Scope& sfSC) const=0; + /**\brief convert an local state index \a s_e to a vector of + * state factors (with scope \a sfScope). 
+ */ + virtual std::vector StateIndexToFactorValueIndices(Index s_e, + const Scope& sfSC) const=0; + + +//not sure how to classify these, but they are implemented by +//MultiAgentDecisionProcessDiscreteFactoredStatesInterface + + ///Get the number of joint instantiations for the factors in sfScope + virtual size_t GetNrStateFactorInstantiations(const Scope& sfScope) const=0; + +// stuff that has to be implemented by something that represents the transition- +// and observation model. + virtual Scope StateScopeBackup( const Scope & stateScope, + const Scope & agentScope) const = 0; + virtual Scope AgentScopeBackup( const Scope & stateScope, + const Scope & agentScope) const = 0; + + virtual void SampleInitialState(std::vector &sIs) const = 0; + virtual void SampleSuccessorState(const std::vector &sIs, + const std::vector &aIs, + std::vector &sucIs) const = 0; + virtual void SampleJointObservation(const std::vector &aIs, + const std::vector &sucIs, + std::vector &oIs) const = 0; + + + /// Returns a pointer to a copy of this class. + virtual MultiAgentDecisionProcessDiscreteFactoredStatesInterface* Clone() const = 0; + + ///SoftPrints information on the MultiAgentDecisionProcessDiscrete. 
+ //string SoftPrint() const; + std::string SoftPrintState(Index sI) const + {throw E("MultiAgentDecisionProcessDiscreteInterface::SoftPrintState should be overriden");} + + virtual void CacheFlatModels(bool sparse) + {throw E("MultiAgentDecisionProcessDiscreteInterface::CacheFlatModels should be overriden");} +}; + + +#endif /* !_MULTIAGENTDECISIONPROCESSDISCRETEFACTOREDSTATESINTERFACE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscreteInterface.h b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscreteInterface.h new file mode 100644 index 000000000..26b1c280f --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessDiscreteInterface.h @@ -0,0 +1,231 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. 
*/ +#ifndef _MULTIAGENTDECISIONPROCESSDISCRETEINTERFACE_H_ +#define _MULTIAGENTDECISIONPROCESSDISCRETEINTERFACE_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include "MultiAgentDecisionProcessInterface.h" + +class Action; +class Observation; +class JointObservation; +class State; +class JointAction; +class TransitionModelMapping; +class TransitionModelMappingSparse; +class ObservationModelMapping; +class ObservationModelMappingSparse; +class EventObservationModelMapping; +class EventObservationModelMappingSparse; + +class TransitionModelDiscrete; +class ObservationModelDiscrete; + +class TGet; +class OGet; +class StateDistribution; + +class Scope; + +/**\brief MultiAgentDecisionProcessDiscreteInterface is an abstract base + * class that defines publicly accessible member functions that a + * discrete multiagent decision process must implement. + * + * This interface is currently implemented by MultiAgentDecisionProcessDiscrete + * and MultiAgentDecisionProcessDiscreteFactoredStates. + * + * The functions this interface defines relate to actions, observations, + * transition and observation probabilities. * + * + **/ +class MultiAgentDecisionProcessDiscreteInterface : + virtual public MultiAgentDecisionProcessInterface +{ + private: + + protected: + + public: + + ///Destructor. + virtual ~MultiAgentDecisionProcessDiscreteInterface() {}; + + // 'get' functions: + + ///Return the number of states. + virtual size_t GetNrStates() const = 0; + ///Returns a pointer to state i. + virtual const State* GetState(Index i) const = 0; + virtual std::string SoftPrintState(Index sI) const = 0; + + /////Return the initial state distribution. + virtual double GetInitialStateProbability(Index sI) const = 0; + /// Returns the complete initial state distribution. 
+ virtual StateDistribution* GetISD() const = 0; + + + + ///Return the number of actions vector + virtual const std::vector& GetNrActions() const = 0; + ///Return the number of actions of agent agentI + virtual size_t GetNrActions(Index AgentI) const =0 ; + ///Return the number of joint actions. + virtual size_t GetNrJointActions() const = 0; + ///Get the number of joint actions the agents in agScope can form + virtual size_t GetNrJointActions(const Scope& agScope) const = 0; + + ///Return a ref to the a-th action of agent agentI. + virtual const Action* GetAction(Index agentI, Index a) const = 0; + ///Return a ref to the i-th joint action. + virtual const JointAction* GetJointAction(Index i) const = 0; + /**\brief Returns the joint action index that corresponds to the array + * of specified individual action indices.*/ + virtual Index IndividualToJointActionIndices(const Index* AI_ar) + const = 0; + /**\brief Returns the joint action index that corresponds to the vector + * of specified individual action indices.*/ + virtual Index IndividualToJointActionIndices(const std::vector& + indivActionIndices) const = 0; + /**\brief Returns a vector of indices to indiv. action indicies corr. + * to joint action index jaI.*/ + virtual const std::vector& JointToIndividualActionIndices( + Index jaI) const = 0; + + ///indiv->joint for a restricted set (Scope) of agents + virtual Index IndividualToJointActionIndices( + const std::vector& ja_e, const Scope& agSC) const = 0; + ///joint->indiv for a restricted set (Scope) of agents + virtual std::vector JointToIndividualActionIndices( + Index ja_e, const Scope& agSC) const = 0; + /**\brief Converts a global joint action index jaI to a restricted + * joint action index ja_e, for agents scope agSc_e + * Returns a vector of indices to indiv. action indicies corr. 
+ * to joint action index jaI.*/ + virtual Index JointToRestrictedJointActionIndex( + Index jaI, const Scope& agSc_e ) const = 0; + + ///Return the number of observations vector. + virtual const std::vector& GetNrObservations() const = 0; + ///Return the number of observations of agent agentI + virtual size_t GetNrObservations(Index AgentI) const = 0; + ///Return the number of joint observations. + virtual size_t GetNrJointObservations() const = 0; + + ///Return a ref to the a-th observation of agent agentI. + virtual const Observation* GetObservation(Index agentI, Index a) + const = 0; + ///Return a ref to the i-th joint observation. + virtual const JointObservation* GetJointObservation(Index i) const = 0; + + /**\brief Returns the joint observation index that corresponds to the + * vector of specified individual observation indices.*/ + virtual Index IndividualToJointObservationIndices( + const std::vector& + indivObservationIndices) const = 0; + /**\brief Returns a vector of indices to indiv. observation indicies + * corr. to joint observation index joI.*/ + virtual const std::vector& + JointToIndividualObservationIndices(Index joI) const = 0; + + ///indiv->joint for a restricted set (Scope) of agents + virtual Index IndividualToJointObservationIndices( + const std::vector& jo_e, const Scope& agSC) const + { throw E("MultiAgentDecisionProcessDiscreteInterface function not implemented (by TOI model?) "); }; + // = 0; + ///joint->indiv for a restricted set (Scope) of agents + virtual std::vector JointToIndividualObservationIndices( + Index jo_e, const Scope& agSC) const + { throw E("MultiAgentDecisionProcessDiscreteInterface function not implemented (by TOI model?) "); }; + //= 0; + /**\brief Converts a global joint observation index joI to a restricted + * joint observation index jo_e, for agents scope agSc_e + * Returns a vector of indices to indiv. observation indicies corr. 
+ * to joint observation index joI.*/ + virtual Index JointToRestrictedJointObservationIndex( + Index joI, const Scope& agSc_e ) const + { throw E("MultiAgentDecisionProcessDiscreteInterface function not implemented (by TOI model?) "); }; + // = 0; + + ///Return the probability of successor state sucSI: P(sucSI|sI,jaI). + virtual double GetTransitionProbability(Index sI, Index jaI, Index + sucSI) const = 0; + virtual TGet* GetTGet() const = 0; + + ///Return the probability of joint observation joI: P(joI|jaI,sucSI). + virtual double GetObservationProbability(Index jaI, Index sucSI, + Index joI) const = 0; + virtual double GetObservationProbability(Index sI, Index jaI, Index sucSI, + Index joI) const + {return GetObservationProbability(jaI, sucSI, joI); } + virtual OGet* GetOGet() const = 0; + + + /**\brief Returns a pointer to the underlying transition model. + * + * If speed is required (for instance when looping through all states) + * the pointer can be requested by an algorithm. It can then obtain + * a pointer to the actual implementation type by runtime type + * identification. (i.e., using typeid and dynamic_cast). + */ + virtual const TransitionModelDiscrete* GetTransitionModelDiscretePtr() + const = 0; + + /**\brief Returns a pointer to the underlying observation model. + * + * If speed is required (for instance when looping through all states) + * the pointer can be requested by an algorithm. It can then obtain + * a pointer to the actual implementation type by runtime type + * identification. (i.e., using typeid and dynamic_cast). + */ + virtual const ObservationModelDiscrete* GetObservationModelDiscretePtr() + const = 0; + + /**\brief Whether observation models are P(o|a,s') or P(o|s,a,s'). + * This is here since model-independent structures(such as joint beliefs) + * need this information. It should be overridden by derived classes. 
+ * */ + virtual bool GetEventObservability() const + {return(false);} + + //sample functions: + + /// Sample a successor state - needed by simulations. + virtual Index SampleSuccessorState(Index sI, Index jaI) const = 0; + /// Sample an observation - needed for simulations. + virtual Index SampleJointObservation(Index jaI, Index sucI) const =0; + virtual Index SampleJointObservation(Index sI, Index jaI, Index sucI) const + { return SampleJointObservation(jaI, sucI); } + /// Sample a state according to the initial state PDF. + virtual Index SampleInitialState() const = 0; + + /// Returns a pointer to a copy of this class. + virtual MultiAgentDecisionProcessDiscreteInterface* Clone() const = 0; + + /**\brief Prints some information on the + * MultiAgentDecisionProcessDiscreteInterface. + * */ + virtual std::string SoftPrint() const = 0; + +}; + +#endif /* !_MULTIAGENTDECISIONPROCESSDISCRETEINTERFACE_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessInterface.h b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessInterface.h new file mode 100644 index 000000000..feb7e2e02 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/MultiAgentDecisionProcessInterface.h @@ -0,0 +1,66 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. 
*/ +#ifndef _MULTIAGENTDECISIONPROCESSINTERFACE_H_ +#define _MULTIAGENTDECISIONPROCESSINTERFACE_H_ 1 + +/* the include directives */ +#include +#include + +#include "Globals.h" +class Scope; + +/**\brief MultiAgentDecisionProcessInterface is an abstract base class that + * declares the primary properties of a multiagent decision process. + * + * These primary properties are: + * \li the number of agents + * \li possibly, a vector of (named) agents + * \li the filename to be parsed, if applicable. + * + * It is implemented by MultiAgentDecisionProcess + * */ +class MultiAgentDecisionProcessInterface +{ + private: + + protected: + + public: + + ///Destructor. + //(Can't make a virt.destr. pure abstract!) + virtual ~MultiAgentDecisionProcessInterface() {} + + ///Return the number of agents + virtual size_t GetNrAgents() const = 0; + virtual const Scope& GetAllAgentScope() const=0; + + /// Returns the base part of the problem filename. + virtual std::string GetUnixName() const = 0; + + /// Returns a pointer to a copy of this class. + virtual MultiAgentDecisionProcessInterface* Clone() const = 0; + +}; + +#endif /* !_MULTIAGENTDECISIONPROCESSINTERFACE_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/NamedDescribedEntity.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/NamedDescribedEntity.cpp new file mode 100644 index 000000000..b09d31de5 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/NamedDescribedEntity.cpp @@ -0,0 +1,39 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. 
+ * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "NamedDescribedEntity.h" +#include + +using namespace std; + +NamedDescribedEntity::NamedDescribedEntity(const string &name, + const string &description) : + _m_name(name), + _m_description(description) +{ +} + +string NamedDescribedEntity::SoftPrint() const +{ + stringstream ss; + ss << "name:" << this->GetName() + << " - descr." << this->GetDescription(); + return(ss.str()); +} + +string NamedDescribedEntity::SoftPrintBrief() const +{ + return(GetName()); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/NamedDescribedEntity.h b/payntbind/src/synthesis/decpomdp/madp/src/base/NamedDescribedEntity.h new file mode 100644 index 000000000..da23e9b55 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/NamedDescribedEntity.h @@ -0,0 +1,75 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _NAMED_DESCRIBED_ENTITY_H_ +#define _NAMED_DESCRIBED_ENTITY_H_ 1 + +/* the include directives */ +#include +#include +#include "Globals.h" + +/// NamedDescribedEntity represents named entities. +/** For example actions and observations in a decision process. */ +class NamedDescribedEntity +{ + private: + + protected: + /// The name. + std::string _m_name; + /// The description. 
+ std::string _m_description; + + public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + NamedDescribedEntity(const std::string &name=std::string("undefined"), + const std::string &description=std::string("undefined")); + /// Destructor. + virtual ~NamedDescribedEntity(){}; + // 'get' functions: + /// Get the name of the NamedDescribedEntity + std::string GetName() const {return _m_name;}; + /// Get the description of the NamedDescribedEntity + std::string GetDescription() const {return _m_description;}; + + /// Set the name. + void SetName(const std::string &name){ + _m_name=name; + } + + /// Set the description. + void SetDescription(const std::string &description){ + _m_description=description; + } + + /// Returns the name and description (if not reimplemented). + virtual std::string SoftPrint() const; + /// Returns the name (if not reimplemented). + virtual std::string SoftPrintBrief() const; + /// Prints the name and description (by default). + void Print() const {std::cout << SoftPrint() << std::endl; } + /// Prints the name (by default). + void PrintBrief() const {std::cout << SoftPrintBrief() << std::endl; } +}; + +#endif /* !_NAMED_DESCRIBED_ENTITY_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/OGet.h b/payntbind/src/synthesis/decpomdp/madp/src/base/OGet.h new file mode 100644 index 000000000..b24e1b726 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/OGet.h @@ -0,0 +1,121 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. 
+ * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _OGET_H_ +#define _OGET_H_ 1 + +/* the include directives */ +#include "Globals.h" + +#include "ObservationModelMapping.h" +#include "ObservationModelMappingSparse.h" +#include "EventObservationModelMapping.h" +#include "EventObservationModelMappingSparse.h" + +/** \brief OGet can be used for direct access to the observation model. */ +class OGet +{ +public: + virtual ~OGet() = 0; + //get (data) functions: + virtual double Get(Index jaI, Index sucSI, Index joI) const = 0; + virtual double Get(Index sI, Index jaI, Index sucSI, Index joI) const + {return Get(jaI, sucSI, joI);} +}; + +//http://www.parashift.com/c++-faq-lite/pointers-to-members.html +//says "defined even though it's pure virtual; it's faster this way; trust me" +inline OGet::~OGet() {} + +/** \brief OGet_ObservationModelMapping can be used for direct access + * to a ObservationModelMapping. */ +class OGet_ObservationModelMapping : public OGet +{ + +private: + std::vector _m_O; +public: + OGet_ObservationModelMapping( ObservationModelMapping* om) + { + _m_O = om->_m_O; + }; + + virtual double Get(Index jaI, Index sucSI, Index joI) const + { { return((*_m_O[jaI])(sucSI,joI)); } } + +}; + +/** \brief OGet_ObservationModelMappingSparse can be used for direct + * access to a ObservationModelMappingSparse. 
*/ +class OGet_ObservationModelMappingSparse : public OGet +{ + +private: + std::vector _m_O; +public: + OGet_ObservationModelMappingSparse( ObservationModelMappingSparse* om) + { + _m_O = om->_m_O; + }; + + virtual double Get(Index jaI, Index sucSI, Index joI) const + { { return((*_m_O[jaI])(sucSI,joI)); } } + +}; + +class OGet_EventObservationModelMapping : public OGet +{ + +private: + std::vector > _m_O; +public: + OGet_EventObservationModelMapping( EventObservationModelMapping* om) + { + _m_O = om->_m_O; + }; + + virtual double Get(Index jaI, Index sucSI, Index joI) const + { throw E("Cannot refer to an Event Observation Model with (o,s',a). Use Get(s,a,s',o) instead."); } + + virtual double Get(Index sI, Index jaI, Index sucSI, Index joI) const + { { return((*_m_O[jaI][sI])(sucSI,joI)); } } + +}; + +class OGet_EventObservationModelMappingSparse : public OGet +{ + +private: + std::vector > _m_O; +public: + OGet_EventObservationModelMappingSparse( EventObservationModelMappingSparse* om) + { + _m_O = om->_m_O; + }; + + virtual double Get(Index jaI, Index sucSI, Index joI) const + { throw E("Cannot refer to an Event Observation Model with (o,s',a). Use Get(s,a,s',o) instead."); } + + virtual double Get(Index sI, Index jaI, Index sucSI, Index joI) const + { { return((*_m_O[jaI][sI])(sucSI,joI)); } } + +}; + +#endif /* !_OGET_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/Observation.h b/payntbind/src/synthesis/decpomdp/madp/src/base/Observation.h new file mode 100644 index 000000000..5f7a3669e --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/Observation.h @@ -0,0 +1,46 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. 
For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _OBSERVATION_H_ +#define _OBSERVATION_H_ 1 + +/* the include directives */ +#include +#include + +#include "NamedDescribedEntity.h" + +/// Observation represents observations. +class Observation : public NamedDescribedEntity +{ + private: + + protected: + + public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + Observation(const std::string &name=std::string("undefined"), + const std::string &description=std::string("undefined")) : + NamedDescribedEntity(name, description){}; + +}; + +#endif /* !_OBSERVATION_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationDiscrete.h new file mode 100644 index 000000000..054c43e82 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationDiscrete.h @@ -0,0 +1,54 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. 
*/ +#ifndef _OBSERVATIONDISCRETE_H_ +#define _OBSERVATIONDISCRETE_H_ 1 + +/* the include directives */ +#include +#include + +#include "Globals.h" +#include "Observation.h" +#include "DiscreteEntity.h" + +/// ObservationDiscrete represents discrete observations. +/** + * ObservationDiscrete is a class that represents observations in a + * discrete observation set, which are identified by their index. */ +class ObservationDiscrete : public Observation, + public DiscreteEntity +{ + private: + + protected: + + public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + ObservationDiscrete(Index i=INDEX_MAX, + const std::string &name=std::string("undefined"), + const std::string &description=std::string("undefined")) : + Observation(name, description), + DiscreteEntity(i){}; + +}; + +#endif /* !_OBSERVATIONDISCRETE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModel.h b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModel.h new file mode 100644 index 000000000..0bd3a8431 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModel.h @@ -0,0 +1,51 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once.
*/ +#ifndef _OBSERVATIONMODEL_H_ +#define _OBSERVATIONMODEL_H_ 1 + +/* the include directives */ + +#include +#include "Globals.h" + +/// ObservationModel represents the observation model in a decision process. +class ObservationModel +{ +private: + +protected: + +public: + /// default Constructor + ObservationModel(){}; + + /// Destructor. + virtual ~ObservationModel(){} + + /// Returns a pointer to a copy of this class. + virtual ObservationModel* Clone() const = 0; + + virtual std::string SoftPrint() const = 0; + void Print() const + {std::cout << SoftPrint();} +}; + +#endif /* !_OBSERVATIONMODEL_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelDiscrete.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelDiscrete.cpp new file mode 100644 index 000000000..8bf510117 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelDiscrete.cpp @@ -0,0 +1,94 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "ObservationModelDiscrete.h" +#include + +using namespace std; + +//Default constructor +ObservationModelDiscrete::ObservationModelDiscrete(int nrS, + int nrJA, + int nrJO) : + _m_nrStates(nrS), + _m_nrJointActions(nrJA), + _m_nrJointObservations(nrJO) +{ +} + +//Destructor +ObservationModelDiscrete::~ObservationModelDiscrete() +{ +} + +string ObservationModelDiscrete::SoftPrint() const +{ + stringstream ss; + double p; + ss << "jo\tja\ts'\tP (tuples with P==0 are not printed)"<0) + ss << jo_i << "\t" << ja_i << "\t" << s_ip << "\t" << p + << endl; + } + return(ss.str()); +} + +Index ObservationModelDiscrete::SampleJointObservation(Index jaI, Index sucI) +{ + double randNr=rand() / (RAND_MAX + 1.0); + + double sum=0; + Index jo=0; + int i; + + for(i=0;i<_m_nrJointObservations;i++) + { + sum+=Get(jaI,sucI,i); + if(randNr<=sum) + { + jo=i; + break; + } + } + + return(jo); +} + +Index ObservationModelDiscrete::SampleJointObservation(Index sI, Index jaI, Index sucI) +{ + double randNr=rand() / (RAND_MAX + 1.0); + + double sum=0; + Index jo=0; + int i; + + for(i=0;i<_m_nrJointObservations;i++) + { + sum+=Get(sI,jaI,sucI,i); + if(randNr<=sum) + { + jo=i; + break; + } + } + + return(jo); +} + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelDiscrete.h new file mode 100644 index 000000000..f1802c0bc --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelDiscrete.h @@ -0,0 +1,64 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. 
+ * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _OBSERVATIONMODELDISCRETE_H_ +#define _OBSERVATIONMODELDISCRETE_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "ObservationModelDiscreteInterface.h" + +/// ObservationModelDiscrete represents a discrete observation model. +class ObservationModelDiscrete : public ObservationModelDiscreteInterface +{ +private: + + /// The number of states. + int _m_nrStates; + /// The number of joint actions. + int _m_nrJointActions; + /// The number of joint observations + int _m_nrJointObservations; + +protected: + +public: + /// Constructor with the dimensions of the observation model. + ObservationModelDiscrete(int nrS = 1, int nrJA = 1, int nrJO = 1); + + /// Destructor. + virtual ~ObservationModelDiscrete(); + + /// Samples a joint observation from Get(jaI,sucI,jo) using rand(); returns 0 if none is selected. + Index SampleJointObservation(Index jaI, Index sucI); + + /// Same as above, but queries the four-argument Get(sI,jaI,sucI,jo). + Index SampleJointObservation(Index sI, Index jaI, Index sucI); + + /// Returns a pointer to a copy of this class. + virtual ObservationModelDiscrete* Clone() const = 0; + + /// SoftPrints tabular observation model. + std::string SoftPrint() const; +}; + + +#endif /* !_OBSERVATIONMODELDISCRETE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelDiscreteInterface.h b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelDiscreteInterface.h new file mode 100644 index 000000000..328e54ea5 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelDiscreteInterface.h @@ -0,0 +1,65 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3.
However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _OBSERVATIONMODELDISCRETEINTERFACE_H_ +#define _OBSERVATIONMODELDISCRETEINTERFACE_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "ObservationModel.h" + +/// ObservationModelDiscreteInterface represents a discrete observation model. +class ObservationModelDiscreteInterface : public ObservationModel +{ +private: + +protected: + +public: + /// Default constructor (the interface itself stores no dimensions). + ObservationModelDiscreteInterface(){}; + + /// Destructor. + virtual ~ObservationModelDiscreteInterface(){}; + + /// Returns P(jo|ja,s'); the 4-argument overload ignores s_i unless overridden. + virtual double Get(Index ja_i, Index suc_s_i, Index jo_i) const = 0; + virtual double Get(Index s_i, Index ja_i, Index suc_s_i, Index jo_i) const + {return Get(ja_i, suc_s_i, jo_i); } + + //data manipulation functions: + /// Sets P(o|ja,s'); the 5-argument overload ignores s_i unless overridden. + /** Index jo_i, Index ja_i, Index suc_s_i, are indices of the + * joint observation, taken joint action and resulting successor + * state. prob is the probability. The order of events is ja, s', + * o, so is the arg. list + */ + virtual void Set(Index ja_i, Index suc_s_i, Index jo_i, double prob) = 0; + virtual void Set(Index s_i, Index ja_i, Index suc_s_i, Index jo_i, double prob) + {Set(ja_i, suc_s_i, jo_i, prob); } + + /// Returns a pointer to a copy of this class.
+ virtual ObservationModelDiscreteInterface* Clone() const = 0; + +}; + + +#endif /* !_OBSERVATIONMODELDISCRETEINTERFACE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelMapping.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelMapping.cpp new file mode 100644 index 000000000..36ed9b2ed --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelMapping.cpp @@ -0,0 +1,51 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "ObservationModelMapping.h" + +using namespace std; + +ObservationModelMapping::ObservationModelMapping(int nrS, int nrJA, + int nrJO) : + ObservationModelDiscrete(nrS, nrJA, nrJO) +{ + Matrix *O; + for(int a=0;a!=nrJA;++a) + { + O=new Matrix(nrS,nrJO); + O->clear(); + _m_O.push_back(O); + } +} + +ObservationModelMapping:: +ObservationModelMapping(const ObservationModelMapping& OM) : + ObservationModelDiscrete(OM) +{ + Matrix *O; + for(unsigned int a=0;a!=OM._m_O.size();++a) + { + O=new Matrix(*OM._m_O[a]); + _m_O.push_back(O); + } +} + +ObservationModelMapping::~ObservationModelMapping() +{ + for(vector::iterator it=_m_O.begin(); + it!=_m_O.end(); ++it) + delete(*it); + _m_O.clear(); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelMapping.h b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelMapping.h new file mode 100644 index 000000000..88f373295 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelMapping.h @@ -0,0 +1,82 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _OBSERVATIONMODELMAPPING_H_ +#define _OBSERVATIONMODELMAPPING_H_ 1 + +/* the include directives */ +#include "boost/numeric/ublas/matrix.hpp" +#include "Globals.h" +#include "ObservationModelDiscrete.h" +class OGet; +class OGet_ObservationModelMapping; + +/// ObservationModelMapping implements an ObservationModelDiscrete. +/** Uses full matrices. 
*/ +class ObservationModelMapping : + public ObservationModelDiscrete +{ +public: + + typedef boost::numeric::ublas::matrix Matrix; + +private: + + std::vector _m_O; + +protected: + +public: + // Constructor, destructor and copy assignment. + /// default Constructor + ObservationModelMapping(int nrS = 1, int nrJA = 1, int nrJO = 1); + + /// Copy constructor. + ObservationModelMapping(const ObservationModelMapping& OM); + /// Destructor. + ~ObservationModelMapping(); + + /// Returns P(jo|ja,s') + double Get(Index ja_i, Index suc_s_i, Index jo_i) const + { return((*_m_O[ja_i])(suc_s_i,jo_i)); } + + //data manipulation functions: + /// Sets P(o|ja,s') + /** jo_i, Index ja_i, Index suc_s_i, are indices of the joint + * observation, taken joint action and resulting successor + * state. prob is the probability. The order of events is ja, s', + * o, so is the arg. list + */ + void Set(Index ja_i, Index suc_s_i, Index jo_i, double prob) + { (*_m_O[ja_i])(suc_s_i,jo_i)=prob; } + + /// Get a pointer to the observation matrix for a particular joint action (index checked via at()). + const Matrix* GetMatrixPtr(Index a) const + { return(_m_O.at(a)); } + + /// Returns a pointer to a copy of this class. + virtual ObservationModelMapping* Clone() const + { return new ObservationModelMapping(*this); } + + friend class OGet_ObservationModelMapping; +}; + +#endif /* !_OBSERVATIONMODELMAPPING_H_*/ + +// Local Variables: *** +// mode:c++ *** +// End: *** + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelMappingSparse.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelMappingSparse.cpp new file mode 100644 index 000000000..18caa0850 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelMappingSparse.cpp @@ -0,0 +1,51 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license.
For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "ObservationModelMappingSparse.h" + +using namespace std; + +ObservationModelMappingSparse::ObservationModelMappingSparse(int nrS, + int nrJA, + int nrJO) : + ObservationModelDiscrete(nrS, nrJA, nrJO) +{ + SparseMatrix *O; + for(int a=0;a!=nrJA;++a) + { + O=new SparseMatrix(nrS,nrJO); + _m_O.push_back(O); + } +} + +ObservationModelMappingSparse:: +ObservationModelMappingSparse(const ObservationModelMappingSparse& OM) : + ObservationModelDiscrete(OM) +{ + SparseMatrix *O; + for(unsigned int a=0;a!=OM._m_O.size();++a) + { + O=new SparseMatrix(*OM._m_O[a]); + _m_O.push_back(O); + } +} + +ObservationModelMappingSparse::~ObservationModelMappingSparse() +{ + for(vector::iterator it=_m_O.begin(); + it!=_m_O.end(); ++it) + delete(*it); + _m_O.clear(); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelMappingSparse.h b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelMappingSparse.h new file mode 100644 index 000000000..a33925edd --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/ObservationModelMappingSparse.h @@ -0,0 +1,93 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +/* Only include this header file once. */ +#ifndef _OBSERVATIONMODELMAPPINGSPARSE_H_ +#define _OBSERVATIONMODELMAPPINGSPARSE_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include "ObservationModelDiscrete.h" +#include "boost/numeric/ublas/matrix_sparse.hpp" +class OGet; +class OGet_ObservationModelMapping; + +/// ObservationModelMappingSparse implements an ObservationModelDiscrete. +/** Uses sparse matrices. */ +class ObservationModelMappingSparse : + public ObservationModelDiscrete +{ +public: +#if BOOST_1_32_OR_LOWER // they renamed sparse_vector to mapped_vector + typedef boost::numeric::ublas::sparse_matrix SparseMatrix; +#else + typedef boost::numeric::ublas::compressed_matrix SparseMatrix; +#endif + + +private: + + std::vector _m_O; + +protected: + +public: + // Constructor, destructor and copy assignment. + /// default Constructor + ObservationModelMappingSparse(int nrS = 1, int nrJA = 1, int nrJO = 1); + + /// Copy constructor. + ObservationModelMappingSparse(const ObservationModelMappingSparse& OM); + /// Destructor. + ~ObservationModelMappingSparse(); + + /// Returns P(jo|ja,s') + double Get(Index ja_i, Index suc_s_i, Index jo_i) const + { return((*_m_O[ja_i])(suc_s_i,jo_i)); } + + //data manipulation functions: + /// Sets P(o|ja,s') + /** jo_i, Index ja_i, Index suc_s_i, are indices of the joint + * observation, taken joint action and resulting successor + * state. prob is the probability. The order of events is ja, s', + * o, so is the arg. list + */ + void Set(Index ja_i, Index suc_s_i, Index jo_i, double prob) + { + // only store probabilities above PROB_PRECISION + if(prob > PROB_PRECISION) + (*_m_O[ja_i])(suc_s_i,jo_i)=prob; + // check if we already defined this element, if so remove it + else if((*_m_O[ja_i])(suc_s_i,jo_i)>PROB_PRECISION) + (*_m_O[ja_i]).erase_element(suc_s_i,jo_i); + } + + /// Get a pointer to the observation matrix for a particular joint action.
+ const SparseMatrix* GetMatrixPtr(Index a) const + { return(_m_O.at(a)); } + + /// Returns a pointer to a copy of this class. + virtual ObservationModelMappingSparse* Clone() const + { return new ObservationModelMappingSparse(*this); } + + friend class OGet_ObservationModelMappingSparse; +}; + +#endif /* !_OBSERVATIONMODELMAPPINGSPARSE_H_*/ + +// Local Variables: *** +// mode:c++ *** +// End: *** + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/PDDiscreteInterface.h b/payntbind/src/synthesis/decpomdp/madp/src/base/PDDiscreteInterface.h new file mode 100644 index 000000000..a61aaf963 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/PDDiscreteInterface.h @@ -0,0 +1,69 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _PDDISCRETEINTERFACE_H_ +#define _PDDISCRETEINTERFACE_H_ 1 + +/* the include directives */ +#include "Globals.h" + +/** \brief PDDiscreteInterface is an abstract base class that represents + * a joint probability distribution \f$ Pr(x_1,\dots,x_k ) \f$. + * + * The interface (so far) only implements Get. + * In the future + * \li we may want to add a function that allows multiplication of + * PDDiscreteInterface's ? + * \li I think it might not be convenient to add Set() functions (each + * implementation might use different set functions? E.g. 
a PD based on rules + * may use quite a different mechanism to set probabilities than a CPT) + * + * */ +class PDDiscreteInterface +{ +private: + +protected: + +public: + /// Destructor. + virtual ~PDDiscreteInterface(){}; + + /// Returns the probability \f$ Pr(x) \f$ + virtual double Get(Index x) const = 0; + virtual double Get(const std::vector& indices) const = 0; + virtual double Get(const Scope& sc, + const std::vector& indices_sc) const=0; + + /// Sets the probability \f$ Pr(x) \f$ of x to p + virtual void Set(Index x, double p) = 0; + + /// Returns a (joint index of an) x drawn from \f$ P(x) \f$ + virtual Index Sample() const = 0; + + virtual void SanityCheck() const = 0; + + /// Returns a pointer to a copy of this class. + virtual PDDiscreteInterface* Clone() const = 0; + + virtual std::string SoftPrint() const = 0; +}; + +#endif /* !_PDDISCRETEINTERFACE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/POMDPDiscrete.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/POMDPDiscrete.cpp new file mode 100644 index 000000000..9806af636 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/POMDPDiscrete.cpp @@ -0,0 +1,43 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file.
+ */ + +#include "POMDPDiscrete.h" + +using namespace std; + +//Default constructor +POMDPDiscrete::POMDPDiscrete(const std::string &name, + const std::string &descr, + const std::string &pf) : + DecPOMDPDiscrete(name,descr,pf) +{ + this->SetNrAgents(1); +} +//Copy constructor. +// POMDPDiscrete::POMDPDiscrete(const POMDPDiscrete& o) +// { +// } +//Destructor +//POMDPDiscrete::~POMDPDiscrete() +//{ +//} +//Copy assignment operator +// POMDPDiscrete& POMDPDiscrete::operator= (const POMDPDiscrete& o) +// { +// if (this == &o) return *this; // Gracefully handle self assignment +// // Put the normal assignment duties here... + +// return *this; +// } diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/POMDPDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/POMDPDiscrete.h new file mode 100644 index 000000000..9b317e6de --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/POMDPDiscrete.h @@ -0,0 +1,74 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _POMDPDISCRETE_H_ +#define _POMDPDISCRETE_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include "DecPOMDPDiscrete.h" + +/** \brief POMDPDiscrete models discrete POMDPs. It is basically a + * wrapper for a Dec-POMDP with a single agent. + * + **/ +class POMDPDiscrete : public DecPOMDPDiscrete +{ + private: + + + protected: + static const int SINGLE_AGENT_INDEX = 0; + + public: + // Constructor, destructor and copy assignment. 
+ /// (default) Constructor + /** Constructor that forwards the name, description, and problem file + * to DecPOMDPDiscrete and sets the number of agents to 1. */ + POMDPDiscrete(const std::string &name="received unspec. by POMDPDiscrete", + const std::string &descr="received unspec. by POMDPDiscrete", + const std::string &pf="received unspec. by POMDPDiscrete"); + + // Copy constructor (not implemented): + // POMDPDiscrete(const POMDPDiscrete& a); + /// Destructor. + virtual ~POMDPDiscrete(){}; + // Copy assignment operator +// POMDPDiscrete& operator= (const POMDPDiscrete& o); + + size_t GetNrSingleAgentActions() const { return(GetNrActions(SINGLE_AGENT_INDEX)); } + size_t GetNrSingleAgentObservations() const { return(GetNrObservations(SINGLE_AGENT_INDEX)); } + + ///set the number of actions for the single agent + void SetNrSingleAgentActions( size_t nrA ) + { this->SetNrActions(SINGLE_AGENT_INDEX, nrA); } + ///add an action for the single agent + void AddSingleAgentAction(const std::string &name, const std::string &description="") + { this->AddAction(SINGLE_AGENT_INDEX, name, description); } + ///set the number of observations for the POMDP (single agent) + void SetNrSingleAgentObservations( size_t nrO ) + { this->SetNrObservations(SINGLE_AGENT_INDEX, nrO); } + ///add an observation for the single agent + void AddSingleAgentObservation(const std::string &name, const std::string &description="") + { this->AddObservation(SINGLE_AGENT_INDEX, name, description); } +}; + + +#endif /* !_POMDPDISCRETE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/POSG.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/POSG.cpp new file mode 100644 index 000000000..564004af5 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/POSG.cpp @@ -0,0 +1,84 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3.
However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "POSG.h" + +using namespace std; + +POSG::POSG() +{ + _m_initialized = false; + _m_nrAgents = 0; + //we can not call GetNrAgents from here... + //instead this object will have to have the number of agents set and be + //initialized. + //_m_rewardType = vector(GetNrAgents(), REWARD); + //_m_discount = vector(GetNrAgents(), 1.0); +} + +void POSG::SetDiscount(Index agentI, double d) +{ + if(d>=0 && d<=1) + _m_discount.at(agentI)=d; + else + throw(E("POSG::SetDiscount() discount not valid, should be >=0 and <=1")); +} + +string POSG::SoftPrint() const +{ + stringstream ss; + ss << "Discount factors: " << + PrintTools::SoftPrintVector(_m_discount) << endl; + ss << "Reward type: " << + PrintTools::SoftPrintVector(_m_rewardType) << endl; + return ss.str(); +} + +void POSG::SetRewardType(Index agentI, reward_t r) +{ + if(r!=REWARD) + throw(E("POSG::SetRewardType only reward type REWARD is supported")); + _m_rewardType.at(agentI) = r; +} + +///changed initialized status +bool POSG::SetInitialized(bool b) +{ + if(_m_nrAgents == 0) + { + throw E("POSG::SetInitialized failed because POSG doesn't know the \ +number of agents yet. (use SetNrAgents first!)"); + } + + //do some checks? + _m_initialized = true; + return(true); +} + +///Sets the number of agents +void POSG::SetNrAgents (size_t nrAgents) +{ + if(_m_initialized) + { + //do some de-initialization things ? 
+ _m_initialized = false; + _m_nrAgents = nrAgents; + } + + _m_discount = vector(_m_nrAgents, 1.0); + _m_rewardType = vector(_m_nrAgents, REWARD); + +} + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/POSG.h b/payntbind/src/synthesis/decpomdp/madp/src/base/POSG.h new file mode 100644 index 000000000..8ceb793f9 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/POSG.h @@ -0,0 +1,87 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#ifndef _POSG_H_ +#define _POSG_H_ 1 + +/* the include directives */ + +#include +#include "Globals.h" +#include "POSGInterface.h" + + +/**\brief POSG is a simple implementation of POSGInterface. + * + * It defines a couple of functions that relate to the (types of) + * rewards and discount factors for each agent. + * + * Conceptually an MultiAgentDecisionProcess that implements this interface, is + * a POSG: each agent his its own reward function. + */ +class POSG : + virtual public POSGInterface +{ + private: + /// is this initialized? + bool _m_initialized; + /// the number of agents + size_t _m_nrAgents; + + /// The discount parameter. + /** When agents have different interests (the POSG setting), + * they may also have different discount factors. + * For a + * POSG, however, we have one global discount factor + * (which typically is 1.0 in the finite horizon case). + **/ + std::vector _m_discount; + /// Do the agents get rewards or costs? 
+ std::vector _m_rewardType; + protected: + + public: + + // constructors etc. + // Default constructor. sets initialized to false + POSG(); + + ///changed initialized status + bool SetInitialized(bool b); + + ///Sets the number of agents + void SetNrAgents (size_t nrAgents); + + /// Sets the discount parameter of \a agentI to \a d. + void SetDiscount(Index agentI, double d); + /// Returns the discount parameter for agent \a agentI. + double GetDiscount(Index agentI) const {return _m_discount.at(agentI);} + /// Sets the reward type to reward_t r. + /** At the moment only REWARD is supported. */ + void SetRewardType(Index agentI, reward_t r); + /// Returns the reward type. + reward_t GetRewardType(Index agentI) const + {return _m_rewardType.at(agentI);} + + /// SoftPrints some information on the POSG. + std::string SoftPrint() const; + +}; + +#endif //! _POSG_H_ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/POSGDiscrete.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/POSGDiscrete.cpp new file mode 100644 index 000000000..2250388de --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/POSGDiscrete.cpp @@ -0,0 +1,107 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "POSGDiscrete.h" + +using namespace std; + +#define DEBUG_DPOMDPD 0 +//Debug Fill POSG Discrete - functions to initialize the POSGDiscrete +//(typically from a file) +#define DEBUG_FILLDPD 0 + +POSGDiscrete::POSGDiscrete( + const string &name, const string &descr, const string &pf) : + MultiAgentDecisionProcessDiscrete(name, descr, pf) +{ + _m_initialized = false; + size_t nrAgents = GetNrAgents(); + POSG::SetNrAgents(nrAgents); + POSG::SetInitialized(true); + _m_p_rModel = vector(nrAgents, 0); + +} + +POSGDiscrete::~POSGDiscrete() +{ + if(DEBUG_DPOMDPD) + cout << "deleting POSGDiscrete (deleting rewards)"<::iterator it = _m_p_rModel.begin(); + it != _m_p_rModel.end(); it++) + delete *it; +} + +bool POSGDiscrete::SetInitialized(bool b) +{ + if( MultiAgentDecisionProcessDiscrete::SetInitialized(true) ) + { + _m_initialized = b; + return(true); + } + else + return(false); +} + +void POSGDiscrete::CreateNewRewardModel + (Index agentI, size_t nrS, size_t nrJA) +{ + // slots start as null and delete on null is a no-op: always free the old model + delete(_m_p_rModel.at(agentI)); + + _m_p_rModel.at(agentI) = new RewardModelMapping( nrS, nrJA); +} + +string POSGDiscrete::SoftPrint() const +{ + stringstream ss; + ss << MultiAgentDecisionProcessDiscrete::SoftPrint(); + ss << POSG::SoftPrint(); + + if(_m_initialized) + { + for(Index agentI = 0; agentI < GetNrAgents(); agentI++) + { + ss << "Reward model for agent "<SoftPrint() << endl; + } + } + else + throw E("POSGDiscrete components (reward model) not initialized"); + + return(ss.str()); +} + +double POSGDiscrete::GetReward(Index agentI, State* s, JointAction* ja) + const +{ + return GetReward(agentI, + ((StateDiscrete*)s)->GetIndex(), + ((JointActionDiscrete*)ja)->GetIndex()); +} + +void POSGDiscrete::SetReward(Index agentI, Index sI, Index jaI, + Index sucSI, double r) +{ + double rOld=GetReward(agentI, sI,jaI), + rExp=GetTransitionProbability(sI,jaI,sucSI)*r; + SetReward(agentI, sI,jaI,rOld+rExp); +} + +void POSGDiscrete::SetReward(Index agentI, Index sI, Index
jaI, Index sucSI, + Index joI, double r) +{ + throw(E("POSGDiscrete::SetReward(agentI, sI,jaI,sucSI,joI,r) not implemented")); +} + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/POSGDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/POSGDiscrete.h new file mode 100644 index 000000000..e47eb7723 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/POSGDiscrete.h @@ -0,0 +1,113 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _POSGDISCRETE_H_ +#define _POSGDISCRETE_H_ 1 + +/* the include directives */ +#include +#include +#include "Globals.h" +#include "MultiAgentDecisionProcessDiscrete.h" +#include "RewardModelMapping.h" +#include "POSG.h" +#include "POSGDiscreteInterface.h" + +/**\brief POSGDiscrete represent a discrete POSG model. + * + * It implements POSGDiscreteInterface. + * + * Also it inherits + * -MultiAgentDecisionProcessDiscrete + * -POSG + * + * and thus implements + * -POSGInterface + * -MultiAgentDecisionProcessDiscreteInterface + * -MultiAgentDecisionProcessInterface + * */ +class POSGDiscrete : + virtual public POSGDiscreteInterface, + public MultiAgentDecisionProcessDiscrete, + public POSG +{ + private: + ///Boolean that tracks whether this POSG is initialized. + bool _m_initialized; + + protected: + + /// The reward model used by POSGDiscrete is a RewardModelMapping + std::vector _m_p_rModel; + + public: + + // Constructor, destructor and copy assignment. 
+ /// Default constructor. + /** Constructor that sets the name, description, and problem file, + * and subsequently loads this problem file. */ + POSGDiscrete(const std::string &name="received unspec. by POSGDiscrete", + const std::string &descr="received unspec. by POSGDiscrete", + const std::string &pf="received unspec. by POSGDiscrete"); + /// Destructor. + ~POSGDiscrete(); + + //data manipulation (set) functions: + /// Sets _m_initialized to b. + /** When setting to true, a verification of member elements is + * performed. (i.e. a check whether all vectors have the + * correct size and non-zero entries) */ + bool SetInitialized(bool b); + /// Creates a new reward model. + void CreateNewRewardModel( Index agentI, size_t nrS, size_t nrJA); + /// Set the reward for state, joint action indices + void SetReward(Index agentI, Index sI, Index jaI, double r) + { _m_p_rModel.at(agentI)->Set(sI, jaI, r);} + + /// Set the reward for state, joint action , suc. state indices + void SetReward(Index agentI, Index sI, Index jaI, Index sucSI, + double r); + + /// Set the reward for state, joint action, suc.state, joint obs indices + void SetReward(Index agentI, Index sI, Index jaI, Index sucSI, + Index joI, double r); + + // 'get' functions: + /// Return the reward for state, joint action indices + double GetReward(Index agentI, Index sI, Index jaI) const + { return(_m_p_rModel.at(agentI)->Get(sI, jaI));} + + /// Prints some information on the POSGDiscrete. + std::string SoftPrint() const; + + //We need to implement this for POSG: + double GetReward(Index agentI, State* s, JointAction* ja) const; + + /// Get a pointer to the reward model. + RewardModelMapping* GetRewardModelPtr(Index agentI) const + { return(_m_p_rModel.at(agentI)); } + + /// Returns a copy of this class. 
+ virtual POSGDiscrete* Clone() const = 0; + +}; + +#endif /* !_POSGDISCRETE_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/POSGDiscreteInterface.h b/payntbind/src/synthesis/decpomdp/madp/src/base/POSGDiscreteInterface.h new file mode 100644 index 000000000..4bb371e08 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/POSGDiscreteInterface.h @@ -0,0 +1,79 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _POSGDISCRETEINTERFACE_H_ +#define _POSGDISCRETEINTERFACE_H_ 1 + +/* the include directives */ +#include +#include +#include "Globals.h" +#include "MultiAgentDecisionProcessDiscreteInterface.h" +#include "POSGInterface.h" + +/**\brief POSGDiscreteInterface is the interface for + * a discrete POSG model: it defines the set/get reward functions. + * + * POSGDiscreteInterface is an interface (i.e. pure abstract class) for + * a discrete POSG model. This means that there is a single reward function + * and that states, actions and observations are discrete. + * + * Classes that implement this interface are, for instance, POSGDiscrete + * and TransitionObservationIndependentPOSGDiscrete. + **/ +class POSGDiscreteInterface : + virtual public MultiAgentDecisionProcessDiscreteInterface, + virtual public POSGInterface +{ + private: + + protected: + + public: + /// Destructor. 
+ virtual ~POSGDiscreteInterface() {}; + + /// Creates a new reward model mapping. + virtual void CreateNewRewardModelForAgent( + Index agentI) = 0; + /// Set the reward for state, joint action indices + virtual void SetRewardForAgent(Index agentI, Index sI, Index jaI, + double r) = 0; + + /// Set the reward for state, joint action , suc. state indices + virtual void SetRewardForAgent(Index agentI, Index sI, Index jaI, + Index sucSI, double r) = 0; + + /// Set the reward for state, joint action, suc.state, joint obs indices + virtual void SetRewardForAgent(Index agentI, Index sI, Index jaI, + Index sucSI, Index joI, double r) = 0; + + // 'get' functions: + /// Return the reward for state, joint action indices + virtual double GetRewardForAgent(Index agentI, Index sI, Index jaI) + const = 0; + + /// Returns a copy of this class. + virtual POSGDiscreteInterface* Clone() const = 0; + +}; + +#endif /* !_POSGDISCRETEINTERFACE_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/POSGInterface.h b/payntbind/src/synthesis/decpomdp/madp/src/base/POSGInterface.h new file mode 100644 index 000000000..d3e5fa0e9 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/POSGInterface.h @@ -0,0 +1,75 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#ifndef _POSGINTERFACE_H_ +#define _POSGINTERFACE_H_ 1 + +/* the include directives */ + +#include +#include "Globals.h" +#include "MultiAgentDecisionProcessInterface.h" +class State; +class JointAction; + + + +/**\brief POSGInterface is an interface for POSGs. It declares a couple + * of function that relate to the (types of) rewards and discount factor. + * + * Conceptually an MultiAgentDecisionProcess that implements this interface, is + * a POSG: each agent has its own reward function. + */ +class POSGInterface : virtual public MultiAgentDecisionProcessInterface +{ + private: + + protected: + + public: + + /// Virtual destructor. + virtual ~POSGInterface() {}; + + /// Sets the discount parameter to 0 < d <= 1. + virtual void SetDiscountForAgent(Index agentI, double d) = 0; + + /// Returns the discount parameter. + virtual double GetDiscountForAgent(Index agentI) const = 0; + + /// Sets the reward type to reward_t r. + virtual void SetRewardTypeForAgent(Index agentI, reward_t r) = 0; + + /// Returns the reward type. + virtual reward_t GetRewardTypeForAgent(Index agentI) const = 0; + + /// Function that sets the reward for an agent, state and joint action. + /** This should be very generic.*/ + virtual void SetRewardForAgent(Index agentI, State* s, JointAction* ja, + double r) = 0; + /// Function that returns the reward for a state and joint action. + /** This should be very generic.*/ + virtual double GetRewardForAgent(Index agentI, State* s, + JointAction* ja) const = 0; + + /// Returns a pointer to a copy of this class. + virtual POSGInterface* Clone() const = 0; +}; + +#endif //! 
_POSGINTERFACE_H_ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/PrintTools.h b/payntbind/src/synthesis/decpomdp/madp/src/base/PrintTools.h new file mode 100644 index 000000000..842e9c24a --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/PrintTools.h @@ -0,0 +1,334 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#ifndef _PRINTTOOLS_H_ +#define _PRINTTOOLS_H_ 1 + +#include +#include +#include +#include +#include "boost/numeric/ublas/vector.hpp" +#include "boost/numeric/ublas/vector_sparse.hpp" +#include "boost/bimap.hpp" +#include "boost/unordered_map.hpp" +#include "boost/unordered_set.hpp" + +/// adding general stream operator for vector +template +std::ostream &operator <<(std::ostream &os, const std::vector &v) +{ + os<<'<'; + for (typename std::vector::const_iterator it=v.begin();it!=v.end();it++) + { + if (it!=v.begin()) os<<','; + os<<*it; + } + os<<'>'; + return os; +} + +/// adding general stream operator for set +template +std::ostream &operator <<(std::ostream &os, const std::set &s) +{ + os<<'{'; + for (typename std::set::const_iterator it=s.begin();it!=s.end();it++) + { + if (it!=s.begin()) os<<','; + os<<*it; + } + os<<'}'; + return os; +} + +/// adding general stream operator for map +template + std::ostream &operator <<(std::ostream &os, const std::map &m) +{ + os<<'['; + for (typename std::map::const_iterator it=m.begin();it!=m.end();it++) + { + if (it!=m.begin()) 
os<<','; + os<first<<"=>"<second; + } + os<<']'; + return os; +} + +/// adding general stream operator for bimap +template + std::ostream &operator <<(std::ostream &os, const boost::bimap &b) +{ + os<<'['; + typename boost::bimap::left_map::const_iterator it; + for (it=b.left.begin();it!=b.left.end();it++) + { + if (it!=b.left.begin()) os<<','; + os<first<<"<=>"<second; + } + os<<']'; + return os; +} + +/// adding general stream operator for unordered_map +template +std::ostream &operator <<(std::ostream &os, const boost::unordered_map &s) +{ + os<<"{"; + for (typename boost::unordered_map::const_iterator it=s.begin();it!=s.end();it++) + { + if (it!=s.begin()) os<<","; + os<first<<":"<second; + } + os<<"}"< +std::ostream &operator <<(std::ostream &os, const boost::unordered_set &s) +{ + os<<"<"; + for (typename boost::unordered_set::const_iterator it=s.begin();it!=s.end();it++) + { + if (it!=s.begin()) os<<","; + os<<*it; + } + os<<">"< +static std::string SoftPrintVector(const T &v) +{ + std::stringstream ss; + ss << v; + return(ss.str()); +} + +template +static std::string SoftPrintVector(const std::vector &v) +{ + std::stringstream ss; + typename std::vector::const_iterator it = v.begin(); + typename std::vector::const_iterator last = v.end(); + ss << "< "; + while(it != last) + { + if(it != v.begin()) + ss << ", "; + + ss << SoftPrintVector(*it); + it++; + } + ss << " >"; + return(ss.str()); +} + + +template +static std::string SoftPrintSet(const T &v) +{ + std::stringstream ss; + ss << v; + return(ss.str()); +} + +template +static std::string SoftPrintSet(const std::set &v) +{ + std::stringstream ss; + typename std::set::const_iterator it = v.begin(); + typename std::set::const_iterator last = v.end(); + ss << "{ "; + while(it != last) + { + if(it != v.begin()) + ss << ", "; + + ss << SoftPrintSet(*it); + it++; + } + ss << " }"; + return(ss.str()); +} + + +template +static std::string SoftPrintVector(const boost::numeric::ublas::mapped_vector &v) +{ + 
std::stringstream ss; + typename boost::numeric::ublas::mapped_vector::const_iterator it = v.begin(); + typename boost::numeric::ublas::mapped_vector::const_iterator last = v.end(); + ss << "< "; + while(it != last) + { + if(it != v.begin()) + ss << ", "; + + ss << it.index() << ":" << *it; + it++; + } + ss << " >"; + return(ss.str()); +} + + +template +static std::string SoftPrintVector(const boost::numeric::ublas::compressed_vector &v) +{ + std::stringstream ss; + typename boost::numeric::ublas::mapped_vector::const_iterator it = v.begin(); + typename boost::numeric::ublas::mapped_vector::const_iterator last = v.end(); + ss << "< "; + while(it != last) + { + if(it != v.begin()) + ss << ", "; + + ss << it.index() << ":" << *it; + it++; + } + ss << " >"; + return(ss.str()); +} + + +template +static std::string SoftPrintVector(const boost::numeric::ublas::coordinate_vector &v) +{ + std::stringstream ss; + typename boost::numeric::ublas::mapped_vector::const_iterator it = v.begin(); + typename boost::numeric::ublas::mapped_vector::const_iterator last = v.end(); + ss << "< "; + while(it != last) + { + if(it != v.begin()) + ss << ", "; + + ss << it.index() << ":" << *it; + it++; + } + ss << " >"; + return(ss.str()); +} + + +/**Prints a vector using cout - i.e. 
the data type can be written to cout using + * << (operator<< must be defined for T).*/ +template +static void PrintVectorCout(const T &v) +{ + std::cout << v; +} + +template +static void PrintVectorCout(const std::vector &v) +{ + std::cout << SoftPrintVector(v) << std::endl; +} + +template +static void PrintVectorCout(const boost::numeric::ublas::mapped_vector &v) +{ + std::cout << SoftPrintVector(v) << std::endl; +} + +template +static void PrintVectorCout(const boost::numeric::ublas::compressed_vector &v) +{ + std::cout << SoftPrintVector(v) << std::endl; +} + +template +static void PrintVectorCout(const boost::numeric::ublas::coordinate_vector &v) +{ + std::cout << SoftPrintVector(v) << std::endl; +} + + +template +static void PrintCout(const T &v) +{ + std::cout << v; +} + +template +static void PrintCout(const std::vector &v) +{ + PrintVectorCout(v); +} + +template +static void PrintCout(const std::set &v) +{ + std::cout << SoftPrint(v); +} + +template +static std::string SoftPrint(const T &v) +{ + std::stringstream ss; + ss << v; + return(ss.str()); +} + +template +static std::string SoftPrint(const std::vector &v) +{ + return(SoftPrintVector(v)); +} + +template +static std::string SoftPrint(const std::set &v) +{ + std::stringstream ss; + typename std::set::const_iterator it = v.begin(); + typename std::set::const_iterator last = v.end(); + ss << "< "; + while(it != last) + { + if(it != v.begin()) + ss << ", "; + + ss << SoftPrint(*it); + it++; + } + ss << " >"; + return(ss.str()); +} + +template +static void PrintProgress(T prefix, LIndex i, + LIndex nr, size_t interval) +{ + if(i % interval == 0&& i > interval) + { + std::cout << prefix << " "<< i << " of " << nr << " - " + << std::setprecision(4) + << (CastLIndexToDouble(i) / CastLIndexToDouble(nr)) * 100 + << "%" << std::endl; + } +} + +} + +#endif /* !_PRINTTOOLS_H_ */ diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/QTableInterface.h 
b/payntbind/src/synthesis/decpomdp/madp/src/base/QTableInterface.h new file mode 100644 index 000000000..43f219899 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/QTableInterface.h @@ -0,0 +1,55 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _QTABLEINTERFACE_H_ +#define _QTABLEINTERFACE_H_ 1 + +/* the include directives */ +#include "Globals.h" + +/** \brief QTableInterface is the abstract base class for Q(., a) functions. + * It represents functions mapping from some domain (e.g. states, local states, + * histories, etc.) and some action domain (individual, joint or group actions) + * to a real number representing some form of payoff (long term reward, or + * immediate reward). + * + * Note the argument of the functions defined here assume Q(s,a), but is + * should be clear that for s_i any general domain index may be used. + * + * */ +class QTableInterface +{ + private: + + protected: + + public: + virtual double Get(Index s_i, Index ja_i) const = 0; + virtual void Set(Index s_i, Index ja_i, double rew) = 0; + + virtual ~QTableInterface(){}; + + /// Returns a pointer to a copy of this class. 
+ virtual QTableInterface* Clone() const = 0; + +}; + + +#endif /* !_QTABLEINTERFACE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/RGet.h b/payntbind/src/synthesis/decpomdp/madp/src/base/RGet.h new file mode 100644 index 000000000..3f8d796fd --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/RGet.h @@ -0,0 +1,85 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _RGET_H_ +#define _RGET_H_ 1 + +/* the include directives */ +#include "Globals.h" + +#include "RewardModelMapping.h" +#include "RewardModelMappingSparse.h" + +/** \brief RGet can be used for direct access to a reward model. + */ +class RGet +{ +public: + virtual ~RGet() = 0; + //get (data) functions: + virtual double Get(Index sI, Index jaI) const = 0; +}; + +//http://www.parashift.com/c++-faq-lite/pointers-to-members.html +//says "defined even though it's pure virtual; it's faster this way; trust me" +inline RGet::~RGet() {} + +/** \brief RGet can be used for direct access to a RewardModelMapping. + */ +class RGet_RewardModelMapping : public RGet +{ + +private: + const RewardModelMapping::Matrix& _m_R; +public: + RGet_RewardModelMapping( RewardModelMapping* rm) + : + _m_R ( rm->_m_R ) + {}; + + virtual double Get(Index sI, Index jaI) const + { + return( _m_R(sI,jaI)) ; + } +}; + +/** \brief RGet can be used for direct access to a RewardModelMappingSparse. 
+ */ +class RGet_RewardModelMappingSparse : public RGet +{ + +private: + + const RewardModelMappingSparse::SparseMatrix& _m_R; +public: + RGet_RewardModelMappingSparse( RewardModelMappingSparse* rm) + : + _m_R ( rm->_m_R ) + {}; + + virtual double Get(Index sI, Index jaI) const + { + return( _m_R(sI,jaI)) ; + } + + +}; + +#endif /* !_RGET_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModel.h b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModel.h new file mode 100644 index 000000000..9134e4ba4 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModel.h @@ -0,0 +1,56 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _REWARDMODEL_H_ +#define _REWARDMODEL_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include "RewardModelDiscreteInterface.h" + +/// RewardModel represents the reward model in a decision process. +class RewardModel : + public RewardModelDiscreteInterface +{ +private: + /// The number of states. + size_t _m_nrStates; + /// The number of joint actions. + size_t _m_nrJointActions; + +public: + // Constructor, destructor and copy assignment. 
+ /// default Constructor + RewardModel(size_t nrS=1, size_t nrJA=1) : + _m_nrStates(nrS), + _m_nrJointActions(nrJA) + {}; + + size_t GetNrStates() const { return(_m_nrStates); } + size_t GetNrJointActions() const { return(_m_nrJointActions); } + + /// Destructor. + virtual ~RewardModel(){}; + +}; + +#endif /* !_REWARDMODEL_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelDiscreteInterface.h b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelDiscreteInterface.h new file mode 100644 index 000000000..b9f800008 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelDiscreteInterface.h @@ -0,0 +1,68 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _REWARDMODELDISCRETEINTERFACE_H_ +#define _REWARDMODELDISCRETEINTERFACE_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include "QTableInterface.h" + +/// RewardModelDiscreteInterface is an interface for discrete reward models. +class RewardModelDiscreteInterface : + public QTableInterface +{ +private: + +protected: + +public: + // Constructor, destructor and copy assignment. + /// default Constructor + RewardModelDiscreteInterface(){}; + + /// Destructor. 
+ virtual ~RewardModelDiscreteInterface(){}; + + /// Returns R(s,ja) + virtual double Get(Index s_i, Index ja_i) const = 0; + + //data manipulation funtions: + /// Sets R(s_i,ja_i) + /** Index ja_i, Index s_i, are indices of the state and taken + * joint action. r is the reward. The order of events is s, ja, so + * is the arg. list. */ + virtual void Set(Index s_i, Index ja_i, double rew) = 0; + + /// Returns a pointer to a copy of this class. + virtual RewardModelDiscreteInterface* Clone() const = 0; + + /// Prints a description of *this* to a string. + virtual std::string SoftPrint() const = 0; + + ///Print *this* to cout. + void Print() const + { std::cout << SoftPrint();} + +}; + +#endif /* !_REWARDMODELDISCRETEINTERFACE_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMapping.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMapping.cpp new file mode 100644 index 000000000..bdec5ad1d --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMapping.cpp @@ -0,0 +1,58 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "RewardModelMapping.h" + +using namespace std; +#define DEBUG 0 +RewardModelMapping::RewardModelMapping(size_t nrS, size_t nrJA, + const string &s_str, + const string &ja_str) : + RewardModel(nrS, nrJA), + _m_R(nrS,nrJA) +{ +#if DEBUG + cout << "Initialized a reward model with " + << nrS << "'"<< s_str <<"' and " + << nrJA << "'"<< ja_str <<"'"<< endl; +#endif + _m_s_str = s_str; + _m_ja_str = ja_str; + for(Index i=0; i < nrS; i++) + for(Index j=0; j < nrJA; j++) + _m_R(i, j) = 0.0; +} + +RewardModelMapping::~RewardModelMapping() +{ +} + +string RewardModelMapping::SoftPrint() const +{ + stringstream ss; + double r; + ss << _m_s_str <<"\t"<< _m_ja_str <<"\t" + << "R(" << _m_s_str <<","<< _m_ja_str + << ") (rewards of 0 are not printed)"<0) + ss << s_i << "\t" << ja_i << "\t" << r << endl; + } + return(ss.str()); +} + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMapping.h b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMapping.h new file mode 100644 index 000000000..9056c3eb1 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMapping.h @@ -0,0 +1,84 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _REWARDMODELMAPPING_H_ +#define _REWARDMODELMAPPING_H_ 1 + +/* the include directives */ +#include "boost/numeric/ublas/matrix.hpp" +#include "Globals.h" +#include "RewardModel.h" + +/// RewardModelMapping represents a discrete reward model. 
+class RewardModelMapping : public RewardModel +{ +private: + + std::string _m_s_str; + std::string _m_ja_str; + + typedef boost::numeric::ublas::matrix Matrix; + + Matrix _m_R; + +protected: + +public: + // Constructor, destructor and copy assignment. + /** default Constructor + * nrS - number of states + * nrJA - number of joint actions + * s_str - how to call a state (For example you can use this class to + * create a mapping from observation histories and ja's to + * reals. Then this argument could be "joh") + * ja_str - idem for the joint actions + */ + RewardModelMapping(size_t nrS = 1, size_t nrJA = 1, + const std::string &s_str="s", + const std::string &ja_str="ja"); + /// Copy constructor. + //RewardModelMapping(const RewardModelMapping&); + /// Destructor. + ~RewardModelMapping(); + + /// Returns R(s,ja) + double Get(Index s_i, Index ja_i) const + { return(_m_R(s_i,ja_i)); } + + //data manipulation funtions: + /// Sets R(s_i,ja_i) + /** Index ja_i, Index s_i, are indices of the state and taken + * joint action. r is the reward. The order of events is s, ja, so + * is the arg. list. */ + void Set(Index s_i, Index ja_i, double rew) + { _m_R(s_i,ja_i)=rew; } + + /// Returns a pointer to a copy of this class. + virtual RewardModelMapping* Clone() const + { return new RewardModelMapping(*this); } + + /// Prints a description of *this* to a string. + std::string SoftPrint() const; + + friend class RGet_RewardModelMapping; +}; + +#endif /* !_REWARDMODELMAPPING_H_*/ + +// Local Variables: *** +// mode:c++ *** +// End: *** + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMappingSparse.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMappingSparse.cpp new file mode 100644 index 000000000..f62275712 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMappingSparse.cpp @@ -0,0 +1,50 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. 
+ * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "RewardModelMappingSparse.h" + +using namespace std; + +RewardModelMappingSparse::RewardModelMappingSparse(size_t nrS, size_t nrJA, + const string &s_str, + const string &ja_str) : + RewardModel(nrS, nrJA), + _m_R(nrS,nrJA) +{ + _m_s_str = s_str; + _m_ja_str = ja_str; +} + +RewardModelMappingSparse::~RewardModelMappingSparse() +{ +} + +string RewardModelMappingSparse::SoftPrint() const +{ + stringstream ss; + double r; + ss << _m_s_str <<"\t"<< _m_ja_str <<"\t" + << "R(" << _m_s_str <<","<< _m_ja_str + << ") (rewards of 0 are not printed)"<0) + ss << s_i << "\t" << ja_i << "\t" << r << endl; + } + return(ss.str()); +} + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMappingSparse.h b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMappingSparse.h new file mode 100644 index 000000000..b3eaa45cc --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMappingSparse.h @@ -0,0 +1,99 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +/* Only include this header file once. */ +#ifndef _REWARDMODELMAPPINGSPARSE_H_ +#define _REWARDMODELMAPPINGSPARSE_H_ 1 + +/* the include directives */ +#include "boost/numeric/ublas/matrix_sparse.hpp" +#include "Globals.h" +#include "RewardModel.h" + +/// RewardModelMappingSparse represents a discrete reward model. +class RewardModelMappingSparse : public RewardModel +{ +private: + + std::string _m_s_str; + std::string _m_ja_str; + +#if BOOST_1_32_OR_LOWER // they renamed sparse_vector to mapped_vector + typedef boost::numeric::ublas::sparse_matrix SparseMatrix; +#else + typedef boost::numeric::ublas::compressed_matrix SparseMatrix; + // if the matrix is really large, we might need to resort to a + // mapped matrix: + //typedef boost::numeric::ublas::mapped_matrix SparseMatrix; +#endif + + SparseMatrix _m_R; + +protected: + +public: + // Constructor, destructor and copy assignment. + /** default Constructor + * nrS - number of states + * nrJA - number of joint actions + * s_str - how to call a state (For example you can use this class to + * create a mapping from observation histories and ja's to + * reals. Then this argument could be "joh") + * ja_str - idem for the joint actions + */ + RewardModelMappingSparse(size_t nrS = 1, size_t nrJA = 1, + const std::string &s_str="s", + const std::string &ja_str="ja"); + /// Copy constructor. + //RewardModelMappingSparse(const RewardModelMappingSparse&); + /// Destructor. + ~RewardModelMappingSparse(); + + /// Returns R(s,ja) + double Get(Index s_i, Index ja_i) const + { return(_m_R(s_i,ja_i)); } + + //data manipulation funtions: + /// Sets R(s_i,ja_i) + /** Index ja_i, Index s_i, are indices of the state and taken + * joint action. r is the reward. The order of events is s, ja, so + * is the arg. list. 
*/ + void Set(Index s_i, Index ja_i, double rew) + { + // make sure reward is not 0 + if(fabs(rew) > REWARD_PRECISION) + _m_R(s_i,ja_i)=rew; + // check if we already defined this element, if so remove it + else if(fabs(_m_R(s_i,ja_i))>REWARD_PRECISION) + _m_R.erase_element(s_i,ja_i); + + } + + /// Returns a pointer to a copy of this class. + virtual RewardModelMappingSparse* Clone() const + { return new RewardModelMappingSparse(*this); } + + /// Prints a description of *this* to a string. + std::string SoftPrint() const; + + friend class RGet_RewardModelMappingSparse; +}; + +#endif /* !_REWARDMODELMAPPINGSPARSE_H_*/ + +// Local Variables: *** +// mode:c++ *** +// End: *** + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMappingSparseMapped.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMappingSparseMapped.cpp new file mode 100644 index 000000000..24338e1b2 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMappingSparseMapped.cpp @@ -0,0 +1,50 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "RewardModelMappingSparseMapped.h" + +using namespace std; + +RewardModelMappingSparseMapped::RewardModelMappingSparseMapped(size_t nrS, size_t nrJA, + const string &s_str, + const string &ja_str) : + RewardModel(nrS, nrJA), + _m_R(nrS,nrJA) +{ + _m_s_str = s_str; + _m_ja_str = ja_str; +} + +RewardModelMappingSparseMapped::~RewardModelMappingSparseMapped() +{ +} + +string RewardModelMappingSparseMapped::SoftPrint() const +{ + stringstream ss; + double r; + ss << _m_s_str <<"\t"<< _m_ja_str <<"\t" + << "R(" << _m_s_str <<","<< _m_ja_str + << ") (rewards of 0 are not printed)"<0) + ss << s_i << "\t" << ja_i << "\t" << r << endl; + } + return(ss.str()); +} + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMappingSparseMapped.h b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMappingSparseMapped.h new file mode 100644 index 000000000..38b6ac574 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelMappingSparseMapped.h @@ -0,0 +1,101 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _REWARDMODELMAPPINGSPARSEMAPPED_H_ +#define _REWARDMODELMAPPINGSPARSEMAPPED_H_ 1 + +/* the include directives */ +#include "boost/numeric/ublas/matrix_sparse.hpp" +#include "Globals.h" +#include "RewardModel.h" + +/// RewardModelMappingSparseMapped represents a discrete reward model. +/** This version uses a mapped matrix as sparse representation. 
+ */ +class RewardModelMappingSparseMapped : public RewardModel +{ +private: + + std::string _m_s_str; + std::string _m_ja_str; + +#if BOOST_1_32_OR_LOWER // they renamed sparse_vector to mapped_vector + typedef boost::numeric::ublas::sparse_matrix SparseMatrix; +#else + //typedef boost::numeric::ublas::compressed_matrix SparseMatrix; + // if the matrix is really large, we might need to resort to a + // mapped matrix: + typedef boost::numeric::ublas::mapped_matrix SparseMatrix; +#endif + + SparseMatrix _m_R; + +protected: + +public: + // Constructor, destructor and copy assignment. + /** default Constructor + * nrS - number of states + * nrJA - number of joint actions + * s_str - how to call a state (For example you can use this class to + * create a mapping from observation histories and ja's to + * reals. Then this argument could be "joh") + * ja_str - idem for the joint actions + */ + RewardModelMappingSparseMapped(size_t nrS = 1, size_t nrJA = 1, + const std::string &s_str="s", + const std::string &ja_str="ja"); + /// Copy constructor. + //RewardModelMappingSparseMapped(const RewardModelMappingSparseMapped&); + /// Destructor. + ~RewardModelMappingSparseMapped(); + + /// Returns R(s,ja) + double Get(Index s_i, Index ja_i) const + { return(_m_R(s_i,ja_i)); } + + //data manipulation funtions: + /// Sets R(s_i,ja_i) + /** Index ja_i, Index s_i, are indices of the state and taken + * joint action. r is the reward. The order of events is s, ja, so + * is the arg. list. */ + void Set(Index s_i, Index ja_i, double rew) + { + // make sure reward is not 0 + if(fabs(rew) > REWARD_PRECISION) + _m_R(s_i,ja_i)=rew; + // check if we already defined this element, if so remove it + else if(fabs(_m_R(s_i,ja_i))>REWARD_PRECISION) + _m_R.erase_element(s_i,ja_i); + + } + + /// Returns a pointer to a copy of this class. + virtual RewardModelMappingSparseMapped* Clone() const + { return new RewardModelMappingSparseMapped(*this); } + + /// Prints a description of *this* to a string. 
+ std::string SoftPrint() const; + + friend class RGet_RewardModelMappingSparseMapped; +}; + +#endif /* !_REWARDMODELMAPPINGSPARSEMAPPED_H_*/ + +// Local Variables: *** +// mode:c++ *** +// End: *** + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelTOISparse.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelTOISparse.cpp new file mode 100644 index 000000000..bb591af79 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelTOISparse.cpp @@ -0,0 +1,67 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "RewardModelTOISparse.h" + +using namespace std; + +RewardModelTOISparse::RewardModelTOISparse(const string &s_str, + const string &ja_str) +{ + _m_s_str = s_str; + _m_ja_str = ja_str; +} + +RewardModelTOISparse::~RewardModelTOISparse() +{ +} + +double RewardModelTOISparse::Get(const std::vector &sIs, + const std::vector &aIs) const +{ + if(_m_R.find(make_pair(sIs,aIs))!=_m_R.end()) + return(_m_R.find(make_pair(sIs,aIs))->second); + else + return(0); +} + +void RewardModelTOISparse::Set(const std::vector &sIs, + const std::vector &aIs, + double reward) +{ + _m_R.insert(make_pair(make_pair(sIs,aIs),reward)); +} + +string RewardModelTOISparse::SoftPrint() const +{ +#if 0 + stringstream ss; + double r; + ss << _m_s_str <<"\t"<< _m_ja_str <<"\t" + << "R(" << _m_s_str <<","<< _m_ja_str + << ") (rewards of 0 are not printed)"<0) + ss << s_i << "\t" << ja_i << "\t" << r << endl; + } + return(ss.str()); +#else +#endif + return("RewardModelTOISparse::SoftPrint: not yet implemented"); +} + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelTOISparse.h b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelTOISparse.h new file mode 100644 index 000000000..3d7d50d25 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/RewardModelTOISparse.h @@ -0,0 +1,88 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. 
*/ +#ifndef _REWARDMODELTOISPARSE_H_ +#define _REWARDMODELTOISPARSE_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include + +/// RewardModelTOISparse represents a discrete reward model based on +/// vectors of states and actions. +class RewardModelTOISparse +{ +private: + + std::string _m_s_str; + std::string _m_ja_str; + + std::map, + std::vector >, + double> _m_R; + +protected: + +public: + + // Constructor, destructor and copy assignment. + /** default Constructor + * nrS - number of states + * nrJA - number of joint actions + * s_str - how to call a state (For example you can use this class to + * create a mapping from observation histories and ja's to + * reals. Then this argument could be "joh") + * ja_str - idem for the joint actions + */ + RewardModelTOISparse(const std::string &s_str="s", + const std::string &ja_str="ja"); + /// Copy constructor. + //RewardModelTOISparse(const RewardModelTOISparse&); + /// Destructor. + ~RewardModelTOISparse(); + + /// Returns R(s,ja) + double Get(const std::vector &sIs, + const std::vector &aIs) const; +#if 0 + double Get(Index s_i, Index ja_i) const + { + return(GetReward(JointToIndividualStateIndices(s_i), + JointToIndividualActionIndices(ja_i))); + } +#endif + //data manipulation funtions: + /// Sets R(s_i,ja_i) + /** Index ja_i, Index s_i, are indices of the state and taken + * joint action. r is the reward. The order of events is s, ja, so + * is the arg. list. */ + void Set(const std::vector &sIs, + const std::vector &aIs, + double reward); + + /// Prints a description of *this* to a string. + std::string SoftPrint() const; + ///Print *this* to cout. 
+ void Print() const + { std::cout << SoftPrint();} +}; + +#endif /* !_REWARDMODELTOISPARSE_H_*/ + +// Local Variables: *** +// mode:c++ *** +// End: *** + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/Scope.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/Scope.cpp new file mode 100644 index 000000000..93efcc18f --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/Scope.cpp @@ -0,0 +1,254 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + + +#include "Scope.h" +#include + +using namespace std; + +Scope::Scope(const string &s) +{ + stringstream ss; + ss.str(s); + ss >> *this; +} + +ostream& operator<< (ostream& o, const Scope& s) +{ + const vector& v = s; + return o << SoftPrintVector( v ); +} + +istream& operator>> (istream& i, Scope& s) +{ + char c = 0; + i >> c; + if(c != '<') + { + cerr << "Scope extraction error: expected '<', but got '"<> Index; +#if 0 + cout << "parsed Index="<', but got '"<Contains(i) ) + this->Insert(i); + it++; + } +} + +void Scope::Remove(const Scope& s) +{ + SDT::const_iterator it = s.begin(); + SDT::const_iterator last = s.end(); + while(it != last) + { + Index i = *it; //the index to remove from 'this' + SDT::iterator remove_it = this->GetIteratorForIndex(i); + if( remove_it != this->end() ) + this->erase(remove_it); + + it++; + } + +} +bool Scope::Contains(Index i) const +{ + SDT::const_iterator found_it = find(this->begin(), this->end(), i); + return( found_it != this->end() );// element is in *this* scope 
+} + +bool Scope::IsSubSetOf(const Scope& s) const +{ + Scope interS = Intersection(*this, s); + if(interS.size() == this->size() ) + //all elements in this were also contained by s + return true; + return false; +} + +Scope Scope::Intersection(const Scope& a, const Scope& b) +{ + Scope result; + SDT::const_iterator it = a.SDT::begin(); + SDT::const_iterator last = a.SDT::end(); + while(it != last) + { + Index i = *it; + if(b.Contains(i)) + result.Insert(i); + it++; + } + return result; +} + +bool Scope::Equals(const Scope& s) const +{ + size_t siz= this->size(); + if(siz != s.size()) + return false; + + for(Index i=0; i < siz; i++) + if( (*this)[i] != s[i] ) + return false; + + return true; + +} + +SDT::iterator Scope::GetIteratorForIndex(Index i) +{ + SDT::iterator found_it = find(this->begin(), this->end(), i); + return( found_it ); +} + + +Index Scope::GetPositionForIndex(Index i) const +{ + SDT::const_iterator it = this->begin(); + Index pos = 0; + while( *it != i ) + { + pos++; + it++; + if(it == this->end()) + { + stringstream ss; + ss << "Scope::GetPositionForIndex(Index i=" << i <<") not found!"; + throw E(ss); + } + + } + // *it == i, so... 
+ return(pos); +} + +Scope& Scope::Sort() +{ + sort(this->begin(),this->end()); + return *this; +} + +// -------------------------------- +// sorting based on: +// http://stackoverflow.com/questions/17074324/how-can-i-sort-two-vectors-in-the-same-way-with-criteria-that-uses-only-one-of +// rewriten for C++98 and simplified +template +class CompareVec +{ +public: + typename std::vector vec; + CompareVec(typename std::vector vec) : vec(vec) + {} + + bool operator()(std::size_t a,std::size_t b) + { return vec[a] +std::vector sort_permutation( + const std::vector& vec) +{ + std::vector p(vec.size()); + for (int i=0;i compareVec(vec); + std::sort(p.begin(), p.end(),compareVec); + return p; +} + +template +std::vector apply_permutation( + const std::vector& vec, + const std::vector& p) +{ + std::vector sorted_vec(p.size()); + for (int i=0;i permutation=sort_permutation(scope); + scope =apply_permutation(scope ,permutation); + scopeInstance=apply_permutation(scopeInstance,permutation); +} + +Scope& Scope::SortAndUnique() +{ + Sort(); + SDT::iterator prev=begin(); + for (SDT::iterator it=++begin();it!=end();) + { + if (*it==*prev) + erase(it); + else + { + prev=it; + ++it; + } + } + return *this; +} + +std::string Scope::SoftPrint() const +{ + stringstream ss; + ss << (*this); + return(ss.str()); +} + +ScopeInstance Scope::Instantiate(const std::vector& values) const +{ + ScopeInstance instance; + for (Scope::const_iterator it=begin();it!=end();it++) + instance.Insert(values[*it]); + return instance; +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/Scope.h b/payntbind/src/synthesis/decpomdp/madp/src/base/Scope.h new file mode 100644 index 000000000..c2d21c0f4 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/Scope.h @@ -0,0 +1,121 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. 
However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _SCOPE_H_ +#define _SCOPE_H_ 1 + +#include "Globals.h" +#include +#include +#include + + +//typedef std::vector Scope; + +//we only use this here, so in unnamed namespace: +namespace{ + //the scope data type + typedef std::vector SDT; +} + +class Scope; + +typedef Scope ScopeInstance; // for values of the variables in the scope + +class Scope : public SDT +{ + private: + + public: + ///Default constructor + Scope() : SDT() + {} + + //construct from a vector Index + Scope(const SDT& o) : SDT(o) + {} + + //construct from a range + template + Scope (InputIterator first, InputIterator last, + const allocator_type& a = allocator_type()) + : + SDT (first, last, a) + {} + + //constuct from a string, e.g. Scope s("<1 54 2>") + Scope(const std::string &s); + + //construct from a given size + Scope( size_t size) : SDT(size) + {} + + //copy assign + Scope& operator= (const Scope& o) + { this->SDT::operator=(o); return *this;} + //copy assign from a vector + Scope& operator= (const SDT& o) + { this->SDT::operator=(o); return *this;} + + ///Insert an index into the scope + /**Note this does *not* check for duplicates! + */ + void Insert(Index i) + { + push_back(i); + } + ///Removes all indices in s from \em this. + void Remove(const Scope& s); + ///Merges all indices in s into \em this. 
+ void Insert(const Scope& s); + ///Whether \em this contains Index i + bool Contains(Index i) const; + ///Whether this is a subset of s + bool IsSubSetOf(const Scope& s) const; + ///Whether two scopes are equal (i.e., with same ordering) + bool Equals(const Scope& s) const; + ///Returns a scope containing the intersection of a and b + static Scope Intersection(const Scope& a, const Scope& b); + ///Returns iterator to index \a i in this (or end() if i not contained) + SDT::iterator GetIteratorForIndex(Index i) ; + ///Returns the position of index i within \e this. + /**or throws an exception if \e this does not contain i. + */ + Index GetPositionForIndex(Index i) const; + ///Sorts the indices. + Scope& Sort(); + ///Sorts the indices of the scope and applies the same reordering to the scope instance + static void Sort(Scope& scope,ScopeInstance& scopeInstance); + ///First calls Sort() and then removes duplicate indices. + Scope& SortAndUnique(); + + std::string SoftPrint() const; + + ScopeInstance Instantiate(const std::vector& values) const; + + friend std::ostream& operator<< (std::ostream& o, const Scope& s); + friend std::istream& operator>> (std::istream& i, Scope& s); + + +}; +typedef std::vector Scopes; + + +#endif /* !_SCOPE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/State.h b/payntbind/src/synthesis/decpomdp/madp/src/base/State.h new file mode 100644 index 000000000..645701c97 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/State.h @@ -0,0 +1,44 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. 
+ * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _STATE_H_ +#define _STATE_H_ 1 + +/* the include directives */ +#include +#include + +#include "NamedDescribedEntity.h" + +/// State is a class that represent states. +class State : public NamedDescribedEntity +{ +private: +protected: +public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + State(const std::string &name=std::string("undefined"), + const std::string &description=std::string("undefined")) : + NamedDescribedEntity(name, description){}; + +}; + +#endif /* !_STATE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/StateDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/StateDiscrete.h new file mode 100644 index 000000000..b81213452 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/StateDiscrete.h @@ -0,0 +1,55 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _STATEDISCRETE_H_ +#define _STATEDISCRETE_H_ 1 + +/* the include directives */ +#include +#include +#include "Globals.h" + +#include "State.h" +#include "DiscreteEntity.h" + + +/// StateDiscrete represents discrete states. 
+/** + * StateDiscrete is a class that represent states in a discrete state + * set, which are identified by their index. */ +class StateDiscrete : public State, + public DiscreteEntity +{ +private: + +protected: + +public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + StateDiscrete(Index i=INDEX_MAX, + const std::string &name=std::string("undefined"), + const std::string &description=std::string("undefined")) : + State(name, description), + DiscreteEntity(i){}; + +}; + +#endif /* !_STATEDISCRETE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/StateDistribution.h b/payntbind/src/synthesis/decpomdp/madp/src/base/StateDistribution.h new file mode 100644 index 000000000..a9db5f5c5 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/StateDistribution.h @@ -0,0 +1,54 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _STATEDISTRIBUTION_H_ +#define _STATEDISTRIBUTION_H_ 1 + +/* the include directives */ +#include "Globals.h" + +/** \brief StateDistribution is an interface for probability + * distributions over states. 
*/ +class StateDistribution +{ + private: + + protected: + + public: + virtual ~StateDistribution(){}; + + //operators: + + //data manipulation (set) functions: + + //get (data) functions: + virtual double GetProbability( Index sI) const = 0; + virtual std::vector ToVectorOfDoubles() const = 0; + virtual size_t GetNrStates() const = 0; + + /// Returns a pointer to a copy of this class. + virtual StateDistribution* Clone() const = 0; + + virtual std::string SoftPrint() const = 0; +}; + + +#endif /* !_STATEDISTRIBUTION_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/StateDistributionVector.h b/payntbind/src/synthesis/decpomdp/madp/src/base/StateDistributionVector.h new file mode 100644 index 000000000..defc1e21a --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/StateDistributionVector.h @@ -0,0 +1,98 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _STATEDISTRIBUTIONVECTOR_H_ +#define _STATEDISTRIBUTIONVECTOR_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include "StateDistribution.h" + + +namespace { + typedef std::vector SDV; +} + +/** \brief StateDistributionVector represents a probability + * distribution over states as a vector of doubles. */ +class StateDistributionVector : + public StateDistribution, + public SDV +{ + private: + + protected: + + public: + // Constructor, destructor and copy assignment. 
+ /// (default) Constructor + StateDistributionVector() + : + SDV() + {}; + + /// Copy constructor. + StateDistributionVector(const StateDistributionVector& a) + : + SDV(a) + {}; + StateDistributionVector(const SDV& a) + : + SDV(a) + {}; + /// Destructor. + ~StateDistributionVector(){}; + /// Copy assignment operator + StateDistributionVector& operator= (const StateDistributionVector& o) + { + if (this == &o) return *this; // Gracefully handle self assignment + this->SDV::operator=(o); + return *this; + } + StateDistributionVector& operator= (const SDV& o) + { + if (this == &o) return *this; // Gracefully handle self assignment + this->SDV::operator=(o); + return *this; + } + + //operators: + + //data manipulation (set) functions: + + //get (data) functions: + virtual double GetProbability( Index sI) const + { return this->at(sI); } + + virtual std::vector ToVectorOfDoubles() const + { return *this; } + + virtual size_t GetNrStates() const { return this->size(); } + + /// Returns a pointer to a copy of this class. + virtual StateDistributionVector* Clone() const + { return new StateDistributionVector(*this); } + + virtual std::string SoftPrint() const + { return SoftPrintVector( *((SDV*)this) ); } +}; + + +#endif /* !_STATEDISTRIBUTIONVECTOR_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/StateFactorDiscrete.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/StateFactorDiscrete.cpp new file mode 100644 index 000000000..e37b579ce --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/StateFactorDiscrete.cpp @@ -0,0 +1,76 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. 
+ * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "StateFactorDiscrete.h" + +using namespace std; + +//Default constructor +StateFactorDiscrete::StateFactorDiscrete(const string &n, + const string &d) + : NamedDescribedEntity(n,d) +{ +} +StateFactorDiscrete::StateFactorDiscrete(size_t nrVs, const string &n, + const string &d) + : NamedDescribedEntity(n,d) +{ + _m_domainSize = nrVs; + //perhaps also add strings 0...nrVs-1 to _m_domainValues ? + +} +//Copy constructor. +StateFactorDiscrete::StateFactorDiscrete(const StateFactorDiscrete& o) +{ +} +//Destructor +StateFactorDiscrete::~StateFactorDiscrete() +{ +} +//Copy assignment operator +StateFactorDiscrete& StateFactorDiscrete::operator= (const StateFactorDiscrete& o) +{ + if (this == &o) return *this; // Gracefully handle self assignment + // Put the normal assignment duties here... + + return *this; +} + +string StateFactorDiscrete::SoftPrint() const +{ + stringstream ss; + + ss << "SF '"<< GetName() << "' ("<::const_iterator it = _m_domainValues.begin(); + vector::const_iterator last = _m_domainValues.end(); + while(it != last) + { + if(it != _m_domainValues.begin() ) + ss << ", "; + ss << *it; + it++; + } + ss << "}"; + return(ss.str() ); +} + + +Index StateFactorDiscrete:: +AddStateFactorValue(const string &v) +{ + _m_domainValues.push_back(v); + Index i = _m_domainSize++; + return(i); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/StateFactorDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/StateFactorDiscrete.h new file mode 100644 index 000000000..12f2ab2e6 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/StateFactorDiscrete.h @@ -0,0 +1,97 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. 
However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _STATEFACTORDISCRETE_H_ +#define _STATEFACTORDISCRETE_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "NamedDescribedEntity.h" + +/**\brief StateFactorDiscrete is a class that represents a state variable, or + * factor. + * + * It has a name and description (handled by NamedDescribedEntity), as well as + * a domain: the list of discrete values it can take. These domain values + * are indexed and associated with a string. + * + * E.g. a state variable 'color' could have domain + * 0-'red', 1-'blue', 2-'yellow'. + * + * And Xpos could have domain: + * 0 -'-2', 1-'-1', 2-'0', 3-'+1', 4-'+2' + * + * In the future we might include special state factor types for numerical + * domains(?). + * */ +class StateFactorDiscrete : public NamedDescribedEntity +{ + private: + //not necessary/practical. + //the index of this state-factor + //Index _m_index; + + ///The size of the domain + size_t _m_domainSize; + ///The vector containing the domain values. + /**E.g. for factor 'color' this contains 'blue', 'red', etc. + */ + std::vector _m_domainValues; + + //name and description are stored in NamedDescribedEntity + + protected: + + public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + StateFactorDiscrete(const std::string &n="undef. name", + const std::string &d="undef. descr."); + ///Constructor specifying the number of values. + /**This constructs \a n unnamed values.*/ + StateFactorDiscrete(size_t nrVs, + const std::string &n="undef. 
name", + const std::string &d="undef. descr."); + /// Copy constructor. + StateFactorDiscrete(const StateFactorDiscrete& a); + /// Destructor. + ~StateFactorDiscrete(); + /// Copy assignment operator + StateFactorDiscrete& operator= (const StateFactorDiscrete& o); + + //operators: + + //data manipulation (set) functions: + ///Adds a value to this state factor and returns the new index + /**I.e., the domain index or 'state factor value' index. + */ + Index AddStateFactorValue(const std::string &v="rec.undef.by StateFactorDiscrete"); + + //get (data) functions: + std::string GetStateFactorValue(Index domainI) const + {return _m_domainValues.at(domainI); } + + ///Soft prints this state factor + std::string SoftPrint() const; +}; + + +#endif /* !_STATEFACTORDISCRETE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/StringTools.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/StringTools.cpp new file mode 100644 index 000000000..b37d9b446 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/StringTools.cpp @@ -0,0 +1,39 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "StringTools.h" + +using namespace std; + +namespace StringTools{ + +string Append(const std::string& ioString, int inValue) +{ + std::ostringstream o; + o << ioString << inValue; + return o.str(); +} + +string Trim(const std::string& ioString) +{ + string trimmed = ioString; + size_t pos = trimmed.find_last_not_of(" \t"); + if(pos < trimmed.length()-1 && pos > 0) + trimmed.erase(pos+1); + return trimmed; +} + +} + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/StringTools.h b/payntbind/src/synthesis/decpomdp/madp/src/base/StringTools.h new file mode 100644 index 000000000..c29ab0e4b --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/StringTools.h @@ -0,0 +1,38 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _STRINGTOOLS_H_ +#define _STRINGTOOLS_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include + +/** \brief StringTools is a namespace that contains utility functions for + * strings. 
+ * */ +namespace StringTools +{ + std::string Append(const std::string& ioString, int inValue); + std::string Trim(const std::string& ioString); +}; + + +#endif /* !_STRINGTOOLS_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TGet.h b/payntbind/src/synthesis/decpomdp/madp/src/base/TGet.h new file mode 100644 index 000000000..465ffd0d1 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TGet.h @@ -0,0 +1,79 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _TGET_H_ +#define _TGET_H_ 1 + +/* the include directives */ +#include "Globals.h" + +#include "TransitionModelMapping.h" +#include "TransitionModelMappingSparse.h" + +/** \brief TGet can be used for direct access to the transition model. */ +class TGet +{ +public: + virtual ~TGet() = 0; + //get (data) functions: + virtual double Get(Index sI, Index jaI, Index sucSI) const = 0; +}; + +//http://www.parashift.com/c++-faq-lite/pointers-to-members.html +//says "defined even though it's pure virtual; it's faster this way; trust me" +inline TGet::~TGet() {} + +/** \brief TGet_TransitionModelMapping can be used for direct access + * to a TransitionModelMapping. 
*/ +class TGet_TransitionModelMapping : public TGet +{ + +private: + std::vector _m_T; +public: + TGet_TransitionModelMapping( TransitionModelMapping* tm) + { + _m_T = tm->_m_T; + }; + + virtual double Get(Index sI, Index jaI, Index sucSI) const + { { return((*_m_T[jaI])(sI,sucSI)); } } + +}; + +/** \brief TGet_TransitionModelMappingSparse can be used for direct + * access to a TransitionModelMappingSparse. */ +class TGet_TransitionModelMappingSparse : public TGet +{ + +private: + std::vector _m_T; +public: + TGet_TransitionModelMappingSparse( TransitionModelMappingSparse* tm) + { + _m_T = tm->_m_T; + }; + + virtual double Get(Index sI, Index jaI, Index sucSI) const + { { return((*_m_T[jaI])(sI,sucSI)); } } + +}; + +#endif /* !_TGET_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TOICompactRewardDecPOMDPDiscrete.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/TOICompactRewardDecPOMDPDiscrete.cpp new file mode 100644 index 000000000..f48de71db --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TOICompactRewardDecPOMDPDiscrete.cpp @@ -0,0 +1,231 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "TOICompactRewardDecPOMDPDiscrete.h" + +using namespace std; + +#define DEBUG_TOICompactRewardDecPOMDPDiscrete 0 + +TOICompactRewardDecPOMDPDiscrete:: +TOICompactRewardDecPOMDPDiscrete( + const string &name, const string &descr, const string &pf, + bool cacheFlatModels) : + TOIDecPOMDPDiscrete(name, descr, pf, cacheFlatModels), + _m_nrTwoAgentStates(2,0), + _m_nrTwoAgentActions(2,0) +{ + _m_initialized = false; +} + +TOICompactRewardDecPOMDPDiscrete:: +TOICompactRewardDecPOMDPDiscrete(const TOICompactRewardDecPOMDPDiscrete& o) +{ + throw(E("TOICompactRewardDecPOMDPDiscrete: copy ctor not yet implemented")); +} + +TOICompactRewardDecPOMDPDiscrete::~TOICompactRewardDecPOMDPDiscrete() +{ + for(unsigned int i=0;i!=_m_p_rModels.size();++i) + delete(_m_p_rModels[i]); +} + +//Copy assignment operator +TOICompactRewardDecPOMDPDiscrete& +TOICompactRewardDecPOMDPDiscrete::operator= +(const TOICompactRewardDecPOMDPDiscrete& o) +{ + if (this == &o) return *this; // Gracefully handle self assignment + // Put the normal assignment duties here... + + throw(E("TOICompactRewardDecPOMDPDiscrete: assignment not yet implemented")); + + return *this; +} + +bool TOICompactRewardDecPOMDPDiscrete::SetInitialized(bool b) +{ + if(b == false) + { + _m_initialized = false; + return(true); + } + + if( TOIDecPOMDPDiscrete::SetInitialized(true)) + { + if( b == true ) + { + if (_m_p_rModels.size()!=GetNrAgents()) + throw E("TOICompactRewardDecPOMDPDiscrete::SetInitialized(true) : no reward models specified yet!"); + + for(unsigned int i=0;i!=GetNrAgents();++i) + if (_m_p_rModels[i] == 0) + throw E("TOICompactRewardDecPOMDPDiscrete::SetInitialized(true) : no reward model specified yet! 
( _m_p_rModels[i] == 0 )"); + + size_t nrStates=GetIndividualDecPOMDPD(0)->GetNrStates(); + for(unsigned int i=0;i!=GetNrAgents();++i) + if(GetIndividualDecPOMDPD(i)->GetNrStates()!=nrStates) + throw E("TOICompactRewardDecPOMDPDiscrete::SetInitialized(true) : all agents are required to have the same state space (same number of individual states)"); + } + + for(Index i=0;i!=2;++i) + { + _m_nrTwoAgentStates[i]=GetIndividualMADPD(i)->GetNrStates(); + _m_nrTwoAgentActions[i]=GetIndividualMADPD(i)->GetNrJointActions(); + } + + _m_initialized = b; + return(true); + } + else + return(false); +} + +double TOICompactRewardDecPOMDPDiscrete::GetReward(Index sI, Index jaI) const +{ + return(GetReward(JointToIndividualStateIndices(sI), + JointToIndividualActionIndices(jaI))); +} + +double TOICompactRewardDecPOMDPDiscrete::GetReward( + const std::vector &indSIs, + const std::vector &indAIs) const +{ + double reward=0; + + for(unsigned int i=0;i!=GetNrAgents();++i) + reward+=GetIndividualReward(indSIs[i],indAIs[i],i); + + switch(GetNrAgents()) + { + case 2: + reward+=_m_p_rModel->Get(indSIs,indAIs); + break; + default: + { + vector indexVec(2,0); + vector nrElems(2,GetNrAgents()); + + do + { +#if 0 + if(indexVec[0]!=indexVec[1] && + GetTwoAgentReward(indexVec[0],indexVec[1],indSIs,indAIs)!=0) + cout << "adding i " << indexVec[0] + << " si " << indSIs[indexVec[0]] + << GetIndividualMADPD(indexVec[0])->GetState(indSIs[indexVec[0]])->SoftPrint() + << " j " << indexVec[1] + << " sj " << indSIs[indexVec[1]] + << GetIndividualMADPD(indexVec[1])->GetState(indSIs[indexVec[1]])->SoftPrint() + << " r " + << GetTwoAgentReward(indexVec[0],indexVec[1], + indSIs,indAIs) << endl; +#endif + if(indexVec[0]!=indexVec[1]) + reward+=GetTwoAgentReward(indexVec[0],indexVec[1], + indSIs,indAIs); + } + while(!IndexTools::Increment(indexVec,nrElems)); + +#if 0 + reward2+=GetTwoAgentReward(0,1,indSIs,indAIs); + reward2+=GetTwoAgentReward(1,0,indSIs,indAIs); + + 
reward2+=GetTwoAgentReward(0,2,indSIs,indAIs); + reward2+=GetTwoAgentReward(2,0,indSIs,indAIs); + + reward2+=GetTwoAgentReward(1,2,indSIs,indAIs); + reward2+=GetTwoAgentReward(2,1,indSIs,indAIs); +#endif + break; + } + } + +#if DEBUG_TOICompactRewardDecPOMDPDiscrete + cout << "GetReward(" << sI << "," << jaI << ") = " << reward << endl; +#endif + return(reward); +} + +double +TOICompactRewardDecPOMDPDiscrete:: +GetTwoAgentReward(Index i, Index j, + const vector &indSIs, + const vector &indAIs) const +{ + vector sIs(2,0), aIs(2,0); + + sIs[0]=indSIs[i]; aIs[0]=indAIs[i]; + sIs[1]=indSIs[j]; aIs[1]=indAIs[j]; +#if 0 + double reward=_m_p_rModel-> + Get(IndexTools::IndividualToJointIndices(sIs,_m_nrTwoAgentStates), + IndexTools::IndividualToJointIndices(aIs,_m_nrTwoAgentActions)); +#endif + + double reward=_m_p_rModel->Get(sIs,aIs); + +#if 0 + cout << reward << "indSIs " << SoftPrintVector(indSIs) + << SoftPrintVector(sIs) << " indAIs " + << SoftPrintVector(indAIs) + << IndexTools::IndividualToJointIndices(sIs,_m_nrTwoAgentStates) + << SoftPrintVector(aIs) + << IndexTools::IndividualToJointIndices(aIs,_m_nrTwoAgentActions) + << " i " << i << " j " << j << endl; +#endif + return(reward); +} + +void TOICompactRewardDecPOMDPDiscrete:: +SetIndividualRewardModel(RewardModel* rewardModel, + Index agentID) +{ + if(_m_p_rModels.size()<=agentID) + _m_p_rModels.resize(agentID+1); + + _m_p_rModels[agentID]=rewardModel; +} + +double TOICompactRewardDecPOMDPDiscrete:: +GetIndividualReward(Index indSI, Index indAI, Index agentID) const +{ + double reward=_m_p_rModels[agentID]->Get(indSI,indAI); +#if DEBUG_TOICompactRewardDecPOMDPDiscrete + cout << "GetIndividualReward[" << agentID << "](" << indSI << "," << indAI + << ") = " << reward << endl; +#endif + return(reward); +} + +string TOICompactRewardDecPOMDPDiscrete::SoftPrint() const +{ + stringstream ss; + ss << TOIDecPOMDPDiscrete::SoftPrint(); + + if(_m_initialized) + { + ss << "Reward models: " << endl; + for(unsigned int 
i=0;i!=GetNrAgents();++i) + { + ss << "Individual rewards for agent " << i << endl; + ss << _m_p_rModels[i]->SoftPrint(); + } + } + else + throw E("TOICompactRewardDecPOMDPDiscrete components (reward model) not initialized"); + + return(ss.str()); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TOICompactRewardDecPOMDPDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/TOICompactRewardDecPOMDPDiscrete.h new file mode 100644 index 000000000..7105c0d23 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TOICompactRewardDecPOMDPDiscrete.h @@ -0,0 +1,91 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _TOICOMPACTREWARDDECPOMDPDISCRETE_H_ +#define _TOICOMPACTREWARDDECPOMDPDISCRETE_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "TOIDecPOMDPDiscrete.h" + +/**\brief TOICompactRewardDecPOMDPDiscrete is a class that represents a + * transition observation independent Dec-POMDP, in which the reward + * is the sum of each agent's individual reward plus some shared + * reward. The shared reward is defined for two agents only, and + * replicated for all combinations. 
*/ +class TOICompactRewardDecPOMDPDiscrete : + public TOIDecPOMDPDiscrete +{ +private: + /**Boolean that tracks whether this TOICompactRewardDecPOMDPDiscrete is initialized.*/ + bool _m_initialized; + + std::vector_m_nrTwoAgentStates, _m_nrTwoAgentActions; + + double GetTwoAgentReward(Index i, Index j, + const std::vector &indSIs, + const std::vector &indAIs) const; + +protected: + std::vector _m_p_rModels; +public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + TOICompactRewardDecPOMDPDiscrete( + const std::string &name="received unspec. by TOICompactRewardDecPOMDPDiscrete", + const std::string &descr="received unspec. by TOICompactRewardDecPOMDPDiscrete", + const std::string &pf="received unspec. by TOICompactRewardDecPOMDPDiscrete", + bool cacheFlatModels=false); + + TOICompactRewardDecPOMDPDiscrete + (const TOICompactRewardDecPOMDPDiscrete& o); + + virtual ~TOICompactRewardDecPOMDPDiscrete(); + + TOICompactRewardDecPOMDPDiscrete& operator= + (const TOICompactRewardDecPOMDPDiscrete& o); + + /** Sets _m_initialized to b. When setting to true, a verification of + * member elements is performed. (i.e. a check whether all vectors + * have the correct size and non-zero entries) */ + virtual bool SetInitialized(bool b); + + void SetIndividualRewardModel(RewardModel* rewardModel, + Index agentID); + + //get (data) functions: + ///**return the reward for state, joint action indices */ + double GetReward(Index sI, Index jaI) const; + double GetReward(const std::vector &sIs, + const std::vector &aIs) const; + + double GetIndividualReward(Index indSI, Index indAI, Index agentID) const; + + /// Returns a pointer to a copy of this class. 
+ virtual TOICompactRewardDecPOMDPDiscrete* Clone() const + { return new TOICompactRewardDecPOMDPDiscrete(*this); } + + /** SoftPrints some information on the DecPOMDPDiscrete.*/ + std::string SoftPrint() const; +}; + + +#endif /* !_TOICOMPACTREWARDDECPOMDPDISCRETE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TOIDecMDPDiscrete.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/TOIDecMDPDiscrete.cpp new file mode 100644 index 000000000..0065ff0b7 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TOIDecMDPDiscrete.cpp @@ -0,0 +1,63 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "TOIDecMDPDiscrete.h" + +using namespace std; + +//Default constructor +TOIDecMDPDiscrete::TOIDecMDPDiscrete( + const string &name, const string &descr, const string &pf, + bool cacheFlatModels) : + TOIDecPOMDPDiscrete(name, descr, pf, cacheFlatModels) +{ + _m_initialized = false; +} + +//Destructor +TOIDecMDPDiscrete::~TOIDecMDPDiscrete() +{ +} + +bool TOIDecMDPDiscrete::SetInitialized(bool b) +{ + if( TOIDecPOMDPDiscrete::SetInitialized(true) ) + { + if( b == true ) + { + //\todo NOTE: this does not check that the Dec-MDP is indeed jointly (and thus locally) observable + } + _m_initialized = b; + return(true); + } + else + return(false); +} + +void TOIDecMDPDiscrete::CreateStateObservations() +{ + for(Index agI=0; agI < GetNrAgents(); agI++) + { + size_t nrStatesAgent = GetNrStates(agI); + size_t nrActionsAgent = GetNrActions(agI); + SetNrObservations(agI, nrStatesAgent); + MultiAgentDecisionProcessDiscrete* ind_madp = GetIndividualMADPD(agI); + ind_madp->CreateNewObservationModel(); + + for(Index sI=0; sI < nrStatesAgent; sI++) + for(Index aI=0; aI < nrActionsAgent; aI++) + ind_madp->SetObservationProbability(aI, sI, sI, 1.0); + } +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TOIDecMDPDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/TOIDecMDPDiscrete.h new file mode 100644 index 000000000..61e9c2905 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TOIDecMDPDiscrete.h @@ -0,0 +1,64 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. 
+ * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _TOIDECMDPDISCRETE_H_ +#define _TOIDECMDPDISCRETE_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "TOIDecPOMDPDiscrete.h" + +/**\brief TOIDecMDPDiscrete is a class that represents a transition + * observation indepedent discrete DecMDP. */ +class TOIDecMDPDiscrete : + public TOIDecPOMDPDiscrete +{ +private: + /**Boolean that tracks whether this TOIDecMDPDiscrete is initialized.*/ + bool _m_initialized; + +protected: + +public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + TOIDecMDPDiscrete( + const std::string &name="received unspec. by TOI-DecMDPDiscrete", + const std::string &descr="received unspec. by TOI-DecMDPDiscrete", + const std::string &pf="received unspec. by TOI-DecMDPDiscrete", + bool cacheFlatModels=false); + /// Destructor. + virtual ~TOIDecMDPDiscrete(); + + /** Sets _m_initialized to b. When setting to true, a verification of + * member elements is performed. (i.e. a check whether all vectors + * have the correct size and non-zero entries) */ + bool SetInitialized(bool b); + + /**This function creates the 'state observations'. I.e., for each agent + * the same number of observations as local states is created. 
The + * observation probabilities are set such that in a particular state + * the corresponding observation is received with certainty.*/ + void CreateStateObservations(); +}; + + +#endif /* !_TOIDECMDPDISCRETE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TOIDecPOMDPDiscrete.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/TOIDecPOMDPDiscrete.cpp new file mode 100644 index 000000000..e49b39044 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TOIDecPOMDPDiscrete.cpp @@ -0,0 +1,126 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "TOIDecPOMDPDiscrete.h" +#include "RewardModelMappingSparse.h" +#include "RewardModelMapping.h" + +using namespace std; + +TOIDecPOMDPDiscrete:: +TOIDecPOMDPDiscrete( + const string &name, const string &descr, const string &pf, + bool cacheFlatModels) : + TransitionObservationIndependentMADPDiscrete(name, descr, pf, + cacheFlatModels) +{ + _m_initialized = false; + _m_p_rModel = 0; +} + +TOIDecPOMDPDiscrete::TOIDecPOMDPDiscrete(const TOIDecPOMDPDiscrete& o) +{ + throw(E("TOIDecPOMDPDiscrete: copy ctor not yet implemented")); +} +//Destructor +TOIDecPOMDPDiscrete::~TOIDecPOMDPDiscrete() +{ + delete(_m_p_rModel); +} +//Copy assignment operator +TOIDecPOMDPDiscrete& TOIDecPOMDPDiscrete::operator= (const TOIDecPOMDPDiscrete& o) +{ + if (this == &o) return *this; // Gracefully handle self assignment + // Put the normal assignment duties here... + + throw(E("TOIDecPOMDPDiscrete: assignment not yet implemented")); + + return *this; +} + +void TOIDecPOMDPDiscrete::SetIndividualDecPOMDPD(DecPOMDPDiscrete *model, + Index agentI) +{ + if(_m_individualDecPOMDPDs.size()<=agentI) + _m_individualDecPOMDPDs.resize(agentI+1); + + _m_individualDecPOMDPDs[agentI]=model; +} + +bool TOIDecPOMDPDiscrete::SetInitialized(bool b) +{ + if(b == false) + { + _m_initialized = false; + return(true); + } + + if( TransitionObservationIndependentMADPDiscrete::SetInitialized(true) + ) + { + if( b == true ) + { + if (_m_p_rModel == 0) + throw E("TOIDecPOMDPDiscrete::SetInitialized(true) : no reward model specified yet! 
( _m_p_rModel == 0 )"); + + } + _m_initialized = b; + return(true); + } + else + return(false); +} + +void TOIDecPOMDPDiscrete::CreateNewRewardModel() +{ + if(_m_initialized) + delete(_m_p_rModel); +#if 0 + // cannot call GetNrJointStates() and GetNrJointActions() because + // we're not initialized yet + size_t nrJS=1, nrJA=1; + for(Index i=0;i!=GetNrAgents();++i) + { + nrJS*=GetIndividualMADPD(i)->GetNrStates(); + nrJA*=GetIndividualMADPD(i)->GetNrJointActions(); + } + + if(GetSparse()) + _m_p_rModel = new RewardModelMappingSparse(nrJS, + nrJA); + else + _m_p_rModel = new RewardModelMapping(nrJS, + nrJA); +#else + _m_p_rModel = new RewardModelTOISparse(); +#endif +} + +string TOIDecPOMDPDiscrete::SoftPrint() const +{ + stringstream ss; + ss << TransitionObservationIndependentMADPDiscrete::SoftPrint(); + ss << DecPOMDP::SoftPrint(); + + if(_m_initialized) + { + ss << "Reward model: " << endl; + ss << _m_p_rModel->SoftPrint(); + } + else + throw E("TOIDecPOMDPDiscrete components (reward model) not initialized"); + + return(ss.str()); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TOIDecPOMDPDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/TOIDecPOMDPDiscrete.h new file mode 100644 index 000000000..00cb7ade5 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TOIDecPOMDPDiscrete.h @@ -0,0 +1,208 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. 
*/ +#ifndef _TOIDECPOMDPDISCRETE_H_ +#define _TOIDECPOMDPDISCRETE_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "TransitionObservationIndependentMADPDiscrete.h" +#include "RewardModelTOISparse.h" +#include "DecPOMDP.h" +#include "DecPOMDPDiscrete.h" +#include "DecPOMDPDiscreteInterface.h" + +/**\brief TOIDecPOMDPDiscrete is a class that represents a transition + * observation independent discrete DecPOMDP. */ +class TOIDecPOMDPDiscrete : + virtual public DecPOMDPDiscreteInterface, + public TransitionObservationIndependentMADPDiscrete, + public DecPOMDP +{ +private: + /**Boolean that tracks whether this TOIDecPOMDPDiscrete is + * initialized.*/ + bool _m_initialized; + +protected: +#if 0 + /**The reward model used by TOIDecPOMDPDiscrete is a + * RewardModelMapping*/ + RewardModel* _m_p_rModel; +#else + RewardModelTOISparse* _m_p_rModel; +#endif + /// A vector storing pointers to the individual DecPOMDPs. + std::vector _m_individualDecPOMDPDs; + +public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + /** Default constructor. + * Constructor that sets the name, description, and problem file. + * The problem file for a TransitionObservationIndependentMADPDiscrete + * is only the base name, e.g. test.toi-dpomdp. + * The files parsed in such a case are: + * test.toi-dpomdp.base + * test.toi-dpomdp.agent0 + * ... + * test.toi-dpomdp.agentN [where N-1 is the number of agents] + * test.toi-dpomdp.rewards + * + * The .base file specifies the number of agents, discount and reward_t + * The .agenti files specifies the local model for agent i. + * The .rewards files specifies the rewards. + * + */ + TOIDecPOMDPDiscrete( + const std::string &name="received unspec. by TOIDecPOMDPDiscrete", + const std::string &descr="received unspec. by TOIDecPOMDPDiscrete", + const std::string &pf="received unspec. by TOIDecPOMDPDiscrete", + bool cacheFlatModels=false); + + /// Copy constructor. 
+ TOIDecPOMDPDiscrete(const TOIDecPOMDPDiscrete& a); + /// Destructor. + virtual ~TOIDecPOMDPDiscrete(); + + /// Copy assignment operator + TOIDecPOMDPDiscrete& operator= + (const TOIDecPOMDPDiscrete& o); + + //operators: + + //data manipulation (set) functions: + /** Sets _m_initialized to b. When setting to true, a verification of + * member elements is performed. (i.e. a check whether all vectors + * have the correct size and non-zero entries) */ + virtual bool SetInitialized(bool b); + + /// Creates a new reward model. + void CreateNewRewardModel(); + + /// Set the reward for state, joint action indices. + void SetReward(Index sI, Index jaI, double r) + { _m_p_rModel->Set(JointToIndividualStateIndices(sI), + JointToIndividualActionIndices(jaI), + r); + } + + void SetReward(const std::vector &sIs, + const std::vector &aIs, double r) + { _m_p_rModel->Set(sIs, aIs, r); + } + + /**set the reward for state, joint action , suc. state indices */ + void SetReward(Index sI, Index jaI, Index sucSI, double r) + {throw(E("SetReward(sI,jaI,sucSI,r) is not implemented yet..."));} + /**set the reward for state, joint action, suc.state, joint observation + * indices */ + void SetReward(Index sI, Index jaI, Index sucSI, Index joI, + double r) + {throw(E("SetReward(sI,jaI,sucSI,joI, r) not implemented yet..."));} + + void SetIndividualDecPOMDPD(DecPOMDPDiscrete *model, Index agentI); + + //get (data) functions: + + DecPOMDPDiscrete* GetIndividualDecPOMDPD(Index agentI) + const + {return _m_individualDecPOMDPDs[agentI];} + + ///**return the reward for state, joint action indices */ + virtual double GetReward(Index sI, Index jaI) const + { return(GetReward(JointToIndividualStateIndices(sI), + JointToIndividualActionIndices(jaI))); + } + virtual double GetReward(const std::vector &sIs, + const std::vector &aIs) const + { return(_m_p_rModel->Get(sIs, aIs)); + } + + std::vector GetRewards(Index sI, Index jaI) const; + + /** SoftPrints some information on the DecPOMDPDiscrete.*/ + 
virtual std::string SoftPrint() const; + /** Prints some information on the DecPOMDPDiscrete.*/ + void Print() const + {std::cout << SoftPrint();} + + + //functions to implement the DecPOMDPInterface: + ///implements the DecPOMDPInterface + double GetReward(State* s, JointAction* ja) const + { + return GetReward( + ((StateDiscrete*)s)->GetIndex(), + ((JointActionDiscrete*)ja)->GetIndex()); + } + ///implements the DecPOMDPInterface + void SetReward(State* s, JointAction* ja, double r) + { + return SetReward( + ((StateDiscrete*)s)->GetIndex(), + ((JointActionDiscrete*)ja)->GetIndex(), r); + } + + //functions to implement the POSGInterface: + + /// Function that sets the reward for an agent, state and joint action. + /** This should be very generic.*/ + void SetRewardForAgent(Index agentI, State* s, JointAction* ja, double r) + {SetReward(s, ja, r); } + /// Function that returns the reward for a state and joint action. + /** This should be very generic.*/ + double GetRewardForAgent(Index agentI, State* s, JointAction* ja) const + {return GetReward(s, ja); } + + //functions to implement the POSGDiscreteInterface: + + ///implementation of POSGDiscreteInterface + void CreateNewRewardModelForAgent(Index agentI) + {CreateNewRewardModel();} + + void SetRewardForAgent(Index agentI, Index sI, Index jaI, double r) + {SetReward(sI, jaI, r);} + + /// Set the reward for state, joint action , suc. state indices + void SetRewardForAgent(Index agentI, Index sI, Index jaI, + Index sucSI, double r) + {SetReward(sI, jaI, sucSI, r);} + + /// Set the reward for state, joint action, suc.state, joint obs indices + void SetRewardForAgent(Index agentI, Index sI, Index jaI, + Index sucSI, Index joI, double r) + {SetReward(sI, jaI, sucSI, joI, r);} + + /// Return the reward for state, joint action indices + double GetRewardForAgent(Index agentI, Index sI, Index jaI) const + {return GetReward(sI, jaI);} + + RGet* GetRGet() const { return(0); } + + /// Returns a pointer to a copy of this class. 
+ virtual TOIDecPOMDPDiscrete* Clone() const + { return new TOIDecPOMDPDiscrete(*this); } + +}; + + +#endif /* !_TOIDECPOMDPDISCRETE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TOIFactoredRewardDecPOMDPDiscrete.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/TOIFactoredRewardDecPOMDPDiscrete.cpp new file mode 100644 index 000000000..cc37bec6d --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TOIFactoredRewardDecPOMDPDiscrete.cpp @@ -0,0 +1,158 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "TOIFactoredRewardDecPOMDPDiscrete.h" + +using namespace std; + +#define DEBUG_TOIFactoredRewardDecPOMDPDiscrete 0 + +TOIFactoredRewardDecPOMDPDiscrete:: +TOIFactoredRewardDecPOMDPDiscrete( + const string &name, const string &descr, const string &pf, + bool cacheFlatModels) : + TOIDecPOMDPDiscrete(name, descr, pf, cacheFlatModels) +{ + _m_initialized = false; +} + +TOIFactoredRewardDecPOMDPDiscrete:: +TOIFactoredRewardDecPOMDPDiscrete(const TOIFactoredRewardDecPOMDPDiscrete& o) +{ + throw(E("TOIFactoredRewardDecPOMDPDiscrete: copy ctor not yet implemented")); +} + +TOIFactoredRewardDecPOMDPDiscrete::~TOIFactoredRewardDecPOMDPDiscrete() +{ + for(unsigned int i=0;i!=_m_p_rModels.size();++i) + delete(_m_p_rModels[i]); +} + +//Copy assignment operator +TOIFactoredRewardDecPOMDPDiscrete& +TOIFactoredRewardDecPOMDPDiscrete::operator= +(const TOIFactoredRewardDecPOMDPDiscrete& o) +{ + if (this == &o) return *this; // Gracefully handle self assignment + // Put the normal assignment duties here... + + throw(E("TOIFactoredRewardDecPOMDPDiscrete: assignment not yet implemented")); + + return *this; +} + +bool TOIFactoredRewardDecPOMDPDiscrete::SetInitialized(bool b) +{ + if(b == false) + { + _m_initialized = false; + return(true); + } + + if( TOIDecPOMDPDiscrete::SetInitialized(true)) + { + if( b == true ) + { + if (_m_p_rModels.size()!=GetNrAgents()) + throw E("TOIFactoredRewardDecPOMDPDiscrete::SetInitialized(true) : no reward models specified yet!"); + + for(unsigned int i=0;i!=GetNrAgents();++i) + if (_m_p_rModels[i] == 0) + throw E("TOIFactoredRewardDecPOMDPDiscrete::SetInitialized(true) : no reward model specified yet! 
( _m_p_rModels[i] == 0 )"); + + } + _m_initialized = b; + return(true); + } + else + return(false); +} + +double TOIFactoredRewardDecPOMDPDiscrete::GetReward(Index sI, Index jaI) const +{ + double reward=0; + vector indSIs=JointToIndividualStateIndices(sI), + indAIs=JointToIndividualActionIndices(jaI); + + for(unsigned int i=0;i!=GetNrAgents();++i) + reward+=GetIndividualReward(indSIs[i],indAIs[i],i); + + reward+=_m_p_rModel->Get(indSIs,indAIs); +// reward+=_m_p_rModel->Get(sI,jaI); + +#if DEBUG_TOIFactoredRewardDecPOMDPDiscrete + cout << "GetReward(" << sI << "," << jaI << ") = " << reward << endl; +#endif + return(reward); +} + +double +TOIFactoredRewardDecPOMDPDiscrete::GetReward(const std::vector &sIs, + const std::vector &aIs) const +{ + double reward=0; + + for(unsigned int i=0;i!=GetNrAgents();++i) + reward+=GetIndividualReward(sIs[i],aIs[i],i); + + reward+=_m_p_rModel->Get(sIs,aIs); + +#if DEBUG_TOIFactoredRewardDecPOMDPDiscrete + cout << "GetReward(" << sI << "," << jaI << ") = " << reward << endl; +#endif + + return(reward); +} + +void TOIFactoredRewardDecPOMDPDiscrete:: +SetIndividualRewardModel(RewardModel* rewardModel, + Index agentID) +{ + if(_m_p_rModels.size()<=agentID) + _m_p_rModels.resize(agentID+1); + + _m_p_rModels[agentID]=rewardModel; +} + +double TOIFactoredRewardDecPOMDPDiscrete:: +GetIndividualReward(Index indSI, Index indAI, Index agentID) const +{ + double reward=_m_p_rModels[agentID]->Get(indSI,indAI); +#if DEBUG_TOIFactoredRewardDecPOMDPDiscrete + cout << "GetIndividualReward[" << agentID << "](" << indSI << "," << indAI + << ") = " << reward << endl; +#endif + return(reward); +} + +string TOIFactoredRewardDecPOMDPDiscrete::SoftPrint() const +{ + stringstream ss; + ss << TOIDecPOMDPDiscrete::SoftPrint(); + + if(_m_initialized) + { + ss << "Reward models: " << endl; + for(unsigned int i=0;i!=GetNrAgents();++i) + { + ss << "Individual rewards for agent " << i << endl; + ss << _m_p_rModels[i]->SoftPrint(); + } + } + else + throw 
E("TOIFactoredRewardDecPOMDPDiscrete components (reward model) not initialized"); + + return(ss.str()); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TOIFactoredRewardDecPOMDPDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/TOIFactoredRewardDecPOMDPDiscrete.h new file mode 100644 index 000000000..4596a07d7 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TOIFactoredRewardDecPOMDPDiscrete.h @@ -0,0 +1,84 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _TOIFACTOREDREWARDDECPOMDPDISCRETE_H_ +#define _TOIFACTOREDREWARDDECPOMDPDISCRETE_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" +#include "TOIDecPOMDPDiscrete.h" + +/**\brief TOIFactoredRewardDecPOMDPDiscrete is a class that represents a + * transition observation independent Dec-POMDP, in which the reward + * is the sum of each agent's individual reward plus some shared + * reward. */ +class TOIFactoredRewardDecPOMDPDiscrete : + public TOIDecPOMDPDiscrete +{ +private: + /**Boolean that tracks whether this TOIFactoredRewardDecPOMDPDiscrete is initialized.*/ + bool _m_initialized; +protected: + std::vector _m_p_rModels; +public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + TOIFactoredRewardDecPOMDPDiscrete( + const std::string &name="received unspec. by TOIFactoredRewardDecPOMDPDiscrete", + const std::string &descr="received unspec. 
by TOIFactoredRewardDecPOMDPDiscrete", + const std::string &pf="received unspec. by TOIFactoredRewardDecPOMDPDiscrete", + bool cacheFlatModels=false); + + + TOIFactoredRewardDecPOMDPDiscrete + (const TOIFactoredRewardDecPOMDPDiscrete& o); + + virtual ~TOIFactoredRewardDecPOMDPDiscrete(); + + TOIFactoredRewardDecPOMDPDiscrete& operator= + (const TOIFactoredRewardDecPOMDPDiscrete& o); + + /** Sets _m_initialized to b. When setting to true, a verification of + * member elements is performed. (i.e. a check whether all vectors + * have the correct size and non-zero entries) */ + virtual bool SetInitialized(bool b); + + void SetIndividualRewardModel(RewardModel* rewardModel, + Index agentID); + + //get (data) functions: + ///**return the reward for state, joint action indices */ + double GetReward(Index sI, Index jaI) const; + double GetReward(const std::vector &sIs, + const std::vector &aIs) const; + + double GetIndividualReward(Index indSI, Index indAI, Index agentID) const; + + /// Returns a pointer to a copy of this class. + virtual TOIFactoredRewardDecPOMDPDiscrete* Clone() const + { return new TOIFactoredRewardDecPOMDPDiscrete(*this); } + + /** SoftPrints some information on the DecPOMDPDiscrete.*/ + std::string SoftPrint() const; +}; + + +#endif /* !_TOIFACTOREDREWARDDECPOMDPDISCRETE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TimeTools.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/TimeTools.cpp new file mode 100644 index 000000000..0177ea6e1 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TimeTools.cpp @@ -0,0 +1,34 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. 
namespace TimeTools{

/// Returns the time elapsed since start_time, in microseconds.
/** cur_time only serves as scratch storage: it is overwritten with the
 * current time obtained from gettimeofday(), so the value supplied by the
 * caller is ignored. Throws a C string if gettimeofday() fails. */
double GetDeltaTimeDouble(timeval start_time, timeval cur_time)
{
    const int status = gettimeofday(&cur_time, NULL);
    if(status != 0)
        throw "Error with gettimeofday";

    // whole seconds and the (possibly negative) microsecond remainder
    const time_t elapsed_sec = cur_time.tv_sec - start_time.tv_sec;
    const suseconds_t elapsed_usec = cur_time.tv_usec - start_time.tv_usec;

    return 1000000.0 * elapsed_sec + elapsed_usec; //in microsecond
}

}
// ===== TimeTools.h : declaration =====

namespace TimeTools{
    /// Returns the difference between start time and current time.
    /** The result is a double holding the elapsed time in microseconds. */
    double GetDeltaTimeDouble(timeval start_time, timeval cur_time);
}

// ===== TransitionModel.h : class declaration =====

/// TransitionModel represents the transition model in a decision process.
/** Abstract base class: concrete models provide Clone() and SoftPrint(). */
class TransitionModel
{
public:
    /// Default constructor.
    TransitionModel(){}

    /// Virtual destructor, so models can be deleted through a base pointer.
    virtual ~TransitionModel(){}

    /// Returns a pointer to a newly allocated copy of this model.
    virtual TransitionModel* Clone() const = 0;

    /// Returns a textual description of the model.
    virtual std::string SoftPrint() const = 0;

    /// Writes the SoftPrint() description to standard output.
    void Print() const
    {
        const std::string description = SoftPrint();
        std::cout << description;
    }
};
+ */ + +#include "TransitionModelDiscrete.h" + +using namespace std; + +TransitionModelDiscrete::TransitionModelDiscrete(int nrS, int nrJA) : + _m_nrStates(nrS), + _m_nrJointActions(nrJA) +{ +} + +TransitionModelDiscrete::~TransitionModelDiscrete() +{ +} + +string TransitionModelDiscrete::SoftPrint() const +{ + stringstream ss; + double p = 0.0; + ss << "s\tja\ts'\tP (tuples with P==0 are not printed)"<0) + ss << sI << "\t" << jaI << "\t" << sIp << "\t" << p << endl; + } + return(ss.str()); +} + +Index TransitionModelDiscrete::SampleSuccessorState(Index state, Index action) +{ + double randNr=rand() / (RAND_MAX + 1.0); + + double sum=0; + Index sucState=0; + int i; + + for(i=0;i<_m_nrStates;i++) + { + sum+=Get(state,action,i); + if(randNr<=sum) + { + sucState=i; + break; + } + } + return(sucState); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelDiscrete.h new file mode 100644 index 000000000..eb72ae3c3 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelDiscrete.h @@ -0,0 +1,59 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. 
*/ +#ifndef _TRANSITIONMODELDISCRETE_H_ +#define _TRANSITIONMODELDISCRETE_H_ 1 + +/* the include directives */ +#include "boost/numeric/ublas/matrix.hpp" +#include "Globals.h" +#include "TransitionModelDiscreteInterface.h" + +/// TransitionModelDiscrete represents a discrete transition model. +class TransitionModelDiscrete : public TransitionModelDiscreteInterface +{ +private: + + /// The number of states. + int _m_nrStates; + /// The number of joint actions. + int _m_nrJointActions; + +protected: + +public: + // Constructor, destructor and copy assignment. + /// Constructor with the dimensions of the transition model. + TransitionModelDiscrete(int nrS = 1, int nrJA = 1); + + virtual ~TransitionModelDiscrete(); + + /// Sample a successor state. + Index SampleSuccessorState(Index sI, Index jaI); + + /// Returns a pointer to a copy of this class. + virtual TransitionModelDiscrete* Clone() const = 0; + + /// SoftPrints tabular transition model. + std::string SoftPrint() const; +}; + +#endif /* !_TRANSITIONMODELDISCRETE_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelDiscreteInterface.h b/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelDiscreteInterface.h new file mode 100644 index 000000000..8cbad07a7 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelDiscreteInterface.h @@ -0,0 +1,59 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +/* Only include this header file once. */ +#ifndef _TRANSITIONMODELDISCRETEINTERFACE_H_ +#define _TRANSITIONMODELDISCRETEINTERFACE_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include "TransitionModel.h" + +/// TransitionModelDiscreteInterface represents a discrete transition model. +class TransitionModelDiscreteInterface : public TransitionModel +{ +private: + +protected: + +public: + // Constructor, destructor and copy assignment. + /// Constructor + TransitionModelDiscreteInterface(){}; + + virtual ~TransitionModelDiscreteInterface(){}; + + /// Returns P(s'|s,ja). + virtual double Get(Index sI, Index jaI, Index sucSI) const = 0; + + //data manipulation funtions: + /// Sets P(s'|s,ja) + /** sI, jaI, sucSI, are indices of the state, * taken joint action + * and resulting successor state. prob is * the probability. The + * order of events is s, ja, s', so is the arg. list + */ + virtual void Set(Index sI, Index jaI, Index sucSI, double prob) = 0; + + /// Returns a pointer to a copy of this class. + virtual TransitionModelDiscreteInterface* Clone() const = 0; + +}; + +#endif /* !_TRANSITIONMODELDISCRETEINTERFACE_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelMapping.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelMapping.cpp new file mode 100644 index 000000000..43d2c0bed --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelMapping.cpp @@ -0,0 +1,52 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. 
+ * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "TransitionModelMapping.h" + +using namespace std; + +#define DEBUG_TM_MAPPING 0 + +TransitionModelMapping::TransitionModelMapping(int nrS, int nrJA) : + TransitionModelDiscrete(nrS, nrJA) +{ + Matrix *T; + for(int a=0;a!=nrJA;++a) + { + T=new Matrix(nrS,nrS); + T->clear(); + _m_T.push_back(T); + } +} + +TransitionModelMapping:: +TransitionModelMapping(const TransitionModelMapping& TM) : + TransitionModelDiscrete(TM) +{ + Matrix *T; + for(unsigned int a=0;a!=TM._m_T.size();++a) + { + T=new Matrix(*TM._m_T[a]); + _m_T.push_back(T); + } +} + +TransitionModelMapping::~TransitionModelMapping() +{ + for(vector::iterator it=_m_T.begin(); + it!=_m_T.end(); ++it) + delete(*it); + _m_T.clear(); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelMapping.h b/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelMapping.h new file mode 100644 index 000000000..44e89861f --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelMapping.h @@ -0,0 +1,82 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. 
*/ +#ifndef _TRANSITIONMODELMAPPING_H_ +#define _TRANSITIONMODELMAPPING_H_ 1 + +/* the include directives */ +#include "boost/numeric/ublas/matrix.hpp" +#include "Globals.h" +#include "TransitionModelDiscrete.h" +//#include "TGet.h" +class TGet; +class TGet_TransitionModelMapping; + +/// TransitionModelMapping implements a TransitionModelDiscrete. +/** Uses full matrices. */ +class TransitionModelMapping : public TransitionModelDiscrete +{ +public: + + typedef boost::numeric::ublas::matrix Matrix; + +private: + + std::vector _m_T; + +protected: + +public: + // Constructor, destructor and copy assignment. + /// default Constructor + TransitionModelMapping(int nrS = 1, int nrJA = 1); + + /// Copy constructor. + TransitionModelMapping(const TransitionModelMapping& TM); + + /// Destructor. + ~TransitionModelMapping(); + + /// Returns P(s'|s,ja) + double Get(Index sI, Index jaI, Index sucSI) const + { return((*_m_T[jaI])(sI,sucSI)); } + + //data manipulation funtions: + ///Sets P(s'|s,ja) + /**sI, jaI, sucSI, are indices of the state, + * taken joint action and resulting successor state. prob is + * the probability. The order of events is s, ja, s', so is the arg. list + */ + void Set(Index sI, Index jaI, Index sucSI, double prob) + { (*_m_T[jaI])(sI,sucSI)=prob; } + + /// Get a pointer to a transition matrix for a particular action. + const Matrix* GetMatrixPtr(Index a) const + { return(_m_T.at(a)); } + + /// Returns a pointer to a copy of this class. 
+ virtual TransitionModelMapping* Clone() const + { return new TransitionModelMapping(*this); } + + friend class TGet_TransitionModelMapping; +}; + +#endif /* !_TRANSITIONMODELMAPPING_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelMappingSparse.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelMappingSparse.cpp new file mode 100644 index 000000000..b903e494f --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelMappingSparse.cpp @@ -0,0 +1,49 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "TransitionModelMappingSparse.h" + +using namespace std; + +TransitionModelMappingSparse::TransitionModelMappingSparse(int nrS, int nrJA) : + TransitionModelDiscrete(nrS, nrJA) +{ + SparseMatrix *T; + for(int a=0;a!=nrJA;++a) + { + T=new SparseMatrix(nrS,nrS); + _m_T.push_back(T); + } +} + +TransitionModelMappingSparse:: +TransitionModelMappingSparse(const TransitionModelMappingSparse& TM) : + TransitionModelDiscrete(TM) +{ + SparseMatrix *T; + for(unsigned int a=0;a!=TM._m_T.size();++a) + { + T=new SparseMatrix(*TM._m_T[a]); + _m_T.push_back(T); + } +} + +TransitionModelMappingSparse::~TransitionModelMappingSparse() +{ + for(vector::iterator it=_m_T.begin(); + it!=_m_T.end(); ++it) + delete(*it); + _m_T.clear(); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelMappingSparse.h b/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelMappingSparse.h new file mode 100644 index 000000000..ba002f9f9 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionModelMappingSparse.h @@ -0,0 +1,95 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. 
*/ +#ifndef _TRANSITIONMODELMAPPINGSPARSE_H_ +#define _TRANSITIONMODELMAPPINGSPARSE_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include "TransitionModelDiscrete.h" +#include "boost/numeric/ublas/matrix_sparse.hpp" + +//#include "TGet.h" +class TGet; +class TGet_TransitionModelMappingSparse; + +/// TransitionModelMappingSparse implements a TransitionModelDiscrete. +/** Uses sparse matrices. */ +class TransitionModelMappingSparse : public TransitionModelDiscrete +{ +public: + +#if BOOST_1_32_OR_LOWER // they renamed sparse_vector to mapped_vector + typedef boost::numeric::ublas::sparse_matrix SparseMatrix; +#else + typedef boost::numeric::ublas::compressed_matrix SparseMatrix; +#endif + +private: + + std::vector _m_T; + +protected: + +public: + // Constructor, destructor and copy assignment. + /// default Constructor + TransitionModelMappingSparse(int nrS = 1, int nrJA = 1); + + /// Copy constructor. + TransitionModelMappingSparse(const TransitionModelMappingSparse& TM); + + /// Destructor. + ~TransitionModelMappingSparse(); + + /// Returns P(s'|s,ja). + double Get(Index sI, Index jaI, Index sucSI) const + { return((*_m_T[jaI])(sI,sucSI)); } + + ////data manipulation funtions: + /// Sets P(s'|s,ja) + /** sI, jaI, sucSI, are indices of the state, taken joint action + * and resulting successor state. prob is the probability. The + * order of events is s, ja, s', so is the arg. list + */ + void Set(Index sI, Index jaI, Index sucSI, double prob) + { + // make sure probability is not 0 + if(prob > PROB_PRECISION) + (*_m_T[jaI])(sI,sucSI)=prob; + // check if we already defined this element, if so remove it + else if((*_m_T[jaI])(sI,sucSI)>PROB_PRECISION) + (*_m_T[jaI]).erase_element(sI,sucSI); + } + + /// Get a pointer to a transition matrix for a particular action. + const SparseMatrix* GetMatrixPtr(Index a) const + { return(_m_T.at(a)); } + + /// Returns a pointer to a copy of this class. 
+ virtual TransitionModelMappingSparse* Clone() const + { return new TransitionModelMappingSparse(*this); } + + friend class TGet_TransitionModelMappingSparse; + +}; + +#endif /* !_TRANSITIONMODELMAPPINGSPARSE_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionObservationIndependentMADPDiscrete.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionObservationIndependentMADPDiscrete.cpp new file mode 100644 index 000000000..4122e0133 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionObservationIndependentMADPDiscrete.cpp @@ -0,0 +1,1283 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "TransitionObservationIndependentMADPDiscrete.h" +#include "TransitionModelMappingSparse.h" +#include "TransitionModelMapping.h" +#include "ObservationModelMappingSparse.h" +#include "ObservationModelMapping.h" + +#include "VectorTools.h" + +using namespace std; + +#define DEBUG_TOIMADPD 0 +#define DEBUG_CJA 0 +#define DEBUG_CJO 0 +#define DEBUG_CENTRALIZEDSPARSEMODELS 0 + +TransitionObservationIndependentMADPDiscrete:: +TransitionObservationIndependentMADPDiscrete( + const string &name, const string &descr, const string &pf, + bool cacheFlatModels) : + MultiAgentDecisionProcess(name, descr, pf) +{ + _m_initialized = false; + _m_sparse = false; + if(cacheFlatModels) + { + _m_jointIndicesCached = true; + _m_jointModelsGenerated = true; + } + else + { + _m_jointIndicesCached = false; + _m_jointModelsGenerated = false; + } + _m_nrJointActions=0; + _m_nrJointStates=0; + _m_nrJointObservations=0; + _m_p_tModel = 0; + _m_p_oModel = 0; + // these are pointers to get around the const-ness of their Get functions + _m_jointStatesMap=new map, State*>; + _m_indivStateIndicesMap = new map< Index, vector >; + _m_jointActionMap=new map; + _m_jointObsMap=new map; + _m_initialStateDistribution=0; +} + +TransitionObservationIndependentMADPDiscrete:: +TransitionObservationIndependentMADPDiscrete( + const TransitionObservationIndependentMADPDiscrete&) +{ + throw(E("TransitionObservationIndependentMADPDiscrete: copy ctor not implemented")); +} + +TransitionObservationIndependentMADPDiscrete:: +~TransitionObservationIndependentMADPDiscrete() +{ + // this destructor leaks memory + + vector::iterator it = + _m_individualMADPDs.begin(); + vector::iterator last = + _m_individualMADPDs.end(); + while(it != last) + { + delete *it;//pointer to MultiAgentDecisionProcessDiscrete + it++; + } + _m_individualMADPDs.clear(); + + vector::iterator it2 = + _m_jointActionVec.begin(); + vector::iterator last2 = + _m_jointActionVec.end(); + while(it2 != last2) + { + delete *it2; //pointer 
to MultiAgentDecisionProcessDiscrete + it2++; + } + _m_jointActionVec.clear(); +} + + +void TransitionObservationIndependentMADPDiscrete::SetNrAgents(size_t n) +{ + MultiAgentDecisionProcess::SetNrAgents(n); + vector::iterator it = + _m_individualMADPDs.begin(); + vector::iterator last = + _m_individualMADPDs.end(); + while(it != last) + { + delete (*it); + it++; + } + + _m_individualMADPDs.clear(); + for(Index i=0; i < n; i++) + { + stringstream name, descr, pf; + name << "Indiv.MADPDiscr.Agent"<SetSparse(_m_sparse); + _m_individualMADPDs.push_back( m ); + } + + SetInitialized(false); +} + +void TransitionObservationIndependentMADPDiscrete::AddAgent(const string &namestring) +{ + Index i = GetNrAgents();//the old number of agents + MultiAgentDecisionProcess::AddAgent(namestring); + + stringstream name, descr, pf; + name << "Indiv.MADPDiscr.Agent"<SetSparse(_m_sparse); + _m_individualMADPDs.push_back( m ); + + SetInitialized(false); +} + +void TransitionObservationIndependentMADPDiscrete::SetNrStates(Index agentI, size_t nr) +{ + if(agentI >= _m_individualMADPDs.size()) + throw EInvalidIndex("TransitionObservationIndependentMADPDiscrete::SetNrStates - agentI out of bounds..."); + _m_individualMADPDs[agentI]->SetNrStates(nr); +} +void TransitionObservationIndependentMADPDiscrete::AddState(Index agentI, const string &name) +{ + if(agentI >= _m_individualMADPDs.size()) + throw EInvalidIndex("TransitionObservationIndependentMADPDiscrete::AddState - agentI out of bounds..."); + _m_individualMADPDs[agentI]->AddState(name); +} + +void TransitionObservationIndependentMADPDiscrete::SetNrActions(Index agentI, size_t nr) +{ + if(agentI >= _m_individualMADPDs.size()) + throw EInvalidIndex("TransitionObservationIndependentMADPDiscrete::SetNrActions - agentI out of bounds..."); + _m_individualMADPDs[agentI]->SetNrActions(0, nr); +} + +void TransitionObservationIndependentMADPDiscrete::AddAction(Index agentI, const string &name) +{ + if(agentI >= _m_individualMADPDs.size()) + 
throw EInvalidIndex("TransitionObservationIndependentMADPDiscrete::AddAction - agentI out of bounds..."); + _m_individualMADPDs[agentI]->AddAction(0, name); +} + +void TransitionObservationIndependentMADPDiscrete::SetNrObservations(Index agentI, size_t nr) +{ + if(agentI >= _m_individualMADPDs.size()) + throw EInvalidIndex("TransitionObservationIndependentMADPDiscrete::SetNrObservations - agentI out of bounds..."); + _m_individualMADPDs[agentI]->SetNrObservations(0, nr); +} + +void TransitionObservationIndependentMADPDiscrete::AddObservation(Index agentI, const string &name) +{ + if(agentI >= _m_individualMADPDs.size()) + throw EInvalidIndex("TransitionObservationIndependentMADPDiscrete::AddObservation - agentI out of bounds..."); + _m_individualMADPDs[agentI]->AddObservation(0, name); +} + +#if 0 // will be computed from individual ISDs +void TransitionObservationIndependentMADPDiscrete::SetUniformISD() +{ + size_t nrJS = GetNrJointStates(); + if(_m_initialStateDistribution->size() != nrJS) + _m_initialStateDistribution->resize(nrJS); + double uprob = 1.0 / nrJS; + vector::iterator it = _m_initialStateDistribution->begin(); + vector::iterator last = _m_initialStateDistribution->end(); + while(it!=last) + { + *it = uprob; + it++; + } +} +#endif + +void TransitionObservationIndependentMADPDiscrete::SetISD(const vector &v) +{ + if(_m_nrJointStates==0) + throw(E("TransitionObservationIndependentMADPDiscrete::CreateISD() joint states should have been created already")); + + if(v.size() != _m_nrJointStates) + throw E("TransitionObservationIndependentMADPDiscrete::SetISD - ERROR: nrStates don't match!"); + + delete _m_initialStateDistribution; + _m_initialStateDistribution = new StateDistributionVector(v); +} + +void TransitionObservationIndependentMADPDiscrete::CreateJointActions() +{ + _m_jointActionVec.clear(); + if(!_m_jointModelsGenerated) + { + _m_nrJointActions=1; + for(Index agI = 0; agI < GetNrAgents(); agI++) + 
_m_nrJointActions*=(GetIndividualMADPD(agI)->GetNrActions(0)); + } + else + { + JointActionDiscrete* ja = new JointActionDiscrete(); + _m_nrJointActions = CreateJointActionsRecursively(0, *ja, 0); + + if(_m_nrJointActions!=_m_jointActionVec.size()) + throw(E("TransitionObservationIndependentMADPDiscrete::CreateJointActions() sizes do not match")); + } + + _m_nrIndivActions.clear(); + for(Index agI = 0; agI < GetNrAgents(); agI++) + _m_nrIndivActions.push_back(GetIndividualMADPD(agI)->GetNrActions(0)); + + _m_jointToIndActionCache.clear(); + if(_m_jointIndicesCached) + for(Index ja=0;ja!=_m_nrJointActions;++ja) + _m_jointToIndActionCache.push_back( + JointToIndividualActionIndicesNoCache(ja)); +} + +size_t TransitionObservationIndependentMADPDiscrete:: +CreateJointActionsRecursively(Index curAgentI, JointActionDiscrete& ja, + Index jaI) +{ +if(DEBUG_CJA) cerr << "TransitionObservationIndependentMADPDiscrete::CreateJointActions(Index "< + GetActionDiscrete(0, a); + if(DEBUG_CJA) cerr << "\nnext action"; +// if(it == first) // + if(a == 0) + { + if(DEBUG_CJA) cerr << "(first action - not making copy)\n"; + p_ja = &ja; + } + else if ( a == nrA-1 )//it == beforelast) + //this is the last last action + { + if(DEBUG_CJA) cerr << "(last action - not making copy)\n"; + p_ja = p_jaReceivedArgCopy; //don't make a new copy + } + else //make a new copy + { + if(DEBUG_CJA) cerr << "(intermed. action - making copy)\n"; + p_ja = new JointActionDiscrete(*p_jaReceivedArgCopy); + } + if(lastAgent) + { + p_ja->SetIndex(jaI); + if(DEBUG_CJA)cerr << "setting index of this joint action to: "<< + jaI <AddIndividualAction(adp, curAgentI); + if(lastAgent) //jointAction is now completed: add it to jointAction set. 
+ { + if(DEBUG_CJA){cerr << "INSERTING the joint action:"; + p_ja->Print();cerr<>ProblemDecTiger::CreateJointActionsRecursively(Index "<GetNrStates(); + _m_nrIndivStates.push_back(nrS); + _m_nrJointStates*=nrS; + } + if(_m_jointIndicesCached) + { + vector ind_sI(nrAg, 0); + size_t i=0; + do + { + _m_indivStateIndices.push_back(ind_sI); + State *state=new StateDiscrete(i); + string name=""; + for(Index agI = 0; agI < nrAg; agI++) + { + if(agI>0) + name+="_"; + name+=GetIndividualMADPD(agI)->GetState(ind_sI[agI])->GetName(); + } + state->SetName(name); + state->SetDescription(""); + _m_jointStates.push_back(state); + i++; + } while(! IndexTools::Increment(ind_sI, _m_nrIndivStates) ); + + if(_m_nrJointStates!=_m_jointStates.size()) + throw(E("TransitionObservationIndependentMADPDiscrete::CreateJointStates() sizes do not match")); + } +} + +const State* TransitionObservationIndependentMADPDiscrete::GetState( + const std::vector &sIs) const +{ + // we cached the ones already asked for + if(_m_jointStatesMap->find(sIs)!=_m_jointStatesMap->end()) + return(_m_jointStatesMap->find(sIs)->second); + else // create new joint state and add it to cache + { + State *state=new State; // not a StateDiscrete, since the + // index might overflow + string name=""; + for(Index agI = 0; agI < GetNrAgents(); agI++) + { + if(agI>0) + name+="_"; + name+=GetIndividualMADPD(agI)->GetState(sIs[agI])->GetName(); + } + state->SetName(name); + state->SetDescription(""); + _m_jointStatesMap->insert(make_pair(sIs,state)); + return(state); + } +} + +const JointActionDiscrete* +TransitionObservationIndependentMADPDiscrete::GetJointActionDiscrete(Index i) const +{ + if(_m_jointIndicesCached) // we cached all joint actions + return(_m_jointActionVec.at(i)); + // we cached the ones already asked for + else if(_m_jointActionMap->find(i)!=_m_jointActionMap->end()) + return(_m_jointActionMap->find(i)->second); + else // create new joint action and add it to cache + { + JointActionDiscrete *action=new 
JointActionDiscrete(i); + vector ind_sI= + IndexTools::JointToIndividualIndices(i,_m_nrIndivActions); + for(Index agI = 0; agI < GetNrAgents(); agI++) + action->AddIndividualAction( + GetIndividualMADPD(agI)->GetActionDiscrete(0,ind_sI[agI]), + agI); + + _m_jointActionMap->insert(make_pair(i,action)); + return(action); + } +} + +const JointObservation* +TransitionObservationIndependentMADPDiscrete::GetJointObservation(Index i) const +{ + if(_m_jointIndicesCached) // we cached all joint obs + return(_m_jointObs.at(i)); + // we cached the ones already asked for + else if(_m_jointObsMap->find(i)!=_m_jointObsMap->end()) + return(_m_jointObsMap->find(i)->second); + else // create new joint obs and add it to cache + { + JointObservationDiscrete *observation=new JointObservationDiscrete(i); + vector ind_sI= + IndexTools::JointToIndividualIndices(i,_m_nrIndivObs); + for(Index agI = 0; agI < GetNrAgents(); agI++) + observation->AddIndividualObservation( + GetIndividualMADPD(agI)->GetObservationDiscrete(0,ind_sI[agI]), + agI); + + _m_jointObsMap->insert(make_pair(i,observation)); + return(observation); + } +} + +void TransitionObservationIndependentMADPDiscrete::CreateJointObservations() +{ + _m_jointObs.clear(); + size_t nrAg = GetNrAgents(); + _m_nrIndivObs.clear(); + for(Index agI = 0; agI < nrAg; agI++) + { + size_t nrO = GetIndividualMADPD(agI)->GetNrObservations(0); + _m_nrIndivObs.push_back(nrO); + } + + for(Index agI = 0; agI < nrAg; agI++) + { + vector indObs; + for(Index o=0; o!=GetIndividualMADPD(agI)->GetNrObservations(0); ++o) + indObs.push_back(*GetIndividualMADPD(agI)-> + GetObservationDiscrete(0,o)); + _m_indivObs.push_back(indObs); + } + + if(!_m_jointIndicesCached) + { + _m_nrJointObservations=1; + for(Index agI = 0; agI < nrAg; agI++) + _m_nrJointObservations*=GetIndividualMADPD(agI)-> + GetNrObservations(0); + } + else + { + JointObservationDiscrete* jo = new JointObservationDiscrete(); + _m_nrJointObservations=ConstructJointObservationsRecursively(0, 
*jo, 0); + if(_m_nrJointObservations!=_m_jointObs.size()) + throw(E("TransitionObservationIndependentMADPDiscrete::CreateJointObservations() sizes do not match")); + + for(Index jo=0;jo!=_m_nrJointObservations;++jo) + _m_jointToIndObsCache.push_back( + JointToIndividualObservationIndicesNoCache(jo)); + } +} + +size_t TransitionObservationIndependentMADPDiscrete:: +ConstructJointObservationsRecursively( + Index curAgentI, JointObservationDiscrete& jo, Index joI) +{ + bool lastAgent=false; + if(curAgentI == GetNrAgents()-1) + { + lastAgent = true; + } + if(curAgentI >= _m_indivObs.size()) + { + stringstream ss; + ss << "ConstructJointObservationsRecursively - current Agent index ("<< + curAgentI<<") out of bounds! (_m_indivObs contains "<< + "observations for "<<_m_indivObs.size() << " agents...)\n"; + throw E(ss.str().c_str()); + } + ObservationDVec::iterator first = _m_indivObs[curAgentI].begin(); + ObservationDVec::iterator it = _m_indivObs[curAgentI].begin(); + ObservationDVec::iterator last = _m_indivObs[curAgentI].end(); + ObservationDVec::iterator beforelast = _m_indivObs[curAgentI].end(); + beforelast--; + + if(it == last) + { + stringstream ss; + ss << " empty observation set for agent " << curAgentI; + throw E(ss); + } + //first observation extends the received jo + JointObservationDiscrete* p_joReceivedArgCopy = + new JointObservationDiscrete(jo); + JointObservationDiscrete* p_jo; + + while( it != last) // other observations extend duplicates of jo + { + if(it == first) // + { + p_jo = &jo; + } + else if (it == beforelast)//this is the last valid it -> last observation + { + p_jo = p_joReceivedArgCopy; //don't make a new copy + } + else //make a new copy + { + p_jo = new JointObservationDiscrete(*p_joReceivedArgCopy); + } + if(lastAgent) + { + p_jo->SetIndex(joI); + if(DEBUG_CJO) + cerr << "setting index of this observation to: "<< joI <AddIndividualObservation(ai, curAgentI); + + if(lastAgent)//jointObservation is now completed:add it to the 
jointObservation set. + { + if(DEBUG_CJO) + {cerr<<"INSERTING the joint observation:"; p_jo->Print();cerr<> TransitionObservationIndependentMADPDiscrete::ConstructJointObservationsRecursively(Index "< > isdIs; + //vector< const StateDistributionVector* > isdIs; + vector< const StateDistribution* > isdIs; + + for(Index i=0;i!=GetNrAgents();++i) + isdIs.push_back(GetIndividualMADPD(i)->GetISD()); + + if(_m_jointIndicesCached) + { + vector ISD(nrJS,1); + for(Index s=0;s!=nrJS;++s) + for(Index i=0;i!=GetNrAgents();++i) + ISD[s]*= isdIs[i]->GetProbability(_m_indivStateIndices[s][i] ); + + SetISD(ISD); + } +} + +bool TransitionObservationIndependentMADPDiscrete::SetInitialized(bool b) +{ + if(b == false) + { + if(_m_initialized == true) + delete [] _m_actionStepSizeArray; + _m_initialized = false; + return(true); + } + if(_m_initialized == true && b == true) + { + //first free mem before re-initialize: + delete [] _m_actionStepSizeArray; + } + + if(DEBUG_TOIMADPD) + cerr << "TransitionObservationIndependentMADPDiscrete::SetInitialized"<< + " called - GetNrAgents()="<< GetNrAgents()<::iterator it = + _m_individualMADPDs.begin(); + vector::iterator last = + _m_individualMADPDs.end(); + while(it != last) + { + (*it)->Initialize(); + it++; + } + + // for three agents or less we cache the joint indices + if(GetNrAgents()<=3) + _m_jointIndicesCached=true; + + //now perform the necessary actions for this class... 
+ CreateJointActions(); + CreateJointStates(); + CreateJointObservations(); + + CreateISD(); + + _m_nr_agents=GetNrAgents(); + _m_actionStepSizeArray=IndexTools::CalculateStepSize(_m_nrIndivActions); + _m_actionStepSize=IndexTools::CalculateStepSizeVector(_m_nrIndivActions); + _m_observationStepSize=IndexTools::CalculateStepSizeVector(_m_nrIndivObs); + _m_stateStepSize=IndexTools::CalculateStepSizeVector(_m_nrIndivStates); + + _m_initialized = b; + + if(_m_jointModelsGenerated) + { + if(_m_sparse) + CreateCentralizedSparseModels(); + else + CreateCentralizedFullModels(); + } + + + } + return(true); +} + +vector TransitionObservationIndependentMADPDiscrete:: +JointToIndividualActionIndicesNoCache(Index jaI) const +{ +#if 0 + if(!_m_initialized) + { + stringstream ss; + ss << "TransitionObservationIndependentMADPDiscrete::"<< + "JointToIndividualActionIndices("<< jaI<< + ") - Error: not initialized. "< vai = jai->GetIndividualActionDiscretesIndices(); +#endif + vector vai=IndexTools::JointToIndividualIndices(jaI, + _m_nrIndivActions); + + if(DEBUG_TOIMADPD) + { + cerr << "TransitionObservationIndependentMADPDiscrete::"<< + "JointToIndividualActionIndices"<< "(Index "<::iterator vai_it = vai.begin(); + vector::iterator vai_last = vai.end(); + while(vai_it != vai_last) + { + cerr << ", " << *vai_it; + vai_it++; + } + cerr << endl; + } + return(vai); +} + +vector TransitionObservationIndependentMADPDiscrete:: +JointToIndividualObservationIndicesNoCache(Index joI) const +{ +#if 0 + if(!_m_initialized) + { + stringstream ss; + ss << "TransitionObservationIndependentMADPDiscrete::"<< + "JointToIndividualObservationIndices("<< joI<< + ") - Error: not initialized. 
"< nrO; + for(Index agI=0; agI < GetNrAgents(); agI++) + nrO.push_back(GetNrObservations(agI)); + vector voi = + IndexTools::JointToIndividualIndices(joI,nrO); + if(DEBUG_TOIMADPD) + { + cerr << "TransitionObservationIndependentMADPDiscrete::"<< + "JointToIndividualActionIndices"<< "(Index "<::iterator voi_it = voi.begin(); + vector::iterator voi_last = voi.end(); + while(voi_it != voi_last) + { + cerr << ", " << *voi_it; + voi_it++; + } + cerr << endl; + } + return(voi); +} + +string TransitionObservationIndependentMADPDiscrete::SoftPrint() const +{ + stringstream str; + str << MultiAgentDecisionProcess::SoftPrint(); + if(!_m_initialized) + { + stringstream ss; + ss << "TransitionObservationIndependentMADPDiscrete::Print("<< + ") - Error: not initialized. "<SoftPrint(); + + return(str.str()); +} + +string TransitionObservationIndependentMADPDiscrete::SoftPrintActionSets() const +{ + stringstream str; + if(!_m_initialized) + { + stringstream ss; + ss << "TransitionObservationIndependentMADPDiscrete::PrintAction"<< + "Sets() - Error: not initialized. "< + GetActionDiscrete(0, actionI); + str << adp->SoftPrint(); + str << endl; + } + } + return(str.str()); +} + +string TransitionObservationIndependentMADPDiscrete::SoftPrintJointActionSet() + const +{ + stringstream str; + if(!_m_initialized) + { + stringstream ss; + ss << "TransitionObservationIndependentMADPDiscrete::PrintJoint"<< + "ActionSets() - Error: not initialized. 
"<::const_iterator ja_it = + _m_jointActionVec.begin(); + vector::const_iterator ja_last = + _m_jointActionVec.end(); + while(ja_it != ja_last) + { + str << (*ja_it)->SoftPrint(); + str< sIs=JointToIndividualStateIndices(sI); + stringstream ss; + for(Index agI = 0; agI < sIs.size(); agI++) + ss << GetIndividualMADPD(agI)->SoftPrintState(sIs[agI]); + return(ss.str()); +} + + +double TransitionObservationIndependentMADPDiscrete:: +GetTransitionProbability(Index sI, + Index jaI, + Index sucSI) const +{ + double p=1; + if(!_m_jointModelsGenerated) + { + vector sIs=JointToIndividualStateIndices(sI), + sucSIs=JointToIndividualStateIndices(sucSI); + vector aIs=JointToIndividualActionIndices(jaI); + for(Index agI = 0; agI < GetNrAgents(); agI++) + { + p*=GetIndividualMADPD(agI)->GetTransitionProbability( + sIs[agI], + aIs[agI], + sucSIs[agI]); + if(p==0) + break; + } + } + else + { + for(Index agI = 0; agI < GetNrAgents(); agI++) + { + p*=GetIndividualMADPD(agI)->GetTransitionProbability( + _m_indivStateIndices[sI][agI], + _m_jointToIndActionCache[jaI][agI], + _m_indivStateIndices[sucSI][agI]); + if(p==0) + break; + } + } + return(p); +} + +double TransitionObservationIndependentMADPDiscrete:: +GetObservationProbability(Index jaI, + Index sucSI, + Index joI) const +{ + double p=1; + if(!_m_jointModelsGenerated) + { + vector sucSIs=JointToIndividualStateIndices(sucSI); + vector aIs=JointToIndividualActionIndices(jaI); + vector oIs=JointToIndividualObservationIndices(joI); + for(Index agI = 0; agI < GetNrAgents(); agI++) + { + p*=GetIndividualMADPD(agI)->GetObservationProbability( + aIs[agI], + sucSIs[agI], + oIs[agI]); + if(p==0) + break; + } + } + else + { + for(Index agI = 0; agI < GetNrAgents(); agI++) + { + p*=GetIndividualMADPD(agI)->GetObservationProbability( + _m_jointToIndActionCache[jaI][agI], + _m_indivStateIndices[sucSI][agI], + _m_jointToIndObsCache[joI][agI]); + if(p==0) + break; + } + } + return(p); +} + +StateDistributionVector* 
TransitionObservationIndependentMADPDiscrete::GetISD() const +{ + if(!_m_jointModelsGenerated) + { + throw(E("TransitionObservationIndependentMADPDiscrete::GetISD initial state distribution has not been generated as a double vector.")); + } + else + return(_m_initialStateDistribution); +} + +double TransitionObservationIndependentMADPDiscrete::GetInitialStateProbability(Index sI) const +{ + if(_m_initialStateDistribution) + return(_m_initialStateDistribution->at(sI)); + else + { + double p=1; + vector sIs=JointToIndividualStateIndices(sI); + for(Index agI = 0; agI < GetNrAgents(); agI++) + { + p*=GetIndividualMADPD(agI)->GetInitialStateProbability( + sIs[agI]); + if(p==0) + break; + } + return(p); + } +} + +void TransitionObservationIndependentMADPDiscrete::SetSparse(bool sparse) +{ + _m_sparse=sparse; + + vector::iterator it = + _m_individualMADPDs.begin(); + vector::iterator last = + _m_individualMADPDs.end(); + while(it != last) + { + (*it)->SetSparse(sparse); + it++; + } +} + +///Get the number of joint actions the agents in agScope can form +size_t TransitionObservationIndependentMADPDiscrete::GetNrJointActions(const Scope& agScope) const +{ + if(agScope.size()>0) + { + const vector& nrActions = GetNrActions(); + vector restr_nrAs(agScope.size()); + IndexTools::RestrictIndividualIndicesToScope( + nrActions, agScope, restr_nrAs); + size_t restr_nrJA = VectorTools::VectorProduct(restr_nrAs); + return restr_nrJA; + } + else + return(0); +} + + +#if 0 + /// Kronecker tensor product of matrices - as per Matlab kron + template< class Matrix_T > + void + kron(const Matrix_T& x, const Matrix_T& y, Matrix_T& z) + { + const int rx = x.size1(); + const int cx = x.size2(); + const int ry = y.size1(); + const int cy = y.size2(); + z.resize (rx*ry, cx*cy); + z.clear (); + for ( typename Matrix_T::const_iterator1 i = x.begin1(); + i != x.end1(); ++i) + for ( typename Matrix_T::const_iterator2 j = i.begin(); + j != i.end(); ++j) + for ( typename Matrix_T::const_iterator1 k = 
y.begin1(); + k != y.end1(); ++k) + for (typename Matrix_T::const_iterator2 l = k.begin(); + l != k.end(); ++l) + z(j.index1()*ry + l.index1(), j.index2()*cy + l.index2()) = + (*j) * (*l); + } + +#endif + + +void TransitionObservationIndependentMADPDiscrete::CreateCentralizedSparseTransitionModel() +{ + if(!_m_initialized) + { + throw(E("TransitionObservationIndependentMADPDiscrete::CreateCentralizedSparseTransitionModel not initialized yet")); + return; + } + +#if 0 + boost::numeric::ublas::coordinate_matrix a; + boost::numeric::ublas::compressed_matrix b; + a=b; +#endif + + _m_p_tModel = new TransitionModelMappingSparse(_m_nrJointStates, + _m_nrJointActions); + + vector stateIndices(GetNrAgents(),0), + sucStateIndices(GetNrAgents(),0), + actionIndices(GetNrAgents(),0); + + double p=0; + switch(GetNrAgents()) + { + case 2: + case 3: + { + +#if DEBUG_CENTRALIZEDSPARSEMODELS + + for(Index a=0;a!=_m_nrJointActions;++a) + for(Index s=0;s!=_m_nrJointStates;++s) + for(Index s1=0;s1!=_m_nrJointStates;++s1) + { + p=GetTransitionProbability(s,a,s1); + if(p>0) + _m_p_tModel->Set(s,a,s1,p); + } + string okModel=_m_p_tModel->SoftPrint(); + +#endif + vector > Ts; + for(Index i=0;i!=GetNrAgents();++i) + { + Ts.push_back(vector()); + const TransitionModelMappingSparse *tms= + dynamic_cast(_m_individualMADPDs[i]-> + GetTransitionModelDiscretePtr()); + for(Index a=0;a!=GetNrActions(i);++a) + Ts.at(i).push_back(tms->GetMatrixPtr(a)); + } + +#if 0 + vector > Tjoint; + // this uses a lot of memory, don't know why... 
+ for(Index ja=0;ja!=GetNrJointActions();++ja) + { + cout << _m_nrJointStates << ja << endl; + Tjoint.push_back(boost::numeric::ublas::coordinate_matrix(_m_nrJointStates, + _m_nrJointStates)); + } +#endif + + for(Index a0=0;a0!=_m_nrIndivActions[0];++a0) + { + actionIndices[0]=a0; + for(Index a1=0;a1!=_m_nrIndivActions[1];++a1) + { + actionIndices[1]=a1; + Index jaI=IndividualToJointActionIndices(actionIndices); + + cout << "trans ja " << jaI << endl; + + for(TransitionModelMappingSparse::SparseMatrix::const_iterator1 + ri0=Ts[0][a0]->begin1(); + ri0!=Ts[0][a0]->end1(); + ++ri0) + { + stateIndices[0]=ri0.index1(); + for (TransitionModelMappingSparse::SparseMatrix::const_iterator2 + ci0 = ri0.begin(); + ci0 != ri0.end(); + ++ci0) + { + sucStateIndices[0]=ci0.index2(); + + for(TransitionModelMappingSparse::SparseMatrix::const_iterator1 + ri1=Ts[1][a1]->begin1(); + ri1!=Ts[1][a1]->end1(); + ++ri1) + { + stateIndices[1]=ri1.index1(); + for (TransitionModelMappingSparse::SparseMatrix::const_iterator2 + ci1 = ri1.begin(); + ci1 != ri1.end(); + ++ci1) + { + sucStateIndices[1]=ci1.index2(); + + if(GetNrAgents()==3) + { + for(Index a2=0;a2!=_m_nrIndivActions[2];++a2) + { + actionIndices[2]=a2; + Index jaI=IndividualToJointActionIndices(actionIndices); + + for(TransitionModelMappingSparse::SparseMatrix::const_iterator1 + ri2=Ts[2][a2]->begin1(); + ri2!=Ts[2][a2]->end1(); + ++ri2) + { + stateIndices[2]=ri2.index1(); + for (TransitionModelMappingSparse::SparseMatrix::const_iterator2 + ci2 = ri2.begin(); + ci2 != ri2.end(); + ++ci2) + { + sucStateIndices[2]=ci2.index2(); + Index sI=IndividualToJointStateIndices(stateIndices); + Index sucSI=IndividualToJointStateIndices(sucStateIndices); + p=(*ci0)*(*ci1)*(*ci2); + _m_p_tModel->Set(sI, jaI, sucSI, p); + } + } + } + } + else + { + Index sI=IndividualToJointStateIndices(stateIndices); + Index sucSI=IndividualToJointStateIndices(sucStateIndices); + p=(*ci0)*(*ci1); + _m_p_tModel->Set(sI, jaI, sucSI, p); + } + } + } + } + } + } + } 
+ + +#if 0 + vector > Tjoint1; + for(Index ja=0;ja!=GetNrJointActions();++ja) + Tjoint1.push_back(boost::numeric::ublas::compressed_matrix(Tjoint[ja])); +#endif + +#if DEBUG_CENTRALIZEDSPARSEMODELS + string newModel=_m_p_tModel->SoftPrint(); + + if(okModel!=newModel) + abort(); +#endif + + break; + } + default: + for(Index a=0;a!=_m_nrJointActions;++a) + for(Index s=0;s!=_m_nrJointStates;++s) + { + for(Index s1=0;s1!=_m_nrJointStates;++s1) + { + p=GetTransitionProbability(s,a,s1); + if(p>0) + _m_p_tModel->Set(s,a,s1,p); + } + } + } +} + +void TransitionObservationIndependentMADPDiscrete::CreateCentralizedObservationTransitionModel() +{ + if(!_m_initialized) + { + throw(E("TransitionObservationIndependentMADPDiscrete::CreateCentralizedSparseObservationModel not initialized yet")); + return; + } + + _m_p_oModel = new ObservationModelMappingSparse(_m_nrJointStates, + _m_nrJointActions, + _m_nrJointObservations); + + double p=0; + switch(GetNrAgents()) + { + case 2: + { + vector sucStateIndices(GetNrAgents(),0), + actionIndices(GetNrAgents(),0), + observationIndices(GetNrAgents(),0); + +#if DEBUG_CENTRALIZEDSPARSEMODELS + for(Index a=0;a!=_m_nrJointActions;++a) + for(Index s=0;s!=_m_nrJointStates;++s) + for(Index o=0;o!=_m_nrJointObservations;++o) + { + p=GetObservationProbability(a,s,o); + if(p>0) + _m_p_oModel->Set(a,s,o,p); + } + string okModelObs=_m_p_oModel->SoftPrint(); +#endif + + vector > Os; + for(Index i=0;i!=GetNrAgents();++i) + { + Os.push_back(vector()); + const ObservationModelMappingSparse *oms= + dynamic_cast(_m_individualMADPDs[i]-> + GetObservationModelDiscretePtr()); + for(Index a=0;a!=GetNrActions(i);++a) + Os.at(i).push_back(oms->GetMatrixPtr(a)); + } + + for(Index a0=0;a0!=_m_nrIndivActions[0];++a0) + { + actionIndices[0]=a0; + for(Index a1=0;a1!=_m_nrIndivActions[1];++a1) + { + actionIndices[1]=a1; + Index jaI=IndividualToJointActionIndices(actionIndices); + cout << "obs ja " << jaI << endl; + 
for(ObservationModelMappingSparse::SparseMatrix::const_iterator1 + ri0=Os[0][a0]->begin1(); + ri0!=Os[0][a0]->end1(); + ++ri0) + { + sucStateIndices[0]=ri0.index1(); + for(ObservationModelMappingSparse::SparseMatrix::const_iterator2 + ci0 = ri0.begin(); + ci0 != ri0.end(); + ++ci0) + { + observationIndices[0]=ci0.index2(); + + for(ObservationModelMappingSparse::SparseMatrix::const_iterator1 + ri1=Os[1][a1]->begin1(); + ri1!=Os[1][a1]->end1(); + ++ri1) + { + sucStateIndices[1]=ri1.index1(); + for (ObservationModelMappingSparse::SparseMatrix::const_iterator2 + ci1 = ri1.begin(); + ci1 != ri1.end(); + ++ci1) + { + observationIndices[1]=ci1.index2(); + + Index sucSI=IndividualToJointStateIndices(sucStateIndices); + Index oI=IndividualToJointObservationIndices(observationIndices); + p=(*ci0)*(*ci1); + _m_p_oModel->Set(jaI, sucSI, oI, p); + } + } + } + } + } + } + +#if DEBUG_CENTRALIZEDSPARSEMODELS + string newModelObs=_m_p_oModel->SoftPrint(); + + if(okModelObs!=newModelObs) + abort(); +#endif + break; + } + default: + for(Index a=0;a!=_m_nrJointActions;++a) + for(Index s=0;s!=_m_nrJointStates;++s) + { + for(Index s1=0;s1!=_m_nrJointStates;++s1) + { + p=GetTransitionProbability(s,a,s1); + if(p>0) + _m_p_tModel->Set(s,a,s1,p); + } + + for(Index o=0;o!=_m_nrJointObservations;++o) + { + p=GetObservationProbability(a,s,o); + if(p>0) + _m_p_oModel->Set(a,s,o,p); + } + } + } +} + +void +TransitionObservationIndependentMADPDiscrete::CreateCentralizedSparseModels() +{ + CreateCentralizedSparseTransitionModel(); + CreateCentralizedObservationTransitionModel(); +} + +void +TransitionObservationIndependentMADPDiscrete::CreateCentralizedFullModels() +{ + if(!_m_initialized) + { + throw(E("TransitionObservationIndependentMADPDiscrete::CreateCentralizedFullModels not initialized yet")); + return; + } + + _m_p_tModel = new TransitionModelMapping(_m_nrJointStates, + _m_nrJointActions); + _m_p_oModel = new ObservationModelMapping(_m_nrJointStates, + _m_nrJointActions, + 
_m_nrJointObservations); + double p=0; + for(Index a=0;a!=_m_nrJointActions;++a) + for(Index s=0;s!=_m_nrJointStates;++s) + { + for(Index s1=0;s1!=_m_nrJointStates;++s1) + { + p=GetTransitionProbability(s,a,s1); + if(p>0) + _m_p_tModel->Set(s,a,s1,p); + } + + for(Index o=0;o!=_m_nrJointObservations;++o) + { + p=GetObservationProbability(a,s,o); + if(p>0) + _m_p_oModel->Set(a,s,o,p); + } + } +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionObservationIndependentMADPDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionObservationIndependentMADPDiscrete.h new file mode 100644 index 000000000..8d1d70fcf --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TransitionObservationIndependentMADPDiscrete.h @@ -0,0 +1,586 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. 
*/ +#ifndef _TRANSITIONOBSERVATIONINDEPENDENTMADPDISCRETE_H_ +#define _TRANSITIONOBSERVATIONINDEPENDENTMADPDISCRETE_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include "MultiAgentDecisionProcess.h" +#include "MultiAgentDecisionProcessDiscreteInterface.h" +#include "TransitionModelDiscrete.h" +#include "ObservationModelDiscrete.h" +#include "MultiAgentDecisionProcessDiscrete.h" +#include "TGet.h" +#include "OGet.h" +#include +#include "StateDistributionVector.h" + +/** + * \brief TransitionObservationIndependentMADPDiscrete is a base class that + * defines the primary properties of a Transition and Observation independent + * decision process. + * + * It implements the transition and observation model by assigning a regular + * (1-agent) MultiAgentDecisionProcessDiscrete for each agent: the agent's + * local model. + * This class is responsible for constructing the joint actions and + * observations (upon which rewards will typically be based). + * + * + * */ +class TransitionObservationIndependentMADPDiscrete : + public MultiAgentDecisionProcess, + virtual public MultiAgentDecisionProcessDiscreteInterface +{ +private: + /// Boolean to indicate whether this MADPDiscrete has been initialized. + bool _m_initialized; + /// Boolean that indicates whether models should be stored sparsely. + bool _m_sparse; + + /// Boolean indicating whether joint indices have been cached. + bool _m_jointIndicesCached; + /// Boolean indicating whether joint models have been generated. + bool _m_jointModelsGenerated; + + + //cache this here (set by SetInitialized) + size_t _m_nr_agents; + + /**In a transition-observation independent MADP, each agent has a set + * of local states and observations together with an individual + * transition and observation model. We model this as each agent having + * its own MultiAgentDecisionProcessDiscrete. This is the vector that + * contains a pointer to each agent's MultiAgentDecisionProcessDiscrete.
+ */ + std::vector _m_individualMADPDs; + + /**The vector storing the joint actions */ + std::vector _m_jointActionVec; + std::map *_m_jointActionMap; + + std::vector > _m_jointToIndActionCache; + + size_t * _m_actionStepSizeArray; + std::vector _m_actionStepSize; + std::vector _m_observationStepSize; + std::vector _m_stateStepSize; + + /**Vector that stores the number of individual states. (Created by + * CreateJointStates() )*/ + std::vector _m_nrIndivStates; + + /**The vector storing the individual state indices for each joint + * index: _m_indivStateIndices[jointStateIndex] = vector */ + std::vector< std::vector > _m_indivStateIndices; + std::map< Index, std::vector > *_m_indivStateIndicesMap; + + std::vector _m_jointStates; + std::map, State*> *_m_jointStatesMap; + + size_t _m_nrJointStates; + + /**The vector containing the initial state distr over joint states.*/ + //std::vector _m_initialStateDistribution; + StateDistributionVector* _m_initialStateDistribution; + /**the number of joint actions.*/ + size_t _m_nrJointActions; + + std::vector _m_nrIndivActions; + + std::vector _m_nrIndivObs; + + std::vector > _m_indivObs; + std::vector _m_jointObs; + std::map *_m_jointObsMap; + + size_t _m_nrJointObservations; + + std::vector > _m_jointToIndObsCache; + + TransitionModelDiscrete* _m_p_tModel; + + ObservationModelDiscrete* _m_p_oModel; + +// initialization functions + + /**Recursively constructs all the joint actions. + * Works by calling + * CreateJointActionsRecursively on a new (empty) joint action.*/ + void CreateJointActions(); + /* Recursively creates the joint actions (_m_jointActionVec) + * using _m_actionVecs (which need to be initialized before calling + * this function...) */ + size_t CreateJointActionsRecursively( Index curAgentI, + JointActionDiscrete& ja, Index jaI); + + /**This function generates the joint -> individual state index cache. 
+ * This function assumes that all the agents and their local states + * have been added.*/ + void CreateJointStates(); + + void CreateJointObservations(); + + void CreateISD(); + + size_t ConstructJointObservationsRecursively( + Index curAgentI, JointObservationDiscrete& jo, Index joI); + + std::vector JointToIndividualActionIndicesNoCache(Index jaI) const; + + std::vector JointToIndividualObservationIndicesNoCache(Index joI) + const; + +protected: + //data manipulation (set) functions: + /** Sets _m_initialized to b. When setting to true, a verification of + * member elements is performed. (i.e. a check whether all vectors + * have the correct size and non-zero entries) */ + virtual bool SetInitialized(bool b); + + bool GetSparse() const { return(_m_sparse); } + + void CreateCentralizedSparseModels(); + void CreateCentralizedObservationTransitionModel(); + void CreateCentralizedFullModels(); + +public: + bool Initialize() + {return(SetInitialized(true));} + + // Constructor, destructor and copy assignment. + // Default constructor. + TransitionObservationIndependentMADPDiscrete( + const std::string &name="received unspec. by TransitionObservationIndependentMADPDiscrete", + const std::string &descr="received unspec.by TransitionObservationIndependentMADPDiscrete", + const std::string &pf="received unspec. by TransitionObservationIndependentMADPDiscrete", + bool cacheFlatModels=false); + // Copy assignment constructor. + TransitionObservationIndependentMADPDiscrete( + const TransitionObservationIndependentMADPDiscrete&); + + ///Destructor. + virtual ~TransitionObservationIndependentMADPDiscrete(); + +// pre-initialization functions, functions that are used to construct the models +// etc. 
+ /**\brief Sets the number of agents to n.*/ + void SetNrAgents(size_t n); + /**\brief Adds one agent with an optional name.*/ + void AddAgent(const std::string &name="unspec."); + /**\brief Sets the number of states for the specified agent.*/ + void SetNrStates(Index agentI, size_t nr); + /**\brief Adds a state with a particular name for the specified agent.*/ + void AddState(Index agentI, const std::string &name); + /**\brief Sets the number of actions for the specified agent.*/ + void SetNrActions(Index agentI, size_t nr); + /**\brief Adds an action with a particular name for the specified agent. + * */ + void AddAction(Index agentI, const std::string &name); + /**\brief Sets the number of Observations for the specified agent.*/ + void SetNrObservations(Index agentI, size_t nr); + /**\brief Adds an Observation with a particular name for the specified + * agent.*/ + void AddObservation(Index agentI, const std::string &name); +#if 0 // will be computed from individual ISDs + /**Sets the initial state distribution to a uniform one.*/ + void SetUniformISD(); +#endif + /**\brief Sets the initial state distribution to v.*/ + void SetISD(const std::vector &v); + + //get (data) functions: + /**Returns a pointer to agentsI's individual model. 
+ */ + MultiAgentDecisionProcessDiscrete* GetIndividualMADPD(Index agentI) + const + {return _m_individualMADPDs[agentI];} + /**\brief return the number of joint actions.*/ + size_t GetNrJointActions() const + { + if(!_m_initialized) + throw(E("TransitionObservationIndependentMADPDiscrete::GetNrJointActions() not yet initialized")); + + return(_m_nrJointActions); + } + size_t GetNrJointActions(const Scope& agScope) const; + + /**\brief return a pointer to the i-th joint action (a JointActionDiscrete).*/ + const JointActionDiscrete* GetJointActionDiscrete(Index i) const; + + size_t GetNrJointStates() const + { + if(!_m_initialized) + throw(E("TransitionObservationIndependentMADPDiscrete::GetNrJointStates() not yet initialized")); + + return(_m_nrJointStates); + } + +//some shortcut functions + + /**\brief return the number of actions of agent agentI*/ + size_t GetNrActions(Index agentI) const + {return GetIndividualMADPD(agentI)->GetNrActions(0);} + /**\brief Returns the number of local states for agent i.*/ + size_t GetNrStates(Index agI) const + {return _m_individualMADPDs[agI]->GetNrStates();} + + /**\brief return the number of observations of agent agentI*/ + size_t GetNrObservations(Index agentI) const + {return GetIndividualMADPD(agentI)->GetNrObservations(0);} + +//joint <-> individual action conversion + + /**\brief Returns the joint action index that corresponds to the vector + * of specified individual action indices.*/ + Index IndividualToJointActionIndices(const std::vector& + indivActionIndices) const + { + if(!_m_initialized) + throw(E("TransitionObservationIndependentMADPDiscrete::IndividualToJointActionIndices(vector&) - Error: not initialized.")); + return(IndexTools::IndividualToJointIndicesStepSize(indivActionIndices, + _m_actionStepSize)); + } + + /**\brief returns a vector of indices to indiv. action indices corr.
+ * to joint action index jaI.*/ + const std::vector& JointToIndividualActionIndices(Index jaI) const { + if(_m_jointIndicesCached) + return(_m_jointToIndActionCache.at(jaI)); + else + { + throw(E("TransitionObservationIndependentMADPDiscrete::JointToIndividualActionIndices did not cache conversion")); +// return(JointToIndividualActionIndicesNoCache(jaI)); + } + } + Index IndividualToJointActionIndices(const Index* AI_ar) const + {return IndexTools::IndividualToJointIndicesArrayStepSize( + AI_ar, _m_actionStepSizeArray, _m_nr_agents);} + Index IndividualToJointActionIndices( + const std::vector& ja_e, const Scope& agSC) const + { + // identical to MADPComponentDiscreteActions + std::vector nr_A_e(agSC.size()); + IndexTools::RestrictIndividualIndicesToScope( + GetNrActions(), agSC, nr_A_e); + Index jaI = IndexTools::IndividualToJointIndices( ja_e, nr_A_e); + return(jaI); + } + std::vector JointToIndividualActionIndices( + Index ja_e, const Scope& agSC) const + { + // identical to MADPComponentDiscreteActions + std::vector nr_A_e(agSC.size()); + IndexTools::RestrictIndividualIndicesToScope( + GetNrActions(), agSC, nr_A_e); + std::vector ja_e_vec = IndexTools::JointToIndividualIndices(ja_e, nr_A_e); + return(ja_e_vec); + } + + Index JointToRestrictedJointActionIndex(Index jaI, const Scope& agSc_e ) const + { + const std::vector& ja_vec = JointToIndividualActionIndices(jaI); + std::vector ja_vec_e(agSc_e.size()); + IndexTools::RestrictIndividualIndicesToScope(ja_vec, agSc_e, ja_vec_e); + Index ja_e = IndividualToJointActionIndices(ja_vec_e, agSc_e); + return(ja_e); + } + + /**\brief returns a vector of individual (local) state indices + * corresponding to joint state index jointSI.*/ + const std::vector& JointToIndividualStateIndices(Index jointSI) const + { + if(_m_jointIndicesCached) + return _m_indivStateIndices[jointSI]; + else if (_m_indivStateIndicesMap->find(jointSI)!= + _m_indivStateIndicesMap->end()) + return(_m_indivStateIndicesMap->find(jointSI)->second); + 
else + { + std::vector ind_sI= + IndexTools::JointToIndividualIndicesStepSize(jointSI,_m_stateStepSize); + _m_indivStateIndicesMap->insert(make_pair(jointSI,ind_sI)); + return(_m_indivStateIndicesMap->find(jointSI)->second); + } + } + + /**\brief returns the joint index for indivStateIndices*/ + Index IndividualToJointStateIndices(const std::vector& + indivStateIndices) const + { + return(IndexTools::IndividualToJointIndicesStepSize(indivStateIndices, + _m_stateStepSize)); + } + + /**\brief returns the joint index for indivObsIndices*/ + Index IndividualToJointObservationIndices(const std::vector& + indivObsIndices) const + { + if(!_m_initialized) + throw(E("TransitionObservationIndependentMADPDiscrete::IndividualToJointObservationIndices(const vector) - Error: not initialized. ")); + return(IndexTools::IndividualToJointIndicesStepSize(indivObsIndices,_m_observationStepSize)); + } + + /**\brief returns the individual indices for joint observation joI.*/ + const std::vector& JointToIndividualObservationIndices(Index joI) + const + { + if(_m_jointIndicesCached) + return(_m_jointToIndObsCache.at(joI)); + else + { + throw(E("TransitionObservationIndependentMADPDiscrete::JointToIndividualActionIndices did not cache conversion")); +// return(JointToIndividualObservationIndicesNoCache(joI)); + } + } + + /**\brief SoftPrints the action set for each agent.*/ + std::string SoftPrintActionSets() const; + /**\brief Prints the action set for each agent.*/ + void PrintActionSets() const + {std::cout << SoftPrintActionSets();} + /**\brief SoftPrints the set of joint actions.*/ + std::string SoftPrintJointActionSet() const; + /**\brief Prints the set of joint actions.*/ + void PrintJointActionSet() const + {std::cout << SoftPrintJointActionSet();} + /**\brief SoftPrints information regarding this + * TransitionObservationIndependentMADPDiscrete.*/ + std::string SoftPrint() const; + /**\brief Prints information regarding this + * TransitionObservationIndependentMADPDiscrete.*/ + 
void Print() const + {std::cout << SoftPrint();} + std::string SoftPrintState(Index sI) const; + + void SetSparse(bool sparse); + + // stuff to implement MultiAgentDecisionProcessDiscreteInterface + + /**\brief returns probability of joint transition (the product of + * the probabilities of the individual transitions) + */ + double GetTransitionProbability(Index sI, Index jaI, Index + sucSI) const; + double GetTransitionProbability(const std::vector &sIs, + const std::vector &aIs, + const std::vector &sucSIs) const + { + double p=1; + for(Index agI = 0; agI < GetNrAgents(); agI++) + { + p*=GetIndividualMADPD(agI)->GetTransitionProbability( + sIs[agI], + aIs[agI], + sucSIs[agI]); + if(p==0) + break; + } + return(p); + } + + /**\brief Returns the probability of the joint observation joI (the + * product of the individual observation probabilities, which depend + * only on local states). + */ + double GetObservationProbability(Index jaI, Index sucSI, + Index joI) const; + double GetObservationProbability(const std::vector &aIs, + const std::vector &sucSIs, + const std::vector &oIs) const + { + double p=1; + for(Index agI = 0; agI < GetNrAgents(); agI++) + { + p*=GetIndividualMADPD(agI)->GetObservationProbability( + aIs[agI], + sucSIs[agI], + oIs[agI]); + if(p==0) + break; + } + return(p); + } + + /**\brief returns a successor state index sampled according to the + * transition probabilities. 
+ */ + Index SampleSuccessorState(Index sI, Index jaI) const + { + std::vector sIs=JointToIndividualStateIndices(sI); + std::vector aIs=JointToIndividualActionIndices(jaI); + return(IndividualToJointStateIndices(SampleSuccessorState(sIs,aIs))); + } + + std::vector SampleSuccessorState(const std::vector &sIs, + const std::vector &aIs) + const + { + std::vector sucSIs(GetNrAgents()); + for(Index agI = 0; agI < GetNrAgents(); agI++) + sucSIs[agI]=GetIndividualMADPD(agI)->SampleSuccessorState(sIs[agI], + aIs[agI]); + return(sucSIs); + } + + /**\brief Returns a joint observation, sampled according to the + * observation probabilities.*/ + Index SampleJointObservation(Index jaI, Index sucI) const + { + std::vector sucIs=JointToIndividualStateIndices(sucI); + std::vector aIs=JointToIndividualActionIndices(jaI); + return(IndividualToJointObservationIndices(SampleJointObservation(aIs, + sucIs))); + } + + std::vector SampleJointObservation(const std::vector &aIs, + const std::vector &sucIs) + const + { + std::vector oIs(GetNrAgents()); + + for(Index agI = 0; agI < GetNrAgents(); agI++) + oIs[agI]=GetIndividualMADPD(agI)->SampleJointObservation(aIs[agI], + sucIs[agI]); + return(oIs); + } + + + /**\brief Samples an initial state. 
+ */ + Index SampleInitialState() const + { + return(IndividualToJointStateIndices(SampleInitialStates())); + } + + std::vector SampleInitialStates() const + { + std::vector sIs(GetNrAgents()); + + for(Index agI = 0; agI < GetNrAgents(); agI++) + sIs[agI]=GetIndividualMADPD(agI)->SampleInitialState(); + + return(sIs); + } + + + /**\brief returns the number of (joint) states.*/ + size_t GetNrStates() const + { return(GetNrJointStates()); } + + /**\brief Returns a pointer to state i.*/ + const State* GetState(Index i) const + { + if(_m_jointIndicesCached) // we cached all joint states + return(_m_jointStates.at(i)); + else + return(GetState(JointToIndividualStateIndices(i))); + } + + const State* GetState(const std::vector &sIs) const; + + /**\brief returns the prob. of state sI according to the initial state + * distribution. */ + double GetInitialStateProbability(Index sI) const; + + /**\brief returns the initial state distribution.*/ + //std::vector GetISD() const; + virtual StateDistributionVector* GetISD() const; + + /**\brief returns a vector with the number of actions for each agent.*/ + const std::vector& GetNrActions() const + { return(_m_nrIndivActions); } + + /**\brief Get a pointer to action a of agentI.*/ + const Action* GetAction(Index agentI, Index a) const + { return(GetIndividualMADPD(agentI)->GetAction(0,a)); } + + /**\brief Returns a pointer to joint action i.*/ + const JointAction* GetJointAction(Index i) const + { return(GetJointActionDiscrete(i)); } + + /**\brief converts individual to joint actions.*/ + Index IndividualToJointActionIndices(Index* IndexArray) const + { + return(IndexTools::IndividualToJointIndicesArray(IndexArray, + _m_nrIndivActions)); + } + + /**\brief Returns a vector with the number of observations for each + * agent.*/ + const std::vector& GetNrObservations() const + { return(_m_nrIndivObs); } + + /**\brief Get the number of joint observations*/ + size_t GetNrJointObservations() const + { + if(!_m_initialized) + 
throw(E("TransitionObservationIndependentMADPDiscrete::GetNrJointObservations() not yet initialized")); + + return(_m_nrJointObservations); + } + + /**\brief Get a pointer to observation o of agentI.*/ + const Observation* GetObservation(Index agentI, Index o) const + { return(GetIndividualMADPD(agentI)->GetObservation(0,o)); } + + /**\brief Get a pointer to the i-th joint observation.*/ + const JointObservation* GetJointObservation(Index i) const; + + TGet* GetTGet() const + { + if(!_m_jointModelsGenerated) + return 0; + + if(_m_sparse) + return new TGet_TransitionModelMappingSparse( + ((TransitionModelMappingSparse*)_m_p_tModel) ); + else + return new TGet_TransitionModelMapping( + ((TransitionModelMapping*)_m_p_tModel) ); + } + + OGet* GetOGet() const + { + if(!_m_jointModelsGenerated) + return 0; + + if(_m_sparse) + return new OGet_ObservationModelMappingSparse( + ((ObservationModelMappingSparse*)_m_p_oModel) ); + else + return new OGet_ObservationModelMapping( + ((ObservationModelMapping*)_m_p_oModel) ); + } + + TransitionModelDiscrete* GetTransitionModelDiscretePtr() const + { return(_m_p_tModel); } + + ObservationModelDiscrete* GetObservationModelDiscretePtr() const + { return(_m_p_oModel); } + + // this one is called externally sometimes + void CreateCentralizedSparseTransitionModel(); + +}; + +#endif /* !_TRANSITIONOBSERVATIONINDEPENDENTMADPDISCRETE_H_ */ + + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TwoStageDynamicBayesianNetwork.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/TwoStageDynamicBayesianNetwork.cpp new file mode 100644 index 000000000..783260b84 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TwoStageDynamicBayesianNetwork.cpp @@ -0,0 +1,1236 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. 
However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "TwoStageDynamicBayesianNetwork.h" +#include "IndexTools.h" +#include + +using namespace std; + +//Default constructor +TwoStageDynamicBayesianNetwork::TwoStageDynamicBayesianNetwork( + MultiAgentDecisionProcessDiscreteFactoredStatesInterface& madp) + : + _m_madp(&madp) + ,_m_nrY(0) + ,_m_nrO(0) +{ + _m_SoIStorageInitialized = false; + _m_IndividualToJointYiiIndices_catVector = 0; + _m_IndividualToJointOiiIndices_catVector = 0; + _m_SampleY = 0; + _m_SampleO = 0; + _m_SampleNrO = 0; + +} +/* +//Copy constructor. +TwoStageDynamicBayesianNetwork::TwoStageDynamicBayesianNetwork(const TwoStageDynamicBayesianNetwork& o) +{ +} +*/ +//Destructor +TwoStageDynamicBayesianNetwork::~TwoStageDynamicBayesianNetwork() +{ + for(Index i=0;i!=_m_Y_CPDs.size();++i) + delete _m_Y_CPDs.at(i); + for(Index i=0;i!=_m_O_CPDs.size();++i) + delete _m_O_CPDs.at(i); + + // delete by iterator, as temporary data may not be allocated when + // reading from disk + { std::vector* >::iterator it; + for (it=_m_X_restr_perY.begin();it!=_m_X_restr_perY.end();it++) + delete *it; + for (it=_m_A_restr_perY.begin();it!=_m_A_restr_perY.end();it++) + delete *it; + for (it=_m_Y_restr_perY.begin();it!=_m_Y_restr_perY.end();it++) + delete *it; + + for (it=_m_X_restr_perO.begin();it!=_m_X_restr_perO.end();it++) + delete *it; + for (it=_m_A_restr_perO.begin();it!=_m_A_restr_perO.end();it++) + delete *it; + for (it=_m_Y_restr_perO.begin();it!=_m_Y_restr_perO.end();it++) + delete *it; + for (it=_m_O_restr_perO.begin();it!=_m_O_restr_perO.end();it++) + delete *it; + } + { std::vector::iterator it; + for 
(it=_m_nrVals_SoI_Y_stepsize.begin();it!=_m_nrVals_SoI_Y_stepsize.end();it++) + delete [] *it; + for (it=_m_nrVals_SoI_O_stepsize.begin();it!=_m_nrVals_SoI_O_stepsize.end();it++) + delete [] *it; + } + + delete _m_IndividualToJointYiiIndices_catVector; + delete _m_IndividualToJointOiiIndices_catVector; + delete _m_SampleY; + delete _m_SampleO; + delete _m_SampleNrO; +} +/* +//Copy assignment operator +TwoStageDynamicBayesianNetwork& TwoStageDynamicBayesianNetwork::operator= (const TwoStageDynamicBayesianNetwork& o) +{ + if (this == &o) return *this; // Gracefully handle self assignment + // Put the normal assignment duties here... + + return *this; +} +*/ + + +void TwoStageDynamicBayesianNetwork:: +ScopeBackup( const Scope & stateScope, + const Scope & agentScope, + Scope & X, + Scope & A) const +{ + //first we compute the 'closure' of the NS state factors Y and + //observations O. I.e., within stage connections can grow the set + //of Y and O that need to be considered: + Scope Y = stateScope; + Scope O = agentScope; + ComputeWithinNextStageClosure(Y,O); + + X.clear(); + A.clear(); + //Next we do the backup of the Ys and Os + for( Scope::iterator y_it = Y.begin(); y_it != Y.end(); y_it++) + { + Index yI = *y_it; + X.Insert( _m_XSoI_Y.at(yI) ); + A.Insert( _m_ASoI_Y.at(yI) ); + } + for( Scope::iterator o_it = O.begin(); o_it != O.end(); o_it++) + { + X.Insert( _m_XSoI_O.at(*o_it) ); + A.Insert( _m_ASoI_O.at(*o_it) ); + } + X.Sort(); + A.Sort(); + return; +} + + +void TwoStageDynamicBayesianNetwork:: +ComputeWithinNextStageClosure(Scope& Y, Scope& O) const +{ + bool converged = true; + do{ + converged = true; + //check all Y for non-included Y dependencies + Scope::const_iterator s_it = Y.begin(); + Scope::const_iterator s_last = Y.end(); + while(s_it != s_last) + { + Index yI = *s_it; + const Scope& y_YSoI = _m_YSoI_Y.at(yI); + for(Scope::const_iterator oy_it = y_YSoI.begin(); + oy_it != y_YSoI.end(); oy_it++) + //yI has other Y (oyI = *oy_it) that point to it... 
+ //let's see if they are in Y already. + if(! Y.Contains( *oy_it ) ) + { + converged = false; + Y.Insert(*oy_it); + } + s_it++; + } + + //check all O for non-included O and Y dependencies + Scope::const_iterator o_it = O.begin(); + Scope::const_iterator o_last = O.end(); + while(o_it != o_last) + { + Index oI = *o_it; + const Scope& o_YSoI = _m_YSoI_O.at(oI); + for(Scope::const_iterator oy_it = o_YSoI.begin(); + oy_it != o_YSoI.end(); oy_it++) + //oI has other Y (oyI = *oy_it) that point to it... + //let's see if they are in Y already. + if(! Y.Contains( *oy_it ) ) + { + converged = false; + Y.Insert(*oy_it); + } + + const Scope& o_OSoI = _m_OSoI_O.at(oI); + for(Scope::const_iterator oo_it = o_OSoI.begin(); + oo_it != o_OSoI.end(); oo_it++) + //oI has other O (ooI = *oo_it) that point to it... + //let's see if they are in O already. + if(! O.Contains( *oo_it ) ) + { + converged = false; + O.Insert(*oo_it); + } + + + o_it++; + } + + }while (! converged ); +} + +Scope TwoStageDynamicBayesianNetwork:: +StateScopeBackup( const Scope & stateScope, + const Scope & agentScope ) const +{ + Scope X, A; + ScopeBackup(stateScope, agentScope, X,A); + return(X); +} +Scope TwoStageDynamicBayesianNetwork:: +AgentScopeBackup( const Scope & stateScope, + const Scope & agentScope) const +{ + Scope X, A; + ScopeBackup(stateScope, agentScope, X,A); + return(A); +} + +double TwoStageDynamicBayesianNetwork:: +GetYProbability( const vector& X, + const vector& A, + const vector& Y) const +{ + size_t nrSF = _m_madp->GetNrStateFactors(); + if(Y.size() != nrSF || X.size() != nrSF) + throw E("TwoStageDynamicBayesianNetwork::GetYProbability only implemented for full state vectors"); + if(A.size() != _m_madp->GetNrAgents()) + throw E("TwoStageDynamicBayesianNetwork::GetYProbability only implemented for full joint actions"); + + double p = 1.0; + for(Index y=0; y < Y.size(); y++) + { + vector X_restr(GetXSoI_Y(y).size()); + IndexTools::RestrictIndividualIndicesToScope(X, GetXSoI_Y(y), X_restr 
); + vector A_restr(GetASoI_Y(y).size()); + IndexTools::RestrictIndividualIndicesToScope(A, GetASoI_Y(y), A_restr ); + vector Y_restr(GetYSoI_Y(y).size()); + IndexTools::RestrictIndividualIndicesToScope(Y, GetYSoI_Y(y), Y_restr ); + Index iiI = IndividualToJointYiiIndices(y, X_restr, A_restr, Y_restr); + Index yVal = Y[y]; + double p_y = _m_Y_CPDs[y]->Get(yVal, iiI); + p *= p_y; + } + return(p); +} + +double TwoStageDynamicBayesianNetwork:: +GetYProbabilityGeneral( + const Scope& Xscope, + const vector& X, + const Scope& Ascope, + const vector& A, + const Scope& YIIscope, + const vector& YII, + const Scope& Yscope, + const vector& Y + ) const +{ + + double p = 1.0; + for(Index Y_index=0; Y_index < Y.size(); Y_index++) + { + //Y_index is index in vector Y + Index y = Yscope.at(Y_index); // the index to the variable we look at + Index yVal = Y.at(Y_index); // the value of that variable + + vector X_restr(GetXSoI_Y(y).size()); + IndexTools::RestrictIndividualIndicesToNarrowerScope(X, Xscope, GetXSoI_Y(y), X_restr ); + vector A_restr(GetASoI_Y(y).size()); + IndexTools::RestrictIndividualIndicesToNarrowerScope(A, Ascope, GetASoI_Y(y), A_restr ); + vector YII_restr(GetYSoI_Y(y).size()); + IndexTools::RestrictIndividualIndicesToNarrowerScope(YII, YIIscope, GetYSoI_Y(y), YII_restr ); + Index iiI = IndividualToJointYiiIndices(y, X_restr, A_restr, YII_restr); + double p_y = _m_Y_CPDs[y]->Get(yVal, iiI); + p *= p_y; + } + return(p); +} + + +vector TwoStageDynamicBayesianNetwork:: +GetYProbabilitiesExactScopes( + const vector& Xii, + const vector& Aii, + const vector& Yii, + const Index& yIndex + ) const +{ + size_t nrVals = _m_madp->GetNrValuesForFactor(yIndex); + vector probs(nrVals, 0.0); + Index iiI = IndividualToJointYiiIndices(yIndex, Xii, Aii, Yii); + for(Index valI=0; valI < nrVals; valI++) + { + probs[valI] = _m_Y_CPDs[yIndex]->Get(valI, iiI); + } + return(probs); +} +vector TwoStageDynamicBayesianNetwork:: +GetOProbabilitiesExactScopes( + const vector& Aii, + const 
vector& Yii, + const vector& Oii, + const Index& oIndex //agentI + ) const +{ + if(GetXSoI_O(oIndex).size() > 0) + { + stringstream errormsg; + errormsg << "Observation Factor " << oIndex << " has a non-null PS SF scope, which was ignored."; + throw E(errormsg.str()); + } + size_t nrVals = _m_madp->GetNrObservations(oIndex); + vector probs(nrVals, 0.0); + Index iiI = IndividualToJointOiiIndices(oIndex, Aii, Yii, Oii); + for(Index valI=0; valI < nrVals; valI++) + { + probs[valI] = _m_O_CPDs[oIndex]->Get(valI, iiI); + } + return(probs); +} +vector TwoStageDynamicBayesianNetwork:: +GetOProbabilitiesExactScopes( + const vector& Xii, + const vector& Aii, + const vector& Yii, + const vector& Oii, + const Index& oIndex //agentI + ) const +{ + size_t nrVals = _m_madp->GetNrObservations(oIndex); + vector probs(nrVals, 0.0); + Index iiI = IndividualToJointOiiIndices(oIndex, Xii ,Aii, Yii, Oii); + for(Index valI=0; valI < nrVals; valI++) + { + probs[valI] = _m_O_CPDs[oIndex]->Get(valI, iiI); + } + return(probs); +} +double TwoStageDynamicBayesianNetwork:: +GetOProbability( const vector& A, + const vector& Y, + const vector& O) const +{ + double p = 1.0; + for(Index o=0; o < O.size(); o++) + { + if(GetXSoI_O(o).size() > 0) + { + stringstream errormsg; + errormsg << "Observation Factor " << o << " has a non-null PS SF scope, which was ignored."; + throw E(errormsg.str()); + } + vector A_restr(GetASoI_O(o).size()); + IndexTools::RestrictIndividualIndicesToScope(A, GetASoI_O(o), A_restr ); + vector Y_restr(GetYSoI_O(o).size()); + IndexTools::RestrictIndividualIndicesToScope(Y, GetYSoI_O(o), Y_restr ); + vector O_restr(GetOSoI_O(o).size()); + IndexTools::RestrictIndividualIndicesToScope(O, GetOSoI_O(o), O_restr ); + Index iiI = IndividualToJointOiiIndices(o, A_restr, Y_restr, O_restr); + Index oVal = O[o]; + double p_o = _m_O_CPDs[o]->Get(oVal, iiI); + p *= p_o; + } + return(p); +} +double TwoStageDynamicBayesianNetwork:: +GetOProbability( const vector& X, + const vector& A, + 
const vector& Y, + const vector& O) const +{ + double p = 1.0; + for(Index o=0; o < O.size(); o++) + { + vector X_restr(GetXSoI_O(o).size()); + IndexTools::RestrictIndividualIndicesToScope(X, GetXSoI_O(o), X_restr ); + vector A_restr(GetASoI_O(o).size()); + IndexTools::RestrictIndividualIndicesToScope(A, GetASoI_O(o), A_restr ); + vector Y_restr(GetYSoI_O(o).size()); + IndexTools::RestrictIndividualIndicesToScope(Y, GetYSoI_O(o), Y_restr ); + vector O_restr(GetOSoI_O(o).size()); + IndexTools::RestrictIndividualIndicesToScope(O, GetOSoI_O(o), O_restr ); + Index iiI = IndividualToJointOiiIndices(o, X_restr, A_restr, Y_restr, O_restr); + Index oVal = O[o]; + double p_o = _m_O_CPDs[o]->Get(oVal, iiI); + p *= p_o; + } + return(p); +} +double TwoStageDynamicBayesianNetwork:: +GetYOProbability( const Scope& X, const vector& Xs, + const Scope& A, const vector& As, + const Scope& Y, const vector& Ys, + const Scope& O, const vector& Os) const +{ + double p = 1.0; + for(Index yI=0; yI < Y.size(); yI++) + { + //get the index of the variable Y we are looking at... + Index y = Y[yI]; + Index yVal = Ys[yI]; //and its value acc. to Ys + vector Xs_restr(GetXSoI_Y(y).size()); + IndexTools::RestrictIndividualIndicesToNarrowerScope(Xs, X, GetXSoI_Y(y), Xs_restr ); + vector As_restr(GetASoI_Y(y).size()); + IndexTools::RestrictIndividualIndicesToNarrowerScope(As, A, GetASoI_Y(y), As_restr ); + vector Ys_restr(GetYSoI_Y(y).size()); + IndexTools::RestrictIndividualIndicesToNarrowerScope(Ys, Y, GetYSoI_Y(y), Ys_restr ); + Index iiI = IndividualToJointYiiIndices(y,Xs_restr, As_restr, Ys_restr); + + double p_y = _m_Y_CPDs.at(y)->Get(yVal, iiI); + p *= p_y; + } + for(Index oI=0; oI < O.size(); oI++) + { + //get the index of the variable Y we are looking at... 
+ Index o = O[oI]; + Index oVal = Os[oI]; // and its value according to Os + vector Xs_restr(GetXSoI_O(o).size()); + IndexTools::RestrictIndividualIndicesToNarrowerScope(Xs, X, GetXSoI_O(o), Xs_restr ); + vector As_restr(GetASoI_O(o).size()); + IndexTools::RestrictIndividualIndicesToNarrowerScope(As, A, GetASoI_O(o), As_restr ); + vector Ys_restr(GetYSoI_O(o).size()); + IndexTools::RestrictIndividualIndicesToNarrowerScope(Ys, Y, GetYSoI_O(o), Ys_restr ); + vector Os_restr(GetOSoI_O(o).size()); + IndexTools::RestrictIndividualIndicesToNarrowerScope(Os, O, GetOSoI_O(o), Os_restr ); + Index iiI = IndividualToJointOiiIndices(o,As_restr, Ys_restr, Os_restr); + + double p_o = _m_O_CPDs.at(o)->Get(oVal, iiI); + p *= p_o; + } + return(p); +} +///Sample a NS state +vector TwoStageDynamicBayesianNetwork:: +SampleY( const vector& X, + const vector& A) const +{ + for(Index y=0; y < _m_nrY; y++) + { + IndexTools::RestrictIndividualIndicesToScope(X, GetXSoI_Y(y), + *_m_X_restr_perY[y]); + IndexTools::RestrictIndividualIndicesToScope(A, GetASoI_Y(y), + *_m_A_restr_perY[y]); + //because Y->Y dependencies can only depend on lower index, we have already + //sampled the relevant Y's in _m_SampleY: + IndexTools::RestrictIndividualIndicesToScope(*_m_SampleY, GetYSoI_Y(y), + *_m_Y_restr_perY[y]); + Index iiI = IndividualToJointYiiIndices(y, + *_m_X_restr_perY[y], + *_m_A_restr_perY[y], + *_m_Y_restr_perY[y]); + Index sampledYVal = _m_Y_CPDs[y]->Sample(iiI); + (*_m_SampleY)[y] = sampledYVal; + } + return(*_m_SampleY); + /* + double randNr=rand() / (RAND_MAX + 1.0); + + double sum=0; + + + for(Index y=0; y < _m_nrY; y++) + { + IndexTools::RestrictIndividualIndicesToScope(X, GetXSoI_Y(y), + *_m_X_restr_perY[y]); + IndexTools::RestrictIndividualIndicesToScope(A, GetASoI_Y(y), + *_m_A_restr_perY[y]); + (*_m_SampleY)[y]=0; + } + + do { + + double p = 1.0; + for(Index y=0; y < _m_SampleY->size(); y++) + { + IndexTools::RestrictIndividualIndicesToScope(*_m_SampleY, GetYSoI_Y(y), + 
*_m_Y_restr_perY[y] ); + Index iiI = IndividualToJointYiiIndices(y, + *_m_X_restr_perY[y], + *_m_A_restr_perY[y], + *_m_Y_restr_perY[y]); + Index yVal = (*_m_SampleY)[y]; + double p_y = _m_Y_CPDs[y]->Get(yVal, iiI); + p *= p_y; + // if the probability is 0, there is no need to check the + // rest of the Ys + //if(EqualProbability(p,0.0)) + // break; + } + sum+=p; +#if 0 + cout << "Y " << SoftPrintVector(Y) << " : " << p << " sum " + << sum << " rand " << randNr << endl; +#endif + if(randNr<=sum) + break; + + } while(! IndexTools::Increment( *_m_SampleY,_m_madp->GetNrValuesPerFactor()));// _m_Yii_size_Y) ); + + return(*_m_SampleY); + */ +} + +///Sample an observation. +vector TwoStageDynamicBayesianNetwork:: +SampleO( const vector& X, + const vector& A, + const vector& Y) const +{ + for(Index o=0; o < _m_nrO; o++) + { + IndexTools::RestrictIndividualIndicesToScope(X, GetXSoI_O(o), + *_m_X_restr_perO[o]); + IndexTools::RestrictIndividualIndicesToScope(A, GetASoI_O(o), + *_m_A_restr_perO[o]); + IndexTools::RestrictIndividualIndicesToScope(Y, GetYSoI_O(o), + *_m_Y_restr_perO[o] ); + //because O->O dependencies can only depend on lower index, we have already + //sampled the relevant O's in _m_SampleO: + IndexTools::RestrictIndividualIndicesToScope( + *_m_SampleO, GetOSoI_O(o), *_m_O_restr_perO[o] ); + Index iiI = IndividualToJointOiiIndices(o, + *_m_X_restr_perO[o], + *_m_A_restr_perO[o], + *_m_Y_restr_perO[o], + *_m_O_restr_perO[o]); + Index sampledOVal = _m_O_CPDs[o]->Sample(iiI); + (*_m_SampleO)[o] = sampledOVal; + } + return(*_m_SampleO); +/* + double randNr=rand() / (RAND_MAX + 1.0); + + double sum=0; + + for(Index o=0; o < _m_nrO; o++) + { + IndexTools::RestrictIndividualIndicesToScope(A, GetASoI_O(o), + *_m_A_restr_perO[o] ); + IndexTools::RestrictIndividualIndicesToScope(Y, GetYSoI_O(o), + *_m_Y_restr_perO[o] ); + (*_m_SampleO)[o]=0; + } + + do { + + double p = 1.0; + for(Index o=0; o < _m_SampleO->size(); o++) + { + IndexTools::RestrictIndividualIndicesToScope( 
+ *_m_SampleO, GetOSoI_O(o), *_m_O_restr_perO[o] ); + Index iiI = IndividualToJointOiiIndices(o, + *_m_A_restr_perO[o], + *_m_Y_restr_perO[o], + *_m_O_restr_perO[o]); + Index oVal = (*_m_SampleO)[o]; + double p_o = _m_O_CPDs[o]->Get(oVal, iiI); + p *= p_o; + // if the probability is 0, there is no need to check the + // rest of the Os + if(EqualProbability(p,0.0)) + break; + } + + sum+=p; +#if 0 + cout << "O " << SoftPrintVector(O) << " : " << p << " sum " + << sum << " rand " << randNr << endl; +#endif + if(randNr<=sum) + break; + + } while(! IndexTools::Increment( *_m_SampleO, *_m_SampleNrO) ); + + return(*_m_SampleO); + */ +} + +#define DEBUG_INIT_SOIs 0 +void TwoStageDynamicBayesianNetwork::InitializeStorage() +{ + _m_nrY = _m_madp->GetNrStateFactors(); + _m_nrO = _m_madp->GetNrAgents(); + +#if DEBUG_INIT_SOIs + cout << ">>>>>>\nInitializeStorage called, checking SoftPrint..." << endl; + cout << this->SoftPrint() << endl; +#endif + _m_XSoI_Y.clear(); + _m_ASoI_Y.clear(); + _m_YSoI_Y.clear(); + _m_XSoI_O.clear(); + _m_ASoI_O.clear(); + _m_YSoI_O.clear(); + _m_OSoI_O.clear(); + + _m_XSoI_Y.resize(_m_nrY); + _m_ASoI_Y.resize(_m_nrY); + _m_YSoI_Y.resize(_m_nrY); + _m_XSoI_O.resize(_m_nrO); + _m_ASoI_O.resize(_m_nrO); + _m_YSoI_O.resize(_m_nrO); + _m_OSoI_O.resize(_m_nrO); + + for(Index i=0;i!=_m_Y_CPDs.size();++i) + delete _m_Y_CPDs.at(i); + for(Index i=0;i!=_m_O_CPDs.size();++i) + delete _m_O_CPDs.at(i); + _m_Y_CPDs.resize(_m_nrY); + _m_O_CPDs.resize(_m_nrO); + +#if DEBUG_INIT_SOIs + cout << "InitializeStorage finished, checking SoftPrint..." 
<< endl; + cout << this->SoftPrint() << "<<<<<<<<"<(_m_nrY); + _m_SampleO=new vector(_m_nrO); + _m_SampleNrO=new vector(_m_nrO); + for(Index o=0; o < _m_SampleNrO->size(); o++) + (*_m_SampleNrO)[o]=_m_madp->GetNrObservations(o); + + _m_SoIStorageInitialized = true; +} + +void TwoStageDynamicBayesianNetwork::SetSoI_Y( + Index y, + const Scope& XSoI, + const Scope& ASoI, + const Scope& YSoI) +{ + if(!_m_SoIStorageInitialized) + throw E("Scopes of influence not yet initialized"); + + _m_XSoI_Y.at(y) = XSoI; + _m_ASoI_Y.at(y) = ASoI; + _m_YSoI_Y.at(y) = YSoI; + + _m_XSoI_Y.at(y).Sort(); + _m_ASoI_Y.at(y).Sort(); + _m_YSoI_Y.at(y).Sort(); +} + +void TwoStageDynamicBayesianNetwork::SetSoI_O( + Index o, + const Scope& ASoI, + const Scope& YSoI, + const Scope& OSoI) +{ + if(!_m_SoIStorageInitialized) + throw E("Scopes of influence not yet initialized"); + + _m_ASoI_O.at(o) = ASoI; + _m_YSoI_O.at(o) = YSoI; + _m_OSoI_O.at(o) = OSoI; + + _m_ASoI_O.at(o).Sort(); + _m_YSoI_O.at(o).Sort(); + _m_OSoI_O.at(o).Sort(); +} + +void TwoStageDynamicBayesianNetwork::SetSoI_O( + Index o, + const Scope& XSoI, + const Scope& ASoI, + const Scope& YSoI, + const Scope& OSoI) +{ + if(!_m_SoIStorageInitialized) + throw E("Scopes of influence not yet initialized"); + + _m_XSoI_O.at(o) = XSoI; + _m_ASoI_O.at(o) = ASoI; + _m_YSoI_O.at(o) = YSoI; + _m_OSoI_O.at(o) = OSoI; + + _m_XSoI_O.at(o).Sort(); + _m_ASoI_O.at(o).Sort(); + _m_YSoI_O.at(o).Sort(); + _m_OSoI_O.at(o).Sort(); +} + + +void TwoStageDynamicBayesianNetwork::InitializeIIs() +{ + vector nrValsPerSF = _m_madp->GetNrValuesPerFactor(); + + //initialize meta data for Ys + _m_nrVals_XSoI_Y.clear(); //for possible re-initialization (e.g. 
in marginalization) + _m_nrVals_ASoI_Y.clear(); + _m_nrVals_YSoI_Y.clear(); + _m_nrVals_SoI_Y.clear(); + _m_nrVals_XSoI_Y.resize(_m_nrY); + _m_nrVals_ASoI_Y.resize(_m_nrY); + _m_nrVals_YSoI_Y.resize(_m_nrY); + _m_nrVals_SoI_Y.resize(_m_nrY); + + _m_Xii_size_Y.clear(); + _m_Aii_size_Y.clear(); + _m_Yii_size_Y.clear(); + _m_ii_size_Y.clear(); + _m_Xii_size_Y.resize(_m_nrY); + _m_Aii_size_Y.resize(_m_nrY); + _m_Yii_size_Y.resize(_m_nrY); + _m_ii_size_Y.resize(_m_nrY); + + for(Index yI=0; yI < _m_nrY; yI++) + { + _m_nrVals_XSoI_Y.at(yI).resize(GetXSoI_Y(yI).size()); + IndexTools::RestrictIndividualIndicesToScope( + nrValsPerSF, GetXSoI_Y(yI), _m_nrVals_XSoI_Y.at(yI)); + + size_t Xii_size = 1; + for(Index i=0; i < _m_nrVals_XSoI_Y.at(yI).size(); i++) + Xii_size *= _m_nrVals_XSoI_Y.at(yI).at(i); + _m_Xii_size_Y.at(yI) = Xii_size; + + _m_nrVals_ASoI_Y.at(yI).resize(GetASoI_Y(yI).size()); + IndexTools::RestrictIndividualIndicesToScope( + _m_madp->GetNrActions(), GetASoI_Y(yI), _m_nrVals_ASoI_Y.at(yI)); + + size_t Aii_size = 1; + for(Index i=0; i < _m_nrVals_ASoI_Y.at(yI).size(); i++) + Aii_size *= _m_nrVals_ASoI_Y.at(yI).at(i); + _m_Aii_size_Y.at(yI) = Aii_size; + + _m_nrVals_YSoI_Y.at(yI).resize(GetYSoI_Y(yI).size()); + IndexTools::RestrictIndividualIndicesToScope( + nrValsPerSF, GetYSoI_Y(yI), _m_nrVals_YSoI_Y.at(yI) ); + size_t Yii_size = 1; + for(Index i=0; i < _m_nrVals_YSoI_Y.at(yI).size(); i++) + Yii_size *= _m_nrVals_YSoI_Y.at(yI).at(i); + _m_Yii_size_Y.at(yI) = Yii_size; + + _m_ii_size_Y.at(yI) = Xii_size * Aii_size * Yii_size; + + vector< size_t >::iterator pos, it1, it2; + pos = _m_nrVals_SoI_Y.at(yI).end(); + it1 = _m_nrVals_XSoI_Y.at(yI).begin(); + it2 = _m_nrVals_XSoI_Y.at(yI).end(); + _m_nrVals_SoI_Y.at(yI).insert( pos, it1, it2 ); + pos = _m_nrVals_SoI_Y.at(yI).end(); + it1 = _m_nrVals_ASoI_Y.at(yI).begin(); + it2 = _m_nrVals_ASoI_Y.at(yI).end(); + _m_nrVals_SoI_Y.at(yI).insert( pos, it1, it2 ); + pos = _m_nrVals_SoI_Y.at(yI).end(); + it1 = 
_m_nrVals_YSoI_Y.at(yI).begin(); + it2 = _m_nrVals_YSoI_Y.at(yI).end(); + _m_nrVals_SoI_Y.at(yI).insert( pos, it1, it2 ); + + + + } + + //initialize meta data for Os + for(size_t i = 0; i < _m_nrVals_XSoI_O.size(); i++) + _m_nrVals_XSoI_O.at(i).clear(); + _m_nrVals_XSoI_O.clear(); + for(size_t i = 0; i < _m_nrVals_ASoI_O.size(); i++) + _m_nrVals_ASoI_O.at(i).clear(); + _m_nrVals_ASoI_O.clear(); + for(size_t i = 0; i < _m_nrVals_YSoI_O.size(); i++) + _m_nrVals_YSoI_O.at(i).clear(); + _m_nrVals_YSoI_O.clear(); + for(size_t i = 0; i < _m_nrVals_OSoI_O.size(); i++) + _m_nrVals_OSoI_O.at(i).clear(); + _m_nrVals_OSoI_O.clear(); + for(size_t i = 0; i < _m_nrVals_SoI_O.size(); i++) + _m_nrVals_SoI_O.at(i).clear(); + _m_nrVals_SoI_O.clear(); + _m_nrVals_XSoI_O.resize(_m_nrO); + _m_nrVals_ASoI_O.resize(_m_nrO); + _m_nrVals_YSoI_O.resize(_m_nrO); + _m_nrVals_OSoI_O.resize(_m_nrO); + _m_nrVals_SoI_O.resize(_m_nrO); + + _m_Xii_size_O.clear(); + _m_Aii_size_O.clear(); + _m_Yii_size_O.clear(); + _m_Oii_size_O.clear(); + _m_ii_size_O.clear(); + _m_Xii_size_O.resize(_m_nrO); + _m_Aii_size_O.resize(_m_nrO); + _m_Yii_size_O.resize(_m_nrO); + _m_Oii_size_O.resize(_m_nrO); + _m_ii_size_O.resize(_m_nrO); + + for(Index oI=0; oI < _m_nrO; oI++) + { + _m_nrVals_XSoI_O.at(oI).resize(GetXSoI_O(oI).size()); + IndexTools::RestrictIndividualIndicesToScope( + nrValsPerSF, GetXSoI_O(oI), _m_nrVals_XSoI_O.at(oI)); + + size_t Xii_size = 1; + for(Index i=0; i < _m_nrVals_XSoI_O.at(oI).size(); i++) + Xii_size *= _m_nrVals_XSoI_O.at(oI).at(i); + _m_Xii_size_O.at(oI) = Xii_size; + + _m_nrVals_ASoI_O.at(oI).resize(GetASoI_O(oI).size()); + IndexTools::RestrictIndividualIndicesToScope( + _m_madp->GetNrActions(), GetASoI_O(oI), _m_nrVals_ASoI_O.at(oI)); + + size_t Aii_size = 1; + for(Index i=0; i < _m_nrVals_ASoI_O.at(oI).size(); i++) + Aii_size *= _m_nrVals_ASoI_O.at(oI).at(i); + _m_Aii_size_O.at(oI) = Aii_size; + + _m_nrVals_YSoI_O.at(oI).resize(GetYSoI_O(oI).size()); + 
IndexTools::RestrictIndividualIndicesToScope( + nrValsPerSF, GetYSoI_O(oI), _m_nrVals_YSoI_O.at(oI)); + + size_t Yii_size = 1; + for(Index i=0; i < _m_nrVals_YSoI_O.at(oI).size(); i++) + Yii_size *= _m_nrVals_YSoI_O.at(oI).at(i); + _m_Yii_size_O.at(oI) = Yii_size; + + _m_nrVals_OSoI_O.at(oI).resize(GetOSoI_O(oI).size()); + IndexTools::RestrictIndividualIndicesToScope( + _m_madp->GetNrObservations(), GetOSoI_O(oI), _m_nrVals_OSoI_O.at(oI)); + + size_t Oii_size = 1; + for(Index i=0; i < _m_nrVals_OSoI_O.at(oI).size(); i++) + Oii_size *= _m_nrVals_OSoI_O.at(oI).at(i); + _m_Oii_size_O.at(oI) = Oii_size; + + _m_ii_size_O.at(oI) = Oii_size * Xii_size * Aii_size * Yii_size; + + vector< size_t >::iterator pos, it1, it2; + pos = _m_nrVals_SoI_O.at(oI).end(); + it1 = _m_nrVals_XSoI_O.at(oI).begin(); + it2 = _m_nrVals_XSoI_O.at(oI).end(); + _m_nrVals_SoI_O.at(oI).insert( pos, it1, it2 ); + pos = _m_nrVals_SoI_O.at(oI).end(); + it1 = _m_nrVals_ASoI_O.at(oI).begin(); + it2 = _m_nrVals_ASoI_O.at(oI).end(); + _m_nrVals_SoI_O.at(oI).insert( pos, it1, it2 ); + pos = _m_nrVals_SoI_O.at(oI).end(); + it1 = _m_nrVals_YSoI_O.at(oI).begin(); + it2 = _m_nrVals_YSoI_O.at(oI).end(); + _m_nrVals_SoI_O.at(oI).insert( pos, it1, it2 ); + pos = _m_nrVals_SoI_O.at(oI).end(); + it1 = _m_nrVals_OSoI_O.at(oI).begin(); + it2 = _m_nrVals_OSoI_O.at(oI).end(); + _m_nrVals_SoI_O.at(oI).insert( pos, it1, it2 ); + } + + // initialize some memory and variables used to speed up index + // conversion functions + _m_IndividualToJointYiiIndices_catVector=new vector(_m_nrVals_SoI_Y.size()); + _m_IndividualToJointOiiIndices_catVector=new vector(_m_nrVals_SoI_O.size()); + + _m_nrVals_SoI_Y_stepsize.resize(_m_nrY); + for(Index yI=0; yI < _m_nrY; yI++) + _m_nrVals_SoI_Y_stepsize[yI]= + IndexTools::CalculateStepSize(_m_nrVals_SoI_Y[yI]); + + for(size_t i = 0; i < _m_X_restr_perY.size(); i++) + delete(_m_X_restr_perY[i]); + _m_X_restr_perY.clear(); + for(size_t i = 0; i < _m_A_restr_perY.size(); i++) + 
delete(_m_A_restr_perY[i]); + _m_A_restr_perY.clear(); + for(size_t i = 0; i < _m_Y_restr_perY.size(); i++) + delete(_m_Y_restr_perY[i]); + _m_Y_restr_perY.clear(); + for(Index y=0; y < _m_nrY; y++) + { + _m_X_restr_perY.push_back(new vector(GetXSoI_Y(y).size())); + _m_A_restr_perY.push_back(new vector(GetASoI_Y(y).size())); + _m_Y_restr_perY.push_back(new vector(GetYSoI_Y(y).size())); + } + + _m_nrVals_SoI_O_stepsize.resize(_m_nrO); + + for(Index oI=0; oI < _m_nrO; oI++) + _m_nrVals_SoI_O_stepsize[oI]= + IndexTools::CalculateStepSize(_m_nrVals_SoI_O[oI]); + + for(size_t i = 0; i < _m_X_restr_perO.size(); i++) + delete(_m_X_restr_perO[i]); + _m_X_restr_perO.clear(); + for(size_t i = 0; i < _m_A_restr_perO.size(); i++) + delete(_m_A_restr_perO[i]); + _m_A_restr_perO.clear(); + for(size_t i = 0; i < _m_Y_restr_perO.size(); i++) + delete(_m_Y_restr_perO[i]); + _m_Y_restr_perO.clear(); + for(size_t i = 0; i < _m_O_restr_perO.size(); i++) + delete(_m_O_restr_perO[i]); + _m_O_restr_perO.clear(); + for(Index o=0; o < _m_nrO; o++) + { + _m_X_restr_perO.push_back(new vector(GetXSoI_O(o).size())); + _m_A_restr_perO.push_back(new vector(GetASoI_O(o).size())); + _m_Y_restr_perO.push_back(new vector(GetYSoI_O(o).size())); + _m_O_restr_perO.push_back(new vector(GetOSoI_O(o).size())); + } + + _m_ii_initialized = true; +} + +const vector& TwoStageDynamicBayesianNetwork:: +GetNrVals_XSoI_Y(Index yI) const +{ + if(!_m_ii_initialized) + throw E("ii (influence instantiation) meta-info is not initialized"); + return _m_nrVals_XSoI_Y[yI]; +} + +const vector& TwoStageDynamicBayesianNetwork:: +GetNrVals_ASoI_Y(Index yI) const +{ + if(!_m_ii_initialized) + throw E("ii (influence instantiation) meta-info is not initialized"); + return _m_nrVals_ASoI_Y[yI]; +} + +const vector& TwoStageDynamicBayesianNetwork:: +GetNrVals_YSoI_Y(Index yI) const +{ + if(!_m_ii_initialized) + throw E("ii (influence instantiation) meta-info is not initialized"); + return _m_nrVals_YSoI_Y[yI]; +} + +size_t 
TwoStageDynamicBayesianNetwork::GetXiiSize_Y(Index yI) const +{ + if(!_m_ii_initialized) + throw E("ii (influence instantiation) meta-info is not initialized"); + return _m_Xii_size_Y[yI]; +} + +size_t TwoStageDynamicBayesianNetwork::GetAiiSize_Y(Index yI) const +{ + if(!_m_ii_initialized) + throw E("ii (influence instantiation) meta-info is not initialized"); + return _m_Aii_size_Y[yI]; +} + +size_t TwoStageDynamicBayesianNetwork::GetYiiSize_Y(Index yI) const +{ + if(!_m_ii_initialized) + throw E("ii (influence instantiation) meta-info is not initialized"); + return _m_Yii_size_Y[yI]; +} + +size_t TwoStageDynamicBayesianNetwork::GetiiSize_Y(Index yI) const +{ + if(!_m_ii_initialized) + throw E("ii (influence instantiation) meta-info is not initialized"); + return _m_ii_size_Y[yI]; +} + +//functions for _O + +const vector& TwoStageDynamicBayesianNetwork:: +GetNrVals_OSoI_O(Index oI) const +{ + if(!_m_ii_initialized) + throw E("ii (influence instantiation) meta-info is not initialized"); + return _m_nrVals_OSoI_O[oI]; +} + +const vector& TwoStageDynamicBayesianNetwork:: +GetNrVals_XSoI_O(Index oI) const +{ + if(!_m_ii_initialized) + throw E("ii (influence instantiation) meta-info is not initialized"); + return _m_nrVals_XSoI_O[oI]; +} + +const vector& TwoStageDynamicBayesianNetwork:: +GetNrVals_ASoI_O(Index oI) const +{ + if(!_m_ii_initialized) + throw E("ii (influence instantiation) meta-info is not initialized"); + return _m_nrVals_ASoI_O[oI]; +} + +const vector& TwoStageDynamicBayesianNetwork:: +GetNrVals_YSoI_O(Index oI) const +{ + if(!_m_ii_initialized) + throw E("ii (influence instantiation) meta-info is not initialized"); + return _m_nrVals_YSoI_O[oI]; +} + +size_t TwoStageDynamicBayesianNetwork::GetOiiSize_O(Index oI) const +{ + if(!_m_ii_initialized) + throw E("ii (influence instantiation) meta-info is not initialized"); + return _m_Oii_size_O[oI]; +} + +size_t TwoStageDynamicBayesianNetwork::GetXiiSize_O(Index oI) const +{ + if(!_m_ii_initialized) + throw 
E("ii (influence instantiation) meta-info is not initialized"); + return _m_Xii_size_O[oI]; +} + +size_t TwoStageDynamicBayesianNetwork::GetAiiSize_O(Index oI) const +{ + if(!_m_ii_initialized) + throw E("ii (influence instantiation) meta-info is not initialized"); + return _m_Aii_size_O[oI]; +} + +size_t TwoStageDynamicBayesianNetwork::GetYiiSize_O(Index oI) const +{ + if(!_m_ii_initialized) + throw E("ii (influence instantiation) meta-info is not initialized"); + return _m_Yii_size_O[oI]; +} + +size_t TwoStageDynamicBayesianNetwork::GetiiSize_O(Index oI) const +{ + if(!_m_ii_initialized) + throw E("ii (influence instantiation) meta-info is not initialized"); + return _m_ii_size_O[oI]; +} + + + + + +void TwoStageDynamicBayesianNetwork:: +JointToIndividualYiiIndices(Index y, Index iiI, + vector& Xs, + vector& As, + vector& Ys) const +{ + vector catVector = IndexTools::JointToIndividualIndices(iiI, + _m_nrVals_SoI_Y[y]); + + size_t Xsize = _m_nrVals_XSoI_Y[y].size(); + size_t Asize = _m_nrVals_ASoI_Y[y].size(); + size_t Ysize = _m_nrVals_YSoI_Y[y].size(); + vector::iterator it1 = catVector.begin(); + vector::iterator it2 = it1 + Xsize; + Xs.assign(it1, it2); + it1 = it2; + it2 += Asize; + As.assign(it1, it2); + it1 = it2; + it2 += Ysize; + if(it2 != catVector.end()) + cerr << "JointToIndividualYiiIndices (it2 != catVector.end() - check this code!"<& Xs, + const vector& As, + const vector& Ys) const +{ + _m_IndividualToJointYiiIndices_catVector->clear(); + _m_IndividualToJointYiiIndices_catVector->insert( + _m_IndividualToJointYiiIndices_catVector->end(), Xs.begin(), Xs.end() ); + _m_IndividualToJointYiiIndices_catVector->insert( + _m_IndividualToJointYiiIndices_catVector->end(), As.begin(), As.end() ); + _m_IndividualToJointYiiIndices_catVector->insert( + _m_IndividualToJointYiiIndices_catVector->end(), Ys.begin(), Ys.end() ); + + Index iiI = IndexTools::IndividualToJointIndicesStepSize( + *_m_IndividualToJointYiiIndices_catVector, + _m_nrVals_SoI_Y_stepsize[y] ); + 
+#if 0 + size_t iiS = GetiiSize_Y(y); + if( iiI >= iiS) + { + cerr << "error in index computation, let's see what happened"<& As, + vector& Ys, + vector& Os) const +{ + if(GetXSoI_O(o).size() > 0) + { + stringstream errormsg; + errormsg << "Observation Factor " << o << " has a non-null PS SF scope, which was ignored."; + throw E(errormsg.str()); + } + vector catVector = IndexTools::JointToIndividualIndices(iiI, + _m_nrVals_SoI_O[o]); + + size_t Asize = _m_nrVals_ASoI_O[o].size(); + size_t Ysize = _m_nrVals_YSoI_O[o].size(); + size_t Osize = _m_nrVals_OSoI_O[o].size(); + vector::iterator it1 = catVector.begin(); + vector::iterator it2 = it1 + Asize; + As.assign(it1, it2); + it1 = it2; + it2 += Ysize; + Ys.assign(it1, it2); + it1 = it2; + it2 += Osize; + if(it2 != catVector.end()) + cerr << "JointToIndividualYiiIndices (it2 != catVector.end() - check this code!"<& Xs, + vector& As, + vector& Ys, + vector& Os) const +{ + vector catVector = IndexTools::JointToIndividualIndices(iiI, + _m_nrVals_SoI_O[o]); + + size_t Xsize = _m_nrVals_XSoI_O[o].size(); + size_t Asize = _m_nrVals_ASoI_O[o].size(); + size_t Ysize = _m_nrVals_YSoI_O[o].size(); + size_t Osize = _m_nrVals_OSoI_O[o].size(); + vector::iterator it1 = catVector.begin(); + vector::iterator it2 = it1 + Xsize; + Xs.assign(it1, it2); + it1 = it2; + it2 += Asize; + As.assign(it1, it2); + it1 = it2; + it2 += Ysize; + Ys.assign(it1, it2); + it1 = it2; + it2 += Osize; + if(it2 != catVector.end()) + cerr << "JointToIndividualYiiIndices (it2 != catVector.end() - check this code!"<& As, + const vector& Ys, + const vector& Os) const +{ + _m_IndividualToJointOiiIndices_catVector->clear(); + _m_IndividualToJointOiiIndices_catVector->insert( + _m_IndividualToJointOiiIndices_catVector->end(), As.begin(), As.end() ); + _m_IndividualToJointOiiIndices_catVector->insert( + _m_IndividualToJointOiiIndices_catVector->end(), Ys.begin(), Ys.end() ); + _m_IndividualToJointOiiIndices_catVector->insert( + 
_m_IndividualToJointOiiIndices_catVector->end(), Os.begin(), Os.end() ); + + Index iiI = IndexTools::IndividualToJointIndicesStepSize( + *_m_IndividualToJointOiiIndices_catVector, + _m_nrVals_SoI_O_stepsize[o] ); + return iiI; + +} + +Index TwoStageDynamicBayesianNetwork:: +IndividualToJointOiiIndices(Index o, + const vector& Xs, + const vector& As, + const vector& Ys, + const vector& Os) const +{ + _m_IndividualToJointOiiIndices_catVector->clear(); + _m_IndividualToJointOiiIndices_catVector->insert( + _m_IndividualToJointOiiIndices_catVector->end(), Xs.begin(), Xs.end() ); + _m_IndividualToJointOiiIndices_catVector->insert( + _m_IndividualToJointOiiIndices_catVector->end(), As.begin(), As.end() ); + _m_IndividualToJointOiiIndices_catVector->insert( + _m_IndividualToJointOiiIndices_catVector->end(), Ys.begin(), Ys.end() ); + _m_IndividualToJointOiiIndices_catVector->insert( + _m_IndividualToJointOiiIndices_catVector->end(), Os.begin(), Os.end() ); + + Index iiI = IndexTools::IndividualToJointIndicesStepSize( + *_m_IndividualToJointOiiIndices_catVector, + _m_nrVals_SoI_O_stepsize[o] ); + return iiI; + +} + +void TwoStageDynamicBayesianNetwork:: +AddCPDForY(Index y) +{ + //get SoI of y + //compute #local states + //allocate a CPD + throw E("AddCPDForY NYI"); +} + +void TwoStageDynamicBayesianNetwork:: +AddCPDForO(Index o) +{ + //get SoI of o + //compute #local states + //allocate a CPD + throw E("AddCPDForO NYI"); +} + + +string TwoStageDynamicBayesianNetwork::SoftPrint() const +{ + string indent("\t"); + stringstream ss; + ss << indent << "TwoStageDynamicBayesianNetwork::SoftPrint()" <GetNrStateFactors(); y++) + ss << indent << SoftPrintSoI_Y(y) << endl; + for(Index agI=0; agI < _m_madp->GetNrAgents(); agI++) + ss << indent << SoftPrintSoI_O(agI) << endl; + ss << indent << "Probabilities: (not yet implemented)" << endl; + } + return (ss.str()); +} + +string TwoStageDynamicBayesianNetwork::SoftPrintSoI_Y(Index y) const +{ + stringstream ss; + ss << "sfI=" << y << + 
", XSoI=" << _m_XSoI_Y.at(y) << + ", ASoI=" << _m_ASoI_Y.at(y) << + ", YSoI=" << _m_YSoI_Y.at(y); + return(ss.str()); +} +string TwoStageDynamicBayesianNetwork::SoftPrintSoI_O(Index agI) const +{ + stringstream ss; + ss << "agI=" << agI << + ", XSoI_O=" << _m_XSoI_O.at(agI) << + ", ASoI_O=" << _m_ASoI_O.at(agI) << + ", YSoI_O=" << _m_YSoI_O.at(agI) << + ", OSoI_O=" << _m_OSoI_O.at(agI); + return(ss.str()); +} + + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/TwoStageDynamicBayesianNetwork.h b/payntbind/src/synthesis/decpomdp/madp/src/base/TwoStageDynamicBayesianNetwork.h new file mode 100644 index 000000000..f73e556b1 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/TwoStageDynamicBayesianNetwork.h @@ -0,0 +1,495 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _TWOSTAGEDYNAMICBAYESIANNETWORK_H_ +#define _TWOSTAGEDYNAMICBAYESIANNETWORK_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include "MultiAgentDecisionProcessDiscreteFactoredStatesInterface.h" +#include "CPDDiscreteInterface.h" + +/** \brief TwoStageDynamicBayesianNetwork (2DBN) is a class that represents + * the transition and observation model for a factored MADP + * + * (i.e., for something that implements the + * MultiAgentDecisionProcessDiscreteFactoredStatesInterface. 
E.g., a + * MultiAgentDecisionProcessDiscreteFactoredStates) + * + * Components\n + * ---------- + * A TwoStageDynamicBayesianNetwork is composed of the + * \li previous-stage (PS) state factors (SF) - x + * \li next-stage (NS) state factors - y + * \li action variables - a + * \li observation variables - o + * + * Connections\n + * ----------- + * There are causal connections of the following forms + * \li x -> y influence of a PS SF on a NS SF + * \li a -> y influence of an action on a NS SF + * \li a -> o influence of an action on an observation + * \li y -> y influence of a NS SF (with a lower index) on a NS SF (with a + * higher index) + * \li y -> o influence of a NS SF on an observation + * \li o -> o influence of an observation ((of an agent) with a lower index) + * on an observation ((of an agent) with a higher index). + * + * Scopes of influence\n + * -------------------\n + * As such each y has a Scope of influence (SoI): SoI(y) = subset{x,a,y} + * And similarly for each o: SoI(o) = subset{a,y,o} (or subset{x,a,y,o} when a PS SF scope is set for o) + * + * Influence Instantiations (II)\n + * -----------------------------\n + * We refer to 1 instantiation of all variables in SoI(y) as an + * 'influence instantiation' (II). This class provides some convenience + * functions for converting a joint II index (denoted iiI) to vectors + * describing an instantiation, and vice versa. + * + * We discriminate Yii's and Oii's for resp. NS SFs and Observations. + * + * CPDs\n + * ----\n + * For each y and o, the 2DBN maintains a conditional probability distr. (CPD). + * These functions maintain the actual probabilities + * \li CPD(y) = Pr(y | SoI(y) ) + * \li CPD(o) = Pr(o | SoI(o) ) + * + * Computing joint probabilities\n + * -----------------------------\n + * Joint probabilities can now be computed as illustrated here: + * \li Pr( y1, y2 | SoI(y1), SoI(y2) ) = CPD(y1,y2) = CPD(y1) * CPD(y2) = + * Pr(y1 | SoI(y1)) * Pr(y2 | SoI(y2)) + * \li similar for observations and combinations of o and y.
+ * + * In the future we may want to use compact CPDs such as decision trees, ADDs + * or rules based descriptions. Then we can also consider concretizing + * the product CPD(y1,y2) = CPD(y1) * CPD(y2) (e.g., overloading the operator* + * for CPDs etc...) + * + * + * + **/ +class TwoStageDynamicBayesianNetwork +{ + private: + /**\brief pointer to a MultiAgentDecisionProcessFactoredStatesInterface + * + * A TwoStageDynamicBayesianNetwork is not a standalone thing. It is + * used to represent a MADP with a factored states space. + */ + MultiAgentDecisionProcessDiscreteFactoredStatesInterface* _m_madp; + + //representing the nodes of the network + ///the number of state factors + size_t _m_nrY; + ///the number of observation factors (i.e., the number of agents) + size_t _m_nrO; + + //representing the connections + bool _m_SoIStorageInitialized; + /// For each NS SF we maintain the PS SFs, X, in its SoI + std::vector< Scope > _m_XSoI_Y; + /// For each NS SF we maintain the actions, A, in its SoI + std::vector< Scope > _m_ASoI_Y; + /// For each NS SF we maintain the NS SFs, Y, in its SoI + std::vector< Scope > _m_YSoI_Y; + + /// For each O we maintain the PS SFs, X, in its SoI + std::vector< Scope > _m_XSoI_O; + /// For each O we maintain the actions, A, in its SoI + std::vector< Scope > _m_ASoI_O; + /// For each O we maintain the NS SFs, Y, in its SoI + std::vector< Scope > _m_YSoI_O; + /// For each O we maintain the observations, O, in its SoI + std::vector< Scope > _m_OSoI_O; + + ///Boolean that is set when all ii info is initialized + bool _m_ii_initialized; + ///For each NS SF Y we maintain the number of SF values in XSoI(Y) + /**I.e., For each SF in XSoI(Y) we maintain the number of values it + * has. + * This can be used to convert 'local' X indices to joint ones, etc. 
+ */ + std::vector< std::vector > _m_nrVals_XSoI_Y; + ///For each NS SF Y we maintain the number of actions in ASoI(Y) + std::vector< std::vector > _m_nrVals_ASoI_Y; + ///For each NS SF Y we maintain the number of SF values in YSoI(Y) + std::vector< std::vector > _m_nrVals_YSoI_Y; + ///For each NS SF Y we maintain the number of SF values in SoI(Y) + /**This is the concatenation of [nrValsXSoI, nrValsASoI, nrValsYSoI] + * used by IndividualToJointYiiIndices, and JointToIndividualYiiIndices + */ + std::vector< std::vector > _m_nrVals_SoI_Y; + + + ///For each Y we maintain the number of instantiations of XSoI(Y) + std::vector _m_Xii_size_Y; + ///For each Y we maintain the number of instantiations of ASoI(Y) + std::vector _m_Aii_size_Y; + ///For each Y we maintain the number of instantiations of YSoI(Y) + std::vector _m_Yii_size_Y; + ///For each y we maintain the number of II's + std::vector _m_ii_size_Y; + + ///For each O we maintain the number of SF values in XSoI(O) + std::vector< std::vector > _m_nrVals_XSoI_O; + ///For each O we maintain the number of actions in ASoI(O) + std::vector< std::vector > _m_nrVals_ASoI_O; + ///For each O we maintain the number of SF values in YSoI(O) + std::vector< std::vector > _m_nrVals_YSoI_O; + ///For each O we maintain the number of observations in OSoI(O) + std::vector< std::vector > _m_nrVals_OSoI_O; + ///For each O we maintain the number of values of every variable in SoI(O) + /**This is the concatenation of [nrValsXSoI, nrValsASoI, nrValsYSoI, nrValsOSoI] + * used by IndividualToJointOiiIndices, and JointToIndividualOiiIndices + */ + std::vector< std::vector > _m_nrVals_SoI_O; + + ///For each O we maintain the number of instantiations of XSoI(O) + std::vector _m_Xii_size_O; + ///For each O we maintain the number of instantiations of ASoI(O) + std::vector _m_Aii_size_O; + ///For each O we maintain the number of instantiations of YSoI(O) + std::vector _m_Yii_size_O; + ///For each O we maintain the number of instantiations of OSoI(O) + std::vector
_m_Oii_size_O; + ///For each y we maintain the number of II's + std::vector _m_ii_size_O; + + //representing the CPDs + /// For each next-stage state variable y we maintain a CPD + std::vector< CPDDiscreteInterface * > _m_Y_CPDs; + /// For each observation we maintain a CPD + std::vector< CPDDiscreteInterface * > _m_O_CPDs; + + ///Computes the 'closure' of NS variables Y and O. + void ComputeWithinNextStageClosure(Scope& Y, Scope& O) const; + + /// Temporary storage used in IndividualToJointYiiIndices. + std::vector *_m_IndividualToJointYiiIndices_catVector; + /// Cache the step size for speed. + std::vector _m_nrVals_SoI_Y_stepsize; + /// Temporary storage used in IndividualToJointOiiIndices. + std::vector *_m_IndividualToJointOiiIndices_catVector; + /// Cache the step size for speed. + std::vector _m_nrVals_SoI_O_stepsize; + /// Temporary storage used in SampleY. + std::vector *_m_SampleY; + std::vector* > _m_X_restr_perY; + std::vector* > _m_A_restr_perY; + std::vector* > _m_Y_restr_perY; + + /// Temporary storage used in SampleO. + std::vector *_m_SampleO; + std::vector* > _m_X_restr_perO; + std::vector* > _m_A_restr_perO; + std::vector* > _m_Y_restr_perO; + std::vector* > _m_O_restr_perO; + /// Temporary storage used in SampleO. + std::vector *_m_SampleNrO; + + protected: + + public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + TwoStageDynamicBayesianNetwork( + MultiAgentDecisionProcessDiscreteFactoredStatesInterface& madp); + /// Copy constructor. + //TwoStageDynamicBayesianNetwork(const TwoStageDynamicBayesianNetwork& a); + /// Destructor. + ~TwoStageDynamicBayesianNetwork(); + /// Copy assignment operator + //TwoStageDynamicBayesianNetwork& operator= (const TwoStageDynamicBayesianNetwork& o); + + //operators: + + //SoI functions + ///Sets the SoI vectors to appropriate size and containing empty scopes. 
+ /**this function should be called after adding all state factors, + * but before trying to add the connections (by setting the SoI's) + */ + void InitializeStorage(); + + ///Sets the SoI of a NS SF y + void SetSoI_Y( Index y, + const Scope& XSoI, + const Scope& ASoI, + const Scope& YSoI); + + ///Sets the SoI of an observation o + void SetSoI_O( Index o, + const Scope& ASoI, + const Scope& YSoI, + const Scope& OSoI); + + ///Sets the SoI of an observation o + void SetSoI_O( Index o, + const Scope& XSoI, + const Scope& ASoI, + const Scope& YSoI, + const Scope& OSoI); + + const Scope& GetXSoI_Y(Index y) const + { return _m_XSoI_Y[y]; } + const Scope& GetASoI_Y(Index y) const + { return _m_ASoI_Y[y]; } + const Scope& GetYSoI_Y(Index y) const + { return _m_YSoI_Y[y]; } + + const Scope& GetXSoI_O(Index o) const + { return _m_XSoI_O[o]; } + const Scope& GetASoI_O(Index o) const + { return _m_ASoI_O[o]; } + const Scope& GetYSoI_O(Index o) const + { return _m_YSoI_O[o]; } + const Scope& GetOSoI_O(Index o) const + { return _m_OSoI_O[o]; } + + //influence instantiation (II) functions + + ///Computes some administrative things necessary for II functions + /**This should be called after all SoI's have been defined. + * When not using any II functions, this may be skipped?! (I think) + */ + void InitializeIIs(); + + ///returns the number of values for all state factors in XSoI(y) + const std::vector& GetNrVals_XSoI_Y(Index yI) const; + ///returns the number of values for all actions in XSoI(y) + const std::vector& GetNrVals_ASoI_Y(Index yI) const; + ///returns the number of values for all state factors in YSoI(y) + const std::vector& GetNrVals_YSoI_Y(Index yI) const; + + ///returns the nr. 
instantiations of XSoI(y) + size_t GetXiiSize_Y(Index yI) const; + ///returns the nr instantiations of ASoI(y) + size_t GetAiiSize_Y(Index yI) const; + ///returns the nr instantiations of YSoI(y) + size_t GetYiiSize_Y(Index yI) const; + ///returns the total number of IIs + size_t GetiiSize_Y(Index yI) const; + + ///returns the number of values for all state factors in XSoI(o) + const std::vector& GetNrVals_XSoI_O(Index oI) const; + ///returns the number of values for all actions in ASoI(o) + const std::vector& GetNrVals_ASoI_O(Index oI) const; + ///returns the number of values for all state factors in YSoI(o) + const std::vector& GetNrVals_YSoI_O(Index oI) const; + ///returns the number of values for all state factors in OSoI(o) + const std::vector& GetNrVals_OSoI_O(Index oI) const; + + ///returns the nr instantiations of XSoI(o) + size_t GetXiiSize_O(Index oI) const; + ///returns the nr instantiations of ASoI(o) + size_t GetAiiSize_O(Index oI) const; + ///returns the nr instantiations of YSoI(o) + size_t GetYiiSize_O(Index oI) const; + ///returns the nr instantiations of OSoI(o) + size_t GetOiiSize_O(Index oI) const; + ///returns the total number of IIs + size_t GetiiSize_O(Index oI) const; + + + + ///Computes Xs, As, Ys from the joint iiI denoting a II of SoI(y). + /**This returns restricted std::vectors (std::vectors restricted to the SoI(y)) + */ + void JointToIndividualYiiIndices(Index y, Index iiI, + std::vector& X_rest, + std::vector& A_rest, + std::vector& Y_rest) const; + ///Computes the joint II index for restricted std::vectors Xs,As,Ys. + /**std::vectors are restricted to only include values in SoI(y) + */ + Index IndividualToJointYiiIndices(Index y, + const std::vector& X_rest, + const std::vector& A_rest, + const std::vector& Y_rest) const; + ///Computes Xs, As, Ys from the joint iiI denoting a II of SoI(o). 
+ /**This returns restricted std::vectors (std::vectors restricted to the SoI(o)) + */ + void JointToIndividualOiiIndices(Index o, Index iiI, + std::vector& A_rest, + std::vector& Y_rest, + std::vector& O_rest) const; + void JointToIndividualOiiIndices(Index o, Index iiI, + std::vector& X_rest, + std::vector& A_rest, + std::vector& Y_rest, + std::vector& O_rest) const; + ///Computes the joint II index for restricted std::vectors As,Ys,Os + /**std::vectors are restricted to only include values in SoI(o) + */ + Index IndividualToJointOiiIndices(Index o, + const std::vector& A_rest, + const std::vector& Y_rest, + const std::vector& O_rest) const; + Index IndividualToJointOiiIndices(Index o, + const std::vector& X_rest, + const std::vector& A_rest, + const std::vector& Y_rest, + const std::vector& O_rest) const; + //functions regarding CPDs + ///Set the CPDDiscreteInterface for Y + void SetCPD_Y(Index yI, CPDDiscreteInterface* cpt) + { _m_Y_CPDs.at(yI) = cpt; } + ///Set the CPDDiscreteInterface for O + void SetCPD_O(Index oI, CPDDiscreteInterface* cpt) + { _m_O_CPDs.at(oI) = cpt; } + + CPDDiscreteInterface* GetCPD_Y(Index yI) + { return(_m_Y_CPDs.at(yI)); } + ///Set the CPDDiscreteInterface for O + CPDDiscreteInterface* GetCPD_O(Index oI) + { return(_m_O_CPDs.at(oI)); } + + ///Get the probability of all possible values of yIndex given II + /**\li yIndex the index of the state factor for which we request + * the probability of all its possible values. + * \li Xii,Aii, Yii are the vectors of influence instantiations + * of exact scopes. (I.e., Xii, Aii and Yii are of scope as + * specified by GetXSoI_Y(), GetASoI_Y(), GetYSoI_Y(). ) + * + */ + std::vector GetYProbabilitiesExactScopes( + const std::vector& Xii, + const std::vector& Aii, + const std::vector& Yii, + const Index& yIndex ) const; + ///Get the probability of Y given X,A. All are full-length + /**because all factors are included, we do not need to discriminate + * Yii and Y. 
+ */ + double GetYProbability( const std::vector& X, + const std::vector& A, + const std::vector& Y) const; + ///Get the probability of Y given X,A. general version + /**We need to discriminate between + * \li Y, the vector of next-stage state variables + * for which we want to know the probability, + * \li YII the vector of next-stage that can influence the SFs in + * Yscope. + * + * YIIscope needs to be a superset of Yscope and YII and Y need to + * specify the same values for the same NS SFs. + * + * + * This function returns the probability + * \f[ \Pr(Y | X, A, (YII \setminus Y) ) \f] + * + */ + double GetYProbabilityGeneral( + const Scope& Xscope, + const std::vector& X, + const Scope& Ascope, + const std::vector& A, + const Scope& YIIscope, + const std::vector& YII, + const Scope& Yscope, + const std::vector& Y + ) const; + + ///Get the probability of all possible values of oIndex given II + /**\li oIndex the index of the observation factor (i.e., of the agent) + * for which we request + * the probability of all its possible values. + * \li Aii,Yii, Oii are the vectors of influence instantiations + * of exact scopes. (I.e., Aii, Yii and Oii are of scope as + * specified by GetASoI_O(), GetYSoI_O(), GetOSoI_O(). ) + * + */ + std::vector GetOProbabilitiesExactScopes( + const std::vector& Aii, + const std::vector& Yii, + const std::vector& Oii, + const Index& oIndex ) const; + std::vector GetOProbabilitiesExactScopes( + const std::vector& Xii, + const std::vector& Aii, + const std::vector& Yii, + const std::vector& Oii, + const Index& oIndex ) const; + ///Get the probability of O given A,Y. All std::vectors are full length + double GetOProbability( const std::vector& A, + const std::vector& Y, + const std::vector& O) const; + double GetOProbability( const std::vector& X, + const std::vector& A, + const std::vector& Y, + const std::vector& O) const; + ///Compute the probability P(Ys,Os|Xs,As) + /**All std::vectors are of specified scopes. 
When the probability is + * undefined (because X and A do not contain all necessary vars) + * an exception is thrown + */ + double GetYOProbability(const Scope& X, const std::vector& Xs, + const Scope& A, const std::vector& As, + const Scope& Y, const std::vector& Ys, + const Scope& O, const std::vector& Os) const; + ///Sample a NS state + std::vector SampleY( const std::vector& X, + const std::vector& A) const; + ///Sample an observation. + std::vector SampleO( const std::vector& A, + const std::vector& Y) const + {return SampleO(std::vector(), A, Y); } + + std::vector SampleO( const std::vector& X, + const std::vector& A, + const std::vector& Y) const; + + ///Perfom the Stat and Agent Scope backup + /**this function is called by StateScopeBackup and + * AgentScopeBackup. If you need both, this is therefore more efficient. + * Xout and Aout are output arguments containing the backed-up scopes. + */ + void ScopeBackup( const Scope & Y, + const Scope & X, + Scope& Xout, + Scope& Aout + ) const; + ///Get the state factors that are a ancestor of the arguments + Scope StateScopeBackup( const Scope & stateScope, + const Scope & agentScope) const; + ///Get the agent actions that are a ancestor of the arguments + Scope AgentScopeBackup( const Scope & stateScope, + const Scope & agentScope) const; + + ///Add a CPD for a NS SF y + void AddCPDForY(Index y); + ///Add a CPD for an observation o + void AddCPDForO(Index o); + + std::string SoftPrint() const; + std::string SoftPrintSoI_Y(Index y) const; + std::string SoftPrintSoI_O(Index agI) const; + + + + + +}; + + +#endif /* !_TWOSTAGEDYNAMICBAYESIANNETWORK_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/VectorTools.cpp b/payntbind/src/synthesis/decpomdp/madp/src/base/VectorTools.cpp new file mode 100644 index 000000000..3e028380a --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/VectorTools.cpp @@ -0,0 +1,22 @@ +/* This file is part of 
the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "VectorTools.h" + +using namespace std; + +namespace VectorTools { + +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/base/VectorTools.h b/payntbind/src/synthesis/decpomdp/madp/src/base/VectorTools.h new file mode 100644 index 000000000..dbab57f8c --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/base/VectorTools.h @@ -0,0 +1,104 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. 
*/ +#ifndef _VECTORTOOLS_H_ +#define _VECTORTOOLS_H_ 1 + +/* the include directives */ +#include "Globals.h" +#include + +namespace VectorTools{ + + template + bool Equal( const std::vector& vec1, const std::vector& vec2 ) + { + size_t s1 = vec1.size(); + if( s1 != vec2.size()) + return false; + + return( std::equal(vec1.begin(), vec1.end(), vec2.begin() ) ); +/* + typename std::vector< T >::const_iterator it1, it2; + it1 = vec1.begin(); + it2 = vec2.begin(); + while(it1 != vec1.end() ) + { + if( (*it1) != (*it2) ) + it1++; + it2++; + } + */ + + } + + template + T InnerProduct( const std::vector& vec1, const std::vector& vec2 ) + { + size_t s1 = vec1.size(); + if( s1 != vec2.size()) + throw E("VectorTools::InnerProduct - vec sizes not equal"); + + typename std::vector< T >::const_iterator it1, it2; + T inprod=0; + it1 = vec1.begin(); + it2 = vec2.begin(); + while(it1 != vec1.end() ) + { + inprod += (*it1) * (*it2) ; + it1++; + it2++; + } + return(inprod); + } + + ///Compute the product of the vector's elements + /** + * clearly the product operator* should be defined for type T + */ + template + T VectorProduct( const std::vector& vec ) + { + if(vec.size() == 0) + throw E("IndexTools::VectorProduct - vector product of vec of size 0 is undefined!"); + + T product = *(vec.begin());//the first element + typename std::vector< T >::const_iterator it; + it = vec.begin() + 1;//second element + while(it != vec.end() ) + { + product = product * (*it); + it++; + } + return(product); + } + + template + T MaxNorm( const std::vector& vec ) + { + T norm = 0.0 ; + for (Index i = 0 ; i < vec.size() ; i++) + norm = std::max (fabs (vec [i]), norm) ; + return norm; + } + +} + +#endif /* !_VECTORTOOLS_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/include/configuration.h b/payntbind/src/synthesis/decpomdp/madp/src/include/configuration.h new file mode 100644 index 000000000..d59ead554 --- /dev/null +++ 
b/payntbind/src/synthesis/decpomdp/madp/src/include/configuration.h @@ -0,0 +1,31 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +// This file contains some project-wide defines that indicate the +// availability of certain software libraries. Enabling something here +// usually also requires modifying Makefile.custom. + +// Indicates whether we use libgmp to represent LIndex. +#define USE_ARBITRARY_PRECISION_INDEX 0 + +// Indicates whether to use the pomdp-solve library, or call an external program. +// #if !DARWIN +// #define USE_POMDPSOLVE_LIBRARY 1 +// #else // on OSX the pomdpsolve library doesn't get compiled in, so this +// // should always be set to 0 +// #define USE_POMDPSOLVE_LIBRARY 0 +// #endif +//FAO: we don't want to use it anymore... +#define USE_POMDPSOLVE_LIBRARY 0 diff --git a/payntbind/src/synthesis/decpomdp/madp/src/include/versions.h b/payntbind/src/synthesis/decpomdp/madp/src/include/versions.h new file mode 100644 index 000000000..c3fe18717 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/include/versions.h @@ -0,0 +1,37 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. 
+ * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "boost/version.hpp" + +#if BOOST_VERSION < 103300 +#define BOOST_1_32_OR_LOWER 1 +#else +#define BOOST_1_32_OR_LOWER 0 +#endif + +#if BOOST_VERSION >= 103600 +#define BOOST_1_36_OR_HIGHER 1 +#else +#define BOOST_1_36_OR_HIGHER 0 +#endif + +#if BOOST_VERSION >= 103800 +// Spirit V2 has been merged in the Boost, so now we need to use the +// "classic" version +#define USE_BOOST_SPIRIT_CLASSIC 1 +#else +#define USE_BOOST_SPIRIT_CLASSIC 0 +#endif + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/parser/CommentBlankOrNewlineParser.h b/payntbind/src/synthesis/decpomdp/madp/src/parser/CommentBlankOrNewlineParser.h new file mode 100644 index 000000000..f80dcd4de --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/parser/CommentBlankOrNewlineParser.h @@ -0,0 +1,105 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. 
*/ +#ifndef _COMMENTBLANKORNEWLINEPARSER_H_ +#define _COMMENTBLANKORNEWLINEPARSER_H_ 1 + +#if USE_BOOST_SPIRIT_CLASSIC +#include "boost/spirit/include/classic_core.hpp" +#else +#include "boost/spirit/core.hpp" +#endif + +#include "CommentOrBlankParser.h" + +#if USE_BOOST_SPIRIT_CLASSIC +using namespace boost::spirit::classic; +#else +using namespace boost::spirit; +#endif + +/* aliases */ + +/* constants */ +#define DEBUG_CBONL_COMPARS 0 +#define DEBUG_CBONL_COBP 0 + +namespace comment_cbonlp { + typedef char char_t; + //typedef file_iterator iterator_t; + typedef file_iterator iterator_t_fi; + typedef position_iterator iterator_t; + typedef scanner scanner_t; + typedef rule rule_t; + +namespace{ + void cbonlp_eol(iterator_t, iterator_t) + { + if(DEBUG_CBONL_COMPARS) + std::cout << "EOL\n"; + } + void cbonlp_comment_or_blank(iterator_t str, iterator_t end) + { + if(DEBUG_CBONL_COBP) + { + std::string s(str, end); + std::cout<< "SKIPPED COMMENT: \""<< s << "\""<< std::endl; + } + }; +} + +struct CommentBlankorNewLineParser : public sub_grammar +{ + /*typedef int start_t; //<- used to infer the following type:*/ + typedef + alternative< + action< + comment_cobp::CommentOrBlankParser, + void (*)(position_iterator< + file_iterator + >, + position_iterator< + file_iterator + >) + >, + action< + eol_parser, + void (*)(position_iterator< + file_iterator + >, + position_iterator< + file_iterator + >) + > + > + start_t; + + CommentBlankorNewLineParser() + : start + ( + comment_cobp::commentOrBlankParser_p[&cbonlp_comment_or_blank] + | + eol_p[&cbonlp_eol] + ) + {} + + start_t start; + +}; + +} +#endif /* !_COMMENTBLANKORNEWLINEPARSER_H_ */ + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/parser/CommentOrBlankParser.h b/payntbind/src/synthesis/decpomdp/madp/src/parser/CommentOrBlankParser.h new file mode 100644 index 000000000..05e8715e9 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/parser/CommentOrBlankParser.h @@ -0,0 +1,146 @@ +/* This file is part 
of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _COMMENTORBLANKPARSER_H_ +#define _COMMENTORBLANKPARSER_H_ 1 + +#if USE_BOOST_SPIRIT_CLASSIC +#include "boost/spirit/include/classic_core.hpp" +#else +#include "boost/spirit/core.hpp" +#endif + +namespace boost { namespace spirit +#if USE_BOOST_SPIRIT_CLASSIC + { namespace classic +#endif +{ + template + struct sub_grammar : public parser + { + typedef sub_grammar self_t; + typedef DerivedT const& embed_t; + + template + struct result + { + typedef typename parser_result< + typename DerivedT::start_t, ScannerT>::type + type; + }; + + DerivedT const& derived() const + { return *static_cast(this); } + + template + typename parser_result::type + parse(ScannerT const& scan) const + { + return derived().start.parse(scan); + } + }; +}} +#if USE_BOOST_SPIRIT_CLASSIC +} +#endif + +#if USE_BOOST_SPIRIT_CLASSIC +using namespace boost::spirit::classic; +#else +using namespace boost::spirit; +#endif + +/* aliases */ + +/* constants */ +#define DEBUG_COMPARS 0 +#define DEBUG_COBP 0 + +namespace comment_cobp { + typedef char char_t; + //typedef file_iterator iterator_t; + typedef file_iterator iterator_t_fi; + typedef position_iterator iterator_t; + typedef scanner scanner_t; + typedef rule rule_t; + +namespace{ + void cp_eol(iterator_t, iterator_t) + { if(DEBUG_COMPARS) std::cout << "EOL\n"; } + //void cp_endinput(iterator_t, iterator_t) + //{ if(DEBUG_COMPARS) std::cout << "END OF INPUT\n"; } + void 
cp_startcomment(char_t) + { if(DEBUG_COMPARS) std::cout<< "#-STARTCOMMENT\n"; } + + void cobp_blank(char_t) + {if(DEBUG_COBP) std::cout << "SKIPPED BLANK\n";}; + //void cobp_emptyline(iterator_t str, iterator_t end) + //{ if(DEBUG_COBP) std::cout << "SKIPPED EMPTYLINE\n"; }; + void cobp_comment(iterator_t str, iterator_t end) + { + std::string s(str, end); + if(DEBUG_COBP) + std::cout<< "SKIPPED COMMENT: \""<< s << "\""<< std::endl; + }; +} + +//CommentParser +struct CommentParser : public sub_grammar +{ + typedef + sequence, void (*)(comment_cobp::char_t)>, kleene_star > >, kleene_star > > + start_t; + + CommentParser() + : start + ( //commenttoken + ch_p('#')[&cp_startcomment] + >> + //lineremainder + *( print_p | /* alnum_p |*/ blank_p ) + >> + *( eol_p[&cp_eol] ) + ) + {} + + start_t start; +}; + +CommentParser const commentParser_p = CommentParser(); +struct CommentOrBlankParser : public sub_grammar +{ + + typedef + alternative, action > + start_t; + + CommentOrBlankParser() + : start + ( + blank_p[&cobp_blank] + | + commentParser_p[&cobp_comment] + ) + {} + + start_t start; + +}; +CommentOrBlankParser const commentOrBlankParser_p = CommentOrBlankParser(); + +} +#endif /* !_COMMENTORBLANKPARSER_H_ */ + diff --git a/payntbind/src/synthesis/decpomdp/madp/src/parser/MADPParser.cpp b/payntbind/src/synthesis/decpomdp/madp/src/parser/MADPParser.cpp new file mode 100644 index 000000000..b28aa3cea --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/parser/MADPParser.cpp @@ -0,0 +1,32 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. 
+ * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +#include "MADPParser.h" +#include "ParserDPOMDPFormat_Spirit.h" +#include "ParserPOMDPFormat_Spirit.h" +#include "DecPOMDPDiscrete.h" +#include "POMDPDiscrete.h" + +void MADPParser::Parse(DecPOMDPDiscrete *model) +{ + DPOMDPFormatParsing::ParserDPOMDPFormat_Spirit parser(model); + parser.Parse(); +} + +void MADPParser::Parse(POMDPDiscrete *model) +{ + POMDPFormatParsing::ParserPOMDPFormat_Spirit parser(model); + parser.Parse(); +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/parser/MADPParser.h b/payntbind/src/synthesis/decpomdp/madp/src/parser/MADPParser.h new file mode 100644 index 000000000..ad49e8762 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/parser/MADPParser.h @@ -0,0 +1,69 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _MADPPARSER_H_ +#define _MADPPARSER_H_ 1 + +/* the include directives */ +#include +#include +#include "Globals.h" + +class DecPOMDPDiscrete; +class FactoredDecPOMDPDiscrete; +class TOIDecPOMDPDiscrete; +class TOIDecMDPDiscrete; +class TOIFactoredRewardDecPOMDPDiscrete; +class TOICompactRewardDecPOMDPDiscrete; +class POMDPDiscrete; + +/// MADPParser is a general class for parsers in MADP. +/** It is templatized to allow for different parsers to be + * implemented. 
When constructed with particular model, the proper + * parser is instantiated, and its Parse() function is called (see + * ParserInterface). + */ +class MADPParser +{ +private: + + /// Parse a DecPOMDPDiscrete using ParserDPOMDPFormat_Spirit. + void Parse(DecPOMDPDiscrete *model); + void Parse(TOIDecPOMDPDiscrete *model); + void Parse(TOIDecMDPDiscrete *model); + void Parse(TOIFactoredRewardDecPOMDPDiscrete *model); + void Parse(TOICompactRewardDecPOMDPDiscrete *model); + void Parse(FactoredDecPOMDPDiscrete *model); + void Parse(POMDPDiscrete *model); + +protected: + +public: + // Constructor, destructor and copy assignment. + /// Constructor, on return the model has been parsed. + template + MADPParser(A* model){ Parse(model); } + + /// Destructor. + ~MADPParser(){}; + +}; + +#endif /* !_MADPPARSER_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserDPOMDPFormat_Spirit.cpp b/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserDPOMDPFormat_Spirit.cpp new file mode 100644 index 000000000..e06e555c4 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserDPOMDPFormat_Spirit.cpp @@ -0,0 +1,1216 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * + * For contact information please see the included AUTHORS file. + */ + + + +#include "ParserDPOMDPFormat_Spirit.h" + +using namespace std; + +namespace DPOMDPFormatParsing{ + +/**Outputs the file_position structure info (gotten from + * postion_iterator::get_position() ). 
*/ +std::ostream& operator<<(std::ostream& out, file_position const& lc) +{ + return out << + "\nFile:\t" << lc.file << + "\nLine:\t" << lc.line << + "\nCol:\t" << lc.column << endl; +} + + +//Default constructor +ParserDPOMDPFormat_Spirit::ParserDPOMDPFormat_Spirit(DecPOMDPDiscrete* problem) : +// UNIFORMTOK(this), + _m_decPOMDPDiscrete(problem) +{ + if(DEBUG_PARSE) + { + cout << "Creating parser, referring to problem..."; + cout << problem->MultiAgentDecisionProcess::SoftPrint(); + cout << endl; + } + //need to reset the parse matrix the first time... + _m_matrixModified = true; + _m_identListModified = false; + _m_startStateListExclude = false; + _m_nrA = 0; + // UNIFORMTOK = uniformtok(this); + +} + +/* +//Copy assignment constructor. +ParserDPOMDPFormat_Spirit::ParserDPOMDPFormat_Spirit(const ParserDPOMDPFormat_Spirit& o) +{ +} +//Destructor +ParserDPOMDPFormat_Spirit::~ParserDPOMDPFormat_Spirit() +{ +} +*/ + +/* +bool ParserDPOMDPFormat_Spirit::Parse() +{ + string pf = GetDecPOMDPDiscrete()->GetProblemFile(); + + + +} + +void ParserDPOMDPFormat_Spirit::FileTest() +{ + string pf = GetDecPOMDPDiscrete()->GetProblemFile(); + const char* pf_c = pf.c_str(); + cout << "pf_c is \""<> str) + cout << "\"" << str<<"\""<GetProblemFile(); + const char* pf_c = pf.c_str(); + // Create a file iterator for this file + iterator_t_fi first_fi(pf_c); + if (!first_fi) + { + stringstream ss; ss << "Unable to open file: "< info; + + //+ + // Parse + // Spirit changed when upgrading from 1.33.1 to 1.34.0: + // trailing spaces are no longer accepted, which the next line + // to fail on some problems (those with trailing comments). Adding + // ">> !end_p" to the grammar works around this problem. 
+ info = parse(first, last , dpomdp >> !end_p, skip_parser); + if (info.full) + { + if(DEBUG_PARSE) cout << ">>>Parsing succeeded\n"; + } + else + { + iterator_t st = info.stop; + iterator_t st2 = info.stop; + for(Index i=1;i<200 && st2 != last ;i++)st2++; + string unparsed(st, st2); + cout << "stopped at: " << info.stop.get_position() + << "-> unparsed : "<< "\"" << unparsed << "\"\n"; + cout << "partial match? hit: \"" << info.hit << "\"\n"; + cout << "consumed chars - length: \"" << info.length << "\"\n"; + throw E("Parsing failed"); + } + + if(DEBUG_PARSE) cout << "-------------------------\n"; + return; +} + + + +//used functor functions: +void ParserDPOMDPFormat_Spirit::DebugOutput::operator()(iterator_t str, iterator_t end) const +{ + if(DEBUG_PARSE) cout << s <<" parsed \"" << string(str,end) << "\""<_m_lp_int = i; + _m_po->_m_lp_type = INT; +} +void ParserDPOMDPFormat_Spirit::StoreLastParsedElement::operator()(const unsigned int i) const +{ + if(DEBUG_PARSE) cout << "Stored Last Parsed: uint "<_m_lp_uint = i; + _m_po->_m_lp_type = UINT; +} + +void ParserDPOMDPFormat_Spirit::StoreLastParsedElement::operator()(const double f) const +{ + if(DEBUG_PARSE) cout << "Stored Last Parsed: double "<_m_lp_double = f; + _m_po->_m_lp_type = DOUBLE; +} + +void ParserDPOMDPFormat_Spirit::StoreLastParsedElement::operator()(iterator_t str, iterator_t end) const +{ + _m_po->_m_lp_string.clear(); + _m_po->_m_lp_string = string(str, end); + if(DEBUG_PARSE) cout << "Stored Last Parsed: string "<< + _m_po->_m_lp_string << endl; + _m_po->_m_lp_type = STRING; +} + + +void ParserDPOMDPFormat_Spirit::SetNrAgents::operator()(iterator_t first, iterator_t last) const +{ + if (_m_po->_m_lp_type != UINT) + { + stringstream msg; + msg << "SetNrAgents::operator()(iterator_t first, iterator_t last) requires that last parsed element is a UINT!"<< + "(at " << _m_po->_m_first->get_position() << ")" << endl; + throw EParse(msg); + } + size_t nrA = _m_po->_m_lp_uint; + if(DEBUG_PARSE){cout 
<<"agt_SetNrAgents - nrA="<GetDecPOMDPDiscrete()->SetNrAgents(nrA); + _m_po->_m_nrA = nrA; +} + +void ParserDPOMDPFormat_Spirit::SetNrAgents::operator()(const int& i) const +{ + if(DEBUG_PARSE){cout << "i="<_m_lp_JA.push_back(_m_po->_m_anyJAIndex); + } +} +/* +void ParserDPOMDPFormat_Spirit::StoreLPJointAction::operator()(iterator_t str, iterator_t end) const +{ + _m_po->_m_lp_JAI.clear(); + if(_m_isJointActionIndex) + { + //_m_lp_JA not necessary: clear it. + _m_po->_m_lp_JA.clear(); + _m_po->_m_lp_JAI.push_back(_m_po->_m_lp_int); + return; + } + try{ MatchingJointActions(0,_m_po->_m_lp_JA); } + catch(E e) + { + file_position fp = str.get_position(); + stringstream ermsg; + ermsg << e.SoftPrint() << endl << "(at " << fp << ")"<_m_lp_JA.clear(); +} +void ParserDPOMDPFormat_Spirit::StoreLPJointAction::operator()(const unsigned int&) const +{ + _m_po->_m_lp_JAI.clear(); + if(_m_isJointActionIndex) + { + //_m_lp_JA not necessary: clear it. + _m_po->_m_lp_JA.clear(); + _m_po->_m_lp_JAI.push_back(_m_po->_m_lp_int); + return; + } + MatchingJointActions(0,_m_po->_m_lp_JA); + _m_po->_m_lp_JA.clear(); +} +*/ +void ParserDPOMDPFormat_Spirit::StoreLPJointAction::Store() const +{ + _m_po->_m_lp_JAI.clear(); + //check to see if the last parsed index was a joint action index + if(_m_isJointActionIndex) + { + //_m_lp_JA not necessary: clear it. + _m_po->_m_lp_JA.clear(); + //do check that the last parsed element was a index (uint) + if (_m_po->_m_lp_type != UINT) + { + stringstream msg; + msg << "StoreLPJointAction::Store() requires that last parsed element is a UINT! 
(last parsed index was a joint action index)"<< endl; + throw EParse(msg); + } + _m_po->_m_lp_JAI.push_back(_m_po->_m_lp_uint); + return; + } + MatchingJointActions(0,_m_po->_m_lp_JA); + _m_po->_m_lp_JA.clear(); +} +void ParserDPOMDPFormat_Spirit::StoreLPJointAction::MatchingJointActions (Index curAgent, vector indIndices) const +{ + if(_m_po->_m_nrA != indIndices.size()) + { + stringstream msg; + msg << "individual indices vector has wrong size. ("<< + indIndices.size() << " while nrAgents is " << _m_po->_m_nrA << ")"<< + "at(" << _m_po->_m_first->get_position() << ")" << endl;; + throw EParse(msg); + } + if(curAgent == _m_po->_m_nrA) //past last agent: all work done + { + Index jaI = 0; + try + { + jaI = _m_po->GetDecPOMDPDiscrete()->IndividualToJointActionIndices( + indIndices); + _m_po->_m_lp_JAI.push_back(jaI); + } + catch(E e) + { + stringstream ss; ss << e.SoftPrint() << "(at "<< _m_po->_m_first-> + get_position()<<")"<::iterator it = indIndices.begin(); + vector::iterator last = indIndices.end(); + while(it != last){ cout << *it <<" "; it++;} + cout << "> = "<_m_anyJAIndex) + { + //do for each action of this agent + size_t nrAc = _m_po->GetDecPOMDPDiscrete()->GetNrActions(curAgent); + for(Index ai=0; ai < nrAc; ai++) + { + indIndices[curAgent] = ai; + MatchingJointActions(curAgent+1, indIndices); + } + } + else + MatchingJointActions(curAgent+1, indIndices); + } +} + +void ParserDPOMDPFormat_Spirit::WildCardJointObservation::operator()(iterator_t str, iterator_t end) const +{ + if(DEBUG_PARSE ) + cout << "WildCardJointObservation: _m_lp_JOI.size()="<<_m_po->_m_lp_JOI.size(); + //this (can) contain elements from a failed indiv_action parse. + _m_po->_m_lp_JO.clear(); + _m_po->_m_lp_JOI.clear(); + //this is different from how we handle joint actions: joint actions are + //immediately expanded (i.e. '*'/ANY_INDEX is replaced by all matching + //indices.) 
+ //For joint observations, this is not practical, as for most common reward + //forms, this is not required. I.e., one will typically specify + //R: ja : s : * : * : prob + //then we want to call addReward(ja,s) and not expand the succesor states + //and joint observations... + + _m_po->_m_lp_JOI.push_back(_m_po->_m_anyJOIndex); +} + +/* +void ParserDPOMDPFormat_Spirit::StoreLPJointObservation::operator()(const unsigned int&) const +{ + Store(); +} +void ParserDPOMDPFormat_Spirit::StoreLPJointObservation::operator()(iterator_t str, iterator_t end) const +{ + Store(); +} +*/ +void ParserDPOMDPFormat_Spirit::StoreLPJointObservation::Store() const +{ + _m_po->_m_lp_JOI.clear(); + if(_m_isJointObservationIndex) //as opposed to parsing individual indices + { + //_m_lp_JO not necessary: clear it. + _m_po->_m_lp_JO.clear(); + //do check that the last parsed element was a index (uint) + if (_m_po->_m_lp_type != UINT) + { + stringstream msg; + msg << "StoreLPJointObservation::Store() requires that last parsed element is a UINT! (last parsed index was a joint action index)"<< endl; + throw EParse(msg); + } + _m_po->_m_lp_JOI.push_back(_m_po->_m_lp_uint); + return; + } + MatchingJointObservations(0,_m_po->_m_lp_JO); + _m_po->_m_lp_JO.clear(); +} +/* +void ParserDPOMDPFormat_Spirit::StoreLPJointObservation::operator()(iterator_t str, iterator_t end) const +{ + _m_po->_m_lp_JOI.clear(); + if(_m_isJointObservationIndex) //as opposed to parsing individual indices + { + //_m_lp_JO not necessary: clear it. 
+ _m_po->_m_lp_JO.clear(); + _m_po->_m_lp_JOI.push_back(_m_po->_m_lp_int); + return; + } + try{ MatchingJointObservations(0,_m_po->_m_lp_JO); } + catch(E e) + { + stringstream ermsg; + ermsg << e.SoftPrint() << endl << "(at " << str.get_position() << ")"<_m_lp_JO.clear(); +} +*/ +void ParserDPOMDPFormat_Spirit::StoreLPJointObservation::MatchingJointObservations (Index curAgent, vector indIndices) const +{ + if(_m_po->_m_nrA != indIndices.size()) + { + stringstream msg; + msg << "individual obs indices vector has wrong size. ("<_m_nrA << ")" << "at(" << + _m_po->_m_first->get_position() << ")" << endl;; + throw EParse(msg); + } + if(curAgent == _m_po->_m_nrA) //past last agent: all work done + { + Index joI = 0; + try + { + joI = _m_po->GetDecPOMDPDiscrete()->IndividualToJointObservationIndices( + indIndices); + _m_po->_m_lp_JOI.push_back(joI); + } + catch(E e) + { + stringstream ss; ss <_m_first->get_position() << + ")" << endl; + throw EParse(ss); + } + if(DEBUG_PARSE) + { + cout<<"MatchingJointObservations: joint observation index for < "; + vector::iterator it = indIndices.begin(); + vector::iterator last = indIndices.end(); + while(it != last){ cout << *it <<" "; it++;} + cout << "> = "<_m_anyJOIndex) + { + //do for each action of this agent + size_t nrObs = _m_po->GetDecPOMDPDiscrete()->GetNrObservations(curAgent); + for(Index oi=0; oi < nrObs; oi++) + { + indIndices[curAgent] = oi; + MatchingJointObservations(curAgent+1, indIndices); + } + } + else + MatchingJointObservations(curAgent+1, indIndices); + } +} + + +void ParserDPOMDPFormat_Spirit::StoreLPFromState::operator()(iterator_t str, iterator_t end) const +{ + if(DEBUG_PARSE) + cout << "StoreLPFromState: pushing "<_m_lp_sI"<_m_lp_fromSI.clear(); + if(_m_po->_m_lp_type == UINT) + { + unsigned int index = _m_po->_m_lp_uint; + size_t nrS = _m_po->GetDecPOMDPDiscrete()->GetNrStates(); + if(index >= nrS) + { + stringstream ss; ss<<"StoreLPFromState: '"<< index<< "' is not a valid state index!? 
Number of states is " << nrS <<" (at " <_m_lp_fromSI.push_back( _m_po->_m_lp_uint ); + } + else if(_m_po->_m_lp_type == STRING) + { + try + { + Index sI = _m_po->GetDecPOMDPDiscrete()->GetStateIndexByName( + _m_po->_m_lp_string); + _m_po->_m_lp_fromSI.push_back(sI); + } + catch(E e) + { + stringstream ermsg; ermsg << e.SoftPrint() << " (at " << + str.get_position() << ")"<_m_lp_type == ASTERICK ) + _m_po->_m_lp_fromSI.push_back(_m_po->_m_anyStateIndex); + else + throw E("StoreLPFromState expected that the last parsed type is a state index(uint), state name (string) or wilcard ('*')."); +} + + +void ParserDPOMDPFormat_Spirit::StoreLPToState::operator()(iterator_t str, iterator_t end) const +{ + if(DEBUG_PARSE) + cout << "StoreLPToState: pushing "<_m_lp_sI"<_m_lp_toSI.clear(); + if(_m_po->_m_lp_type == UINT) + { + unsigned int index = _m_po->_m_lp_uint; + size_t nrS = _m_po->GetDecPOMDPDiscrete()->GetNrStates(); + if(index >= nrS) + { + stringstream ss; ss<<"StoreLPToState: '"<< index<< "' is not a valid state index!? Number of states is " << nrS <<" (at " <_m_lp_toSI.push_back( _m_po->_m_lp_uint ); + } + else if(_m_po->_m_lp_type == STRING) + { + try + { + Index sI = _m_po->GetDecPOMDPDiscrete()->GetStateIndexByName( + _m_po->_m_lp_string); + _m_po->_m_lp_toSI.push_back(sI); + } + catch(E e) + { + stringstream ermsg; ermsg << e.SoftPrint() << " (at " << + str.get_position() << ")"<_m_lp_type == ASTERICK ) + _m_po->_m_lp_toSI.push_back(_m_po->_m_anyStateIndex); + else + throw E("StoreLPToState expected that the last parsed type is a state index(uint), state name (string) or wilcard ('*')."); +} + +void ParserDPOMDPFormat_Spirit::ProcessTProb::operator()(iterator_t str, iterator_t end) const +{ + if(_m_po->_m_lp_type != DOUBLE) + { + stringstream ss; ss<<"ProcessTProb:last parsed type should be a double. 
(at " <_m_lp_double; + if( _m_po->_m_lp_fromSI.size() != 1 || + _m_po-> _m_lp_toSI.size() != 1 ) + { + stringstream ss; ss<<"ProcessTProb:_m_lp_fromSI or _m_lp_toSI has size != 1. (at " <_m_lp_fromSI[0]; + Index sucSI = _m_po-> _m_lp_toSI[0]; + Index jaI = 0; + /* redundant - this simple setting is dealt with appropriately with the complex situation + * code below. + if( sI != _m_po->_m_anyStateIndex && + sucSI != _m_po->_m_anyStateIndex && + _m_po->_m_lp_JAI.size() == 1 ) + { + jaI = _m_po->_m_lp_JAI[0]; + _m_po->GetDecPOMDPDiscrete()->SetTransitionProbability(sI, jaI, + sucSI, prob); + if(DEBUG_PARSE) + cout << "Setting T(sI="<_m_anyStateIndex ) + { + _m_po->_m_lp_fromSI.clear(); + for(Index si=0; si < _m_po->GetDecPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_fromSI.push_back(si); + } + if( sucSI == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_toSI.clear(); + for(Index si=0; si < _m_po->GetDecPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_toSI.push_back(si); + } + vector::iterator sI_it = _m_po->_m_lp_fromSI.begin(); + vector::iterator sI_last = _m_po->_m_lp_fromSI.end(); + while(sI_it != sI_last) + { + sI = *sI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + jaI = *jaI_it; + vector::iterator sucSI_it = _m_po->_m_lp_toSI.begin(); + vector::iterator sucSI_last = _m_po->_m_lp_toSI.end(); + while(sucSI_it != sucSI_last) + { + sucSI = *sucSI_it; + _m_po->GetDecPOMDPDiscrete()->SetTransitionProbability(sI, jaI, sucSI, + prob); + if(DEBUG_PARSE) + cout << "Setting T("<_m_lp_fromSI.size() != 1 ) + { + stringstream ss; ss<<"ProcessTRow:_m_lp_fromSI has size != 1. (at " <IsRowMatrixLP() ) + { + stringstream ss; ss<<"ProcessTRow: a row matrix should have been parsed. 
(at " < row = _m_po->_m_curMatrix[0]; + size_t nrS = _m_po->GetDecPOMDPDiscrete()->GetNrStates(); + if( row.size() != nrS) + { + stringstream ss; ss<<"ProcessTRow: exected a row matrix with nrStates="<_m_lp_fromSI[0]; + Index jaI = 0; + if( sI == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_fromSI.clear(); + for(Index si=0; si < _m_po->GetDecPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_fromSI.push_back(si); + } + + vector::iterator sI_it = _m_po->_m_lp_fromSI.begin(); + vector::iterator sI_last = _m_po->_m_lp_fromSI.end(); + while(sI_it != sI_last) + { + sI = *sI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + jaI = *jaI_it; + for(Index sucSI=0; sucSI < nrS; sucSI++) + { + _m_po->GetDecPOMDPDiscrete()->SetTransitionProbability + (sI, jaI, sucSI, row[sucSI]); + if(DEBUG_PARSE) + cout << "Setting T("<::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + jaI = *jaI_it; + for(Index sucSI=0; sucSI < nrS; sucSI++) + { + double prob = _m_po->_m_curMatrix[sI][sucSI]; + _m_po->GetDecPOMDPDiscrete()->SetTransitionProbability + (sI, jaI, sucSI, prob); + if(DEBUG_PARSE) + cout << "Setting T("<_m_lp_type = "<<_m_po->_m_lp_type <_m_lp_type != DOUBLE) + { + stringstream ss; ss<<"ProcessOProb:last parsed type should be a double. (at " <_m_lp_double; + if( _m_po-> _m_lp_toSI.size() != 1 ) + { + stringstream ss; ss<<"ProcessOProb: _m_lp_toSI has size != 1. (at " <_m_lp_JOI.size() <= 0 ) + { + stringstream ss; ss<<"ProcessOProb: _m_lp_JOI has size <= 0. 
(at " < _m_lp_toSI[0]; + Index joI = _m_po->_m_lp_JOI[0]; + if( sucSI == _m_po->_m_anyStateIndex ) //if '*' -> replace by a list of all state indices + { + _m_po->_m_lp_toSI.clear(); + for(Index si=0; si < _m_po->GetDecPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_toSI.push_back(si); + } + if( joI == _m_po->_m_anyJOIndex ) //if '*' -> replace by list of all joint obs.indices + { + _m_po->_m_lp_JOI.clear(); + for(Index jo=0; jo<_m_po->GetDecPOMDPDiscrete()->GetNrJointObservations();jo++) + _m_po->_m_lp_JOI.push_back(jo); + } + + vector::iterator joI_it = _m_po->_m_lp_JOI.begin(); + vector::iterator joI_last = _m_po->_m_lp_JOI.end(); + while(joI_it != joI_last) + { + joI = *joI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + Index jaI = *jaI_it; + vector::iterator sucSI_it = _m_po-> + _m_lp_toSI.begin(); + vector::iterator sucSI_last = _m_po-> + _m_lp_toSI.end(); + while(sucSI_it != sucSI_last) + { + sucSI = *sucSI_it; + _m_po->GetDecPOMDPDiscrete()->SetObservationProbability + (jaI, sucSI, joI, prob); + if(DEBUG_PARSE) + cout << "Setting O("<IsRowMatrixLP() ) + { + stringstream ss; ss<<"ProcessORow: a row matrix should have been parsed. 
(at " < row = _m_po->_m_curMatrix[0]; + size_t nrS = _m_po->GetDecPOMDPDiscrete()->GetNrStates(); + size_t nrJO = _m_po->GetDecPOMDPDiscrete()->GetNrJointObservations(); + if( row.size() != nrJO) + { + stringstream ss; ss<<"ProcessORow: exected a row matrix with nrJO="<_m_lp_toSI[0] == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_toSI.clear(); + for(Index si=0; si < nrS; si++) + _m_po->_m_lp_toSI.push_back(si); + } + vector::iterator sI_it = _m_po->_m_lp_toSI.begin(); + vector::iterator sI_last = _m_po->_m_lp_toSI.end(); + while(sI_it != sI_last) + { + Index sucSI = *sI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + Index jaI = *jaI_it; + for(Index joI=0; joI < nrJO; joI++) + { + _m_po->GetDecPOMDPDiscrete()->SetObservationProbability + (jaI, sucSI, joI, row[joI]); + if(DEBUG_PARSE) + cout << "Setting O("<::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + jaI = *jaI_it; + for(Index sucSI=0; sucSI < nrS; sucSI++) + { + double prob = _m_po->_m_curMatrix[sucSI][joI]; + _m_po->GetDecPOMDPDiscrete()->SetObservationProbability + (jaI, sucSI, joI, prob); + if(DEBUG_PARSE) + cout << "Setting O("<_m_lp_type = "<<_m_po->_m_lp_type <_m_lp_type != DOUBLE) + { + stringstream ss; ss<<"ProcessR:last parsed type should be a double. (at " <_m_lp_double; + if( _m_po-> _m_lp_fromSI.size() != 1 ) + { + stringstream ss; ss<<"ProcessR: _m_lp_fromSI has size != 1. (at " < _m_lp_toSI.size() != 1 ) + { + stringstream ss; ss<<"ProcessR: _m_lp_toSI has size != 1. 
(at " <GetDecPOMDPDiscrete()->GetNrStates(); + //size_t nrJO = _m_po->GetDecPOMDPDiscrete()->GetNrJointObservations(); + Index sucSI = _m_po-> _m_lp_toSI[0]; + Index joI = _m_po->_m_lp_JOI[0]; + if( sucSI == _m_po->_m_anyStateIndex && joI == _m_po->_m_anyJOIndex) + { + // rewards of the form R: ja : s : * : * : %f, so we can simply do + // AddReward() + + if(_m_po->_m_lp_fromSI[0] == _m_po->_m_anyStateIndex) + { + _m_po->_m_lp_fromSI.clear(); + for(Index si=0; si < nrS; si++) + _m_po->_m_lp_fromSI.push_back(si); + } + + vector::iterator sI_it = _m_po->_m_lp_fromSI.begin(); + vector::iterator sI_last = _m_po->_m_lp_fromSI.end(); + while(sI_it != sI_last) + { + Index sI = *sI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + Index jaI = *jaI_it; + _m_po->GetDecPOMDPDiscrete()->SetReward(sI, jaI, reward); + if(DEBUG_PARSE) + cout << "Setting R("<_m_anyJOIndex ) //but end-state is not ANY_INDEX + { + if( _m_po->_m_lp_fromSI[0] == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_fromSI.clear(); + for(Index si=0; si < _m_po->GetDecPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_fromSI.push_back(si); + } +/* Obsolete test... 
+ if( sucSI == ANY_INDEX ) + { + _m_po->_m_lp_toSI.clear(); + for(Index si=0; si < _m_po->GetDecPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_toSI.push_back(si); + }*/ + vector::iterator sI_it = _m_po->_m_lp_fromSI.begin(); + vector::iterator sI_last = _m_po->_m_lp_fromSI.end(); + while(sI_it != sI_last) + { + Index sI = *sI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + Index jaI = *jaI_it; + vector::iterator sucSI_it = _m_po->_m_lp_toSI.begin(); + vector::iterator sucSI_last = _m_po->_m_lp_toSI.end(); + while(sucSI_it != sucSI_last) + { + sucSI = *sucSI_it; + _m_po->GetDecPOMDPDiscrete()->SetReward(sI, jaI, sucSI, reward); + if(DEBUG_PARSE) + cout << "Setting R("<_m_lp_fromSI[0] == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_fromSI.clear(); + for(Index si=0; si < _m_po->GetDecPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_fromSI.push_back(si); + } + if( _m_po->_m_lp_toSI[0] == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_toSI.clear(); + for(Index si=0; si < _m_po->GetDecPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_toSI.push_back(si); + } + vector::iterator joI_it = _m_po->_m_lp_JOI.begin(); + vector::iterator joI_last = _m_po->_m_lp_JOI.end(); + while(joI_it != joI_last) + { + joI = *joI_it; + vector::iterator sI_it = _m_po->_m_lp_fromSI.begin(); + vector::iterator sI_last = _m_po->_m_lp_fromSI.end(); + while(sI_it != sI_last) + { + Index sI = *sI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + Index jaI = *jaI_it; + vector::iterator sucSI_it = _m_po->_m_lp_toSI.begin(); + vector::iterator sucSI_last = _m_po->_m_lp_toSI.end(); + while(sucSI_it != sucSI_last) + { + sucSI = *sucSI_it; + _m_po->GetDecPOMDPDiscrete()->SetReward(sI, jaI, sucSI,joI, + reward); + if(DEBUG_PARSE) + cout << "Setting R("<_m_lp_toSI.size() != 1 ) + { + stringstream ss; 
ss<<"ProcessRRow:_m_lp_fromSI has size != 1. (at " <IsRowMatrixLP() ) + { + stringstream ss; ss<<"ProcessRRow: a row matrix should have been parsed. (at " < row = _m_po->_m_curMatrix[0]; + size_t nrS = _m_po->GetDecPOMDPDiscrete()->GetNrStates(); + size_t nrJO = _m_po->GetDecPOMDPDiscrete()->GetNrJointObservations(); + if( row.size() != nrJO) + { + stringstream ss; ss<<"ProcessRRow: exected a row matrix with nrJO="<_m_lp_fromSI[0] == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_fromSI.clear(); + for(Index si=0; si < _m_po->GetDecPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_fromSI.push_back(si); + } + if( _m_po->_m_lp_toSI[0] == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_toSI.clear(); + for(Index si=0; si < nrS; si++) + _m_po->_m_lp_toSI.push_back(si); + } + + vector::iterator sI_it = _m_po->_m_lp_fromSI.begin(); + vector::iterator sI_last = _m_po->_m_lp_fromSI.end(); + while(sI_it != sI_last) + { + Index sI = *sI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + Index jaI = *jaI_it; + vector::iterator sucSI_it = _m_po->_m_lp_toSI.begin(); + vector::iterator sucSI_last = _m_po->_m_lp_toSI.end(); + while(sucSI_it != sucSI_last) + { + Index sucSI = *sucSI_it; + for(Index joI=0; joI < nrJO; joI++) + { + _m_po->GetDecPOMDPDiscrete()->SetReward(sI, jaI, sucSI,joI, + row[joI]); + if(DEBUG_PARSE) + cout << "Setting R("<IsDimensionOfMatrix(nrS,nrJO)) + { + stringstream ss; ss << "Expected a nrS x nrJO matrix."<< + " (nrS="<< nrS <<", nrJO="<_m_lp_fromSI[0] == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_fromSI.clear(); + for(Index si=0; si < _m_po->GetDecPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_fromSI.push_back(si); + } + vector::iterator sI_it = _m_po->_m_lp_fromSI.begin(); + vector::iterator sI_last = _m_po->_m_lp_fromSI.end(); + while(sI_it != sI_last) + { + Index sI = *sI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last 
= _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + Index jaI = *jaI_it; + for(Index sucSI=0; sucSIGetDecPOMDPDiscrete()->SetReward(sI, jaI, sucSI,joI, + _m_po->_m_curMatrix[sucSI][joI]); + if(DEBUG_PARSE) + { + cout << "Setting R(" << sI << "," << jaI <<"," << sucSI; + cout << "," << joI << ") = " << + _m_po->_m_curMatrix[sucSI][joI] << endl; + } + } + } + jaI_it++; + } + sI_it++; + } +} + + +}// end namespace DPOMDPFormatParsing diff --git a/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserDPOMDPFormat_Spirit.h b/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserDPOMDPFormat_Spirit.h new file mode 100644 index 000000000..ecf07d18e --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserDPOMDPFormat_Spirit.h @@ -0,0 +1,1044 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * + * For contact information please see the included AUTHORS file. + */ + +#ifndef _PARSERDECPOMDPDISCRETE_H_ +#define _PARSERDECPOMDPDISCRETE_H_ 1 + +/* the include directives */ +#include +#include +#include "Globals.h" +#include "DecPOMDPDiscrete.h" +#include "EParse.h" + +#include "boost/version.hpp" + +#if USE_BOOST_SPIRIT_CLASSIC +#include "boost/spirit/include/classic_core.hpp" +#include "boost/spirit/include/classic_iterator.hpp" +#else +#include "boost/spirit/core.hpp" +#include "boost/spirit/iterator/file_iterator.hpp" +#include "boost/spirit/iterator/position_iterator.hpp" +#endif + +#include "CommentOrBlankParser.h" +#include "ParserInterface.h" + + +using namespace boost::spirit; + +/* constants */ +//use subgrammars (or not?) 
+#define SUBGRAMMAR 0 + +//General parsing debug informations +#define DEBUG_PARSE 0 + +//debugging the 'comment' parser: +#define DEBUG_COMPARS 0 +//debugging the 'comment or blank parser' : +#define DEBUG_COBP 0 + +// the 'any' individual action to denote the wildcard '*' +#define ANY_INDEX -1 + + +namespace DPOMDPFormatParsing{ + +/**Outputs the file_position structure info (gotten from + * postion_iterator::get_position() ). */ +std::ostream& operator<<(std::ostream& out, file_position const& lc); + + + +/**ParserDPOMDPFormat_Spirit is a parser for DecPOMDPDiscrete. + * That is, it parses the .dpomdp file format. + * \todo TODO: CHANGE NAME? */ +class ParserDPOMDPFormat_Spirit : + public ParserInterface +{ + typedef char char_t; + typedef file_iterator iterator_t_fi; + typedef position_iterator iterator_t; + typedef scanner scanner_t; + typedef rule rule_t; + + //used to now what has been parsed: + enum parsed_t { INT, DOUBLE, UINT, STRING, ASTERICK, UNIFORM, IDENTITY }; + private: + + // TODO + DecPOMDPDiscrete* _m_decPOMDPDiscrete; + + DecPOMDPDiscrete* GetDecPOMDPDiscrete() + { + return _m_decPOMDPDiscrete; + } + + struct Initialize + { + ParserDPOMDPFormat_Spirit* _m_po; + Initialize (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + _m_po->_m_first = &str; + } + }; + + struct DebugOutputNoParsed + { + std::string s; + DebugOutputNoParsed (std::string s2){s = s2;} + void operator()(iterator_t str, iterator_t end) const; + void operator()(const unsigned int&) const; + void operator()(const double &) const; + }; + struct DebugOutput + { + std::string s; + DebugOutput (std::string s2){s = s2;} + void operator()(iterator_t str, iterator_t end) const; + void operator()(const int&) const; + void operator()(const unsigned int&) const; + void operator()(const double&) const; + }; + struct StoreLastParsedElement + { + ParserDPOMDPFormat_Spirit* _m_po; + StoreLastParsedElement(ParserDPOMDPFormat_Spirit* 
po){_m_po = po;} + void operator()(const int i) const; + void operator()(const unsigned int i) const; + void operator()(const double f) const; + void operator()(iterator_t str, iterator_t end) const; + }; + struct SetLastParsedType + { + ParserDPOMDPFormat_Spirit* _m_po; + parsed_t _m_lastParsedType; + SetLastParsedType (ParserDPOMDPFormat_Spirit* po, parsed_t lpt) + { + _m_po = po; + _m_lastParsedType = lpt; + } + void operator()(iterator_t str, iterator_t end) const + { + _m_po->_m_lp_type = _m_lastParsedType; + } + }; + + struct NYI//Not Yet Implemented + { + std::string msg; + NYI(std::string s){msg = s;} + void operator()(iterator_t str, iterator_t end) const + { + file_position fp = str.get_position(); + std::stringstream ermsg; + ermsg << "sorry, \""<< msg <<"\" is not yet implemented." + << std::endl << "(at " << fp << ")"<_m_curAI = _m_temp_stor; + } + }; + struct SetNextAgentIndex + { + ParserDPOMDPFormat_Spirit* _m_po; + SetNextAgentIndex (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + if(_m_po->_m_curAI < _m_po->_m_nrA) + _m_po->_m_curAI++; + else + std::cout << "SetNextAgentIndex - ERROR: current agent index ("<< + _m_po->_m_curAI<<") out of bounds (number of agents="<< + _m_po->_m_nrA<<")"<_m_curMatrix.push_back( std::vector() ); + } + }; + struct NextFloatOfRow + { + ParserDPOMDPFormat_Spirit* _m_po; + NextFloatOfRow (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + if(_m_po->_m_lp_type != DOUBLE) + { + std::cout <<"NextFloatOfRow - ERROR parsing probabilities, which" + <<" means that doubles are expected, however last found" + <<" type is #"<<_m_po->_m_lp_type<_m_curMatrix.size(); + std::cout <<" _m_po->_m_curMatrix.size()= " << s <0) + std::cout <<" _m_po->_m_curMatrix.back().size()= " << + _m_po->_m_curMatrix.back().size()<_m_curMatrix.size(); + if(s > 0) + { + double d = _m_po->_m_lp_double; + //_m_curMatrix.back() isa vector + 
_m_po->_m_curMatrix.back().push_back(d); + _m_po->_m_matrixModified = true; + } + else + std::cout << "NextFloatOfRow - ERROR _m_curMatrix contains no "<< + "elements (i.e. there are no rows to add to...)."; + + if(DEBUG_PARSE){//DEBUG + size_t s =_m_po->_m_curMatrix.size(); + std::cout <<" _m_po->_m_curMatrix.size()= " << s <0) + std::cout <<" _m_po->_m_curMatrix.back().size()= " << + _m_po->_m_curMatrix.back().size()<_m_curIdentList.push_back(std::string(str,end)); + _m_po->_m_identListModified = true; + + } + }; + struct SetNrStates + { + ParserDPOMDPFormat_Spirit* _m_po; + size_t _m_target; + bool _m_haveTarget; + SetNrStates (ParserDPOMDPFormat_Spirit* po){ + _m_po = po; + _m_target = 0; + _m_haveTarget = false; + } + SetNrStates (ParserDPOMDPFormat_Spirit* po, size_t i){ + _m_po = po; + _m_target = i; + _m_haveTarget = true; + } + void operator()(const size_t& i) const + { + if(_m_haveTarget) + _m_po->GetDecPOMDPDiscrete()->SetNrStates(_m_target); + else + _m_po->GetDecPOMDPDiscrete()->SetNrStates( i ); + + } + void operator()(iterator_t str, iterator_t end) const + { + if(_m_haveTarget) + _m_po->GetDecPOMDPDiscrete()->SetNrStates(_m_target); + else if(_m_po->_m_lp_type == UINT)// || _m_lp_type + _m_po->GetDecPOMDPDiscrete()->SetNrStates(_m_po->_m_lp_uint); + else + throw E("SetNrStates: no target value set and last parsed data type != UINT..."); + } + }; + //this processes the start state specification in case of a specified + //probability vector and in case of the keyword uniform. + struct StartStateRowProbs + { + ParserDPOMDPFormat_Spirit* _m_po; + StartStateRowProbs (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + if(_m_po->_m_lp_type == UNIFORM) + { + _m_po->GetDecPOMDPDiscrete()->SetUniformISD(); + return; + } + + //check size - it should be a row matrix... 
+ if( !_m_po->IsRowMatrixLP() ) + throw E("StartStateRowProbs: _m_po->_m_curMatrix should \ + be a row vector!"); + if(_m_po->_m_curMatrix[0].size() != _m_po->GetDecPOMDPDiscrete()-> + GetNrStates()) + { + std::string err = "StartStateRowProbs: _m_po->_m_curMatrix"; + err += " [0] should contain NrStates(="; + std::stringstream ss; + ss << "StartStateRowProbs: _m_po->_m_curMatrix[0] " << + "should contain NrStates(=" << _m_po->GetDecPOMDPDiscrete()-> + GetNrStates() << ") entries! (not "<< + _m_po->_m_curMatrix[0].size()<<")\n"; + + throw E( ss.str().c_str() ); + } + + StateDistributionVector *isd=new StateDistributionVector(_m_po->_m_curMatrix[0]); + _m_po->GetDecPOMDPDiscrete()->SetISD(isd); + } + }; + //this adds states to the start state list - the first stage in + //processing the other ways of specifying the start state distribution. + struct AddStartState + { + ParserDPOMDPFormat_Spirit* _m_po; + AddStartState (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + if(_m_po->_m_lp_type != STRING) + { + std::stringstream ss; ss << "SetStartState::operator()(iterator_t str, iterator_t end) - expected a string as last parsed type! (at"<GetDecPOMDPDiscrete()->GetStateIndexByName( + _m_po->_m_lp_string); + _m_po->_m_startStateListSI.push_back(sI); + } + catch(E e) + { + std::stringstream ss; ss << e.SoftPrint() << "(at"<< + str.get_position() <<")"<_m_lp_type != UINT) + { + std::stringstream ss; ss << "SetStartState::operator()(const unsigned int i&) - expected a UINT as last parsed type! (at"<< _m_po->_m_first->get_position() <<")"<_m_startStateListSI.push_back( _m_po->_m_lp_uint); + } + }; + //this functor specified that the states in the start state list should + //be excluded, not included. 
+ struct StartStateExludes + { + ParserDPOMDPFormat_Spirit* _m_po; + StartStateExludes (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + _m_po->_m_startStateListExclude = true; + } + }; + struct ProcessStartStateList + { + ParserDPOMDPFormat_Spirit* _m_po; + ProcessStartStateList (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(const unsigned int& i) const + { + ProcessList(); + } + void operator()(iterator_t str, iterator_t end) const + { + ProcessList(); + } + void ProcessList() const + { + size_t nrS = _m_po->GetDecPOMDPDiscrete()->GetNrStates(); + size_t listSize = _m_po->_m_startStateListSI.size(); + size_t nrIncS = (_m_po->_m_startStateListExclude)? nrS - listSize: + listSize; + double u_prob = 1.0 / nrIncS; + + std::vector init_probs; + if(!_m_po->_m_startStateListExclude) + //elems in list get uniform prob. + init_probs = std::vector(nrS, 0.0); + else //other elems get uniform prob + init_probs = std::vector(nrS, u_prob); + + std::vector::iterator it = _m_po->_m_startStateListSI.begin(); + std::vector::iterator last = _m_po->_m_startStateListSI.end(); + while(it != last) + { + init_probs[*it] = _m_po->_m_startStateListExclude? 
+ 0.0 : u_prob; + it++; + } + StateDistributionVector *isd=new StateDistributionVector(init_probs); + _m_po->GetDecPOMDPDiscrete()->SetISD(isd); + init_probs.clear(); + _m_po->_m_startStateListSI.clear(); + + } + }; + struct SetNrActions + { + ParserDPOMDPFormat_Spirit* _m_po; + SetNrActions (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + if(DEBUG_PARSE) + { + std::string s(str,end); + std::cout << "SetNrActions: setting "<<_m_po->_m_lp_uint<< + " actions for agent "<<_m_po->_m_curAI<GetDecPOMDPDiscrete()->SetNrActions(_m_po->_m_curAI, + _m_po->_m_lp_uint); + + } + void operator()(const unsigned int&) const + { + _m_po->GetDecPOMDPDiscrete()->SetNrActions(_m_po->_m_curAI, + _m_po->_m_lp_uint); + if(DEBUG_PARSE) + std::cout << "SetNrActions: set "<<_m_po->_m_lp_uint<< + " actions for agent "<<_m_po->_m_curAI<GetDecPOMDPDiscrete()->AddAction(_m_po->_m_curAI, s); + if(DEBUG_PARSE) + std::cout << "AddAction: added action "<_m_curAI<GetDecPOMDPDiscrete()->SetNrObservations(_m_po->_m_curAI, + _m_po->_m_lp_uint); + if(DEBUG_PARSE) + std::cout << "SetNrObservations: set "<<_m_po->_m_lp_uint<< + " observations for agent "<<_m_po->_m_curAI<GetDecPOMDPDiscrete()->SetNrObservations(_m_po->_m_curAI, + _m_po->_m_lp_uint); + if(DEBUG_PARSE) + std::cout << "SetNrObservations: set "<<_m_po->_m_lp_uint<< + " observations for agent "<<_m_po->_m_curAI<GetDecPOMDPDiscrete()->AddObservation(_m_po->_m_curAI, s); + if(DEBUG_PARSE) + std::cout << "AddObservation: added action "<_m_curAI<_m_anyStateIndex = _m_po->GetDecPOMDPDiscrete()->GetNrStates(); + _m_po->GetDecPOMDPDiscrete()->SetStatesInitialized(true); + + } + }; + + struct InitializeActions + { + ParserDPOMDPFormat_Spirit* _m_po; + InitializeActions (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + _m_po->GetDecPOMDPDiscrete()->ConstructJointActions(); + _m_po->GetDecPOMDPDiscrete()->SetActionsInitialized(true); + 
_m_po->_m_anyJAIndex = _m_po->GetDecPOMDPDiscrete()-> + GetNrJointActions(); + } + }; + struct InitializeObservations + { + ParserDPOMDPFormat_Spirit* _m_po; + InitializeObservations (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + _m_po->GetDecPOMDPDiscrete()->ConstructJointObservations(); + _m_po->GetDecPOMDPDiscrete()->SetObservationsInitialized(true); + _m_po->_m_anyJOIndex = _m_po->GetDecPOMDPDiscrete()-> + GetNrJointObservations(); + } + }; + + struct AddModels + { + ParserDPOMDPFormat_Spirit* _m_po; + AddModels (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + DecPOMDPDiscrete* p = _m_po->GetDecPOMDPDiscrete(); + // add the transition model + p->CreateNewTransitionModel(); + p->CreateNewObservationModel(); + p->CreateNewRewardModel(); + //if(DEBUG_PARSE) _m_po->GetDecPOMDPDiscrete()->PrintInfo(); + } + + }; + struct StoreLPAction + { + ParserDPOMDPFormat_Spirit* _m_po; + StoreLPAction (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + + if(DEBUG_PARSE) + std::cout << "StoreLPAction: pushing "<_m_lp_type == UINT) + { + unsigned int index = _m_po->_m_lp_uint; + if(index >= _m_po->GetDecPOMDPDiscrete()->GetNrJointActions() ) + { + std::stringstream ss; ss<<"StoreLPAction: '"<< index<< "' is not valid!? 
Number of actions is " << _m_po->GetDecPOMDPDiscrete()->GetNrJointActions() <<" (at " <_m_lp_JA.push_back(_m_po->_m_lp_uint); + } + else if(_m_po->_m_lp_type == STRING) + { + //make sure that _m_lp_JA is cleared after each joint action + //so we can do this: + Index curAgIndex = _m_po->_m_lp_JA.size(); + try + { + Index aI = _m_po->GetDecPOMDPDiscrete()->GetActionIndexByName( + _m_po->_m_lp_string, curAgIndex); + _m_po->_m_lp_JA.push_back(aI); + } + catch(E e) + { + std::stringstream ermsg; ermsg << e.SoftPrint() << " (at " << + str.get_position() << ")"<_m_lp_type == ASTERICK ) + _m_po->_m_lp_JA.push_back(_m_po->_m_anyJAIndex); + else + throw E("StoreLPAction expected that the last parsed type is a action index(uint), action name (string) or wilcard ('*')."); + } + }; + /**called before StoreLPJointAction in case of a wildcard '*' joint + * action. Effectively, this functions sets _m_lp_JA to a vector of + * _m_po->_m_anyJAIndexs (one for each agent) .*/ + struct WildCardJointAction + { + ParserDPOMDPFormat_Spirit* _m_po; + WildCardJointAction (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + }; + struct StoreLPJointAction + { + ParserDPOMDPFormat_Spirit* _m_po; + bool _m_isJointActionIndex; + StoreLPJointAction (ParserDPOMDPFormat_Spirit* po, bool b = false) + { + _m_po = po; + _m_isJointActionIndex = b; + } + void operator()(iterator_t str, iterator_t end) const{Store();} + void operator()(const unsigned int&) const{Store();} + void Store() const; + /**This function constructs the vector (_m_lp_JAI) of joint actions + * that match with the vector with individual action indices. + * + * This is needed to properly deal with wild cards; a single action specification + * in the (d)pomdp file may correspond to manu (joint) actions. 
+ */ + void MatchingJointActions (Index curAgent, std::vector indIndices) const; + + }; + struct StoreLPObservation + { + ParserDPOMDPFormat_Spirit* _m_po; + StoreLPObservation (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + + if(DEBUG_PARSE) + std::cout << "StoreLPObservation: pushing "<_m_lp_type == UINT) + _m_po->_m_lp_JO.push_back(_m_po->_m_lp_uint); + else if(_m_po->_m_lp_type == STRING) + { + //make sure that _m_lp_JO is cleared after each joint action + //so we can do this: + Index curAgIndex = _m_po->_m_lp_JO.size(); + try + { + Index aI = _m_po->GetDecPOMDPDiscrete()->GetObservationIndexByName( + _m_po->_m_lp_string, curAgIndex); + _m_po->_m_lp_JO.push_back(aI); + } + catch(E e) + { + std::stringstream ermsg; ermsg << e.SoftPrint() << " (at " << + str.get_position() << ")"<_m_lp_type == ASTERICK ) + _m_po->_m_lp_JO.push_back(_m_po->_m_anyJOIndex); + else + throw E("StoreLPObservation expected that the last parsed type is a action index(uint), action name (string) or wilcard ('*')."); + } + }; + /**called before StoreLPJointObservation in case of a wildcard '*' joint + * action. 
Effectively, this functions sets _m_lp_JO to a vector of + * ANY_INDEXs (=_m_anyJOIndex) (one for each agent) .*/ + struct WildCardJointObservation + { + ParserDPOMDPFormat_Spirit* _m_po; + WildCardJointObservation (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + }; + struct StoreLPJointObservation + { + ParserDPOMDPFormat_Spirit* _m_po; + bool _m_isJointObservationIndex; + StoreLPJointObservation (ParserDPOMDPFormat_Spirit* po, bool b = false) + { + _m_po = po; + _m_isJointObservationIndex = b; + } + void operator()(iterator_t str, iterator_t end) const{Store();} + void operator()(const unsigned int&) const{Store();} + void Store() const; + + /**This function constructs the vector (_m_lp_JOI) of joint actions + * that match with the vector with individual action indices. + * + * This is needed to properly deal with wild cards; a single specification + * in the (d)pomdp file may correspond to many (joint) observations. + * */ + void MatchingJointObservations(Index curAgent, std::vector indIndices) const; + + }; + struct StoreLPFromState + { + ParserDPOMDPFormat_Spirit* _m_po; + StoreLPFromState (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct StoreLPToState + { + ParserDPOMDPFormat_Spirit* _m_po; + StoreLPToState (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct ProcessTProb + { + ParserDPOMDPFormat_Spirit* _m_po; + ProcessTProb (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct ProcessTRow + { + ParserDPOMDPFormat_Spirit* _m_po; + ProcessTRow (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct ProcessTMatrix + { + ParserDPOMDPFormat_Spirit* _m_po; + ProcessTMatrix (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, 
iterator_t end) const; + + }; + struct ProcessOProb + { + ParserDPOMDPFormat_Spirit* _m_po; + ProcessOProb (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct ProcessORow + { + ParserDPOMDPFormat_Spirit* _m_po; + ProcessORow (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct ProcessOMatrix + { + ParserDPOMDPFormat_Spirit* _m_po; + ProcessOMatrix (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct ProcessR + { + ParserDPOMDPFormat_Spirit* _m_po; + ProcessR (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct ProcessRRow + { + ParserDPOMDPFormat_Spirit* _m_po; + ProcessRRow (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct ProcessRMatrix + { + ParserDPOMDPFormat_Spirit* _m_po; + ProcessRMatrix (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct InitializeDPOMDP + { + ParserDPOMDPFormat_Spirit* _m_po; + InitializeDPOMDP (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + _m_po->GetDecPOMDPDiscrete()->SetInitialized(true); + } + }; + struct SetNrAgents + { + ParserDPOMDPFormat_Spirit* _m_po; + SetNrAgents(ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t first, iterator_t last) const; + void operator()(const int&) const; + }; + struct AddAgents + { + ParserDPOMDPFormat_Spirit* _m_po; + AddAgents(ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t first, iterator_t last) const + { + _m_po->GetDecPOMDPDiscrete()->SetNrAgents(0); + std::vector::iterator it = _m_po->_m_curIdentList.begin(); + std::vector::iterator l = _m_po->_m_curIdentList.end(); + while(it != l) + { + 
_m_po->GetDecPOMDPDiscrete()->AddAgent(*it); + it++; + _m_po->_m_nrA++; + } + _m_po->ClearCurIdentList(); + } + }; + //discount param + static void dp_number(iterator_t str, iterator_t end); + struct dp_SetDiscountParam{ + ParserDPOMDPFormat_Spirit* _m_po; + dp_SetDiscountParam(ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t first, iterator_t last) const + { + std::string s(first, last); + if(DEBUG_PARSE){std::cout << "discount="<_m_lp_type == DOUBLE ) + { + if(DEBUG_PARSE){std::cout <<"last parsed number was a DOUBLE"<< + "(_m_po->_m_lp_double = "<<_m_po->_m_lp_double<<")\n";} + discount = _m_po->_m_lp_double; + } + else if (_m_po->_m_lp_type == INT) + { + if(DEBUG_PARSE){std::cout <<"last parsed number was a INT"<< + "(_m_po->_m_lp_int = "<<_m_po->_m_lp_int<<")\n";} + //the discount was parsed as an int + discount = (double) _m_po->_m_lp_int; + } + else if (_m_po->_m_lp_type == UINT) + { + if(DEBUG_PARSE){std::cout <<"last parsed number was a UINT"<< + "(_m_po->_m_lp_uint = "<<_m_po->_m_lp_uint<<")\n";} + //the discount was parsed as an int + discount = (double) _m_po->_m_lp_uint; + } + + if(DEBUG_PARSE){std::cout <<"dp_SetDiscountParam - discount="<< + discount<GetDecPOMDPDiscrete()->SetDiscount(discount); + } + }; + //value param + static void vp_value_tail(iterator_t str, iterator_t end); + struct vt_REWARDTOK + { + ParserDPOMDPFormat_Spirit* _m_po; + vt_REWARDTOK (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + _m_po->GetDecPOMDPDiscrete()->SetRewardType(REWARD); + } + }; + struct vt_COSTTOK + { + ParserDPOMDPFormat_Spirit* _m_po; + vt_COSTTOK (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + _m_po->GetDecPOMDPDiscrete()->SetRewardType(COST); + } + }; + //This is the functor which is called from the grammar (i.e. + //dpomdp.spirit) to add the parsed state (names) to the problem. 
+ struct AddState + { + ParserDPOMDPFormat_Spirit* _m_po; + AddState(ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t first, iterator_t last) const + { + if(DEBUG_PARSE){std::cout << "AddState - adding state \""<< + _m_po->_m_lp_string<<"\"\n";} + _m_po->GetDecPOMDPDiscrete()->AddState(_m_po->_m_lp_string); + } + }; + + struct DecPOMDPFileParser : public grammar + { + ParserDPOMDPFormat_Spirit* _m_parserObject; + DecPOMDPDiscrete* _m_problem; + DecPOMDPFileParser(ParserDPOMDPFormat_Spirit* parserObject) + { + _m_parserObject = parserObject; + _m_problem = _m_parserObject->GetDecPOMDPDiscrete(); + } + template + struct definition + { +#if SUBGRAMMAR +#include "sub_grammar_defs.h" + agentstok_parser AGENTSTOK; + discounttok_parser DISCOUNTTOK; + valuestok_parser VALUESTOK; + statestok_parser STATESTOK; + actionstok_parser ACTIONSTOK; + observationstok_parser OBSERVATIONSTOK; + ttok_parser TTOK; + otok_parser OTOK; + rtok_parser RTOK; + //the parsers that need a reference: + uniformtok_parser UNIFORMTOK; + inttok_parser INTTOK; +#endif + definition(DecPOMDPFileParser const& self) +#if SUBGRAMMAR + : + INTTOK(self._m_parserObject), + UNIFORMTOK(self._m_parserObject) +#endif + { +#include "dpomdp.spirit" + } + +#if SUBGRAMMAR == 0 +rule AGENTSTOK, DISCOUNTTOK,VALUESTOK,STATESTOK,ACTIONSTOK,OBSERVATIONSTOK,TTOK,OTOK,RTOK,UNIFORMTOK, INTTOK; +#endif +rule + EOLTOK, IDENTITYTOK,REWARDTOK,COSTTOK,STARTTOK,INCLUDETOK,EXCLUDETOK,RESETTOK,COLONTOK,ASTERICKTOK,PLUSTOK,MINUSTOK,FLOATTOK,STRINGTOK, + dpomdp_file,preamble,agents_param,discount_param,value_param,value_tail,state_param,state_tail,action_param,action_tail,obs_param,obs_tail,start_state,start_state_list,param_list,param_spec,trans_prob_spec,trans_spec_tail,obs_prob_spec,obs_spec_tail,reward_spec,reward_spec_tail,ui_matrix,u_matrix,prob, + action_param_line,obs_param_line,joint_obs,joint_action,state_or_indiv_act_or_obs,agents_tail,prob_row_vector,num_row_vector,reserved_word,state_list, 
float_r, int_r, action_list,obs_list, floats_matrix, floats_row_vector, dpomdp, from_state, to_state + ; + rule const& + start() const { return dpomdp_file; } + }; + }; + + + protected: + + public: + + // Constructor, destructor and copy assignment. + /// (default) Constructor + ParserDPOMDPFormat_Spirit(DecPOMDPDiscrete* problem=0); + // Copy constructor. + //ParserDPOMDPFormat_Spirit(const ParserDPOMDPFormat_Spirit& a); + // Destructor. + //~ParserDPOMDPFormat_Spirit(); + + //operators: + + //data manipulation (set) functions: + /**The function that starts the parsing.*/ + void Parse(); + + //get (data) functions: + + + ///The last parsed ... + unsigned int _m_lp_uint; + int _m_lp_int; + double _m_lp_double; + std::string _m_lp_string; + ///The type (INT or DOUBLE) of the last parsed number. + parsed_t _m_lp_type; + ///Whether the last 'number' used an optional sign ('+' or '-') + bool _m_lp_number_used_opt_sign; + ///Whether the last optional sign was a '-' + bool _m_lp_number_negated; + + //the number of agents - used often therefore also stored here + size_t _m_nrA; + //the current agent index. + Index _m_curAI; + //The following are special indices denoting any state, (joint) action + //or (joint) observation + Index _m_anyJAIndex; + Index _m_anyJOIndex; + Index _m_anyStateIndex; + + std::vector< std::vector > _m_curMatrix; + bool _m_matrixModified; + struct ResetCurMatrix //TODO: move to proper place + { + ParserDPOMDPFormat_Spirit* _m_po; + ResetCurMatrix (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + if(_m_po->_m_matrixModified) + _m_po->ClearCurMatrix(); + } + }; + void ClearCurMatrix()//TODO:move to proper place... 
+ { + _m_curMatrix.clear();//no pointers stored, so this should not leak + _m_curMatrix.push_back( std::vector() ); + _m_matrixModified = false; + } + bool IsDimensionOfMatrix(size_t rows, size_t cols) + { + if(_m_curMatrix.size() != rows + 1) + return false; + for(Index r=0; r _m_curIdentList; + bool _m_identListModified; + struct ResetCurIdentList //TODO: move to proper place + { + ParserDPOMDPFormat_Spirit* _m_po; + ResetCurIdentList (ParserDPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + //TODO:check whether the previous matrix was consistent + if(_m_po->_m_identListModified) + _m_po->ClearCurIdentList(); + } + }; + void ClearCurIdentList()//TODO:move to proper place... + { + _m_curIdentList.clear();//no pointers stored, should not leak + _m_identListModified = false; + } + + /**A vector in which the currently parsed individual action indices + * of a joint action are stored. (by StoreLPAction). + * wild-cards (asterik) are stored as _m_anyJAIndex . */ + std::vector _m_lp_JA; + /**similar for the observations...*/ + std::vector _m_lp_JO; + + /**A vector that stores the indices of joint actions that match the + * last parsed joint action. (if the last parsed joint action didn't + * contain any wildcards, the size of this vector is 1.) + * This vector is constructed by StoreLPJointAction by transforming + * the above vector (_m_lp_JA).*/ + std::vector _m_lp_JAI; + /**similar for the joint observations...*/ + std::vector _m_lp_JOI; + + /**A vector that stores the last parsed from-state index. + * Contrary to _m_lp_JAI above, this vector contains only 1 element,\ + * which can be the ANY_INDEX (_m_anyStateIndex) . 
This is more + * convenient, as it allows + * easier selection of the proper AddReward and AddObservation + * functions.*/ + std::vector _m_lp_fromSI; + /**idem, but for to-state.*/ + std::vector _m_lp_toSI; + + /**A vector used to store the state indices for the start state + * specification.*/ + std::vector _m_startStateListSI; + /**A boolean indicating whether the states in the start state list + * should be excluded (versus the default: uniform prob. over the + * specified states). */ + bool _m_startStateListExclude; + + /**Pointer to the first iterator. This is used to give informative error messages. + * (Specifically the position of the error.*/ + iterator_t* _m_first; +}; + +}// end namespace DPOMDPFormatParsing + +#endif /* !_PARSERDECPOMDPDISCRETE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserInterface.h b/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserInterface.h new file mode 100644 index 000000000..7a214840e --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserInterface.h @@ -0,0 +1,46 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _PARSERINTERFACE_H_ +#define _PARSERINTERFACE_H_ 1 + +/* the include directives */ +#include + +/// ParserInterface is an interface for parsers. +class ParserInterface +{ +private: + +protected: + +public: + // Constructor, destructor and copy assignment. 
+ /// (default) Constructor + ParserInterface(){}; + + virtual ~ParserInterface(){}; + + virtual void Parse() = 0; + +}; + + +#endif /* !_PARSERINTERFACE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserPOMDPDiscrete.cpp b/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserPOMDPDiscrete.cpp new file mode 100644 index 000000000..855858e2f --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserPOMDPDiscrete.cpp @@ -0,0 +1,102 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. 
+ */ + +#include "ParserPOMDPDiscrete.h" +#include + +#if USE_POMDPSOLVE_LIBRARY +extern "C" { +#include "mdp/mdp.h" +} +#endif + +using namespace std; + +ParserPOMDPDiscrete:: +ParserPOMDPDiscrete(POMDPDiscrete *problem) : + _m_problem(problem) +{ +} + +void ParserPOMDPDiscrete::Parse() +{ +#if USE_POMDPSOLVE_LIBRARY + string filename=_m_problem->GetProblemFile(); + + readMDP(const_cast(filename.c_str())); + + _m_problem->SetNrAgents(1); + _m_problem->SetNrStates(gNumStates); + _m_problem->SetDiscount(gDiscount); + if(gValueType==REWARD_value_type) + _m_problem->SetRewardType(REWARD); + else if(gValueType==COST_value_type) + _m_problem->SetRewardType(COST); + + _m_problem->SetNrActions(0,gNumActions); + _m_problem->ConstructJointActions(); + _m_problem->SetActionsInitialized(true); + + _m_problem->SetNrObservations(0,gNumObservations); + _m_problem->ConstructJointObservations(); + _m_problem->SetObservationsInitialized(true); + + vector isdVector(_m_problem->GetNrStates()); + for(Index s=0;s!=_m_problem->GetNrStates();++s) + isdVector.at(s)=gInitialBelief[s]; + + StateDistribution* isd= + new StateDistributionVector(isdVector); + _m_problem->SetISD(isd); + + _m_problem->CreateNewTransitionModel(); + for(Index a=0;a!=_m_problem->GetNrActions();++a) + for(Index s=0;s!=_m_problem->GetNrStates();++s) + for(Index s1=0;s1!=_m_problem->GetNrStates();++s1) + { + // this isn't the fastest way, since it does a lookup + // in the sparse matrix, but at least we don't have to + // mess with the internals + double p=getEntryMatrix(P[a],s,s1); + _m_problem->SetTransitionProbability(s,a,s1,p); + } + + _m_problem->CreateNewObservationModel(); + for(Index a=0;a!=_m_problem->GetNrActions();++a) + for(Index s=0;s!=_m_problem->GetNrStates();++s) + for(Index o=0;o!=_m_problem->GetNrObservations();++o) + { + // this isn't the fastest way, since it does a lookup + // in the sparse matrix, but at least we don't have to + // mess with the internals + double p=getEntryMatrix(R[a],s,o); + 
_m_problem->SetObservationProbability(a,s,o,p); + } + + + _m_problem->CreateNewRewardModel(); + for(Index a=0;a!=_m_problem->GetNrActions();++a) + for( Index i = Q->row_start[a]; + i < Q->row_start[a] + Q->row_length[a]; + i++ ) + _m_problem->SetReward(Q->col[i],a,Q->mat_val[i]); + + + + _m_problem->SetInitialized(true); +#else + throw(E("ParserPOMDPDiscrete needs to be compiled with USE_POMDPSOLVE_LIBRARY")); +#endif +} diff --git a/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserPOMDPDiscrete.h b/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserPOMDPDiscrete.h new file mode 100644 index 000000000..12d70a241 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserPOMDPDiscrete.h @@ -0,0 +1,53 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * Matthijs Spaan + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _PARSERPOMDPDISCRETE_H_ +#define _PARSERPOMDPDISCRETE_H_ 1 + +/* the include directives */ +#include +#include "Globals.h" + +#include "POMDPDiscrete.h" +#include "ParserInterface.h" + + +/**\brief \deprecated ParserPOMDPDiscrete is a parser for + * POMDPDiscrete that makes use of Tony Cassandra's POMDPsolve to do the parsing.*/ +class ParserPOMDPDiscrete : + public ParserInterface +{ +private: + + POMDPDiscrete *_m_problem; + +protected: + +public: + // Constructor, destructor and copy assignment. 
+ /// (default) Constructor + ParserPOMDPDiscrete(POMDPDiscrete *problem); + + void Parse(); + +}; + + +#endif /* !_PARSERPOMDPDISCRETE_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserPOMDPFormat_Spirit.cpp b/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserPOMDPFormat_Spirit.cpp new file mode 100644 index 000000000..6fe2f99eb --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserPOMDPFormat_Spirit.cpp @@ -0,0 +1,1261 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * + * For contact information please see the included AUTHORS file. + */ + +#include "ParserPOMDPFormat_Spirit.h" +//#include "CommentOrBlankParser.h" +#include "CommentBlankOrNewlineParser.h" + +using namespace std; + +namespace POMDPFormatParsing{ + +/**Outputs the file_position structure info (gotten from + * postion_iterator::get_position() ). */ +std::ostream& operator<<(std::ostream& out, file_position const& lc) +{ + return out << + "\nFile:\t" << lc.file << + "\nLine:\t" << lc.line << + "\nCol:\t" << lc.column << endl; +} + + +//Default constructor +ParserPOMDPFormat_Spirit::ParserPOMDPFormat_Spirit(POMDPDiscrete* problem) : +// UNIFORMTOK(this), + _m_decPOMDPDiscrete(problem) +{ + if(DEBUG_POMDP_PARSE) + { + cout << "Creating parser, referring to problem..."; + cout << problem->MultiAgentDecisionProcess::SoftPrint(); + cout << endl; + } + //need to reset the parse matrix the first time... 
+ _m_matrixModified = true; + _m_identListModified = false; + _m_startStateListExclude = false; + _m_nrA = 0; + _m_curAI = 0; + // UNIFORMTOK = uniformtok(this); + +} + +/* +// Actions +void ParserPOMDPFormat_Spirit::echo(iterator_t first, iterator_t const& last) +{ + while (first != last) + std::cout << *first++; +} + +*/ + +// Main program +void ParserPOMDPFormat_Spirit::Parse() +{ + string pf = GetPOMDPDiscrete()->GetProblemFile(); + const char* pf_c = pf.c_str(); + // Create a file iterator for this file + iterator_t_fi first_fi(pf_c); + if (!first_fi) + { + stringstream ss; ss << "Unable to open file: "< info; + + + //+ + // Parse + // Spirit changed when upgrading from 1.33.1 to 1.34.0: + // trailing spaces are no longer accepted, which the next line + // to fail on some problems (those with trailing comments). Adding + // ">> !end_p" to the grammar works around this problem. + info = parse(first, last , pomdpfp >> !end_p, skip_parser); + if (info.full) + { + if(DEBUG_POMDP_PARSE) cout << ">>>Parsing succeeded\n"; + } + else + { + iterator_t st = info.stop; + iterator_t st2 = info.stop; + for(Index i=1;i<200 && st2 != last ;i++)st2++; + string unparsed(st, st2); + cout << "stopped at: " << info.stop.get_position() + << "-> unparsed : "<< "\"" << unparsed << "\"\n"; + cout << "partial match? 
hit: \"" << info.hit << "\"\n"; + cout << "consumed chars - length: \"" << info.length << "\"\n"; + throw E("Parsing failed"); + } + + if(DEBUG_POMDP_PARSE) cout << "-------------------------\n"; + return; +} + + + +//used functor functions: +void ParserPOMDPFormat_Spirit::DebugOutput::operator()(iterator_t str, iterator_t end) const +{ + if(DEBUG_POMDP_PARSE) cout << s <<" parsed: \"" << string(str,end) << "\""<_m_lp_int = i; + _m_po->_m_lp_type = INT; +} +void ParserPOMDPFormat_Spirit::StoreLastParsedElement::operator()(const unsigned int i) const +{ + if(DEBUG_POMDP_PARSE) cout << "Stored Last Parsed: uint "<_m_lp_uint = i; + _m_po->_m_lp_type = UINT; +} + +void ParserPOMDPFormat_Spirit::StoreLastParsedElement::operator()(const double f) const +{ + if(DEBUG_POMDP_PARSE) cout << "Stored Last Parsed: double "<_m_lp_double = f; + _m_po->_m_lp_type = DOUBLE; +} + +void ParserPOMDPFormat_Spirit::StoreLastParsedElement::operator()(iterator_t str, iterator_t end) const +{ + _m_po->_m_lp_string.clear(); + _m_po->_m_lp_string = string(str, end); + if(DEBUG_POMDP_PARSE) cout << "Stored Last Parsed: string "<< + _m_po->_m_lp_string << endl; + _m_po->_m_lp_type = STRING; +} + + +void ParserPOMDPFormat_Spirit::SetNrAgents::operator()(iterator_t first, iterator_t last) const +{ + if(DEBUG_POMDP_PARSE) {cout <<"setting number of agents to 1..."<GetPOMDPDiscrete()->SetNrAgents(1); + _m_po->_m_nrA = 1; + return; +#if 0 + if (_m_po->_m_lp_type != UINT) + { + stringstream msg; + msg << "SetNrAgents::operator()(iterator_t first, iterator_t last) requires that last parsed element is a UINT!"<< + "(at " << _m_po->_m_first->get_position() << ")" << endl; + throw EParse(msg); + } + size_t nrA = _m_po->_m_lp_uint; + if(DEBUG_POMDP_PARSE){cout <<"agt_SetNrAgents - nrA="<GetPOMDPDiscrete()->SetNrAgents(nrA); + _m_po->_m_nrA = nrA; +#endif +} + +void ParserPOMDPFormat_Spirit::SetNrAgents::operator()(const int& i) const +{ + if(DEBUG_POMDP_PARSE) {cout <<"setting number of agents to 
1..."<GetPOMDPDiscrete()->SetNrAgents(1); + _m_po->_m_nrA = 1; + return; +#if 0 + if(DEBUG_POMDP_PARSE){cout << "i="<_m_lp_JA.push_back(_m_po->_m_anyJAIndex); + } +} +/* +void ParserPOMDPFormat_Spirit::StoreLPJointAction::operator()(iterator_t str, iterator_t end) const +{ + _m_po->_m_lp_JAI.clear(); + if(_m_isJointActionIndex) + { + //_m_lp_JA not necessary: clear it. + _m_po->_m_lp_JA.clear(); + _m_po->_m_lp_JAI.push_back(_m_po->_m_lp_int); + return; + } + try{ MatchingJointActions(0,_m_po->_m_lp_JA); } + catch(E e) + { + file_position fp = str.get_position(); + stringstream ermsg; + ermsg << e.SoftPrint() << endl << "(at " << fp << ")"<_m_lp_JA.clear(); +} +void ParserPOMDPFormat_Spirit::StoreLPJointAction::operator()(const unsigned int&) const +{ + _m_po->_m_lp_JAI.clear(); + if(_m_isJointActionIndex) + { + //_m_lp_JA not necessary: clear it. + _m_po->_m_lp_JA.clear(); + _m_po->_m_lp_JAI.push_back(_m_po->_m_lp_int); + return; + } + MatchingJointActions(0,_m_po->_m_lp_JA); + _m_po->_m_lp_JA.clear(); +} +*/ +void ParserPOMDPFormat_Spirit::StoreLPJointAction::Store() const +{ + _m_po->_m_lp_JAI.clear(); + //check to see if the last parsed index was a joint action index + if(_m_isJointActionIndex) + { + //_m_lp_JA not necessary: clear it. + _m_po->_m_lp_JA.clear(); + //do check that the last parsed element was a index (uint) + if (_m_po->_m_lp_type != UINT) + { + stringstream msg; + msg << "StoreLPJointAction::Store() requires that last parsed element is a UINT! 
(last parsed index was a joint action index)"<< endl; + throw EParse(msg); + } + _m_po->_m_lp_JAI.push_back(_m_po->_m_lp_uint); + return; + } + MatchingJointActions(0,_m_po->_m_lp_JA); + _m_po->_m_lp_JA.clear(); +} + +//Construct the set of joint actions that may match any wildcards recursively: +void ParserPOMDPFormat_Spirit::StoreLPJointAction::MatchingJointActions (Index curAgent, vector indIndices) const +{ + if(_m_po->_m_nrA != indIndices.size()) + { + stringstream msg; + msg << "individual indices vector has wrong size. ("<< indIndices.size() << " while nrAgents is " << _m_po->_m_nrA << ")"<< + "at(" << _m_po->_m_first->get_position() << ")" << endl;; + throw EParse(msg); + } + if(curAgent == _m_po->_m_nrA) //past last agent: all work done + { + Index jaI = 0; + try + { + jaI = _m_po->GetPOMDPDiscrete()->IndividualToJointActionIndices(indIndices); + _m_po->_m_lp_JAI.push_back(jaI); + } + catch(E e) + { + stringstream ss; ss << e.SoftPrint() << "(at "<< _m_po->_m_first-> get_position()<<")"<::iterator it = indIndices.begin(); + vector::iterator last = indIndices.end(); + while(it != last){ cout << *it <<" "; it++;} + cout << "> = "<_m_anyJAIndex) //<- IF THIS IS A WILDCARD + { + //do for each action of this agent + //size_t nrAc = _m_po->GetPOMDPDiscrete()->GetNrActions(curAgent); + size_t nrAc = _m_po->GetPOMDPDiscrete()->GetNrSingleAgentActions(); + for(Index ai=0; ai < nrAc; ai++) + { + indIndices[curAgent] = ai; + MatchingJointActions(curAgent+1, indIndices); + } + } + else + MatchingJointActions(curAgent+1, indIndices); + } +} + +void ParserPOMDPFormat_Spirit::WildCardJointObservation::operator()(iterator_t str, iterator_t end) const +{ + if(DEBUG_POMDP_PARSE ) + cout << "WildCardJointObservation: _m_lp_JOI.size()="<<_m_po->_m_lp_JOI.size(); + //this (can) contain elements from a failed indiv_action parse. 
+ _m_po->_m_lp_JO.clear(); + _m_po->_m_lp_JOI.clear(); + //this is different from how we handle joint actions: joint actions are + //immediately expanded (i.e. '*'/ANY_INDEX is replaced by all matching + //indices.) + //For joint observations, this is not practical, as for most common reward + //forms, this is not required. I.e., one will typically specify + //R: ja : s : * : * : prob + //then we want to call addReward(ja,s) and not expand the succesor states + //and joint observations... + + _m_po->_m_lp_JOI.push_back(_m_po->_m_anyJOIndex); +} + +/* +void ParserPOMDPFormat_Spirit::StoreLPJointObservation::operator()(const unsigned int&) const +{ + Store(); +} +void ParserPOMDPFormat_Spirit::StoreLPJointObservation::operator()(iterator_t str, iterator_t end) const +{ + Store(); +} +*/ +void ParserPOMDPFormat_Spirit::StoreLPJointObservation::Store() const +{ + _m_po->_m_lp_JOI.clear(); + if(_m_isJointObservationIndex) //as opposed to parsing individual indices + { + //_m_lp_JO not necessary: clear it. + _m_po->_m_lp_JO.clear(); + //do check that the last parsed element was a index (uint) + if (_m_po->_m_lp_type != UINT) + { + stringstream msg; + msg << "StoreLPJointObservation::Store() requires that last parsed element is a UINT! (last parsed index was a joint action index)"<< endl; + throw EParse(msg); + } + _m_po->_m_lp_JOI.push_back(_m_po->_m_lp_uint); + return; + } + MatchingJointObservations(0,_m_po->_m_lp_JO); + _m_po->_m_lp_JO.clear(); +} +/* +void ParserPOMDPFormat_Spirit::StoreLPJointObservation::operator()(iterator_t str, iterator_t end) const +{ + _m_po->_m_lp_JOI.clear(); + if(_m_isJointObservationIndex) //as opposed to parsing individual indices + { + //_m_lp_JO not necessary: clear it. 
+ _m_po->_m_lp_JO.clear(); + _m_po->_m_lp_JOI.push_back(_m_po->_m_lp_int); + return; + } + try{ MatchingJointObservations(0,_m_po->_m_lp_JO); } + catch(E e) + { + stringstream ermsg; + ermsg << e.SoftPrint() << endl << "(at " << str.get_position() << ")"<_m_lp_JO.clear(); +} +*/ +void ParserPOMDPFormat_Spirit::StoreLPJointObservation::MatchingJointObservations (Index curAgent, vector indIndices) const +{ + if(_m_po->_m_nrA != indIndices.size()) + { + stringstream msg; + msg << "individual obs indices vector has wrong size. ("<_m_nrA << ")" << "at(" << + _m_po->_m_first->get_position() << ")" << endl;; + throw EParse(msg); + } + if(curAgent == _m_po->_m_nrA) //past last agent: all work done + { + Index joI = 0; + try + { + joI = _m_po->GetPOMDPDiscrete()->IndividualToJointObservationIndices( + indIndices); + _m_po->_m_lp_JOI.push_back(joI); + } + catch(E e) + { + stringstream ss; ss <_m_first->get_position() << + ")" << endl; + throw EParse(ss); + } + if(DEBUG_POMDP_PARSE) + { + cout<<"MatchingJointObservations: joint observation index for < "; + vector::iterator it = indIndices.begin(); + vector::iterator last = indIndices.end(); + while(it != last){ cout << *it <<" "; it++;} + cout << "> = "<_m_anyJOIndex) + { + //do for each action of this agent + //size_t nrObs = _m_po->GetPOMDPDiscrete()->GetNrObservations(curAgent); + size_t nrObs = _m_po->GetPOMDPDiscrete()->GetNrSingleAgentObservations(); + for(Index oi=0; oi < nrObs; oi++) + { + indIndices[curAgent] = oi; + MatchingJointObservations(curAgent+1, indIndices); + } + } + else + MatchingJointObservations(curAgent+1, indIndices); + } +} + + +void ParserPOMDPFormat_Spirit::StoreLPFromState::operator()(iterator_t str, iterator_t end) const +{ + if(DEBUG_POMDP_PARSE) + cout << "StoreLPFromState: pushing "<_m_lp_sI"<_m_lp_fromSI.clear(); + if(_m_po->_m_lp_type == UINT) + { + unsigned int index = _m_po->_m_lp_uint; + size_t nrS = _m_po->GetPOMDPDiscrete()->GetNrStates(); + if(index >= nrS) + { + stringstream ss; 
ss<<"StoreLPFromState: '"<< index<< "' is not a valid state index!? Number of states is " << nrS <<" (at " <_m_lp_fromSI.push_back( _m_po->_m_lp_uint ); + } + else if(_m_po->_m_lp_type == STRING) + { + try + { + Index sI = _m_po->GetPOMDPDiscrete()->GetStateIndexByName( + _m_po->_m_lp_string); + _m_po->_m_lp_fromSI.push_back(sI); + } + catch(E e) + { + stringstream ermsg; ermsg << e.SoftPrint() << " (at " << + str.get_position() << ")"<_m_lp_type == ASTERICK ) + _m_po->_m_lp_fromSI.push_back(_m_po->_m_anyStateIndex); + else + throw E("StoreLPFromState expected that the last parsed type is a state index(uint), state name (string) or wilcard ('*')."); +} + + +void ParserPOMDPFormat_Spirit::StoreLPToState::operator()(iterator_t str, iterator_t end) const +{ + if(DEBUG_POMDP_PARSE) + cout << "StoreLPToState: pushing "<_m_lp_sI"<_m_lp_toSI.clear(); + if(_m_po->_m_lp_type == UINT) + { + unsigned int index = _m_po->_m_lp_uint; + size_t nrS = _m_po->GetPOMDPDiscrete()->GetNrStates(); + if(index >= nrS) + { + stringstream ss; ss<<"StoreLPToState: '"<< index<< "' is not a valid state index!? Number of states is " << nrS <<" (at " <_m_lp_toSI.push_back( _m_po->_m_lp_uint ); + } + else if(_m_po->_m_lp_type == STRING) + { + try + { + Index sI = _m_po->GetPOMDPDiscrete()->GetStateIndexByName( + _m_po->_m_lp_string); + _m_po->_m_lp_toSI.push_back(sI); + } + catch(E e) + { + stringstream ermsg; ermsg << e.SoftPrint() << " (at " << + str.get_position() << ")"<_m_lp_type == ASTERICK ) + _m_po->_m_lp_toSI.push_back(_m_po->_m_anyStateIndex); + else + throw E("StoreLPToState expected that the last parsed type is a state index(uint), state name (string) or wilcard ('*')."); +} + +void ParserPOMDPFormat_Spirit::ProcessTProb::operator()(iterator_t str, iterator_t end) const +{ + if(_m_po->_m_lp_type != DOUBLE) + { + stringstream ss; ss<<"ProcessTProb:last parsed type should be a double. 
(at " <_m_lp_double; + if( _m_po->_m_lp_fromSI.size() != 1 || + _m_po-> _m_lp_toSI.size() != 1 ) + { + stringstream ss; ss<<"ProcessTProb:_m_lp_fromSI or _m_lp_toSI has size != 1. (at " <_m_lp_fromSI[0]; + Index sucSI = _m_po-> _m_lp_toSI[0]; + Index jaI = 0; + if( sI != _m_po->_m_anyStateIndex && + sucSI != _m_po->_m_anyStateIndex && + _m_po->_m_lp_JAI.size() != 1 ) + { + jaI = _m_po->_m_lp_JAI[0]; + _m_po->GetPOMDPDiscrete()->SetTransitionProbability(sI, jaI, sucSI, prob); + if(DEBUG_POMDP_PARSE) + cout << "Setting T("<_m_anyStateIndex ) + { + _m_po->_m_lp_fromSI.clear(); + for(Index si=0; si < _m_po->GetPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_fromSI.push_back(si); + } + if( sucSI == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_toSI.clear(); + for(Index si=0; si < _m_po->GetPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_toSI.push_back(si); + } + vector::iterator sI_it = _m_po->_m_lp_fromSI.begin(); + vector::iterator sI_last = _m_po->_m_lp_fromSI.end(); + while(sI_it != sI_last) + { + sI = *sI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + jaI = *jaI_it; + vector::iterator sucSI_it = _m_po->_m_lp_toSI.begin(); + vector::iterator sucSI_last = _m_po->_m_lp_toSI.end(); + while(sucSI_it != sucSI_last) + { + sucSI = *sucSI_it; + _m_po->GetPOMDPDiscrete()->SetTransitionProbability(sI, jaI, sucSI, + prob); + if(DEBUG_POMDP_PARSE) + cout << "Setting T("<_m_lp_fromSI.size() != 1 ) + { + stringstream ss; ss<<"ProcessTRow:_m_lp_fromSI has size != 1. (at " <IsRowMatrixLP() ) + { + stringstream ss; ss<<"ProcessTRow: a row matrix should have been parsed. 
(at " < row = _m_po->_m_curMatrix[0]; + size_t nrS = _m_po->GetPOMDPDiscrete()->GetNrStates(); + if( row.size() != nrS) + { + stringstream ss; ss<<"ProcessTRow: exected a row matrix with nrStates="<_m_lp_fromSI[0]; + Index jaI = 0; + if( sI == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_fromSI.clear(); + for(Index si=0; si < _m_po->GetPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_fromSI.push_back(si); + } + + vector::iterator sI_it = _m_po->_m_lp_fromSI.begin(); + vector::iterator sI_last = _m_po->_m_lp_fromSI.end(); + while(sI_it != sI_last) + { + sI = *sI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + jaI = *jaI_it; + for(Index sucSI=0; sucSI < nrS; sucSI++) + { + _m_po->GetPOMDPDiscrete()->SetTransitionProbability(sI, jaI, sucSI, row[sucSI]); + if(DEBUG_POMDP_PARSE) + cout << "Setting T("<GetPOMDPDiscrete()->GetNrStates(); + if(_m_po->_m_lp_type == DOUBLE) + { + +#if POMDPFORMAT_NEWLINES_SKIPPED + /* code for POMDP file format, which skips newlines and thus delivers the matrix as a single row...*/ + if(!_m_po->IsDimensionOfMatrix(1,nrS*nrS)) + { + stringstream ss; + ss << "Expected a 1 x (nrS*nrS) row matrix."<<" (nrS="<< nrS <<"). 
At "<PrintDimensionOfMatrix() << ")" << endl; + throw E(ss); + } + vector& row_vector = _m_po->_m_curMatrix.at(0); + + //also need to loop through matchin (joint) actions, since wildcards might have been used + Index jaI = 0; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + jaI = *jaI_it; + Index row_vector_index = 0; + for(Index sI = 0; sI < nrS; sI++) + { + for(Index sucSI=0; sucSI < nrS; sucSI++) + { + double prob = row_vector.at(row_vector_index); // corresponds to _m_po->_m_curMatrix[sI][sucSI]; + _m_po->GetPOMDPDiscrete()->SetTransitionProbability(sI, jaI, sucSI, prob); + row_vector_index++; + if(DEBUG_POMDP_PARSE) + cout << "Setting T("<IsDimensionOfMatrix(nrS,nrS)) + { + stringstream ss; + ss << "Expected a nrS x nrS matrix."<<" (nrS="<< nrS <<"). At "<PrintDimensionOfMatrix() << ")" << endl; + throw E(ss); + } + Index jaI = 0; + for(Index sI = 0; sI < nrS; sI++) + { + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + jaI = *jaI_it; + for(Index sucSI=0; sucSI < nrS; sucSI++) + { + double prob = _m_po->_m_curMatrix[sI][sucSI]; + _m_po->GetPOMDPDiscrete()->SetTransitionProbability + (sI, jaI, sucSI, prob); + if(DEBUG_POMDP_PARSE) + cout << "Setting T("<_m_lp_type = "<<_m_po->_m_lp_type <_m_lp_type != DOUBLE) + { + stringstream ss; ss<<"ProcessOProb:last parsed type should be a double. (at " <_m_lp_double; + if( _m_po-> _m_lp_toSI.size() != 1 ) + { + stringstream ss; ss<<"ProcessOProb: _m_lp_toSI has size != 1. (at " <_m_lp_JOI.size() <= 0 ) + { + stringstream ss; ss<<"ProcessOProb: _m_lp_JOI has size <= 0. 
(at " < _m_lp_toSI[0]; + Index joI = _m_po->_m_lp_JOI[0]; + if( sucSI == _m_po->_m_anyStateIndex ) //if '*' -> replace by a list of all state indices + { + _m_po->_m_lp_toSI.clear(); + for(Index si=0; si < _m_po->GetPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_toSI.push_back(si); + } + if( joI == _m_po->_m_anyJOIndex ) //if '*' -> replace by list of all joint obs.indices + { + _m_po->_m_lp_JOI.clear(); + for(Index jo=0; jo<_m_po->GetPOMDPDiscrete()->GetNrJointObservations();jo++) + _m_po->_m_lp_JOI.push_back(jo); + } + + vector::iterator joI_it = _m_po->_m_lp_JOI.begin(); + vector::iterator joI_last = _m_po->_m_lp_JOI.end(); + while(joI_it != joI_last) + { + joI = *joI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + Index jaI = *jaI_it; + vector::iterator sucSI_it = _m_po-> + _m_lp_toSI.begin(); + vector::iterator sucSI_last = _m_po-> + _m_lp_toSI.end(); + while(sucSI_it != sucSI_last) + { + sucSI = *sucSI_it; + _m_po->GetPOMDPDiscrete()->SetObservationProbability + (jaI, sucSI, joI, prob); + if(DEBUG_POMDP_PARSE) + cout << "Setting O("<IsRowMatrixLP() ) + { + stringstream ss; ss<<"ProcessORow: a row matrix should have been parsed. 
(at " < row = _m_po->_m_curMatrix[0]; + size_t nrS = _m_po->GetPOMDPDiscrete()->GetNrStates(); + size_t nrJO = _m_po->GetPOMDPDiscrete()->GetNrJointObservations(); + if( row.size() != nrJO) + { + stringstream ss; ss<<"ProcessORow: exected a row matrix with nrJO="<_m_lp_toSI[0] == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_toSI.clear(); + for(Index si=0; si < nrS; si++) + _m_po->_m_lp_toSI.push_back(si); + } + vector::iterator sI_it = _m_po->_m_lp_toSI.begin(); + vector::iterator sI_last = _m_po->_m_lp_toSI.end(); + while(sI_it != sI_last) + { + Index sucSI = *sI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + Index jaI = *jaI_it; + for(Index joI=0; joI < nrJO; joI++) + { + _m_po->GetPOMDPDiscrete()->SetObservationProbability + (jaI, sucSI, joI, row[joI]); + if(DEBUG_POMDP_PARSE) + cout << "Setting O("<PrintDimensionOfMatrix() << ")" << endl; + throw E(ss); + } + vector& row_vector = _m_po->_m_curMatrix.at(0); + + //also need to loop through matchin (joint) actions, since wildcards might have been used + Index jaI = 0; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + jaI = *jaI_it; + Index row_vector_index = 0; + for(Index sucSI=0; sucSI < nrS; sucSI++) + { + for(Index joI = 0; joI < nrJO; joI++) + { + double prob = row_vector.at(row_vector_index); // corresponds to _m_po->_m_curMatrix[sucSI][joI]; + _m_po->GetPOMDPDiscrete()->SetObservationProbability(jaI, sucSI, joI, prob); + row_vector_index++; + if(DEBUG_POMDP_PARSE) + cout << "Setting O("<IsDimensionOfMatrix(nrS,nrJO)) + { + stringstream ss; ss << "Expected a nrS x nrJO matrix."<<" (nrS="<< nrS <<", nrJO="<PrintDimensionOfMatrix() << ")" << endl; + throw E(ss); + } + Index jaI = 0; + for(Index joI = 0; joI < nrJO; joI++) + { + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = 
_m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + jaI = *jaI_it; + for(Index sucSI=0; sucSI < nrS; sucSI++) + { + double prob = _m_po->_m_curMatrix[sucSI][joI]; + _m_po->GetPOMDPDiscrete()->SetObservationProbability(jaI, sucSI, joI, prob); + if(DEBUG_POMDP_PARSE) + cout << "Setting O("<_m_lp_type == UNIFORM) + { + double uniform_prob = 1.0 / nrJO; + for(Index joI = 0; joI < nrJO; joI++) + { + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + Index jaI = *jaI_it; + for(Index sucSI=0; sucSI < nrS; sucSI++) + { + double prob = uniform_prob;//_m_po->_m_curMatrix[sI][sucSI]; + _m_po->GetPOMDPDiscrete()->SetObservationProbability + (jaI, sucSI, joI, prob); + if(DEBUG_POMDP_PARSE) + cout << "Setting O("<_m_lp_type = "<<_m_po->_m_lp_type <_m_lp_type != DOUBLE) + { + stringstream ss; ss<<"ProcessR:last parsed type should be a double. (at " <_m_lp_double; + if( _m_po-> _m_lp_fromSI.size() != 1 ) + { + stringstream ss; ss<<"ProcessR: _m_lp_fromSI has size != 1. (at " < _m_lp_toSI.size() != 1 ) + { + stringstream ss; ss<<"ProcessR: _m_lp_toSI has size != 1. 
(at " <GetPOMDPDiscrete()->GetNrStates(); + //size_t nrJO = _m_po->GetPOMDPDiscrete()->GetNrJointObservations(); + Index sucSI = _m_po-> _m_lp_toSI[0]; + Index joI = _m_po->_m_lp_JOI[0]; + if( sucSI == _m_po->_m_anyStateIndex && joI == _m_po->_m_anyJOIndex) + { + // rewards of the form R: ja : s : * : * : %f, so we can simply do + // AddReward() + + if(_m_po->_m_lp_fromSI[0] == _m_po->_m_anyStateIndex) + { + _m_po->_m_lp_fromSI.clear(); + for(Index si=0; si < nrS; si++) + _m_po->_m_lp_fromSI.push_back(si); + } + + vector::iterator sI_it = _m_po->_m_lp_fromSI.begin(); + vector::iterator sI_last = _m_po->_m_lp_fromSI.end(); + while(sI_it != sI_last) + { + Index sI = *sI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + Index jaI = *jaI_it; + _m_po->GetPOMDPDiscrete()->SetReward(sI, jaI, reward); + if(DEBUG_POMDP_PARSE) + cout << "Setting R("<_m_anyJOIndex ) //but end-state is not ANY_INDEX + { + if( _m_po->_m_lp_fromSI[0] == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_fromSI.clear(); + for(Index si=0; si < _m_po->GetPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_fromSI.push_back(si); + } +/* Obsolete test... 
+ if( sucSI == ANY_INDEX ) + { + _m_po->_m_lp_toSI.clear(); + for(Index si=0; si < _m_po->GetPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_toSI.push_back(si); + }*/ + vector::iterator sI_it = _m_po->_m_lp_fromSI.begin(); + vector::iterator sI_last = _m_po->_m_lp_fromSI.end(); + while(sI_it != sI_last) + { + Index sI = *sI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + Index jaI = *jaI_it; + vector::iterator sucSI_it = _m_po->_m_lp_toSI.begin(); + vector::iterator sucSI_last = _m_po->_m_lp_toSI.end(); + while(sucSI_it != sucSI_last) + { + sucSI = *sucSI_it; + _m_po->GetPOMDPDiscrete()->SetReward(sI, jaI, sucSI, reward); + if(DEBUG_POMDP_PARSE) + cout << "Setting R("<_m_lp_fromSI[0] == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_fromSI.clear(); + for(Index si=0; si < _m_po->GetPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_fromSI.push_back(si); + } + if( _m_po->_m_lp_toSI[0] == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_toSI.clear(); + for(Index si=0; si < _m_po->GetPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_toSI.push_back(si); + } + vector::iterator joI_it = _m_po->_m_lp_JOI.begin(); + vector::iterator joI_last = _m_po->_m_lp_JOI.end(); + while(joI_it != joI_last) + { + joI = *joI_it; + vector::iterator sI_it = _m_po->_m_lp_fromSI.begin(); + vector::iterator sI_last = _m_po->_m_lp_fromSI.end(); + while(sI_it != sI_last) + { + Index sI = *sI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + Index jaI = *jaI_it; + vector::iterator sucSI_it = _m_po->_m_lp_toSI.begin(); + vector::iterator sucSI_last = _m_po->_m_lp_toSI.end(); + while(sucSI_it != sucSI_last) + { + sucSI = *sucSI_it; + _m_po->GetPOMDPDiscrete()->SetReward(sI, jaI, sucSI,joI, + reward); + if(DEBUG_POMDP_PARSE) + cout << "Setting R("<_m_lp_toSI.size() != 1 ) + { + stringstream ss; 
ss<<"ProcessRRow:_m_lp_fromSI has size != 1. (at " <IsRowMatrixLP() ) + { + stringstream ss; ss<<"ProcessRRow: a row matrix should have been parsed. (at " < row = _m_po->_m_curMatrix[0]; + size_t nrS = _m_po->GetPOMDPDiscrete()->GetNrStates(); + size_t nrJO = _m_po->GetPOMDPDiscrete()->GetNrJointObservations(); + if( row.size() != nrJO) + { + stringstream ss; ss<<"ProcessRRow: exected a row matrix with nrJO="<_m_lp_fromSI[0] == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_fromSI.clear(); + for(Index si=0; si < _m_po->GetPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_fromSI.push_back(si); + } + if( _m_po->_m_lp_toSI[0] == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_toSI.clear(); + for(Index si=0; si < nrS; si++) + _m_po->_m_lp_toSI.push_back(si); + } + + vector::iterator sI_it = _m_po->_m_lp_fromSI.begin(); + vector::iterator sI_last = _m_po->_m_lp_fromSI.end(); + while(sI_it != sI_last) + { + Index sI = *sI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = _m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + Index jaI = *jaI_it; + vector::iterator sucSI_it = _m_po->_m_lp_toSI.begin(); + vector::iterator sucSI_last = _m_po->_m_lp_toSI.end(); + while(sucSI_it != sucSI_last) + { + Index sucSI = *sucSI_it; + for(Index joI=0; joI < nrJO; joI++) + { + _m_po->GetPOMDPDiscrete()->SetReward(sI, jaI, sucSI,joI, + row[joI]); + if(DEBUG_POMDP_PARSE) + cout << "Setting R("<IsDimensionOfMatrix(nrS,nrJO)) + { + stringstream ss; ss << "Expected a nrS x nrJO matrix."<< + " (nrS="<< nrS <<", nrJO="<_m_lp_fromSI[0] == _m_po->_m_anyStateIndex ) + { + _m_po->_m_lp_fromSI.clear(); + for(Index si=0; si < _m_po->GetPOMDPDiscrete()->GetNrStates();si++) + _m_po->_m_lp_fromSI.push_back(si); + } + vector::iterator sI_it = _m_po->_m_lp_fromSI.begin(); + vector::iterator sI_last = _m_po->_m_lp_fromSI.end(); + while(sI_it != sI_last) + { + Index sI = *sI_it; + vector::iterator jaI_it = _m_po->_m_lp_JAI.begin(); + vector::iterator jaI_last = 
_m_po->_m_lp_JAI.end(); + while(jaI_it != jaI_last) + { + Index jaI = *jaI_it; + for(Index sucSI=0; sucSIGetPOMDPDiscrete()->SetReward(sI, jaI, sucSI,joI, + _m_po->_m_curMatrix[sucSI][joI]); + if(DEBUG_POMDP_PARSE) + { + cout << "Setting R(" << sI << "," << jaI <<"," << sucSI; + cout << "," << joI << ") = " << + _m_po->_m_curMatrix[sucSI][joI] << endl; + } + } + } + jaI_it++; + } + sI_it++; + } +} + + +}// end namespace POMDPFormatParsing diff --git a/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserPOMDPFormat_Spirit.h b/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserPOMDPFormat_Spirit.h new file mode 100644 index 000000000..c3b3ab0f0 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/parser/ParserPOMDPFormat_Spirit.h @@ -0,0 +1,1094 @@ +/* This file is part of the Multiagent Decision Process (MADP) Toolbox. + * + * The majority of MADP is free software released under GNUP GPL v.3. However, + * some of the included libraries are released under a different license. For + * more information, see the included COPYING file. For other information, + * please refer to the included README file. + * + * This file has been written and/or modified by the following people: + * + * Frans Oliehoek + * + * For contact information please see the included AUTHORS file. + */ + +/* Only include this header file once. */ +#ifndef _PARSERPOMDPFORMAT_H_ +#define _PARSERPOMDPFORMAT_H_ 1 + +/* the include directives */ +#include +#include +#include "Globals.h" +#include "POMDPDiscrete.h" +#include "EParse.h" + +#include "boost/version.hpp" + +#if USE_BOOST_SPIRIT_CLASSIC +#include "boost/spirit/include/classic_core.hpp" +#include "boost/spirit/include/classic_iterator.hpp" +#else +#include "boost/spirit/core.hpp" +#include "boost/spirit/iterator/file_iterator.hpp" +#include "boost/spirit/iterator/position_iterator.hpp" +#endif + +#include "ParserInterface.h" + + +using namespace boost::spirit; + +/* constants */ +//use subgrammars (or not?) 
+#define SUBGRAMMAR 0 + +//General parsing debug informations +#define DEBUG_POMDP_PARSE 0 + +//whether or not newlines are skipped when parsing: +//FAO: keep 1 - if set to 0 the code will not work +#define POMDPFORMAT_NEWLINES_SKIPPED 1 + +//debugging the 'comment' parser: +/*#define DEBUG_COMPARS 1*/ +//debugging the 'comment or blank parser' : +/*#define DEBUG_COBP 1*/ + +// the 'any' individual action to denote the wildcard '*' +#define ANY_INDEX -1 + +#include "CommentBlankOrNewlineParser.h" +namespace POMDPFormatParsing{ + +/**Outputs the file_position structure info (gotten from + * postion_iterator::get_position() ). */ +std::ostream& operator<<(std::ostream& out, file_position const& lc); + + + +/** \brief ParserPOMDPFormat_Spirit is a parser for the .pomdp file format. + * + * \todo: TODO: currently massive amount of code duplication from .dpomdp format + * parsing. At some point we may want to resolve, but first let's get it to + * work. + * + **/ +class ParserPOMDPFormat_Spirit : + public ParserInterface +{ + typedef char char_t; + typedef file_iterator iterator_t_fi; + typedef position_iterator iterator_t; + typedef scanner scanner_t; + typedef rule rule_t; + + //used to now what has been parsed: + enum parsed_t { INT, DOUBLE, UINT, STRING, ASTERICK, UNIFORM, IDENTITY }; + private: + + // TODO + POMDPDiscrete* _m_decPOMDPDiscrete; + + POMDPDiscrete* GetPOMDPDiscrete() + { + return _m_decPOMDPDiscrete; + } + + struct Initialize + { + ParserPOMDPFormat_Spirit* _m_po; + Initialize (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + _m_po->_m_first = &str; + } + }; + + struct DebugOutputNoParsed + { + std::string s; + DebugOutputNoParsed (std::string s2){s = s2;} + void operator()(iterator_t str, iterator_t end) const; + void operator()(const unsigned int&) const; + void operator()(const double &) const; + }; + struct DebugOutput + { + std::string s; + DebugOutput (std::string s2){s = s2;} + void 
operator()(iterator_t str, iterator_t end) const; + void operator()(const int&) const; + void operator()(const unsigned int&) const; + void operator()(const double&) const; + }; + struct StoreLastParsedElement + { + ParserPOMDPFormat_Spirit* _m_po; + StoreLastParsedElement(ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(const int i) const; + void operator()(const unsigned int i) const; + void operator()(const double f) const; + void operator()(iterator_t str, iterator_t end) const; + }; + struct SetLastParsedType + { + ParserPOMDPFormat_Spirit* _m_po; + parsed_t _m_lastParsedType; + SetLastParsedType (ParserPOMDPFormat_Spirit* po, parsed_t lpt) + { + _m_po = po; + _m_lastParsedType = lpt; + } + void operator()(iterator_t str, iterator_t end) const + { + _m_po->_m_lp_type = _m_lastParsedType; + } + }; + + struct NYI//Not Yet Implemented + { + std::string msg; + NYI(std::string s){msg = s;} + void operator()(iterator_t str, iterator_t end) const + { + file_position fp = str.get_position(); + std::stringstream ermsg; + ermsg << "sorry, \""<< msg <<"\" is not yet implemented." + << std::endl << "(at " << fp << ")"<_m_curAI = 0; + } + }; + struct SetNextAgentIndex + { + ParserPOMDPFormat_Spirit* _m_po; + SetNextAgentIndex (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + std::cerr << "Warning ParserPOMDPFormat_Spirit::SetNextAgentIndex called ?!? 
ignoring" << std::endl; + return; +/* + if(_m_po->_m_curAI < _m_po->_m_nrA) + _m_po->_m_curAI++; + else + std::cout << "SetNextAgentIndex - ERROR: current agent index ("<<_m_po->_m_curAI<<") out of bounds (number of agents="<<_m_po->_m_nrA<<")"<_m_curMatrix.push_back( std::vector() ); + } + }; + struct NextFloatOfRow + { + ParserPOMDPFormat_Spirit* _m_po; + NextFloatOfRow (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + if(_m_po->_m_lp_type != DOUBLE) + { + std::cout <<"NextFloatOfRow - ERROR parsing probabilities, which" + <<" means that doubles are expected, however last found" + <<" type is #"<<_m_po->_m_lp_type<_m_curMatrix.size(); + std::cout <<" _m_po->_m_curMatrix.size()= " << s <0) + std::cout <<" _m_po->_m_curMatrix.back().size()= " << + _m_po->_m_curMatrix.back().size()<_m_curMatrix.size(); + if(s > 0) + { + double d = _m_po->_m_lp_double; + //_m_curMatrix.back() isa vector + _m_po->_m_curMatrix.back().push_back(d); + _m_po->_m_matrixModified = true; + } + else + std::cout << "NextFloatOfRow - ERROR _m_curMatrix contains no "<< + "elements (i.e. 
there are no rows to add to...)."; + + if(DEBUG_POMDP_PARSE){//DEBUG + size_t s =_m_po->_m_curMatrix.size(); + std::cout <<" _m_po->_m_curMatrix.size()= " << s <0) + std::cout <<" _m_po->_m_curMatrix.back().size()= " << + _m_po->_m_curMatrix.back().size()<_m_curIdentList.push_back(std::string(str,end)); + _m_po->_m_identListModified = true; + + } + }; + struct SetNrStates + { + ParserPOMDPFormat_Spirit* _m_po; + size_t _m_target; + bool _m_haveTarget; + SetNrStates (ParserPOMDPFormat_Spirit* po){ + _m_po = po; + _m_target = 0; + _m_haveTarget = false; + } + SetNrStates (ParserPOMDPFormat_Spirit* po, size_t i){ + _m_po = po; + _m_target = i; + _m_haveTarget = true; + } + void operator()(const size_t& i) const + { + if(_m_haveTarget) + _m_po->GetPOMDPDiscrete()->SetNrStates(_m_target); + else + _m_po->GetPOMDPDiscrete()->SetNrStates( i ); + + } + void operator()(iterator_t str, iterator_t end) const + { + if(_m_haveTarget) + _m_po->GetPOMDPDiscrete()->SetNrStates(_m_target); + else if(_m_po->_m_lp_type == UINT)// || _m_lp_type + _m_po->GetPOMDPDiscrete()->SetNrStates(_m_po->_m_lp_uint); + else + throw E("SetNrStates: no target value set and last parsed data type != UINT..."); + } + }; + //this processes the start state specification in case of a specified + //probability vector and in case of the keyword uniform. + struct StartStateRowProbs + { + ParserPOMDPFormat_Spirit* _m_po; + StartStateRowProbs (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + if(_m_po->_m_lp_type == UNIFORM) + { + _m_po->GetPOMDPDiscrete()->SetUniformISD(); + return; + } + + //check size - it should be a row matrix... 
+ if( !_m_po->IsRowMatrixLP() ) + throw E("StartStateRowProbs: _m_po->_m_curMatrix should be a row vector!"); + if(_m_po->_m_curMatrix[0].size() != _m_po->GetPOMDPDiscrete()-> + GetNrStates()) + { + std::string err = "StartStateRowProbs: _m_po->_m_curMatrix"; + err += " [0] should contain NrStates(="; + std::stringstream ss; + ss << "StartStateRowProbs: _m_po->_m_curMatrix[0] " << + "should contain NrStates(=" << _m_po->GetPOMDPDiscrete()-> + GetNrStates() << ") entries! (not "<< + _m_po->_m_curMatrix[0].size()<<")\n"; + + throw E( ss.str().c_str() ); + } + + StateDistributionVector *isd=new StateDistributionVector(_m_po->_m_curMatrix[0]); + _m_po->GetPOMDPDiscrete()->SetISD(isd); + } + }; + //this adds states to the start state list - the first stage in + //processing the other ways of specifying the start state distribution. + struct AddStartState + { + ParserPOMDPFormat_Spirit* _m_po; + AddStartState (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + if(_m_po->_m_lp_type != STRING) + { + std::stringstream ss; ss << "SetStartState::operator()(iterator_t str, iterator_t end) - expected a string as last parsed type! (at"<GetPOMDPDiscrete()->GetStateIndexByName( + _m_po->_m_lp_string); + _m_po->_m_startStateListSI.push_back(sI); + } + catch(E e) + { + std::stringstream ss; ss << e.SoftPrint() << "(at"<< + str.get_position() <<")"<_m_lp_type != UINT) + { + std::stringstream ss; ss << "SetStartState::operator()(const unsigned int i&) - expected a UINT as last parsed type! (at"<< _m_po->_m_first->get_position() <<")"<_m_startStateListSI.push_back( _m_po->_m_lp_uint); + } + }; + //this functor specified that the states in the start state list should + //be excluded, not included. 
+ struct StartStateExludes + { + ParserPOMDPFormat_Spirit* _m_po; + StartStateExludes (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + _m_po->_m_startStateListExclude = true; + } + }; + struct ProcessStartStateList + { + ParserPOMDPFormat_Spirit* _m_po; + ProcessStartStateList (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(const unsigned int& i) const + { + ProcessList(); + } + void operator()(iterator_t str, iterator_t end) const + { + ProcessList(); + } + void ProcessList() const + { + size_t nrS = _m_po->GetPOMDPDiscrete()->GetNrStates(); + size_t listSize = _m_po->_m_startStateListSI.size(); + size_t nrIncS = (_m_po->_m_startStateListExclude)? nrS - listSize: + listSize; + double u_prob = 1.0 / nrIncS; + + std::vector init_probs; + if(!_m_po->_m_startStateListExclude) + //elems in list get uniform prob. + init_probs = std::vector(nrS, 0.0); + else //other elems get uniform prob + init_probs = std::vector(nrS, u_prob); + + std::vector::iterator it = _m_po->_m_startStateListSI.begin(); + std::vector::iterator last = _m_po->_m_startStateListSI.end(); + while(it != last) + { + init_probs[*it] = _m_po->_m_startStateListExclude? 
+ 0.0 : u_prob; + it++; + } + StateDistributionVector *isd=new StateDistributionVector(init_probs); + _m_po->GetPOMDPDiscrete()->SetISD(isd); + init_probs.clear(); + _m_po->_m_startStateListSI.clear(); + + } + }; + struct SetNrActions + { + ParserPOMDPFormat_Spirit* _m_po; + SetNrActions (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + if(DEBUG_POMDP_PARSE) + { + std::string s(str,end); + std::cout << "SetNrActions: setting "<<_m_po->_m_lp_uint<<" actions for agent "<<_m_po->_m_curAI<GetPOMDPDiscrete()->SetNrSingleAgentActions(_m_po->_m_lp_uint); + + } + void operator()(const unsigned int&) const + { + _m_po->GetPOMDPDiscrete()->SetNrSingleAgentActions(_m_po->_m_lp_uint); + if(DEBUG_POMDP_PARSE) + std::cout << "SetNrActions: set "<<_m_po->_m_lp_uint<<" actions for agent "<<_m_po->_m_curAI<GetPOMDPDiscrete()->AddSingleAgentAction(s); + if(DEBUG_POMDP_PARSE) + std::cout << "AddAction: added action "<_m_curAI<GetPOMDPDiscrete()->SetNrSingleAgentObservations(_m_po->_m_lp_uint); + if(DEBUG_POMDP_PARSE) + std::cout << "SetNrObservations: set "<<_m_po->_m_lp_uint<<" observations for agent "<<_m_po->_m_curAI<GetPOMDPDiscrete()->SetNrSingleAgentObservations(_m_po->_m_lp_uint); + if(DEBUG_POMDP_PARSE) + std::cout << "SetNrObservations: set "<<_m_po->_m_lp_uint<<" observations for agent "<<_m_po->_m_curAI<GetPOMDPDiscrete()->AddSingleAgentObservation(s); + if(DEBUG_POMDP_PARSE) + std::cout << "AddObservation: added action "<_m_curAI<_m_anyStateIndex = _m_po->GetPOMDPDiscrete()->GetNrStates(); + _m_po->GetPOMDPDiscrete()->SetStatesInitialized(true); + + } + }; + + struct InitializeActions + { + ParserPOMDPFormat_Spirit* _m_po; + InitializeActions (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + _m_po->GetPOMDPDiscrete()->ConstructJointActions(); + _m_po->GetPOMDPDiscrete()->SetActionsInitialized(true); + _m_po->_m_anyJAIndex = _m_po->GetPOMDPDiscrete()-> + 
GetNrJointActions(); + } + }; + struct InitializeObservations + { + ParserPOMDPFormat_Spirit* _m_po; + InitializeObservations (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + _m_po->GetPOMDPDiscrete()->ConstructJointObservations(); + _m_po->GetPOMDPDiscrete()->SetObservationsInitialized(true); + _m_po->_m_anyJOIndex = _m_po->GetPOMDPDiscrete()-> + GetNrJointObservations(); + } + }; + + struct AddModels + { + ParserPOMDPFormat_Spirit* _m_po; + AddModels (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + POMDPDiscrete* p = _m_po->GetPOMDPDiscrete(); + // add the transition model + p->CreateNewTransitionModel(); + p->CreateNewObservationModel(); + p->CreateNewRewardModel(); + //if(DEBUG_POMDP_PARSE) _m_po->GetPOMDPDiscrete()->PrintInfo(); + } + + }; + ///Stores the last-parsed action: + struct StoreLPAction + { + ParserPOMDPFormat_Spirit* _m_po; + StoreLPAction (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + + if(DEBUG_POMDP_PARSE) + std::cout << "StoreLPAction: pushing "<_m_lp_type == UINT) + { + unsigned int index = _m_po->_m_lp_uint; + if(index >= _m_po->GetPOMDPDiscrete()->GetNrJointActions() ) + { + std::stringstream ss; ss<<"StoreLPAction: '"<< index<< "' is not valid!? 
Number of actions is " << _m_po->GetPOMDPDiscrete()->GetNrJointActions() <<" (at " <_m_lp_JA.push_back(_m_po->_m_lp_uint); + } + else if(_m_po->_m_lp_type == STRING) + { + //make sure that _m_lp_JA is cleared after each joint action + //so we can do this: + Index curAgIndex = _m_po->_m_lp_JA.size(); + try + { + Index aI = _m_po->GetPOMDPDiscrete()->GetActionIndexByName( + _m_po->_m_lp_string, curAgIndex); + _m_po->_m_lp_JA.push_back(aI); + } + catch(E e) + { + std::stringstream ermsg; ermsg << e.SoftPrint() << " (at " << + str.get_position() << ")"<_m_lp_type == ASTERICK ) + _m_po->_m_lp_JA.push_back(_m_po->_m_anyJAIndex); + else + throw E("StoreLPAction expected that the last parsed type is a action index(uint), action name (string) or wilcard ('*')."); + } + }; + /**called before StoreLPJointAction in case of a wildcard '*' joint + * action. Effectively, this functions sets _m_lp_JA to a vector of + * _m_po->_m_anyJAIndexs (one for each agent) .*/ + struct WildCardJointAction + { + ParserPOMDPFormat_Spirit* _m_po; + WildCardJointAction (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + }; + struct StoreLPJointAction + { + ParserPOMDPFormat_Spirit* _m_po; + bool _m_isJointActionIndex; + StoreLPJointAction (ParserPOMDPFormat_Spirit* po, bool b = false) + { + _m_po = po; + _m_isJointActionIndex = b; + } + void operator()(iterator_t str, iterator_t end) const{Store();} + void operator()(const unsigned int&) const{Store();} + void Store() const; + /**This function constructs the vector (_m_lp_JAI) of joint actions + * that match with the vector with individual action indices. + * + * This is needed to properly deal with wild cards; a single action specification + * in the (d)pomdp file may correspond to manu (joint) actions. 
+ * + * */ + void MatchingJointActions (Index curAgent, std::vector indIndices) const; + + }; + struct StoreLPObservation + { + ParserPOMDPFormat_Spirit* _m_po; + StoreLPObservation (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + + if(DEBUG_POMDP_PARSE) + std::cout << "StoreLPObservation: pushing "<_m_lp_type == UINT) + _m_po->_m_lp_JO.push_back(_m_po->_m_lp_uint); + else if(_m_po->_m_lp_type == STRING) + { + //make sure that _m_lp_JO is cleared after each joint action + //so we can do this: + Index curAgIndex = _m_po->_m_lp_JO.size(); + try + { + Index aI = _m_po->GetPOMDPDiscrete()->GetObservationIndexByName( + _m_po->_m_lp_string, curAgIndex); + _m_po->_m_lp_JO.push_back(aI); + } + catch(E e) + { + std::stringstream ermsg; ermsg << e.SoftPrint() << " (at " << + str.get_position() << ")"<_m_lp_type == ASTERICK ) + _m_po->_m_lp_JO.push_back(_m_po->_m_anyJOIndex); + else + throw E("StoreLPObservation expected that the last parsed type is a action index(uint), action name (string) or wilcard ('*')."); + } + }; + /**called before StoreLPJointObservation in case of a wildcard '*' joint + * action. 
Effectively, this functions sets _m_lp_JO to a vector of + * ANY_INDEXs (=_m_anyJOIndex) (one for each agent) .*/ + struct WildCardJointObservation + { + ParserPOMDPFormat_Spirit* _m_po; + WildCardJointObservation (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + }; + struct StoreLPJointObservation + { + ParserPOMDPFormat_Spirit* _m_po; + bool _m_isJointObservationIndex; + StoreLPJointObservation (ParserPOMDPFormat_Spirit* po, bool b = false) + { + _m_po = po; + _m_isJointObservationIndex = b; + } + void operator()(iterator_t str, iterator_t end) const{Store();} + void operator()(const unsigned int&) const{Store();} + void Store() const; + + /**This function constructs the vector (_m_lp_JOI) of joint actions + * that match with the vector with individual action indices.*/ + void MatchingJointObservations(Index curAgent, std::vector indIndices) + const; + + }; + struct StoreLPFromState + { + ParserPOMDPFormat_Spirit* _m_po; + StoreLPFromState (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct StoreLPToState + { + ParserPOMDPFormat_Spirit* _m_po; + StoreLPToState (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct ProcessTProb + { + ParserPOMDPFormat_Spirit* _m_po; + ProcessTProb (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct ProcessTRow + { + ParserPOMDPFormat_Spirit* _m_po; + ProcessTRow (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct ProcessTMatrix + { + ParserPOMDPFormat_Spirit* _m_po; + ProcessTMatrix (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct ProcessOProb + { + ParserPOMDPFormat_Spirit* _m_po; + ProcessOProb (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void 
operator()(iterator_t str, iterator_t end) const; + + }; + struct ProcessORow + { + ParserPOMDPFormat_Spirit* _m_po; + ProcessORow (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct ProcessOMatrix + { + ParserPOMDPFormat_Spirit* _m_po; + ProcessOMatrix (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct ProcessR + { + ParserPOMDPFormat_Spirit* _m_po; + ProcessR (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct ProcessRRow + { + ParserPOMDPFormat_Spirit* _m_po; + ProcessRRow (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct ProcessRMatrix + { + ParserPOMDPFormat_Spirit* _m_po; + ProcessRMatrix (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const; + + }; + struct InitializePOMDP + { + ParserPOMDPFormat_Spirit* _m_po; + InitializePOMDP (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + _m_po->GetPOMDPDiscrete()->SetInitialized(true); + } + }; + struct SetNrAgents + { + ParserPOMDPFormat_Spirit* _m_po; + SetNrAgents(ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t first, iterator_t last) const; + void operator()(const int&) const; + }; +/* struct AddAgents + { + ParserPOMDPFormat_Spirit* _m_po; + AddAgents(ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t first, iterator_t last) const + { + _m_po->GetPOMDPDiscrete()->SetNrAgents(0); + std::vector::iterator it = _m_po->_m_curIdentList.begin(); + std::vector::iterator l = _m_po->_m_curIdentList.end(); + while(it != l) + { + _m_po->GetPOMDPDiscrete()->AddAgent(*it); + it++; + _m_po->_m_nrA++; + } + _m_po->ClearCurIdentList(); + } + }; +*/ + //discount param + static void dp_number(iterator_t str, iterator_t end); + 
struct dp_SetDiscountParam{ + ParserPOMDPFormat_Spirit* _m_po; + dp_SetDiscountParam(ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t first, iterator_t last) const + { + std::string s(first, last); + if(DEBUG_POMDP_PARSE){std::cout << "dp_SetDiscountParam::operator():discount="<< s << std::endl;} + double discount = -1.0; + if( _m_po->_m_lp_type == DOUBLE ) + { + if(DEBUG_POMDP_PARSE){std::cout <<"last parsed number was a DOUBLE"<< + "(_m_po->_m_lp_double = "<<_m_po->_m_lp_double<<")\n";} + discount = _m_po->_m_lp_double; + } + else if (_m_po->_m_lp_type == INT) + { + if(DEBUG_POMDP_PARSE){std::cout <<"last parsed number was a INT"<< + "(_m_po->_m_lp_int = "<<_m_po->_m_lp_int<<")\n";} + //the discount was parsed as an int + discount = (double) _m_po->_m_lp_int; + } + else if (_m_po->_m_lp_type == UINT) + { + if(DEBUG_POMDP_PARSE){std::cout <<"last parsed number was a UINT"<< + "(_m_po->_m_lp_uint = "<<_m_po->_m_lp_uint<<")\n";} + //the discount was parsed as an int + discount = (double) _m_po->_m_lp_uint; + } + else + { + std::cerr << "WARNING, discount not parsed correctly?!?, _m_po->_m_lp_type : " << _m_po->_m_lp_type << std::endl; + } + + if(DEBUG_POMDP_PARSE){std::cout <<"dp_SetDiscountParam - discount=" << discount << std::endl;} + _m_po->GetPOMDPDiscrete()->SetDiscount(discount); + } + }; + //value param + static void vp_value_tail(iterator_t str, iterator_t end); + struct vt_REWARDTOK + { + ParserPOMDPFormat_Spirit* _m_po; + vt_REWARDTOK (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + _m_po->GetPOMDPDiscrete()->SetRewardType(REWARD); + } + }; + struct vt_COSTTOK + { + ParserPOMDPFormat_Spirit* _m_po; + vt_COSTTOK (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + _m_po->GetPOMDPDiscrete()->SetRewardType(COST); + } + }; + //This is the functor which is called from the grammar (i.e. 
+ //dpomdp.spirit) to add the parsed state (names) to the problem. + struct AddState + { + ParserPOMDPFormat_Spirit* _m_po; + AddState(ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t first, iterator_t last) const + { + if(DEBUG_POMDP_PARSE){std::cout << "AddState - adding state \""<< + _m_po->_m_lp_string<<"\"\n";} + _m_po->GetPOMDPDiscrete()->AddState(_m_po->_m_lp_string); + } + }; + + struct POMDPFileParser : public grammar + { + ParserPOMDPFormat_Spirit* _m_parserObject; + POMDPDiscrete* _m_problem; + POMDPFileParser(ParserPOMDPFormat_Spirit* parserObject) + { + _m_parserObject = parserObject; + _m_problem = _m_parserObject->GetPOMDPDiscrete(); + } + template + struct definition + { +#if SUBGRAMMAR +#include "sub_grammar_defs.h" + agentstok_parser AGENTSTOK; + discounttok_parser DISCOUNTTOK; + valuestok_parser VALUESTOK; + statestok_parser STATESTOK; + actionstok_parser ACTIONSTOK; + observationstok_parser OBSERVATIONSTOK; + ttok_parser TTOK; + otok_parser OTOK; + rtok_parser RTOK; + //the parsers that need a reference: + uniformtok_parser UNIFORMTOK; + inttok_parser INTTOK; +#endif + definition(POMDPFileParser const& self) +#if SUBGRAMMAR + : + INTTOK(self._m_parserObject), + UNIFORMTOK(self._m_parserObject) +#endif + { +#include "pomdp.spirit" + } + +#if SUBGRAMMAR == 0 +rule AGENTSTOK, DISCOUNTTOK,VALUESTOK,STATESTOK,ACTIONSTOK,OBSERVATIONSTOK,TTOK,OTOK,RTOK,UNIFORMTOK, INTTOK; +#endif +rule + EOLTOK, IDENTITYTOK, REWARDTOK, COSTTOK, STARTTOK, INCLUDETOK, + EXCLUDETOK, RESETTOK, COLONTOK, ASTERICKTOK, PLUSTOK, MINUSTOK, + FLOATTOK, STRINGTOK, + pomdp_file, preamble, preamble_unordered, preamble_item, agents_param, + discount_param, value_param, value_tail, state_param, state_tail, + action_param, action_tail, obs_param, obs_tail, start_state, + start_state_list, param_list, param_spec, trans_prob_spec, trans_spec_tail, + obs_prob_spec, obs_spec_tail, reward_spec, reward_spec_tail, ui_matrix, + u_matrix, u_row, prob, action_param_line, 
obs_param_line, joint_obs, obs, + joint_action, paction, state_or_indiv_act_or_obs, agents_tail, prob_row_vector, + num_row_vector, reserved_word, state_list, float_r, int_r, action_list, + obs_list, floats_matrix, floats_row_vector, pomdp, from_state, to_state + ; + rule const& + start() const { return pomdp_file; } + }; + }; + + + protected: + + public: + + // Constructor, destructor and copy assignment. + /// (default) Constructor + ParserPOMDPFormat_Spirit(POMDPDiscrete* problem=0); + // Copy constructor. + //ParserPOMDPFormat_Spirit(const ParserPOMDPFormat_Spirit& a); + // Destructor. + //~ParserPOMDPFormat_Spirit(); + + //operators: + + //data manipulation (set) functions: + /**The function that starts the parsing.*/ + void Parse(); + + //get (data) functions: + + + ///The last parsed ... + unsigned int _m_lp_uint; + int _m_lp_int; + double _m_lp_double; + std::string _m_lp_string; + ///The type (INT or DOUBLE) of the last parsed number. + parsed_t _m_lp_type; + ///Whether the last 'number' used an optional sign ('+' or '-') + bool _m_lp_number_used_opt_sign; + ///Whether the last optional sign was a '-' + bool _m_lp_number_negated; + + //the number of agents - used often therefore also stored here + size_t _m_nrA; + //the current agent index. + Index _m_curAI; + //The following are special indices denoting any state, (joint) action + //or (joint) observation + Index _m_anyJAIndex; + Index _m_anyJOIndex; + Index _m_anyStateIndex; + + std::vector< std::vector > _m_curMatrix; + bool _m_matrixModified; + struct ResetCurMatrix //TODO: move to proper place + { + ParserPOMDPFormat_Spirit* _m_po; + ResetCurMatrix (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + if(DEBUG_POMDP_PARSE) + std::cout << "ResetCurMatrix() called... "; + if(_m_po->_m_matrixModified) + { + if(DEBUG_POMDP_PARSE) + std::cout << "resetting current matrix..." 
; + + _m_po->ClearCurMatrix(); + } + if(DEBUG_POMDP_PARSE) + std::cout << std::endl; + } + }; + void ClearCurMatrix()//TODO:move to proper place... + { + _m_curMatrix.clear();//no pointers stored, so this should not leak + _m_curMatrix.push_back( std::vector() ); + _m_matrixModified = false; + } + std::string PrintDimensionOfMatrix() + { + std::stringstream ss; + ss << _m_curMatrix.size() << "rows, of size: "; + for(Index r=0; r<_m_curMatrix.size(); r++) + ss << _m_curMatrix[r].size() << ", "; + return(ss.str()); + } + bool IsDimensionOfMatrix(size_t rows, size_t cols) + { + if(_m_curMatrix.size() != rows + 1) + return false; + for(Index r=0; r _m_curIdentList; + bool _m_identListModified; + struct ResetCurIdentList //TODO: move to proper place + { + ParserPOMDPFormat_Spirit* _m_po; + ResetCurIdentList (ParserPOMDPFormat_Spirit* po){_m_po = po;} + void operator()(iterator_t str, iterator_t end) const + { + //TODO:check whether the previous matrix was consistent + if(_m_po->_m_identListModified) + _m_po->ClearCurIdentList(); + } + }; + void ClearCurIdentList()//TODO:move to proper place... + { + _m_curIdentList.clear();//no pointers stored, should not leak + _m_identListModified = false; + } + + /**A vector in which the currently parsed individual action indices + * of a joint action are stored. (by StoreLPAction). + * wild-cards (asterik) are stored as _m_anyJAIndex . */ + std::vector _m_lp_JA; + /**similar for the observations...*/ + std::vector _m_lp_JO; + + /**A vector that stores the indices of joint actions that match the + * last parsed joint action. (if the last parsed joint action didn't + * contain any wildcards, the size of this vector is 1.) + * This vector is constructed by StoreLPJointAction by transforming + * the above vector (_m_lp_JA).*/ + std::vector _m_lp_JAI; + /**similar for the joint observations...*/ + std::vector _m_lp_JOI; + + /**A vector that stores the last parsed from-state index. 
+ * Contrary to _m_lp_JAI above, this vector contains only 1 element,\ + * which can be the ANY_INDEX (_m_anyStateIndex) . This is more + * convenient, as it allows + * easier selection of the proper AddReward and AddObservation + * functions.*/ + std::vector _m_lp_fromSI; + /**idem, but for to-state.*/ + std::vector _m_lp_toSI; + + /**A vector used to store the state indices for the start state + * specification.*/ + std::vector _m_startStateListSI; + /**A boolean indicating whether the states in the start state list + * should be excluded (versus the default: uniform prob. over the + * specified states). */ + bool _m_startStateListExclude; + + /**Pointer to the first iterator. This is used to give informative error messages. + * (Specifically the position of the error.*/ + iterator_t* _m_first; +#if 0 + private: + + protected: + + public: + // Constructor, destructor and copy assignment. + /// (default) Constructor + ParserPOMDPFormat_Spirit(); + /// Copy constructor. + ParserPOMDPFormat_Spirit(const ParserPOMDPFormat_Spirit& a); + /// Destructor. 
+ ~ParserPOMDPFormat_Spirit(); + /// Copy assignment operator + ParserPOMDPFormat_Spirit& operator= (const ParserPOMDPFormat_Spirit& o); + + //operators: + + //data manipulation (set) functions: + + //get (data) functions: +#endif +}; + +}// end namespace POMDPFormatParsing + +#endif /* !_PARSERPOMDPFORMAT_H_ */ + +// Local Variables: *** +// mode:c++ *** +// End: *** diff --git a/payntbind/src/synthesis/decpomdp/madp/src/parser/dpomdp.spirit b/payntbind/src/synthesis/decpomdp/madp/src/parser/dpomdp.spirit new file mode 100644 index 000000000..8d7908938 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/parser/dpomdp.spirit @@ -0,0 +1,480 @@ +//we can't simply skip end of lines as they have semantics in the multi-agent +//case - this means we have to explicitly parse the eol's +EOLTOK + = eol_p; + +#if SUBGRAMMAR == 0 +AGENTSTOK //new for DECENTRALIZED pomdp + = str_p("agents"); +DISCOUNTTOK + = str_p("discount"); +VALUESTOK + = str_p("values"); +STATESTOK + = str_p("states"); +ACTIONSTOK + = str_p("actions"); +OBSERVATIONSTOK + = str_p("observations"); +TTOK + = str_p("T"); +OTOK + = str_p("O"); +RTOK + = str_p("R"); +UNIFORMTOK + = EOLTOK >> str_p("uniform") + [SetLastParsedType(self._m_parserObject, UNIFORM)]; +INTTOK + = uint_p[StoreLastParsedElement(self._m_parserObject)]; +#endif +IDENTITYTOK + = EOLTOK >> str_p("identity") + [SetLastParsedType(self._m_parserObject, IDENTITY)]; +REWARDTOK + = str_p("reward"); +COSTTOK + = str_p("cost"); +STARTTOK + = str_p("start"); +INCLUDETOK + = str_p("include"); +EXCLUDETOK + = str_p("exclude"); +RESETTOK + = str_p("reset"); +COLONTOK + = str_p(":"); +ASTERICKTOK + = str_p("*")[SetLastParsedType(self._m_parserObject, ASTERICK)]; +PLUSTOK + = str_p("+"); +MINUSTOK + = str_p("-"); +FLOATTOK + = real_p[StoreLastParsedElement(self._m_parserObject)]; + //([0-9]+ \. [0-9]* | \. [0-9]+ | [0-9]+ ) ([eE] [+-]? [0-9]+)? 
+STRINGTOK + = (lexeme_d[alpha_p >> *(alnum_p | ch_p('_') | ch_p('-') ) ] + - reserved_word)[StoreLastParsedElement(self._m_parserObject)]; + ; + +reserved_word + = AGENTSTOK | DISCOUNTTOK | ACTIONSTOK | OBSERVATIONSTOK | TTOK | OTOK + | RTOK + ; + +dpomdp_file + = eps_p[Initialize(self._m_parserObject)] + >> dpomdp[InitializeDPOMDP(self._m_parserObject)] + ; + +dpomdp + = //NOTE start_state has been moved to a fixed place in the preamble + //cassandra's def: preamble start_state param_list + preamble[AddModels(self._m_parserObject)] + >> param_list + ; + +preamble + = //NOTE the order of the preamble is now fixed + //cassandra's def: preamble param_type | /* empty */ + agents_param[DebugOutput("preamble_agents_param")] >> + discount_param[DebugOutput("preamble_discount_param")] >> + value_param[DebugOutput("preamble_value_param")] >> + state_param[DebugOutput("preamble_state_param")] >> + start_state[DebugOutput("preamble_start_state")] + [InitializeStates(self._m_parserObject)] >> + action_param[DebugOutput("preamble_action_param")] + [InitializeActions(self._m_parserObject)] >> + obs_param[DebugOutput("preamble_obs_param")] + [InitializeObservations(self._m_parserObject)] +; +agents_param + = AGENTSTOK[DebugOutput("ap_AGENTSTOK")] >> COLONTOK >> agents_tail >> + EOLTOK + ; +agents_tail + = INTTOK [SetNrAgents(self._m_parserObject)] + | ( + +( STRINGTOK[DebugOutput("il_STRINGTOK_next")] + [NextStringOfIdentList(self._m_parserObject)] + ) + )[AddAgents(self._m_parserObject)] + ; +discount_param + = DISCOUNTTOK >> COLONTOK >> FLOATTOK[dp_SetDiscountParam(self. 
+ _m_parserObject)] >> EOLTOK + ; + +value_param + = VALUESTOK >> COLONTOK >> value_tail[DebugOutput("vp_value_tail")] >> EOLTOK + ; + +value_tail + = REWARDTOK[vt_REWARDTOK(self._m_parserObject)] + | COSTTOK[vt_COSTTOK(self._m_parserObject)] + ; + +state_param + = STATESTOK[SetNrStates(self._m_parserObject,0)] >> COLONTOK >> + state_tail[DebugOutput("sp_state_tail")] >> EOLTOK + ; + +state_tail + = INTTOK[SetNrStates(self._m_parserObject)] + [DebugOutput("st_INTTOK")] + | state_list[DebugOutput("st_ident_list")] + ; + +state_list + = // ident_list >> STRINGTOK | STRINGTOK + +( STRINGTOK[AddState(self._m_parserObject)] ) + ; + + +//new - each agent has its actions on its own line +action_param + = ACTIONSTOK[SetAgentIndex(self._m_parserObject, 0)] >> COLONTOK >> + +(EOLTOK >> action_param_line[DebugOutput("ap_action_param_line")]) + >> EOLTOK + ; +action_param_line + = action_tail[DebugOutput("apl_action_tail")][SetNextAgentIndex( + self._m_parserObject)] + ; +action_tail + = INTTOK[SetNrActions(self._m_parserObject)] + | action_list[DebugOutput("at_ident_list")] + ; +action_list + = +( STRINGTOK[AddAction(self._m_parserObject)] ) + ; + +obs_param + = OBSERVATIONSTOK[SetAgentIndex(self._m_parserObject, 0)] >> COLONTOK >> + +(EOLTOK >> obs_param_line[DebugOutput("op_obs_param_line")]) >> + EOLTOK + ; +obs_param_line + = obs_tail[DebugOutput("opl_obs_tail")][SetNextAgentIndex( + self._m_parserObject)] + ; +obs_tail + = INTTOK[SetNrObservations(self._m_parserObject)] + | obs_list[DebugOutput("ot_ident_list")] + ; +obs_list + = +( STRINGTOK[AddObservation(self._m_parserObject)] ) + ; +start_state + = + ( + STARTTOK[ResetCurMatrix(self._m_parserObject)] >> + ( + ( COLONTOK >> + ( + u_matrix[StartStateRowProbs(self._m_parserObject)] + | + STRINGTOK[AddStartState(self._m_parserObject)] + [ProcessStartStateList(self._m_parserObject)] + | + INTTOK[AddStartState(self._m_parserObject)] + [ProcessStartStateList(self._m_parserObject)] + ) + ) + | (INCLUDETOK >> COLONTOK >> 
start_state_list + [ProcessStartStateList(self._m_parserObject)]) + | (EXCLUDETOK[StartStateExludes(self._m_parserObject)] + >> COLONTOK >> start_state_list + [ProcessStartStateList(self._m_parserObject)]) + ) + ) >> EOLTOK +; + +/*changed to remove left recursion: */ +start_state_list + = // cassandra's def: start_state_list state_or_indiv_act_or_obs | state_or_indiv_act_or_obs + // easy substitute: +(state_or_indiv_act_or_obs) + // NOTE - this allows '*' as states here... + // we don't want that, so : + +(STRINGTOK[AddStartState(self._m_parserObject)] + [DebugOutput("ssl_STRINGTOK")] + | INTTOK[AddStartState(self._m_parserObject)] + [DebugOutput("ssl_INTTOK")]) + ; + +param_list + = *(param_spec) + ; + +param_spec + = trans_prob_spec + | obs_prob_spec + | reward_spec +; + +trans_prob_spec + = TTOK[DebugOutput("tps_TTOK")][ResetCurMatrix(self._m_parserObject)] >> COLONTOK >> trans_spec_tail >> EOLTOK + ; + +trans_spec_tail + = joint_action[DebugOutput("tst_joint_action")] + >> + COLONTOK[DebugOutput("tst_COLON")] + >> + ( + ( + from_state[DebugOutput("tst_statefrom")] >> COLONTOK >> + ( + to_state[DebugOutput("tst_stateto")] >> COLONTOK >> prob + [DebugOutput("tst_prob")] + [ProcessTProb(self._m_parserObject)] + | u_matrix[DebugOutput("tst_u_matrix")] + [ProcessTRow(self._m_parserObject)] + ) + ) + | ui_matrix[DebugOutput("tst_ui_matrix")] + [ProcessTMatrix(self._m_parserObject)] + ) + +; + +obs_prob_spec + = OTOK[DebugOutput("ops_OTOK")][ResetCurMatrix(self._m_parserObject)] >> COLONTOK >> obs_spec_tail >> EOLTOK + ; + +obs_spec_tail + = + joint_action[DebugOutput("ost_joint_action")] + >> + COLONTOK + >> + ( + ( + to_state[DebugOutput("ost_stateto")] >> COLONTOK >> + ( + joint_obs[DebugOutput("ost_joint_obs")] >> COLONTOK >> prob + [DebugOutput("ost_prob")] + [ProcessOProb(self._m_parserObject)] + | + u_matrix[DebugOutput("ost_u_matrix")] + [ProcessORow(self._m_parserObject)] + ) + ) + | u_matrix[DebugOutput("ost_u_matrix")] + 
[ProcessOMatrix(self._m_parserObject)] + ) + ; + +reward_spec + = RTOK[DebugOutput("rs_rtok")][ResetCurMatrix(self._m_parserObject)] >> COLONTOK >> reward_spec_tail >> EOLTOK + ; + +reward_spec_tail + = joint_action[DebugOutput("rst_joint_action")] + >> COLONTOK + >> from_state[DebugOutput("rst_statefrom")] + >> COLONTOK + >> + ( + ( + to_state[DebugOutput("rst_stateto")] + >> COLONTOK + >> + ( + joint_obs[DebugOutput("rst_jointobs")] >> COLONTOK + >> FLOATTOK[DebugOutput("rst_FLOATTOK")] + [ProcessR(self._m_parserObject)] + | + floats_matrix + [DebugOutput("rst_floats_matrix")] + [ProcessRRow(self._m_parserObject)] + + ) + ) + | + floats_matrix + [DebugOutput("rst_floats_matrix")] + [ProcessRMatrix(self._m_parserObject)] + ) + ; + +ui_matrix + = + ( + UNIFORMTOK[DebugOutput("ui_matrix UNIFORMTOK ")] + | IDENTITYTOK + | floats_matrix + ) + ; + +u_matrix + = UNIFORMTOK[DebugOutput("um_UNIFORMTOK")] +// | RESETTOK - NOTE:reset is not documented by cassandra, therefore +// omitted here + | floats_matrix[DebugOutput("um_floats_matrix")] + ; + +floats_matrix + = //floats_matrix >> floats | floats + +(floats_row_vector[DebugOutput("pm_floats_row_vector")] + [NextRowOfMatrix(self._m_parserObject)]) + ; +floats_row_vector + = + EOLTOK >> +(FLOATTOK[NextFloatOfRow(self._m_parserObject)]) + ; + + +//difference between prob and num is deprecated - only parse doubles +// and do typechecking when putting data in model...(easier) + +/* +prob_matrix + = //prob_matrix >> prob | prob + +(prob_row_vector[DebugOutput("pm_prob_row_vector")][NextRowOfMatrix( + self._m_parserObject)]) + ; +prob_row_vector + = + EOLTOK >> +(prob[DebugOutput("prv_prob")][NextProbOfRow(self._m_parserObject)]) + ; + +num_matrix + = //num_matrix >> number | number + +(num_row_vector[DebugOutput("nm_num_row_vector")]) + ; + +num_row_vector + = EOLTOK >> +(number[DebugOutput("nrv_number")]) + ; + +*/ + +from_state + = state_or_indiv_act_or_obs[StoreLPFromState(self._m_parserObject)] + ; +to_state + = 
state_or_indiv_act_or_obs[StoreLPToState(self._m_parserObject)] + ; +/* +state_or_indiv_act_or_obs + = INTTOK + | STRINGTOK + | ASTERICKTOK + ; +*/ +/*in order to discriminate between a joint action specified by + * 1) a joint action index (or '*') + * and + * 2) multiple individual actions (indices, '*'s or action names) + * + * we have to require at least two individual indices in case (2).*/ +joint_action + = ( state_or_indiv_act_or_obs[StoreLPAction(self._m_parserObject)] >> + +(state_or_indiv_act_or_obs[StoreLPAction(self._m_parserObject)]) + )[StoreLPJointAction(self._m_parserObject)] + | INTTOK[StoreLPJointAction(self._m_parserObject, true)] + | ASTERICKTOK + [WildCardJointAction(self._m_parserObject)] + [StoreLPJointAction(self._m_parserObject)] + + //NYI(" the * joint action ")]//to indicate any joint action... + ; + +/* the parsed sub-rules set the "_m_lp_type" variable, which is used by + * StoreLPAction above. Therefore, no functions need to be called here.*/ +state_or_indiv_act_or_obs + = INTTOK + | STRINGTOK + | ASTERICKTOK //[NYI(" the * individual action ")] + ; +joint_obs + = //first try to catch joint observations specified as "* * ... *" which + //denote all joint observations + // (we don't want to expand these individual joint observations...) + ( ASTERICKTOK >> +ASTERICKTOK ) + [WildCardJointObservation(self._m_parserObject)] + | + //the regular case of specifying individual observation(indice)s + ( state_or_indiv_act_or_obs[StoreLPObservation(self._m_parserObject)] >> + +(state_or_indiv_act_or_obs[StoreLPObservation(self._m_parserObject)]) + )[StoreLPJointObservation(self._m_parserObject)] + | + //a joint observation index + INTTOK[StoreLPJointObservation(self._m_parserObject, true)] + | + //a single '*' denoting all joint observations: + ASTERICKTOK + [WildCardJointObservation(self._m_parserObject)] + + //NYI(" the * joint obs ")]//to indicate any joint obs... 
+ ; +/* +state_or_indiv_act_or_obs + = INTTOK + | STRINGTOK + | ASTERICKTOK + ; + +*/ +/* old +joint_obs + = +(state_or_indiv_act_or_obs) + | ASTERICKTOK //to indicate any joint observation... + ; + +state_or_indiv_act_or_obs + = INTTOK + | STRINGTOK + | ASTERICKTOK + ; + +ident_list + = // ident_list >> STRINGTOK | STRINGTOK + +( STRINGTOK[DebugOutput("il_STRINGTOK_next")] + [NextStringOfIdentList(self._m_parserObject)] ) + ; +*/ + +prob + = FLOATTOK // try float first -> don't want to be left with ".024" + | INTTOK + ; + + + +//XXX: +// +//this works: +//R: open-left open-left : tiger-right : * : * +20 +//but this doesn't: +//R: open-right open-right : tiger-left : * : * 20 + +/* +number + // try float first -> don't want to be left with ".024" + = float_r + | int_r + ; + +float_r + = FLOATTOK[n_SetLPNumberType(self._m_parserObject,DOUBLE)] + ; + +int_r + = (optional_sign[n_SetLPNumUsedOptSign(self._m_parserObject,true)]) >> + INTTOK[n_SetLPNumberType(self._m_parserObject,INT)] + [n_ProcessNegation(self._m_parserObject)] + | INTTOK[n_SetLPNumUsedOptSign(self._m_parserObject,false)] + [n_SetLPNumberType(self._m_parserObject,INT)] + ; + +optional_sign + = PLUSTOK[os_SetNegated(self._m_parserObject,false)] + | MINUSTOK[os_SetNegated(self._m_parserObject,true)] +// | eps_p[DebugOutput("optional_sign_epsmatch")] +; + +*/ diff --git a/payntbind/src/synthesis/decpomdp/madp/src/parser/pomdp.spirit b/payntbind/src/synthesis/decpomdp/madp/src/parser/pomdp.spirit new file mode 100644 index 000000000..c52cf12c6 --- /dev/null +++ b/payntbind/src/synthesis/decpomdp/madp/src/parser/pomdp.spirit @@ -0,0 +1,596 @@ +#if SUBGRAMMAR == 0 +AGENTSTOK //new for DECENTRALIZED pomdp + = str_p("agents"); +DISCOUNTTOK + = str_p("discount"); +VALUESTOK + = str_p("values"); +STATESTOK + = str_p("states"); +ACTIONSTOK + = str_p("actions")[DebugOutput("'actions' keyword")]; +OBSERVATIONSTOK + = str_p("observations")[DebugOutput("'observations' keyword")]; +TTOK + = str_p("T")[DebugOutput("'T' 
keyword")]; +OTOK + = str_p("O")[DebugOutput("'O' keyword")]; +RTOK + = str_p("R")[DebugOutput("'R' keyword")]; +UNIFORMTOK + = str_p("uniform")[SetLastParsedType(self._m_parserObject, UNIFORM)]; +INTTOK + = uint_p[StoreLastParsedElement(self._m_parserObject)]; +#endif +IDENTITYTOK + = str_p("identity")[SetLastParsedType(self._m_parserObject, IDENTITY)]; +REWARDTOK + = str_p("reward"); +COSTTOK + = str_p("cost"); +STARTTOK + = str_p("start")[DebugOutput("'start' keyword")]; +INCLUDETOK + = str_p("include"); +EXCLUDETOK + = str_p("exclude"); +RESETTOK + = str_p("reset"); +COLONTOK + = str_p(":"); +ASTERICKTOK + = str_p("*")[SetLastParsedType(self._m_parserObject, ASTERICK)]; +PLUSTOK + = str_p("+"); +MINUSTOK + = str_p("-"); +FLOATTOK + = real_p[StoreLastParsedElement(self._m_parserObject)][DebugOutput("FLOATTOK")]; + //([0-9]+ \. [0-9]* | \. [0-9]+ | [0-9]+ ) ([eE] [+-]? [0-9]+)? +STRINGTOK + = (lexeme_d[alpha_p >> *(alnum_p | ch_p('_') | ch_p('-') ) ] + - reserved_word)[StoreLastParsedElement(self._m_parserObject)]; + ; + +reserved_word + = AGENTSTOK | DISCOUNTTOK | ACTIONSTOK | OBSERVATIONSTOK | TTOK | OTOK | RTOK | STARTTOK + ; + +pomdp_file + = eps_p[DebugOutput("start of pomdp file")][Initialize(self._m_parserObject)] + >> pomdp[DebugOutput("pomdp")][InitializePOMDP(self._m_parserObject)] + ; +/* +pomdp_file + = //NOTE start_state has been moved to a fixed place in the preamble + //cassandra's def: preamble start_state param_list + preamble + >> + param_list + ; +*/ + +pomdp + = + eps_p[DebugOutput("setting number of agents...")][SetNrAgents(self._m_parserObject)] >> + preamble_unordered[DebugOutput("preamble_unordered")][AddModels(self._m_parserObject)] + >> + //start_state is optional: + !start_state[DebugOutput("preamble_start_state")][InitializeStates(self._m_parserObject)] + >> + param_list[DebugOutput("param_list")] + ; + +preamble_unordered + = // 5 lines for preamble: + preamble_item >> + preamble_item >> + preamble_item >> + preamble_item >> + 
preamble_item + ; + + +preamble_item + = + discount_param[DebugOutput("preamble_discount_param")] | + value_param[DebugOutput("preamble_value_param")] | + state_param[DebugOutput("preamble_state_param")] | + action_param[DebugOutput("preamble_action_param")][InitializeActions(self._m_parserObject)] | + obs_param[DebugOutput("preamble_obs_param")][InitializeObservations(self._m_parserObject)] +; +preamble + = //NOTE the order of the preamble is now fixed + //cassandra's def: preamble param_type | /* empty */ + eps_p[DebugOutput("preamble: setting number of agents...")][SetNrAgents(self._m_parserObject)] >> + discount_param[DebugOutput("preamble_discount_param")] >> + value_param[DebugOutput("preamble_value_param")] >> + state_param[DebugOutput("preamble_state_param")] >> + + //this is a much more logical place for start state and compatible with the .dpomdp file format: + //(but of course not with Cassandra's format...) + !start_state[DebugOutput("preamble_start_state")][InitializeStates(self._m_parserObject)] >> + + action_param[DebugOutput("preamble_action_param")][InitializeActions(self._m_parserObject)] >> + obs_param[DebugOutput("preamble_obs_param")][InitializeObservations(self._m_parserObject)] +; + +//preamble +// = //NOTE the order of the preamble is now fixed +// //cassandra's def: preamble param_type | /* empty */ +// agents_param >> +// discount_param >> +// value_param >> +// state_param >> +// start_state >> +// action_param >> +// obs_param +//; + +/* deprecated +param_type = discount_param + | value_param + | state_param + | action_param + | obs_param +;*/ + +/* new:*/ +//agents_param +// = AGENTSTOK >> COLONTOK >> INTTOK +// = AGENTSTOK[DebugOutput("ap_AGENTSTOK")] >> COLONTOK >> agents_tail >> +// ; + +discount_param +// = DISCOUNTTOK >> COLONTOK >> number + = DISCOUNTTOK >> COLONTOK >> FLOATTOK[dp_SetDiscountParam(self._m_parserObject)] + ; + +value_param + = VALUESTOK >> COLONTOK >> value_tail[DebugOutput("vp_value_tail")] +// = VALUESTOK >> 
COLONTOK >> value_tail + ; + +value_tail +// = REWARDTOK +// | COSTTOK + = REWARDTOK[vt_REWARDTOK(self._m_parserObject)] + | COSTTOK[vt_COSTTOK(self._m_parserObject)] + ; + +state_param +// = STATESTOK >> COLONTOK >> state_tail + = STATESTOK[SetNrStates(self._m_parserObject,0)] >> COLONTOK >> + state_tail[DebugOutput("sp_state_tail")] + ; + +state_tail +// = INTTOK +// | ident_list + = INTTOK[SetNrStates(self._m_parserObject)] + [DebugOutput("st_INTTOK")] + | state_list[DebugOutput("st_ident_list")] + ; + +state_list + = // ident_list >> STRINGTOK | STRINGTOK + +( STRINGTOK[AddState(self._m_parserObject)] ) + ; + +//// for multiple agents, setting the action is more complex: +////new - each agent has its actions on its own line +//action_param +// = ACTIONSTOK[SetAgentIndex(self._m_parserObject, 0)] >> COLONTOK >> +// +(EOLTOK >> action_param_line[DebugOutput("ap_action_param_line")]) +// ; +//action_param_line +// = action_tail[DebugOutput("apl_action_tail")][SetNextAgentIndex( +// self._m_parserObject)] +// ; +//action_tail +// = INTTOK[SetNrActions(self._m_parserObject)] +// | action_list[DebugOutput("at_ident_list")] +// ; +//action_list +// = +( STRINGTOK[AddAction(self._m_parserObject)] ) +// ; +// +//// + +action_param + = ACTIONSTOK >> COLONTOK >> action_tail + ; + +action_tail +// = INTTOK +// | ident_list + = INTTOK[SetNrActions(self._m_parserObject)] + | action_list[DebugOutput("at_ident_list")] + ; +action_list + = +( STRINGTOK[AddAction(self._m_parserObject)] ) + ; + +obs_param +// = OBSERVATIONSTOK >> COLONTOK >> obs_param_tail + = OBSERVATIONSTOK[SetAgentIndex(self._m_parserObject, 0)] + >> COLONTOK + >> obs_tail +// +(EOLTOK >> obs_param_line[DebugOutput("op_obs_param_line")]) >> EOLTOK + + ; + +//obs_param_tail +// = INTTOK +// | ident_list +//; + +obs_tail + = INTTOK[SetNrObservations(self._m_parserObject)] + | obs_list[DebugOutput("ot_ident_list")] + ; +obs_list + = +( STRINGTOK[AddObservation(self._m_parserObject)] ) + ; + +start_state +// = 
STARTTOK >> COLONTOK >> u_matrix //distribution specified +// | STARTTOK >> COLONTOK >> STRINGTOK //name of start state +// | STARTTOK >> INCLUDETOK >> COLONTOK >> start_state_list +// | STARTTOK >> EXCLUDETOK >> COLONTOK >> start_state_list +// // empty not valid | /* empty */ + = + ( + STARTTOK[DebugOutput("STARTTOK")][ResetCurMatrix(self._m_parserObject)] >> + ( + ( COLONTOK[DebugOutput("COLONTOK")] >> + ( + u_matrix[DebugOutput("u_matrix")][StartStateRowProbs(self._m_parserObject)] + | + STRINGTOK[AddStartState(self._m_parserObject)] + [ProcessStartStateList(self._m_parserObject)] + | + INTTOK[AddStartState(self._m_parserObject)] + [ProcessStartStateList(self._m_parserObject)] + ) + ) + | (INCLUDETOK >> COLONTOK >> start_state_list + [ProcessStartStateList(self._m_parserObject)]) + | (EXCLUDETOK[StartStateExludes(self._m_parserObject)] + >> COLONTOK >> start_state_list + [ProcessStartStateList(self._m_parserObject)]) + ) + ) +; + + + +; + +/*changed to remove left recursion: */ +start_state_list + = // cassandra's def: start_state_list state | state + // easy substitute: +(state) + // NOTE - this allows '*' as states here... 
+ // we don't want that, so : +// +(STRINGTOK) +// | +(INTTOK) + +(STRINGTOK[AddStartState(self._m_parserObject)][DebugOutput("ssl_STRINGTOK")] + | INTTOK[AddStartState(self._m_parserObject)][DebugOutput("ssl_INTTOK")]) + + ; + +/*remove left recursion...*/ +param_list + = // cassandra's def: param_list param_spec | /* empty */ + *(param_spec) + ; + +param_spec + = trans_prob_spec[DebugOutput("trans_prob_spec")] + | obs_prob_spec[DebugOutput("obs_prob_spec")] + | reward_spec[DebugOutput("reward_spec")] +; + +trans_prob_spec +// = TTOK >> COLONTOK >> trans_spec_tail + = TTOK[DebugOutput("tps_TTOK")][ResetCurMatrix(self._m_parserObject)] >> COLONTOK >> trans_spec_tail + ; + +trans_spec_tail +// = paction >> COLONTOK >> state >> COLONTOK >> state >> prob +// | paction >> COLONTOK >> state >> u_matrix +// | paction >> ui_matrix + = paction[DebugOutput("tst_paction")] + >> + ( + ( //case 1 or 2 + COLONTOK[DebugOutput("tst_COLON2")] + >> + from_state[DebugOutput("tst_statefrom")] + >> + ( + ( //case 1 + COLONTOK[DebugOutput("tst_COLON3")] + >> to_state[DebugOutput("tst_stateto")] + >> prob[DebugOutput("tst_prob")][ProcessTProb(self._m_parserObject)] + ) + | + ( //case 2 + u_matrix[DebugOutput("tst_u_matrix")][ProcessTRow(self._m_parserObject)] + ) + ) + ) + | + ( //case 3 + ui_matrix[DebugOutput("tst_ui_matrix")][ProcessTMatrix(self._m_parserObject)] + ) + ) +; + + +obs_prob_spec +// = OTOK >> COLONTOK >> obs_spec_tail + = OTOK[DebugOutput("ops_OTOK")][ResetCurMatrix(self._m_parserObject)] >> COLONTOK >> obs_spec_tail + ; + +obs_spec_tail +// = paction >> COLONTOK >> state >> COLONTOK >> obs >> prob +// | paction >> COLONTOK >> state >> u_matrix +// | paction >> u_matrix + = paction[DebugOutput("tst_paction")] + >> + ( + ( //case 1 or 2 + COLONTOK[DebugOutput("tst_COLON2")] + >> + to_state[DebugOutput("ost_stateto")] + >> + ( + ( //case 1 + COLONTOK[DebugOutput("tst_COLON3")] + >> obs[DebugOutput("ost_obs")] + >> 
prob[DebugOutput("ost_prob")][ProcessOProb(self._m_parserObject)] + ) + | + ( //case 2 + u_matrix[DebugOutput("ost_u_matrix")][ProcessORow(self._m_parserObject)] + ) + ) + ) + | + ( //case 3 + ui_matrix[DebugOutput("tst_ui_matrix")][ProcessOMatrix(self._m_parserObject)] + ) + ) +; + ; + +reward_spec +// = RTOK >> COLONTOK >> reward_spec_tail + = RTOK[DebugOutput("rs_rtok")][ResetCurMatrix(self._m_parserObject)] >> COLONTOK >> reward_spec_tail[DebugOutput("reward_spec_tail")] + ; + +reward_spec_tail +// = paction >> COLONTOK >> state >> COLONTOK >> state >> COLONTOK >> obs >> number +// | paction >> COLONTOK >> state >> COLONTOK >> state >> num_matrix +// | paction >> COLONTOK >> state >> num_matrix + = paction[DebugOutput("rst_paction")] + >> COLONTOK + >> from_state[DebugOutput("rst_statefrom")] + >> + ( + ( + COLONTOK + >> to_state[DebugOutput("rst_stateto")] + >> COLONTOK + >> + ( //case 1 + obs[DebugOutput("rst_obs")] + >> FLOATTOK[DebugOutput("rst_FLOATTOK")][ProcessR(self._m_parserObject)] + | + //case 2: + floats_matrix[DebugOutput("rst_floats_matrix")][ProcessRRow(self._m_parserObject)] + ) + ) + | + //case 3 + floats_matrix[DebugOutput("rst_floats_matrix")][ProcessRMatrix(self._m_parserObject)] + ) + ; + + ; + +ui_matrix +// = UNIFORMTOK +// | IDENTITYTOK +// | prob_matrixx + = + ( + UNIFORMTOK[DebugOutput("ui_matrix UNIFORMTOK ")] + | IDENTITYTOK + | floats_matrix + ) + ; + + +u_matrix +// = UNIFORMTOK +// | RESETTOK +// | prob_matrix + = UNIFORMTOK[DebugOutput("um_UNIFORMTOK")] +// | RESETTOK - NOTE:reset is not documented by cassandra, therefore +// omitted here + | floats_matrix[DebugOutput("um_floats_matrix")] + ; + +floats_matrix + = //floats_matrix >> floats | floats + +(floats_row_vector[DebugOutput("pm_floats_row_vector")][NextRowOfMatrix(self._m_parserObject)]) + ; +floats_row_vector + = + lexeme_d + [ + + + ( + *blank_p[DebugOutput("blank")] + >> + FLOATTOK[NextFloatOfRow(self._m_parserObject)] + ) +// >> +// *blank_p[DebugOutput("blank")] +// 
>> +// eol_p[DebugOutput("eol")] + ] + ; + + +//difference between prob and num is deprecated - only parse doubles +// and do typechecking when putting data in model...(easier) + +/* +prob_matrix + = //prob_matrix >> prob | prob + +(prob_row_vector[DebugOutput("pm_prob_row_vector")][NextRowOfMatrix( + self._m_parserObject)]) + ; +prob_row_vector + = + +(prob[DebugOutput("prv_prob")][NextProbOfRow(self._m_parserObject)]) + ; + +num_matrix + = //num_matrix >> number | number + +(num_row_vector[DebugOutput("nm_num_row_vector")]) + ; + +num_row_vector + = +(number[DebugOutput("nrv_number")]) + ; + +*/ + +from_state + = state_or_indiv_act_or_obs[StoreLPFromState(self._m_parserObject)] + ; +to_state + = state_or_indiv_act_or_obs[StoreLPToState(self._m_parserObject)] + ; + +/* +state + = INTTOK + | STRINGTOK + | ASTERICKTOK + ; +*/ +/* the parsed sub-rules set the "_m_lp_type" variable, which is used by + * StoreLPAction above. Therefore, no functions need to be called here.*/ +state_or_indiv_act_or_obs + = INTTOK + | STRINGTOK + | ASTERICKTOK + ; + +paction +//original +// = +// INTTOK +// | STRINGTOK +// | ASTERICKTOK +//but probably easier to just treat as a joint action with 1 element: + = state_or_indiv_act_or_obs[StoreLPAction(self._m_parserObject)][StoreLPJointAction(self._m_parserObject)] + | INTTOK[StoreLPJointAction(self._m_parserObject, true)] + | ASTERICKTOK[WildCardJointAction(self._m_parserObject)][StoreLPJointAction(self._m_parserObject)] + + + + +#if 0 +/*in order to discriminate between a joint action specified by + * 1) a joint action index (or '*') + * and + * 2) multiple individual actions (indices, '*'s or action names) + * + * we have to require at least two individual indices in case (2).*/ +joint_action + = ( state_or_indiv_act_or_obs[StoreLPAction(self._m_parserObject)] >> + +(state_or_indiv_act_or_obs[StoreLPAction(self._m_parserObject)]) + )[StoreLPJointAction(self._m_parserObject)] + | INTTOK[StoreLPJointAction(self._m_parserObject, true)] + | 
ASTERICKTOK[WildCardJointAction(self._m_parserObject)][StoreLPJointAction(self._m_parserObject)] + + //NYI(" the * joint action ")]//to indicate any joint action... + ; + +joint_obs + = //first try to catch joint actions specified as "* * ... *" which + //denote all joint observations + // (we don't want to expand these individual joint observations...) + ( ASTERICKTOK >> +ASTERICKTOK ) + [WildCardJointObservation(self._m_parserObject)] + | + //the regular case of specifying individual observation(indice)s + ( state_or_indiv_act_or_obs[StoreLPObservation(self._m_parserObject)] >> + +(state_or_indiv_act_or_obs[StoreLPObservation(self._m_parserObject)]) + )[StoreLPJointObservation(self._m_parserObject)] + | + //a joint observation index + INTTOK[StoreLPJointObservation(self._m_parserObject, true)] + | + //a single '*' denoting all joint observations: + ASTERICKTOK[WildCardJointObservation(self._m_parserObject)] + + //NYI(" the * joint obs ")]//to indicate any joint obs... + ; +#endif + ; + +obs +// = INTTOK +// | STRINGTOK +// | ASTERICKTOK +//but probably easier to just treat as a joint observation with 1 element: + = state_or_indiv_act_or_obs[StoreLPObservation(self._m_parserObject)][StoreLPJointObservation(self._m_parserObject)] + | INTTOK[StoreLPJointObservation(self._m_parserObject, true)] //a joint observation index (observation actions, not the joint-action ones) + | ASTERICKTOK[WildCardJointObservation(self._m_parserObject)] //a single '*' denoting all joint observations + ; + +//ident_list +// = // ident_list >> STRINGTOK | STRINGTOK +//// +(STRINGTOK) +// +( STRINGTOK[DebugOutput("il_STRINGTOK_next")][NextStringOfIdentList(self._m_parserObject)] ) +// ; + +prob + = FLOATTOK // try float first -> don't want to be left with ".024" + | INTTOK + ; + +/* +number + // try float first -> don't want to be left with ".024" + = float_r + | int_r + ; + +float_r + = FLOATTOK[n_SetLPNumberType(self._m_parserObject,DOUBLE)] + ; + +int_r + = (optional_sign[n_SetLPNumUsedOptSign(self._m_parserObject,true)]) >> + INTTOK[n_SetLPNumberType(self._m_parserObject,INT)] 
+ [n_ProcessNegation(self._m_parserObject)] + | INTTOK[n_SetLPNumUsedOptSign(self._m_parserObject,false)] + [n_SetLPNumberType(self._m_parserObject,INT)] + ; + +optional_sign + = PLUSTOK[os_SetNegated(self._m_parserObject,false)] + | MINUSTOK[os_SetNegated(self._m_parserObject,true)] +// | eps_p[DebugOutput("optional_sign_epsmatch")] +; + +*/ diff --git a/payntbind/src/synthesis/helpers.cpp b/payntbind/src/synthesis/helpers.cpp new file mode 100644 index 000000000..00b6413ee --- /dev/null +++ b/payntbind/src/synthesis/helpers.cpp @@ -0,0 +1,62 @@ +#include "synthesis.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace synthesis { + +template +std::shared_ptr transformUntilToEventually( + storm::logic::Formula const& formula +) { + auto const& of = formula.asOperatorFormula(); + bool is_reward = of.isRewardOperatorFormula(); + + auto ef = std::make_shared( + of.getSubformula().asUntilFormula().getRightSubformula().asSharedPointer(), + !is_reward ? 
storm::logic::FormulaContext::Probability : storm::logic::FormulaContext::Reward); + + std::shared_ptr modified_formula; + if(!is_reward) { + modified_formula = std::make_shared(ef, of.getOperatorInformation()); + } else { + modified_formula = std::make_shared(ef, of.asRewardOperatorFormula().getRewardModelName(), of.getOperatorInformation()); + } + + return modified_formula; +} + +} + + +void define_helpers(py::module& m) { + + m.def("set_loglevel_off", []() { storm::utility::setLogLevel(l3pp::LogLevel::OFF); }, "set loglevel for storm to off"); + + m.def("set_precision_native", [](storm::NativeSolverEnvironment& nsenv, double value) { + nsenv.setPrecision(storm::utility::convertNumber(value)); + }); + m.def("set_precision_minmax", [](storm::MinMaxSolverEnvironment& nsenv, double value) { + nsenv.setPrecision(storm::utility::convertNumber(value)); + }); + + m.def("transform_until_to_eventually", &synthesis::transformUntilToEventually, py::arg("formula")); + + m.def("multiply_with_vector", [] (storm::storage::SparseMatrix matrix,std::vector vector) { + std::vector result(matrix.getRowCount()); + matrix.multiplyWithVector(vector, result); + return result; + }, py::arg("matrix"), py::arg("vector")); + +} + diff --git a/payntbind/src/synthesis/pomdp/PomdpManager.cpp b/payntbind/src/synthesis/pomdp/PomdpManager.cpp new file mode 100644 index 000000000..eaac2756d --- /dev/null +++ b/payntbind/src/synthesis/pomdp/PomdpManager.cpp @@ -0,0 +1,318 @@ +#include "PomdpManager.h" + +#include "storm/exceptions/InvalidArgumentException.h" +#include "storm/exceptions/NotSupportedException.h" +#include "storm/storage/sparse/ModelComponents.h" +#include "storm/storage/SparseMatrix.h" +#include "storm/models/sparse/StandardRewardModel.h" + +namespace storm { + namespace synthesis { + + + template + PomdpManager::PomdpManager(storm::models::sparse::Pomdp const& pomdp) + : pomdp(pomdp) { + STORM_LOG_THROW(pomdp.isCanonic(), storm::exceptions::InvalidArgumentException, "POMDP must be 
canonic"); + + auto num_prototype_states = pomdp.getNumberOfStates(); + auto num_prototype_rows = pomdp.getNumberOfChoices(); + auto num_observations = pomdp.getNrObservations(); + this->observation_actions.resize(num_observations,0); + this->observation_successors.resize(num_observations); + this->prototype_row_index.resize(num_prototype_rows,0); + + std::vector> observation_successor_sets; + observation_successor_sets.resize(num_observations); + + for(uint64_t prototype_state = 0; prototype_state < num_prototype_states; prototype_state++) { + auto observation = pomdp.getObservation(prototype_state); + + auto const& row_group_indices = pomdp.getTransitionMatrix().getRowGroupIndices(); + uint64_t row_index = 0; + for ( + uint64_t prototype_row = row_group_indices[prototype_state]; + prototype_row < row_group_indices[prototype_state + 1]; + prototype_row++ + ) { + this->prototype_row_index[prototype_row] = row_index; + row_index++; + + for(auto const &entry: this->pomdp.getTransitionMatrix().getRow(prototype_row)) { + auto dst = entry.getColumn(); + auto dst_obs = this->pomdp.getObservation(dst); + observation_successor_sets[observation].insert(dst_obs); + } + } + + if(this->observation_actions[observation] != 0) { + continue; + } + this->observation_actions[observation] = pomdp.getTransitionMatrix().getRowGroupSize(prototype_state); + } + for(uint64_t obs = 0; obs < num_observations; obs++) { + this->observation_successors[obs] = std::vector( + observation_successor_sets[obs].begin(), + observation_successor_sets[obs].end() + ); + } + + + this->observation_memory_size.resize(num_observations, 1); + this->prototype_duplicates.resize(num_prototype_states); + + this->max_successor_memory_size.resize(num_observations); + } + + + template + void PomdpManager::buildStateSpace() { + this->num_states = 0; + this->state_prototype.clear(); + this->state_memory.clear(); + for(uint64_t prototype = 0; prototype < this->pomdp.getNumberOfStates(); prototype++) { + auto obs = 
this->pomdp.getObservation(prototype); + auto memory_size = this->observation_memory_size[obs]; + this->prototype_duplicates[prototype].clear(); + this->prototype_duplicates[prototype].reserve(memory_size); + for(uint64_t memory = 0; memory < memory_size; memory++) { + this->prototype_duplicates[prototype].push_back(this->num_states); + this->state_prototype.push_back(prototype); + this->state_memory.push_back(memory); + this->num_states++; + } + } + } + + + template + uint64_t PomdpManager::translateState(uint64_t prototype, uint64_t memory) { + if(memory >= this->prototype_duplicates[prototype].size()) { + memory = 0; + } + return this->prototype_duplicates[prototype][memory]; + } + + + template + void PomdpManager::buildTransitionMatrixSpurious() { + // for each observation, define the maximum successor memory size + // this will define the number of copies we need to make of each row + for(uint64_t obs = 0; obs < this->pomdp.getNrObservations(); obs++) { + uint64_t max_mem_size = 0; + for(auto dst_obs: this->observation_successors[obs]) { + if(max_mem_size < this->observation_memory_size[dst_obs]) { + max_mem_size = this->observation_memory_size[dst_obs]; + } + } + this->max_successor_memory_size[obs] = max_mem_size; + } + + this->row_groups.resize(this->num_states+1); + this->row_prototype.clear(); + this->row_memory.clear(); + + // TODO can simplify this: state (s,x) will have the same rows as state (s,0) + for(uint64_t state = 0; state < this->num_states; state++) { + this->row_groups[state] = this->row_prototype.size(); + auto prototype_state = this->state_prototype[state]; + auto observation = this->pomdp.getObservation(prototype_state); + auto const& row_group_indices = this->pomdp.getTransitionMatrix().getRowGroupIndices(); + for ( + uint64_t prototype_row = row_group_indices[prototype_state]; + prototype_row < row_group_indices[prototype_state + 1]; + prototype_row++ + ) { + // create the required number of copies of this row + // each transition will 
be associated with its own memory update + for(uint64_t dst_mem = 0; dst_mem < max_successor_memory_size[observation]; dst_mem++) { + this->row_prototype.push_back(prototype_row); + this->row_memory.push_back(dst_mem); + } + } + } + this->num_rows = this->row_prototype.size(); + this->row_groups[this->num_states] = this->num_rows; + } + + + template + void PomdpManager::resetDesignSpace() { + auto num_observations = this->pomdp.getNrObservations(); + this->num_holes = 0; + this->action_holes.clear(); + this->action_holes.resize(num_observations); + this->memory_holes.clear(); + this->memory_holes.resize(num_observations); + this->hole_options.clear(); + + this->row_action_hole.clear(); + this->row_action_hole.resize(this->num_rows); + this->row_action_option.clear(); + this->row_action_option.resize(this->num_rows); + this->row_memory_hole.clear(); + this->row_memory_hole.resize(this->num_rows); + this->row_memory_option.clear(); + this->row_memory_option.resize(this->num_rows); + } + + + template + void PomdpManager::buildDesignSpaceSpurious() { + this->resetDesignSpace(); + + // for each (z,n) create an action and a memory hole (if necessary) + // store hole range + // ? inverse mapping ? 
+ for(uint64_t obs = 0; obs < this->pomdp.getNrObservations(); obs++) { + if(this->observation_actions[obs] > 1) { + for(uint64_t mem = 0; mem < this->observation_memory_size[obs]; mem++) { + this->action_holes[obs].push_back(this->num_holes); + this->hole_options.push_back(this->observation_actions[obs]); + // std::cout << "created A(" << obs << "," << mem << ") = " << this->num_holes << " in {} of size " << this->observation_actions[obs] << std::endl; + this->num_holes++; + } + } + if(this->max_successor_memory_size[obs] > 1) { + for(uint64_t mem = 0; mem < this->observation_memory_size[obs]; mem++) { + this->memory_holes[obs].push_back(this->num_holes); + this->hole_options.push_back(this->max_successor_memory_size[obs]); + // std::cout << "created N(" << obs << "," << mem << ") = " << this->num_holes << " in {} of size " << this->max_successor_memory_size[obs] << std::endl; + this->num_holes++; + } + } + } + + // map each row to some action (memory) hole (if applicable) and its value + for(uint64_t state = 0; state < this->num_states; state++) { + auto prototype = this->state_prototype[state]; + auto obs = this->pomdp.getObservation(prototype); + auto mem = this->state_memory[state]; + for (uint64_t row = this->row_groups[state]; row < this->row_groups[state+1]; row++) { + auto prototype_row = this->row_prototype[row]; + auto row_index = this->prototype_row_index[prototype_row]; + auto row_mem = this->row_memory[row]; + if(this->observation_actions[obs] > 1) { + // there is an action hole that corresponds to this state + auto action_hole = this->action_holes[obs][mem]; + this->row_action_hole[row] = action_hole; + this->row_action_option[row] = row_index; + } else { + // no corresponding action hole + this->row_action_hole[row] = this->num_holes; + } + if(this->max_successor_memory_size[obs] > 1) { + // there is a memory hole that corresponds to this state + auto memory_hole = this->memory_holes[obs][mem]; + this->row_memory_hole[row] = memory_hole; + 
this->row_memory_option[row] = row_mem; + } else { + this->row_memory_hole[row] = this->num_holes; + } + // std::cout << "row " << row << ": A[" << row_action_hole[row] << "]=" << row_action_option[row] << ", N[" << row_memory_hole[row] << "]=" << row_memory_option[row] << std::endl; + } + } + } + + + template + std::shared_ptr> PomdpManager::constructMdp() { + this->buildStateSpace(); + this->buildTransitionMatrixSpurious(); + + storm::storage::sparse::ModelComponents components; + components.transitionMatrix = this->constructTransitionMatrix(); + // TODO remove unreachable states + components.stateLabeling = this->constructStateLabeling(); + for (auto const& reward_model : pomdp.getRewardModels()) { + auto constructed = this->constructRewardModel(reward_model.second); + components.rewardModels.emplace(reward_model.first, constructed); + } + this->mdp = std::make_shared>(std::move(components)); + + this->buildDesignSpaceSpurious(); + + return this->mdp; + } + + template + storm::models::sparse::StateLabeling PomdpManager::constructStateLabeling() { + storm::models::sparse::StateLabeling labeling(this->num_states); + for (auto const& label : pomdp.getStateLabeling().getLabels()) { + storm::storage::BitVector label_flags(this->num_states, false); + + if (label == "init") { + // init label is only assigned to states with the initial memory state + for (auto const& prototype : pomdp.getStateLabeling().getStates(label)) { + label_flags.set(translateState(prototype, 0)); + } + } else { + for (auto const& prototype : pomdp.getStateLabeling().getStates(label)) { + for(auto duplicate: this->prototype_duplicates[prototype]) { + label_flags.set(duplicate); + } + } + } + labeling.addLabel(label, std::move(label_flags)); + } + return labeling; + } + + + template + storm::storage::SparseMatrix PomdpManager::constructTransitionMatrix() { + storm::storage::SparseMatrixBuilder builder( + this->num_rows, this->num_states, 0, true, true, this->num_states + ); + for(uint64_t state = 
0; state < this->num_states; state++) { + builder.newRowGroup(this->row_groups[state]); + for (uint64_t row = this->row_groups[state]; row < this->row_groups[state+1]; row++) { + auto prototype_row = this->row_prototype[row]; + auto dst_mem = this->row_memory[row]; + for(auto const &entry: this->pomdp.getTransitionMatrix().getRow(prototype_row)) { + auto dst = this->translateState(entry.getColumn(),dst_mem); + builder.addNextValue(row, dst, entry.getValue()); + } + } + } + + return builder.build(); + } + + + template + storm::models::sparse::StandardRewardModel PomdpManager::constructRewardModel( + storm::models::sparse::StandardRewardModel const& reward_model + ) { + std::optional> state_rewards, action_rewards; + STORM_LOG_THROW(!reward_model.hasStateRewards(), storm::exceptions::NotSupportedException, "state rewards are currently not supported."); + STORM_LOG_THROW(!reward_model.hasTransitionRewards(), storm::exceptions::NotSupportedException, "transition rewards are currently not supported."); + + action_rewards = std::vector(); + for(uint64_t row = 0; row < this->num_rows; row++) { + auto prototype = this->row_prototype[row]; + auto reward = reward_model.getStateActionReward(prototype); + action_rewards->push_back(reward); + } + return storm::models::sparse::StandardRewardModel(std::move(state_rewards), std::move(action_rewards)); + } + + + template + void PomdpManager::setObservationMemorySize(uint64_t obs, uint64_t memory_size) { + assert(obs < this->pomdp.getNrObservations()); + this->observation_memory_size[obs] = memory_size; + } + + template + void PomdpManager::setGlobalMemorySize(uint64_t memory_size) { + for(uint64_t obs = 0; obs < this->pomdp.getNrObservations(); obs++) { + this->observation_memory_size[obs] = memory_size; + } + } + + + template class PomdpManager; + + } +} \ No newline at end of file diff --git a/payntbind/src/synthesis/pomdp/PomdpManager.h b/payntbind/src/synthesis/pomdp/PomdpManager.h new file mode 100644 index 
000000000..f9fb94a78 --- /dev/null +++ b/payntbind/src/synthesis/pomdp/PomdpManager.h @@ -0,0 +1,128 @@ +#pragma once + +#include "storm/models/sparse/Mdp.h" +#include "storm/models/sparse/Pomdp.h" + +namespace storm { + namespace synthesis { + + template + class PomdpManager { + + public: + + PomdpManager(storm::models::sparse::Pomdp const& pomdp); + + // number of actions available at this observation + std::vector observation_actions; + // for each observation, a list of successor observations + std::vector> observation_successors; + + /** Memory manipulation . */ + + // for each observation contains the number of allocated memory states (initially 1) + std::vector observation_memory_size; + + // set memory size to a selected observation + void setObservationMemorySize(uint64_t obs, uint64_t memory_size); + // set memory size to all observations + void setGlobalMemorySize(uint64_t memory_size); + + // unfold memory model (a priori memory update) into the POMDP + std::shared_ptr> constructMdp(); + + /** Design space associated with this POMDP. */ + + // total number of holes + uint64_t num_holes; + // for each observation, a list of action holes + std::vector> action_holes; + // for each observation, a list of memory holes + std::vector> memory_holes; + // for each hole, its size + std::vector hole_options; + + /** Unfolded MDP stuff. 
*/ + + // MDP obtained after last injection (initially contains MDP-ized POMDP) + std::shared_ptr> mdp; + + // for each state contains its prototype state (reverse of prototype_duplicates) + std::vector state_prototype; + // for each state contains its memory index + std::vector state_memory; + + // for each row, the corresponding action hole + std::vector row_action_hole; + // for each row, the corresponding option of the action hole + std::vector row_action_option; + // for each row, the corresponding memory hole + std::vector row_memory_hole; + // for each row, the corresponding option of the memory hole + std::vector row_memory_option; + + // for each observation contains the maximum memory size of a destination + // across all rows of a prototype state having this observation + std::vector max_successor_memory_size; + + + private: + + /** + * Build the state space: + * - compute total number of states (@num_states) + * - associate prototype states with their duplicates (@prototype_duplicates) + * - for each state, remember its prototype (@state_prototype) + * - for each state, remember its memory (@state_memory) + */ + void buildStateSpace(); + + /** + * Get index of the @memory equivalent of the @prototype. + * If the prototype does not have the corresponding memory + * equivalent, default to @memory=0. 
+ */ + uint64_t translateState(uint64_t prototype, uint64_t memory); + + // compute max memory size among all destinations of a prototype row + uint64_t maxSuccessorMemorySize(uint64_t prototype_row); + + /** + * Build the shape of the transition matrix: + * - for each row store its prototype (@row_prototype) + * - for each row store its memory index (@row_memory) + * - deduce row groups of the resulting transition matrix (@row_groups) + * - deduce the overall number of rows (@num_rows) + */ + void buildTransitionMatrix(); + + void buildTransitionMatrixSpurious(); + + void resetDesignSpace(); + void buildDesignSpaceSpurious(); + + storm::models::sparse::StateLabeling constructStateLabeling(); + storm::storage::SparseMatrix constructTransitionMatrix(); + storm::models::sparse::StandardRewardModel constructRewardModel(storm::models::sparse::StandardRewardModel const& reward_model); + + // original POMDP + storm::models::sparse::Pomdp const& pomdp; + // for each row of a POMDP contains its index in its row group + std::vector prototype_row_index; + + // number of states in an unfolded MDP + uint64_t num_states; + // for each prototype state contains a list of its duplicates (including itself) + std::vector> prototype_duplicates; + + // number of rows in an unfolded MDP + uint64_t num_rows; + // row groups of the resulting transition matrix + std::vector row_groups; + // for each row contains index of the prototype row + std::vector row_prototype; + // for each row contains a memory update associated with it + std::vector row_memory; + }; + } +} \ No newline at end of file diff --git a/payntbind/src/synthesis/pomdp/PomdpManagerAposteriori.cpp b/payntbind/src/synthesis/pomdp/PomdpManagerAposteriori.cpp new file mode 100644 index 000000000..ed84da145 --- /dev/null +++ b/payntbind/src/synthesis/pomdp/PomdpManagerAposteriori.cpp @@ -0,0 +1,259 @@ +#include "PomdpManagerAposteriori.h" + +#include "storm/exceptions/InvalidArgumentException.h" +#include 
"storm/exceptions/NotSupportedException.h" +#include "storm/storage/sparse/ModelComponents.h" +#include "storm/storage/SparseMatrix.h" +#include "storm/models/sparse/StandardRewardModel.h" + +namespace storm { + namespace synthesis { + + + template + PomdpManagerAposteriori::PomdpManagerAposteriori(storm::models::sparse::Pomdp const& pomdp) + : pomdp(pomdp) { + + auto num_observations = this->pomdp.getNrObservations(); + auto const& tm = this->pomdp.getTransitionMatrix(); + auto const& row_group_indices = tm.getRowGroupIndices(); + + this->observation_actions.resize(num_observations,0); + for(uint64_t state = 0; state < this->pomdp.getNumberOfStates(); state++) { + auto obs = this->pomdp.getObservation(state); + if(this->observation_actions[obs] == 0) { + this->observation_actions[obs] = tm.getRowGroupSize(state); + } + } + + this->row_posteriors.resize(tm.getRowCount()); + this->observation_posteriors.resize(num_observations); + for(uint64_t state = 0; state < this->pomdp.getNumberOfStates(); state++) { + auto prior = this->pomdp.getObservation(state); + for(auto row = row_group_indices[state]; row < row_group_indices[state+1]; row++) { + std::set posteriors; + for(auto const &entry: tm.getRow(row)) { + auto successor_state = entry.getColumn(); + auto posterior = this->pomdp.getObservation(successor_state); + posteriors.insert(posterior); + this->observation_posteriors[prior].insert(posterior); + } + this->row_posteriors[row] = std::vector(posteriors.begin(),posteriors.end()); + } + } + + this->observation_memory_size.resize(num_observations, 1); + } + + + template + void PomdpManagerAposteriori::setObservationMemorySize(uint64_t obs, uint64_t memory_size) { + assert(obs < this->pomdp.getNrObservations()); + this->observation_memory_size[obs] = memory_size; + } + + template + void PomdpManagerAposteriori::setGlobalMemorySize(uint64_t memory_size) { + for(uint64_t obs = 0; obs < this->pomdp.getNrObservations(); obs++) { + this->observation_memory_size[obs] = 
memory_size; + } + } + + + + + template + void PomdpManagerAposteriori::clear_before_unfolding() { + + this->num_states = 0; + this->prototype_state_copies.clear(); + this->state_prototype.clear(); + this->state_memory.clear(); + + this->action_holes.clear(); + this->update_holes.clear(); + this->hole_num_options.clear(); + + this->row_prototype.clear(); + this->coloring.clear(); + } + + template + void PomdpManagerAposteriori::clear_after_unfolding() { + + this->num_states = 0; + this->prototype_state_copies.clear(); + + this->row_prototype.clear(); + } + + + template + void PomdpManagerAposteriori::buildStateSpace() { + this->prototype_state_copies.resize(this->pomdp.getNumberOfStates()); + for(uint64_t prototype = 0; prototype < this->pomdp.getNumberOfStates(); prototype++) { + auto obs = this->pomdp.getObservation(prototype); + auto memory_size = this->observation_memory_size[obs]; + this->prototype_state_copies[prototype].resize(memory_size); + for(uint64_t memory = 0; memory < memory_size; memory++) { + this->prototype_state_copies[prototype][memory] = this->num_states++; + this->state_prototype.push_back(prototype); + this->state_memory.push_back(memory); + } + } + } + + + template + void PomdpManagerAposteriori::buildDesignSpace() { + uint64_t num_holes = 0; + for(uint64_t prior = 0; prior < this->pomdp.getNrObservations(); prior++) { + auto num_actions = this->observation_actions[prior]; + for(uint64_t mem = 0; mem < this->observation_memory_size[prior]; mem++) { + // gamma(n,z) in Act + this->action_holes[std::make_pair(mem,prior)] = num_holes++; + this->hole_num_options.push_back(num_actions); + + for(auto posterior: this->observation_posteriors[prior]) { + // delta(n,z,z') in mu(z') + auto num_updates = this->observation_memory_size[posterior]; + this->update_holes[std::make_tuple(mem,prior,posterior)] = num_holes++; + this->hole_num_options.push_back(num_updates); + } + } + } + } + + + template + storm::models::sparse::StateLabeling 
PomdpManagerAposteriori::constructStateLabeling() { + storm::models::sparse::StateLabeling labeling(this->num_states); + for (auto const& label : this->pomdp.getStateLabeling().getLabels()) { + storm::storage::BitVector label_flags(this->num_states, false); + for (auto const& prototype : this->pomdp.getStateLabeling().getStates(label)) { + for(auto state: this->prototype_state_copies[prototype]) { + label_flags.set(state); + if (label == "init") { + break; + } + } + } + labeling.addLabel(label, std::move(label_flags)); + } + return labeling; + } + + template + void PomdpManagerAposteriori::unfoldRow( + storm::storage::SparseMatrixBuilder & builder, + uint64_t pomdp_state, uint64_t memory, uint64_t action + ) { + auto prior = this->pomdp.getObservation(pomdp_state); + auto prototype_row = this->pomdp.getTransitionMatrix().getRowGroupIndices()[pomdp_state] + action; + auto const& posteriors = this->row_posteriors[prototype_row]; + auto action_hole = this->action_holes[std::make_pair(memory,prior)]; + + // iterate over all combinations of memory updates + uint64_t num_combinations = 1; + for(auto posterior: posteriors) { + num_combinations *= this->observation_memory_size[posterior]; + } + std::map combination; + for(uint64_t c=0 ; c=0; i--) { + auto posterior = posteriors[i]; + auto posterior_size = this->observation_memory_size[posterior]; + combination[posterior] = index % posterior_size; + index = index / posterior_size; + } + + // add row + for(auto const &entry: this->pomdp.getTransitionMatrix().getRow(prototype_row)) { + auto successor_pomdp_state = entry.getColumn(); + auto posterior = this->pomdp.getObservation(successor_pomdp_state); + auto successor_memory = combination[posterior]; + auto successor_state = this->prototype_state_copies[successor_pomdp_state][successor_memory]; + builder.addNextValue(this->num_unfolded_rows(),successor_state,entry.getValue()); + } + + // add row coloring + std::map coloring; + coloring[action_hole] = action; + for(uint64_t 
index = 0; index < posteriors.size(); index++) { + auto posterior = posteriors[index]; + auto update_hole = this->update_holes[std::make_tuple(memory,prior,posterior)]; + coloring[update_hole] = combination[posterior]; + } + this->coloring.push_back(std::move(coloring)); + + // register prototype + this->row_prototype.push_back(prototype_row); + } + } + + + template + storm::storage::SparseMatrix PomdpManagerAposteriori::constructTransitionMatrix() { + + storm::storage::SparseMatrixBuilder builder( + 0, this->num_states, 0, true, true, this->num_states + ); + for(uint64_t pomdp_state = 0; pomdp_state < this->pomdp.getNumberOfStates(); pomdp_state++) { + auto prior = this->pomdp.getObservation(pomdp_state); + for(uint64_t memory = 0; memory < this->observation_memory_size[prior]; memory++) { + builder.newRowGroup(this->num_unfolded_rows()); + for(uint64_t action = 0; action < this->pomdp.getTransitionMatrix().getRowGroupSize(pomdp_state); action++) { + this->unfoldRow(builder,pomdp_state,memory,action); + } + } + } + return builder.build(); + } + + + template + storm::models::sparse::StandardRewardModel PomdpManagerAposteriori::constructRewardModel( + storm::models::sparse::StandardRewardModel const& reward_model + ) { + std::optional> state_rewards, action_rewards; + STORM_LOG_THROW(!reward_model.hasStateRewards(), storm::exceptions::NotSupportedException, "state rewards are currently not supported."); + STORM_LOG_THROW(!reward_model.hasTransitionRewards(), storm::exceptions::NotSupportedException, "transition rewards are currently not supported."); + + action_rewards = std::vector(); + for(uint64_t row = 0; row < this->row_prototype.size(); row++) { + auto prototype = this->row_prototype[row]; + auto reward = reward_model.getStateActionReward(prototype); + action_rewards->push_back(reward); + } + return storm::models::sparse::StandardRewardModel(std::move(state_rewards), std::move(action_rewards)); + } + + template + std::shared_ptr> 
PomdpManagerAposteriori::constructMdp() { + + this->clear_before_unfolding(); + + this->buildStateSpace(); + this->buildDesignSpace(); + + storm::storage::sparse::ModelComponents components; + components.transitionMatrix = this->constructTransitionMatrix(); + assert(components.transitionMatrix.isProbabilistic()); + components.stateLabeling = this->constructStateLabeling(); + for (auto const& reward_model : this->pomdp.getRewardModels()) { + auto constructed = this->constructRewardModel(reward_model.second); + components.rewardModels.emplace(reward_model.first, constructed); + } + + this->mdp = std::make_shared>(std::move(components)); + + this->clear_after_unfolding(); + + return this->mdp; + + } + + template class PomdpManagerAposteriori; + } +} \ No newline at end of file diff --git a/payntbind/src/synthesis/pomdp/PomdpManagerAposteriori.h b/payntbind/src/synthesis/pomdp/PomdpManagerAposteriori.h new file mode 100644 index 000000000..94811f593 --- /dev/null +++ b/payntbind/src/synthesis/pomdp/PomdpManagerAposteriori.h @@ -0,0 +1,96 @@ +#pragma once + +#include "storm/models/sparse/Mdp.h" +#include "storm/models/sparse/Pomdp.h" + +#include + + +namespace storm { + namespace synthesis { + + + template + class PomdpManagerAposteriori { + + public: + + PomdpManagerAposteriori(storm::models::sparse::Pomdp const& pomdp); + + // unfold memory model (a aposteriori memory update) into the POMDP + std::shared_ptr> constructMdp(); + + // for each observation contains the number of allocated memory states (initially 1) + std::vector observation_memory_size; + // set memory size to a selected observation + void setObservationMemorySize(uint64_t obs, uint64_t memory_size); + // set memory size to all observations + void setGlobalMemorySize(uint64_t memory_size); + + // MDP obtained after last unfolding + std::shared_ptr> mdp; + // for each state contains its prototype state (reverse of prototype_duplicates) + std::vector state_prototype; + // for each state contains its memory 
index + std::vector state_memory; + + // for each unfolded row, its coloring + std::vector> coloring; + // for each hole, the number of its options + std::vector hole_num_options; + // hole identifier for each (memory,prior) combination + std::map,uint64_t> action_holes; + // hole identifier for each (memory,prior,posterior) combination + std::map,uint64_t> update_holes; + + private: + + // original POMDP + storm::models::sparse::Pomdp const& pomdp; + + // for each observation, number of available actions + std::vector observation_actions; + // for each POMDP row, a set of successor observations + std::vector> row_posteriors; + // for each observation, a set of successor observations + std::vector> observation_posteriors; + + // clear auxiliary data structures before unfolding + void clear_before_unfolding(); + // clear auxiliary data structures after unfolding + void clear_after_unfolding(); + + // current number of unfolded states + uint64_t num_states; + // for each POMDP state, a list of identifiers of unfolded states + std::vector> prototype_state_copies; + // establish the state space: to each state s, create mu(O(s)) of its copies + void buildStateSpace(); + + // establish the design space: create action and update holes + void buildDesignSpace(); + + // for each unfolded row, its prototype + std::vector row_prototype; + // get current number of unfolded rows + uint64_t num_unfolded_rows() { return this->coloring.size(); } + + + // unfold a given state-action pair + void unfoldRow( + storm::storage::SparseMatrixBuilder & builder, + uint64_t pomdp_state, uint64_t memory, uint64_t action + ); + storm::storage::SparseMatrix constructTransitionMatrix(); + + // translate state labeling for the unfolded MDP + storm::models::sparse::StateLabeling constructStateLabeling(); + // translate reward models for the unfolded MDP + storm::models::sparse::StandardRewardModel constructRewardModel( + storm::models::sparse::StandardRewardModel const& reward_model + ); + + + }; 
+ } +} \ No newline at end of file diff --git a/payntbind/src/synthesis/pomdp/SubPomdpBuilder.cpp b/payntbind/src/synthesis/pomdp/SubPomdpBuilder.cpp new file mode 100644 index 000000000..d623315c0 --- /dev/null +++ b/payntbind/src/synthesis/pomdp/SubPomdpBuilder.cpp @@ -0,0 +1,263 @@ +#include "SubPomdpBuilder.h" + +#include "storm/exceptions/InvalidArgumentException.h" + +#include "storm/storage/sparse/ModelComponents.h" +#include "storm/storage/SparseMatrix.h" +#include "storm/models/sparse/StandardRewardModel.h" + +#include + +namespace synthesis { + + + SubPomdpBuilder::SubPomdpBuilder( + storm::models::sparse::Pomdp const& pomdp, + std::string const& reward_name, + std::string const& target_label + ) + : pomdp(pomdp), reward_name(reward_name), target_label(target_label) { + + auto const& tm = pomdp.getTransitionMatrix(); + this->reachable_successors.resize(pomdp.getNumberOfStates()); + for(uint64_t state = 0; state < pomdp.getNumberOfStates(); state++) { + this->reachable_successors[state] = std::set(); + for(auto const& entry: tm.getRowGroup(state)) { + auto successor = entry.getColumn(); + if(successor != state) { + this->reachable_successors[state].insert(successor); + } + } + } + + this->relevant_states = storm::storage::BitVector(this->pomdp.getNumberOfStates(),false); + this->frontier_states = storm::storage::BitVector(this->pomdp.getNumberOfStates(),false); + } + + void SubPomdpBuilder::setRelevantStates(storm::storage::BitVector const& relevant_states) { + this->relevant_states = relevant_states; + this->collectFrontierStates(); + } + + void SubPomdpBuilder::collectFrontierStates() { + this->frontier_states.clear(); + for(auto state: this->relevant_states) { + for(uint64_t successor: this->reachable_successors[state]) { + if(!this->relevant_states[successor]) { + this->frontier_states.set(successor,true); + } + } + } + } + + void SubPomdpBuilder::setRelevantObservations( + storm::storage::BitVector const& relevant_observations, + std::map const& 
initial_belief + ) { + this->relevant_observations = relevant_observations; + this->relevant_states.clear(); + this->frontier_states.clear(); + + // traverse the POMDP and identify states with relevant observations that are reachable from the initial belief + std::stack state_stack; + for(const auto &entry : initial_belief) { + auto state = entry.first; + this->relevant_states.set(state,true); + state_stack.push(state); + } + while(!state_stack.empty()) { + auto state = state_stack.top(); + state_stack.pop(); + for(auto dst: this->reachable_successors[state]) { + auto dst_obs = this->pomdp.getObservation(dst); + if(this->relevant_observations[dst_obs] && !this->relevant_states[dst]) { + // first encounter of a relevant state + this->relevant_states.set(dst,true); + state_stack.push(dst); + } + } + } + this->collectFrontierStates(); + } + + + void SubPomdpBuilder::constructStates() { + + this->num_states_subpomdp = this->relevant_states.getNumberOfSetBits() + this->frontier_states.getNumberOfSetBits() + 2; + this->num_rows_subpomdp = this->frontier_states.getNumberOfSetBits() + 2; + for(auto state: this->relevant_states) { + this->num_rows_subpomdp += this->pomdp.getNumberOfChoices(state); + } + + auto num_states_pomdp = this->pomdp.getNumberOfStates(); + this->state_sub_to_full = std::vector(this->num_states_subpomdp,0); + this->state_full_to_sub = std::vector(num_states_pomdp,0); + + // indices 0 and 1 are reserved for the initial and the sink state respectively + uint64_t state_subpomdp = 0; + this->state_sub_to_full[state_subpomdp++] = num_states_pomdp; + this->state_sub_to_full[state_subpomdp++] = num_states_pomdp; + + for(auto state: this->relevant_states) { + this->state_full_to_sub[state] = state_subpomdp; + this->state_sub_to_full[state_subpomdp] = state; + state_subpomdp++; + } + for(auto state: this->frontier_states) { + this->state_full_to_sub[state] = state_subpomdp; + this->state_sub_to_full[state_subpomdp] = state; + state_subpomdp++; + } + } + + + 
storm::storage::SparseMatrix SubPomdpBuilder::constructTransitionMatrix( + std::map const& initial_belief + ) { + + // building the transition matrix + storm::storage::SparseMatrixBuilder builder( + this->num_rows_subpomdp, this->num_states_subpomdp, 0, true, true, this->num_states_subpomdp + ); + uint64_t current_row = 0; + + // initial state distribution + builder.newRowGroup(current_row); + for(const auto &entry : initial_belief) { + auto dst = this->state_full_to_sub[entry.first]; + builder.addNextValue(current_row, dst, entry.second); + } + current_row++; + + // sink state self-loop + builder.newRowGroup(current_row); + builder.addNextValue(current_row, this->sink_state, 1); + current_row++; + + // relevant states + auto const& tm = this->pomdp.getTransitionMatrix(); + auto const& row_groups = this->pomdp.getNondeterministicChoiceIndices(); + for(auto state: this->relevant_states) { + builder.newRowGroup(current_row); + for(uint64_t row = row_groups[state]; row < row_groups[state+1]; row++) { + if(this->discount_factor < 1) { + builder.addNextValue(current_row, this->sink_state, 1-this->discount_factor); + } + for(auto const& entry: tm.getRow(row)) { + auto dst = this->state_full_to_sub[entry.getColumn()]; + builder.addNextValue(current_row, dst, entry.getValue() * this->discount_factor); + } + current_row++; + } + } + + // frontier states are rerouted to the sink state with probability 1 + for(const auto state: this->frontier_states) { + (void) state; + builder.newRowGroup(current_row); + builder.addNextValue(current_row, this->sink_state, 1); + current_row++; + } + + // transition matrix finalized + return builder.build(); + } + + storm::models::sparse::StateLabeling SubPomdpBuilder::constructStateLabeling() { + // initial state labeling + storm::models::sparse::StateLabeling labeling(this->num_states_subpomdp); + storm::storage::BitVector label_init(this->num_states_subpomdp, false); + label_init.set(this->initial_state); + labeling.addLabel("init", 
std::move(label_init)); + + // target state labeling + storm::storage::BitVector label_target(this->num_states_subpomdp, false); + auto const& pomdp_labeling = this->pomdp.getStateLabeling(); + auto const& pomdp_target_states = pomdp_labeling.getStates(this->target_label); + for(auto state: pomdp_target_states) { + if(this->relevant_states[state]) { + label_target.set(this->state_full_to_sub[state]); + } + } + label_target.set(this->sink_state); + labeling.addLabel(this->target_label, std::move(label_target)); + + return labeling; + } + + storm::models::sparse::ChoiceLabeling SubPomdpBuilder::constructChoiceLabeling() { + // copy existing labels, add fresh label + storm::models::sparse::ChoiceLabeling labeling(this->num_rows_subpomdp); + auto const& pomdp_labeling = this->pomdp.getChoiceLabeling(); + for (auto const& label : pomdp_labeling.getLabels()) { + labeling.addLabel(label, storm::storage::BitVector(this->num_rows_subpomdp,false)); + } + labeling.addLabel(this->empty_label, storm::storage::BitVector(this->num_rows_subpomdp,false)); + + // initial state, sink state + labeling.addLabelToChoice(this->empty_label, 0); + labeling.addLabelToChoice(this->empty_label, 1); + + // relevant states + auto const& row_groups = this->pomdp.getNondeterministicChoiceIndices(); + uint64_t current_row = 2; + for(auto state: this->relevant_states) { + for(uint64_t row = row_groups[state]; rowfrontier_states) { + (void) state; + labeling.addLabelToChoice(this->empty_label, current_row++); + } + + return labeling; + } + + std::vector SubPomdpBuilder::constructObservabilityClasses() { + std::vector observation_classes(this->num_states_subpomdp); + uint32_t fresh_observation = this->pomdp.getNrObservations(); + observation_classes[this->initial_state] = fresh_observation; + observation_classes[this->sink_state] = fresh_observation; + for(auto state: this->relevant_states) { + observation_classes[this->state_full_to_sub[state]] = this->pomdp.getObservation(state); + } + for(auto 
state: this->frontier_states) { + observation_classes[this->state_full_to_sub[state]] = fresh_observation; + } + return observation_classes; + } + + storm::models::sparse::StandardRewardModel SubPomdpBuilder::constructRewardModel() { + auto const& reward_model = this->pomdp.getRewardModel(this->reward_name); + std::optional> state_rewards; + std::vector action_rewards(this->num_rows_subpomdp,0); + uint64_t current_row = 2; + auto const& row_groups = this->pomdp.getNondeterministicChoiceIndices(); + for(auto state: this->relevant_states) { + for(uint64_t row = row_groups[state]; row(std::move(state_rewards), std::move(action_rewards)); + } + + std::shared_ptr> SubPomdpBuilder::restrictPomdp( + std::map const& initial_belief + ) { + this->constructStates(); + storm::storage::sparse::ModelComponents components; + components.transitionMatrix = this->constructTransitionMatrix(initial_belief); + components.stateLabeling = this->constructStateLabeling(); + components.choiceLabeling = this->constructChoiceLabeling(); + components.observabilityClasses = this->constructObservabilityClasses(); + components.rewardModels.emplace(this->reward_name, this->constructRewardModel()); + return std::make_shared>(std::move(components)); + } + +} diff --git a/payntbind/src/synthesis/pomdp/SubPomdpBuilder.h b/payntbind/src/synthesis/pomdp/SubPomdpBuilder.h new file mode 100644 index 000000000..ac45c7823 --- /dev/null +++ b/payntbind/src/synthesis/pomdp/SubPomdpBuilder.h @@ -0,0 +1,110 @@ +#pragma once + +#include "storm/models/sparse/Pomdp.h" +#include "storm/logic/Formula.h" + +namespace synthesis { + + class SubPomdpBuilder { + + public: + + /** + * Prepare sub-POMDP construction wrt a given canonic POMDP. New + * sub-POMDP will be model checked using property + * R[reward_name]=? [F target_label]. 
+ */ + SubPomdpBuilder( + storm::models::sparse::Pomdp const& pomdp, + std::string const& reward_name, + std::string const& target_label + ); + + /** + * If <1 discount factor is set, each action will redirect 1-df probability to the (target) sink state. + */ + void setDiscountFactor(double discount_factor) { + this->discount_factor = discount_factor; + } + + /** + * Set which observations to keep in the restricted sub-POMDP. All states reachable from the initial belief + * having relevant observation will be included in the sub-POMDP. + */ + void setRelevantObservations( + storm::storage::BitVector const& relevant_observations, + std::map const& initial_belief + ); + + /** Set which states to keep in the restricted sub-POMDP. */ + void setRelevantStates(storm::storage::BitVector const& relevant_states); + + /** + * Construct a POMDP restriction containing the following states: + * - fresh initial state to simulate initial distribution + * - fresh sink state (labeled as target) + * - relevant states + * - frontier states having single action going to sink state with probability 1 and reward 0 + * @param initial_belief initial probability distribution + */ + std::shared_ptr> restrictPomdp( + std::map const& initial_belief + ); + + // observations relevant for the current restriction + storm::storage::BitVector relevant_observations; + // states relevant for the current restriction + storm::storage::BitVector relevant_states; + // irrelevant states reachable from the relevant ones in one step + storm::storage::BitVector frontier_states; + + // for each state of a sub-POMDP its index in the full POMDP; fresh states (initial & sink) are associated + // with a number of states in the POMDP + std::vector state_sub_to_full; + // for each state of a full POMDP its index in the sub-POMDP; unreachable states are associated with + // a number of states in the sub-POMDP + std::vector state_full_to_sub; + // nondeterministic choice indices of the sub-POMDP + std::vector 
subpomdp_row_groups; + + private: + + // original POMDP + storm::models::sparse::Pomdp const& pomdp; + // name of the investigated reward + std::string const reward_name; + // label assigned to target states + std::string const target_label; + // for each state, a list of immediate successors (excluding state itself) + std::vector> reachable_successors; + // discount factor to be applied to the transformed POMDP + double discount_factor = 1; + + // number of states in the sub-POMDP + uint64_t num_states_subpomdp; + // number of rows in the sub-POMDP + uint64_t num_rows_subpomdp; + + // index of the new initial state + const uint64_t initial_state = 0; + // index of the new sink state + const uint64_t sink_state = 1; + // label associated with initial distribution as well as shortcut actions + const std::string empty_label = ""; + + // upon setting vector of relevant states, identify frontier states + void collectFrontierStates(); + // create sub-to-full and full-to-sub state maps + void constructStates(); + + storm::storage::SparseMatrix constructTransitionMatrix( + std::map const& initial_belief + ); + storm::models::sparse::StateLabeling constructStateLabeling(); + storm::models::sparse::ChoiceLabeling constructChoiceLabeling(); + std::vector constructObservabilityClasses(); + storm::models::sparse::StandardRewardModel constructRewardModel(); + + + }; +} \ No newline at end of file diff --git a/payntbind/src/synthesis/pomdp/bindings.cpp b/payntbind/src/synthesis/pomdp/bindings.cpp new file mode 100644 index 000000000..5b47ad073 --- /dev/null +++ b/payntbind/src/synthesis/pomdp/bindings.cpp @@ -0,0 +1,54 @@ +#include "../synthesis.h" + +#include "PomdpManager.h" +#include "PomdpManagerAposteriori.h" +#include "SubPomdpBuilder.h" + +void bindings_pomdp(py::module& m) { + + py::class_>(m, "PomdpManager", "POMDP manager") + .def(py::init const&>(), "Constructor.", py::arg("pomdp")) + .def("set_observation_memory_size", 
&storm::synthesis::PomdpManager::setObservationMemorySize, "Set memory size to a selected observation.", py::arg("observation"), py::arg("memory_size")) + .def("set_global_memory_size", &storm::synthesis::PomdpManager::setGlobalMemorySize, "Set memory size to all observations.", py::arg("memory_size")) + .def("construct_mdp", &storm::synthesis::PomdpManager::constructMdp, "Unfold memory model (a priori memory update) into the POMDP.") + .def_property_readonly("state_prototype", [](storm::synthesis::PomdpManager& manager) {return manager.state_prototype;}) + .def_property_readonly("state_memory", [](storm::synthesis::PomdpManager& manager) {return manager.state_memory;}) + .def_property_readonly("observation_memory_size", [](storm::synthesis::PomdpManager& manager) {return manager.observation_memory_size;}) + .def_property_readonly("observation_actions", [](storm::synthesis::PomdpManager& manager) {return manager.observation_actions;}) + .def_property_readonly("observation_successors", [](storm::synthesis::PomdpManager& manager) {return manager.observation_successors;}) + .def_property_readonly("max_successor_memory_size", [](storm::synthesis::PomdpManager& manager) {return manager.max_successor_memory_size;}) + .def_property_readonly("num_holes", [](storm::synthesis::PomdpManager& manager) {return manager.num_holes;}) + .def_property_readonly("action_holes", [](storm::synthesis::PomdpManager& manager) {return manager.action_holes;}) + .def_property_readonly("memory_holes", [](storm::synthesis::PomdpManager& manager) {return manager.memory_holes;}) + .def_property_readonly("hole_options", [](storm::synthesis::PomdpManager& manager) {return manager.hole_options;}) + .def_property_readonly("row_action_hole", [](storm::synthesis::PomdpManager& manager) {return manager.row_action_hole;}) + .def_property_readonly("row_action_option", [](storm::synthesis::PomdpManager& manager) {return manager.row_action_option;}) + .def_property_readonly("row_memory_hole", 
[](storm::synthesis::PomdpManager& manager) {return manager.row_memory_hole;}) + .def_property_readonly("row_memory_option", [](storm::synthesis::PomdpManager& manager) {return manager.row_memory_option;}) + ; + + py::class_>(m, "PomdpManagerAposteriori", "POMDP manager (a posteriori)") + .def(py::init const&>(), "Constructor.") + .def("set_observation_memory_size", &storm::synthesis::PomdpManagerAposteriori::setObservationMemorySize) + .def("set_global_memory_size", &storm::synthesis::PomdpManagerAposteriori::setGlobalMemorySize) + .def("construct_mdp", &storm::synthesis::PomdpManagerAposteriori::constructMdp) + .def_property_readonly("state_prototype", [](storm::synthesis::PomdpManagerAposteriori& manager) {return manager.state_prototype;}) + .def_property_readonly("state_memory", [](storm::synthesis::PomdpManagerAposteriori& manager) {return manager.state_memory;}) + .def_property_readonly("coloring", [](storm::synthesis::PomdpManagerAposteriori& manager) {return manager.coloring;}) + .def_property_readonly("hole_num_options", [](storm::synthesis::PomdpManagerAposteriori& manager) {return manager.hole_num_options;}) + .def_property_readonly("action_holes", [](storm::synthesis::PomdpManagerAposteriori& manager) {return manager.action_holes;}) + .def_property_readonly("update_holes", [](storm::synthesis::PomdpManagerAposteriori& manager) {return manager.update_holes;}) + ; + + py::class_>(m, "SubPomdpBuilder") + .def(py::init const&, std::string const&, std::string const&>()) + .def("set_discount_factor", &synthesis::SubPomdpBuilder::setDiscountFactor) + .def("set_relevant_observations", &synthesis::SubPomdpBuilder::setRelevantObservations) + .def_property_readonly("relevant_states", [](synthesis::SubPomdpBuilder& builder) {return builder.relevant_states;}) + .def_property_readonly("frontier_states", [](synthesis::SubPomdpBuilder& builder) {return builder.frontier_states;}) + .def("restrict_pomdp", &synthesis::SubPomdpBuilder::restrictPomdp) + 
.def_property_readonly("state_sub_to_full", [](synthesis::SubPomdpBuilder& builder) {return builder.state_sub_to_full;}) + .def_property_readonly("state_full_to_sub", [](synthesis::SubPomdpBuilder& builder) {return builder.state_full_to_sub;}) + ; +} + diff --git a/payntbind/src/synthesis/pomdp_family/GameAbstractionSolver.cpp b/payntbind/src/synthesis/pomdp_family/GameAbstractionSolver.cpp new file mode 100644 index 000000000..fff8454ad --- /dev/null +++ b/payntbind/src/synthesis/pomdp_family/GameAbstractionSolver.cpp @@ -0,0 +1,332 @@ +#include "GameAbstractionSolver.h" + + + +#include + +namespace synthesis { + + template + std::pair>,std::vector> randomizeActionVariant( + storm::models::sparse::Model const& model, + std::vector>> const& state_action_choices + ) { + storm::storage::sparse::ModelComponents components; + + // copy state labeling + storm::models::sparse::StateLabeling state_labeling(model.getNumberOfStates()); + for (auto const& label : model.getStateLabeling().getLabels()) { + state_labeling.addLabel(label, storm::storage::BitVector(model.getStates(label))); + } + components.stateLabeling = state_labeling; + + // build transition matrix and reward models + storm::storage::SparseMatrixBuilder builder(0, 0, 0, false, true, 0); + std::map> reward_vectors; + for(auto const& reward_model : model.getRewardModels()) { + reward_vectors.emplace(reward_model.first, std::vector()); + } + + uint64_t num_rows = 0; + std::vector choice_to_action; + auto num_actions = state_action_choices[0].size(); + for(uint64_t state=0; state dst_prob; + for(auto choice: choices) { + for(auto const &entry: model.getTransitionMatrix().getRow(choice)) { + dst_prob[entry.getColumn()] += entry.getValue() / num_choices; + } + } + for(auto const& [dst,prob] : dst_prob) { + builder.addNextValue(num_rows,dst,prob); + } + num_rows++; + + // handle reward models + for(auto const& reward_model : model.getRewardModels()) { + ValueType reward_value = 0; + for(auto choice: choices) { + 
reward_value += reward_model.second.getStateActionReward(choice) / num_choices; + } + reward_vectors[reward_model.first].push_back(reward_value); + } + } + } + components.transitionMatrix = builder.build(); + + for(auto const& [name,choice_rewards]: reward_vectors) { + std::optional> state_rewards; + components.rewardModels.emplace(name, storm::models::sparse::StandardRewardModel(std::move(state_rewards), std::move(choice_rewards))); + } + + auto randomized_model = std::make_shared>(std::move(components)); + return std::make_pair(randomized_model,choice_to_action); + } + template std::pair>,std::vector> randomizeActionVariant(storm::models::sparse::Model const& model, std::vector>> const& state_action_choices); + + + template + void print_matrix(storm::storage::SparseMatrix matrix) { + auto const& row_group_indices = matrix.getRowGroupIndices(); + for(uint64_t state=0; state < matrix.getRowGroupCount(); state++) { + std::cout << "state " << state << ": " << std::endl; + for(uint64_t row=row_group_indices[state]; row " << entry.getColumn() << " [" << entry.getValue() << "];"; + } + std::cout << std::endl; + } + } + std::cout << "-----" << std::endl; + } + + template void print_matrix(storm::storage::SparseMatrix matrix); + template void print_matrix(storm::storage::SparseMatrix matrix); + + template + GameAbstractionSolver::GameAbstractionSolver( + storm::models::sparse::Model const& quotient, + uint64_t quotient_num_actions, + std::vector const& choice_to_action, + std::string const& target_label, + double precision + ) : quotient(quotient), quotient_num_actions(quotient_num_actions), choice_to_action(choice_to_action) { + + auto quotient_num_states = quotient.getNumberOfStates(); + auto quotient_num_choices = quotient.getNumberOfChoices(); + + this->choice_to_destinations.resize(quotient_num_choices); + for(uint64_t choice=0; choicechoice_to_destinations[choice].push_back(entry.getColumn()); + } + } + this->setupSolverEnvironment(precision); + + // identify target 
states + this->state_is_target = storm::storage::BitVector(quotient_num_states,false); + for(auto state: quotient.getStateLabeling().getStates(target_label)) { + this->state_is_target.set(state,true); + } + + this->solution_state_values = std::vector(quotient_num_states,0); + this->solution_state_to_player1_action = std::vector(quotient_num_states,quotient_num_actions); + this->solution_state_to_quotient_choice = std::vector(quotient_num_states,quotient_num_choices); + } + + + template + void GameAbstractionSolver::setupSolverEnvironment(double precision) { + this->env.solver().game().setPrecision(storm::utility::convertNumber(precision)); + + // value iteration + // this->env.solver().game().setMethod(storm::solver::GameMethod::ValueIteration); + + // policy iteration + this->env.solver().game().setMethod(storm::solver::GameMethod::PolicyIteration); + this->env.solver().setLinearEquationSolverType(storm::solver::EquationSolverType::Native); + this->env.solver().native().setMethod(storm::solver::NativeLinearEquationSolverMethod::Jacobi); + this->env.solver().setLinearEquationSolverPrecision(env.solver().game().getPrecision()); + } + + + template + storm::OptimizationDirection GameAbstractionSolver::getOptimizationDirection(bool maximizing) { + return maximizing ? 
storm::OptimizationDirection::Maximize : storm::OptimizationDirection::Minimize; + } + + + template + void GameAbstractionSolver::solve( + storm::storage::BitVector quotient_choice_mask, + bool player1_maximizing, bool player2_maximizing + ) { + if(profiling_enabled) { + this->timer_total.start(); + } + + auto quotient_num_states = this->quotient.getNumberOfStates(); + auto quotient_num_choices = this->quotient.getNumberOfChoices(); + auto quotient_initial_state = *(this->quotient.getInitialStates().begin()); + + ItemTranslator state_to_player1_state(quotient_num_states); + ItemKeyTranslator state_action_to_player2_state(quotient_num_states); + std::vector> player1_state_to_actions; + std::vector> player2_state_to_choices; + + std::queue unexplored_states; + storm::storage::BitVector state_is_encountered(quotient_num_states); + unexplored_states.push(quotient_initial_state); + state_is_encountered.set(quotient_initial_state,true); + auto const& quotient_row_group_indices = this->quotient.getTransitionMatrix().getRowGroupIndices(); + while(not unexplored_states.empty()) { + auto state = unexplored_states.front(); + unexplored_states.pop(); + auto player1_state = state_to_player1_state.translate(state); + player1_state_to_actions.resize(state_to_player1_state.numTranslations()); + for(auto choice = quotient_row_group_indices[state]; choice < quotient_row_group_indices[state+1]; choice++) { + if(not quotient_choice_mask[choice]) { + continue; + } + auto action = choice_to_action[choice]; + player1_state_to_actions[player1_state].insert(action); + auto player2_state = state_action_to_player2_state.translate(state,action); + player2_state_to_choices.resize(state_action_to_player2_state.numTranslations()); + player2_state_to_choices[player2_state].push_back(choice); + for(auto state_dst: this->choice_to_destinations[choice]) { + if(state_is_encountered[state_dst]) { + continue; + } + unexplored_states.push(state_dst); + state_is_encountered.set(state_dst,true); + } + } + 
} + auto player1_num_states = state_to_player1_state.numTranslations(); + auto player2_num_states = state_action_to_player2_state.numTranslations(); + + // add fresh target states + auto player1_target_state = player1_num_states++; + auto player2_target_state = player2_num_states++; + + // build the matrix of Player 1 + std::vector player1_choice_to_action; + storm::storage::SparseMatrixBuilder player1_matrix_builder(0,0,0,false,true); + uint64_t player1_num_rows = 0; + for(uint64_t player1_state=0; player1_state player2_choice_to_quotient_choice; + storm::storage::SparseMatrixBuilder player2_matrix_builder(0,0,0,false,true); + // build the reward vector + std::vector player2_row_rewards; + uint64_t player2_num_rows = 0; + for(uint64_t player2_state=0; player2_statequotient.getTransitionMatrix().getRow(choice)) { + auto state_dst = entry.getColumn(); + auto player1_state_dst = state_to_player1_state.translate(state_dst); + player2_matrix_builder.addNextValue(player2_num_rows,player1_state_dst,entry.getValue()); + } + player2_choice_to_quotient_choice.push_back(choice); + player2_row_rewards.push_back(0); + player2_num_rows++; + } + } + } + // fresh target state of Player 2: transition to the target state of Player 1 with zero reward + player2_matrix_builder.newRowGroup(player2_num_rows); + player2_matrix_builder.addNextValue(player2_num_rows,player1_target_state,1); + player2_choice_to_quotient_choice.push_back(quotient_num_choices); + player2_row_rewards.push_back(0); + player2_num_rows++; + auto player2_matrix = player2_matrix_builder.build(); + + // solve the game + auto solver = storm::solver::GameSolverFactory().create(env, player1_matrix, player2_matrix); + solver->setTrackSchedulers(true); + auto player1_direction = this->getOptimizationDirection(player1_maximizing); + auto player2_direction = this->getOptimizationDirection(player2_maximizing); + std::vector player1_state_values(player1_num_states,0); + if(profiling_enabled) { + 
this->timer_game_solving.start(); + } + solver->solveGame(this->env, player1_direction, player2_direction, player1_state_values, player2_row_rewards); + if(profiling_enabled) { + this->timer_game_solving.stop(); + } + auto player1_choices = solver->getPlayer1SchedulerChoices(); + auto player2_choices = solver->getPlayer2SchedulerChoices(); + + // collect all the results + std::fill(this->solution_state_values.begin(),this->solution_state_values.end(),0); + std::fill(this->solution_state_to_player1_action.begin(),this->solution_state_to_player1_action.end(),quotient_num_actions); + std::fill(this->solution_state_to_quotient_choice.begin(),this->solution_state_to_quotient_choice.end(),quotient_num_choices); + + auto const& player1_matrix_row_group_indices = player1_matrix.getRowGroupIndices(); + auto const& player2_matrix_row_group_indices = player2_matrix.getRowGroupIndices(); + + for(uint64_t player1_state=0; player1_statesolution_state_values[state] = player1_state_values[player1_state]; + + // get action selected by Player 1 + auto player1_choice = player1_matrix_row_group_indices[player1_state] + player1_choices[player1_state]; + auto player1_action = player1_choice_to_action[player1_choice]; + this->solution_state_to_player1_action[state] = player1_action; + + if(this->state_is_target[state]) { + auto state_only_choice = quotient_row_group_indices[state]; + this->solution_state_to_quotient_choice[state] = state_only_choice; + continue; + } + + // get action selected by Player 2 and map it to the quotient choice + auto player2_state = state_action_to_player2_state.translate(state,player1_action); + auto player2_choice = player2_matrix_row_group_indices[player2_state]+player2_choices[player2_state]; + auto quotient_choice = player2_choice_to_quotient_choice[player2_choice]; + this->solution_state_to_quotient_choice[state] = quotient_choice; + } + + if(profiling_enabled) { + this->timer_total.stop(); + } + + this->solution_value = 
this->solution_state_values[quotient_initial_state]; + + } + + + + template + void GameAbstractionSolver::enableProfiling(bool enable) { + profiling_enabled = enable; + } + + template + void GameAbstractionSolver::printProfiling() { + std::cout << "[s] total: " << this->timer_total << std::endl; + std::cout << "[s] game solving: " << this->timer_game_solving << std::endl; + } + + + template class GameAbstractionSolver; +} \ No newline at end of file diff --git a/payntbind/src/synthesis/pomdp_family/GameAbstractionSolver.h b/payntbind/src/synthesis/pomdp_family/GameAbstractionSolver.h new file mode 100644 index 000000000..241f63b6c --- /dev/null +++ b/payntbind/src/synthesis/pomdp_family/GameAbstractionSolver.h @@ -0,0 +1,112 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "src/synthesis/translation/ItemTranslator.h" +#include "src/synthesis/translation/ItemKeyTranslator.h" + +namespace synthesis { + + + /** + * Given an MDP having multiple variants of actions, create an MDP in which this variant selection is randomized. + */ + template + std::pair>,std::vector> randomizeActionVariant( + storm::models::sparse::Model const& model, + std::vector>> const& state_action_choices + ); + + template + class GameAbstractionSolver { + + /* + - Player 1. States are states of the quotient model + a fresh target state. In each state s Player 1 has a + choice of an action a, which leads to state (s,a) of Player 2. In their fresh target state, Player 1 + transitions to the fresh target state of Player 2. + - Player 2. States are pairs (s,a), where s is the state of the quotient and a is the action selected in s by + Player 1. In state (s,a), Player 2 chooses the color of a to be executed. In state (s,*) where s is the + target state of the quotient, Player 2 receives reward 1 and executes action available in this state. In + their fresh target state, Player 2 loops back to the target state of Player 1. 
+ */ + + public: + + /** + * Create game abstraction solver. + * @param quotient The quotient MDP. Sub-MDPs from the quotient will be used to construct sub-games. + * @param quotient_num_actions The total number of distinct actions in the quotient. + * @param choice_to_action For each row of the quotient, the associated action. + * @param target_label Label of the target states. + * @param precision Game solving precision. + */ + GameAbstractionSolver( + storm::models::sparse::Model const& quotient, + uint64_t quotient_num_actions, + std::vector const& choice_to_action, + std::string const& target_label, + double precision + ); + + /** + * Solve the game induced by the sub-MDP. + * @param quotient_choice_mask Choices of the quotient that remained in the sub-MDP. + */ + void solve( + storm::storage::BitVector quotient_choice_mask, + bool player1_maximizing, bool player2_maximizing + ); + + /** State values for the solution. */ + std::vector solution_state_values; + /** Solution value of the game. */ + double solution_value; + + /** + * For each state, an action selected by Player 1. State s contains quotient_num_actions if the action was not + * set. + */ + std::vector solution_state_to_player1_action; + /** + * For each state, a choice selected by Player 1 & Player 2. State s contains quotient_num_choices if the + * choice was not set. + */ + std::vector solution_state_to_quotient_choice; + + // Profiling + void enableProfiling(bool enable); + void printProfiling(); + + private: + + storm::models::sparse::Model const& quotient; + uint64_t quotient_num_actions; + std::vector choice_to_action; + + /** Identification of target states. */ + storm::storage::BitVector state_is_target; + + /** For each state of the quotient, a list of actions associated with its rows. */ + // std::vector> state_to_actions; + /** For each choice of the quotient, its destinations. */ + std::vector> choice_to_destinations; + + /** Solver environment. 
*/ + storm::Environment env; + + void setupSolverEnvironment(double precisions); + storm::OptimizationDirection getOptimizationDirection(bool maximizing); + + // Profiling + bool profiling_enabled = false; + storm::utility::Stopwatch timer_total; + storm::utility::Stopwatch timer_game_solving; + + }; +} \ No newline at end of file diff --git a/payntbind/src/synthesis/pomdp_family/ObservationEvaluator.cpp b/payntbind/src/synthesis/pomdp_family/ObservationEvaluator.cpp new file mode 100644 index 000000000..1fd91e2a9 --- /dev/null +++ b/payntbind/src/synthesis/pomdp_family/ObservationEvaluator.cpp @@ -0,0 +1,109 @@ +#include "ObservationEvaluator.h" + +#include +#include +#include +#include + +namespace synthesis { + + template + ObservationEvaluator::ObservationEvaluator( + storm::prism::Program & prism, + storm::models::sparse::Model const& model + ) { + // substitute constants and simplify formulas in the program + prism = prism.substituteConstantsFormulas(true,true); + + // identify names and types of observation labels + this->num_obs_expressions = prism.getNumberOfObservationLabels(); + this->obs_expr_label.resize(this->num_obs_expressions); + this->obs_expr_is_boolean.resize(this->num_obs_expressions); + + for(uint32_t o = 0; o < this->num_obs_expressions; o++) { + auto const& obs_label = prism.getObservationLabels()[o]; + obs_expr_label[o] = obs_label.getName(); + auto const& obs_expr = obs_label.getStatePredicateExpression(); + STORM_LOG_THROW(obs_expr.hasBooleanType() or obs_expr.hasIntegerType(), storm::exceptions::InvalidTypeException, + "expected boolean or integer observation expression"); + this->obs_expr_is_boolean[o] = obs_expr.hasBooleanType(); + } + + // evaluate observation expression for each state valuation + storm::expressions::ExpressionEvaluator evaluator(prism.getManager()); + auto const& state_valuations = model.getStateValuations(); + // associate each evaluation with the unique observation class + 
this->state_to_obs_class.resize(model.getNumberOfStates()); + this->num_obs_classes = 0; + for(uint64_t state = 0; state < model.getNumberOfStates(); state++) { + + // collect state valuation into evaluator + for(auto it = state_valuations.at(state).begin(); it != state_valuations.at(state).end(); ++it) { + // we pass Jani variables to the evaluator, but it seems to work, perhaps it works with variable names + auto const& var = it.getVariable(); + if(it.isBoolean()) { + evaluator.setBooleanValue(var, it.getBooleanValue()); + } else if(it.isInteger()) { + evaluator.setIntegerValue(var, it.getIntegerValue()); + } else { + // this is a rational variable: we skip it in a hope that this variable encodes reward value + // which is not relevant for the observation + // evaluator.setRationalValue(var, it.getRationalValue()); + } + } + + // evaluate observation expressions and assign class + storm::storage::BitVector evaluation(OBS_EXPR_VALUE_SIZE*num_obs_expressions); + for (uint32_t o = 0; o < num_obs_expressions; o++) { + evaluation.setFromInt( + OBS_EXPR_VALUE_SIZE*o, + OBS_EXPR_VALUE_SIZE, + evaluator.asInt(prism.getObservationLabels()[o].getStatePredicateExpression()) + ); + } + auto result = this->obs_evaluation_to_class.insert(std::make_pair(evaluation,this->num_obs_classes)); + if(not result.second) { + // existing evaluation + this->state_to_obs_class[state] = result.first->second; + } else { + // new evaluation + this->state_to_obs_class[state] = this->num_obs_classes; + this->obs_class_to_evaluation.push_back(evaluation); + this->num_obs_classes++; + } + } + } + + template + uint32_t ObservationEvaluator::observationClassValue(uint32_t obs_class, uint32_t obs_expr) { + return this->obs_class_to_evaluation[obs_class].getAsInt(OBS_EXPR_VALUE_SIZE*obs_expr,OBS_EXPR_VALUE_SIZE); + } + + + template + std::shared_ptr> ObservationEvaluator::addObservationsToSubMdp( + storm::models::sparse::Mdp const& sub_mdp, + std::vector state_sub_to_full + ) { + + 
storm::storage::sparse::ModelComponents components; + components.transitionMatrix = sub_mdp.getTransitionMatrix(); + components.stateLabeling = sub_mdp.getStateLabeling(); + components.rewardModels = sub_mdp.getRewardModels(); + components.choiceLabeling = sub_mdp.getChoiceLabeling(); + + std::vector observability_classes(sub_mdp.getNumberOfStates()); + for(uint64_t state = 0; state < sub_mdp.getNumberOfStates(); state++) { + observability_classes[state] = this->state_to_obs_class[state_sub_to_full[state]]; + } + components.observabilityClasses = observability_classes; + + auto pomdp = storm::models::sparse::Pomdp(std::move(components)); + auto pomdp_canonic = storm::transformer::MakePOMDPCanonic(pomdp).transform(); + return pomdp_canonic; + // return std::make_shared>(std::move(components)); + } + + + template class ObservationEvaluator; +} diff --git a/payntbind/src/synthesis/pomdp_family/ObservationEvaluator.h b/payntbind/src/synthesis/pomdp_family/ObservationEvaluator.h new file mode 100644 index 000000000..2606f58f0 --- /dev/null +++ b/payntbind/src/synthesis/pomdp_family/ObservationEvaluator.h @@ -0,0 +1,58 @@ +#pragma once + +#include "storm/storage/prism/Program.h" +#include "storm/models/sparse/Model.h" +#include "storm/models/sparse/Mdp.h" +#include "storm/models/sparse/Pomdp.h" +#include "storm/storage/BitVector.h" + +namespace synthesis { + + template + class ObservationEvaluator { + + public: + + ObservationEvaluator( + storm::prism::Program & prism, + storm::models::sparse::Model const& model + ); + + /** Number of observation expressions. */ + uint32_t num_obs_expressions; + /** For each observation expression its label. */ + std::vector obs_expr_label; + /** For each observation expression whether it is boolean. */ + std::vector obs_expr_is_boolean; + + /** Number of observation classes. */ + uint32_t num_obs_classes = 0; + /** For each state its observation class. 
*/ + std::vector state_to_obs_class; + + /** Get the value of the observation expression in the given observation class. */ + uint32_t observationClassValue(uint32_t obs_class, uint32_t obs_expr); + + /** + * Create a sub-POMDP from the given sub-MDP by associating its states with observations. + * @param sub_mdp a sub-MDP + * @param state_sub_to_full for each state of the sub-MDP the index of the original state + */ + std::shared_ptr> addObservationsToSubMdp( + storm::models::sparse::Mdp const& sub_mdp, + std::vector state_sub_to_full + ); + + // TODO observation valuations + + private: + /** Bitwidth of observation expression value size. */ + static const int OBS_EXPR_VALUE_SIZE = 64; + /** Mapping of observation expressions evaluation to a unique observation class. */ + std::map obs_evaluation_to_class; + /** Mapping of observation class to observation expressions evaluation. */ + std::vector obs_class_to_evaluation; + + }; + +} \ No newline at end of file diff --git a/payntbind/src/synthesis/pomdp_family/ProductPomdpFsc.cpp b/payntbind/src/synthesis/pomdp_family/ProductPomdpFsc.cpp new file mode 100644 index 000000000..ad2a1fb5d --- /dev/null +++ b/payntbind/src/synthesis/pomdp_family/ProductPomdpFsc.cpp @@ -0,0 +1,130 @@ +#include "ProductPomdpFsc.h" +#include "src/synthesis/translation/componentTranslations.h" + +namespace synthesis { + + + template + ProductPomdpFsc::ProductPomdpFsc( + storm::models::sparse::Model const& quotient, + std::vector state_to_obs_class, + uint64_t num_actions, + std::vector choice_to_action + ) : quotient(quotient), state_to_obs_class(state_to_obs_class), + num_actions(num_actions), choice_to_action(choice_to_action) { + + this->state_translator = ItemKeyTranslator(); + this->state_action_choices.resize(this->quotient.getNumberOfStates()); + auto const& row_group_indices = this->quotient.getTransitionMatrix().getRowGroupIndices(); + for(uint64_t state = 0; state < this->quotient.getNumberOfStates(); state++) { + 
this->state_action_choices[state].resize(this->num_actions); + for (uint64_t row = row_group_indices[state]; row < row_group_indices[state+1]; row++) { + uint64_t action = this->choice_to_action[row]; + this->state_action_choices[state][action].insert(row); + } + } + } + + template + uint64_t ProductPomdpFsc::numberOfTranslatedStates() { + return this->state_translator.numTranslations(); + } + + template + uint64_t ProductPomdpFsc::numberOfTranslatedChoices() { + return this->product_choice_to_choice.size(); + } + + template + uint64_t ProductPomdpFsc::translateInitialState() { + uint64_t initial_state = *(this->quotient.getInitialStates().begin()); + uint64_t initial_memory = 0; + return this->state_translator.translate(initial_state,initial_memory); + } + + + template + void ProductPomdpFsc::buildStateSpace( + std::vector> action_function, + std::vector> update_function + ) { + this->state_translator.resize(this->quotient.getNumberOfStates()); + auto translated_state = this->translateInitialState(); + while(true) { + auto[state,memory] = this->state_translator.retrieve(translated_state); + auto observation = this->state_to_obs_class[state]; + auto action = action_function[memory][observation]; + auto memory_dst = update_function[memory][observation]; + for(auto choice: this->state_action_choices[state][action]) { + for(auto const &entry: this->quotient.getTransitionMatrix().getRow(choice)) { + auto state_dst = entry.getColumn(); + this->state_translator.translate(state_dst,memory_dst); + } + } + translated_state++; + if(translated_state >= this->numberOfTranslatedStates()) { + break; + } + } + this->product_state_to_state = this->state_translator.translationToItem(); + } + + template + storm::storage::SparseMatrix ProductPomdpFsc::buildTransitionMatrix( + std::vector> action_function, + std::vector> update_function + ) { + this->product_choice_to_choice.clear(); + storm::storage::SparseMatrixBuilder builder(0, 0, 0, false, true, 0); + for(uint64_t 
translated_state = 0; translated_state < this->numberOfTranslatedStates(); translated_state++) { + builder.newRowGroup(this->numberOfTranslatedChoices()); + auto[state,memory] = this->state_translator.retrieve(translated_state); + auto observation = this->state_to_obs_class[state]; + auto action = action_function[memory][observation]; + auto memory_dst = update_function[memory][observation]; + for(auto choice: this->state_action_choices[state][action]) { + auto product_choice = this->numberOfTranslatedChoices(); + this->product_choice_to_choice.push_back(choice); + for(auto const &entry: this->quotient.getTransitionMatrix().getRow(choice)) { + auto translated_dst = this->state_translator.translate(entry.getColumn(),memory_dst); + builder.addNextValue(product_choice, translated_dst, entry.getValue()); + } + } + } + + return builder.build(); + } + + + template + void ProductPomdpFsc::applyFsc( + std::vector> action_function, + std::vector> update_function + ) { + this->buildStateSpace(action_function,update_function); + storm::storage::sparse::ModelComponents components; + components.stateLabeling = synthesis::translateStateLabeling( + this->quotient,this->state_translator.translationToItem(),this->translateInitialState() + ); + + components.transitionMatrix = this->buildTransitionMatrix(action_function,update_function); + storm::storage::BitVector translated_choice_mask(this->numberOfTranslatedChoices(),true); + components.choiceLabeling = synthesis::translateChoiceLabeling(this->quotient,this->product_choice_to_choice,translated_choice_mask); + for (auto const& reward_model : this->quotient.getRewardModels()) { + auto new_reward_model = synthesis::translateRewardModel(reward_model.second,this->product_choice_to_choice,translated_choice_mask); + components.rewardModels.emplace(reward_model.first, new_reward_model); + } + + this->clearMemory(); + this->product = std::make_shared>(std::move(components)); + } + + + template + void ProductPomdpFsc::clearMemory() { + 
this->state_translator.clear(); + } + + + template class ProductPomdpFsc; +} \ No newline at end of file diff --git a/payntbind/src/synthesis/pomdp_family/ProductPomdpFsc.h b/payntbind/src/synthesis/pomdp_family/ProductPomdpFsc.h new file mode 100644 index 000000000..ea7836d2c --- /dev/null +++ b/payntbind/src/synthesis/pomdp_family/ProductPomdpFsc.h @@ -0,0 +1,77 @@ +#pragma once + +#include "src/synthesis/translation/ItemKeyTranslator.h" + +#include +#include +#include + + +namespace synthesis { + + + template + class ProductPomdpFsc { + + public: + + ProductPomdpFsc( + storm::models::sparse::Model const& quotient, + std::vector state_to_obs_class, + uint64_t num_actions, + std::vector choice_to_action + ); + + /** + * Create a product of the quotient POMDP and the given FSC. + * @param action_function for each node in the FSC and for each observation class, an index of the action + * @param update_function for each node in the FSC and for each observation class, a memory update + */ + void applyFsc( + std::vector> action_function, + std::vector> update_function + ); + + /** The constructed product with an FSC. */ + std::shared_ptr> product; + /** For each choice of the product MDP, the original choice. */ + std::vector product_choice_to_choice; + /** For each state of the product MDP, the original state. */ + std::vector product_state_to_state; + + + private: + + /** The quotient model. */ + storm::models::sparse::Model const& quotient; + /** For each state of the quotient, its observation class. */ + std::vector state_to_obs_class; + /** Overall number of actions. */ + uint64_t num_actions; + /** For each choice of the quotient, the corresponding action. */ + std::vector choice_to_action; + /** For each state-action pair, a list of choices that implement this action. */ + std::vector>> state_action_choices; + + /** For each state and memory value, the state in the product.
*/ + ItemKeyTranslator state_translator; + uint64_t translateInitialState(); + + uint64_t numberOfTranslatedStates(); + uint64_t numberOfTranslatedChoices(); + + + void buildStateSpace( + std::vector> action_function, + std::vector> update_function + ); + storm::storage::SparseMatrix buildTransitionMatrix( + std::vector> action_function, + std::vector> update_function + ); + + + void clearMemory(); + + }; +} \ No newline at end of file diff --git a/payntbind/src/synthesis/pomdp_family/ProductPomdpRandomizedFsc.cpp b/payntbind/src/synthesis/pomdp_family/ProductPomdpRandomizedFsc.cpp new file mode 100644 index 000000000..7d24ade8e --- /dev/null +++ b/payntbind/src/synthesis/pomdp_family/ProductPomdpRandomizedFsc.cpp @@ -0,0 +1,157 @@ +#include "ProductPomdpRandomizedFsc.h" + +#include "src/synthesis/translation/componentTranslations.h" + +namespace synthesis { + + + template + ProductPomdpRandomizedFsc::ProductPomdpRandomizedFsc( + storm::models::sparse::Model const& quotient, + std::vector state_to_obs_class, + uint64_t num_actions, + std::vector choice_to_action + ) : quotient(quotient), state_to_obs_class(state_to_obs_class), + num_actions(num_actions), choice_to_action(choice_to_action) { + + this->state_translator = ItemKeyTranslator>(); + this->state_action_choices.resize(this->quotient.getNumberOfStates()); + auto const& row_group_indices = this->quotient.getTransitionMatrix().getRowGroupIndices(); + for(uint64_t state = 0; state < this->quotient.getNumberOfStates(); ++state) { + this->state_action_choices[state].resize(this->num_actions); + for (uint64_t row = row_group_indices[state]; row < row_group_indices[state+1]; row++) { + uint64_t action = this->choice_to_action[row]; + this->state_action_choices[state][action].insert(row); + } + } + } + + template + uint64_t ProductPomdpRandomizedFsc::numberOfTranslatedStates() { + return this->state_translator.numTranslations(); + } + + template + uint64_t ProductPomdpRandomizedFsc::numberOfTranslatedChoices() { + 
return this->product_choice_to_choice.size(); + } + + template + uint64_t ProductPomdpRandomizedFsc::translateInitialState() { + uint64_t initial_state = *(this->quotient.getInitialStates().begin()); + uint64_t initial_memory = 0; + uint64_t initial_action = this->num_actions; + return this->state_translator.translate(initial_state,std::make_pair(initial_memory,initial_action)); + } + + + template + void ProductPomdpRandomizedFsc::buildStateSpace( + std::vector>> action_function, + std::vector> update_function + ) { + this->state_translator.resize(this->quotient.getNumberOfStates()); + auto translated_state = this->translateInitialState(); + while(true) { + auto[state,memory_action] = this->state_translator.retrieve(translated_state); + auto[memory,action] = memory_action; + auto observation = this->state_to_obs_class[state]; + if(action == this->num_actions) { + for(auto [action,prob] : action_function[memory][observation]) { + this->state_translator.translate(state,std::make_pair(memory,action)); + } + } else { + auto memory_dst = update_function[memory][observation]; + for(auto choice: this->state_action_choices[state][action]) { + for(auto const &entry: this->quotient.getTransitionMatrix().getRow(choice)) { + auto state_dst = entry.getColumn(); + this->state_translator.translate(state_dst,std::make_pair(memory_dst,this->num_actions)); + } + } + } + translated_state++; + if(translated_state >= this->numberOfTranslatedStates()) { + break; + } + } + + this->product_state_to_state = this->state_translator.translationToItem(); + this->product_state_to_state_memory_action = this->state_translator.translationToItemKey(); + } + + template + storm::storage::SparseMatrix ProductPomdpRandomizedFsc::buildTransitionMatrix( + std::vector>> action_function, + std::vector> update_function + ) { + this->product_choice_to_choice.clear(); + auto quotient_num_choices = this->quotient.getNumberOfChoices(); + storm::storage::SparseMatrixBuilder builder(0, 0, 0, false, true, 0); + 
for(uint64_t translated_state = 0; translated_state < this->numberOfTranslatedStates(); ++translated_state) { + builder.newRowGroup(this->numberOfTranslatedChoices()); + auto[state,memory_action] = this->state_translator.retrieve(translated_state); + auto[memory,action] = memory_action; + auto observation = this->state_to_obs_class[state]; + if(action == this->num_actions) { + // 1 choice where we stochastically pick an action + auto product_choice = this->numberOfTranslatedChoices(); + this->product_choice_to_choice.push_back(quotient_num_choices); + for(auto [action,prob] : action_function[memory][observation]) { + auto translated_dst = this->state_translator.translate(state,std::make_pair(memory,action)); + builder.addNextValue(product_choice, translated_dst, prob); + } + } else { + auto memory_dst = update_function[memory][observation]; + for(auto choice: this->state_action_choices[state][action]) { + auto product_choice = this->numberOfTranslatedChoices(); + this->product_choice_to_choice.push_back(choice); + for(auto const &entry: this->quotient.getTransitionMatrix().getRow(choice)) { + auto translated_dst = this->state_translator.translate(entry.getColumn(),std::make_pair(memory_dst,this->num_actions)); + builder.addNextValue(product_choice, translated_dst, entry.getValue()); + } + } + } + } + + return builder.build(); + } + + + template + void ProductPomdpRandomizedFsc::applyFsc( + std::vector>> action_function, + std::vector> update_function + ) { + this->buildStateSpace(action_function,update_function); + storm::storage::sparse::ModelComponents components; + components.stateLabeling = synthesis::translateStateLabeling( + this->quotient,this->state_translator.translationToItem(),this->translateInitialState() + ); + + components.transitionMatrix = this->buildTransitionMatrix(action_function,update_function); + storm::storage::BitVector translated_choice_mask(this->numberOfTranslatedChoices(),true); + auto quotient_num_choices = 
this->quotient.getNumberOfChoices(); + for(uint64_t translated_choice = 0; translated_choicenumberOfTranslatedChoices(); ++translated_choice) { + if(this->product_choice_to_choice[translated_choice]==quotient_num_choices) { + translated_choice_mask.set(translated_choice,false); + } + } + components.choiceLabeling = synthesis::translateChoiceLabeling(this->quotient,this->product_choice_to_choice,translated_choice_mask); + for (auto const& reward_model : this->quotient.getRewardModels()) { + auto new_reward_model = synthesis::translateRewardModel(reward_model.second,this->product_choice_to_choice,translated_choice_mask); + components.rewardModels.emplace(reward_model.first, new_reward_model); + } + + this->clearMemory(); + this->product = std::make_shared>(std::move(components)); + } + + + template + void ProductPomdpRandomizedFsc::clearMemory() { + this->state_translator.clear(); + } + + + template class ProductPomdpRandomizedFsc; +} \ No newline at end of file diff --git a/payntbind/src/synthesis/pomdp_family/ProductPomdpRandomizedFsc.h b/payntbind/src/synthesis/pomdp_family/ProductPomdpRandomizedFsc.h new file mode 100644 index 000000000..72612eb47 --- /dev/null +++ b/payntbind/src/synthesis/pomdp_family/ProductPomdpRandomizedFsc.h @@ -0,0 +1,80 @@ +#pragma once + +#include "src/synthesis/translation/ItemKeyTranslator.h" + +#include +#include +#include + + +namespace synthesis { + + + template + class ProductPomdpRandomizedFsc { + + public: + + ProductPomdpRandomizedFsc( + storm::models::sparse::Model const& quotient, + std::vector state_to_obs_class, + uint64_t num_actions, + std::vector choice_to_action + ); + + /** + * Create a product of the quotient POMDP and the given FSC. 
+ * @param action_function for each node in the FSC and for each observation class, a dictionary containing + * entries (action,probability) + * @param update_function for each node in the FSC and for each observation class, a memory update + */ + void applyFsc( + std::vector>> action_function, + std::vector> update_function + ); + + /** The constructed product with an FSC. */ + std::shared_ptr> product; + /** For each choice of the product MDP, the original choice. */ + std::vector product_choice_to_choice; + /** For each state of the product MDP, the original state. */ + std::vector product_state_to_state; + /** For each state of the product MDP, the corresponding state-memory-action triple. */ + std::vector>> product_state_to_state_memory_action; + + + private: + + /** The quotient model. */ + storm::models::sparse::Model const& quotient; + /** For each state of the quotient, its observation class. */ + std::vector state_to_obs_class; + /** Overall number of actions. */ + uint64_t num_actions; + /** For each choice of the quotient, the corresponding action. */ + std::vector choice_to_action; + /** For each state-action pair, a list of choices that implement this action. */ + std::vector>> state_action_choices; + + /** For each state, maps memory-action selection to the state in the product. */ + ItemKeyTranslator> state_translator; + uint64_t translateInitialState(); + uint64_t numberOfTranslatedStates(); + + /** For each choice and memory value, the choice in the product.
*/ + ItemKeyTranslator choice_translator; + uint64_t numberOfTranslatedChoices(); + + void buildStateSpace( + std::vector>> action_function, + std::vector> update_function + ); + storm::storage::SparseMatrix buildTransitionMatrix( + std::vector>> action_function, + std::vector> update_function + ); + + void clearMemory(); + + }; +} \ No newline at end of file diff --git a/payntbind/src/synthesis/pomdp_family/bindings.cpp b/payntbind/src/synthesis/pomdp_family/bindings.cpp new file mode 100644 index 000000000..fa357ddf0 --- /dev/null +++ b/payntbind/src/synthesis/pomdp_family/bindings.cpp @@ -0,0 +1,59 @@ +#include "../synthesis.h" + +#include "ObservationEvaluator.h" +#include "ProductPomdpFsc.h" +#include "ProductPomdpRandomizedFsc.h" +#include "GameAbstractionSolver.h" + +void bindings_pomdp_family(py::module& m) { + + py::class_>(m, "ObservationEvaluator") + .def(py::init const& >(), py::arg("prism"), py::arg("model")) + .def_property_readonly("num_obs_expressions", [](synthesis::ObservationEvaluator& e) {return e.num_obs_expressions;} ) + .def_property_readonly("obs_expr_label", [](synthesis::ObservationEvaluator& e) {return e.obs_expr_label;} ) + .def_property_readonly("obs_expr_is_boolean", [](synthesis::ObservationEvaluator& e) {return e.obs_expr_is_boolean;} ) + .def_property_readonly("num_obs_classes", [](synthesis::ObservationEvaluator& e) {return e.num_obs_classes;} ) + .def_property_readonly("state_to_obs_class", [](synthesis::ObservationEvaluator& e) {return e.state_to_obs_class;} ) + .def("obs_class_value", &synthesis::ObservationEvaluator::observationClassValue, py::arg("obs_class"), py::arg("obs_expr")) + .def("add_observations_to_submdp", &synthesis::ObservationEvaluator::addObservationsToSubMdp, py::arg("mdp"), py::arg("state_sub_to_full")) + ; + + py::class_>(m, "ProductPomdpFsc") + .def( + py::init const&, std::vector, uint64_t, std::vector>(), + py::arg("quotient"), py::arg("state_to_obs_class"), py::arg("num_actions"), 
py::arg("choice_to_action") + ) + .def("apply_fsc", &synthesis::ProductPomdpFsc::applyFsc, py::arg("action_function"), py::arg("udate_function")) + .def_property_readonly("product", [](synthesis::ProductPomdpFsc& m) {return m.product;} ) + .def_property_readonly("product_choice_to_choice", [](synthesis::ProductPomdpFsc& m) {return m.product_choice_to_choice;} ) + .def_property_readonly("product_state_to_state", [](synthesis::ProductPomdpFsc& m) {return m.product_state_to_state;} ) + ; + + m.def("randomize_action_variant", &synthesis::randomizeActionVariant); + + py::class_>(m, "ProductPomdpRandomizedFsc") + .def( + py::init const&, std::vector, uint64_t, std::vector>(), + py::arg("quotient"), py::arg("state_to_obs_class"), py::arg("num_actions"), py::arg("choice_to_action") + ) + .def("apply_fsc", &synthesis::ProductPomdpRandomizedFsc::applyFsc, py::arg("action_function"), py::arg("udate_function")) + .def_property_readonly("product", [](synthesis::ProductPomdpRandomizedFsc& m) {return m.product;} ) + .def_property_readonly("product_choice_to_choice", [](synthesis::ProductPomdpRandomizedFsc& m) {return m.product_choice_to_choice;} ) + .def_property_readonly("product_state_to_state", [](synthesis::ProductPomdpRandomizedFsc& m) {return m.product_state_to_state;} ) + .def_property_readonly("product_state_to_state_memory_action", [](synthesis::ProductPomdpRandomizedFsc& m) {return m.product_state_to_state_memory_action;} ) + ; + + py::class_>(m, "GameAbstractionSolver") + .def( + py::init const&, uint64_t, std::vector const&, std::string const&, double>(), + py::arg("quotient"), py::arg("quoitent_num_actions"), py::arg("choice_to_action"), py::arg("target_label"), py::arg("precision") + ) + .def("solve", &synthesis::GameAbstractionSolver::solve) + .def_property_readonly("solution_state_values", [](synthesis::GameAbstractionSolver& solver) {return solver.solution_state_values;}) + .def_property_readonly("solution_value", [](synthesis::GameAbstractionSolver& solver) 
{return solver.solution_value;}) + .def_property_readonly("solution_state_to_player1_action", [](synthesis::GameAbstractionSolver& solver) {return solver.solution_state_to_player1_action;}) + .def_property_readonly("solution_state_to_quotient_choice", [](synthesis::GameAbstractionSolver& solver) {return solver.solution_state_to_quotient_choice;}) + .def("enable_profiling", &synthesis::GameAbstractionSolver::enableProfiling) + .def("print_profiling", &synthesis::GameAbstractionSolver::printProfiling) + ; +} diff --git a/payntbind/src/synthesis/quotient/Coloring.cpp b/payntbind/src/synthesis/quotient/Coloring.cpp new file mode 100644 index 000000000..624f2c486 --- /dev/null +++ b/payntbind/src/synthesis/quotient/Coloring.cpp @@ -0,0 +1,103 @@ +#include "Coloring.h" + +#include + +namespace synthesis { + + +Coloring::Coloring( + Family const& family, std::vector const& row_groups, + std::vector>> choice_to_assignment +) : family(family), choice_to_assignment(choice_to_assignment) { + + auto num_choices = numChoices(); + colored_choices.resize(num_choices,false); + uncolored_choices.resize(num_choices,false); + for(uint64_t choice = 0; choice 1) { + is_simple = false; + } + } +} + +const uint64_t Coloring::numChoices() const { + return choice_to_assignment.size(); +} + +std::vector>> const& Coloring::getChoiceToAssignment() const { + return choice_to_assignment; +} + +std::vector const& Coloring::getStateToHoles() const { + return state_to_holes; +} + +BitVector const& Coloring::getUncoloredChoices() const { + return uncolored_choices; +} + +BitVector Coloring::selectCompatibleChoices(Family const& subfamily) const { + auto selection = BitVector(uncolored_choices); + for(auto choice: colored_choices) { + if(subfamily.includesAssignment(choice_to_assignment[choice])) { + selection.set(choice,true); + } + } + return selection; +} + + + +std::vector Coloring::collectHoleOptionsMask(BitVector const& choices) const { + + std::vector hole_options_mask(family.numHoles()); + 
for(uint64_t hole = 0; hole < family.numHoles(); ++hole) { + hole_options_mask[hole] = BitVector(family.holeNumOptionsTotal(hole),false); + } + for(auto choice: choices) { + for(auto const& [hole,option]: choice_to_assignment[choice]) { + hole_options_mask[hole].set(option,true); + } + } + return hole_options_mask; +} + + +std::vector> Coloring::collectHoleOptions(BitVector const& choices) const { + auto hole_options_mask = collectHoleOptionsMask(choices); + std::vector> hole_options(family.numHoles()); + for(uint64_t hole = 0; hole < family.numHoles(); ++hole) { + for(auto option: hole_options_mask[hole]) { + hole_options[hole].push_back(option); + } + } + return hole_options; +} + +} \ No newline at end of file diff --git a/payntbind/src/synthesis/quotient/Coloring.h b/payntbind/src/synthesis/quotient/Coloring.h new file mode 100644 index 000000000..66670ca47 --- /dev/null +++ b/payntbind/src/synthesis/quotient/Coloring.h @@ -0,0 +1,62 @@ +#pragma once + +#include "src/synthesis/quotient/Family.h" + +#include + +#include +#include +#include + +namespace synthesis { + +using BitVector = storm::storage::BitVector; + + +class Coloring { +public: + + Coloring( + Family const& family, std::vector const& row_groups, + std::vector>> choice_to_assignment + ); + + /** Get choice-to-assignment mapping. */ + std::vector>> const& getChoiceToAssignment() const; + /** Get a mapping from states to holes involved in its choices. */ + std::vector const& getStateToHoles() const; + /** Get mask of uncolored choices. */ + BitVector const& getUncoloredChoices() const; + + /** Get a mask of choices compatible with the family. */ + BitVector selectCompatibleChoices(Family const& subfamily) const; + /** For each hole, collect options (colors) involved in any of the given choices. */ + std::vector> collectHoleOptions(BitVector const& choices) const; + +protected: + + /** Reference to the unrefined family. */ + Family family; + /** For each choice, a list of hole-option pairs (colors). 
*/ + const std::vector>> choice_to_assignment; + + /** Number of choices in the quotient. */ + const uint64_t numChoices() const; + + /** For each state, identification of holes associated with its choices. */ + std::vector choice_to_holes; + /** For each state, identification of holes associated with its choices. */ + std::vector state_to_holes; + /** Whether all states have at most one hole associated with its choices. */ + bool is_simple; + + /** Choices not labeled by any hole. */ + BitVector uncolored_choices; + /** Choices labeled by some hole. */ + BitVector colored_choices; + + /** For each hole, collect options (colors) involved in any of the given choices. */ + std::vector collectHoleOptionsMask(BitVector const& choices) const; +}; + +} \ No newline at end of file diff --git a/payntbind/src/synthesis/quotient/Family.cpp b/payntbind/src/synthesis/quotient/Family.cpp new file mode 100644 index 000000000..1c485c0db --- /dev/null +++ b/payntbind/src/synthesis/quotient/Family.cpp @@ -0,0 +1,133 @@ +#include "Coloring.h" + +#include + + +namespace synthesis { + +Family::Family() { + // left intentionally blank +} + +Family::Family(Family const& other) { + hole_options = std::vector>(other.numHoles()); + hole_options_mask = std::vector(other.numHoles()); + for(uint64_t hole = 0; hole < numHoles(); ++hole) { + hole_options[hole] = other.holeOptions(hole); + hole_options_mask[hole] = other.holeOptionsMask(hole); + } +} + + +uint64_t Family::numHoles() const { + return hole_options.size(); +} + +void Family::addHole(uint64_t num_options) { + hole_options_mask.push_back(BitVector(num_options,true)); + std::vector options(num_options); + for(uint64_t option=0; option const& Family::holeOptions(uint64_t hole) const { + return hole_options[hole]; +} + +BitVector const& Family::holeOptionsMask(uint64_t hole) const { + return hole_options_mask[hole]; +} + + +void Family::holeSetOptions(uint64_t hole, std::vector const& options) { + hole_options[hole] = options; + 
hole_options_mask[hole].clear(); + for(auto option: options) { + hole_options_mask[hole].set(option); + } +} +void Family::holeSetOptions(uint64_t hole, BitVector const& options) { + hole_options[hole].clear(); + for(auto option: options) { + hole_options[hole].push_back(option); + } + hole_options_mask[hole] = options; +} + + + + + +uint64_t Family::holeNumOptions(uint64_t hole) const { + return hole_options[hole].size(); +} + +uint64_t Family::holeNumOptionsTotal(uint64_t hole) const { + return hole_options_mask[hole].size(); +} + +bool Family::holeContains(uint64_t hole, uint64_t option) const { + return hole_options_mask[hole][option]; +} + + +bool Family::isSubsetOf(Family const& other) const { + for(uint64_t hole = 0; hole < numHoles(); ++hole) { + if(not hole_options_mask[hole].isSubsetOf(other.holeOptionsMask(hole))) { + return false; + } + } + return true; +} + +bool Family::includesAssignment(std::vector const& hole_to_option) const { + for(uint64_t hole = 0; hole < numHoles(); ++hole) { + if(not hole_options_mask[hole][hole_to_option[hole]]) { + return false; + } + } + return true; +} + +bool Family::includesAssignment(std::map const& hole_to_option) const { + for(auto const& [hole,option]: hole_to_option) { + if(not hole_options_mask[hole][option]) { + return false; + } + } + return true; +} + +bool Family::includesAssignment(std::vector> const& hole_to_option) const { + for(auto const& [hole,option]: hole_to_option) { + if(not hole_options_mask[hole][option]) { + return false; + } + } + return true; +} + +std::vector::iterator Family::begin() { + return hole_options_mask.begin(); +} + +std::vector::iterator Family::end() { + return hole_options_mask.end(); +} + + +void Family::setChoices(BitVector const& choices) { + this->choices = BitVector(choices); +} + +void Family::setChoices(BitVector && choices) { + this->choices = choices; +} + +BitVector const& Family::getChoices() const { + return choices; +} + +} \ No newline at end of file diff --git 
a/payntbind/src/synthesis/quotient/Family.h b/payntbind/src/synthesis/quotient/Family.h new file mode 100644 index 000000000..c932e293d --- /dev/null +++ b/payntbind/src/synthesis/quotient/Family.h @@ -0,0 +1,63 @@ +#pragma once + +#include + +#include +#include +#include + +namespace synthesis { + +using BitVector = storm::storage::BitVector; + +class Family { +public: + + Family(); + Family(Family const& other); + + uint64_t numHoles() const; + void addHole(uint64_t num_options); + + std::vector const& holeOptions(uint64_t hole) const; + BitVector const& holeOptionsMask(uint64_t hole) const; + + void holeSetOptions(uint64_t hole, std::vector const& options); + void holeSetOptions(uint64_t hole, BitVector const& options); + // void holeSetOptions(uint64_t hole, BitVector&& options); + + uint64_t holeNumOptions(uint64_t hole) const; + uint64_t holeNumOptionsTotal(uint64_t hole) const; + bool holeContains(uint64_t hole, uint64_t option) const; + + bool includesAssignment(std::vector const& hole_to_option) const; + bool includesAssignment(std::map const& hole_to_option) const; + bool includesAssignment(std::vector> const& hole_to_option) const; + + bool isSubsetOf(Family const& other) const; + // uint64_t size(); + + // iterator over hole options + std::vector::iterator begin(); + std::vector::iterator end(); + + // choice operations + void setChoices(BitVector const& choices); + void setChoices(BitVector&& choices); + BitVector const& getChoices() const; + + +protected: + /** For each hole, a list of available options. */ + std::vector> hole_options; + /** For each hole, a mask of available options. */ + std::vector hole_options_mask; + + + /** Whether choices have been set for this family. */ + bool choices_set = false; + /** Bitvector of choices relevant to this family.
*/ + BitVector choices; +}; + +} \ No newline at end of file diff --git a/payntbind/src/synthesis/quotient/JaniChoices.cpp b/payntbind/src/synthesis/quotient/JaniChoices.cpp new file mode 100644 index 000000000..4615160e5 --- /dev/null +++ b/payntbind/src/synthesis/quotient/JaniChoices.cpp @@ -0,0 +1,80 @@ +#include "JaniChoices.h" + +#include +#include +#include +#include + +namespace synthesis { + + template + storm::models::sparse::ChoiceLabeling reconstructChoiceLabelsFromJani( + storm::models::sparse::Model const& model + ) { + uint64_t num_choices = model.getNumberOfChoices(); + auto const& co = model.getChoiceOrigins()->asJaniChoiceOrigins(); + auto const& jani = co.getModel(); + + storm::models::sparse::ChoiceLabeling choice_labeling(num_choices); + for (auto const& action : jani.getActions()) { + choice_labeling.addLabel(action.getName(), storm::storage::BitVector(num_choices,false)); + } + + for(uint64_t choice = 0; choice < num_choices; choice++) { + for(auto const& aut_edge: co.getEdgeIndexSet(choice)) { + auto [aut_index,edge_index] = jani.decodeAutomatonAndEdgeIndices(aut_edge); + auto action_index = jani.getAutomaton(aut_index).getEdge(edge_index).getActionIndex(); + choice_labeling.addLabelToChoice(jani.getAction(action_index).getName(), choice); + } + } + + return choice_labeling; + } + + /** + * Given model and its choice labeling, remove unused labels and make sure that all choices have at most 1 label. + * If the choice does not have a label, label it with the label derived from the provided prefix. 
+ * Make sure that for each state of the MDP, either all its rows have no labels or all its rows have exactly one + */ + template + void makeChoiceLabelingCanonic( + storm::models::sparse::Model const& model, + storm::models::sparse::ChoiceLabeling& choice_labeling, + std::string const& no_label_prefix + ) { + for(auto const& label: choice_labeling.getLabels()) { + if(choice_labeling.getChoices(label).empty()) { + choice_labeling.removeLabel(label); + } + } + storm::storage::BitVector no_label_labeling(model.getNumberOfChoices()); + for(uint64_t choice = 0; choice < model.getNumberOfChoices(); choice++) { + uint64_t choice_num_labels = choice_labeling.getLabelsOfChoice(choice).size(); + if(choice_num_labels > 1) { + throw std::invalid_argument("A choice of the model contains multiple labels."); + } + if(choice_num_labels == 0) { + no_label_labeling.set(choice,true); + } + } + std::string empty_label = choice_labeling.addUniqueLabel(no_label_prefix, no_label_labeling); + } + + template + std::shared_ptr> addChoiceLabelsFromJani(storm::models::sparse::Model const& model) { + storm::storage::sparse::ModelComponents components; + components.transitionMatrix = model.getTransitionMatrix(); + components.stateLabeling = model.getStateLabeling(); + if(model.hasStateValuations()) { + components.stateValuations = model.getStateValuations(); + } + storm::models::sparse::ChoiceLabeling choice_labeling = reconstructChoiceLabelsFromJani(model); + makeChoiceLabelingCanonic(model,choice_labeling,"empty_label"); + components.choiceLabeling = choice_labeling; + components.rewardModels = model.getRewardModels(); + return std::make_shared>(std::move(components)); + } + + + template std::shared_ptr> addChoiceLabelsFromJani(storm::models::sparse::Model const& model); +} diff --git a/payntbind/src/synthesis/quotient/JaniChoices.h b/payntbind/src/synthesis/quotient/JaniChoices.h new file mode 100644 index 000000000..692489c3d --- /dev/null +++ 
b/payntbind/src/synthesis/quotient/JaniChoices.h @@ -0,0 +1,14 @@ +#pragma once + +#include + +namespace synthesis { + + /** + * Given a model with Jani choice origins, reconstruct the corresponding choice labels. + */ + template + std::shared_ptr> addChoiceLabelsFromJani( + storm::models::sparse::Model const& model + ); +} \ No newline at end of file diff --git a/payntbind/src/synthesis/quotient/bindings.cpp b/payntbind/src/synthesis/quotient/bindings.cpp new file mode 100644 index 000000000..79f73100f --- /dev/null +++ b/payntbind/src/synthesis/quotient/bindings.cpp @@ -0,0 +1,298 @@ +#include "../synthesis.h" + +#include "JaniChoices.h" +#include "Family.h" +#include "Coloring.h" + +#include +#include +#include +#include + +#include + +namespace synthesis { + +template +std::pair>>> janiMapChoicesToHoleAssignments( + storm::models::sparse::Mdp const& mdp, + Family const& family, + std::map>> edge_to_hole_assignment +) { + + uint64_t num_choices = mdp.getNumberOfChoices(); + storm::storage::BitVector choice_is_valid(num_choices,true); + std::vector>> choice_to_hole_assignment(num_choices); + for(uint64_t choice = 0; choice < num_choices; ++choice) { + std::vector hole_set(family.numHoles(),false); + std::vector hole_option(family.numHoles()); + bool valid_choice = true; + for(auto const& edge: mdp.getChoiceOrigins()->asJaniChoiceOrigins().getEdgeIndexSet(choice)) { + auto hole_assignment = edge_to_hole_assignment.find(edge); + if(hole_assignment == edge_to_hole_assignment.end()) { + continue; + } + for(auto const& [hole,option]: hole_assignment->second) { + if(not hole_set[hole]) { + hole_option[hole] = option; + hole_set[hole] = true; + } else if(hole_option[hole] != option) { + valid_choice = false; + break; + } + } + if(not valid_choice) { + break; + } + } + if(not valid_choice) { + choice_is_valid.set(choice,false); + continue; + } + for(uint64_t hole = 0; hole < family.numHoles(); ++hole) { + if(not hole_set[hole]) { + continue; + } +
choice_to_hole_assignment[choice].push_back(std::make_pair(hole,hole_option[hole])); + } + } + return std::make_pair(choice_is_valid,choice_to_hole_assignment); +} + + +template +std::vector> computeChoiceDestinations(storm::models::sparse::Mdp const& mdp) { + uint64_t num_choices = mdp.getNumberOfChoices(); + std::vector> choice_destinations(num_choices); + for(uint64_t choice = 0; choice < num_choices; ++choice) { + for(auto const& entry: mdp.getTransitionMatrix().getRow(choice)) { + choice_destinations[choice].push_back(entry.getColumn()); + } + } + return choice_destinations; +} + +template +std::vector schedulerToStateToGlobalChoice( + storm::storage::Scheduler const& scheduler, storm::models::sparse::Mdp const& sub_mdp, + std::vector choice_to_global_choice +) { + uint64_t num_states = sub_mdp.getNumberOfStates(); + std::vector state_to_choice(num_states); + auto const& nci = sub_mdp.getNondeterministicChoiceIndices(); + for(uint64_t state=0; state computeInconsistentHoleVariance( + Family const& family, + std::vector const& row_groups, std::vector const& choice_to_global_choice, std::vector const& choice_to_value, + Coloring const& coloring, std::map> const& hole_to_inconsistent_options, + std::vector const& state_to_expected_visits +) { + + auto num_holes = family.numHoles(); + std::vector hole_to_inconsistent_options_mask(num_holes); + for(uint64_t hole=0; hole hole_difference_avg(num_holes,0); + std::vector hole_states_affected(num_holes,0); + auto const& choice_to_assignment = coloring.getChoiceToAssignment(); + + std::vector hole_set(num_holes); + std::vector hole_min(num_holes); + std::vector hole_max(num_holes); + + auto num_states = row_groups.size()-1; + for(uint64_t state=0; state hole_max[hole]) { + hole_max[hole] = value; + } + } + } + } + + for(auto hole: inconsistent_holes) { + if(not hole_set[hole]) { + continue; + } + double difference = (hole_max[hole]-hole_min[hole])*state_to_expected_visits[state]; + hole_states_affected[hole] += 1; + 
hole_difference_avg[hole] += (difference-hole_difference_avg[hole]) / hole_states_affected[hole]; + } + std::fill(hole_set.begin(), hole_set.end(), false); + } + + std::map inconsistent_hole_variance; + for(auto hole: inconsistent_holes) { + inconsistent_hole_variance[hole] = hole_difference_avg[hole]; + } + + return inconsistent_hole_variance; +} + + + +/*storm::storage::BitVector keepReachableChoices( + storm::storage::BitVector enabled_choices, uint64_t initial_state, + std::vector const& row_groups, std::vector> const& choice_destinations +) { + + uint64_t num_states = row_groups.size()-1; + uint64_t num_choices = enabled_choices.size(); + + storm::storage::BitVector reachable_choices(num_choices,false); + storm::storage::BitVector state_visited(num_states,false); + + std::queue state_queue; + state_visited.set(initial_state,true); + state_queue.push(initial_state); + while(not state_queue.empty()) { + auto state = state_queue.front(); + state_queue.pop(); + for(uint64_t choice = row_groups[state]; choice < row_groups[state+1]; ++choice) { + if(not enabled_choices[choice]) { + continue; + } + reachable_choices.set(choice,true); + for(auto dst: choice_destinations[choice]) { + if(not state_visited[dst]) { + state_visited.set(dst,true); + state_queue.push(dst); + } + } + } + } + return reachable_choices; +}*/ + +// RA: I don't even understand why this needs to be optimized, but it does +storm::storage::BitVector policyToChoicesForFamily( + std::vector const& policy_choices, + storm::storage::BitVector const& family_choices +) { + storm::storage::BitVector choices(family_choices.size(),false); + for(auto choice : policy_choices) { + choices.set(choice,true); + } + return choices & family_choices; +} + + +/*std::pair,storm::storage::BitVector> fixPolicyForFamily( + std::vector const& policy, uint64_t invalid_action, + storm::storage::BitVector const& family_choices, + uint64_t initial_state, uint64_t num_choices, + std::vector> const& state_to_actions, + 
std::vector>> const& state_action_choices, + std::vector> const& choice_destinations +) { + + uint64_t num_states = state_to_actions.size(); + + std::vector policy_fixed(num_states,invalid_action); + storm::storage::BitVector choice_mask(num_choices,false); + + storm::storage::BitVector state_visited(num_states,false); + state_visited.set(initial_state,true); + + std::queue state_queue; + state_queue.push(initial_state); + while(not state_queue.empty()) { + auto state = state_queue.front(); + state_queue.pop(); + // get action executed in the state + auto action = policy[state]; + if(action == invalid_action) { + action = state_to_actions[state][0]; + } + policy_fixed[state] = action; + // expand through the choices that correspond to this action + for(auto choice: state_action_choices[state][action]) { + if(not family_choices[choice]) { + continue; + } + choice_mask.set(choice,true); + for(auto dst: choice_destinations[choice]) { + if(not state_visited[dst]) { + state_visited.set(dst,true); + state_queue.push(dst); + } + } + } + } + return std::make_pair(policy_fixed,choice_mask); +}*/ + +} + + +void bindings_coloring(py::module& m) { + + m.def("janiMapChoicesToHoleAssignments", &synthesis::janiMapChoicesToHoleAssignments); + m.def("addChoiceLabelsFromJani", &synthesis::addChoiceLabelsFromJani); + + m.def("computeChoiceDestinations", &synthesis::computeChoiceDestinations); + + m.def("schedulerToStateToGlobalChoice", &synthesis::schedulerToStateToGlobalChoice); + m.def("computeInconsistentHoleVariance", &synthesis::computeInconsistentHoleVariance); + + m.def("policyToChoicesForFamily", &synthesis::policyToChoicesForFamily); + + + py::class_(m, "Family") + .def(py::init<>(), "Constructor.") + .def(py::init(), "Constructor.", py::arg("other")) + .def("numHoles", &synthesis::Family::numHoles) + .def("addHole", &synthesis::Family::addHole) + + .def("holeOptions", &synthesis::Family::holeOptions) + .def("holeOptionsMask", &synthesis::Family::holeOptionsMask) + 
.def("holeSetOptions", py::overload_cast const&>(&synthesis::Family::holeSetOptions)) + .def("holeSetOptions", py::overload_cast(&synthesis::Family::holeSetOptions)) + .def("holeNumOptions", &synthesis::Family::holeNumOptions) + .def("holeNumOptionsTotal", &synthesis::Family::holeNumOptionsTotal) + .def("holeContains", &synthesis::Family::holeContains) + ; + + py::class_(m, "Coloring") + .def(py::init const&, std::vector>> >(), "Constructor.") + .def("getChoiceToAssignment", &synthesis::Coloring::getChoiceToAssignment) + .def("getStateToHoles", &synthesis::Coloring::getStateToHoles) + .def("getUncoloredChoices", &synthesis::Coloring::getUncoloredChoices) + .def("selectCompatibleChoices", &synthesis::Coloring::selectCompatibleChoices) + .def("collectHoleOptions", &synthesis::Coloring::collectHoleOptions) + ; + +} diff --git a/payntbind/src/synthesis/synthesis.cpp b/payntbind/src/synthesis/synthesis.cpp new file mode 100644 index 000000000..05f9c204a --- /dev/null +++ b/payntbind/src/synthesis/synthesis.cpp @@ -0,0 +1,13 @@ +#include "synthesis.h" + +void define_synthesis(py::module& m) { + define_helpers(m); + + bindings_pomdp(m); + bindings_decpomdp(m); + bindings_counterexamples(m); + bindings_pomdp_family(m); + + bindings_coloring(m); +} + diff --git a/payntbind/src/synthesis/synthesis.h b/payntbind/src/synthesis/synthesis.h new file mode 100644 index 000000000..6358537a0 --- /dev/null +++ b/payntbind/src/synthesis/synthesis.h @@ -0,0 +1,13 @@ +#pragma once + +#include "src/common.h" + +void define_synthesis(py::module& m); +void define_helpers(py::module &m); + +void bindings_pomdp(py::module &m); +void bindings_decpomdp(py::module &m); +void bindings_counterexamples(py::module &m); +void bindings_pomdp_family(py::module &m); + +void bindings_coloring(py::module &m); diff --git a/payntbind/src/synthesis/translation/ItemKeyTranslator.cpp b/payntbind/src/synthesis/translation/ItemKeyTranslator.cpp new file mode 100644 index 000000000..bce10560f --- /dev/null +++ 
b/payntbind/src/synthesis/translation/ItemKeyTranslator.cpp @@ -0,0 +1,67 @@ +#include "ItemKeyTranslator.h" + +namespace synthesis { + + template + ItemKeyTranslator::ItemKeyTranslator() : num_items(0) { + // left intentionally blank + } + + template + ItemKeyTranslator::ItemKeyTranslator(uint64_t num_items) : num_items(num_items) { + item_key_to_translation.resize(num_items); + } + + template + void ItemKeyTranslator::clear() { + item_key_to_translation.clear(); + translation_to_item_key.clear(); + } + + template + void ItemKeyTranslator::resize(uint64_t num_items) { + item_key_to_translation.resize(num_items); + } + + template + uint64_t ItemKeyTranslator::numTranslations() const { + return translation_to_item_key.size(); + } + + template + bool ItemKeyTranslator::hasTranslation(uint64_t item, K key) const { + return item_key_to_translation[item].find(key) != item_key_to_translation[item].end(); + } + + template + uint64_t ItemKeyTranslator::translate(uint64_t item, K key) { + auto new_translation = numTranslations(); + auto const& result = item_key_to_translation[item].try_emplace(key,new_translation); + if(result.second) { + translation_to_item_key.push_back(std::make_pair(item,key)); + } + return (*result.first).second; + } + + template + std::pair ItemKeyTranslator::retrieve(uint64_t translation) const { + return translation_to_item_key[translation]; + } + + template + std::vector> const& ItemKeyTranslator::translationToItemKey() const { + return translation_to_item_key; + } + + template + std::vector ItemKeyTranslator::translationToItem() const { + std::vector translation_to_item(numTranslations()); + for(uint64_t translation = 0; translation; + template class ItemKeyTranslator>; +} \ No newline at end of file diff --git a/payntbind/src/synthesis/translation/ItemKeyTranslator.h b/payntbind/src/synthesis/translation/ItemKeyTranslator.h new file mode 100644 index 000000000..ad38f4847 --- /dev/null +++ b/payntbind/src/synthesis/translation/ItemKeyTranslator.h 
@@ -0,0 +1,44 @@ +#pragma once + +#include +#include +#include +#include + +namespace synthesis { + + template + class ItemKeyTranslator { + public: + + ItemKeyTranslator(); + ItemKeyTranslator(uint64_t num_items); + void clear(); + void resize(uint64_t num_items); + + uint64_t numTranslations() const; + + /** Check if the item-key pair has a translation. */ + bool hasTranslation(uint64_t item, K key) const; + + /** Translate an item-key pair. If the pair does not have a translation, create and remember a new one. */ + uint64_t translate(uint64_t item, K key); + + /** Retrieve the item-key pair that has the given translation. */ + std::pair retrieve(uint64_t translation) const; + + /** Retrieve the current translation-to-item-key mapping. */ + std::vector> const& translationToItemKey() const; + + /** Construct the current translation-to-item mapping. */ + std::vector translationToItem() const; + + private: + + uint64_t num_items; + + std::vector> item_key_to_translation; + std::vector> translation_to_item_key; + }; + +} \ No newline at end of file diff --git a/payntbind/src/synthesis/translation/ItemTranslator.cpp b/payntbind/src/synthesis/translation/ItemTranslator.cpp new file mode 100644 index 000000000..f86a26a9a --- /dev/null +++ b/payntbind/src/synthesis/translation/ItemTranslator.cpp @@ -0,0 +1,48 @@ +#include "ItemTranslator.h" + +namespace synthesis { + + ItemTranslator::ItemTranslator() : num_items(0) { + // left intentionally blank + } + + ItemTranslator::ItemTranslator(uint64_t num_items) : num_items(num_items) { + item_to_translation.resize(num_items, num_items); + } + + void ItemTranslator::clear() { + num_items = 0; + item_to_translation.clear(); + translation_to_item.clear(); + } + + void ItemTranslator::resize(uint64_t num_items) { + item_to_translation.resize(num_items, num_items); + } + + uint64_t ItemTranslator::numTranslations() const { + return translation_to_item.size(); + } + + bool ItemTranslator::hasTranslation(uint64_t item) const { + 
return item_to_translation[item] != num_items; + } + + uint64_t ItemTranslator::translate(uint64_t item) { + uint64_t *translation = &(item_to_translation[item]); + if(*translation == num_items) { + *translation = numTranslations(); + translation_to_item.push_back(item); + } + return *translation; + } + + uint64_t ItemTranslator::retrieve(uint64_t translation) const { + return translation_to_item[translation]; + } + + std::vector const& ItemTranslator::translationToItem() const { + return translation_to_item; + } + +} \ No newline at end of file diff --git a/payntbind/src/synthesis/translation/ItemTranslator.h b/payntbind/src/synthesis/translation/ItemTranslator.h new file mode 100644 index 000000000..3100cbf59 --- /dev/null +++ b/payntbind/src/synthesis/translation/ItemTranslator.h @@ -0,0 +1,45 @@ +#pragma once + +#include +#include + +namespace synthesis { + + class ItemTranslator { + public: + + /** Create empty translator. */ + ItemTranslator(); + /** Create translator of specified size. */ + ItemTranslator(uint64_t num_items); + /** Remove all translations. */ + void clear(); + /** Prepare to translate \p num_items items. It is assumed that translator has currently size 0. */ + void resize(uint64_t num_items); + + /** Number of created translations. */ + uint64_t numTranslations() const; + /** Check if the item has a defined translation. */ + bool hasTranslation(uint64_t item) const; + /** Translate an item. If the item does not have a translation, create and remember a new one. */ + uint64_t translate(uint64_t item); + /** Retrieve the item that has the given translation. */ + uint64_t retrieve(uint64_t translation) const; + + /** Returns reverse mapping of translation to item. */ + std::vector const& translationToItem() const; + + private: + + /** Maximum number of items to be translated. */ + uint64_t num_items; + /** + * For each item, contains a translation. Item without previously defined translation has translation equal to + * \p num_items. 
+ */ + std::vector item_to_translation; + /** Reverse mapping of translation to item. Grows when new translations are created. */ + std::vector translation_to_item; + }; + +} \ No newline at end of file diff --git a/payntbind/src/synthesis/translation/componentTranslations.cpp b/payntbind/src/synthesis/translation/componentTranslations.cpp new file mode 100644 index 000000000..e0d33d178 --- /dev/null +++ b/payntbind/src/synthesis/translation/componentTranslations.cpp @@ -0,0 +1,85 @@ +#include "componentTranslations.h" + +#include + +namespace synthesis { + + template + storm::models::sparse::StateLabeling translateStateLabeling( + storm::models::sparse::Model const& model, + std::vector const& translated_to_original_state, + uint64_t translated_initial_state + ) { + auto translated_num_states = translated_to_original_state.size(); + storm::models::sparse::StateLabeling translated_labeling(translated_num_states); + for (auto const& label : model.getStateLabeling().getLabels()) { + translated_labeling.addLabel(label, storm::storage::BitVector(translated_num_states,false)); + } + for(uint64_t translated_state=0; translated_state + storm::models::sparse::ChoiceLabeling translateChoiceLabeling( + storm::models::sparse::Model const& model, + std::vector const& translated_to_original_choice, + storm::storage::BitVector const& translated_choice_mask + ) { + auto translated_num_choices = translated_to_original_choice.size(); + storm::models::sparse::ChoiceLabeling translated_labeling(translated_num_choices); + for (auto const& label : model.getChoiceLabeling().getLabels()) { + translated_labeling.addLabel(label, storm::storage::BitVector(translated_num_choices,false)); + } + for(auto translated_choice: translated_choice_mask) { + auto choice = translated_to_original_choice[translated_choice]; + for (auto const& label : model.getChoiceLabeling().getLabelsOfChoice(choice)) { + translated_labeling.addLabelToChoice(label,translated_choice); + } + } + return 
translated_labeling; + } + + template + storm::models::sparse::StandardRewardModel translateRewardModel( + storm::models::sparse::StandardRewardModel const& reward_model, + std::vector const& translated_to_original_choice, + storm::storage::BitVector const& translated_choice_mask + ) { + std::optional> state_rewards; + STORM_LOG_THROW(!reward_model.hasStateRewards(), storm::exceptions::NotSupportedException, "state rewards are currently not supported."); + STORM_LOG_THROW(!reward_model.hasTransitionRewards(), storm::exceptions::NotSupportedException, "transition rewards are currently not supported."); + + std::vector action_rewards(translated_to_original_choice.size()); + for(auto translated_choice: translated_choice_mask) { + auto choice = translated_to_original_choice[translated_choice]; + auto reward = reward_model.getStateActionReward(choice); + action_rewards[translated_choice] = reward; + } + return storm::models::sparse::StandardRewardModel(std::move(state_rewards), std::move(action_rewards)); + } + + + + template storm::models::sparse::StateLabeling translateStateLabeling( + storm::models::sparse::Model const& model, + std::vector const& translated_to_original_state, + uint64_t translated_initial_state); + template storm::models::sparse::ChoiceLabeling translateChoiceLabeling( + storm::models::sparse::Model const& model, + std::vector const& translated_to_original_choice, + storm::storage::BitVector const& translated_choice_mask); + template storm::models::sparse::StandardRewardModel translateRewardModel( + storm::models::sparse::StandardRewardModel const& reward_model, + std::vector const& translated_to_original_choice, + storm::storage::BitVector const& translated_choice_mask); + +} \ No newline at end of file diff --git a/payntbind/src/synthesis/translation/componentTranslations.h b/payntbind/src/synthesis/translation/componentTranslations.h new file mode 100644 index 000000000..a65f26aa6 --- /dev/null +++ 
b/payntbind/src/synthesis/translation/componentTranslations.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +namespace synthesis { + + template + storm::models::sparse::StateLabeling translateStateLabeling( + storm::models::sparse::Model const& model, + std::vector const& translated_to_original_state, + uint64_t translated_initial_state + ); + + template + storm::models::sparse::ChoiceLabeling translateChoiceLabeling( + storm::models::sparse::Model const& model, + std::vector const& translated_to_original_choice, + storm::storage::BitVector const& translated_choice_mask + ); + + template + storm::models::sparse::StandardRewardModel translateRewardModel( + storm::models::sparse::StandardRewardModel const& reward_model, + std::vector const& translated_to_original_choice, + storm::storage::BitVector const& translated_choice_mask + ); + +} \ No newline at end of file diff --git a/payntbind/src/synthesis/verification/MdpModelChecker.cpp b/payntbind/src/synthesis/verification/MdpModelChecker.cpp new file mode 100644 index 000000000..84f75c4f7 --- /dev/null +++ b/payntbind/src/synthesis/verification/MdpModelChecker.cpp @@ -0,0 +1,27 @@ +#include "MdpModelChecker.h" + +#include "storm/modelchecker/prctl/SparseMdpPrctlModelChecker.h" +#include "storm/exceptions/NotSupportedException.h" + +namespace synthesis { + + template + std::shared_ptr verifyMdp( + storm::Environment const& env, + std::shared_ptr> const& mdp, + storm::logic::Formula const& formula, + bool produce_schedulers + ) { + storm::modelchecker::CheckTask task(formula); + task.setProduceSchedulers(produce_schedulers); + storm::modelchecker::SparseMdpPrctlModelChecker> modelchecker(*mdp); + return modelchecker.check(env, task); + } + + template std::shared_ptr verifyMdp( + storm::Environment const& env, + std::shared_ptr> const& mdp, + storm::logic::Formula const& formula, + bool produce_schedulers + ); +} diff --git 
a/payntbind/src/synthesis/verification/MdpModelChecker.h b/payntbind/src/synthesis/verification/MdpModelChecker.h new file mode 100644 index 000000000..3be2d9390 --- /dev/null +++ b/payntbind/src/synthesis/verification/MdpModelChecker.h @@ -0,0 +1,18 @@ +#pragma once + +#include "storm/environment/Environment.h" +#include "storm/models/sparse/Mdp.h" +#include "storm/modelchecker/CheckTask.h" +#include "storm/modelchecker/results/CheckResult.h" + +namespace synthesis { + + template + std::shared_ptr verifyMdp( + storm::Environment const& env, + std::shared_ptr> const& mdp, + storm::logic::Formula const& formula, + bool produce_schedulers + ); + +}