Commit f2adf891 authored by Amelie Royer

MEMCP -> PAMCP

parent 9ddd8486
#ifndef AI_TOOLBOX_POMDP_MEMCP_HEADER_FILE
#define AI_TOOLBOX_POMDP_MEMCP_HEADER_FILE
#ifndef AI_TOOLBOX_POMDP_PAMCP_HEADER_FILE
#define AI_TOOLBOX_POMDP_PAMCP_HEADER_FILE
#include <AIToolbox/POMDP/Types.hpp>
#include <AIToolbox/ProbabilityUtils.hpp>
@@ -14,7 +14,7 @@ namespace AIToolbox {
#ifndef DOXYGEN_SKIP
// This is done to avoid bringing around the enable_if everywhere.
template <typename M, typename = typename std::enable_if<is_generative_model<M>::value>::type>
class MEMCP;
class PAMCP;
#endif
/**
@@ -22,7 +22,7 @@ namespace AIToolbox {
*
*/
template <typename M>
class MEMCP<M> {
class PAMCP<M> {
public:
using SampleBelief = std::vector<size_t>;
@@ -53,7 +53,7 @@ namespace AIToolbox {
* @param iterations The number of episodes to run before completion.
* @param exp The exploration constant. This parameter is VERY important in determining the final POMCP performance.
*/
MEMCP(const M& m, size_t beliefSize, unsigned iterations, double exp);
PAMCP(const M& m, size_t beliefSize, unsigned iterations, double exp);
/**
* @brief This function resets the internal graph and samples
@@ -291,11 +291,11 @@ namespace AIToolbox {
};
template <typename M>
MEMCP<M>::MEMCP(const M& m, size_t beliefSize, unsigned iter, double exp) : model_(m), S(model_.getS()), A(model_.getA()), O(model_.getO()), E(model_.getE()), beliefSize_(beliefSize), iterations_(iter),
PAMCP<M>::PAMCP(const M& m, size_t beliefSize, unsigned iter, double exp) : model_(m), S(model_.getS()), A(model_.getA()), O(model_.getO()), E(model_.getE()), beliefSize_(beliefSize), iterations_(iter),
exploration_(exp), graph_(), rand_(Impl::Seeder::getSeed()) {}
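As a hedged usage sketch (parameter values are illustrative; the session-start call mirrors make_initial_prediction in main_MEMDP.cpp further down in this commit):

```cpp
#include "AIToolBox/PAMCP.hpp"

// Assumes a generative MEMDP model type exposing getS/getA/getO/getE,
// as required by the constructor above.
template <typename Model>
size_t firstAction(const Model& model, unsigned horizon) {
    // Illustrative values: 1000 belief particles, 10000 search episodes,
    // exploration constant 10.0.
    AIToolbox::POMDP::PAMCP<Model> solver(model, 1000, 10000, 10.0);

    // Start a session from a uniform belief over environments.
    AIToolbox::POMDP::Belief envBelief(model.getE());
    envBelief.fill(1.0 / model.getE());
    return solver.sampleAction(envBelief, /* o */ 0, horizon, /* start_session */ true);
}
```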
template <typename M>
size_t MEMCP<M>::sampleAction(const Belief& be, size_t o, unsigned horizon, bool start_session /* false */) {
size_t PAMCP<M>::sampleAction(const Belief& be, size_t o, unsigned horizon, bool start_session /* false */) {
// Reset graph initially or with new belief (e.g. observation missing)
if (reset_belief) {
graph_ = BeliefNode(A);
@@ -328,7 +328,7 @@ namespace AIToolbox {
}
template <typename M>
size_t MEMCP<M>::sampleAction(size_t a, size_t o, unsigned horizon) {
size_t PAMCP<M>::sampleAction(size_t a, size_t o, unsigned horizon) {
// Update full graph
//update_fullgraph(graph_.children[a], a);
if (to_update) {
@@ -371,22 +371,8 @@ namespace AIToolbox {
template <typename M>
/*
void MEMCP<M>::update_fullgraph(ActionNode current, size_t a) {
auto & current_branch = fullgraph_;
// Browse history
for (auto it = history.begin(); it != history.end(); ++it) {
auto an = current_branch.children[std::get<0>(*it)];
current_branch = an.children[std::get<1>(*it)];
}
// Modify
current_branch.children.resize(A);
current_branch.children[a] = current;
}
*/
void MEMCP<M>::update_fullgraph(BeliefNode current, size_t a) {
void PAMCP<M>::update_fullgraph(BeliefNode current, size_t a) {
auto & current_branch = fullgraph_;
// Browse history
for (auto it = history.begin(); it != history.end(); ++it) {
@@ -395,15 +381,12 @@ namespace AIToolbox {
}
// Modify
current_branch = current;
//current_branch.children.resize(A);
//current_branch.children[a] = current;
}
template <typename M>
size_t MEMCP<M>::runSimulation(unsigned horizon) {
size_t PAMCP<M>::runSimulation(unsigned horizon) {
if ( !horizon ) return 0;
maxDepth_ = horizon;
@@ -417,7 +400,7 @@ namespace AIToolbox {
}
template <typename M>
double MEMCP<M>::simulate(BeliefNode & b, size_t s, unsigned depth) {
double PAMCP<M>::simulate(BeliefNode & b, size_t s, unsigned depth) {
b.N++;
auto begin = std::begin(b.children);
@@ -465,7 +448,7 @@ namespace AIToolbox {
}
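The elided body above follows the standard POMCP recursion (expand, recurse or roll out, then back the return up the tree). As a hedged standalone sketch of the backup statistic it maintains, consistent with how findBestA below reads V and N:

```cpp
// Standalone illustration, not code from this file.
struct Node { double V = 0.0; unsigned N = 0; };

// Fold one more simulated (discounted) return into a node's estimate:
// an incremental running average over all simulations through the node.
inline void backup(Node& n, double rew) {
    ++n.N;
    n.V += (rew - n.V) / static_cast<double>(n.N);
}
```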
template <typename M>
double MEMCP<M>::rollout(size_t s, unsigned depth) {
double PAMCP<M>::rollout(size_t s, unsigned depth) {
double rew = 0.0, totalRew = 0.0, gamma = 1.0;
std::uniform_int_distribution<size_t> generator(0, A-1);
@@ -480,13 +463,13 @@ namespace AIToolbox {
template <typename M>
template <typename Iterator>
Iterator MEMCP<M>::findBestA(Iterator begin, Iterator end) {
Iterator PAMCP<M>::findBestA(Iterator begin, Iterator end) {
return std::max_element(begin, end, [](const ActionNode & lhs, const ActionNode & rhs){ return lhs.V < rhs.V; });
}
template <typename M>
template <typename Iterator>
Iterator MEMCP<M>::findBestBonusA(Iterator begin, Iterator end, unsigned count) {
Iterator PAMCP<M>::findBestBonusA(Iterator begin, Iterator end, unsigned count) {
// Count here can be as low as 1.
// Since log(1) = 0, and 0/0 = error, we add 1.0.
double logCount = std::log(count + 1.0);
@@ -511,7 +494,7 @@ namespace AIToolbox {
}
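The truncated selector above presumably implements UCB1; a minimal standalone sketch of that rule, using the same log(count + 1) guard the comment describes (assumed details, not this file's exact body):

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <limits>
#include <vector>

struct ActionNode { double V = 0.0; unsigned N = 0; };

// UCB1: value plus an exploration bonus that decays with visits.
// `count` is the parent belief node's visit count, `exp` the constant.
inline size_t bestBonusAction(const std::vector<ActionNode>& as,
                              unsigned count, double exp) {
    const double logCount = std::log(count + 1.0);  // +1 keeps log(1)=0 cases finite
    auto score = [&](const ActionNode& n) {
        // Untried actions get an infinite bonus so each is expanded once.
        return n.N ? n.V + exp * std::sqrt(logCount / n.N)
                   : std::numeric_limits<double>::infinity();
    };
    auto best = std::max_element(as.begin(), as.end(),
        [&](const ActionNode& l, const ActionNode& r) { return score(l) < score(r); });
    return static_cast<size_t>(std::distance(as.begin(), best));
}
```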
template <typename M>
typename MEMCP<M>::SampleBelief MEMCP<M>::makeSampledBelief(const Belief & b) {
typename PAMCP<M>::SampleBelief PAMCP<M>::makeSampledBelief(const Belief & b) {
SampleBelief belief;
belief.reserve(beliefSize_);
@@ -522,42 +505,42 @@ namespace AIToolbox {
}
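The elided loop presumably draws beliefSize_ state particles i.i.d. from the continuous belief (AIToolbox's POMCP does this with sampleProbability from the ProbabilityUtils.hpp header included above). A standalone sketch of the same idea using only the standard library:

```cpp
#include <cstddef>
#include <random>
#include <vector>

// Draw n particles from a discrete belief over states.
std::vector<size_t> sampleParticles(const std::vector<double>& belief,
                                    size_t n, std::mt19937& rng) {
    std::discrete_distribution<size_t> pick(belief.begin(), belief.end());
    std::vector<size_t> particles;
    particles.reserve(n);
    for (size_t i = 0; i < n; ++i)
        particles.push_back(pick(rng));
    return particles;
}
```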
template <typename M>
void MEMCP<M>::setBeliefSize(size_t beliefSize) {
void PAMCP<M>::setBeliefSize(size_t beliefSize) {
beliefSize_ = beliefSize;
}
template <typename M>
void MEMCP<M>::setIterations(unsigned iter) {
void PAMCP<M>::setIterations(unsigned iter) {
iterations_ = iter;
}
template <typename M>
void MEMCP<M>::setExploration(double exp) {
void PAMCP<M>::setExploration(double exp) {
exploration_ = exp;
}
template <typename M>
const M& MEMCP<M>::getModel() const {
const M& PAMCP<M>::getModel() const {
return model_;
}
template <typename M>
const typename MEMCP<M>::BeliefNode& MEMCP<M>::getGraph() const {
const typename PAMCP<M>::BeliefNode& PAMCP<M>::getGraph() const {
return graph_;
}
template <typename M>
size_t MEMCP<M>::getBeliefSize() const {
size_t PAMCP<M>::getBeliefSize() const {
return beliefSize_;
}
template <typename M>
unsigned MEMCP<M>::getIterations() const {
unsigned PAMCP<M>::getIterations() const {
return iterations_;
}
template <typename M>
double MEMCP<M>::getExploration() const {
double PAMCP<M>::getExploration() const {
return exploration_;
}
}
@@ -41,9 +41,9 @@ void mainMEMDP(M model, std::string datafile_base, std::string algo, int horizon
}
testing_time = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - start).count() / 1000000.;
}
// MEMCP
else if (!algo.compare("memcp")) {
AIToolbox::POMDP::MEMCP<decltype(model)> solver( model, beliefSize, steps, exp);
// PAMCP
else if (!algo.compare("pamcp")) {
AIToolbox::POMDP::PAMCP<decltype(model)> solver( model, beliefSize, steps, exp);
training_time = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - start).count() / 1000000.;
start = std::chrono::high_resolution_clock::now();
std::cout << current_time_str() << " - Starting evaluation!\n" << std::flush;
@@ -100,7 +100,7 @@ int main(int argc, char* argv[]) {
assert(("Unvalid data mode", !(data.compare("reco") && data.compare("maze"))));
std::string algo = ((argc > 3) ? argv[3] : "pbvi");
std::transform(algo.begin(), algo.end(), algo.begin(), ::tolower);
assert(("Unvalid POMDP solver parameter", !(algo.compare("pbvi") && algo.compare("pomcp") && algo.compare("memcp"))));
assert(("Unvalid POMDP solver parameter", !(algo.compare("pbvi") && algo.compare("pomcp") && algo.compare("pamcp"))));
double discount = ((argc > 4) ? std::atof(argv[4]) : 0.95);
assert(("Unvalid discount parameter", discount > 0 && discount <= 1));
int steps = ((argc > 5) ? std::atoi(argv[5]) : 1000000);
@@ -119,7 +119,7 @@ if [ $MODE = "mdp" ]; then
# COMPILE
if [ "$COMPILE" = true ]; then
echo
echo "Compiling MDP model in mainMDP"
echo "Compiling mainMDP"
$GCC -O3 -Wl,-rpath,$STDLIB -DNITEMSPRM=$NITEMS -DHISTPRM=$HIST -DNPROFILESPRM=$PROFILES -std=c++11 mazemodel.cpp recomodel.cpp utils.cpp main_MDP.cpp -o mainMDP -I $AIINCLUDE -I $EIGEN -L $AIBUILD -l AIToolboxMDP -l AIToolboxPOMDP -l lpsolve55 -lz -lboost_iostreams
if [ $? -ne 0 ]; then
echo "Compilation failed!"
@@ -138,7 +138,7 @@ else
# COMPILE
if [ "$COMPILE" = true ]; then
echo
echo "Compiling MEMDP model in mainMEMDP"
echo "Compiling mainMEMDP"
$GCC -O3 -Wl,-rpath,$STDLIB -DNITEMSPRM=$NITEMS -DHISTPRM=$HIST -DNPROFILESPRM=$PROFILES -std=c++11 mazemodel.cpp recomodel.cpp utils.cpp main_MEMDP.cpp -o mainMEMDP -I $AIINCLUDE -I $EIGEN -L $LPSOLVE -L $AIBUILD -l AIToolboxMDP -l AIToolboxPOMDP -l lpsolve55 -lz -lboost_iostreams
if [ $? -ne 0 ]
then
@@ -21,7 +21,7 @@
#include <AIToolbox/MDP/Policies/Policy.hpp>
#include <AIToolbox/POMDP/Policies/Policy.hpp>
#include <AIToolbox/POMDP/Algorithms/POMCP.hpp>
#include "AIToolBox/MEMCP.hpp"
#include "AIToolBox/PAMCP.hpp"
#include "model.hpp"
@@ -97,11 +97,11 @@ AIToolbox::POMDP::Belief update_belief(AIToolbox::POMDP::Belief b, size_t a, siz
/*! \brief Returns the initial prediction and belief for a given model and solver.
*
* \param model the underlying model.
* \param solver the solver to evaluate (MDP policy, POMDP policy, POMCP or MEMCP).
* \param solver the solver to evaluate (MDP policy, POMDP policy, POMCP or PAMCP).
* \param horizon the horizon to predict for, if applicable.
* \param action_scores array to store the probability distribution over predictions, if applicable.
*
* \return belief the initial belief over states (or over environments for MEMCP).
* \return belief the initial belief over states (or over environments for PAMCP).
* \return prediction the initial prediction.
*/
// MDP
@@ -125,15 +125,15 @@ std::pair<AIToolbox::POMDP::Belief, size_t> make_initial_prediction(const Model&
return std::make_pair(belief, prediction);
}
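A hedged sketch of a call site for the PAMCP overload defined just below (variable names illustrative):

```cpp
// Prime the solver once per session; the overload itself builds the
// uniform environment belief and runs the first tree search.
std::vector<double> action_scores(model.getA(), 0.0);
auto init = make_initial_prediction(model, pamcp, horizon, action_scores);
AIToolbox::POMDP::Belief belief = init.first;
size_t prediction = init.second;
```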
// MEMCP
// PAMCP
template<typename M>
std::pair<AIToolbox::POMDP::Belief, size_t> make_initial_prediction(const Model& model, AIToolbox::POMDP::MEMCP<M> &memcp, int horizon, std::vector<double> &action_scores) {
std::pair<AIToolbox::POMDP::Belief, size_t> make_initial_prediction(const Model& model, AIToolbox::POMDP::PAMCP<M> &pamcp, int horizon, std::vector<double> &action_scores) {
size_t init_observation = 0;
AIToolbox::POMDP::Belief env_belief = AIToolbox::POMDP::Belief(model.getE());
env_belief.fill(1.0 / model.getE());
size_t prediction = memcp.sampleAction(env_belief, init_observation, horizon, true);
size_t prediction = pamcp.sampleAction(env_belief, init_observation, horizon, true);
auto & graph_ = memcp.getGraph();
auto & graph_ = pamcp.getGraph();
for (size_t a = 0; a < model.getA(); a++) {
action_scores.at(a) = graph_.children[a].V;
}
@@ -144,14 +144,14 @@ std::pair<AIToolbox::POMDP::Belief, size_t> make_initial_prediction(const Model&
/*! \brief Returns the prediction of the solver for a given action and observation -a-> o.
*
* \param model the underlying model.
* \param solver the solver to evaluate (MDP policy, POMDP policy, POMCP or MEMCP).
* \param solver the solver to evaluate (MDP policy, POMDP policy, POMCP or PAMCP).
* \param b current belief.
* \param o last seen observation.
* \param a last action.
* \param horizon the horizon to predict for, if applicable.
* \param action_scores array to store the probability distribution over predictions, if applicable.
*
* \return belief the initial belief over states (or over environments for MEMCP).
* \return belief the initial belief over states (or over environments for PAMCP).
* \return prediction the initial prediction.
*/
//MDP
@@ -171,11 +171,11 @@ size_t make_prediction(const Model& model, AIToolbox::POMDP::POMCP<M> &pomcp, AI
return prediction;
}
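And a matching sketch for the per-step PAMCP overload below, which advances the solver by the executed (action, observation) pair:

```cpp
// After executing `prediction` and observing `o`; the belief argument
// is carried along, but the PAMCP tree itself tracks the history.
prediction = make_prediction(model, pamcp, belief, o, prediction, horizon, action_scores);
```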
// MEMCP
// PAMCP
template<typename M>
size_t make_prediction(const Model& model, AIToolbox::POMDP::MEMCP<M> &memcp, AIToolbox::POMDP::Belief &b, size_t o, size_t a, int horizon, std::vector<double> &action_scores) {
size_t prediction = memcp.sampleAction(a, o, horizon);
auto & graph_ = memcp.getGraph();
size_t make_prediction(const Model& model, AIToolbox::POMDP::PAMCP<M> &pamcp, AIToolbox::POMDP::Belief &b, size_t o, size_t a, int horizon, std::vector<double> &action_scores) {
size_t prediction = pamcp.sampleAction(a, o, horizon);
auto & graph_ = pamcp.getGraph();
for (size_t action = 0; action < model.getA(); action++) {
action_scores.at(action) = graph_.children[action].V;
}
@@ -185,7 +185,7 @@ size_t make_prediction(const Model& model, AIToolbox::POMDP::MEMCP<M> &memcp, AI
/*! \brief Returns the accuracy and prediction of the solver's profile detection in the current state of the simulation.
*
* \param model the underlying model.
* \param solver the solver to evaluate (MDP policy, POMDP policy, POMCP or MEMCP).
* \param solver the solver to evaluate (MDP policy, POMDP policy, POMCP or PAMCP).
* \param b current belief.
* \param o last seen observation.
* \param cluster ground-truth cluster.
@@ -218,10 +218,10 @@ std::pair<double, double> identification_score(const Model& model, AIToolbox::PO
return std::make_pair(accuracy, 1.0 / rank);
}
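The PAMCP overload below scores identification by a majority vote over the particles of the sampled belief; a standalone sketch of that tally (the get_env mapping is passed in, mirroring model.get_env below):

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Count particles per environment; return the most-represented one
// (ties resolve to the lowest index).
template <typename GetEnv>
size_t majorityEnvironment(const std::vector<size_t>& particles,
                           size_t nEnvironments, GetEnv get_env) {
    std::vector<int> scores(nEnvironments, 0);
    for (size_t s : particles)
        scores.at(get_env(s))++;
    return static_cast<size_t>(std::distance(
        scores.begin(), std::max_element(scores.begin(), scores.end())));
}
```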
// MEMCP
// PAMCP
template<typename M>
std::pair<double, double> identification_score(const Model& model, AIToolbox::POMDP::MEMCP<M> memcp, AIToolbox::POMDP::Belief b, size_t o, int cluster) {
std::vector<size_t> sampleBelief = memcp.getGraph().belief;
std::pair<double, double> identification_score(const Model& model, AIToolbox::POMDP::PAMCP<M> pamcp, AIToolbox::POMDP::Belief b, size_t o, int cluster) {
std::vector<size_t> sampleBelief = pamcp.getGraph().belief;
std::vector<int> scores(model.getE());
for (auto it = begin(sampleBelief); it != end(sampleBelief); ++it) {
scores.at(model.get_env(*it))++;
@@ -438,7 +438,7 @@ void evaluate_interactive(int n_sessions,
mean_identification_precision[cluster] += identity_precision / session_length;
mean_total_reward[cluster] += total_reward / session_length;
// If Trap, do not count the rest
if (model.get_rep(state) != 1) {
continue;
}
// Normal execution, i.e. goal state
# [ReCA] readme
# Installation
@@ -58,7 +58,7 @@ Generate synthetic POMDP parameters to highlight the impact of using multiple en
* ``[--norm]`` If present, output transition probabilities are normalized.
* ``[--zip]`` If present, transitions are stored in an archive. Recommended for large state spaces.
* ``[--help]`` displays help about the script.
#### maze dataset
Generate POMDP parameters for a typical maze/path-finding problem with multiple environments.
@@ -96,8 +96,8 @@ The following variables can be configured at the beginning of the ``run.sh`` scr
./run.sh -m [1] -d [2] -n [3] -k [4] -u [5] -g [6] -s [7] -h [8] -e [9] -x [10] -b [11] -c -p -v
```
* ``[1]`` Model to use. Defaults to mdp. Available options are
* *mdp*. MDP model obtained by a weighted average of all the environments' transition probabilities and solved by Value iteration. The solver can be configured with
* ``[7]`` Number of iterations. Defaults to 1500.
* *pbvi*. Point-based value iteration optimized for the MEMDP structure, with options
* ``[8]`` Horizon parameter. Must be greater than 1. Defaults to 2.
@@ -114,7 +114,7 @@ The following variables can be configured at the beginning of the ``run.sh`` scr
* ``[5]`` User discretization level. Defaults to 0.
* *mz* recommendations.
* ``[3]`` Base name for the directory containing the corresponding MEMDP model parameters.
* *rd* (synthetic data recommendations)
* ``[3]`` Number of actions. Defaults to 4.
* ``[4]`` History length. Must be strictly greater than 1. Defaults to 2.
* ``[6]`` Discount Parameter. Must be strictly between 0 and 1. Defaults to 0.95.
@@ -127,7 +127,7 @@ The following variables can be configured at the beginning of the ``run.sh`` scr
synthetic recommendations, 10 environments, 10 actions, ~100 states
```bash
cd Data/
python prepare_synth.py --norm --zip -n 10 -k 2
cd ../Code/
./run.sh -m mdp -d rd -n 10 -k 2 -c
./run.sh -m pamcp -d rd -n 10 -k 2 -c
```