Commit ffa3ecaa authored by Amelie Royer

Improving PBVI phase 1

parent bd83621a
@@ -152,6 +152,7 @@ namespace AIToolbox {
// up). However, this is easily changeable, since the belief generator
// can be called multiple times to increase the size of the belief
// vector.
BeliefGenerator<M> bGen(model);
auto beliefs = bGen(beliefSize_);
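The comment above notes that the belief generator can be invoked several times to enlarge the belief vector. A minimal sketch of what that could look like (illustrative only, not part of this commit, and assuming the generator's call operator simply returns a list of sampled beliefs):
BeliefGenerator<M> bGen(model);
auto beliefs = bGen(beliefSize_);        // first batch of sampled beliefs
auto extra = bGen(beliefSize_);          // second batch from the same generator
// append the new samples to grow the belief vector before the next expansion
beliefs.insert(std::end(beliefs), std::begin(extra), std::end(extra));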
@@ -165,6 +166,7 @@ namespace AIToolbox {
bool useEpsilon = checkDifferentSmall(epsilon_, 0.0);
double variation = epsilon_ * 2; // Start above epsilon_ so the loop runs at least once
while ( timestep < horizon_ && ( !useEpsilon || variation > epsilon_ ) ) {
std::cout << " Timestep " << timestep + 1 << "/" << horizon_ << "\n";
++timestep;
// Compute all possible outcomes, from our previous results.
@@ -184,10 +186,8 @@
}
VList w;
w.reserve(finalWSize);
for ( size_t a = 0; a < A; ++a )
std::move(std::begin(projs[a][0]), std::end(projs[a][0]), std::back_inserter(w));
auto begin = std::begin(w), bound = begin, end = std::end(w);
for ( auto & belief : beliefs )
bound = extractWorstAtBelief(belief, begin, bound, end);
......
@@ -3,7 +3,6 @@
#include <AIToolbox/ProbabilityUtils.hpp>
#include <AIToolbox/POMDP/Types.hpp>
//#include "../utils.hpp"
namespace AIToolbox {
namespace POMDP {
@@ -85,8 +84,11 @@ namespace AIToolbox {
typename Projecter<M>::ProjectionsTable Projecter<M>::operator()(const VList & w) {
ProjectionsTable projections( boost::extents[A][O] );
for ( size_t a = 0; a < A; ++a )
for ( size_t a = 0; a < A; ++a ) {
std::cerr << "\r projection " << a + 1 << "/" << A;
projections[a] = operator()(w, a);
}
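// print the final projection count and terminate the carriage-return progress line with a newline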
std::cerr << "\r projection " << A << "/" << A <<" \n";
return projections;
}
@@ -100,6 +102,7 @@ namespace AIToolbox {
// Other observations
for ( size_t o = 1; o < O; ++o ) {
std::cerr << "\r > observation " << o + 1 << "/" << O;
for ( size_t i = 0; i < w.size(); ++i ) {
auto & v = std::get<VALUES>(w[i]);
......
@@ -288,7 +288,7 @@ int main(int argc, char* argv[]) {
int steps = ((argc > 4) ? std::atoi(argv[4]) : 1000000);
assert(("Unvalid steps parameter", steps > 0));
unsigned int horizon = ((argc > 5) ? std::atoi(argv[5]) : 1);
assert(("Unvalid horizon parameter", horizon > 0));
assert(("Unvalid horizon parameter", ( !algo.compare("pbvi") && horizon > 1 ) || (algo.compare("pbvi") && horizon > 0)));
double epsilon = ((argc > 6) ? std::atof(argv[6]) : 0.01);
assert(("Unvalid convergence criterion", epsilon >= 0));
double exp = ((argc > 7) ? std::atof(argv[7]) : 10000);
@@ -346,8 +346,10 @@ int main(int argc, char* argv[]) {
// Incremental Pruning
else if (!algo.compare("pbvi")) {
// DEBUG PBVI // nBeliefs = n observations ?
AIToolbox::POMDP::PBVI solver(beliefSize, horizon, epsilon);
if (!verbose) {std::cerr.setstate(std::ios_base::failbit);}
auto solution = solver(model);
if (!verbose) {std::cerr.clear();}
std::cout << current_time_str() << " - Convergence criterion reached: " << std::boolalpha << std::get<0>(solution) << "\n";
training_time = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - start).count() / 1000000.;
......
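The per-projection and per-observation progress messages added in this commit go to std::cerr, and main.cpp suppresses them in non-verbose mode by setting the stream's failbit around the solver call and clearing it afterwards (formatted output on a stream in a failed state is discarded). A hypothetical RAII variant of the same idea, not part of this commit, which restores the stream even if the solver throws:
#include <iostream>

// Scope guard that silences a stream by setting its failbit, then restores the previous state.
class StreamSilencer {
    std::ios & stream_;
    std::ios_base::iostate saved_;
public:
    explicit StreamSilencer(std::ios & s) : stream_(s), saved_(s.rdstate()) {
        stream_.setstate(std::ios_base::failbit);   // subsequent << on this stream writes nothing
    }
    ~StreamSilencer() { stream_.clear(saved_); }    // restore the original stream state
};

// usage sketch:
// {
//     StreamSilencer silence(std::cerr);           // only when !verbose
//     auto solution = solver(model);
// }                                                // std::cerr restored here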
@@ -20,7 +20,7 @@ PRECISION="0"
VERBOSE="0"
BELIEFSIZE="100"
EXPLORATION="10000"
HORIZON="1"
HORIZON="2"
COMPILE=false
# SET ARGUMENTS FROM CMD LINE
......
@@ -486,28 +486,42 @@ void evaluate_policyMEMDP(std::string sfile,
accuracy = 0, precision = 0, total_reward = 0, discounted_reward = 0;
cdiscount = discount;
// Initial belief and first action
size_t id, prediction;
size_t init_state = 0;
unsigned int timesteps = horizon;
AIToolbox::POMDP::Belief belief = build_belief(init_state);
std::tie(prediction, id) = policy.sampleAction(belief, timesteps);
timesteps --;
// For each (state, action) in the session
for (auto it2 = begin(std::get<1>(*it)); it2 != end(std::get<1>(*it)); ++it2) {
// current state
size_t state = std::get<0>(*it2), action = std::get<1>(*it2);
size_t observation = std::get<0>(*it2), action = std::get<1>(*it2);
// predict
if (observation != init_state) {
std::tie(prediction, id) = policy.sampleAction(id, observation, timesteps);
}
// get a prediction
AIToolbox::POMDP::Belief belief = build_belief(state);
for (size_t a = 0; a < n_actions; a++) {
//AIToolbox::POMDP::Belief belief = build_belief(state);
//for (size_t a = 0; a < n_actions; a++) {
//action_scores.at(a) = 1. / n_actions;
//action_scores.at(a) = policy.getActionProbability (belief, a, horizon);
action_scores.at(a) = policy.getActionProbability (belief, a);
}
size_t prediction = get_prediction(action_scores);
//action_scores.at(a) = policy.getActionProbability (belief, a);
//}
//size_t prediction = get_prediction(action_scores);
// evaluate
accuracy += accuracy_score(prediction, action);
precision += avprecision_score(action_scores, action);
//precision += avprecision_score(action_scores, action);
if (prediction == action) {
total_reward += rewards[state][prediction];
discounted_reward += cdiscount * rewards[state][prediction];
total_reward += rewards[observation][prediction];
discounted_reward += cdiscount * rewards[observation][prediction];
}
cdiscount *= discount;
timesteps = ((timesteps > 1) ? timesteps - 1 : 1);
}
// accumulate
......
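The rewritten evaluation loop above queries the policy once from the initial belief and then follows the policy tree using the returned id together with each new observation, instead of rebuilding a belief and scoring every action at every step. The pattern, pulled together from the fragments above (a sketch only; session_observations is an illustrative name for the logged observation sequence):
AIToolbox::POMDP::Belief belief = build_belief(init_state);
unsigned int timesteps = horizon;
size_t prediction, id;
// first query: sample an action from the initial belief and remember the policy id
std::tie(prediction, id) = policy.sampleAction(belief, timesteps);
timesteps--;
for (size_t observation : session_observations) {
    if (observation != init_state) {
        // advance through the policy with the previous id and the new observation
        std::tie(prediction, id) = policy.sampleAction(id, observation, timesteps);
    }
    timesteps = (timesteps > 1) ? timesteps - 1 : 1;   // never query below horizon 1
}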
@@ -365,13 +365,13 @@ void evaluate_pomcp(std::string sfile,
init_belief(i * n_observations + init_state) = 1.0 / n_environments;
}
prediction = pomcp.sampleAction(init_belief, chorizon);
action = n_actions;
if (!verbose) {std::cerr.setstate(std::ios_base::failbit);}
// For each (state, action) in the session
for (auto it2 = begin(std::get<1>(*it)); it2 != end(std::get<1>(*it)); ++it2) {
size_t observation = std::get<0>(*it2);
// If not init state, predict from past action and observation
if (action < n_actions) {
if (observation != init_state) {
prediction = pomcp.sampleAction(action, observation, chorizon);
}
// Get graph and action scores
@@ -472,14 +472,13 @@ void evaluate_memcp(std::string sfile,
// init belief
prediction = memcp.sampleAction(init_belief, init_state, chorizon, true);
action = n_actions;
// For each (state, action) in the session
if (!verbose) {std::cerr.setstate(std::ios_base::failbit);}
for (auto it2 = begin(std::get<1>(*it)); it2 != end(std::get<1>(*it)); ++it2) {
size_t observation = std::get<0>(*it2);
// If not init state, predict from past action and observation
if (action < n_actions) {
if (observation != init_state) {
prediction = memcp.sampleAction(action, observation, chorizon);
}
// Get graph and action scores
......
@@ -70,7 +70,7 @@ If needed, first set the correct library paths in ``run.sh``. The script can th
* ``[4]`` History length (Defaults to 2). Must be strictly greater than 1.
* ``[5]`` Discount Parameter gamma (Defaults to 0.95). Must be strictly between 0 and 1.
* ``[6]`` Number of iterations for mdp, and number of simulation steps for pomcp and memcp (Defaults to 1500).
* ``[7]`` Horizon parameter for the POMDP solvers. Defaults to 1.
* ``[7]`` Horizon parameter for the POMDP solvers. Defaults to 2. Must be strictly greater than 1 for pbvi, and at least 1 for the other solvers.
* ``[8]`` Convergence criterion for mdp and ip. Defaults to 0.01.
* ``[9]`` Exploration parameter for pomcp and memcp. Defaults to 10000 (high exploration). A high exploration parameter leads to fewer "Observation never seen in the simulation" errors during evaluation of a pomcp or memcp model. (*Note*: to see these errors, you need to run in verbose mode).
* ``[10]`` Number of beliefs to use for PBVI, or number of particles for the belief approximation in pomcp and memcp. Defaults to 100.
......