Commit b36084cc authored by Amelie Royer's avatar Amelie Royer

Adding working + optimized PBVI solver

parent 526bbccd
#ifndef AI_TOOLBOX_POMDP_BELIEF_GENERATOR_HEADER_FILE
#define AI_TOOLBOX_POMDP_BELIEF_GENERATOR_HEADER_FILE
#include <AIToolbox/ProbabilityUtils.hpp>
#include <AIToolbox/POMDP/Types.hpp>
#include <AIToolbox/Impl/Seeder.hpp>
namespace AIToolbox {
namespace POMDP {
#ifndef DOXYGEN_SKIP
// This is done to avoid bringing around the enable_if everywhere.
template <typename M, typename = typename std::enable_if<is_generative_model<M>::value>::type>
class BeliefGenerator;
#endif
/**
* @brief This class generates lists of Beliefs for a POMDP model.
*
* The list is seeded with the uniform belief and the simplex corners, and
* then grown by simulating the model (see expandBeliefList). Such lists are
* typically used to supply belief points to point-based solvers (e.g. PBVI).
*/
template <typename M>
class BeliefGenerator<M> {
public:
// A BeliefList is simply a vector of Beliefs.
using BeliefList = std::vector<Belief>;
/**
* @brief Basic constructor; caches model sizes and seeds the internal RNG.
*
* @param model The generative model used to sample new beliefs.
*/
BeliefGenerator(const M& model);
/**
* @brief This function generates a fresh list of roughly beliefNumber beliefs.
*
* @param beliefNumber The number of beliefs to generate.
*
* @return The generated list.
*/
BeliefList operator()(size_t beliefNumber) const;
/**
* @brief This function expands the provided list up to beliefNumber beliefs.
*
* @param beliefNumber The target size of the list.
* @param bl The list to fill; the call is a no-op if this is nullptr.
*/
void operator()(size_t beliefNumber, BeliefList * bl) const;
private:
/**
* @brief This function uses the model to generate new beliefs, and adds them to the provided list.
*
* @param max The maximum number of elements that the list should have.
* @param bl The list to expand.
*/
void expandBeliefList(size_t max, BeliefList * bl) const;
const M& model_; // Model beliefs are generated from.
size_t S, A; // Cached numbers of states and actions.
mutable std::default_random_engine rand_; // mutable so const operator() can sample.
};
/**
 * @brief Builds a generator for the given model.
 *
 * Caches the model's state and action space sizes, and seeds the internal
 * random engine from the global Seeder so runs are reproducible per-seed.
 *
 * @param model The generative model used to sample new beliefs.
 */
template <typename M>
BeliefGenerator<M>::BeliefGenerator(const M& model) :
        model_(model),
        S(model_.getS()),
        A(model_.getA()),
        rand_(Impl::Seeder::getSeed())
{
}
/**
 * @brief Generates a fresh list of beliefNumber beliefs.
 *
 * The list is seeded with the uniform (middle-of-simplex) belief and the
 * simplex corners, then topped up via model simulation.
 *
 * Fix: the original unconditionally added the middle belief AND up to
 * beliefNumber corners, so the result could hold beliefNumber + 1 entries,
 * exceeding both the request and the reserve() below. We now cap the seed
 * phase at beliefNumber elements (and skip seeding entirely when zero
 * beliefs are requested).
 *
 * @param beliefNumber The number of beliefs to generate.
 *
 * @return The generated list.
 */
template <typename M>
typename BeliefGenerator<M>::BeliefList BeliefGenerator<M>::operator()(size_t beliefNumber) const {
    // We add the middle belief and as many simplex corners as fit.
    BeliefList beliefs;
    beliefs.reserve(beliefNumber);
    if ( beliefNumber > 0 ) {
        beliefs.emplace_back(S);
        beliefs.back().fill(1.0/S);
    }
    for ( size_t s = 0; s < S && beliefs.size() < beliefNumber; ++s ) {
        beliefs.emplace_back(S);
        beliefs.back().fill(0.0);
        beliefs.back()(s) = 1.0;
    }
    // Fill the remainder through simulation.
    this->operator()(beliefNumber, &beliefs);
    return beliefs;
}
/**
 * @brief Expands the provided list up to beliefNumber beliefs.
 *
 * Fix: the random top-up used a batch size of beliefNumber / 20, which is
 * zero for any request below 20 beliefs. If the stochastic expansion found
 * no new beliefs (quite possible — see note below), the outer while loop
 * then made no progress and never terminated. We now add at least one
 * random belief per outer iteration, guaranteeing termination.
 *
 * @param beliefNumber The target size of the list.
 * @param bl The list to fill; the call is a no-op if this is nullptr.
 */
template <typename M>
void BeliefGenerator<M>::operator()(size_t beliefNumber, BeliefList * bl) const {
    if ( !bl ) return;
    auto & beliefs = *bl;
    // Since the original method of obtaining beliefs is stochastic,
    // we keep trying for a while in case we don't find any new beliefs.
    // However, for some problems (for example the Tiger problem) still
    // this does not perform too well since the probability of finding
    // a new belief (via action LISTEN) is pretty low.
    size_t currentSize = beliefs.size();
    while ( currentSize < beliefNumber ) {
        // Attempt the simulation-based expansion; give up after 5
        // consecutive tries that discover nothing new.
        unsigned counter = 0;
        while ( counter < 5 ) {
            expandBeliefList(beliefNumber, &beliefs);
            if ( currentSize == beliefs.size() ) ++counter;
            else {
                currentSize = beliefs.size();
                if ( currentSize == beliefNumber ) break;
            }
        }
        // Top up with uniformly random beliefs; at least one per outer
        // iteration so the loop always makes progress.
        size_t batch = beliefNumber / 20;
        if ( batch == 0 ) batch = 1;
        for ( size_t i = 0; currentSize < beliefNumber && i < batch; ++i, ++currentSize )
            beliefs.emplace_back(makeRandomBelief(S, rand_));
    }
}
/**
 * @brief Uses the model to generate new beliefs and adds them to the list.
 *
 * For every belief already in the list (including ones added during this
 * very call), we simulate each action 20 times, keep the resulting belief
 * farthest (L1 distance) from everything currently in the list, and append
 * the overall farthest candidate if it is genuinely new.
 *
 * Fix: the original iterated `bl` with iterators while calling
 * bl.emplace_back() inside the loop body. A vector reallocation there
 * invalidates the held iterator — undefined behavior (likely the
 * "ALLOCATION ERROR" the old comments refer to). We iterate by index
 * instead, which stays valid across growth.
 *
 * @param max The maximum number of elements that the list should have.
 * @param blp The list to expand.
 */
template <typename M>
void BeliefGenerator<M>::expandBeliefList(size_t max, BeliefList * blp) const {
    assert(blp);
    auto & bl = *blp;
    size_t size = bl.size();
    std::vector<Belief> newBeliefs(A);
    std::vector<double> distances(A);
    auto dBegin = std::begin(distances), dEnd = std::end(distances);
    // L1 distance between two beliefs.
    auto computeDistance = [this](const Belief & lhs, const Belief & rhs) {
        double distance = 0.0;
        for ( size_t i = 0; i < S; ++i )
            distance += std::abs(lhs[i] - rhs[i]);
        return distance;
    };
    Belief helper; double distance;
    // We apply the discovery process also to all beliefs we discover
    // along the way. Index-based loop: emplace_back below may reallocate.
    for ( size_t b = 0; b < bl.size(); ++b ) {
        // Compute, per action, the farthest belief reachable in one step.
        for ( size_t a = 0; a < A; ++a ) {
            distances[a] = 0.0;
            for ( int j = 0; j < 20; ++j ) {
                size_t s = sampleProbability(S, bl[b], rand_);
                size_t o;
                std::tie(std::ignore, o, std::ignore) = model_.sampleSOR(s, a);
                helper = updateBelief(model_, bl[b], a, o);
                // Compute distance (here we compare also against elements we just added!)
                distance = computeDistance(helper, bl.front());
                for ( size_t k = 1; k < bl.size(); ++k ) {
                    if ( checkEqualSmall(distance, 0.0) ) break; // We already have it!
                    distance = std::min(distance, computeDistance(helper, bl[k]));
                }
                // Select the best found over the 20 samples.
                if ( distance > distances[a] ) {
                    distances[a] = distance;
                    newBeliefs[a] = helper;
                }
            }
        }
        // Find the farthest away over all actions; add only if it is new.
        size_t id = std::distance( dBegin, std::max_element(dBegin, dEnd) );
        if ( checkDifferentSmall(distances[id], 0.0) ) {
            bl.emplace_back(newBeliefs[id]);
            ++size;
            if ( size == max ) break;
        }
    }
}
}
}
#endif
This diff is collapsed.
#ifndef AI_TOOLBOX_POMDP_PROJECTER_HEADER_FILE
#define AI_TOOLBOX_POMDP_PROJECTER_HEADER_FILE
#include <AIToolbox/ProbabilityUtils.hpp>
#include <AIToolbox/POMDP/Types.hpp>
namespace AIToolbox {
namespace POMDP {
#ifndef DOXYGEN_SKIP
// This is done to avoid bringing around the enable_if everywhere.
template <typename M, typename = typename std::enable_if<is_model<M>::value>::type>
class Projecter;
#endif
/**
* @brief This class offers projecting facilities for Models.
*
* A projection of a VList under (action, observation) is the set of alpha
* vectors obtained by back-propagating each value function one step, used
* as input to the cross-sum stage of value-iteration style POMDP solvers.
*/
template <typename M>
class Projecter<M> {
public:
// 2d table of projections, indexed by [action][observation].
using ProjectionsTable = boost::multi_array<VList, 2>;
// 1d row of projections for a fixed action, indexed by [observation].
using ProjectionsRow = boost::multi_array<VList, 1>;
/**
* @brief Basic constructor.
*
* This constructor initializes the internal immediate reward table and the
* table containing what are the possible observations for the model (this
* may speed up the computation of the projections).
*
* @param model The model that is used as a base for all projections.
*/
Projecter(const M & model);
/**
* @brief This function returns all possible projections for the provided VList.
*
* @param w The list that needs to be projected.
*
* @return A 2d array of projection lists.
*/
ProjectionsTable operator()(const VList & w);
/**
* @brief This function returns all possible projections for the provided VList and action.
*
* @param w The list that needs to be projected.
* @param a The action used for projecting the list.
*
* @return A 1d array of projection lists.
*/
ProjectionsRow operator()(const VList & w, size_t a);
private:
// using PossibleObservationsTable = boost::multi_array<bool, 2>;
/**
* @brief This function precomputes which observations are possible from specific actions.
*/
//void computePossibleObservations();
/**
* @brief This function precomputes immediate rewards for the POMDP state-action pairs.
*/
void computeImmediateRewards();
const M & model_; // Model projections are computed against.
size_t S, A, O; // Cached numbers of states, actions, observations.
double discount_; // Cached model discount factor.
Matrix2D immediateRewards_; // Holds R(s,a) / |O|, see computeImmediateRewards().
//PossibleObservationsTable possibleObservations_;
};
/**
 * @brief Builds a projecter for the given model.
 *
 * Caches the model dimensions and discount factor, then precomputes the
 * per-(action, state) immediate rewards used by every projection.
 *
 * @param model The model that is used as a base for all projections.
 */
template <typename M>
Projecter<M>::Projecter(const M& model) :
        model_(model),
        S(model_.getS()),
        A(model_.getA()),
        O(model_.getO()),
        discount_(model_.getDiscount()),
        immediateRewards_(A, S)/*, possibleObservations_(boost::extents[A][O])*/
{
    //computePossibleObservations();
    // Precompute R(s,a)/|O| once; every projection reuses it.
    computeImmediateRewards();
}
/**
 * @brief Computes the projections of w for every action of the model.
 *
 * @param w The list that needs to be projected.
 *
 * @return A 2d array of projection lists, indexed by [action][observation].
 */
template <typename M>
typename Projecter<M>::ProjectionsTable Projecter<M>::operator()(const VList & w) {
    ProjectionsTable result( boost::extents[A][O] );
    // Delegate the actual work to the per-action overload, one row at a time.
    for ( size_t action = 0; action < A; ++action )
        result[action] = operator()(w, action);
    return result;
}
template <typename M>
typename Projecter<M>::ProjectionsRow Projecter<M>::operator()(const VList & w, size_t a) {
ProjectionsRow projections( boost::extents[O] );
// Observation 0 (impossible observation): store only the immediate reward,
// so the cross-sum step downstream can treat all observations uniformly.
projections[0].emplace_back(immediateRewards_.row(a), a, VObs(1,0));
// Other observations
for ( size_t o = 1; o < O; ++o ) {
for ( size_t i = 0; i < w.size(); ++i ) {
auto & v = std::get<VALUES>(w[i]);
MDP::Values vproj(S); vproj.fill(0.0);
// For each value function in the previous timestep, we compute the new value
// if we performed action a and obtained observation o.
for ( size_t s = 0; s < S; ++s ) {
// MEMDP-specific shortcut: instead of summing over all successors s'
// weighted by O(s',a,o) (see the commented-out general code below),
// only a single successor is considered per (s, o).
// NOTE(review): this assumes the state space is encoded so that
// NPROFILES * (s / O) + o is exactly that successor — NPROFILES is
// defined elsewhere; confirm against the model's state layout.
size_t s1 = NPROFILES * (s / O) + o;
vproj[s] += model_.getTransitionProbability(s,a,s1) * v[s1];
}
// Set new projection with found value (discounted, plus the immediate
// reward) and the id i of the previous V it was derived from.
// projections[o].emplace_back(vproj, a, VObs(1,i));
projections[o].emplace_back(vproj * discount_ + immediateRewards_.row(a).transpose(), a, VObs(1,i));
}
}
// The block below is the original, general-POMDP projection kept for
// reference; it sums over every successor state weighted by the
// observation probability instead of using the shortcut above.
//for ( size_t o = 1; o < O; ++o ) {
// Here we put in just the immediate rewards so that the cross-summing step in the main
// function works correctly. However we communicate via the boolean that pruning should
// not be done at this step (since adding constants shouldn't do anything anyway).
/*if ( !possibleObservations_[a][o] ) {
// We add a parent id anyway in order to keep the code that cross-sums simple. However
// note that this fake ID of 0 should never be used, so it should be safe to avoid
// setting it to a special value like -1. If one really wants to check, he/she can
// just look at the observation table and the belief and see if it makes sense.
projections[o].emplace_back(immediateRewards_.row(a), a, VObs(1,0));
continue;
}*/
// Otherwise we compute a projection for each ValueFunction supplied to us.
//for ( size_t i = 0; i < w.size(); ++i ) {
// auto & v = std::get<VALUES>(w[i]);
// MDP::Values vproj(S); vproj.fill(0.0);
// For each value function in the previous timestep, we compute the new value
// if we performed action a and obtained observation o.
// for ( size_t s = 0; s < S; ++s )
// vproj_{a,o}[s] = R(s,a) / |O| + discount * sum_{s'} ( T(s,a,s') * O(s',a,o) * v_{t-1}(s') )
// for ( size_t s1 = 0; s1 < S; ++s1 )
// vproj[s] += model_.getTransitionProbability(s,a,s1) * model_.getObservationProbability(s1,a,o) * v[s1];
// Set new projection with found value and previous V id.
// projections[o].emplace_back(vproj, a, VObs(1,i));
// projections[o].emplace_back(vproj * discount_ + immediateRewards_.row(a).transpose(), a, VObs(1,i));
// }
// }
return projections;
}
/**
 * @brief Precomputes the expected immediate reward for every (action, state)
 *        pair, divided by the number of observations.
 */
template <typename M>
void Projecter<M>::computeImmediateRewards() {
    immediateRewards_.fill(0.0);
    for ( size_t action = 0; action < A; ++action ) {
        for ( size_t state = 0; state < S; ++state ) {
            // Expectation of the reward over all possible successor states.
            double expected = 0.0;
            for ( size_t next = 0; next < S; ++next )
                expected += model_.getTransitionProbability(state, action, next)
                          * model_.getExpectedReward(state, action, next);
            immediateRewards_(action, state) += expected;
        }
    }
    // You can find out why this is divided in the incremental pruning paper =)
    // The idea is that at the end of all the cross sums it's going to add up to the correct value.
    immediateRewards_ /= static_cast<double>(O);
}
/*template <typename M>
void Projecter<M>::computePossibleObservations() {
std::fill( boo.origin(), boo.origin() + boo.size(), true );
// MODIFIED: for our problem, all observations are possible except 0 (initial state)
for ( size_t a = 0; a < A; ++a )
for ( size_t o = 0; o < O; ++o )
for ( size_t s = 0; s < S; ++s ) // This NEEDS to be last!
if ( checkDifferentSmall(model_.getObservationProbability(s,a,o), 0.0) ) { possibleObservations_[a][o] = true; break; } // We only break the S loop!
}*/
}
}
#endif
......@@ -274,10 +274,9 @@ int main(int argc, char* argv[]) {
// Solve
start = std::chrono::high_resolution_clock::now();
std::cout << current_time_str() << " - Init solver...!\n";
std::cout << "\n" << current_time_str() << " - Starting MDP ValueIteration solver\n";
RecoMDP model;
AIToolbox::MDP::ValueIteration<decltype(model)> solver(steps, epsilon);
std::cout << current_time_str() << " - Starting solver!\n";
auto solution = solver(model);
std::cout << current_time_str() << " - Convergence criterion e = " << epsilon << " reached ? " << std::boolalpha << std::get<0>(solution) << "\n";
elapsed = std::chrono::high_resolution_clock::now() - start;
......
......@@ -13,7 +13,7 @@
#include "utils.hpp"
#include <AIToolbox/POMDP/IO.hpp>
#include <AIToolbox/POMDP/Algorithms/IncrementalPruning.hpp>
#include "AIToolBox/PBVI.hpp"
/*
......@@ -283,7 +283,7 @@ int main(int argc, char* argv[]) {
assert(("Usage: ./main files_basename [solver] [discount] [nsteps] [precision]", argc >= 2));
std::string algo = ((argc > 2) ? argv[2] : "pbvi");
std::transform(algo.begin(), algo.end(), algo.begin(), ::tolower);
assert(("Unvalid POMDP solver parameter", !(algo.compare("ip") && algo.compare("pomcp") && algo.compare("memcp"))));
assert(("Unvalid POMDP solver parameter", !(algo.compare("pbvi") && algo.compare("pomcp") && algo.compare("memcp"))));
discount = ((argc > 3) ? std::atof(argv[3]) : 0.95);
assert(("Unvalid discount parameter", discount > 0 && discount < 1));
int steps = ((argc > 4) ? std::atoi(argv[4]) : 1000000);
......@@ -321,7 +321,7 @@ int main(int argc, char* argv[]) {
// Training
double training_time, testing_time;
start = std::chrono::high_resolution_clock::now();
std::cout << current_time_str() << " - Init " << algo << " solver...!\n";
std::cout << "\n" << current_time_str() << " - Starting " << algo << " solver...!\n";
RecoMEMDP model;
// Evaluation
......@@ -344,8 +344,9 @@ int main(int argc, char* argv[]) {
testing_time = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - start).count() / 1000000.;
}
// Incremental Pruning
else if (!algo.compare("ip")) {
AIToolbox::POMDP::IncrementalPruning solver(horizon, epsilon);
else if (!algo.compare("pbvi")) {
// DEBUG PBVI //nBelef = n observations ?
AIToolbox::POMDP::PBVI solver(beliefSize, horizon, epsilon);
auto solution = solver(model);
std::cout << current_time_str() << " - Convergence criterion reached: " << std::boolalpha << std::get<0>(solution) << "\n";
std::chrono::high_resolution_clock::now() - start;
......
......@@ -5,7 +5,7 @@
##### 1. Pre-requisites
* GCC 4.9 +
* cmake [``cmake`` package]
* Boost version 1.53+ [``libbost-dev`` package]
* Boost version 1.53+ [``libboost-dev`` package]
* [Eigen 3.2+](http://eigen.tuxfamily.org/index.php?title=Main_Page) library
* [lp_solve](http://lpsolve.sourceforge.net/5.5/) library [``lp-solve`` package]
......@@ -64,7 +64,7 @@ If needed, first set the correct library pathes in ``run.sh``. The script can th
``./run.sh -m [1] -d [2] -n [3] -k [4] -g [5] -s [6] -h [7] -e [8] -x [9] -b [10] -c -p --help``
* ``[1]`` Model to use (Defaults to mdp). Available options are *mdp* (MDP model obtained by a weighted average of all the environments' transition probabilities and solved by Value iteration), *ip* (incremental pruning on the MEMDP), *pomcp* and *memcp*.
* ``[1]`` Model to use (Defaults to mdp). Available options are *mdp* (MDP model obtained by a weighted average of all the environments' transition probabilities and solved by Value iteration), *pbvi* (point-based value iteration optimized for the MEMDP structure), *pomcp* and *memcp* (Monte-carlo solver, respectively without and with optimization for the MEMDP structure).
* ``[2]`` Dataset to use (Defaults to fm). Available options are *fm* (foodmart) and *rd* (synthetic data).
* ``[3]`` Product discretization level if foodmart dataset, or the number of items if synthetic data.
* ``[4]`` History length (Defaults to 2). Must be strictly greater than 1.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment