Commit ad998336 authored by Amelie Royer

Adding verbose mode

parent 817c0f19
......@@ -250,6 +250,7 @@ int main(int argc, char* argv[]) {
float epsilon = ((argc > 4) ? std::atof(argv[4]) : 0.01);
assert(("Unvalid epsilon parameter", epsilon >= 0));
bool precision = ((argc > 5) ? (atoi(argv[5]) == 1) : false);
bool verbose = ((argc > 6) ? (atoi(argv[6]) == 1) : false);
// Load model parameters
auto start = std::chrono::high_resolution_clock::now();
......@@ -286,7 +287,7 @@ int main(int argc, char* argv[]) {
start = std::chrono::high_resolution_clock::now();
std::cout << "\n" << current_time_str() << " - Evaluation results\n";
AIToolbox::MDP::Policy policy(n_observations, n_actions, std::get<1>(solution));
evaluate_policyMDP(datafile_base + ".test", policy, discount, rewards);
evaluate_policyMDP(datafile_base + ".test", policy, discount, rewards, verbose);
elapsed = std::chrono::high_resolution_clock::now() - start;
double testing_time = std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count() / 1000000.;
......
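The new `verbose` flag in `mainMDP` follows the same optional positional-argument pattern as the other parameters: read `argv[i]` when it is present, otherwise fall back to a default. A minimal sketch of that pattern, using a hypothetical `parse_flag` helper that is not part of this commit:

```cpp
#include <cstdlib>

// Minimal sketch (not part of this commit): a hypothetical helper capturing the
// "argc check + atoi with default" pattern used for the optional flags above.
static bool parse_flag(int argc, char* argv[], int index, bool fallback = false) {
    if (argc <= index) return fallback;   // argument not supplied: keep the default
    return std::atoi(argv[index]) == 1;   // "1" enables the flag, anything else disables it
}

// e.g.  bool verbose = parse_flag(argc, argv, 6);
```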
......@@ -228,7 +228,6 @@ public:
* \return s2 such that s -a-> s2, and the associated reward R(s, a, s2).
*/
std::tuple<size_t, double> sampleSR(size_t s, size_t a) const {
std::cout << "TEST\n";
// Sample random transition
std::discrete_distribution<int> distribution (transition_matrix[get_env(s)][get_rep(s)][a], transition_matrix[get_env(s)][get_rep(s)][a] + n_actions);
size_t link = distribution(generator);
......@@ -297,6 +296,7 @@ int main(int argc, char* argv[]) {
unsigned int beliefSize = ((argc > 8) ? std::atoi(argv[8]) : 100);
assert(("Unvalid belief size", beliefSize >= 0));
bool precision = ((argc > 9) ? (atoi(argv[9]) == 1) : false);
bool verbose = ((argc > 10) ? (atoi(argv[10]) == 1) : false);
// Load model parameters
auto start = std::chrono::high_resolution_clock::now();
......@@ -331,7 +331,7 @@ int main(int argc, char* argv[]) {
training_time = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - start).count() / 1000000.;
start = std::chrono::high_resolution_clock::now();
std::cout << current_time_str() << " - Starting evaluation!\n";
evaluate_pomcp(datafile_base + ".test", solver, discount, horizon, rewards);
evaluate_pomcp(datafile_base + ".test", solver, discount, horizon, rewards, verbose);
testing_time = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - start).count() / 1000000.;
}
// MEMCP
......@@ -340,7 +340,7 @@ int main(int argc, char* argv[]) {
training_time = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - start).count() / 1000000.;
start = std::chrono::high_resolution_clock::now();
std::cout << current_time_str() << " - Starting evaluation!\n";
evaluate_memcp(datafile_base + ".test", solver, discount, horizon, rewards);
evaluate_memcp(datafile_base + ".test", solver, discount, horizon, rewards, verbose);
testing_time = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - start).count() / 1000000.;
}
// Incremental Pruning
......@@ -356,7 +356,7 @@ int main(int argc, char* argv[]) {
start = std::chrono::high_resolution_clock::now();
std::cout << "\n" << current_time_str() << " - Evaluation results\n";
AIToolbox::POMDP::Policy policy(n_states, n_actions, n_observations, std::get<1>(solution));
evaluate_policyMEMDP(datafile_base + ".test", policy, discount, horizon, rewards);
evaluate_policyMEMDP(datafile_base + ".test", policy, discount, horizon, rewards, verbose);
testing_time = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - start).count() / 1000000.;
}
......
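The training and testing timings surrounding these evaluation calls all repeat the same chrono arithmetic (cast to microseconds, divide by 1e6). Purely as an illustration, a hypothetical helper capturing that pattern:

```cpp
#include <chrono>

// Hypothetical helper (not in the commit): seconds elapsed since `start`,
// computed exactly as in the timing lines above (microsecond cast, divided by 1e6).
static double seconds_since(std::chrono::high_resolution_clock::time_point start) {
    auto elapsed = std::chrono::high_resolution_clock::now() - start;
    return std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count() / 1000000.;
}

// Usage sketch:
//   auto start = std::chrono::high_resolution_clock::now();
//   evaluate_memcp(datafile_base + ".test", solver, discount, horizon, rewards, verbose);
//   double testing_time = seconds_since(start);
```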
......@@ -17,13 +17,14 @@ DISCOUNT="0.95"
STEPS="1500"
EPSILON="0.01"
PRECISION="0"
VERBOSE="0"
BELIEFSIZE="100"
EXPLORATION="10000"
HORIZON="1"
COMPILE=false
# SET ARGUMENTS FROM CMD LINE
while getopts "m:d:n:k:g:s:h:e:x:b:cp" opt; do
while getopts "m:d:n:k:g:s:h:e:x:b:cpv" opt; do
case $opt in
m)
MODE=$OPTARG
......@@ -46,6 +47,9 @@ while getopts "m:d:n:k:g:s:h:e:x:b:cp" opt; do
p)
PRECISION=1
;;
v)
VERBOSE=1
;;
h)
HORIZON=$OPTARG
;;
......@@ -110,7 +114,7 @@ if [ $MODE = "mdp" ]; then
# RUN
echo
echo "Running mainMDP on $BASE"
./mainMDP $BASE $DISCOUNT $STEPS $EPSILON $PRECISION
./mainMDP $BASE $DISCOUNT $STEPS $EPSILON $PRECISION $VERBOSE
echo
# POMDPs
else
......@@ -130,6 +134,6 @@ else
# RUN
echo
echo "Running mainMEMDP on $BASE with $MODE solver"
./mainMEMDP $BASE $MODE $DISCOUNT $STEPS $HORIZON $EPSILON $EXPLORATION $BELIEFSIZE $PRECISION
./mainMEMDP $BASE $MODE $DISCOUNT $STEPS $HORIZON $EPSILON $EXPLORATION $BELIEFSIZE $PRECISION $VERBOSE
echo
fi
\ No newline at end of file
......@@ -323,25 +323,26 @@ std::pair<double, double> identification_score(std::vector<size_t> sampleBelief,
*/
void print_evaluation_result(int set_lengths[n_environments],
std::vector<double*> results,
std::vector<std::string> titles)
std::vector<std::string> titles,
bool verbose /* = false*/)
{
// Print results for each environment, as well as global result
int n_results = results.size();
int session_length = 0;
std::vector<double> acc(n_results, 0);
std::cout << "> Results by cluster ----------------\n";
if (verbose) { std::cout << "> Results by cluster ----------------\n";}
for (int i = 0; i < n_environments; i++) {
std::cout << " cluster " << i;
if (verbose) { std::cout << " cluster " << i;}
for (int j = 0; j < n_results; j++) {
acc.at(j) += results.at(j)[i];
std::cout << "\n > " << titles[j] << ": " << results.at(j)[i] / set_lengths[i];
if (verbose) { std::cout << "\n > " << titles[j] << ": " << results.at(j)[i] / set_lengths[i];}
}
session_length += set_lengths[i];
std::cout << "\n\n";
if (verbose) {std::cout << "\n\n";}
}
// Global
std::cout << "> Global results ----------------";
for (int j = 0; j < n_results; j++) {
std::cout << "\n > " << titles[j] << ": " << acc.at(j) / session_length;
......@@ -356,7 +357,8 @@ void print_evaluation_result(int set_lengths[n_environments],
void evaluate_policyMDP(std::string sfile,
AIToolbox::MDP::Policy policy,
double discount,
double rewards [n_observations][n_actions]) {
double rewards [n_observations][n_actions],
bool verbose /* = false*/) {
// Aux variables
int cluster, session_length;
double cdiscount;
......@@ -410,7 +412,7 @@ void evaluate_policyMDP(std::string sfile,
std::cout << "\n\n";
std::vector<std::string> titles {"acc", "avgpr", "avgrw", "discrw"};
std::vector<double*> results {mean_accuracy, mean_precision, mean_total_reward, mean_discounted_reward};
print_evaluation_result(set_lengths, results, titles);
print_evaluation_result(set_lengths, results, titles, verbose);
}
......@@ -433,7 +435,8 @@ void evaluate_policyMEMDP(std::string sfile,
AIToolbox::POMDP::Policy policy,
double discount,
unsigned int horizon,
double rewards [n_observations][n_actions]) {
double rewards [n_observations][n_actions],
bool verbose /* = false*/) {
// Aux variables
int cluster, session_length;
double cdiscount;
......@@ -498,5 +501,5 @@ void evaluate_policyMEMDP(std::string sfile,
std::cout << "\n\n";
std::vector<std::string> titles {"acc", "avgpr", "avgrw", "discrw"};
std::vector<double*> results {mean_accuracy, mean_precision, mean_total_reward, mean_discounted_reward};
print_evaluation_result(set_lengths, results, titles);
print_evaluation_result(set_lengths, results, titles, verbose);
}
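`print_evaluation_result` now wraps the per-cluster output in `if (verbose)` guards while the global summary still always prints. A sketch of an alternative (an assumption, not what this commit does) is to select the output stream once and write unconditionally:

```cpp
#include <iostream>

// Sketch only: pick the stream once instead of guarding every line.
// An ostream built on a null streambuf has badbit set, so insertions are discarded.
static std::ostream& cluster_log(bool verbose) {
    static std::ostream null_stream(nullptr);
    return verbose ? std::cout : null_stream;
}

// e.g.  cluster_log(verbose) << "> Results by cluster ----------------\n";
```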
......@@ -246,10 +246,12 @@ std::pair<double, double> identification_score(std::vector<size_t> sampleBelief,
* \param set_length contains the number of test sessions per cluster
* \param results contains the various evaluation measures per cluster
* \param titles contains the name of each evaluation measures
* \param verbose if true, increases the verbosity. Defaults to false.
*/
void print_evaluation_result(int set_lengths[n_environments],
std::vector<double*> results,
std::vector<std::string> titles);
std::vector<std::string> titles,
bool verbose=false);
/*! \brief Evaluates a given policy (MDP) on a sequence of test user sessions
......@@ -259,11 +261,13 @@ void print_evaluation_result(int set_lengths[n_environments],
* \param policy AIToolbox policy.
* \param discount discount factor in the MDP model.
* \param rewards stored reward values.
* \param verbose if true, increases the verbosity. Defaults to false.
*/
void evaluate_policyMDP(std::string sfile,
AIToolbox::MDP::Policy policy,
double discount,
double rewards [n_observations][n_actions]);
double rewards [n_observations][n_actions],
bool verbose=false);
/*! \brief Builds the belief (distribution over states) corresponding to the
......@@ -282,37 +286,16 @@ AIToolbox::POMDP::Belief build_belief(size_t o);
* \param discount discount factor in the POMDP model.
* \param horizon planning horizon for action sampling.
* \param rewards stored reward values.
* \param verbose if true, increases the verbosity. Defaults to false.
*/
void evaluate_policyMEMDP(std::string sfile,
AIToolbox::POMDP::Policy policy,
double discount,
unsigned int horizon,
double rewards [n_observations][n_actions]);
double rewards [n_observations][n_actions],
bool verbose=false);
/*! \brief Returns a string representation of the internal tree for the POMCP algorithm.
*
* \param sfile full path to the base_name.test file.
* \param pomcp AIToolbox pomcp instantiation.
* \param discount discount factor in the POMDP model.
* \param horizon planning horizon for POMCP.
* \param rewards stored reward values.
*/
template<typename M>
void pomcp_tree_to_string(AIToolbox::POMDP::POMCP< M > pomcp) {
auto tree = pomcp.getGraph();
for (size_t a = 0; a < n_actions; a++) {
auto anode = tree.children[a];
std::cout << " - " << a << "-> (" << anode.V << ")\n" ;
std::cout << " obs: ";
for (auto b = anode.children.begin(); b != anode.children.end(); ++b) {
std::cout << b->first << " ";;
}
std::cout << "\n";
}
//return;
}
/*! \brief Evaluates the sequence of actions recommended by POMCP.
*
......@@ -321,6 +304,7 @@ void pomcp_tree_to_string(AIToolbox::POMDP::POMCP< M > pomcp) {
* \param discount discount factor in the POMDP model.
* \param horizon planning horizon for POMCP.
* \param rewards stored reward values.
* \param verbose if true, increases the verbosity. Defaults to false.
*/
template<typename M>
......@@ -328,7 +312,8 @@ void evaluate_pomcp(std::string sfile,
AIToolbox::POMDP::POMCP<M> pomcp,
double discount,
unsigned int horizon,
double rewards [n_observations][n_actions])
double rewards [n_observations][n_actions],
bool verbose=false)
{
// Aux variables
int cluster, session_length, chorizon;
......@@ -371,8 +356,7 @@ void evaluate_pomcp(std::string sfile,
}
prediction = pomcp.sampleAction(init_belief, chorizon);
action = n_actions;
int i = 0;
if (!verbose) {std::cerr.setstate(std::ios_base::failbit);}
// For each (state, action) in the session
for (auto it2 = begin(std::get<1>(*it)); it2 != end(std::get<1>(*it)); ++it2) {
size_t observation = std::get<0>(*it2);
......@@ -400,6 +384,7 @@ void evaluate_pomcp(std::string sfile,
cdiscount *= discount;
chorizon = ((chorizon > 1) ? chorizon - 1 : 1 );
}
if (!verbose) {std::cerr.clear();}
// Set score
mean_accuracy[cluster] += accuracy / session_length;
mean_precision[cluster] += precision / session_length;
......@@ -413,7 +398,7 @@ void evaluate_pomcp(std::string sfile,
std::cout << "\n\n";
std::vector<std::string> titles {"acc", "avgpr", "avgrw", "discrw", "idac", "idpr"};
std::vector<double*> results {mean_accuracy, mean_precision, mean_total_reward, mean_discounted_reward, mean_identification, mean_identification_precision};
print_evaluation_result(set_lengths, results, titles);
print_evaluation_result(set_lengths, results, titles, verbose);
}
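When `verbose` is off, `evaluate_pomcp` (and `evaluate_memcp` below) silences `std::cerr` by setting its failbit before the per-session loop and clearing it afterwards; while a stream is in a failed state, insertions are ignored. A standalone sketch of that mechanism, wrapped in a hypothetical RAII guard (not in the commit) so the stream is restored even if the loop exits early:

```cpp
#include <iostream>

// Hypothetical RAII variant of the failbit trick used above: while std::cerr has
// its failbit set, `std::cerr << ...` writes nothing; clear() restores normal output.
struct CerrSilencer {
    bool active;
    explicit CerrSilencer(bool silence) : active(silence) {
        if (active) std::cerr.setstate(std::ios_base::failbit);
    }
    ~CerrSilencer() {
        if (active) std::cerr.clear();
    }
};

// Usage sketch:  CerrSilencer guard(!verbose);  // silences std::cerr for the enclosing scope
```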
......@@ -426,6 +411,7 @@ void evaluate_pomcp(std::string sfile,
* \param discount discount factor in the POMDP model.
* \param horizon planning horizon for POMCP.
* \param rewards stored reward values.
* \param verbose if true, increases the verbosity. Defaults to false.
*/
template<typename M>
......@@ -433,7 +419,8 @@ void evaluate_memcp(std::string sfile,
AIToolbox::POMDP::MEMCP<M> memcp,
double discount,
unsigned int horizon,
double rewards [n_observations][n_actions])
double rewards [n_observations][n_actions],
bool verbose=false)
{
// Aux variables
int cluster, session_length, chorizon;
......@@ -477,8 +464,8 @@ void evaluate_memcp(std::string sfile,
prediction = memcp.sampleAction(init_belief, init_state, chorizon, true);
action = n_actions;
int i = 0;
// For each (state, action) in the session
if (!verbose) {std::cerr.setstate(std::ios_base::failbit);}
for (auto it2 = begin(std::get<1>(*it)); it2 != end(std::get<1>(*it)); ++it2) {
size_t observation = std::get<0>(*it2);
// If not init state, predict from past action and observation
......@@ -505,6 +492,7 @@ void evaluate_memcp(std::string sfile,
cdiscount *= discount;
chorizon = ((chorizon > 1) ? chorizon - 1 : 1 );
}
if (!verbose) {std::cerr.clear();}
// Set score
mean_accuracy[cluster] += accuracy / session_length;
mean_precision[cluster] += precision / session_length;
......@@ -518,7 +506,7 @@ void evaluate_memcp(std::string sfile,
std::cout << "\n\n";
std::vector<std::string> titles {"acc", "avgpr", "avgrw", "discrw", "idac", "idpr"};
std::vector<double*> results {mean_accuracy, mean_precision, mean_total_reward, mean_discounted_reward, mean_identification, mean_identification_precision};
print_evaluation_result(set_lengths, results, titles);
print_evaluation_result(set_lengths, results, titles, verbose);
}
......
......@@ -62,7 +62,7 @@ ctest -V
#### Running the code (``Code/`` folder)
If needed, first set the correct library paths in ``run.sh``. The script can then be used as follows:
``./run.sh -m [1] -d [2] -n [3] -k [4] -g [5] -s [6] -h [7] -e [8] -x [9] -b [10] -c -p --help``
``./run.sh -m [1] -d [2] -n [3] -k [4] -g [5] -s [6] -h [7] -e [8] -x [9] -b [10] -c -p -v --help``
* ``[1]`` Model to use (Defaults to mdp). Available options are *mdp* (MDP model obtained by a weighted average of all the environments' transition probabilities and solved by value iteration), *pbvi* (point-based value iteration optimized for the MEMDP structure), *pomcp* and *memcp* (Monte-Carlo solvers, respectively without and with optimization for the MEMDP structure).
* ``[2]`` Dataset to use (Defaults to fm). Available options are *fm* (foodmart) and *rd* (synthetic data).
......@@ -72,10 +72,11 @@ If needed, first set the correct library pathes in ``run.sh``. The script can th
* ``[6]`` Number of iterations for mdp, and number of simulation steps for pomcp and memcp (Defaults to 1500).
* ``[7]`` Horizon parameter for the POMDP solvers. Defaults to 1.
* ``[8]`` Convergence criterion for mdp and ip. Defaults to 0.01.
* ``[9]`` Exploration parameter for pomcp and memcp. Defaults to 10000 (high exploration).
* ``[10]`` Number of particles for the belief approximation in pomcp and memcp. Defaults to 100.
* ``[-p]`` If present, use Kahan summation for more precision while handling small probabilities. Use this option if AIToolbox throws an ``Input transition table does not contain valid probabilities`` error.
* ``[9]`` Exploration parameter for pomcp and memcp. Defaults to 10000 (high exploration). A high exploration parameter results in fewer "Observation never seen in the simulation" errors during evaluation of a pomcp or memcp model. (*Note*: these errors are only displayed in verbose mode).
* ``[10]`` Number of beliefs to use for PBVI, or number of particles for the belief approximation in pomcp and memcp. Defaults to 100.
* ``[-c]`` If present, recompile the code before running (*Note*: this should be used whenever switching to a dataset with different parameters, as the number of items, environments, etc. is determined at compilation time).
* ``[-p]`` If present, use Kahan summation for more precision while handling small probabilities. Use this option if AIToolbox throws an ``Input transition table does not contain valid probabilities`` error.
* ``[-v]`` If present, enables verbose output. In verbose mode, evaluation results are displayed per environment, and the std::cerr stream is enabled during evaluation.
**Example** *(foodmart, 6 environments, 3 actions, 13 states)*:
```bash
......