Commit fff9153c authored by Amelie Royer

Reduce failure when hitting wall in prepare_maze

parent 3532abb7
......@@ -580,7 +580,7 @@ double Mazemodel::getTransitionProbability(size_t s1, size_t a, size_t s2) const
double Mazemodel::getExpectedReward(size_t s1, size_t a, size_t s2) const {
// Trap
if (get_rep(s2) == T) {
return -1000.;
return -100.;
}
// Step
else if (!(get_rep(s2) == G && isGoal(s1))) {
......@@ -588,7 +588,7 @@ double Mazemodel::getExpectedReward(size_t s1, size_t a, size_t s2) const {
}
// Goal reached
else {
return goal_rewards.at(s1).at(a); //positive
return 100; //goal_rewards.at(s1).at(a); //positive
}
}
......
......@@ -367,7 +367,7 @@ void evaluate_interactive(int n_sessions,
unsigned int horizon,
bool verbose=false,
bool supervised=false, //true only works if full policy is computed (i.e. pbvi)
int session_length_max=100) {
int session_length_max=1000) {
// Aux variables
size_t observation = 0, prev_observation, action, prediction;
size_t state, prev_state;
......@@ -387,8 +387,11 @@ void evaluate_interactive(int n_sessions,
double mean_identification_precision [model.getE()] = {0};
// Generate test sessions
int subgroup_size = n_sessions / (int)(model.getE());
n_sessions = n_sessions - n_sessions % (int)(model.getE());
for (int user = 0; user < n_sessions; user++) {
cluster = (model.mdp_enabled() ? 0 : rand() % (int)(model.getE()));
cluster = user / subgroup_size;
//cluster = (model.mdp_enabled() ? 0 : rand() % (int)(model.getE()));
set_lengths[cluster] += 1;
std::cerr << "\r User " << user + 1 << "/" << n_sessions << std::string(15, ' ');
......@@ -423,7 +426,7 @@ void evaluate_interactive(int n_sessions,
// Update scores
if (!verbose) {std::cerr.clear();}
if (!model.isTerminal(state)) {
if (!model.isTerminal(state) || (model.get_rep(state) != 1)) {
if (verbose) {
std::cerr << " run " << user + 1 << " ignored: did not reach final state.";
}
......
......@@ -173,9 +173,9 @@ if __name__ == "__main__":
# E.L.S.E
elif element != '1':
# Move forward
target = "%dx%dx%s" % (i + changeMap[orient][0], j + changeMap[orient][1],orient) if not isWall(i, j, orient) else 'T'
f_transitions.write("%s %s %s %f\n" % (current_state, 'F', target, 1.0 - failures[0]))
f_transitions.write("%s %s %s %f\n" % (current_state, 'F', current_state, failures[0]))
target, fail = ("%dx%dx%s" % (i + changeMap[orient][0], j + changeMap[orient][1],orient), failures[0]) if not isWall(i, j, orient) else ('T', 0.95)
f_transitions.write("%s %s %s %f\n" % (current_state, 'F', target, 1.0 - fail))
f_transitions.write("%s %s %s %f\n" % (current_state, 'F', current_state, fail))
# Turn left
target = "%dx%dx%s" % (i, j, left[orient]);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment