Commit 7b1ef81e authored by Amelie Royer's avatar Amelie Royer

Minor modification for maze explorations (Debate: reward for forward action)

parent 9f956cdd
@@ -37,7 +37,7 @@ void mainMEMDP(M model, std::string datafile_base, std::string algo, int horizon
if (has_test) {
evaluate_from_file(datafile_base + ".test", model, solver, horizon, verbose);
} else {
- evaluate_interactive(2000, model, solver, horizon, verbose);
+ evaluate_interactive(5000, model, solver, horizon, verbose);
}
testing_time = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - start).count() / 1000000.;
}
@@ -52,7 +52,7 @@ void mainMEMDP(M model, std::string datafile_base, std::string algo, int horizon
if (has_test) {
evaluate_from_file(datafile_base + ".test", model, solver, horizon, verbose);
} else {
- evaluate_interactive(2000, model, solver, horizon, verbose);
+ evaluate_interactive(5000, model, solver, horizon, verbose);
}
testing_time = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - start).count() / 1000000.;
}
@@ -77,7 +77,7 @@ void mainMEMDP(M model, std::string datafile_base, std::string algo, int horizon
if (has_test) {
evaluate_from_file(datafile_base + ".test", model, policy, horizon_reached, verbose);
} else {
- evaluate_interactive(2000, model, policy, horizon_reached, verbose);
+ evaluate_interactive(5000, model, policy, horizon_reached, verbose);
}
testing_time = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - start).count() / 1000000.;
}
......
@@ -584,9 +584,13 @@ double Mazemodel::getExpectedReward(size_t s1, size_t a, size_t s2) const {
if (get_rep(s2) == T) {
return -100.;
}
- // Step
+ // Step (slightly encourage the model to change cell rather than changing orientation)
else if (!(get_rep(s2) == G && isGoal(s1))) {
- return -1.;
+ if (is_connected(s1, s2) == 2) {
+ return -0.9;
+ } else {
+ return -1.;
+ }
}
// Goal reached
else {
......
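The new branch makes a forward step that actually changes cell slightly cheaper (-0.9) than any other non-terminal step (-1., e.g. a rotation in place), so the solver is nudged towards spatial progress. A condensed sketch of the resulting cases, assuming, as the branch suggests, that is_connected(s1, s2) == 2 encodes "s2 is reached from s1 by moving forward"; GOAL_REWARD is a placeholder for the value in the truncated else-branch above:

// Condensed sketch of Mazemodel::getExpectedReward after this commit.
// get_rep, isGoal and is_connected are the model helpers visible in the diff;
// GOAL_REWARD stands for the elided goal-branch value.
double sketch_reward(size_t s1, size_t s2) {
    if (get_rep(s2) == T)               return -100.;       // stepped into a trap
    if (get_rep(s2) == G && isGoal(s1)) return GOAL_REWARD; // goal reached (branch truncated above)
    if (is_connected(s1, s2) == 2)      return -0.9;        // forward move into an adjacent cell
    return -1.;                                             // any other step, e.g. a rotation
}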
@@ -367,12 +367,12 @@ void evaluate_interactive(int n_sessions,
unsigned int horizon,
bool verbose=false,
bool supervised=false, //true only works if full policy is computed (i.e. pbvi)
- int session_length_max=1000) {
+ int session_length_max=400) {
// Aux variables
size_t observation = 0, prev_observation, action, prediction;
size_t state, prev_state;
int cluster, chorizon;
- double r, cdiscount, session_length, total_reward, discounted_reward, identity, identity_precision;
+ double r, session_length, total_reward, identity, identity_precision;
// Initialize arrays
AIToolbox::POMDP::Belief belief;
@@ -382,7 +382,7 @@
double mean_session_length [model.getE()] = {0};
double mean_success [model.getE()] = {0};
double mean_total_reward [model.getE()] = {0};
- double mean_discounted_reward [model.getE()] = {0};
+ double mean_goal_reward [model.getE()] = {0};
double mean_identification [model.getE()] = {0};
double mean_identification_precision [model.getE()] = {0};
@@ -396,9 +396,8 @@
std::cerr << "\r User " << user + 1 << "/" << n_sessions << std::string(15, ' ');
// Reset
- cdiscount = 1.;
chorizon = horizon;
- session_length = 0, total_reward = 0, discounted_reward = 0, identity = 0, identity_precision = 0;
+ session_length = 0, total_reward = 0, identity = 0, identity_precision = 0;
std::vector< double > action_scores(model.getA(), 0);
// Make initial guess
@@ -411,8 +410,6 @@
std::tie(state, observation, r) = model.sampleSOR(state, prediction);
// Update
total_reward += r;
- discounted_reward += cdiscount * r;
- cdiscount *= model.getDiscount();
chorizon = ((chorizon > 1) ? chorizon - 1 : 1 );
// Predict
prediction = make_prediction(model, solver, belief, observation, (supervised ? model.is_connected(prev_state, state) : prediction), horizon, action_scores);
@@ -426,7 +423,8 @@
// Update scores
if (!verbose) {std::cerr.clear();}
- if (!model.isTerminal(state) || (model.get_rep(state) != 1)) {
+ // Not reaching anything
+ if (!model.isTerminal(state)) {
if (verbose) {
std::cerr << " run " << user + 1 << " ignored: did not reach final state.";
}
@@ -434,26 +432,28 @@
n_failures += 1;
continue;
}
- mean_session_length[cluster] += session_length;
- mean_success[cluster] += ((model.get_rep(state) == 1) ? 1. : 0.); // Goal in robot maze
- mean_total_reward[cluster] += total_reward / session_length;
- mean_discounted_reward[cluster] += discounted_reward;
// id score
mean_identification[cluster] += identity / session_length;
mean_identification_precision[cluster] += identity_precision / session_length;
+ mean_total_reward[cluster] += total_reward / session_length;
+ // If Trap, do not count the rest
+ if (model.get_rep(state) != 1) {
+ continue;
+ }
+ // Normal execution, i.e. goal state
+ mean_session_length[cluster] += session_length;
+ mean_success[cluster] += ((model.get_rep(state) == 1) ? 1. : 0.); // Goal in robot maze
+ mean_goal_reward[cluster] += total_reward / session_length;
}
// Only output relevant metrics
bool has_identity = (identity >= 0);
bool has_total_reward = (model.getDiscount() < 1);
// Output
std::cout << "\n\n";
std::vector<std::string> titles {"discrw", "avgllng", "avgsuc"}; std::vector<double*> results {mean_discounted_reward, mean_session_length, mean_success};
std::vector<std::string> titles {"goalrw", "avgrw", "avgllng", "avgsuc"}; std::vector<double*> results {mean_goal_reward, mean_total_reward, mean_session_length, mean_success};
if (has_total_reward) {
titles.insert(titles.begin(), "avgrw");
results.insert(results.begin(), mean_total_reward);
}
if (has_identity) {
titles.push_back("idac"); titles.push_back("idpr");
results.push_back(mean_identification); results.push_back(mean_identification_precision);
......
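Net effect of the reorganized scoring: sessions that never reach a terminal state are discarded as failures; sessions that terminate in either a trap or the goal contribute to the identification scores and to the per-step average reward (avgrw); only sessions that actually end in the goal state count towards session length, success rate and the new per-step goal reward (goalrw). A condensed view of the block after this commit (same names as in the diff, not verbatim code):

// Runs that never terminate are ignored entirely.
if (!model.isTerminal(state)) { n_failures += 1; continue; }
// Terminated runs (goal or trap): identification scores and average reward.
mean_identification[cluster] += identity / session_length;
mean_identification_precision[cluster] += identity_precision / session_length;
mean_total_reward[cluster] += total_reward / session_length;
// Trap endings stop here; goal endings also update the goal-only statistics.
if (model.get_rep(state) != 1) { continue; }
mean_session_length[cluster] += session_length;
mean_success[cluster] += 1.; // get_rep(state) == 1 at this point
mean_goal_reward[cluster] += total_reward / session_length;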
@@ -79,16 +79,19 @@ if __name__ == "__main__":
parser.add_argument("-i", "--fin", type=str, help="If given, load the mazes from a file (takes precedence over the other parameters.")
parser.add_argument("-n", "--size", type=int, default=5, help="size of the maze")
parser.add_argument("-s", "--init", default=1, type=int, help="number of initial states per maze")
parser.add_argument("-t", "--trap", default=1, type=int, help="number of trap states per maze")
parser.add_argument("-t", "--trap", default=0, type=int, help="number of trap states per maze")
parser.add_argument("-w", "--wall", default=0, type=int, help="number of walls per maze")
parser.add_argument("-g", "--goal", default=1, type=int, help="number of goal states per maze")
parser.add_argument("-e", "--env", default=1, type=int, help="number of environments to generate for")
parser.add_argument("-wf", "--wall_failure", default=0.05, type=float, help="Probability of failure when going forward at a wall")
parser.add_argument("--rdf", action='store_true', help="each environment has randomized failure rates")
parser.add_argument('-o', '--output', type=str, default=os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "Code", "Models"), help="Path to output directory.")
args = parser.parse_args()
# Hyperparameters
actions = ['F','L','R']
failures = [0.2, 0.1, 0.1] # Probability of staying still after action forward, left and right respectively.
- wall_failure = 0.05 # Probability of being trapped when going forward towards a wall
+ wall_failure = args.wall_failure # Probability of being trapped when going forward towards a wall
goal_reward = 1.0
min_x, max_x, min_y, max_y = sys.maxint, 0, sys.maxint, 0
changeMap = {'N':[-1,0],'S':[1,0],'E':[0,1],'W':[0,-1]}
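For illustration, the two new knobs combine with the existing flags as follows (script name hypothetical; the options are the ones defined above). A 6x6 batch of 2 environments with one initial state, one goal, three walls, a 10% wall-failure probability and per-environment randomized action failures:

python generate_mazes.py -n 6 -s 1 -g 1 -t 0 -w 3 -e 2 -wf 0.1 --rdf

With --rdf set, the default failures = [0.2, 0.1, 0.1] above are resampled per environment (see the np.random.rand(3) / 2. block further down).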
@@ -120,10 +123,10 @@ if __name__ == "__main__":
min_y = min(min_y, y1); max_y = max(max_y, y2);
# OR generate mazes
else:
base_name = "gen_%dx%d_%d%d%d_%d" % (args.size, args.size, args.init, args.trap, args.goal, args.env)
base_name = "gen_%dx%d_%d%d%d%d_%d" % (args.size, args.size, args.init, args.trap, args.goal, args.wall, args.env)
maze = np.pad(np.zeros((args.size - 1, args.size - 1), dtype=int) + 48, 1, 'constant', constant_values=49)
n_cases = (args.size - 1) * (args.size - 1)
- n_choices = args.goal + args.init + args.trap
+ n_choices = args.goal + args.init + args.trap + args.wall
choices = range(n_cases)
assert(n_choices <= n_cases)
# for each environment
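With the defaults above (size 5, one initial state, one goal, no traps, no walls), the interior grid offers n_cases = (5 - 1) * (5 - 1) = 16 candidate cells while n_choices = 2, so the assert holds comfortably; each init, trap, goal or wall state requested via the flags raises n_choices by one.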
@@ -135,7 +138,7 @@ if __name__ == "__main__":
# write states
for i in xrange(n_choices):
c = cases[i]
- current[c / (args.size - 1) + 1, c % (args.size - 1) + 1] = 60 if i < args.init else 120 if i < args.init + args.trap else 103
+ current[c / (args.size - 1) + 1, c % (args.size - 1) + 1] = 60 if i < args.init else 120 if i < args.init + args.trap else 103 if i < args.init + args.trap + args.goal else 49
# append new environment
str_maze = [[str(unichr(x)) for x in line] for line in current]
mazes.append(str_maze)
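For reference, the ASCII codes written into the grid decode as: 48 = '0' (free interior cell), 49 = '1' (wall, used for the padded border and, with this commit, interior walls as well), 60 = '<' (initial state; the init count further down also accepts 'v', '>' and '^'), 120 = 'x' (trap) and 103 = 'g' (goal). The extended ternary chain above assigns init, then trap, then goal, then wall cells from the (presumably shuffled) case indices.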
@@ -162,11 +165,14 @@ if __name__ == "__main__":
from collections import Counter
for e, maze in enumerate(mazes):
print "\n > Maze %d/%d \n" % (e + 1, len(mazes)),
+ if args.rdf:
+ failures = np.random.rand(3) / 2. # uniformly random sampling in [0; 0.5)
+ print " sampled failures:", failures
c = Counter([x for y in maze for x in y])
n_init = c['v'] + c['>'] + c['^'] + c['<']
for i in range(0, width):
for j in range(0, height):
print "\r state %d/%d" % (i * height + j + 1, width * height),
print "\r state %d/%d" % (4 * (i * height + j + 1), 4 * width * height), #4 * = all orientations
element = maze[i][j]
# I.N.I.T
......