Commit 1294bd67 authored by Amelie Royer

Adding zip I/O to recomodel

parent e323467c
......@@ -13,6 +13,9 @@
#include <fstream>
#include <cassert>
#include <algorithm>
#include <boost/iostreams/filtering_streambuf.hpp>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/filter/gzip.hpp>
/**
* RANDOM ENGINE
......@@ -86,7 +89,7 @@ size_t Recomodel::is_connected(size_t s1, size_t s2) const {
s1 = get_rep(s1);
s2 = get_rep(s2);
}
// Check if corresponding observations are connected
// Check if corresponding observations can be connected
// Suffix of s1
int suffix_s1 = s1 % pows[0];
suffix_s1 = ((suffix_s1 >= acpows[1] || s1 < pows[0]) ? suffix_s1 - acpows[1] : suffix_s1 + pows[0] - acpows[1]);
......@@ -203,20 +206,18 @@ void Recomodel::load_rewards(std::string rfile) {
double v;
size_t a;
int rewards_found = 0;
size_t s1, s2; // TODO
infile.open(rfile, std::ios::in);
assert((".rewards file not found", infile.is_open()));
while (std::getline(infile, line)) {
std::istringstream iss(line);
//if (!(iss >> a >> v)) { break; } TODO
if (!(iss >> s1 >> a >> s2 >> v)) { break; }
if (!(iss >> a >> v)) { break; }
assert(("Unvalid reward entry", a <= n_actions));
rewards[a - 1] = v;
rewards_found++;
}
//assert(("Missing item while parsing .rewards file",
// rewards_found == n_actions)); TODO
assert(("Missing item while parsing .rewards file",
rewards_found == n_actions));
infile.close();
}
......@@ -225,30 +226,43 @@ void Recomodel::load_rewards(std::string rfile) {
* LOAD_TRANSITIONS
*/
void Recomodel::load_transitions(std::string tfile, bool precision /* =false */, std::string pfile /* ="" */) {
std::ifstream infile;
//std::fstream infile;
std::string line;
std::ifstream file, gzfile;
std::istringstream iss;
double v;
size_t s1, a, s2, link, p;
int transitions_found = 0, profiles_found = 0;
boost::iostreams::filtering_streambuf<boost::iostreams::input> in;
// If MDP mode, load profiles proportions for weighted average
std::vector<double> profiles_prop;
if (is_mdp) {
infile.open(pfile, std::ios::in);
assert((".profiles file not found", infile.is_open()));
while (std::getline(infile, line)) {
file.open(pfile, std::ios::in);
assert((".profiles file not found", file.is_open()));
while (std::getline(file, line)) {
std::istringstream iss(line);
if (!(iss >> s1 >> s2 >> v)) { break; }
profiles_prop.push_back(v);
}
infile.close();
file.close();
assert(("Missing profiles in .profiles file", profiles_prop.size() == n_environments));
}
// Load transitions
infile.open(tfile, std::ios::in);
assert((".transitions file not found", infile.is_open()));
std::istream infile(nullptr);
file.open(tfile, std::ios::in);
// If not found try the zipped version
if (!file.is_open()) {
std::cout << ".transitions not found. Searching for .gz alternative" << std::flush;;
gzfile.open(tfile + ".gz", std::ios_base::in | std::ios_base::binary);
in.push(boost::iostreams::gzip_decompressor());
in.push(gzfile);
infile.rdbuf(&in);
} else {
infile.rdbuf(file.rdbuf());
}
assert((".transitions(.gz) file not found", file.is_open() || gzfile.is_open()));
while (std::getline(infile, line)) {
std::istringstream iss(line);
// Change of environment
......@@ -268,12 +282,16 @@ void Recomodel::load_transitions(std::string tfile, bool precision /* =false */,
transition_matrix[index(0, s1, a - 1, link)] += profiles_prop.at(profiles_found) * v;
} else {
transition_matrix[index(profiles_found, s1, a - 1, link)] = v;
//transition_matrix[index(n_environments - 1, s1, a - 1, link)] += profiles_prop.at(profiles_found) * v;
}
transitions_found++;
}
assert(("Missing profiles in .transitions file", profiles_found == n_environments));
infile.close();
if (file.is_open()) {
file.close();
}
if (gzfile.is_open()) {
gzfile.close();
}
//Normalization
double nrm;
......
......@@ -113,7 +113,7 @@ if [ $MODE = "mdp" ]; then
if [ "$COMPILE" = true ]; then
echo
echo "Compiling MDP model in mainMDP"
$GCC -O3 -Wl,-rpath,$STDLIB -DNITEMSPRM=$NITEMS -DHISTPRM=$HIST -DNPROFILESPRM=$PROFILES -std=c++11 mazemodel.cpp recomodel.cpp utils.cpp main_MDP.cpp -o mainMDP -I $AIINCLUDE -I $EIGEN -L $AIBUILD -l AIToolboxMDP -l AIToolboxPOMDP -l lpsolve55
$GCC -O3 -Wl,-rpath,$STDLIB -DNITEMSPRM=$NITEMS -DHISTPRM=$HIST -DNPROFILESPRM=$PROFILES -std=c++11 mazemodel.cpp recomodel.cpp utils.cpp main_MDP.cpp -o mainMDP -I $AIINCLUDE -I $EIGEN -L $AIBUILD -l AIToolboxMDP -l AIToolboxPOMDP -l lpsolve55 -lz -lboost_iostreams
if [ $? -ne 0 ]; then
echo "Compilation failed!"
echo "exit"
......@@ -132,7 +132,7 @@ else
if [ "$COMPILE" = true ]; then
echo
echo "Compiling MEMDP model in mainMEMDP"
$GCC -O3 -Wl,-rpath,$STDLIB -DNITEMSPRM=$NITEMS -DHISTPRM=$HIST -DNPROFILESPRM=$PROFILES -std=c++11 mazemodel.cpp recomodel.cpp utils.cpp main_MEMDP.cpp -o mainMEMDP -I $AIINCLUDE -I $EIGEN -L $LPSOLVE -L $AIBUILD -l AIToolboxMDP -l AIToolboxPOMDP -l lpsolve55
$GCC -O3 -Wl,-rpath,$STDLIB -DNITEMSPRM=$NITEMS -DHISTPRM=$HIST -DNPROFILESPRM=$PROFILES -std=c++11 mazemodel.cpp recomodel.cpp utils.cpp main_MEMDP.cpp -o mainMEMDP -I $AIINCLUDE -I $EIGEN -L $LPSOLVE -L $AIBUILD -l AIToolboxMDP -l AIToolboxPOMDP -l lpsolve55 -lz -lboost_iostreams
if [ $? -ne 0 ]
then
echo "Compilation failed!"
......
......@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
"""
Generate a synthetic POMDP model with high discrepancy between environments.
Generate a synthetic recommendation task MEMDP with high discrepancy between the environments.
"""
__author__ = "Amelie Royer"
__email__ = "amelie.royer@ist.ac.at"
......@@ -12,7 +12,7 @@ import sys, os
import gzip
import argparse
from random import randint
from utils import Logger, init_base_writing, get_nstates, get_next_state_id
from utils import ChunkedWriter, Logger, init_base_writing, get_nstates, get_next_state_id
def init_output_dir(nitems, hlength):
"""
......@@ -35,9 +35,9 @@ def init_output_dir(nitems, hlength):
##################################################### M A I N R O U T I N E #######
if __name__ == "__main__":
###### 0. Set Parameters
###### 0. Parameters
base_folder = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
parser = argparse.ArgumentParser(description='Generate a synthetic recommendation task MEMDP with high discrepancy between the environments.')
parser = argparse.ArgumentParser(description="Generate a synthetic recommendation task MEMDP with high discrepancy between the environments.")
parser.add_argument('-o', '--output', type=str, default=os.path.join(base_folder, "Code", "Models"), help="Output directory.")
parser.add_argument('-n', '--nactions', type=int, default=3, help="Number of items.")
parser.add_argument('-k', '--history', type=int, default=2, help="History length.")
......@@ -94,10 +94,9 @@ if __name__ == "__main__":
###### 4. Set rewards
print "\n\n\033[91m-----> Rewards generation\033[0m"
with open("%s.rewards" % output_base, 'w') as f:
for s1 in xrange(n_states):
sys.stderr.write(" state: %d / %d \r" % (s1 + 1, n_states))
for item in actions:
f.write("%d\t%d\t%d\t%.5f\n" % (s1, item, get_next_state_id(s1, item), 1))
for item in actions:
sys.stderr.write(" item: %d / %d \r" % (item + 1, len(actions)))
f.write("%d\t%.5f\n" % (item, 1))
###### 5. Create transition function
print "\n\n\033[91m-----> Probability inference\033[0m"
......@@ -133,12 +132,13 @@ if __name__ == "__main__":
with open("%s.transitions" % output_base, 'w') as f:
f.write(transitions_str)
else:
with gzip.open("%s.transitions.gz" % output_base, 'w') as f:
f.write(transitions_str)
f = gzip.open("%s.transitions.gz" % output_base, 'wb')
cw = ChunkedWriter(f)
cw.write(transitions_str)
f.close()
with open("%s.summary" % output_base, 'wb') as f:
f.write("%d States\n%d Actions (Items)\n%d user profiles\n%d history length\n%d product clustering level\n\n%s" % (n_states, n_items, n_users, args.history, args.nactions, logger.to_string()))
print
###### 6. Summary
print "\n\n\033[92m-----> End\033[0m"
......
......@@ -8,7 +8,7 @@ __author__ = "Amelie Royer"
__email__ = "amelie.royer@ist.ac.at"
import mmap
import sys
import numpy as np
from StringIO import StringIO
......@@ -228,3 +228,16 @@ def id_to_state(s):
i += 1
output[-1] = real
return output
class ChunkedWriter(object):
    """
    Write data to a file-like object in bounded-size chunks.

    Workaround for the overflow bug hit when writing a very large payload
    with gzip in Python 2.7: the payload is split into slices of at most
    `chunksize` items and each slice is written with its own `write` call.

    Parameters:
        file: any object exposing a `write(data)` method (e.g. the object
            returned by `gzip.open(..., 'wb')`). It is neither opened nor
            closed by this class.
        chunksize: maximum number of items of `mdata` passed to a single
            `file.write` call. Must be a positive integer.

    Raises:
        ValueError: if `chunksize` is smaller than 1.
    """

    # 1 GiB per write call. The default must stay well below the size at
    # which a single gzip write overflows; a default of sys.maxint (2**63 - 1
    # on typical 64-bit Unix builds) makes every payload fit in one chunk,
    # so no splitting ever happens.
    # NOTE(review): the exact overflow threshold depends on the Python 2.7
    # patch level (presumably 2**31 or 2**32 bytes) -- 1 GiB is below both.
    DEFAULT_CHUNKSIZE = 1 << 30

    def __init__(self, file, chunksize=DEFAULT_CHUNKSIZE):
        if chunksize < 1:
            # A zero step would fail later and a negative one would silently
            # write nothing; reject both up front.
            raise ValueError("chunksize must be a positive integer, got %r" % (chunksize,))
        self.file = file
        self.chunksize = chunksize

    def write(self, mdata):
        """
        Write `mdata` to the underlying file, one chunk at a time.

        Each slice is converted with `bytes(...)` before being written.
        Empty input results in no `write` call at all.
        """
        total = len(mdata)
        offset = 0
        # Plain counter loop: avoids materialising the full list of offsets
        # that `range` would build under Python 2.
        while offset < total:
            self.file.write(bytes(mdata[offset:offset + self.chunksize]))
            offset += self.chunksize
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment