Commit 5554b127 authored by Amelie Royer's avatar Amelie Royer

adding a notebook showing how to convert imagenet to hdf5

parent ea4eb561
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import h5py\n",
"import os\n",
"\n",
"import multiprocessing\n",
"from scipy.misc import imread\n",
"from scipy.misc import imresize"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"batch_size = 100000\n",
"image_size = 128\n",
"num_cpus = multiprocessing.cpu_count()\n",
"\n",
"def process(f):\n",
" global image_size\n",
" im = imread(f, mode='RGB')\n",
" im = imresize(im, (image_size, image_size), interp='bicubic')\n",
" return im\n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"100000 images\n",
"200000 images\n",
"300000 images\n",
"400000 images\n",
"500000 images\n",
"600000 images\n",
"700000 images\n",
"800000 images\n",
"900000 images\n",
"1000000 images\n",
"1100000 images\n",
"1200000 images\n",
"1300000 images\n",
"CPU times: user 11.8 s, sys: 15.5 s, total: 27.3 s\n",
"Wall time: 12min 38s\n"
]
}
],
"source": [
"%%time\n",
"## Train\n",
"prefix = '/home/ILSVRC2012/train/'\n",
"l = list(map(lambda x : os.path.join(prefix, x), os.listdir(prefix)))\n",
"\n",
"i = 0\n",
"imagenet = np.zeros((len(l), image_size, image_size, 3), dtype='uint8')\n",
"pool = multiprocessing.Pool(num_cpus)\n",
"while i < len(l):\n",
" current_batch = l[i:i + batch_size] \n",
" current_res = np.array(pool.map(process, current_batch))\n",
" imagenet[i:i + batch_size] = current_res \n",
" i += batch_size\n",
" print(i, 'images')"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"100000 images\n",
"CPU times: user 528 ms, sys: 8.7 s, total: 9.23 s\n",
"Wall time: 39.6 s\n"
]
}
],
"source": [
"%%time\n",
"# Val\n",
"prefix = '/home/ILSVRC2012/val/'\n",
"l_val = list(map(lambda x : os.path.join(prefix, x), os.listdir(prefix)))\n",
"\n",
"i = 0\n",
"imagenet_val = np.zeros((len(l_val), image_size, image_size, 3), dtype='uint8')\n",
"pool = multiprocessing.Pool(multiprocessing.cpu_count())\n",
"\n",
"while i < len(l_val):\n",
" current_batch = l_val[i:i + batch_size] \n",
" current_res = np.array(pool.map(process, current_batch))\n",
" imagenet_val[i:i + batch_size] = current_res \n",
" i += batch_size\n",
" print(i, 'images')"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"with h5py.File('/home/aroyer/Datasets/imagenet-128.hdf5', 'w') as f:\n",
" f['train'] = imagenet\n",
" f['val'] = imagenet_val"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment