From 4017a58898f6f6d87442ec6b57e3eef363b075c6 Mon Sep 17 00:00:00 2001 From: Christoph Lampert Date: Tue, 2 Oct 2018 15:14:36 +0200 Subject: [PATCH] now it compiled --- poster.tex | 235 +++++++++-------------------------------------------- 1 file changed, 38 insertions(+), 197 deletions(-) diff --git a/poster.tex b/poster.tex index 117347c..286087d 100755 --- a/poster.tex +++ b/poster.tex @@ -33,7 +33,7 @@ %\usepackage{times}\usefonttheme{professionalfonts} % obsolete %\usefonttheme[onlymath]{serif} \boldmath -\usepackage[orientation=portrait,size=a0,scale=1.4, debug]{beamerposter} +\usepackage[orientation=landscape,size=a1,scale=1.4, debug]{beamerposter} % change list indention level % \setdefaultleftmargin{3em}{}{}{}{}{} @@ -72,7 +72,7 @@ %\title{\LARGE iCaRL: incremental Classifier and Representation Learning} \title{Computer Vision and Machine Learning} -\author{} +\author{~} \institute{\vskip-.5\baselineskip\large Institute of Science and Technology (IST) Austria, 3400 Klosterneuburg, Austria} %\institute{~}%Christoph Lampert} %\textsuperscript{1} ENS Rennes (Ecole Normale Sup\'{e}rieure de Rennes), Rennes, France \textsuperscript{2} IST Austria (Institute of Science and Technology Austria), Klosterneuburg, Austria} @@ -80,7 +80,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \newlength{\columnheight} -\setlength{\columnheight}{95cm} +\setlength{\columnheight}{40cm} \setlength{\columnsep}{1cm} \renewcommand{\P}{\pmb{\mathbb{P}}} @@ -139,8 +139,14 @@ \vspace*{-1.5cm} -\begin{block}{\Large People} - \newcommand{\peopleheight}{6cm} +\vskip-1cm +\begin{columns}[t] +% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%% First COlumn +\ \ \ \begin{column}{.49\textwidth} +\begin{block}{\Large Our Research} + + \newcommand{\peopleheight}{4cm} \begin{center} \includegraphics[height=\peopleheight{}]{people/clampert.jpg} ~~ \includegraphics[height=\peopleheight{}]{people/akolesnikov-new.jpg} ~~ @@ -152,136 +158,59 @@ \includegraphics[height=\peopleheight{}]{people/gsperl.jpg} ~~ \includegraphics[height=\peopleheight{}]{people/azimin.jpg} ~~ \end{center} -\end{block} - -\vskip-1cm -\begin{columns}[t] -% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%% First COlumn -\ \ \ \begin{column}{.49\textwidth} -\begin{block}{\Large Multi-Task Learning} \bigskip Object categorization methods are trained to recognize \textbf{1000s of classes}: -\medskip -\centerline{\includegraphics[width=.9\textwidth]{2010-imagenet}}% -\medskip -\blue{Standard training requires:} -\begin{itemize} -\item random order access to 100s of GB of training data, -\item many days to weeks of training time. 
-\end{itemize} - -\bigskip - -\medskip -\centerline{\includegraphics[width=.9\textwidth]{2010-imagenet-new}}% -\medskip +\end{block} -\blue{What, if a few new classes are meant to be included?} -\begin{itemize} -\item training must be re-run for all classes +\begin{block}{\Large Example: Fine-tuning} -$\rightarrow$ huge computational cost, all training data must be kept around {\color{orange}\large \Frowny{}} -\end{itemize} + \begin{center} + yes%\includegraphics{dummy} ~~ + \end{center} -\bigskip -Potential solution: \bblue{class-incremental learning} \end{block} +\begin{block}{\Large Example: Distillation} -\vskip4\blockskip -\begin{block}{\Large Conditional Risk} -\textbf{Fixed data representation:} -\begin{itemize} -\item retrain classifiers on data subset with biased regularization {\scriptsize [Kuzborskij \etal, 2013]} -\item represent classes by \blue{mean feature vectors} {\scriptsize [Mensink \etal, 2012], [Ristin \etal, 2014]} -\end{itemize} -\textbf{Learning the data representation:} -\begin{itemize} -\item grow neural network incrementally, fixing parts that are responsible for earlier class decisions -{\scriptsize [Mandziuk, Shastri. 1998], \dots, [Rusu \etal, 2016]} - -\item continuously generate patterns to prevent forgetting {\scriptsize [Ans, Rousset. 2000]} + \begin{center} + yes%\includegraphics{dummy} ~~ + \end{center} -\item multi-task setting: preserve network activations by \blue{distillation} {\scriptsize [Li, Hoiem. 2016]} -\end{itemize} \end{block} -\vskip4\blockskip -\begin{block}{\Large iCaRL} % {\scriptsize [arXiv \dots]}} -We incrementally learn \blue{classifiers and features} with a fixed-size network. +\end{column} +% -%Notation: -\begin{itemize} -\item $f^1(x),\dots,f^T(x)$: probabilistic network outputs for (up to) $T$ classes -\item $\bphi$: (current) feature representation defined by the network -%\item $K$: size of extra memory that we can use to store images/exemplars -\item $t$: number of classes observed so far -\item $P^y=(p^y_1,\dots,p^y_{m})$ set of exemplar images for class $y=1,\dots,t$ -\end{itemize} -\bigskip -\bblue{iCaRL component 1: exemplar-based classification.} -\begin{itemize} -\item a new image, $x$, is classified by the \blue{nearest-mean-of-exemplars} rule -$$ y^\ast = \operatorname*{argmin}_{y=1,\dots,t} \Big\|\bphi(x) - \mu^y\Big\| -\qquad\text{for }\ -\mu^y=\frac{1}{m}\sum_{j=1}^{m}\bphi(p^y_j).$$ -\end{itemize} +\ \ \begin{column}{.495\textwidth} -\bigskip -\bblue{iCaRL component 2: representation learning.} -For new data $X^y=\{x^y_1,\dots,x^y_{n_y}\}$ of classes $y=t\!+\!1,\dots,t'$ -\begin{itemize} -\item create training set of training examples and exemplars -$$\mathcal{D} \leftarrow \bigcup_{y=t+1,\dots,t'}\!\!\!\{(x,y) : x\in X^y\} -\ \cup\!\!\bigcup_{y=1,\dots,t} \!\!\!\{(x,y) : x\in P^y\} $$ -\item for all $x_i\in\mathcal{D}$, store network outputs $a^y_i = f^y(x_i)$ of classes $y=1,\dots,t$ -\item update the network parameters, $\theta$, using BackProp on loss function -$$\ell(\theta) = -\!\!\!\!\!\!\sum_{(x_i,y_i)\in\mathcal{D}}\!\!\!\![\ \log( f^{y_i}(x_i;\theta)+\sum_{y=1}^{t}\, a^y_i\log( f^y(x_i;\theta) )\ ].$$ -\end{itemize} +\begin{block}{\Large Example: Class-incremental learning} + + \begin{center} + yes%\includegraphics{dummy} ~~ + \end{center} \bigskip -\bblue{iCaRL component 3: exemplar selection.} +Object categorization methods are trained to recognize \textbf{1000s of classes}: -When the number of observed classes increases from $t$ to $t'$: set $m'=\frac{K}{t'}$. 
-\begin{itemize} -\item for classes $y=1,\dots,t$, keep exemplars $p^y_{1},\dots,p^y_{m'}$, discard others % $p^j_{m'_j+1},\dots,p^j_{m_j}$ -\item for classes $y=t\!+\!1,\dots,t'$, find new exemplars $p^y_{1},\dots,p^y_{m'}$ as -\begin{align*} -p^y_k \leftarrow\!\argmin\limits_{x\in X^y} \Big\| \frac{1}{n_y}\sum_{i=1}^{n_y}\!\!\bphi(x^y_i) - \frac{1}{k}\Big[\bphi(x)+\sum_{j=1}^{k-1}\!\!\bphi(p^y_j)\Big] \Big\| -\ \text{for $k=1,\dots,m'$}. -\end{align*} -%\begin{align*} -%\operatorname*{argmin}_{|I|=m'_j} \Big\| \frac{1}{n_j}\sum_{i=1}^{n_j}\bphi(x^j_i) - \frac{1}{m'_j}\sum_{i\in I}\bphi(x^j_i) \Big\| -%\end{align*} -\end{itemize} \end{block} -% -% - %\STATE // form combined training set: - %% - %\STATE // store network outputs with pre-update parameters: - %\FOR{$j=1,\dots,s-1$} - %\STATE $a^j_i\leftarrow f^j(x_i)$ \quad for all $(x_i,\cdot)\in\D$ - %\ENDFOR - %%\STATE run network training (\eg BackProp) with loss function - %\begin{align*} - %\ell(\Theta) = -\!\!\!\!\!\!\sum_{(x_i,y_i)\in\D}&\!\!\!\!\!\!\big[\log( f^{y_i}(x_i))+\sum_{j=1}^{s-1}\, a^j_i\log( f^j(x_i) )\big] - %\end{align*} - %\textcolor{red}{\textbf{is this the right way to handle EXEMPLARS???}} +\begin{block}{\Large Example: Multi-task Learning} -\end{column} -% + \begin{center} + yes%\includegraphics{dummy} ~~ + \end{center} +\bigskip +Object categorization methods are trained to recognize \textbf{1000s of classes}: -\ \ \begin{column}{.495\textwidth} -\begin{block}{\Large Multi-output Distillation} +\end{block} + +\begin{block}{\Large Example: Multi-output Distillation} \textbf{Situation:} \begin{itemize} \item classes appear sequentially (or in batches) % $c_1,c_2,\dots,c_T$ @@ -294,96 +223,8 @@ p^y_k \leftarrow\!\argmin\limits_{x\in X^y} \Big\| \frac{1}{n_y}\sum_{i=1}^{n_y} \item for any number of observed classes, $t$, learn a multi-class classifier% for $c_1,\dots,c_t$ \item store a certain number, $K$, of images (a few hundreds or thousands) \end{itemize} - -\bigskip -\textbf{We do not want to/we cannot:} -\begin{itemize} -\item retrain from scratch whenever new classes become available -\item store all training examples (could be millions) -\end{itemize} -\bigskip -\textbf{The dilemma:} -\begin{itemize} - \item \textcolor{blue}{fixing the data representation}: suboptimal results on new classes. % {\tiny [Tommasi \etal, 2015]}. - \item \textcolor{blue}{continuously improving the representation}: classifiers for earlier classes deteriorate over time - ("catastrophic forgetting/interference").{\scriptsize [McCloskey, Cohen. 1989]} % (catastrophic forgetting) {\tiny []}. - % Ratcliff, R. (1990) Connectionist models of recognition memory: Constraints imposed by learning and forgetting functions. 
Psychological Review,97, 285-308 -\end{itemize} \end{block} -\vskip4\blockskip -\begin{block}{\Large Flexible Fine-tuning} - -\vskip4\blockskip -\mbox{ -\parbox{.48\textwidth}{ -\!\!\!\textbf{CIFAR-100:} -\begin{itemize} -\item 100 classes, in batches of 10 -\item 32-layer ResNet {\scriptsize [He \etal, 2015]} -\item evaluated by top-1 accuracy -\item number of exemplars: 2000 -\end{itemize} -} -\parbox{.48\textwidth}{ -\textbf{ImageNet ILSVRC 2012 (subset):} -\begin{itemize} -\item 100 classes, in batches of 10 -\item 18-layer ResNet {\scriptsize [He \etal, 2015]} -\item evaluated by top-5 accuracy -\item number of exemplars: 2000 -\end{itemize} -}} - -\vskip4\blockskip -\textbf{Baselines:} -\begin{itemize} -\item fixed representation: freeze representation after first batch of classes -\item finetuning: ordinary NN learning, finetune whenever new classes come in -\item LwF: \emph{"Learning without Forgetting"} {\scriptsize [Li, Hoiem. 2016]}, use network itself to classify -%\item LwF+proto: like LwF, but with prototypes used for representation learning -\item iNCM: like iCaRL, but store all images and classify with true class means -\end{itemize} -\end{block} - - - -\vskip4\blockskip -\begin{block}{\Large 7) Summary} -\begin{itemize} -\item iCaRL learns incrementally with a fixed memory footprint -\item much better results than baselines, on par with (intractable) iNCM -\end{itemize} -\end{block} - - -\bigskip\hrule\medskip\tiny - -%[Thrun \etal, "Learning one more thing", \dots] - -[Ans, Rousset. \emph{"Neural networks with a self-refreshing memory: Knowledge transfer in sequential learning tasks without catastrophic forgetting"}, Connection Science 12(1), 2000] - -[He, Zhang, Ren, Sun. \emph{"Deep residual learning for image recognition"}. arXiv:1512.03385, 2015] - -[Hinton, Vinyals, Dean. \emph{"Distilling the Knowledge in a Neural Network"}, NIPS Workshop on Deep Learning, 2014] - -[Kuzborskij, Orabona, Caputo. \emph{"From N to N+1: Multiclass transfer incremental learning"}, CVPR 2013] - -[Mandziuk, Shastri. \emph{"Incremental class learning approach and its application to handwritten digit recognition"}, Information Sciences, 2002] - -[McCloskey, Cohen. \emph{"Catastrophic interference in connectionist networks: The sequential learning problem"}, The Psychology of Learning and Motivation, 1989] - -[Mensink, Verbeek, Perronnin, Csurka. \emph{"Distance-based image classification: Generalizing to new Classes at near-zero cost"}, 2013] - -[Li, Hoiem. \emph{"Learning without forgetting"}, ECCV 2016] - -[Ristin, Huillaumin, Gall, van Gool. \emph{"Incremental learning of NCM forests for large-scale image classification"}, CVPR 2014] - -[Rusu, Rabinowitz, Desjardins, Soyer, Kirkpatrick, Kavukcuoglu, Pascanu, Hadsell. \emph{"Progressive neural networks"}, arXiv:1606.04671 [cs.LG] 2016] - -%[Ans, "Sequential Learning in Distributed Neural Networks without Catastrophic Forgetting: A Single and Realistic Self-Refreshing Memory Can Do It", Neural Information Processing--Letters and Reviews. 2004] -%[He, Zhang, Ren, Sun. Deep residual learning for image recognition. CVPR 2016] -%[Rocco De Rosa, Thomas Mensink, and Barbara Caputo, "Online Open World Recognition"] arXiv \end{column} \end{columns} -- GitLab
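
For reference, the iCaRL formulas that this patch strips from the poster (the nearest-mean-of-exemplars rule of component 1 and the herding-style exemplar selection of component 3) translate into a short NumPy sketch like the one below. This is only an illustration of the equations as they appeared on the old poster, not the authors' released implementation; the function names (classify_nme, select_exemplars), the toy data, and the assumption that features are plain NumPy vectors produced by some fixed feature extractor phi are introduced here for the example.

# Illustrative sketch only: nearest-mean-of-exemplars classification and
# herding-style exemplar selection, following the removed poster equations.
# The feature extractor phi (the network) is assumed to exist elsewhere;
# here features are plain NumPy arrays and all names are hypothetical.
import numpy as np


def classify_nme(phi_x, exemplar_features):
    """Return argmin_y ||phi(x) - mu_y||, with mu_y the mean exemplar feature of class y.

    phi_x: (d,) feature vector of the query image.
    exemplar_features: dict mapping class label -> (m_y, d) array of exemplar features.
    """
    best_label, best_dist = None, np.inf
    for label, feats in exemplar_features.items():
        mu = feats.mean(axis=0)              # class mean over the stored exemplars
        dist = np.linalg.norm(phi_x - mu)
        if dist < best_dist:
            best_label, best_dist = label, dist
    return best_label


def select_exemplars(class_features, m):
    """Greedily pick m images whose running feature mean stays closest to the class mean.

    class_features: (n_y, d) array with the features of all images of one class.
    Returns the indices of the selected exemplars, in selection order.
    """
    class_mean = class_features.mean(axis=0)
    selected, running_sum = [], np.zeros_like(class_mean)
    for k in range(1, m + 1):
        # mean that would result from adding each candidate as the k-th exemplar
        candidate_means = (running_sum + class_features) / k
        dists = np.linalg.norm(candidate_means - class_mean, axis=1)
        dists[selected] = np.inf             # never select the same image twice
        idx = int(np.argmin(dists))
        selected.append(idx)
        running_sum += class_features[idx]
    return selected


if __name__ == "__main__":
    rng = np.random.default_rng(0)
    exemplars = {0: rng.normal(0.0, 1.0, (20, 16)),   # toy exemplar features, two classes
                 1: rng.normal(3.0, 1.0, (20, 16))}
    query = rng.normal(3.0, 1.0, 16)                  # drawn near class 1
    print("predicted class:", classify_nme(query, exemplars))
    print("selected exemplar indices:", select_exemplars(exemplars[1], m=5))

Because the class means are recomputed from the currently stored exemplar features, the classification rule adapts automatically when the feature representation is updated for newly arriving classes, which is the point of the exemplar-based classifier described on the old poster.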