From 6655c76639ac36944b0abb6f133f2a42a67c2e9a Mon Sep 17 00:00:00 2001 From: Christoph Lampert Date: Tue, 2 Oct 2018 17:33:24 +0200 Subject: [PATCH] more shuffles --- finetuning.tex | 7 +-- poster.tex | 115 +++++++++++++++++++++++++++++++------------------ 2 files changed, 78 insertions(+), 44 deletions(-) diff --git a/finetuning.tex b/finetuning.tex index f3889d5..5c6874c 100644 --- a/finetuning.tex +++ b/finetuning.tex @@ -3,9 +3,9 @@ \vspace{0.45cm} \begin{minipage}[c]{0.46\textwidth} - \textbf{Problem Formulation:} + \textbf{Goal:} \begin{itemize} - \item how to fine-tune pretrained model $M$ for new task? %$T: X \rightarrow Y$ + \item fine-tune pretrained model for new task %$T: X \rightarrow Y$ %\item How to most efficiently fine-tune $M$ for new task $T': X' \rightarrow Y$ where $\mathcal{P}(X) \neq \mathcal{P}(X')$ %\item \textbf{Baseline:} Fine-tuning % \begin{itemize} @@ -14,8 +14,9 @@ % \end{itemize} \end{itemize} -\textbf{Proposed}: flexible Fine-tuning of internal layers +\textbf{Proposed}: \begin{itemize} +\item flexible fine-tuning of internal layers \item allow any layer to be tuned, not just last \item automatic selection criterion \end{itemize} diff --git a/poster.tex b/poster.tex index 72438d9..ce4d608 100755 --- a/poster.tex +++ b/poster.tex @@ -207,7 +207,7 @@ % \begin{column}{.35\textwidth} \begin{itemize} -\item Learning with Strong Supervision +\item Learning with dependent data \end{itemize} \end{column} \end{columns} @@ -232,7 +232,7 @@ \end{column} \begin{column}{.35\textwidth} \begin{itemize} -\item Non-standard forms of supervision +\item Learning with strong supervision \end{itemize} \end{column} \end{columns} @@ -268,19 +268,24 @@ \input{finetuning.tex} \end{block} -\begin{block}{\Large Conditional Risk Minimization} - \begin{minipage}{.45\textwidth} -\textbf{Situation:} -\begin{itemize} -\item data for more and more classes appears sequentially % $c_1,c_2,\dots,c_T$ -\end{itemize} - -\end{minipage} -% -\begin{minipage}{.5\textwidth} 
-\includegraphics[width=\textwidth]{lifelong}\qquad -\end{minipage} - +\begin{block}{\Large Multi-output Distillation} + \begin{columns} + \begin{column}{0.62\textwidth} + \includegraphics[width=\textwidth]{multi-output/architecture.pdf} + \end{column} + \begin{column}{0.35\textwidth} + \includegraphics[width=0.9\textwidth,height=.6\textwidth]{fine-tuning/selection_criterion_pacs.png} + + \textbf{Multi-exit architectures} + \begin{itemize} + \item can be stopped anytime to provide a valid prediction + \end{itemize} + \textbf{Proposed training} + \begin{itemize} + \item Distill from later (more accurate) to earlier exits + \end{itemize} + \end{column} + \end{columns} \end{block} \end{column} @@ -291,68 +296,96 @@ \begin{block}{\Large iCaRL (Incremental Classifier and Representation Learning) {\tiny [Rebuffi et al, CVPR 2017]}} -\begin{minipage}{.38\textwidth} +\begin{minipage}{.48\textwidth} \textbf{Situation:} \begin{itemize} \item data for more and more classes appears sequentially % $c_1,c_2,\dots,c_T$ \end{itemize} \bigskip -\textbf{We want to/we can:} +\textbf{Goal:} \begin{itemize} \item learn a multi-class classifier for all classes so far % $c_1,c_2,\dots,c_T$% for $c_1,\dots,c_t$ \item avoid \textbf{catastrophic forgetting} -\item store a certain number, $K$, of images (a few hundreds or thousands) \end{itemize} \bigskip -\textbf{Suggestion: } +\textbf{Method:} +\begin{itemize} +\item select and store small number of exemplars +\item add distillation to training objective +\end{itemize} \end{minipage} % -\begin{minipage}{.58\textwidth} +\begin{minipage}{.48\textwidth} \includegraphics[width=\textwidth]{incremental} \end{minipage} \end{block} -\begin{block}{\Large Multi-task Learning with Labeled and Unlabeled Tasks {\tiny [Pentina et al, ICML 2017]}} +\begin{block}{\Large Multi-task Learning with Labeled and Unlabeled Tasks {\tiny [Pentina, Lampert. 
ICML 2017]}} \begin{minipage}{.45\textwidth} \textbf{Situation:} \begin{itemize} -\item data for more and more classes appears sequentially % $c_1,c_2,\dots,c_T$ +\item many learning tasks to solve, \newline most have only unlabeled data +\end{itemize} + +\textbf{Goal:} +\begin{itemize} +\item learn predictors for each task (including unlabeled ones) \end{itemize} +\textbf{Method:} +\begin{itemize} +\item share data between tasks +\item derive optimal way to share from generalization bound +\end{itemize} \end{minipage} % \begin{minipage}{.5\textwidth} -\includegraphics[width=\textwidth]{asya-multitask}\qquad +\includegraphics[width=.9\textwidth]{asya-multitask}\qquad % with-theorem \end{minipage} \end{block} -\begin{block}{\Large Example: Multi-output Distillation} - \begin{columns} - \begin{column}{0.68\textwidth} - \includegraphics{multi-output/architecture.pdf} - \end{column} - \begin{column}{0.28\textwidth} - \includegraphics[width=0.9\textwidth,height=.6\textwidth]{fine-tuning/selection_criterion_pacs.png} - - \textbf{Multi-exit architectures} - \begin{itemize} - \item can be stopped anytime to provide a valid prediction - \end{itemize} - \textbf{Proposed training} - \begin{itemize} - \item Distill from later (more accurate) to earlier exits - \end{itemize} - \end{column} - \end{columns} +\begin{block}{\Large Conditional Risk Minimization {\tiny [Zimin, Lampert. 
AISTATS 2017]}} + \begin{minipage}{.45\textwidth} + +\textbf{Situation:} +\begin{itemize} +\item data is stochastic process, $z_1,z_2,\dots$ +\end{itemize} + +\textbf{Goal:} +\begin{itemize} +\item learn predictor $h$ for next step of process +\end{itemize} + +\textbf{Method:} +\begin{itemize} +\item minimize \emph{conditional risk} +$$\mathcal{R}_{\text{cond}}(h) = \mathbb{E}[\ell(z_{n+1},h) | z_1,\dots,z_n]$$ +instead of marginal risk +$$\mathcal{R}_{\text{marg}}(h) = \mathbb{E}[\ell(z_{n+1},h)]$$ +\end{itemize} +\end{minipage} +% +\begin{minipage}{.5\textwidth} +\includegraphics[width=\textwidth]{lifelong}\qquad +\end{minipage} + \end{block} \end{column} \end{columns} + \end{frame} -- GitLab