Commit 6655c766 authored by Christoph Lampert's avatar Christoph Lampert

more shuffles

parent cabb5896
...@@ -3,9 +3,9 @@ ...@@ -3,9 +3,9 @@
\vspace{0.45cm} \vspace{0.45cm}
\begin{minipage}[c]{0.46\textwidth} \begin{minipage}[c]{0.46\textwidth}
\textbf{Problem Formulation:} \textbf{Goal:}
\begin{itemize} \begin{itemize}
\item how to fine-tune pretrained model $M$ for new task? %$T: X \rightarrow Y$ \item fine-tune pretrained model for new task %$T: X \rightarrow Y$
%\item How to most efficiently fine-tune $M$ for new task $T': X' \rightarrow Y$ where $\mathcal{P}(X) \neq \mathcal{P}(X')$ %\item How to most efficiently fine-tune $M$ for new task $T': X' \rightarrow Y$ where $\mathcal{P}(X) \neq \mathcal{P}(X')$
%\item \textbf{Baseline:} Fine-tuning %\item \textbf{Baseline:} Fine-tuning
% \begin{itemize} % \begin{itemize}
...@@ -14,8 +14,9 @@ ...@@ -14,8 +14,9 @@
% \end{itemize} % \end{itemize}
\end{itemize} \end{itemize}
\textbf{Proposed}: flexible Fine-tuning of internal layers \textbf{Proposed}:
\begin{itemize} \begin{itemize}
\item flexible fine-tuning of internal layers
\item allow any layer to be tuned, not just last \item allow any layer to be tuned, not just last
\item automatic selection criterion \item automatic selection criterion
\end{itemize} \end{itemize}
......
...@@ -207,7 +207,7 @@ ...@@ -207,7 +207,7 @@
% %
\begin{column}{.35\textwidth} \begin{column}{.35\textwidth}
\begin{itemize} \begin{itemize}
\item Learning with Strong Supervision \item Learning with dependent data
\end{itemize} \end{itemize}
\end{column} \end{column}
\end{columns} \end{columns}
...@@ -232,7 +232,7 @@ ...@@ -232,7 +232,7 @@
\end{column} \end{column}
\begin{column}{.35\textwidth} \begin{column}{.35\textwidth}
\begin{itemize} \begin{itemize}
\item Non-standard forms of supervision \item Learning with strong supervision
\end{itemize} \end{itemize}
\end{column} \end{column}
\end{columns} \end{columns}
...@@ -268,19 +268,24 @@ ...@@ -268,19 +268,24 @@
\input{finetuning.tex} \input{finetuning.tex}
\end{block} \end{block}
\begin{block}{\Large Conditional Risk Minimization} \begin{block}{\Large Multi-output Distillation}
\begin{minipage}{.45\textwidth} \begin{columns}
\textbf{Situation:} \begin{column}{0.62\textwidth}
\begin{itemize} \includegraphics[width=\textwidth]{multi-output/architecture.pdf}
\item data for more and more classes appears sequentially % $c_1,c_2,\dots,c_T$ \end{column}
\end{itemize} \begin{column}{0.35\textwidth}
\includegraphics[width=0.9\textwidth,height=.6\textwidth]{fine-tuning/selection_criterion_pacs.png}
\end{minipage}
%
\begin{minipage}{.5\textwidth}
\includegraphics[width=\textwidth]{lifelong}\qquad
\end{minipage}
\textbf{Multi-exit architectures}
\begin{itemize}
\item can be stopped anytime to provide a valid prediction
\end{itemize}
\textbf{Proposed training}
\begin{itemize}
\item Distill from later (more accurate) to earlier exits
\end{itemize}
\end{column}
\end{columns}
\end{block} \end{block}
\end{column} \end{column}
...@@ -291,68 +296,96 @@ ...@@ -291,68 +296,96 @@
\begin{block}{\Large iCaRL (Incremental Classifier and Representation Learning) {\tiny [Rebuffi et al, CVPR 2017]}} \begin{block}{\Large iCaRL (Incremental Classifier and Representation Learning) {\tiny [Rebuffi et al, CVPR 2017]}}
\begin{minipage}{.38\textwidth} \begin{minipage}{.48\textwidth}
\textbf{Situation:} \textbf{Situation:}
\begin{itemize} \begin{itemize}
\item data for more and more classes appears sequentially % $c_1,c_2,\dots,c_T$ \item data for more and more classes appears sequentially % $c_1,c_2,\dots,c_T$
\end{itemize} \end{itemize}
\bigskip \bigskip
\textbf{We want to/we can:} \textbf{Goal:}
\begin{itemize} \begin{itemize}
\item learn a multi-class classifier for all classes so far % $c_1,c_2,\dots,c_T$% for $c_1,\dots,c_t$ \item learn a multi-class classifier for all classes so far % $c_1,c_2,\dots,c_T$% for $c_1,\dots,c_t$
\item avoid \textbf{catastrophic forgetting} \item avoid \textbf{catastrophic forgetting}
\item store a certain number, $K$, of images (a few hundreds or thousands)
\end{itemize} \end{itemize}
\bigskip \bigskip
\textbf{Suggestion: } \textbf{Method:}
\begin{itemize}
\item select and store small number of exemplars
\item add distillation to training objective
\end{itemize}
\end{minipage} \end{minipage}
% %
\begin{minipage}{.58\textwidth} \begin{minipage}{.48\textwidth}
\includegraphics[width=\textwidth]{incremental} \includegraphics[width=\textwidth]{incremental}
\end{minipage} \end{minipage}
\end{block} \end{block}
\begin{block}{\Large Multi-task Learning with Labeled and Unlabeled Tasks {\tiny [Pentina et al, ICML 2017]}} \begin{block}{\Large Multi-task Learning with Labeled and Unlabeled Tasks {\tiny [Pentina, Lampert. ICML 2017]}}
\begin{minipage}{.45\textwidth} \begin{minipage}{.45\textwidth}
\textbf{Situation:} \textbf{Situation:}
\begin{itemize} \begin{itemize}
\item data for more and more classes appears sequentially % $c_1,c_2,\dots,c_T$ \item many learning tasks to solve, \newline most have only unlabeled data
\end{itemize}
\textbf{Goal:}
\begin{itemize}
\item learn predictors for each task (including unlabeled ones)
\end{itemize} \end{itemize}
\textbf{Method:}
\begin{itemize}
\item share data between tasks
\item derive optimal way to share from generalization bound
\end{itemize}
\end{minipage} \end{minipage}
% %
\begin{minipage}{.5\textwidth} \begin{minipage}{.5\textwidth}
\includegraphics[width=\textwidth]{asya-multitask}\qquad \includegraphics[width=.9\textwidth]{asya-multitask}\qquad % with-theorem
\end{minipage} \end{minipage}
\end{block} \end{block}
\begin{block}{\Large Example: Multi-output Distillation} \begin{block}{\Large Conditional Risk Minimization {\tiny [Zimin, Lampert. AISTATS 2017]}}
\begin{columns} \begin{minipage}{.45\textwidth}
\begin{column}{0.68\textwidth}
\includegraphics{multi-output/architecture.pdf} \textbf{Situation:}
\end{column} \begin{itemize}
\begin{column}{0.28\textwidth} \item data is stochastic process, $z_1,z_2,\dots$
\includegraphics[width=0.9\textwidth,height=.6\textwidth]{fine-tuning/selection_criterion_pacs.png} \end{itemize}
\textbf{Goal:}
\begin{itemize}
\item learn predictor $h$ for next step of process
\end{itemize}
\textbf{Method:}
\begin{itemize}
\item minimize \emph{conditional risk}
$$\mathcal{R}_{\text{cond}}(h) = \mathbb{E}[\ell(z_{n+1},h) | z_1,\dots,z_n]$$
instead of marginal risk
$$\mathcal{R}_{\text{marg}}(h) = \mathbb{E}[\ell(z_{n+1},h)]$$
\end{itemize}
\end{minipage}
%
\begin{minipage}{.5\textwidth}
\includegraphics[width=\textwidth]{lifelong}\qquad
\end{minipage}
\textbf{Multi-exit architectures}
\begin{itemize}
\item can be stopped anytime to provide a valid prediction
\end{itemize}
\textbf{Proposed training}
\begin{itemize}
\item Distill from later (more accurate) to earlier exits
\end{itemize}
\end{column}
\end{columns}
\end{block} \end{block}
\end{column} \end{column}
\end{columns} \end{columns}
[Reference 1]
\end{frame} \end{frame}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment