% Beamer slide deck: "Convolutional Neural Networks (CNNs) -- Theory and Applications".
%
% Build note: the minted package runs an external highlighter, so the document
% has to be compiled with shell escape enabled (e.g. pdflatex -shell-escape).
\documentclass{beamer}
\usetheme{metropolis}

% --- Packages -----------------------------------------------------------------
\usepackage{hyperref}          % \href links (beamer loads hyperref as well; kept explicit)
\usepackage[utf8]{inputenc}    % the source file is UTF-8 encoded
\usepackage[english]{babel}    % English hyphenation patterns and language strings
\usepackage[T1]{fontenc}       % this is needed for correct output of umlauts in pdf
\usepackage{caption}           % \captionsetup, used for label-free source credits
\usepackage{tikz}
\usetikzlibrary{arrows.meta,
                decorations.pathreplacing,
                positioning,
                decorations.text,
                decorations.pathmorphing,
                shapes.multipart,
                calc}
\usepackage{minted}            % needed for the inclusion of source code

% --- Helper macros ------------------------------------------------------------
% \slidegraphic{<file>}
%   Includes <file> scaled to fit inside a box of 0.8\paperwidth by
%   0.7\paperheight while keeping its aspect ratio.
\newcommand*{\slidegraphic}[1]{%
    \includegraphics[width=0.8\paperwidth, height=0.7\paperheight, keepaspectratio]{#1}%
}

% \sourcecaption{<text>}
%   Typesets <text> as a caption with the caption label suppressed; used for
%   source credits below figures. Must be used inside a figure environment so
%   that the \captionsetup change stays local to that figure.
\newcommand*{\sourcecaption}[1]{%
    \captionsetup{labelformat=empty}%
    \caption{#1}%
}

\begin{document}

% --- Title metadata -----------------------------------------------------------
\title{Convolutional Neural Networks (CNNs)}
\subtitle{Theory and Applications}
\author{Martin Thoma -- \footnotesize \href{http://tinyurl.com/CNN-Intro}{tinyurl.com/CNN-Intro}}
\date{22. February 2019}
\subject{Machine Learning, AI, Neural Networks, Convolutional Neural Networks}

\begin{frame}
    \titlepage
\end{frame}

% \section{Neural Network Basics}
% \subsection{}

\begin{frame}{Artificial Neuron (Perceptron)}
    % \[ \] instead of the plain-TeX $$ $$; \colon gives correct spacing for a mapping.
    \[ f\colon \mathbb{R}^n \rightarrow \mathbb{R} \]
    \begin{figure}[ht]
        \centering
        \slidegraphic{graphics/artificial-neuron.pdf}
    \end{figure}
    % --- Disabled draft material (kept for reference) ---
    % $$f(x) = ax^2 + bx + c \text{ with } f(0) = 3, f(1) = 2, f(-1) = 6$$
    % \begin{align*}
    %     \onslide<2->{f(0) &= a \cdot 0^2 + b \cdot 0 + c = 3} &\onslide<3->{\Rightarrow c &= 3\\}
    %     \onslide<4->{f(1) &= a \cdot 1^2 + b \cdot 1 + 3 = 2} &\onslide<5->{\Rightarrow a &= -1-b\\}
    %     \onslide<6->{f(-1) &= a \cdot {(-1)}^2 - b + 3 = 6\\}
    %     \onslide<7->{\Leftrightarrow 3&=a - b\\}
    %     \onslide<8->{\Leftrightarrow 3&= (-1-b) - b\\}
    %     \onslide<9->{\Leftrightarrow b&= -2\\}
    %     \onslide<10>{\Rightarrow \quad f(x) &= x^2 -2 x + 3\\}
    % \end{align*}
    % \only<1>{$$f: \mathbb{R}^n \rightarrow \mathbb{R}^m$$}
    % \only<2>{$$f: \mathbb{R}^2 \rightarrow \mathbb{R}$$
    % # 2x - 1
    % # (x-1)^2 + 1
    % Examples:
    % \begin{itemize}
    %     \item $1 \rightarrow 1$: $f(x) = x$
    %     \item $2 \rightarrow 3$: $f(x) = $
    %     % \item $3 \rightarrow 3$
    % \end{itemize}
    % }
\end{frame}

\begin{frame}{Multi-Layer Perceptron (MLP)}
    \[ f\colon \mathbb{R}^n \rightarrow \mathbb{R}^m \]
    \begin{figure}[ht]
        \centering
        \slidegraphic{graphics/perceptron-notation.pdf}
    \end{figure}
\end{frame}

% Example tasks, uncovered one item at a time via the <+-> overlay specification.
\begin{frame}{}
    \begin{itemize}[<+->]
        \item Predict housing prices: (bedrooms, size, age) $\rightarrow$ Price
        \item Product categorization: (weight, volume, price) $\rightarrow$ \{shoe, handbag, shirt\}
        \item Image classification: List of pixel colors $\rightarrow$ \{cat, dog\}
    \end{itemize}
\end{frame}

% Divider slide.
\begin{frame}{}
    \begin{center}
        \Huge Data
    \end{center}
\end{frame}

\begin{frame}{Necessary Data}
    \begin{itemize}
        \item $f(x) = w_0$
        \item $f(x) = w_1 \cdot x + w_0$
        % NOTE(review): the original read w_2^2 and w_1^2 here, which is
        % inconsistent with the linear model above; written as a plain
        % quadratic polynomial. Confirm with the author.
        \item $f(x) = w_2 \cdot x^2 + w_1 \cdot x + w_0$
        \item sin, cos, tan, \dots
    \end{itemize}
\end{frame}

\begin{frame}{Convolution}
    \begin{figure}[ht]
        \centering
        \includegraphics[width=0.8\paperwidth]{graphics/convolution-linear.pdf}\\
        \href{https://martin-thoma.com/graphic-filters/}{martin-thoma.com/graphic-filters}
    \end{figure}
\end{frame}

\begin{frame}{Max Pooling}
    \begin{figure}[ht]
        \centering
        \includegraphics[width=0.8\paperwidth]{graphics/max-pooling.pdf}
    \end{figure}
\end{frame}

\begin{frame}{Convolutional Layer}
    \begin{figure}[ht]
        \centering
        % External LaTeX source for the drawing; presumably the TikZ picture
        % that needs the libraries loaded in the preamble -- TODO confirm.
        \input{graphics/convolution-layer}
    \end{figure}
\end{frame}

\section{Applications}

\begin{frame}{Symbol recognizer}
    \begin{figure}[ht]
        \centering
        \slidegraphic{graphics/symbol-recognizer.png}
        \sourcecaption{\href{http://write-math.com}{write-math.com}}
    \end{figure}
\end{frame}

% Source listing read from cnn.py (file-based, so no verbatim text in the frame body).
\begin{frame}{}
    \inputminted[linenos,
                 numbersep=7pt,
                 gobble=0,
                 fontsize=\footnotesize,
                 tabsize=4]{python}{cnn.py}
\end{frame}

\begin{frame}{Super Resolution}
    \begin{figure}[ht]
        \centering
        \slidegraphic{graphics/pixel-recursive-super-resolution.png}
        \sourcecaption{Dahl, Norouzi, Shlens: Pixel recursive super resolution (2017)}
    \end{figure}
\end{frame}

\begin{frame}{Colorization: The Problem}
    \begin{figure}[ht]
        \centering
        \slidegraphic{graphics/multimodality-apple.png}
        \sourcecaption{Cinarel: Automatic Colorization of Webtoons Using Deep Convolutional Neural Networks (2018)}
    \end{figure}
\end{frame}

\begin{frame}{Colorization -- Photographs}
    \begin{figure}[ht]
        \centering
        \slidegraphic{graphics/colorful-image-colorization.png}
        \sourcecaption{Zhang, Isola, Efros: Colorful Image Colorization (2016)}
    \end{figure}
    Interactive Demo: \href{http://richzhang.github.io/colorization/}{richzhang.github.io/colorization}\\
    Model Lab: \href{https://github.com/MartinThoma/model-lab}{github.com/MartinThoma/model-lab}
\end{frame}

\begin{frame}{Colorization -- Comic}
    \begin{figure}[ht]
        \centering
        \slidegraphic{graphics/comic-colorization.png}
        \sourcecaption{Ci, Ma, Wang, Li, Luo: User-Guided Deep Anime Line Art Colorization with Conditional Adversarial Networks (2018)}
    \end{figure}
\end{frame}

\begin{frame}{Denoising}
    \begin{figure}[ht]
        \centering
        \slidegraphic{graphics/denoising.png}
        \sourcecaption{Zhang, Zuo, Gu, Zhang: Learning Deep CNN Denoiser Prior for Image Restoration (2017)}
    \end{figure}
\end{frame}

\begin{frame}{Image Inpainting (Watermark removal)}
    \begin{figure}[ht]
        \centering
        \slidegraphic{graphics/leopard-inpainting.png}
        \sourcecaption{Yang, Lu, Lin, Shechtman, Wang, Li: High-Resolution Image Inpainting using Multi-Scale Neural Patch Synthesis (2017)}
    \end{figure}
\end{frame}

\begin{frame}{CNNs in NLP}
    \begin{figure}[ht]
        \centering
        \slidegraphic{graphics/tdnns.png}
        \sourcecaption{Collobert, Weston, Bottou, Karlen, Kavukcuoglu, Kuksa: Natural Language Processing (almost) from Scratch (2011)}
    \end{figure}
\end{frame}

\end{document}