diff --git a/presentations/Bachelor-Short/LaTeX/bachelor-short.tex b/presentations/Bachelor-Short/LaTeX/bachelor-short.tex index a12f650..5f3e9ad 100644 --- a/presentations/Bachelor-Short/LaTeX/bachelor-short.tex +++ b/presentations/Bachelor-Short/LaTeX/bachelor-short.tex @@ -26,13 +26,20 @@ \section{What is my Bachelor's thesis about?} \input{introduction} -\section{What did I do so far?} -\input{work-done} +\section{write-math.com} +\input{write-math} + +\section{Preprocessing and Features} +\input{preprocessing} +\input{features} + +\section{Neural Nets} +\input{neural-nets} \section{What will I do next?} \input{will-do} -\section{End} +\section*{End} \subsection{End} \input{sources} \framedgraphic{Thanks for Your Attention!}{../images/xi.png} diff --git a/presentations/Bachelor-Short/LaTeX/features.tex b/presentations/Bachelor-Short/LaTeX/features.tex new file mode 100644 index 0000000..bcab48e --- /dev/null +++ b/presentations/Bachelor-Short/LaTeX/features.tex @@ -0,0 +1,23 @@ +\subsection{Features} +\begin{frame}{Features} + \begin{itemize} + \item Local + \begin{itemize} + \item Coordinates + \item Speed + \item Binary pen pressure + \item Direction + \item Curvature + \item Bitmap-environment + \item Hat-Feature + \end{itemize} + \item Global + \begin{itemize} + \item \# of points + \item \# of strokes + \item Center point + \item Bitmap + \item Bounding box (width, height, time) + \end{itemize} + \end{itemize} +\end{frame} \ No newline at end of file diff --git a/presentations/Bachelor-Short/LaTeX/introduction.tex b/presentations/Bachelor-Short/LaTeX/introduction.tex index 0c60f3c..da38362 100644 --- a/presentations/Bachelor-Short/LaTeX/introduction.tex +++ b/presentations/Bachelor-Short/LaTeX/introduction.tex @@ -2,9 +2,9 @@ \begin{frame}{What is my Bachelor's thesis about?} \begin{itemize} - \item Recognition of handwritten mathematical formulas + \item Recognition of handwritten mathematical symbols \item On-line recognition, not OCR! 
- \item Given a series of points $(x(t), y(t), b)$\\ + \item Given a series of points $(x(t), y(t), b(t))$\\ I want to get the proper \LaTeX{} code. \end{itemize} \end{frame} @@ -16,13 +16,13 @@ \item It's much harder to find complete formulas. \end{itemize} - I want to - \begin{itemize} - \item provide a tool that enables beginners to get the best \LaTeX{} code - for their formula, - \item find out what works best for symbol recognition - \item and provide data and a platform to test new ideas for classifiers - \end{itemize} + % I want to + % \begin{itemize} + % \item provide a tool that enables beginners to get the best \LaTeX{} code + % for their formula, + % \item find out what works best for symbol recognition + % \item and provide data and a platform to test new ideas for classifiers + % \end{itemize} As soon as symbol recognition works good in terms of classification time and performance, I will continue with formula recognition. diff --git a/presentations/Bachelor-Short/LaTeX/neural-nets.tex b/presentations/Bachelor-Short/LaTeX/neural-nets.tex new file mode 100644 index 0000000..604253a --- /dev/null +++ b/presentations/Bachelor-Short/LaTeX/neural-nets.tex @@ -0,0 +1,13 @@ +\subsection{Neural Net experiments} +\begin{frame}{Experiments} + \textbf{Preprocessing:} Scaling, shifting and linear interpolation\\ + \textbf{Features:} Coordinates of 80 points (4 Lines with 20 points each)\\ + \textbf{Learning:} MLP, 300 epochs, LR of 0.1 + \begin{itemize} + \item[] \textit{topology \tabto{6cm} error in training time} + \item 160:500:369 \tabto{6cm} 30.62 \% in \hphantom{0}9min 08s + \item 160:500:500:369 \tabto{6cm} 27.73 \% in 11min 49s + \item 160:500:500:500:369 \tabto{6cm} 34.79 \% in 14min 09s + \item 160:500:500:500:500:369 \tabto{6cm} 33.61 \% in 14min 06s + \end{itemize} +\end{frame} \ No newline at end of file diff --git a/presentations/Bachelor-Short/LaTeX/preprocessing.tex b/presentations/Bachelor-Short/LaTeX/preprocessing.tex new file mode 100644 index 
0000000..8667139 --- /dev/null +++ b/presentations/Bachelor-Short/LaTeX/preprocessing.tex @@ -0,0 +1,18 @@ +\subsection{Preprocessing} +\begin{frame}{Preprocessing} + \begin{itemize} + \item Normalizing + \begin{itemize} + \item Scaling + \item Shifting + \item Resampling + \end{itemize} + \item Noise reduction + \begin{itemize} + \item Smoothing (e.g. moving average) + \item Dot reduction + \item Filtering (by distance, speed or angle) + \item Stroke connection + \end{itemize} + \end{itemize} +\end{frame} \ No newline at end of file diff --git a/presentations/Bachelor-Short/LaTeX/will-do.tex b/presentations/Bachelor-Short/LaTeX/will-do.tex index 8a2cc5a..12178e6 100644 --- a/presentations/Bachelor-Short/LaTeX/will-do.tex +++ b/presentations/Bachelor-Short/LaTeX/will-do.tex @@ -1,32 +1,22 @@ \subsection{What will I do next?} \begin{frame}{What will I do next?} \begin{itemize} - \item Get classification performance with cross-validation - \item Implement neural net for classification - \begin{itemize} - \item preprocessing: compute cubic spline for each line - \begin{itemize} - \item equi-spaced points or - \item get equi-timed points - \end{itemize} - \item 5 - 20 input neurons for each line - \item 1076 output neurons (one for each symbol) - \end{itemize} - \item Get a language model (e.g. by parsing Wikipedia) - \item Use ANN with HMM (?) + \item Evaluate preprocessing steps + \item Try other features + \item Try other topologies / trainings (e.g. newbob) \end{itemize} \end{frame} -\subsection{Far future} -\begin{frame}{What could be done?} - \begin{itemize} - \item Make use of audio data in a multimodal approach\\ - e.g. $R$ and $\mathcal{R}$ - \item Currently, the Lecture Translation system doesn't recognize math.\\ - You get \enquote{integral of e raised to the power of x d x} instead - of $\int e^x \mathrm{d} x$. - \item Spoken math is ambigous: $\sqrt{a+b}$ vs. 
$\sqrt{a} + b$ - \item The language model I create could help to find probable formulas - \item The platform could be used to get more input data of users - \end{itemize} -\end{frame} \ No newline at end of file +% \subsection{Far future} +% \begin{frame}{What could be done?} +% \begin{itemize} +% \item Make use of audio data in a multimodal approach\\ +% e.g. $R$ and $\mathcal{R}$ +% \item Currently, the Lecture Translation system doesn't recognize math.\\ +% You get \enquote{integral of e raised to the power of x d x} instead +% of $\int e^x \mathrm{d} x$. +% \item Spoken math is ambiguous: $\sqrt{a+b}$ vs. $\sqrt{a} + b$ +% \item The language model I create could help to find probable formulas +% \item The platform could be used to get more input data of users +% \end{itemize} +% \end{frame} \ No newline at end of file diff --git a/presentations/Bachelor-Short/LaTeX/work-done.tex b/presentations/Bachelor-Short/LaTeX/write-math.tex similarity index 58% rename from presentations/Bachelor-Short/LaTeX/work-done.tex rename to presentations/Bachelor-Short/LaTeX/write-math.tex index 2cd4312..1479893 100644 --- a/presentations/Bachelor-Short/LaTeX/work-done.tex +++ b/presentations/Bachelor-Short/LaTeX/write-math.tex @@ -13,25 +13,18 @@ \framedgraphic{Classify}{../images/classify.png} \framedgraphic{Workflow}{../images/workflow.png} -\framedgraphic{User page}{../images/user-page.png} -\framedgraphic{Information about handwritten-data}{../images/view.png} -\framedgraphic{Non-mathematical symbols}{../images/yinyang.png} -\framedgraphic{Training}{../images/train.png} +% \framedgraphic{User page}{../images/user-page.png} +% \framedgraphic{Information about recordings}{../images/view.png} +% \framedgraphic{Symbol page}{../images/symbol.png} +% \framedgraphic{Training}{../images/train.png} \framedgraphic{Ranking}{../images/ranking.png} -\framedgraphic{Symbol page}{../images/symbol.png} -\begin{frame}{Statistics} + +\begin{frame}[fragile]{Statistics} \begin{itemize} - \item 40 
users - \item 1076 symbols - \item 5519 handwritten symbols (e.g. 195 times the letter \enquote{A}) - \begin{itemize} - \item only 264 have 4 lines - \item only 36 have 5 lines - \item only 16 have 6 lines - \item only 19 have 7 lines or more - \item none has more than 12 lines - \end{itemize} + \item 127 users with at least 5 recordings + \item 1109 symbols, but only 369 used for experiments + \item $\num{235831}$ recordings (e.g. $\num{3486}$ times \verb+\int+) \end{itemize} \end{frame} @@ -40,12 +33,11 @@ \item preprocessing: Scale to fit into unit square while keeping the aspect ratio \item applies dynamic time warping - \item compares a new handwritten symbol with every handwritten symbol + \item compares a new recording with every recording in the database - \item[$\Rightarrow$] Classification time is in $\mathcal{O}(\text{handwritten symbols})$, + \item[$\Rightarrow$] Classification time is in $\mathcal{O}(\text{recordings})$, but we rather would like $\mathcal{O}(\text{symbols})$ - \item the current server / workflow can only handle about 4000 handwritten - symbols + \item the current server / workflow can only handle about 4000 recordings \item[$\Rightarrow$] Another way to classify is necessary \end{itemize} \end{frame} \ No newline at end of file diff --git a/presentations/Bachelor-Short/images/ranking.png b/presentations/Bachelor-Short/images/ranking.png index b0a2325..a09a068 100644 Binary files a/presentations/Bachelor-Short/images/ranking.png and b/presentations/Bachelor-Short/images/ranking.png differ diff --git a/presentations/Bachelor-Short/templates/myStyle.sty b/presentations/Bachelor-Short/templates/myStyle.sty index 4ec8a54..d3babb0 100644 --- a/presentations/Bachelor-Short/templates/myStyle.sty +++ b/presentations/Bachelor-Short/templates/myStyle.sty @@ -4,9 +4,11 @@ \InputIfFileExists{../templates/beamerthemekit.sty}{\usepackage{../templates/beamerthemekit}}{\usetheme{Frankfurt}} \usefonttheme{professionalfonts} +\usepackage{tabto} 
\usepackage{hyperref} \usepackage{lmodern} \usepackage{listings} +\usepackage{siunitx} \usepackage{wrapfig} % see http://en.wikibooks.org/wiki/LaTeX/Floats,_Figures_and_Captions \usepackage[utf8]{inputenc} % this is needed for german umlauts \usepackage[english]{babel} % this is needed for german umlauts