more images
After Width: | Height: | Size: 13 KiB |
Before Width: | Height: | Size: 21 KiB After Width: | Height: | Size: 21 KiB |
|
@ -1,5 +1,5 @@
|
|||
\subsection{Preprocessing}
|
||||
\begin{frame}{Preprocessing}
|
||||
\subsection{Preprocessing Algorithms}
|
||||
\begin{frame}{Preprocessing Algorithms}
|
||||
\begin{columns}[T] % contents are top vertically aligned
|
||||
\begin{column}[T]{5cm} % each column can also be its own environment
|
||||
\begin{itemize}
|
||||
|
@ -14,7 +14,7 @@
|
|||
\item<4-> Smoothing (e.g. moving average)
|
||||
\item<5-> Dot reduction
|
||||
\item<6-> Filtering (by distance, speed or angle)
|
||||
\item<7-> Stroke connection
|
||||
\item<8-> Stroke connection
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
|
@ -22,7 +22,10 @@
|
|||
\only<2>{\includegraphics[width=6cm, keepaspectratio]{scale-and-shift.png}}
|
||||
\only<3>{\includegraphics[width=6cm, keepaspectratio]{resampling.png}}
|
||||
\only<4>{\includegraphics[width=6cm, keepaspectratio]{smooth-1-1-1.png}}
|
||||
\only<7>{\includegraphics[width=6cm, keepaspectratio]{interrupted-stroke.png}}
|
||||
\only<5>{\includegraphics[width=6cm, keepaspectratio]{dot-reduction.png}}
|
||||
\only<6>{\includegraphics[width=6cm, keepaspectratio]{wildpoint-1.png}}
|
||||
\only<7>{\includegraphics[width=6cm, keepaspectratio]{wildpoint-2.png}}
|
||||
\only<8>{\includegraphics[width=6cm, keepaspectratio]{interrupted-stroke.png}}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
|
||||
|
|
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 32 KiB |
Before Width: | Height: | Size: 29 KiB After Width: | Height: | Size: 29 KiB |
BIN
presentations/Bachelor-Final-Presentation/LaTeX/wildpoint-1.png
Normal file
After Width: | Height: | Size: 18 KiB |
BIN
presentations/Bachelor-Final-Presentation/LaTeX/wildpoint-2.png
Normal file
After Width: | Height: | Size: 16 KiB |
|
@ -1,149 +0,0 @@
|
|||
\subsection{Write Math}
|
||||
|
||||
\begin{frame}{write-math.com}
|
||||
\begin{itemize}
|
||||
\item a website where users can add labeled training data and unlabeled
|
||||
data which they want to classify. I call this data \enquote{recording}
|
||||
\begin{figure}[ht]
|
||||
\centering
|
||||
\subfloat{
|
||||
\includegraphics[height=0.1\textwidth]{../images/279952.pdf}
|
||||
}%
|
||||
\qquad
|
||||
\subfloat{
|
||||
\includegraphics[height=0.1\textwidth]{../images/281507.pdf}
|
||||
}%
|
||||
\qquad
|
||||
\subfloat{
|
||||
\includegraphics[height=0.1\textwidth]{../images/287612.pdf}
|
||||
}%
|
||||
\qquad
|
||||
\subfloat{
|
||||
\includegraphics[height=0.1\textwidth]{../images/292175.pdf}
|
||||
}%
|
||||
\caption*{4 recordings}
|
||||
\end{figure}
|
||||
\item works with desktop computers and touch devices
|
||||
\item symbol recognition can be done by multiple classifiers
|
||||
\item users can contribute formulas as recordings and as \LaTeX{} answers
|
||||
for recordings
|
||||
\item users can vote for \LaTeX{} answers:
|
||||
\Large $\leq$, $\leqq$, $\leqslant$, \dots \normalsize
|
||||
\item user who entered the recording can accept one answer
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% \framedgraphic{Classify}{../images/classify.png}
|
||||
% \framedgraphic{Workflow}{../images/workflow.png}
|
||||
% \framedgraphic{User page}{../images/user-page.png}
|
||||
% \framedgraphic{Information about recordings}{../images/view.png}
|
||||
% \framedgraphic{Symbol page}{../images/symbol.png}
|
||||
% \framedgraphic{Training}{../images/train.png}
|
||||
% \framedgraphic{Ranking}{../images/ranking.png}
|
||||
|
||||
|
||||
\begin{frame}[fragile]{Statistics}
|
||||
\begin{itemize}
|
||||
\item 127 users with at least 5 recordings
|
||||
\item $\num{1111}$ symbols, but only $\num{369}$ used for experiments
|
||||
\item $\num{235831}$ recordings (e.g. $\num{3489}$ times \verb+\int+, but only 50 times \verb+X+)
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% \begin{frame}{First classification worker}
|
||||
% \begin{itemize}
|
||||
% \item preprocessing: Scale to fit into unit square while keeping the aspect
|
||||
% ratio
|
||||
% \item applies greedy time warping
|
||||
% \item compares a new recording with every recording
|
||||
% in the database
|
||||
% \item[$\Rightarrow$] Classification time is in $\mathcal{O}(\text{recordings})$,
|
||||
% but we rather would like $\mathcal{O}(\text{symbols})$
|
||||
% \item the current server / workflow can only handle about 4000 recordings
|
||||
% \item[$\Rightarrow$] Another way to classify is necessary
|
||||
% \end{itemize}
|
||||
% \end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Handwriting Recognition Toolkit (hwrt)}
|
||||
\begin{itemize}
|
||||
\item Many preprocessing algorithms / features implemented
|
||||
\item Plug-in system for preprocessing algorithms / features
|
||||
\item Needs neural network toolkit
|
||||
\item Hosted at \url{https://github.com/MartinThoma/hwrt}
|
||||
\item Installable via \verb+pip+ (Python package installer):\\
|
||||
\verb+$ pip install hwrt+
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{hwrt preprocessing configuration file}
|
||||
\begin{verbatim}
|
||||
data-source: raw-datasets/2014-08-26-20-14-data-raw.pickle
|
||||
queue:
|
||||
- RemoveDuplicateTime: null
|
||||
- ScaleAndShift:
|
||||
- max_width: 1.0
|
||||
- max_height: 1.0
|
||||
- center: true
|
||||
- SpaceEvenlyPerStroke:
|
||||
- kind: linear
|
||||
- number: 20
|
||||
\end{verbatim}
|
||||
\end{frame}
|
||||
|
||||
\subsection{Preprocessing algorithms}
|
||||
\begin{frame}{Preprocessing algorithms}
|
||||
\begin{itemize}
|
||||
\item Normalizing
|
||||
\begin{itemize}
|
||||
\item Scaling
|
||||
\item Shifting
|
||||
\item Resampling
|
||||
\end{itemize}
|
||||
\item Noise reduction
|
||||
\begin{itemize}
|
||||
\item Smoothing (e.g. moving average)
|
||||
\item Dot reduction
|
||||
\item Filtering (by distance, speed or angle)
|
||||
\item Stroke connection
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{hwrt feature configuration file}
|
||||
\begin{verbatim}
|
||||
data-source: preprocessed/baseline
|
||||
data-multiplication:
|
||||
- Multiply:
|
||||
- nr: 1
|
||||
features:
|
||||
- ConstantPointCoordinates:
|
||||
- strokes: 4
|
||||
- points_per_stroke: 20
|
||||
- fill_empty_with: 0
|
||||
- pen_down: false
|
||||
\end{verbatim}
|
||||
\end{frame}
|
||||
|
||||
\subsection{Features}
|
||||
\begin{frame}{Features}
|
||||
\begin{itemize}
|
||||
\item Local
|
||||
\begin{itemize}
|
||||
\item Coordinates
|
||||
\item Speed
|
||||
\item Binary pen pressure
|
||||
\item Direction
|
||||
\item Curvature
|
||||
\item Bitmap-environment
|
||||
\item Hat-Feature
|
||||
\end{itemize}
|
||||
\item Global
|
||||
\begin{itemize}
|
||||
\item \# of points
|
||||
\item \# of strokes
|
||||
\item Center point
|
||||
\item Bitmap
|
||||
\item Bounding box (width, height, time)
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\end{frame}
|