diff --git a/publications/hasy/abstract.tex b/publications/hasy/abstract.tex index 476faed..3551544 100644 --- a/publications/hasy/abstract.tex +++ b/publications/hasy/abstract.tex @@ -1,5 +1,5 @@ \begin{abstract} -This paper describes a dataset of single symbols similar to MNIST. It contains -\dbTotalInstances~instances of \dbTotalClasses~classes, split into training -and test set in a stratified manner. +This paper describes a publicly available dataset of single symbols similar to +MNIST. It contains \dbTotalInstances~instances of \dbTotalClasses~classes, +split into training and test set in a stratified manner. \end{abstract} diff --git a/publications/hasy/appendix.tex b/publications/hasy/appendix.tex index 1c5db3d..7f87fad 100644 --- a/publications/hasy/appendix.tex +++ b/publications/hasy/appendix.tex @@ -1,4 +1,24 @@ %!TEX root = main.tex \appendix -\input{used-latex-symbols} \ No newline at end of file +\section*{Obtaining the data} +The data can be found at \dbDownloadURL. It is a \verb+tar.gz+ file of +\SI{\dbSizeMB}{\mega\byte}. The file can be verified with the MD5sum + +\texttt{\dbMDfivesum} + +The data is published under the ODbL~license. If you use +the \dbName~dataset, please cite this paper. + +The \verb+tar.gz+ archive contains two folders, \verb+hasy-train+ and +\verb+hasy-test+, as well as the two files \verb+hasy-train-labels.csv+ +and \verb+hasy-test-labels.csv+ in the top level. The CSV files have the +columns \verb+path,symbol_id,latex+ with a header row. The \verb+path+ is the +relative path to an image file, e.g.\ \verb+hasy-test/0.png+. The +\verb+symbol_id+ is an internal numeric identifier for the symbol class. The +website \href{http://write-math.com/symbol/?id=968}{write-math.com/symbol/?id=[symbol\_id]} +gives information related to the symbol. The column \verb+latex+ contains the +\LaTeX{} command associated with the class. 
+\onecolumn +\input{used-latex-symbols} +\twocolumn \ No newline at end of file diff --git a/publications/hasy/figures/sample-images.png b/publications/hasy/figures/sample-images.png new file mode 100644 index 0000000..24c7d21 Binary files /dev/null and b/publications/hasy/figures/sample-images.png differ diff --git a/publications/hasy/main.tex b/publications/hasy/main.tex index 323fa1f..0101660 100644 --- a/publications/hasy/main.tex +++ b/publications/hasy/main.tex @@ -10,7 +10,7 @@ \usepackage{vmargin} % Adjust margins in a simple way \usepackage{tikz} \usepackage{csquotes} -\usepackage[binary-units=true]{siunitx} +\usepackage[binary-units=true,detect-weight=true, detect-family=true]{siunitx} \DeclareSIUnit\pixel{px} \usepackage{listings} % needed for the inclusion of source code \usepackage{caption} @@ -59,7 +59,7 @@ \usepackage[german,nameinlink, noabbrev,capitalise]{cleveref} -\title{HASY dataset} +\title{The HASY dataset} \author{% \IEEEauthorblockN{Martin Thoma}\\ \IEEEauthorblockA{E-Mail: info@martin-thoma.de} % ORCID: http://orcid.org/0000-0002-6517-1690 @@ -69,7 +69,7 @@ pdfauthor = {Martin Thoma}, pdfkeywords = {dataset}, pdfsubject = {HASY, dataset}, - pdftitle = {HASY dataset}, + pdftitle = {The HASY dataset}, } \usepackage[inline]{enumitem} \usepackage{longtable} @@ -108,9 +108,7 @@ \input{content} \bibliographystyle{IEEEtranSA} \bibliography{literatur} -\clearpage% -\onecolumn \input{appendix} -\twocolumn + \end{document} diff --git a/publications/hasy/used-latex-symbols.tex b/publications/hasy/used-latex-symbols.tex index 6f8049f..e106947 100644 --- a/publications/hasy/used-latex-symbols.tex +++ b/publications/hasy/used-latex-symbols.tex @@ -1,5 +1,5 @@ %!TEX root = "../thesis.tex" -\subsection{Symbol Classes} +\section*{Symbol Classes} \begin{longtable}{lc|lc} \toprule \LaTeX & Rendered & \LaTeX & Rendered \\