From 9fdf8ffbdd5a384650973c051110c0830f877c2e Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Tue, 17 Jan 2017 13:48:11 +0100 Subject: [PATCH] Update HASY publication --- publications/hasy/abstract.tex | 6 +++--- publications/hasy/appendix.tex | 22 +++++++++++++++++++- publications/hasy/figures/sample-images.png | Bin 0 -> 10239 bytes publications/hasy/main.tex | 10 ++++----- publications/hasy/used-latex-symbols.tex | 2 +- 5 files changed, 29 insertions(+), 11 deletions(-) create mode 100644 publications/hasy/figures/sample-images.png diff --git a/publications/hasy/abstract.tex b/publications/hasy/abstract.tex index 476faed..3551544 100644 --- a/publications/hasy/abstract.tex +++ b/publications/hasy/abstract.tex @@ -1,5 +1,5 @@ \begin{abstract} -This paper describes a dataset of single symbols similar to MNIST. It contains -\dbTotalInstances~instances of \dbTotalClasses~classes, split into training -and test set in a stratified manner. +This paper describes a publicly available dataset of single symbols similar to +MNIST. It contains \dbTotalInstances~instances of \dbTotalClasses~classes, +split into training and test set in a stratified manner. \end{abstract} diff --git a/publications/hasy/appendix.tex b/publications/hasy/appendix.tex index 1c5db3d..7f87fad 100644 --- a/publications/hasy/appendix.tex +++ b/publications/hasy/appendix.tex @@ -1,4 +1,24 @@ %!TEX root = main.tex \appendix -\input{used-latex-symbols} \ No newline at end of file +\section*{Obtaining the data} +The data can be found at \dbDownloadURL. It is a \verb+tar.gz+ file of +\SI{\dbSizeMB}{\mega\byte}. The file can be verified with the MD5sum + +\texttt{\dbMDfivesum} + +The data is published under the ODbL~license. If you use +the \dbName~dataset, please cite this paper. + +The \verb+tar.gz+ archive contains two folders, \verb+hasy-train+ and +\verb+hasy-test+, as well as the two files \verb+hasy-train-labels.csv+ +and \verb+hasy-test-labels.csv+ in the top level. 
The CSV files have the +columns \verb+path,symbol_id,latex+ with a header row. The \verb+path+ is the +relative path to a training example, e.g. \verb+hasy-test/0.png+. The +\verb+symbol_id+ is an internal numeric identifier for the symbol class. The +website \href{http://write-math.com/symbol/?id=968}{write-math.com/symbol/?id=[symbol\_id]} +gives information related to the symbol. The column \verb+latex+ contains the +\LaTeX{} command associated with the class. +\onecolumn +\input{used-latex-symbols} +\twocolumn \ No newline at end of file diff --git a/publications/hasy/figures/sample-images.png b/publications/hasy/figures/sample-images.png new file mode 100644 index 0000000000000000000000000000000000000000..24c7d21129afe9b88de6547d3c75d95c26ecf855 GIT binary patch literal 10239 zcmV#Bf>NYv`{@ZYO`%^IyAzF_!xEcNGm0V_J}HT{n6{` z0N|GyzBMLOT)!C#eqPp+x-41#zG}O${8ek!!y*FMz^7DGBsKIQOA4~U$fjzGRpa+x zS2YGK_2*w7Djc!W^NYjz>Pp3TyUgI78pvLmRD49UdovKcZt)(g-dAb?H4du!N zRJm%JNOrK{1?Pty#z0O@6;uN7uUMZPF*?h0K@CeQQ=A=M;xBxDWA$|n<&;z7e__83 z`wuViE0cV7U7o9rIy!?F+FAhTsOlG_>QE9$04u_U*KjS)sNkj-WeJI(J@g?!Tw_2r zG#;Mk(X(s-HhT?(=zy%~wkGG)P(o=q$a+3z%Y-skcpd?fn2PdKp2Wdu2rj$^LS+lt z1n)ltC2}X2W^D-Q-l13$W|bF~dXatC3E&~@^cwj4Kj&SkOBR^!p6$hFsXqowYwQ`O zHtFLZ9#M~h=jhP0!02roHSni|?9Pd&zE%;-5$&3ho|lfli-Eae0smgMu*79S`F#m{ zs9|rAU(KtH`w$eZJJ9}E{!{m@Rbbcw!wZ;>t{y>*+6>JIt?;nkHc*2IFI0{?rxEh& z#{)A{w11icV23VSanR*EjA4Y>PRJZ;O%2-*1u6mX?yG;u(i|{TFr`X~(VZG`Ug$P8 zEzp$evbf0`h@<^-D_@)ic&!gVOR? zIs+e}%v6V4$r|eisf23vzN&U;3pjKA=&+3etLZi^ly-6|0V8xV^~U_!^XJ<|+9F0` zigtEJafhAPpk%dEhm#PdX5NAC#-DNAvkS1{(XlmNw`CVE>=;q4qTs@w|hthpGkH ziXf?`Hg*w01~d5axD$y)qK1WsZ03h&N7+$!B!&I|4+V^uRL&R=Q#M0kmVn@BiT1AG z(85`=Yifvduk#Rc5DOBCj9M3-HAd zOG)%F9mxOG&|Svu70$hvd5(nrV41i5k69ZqP8}xR-hQg5yTMgeMUT;%(n(K(jQn&0 z5(n>0OZ@&T*D!jaB8VwF9IV_azUv>kwkb&6`1-4${WSo7`^dL`@I*qmgGdlPtxy%5 z8A+F6Z);NRBR8b~y$0m0W7vUXNw2a(Gl*z4^o*=t>jnlpZD(}ADP`O3>&#luQTxj! 
zPi1gXu|sb7H=dXR`LL=mOp82eP6Y#Dyvtl|(W4VUen(SuhYxi9gKxT$=3$5KzpgfT zIdd7DRCX{SwXlN@UmSwTbz{z*`Uow4$g@6j(ma^abYa6MwOw9+#)}`IlVq)j8JPmF zjp->n{4OS^2KOsiNNSUZrdb-RP`Vt__b)gNUJ`1WI&57{jcATW^HO*9$S^BHiGp3* z^>&aMjeEUHobZrmA5)ADT)SsKW$L6=bQr8A&y~F_SAf%jEdZMtJzUiXPTx`m$IKjE$|jR~ z+fKJx;F-~5?elwMgbQJ38oY24#URUbIC*GQMvy(KY(YP~RE3i3{>A-Q&X`2w_Vp$e zaJ?DT0etnZWKw8jSf-qNp+yZ$;uK#1_?j-+PVp&E5h#TcNAl6s6Qm8fJ?fH8Rp65{ z%m4l%x{btY$OH#Wg|Ak;ir~nY;(_Hp)Uv~_@EL~Ie~0^k<2fH-j$g$88A)!fZC2Y; zjXoVPEJwGLvqTCeOK?9v{r1B`+~|wyu<59a3BeRpsTLcT z;k)vQnuzc)E~X5!L!e);N+2P-<&Gj0vw7TJ;THuq&WN4?_&uR0)49845rFuLvRBJ9 zBU^f}1dxLEK&Aq_C0W1JP~r@B5XY9g5*}*C#i3NrFuijj)r*Gwz6zxQgpP7mV+wn< ze1Ct)XLOQE5R9y)xVGUI-l^g2+&CY*mPB<)iQJCVY)qzI(wmCz1yuXD93Q~{W@t1ZeeH6$sBK+xx+87}bf7xyi;V*tAeZISC}-3C1;Ae*=y1D2+yxXL}2RjC580H5Su3qK1uj5bHJ}ErKQV z^0X}2>=aHSZ5q0|Vp`WOLU9*RT+ztY681$hPOfjIN!Y`Y*KlK_EpF+T3~@K1gojW+ z+>8JabW6KFqXyhjLo5ff#OF6`x>IUdqgeuC1$u|UV?g1&w!Dj0X3oYtdbdqk>uvAe zbRIpd_)@<-qCtz8M3N(~A@0PNiZ;liwV{NWt74D1DajV9F(3_q<#_ZOuH7fPOXw0X zd5gfKt0MlfLRDG7gvd&Gb4pUlidw>LEyt19FtT#b0G47ICogU3O9LMjG92bayowGU z>@D+g`oC9=YEE5YI&1q`2in_MKuQBrQN}I=b#|ESV9bW2UR+SadjKCRl+PIS>l`8S zO(jH*r7U>??!rs1;R-VSA{ZSjT>9Tch@#|hyl{hAzG=qY7)wa6VJEg2z5kUKloxK$ zd{k~Wb@B>J$Z zk2b5q&W^I9>_`f}(vbEr{&+!%+?#k*?0K+e2=tkZ8$Xw3S}t#GbrDx5w50MI%z#Py z0CDW%6zH?D%oz=rlGR2mg)nW=xYXJY;PEo=&RXJMK+&zS59bB>M=JA3EL~9nuEt8r z#6$n^^4okRz57@K4_C9EPz5F~5U*+%$1n}q7HPa8VoW4#(7h?|YJJT=<q`G zuG~n6FXVLihN$di^JCz&Q2NNz5WTyGU01HF!&ASu1^_usBtg7Kfp*kzyaP6Z)DDAD zU6l_iy|Ah4@lLcF<9~$W40!wi+>JN*YwN z(dR%UdqVpzQzZajjI%NNl2!SDtwfvQks7Y!&LcNk)!PiZ1>ca~sYAa3MIvDg?MR_p z8RQmDy@m&vn1ZA0liU;XKn~s&$+1{oFZ4?dmW$SZQsWr@ih?ddI5!em_@#yiZ<`V( zQdfeA6#~e@5jC_EaMDo)6E7eBUI^M@pa9lg8j_~^Tsa9F7+i;O0&8km{qz-ln=8KDAv|mL$TW@;;iX~egea|+pX(o*Ek`n0~ zrfEpH78OuB@ZuY$>J4KI1o2Fy&+x?BYnWau241Y5c@>2x4uelRD8cm_2qQGUXygIM z$EAt$0DWxUQjSC-;R-UjhUAeE0+qN_cwtUw7knCtqiV>r5k02JPZ)^9xk+%T!SmRh zpe*RPB4}({UDEyY^u@%35Z~iVufbwg98NqLt7AE7?1D(OBoc{`u!wfdVyf&YJIaov 
z@alj<@4q}-s`X4H9OBi~@L+V^OAt|VkO#nd=06JFP7Pff;-52*xQ3ILHZDBf0w#ev ztPY5aTH?059OYzqUra%4AgQ{7O4VP9>@N9j72x4_k#lPHf{3ly5Cb1{i(3J^Tklj5& zHXT5C01v+HGGB~B5UW`pac3d%z?vEe^EaA)PO=adL)OSVe2uCih`qvKfZ3~~jKv#} ztH#|Ir7Ij0LwYdZG8^hPmc~dJh~t>Rq(Ya{rFgx$KDfORNF^gKcL@WJ#3OVxEJD%EPUgMYTi%qYZ-Uyi>yz4y~k%?gqGV!3+9g z6{?dzay!-1dVX|1<5i}-I|?NL2mGyUQ#hgq?W$pscRBd{#ugb`Xhr(s{GC)h*fC24?zBi#;UGQC`Qn;7^O3j z?zG|C=Tva00+xy+c1te)*#T2TSEf#oBf{&jI(uR!-JMdyG-|q(c|jF!tz7$jVfDo_ zpa{VZQ_$Ff&^~`Eol-*u!Q*1LJ8)1S{ba%p`Mjc3h_MSx7P!$?y;Fl(4N29JHxs<= zNp@|t4kxGaa;v8~cR-wY7@=znr!`~%J_VLuoHomB0l8=awIJOJ zn)_f%4NJ#$vYrAUjCMy=r~LD6PUio~FIbKmkmvrxswCUfcv7bTWrM2`?yB}y-G>cn z$ZlR_OANVdZSXx5PCim;)T`kbjTblxJSm@x-^hb%IQU#RS$o&-9RhfG&k03k+0${_ zoLXV@6%h_ocl^Asw}VVRQzLnScKXw{JW5_ZAW{R~+S;T@>5O5*ONW-d5vvOEeE34P z?2Ty}ej_8SO2`~h>$+A;^2dJRl``Qekh7&PMQ%|5Y?~p*!e6V~9fvKw%XSJRAm{-F zpR|y?@cms|sw9tKBii_a)hw-w91cp>c=lXkcVaHa4Wa z&T}OsMDQYNfO8gRhogiZbG3JoYEi|DsNvdeOliuAL?V%h4WlYUvoJn8%8s%l5zGS$ zOxi46As1O=O{T3q$;R1+#^bJjj-Z)UaBO>(B!D&{%2bDGD|f zZ$s`%>q^2KoC^1DmVK4EYHA>Cao(zl$gn9Nh(tcBi6Q`Lqa15kz3jgpXF z!_+kW?-KmTV&rKn`3O~xO|YXju9j{8B|w@J-s(b90}{?{D@6LX3V-z0ZF?OyR;|Q& z5Tgc5Lx;n4|LZL%EEPMd8AD3)TJAs=Wzi2W$uK*lm?raHYaHTeNqw!9Vw^AfAlaS2N$Auxkqi<+^f2Jtl<-FFbzhG1*g?}(t?eD)q9M7bzM z0L`SoMtbwi2&#$L?vofs@*Tg`%oA7(;2-oR!*}MY5@E<52Wy*9lv(Dp2G&KW=n!tWrtnW%BtxD zv$4=thqqKZL=7-!XBlD_fLT-yh{j%F)|#vx%cgb0p$a*jjJU;o}(bRq5; zP|Zh<`oec)b{JV%+S9(0v3cIQ^p2Lum|V(^$Os{1wq#pcN2=nD7$L;mDS?*NLbgc& zL=A*g{N;w5;j4QIC$M3JD2(Gu&JH!WlJ5<0MPa+HS0+kuW}(~^KLu9XfxHG+Cc6X; z-%JAzBg`9c0#c-lYlmlN)162G1+5Xm0rsguX=!Z(W^X^@&KD0 zvJJh4k{u$hF3VE@KKx54j>=xm%*;&H%v#2aB&y)B9VACEF2XxcPKl`ju^aBdLV=8Yz282vK*yLzw6*3yllKa{S zV3?evZVvWt+w-qHIiFDuMsw+{lG0uV*&L(?9U>uhWb1s@V8+#_hANBee4GSeNa6V< zu4Mxl4T@G9mbe108dyTeYY0?5wuILDG1Xl7S}!GCoiqhLA$U|&R&9HmIAnM`FO&|n zXLbQQ2pWa=p*LiuV9MuUA1I{>%IXOx8o-YfTh|cLahTqKC?Ak5d5q4d5BT%0stVZ| z)Fz1SD`?4)nNvg1uwuGJ3;KKA3`N^lAno1Q;xi(3)e1&dGCW2R^YGw4tQ!2}v$0iM zAvVJ*MyRg+5?YfGDloQa01os3-shYtlxSP#ploydy$s}&C~CN(vBM1vDIhf!Zo`*? 
z{lKT|yvzGccIf(j;$wf>6wGja{$l#rYRgz^xZa&MPVSE@iWB^6>|c0n3JuTjJG8g_8V z3wss5rT3N~p5K92)WwoRxtjPImYt(zYAA6kJ2>Qpz1XXVrQXBYq=@@4ZGkX@6lyr~ z0RMr*5jkO~jhbl>g9W2>59-xp2Xcc%ui+SWc!aO4O(Q{Fc3MKI7g_9GKO-$Mmw_wg z3$g?9cNohbfGhiRrCBn7q_-4GU3F`9JZxMTEQu;r;nR)p0DVS;tPwub#8D0_*AO}- z**(fu?Xk_V$5P-H{sMw$*-df_9?!8Twqgn2S`@iK7BzH534}I+)$XyKuy6`Q$|>S9 zO!v}n51by&@WhhhGD}$$J)kH>`2@UV(S^+r+FG3`ydWiN+eoW+gFsKUstUU*WjJ0iiyGoi;b^LW1*1K?QNjw0oJ#`%GCr}4s*AYgb98Em zJ7ySg&@0|}+gB8QGDYpap1hzTgLPd)T&zx5_c;B{z&H*V-Ii9{p6rsv^8gQ|!1O~Y z)NmNJ8(?XMLO2e$*-dx;DOA%sTi{`Jh-_fXClnq=&O}hLl^|Z03;z@RK%l~eDWVT# z!D~2x6r3YUCbC53IN;?^6EXwM55;;7NAiNr?IyRYoj;GGgIH#>XW%J<-YA1^UBhAd zp%(a(Ubl;UFGMc$9E0{OHt4oG|35FEKL;As6m4RBno@RR_fEtZ;UpswK^1LTE-1x2 z6sPh+iFUhdM=iA)F*<~wamx#C1#<;6nh;4^q0*42^1_4B>AKHg%83{W4gi|CejB7x zN@$A7e}_vICMqb;6P{rsbf-F0n#5E$sy%tb}-tn&W>5o2MC2UuV-WApDL-Pa5&C7_` zeg`f=2C!UnPW8b0037hCpN$xqiz^`$tEkK@;wptFq82D6kNc|8ot4*VUCGgD^<6a6_b7Q`)K=rs^rhP0mJkuIbG zoD_7V*N{l;hX*dGAp8M1ha*fR5(MOc7}s`HTt2uyp$+j;w3-?SW;D9&#TV3@HG7jMk+_P@Se6|Y zz73fmBRM6ZidA;NM906V0r+%OSDNWFFUP@@>Y>O#(Zz@zR;}?LHH<#axhh%0Aynt_ z7@9ah__pAB^yC!(sKJ$9TnR(Q#!={Xy=72m*n=vs- zh(`^_Z^ell7~fFu1B$e-Kf47%_;}QCWA!_{<@4PyN8=4Kw24STgrG+=h5xiS{)?nE zfoWfVc4Z;tLdj)n81b|#b+mX?Gb-!u{*u?yVQ74Um6k}-+tu;xPzqDN&X<8aI*uJK zPy^hJH`Z)F&fB%Q_Qz3>$OEu#_*B8R1f4pjRL9S zs-4Q<;C}^vL}K*f8SLP2C$RYr!0HtyvcD-6gD_i|Q-ili;hKpdB_!z>r@e@jtsB;gBrM(rv(5I?1Gy>H z=a<0T+pu4h*5(DX#RJEI21_?CJD8o_DrGUczXQ?VQaja7u+hK99~Zzt|(R8c8=o6v_Ca2ziAeU;0TLXVa$u2`H19^Zzj%}0%1EjJc zeI$T@#en-t*ergHE(ABHx!17jL-LoOY#{(Y^b&Cy9+1s$w1btsucpV5!Z_I^wUc&R zhABw6m}JlwU0NX&Cp0J9f+I*itt)# z{A+a$Fa!$<#Q7Upqe4#g33fz--lpflWeC0awNG}c36gShDK{@|j<4FVVs9o)d3$FoL z2w*7D0t^JJ4p4x9l>#zE0-}j_;#TFf*%;bhR6QWs*puHHz`R3haJv-kfjo2|H@>vN z8rKDKp060qZR%lFDsM|eNvqiFRU6^co=Pw!+_1(i5M-!r^g99T#j|V5#H9tr76jumfp;)b2Z7QxAWkO7nDxF{g~P@Ck_v! 
zoH7hAV2*qA4nAaG|KDqPjNZNg!?G$_1A;Z6_aC-94kz)#4ej%qQD4uR66Xa$BN;XI zXY|l9_5m-s@?W>F$?Z5^1GHIQUD%@&CAVv;tCK&imu{0W3S@<&tdcnVI1U>DWbZiO z9PiP(6R$MUz`W$iSZ?EV&eGw$V9OlrI_leK!Ph=n>Bxyfw!oSi-mGgp zUaKV!0j6_=7CwiGB3c&(q3k=em-Vsv>ul#`KsTY zn`6Qc{$0wJY7xJHt0z@?!8o=@4vixffq#=1{J{TQ{1LqES6xqpQ-h_whQ5W;!Yqgw z-3PVIu(rtS?Ps9=UW3%6Vm@~RKh~A1dpzM5!04FQJ&}vwjDLZy6D0zvcotN#gcl4R#3R>2=Xn z#jd4Nc{HcqGU!4uly2X`v{Pvw*;m`k@bD79{O%|5Uci!|qlZ@=9Hu;O(QeU~51Om8 zLY0L85;CV_J3EX7cno!EcHoCcC*oI+AV$xRx`65$o<4x(cmJp~8D?9JagVYXS)2CZ z2;mj{uE06&-NFtPM=$yRT`<|Ro3LLZnt90+;64y0faco(O?lWMnGnftRC=JbX{Jot z%6C53r9pNH4ml|zQ(3)B@IFx0kgfZmH%Z2RendB+3)M*(^@w~DFBmY2sdMLqIAHf- z3MC(w(9}YqbQRuffXoYIA98zWss!4&^a4YU&4i2ix(Of*F!sOHAV*zmcPMQ9g(cYU zmOkN134CE5YQyk?pa(7mLWrl~FB2wWczC(gli*>p{ds|QeZ+R~$0bWQ9M>9n-61dB zn4aL$j{h~{xGu2NFe2|K0nz=})s|`CI}Y~jAiL_?DWI{Qh*V5XcC*VEUhqQ-Bb3u& zAUU&H6g2=_dOK8(D`7ml%#4u}j7}~^e4>p-Q(OAgKx<|pYHI#WpFj@dTj1(>6Vt{m z)%5h`VQ!RCbjX9N5sYVq1D~P-j&}Q_LrlB|+Av~;@v)>G0hSy&v^KTwTp`ZIyMAv% zBJrAbzWEUt@)|hwwp7}*eS*sk)-u&El_ngu)7`A5BEh+&1O`^oAsp)e4s}@!8%D@yCA;*nXQ-FYp8Y!!3>sVZkB*@w>)e~kbR;ud6eLw z)L|4e`=1)_y`@^Te47}tL-(&9uq(1yLMae+mW>Sn5U*-^wCerC*ZE87@ za1O=)4i^vA73gz$D@o;Zu!)G6-=Rn(5-&tjLn4vr;6LTnq%mKMz+eCX002ovPDHLk FV1kw