diff --git a/documents/Proseminar-Netzwerkanalyse/Ausarbeitung-Thoma.pdf b/documents/Proseminar-Netzwerkanalyse/Ausarbeitung-Thoma.pdf index 5f3f9dc..43881e7 100644 Binary files a/documents/Proseminar-Netzwerkanalyse/Ausarbeitung-Thoma.pdf and b/documents/Proseminar-Netzwerkanalyse/Ausarbeitung-Thoma.pdf differ diff --git a/documents/Proseminar-Netzwerkanalyse/Ausarbeitung-Thoma.tex b/documents/Proseminar-Netzwerkanalyse/Ausarbeitung-Thoma.tex index 275607e..c191561 100644 --- a/documents/Proseminar-Netzwerkanalyse/Ausarbeitung-Thoma.tex +++ b/documents/Proseminar-Netzwerkanalyse/Ausarbeitung-Thoma.tex @@ -61,6 +61,9 @@ \section{Einleitung} \input{Einleitung} +\section{Related Work} +\input{Related-Work} + \section{DYCOS} \input{DYCOS-Algorithmus} diff --git a/documents/Proseminar-Netzwerkanalyse/DYCOS-Algorithmus.tex b/documents/Proseminar-Netzwerkanalyse/DYCOS-Algorithmus.tex index 4766c60..85368a5 100644 --- a/documents/Proseminar-Netzwerkanalyse/DYCOS-Algorithmus.tex +++ b/documents/Proseminar-Netzwerkanalyse/DYCOS-Algorithmus.tex @@ -99,8 +99,7 @@ Graphen. Die Vokabularbestimmung kann zu jedem Zeitpunkt $t$ durchgeführt werden, muss es aber nicht. -In \cref{alg:DYCOS} wird der DYCOS-Algorithmus als -Pseudocode vorgestellt: +In \cref{alg:DYCOS} steht der DYCOS-Algorithmus in Form von Pseudocode: In \cref{alg1:l8} wird für jeden unbeschrifteten Knoten durch die folgenden Zeilen eine Beschriftung gewählt. diff --git a/documents/Proseminar-Netzwerkanalyse/Related-Work.tex b/documents/Proseminar-Netzwerkanalyse/Related-Work.tex new file mode 100644 index 0000000..5acda93 --- /dev/null +++ b/documents/Proseminar-Netzwerkanalyse/Related-Work.tex @@ -0,0 +1,24 @@ +%!TEX root = Ausarbeitung-Thoma.tex +Sowohl das Problem der Knotenklassifikation, als auch das der Textklassifikation, +wurde bereits in verschiedenen Kontexten analysiert. Jedoch scheinen bisher entweder nur die Struktur des zugrundeliegenden Graphen oder nur Eigenschaften der Texte verwendet worden zu sein. 
+ +So werden in \cite{bhagat,szummer} unter anderem Verfahren zur Knotenklassifikation +beschrieben, die wie der in \cite{aggarwal2011} vorgestellte DYCOS-Algorithmus, +um den es in dieser Ausarbeitung geht, auch auf Random Walks basieren. + +Obwohl es auch zur Textklassifikation einige Paper gibt \cite{Zhu02learningfrom,Jiang2010302}, geht doch keines davon auf den Spezialfall der Textklassifikation +mit einem zugrundeliegenden Graphen ein. + +Die vorgestellten Methoden zur Textklassifikation variieren außerdem sehr stark. +Es gibt Verfahren, die auf dem bag-of-words-Modell basieren \cite{Ko:2012:STW:2348283.2348453} +wie es auch im DYCOS-Algorithmus verwendet wird. Aber es gibt auch Verfahren, +die auf dem Expectation-Maximization-Algorithmus basieren \cite{Nigam99textclassification} +oder Support Vector Machines nutzen \cite{Joachims98textcategorization}. + +Es wäre also gut vorstellbar, die Art und Weise, wie die Texte in die Klassifikation +des DYCOS-Algorithmus einfließen, zu variieren. Allerdings ist dabei darauf hinzuweisen, +dass die im Folgenden vorgestellte Verwendung der Texte sowohl einfach zu implementieren +ist und nur lineare Vorverarbeitungszeit in Anzahl der Wörter des Textes hat, +als auch es erlaubt einzelne +Knoten zu klassifizieren, wobei der Graph nur lokal um den zu klassifizierenden +Knoten betrachtet werden muss. \ No newline at end of file diff --git a/documents/Proseminar-Netzwerkanalyse/literatur.bib b/documents/Proseminar-Netzwerkanalyse/literatur.bib index aef88b5..f3a0042 100644 --- a/documents/Proseminar-Netzwerkanalyse/literatur.bib +++ b/documents/Proseminar-Netzwerkanalyse/literatur.bib @@ -45,6 +45,17 @@ crossref = {DBLP:conf/kdd/2007web}, bibsource = {DBLP, http://dblp.uni-trier.de} } + +@article{DBLP:journals/corr/abs-1101-3291, + author = {Smriti Bhagat AND Graham Cormode AND S. 
Muthukrishnan}, + title = {Node Classification in Social Networks}, + journal = {CoRR}, + volume = {abs/1101.3291}, + year = {2011}, + ee = {http://arxiv.org/abs/1101.3291}, + bibsource = {DBLP, http://dblp.uni-trier.de} +} + @proceedings{DBLP:conf/kdd/2007web, editor = {Haizheng Zhang AND Myra Spiliopoulou AND @@ -109,14 +120,14 @@ } @MASTERSTHESIS{Lavesson, - AUTHOR = {Lavesson, Niklas}, - TITLE = {Evaluation and analysis of supervised learning algorithms and classifiers}, - SCHOOL = {Blekinge Institute of Technology}, - TYPE = {Diploma Thesis}, + AUTHOR = {Lavesson, Niklas}, + TITLE = {Evaluation and analysis of supervised learning algorithms and classifiers}, + SCHOOL = {Blekinge Institute of Technology}, + TYPE = {Diploma Thesis}, ADDRESS = {Sweden}, - MONTH = DEC, - YEAR = 2006, - PDF = {http://www.bth.se/fou/Forskinfo.nsf/Sok/c655a0b1f9f88d16c125714c00355e5d/$file/Lavesson_lic.pdf} + MONTH = DEC, + YEAR = 2006, + PDF = {http://www.bth.se/fou/Forskinfo.nsf/Sok/c655a0b1f9f88d16c125714c00355e5d/$file/Lavesson_lic.pdf} } @article{Stone1974, @@ -157,8 +168,6 @@ ption. 
The examples used to illustrate the application are drawn from the proble address = {San Francisco, CA, USA}, } - - @incollection{szummer, title = {Partially labeled classification with Markov random walks}, author = {Martin Szummer and Jaakkola, Tommi}, @@ -168,3 +177,107 @@ pages = {945--952}, year = {2001}, url = {http://media.nips.cc/nipsbooks/nipspapers/paper_files/nips14/AA36.pdf}, } + +@incollection{dynamic, +title ={Dynamic Label Propagation in Social Networks}, +author ={Du, Juan AND Zhu, Feida AND Lim, Ee-Peng}, +booktitle ={Database Systems for Advanced Applications}, +editor ={Meng, Weiyi AND Feng, Ling AND Bressan, Stéphane AND Winiwarter, Werner AND Song, Wei}, +pages ={194--209}, +year ={2013}, +isbn ={978-3-642-37449-4}, +volume ={7826}, +series ={Lecture Notes in Computer Science}, +doi ={10.1007/978-3-642-37450-0_14}, +url ={http://dx.doi.org/10.1007/978-3-642-37450-0_14}, +publisher ={Springer Berlin Heidelberg}, +} + +@TECHREPORT{Zhu02learningfrom, + author = {Xiaojin Zhu and Zoubin Ghahramani}, + title = {Learning from Labeled and Unlabeled Data with Label Propagation}, + institution = {Carnegie Mellon University}, + year = {2002} +} + +@TECHREPORT{Seeger01learningwith, + author = {Matthias Seeger}, + title = {Learning with Labeled and Unlabeled Data}, + institution = {University of Edinburgh}, + year = {2001} +} + +@article{Kazienko2012199, + title = "Label-dependent node classification in the network ", + journal = "Neurocomputing ", + volume = "75", + number = "1", + pages = "199--209", + year = "2012", + note = "Brazilian Symposium on Neural Networks (SBRN 2010) International Conference on Hybrid Artificial Intelligence Systems (HAIS 2010) ", + issn = "0925-2312", + doi = "10.1016/j.neucom.2011.04.047", + url = "http://www.sciencedirect.com/science/article/pii/S092523121100508X", + author = "Przemyslaw Kazienko and Tomasz Kajdanowicz", + keywords = "Classification", + keywords = "Node classification", + keywords = 
"Label-dependent classification", + keywords = "Label-dependent features", + keywords = "Collective classification", + keywords = "Classification in networks", + keywords = "\{LDBootstrapping\}", + keywords = "\{LDGibbs\}", + keywords = "Bootstrapping", + keywords = "Gibbs sampling " +} + +@MISC{Joachims98textcategorization, + author = {Thorsten Joachims}, + title = {Text Categorization with Support Vector Machines: Learning with Many Relevant Features}, + year = {1998} +} + +@INPROCEEDINGS{Nigam99textclassification, + author = {Kamal Nigam and Andrew Kachites Mccallum and Sebastian Thrun and Tom Mitchell}, + title = {Text Classification from Labeled and Unlabeled Documents using EM}, + booktitle = {Machine Learning}, + year = {1999}, + pages = {103--134} +} + +@article{Jiang2010302, +title = "Text classification using graph mining-based feature extraction ", +journal = "Knowledge-Based Systems ", +volume = "23", +number = "4", +pages = "302--308", +year = "2010", +note = "Artificial Intelligence 2009 AI-2009 The 29th \{SGAI\} International Conference on Artificial Intelligence ", +issn = "0950-7051", +doi = "10.1016/j.knosys.2009.11.010", +url = "http://www.sciencedirect.com/science/article/pii/S095070510900152X", +author = "Chuntao Jiang and Frans Coenen and Robert Sanderson and Michele Zito", +keywords = "Text classification", +keywords = "Graph representation", +keywords = "Graph mining", +keywords = "Weighted graph mining", +keywords = "Feature extraction " +} + +@inproceedings{Ko:2012:STW:2348283.2348453, + author = {Ko, Youngjoong}, + title = {A Study of Term Weighting Schemes Using Class Information for Text Classification}, + booktitle = {Proceedings of the 35th International ACM SIGIR Conference on Research and Development in Information Retrieval}, + series = {SIGIR '12}, + year = {2012}, + isbn = {978-1-4503-1472-5}, + location = {Portland, Oregon, USA}, + pages = {1029--1030}, + numpages = {2}, + url = 
{http://doi.acm.org/10.1145/2348283.2348453}, + doi = {10.1145/2348283.2348453}, + acmid = {2348453}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {idf, term weighting, text classification}, +}