\subsection{Idea}

\begin{frame}{Basics of PageRank}
  We all know that:
  \begin{itemize}[<+->]
    \item humans know what is good for them
    \item[\xmark] machines don't know what is good for humans
    \item humans create websites
    \item humans will only \href{http://en.wikipedia.org/wiki/Hyperlink}{link} to websites they like
    \item[$\Rightarrow$] hyperlinks are a quality indicator
  \end{itemize}
\end{frame}

\begin{frame}{How Could We Use That?}
  \begin{itemize}[<+->]
    \item simply count the number of links to a website
    \item[\xmark] 10,000 links might all come from a single page
    \item count the number of websites that link to it
    \item[\xmark] the quality of the linking website matters
    \item[\xmark] the total number of links on the source page matters
  \end{itemize}
\end{frame}

\framedgraphic{A Brilliant Idea}{../images/BrinPage.jpg}

\begin{frame}{Ideas of PageRank}
  \begin{itemize}[<+->]
    \item decisions of humans are complicated
    \item many web pages get visited
    \item[$\Rightarrow$] model clicks on links as random behaviour
    \item links are important
      \begin{itemize}
        \item links from page A become less important if A has many outgoing links
        \item links from page A become more important if many pages link to A
      \end{itemize}
    \item[$\Rightarrow$] if A links to B, the rank of B increases by $\frac{Rank(A)}{Links(A)}$
  \end{itemize}
  \pause[\thebeamerpauses]
  \begin{algorithmic}
    \If{A links to B}
      \State $Rank(B)$ += $\frac{Rank(A)}{Links(A)}$
    \EndIf
  \end{algorithmic}
\end{frame}

\begin{frame}{What is PageRank?}
  The PageRank algorithm calculates the probability that a user who clicks links at random ends up on a given page.
\end{frame}

\input{Animation}

%\begin{frame}{Ants}
%  \begin{itemize}[<+->]
%    \item Websites = nodes = anthills
%    \item Links = edges = paths
%    \item You place ants on each node
%    \item They walk over the paths
%    \item[] (at random, they are ants!)
%    \item After some time, some anthills will have more ants than others
%    \item Those hills are more attractive than others
%    \item \# ants is the probability that a random user would end up on a website
%  \end{itemize}
%\end{frame}

\begin{frame}{Mathematics}
  Let $x$ be a web page. Then
  \begin{itemize}
    \item $L(x)$ is the set of websites that link to $x$
    \item $C(y)$ is the out-degree of page $y$
    \item $\alpha$ is the probability of a random jump
    \item $N$ is the total number of websites
  \end{itemize}
  \[ PR(x) := \alpha \left( \frac{1}{N} \right) + (1-\alpha) \sum_{y \in L(x)} \frac{PR(y)}{C(y)} \]
\end{frame}

\begin{frame}{Pseudocode}
  \begin{algorithmic}
    \alertline<1> \Function{PageRank}{Graph $web$, double $q=0.15$, int $iterations$} % q is the random-jump probability ($\alpha$ in the formula)
    %\alertline<2>
    \ForAll{$page \in web$}
      %\alertline<3>
      \State $page.pageRank = \frac{1}{|web|}$ \Comment{initial probability}
      %\alertline<2>
    \EndFor
    \alertline<2> \While{$iterations > 0$}
      \alertline<3> \ForAll{$page \in web$} \Comment{calculate pageRank of $page$}
        \alertline<4> \State $page.pageRank = \frac{q}{|web|}$
        \alertline<5> \ForAll{$y \in L(page)$}
          \alertline<6> \State $page.pageRank$ += $(1-q) \cdot \frac{y.pageRank}{C(y)}$
        \alertline<5> \EndFor
      \alertline<3> \EndFor
      \alertline<2> \State $iterations$ -= $1$
    \alertline<2> \EndWhile
    \alertline<1> \EndFunction
  \end{algorithmic}
\end{frame}
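
% Added illustration: one application of the PageRank formula on a made-up
% three-page web. The graph (A->B, A->C, B->C, C->A) and alpha = 0.15 are
% arbitrary example values, chosen only to show how the two terms combine.
\begin{frame}{A Worked Example (Sketch)}
  Take a made-up web of three pages with links $A \to B$, $A \to C$, $B \to C$, $C \to A$,
  $\alpha = 0.15$ and initial ranks $\frac{1}{3}$ each. One application of the formula gives
  \[
  \begin{array}{l}
    PR(A) = \frac{0.15}{3} + 0.85 \cdot \frac{1/3}{1} \approx 0.33 \\[4pt]
    PR(B) = \frac{0.15}{3} + 0.85 \cdot \frac{1/3}{2} \approx 0.19 \\[4pt]
    PR(C) = \frac{0.15}{3} + 0.85 \cdot \left( \frac{1/3}{2} + \frac{1/3}{1} \right) \approx 0.48
  \end{array}
  \]
  The three values still sum to $1$; repeating the step makes them converge.
\end{frame}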
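
% Added sketch: one possible implementation of the iteration above in Python.
% The function name, the toy `web` dictionary, alpha = 0.15 and the iteration
% count are illustrative choices, not part of the original slides.
\begin{frame}[fragile]{A Python Sketch}
\begin{verbatim}
def pagerank(links, alpha=0.15, iterations=20):
    """links: dict mapping every page to the pages it links to.
    Assumes every page appears as a key; dangling pages
    (no outgoing links) are not treated specially here."""
    pages = list(links)
    n = len(pages)
    rank = {p: 1.0 / n for p in pages}        # initial probability 1/N
    for _ in range(iterations):
        new = {p: alpha / n for p in pages}   # random-jump term alpha/N
        for y, targets in links.items():
            for x in targets:                 # y links to x
                new[x] += (1 - alpha) * rank[y] / len(targets)
        rank = new                            # replace all ranks at once
    return rank

web = {"A": ["B", "C"], "B": ["C"], "C": ["A"]}   # made-up three-page web
print(pagerank(web))
\end{verbatim}
  Unlike the pseudocode, which updates ranks in place, this sketch computes the new ranks from the old ones, so each round applies the formula exactly once to every page.
\end{frame}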