\subsection{Idea}
\begin{frame}{Basics}
\begin{itemize}[<+->]
\item Humans know what is good for them
\item Humans create Websites
\item Humans will only \href{http://en.wikipedia.org/wiki/Hyperlink}{link} to Websites they like
\item[$\Rightarrow$] Hyperlinks are a quality indicator
\end{itemize}
\end{frame}

\begin{frame}{How could we use that?}
\begin{itemize}[<+->]
\item Simply count the number of links to a Website
\item[\xmark] 10,000 links from only one page
\item Count the number of Websites that link to a Website
\item[\xmark] The quality of the linking page matters
\item[\xmark] The total number of links on the source page matters
\end{itemize}
\end{frame}

\framedgraphic{A brilliant idea}{../images/BrinPage.jpg}
\begin{frame}{Ideas of PageRank}
\begin{itemize}[<+->]
\item Decisions of humans are complicated
\item A lot of webpages get visited
\item[$\Rightarrow$] model clicks on links as random behaviour
\item Links are important
\item Links from page A become less important if A has many outgoing links
\item Links from page A become more important if many pages link to A
\item[$\Rightarrow$] if B has a link from A, the rank of B increases by $\frac{Rank(A)}{Links(A)}$ (example on the next slide)
\end{itemize}

\pause[\thebeamerpauses]

\begin{algorithmic}
\If{A links to B}
\State $Rank(B)$ += $\frac{Rank(A)}{Links(A)}$
\EndIf
\end{algorithmic}
\end{frame}
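
% Added worked example: the numbers are purely illustrative; they only
% instantiate the update rule from the previous slide.
\begin{frame}{Update rule: a small example}
Suppose $Rank(A) = 0.6$ and A has $Links(A) = 3$ outgoing links, one of them pointing to B. Then the rank of B increases by
\[\frac{Rank(A)}{Links(A)} = \frac{0.6}{3} = 0.2\]
Every page that A links to receives the same share of $0.2$.
\end{frame}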
\begin{frame}{Ants}
\begin{itemize}[<+->]
\item Websites = nodes = anthills
\item Links = edges = paths
\item You place ants on each node
\item They walk over the paths
\item[] (at random, they are ants!)
\item After some time, some anthills will have more ants than others
\item Those hills are more attractive than others
\item \# ants is the probability that a random user would end up on a website (see the simulation sketch on the next slide)
\end{itemize}
\end{frame}
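
% Added sketch: a tiny Python simulation of the ant analogy above. The
% dict-based graph, the function name ant_walk and the jump probability are
% illustrative choices, not part of the original slides; the jump corresponds
% to the alpha introduced on the next slide and keeps the walker from getting
% stuck on pages without outgoing links.
\begin{frame}[fragile]{Ants: a simulation sketch}
\begin{verbatim}
import random

def ant_walk(graph, steps=100000, jump=0.15):
    pages = list(graph)
    visits = {page: 0 for page in graph}
    current = random.choice(pages)
    for _ in range(steps):
        visits[current] += 1
        if random.random() < jump or not graph[current]:
            current = random.choice(pages)           # random jump
        else:
            current = random.choice(graph[current])  # follow a link
    return {p: count / steps for p, count in visits.items()}

web = {"A": ["B", "C"], "B": ["C"], "C": ["A"]}
print(ant_walk(web))  # relative visit frequency per page
\end{verbatim}
\end{frame}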
\begin{frame}{Mathematics}
Let $x$ be a web page. Then
\begin{itemize}
\item $L(x)$ is the set of Websites that link to $x$
\item $C(y)$ is the out-degree of page $y$
\item $\alpha$ is the probability of a random jump
\item $N$ is the total number of websites
\end{itemize}

\[PR(x) := \alpha \cdot \frac{1}{N} + (1-\alpha) \sum_{y \in L(x)} \frac{PR(y)}{C(y)}\]
\end{frame}
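
% Added worked example: instantiates the PageRank formula above for a page
% with two in-links; all numbers are illustrative.
\begin{frame}{Mathematics: a small example}
Assume $N = 4$, $\alpha = 0.15$ and a page $x$ with $L(x) = \{y_1, y_2\}$, where $PR(y_1) = 0.4$, $C(y_1) = 2$, $PR(y_2) = 0.2$ and $C(y_2) = 4$. Then
\[PR(x) = \frac{0.15}{4} + 0.85 \cdot \left(\frac{0.4}{2} + \frac{0.2}{4}\right) = 0.0375 + 0.2125 = 0.25\]
\end{frame}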
\begin{frame}{Pseudocode}
\begin{algorithmic}
\alertline<1> \Function{PageRank}{Graph $G$, double $q=0.15$, int $iterations$} % q is the random-jump probability (the alpha of the previous slide)
\alertline<2> \ForAll{$page \in G$}
\alertline<3> \State $page.pageRank = \frac{1}{|G|}$ \Comment{initial probability}
\alertline<2> \EndFor

\alertline<4> \While{$iterations > 0$}
\alertline<5> \ForAll{$page \in G$} \Comment{calculate pageRank of $page$}
\alertline<6> \State $page.pageRank = \frac{q}{|G|}$
\alertline<7> \ForAll{$y \in L(page)$}
\alertline<8> \State $page.pageRank$ += $(1-q) \cdot \frac{y.pageRank}{C(y)}$
\alertline<7> \EndFor
\alertline<5> \EndFor
\alertline<4> \State $iterations$ -= $1$
\alertline<4> \EndWhile
\alertline<1> \EndFunction
\end{algorithmic}
\end{frame}
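
% Added sketch: a plain-Python translation of the pseudocode above, assuming
% the graph is given as a dict mapping each page to the list of pages it
% links to. Function and variable names are illustrative; each iteration's
% ranks are computed from the values of the previous iteration.
\begin{frame}[fragile]{Pseudocode in Python (sketch)}
\begin{verbatim}
def pagerank(graph, q=0.15, iterations=20):
    n = len(graph)
    ranks = {page: 1.0 / n for page in graph}  # initial probability
    # L(page): pages that link to page
    inlinks = {page: [] for page in graph}
    for page, targets in graph.items():
        for t in targets:
            inlinks[t].append(page)
    for _ in range(iterations):
        new_ranks = {}
        for page in graph:
            rank = q / n                        # random-jump part
            for y in inlinks[page]:
                rank += (1 - q) * ranks[y] / len(graph[y])  # C(y)
            new_ranks[page] = rank
        ranks = new_ranks
    return ranks

web = {"A": ["B", "C"], "B": ["C"], "C": ["A"]}
print(pagerank(web))
\end{verbatim}
\end{frame}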