diff --git a/source-code/Pseudocode/Value-Iteration/Value-Iteration.png b/source-code/Pseudocode/Value-Iteration/Value-Iteration.png
index e1bbc29..f0736dd 100644
Binary files a/source-code/Pseudocode/Value-Iteration/Value-Iteration.png and b/source-code/Pseudocode/Value-Iteration/Value-Iteration.png differ
diff --git a/source-code/Pseudocode/Value-Iteration/Value-Iteration.tex b/source-code/Pseudocode/Value-Iteration/Value-Iteration.tex
index e8aac78..c0b5895 100644
--- a/source-code/Pseudocode/Value-Iteration/Value-Iteration.tex
+++ b/source-code/Pseudocode/Value-Iteration/Value-Iteration.tex
@@ -23,8 +23,8 @@
 \Statex Actions $\mathcal{A} = \{1, \dots, n_a\},\qquad A: \mathcal{X} \Rightarrow \mathcal{A}$
 \Statex Cost function $g: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$
 \Statex Transition probabilities $f$
-% TODO: \alpha?
-\Procedure{ValueIteration}{$\mathcal{X}$, $A$, $g$, $f$}
+\Statex Learning rate $\alpha \in [0, 1]$, typically $\alpha = 0.1$
+\Procedure{ValueIteration}{$\mathcal{X}$, $A$, $g$, $f$, $\alpha$}
 \State Initialize $J, J': \mathcal{X} \rightarrow \mathbb{R}_0^+$ arbitrarily
 \While{$J$ is not converged}
 \State $J' \gets J$
@@ -37,9 +37,10 @@
 \State $J(x) \gets \min_a \{Q(x, a)\}$
 \EndFor
 \EndWhile
+\State \Return $J$
 \EndProcedure
 \end{algorithmic}
-\caption{Value Iteration}
+\caption{Value Iteration: Learn function $J: \mathcal{X} \rightarrow \mathbb{R}$}
 \label{alg:value-iteration}
 \end{algorithm}
 \end{preview}
diff --git a/source-code/Pseudocode/q-learning/Makefile b/source-code/Pseudocode/q-learning/Makefile
new file mode 100644
index 0000000..fb4b183
--- /dev/null
+++ b/source-code/Pseudocode/q-learning/Makefile
@@ -0,0 +1,36 @@
+SOURCE = q-learning
+DELAY = 80
+DENSITY = 300
+WIDTH = 512
+
+make:
+	pdflatex $(SOURCE).tex -output-format=pdf
+	pdflatex $(SOURCE).tex -output-format=pdf
+	make clean
+
+clean:
+	rm -rf $(TARGET) *.class *.html *.log *.aux *.data *.gnuplot
+
+gif:
+	pdfcrop $(SOURCE).pdf
+	convert -verbose -delay $(DELAY) -loop 0 -density $(DENSITY) $(SOURCE)-crop.pdf $(SOURCE).gif
+	make clean
+
+png:
+	make
+	make svg
+	inkscape $(SOURCE).svg -w $(WIDTH) --export-png=$(SOURCE).png
+
+transparentGif:
+	convert $(SOURCE).pdf -transparent white result.gif
+	make clean
+
+svg:
+	make
+	#inkscape $(SOURCE).pdf --export-plain-svg=$(SOURCE).svg
+	pdf2svg $(SOURCE).pdf $(SOURCE).svg
+	# Necessary, as pdf2svg does not always create valid svgs:
+	inkscape $(SOURCE).svg --export-plain-svg=$(SOURCE).svg
+	rsvg-convert -a -w $(WIDTH) -f svg $(SOURCE).svg -o $(SOURCE)2.svg
+	inkscape $(SOURCE)2.svg --export-plain-svg=$(SOURCE).svg
+	rm $(SOURCE)2.svg
diff --git a/source-code/Pseudocode/q-learning/README.md b/source-code/Pseudocode/q-learning/README.md
new file mode 100644
index 0000000..c260ec5
--- /dev/null
+++ b/source-code/Pseudocode/q-learning/README.md
@@ -0,0 +1,3 @@
+Compiled example
+----------------
+![Example](q-learning.png)
diff --git a/source-code/Pseudocode/q-learning/q-learning.png b/source-code/Pseudocode/q-learning/q-learning.png
new file mode 100644
index 0000000..59a678c
Binary files /dev/null and b/source-code/Pseudocode/q-learning/q-learning.png differ
diff --git a/source-code/Pseudocode/q-learning/q-learning.tex b/source-code/Pseudocode/q-learning/q-learning.tex
new file mode 100644
index 0000000..221a6fc
--- /dev/null
+++ b/source-code/Pseudocode/q-learning/q-learning.tex
@@ -0,0 +1,45 @@
+\documentclass{article}
+\usepackage[pdftex,active,tightpage]{preview}
+\setlength\PreviewBorder{2mm}
+
+\usepackage[utf8]{inputenc} % this is needed for umlauts
+\usepackage[ngerman]{babel} % this is needed for umlauts
+\usepackage[T1]{fontenc} % this is needed for correct output of umlauts in pdf
+\usepackage{amssymb,amsmath,amsfonts} % nice math rendering
+\usepackage{braket} % needed for \Set
+\usepackage{caption}
+\usepackage{algorithm}
+\usepackage[noend]{algpseudocode}
+
+\DeclareCaptionFormat{myformat}{#3}
+\captionsetup[algorithm]{format=myformat}
+
+\begin{document}
+\begin{preview}
+\begin{algorithm}[H]
+\begin{algorithmic}
+\Require
+\Statex States $\mathcal{X} = \{1, \dots, n_x\}$
+\Statex Actions $\mathcal{A} = \{1, \dots, n_a\},\qquad A: \mathcal{X} \Rightarrow \mathcal{A}$
+\Statex Reward function $R: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$
+\Statex Black-box (probabilistic) transition function $T: \mathcal{X} \times \mathcal{A} \rightarrow \mathcal{X}$
+\Statex Learning rate $\alpha \in [0, 1]$, typically $\alpha = 0.1$
+\Statex Discount factor $\gamma \in [0, 1]$
+\Procedure{QLearning}{$\mathcal{X}$, $A$, $R$, $T$, $\alpha$, $\gamma$}
+\State Initialize $Q: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$ arbitrarily
+\State Start in state $s \in \mathcal{X}$
+\While{$Q$ is not converged}
+\State Select $a \in \mathcal{A}$ using $Q$ and an exploration policy (e.g.\ $\varepsilon$-greedy)
+\State $r \gets R(s, a)$
+\State $s' \gets T(s, a)$ \Comment{Receive the new state}
+\State $Q(s, a) \gets (1 - \alpha) \cdot Q(s, a) + \alpha \cdot (r + \gamma \cdot \max_{a'} Q(s', a'))$
+\State $s \gets s'$
+\EndWhile
+\State \Return $Q$
+\EndProcedure
+\end{algorithmic}
+\caption{$Q$-learning: Learn function $Q: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$}
+\label{alg:q-learning}
+\end{algorithm}
+\end{preview}
+\end{document}
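
For readers who want to execute the pseudocode above, here is a minimal Python/NumPy sketch of value iteration. The 2-state, 2-action cost table `g`, the transition tensor `f`, the discount factor `gamma`, and the tolerance `tol` are all invented for illustration; the hunks above do not show a discount factor, and the learning rate $\alpha$ added in this diff is not used in the plain Bellman backup sketched here.

```python
import numpy as np

# Hypothetical 2-state, 2-action example: g[x, a] is the stage cost,
# f[x, a, x'] the transition probability. Both are made up for this sketch.
g = np.array([[1.0, 2.0],
              [0.5, 1.0]])
f = np.array([[[0.9, 0.1], [0.2, 0.8]],
              [[0.5, 0.5], [0.1, 0.9]]])

def value_iteration(g, f, gamma=0.9, tol=1e-8):
    n_states, n_actions = g.shape
    J = np.zeros(n_states)                    # initialize J arbitrarily
    while True:
        # Q(x, a) = g(x, a) + gamma * sum_{x'} f(x, a, x') * J(x')
        Q = g + gamma * (f @ J)
        J_new = Q.min(axis=1)                 # J(x) <- min_a Q(x, a)
        if np.max(np.abs(J_new - J)) < tol:   # stand-in for "J is converged"
            return J_new
        J = J_new

print(value_iteration(g, f))
```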
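
Likewise, a minimal sketch of the $Q$-learning pseudocode in `q-learning.tex`, with the update applied to the visited pair $(s, a)$. The toy reward table `R`, the random transition function `T`, and the fixed step budget `n_steps` (a stand-in for the convergence test) are hypothetical.

```python
import numpy as np

# Hypothetical tiny MDP used only for illustration: 2 states, 2 actions.
n_states, n_actions = 2, 2
R = np.array([[0.0, 1.0],
              [1.0, 0.0]])
rng = np.random.default_rng(0)

def T(s, a):
    """Black-box probabilistic transition: here, a uniformly random next state."""
    return rng.integers(n_states)

def q_learning(R, T, alpha=0.1, gamma=0.9, epsilon=0.1, n_steps=10_000):
    Q = np.zeros((n_states, n_actions))  # initialize Q arbitrarily
    s = 0                                # start in some state s
    for _ in range(n_steps):             # stand-in for "until Q is converged"
        # epsilon-greedy exploration policy
        if rng.random() < epsilon:
            a = rng.integers(n_actions)
        else:
            a = int(np.argmax(Q[s]))
        r = R[s, a]
        s_next = T(s, a)                 # receive the new state
        # Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (r + gamma * max_a' Q(s', a'))
        Q[s, a] = (1 - alpha) * Q[s, a] + alpha * (r + gamma * Q[s_next].max())
        s = s_next
    return Q

print(q_learning(R, T))
```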