diff --git a/source-code/Pseudocode/dyna-q/Makefile b/source-code/Pseudocode/dyna-q/Makefile
new file mode 100644
index 0000000..444baaf
--- /dev/null
+++ b/source-code/Pseudocode/dyna-q/Makefile
@@ -0,0 +1,56 @@
+# Build helpers for the Dyna-Q pseudocode figure ($(SOURCE).tex).
+#
+# Targets:
+#   make            (default) compile $(SOURCE).tex to PDF, then clean up
+#   clean           remove auxiliary files
+#   gif             crop the existing PDF and convert it to $(SOURCE).gif
+#   transparentGif  convert the existing PDF to result.gif with white made transparent
+#   svg             build the PDF and convert it to $(SOURCE).svg
+#   png             build the SVG and export it as $(SOURCE).png
+
+SOURCE = dyna-q
+# Passed to convert's -delay option by the gif target.
+DELAY = 80
+# Passed to convert's -density option by the gif target.
+DENSITY = 300
+# Passed as -w to rsvg-convert (svg target) and inkscape (png target).
+WIDTH = 512
+# Extra paths removed by clean; empty unless the caller sets it.
+TARGET ?=
+
+# Every target here is a command name, not a file the recipe creates.
+.PHONY: make clean gif png transparentGif svg
+
+# pdflatex is run twice (presumably so the second pass picks up the .aux data).
+make:
+	pdflatex -output-format=pdf $(SOURCE).tex
+	pdflatex -output-format=pdf $(SOURCE).tex
+	$(MAKE) clean
+
+clean:
+	rm -rf $(TARGET) *.class *.html *.log *.aux *.data *.gnuplot
+
+# Expects $(SOURCE).pdf to exist; the convert step reads $(SOURCE)-crop.pdf.
+gif:
+	pdfcrop $(SOURCE).pdf
+	convert -verbose -delay $(DELAY) -loop 0 -density $(DENSITY) $(SOURCE)-crop.pdf $(SOURCE).gif
+	$(MAKE) clean
+
+# NOTE(review): --export-png looks like the pre-1.0 Inkscape CLI -- confirm.
+png: svg
+	inkscape $(SOURCE).svg -w $(WIDTH) --export-png=$(SOURCE).png
+
+# Expects $(SOURCE).pdf to exist.
+transparentGif:
+	convert $(SOURCE).pdf -transparent white result.gif
+	$(MAKE) clean
+
+# pdf2svg does not always create valid SVGs, so the output is passed through
+# inkscape, resized with rsvg-convert, and passed through inkscape again.
+# Alternative kept for reference: inkscape $(SOURCE).pdf --export-plain-svg=$(SOURCE).svg
+svg: make
+	pdf2svg $(SOURCE).pdf $(SOURCE).svg
+	inkscape $(SOURCE).svg --export-plain-svg=$(SOURCE).svg
+	rsvg-convert -a -w $(WIDTH) -f svg $(SOURCE).svg -o $(SOURCE)2.svg
+	inkscape $(SOURCE)2.svg --export-plain-svg=$(SOURCE).svg
+	$(RM) $(SOURCE)2.svg
diff --git a/source-code/Pseudocode/dyna-q/README.md b/source-code/Pseudocode/dyna-q/README.md
new file mode 100644
index 0000000..a538dc4
--- /dev/null
+++ b/source-code/Pseudocode/dyna-q/README.md
@@ -0,0 +1,3 @@
+Compiled example
+----------------
+![Example](dyna-q.png)
diff --git a/source-code/Pseudocode/dyna-q/dyna-q.png b/source-code/Pseudocode/dyna-q/dyna-q.png
new file mode 100644
index 0000000..f82f68c
Binary files /dev/null and b/source-code/Pseudocode/dyna-q/dyna-q.png differ
diff --git a/source-code/Pseudocode/dyna-q/dyna-q.tex b/source-code/Pseudocode/dyna-q/dyna-q.tex
new file mode 100644
index 0000000..456d45e
--- /dev/null
+++ b/source-code/Pseudocode/dyna-q/dyna-q.tex
@@ -0,0 +1,56 @@
+\documentclass{article}
+\usepackage[pdftex,active,tightpage]{preview}
+\setlength\PreviewBorder{2mm}
+
+\usepackage[utf8]{inputenc} % 
this is needed for umlauts +\usepackage[ngerman]{babel} % this is needed for umlauts +\usepackage[T1]{fontenc} % this is needed for correct output of umlauts in pdf +\usepackage{amssymb,amsmath,amsfonts} % nice math rendering +\usepackage{braket} % needed for \Set +\usepackage{caption} +\usepackage{algorithm} +\usepackage{xcolor} +\usepackage[noend]{algpseudocode} +\usepackage{mathtools,bm} +\DeclareMathOperator*{\argmax}{arg\,max} + +\DeclareCaptionFormat{myformat}{#3} +\captionsetup[algorithm]{format=myformat} + +\begin{document} +\begin{preview} + \begin{algorithm}[H] + \begin{algorithmic} + \Require + \Statex States $\mathcal{X} = \{1, \dots, n_x\}$ + \Statex Actions $\mathcal{A} = \{1, \dots, n_a\},\qquad A: \mathcal{X} \Rightarrow \mathcal{A}$ + \Statex Reward function $R: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$ + \Statex Black-box (probabilistic) transition function $T: \mathcal{X} \times \mathcal{A} \rightarrow \mathcal{X}$ + \Statex Learning rate $\alpha \in [0, 1]$, typically $\alpha = 0.1$ + \Statex Discounting factor $\gamma \in [0, 1]$ + \Statex $\lambda \in [0, 1]$: Trade-off between TD and MC + \Procedure{DynaQ}{$\mathcal{X}$, $A$, $R$, $T$, $\alpha$, $\gamma$, $\lambda$} + \State Initialize $Q: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$ arbitrarily + \State Initialize $M: \mathcal{X} \times \mathcal{A} \rightarrow \mathcal{X} \times \mathbb{R}$ arbitrarily \Comment{Model} + \While{$Q$ is not converged} + \State Select $s \in \mathcal{X}$ arbitrarily + \State $a \gets \pi(s)$ + \State $r \gets R(s, a)$ + \State $s' \gets T(s, a)$ \Comment{Receive the new state} + \State $Q(s, a) \gets (1 - \alpha) \cdot Q(s, a) + \alpha \cdot (r + \gamma \cdot \max_{a'} Q(s', a'))$ + \State $M(s, a) \gets (s', r)$ + \For{$i$ in range $1, \dots, N$} + \State Select $(\tilde{s}, \tilde{a}) \in \mathcal{X} \times \mathcal{A}$ arbitrarily + \State $(s', r) \gets M(\tilde{s}, \tilde{a})$ + \State $Q(\tilde{s}, \tilde{a}) \gets (1 - \alpha) 
\cdot Q(\tilde{s}, \tilde{a}) + \alpha \cdot (r + \gamma \cdot \max_{a'} Q(s', a'))$ + \EndFor + \State Calculate $\pi$ based on $Q$ (e.g. $\varepsilon$-greedy) + \EndWhile + \Return $Q$ + \EndProcedure + \end{algorithmic} + \caption{Dyna-Q: Learn function $Q: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$} + \label{alg:dyna-q} + \end{algorithm} +\end{preview} +\end{document}