diff --git a/source-code/Pseudocode/sarsa/Makefile b/source-code/Pseudocode/sarsa/Makefile
new file mode 100644
index 0000000..fa17354
--- /dev/null
+++ b/source-code/Pseudocode/sarsa/Makefile
@@ -0,0 +1,44 @@
+# Build helpers for the SARSA pseudocode example ($(SOURCE).tex).
+# Tunables can be overridden on the command line, e.g. `make png WIDTH=1024`.
+SOURCE = sarsa
+DELAY = 80
+DENSITY = 300
+WIDTH = 512
+
+# All targets are command names, not files created by their recipes.
+.PHONY: make clean gif png transparentGif svg
+
+# Default target: compile the PDF (pdflatex is run twice), then remove by-products.
+make:
+	pdflatex -output-format=pdf $(SOURCE).tex
+	pdflatex -output-format=pdf $(SOURCE).tex
+	$(MAKE) clean
+
+# Delete auxiliary files only; generated PDF/SVG/PNG/GIF outputs are left alone.
+clean:
+	rm -rf *.class *.html *.log *.aux *.data *.gnuplot
+
+# Crop the already-built PDF and convert it to a looping GIF (does not rebuild the PDF).
+gif:
+	pdfcrop $(SOURCE).pdf
+	convert -verbose -delay $(DELAY) -loop 0 -density $(DENSITY) $(SOURCE)-crop.pdf $(SOURCE).gif
+	$(MAKE) clean
+
+# Export the SVG as a PNG of width $(WIDTH); the svg prerequisite rebuilds the PDF first.
+# NOTE(review): --export-png=FILE looks like Inkscape 0.9x syntax; newer releases use --export-filename. Confirm against the installed version.
+png: svg
+	inkscape $(SOURCE).svg -w $(WIDTH) --export-png=$(SOURCE).png
+
+# Convert the already-built PDF to result.gif with white made transparent.
+transparentGif:
+	convert $(SOURCE).pdf -transparent white result.gif
+	$(MAKE) clean
+
+# Build the PDF, convert it to SVG and rescale it to width $(WIDTH).
+# The inkscape round-trip is necessary, as pdf2svg does not always create valid SVGs.
+svg: make
+	pdf2svg $(SOURCE).pdf $(SOURCE).svg
+	inkscape $(SOURCE).svg --export-plain-svg=$(SOURCE).svg
+	rsvg-convert -a -w $(WIDTH) -f svg $(SOURCE).svg -o $(SOURCE)2.svg
+	inkscape $(SOURCE)2.svg --export-plain-svg=$(SOURCE).svg
+	rm $(SOURCE)2.svg
diff --git a/source-code/Pseudocode/sarsa/README.md b/source-code/Pseudocode/sarsa/README.md
new file mode 100644
index 0000000..8296a6c
--- /dev/null
+++ b/source-code/Pseudocode/sarsa/README.md
@@ -0,0 +1,3 @@
+Compiled example
+----------------
+![Example](sarsa.png)
diff --git a/source-code/Pseudocode/sarsa/sarsa.png b/source-code/Pseudocode/sarsa/sarsa.png
new file mode 100644
index 0000000..8f41064
Binary files /dev/null and b/source-code/Pseudocode/sarsa/sarsa.png differ
diff --git a/source-code/Pseudocode/sarsa/sarsa.tex b/source-code/Pseudocode/sarsa/sarsa.tex
new file mode 100644
index 0000000..9ee3a20
--- /dev/null
+++ b/source-code/Pseudocode/sarsa/sarsa.tex
@@ -0,0 +1,50 @@
+\documentclass{article}
+\usepackage[pdftex,active,tightpage]{preview}
+\setlength\PreviewBorder{2mm}
+
+\usepackage[utf8]{inputenc} % this is needed for umlauts
+\usepackage[ngerman]{babel} % this is needed for umlauts
+\usepackage[T1]{fontenc}    % this is needed for correct output of umlauts in pdf
+\usepackage{amssymb,amsmath,amsfonts} % nice math rendering
+\usepackage{braket} % needed for \Set
+\usepackage{caption}
+\usepackage{algorithm}
+\usepackage[noend]{algpseudocode}
+
+\DeclareCaptionFormat{myformat}{#3}
+\captionsetup[algorithm]{format=myformat}
+
+\begin{document}
+\begin{preview}
+    \begin{algorithm}[H]
+        \begin{algorithmic}
+            \Require
+            \Statex States $\mathcal{X} = \{1, \dots, n_x\}$
+            \Statex Actions $\mathcal{A} = \{1, \dots, n_a\},\qquad A: \mathcal{X} \Rightarrow \mathcal{A}$
+            \Statex Reward function $R: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$
+            \Statex Black-box (probabilistic) transition function $T: \mathcal{X} \times \mathcal{A} \rightarrow \mathcal{X}$
+            \Statex Learning rate $\alpha \in [0, 1]$, typically $\alpha = 0.1$
+            \Statex Discounting factor $\gamma \in [0, 1]$
+            \Statex $\lambda \in [0, 1]$: Trade-off between TD and MC % NOTE(review): \lambda is not referenced in the procedure body below
+            \Procedure{SARSA}{$\mathcal{X}$, $A$, $R$, $T$, $\alpha$, $\gamma$, $\lambda$}
+                \State Initialize $Q: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$ arbitrarily
+                \While{$Q$ is not converged}
+                    \State Select $(s, a) \in \mathcal{X} \times \mathcal{A}$ arbitrarily
+                    \While{$s$ is not terminal}
+                        \State $r \gets R(s, a)$ \Comment{Receive the reward}
+                        \State $s' \gets T(s, a)$ \Comment{Receive the new state}
+                        \State Calculate $\pi$ based on $Q$ (e.g. epsilon-greedy)
+                        \State $a' \gets \pi(s')$
+                        \State $Q(s, a) \gets (1 - \alpha ) \cdot Q(s, a) + \alpha \cdot (r + \gamma Q(s', a'))$
+                        \State $s \gets s'$
+                        \State $a \gets a'$
+                    \EndWhile
+                \EndWhile
+                \State \Return $Q$ % \Return alone does not start a new line in algpseudocode
+            \EndProcedure
+        \end{algorithmic}
+        \caption{SARSA: Learn function $Q: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$}
+        \label{alg:sarsa}
+    \end{algorithm}
+\end{preview}
+\end{document}
\ No newline at end of file