2
0
Fork 0
mirror of https://github.com/MartinThoma/LaTeX-examples.git synced 2025-04-25 22:38:04 +02:00

Fix pseudocode

This commit is contained in:
Martin Thoma 2016-07-23 13:49:40 +02:00
parent c0bbfa6811
commit 578245c784
4 changed files with 11 additions and 9 deletions

View file

@ -27,13 +27,15 @@
\Statex Discounting factor $\gamma \in [0, 1]$
% Tabular Q-learning, written for algpseudocode.
% Inputs (as listed in the procedure header): state set, actions, reward
% function R, transition function T, learning rate alpha and factor gamma.
% Structure: the outer loop repeats until Q has converged; each pass starts
% in a state s and the inner loop runs until s is terminal.
\Procedure{QLearning}{$\mathcal{X}$, $A$, $R$, $T$, $\alpha$, $\gamma$}
    \State Initialize $Q\colon \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$ arbitrarily
    \While{$Q$ is not converged}
        \State Start in state $s \in \mathcal{X}$
        \While{$s$ is not terminal}
            \State Select $a \in \mathcal{A}$ by $Q$ and an exploration policy (e.g.\ $\varepsilon$-greedy)
            \State $r \gets R(s, a)$
            \State $s' \gets T(s, a)$ \Comment{Receive the new state}
            % The entry being updated is the pair (s, a) that was just taken;
            % s' only enters through the max over next actions a'.
            \State $Q(s, a) \gets (1 - \alpha) \cdot Q(s, a) + \alpha \cdot (r + \gamma \cdot \max_{a'} Q(s', a'))$
            \State $s \gets s'$
        \EndWhile
    \EndWhile
    % \Return is only a keyword macro; \State puts it on its own line.
    \State \Return $Q$
\EndProcedure