2
0
Fork 0
mirror of https://github.com/MartinThoma/LaTeX-examples.git synced 2025-04-19 11:38:05 +02:00

Improve pseudocode

This commit is contained in:
Martin Thoma 2016-08-04 23:19:19 +02:00
parent dd9390388d
commit ddd08a2a45
10 changed files with 11 additions and 10 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 29 KiB

Before After
Before After

View file

@ -22,8 +22,9 @@
\Statex States $\mathcal{X} = \{1, \dots, n_x\}$
\Statex Actions $\mathcal{A} = \{1, \dots, n_a\},\qquad A: \mathcal{X} \Rightarrow \mathcal{A}$
\Statex Cost function $g: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$
\Statex Transition probabilities $f$
\Procedure{PolicyIteration}{$\mathcal{X}$, $A$, $g$, $f$}
\Statex Transition probabilities $f$, $F$
\Statex $\alpha \in (0, 1)$
\Procedure{PolicyIteration}{$\mathcal{X}$, $A$, $g$, $f$, $F$, $\alpha$}
\State Initialize $\pi$ arbitrarily
\While{$\pi$ is not converged}
\State $J \gets$ solve system of linear equations $(I - \alpha \cdot F(\pi)) \cdot J = g(\pi)$

Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 29 KiB

Before After
Before After

View file

@ -22,7 +22,7 @@
\Statex States $\mathcal{X} = \{1, \dots, n_x\}$
\Statex Actions $\mathcal{A} = \{1, \dots, n_a\},\qquad A: \mathcal{X} \Rightarrow \mathcal{A}$
\Statex Cost function $g: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$
\Statex Transition probabilities $f$
\Statex Transition probabilities $f_{xy}(a) = \mathbb{P}(y | x, a)$
\Statex Discounting factor $\alpha \in (0, 1)$, typically $\alpha = 0.9$
\Procedure{ValueIteration}{$\mathcal{X}$, $A$, $g$, $f$, $\alpha$}
\State Initialize $J, J': \mathcal{X} \rightarrow \mathbb{R}_0^+$ arbitrarily

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Before After
Before After

View file

@ -22,7 +22,7 @@
\Statex States $\mathcal{X} = \{1, \dots, n_x\}$
\Statex Actions $\mathcal{A} = \{1, \dots, n_a\},\qquad A: \mathcal{X} \Rightarrow \mathcal{A}$
\Statex Cost function $g: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$
\Statex Horizon $N$
\Statex Horizon $N \in \mathbb{N}_{\geq 1}$
\Statex Discounting factor $\alpha \in [0, 1]$
\Procedure{DynamicProgramming}{$\mathcal{X}$, $A$, $g$, $N$, $\alpha$}
\State $J_N(x) \gets g_N(x) \quad \forall x \in \mathcal{X}$
@ -36,10 +36,11 @@
\State $\pi_k(x) \gets \arg \min_a (Q_k(x, a))$
\EndFor
\EndFor
\Return $\pi_{0:N-1}$
\EndProcedure
\end{algorithmic}
\caption{Dynamic Programming}
\label{alg:dynamic-programming}
\label{alg:dynamic-programming: Learn a strategy}
\end{algorithm}
\end{preview}
\end{document}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 39 KiB

After

Width:  |  Height:  |  Size: 41 KiB

Before After
Before After

View file

@ -43,14 +43,13 @@
\State $u \gets d_v + g_{vt}$
\EndIf
\EndIf
\If{$d_c + m_c < u$}
\State $u \gets d_c + m_c$
\EndIf
\State $u \gets \min (u, d_c + m_c)$
\EndFor
\EndWhile
\Return $u, t$
\EndProcedure
\end{algorithmic}
\caption{Label correction algorithm}
\caption{Label correction algorithm: Find shortest path}
\label{alg:label-correction-algorithm}
\end{algorithm}
\end{preview}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 40 KiB

After

Width:  |  Height:  |  Size: 41 KiB

Before After
Before After

View file

@ -33,7 +33,7 @@
\While{$s$ is not terminal}
\State Calculate $\pi$ according to Q and exploration strategy (e.g. $\pi(x) \gets \argmax_{a} Q(x, a)$)
\State $a \gets \pi(s)$
\State $r \gets R(s, a)$
\State $r \gets R(s, a)$ \Comment{Receive the reward}
\State $s' \gets T(s, a)$ \Comment{Receive the new state}
\State $Q(s, a) \gets (1 - \alpha) \cdot Q(s, a) + \alpha \cdot (r + \gamma \cdot \max_{a'} Q(s', a'))$
\State $s \gets s'$