Improve pseudocode
(rendered image: 28 KiB → 29 KiB)

@@ -22,8 +22,9 @@
 \Statex States $\mathcal{X} = \{1, \dots, n_x\}$
 \Statex Actions $\mathcal{A} = \{1, \dots, n_a\},\qquad A: \mathcal{X} \Rightarrow \mathcal{A}$
 \Statex Cost function $g: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$
-\Statex Transition probabilities $f$
-\Procedure{PolicyIteration}{$\mathcal{X}$, $A$, $g$, $f$}
+\Statex Transition probabilities $f$, $F$
+\Statex Discounting factor $\alpha \in (0, 1)$
+\Procedure{PolicyIteration}{$\mathcal{X}$, $A$, $g$, $f$, $F$, $\alpha$}
 \State Initialize $\pi$ arbitrarily
 \While{$\pi$ is not converged}
 \State $J \gets$ solve system of linear equations $(I - \alpha \cdot F(\pi)) \cdot J = g(\pi)$
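
For a quick sanity check of the updated procedure, here is a minimal runnable sketch of policy iteration in NumPy. The encoding is an assumption, since the diff does not fix one: f[a, x, y] holds the transition probabilities P(y | x, a), so F(pi) is the matrix whose row x is f[pi[x], x, :], and g[x, a] is the stage cost.

import numpy as np

def policy_iteration(g, f, alpha):
    n_x, n_a = g.shape
    pi = np.zeros(n_x, dtype=int)            # initialize pi arbitrarily
    while True:
        # Policy evaluation: solve (I - alpha * F(pi)) J = g(pi)
        F_pi = f[pi, np.arange(n_x), :]      # row x is f[pi[x], x, :]
        g_pi = g[np.arange(n_x), pi]
        J = np.linalg.solve(np.eye(n_x) - alpha * F_pi, g_pi)
        # Policy improvement: greedy one-step lookahead on the cost
        Q = g + alpha * np.einsum('axy,y->xa', f, J)
        pi_new = Q.argmin(axis=1)
        if np.array_equal(pi_new, pi):       # pi has converged
            return pi, J
        pi = pi_new

# Illustrative random MDP, not from the source
rng = np.random.default_rng(0)
n_x, n_a = 4, 2
f = rng.dirichlet(np.ones(n_x), size=(n_a, n_x))  # each f[a, x, :] sums to 1
g = rng.uniform(size=(n_x, n_a))
print(policy_iteration(g, f, alpha=0.9))
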
(rendered image: 28 KiB → 29 KiB)

@@ -22,7 +22,7 @@
 \Statex States $\mathcal{X} = \{1, \dots, n_x\}$
 \Statex Actions $\mathcal{A} = \{1, \dots, n_a\},\qquad A: \mathcal{X} \Rightarrow \mathcal{A}$
 \Statex Cost function $g: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$
-\Statex Transition probabilities $f$
+\Statex Transition probabilities $f_{xy}(a) = \mathbb{P}(y | x, a)$
 \Statex Discounting factor $\alpha \in (0, 1)$, typically $\alpha = 0.9$
 \Procedure{ValueIteration}{$\mathcal{X}$, $A$, $g$, $f$, $\alpha$}
 \State Initialize $J, J': \mathcal{X} \rightarrow \mathbb{R}_0^+$ arbitrarily
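
The corresponding value-iteration loop, with the same assumed encoding as above (f[a, x, y] = P(y | x, a), g[x, a] the stage cost); the stopping tolerance is illustrative, since the hunk does not show the convergence test.

import numpy as np

def value_iteration(g, f, alpha, tol=1e-8):
    n_x, n_a = g.shape
    J = np.zeros(n_x)                        # initialize J arbitrarily (here: 0)
    while True:
        # Bellman update: J'(x) = min_a [ g(x, a) + alpha * sum_y f_xy(a) J(y) ]
        Q = g + alpha * np.einsum('axy,y->xa', f, J)
        J_new = Q.min(axis=1)
        if np.max(np.abs(J_new - J)) < tol:  # converged in the sup-norm
            return J_new, Q.argmin(axis=1)   # cost-to-go and a greedy policy
        J = J_new

Since the update is an alpha-contraction in the sup-norm, the typical alpha = 0.9 from the hunk gives linear convergence at that rate per sweep.
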
(rendered image: 25 KiB → 26 KiB)

@@ -22,7 +22,7 @@
 \Statex States $\mathcal{X} = \{1, \dots, n_x\}$
 \Statex Actions $\mathcal{A} = \{1, \dots, n_a\},\qquad A: \mathcal{X} \Rightarrow \mathcal{A}$
 \Statex Cost function $g: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$
-\Statex Horizon $N$
+\Statex Horizon $N \in \mathbb{N}_{\geq 1}$
 \Statex Discounting factor $\alpha \in [0, 1]$
 \Procedure{DynamicProgramming}{$\mathcal{X}$, $A$, $g$, $N$, $\alpha$}
 \State $J_N(x) \gets g_N(x) \quad \forall x \in \mathcal{X}$

@@ -36,10 +36,11 @@
 \State $\pi_k(x) \gets \arg \min_a (Q_k(x, a))$
 \EndFor
 \EndFor
+\Return $\pi_{0:N-1}$
 \EndProcedure
 \end{algorithmic}
-\caption{Dynamic Programming}
+\caption{Dynamic Programming: Learn a strategy}
 \label{alg:dynamic-programming}
 \end{algorithm}
 \end{preview}
 \end{document}
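
Since this file's hunks only show the declarations and the tail of the procedure, the sketch below fills the middle with the standard backward recursion; the transition model f and terminal cost g_N are assumed inputs (the visible argument list omits them).

import numpy as np

def dynamic_programming(g, g_N, f, N, alpha):
    n_x, n_a = g.shape
    J = g_N.copy()                     # J_N(x) = g_N(x) for all x
    pi = np.zeros((N, n_x), dtype=int)
    for k in range(N - 1, -1, -1):     # backward in time: k = N-1, ..., 0
        # Q_k(x, a) = g(x, a) + alpha * sum_y f_xy(a) * J_{k+1}(y)
        Q = g + alpha * np.einsum('axy,y->xa', f, J)
        pi[k] = Q.argmin(axis=1)       # pi_k(x) = argmin_a Q_k(x, a)
        J = Q.min(axis=1)              # J_k(x) = min_a Q_k(x, a)
    return pi                          # the strategy pi_{0:N-1}
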
(rendered image: 39 KiB → 41 KiB)

@@ -43,14 +43,13 @@
 \State $u \gets d_v + g_{vt}$
 \EndIf
 \EndIf
-\If{$d_c + m_c < u$}
-\State $u \gets d_c + m_c$
-\EndIf
+\State $u \gets \min (u, d_c + m_c)$
 \EndFor
 \EndWhile
+\Return $u, t$
 \EndProcedure
 \end{algorithmic}
-\caption{Label correction algorithm}
+\caption{Label correction algorithm: Find shortest path}
 \label{alg:label-correction-algorithm}
 \end{algorithm}
 \end{preview}
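
The visible fragments match the classic label-correcting scheme, so the following is a best-guess sketch rather than a transcription of the file: d holds the cost labels, u the running upper bound on the cost of reaching the target t, and the graph encoding is an assumption.

import math

def label_correction(graph, s, t):
    # graph: dict mapping each node to a list of (neighbor, edge cost) pairs
    d = {s: 0.0}                 # cost labels
    u = math.inf                 # upper bound on the s -> t path cost
    open_list = [s]
    while open_list:
        v = open_list.pop()
        for w, cost in graph[v]:
            d_w = d[v] + cost
            # Keep the label only if it improves on d[w] and stays below u
            if d_w < d.get(w, math.inf) and d_w < u:
                d[w] = d_w
                if w == t:
                    u = d_w      # tighter bound, mirrors u <- min(u, d_c + m_c)
                else:
                    open_list.append(w)
    return u, t                  # as in the added \Return statement

graph = {'s': [('a', 1.0), ('b', 4.0)],
         'a': [('b', 1.0), ('t', 5.0)],
         'b': [('t', 1.0)],
         't': []}
print(label_correction(graph, 's', 't'))  # (3.0, 't')
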
(rendered image: 40 KiB → 41 KiB)

@@ -33,7 +33,7 @@
 \While{$s$ is not terminal}
 \State Calculate $\pi$ according to Q and exploration strategy (e.g. $\pi(x) \gets \arg \max_{a} Q(x, a)$)
 \State $a \gets \pi(s)$
-\State $r \gets R(s, a)$
-\State $s' \gets T(s, a)$
+\State $r \gets R(s, a)$ \Comment{Receive the reward}
+\State $s' \gets T(s, a)$ \Comment{Receive the new state}
 \State $Q(s, a) \gets (1 - \alpha) \cdot Q(s, a) + \alpha \cdot (r + \gamma \cdot \max_{a'} Q(s', a'))$
 \State $s \gets s'$
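
A runnable sketch of the episode loop, with epsilon-greedy filling in for the exploration strategy and a deterministic environment given by callables R(s, a) and T(s, a) (those two names come from the pseudocode; everything else is an assumption). Note the update writes to the visited pair Q(s, a), not Q(s', a).

import numpy as np

def q_learning_episode(Q, R, T, s, terminal, alpha=0.1, gamma=0.9,
                       eps=0.1, rng=None):
    rng = rng or np.random.default_rng()
    while s not in terminal:
        # Exploration strategy: epsilon-greedy around argmax_a Q(s, a)
        if rng.random() < eps:
            a = int(rng.integers(Q.shape[1]))
        else:
            a = int(Q[s].argmax())
        r = R(s, a)                      # receive the reward
        s_next = T(s, a)                 # receive the new state
        # Q-learning update on the visited pair (s, a)
        Q[s, a] = (1 - alpha) * Q[s, a] + alpha * (r + gamma * Q[s_next].max())
        s = s_next
    return Q

# Tiny 3-state chain: action 1 moves right, action 0 stays; reward on reaching state 2
Q = np.zeros((3, 2))
R = lambda s, a: 1.0 if (s, a) == (1, 1) else 0.0
T = lambda s, a: min(s + a, 2)
for _ in range(200):
    Q = q_learning_episode(Q, R, T, s=0, terminal={2})
print(Q)
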