Add Q-learning and improve value iteration pseudocode

2025-04-26 06:48:04 +02:00 · 2016-07-14 15:20:45 +02:00 · 2016-07-14 15:20:45 +02:00 · 001350bae4
commit 001350bae4
parent 807b9268d0
6 changed files with 88 additions and 3 deletions
--- a/source-code/Pseudocode/Value-Iteration/Value-Iteration.png
+++ b/source-code/Pseudocode/Value-Iteration/Value-Iteration.png
--- a/source-code/Pseudocode/Value-Iteration/Value-Iteration.tex
+++ b/source-code/Pseudocode/Value-Iteration/Value-Iteration.tex
@@ -23,8 +23,8 @@
        \Statex Actions $\mathcal{A} = \{1, \dots, n_a\},\qquad A: \mathcal{X} \Rightarrow \mathcal{A}$
        \Statex Cost function $g: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$
        \Statex Transition probabilities $f$
-        % TODO: \alpha?
-        \Procedure{ValueIteration}{$\mathcal{X}$, $A$, $g$, $f$}
+        \Statex Learning rate $\alpha \in [0, 1]$, typically $\alpha = 0.1$
+        \Procedure{ValueIteration}{$\mathcal{X}$, $A$, $g$, $f$, $\alpha$}
            \State Initialize $J, J': \mathcal{X} \rightarrow \mathbb{R}_0^+$ arbitrarily
            \While{$J$ is not converged}
                \State $J' \gets J$
@@ -37,9 +37,10 @@
                    \State $J(x) \gets \min_a \{Q(x, a)\}$
                \EndFor
            \EndWhile
+            \Return $J$
        \EndProcedure
        \end{algorithmic}
-    \caption{Value Iteration}
+    \caption{Value Iteration: Learn function $J: \mathcal{X} \rightarrow \mathbb{R}$}
    \label{alg:value-iteration}
    \end{algorithm}
 \end{preview}