diff --git a/source-code/Pseudocode/Policy-Iteration/Policy-Iteration.png b/source-code/Pseudocode/Policy-Iteration/Policy-Iteration.png index 9d73b6a..a07805c 100644 Binary files a/source-code/Pseudocode/Policy-Iteration/Policy-Iteration.png and b/source-code/Pseudocode/Policy-Iteration/Policy-Iteration.png differ diff --git a/source-code/Pseudocode/Policy-Iteration/Policy-Iteration.tex b/source-code/Pseudocode/Policy-Iteration/Policy-Iteration.tex index 74434df..7ded37c 100644 --- a/source-code/Pseudocode/Policy-Iteration/Policy-Iteration.tex +++ b/source-code/Pseudocode/Policy-Iteration/Policy-Iteration.tex @@ -22,7 +22,8 @@ \Statex Sates $\mathcal{X} = \{1, \dots, n_x\}$ \Statex Actions $\mathcal{A} = \{1, \dots, n_a\},\qquad A: \mathcal{X} \Rightarrow \mathcal{A}$ \Statex Cost function $g: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$ - \Procedure{PolicyIteration}{$\mathcal{X}$, $A$, $g$} + \Statex Transition probabilities $f$ + \Procedure{PolicyIteration}{$\mathcal{X}$, $A$, $g$, $f$} \State Initialize $\pi$ arbitrarily \While{$\pi$ is not converged} \State $J \gets$ solve system of linear equations $I - \alpha \cdot F(\pi) \cdot J = g(\pi)$ diff --git a/source-code/Pseudocode/Value-Iteration/Value-Iteration.png b/source-code/Pseudocode/Value-Iteration/Value-Iteration.png index 2000f94..e1bbc29 100644 Binary files a/source-code/Pseudocode/Value-Iteration/Value-Iteration.png and b/source-code/Pseudocode/Value-Iteration/Value-Iteration.png differ diff --git a/source-code/Pseudocode/Value-Iteration/Value-Iteration.tex b/source-code/Pseudocode/Value-Iteration/Value-Iteration.tex index 2124523..e8aac78 100644 --- a/source-code/Pseudocode/Value-Iteration/Value-Iteration.tex +++ b/source-code/Pseudocode/Value-Iteration/Value-Iteration.tex @@ -22,9 +22,9 @@ \Statex Sates $\mathcal{X} = \{1, \dots, n_x\}$ \Statex Actions $\mathcal{A} = \{1, \dots, n_a\},\qquad A: \mathcal{X} \Rightarrow \mathcal{A}$ \Statex Cost function $g: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$ - % TODO: g? \alpha? - % f_ij(a) - \Procedure{ValueIteration}{$\mathcal{X}$, $A$, $g$} + \Statex Transition probabilities $f$ + % TODO: \alpha? + \Procedure{ValueIteration}{$\mathcal{X}$, $A$, $g$, $f$} \State Initialize $J, J': \mathcal{X} \rightarrow \mathbb{R}_0^+$ arbitrarily \While{$J$ is not converged} \State $J' \gets J$ @@ -39,7 +39,7 @@ \EndWhile \EndProcedure \end{algorithmic} - \caption{Calculate value function} + \caption{Value Iteration} \label{alg:value-iteration} \end{algorithm} \end{preview}