diff --git a/source-code/Pseudocode/Policy-Iteration/Policy-Iteration.png b/source-code/Pseudocode/Policy-Iteration/Policy-Iteration.png
index a07805c..9ece999 100644
Binary files a/source-code/Pseudocode/Policy-Iteration/Policy-Iteration.png and b/source-code/Pseudocode/Policy-Iteration/Policy-Iteration.png differ
diff --git a/source-code/Pseudocode/Policy-Iteration/Policy-Iteration.tex b/source-code/Pseudocode/Policy-Iteration/Policy-Iteration.tex
index 7ded37c..aabf610 100644
--- a/source-code/Pseudocode/Policy-Iteration/Policy-Iteration.tex
+++ b/source-code/Pseudocode/Policy-Iteration/Policy-Iteration.tex
@@ -37,9 +37,10 @@
                         \State $\pi(x) \gets \arg \min_a \{Q(x, a)\}$
                     \EndFor
                 \EndWhile
+                \State \Return $\pi$ % \Return only prints the keyword; \State starts the new line
             \EndProcedure
         \end{algorithmic}
-        \caption{Policy Iteration}
+        \caption{Policy Iteration: Learning a policy $\pi: \mathcal{X} \rightarrow \mathcal{A}$}
         \label{alg:policy-iteration}
     \end{algorithm}
 \end{preview}