% Standalone rendering of the Policy Iteration pseudocode.
% The `preview` package (active, tightpage) crops the output page to the
% contents of the `preview` environment, so the PDF contains only the algorithm.
\documentclass{article}

% --- Output cropping ---------------------------------------------------------
\usepackage[pdftex,active,tightpage]{preview}
\setlength\PreviewBorder{2mm}          % margin kept around the cropped box

% --- Encoding / language -----------------------------------------------------
\usepackage[utf8]{inputenc}            % this is needed for umlauts
\usepackage[ngerman]{babel}            % this is needed for umlauts
\usepackage[T1]{fontenc}               % this is needed for correct output of umlauts in pdf

% --- Math --------------------------------------------------------------------
\usepackage{amssymb,amsmath,amsfonts}  % nice math rendering
\usepackage{braket}                    % needed for \Set
\DeclareMathOperator*{\argmin}{arg\,min} % upright operator, limits set below in display style

% --- Algorithm typesetting ---------------------------------------------------
\usepackage{caption}
\usepackage{algorithm}
\usepackage[noend]{algpseudocode}      % noend: do not print "end while"/"end for" lines

% Caption format that prints only the caption text (#3), dropping the
% "Algorithm N" label (#1) and the separator (#2).
\DeclareCaptionFormat{myformat}{#3}
\captionsetup[algorithm]{format=myformat}

\begin{document}
\begin{preview}
    \begin{algorithm}[H]
        \begin{algorithmic}
            \Require
            \Statex States $\mathcal{X} = \{1, \dots, n_x\}$
            \Statex Actions $\mathcal{A} = \{1, \dots, n_a\},\qquad A\colon \mathcal{X} \Rightarrow \mathcal{A}$
            \Statex Cost function $g\colon \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$
            \Statex Transition probabilities $f$
            % NOTE(review): \alpha is used below but was never declared as an
            % input; the range [0, 1) is the usual assumption -- confirm.
            \Statex Discount factor $\alpha \in [0, 1)$
            \Procedure{PolicyIteration}{$\mathcal{X}$, $A$, $g$, $f$, $\alpha$}
                \State Initialize $\pi$ arbitrarily
                \While{$\pi$ is not converged}
                    % Policy evaluation: cost-to-go J of the current policy.
                    \State $J \gets$ solve system of linear equations $(I - \alpha \cdot F(\pi)) \cdot J = g(\pi)$
                    % Q-values of every admissible state/action pair under J.
                    \For{$x \in \mathcal{X}$}
                        \For{$a \in A(x)$}
                            \State $Q(x, a) \gets g(x, a) + \alpha \sum_{j=1}^{n_x} f_{xj}(a) \cdot J(j)$
                        \EndFor
                    \EndFor
                    % Policy improvement: greedy action w.r.t. Q in every state.
                    \For{$x \in \mathcal{X}$}
                        \State $\pi(x) \gets \argmin_{a \in A(x)} Q(x, a)$
                    \EndFor
                \EndWhile
                % \Return only prints the keyword; \State is required to put it
                % on its own line.
                \State \Return $\pi$
            \EndProcedure
        \end{algorithmic}
        \caption{Policy Iteration: Learning a policy $\pi\colon \mathcal{X} \rightarrow \mathcal{A}$}
        \label{alg:policy-iteration}
    \end{algorithm}
\end{preview}
\end{document}