#!/usr/bin/env python def wer(r, h): """ Calculation of WER with Levenshtein distance. Works only for iterables up to 254 elements (uint8). O(nm) time ans space complexity. >>> wer("who is there".split(), "is there".split()) 1 >>> wer("who is there".split(), "".split()) 3 >>> wer("".split(), "who is there".split()) 3 """ # initialisation import numpy d = numpy.zeros((len(r)+1)*(len(h)+1), dtype=numpy.uint8) d = d.reshape((len(r)+1, len(h)+1)) for i in range(len(r)+1): for j in range(len(h)+1): if i == 0: d[0][j] = j elif j == 0: d[i][0] = i # computation for i in range(1, len(r)+1): for j in range(1, len(h)+1): if r[i-1] == h[j-1]: d[i][j] = d[i-1][j-1] else: substitution = d[i-1][j-1] + 1 insertion = d[i][j-1] + 1 deletion = d[i-1][j] + 1 d[i][j] = min(substitution, insertion, deletion) return d[len(r)][len(h)] if __name__ == "__main__": import doctest doctest.testmod()