2013-11-15 13:39:25 +01:00
|
|
|
#!/usr/bin/env python
|
|
|
|
|
2015-11-20 23:12:22 +01:00
|
|
|
|
2013-11-15 13:39:25 +01:00
|
|
|
def wer(r, h):
    """
    Calculation of WER with Levenshtein distance.

    Returns the minimum number of substitutions, insertions and deletions
    needed to turn the reference ``r`` into the hypothesis ``h``. The value
    is the raw edit count as a plain ``int``; it is not divided by the
    reference length.

    ``r`` and ``h`` must support ``len()`` and integer indexing (e.g. lists
    of words). Their elements are compared with ``==``.

    O(nm) time and space complexity.

    >>> wer("who is there".split(), "is there".split())
    1
    >>> wer("who is there".split(), "".split())
    3
    >>> wer("".split(), "who is there".split())
    3
    """
    import numpy

    n_ref = len(r)
    n_hyp = len(h)

    # initialisation
    # A 64-bit table is used on purpose: with an 8-bit table any distance
    # above 255 cannot be stored, which corrupts results for long inputs.
    d = numpy.zeros((n_ref + 1, n_hyp + 1), dtype=numpy.int64)
    # Turning an empty prefix into j hypothesis items costs j insertions,
    # and turning i reference items into an empty prefix costs i deletions.
    d[0, :] = numpy.arange(n_hyp + 1)
    d[:, 0] = numpy.arange(n_ref + 1)

    # computation
    for i in range(1, n_ref + 1):
        for j in range(1, n_hyp + 1):
            if r[i - 1] == h[j - 1]:
                # Matching items cost nothing extra.
                d[i, j] = d[i - 1, j - 1]
            else:
                substitution = d[i - 1, j - 1] + 1
                insertion = d[i, j - 1] + 1
                deletion = d[i - 1, j] + 1
                d[i, j] = min(substitution, insertion, deletion)

    # Convert to a built-in int so callers (and the doctests above) do not
    # depend on how NumPy scalars are represented.
    return int(d[n_ref, n_hyp])
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # When executed as a script, run the examples embedded in the
    # docstrings of this module as tests.
    from doctest import testmod

    testmod()
|