diff --git a/src/obj.js b/src/obj.js
index 3432ac68d..acc9e1284 100644
--- a/src/obj.js
+++ b/src/obj.js
@@ -140,7 +140,12 @@ var Catalog = (function CatalogClosure() {
 
         if (isName(type) && isName(subtype) &&
             type.name === 'Metadata' && subtype.name === 'XML') {
-          metadata = stringToPDFString(bytesToString(stream.getBytes()));
+          // XXX: This should examine the charset the XML document defines,
+          // however since there are currently no real means to decode
+          // arbitrary charsets, let's just hope that the author of the PDF
+          // was reasonable enough to stick with the XML default charset,
+          // which is UTF-8.
+          metadata = stringToUTF8String(bytesToString(stream.getBytes()));
         }
       }
 
diff --git a/src/util.js b/src/util.js
index 90e6cee5d..fe5d895e3 100644
--- a/src/util.js
+++ b/src/util.js
@@ -302,6 +302,28 @@ function stringToPDFString(str) {
   return str2;
 }
 
+/**
+ * Decodes a byte string (one character per byte, as produced by
+ * bytesToString) holding UTF-8 encoded text into a regular JavaScript string.
+ *
+ * @param {string} str The byte string to decode.
+ * @return {string} The decoded text, or |str| unchanged when it is not
+ *                  well-formed UTF-8.
+ */
+function stringToUTF8String(str) {
+  try {
+    // escape() rewrites every non-ASCII byte as %XX; decodeURIComponent()
+    // then interprets those %XX sequences as UTF-8.
+    return decodeURIComponent(escape(str));
+  } catch (e) {
+    if (!(e instanceof URIError)) {
+      throw e;
+    }
+    // Malformed UTF-8: keep the raw bytes instead of aborting the caller.
+    return str;
+  }
+}
+
 function isBool(v) {
   return typeof v == 'boolean';
 }