mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-26 10:08:06 +02:00
Decode XML metadata as UTF-8
XML uses UTF-8 by default, which needs to be decoded to a JavaScript string prior to feeding it to the DOMParser. In an ideal world, the XML would actually be analyzed and the specified charset would be used; however, that does not seem feasible unless JS engines get iconv bindings. Fixes GH-1692.
This commit is contained in:
parent
e9632120c4
commit
40b9be137f
2 changed files with 10 additions and 1 deletion
|
@ -140,7 +140,12 @@ var Catalog = (function CatalogClosure() {
|
|||
|
||||
if (isName(type) && isName(subtype) &&
|
||||
type.name === 'Metadata' && subtype.name === 'XML') {
|
||||
metadata = stringToPDFString(bytesToString(stream.getBytes()));
|
||||
// XXX: This should examine the charset the XML document defines,
|
||||
// however since there are currently no real means to decode
|
||||
// arbitrary charsets, let's just hope that the author of the PDF
|
||||
// was reasonable enough to stick with the XML default charset,
|
||||
// which is UTF-8.
|
||||
metadata = stringToUTF8String(bytesToString(stream.getBytes()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue