1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-26 10:08:06 +02:00

Replace DOMParser with SimpleXMLParser

The `DOMParser` is most likely overkill and may be less secure.
Moreover, it is not supported in Node.js environments.

This patch replaces the `DOMParser` with a simple XML parser. This
should be faster and gives us Node.js support for free. The simple XML
parser is a port of the one that existed in the examples folder with a
small regex fix to make the parsing work correctly.

The unit tests are extended for increased test coverage of the metadata
code. The new method `getAll` is provided so the example does not have
to access internal properties of the object anymore.
This commit is contained in:
Tim van der Meij 2017-09-13 23:37:51 +02:00
parent bc9afdf3c4
commit d4309614f9
No known key found for this signature in database
GPG key ID: 8C3FD2925A5F2762
5 changed files with 179 additions and 128 deletions

View file

@@ -9,9 +9,6 @@
var fs = require('fs');
-// HACK adding DOMParser to read XMP metadata.
-global.DOMParser = require('./domparsermock.js').DOMParserMock;
// Run `gulp dist-install` to generate 'pdfjs-dist' npm package files.
var pdfjsLib = require('pdfjs-dist');
@@ -34,7 +31,7 @@ pdfjsLib.getDocument(pdfPath).then(function (doc) {
console.log();
if (data.metadata) {
console.log('## Metadata');
-console.log(JSON.stringify(data.metadata.metadata, null, 2));
+console.log(JSON.stringify(data.metadata.getAll(), null, 2));
console.log();
}
});