Replace DOMParser with SimpleXMLParser

The `DOMParser` is most likely overkill and may be less secure. Moreover, it is not supported in Node.js environments. This patch replaces the `DOMParser` with a simple XML parser. This should be faster and gives us Node.js support for free. The simple XML parser is a port of the one that existed in the examples folder with a small regex fix to make the parsing work correctly. The unit tests are extended for increased test coverage of the metadata code. The new method `getAll` is provided so the example does not have to access internal properties of the object anymore.
2025-04-26 10:08:06 +02:00 · 2017-09-13 23:37:51 +02:00 · 2017-09-13 23:37:51 +02:00 · d4309614f9
commit d4309614f9
parent bc9afdf3c4
5 changed files with 179 additions and 128 deletions
--- a/examples/node/getinfo.js
+++ b/examples/node/getinfo.js
@@ -9,9 +9,6 @@

 var fs = require('fs');

-// HACK adding DOMParser to read XMP metadata.
-global.DOMParser = require('./domparsermock.js').DOMParserMock;
-
 // Run `gulp dist-install` to generate 'pdfjs-dist' npm package files.
 var pdfjsLib = require('pdfjs-dist');

@@ -34,7 +31,7 @@ pdfjsLib.getDocument(pdfPath).then(function (doc) {
    console.log();
    if (data.metadata) {
      console.log('## Metadata');
-      console.log(JSON.stringify(data.metadata.metadata, null, 2));
+      console.log(JSON.stringify(data.metadata.getAll(), null, 2));
      console.log();
    }
  });