1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-20 15:18:08 +02:00

Replace DOMParser with SimpleXMLParser

The `DOMParser` is most likely overkill and may be less secure.
Moreover, it is not supported in Node.js environments.

This patch replaces the `DOMParser` with a simple XML parser. This
should be faster and gives us Node.js support for free. The simple XML
parser is a port of the one that existed in the examples folder with a
small regex fix to make the parsing work correctly.

The unit tests are extended for increased test coverage of the metadata
code. The new method `getAll` is provided so the example does not have
to access internal properties of the object anymore.
This commit is contained in:
Tim van der Meij 2017-09-13 23:37:51 +02:00
parent bc9afdf3c4
commit d4309614f9
No known key found for this signature in database
GPG key ID: 8C3FD2925A5F2762
5 changed files with 179 additions and 128 deletions

View file

@@ -16,15 +16,37 @@
import { Metadata } from '../../src/display/metadata';
describe('metadata', function() {
describe('incorrect_xmp', function() {
// The <dc:title> payload below is literal backslash-octal text
// (\376\377 is 0xFE 0xFF -- presumably a UTF-16BE byte-order mark, to be
// confirmed against the Metadata implementation) followed by a bare `&`.
// The spec expects `Metadata` to expose that title as the string 'PDF&'.
it('should fix the incorrect XMP data', function() {
  const xmpFragments = [
    '<x:xmpmeta xmlns:x=\'adobe:ns:meta/\'>',
    '<rdf:RDF xmlns:rdf=\'http://www.w3.org/1999/02/22-rdf-syntax-ns#\'>',
    '<rdf:Description xmlns:dc=\'http://purl.org/dc/elements/1.1/\'>',
    '<dc:title>\\376\\377\\000P\\000D\\000F\\000&</dc:title>',
    '</rdf:Description></rdf:RDF></x:xmpmeta>',
  ];
  const parsed = new Metadata(xmpFragments.join(''));

  expect(parsed.get('dc:title')).toEqual('PDF&');
});
// Feeds a well-formed XMP packet whose title is wrapped in
// <rdf:Alt>/<rdf:li> and checks the three accessors used by this spec:
// `has` (truthy for a present key, falsy for an absent one), `get`
// (the text for a present key, null for an absent one) and `getAll`
// (an object holding exactly the parsed entries).
it('should handle valid metadata', function() {
  const xmpFragments = [
    '<x:xmpmeta xmlns:x=\'adobe:ns:meta/\'>',
    '<rdf:RDF xmlns:rdf=\'http://www.w3.org/1999/02/22-rdf-syntax-ns#\'>',
    '<rdf:Description xmlns:dc=\'http://purl.org/dc/elements/1.1/\'>',
    '<dc:title><rdf:Alt><rdf:li xml:lang="x-default">Foo bar baz</rdf:li>',
    '</rdf:Alt></dc:title></rdf:Description></rdf:RDF></x:xmpmeta>',
  ];
  const parsed = new Metadata(xmpFragments.join(''));

  // Presence checks: one key that exists, one that does not.
  expect(parsed.has('dc:title')).toBeTruthy();
  expect(parsed.has('dc:qux')).toBeFalsy();

  // Value lookups for the same two keys.
  expect(parsed.get('dc:title')).toEqual('Foo bar baz');
  expect(parsed.get('dc:qux')).toEqual(null);

  // The full dump must contain only the title entry.
  expect(parsed.getAll()).toEqual({ 'dc:title': 'Foo bar baz', });
});
// Same accessor checks as for well-formed input, but the <dc:title>
// payload is literal backslash-octal text (\376\377 is 0xFE 0xFF --
// presumably a UTF-16BE byte-order mark, to be confirmed against the
// Metadata implementation) followed by a bare `&`. The spec expects the
// title to come back as 'PDF&' from both `get` and `getAll`.
it('should repair and handle invalid metadata', function() {
  const xmpFragments = [
    '<x:xmpmeta xmlns:x=\'adobe:ns:meta/\'>',
    '<rdf:RDF xmlns:rdf=\'http://www.w3.org/1999/02/22-rdf-syntax-ns#\'>',
    '<rdf:Description xmlns:dc=\'http://purl.org/dc/elements/1.1/\'>',
    '<dc:title>\\376\\377\\000P\\000D\\000F\\000&</dc:title>',
    '</rdf:Description></rdf:RDF></x:xmpmeta>',
  ];
  const parsed = new Metadata(xmpFragments.join(''));

  // Presence checks: one key that exists, one that does not.
  expect(parsed.has('dc:title')).toBeTruthy();
  expect(parsed.has('dc:qux')).toBeFalsy();

  // Value lookups for the same two keys.
  expect(parsed.get('dc:title')).toEqual('PDF&');
  expect(parsed.get('dc:qux')).toEqual(null);

  // The full dump must contain only the repaired title entry.
  expect(parsed.getAll()).toEqual({ 'dc:title': 'PDF&', });
});
});