1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-20 15:18:08 +02:00

When parsing Metadata, attempt to remove "junk" before the first tag (PR 10398 follow-up)

This will allow the Metadata to be successfully extracted from the PDF file in issue 10395.
Furthermore, this patch fixes a bug in `Metadata.get` that caused the method to return `null` when the stored value was an empty string or zero (both of which ought to be allowed).
This commit is contained in:
Jonas Jenwald 2019-01-16 12:37:21 +01:00
parent 5d90224409
commit 9f45f8dfda
2 changed files with 25 additions and 3 deletions

View file

@@ -156,7 +156,26 @@ describe('metadata', function() {
'</rdf:Description></rdf:RDF></x:xmpmeta><?xpacket end="w"?>';
const metadata = new Metadata(data);
expect(isEmptyObj(metadata.getAll())).toEqual(true);
expect(metadata.has('dc:title')).toBeTruthy();
expect(metadata.has('dc:qux')).toBeFalsy();
expect(metadata.get('dc:title')).toEqual('');
expect(metadata.get('dc:qux')).toEqual(null);
expect(metadata.getAll()).toEqual({
'dc:creator': '',
'dc:description': '',
'dc:format': 'application/pdf',
'dc:subject': '',
'dc:title': '',
'pdf:keywords': '',
'pdf:pdfversion': '1.7',
'pdf:producer': 'PDFKit.NET 4.0.102.0',
'xap:createdate': '2018-12-27T13:50:36-08:00',
'xap:creatortool': '',
'xap:metadatadate': '2018-12-27T13:50:38-08:00',
'xap:modifydate': '2018-12-27T13:50:38-08:00',
});
});
it('should correctly handle metadata containing "&apos" (issue 10407)',