1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-21 23:58:07 +02:00

Move the Metadata parsing to the worker-thread

The only reason, as far as I can tell, for parsing the Metadata on the main-thread is how it was originally implemented. When Metadata support was first implemented, it utilized the [`DOMParser`](https://developer.mozilla.org/en-US/docs/Web/API/DOMParser) which isn't available in workers.
Today, with the custom XML-parser being used, that's no longer an issue and it seems reasonable to move the Metadata parsing to the worker-thread[1], since that's where all parsing should happen (for performance reasons).

Based on these changes, we'll be able to reduce the now unnecessary duplication of the XML-parser (and related code) in both of the *built* `pdf.js`/`pdf.worker.js` files.

Finally, this patch changes the `_repair` method to use "Array + join" rather than string concatenation.

---
[1] This depends on the previous patch, which enables sending `Map`s between threads when workers are disabled.
This commit is contained in:
Jonas Jenwald 2021-02-16 14:13:39 +01:00
parent 73bf45e64b
commit cc3a6563ee
6 changed files with 177 additions and 142 deletions

View file

@ -15,6 +15,12 @@
import { isEmptyObj } from "./test_utils.js";
import { Metadata } from "../../src/display/metadata.js";
import { MetadataParser } from "../../src/core/metadata_parser.js";
/**
 * Test helper: run the raw XMP string through `MetadataParser` and wrap the
 * parser's `serializable` result in a `Metadata` instance.
 *
 * @param {string} data - The raw metadata (XML) string handed to the parser.
 * @returns {Metadata} A `Metadata` instance built from the parser output.
 */
function createMetadata(data) {
  const { serializable } = new MetadataParser(data);
  return new Metadata(serializable);
}
describe("metadata", function () {
it("should handle valid metadata", function () {
@ -24,7 +30,7 @@ describe("metadata", function () {
"<rdf:Description xmlns:dc='http://purl.org/dc/elements/1.1/'>" +
'<dc:title><rdf:Alt><rdf:li xml:lang="x-default">Foo bar baz</rdf:li>' +
"</rdf:Alt></dc:title></rdf:Description></rdf:RDF></x:xmpmeta>";
const metadata = new Metadata(data);
const metadata = createMetadata(data);
expect(metadata.has("dc:title")).toBeTruthy();
expect(metadata.has("dc:qux")).toBeFalsy();
@ -42,7 +48,7 @@ describe("metadata", function () {
"<rdf:Description xmlns:dc='http://purl.org/dc/elements/1.1/'>" +
"<dc:title>\\376\\377\\000P\\000D\\000F\\000&</dc:title>" +
"</rdf:Description></rdf:RDF></x:xmpmeta>";
const metadata = new Metadata(data);
const metadata = createMetadata(data);
expect(metadata.has("dc:title")).toBeTruthy();
expect(metadata.has("dc:qux")).toBeFalsy();
@ -85,7 +91,7 @@ describe("metadata", function () {
"<dc:creator><rdf:Seq><rdf:li>\\376\\377\\000O\\000D\\000I\\000S" +
"</rdf:li></rdf:Seq></dc:creator></rdf:Description></rdf:RDF>" +
"</x:xmpmeta>";
const metadata = new Metadata(data);
const metadata = createMetadata(data);
expect(metadata.has("dc:title")).toBeTruthy();
expect(metadata.has("dc:qux")).toBeFalsy();
@ -128,7 +134,7 @@ describe("metadata", function () {
"</rdf:RDF>" +
"</x:xmpmeta>" +
'<?xpacket end="w"?>';
const metadata = new Metadata(data);
const metadata = createMetadata(data);
expect(isEmptyObj(metadata.getAll())).toEqual(true);
});
@ -159,7 +165,7 @@ describe("metadata", function () {
'<dc:title><rdf:Alt><rdf:li xml:lang="x-default"></rdf:li>' +
"</rdf:Alt></dc:title><dc:format>application/pdf</dc:format>" +
'</rdf:Description></rdf:RDF></x:xmpmeta><?xpacket end="w"?>';
const metadata = new Metadata(data);
const metadata = createMetadata(data);
expect(metadata.has("dc:title")).toBeTruthy();
expect(metadata.has("dc:qux")).toBeFalsy();
@ -191,7 +197,7 @@ describe("metadata", function () {
"<dc:title><rdf:Alt>" +
'<rdf:li xml:lang="x-default">&apos;Foo bar baz&apos;</rdf:li>' +
"</rdf:Alt></dc:title></rdf:Description></rdf:RDF></x:xmpmeta>";
const metadata = new Metadata(data);
const metadata = createMetadata(data);
expect(metadata.has("dc:title")).toBeTruthy();
expect(metadata.has("dc:qux")).toBeFalsy();
@ -220,7 +226,7 @@ describe("metadata", function () {
"<xmpMM:DocumentID>uuid:00000000-1c84-3cf9-89ba-bef0e729c831" +
"</xmpMM:DocumentID></rdf:Description>" +
'</rdf:RDF></x:xmpmeta><?xpacket end="w"?>';
const metadata = new Metadata(data);
const metadata = createMetadata(data);
expect(isEmptyObj(metadata.getAll())).toEqual(true);
});
@ -249,7 +255,7 @@ describe("metadata", function () {
" </dc:title>" +
" </rdf:Description>" +
"</rdf:RDF>";
const metadata = new Metadata(data);
const metadata = createMetadata(data);
expect(metadata.has("dc:title")).toBeTruthy();
expect(metadata.has("dc:qux")).toBeFalsy();