From 4aa27cc645b7c88e23f644ca258fa933c845891b Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 13 Apr 2021 12:02:16 +0200 Subject: [PATCH 1/2] Re-factor `Catalog._collectJavaScript` to use a `Map` rather than an Object Given that this only an internal helper method, used by the `Catalog.{javaScript, jsActions}` getters, this change simplifies iteration of the returned data. We can also (slightly) re-factor the code of the `jsActions` getter, and remove an obsolete[1] JSDoc-comment from the `openAction` getter. --- [1] Not really relevant now that we've got proper scripting support. --- src/core/document.js | 1 - src/core/obj.js | 22 +++++++++------------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index 27cbede28..b67834c96 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -517,7 +517,6 @@ class Page { this.pageDict, PageActionEventType ); - return shadow(this, "jsActions", actions); } } diff --git a/src/core/obj.js b/src/core/obj.js index 717404fcb..c5b2c7b82 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -856,9 +856,6 @@ class Catalog { return shadow(this, "viewerPreferences", prefs); } - /** - * NOTE: "JavaScript" actions are, for now, handled by `get javaScript` below. - */ get openAction() { const obj = this._catDict.get("OpenAction"); const openAction = Object.create(null); @@ -923,9 +920,9 @@ class Catalog { } if (javaScript === null) { - javaScript = Object.create(null); + javaScript = new Map(); } - javaScript[name] = stringToPDFString(js); + javaScript.set(name, stringToPDFString(js)); } if (obj && obj.has("JavaScript")) { @@ -955,23 +952,23 @@ class Catalog { return shadow( this, "javaScript", - javaScript ? Object.values(javaScript) : null + javaScript ? [...javaScript.values()] : null ); } get jsActions() { - const js = this._collectJavaScript(); + const javaScript = this._collectJavaScript(); let actions = collectActions( this.xref, this._catDict, DocumentActionEventType ); - if (!actions && js) { - actions = Object.create(null); - } - if (actions && js) { - for (const [key, val] of Object.entries(js)) { + if (javaScript) { + if (!actions) { + actions = Object.create(null); + } + for (const [key, val] of javaScript) { if (key in actions) { actions[key].push(val); } else { @@ -979,7 +976,6 @@ class Catalog { } } } - return shadow(this, "jsActions", actions); } From 2b2234fd5a789c059c334fda44573a52ee6fbb23 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 13 Apr 2021 12:30:20 +0200 Subject: [PATCH 2/2] [api-minor] Ensure that `PDFDocumentProxy.hasJSActions` won't fail if `MissingDataException`s are thrown during the associated worker-thread parsing With the current implementation of `PDFDocument.hasJSActions`, in the worker-thread, we're not actually handling not-yet-loaded data correctly. This can thus fail in *two* different ways: - The `PDFDocument.fieldObjects` getter (and its helper method), while it may *return* a Promise, still fetches all of its data synchronously and it can thus throw a `MissingDataException` during parsing. - The `Catalog.jsActions` getter, which is completely synchronous, can obviously throw a `MissingDataException` during parsing. If either of these cases occur currently, the `PDFDocumentProxy.hasJSActions` method in the API can either return a *rejected* Promise (which it never should) or possibly "hang" and never resolve. *Please note:* While I've not *yet* seen this error in an actual PDF document, it can happen during loading if you're unlucky enough with e.g. the structure of the PDF document and/or the download speed offered by the server. This patch is thus based on code-inspection *and* on manually throwing a `MissingDataException` on the first access of `Catalog.jsActions` to simulate this situation. Finally, this patch adds a couple of *API* unit-tests for this (since none existed). --- src/core/document.js | 35 ++++++++++++++++++++++------------- test/unit/api_spec.js | 17 +++++++++++++++++ 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index b67834c96..1e32007c1 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -1182,19 +1182,28 @@ class PDFDocument { } get hasJSActions() { - return shadow( - this, - "hasJSActions", - this.fieldObjects.then(fieldObjects => { - return ( - (fieldObjects !== null && - Object.values(fieldObjects).some(fieldObject => - fieldObject.some(object => object.actions !== null) - )) || - !!this.catalog.jsActions - ); - }) - ); + const promise = this.pdfManager.ensure(this, "_parseHasJSActions"); + return shadow(this, "hasJSActions", promise); + } + + /** + * @private + */ + async _parseHasJSActions() { + const [catalogJsActions, fieldObjects] = await Promise.all([ + this.pdfManager.ensureCatalog("jsActions"), + this.pdfManager.ensure(this, "fieldObjects"), + ]); + + if (catalogJsActions) { + return true; + } + if (fieldObjects) { + return Object.values(fieldObjects).some(fieldObject => + fieldObject.some(object => object.actions !== null) + ); + } + return false; } get calculationOrderIds() { diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index af9a494bf..40958ff9e 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -1014,6 +1014,23 @@ describe("api", function () { .catch(done.fail); }); + it("gets hasJSActions, in document without javaScript", async function () { + const hasJSActions = await pdfDocument.hasJSActions(); + + expect(hasJSActions).toEqual(false); + }); + it("gets hasJSActions, in document with javaScript", async function () { + const loadingTask = getDocument( + buildGetDocumentParams("doc_actions.pdf") + ); + const pdfDoc = await loadingTask.promise; + const hasJSActions = await pdfDoc.hasJSActions(); + + expect(hasJSActions).toEqual(true); + + await loadingTask.destroy(); + }); + it("gets JSActions (none)", function (done) { const promise = pdfDocument.getJSActions(); promise