From 3351d3476d1f3bf6ff9098d267fbba0ff4f3257a Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 16 Oct 2020 12:20:44 +0200 Subject: [PATCH] Don't store complex data in `PDFDocument.formInfo`, and replace the `fields` object with a `hasFields` boolean instead *This patch is based on a couple of smaller things that I noticed when working on PR 12479.* - Don't store the /Fields on the `formInfo` getter, since that feels like overloading it with unintended (and too complex) data, and utilize a `hasFields` boolean instead. This functionality was originally added in PR 12271, to help determine what kind of form data a PDF document contains, and I think that we should ensure that the return value of `formInfo` only consists of "simple" data. With these changes the `fieldObjects` getter instead has to look-up the /Fields manually, however that shouldn't be a problem since the access is guarded by a `formInfo.hasFields` check which ensures that the data both exists and is valid. Furthermore, most documents doesn't even have any /AcroForm data anyway. - Determine the `hasFields` property *first*, to ensure that it's always correct even if there's errors when checking e.g. the /XFA or /SigFlags entires, since the `fieldObjects` getter depends on it. - Simplify a loop in `fieldObjects`, since the object being accessed is a `Map` and those have built-in iteration support. - Use a higher logging level for errors in the `formInfo` getter, and include the actual error message, since that'd have helped with fixing PR 12479 a lot quicker. - Update the JSDoc comment in `src/display/api.js` to list the return values correctly, and also slightly extend/improve the description. --- src/core/document.js | 23 ++++++++++------------- src/display/api.js | 5 +++-- test/unit/document_spec.js | 18 +++++++++--------- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index ee194660a..8702ec21d 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -708,20 +708,23 @@ class PDFDocument { } get formInfo() { - const formInfo = { hasAcroForm: false, hasXfa: false, fields: null }; + const formInfo = { hasFields: false, hasAcroForm: false, hasXfa: false }; const acroForm = this.catalog.acroForm; if (!acroForm) { return shadow(this, "formInfo", formInfo); } try { + const fields = acroForm.get("Fields"); + const hasFields = Array.isArray(fields) && fields.length > 0; + formInfo.hasFields = hasFields; // Used by the `fieldObjects` getter. + // The document contains XFA data if the `XFA` entry is a non-empty // array or stream. const xfa = acroForm.get("XFA"); - const hasXfa = + formInfo.hasXfa = (Array.isArray(xfa) && xfa.length > 0) || (isStream(xfa) && !xfa.isEmpty); - formInfo.hasXfa = hasXfa; // The document contains AcroForm data if the `Fields` entry is a // non-empty array and it doesn't consist of only document signatures. @@ -730,20 +733,15 @@ class PDFDocument { // store (invisible) document signatures. This can be detected using // the first bit of the `SigFlags` integer (see Table 219 in the // specification). - const fields = acroForm.get("Fields"); - const hasFields = Array.isArray(fields) && fields.length > 0; const sigFlags = acroForm.get("SigFlags"); const hasOnlyDocumentSignatures = !!(sigFlags & 0x1) && this._hasOnlyDocumentSignatures(fields); formInfo.hasAcroForm = hasFields && !hasOnlyDocumentSignatures; - if (hasFields) { - formInfo.fields = fields; - } } catch (ex) { if (ex instanceof MissingDataException) { throw ex; } - info("Cannot fetch form information."); + warn(`Cannot fetch form information: "${ex}".`); } return shadow(this, "formInfo", formInfo); } @@ -976,19 +974,18 @@ class PDFDocument { } get fieldObjects() { - const formInfo = this.formInfo; - if (!formInfo.fields) { + if (!this.formInfo.hasFields) { return shadow(this, "fieldObjects", Promise.resolve(null)); } const allFields = Object.create(null); const fieldPromises = new Map(); - for (const fieldRef of formInfo.fields) { + for (const fieldRef of this.catalog.acroForm.get("Fields")) { this._collectFieldObjects("", fieldRef, fieldPromises); } const allPromises = []; - for (const [name, promises] of fieldPromises.entries()) { + for (const [name, promises] of fieldPromises) { allPromises.push( Promise.all(promises).then(fields => { fields = fields.filter(field => field !== null); diff --git a/src/display/api.js b/src/display/api.js index 335e050d1..d8ab4ec64 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -878,8 +878,9 @@ class PDFDocumentProxy { } /** - * @returns {Promise>} A promise that is resolved with an - * {Array} containing field data for the JS sandbox. + * @returns {Promise | null>} A promise that is resolved with an + * {Array} containing /AcroForm field data for the JS sandbox, + * or `null` when no field data is present in the PDF file. */ getFieldObjects() { return this._transport.getFieldObjects(); diff --git a/test/unit/document_spec.js b/test/unit/document_spec.js index 5224cbd16..be58d85c9 100644 --- a/test/unit/document_spec.js +++ b/test/unit/document_spec.js @@ -64,7 +64,7 @@ describe("document", function () { expect(pdfDocument.formInfo).toEqual({ hasAcroForm: false, hasXfa: false, - fields: null, + hasFields: false, }); }); @@ -77,7 +77,7 @@ describe("document", function () { expect(pdfDocument.formInfo).toEqual({ hasAcroForm: false, hasXfa: false, - fields: null, + hasFields: false, }); acroForm.set("XFA", ["foo", "bar"]); @@ -85,7 +85,7 @@ describe("document", function () { expect(pdfDocument.formInfo).toEqual({ hasAcroForm: false, hasXfa: true, - fields: null, + hasFields: false, }); acroForm.set("XFA", new StringStream("")); @@ -93,7 +93,7 @@ describe("document", function () { expect(pdfDocument.formInfo).toEqual({ hasAcroForm: false, hasXfa: false, - fields: null, + hasFields: false, }); acroForm.set("XFA", new StringStream("non-empty")); @@ -101,7 +101,7 @@ describe("document", function () { expect(pdfDocument.formInfo).toEqual({ hasAcroForm: false, hasXfa: true, - fields: null, + hasFields: false, }); }); @@ -114,7 +114,7 @@ describe("document", function () { expect(pdfDocument.formInfo).toEqual({ hasAcroForm: false, hasXfa: false, - fields: null, + hasFields: false, }); acroForm.set("Fields", ["foo", "bar"]); @@ -122,7 +122,7 @@ describe("document", function () { expect(pdfDocument.formInfo).toEqual({ hasAcroForm: true, hasXfa: false, - fields: ["foo", "bar"], + hasFields: true, }); // If the first bit of the `SigFlags` entry is set and the `Fields` array @@ -133,7 +133,7 @@ describe("document", function () { expect(pdfDocument.formInfo).toEqual({ hasAcroForm: true, hasXfa: false, - fields: ["foo", "bar"], + hasFields: true, }); const annotationDict = new Dict(); @@ -156,7 +156,7 @@ describe("document", function () { expect(pdfDocument.formInfo).toEqual({ hasAcroForm: false, hasXfa: false, - fields: [kidsRef], + hasFields: true, }); }); });