From ff4dae05b0f8349b6277d45e2d282ad893a4b3af Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sun, 11 Apr 2021 12:00:14 +0200 Subject: [PATCH 1/2] Ensure that `getStructTree` won't break with `disableAutoFetch = true` set (PR 13171 follow-up) Open http://localhost:8888/web/viewer.html?file=/test/pdfs/pdf.pdf#disableStream=true&disableAutoFetch=true and observe the following message in the console (repeated for each page of the document): ``` Uncaught (in promise) Object { message: "Missing data [19787293, 19787294)", name: "UnknownErrorException", details: "MissingDataException: Missing data [19787293, 19787294)", stack: "BaseExceptionClosure@http://localhost:8888/src/shared/util.js:458:29\n@http://localhost:8888/src/shared/util.js:462:3\n" } ``` --- src/core/document.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/core/document.js b/src/core/document.js index 11830f0b0..27cbede28 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -454,6 +454,13 @@ class Page { const structTreeRoot = await this.pdfManager.ensureCatalog( "structTreeRoot" ); + return this.pdfManager.ensure(this, "_parseStructTree", [structTreeRoot]); + } + + /** + * @private + */ + _parseStructTree(structTreeRoot) { const tree = new StructTreePage(structTreeRoot, this.pageDict); tree.parse(); return tree; From 5adee0cdd14017ca79a56ef00c4ec07659f17084 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sun, 11 Apr 2021 12:04:29 +0200 Subject: [PATCH 2/2] [api-minor] Let `PDFPageProxy.getStructTree` return `null`, rather than an empty structTree, for documents without any accessibility data (PR 13171 follow-up) This is first of all consistent with existing API-methods, where we return `null` when the data in question doesn't exist. Secondly, it should also be (slightly) more efficient since there's less dummy-data that we need to transfer between threads. 
Finally, this prevents us from adding an empty/unnecessary span to *every* single page even in documents without any structure tree data. --- src/core/struct_tree.js | 4 +++ src/display/api.js | 3 +- test/unit/api_spec.js | 63 +++++++++++++++++++++++++++++++++++++++++ web/pdf_page_view.js | 3 ++ 4 files changed, 72 insertions(+), 1 deletion(-) diff --git a/src/core/struct_tree.js b/src/core/struct_tree.js index 41587d45c..a07d99b96 100644 --- a/src/core/struct_tree.js +++ b/src/core/struct_tree.js @@ -328,6 +328,10 @@ class StructTreePage { } nodeToSerializable(child, root); } + + if (root.children.length === 0) { + return null; + } return root; } } diff --git a/src/display/api.js b/src/display/api.js index 9b5f70536..b05fd4641 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -1522,7 +1522,8 @@ class PDFPageProxy { /** * @returns {Promise<StructTreeNode>} A promise that is resolved with a - * {@link StructTreeNode} object that represents the page's structure tree. + * {@link StructTreeNode} object that represents the page's structure tree, + * or `null` when no structure tree is present for the current page. 
*/ getStructTree() { return (this._structTreePromise ||= this._transport.getStructTree( diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index cc0548335..af9a494bf 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -1702,6 +1702,69 @@ describe("api", function () { .catch(done.fail); }); + it("gets empty structure tree", async function () { + const tree = await page.getStructTree(); + + expect(tree).toEqual(null); + }); + it("gets simple structure tree", async function () { + const loadingTask = getDocument( + buildGetDocumentParams("structure_simple.pdf") + ); + const pdfDoc = await loadingTask.promise; + const pdfPage = await pdfDoc.getPage(1); + const tree = await pdfPage.getStructTree(); + + expect(tree).toEqual({ + role: "Root", + children: [ + { + role: "Document", + children: [ + { + role: "H1", + children: [ + { + role: "NonStruct", + children: [{ type: "content", id: "page2R_mcid0" }], + }, + ], + }, + { + role: "P", + children: [ + { + role: "NonStruct", + children: [{ type: "content", id: "page2R_mcid1" }], + }, + ], + }, + { + role: "H2", + children: [ + { + role: "NonStruct", + children: [{ type: "content", id: "page2R_mcid2" }], + }, + ], + }, + { + role: "P", + children: [ + { + role: "NonStruct", + children: [{ type: "content", id: "page2R_mcid3" }], + }, + ], + }, + ], + }, + ], + }); + + await loadingTask.destroy(); + }); + it("gets operator list", function (done) { const promise = page.getOperatorList(); promise diff --git a/web/pdf_page_view.js b/web/pdf_page_view.js index f83b0568d..8d6771129 100644 --- a/web/pdf_page_view.js +++ b/web/pdf_page_view.js @@ -619,6 +619,9 @@ class PDFPageView { this.eventBus._off("textlayerrendered", this._onTextLayerRendered); this._onTextLayerRendered = null; this.pdfPage.getStructTree().then(tree => { + if (!tree) { + return; + } const treeDom = this.structTreeLayer.render(tree); treeDom.classList.add("structTree"); this.canvas.appendChild(treeDom);