diff --git a/src/core/catalog.js b/src/core/catalog.js index c3777866a..54c6849ef 100644 --- a/src/core/catalog.js +++ b/src/core/catalog.js @@ -34,7 +34,6 @@ import { XRefEntryException, } from "./core_utils.js"; import { - createPromiseCapability, createValidAbsoluteUrl, DocumentActionEventType, FormatError, @@ -1091,8 +1090,7 @@ class Catalog { }); } - getPageDict(pageIndex) { - const capability = createPromiseCapability(); + async getPageDict(pageIndex) { const nodesToVisit = [this.toplevelPagesDict]; const visitedNodes = new RefSet(); @@ -1104,130 +1102,105 @@ class Catalog { pageKidsCountCache = this.pageKidsCountCache; let currentPageIndex = 0; - function next() { - while (nodesToVisit.length) { - const currentNode = nodesToVisit.pop(); + while (nodesToVisit.length) { + const currentNode = nodesToVisit.pop(); - if (currentNode instanceof Ref) { - const count = pageKidsCountCache.get(currentNode); - // Skip nodes where the page can't be. - if (count >= 0 && currentPageIndex + count <= pageIndex) { - currentPageIndex += count; - continue; + if (currentNode instanceof Ref) { + const count = pageKidsCountCache.get(currentNode); + // Skip nodes where the page can't be. + if (count >= 0 && currentPageIndex + count <= pageIndex) { + currentPageIndex += count; + continue; + } + // Prevent circular references in the /Pages tree. + if (visitedNodes.has(currentNode)) { + throw new FormatError("Pages tree contains circular reference."); + } + visitedNodes.put(currentNode); + + const obj = await xref.fetchAsync(currentNode); + if (obj instanceof Dict) { + let type = obj.getRaw("Type"); + if (type instanceof Ref) { + type = await xref.fetchAsync(type); } - // Prevent circular references in the /Pages tree. - if (visitedNodes.has(currentNode)) { - capability.reject( - new FormatError("Pages tree contains circular reference.") - ); - return; - } - visitedNodes.put(currentNode); - - xref.fetchAsync(currentNode).then(function (obj) { - if (isDict(obj, "Page") || (isDict(obj) && !obj.has("Kids"))) { - // Cache the Page reference, since it can *greatly* improve - // performance by reducing redundant lookups in long documents - // where all nodes are found at *one* level of the tree. - if (currentNode && !pageKidsCountCache.has(currentNode)) { - pageKidsCountCache.put(currentNode, 1); - } - - if (pageIndex === currentPageIndex) { - capability.resolve([obj, currentNode]); - } else { - currentPageIndex++; - next(); - } - return; + if (isName(type, "Page") || !obj.has("Kids")) { + // Cache the Page reference, since it can *greatly* improve + // performance by reducing redundant lookups in long documents + // where all nodes are found at *one* level of the tree. + if (currentNode && !pageKidsCountCache.has(currentNode)) { + pageKidsCountCache.put(currentNode, 1); } - nodesToVisit.push(obj); - next(); - }, capability.reject); - return; - } - // Must be a child page dictionary. - if (!(currentNode instanceof Dict)) { - capability.reject( - new FormatError( - "Page dictionary kid reference points to wrong type of object." - ) - ); - return; - } - - let count; - try { - count = currentNode.get("Count"); - } catch (ex) { - if (ex instanceof MissingDataException) { - throw ex; - } - } - if (Number.isInteger(count) && count >= 0) { - // Cache the Kids count, since it can reduce redundant lookups in - // documents where all nodes are found at *one* level of the tree. - const objId = currentNode.objId; - if (objId && !pageKidsCountCache.has(objId)) { - pageKidsCountCache.put(objId, count); - } - // Skip nodes where the page can't be. - if (currentPageIndex + count <= pageIndex) { - currentPageIndex += count; - continue; - } - } - - let kids; - try { - kids = currentNode.get("Kids"); - } catch (ex) { - if (ex instanceof MissingDataException) { - throw ex; - } - } - if (!Array.isArray(kids)) { - // Prevent errors in corrupt PDF documents that violate the - // specification by *inlining* Page dicts directly in the Kids - // array, rather than using indirect objects (fixes issue9540.pdf). - let type; - try { - type = currentNode.get("Type"); - } catch (ex) { - if (ex instanceof MissingDataException) { - throw ex; - } - } - if ( - isName(type, "Page") || - (!currentNode.has("Type") && currentNode.has("Contents")) - ) { if (currentPageIndex === pageIndex) { - capability.resolve([currentNode, null]); - return; + return [obj, currentNode]; } currentPageIndex++; continue; } + } + nodesToVisit.push(obj); + continue; + } - capability.reject( - new FormatError("Page dictionary kids object is not an array.") - ); - return; + // Must be a child page dictionary. + if (!(currentNode instanceof Dict)) { + throw new FormatError( + "Page dictionary kid reference points to wrong type of object." + ); + } + const { objId } = currentNode; + + let count = currentNode.getRaw("Count"); + if (count instanceof Ref) { + count = await xref.fetchAsync(count); + } + if (Number.isInteger(count) && count >= 0) { + // Cache the Kids count, since it can reduce redundant lookups in + // documents where all nodes are found at *one* level of the tree. + if (objId && !pageKidsCountCache.has(objId)) { + pageKidsCountCache.put(objId, count); } - // Always check all `Kids` nodes, to avoid getting stuck in an empty - // node further down in the tree (see issue5644.pdf, issue8088.pdf), - // and to ensure that we actually find the correct `Page` dict. - for (let last = kids.length - 1; last >= 0; last--) { - nodesToVisit.push(kids[last]); + // Skip nodes where the page can't be. + if (currentPageIndex + count <= pageIndex) { + currentPageIndex += count; + continue; } } - capability.reject(new Error(`Page index ${pageIndex} not found.`)); + + let kids = currentNode.getRaw("Kids"); + if (kids instanceof Ref) { + kids = await xref.fetchAsync(kids); + } + if (!Array.isArray(kids)) { + // Prevent errors in corrupt PDF documents that violate the + // specification by *inlining* Page dicts directly in the Kids + // array, rather than using indirect objects (fixes issue9540.pdf). + let type = currentNode.getRaw("Type"); + if (type instanceof Ref) { + type = await xref.fetchAsync(type); + } + if (isName(type, "Page") || !currentNode.has("Kids")) { + if (currentPageIndex === pageIndex) { + return [currentNode, null]; + } + currentPageIndex++; + continue; + } + + throw new FormatError("Page dictionary kids object is not an array."); + } + + // Always check all `Kids` nodes, to avoid getting stuck in an empty + // node further down in the tree (see issue5644.pdf, issue8088.pdf), + // and to ensure that we actually find the correct `Page` dict. + for (let last = kids.length - 1; last >= 0; last--) { + nodesToVisit.push(kids[last]); + } } - next(); - return capability.promise; + + throw new Error(`Page index ${pageIndex} not found.`); } /** @@ -1319,7 +1292,20 @@ class Catalog { break; } - if (isDict(obj, "Page") || !obj.has("Kids")) { + let type; + try { + type = obj.get("Type"); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + if (ex instanceof XRefEntryException && !recoveryMode) { + throw ex; + } + addPageError(ex); + break; + } + if (isName(type, "Page") || !obj.has("Kids")) { addPageDict(obj, kidObj instanceof Ref ? kidObj : null); } else { queue.push({ currentNode: obj, posInKids: 0 }); diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 7eef2d2e4..24cc581bf 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -622,9 +622,7 @@ describe("api", function () { expect(false).toEqual(true); } catch (reason) { expect(reason instanceof UnknownErrorException).toEqual(true); - expect(reason.message).toEqual( - "Page dictionary kids object is not an array." - ); + expect(reason.message).toEqual("Illegal character: 41"); } try { await pdfDocument2.getPage(1); @@ -633,9 +631,7 @@ describe("api", function () { expect(false).toEqual(true); } catch (reason) { expect(reason instanceof UnknownErrorException).toEqual(true); - expect(reason.message).toEqual( - "Page dictionary kids object is not an array." - ); + expect(reason.message).toEqual("End of file inside array."); } await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); diff --git a/web/base_viewer.js b/web/base_viewer.js index 757d65d12..f73d876ba 100644 --- a/web/base_viewer.js +++ b/web/base_viewer.js @@ -76,7 +76,7 @@ const ENABLE_PERMISSIONS_CLASS = "enablePermissions"; const PagesCountLimit = { FORCE_SCROLL_MODE_PAGE: 15000, FORCE_LAZY_PAGE_INIT: 7500, - PAUSE_EAGER_PAGE_INIT: 500, + PAUSE_EAGER_PAGE_INIT: 250, }; /**