From 0ebac67a9fedbc655500f29285d1632a8bb09a70 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sat, 20 Nov 2021 18:24:12 +0100 Subject: [PATCH 1/2] Remove the `{BaseViewer, PDFThumbnailViewer}._pagesRequests` caches In the `BaseViewer` this cache is mostly relevant in the `disableAutoFetch = true` mode, since the pages are being initialized *lazily* in that case. In the `PDFThumbnailViewer` this cache is mostly used for thumbnails that are actually being rendered, as opposed to those created directly from the "regular" pages. Please note that I'm not suggesting that we remove these caches because they're only used in some situations, but rather because they're for all intents and purposes actually *redundant*. In the API itself, we're already caching both the page-promises and the actual pages themselves on the `WorkerTransport`-instance. Hence these viewer-caches aren't really necessary in practice, and adds what to me mostly seems like an unnecessary level of indirection.[1] Given that the viewer now relies on caching in the API itself, this patch also adds a new unit-test to ensure that page-caching works (and keep working) as expected. --- [1] In the `WorkerTransport.getPage`-method the parameter is being validated on every call, but that's hardly enough code to warrant keeping the "duplicate" caches in the viewer in my opinion. --- test/unit/api_spec.js | 14 +++++++++++++ web/base_viewer.js | 39 +++++++++++++------------------------ web/pdf_thumbnail_viewer.js | 39 +++++++++++++------------------------ 3 files changed, 42 insertions(+), 50 deletions(-) diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index a2442e296..b41a0f91d 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -660,6 +660,20 @@ describe("api", function () { await loadingTask.destroy(); }); + it("gets page multiple time, with working caches", async function () { + const promiseA = pdfDocument.getPage(1); + const promiseB = pdfDocument.getPage(1); + + expect(promiseA instanceof Promise).toEqual(true); + expect(promiseA).toBe(promiseB); + + const pageA = await promiseA; + const pageB = await promiseB; + + expect(pageA instanceof PDFPageProxy).toEqual(true); + expect(pageA).toBe(pageB); + }); + it("gets page index", async function () { const ref = { num: 17, gen: 0 }; // Reference to second page. const pageIndex = await pdfDocument.getPageIndex(ref); diff --git a/web/base_viewer.js b/web/base_viewer.js index feb4471a5..dd8084877 100644 --- a/web/base_viewer.js +++ b/web/base_viewer.js @@ -601,7 +601,7 @@ class BaseViewer { } // Set the first `pdfPage` immediately, since it's already loaded, // rather than having to repeat the `PDFDocumentProxy.getPage` call in - // the `this._ensurePdfPageLoaded` method before rendering can start. + // the `this.#ensurePdfPageLoaded` method before rendering can start. const firstPageView = this._pages[0]; if (firstPageView) { firstPageView.setPdfPage(firstPdfPage); @@ -708,7 +708,6 @@ class BaseViewer { this._location = null; this._pagesRotation = 0; this._optionalContentConfigPromise = null; - this._pagesRequests = new WeakMap(); this._firstPageCapability = createPromiseCapability(); this._onePageRenderedCapability = createPromiseCapability(); this._pagesCapability = createPromiseCapability(); @@ -1356,32 +1355,22 @@ class BaseViewer { /** * @param {PDFPageView} pageView - * @returns {Promise} Returns a promise containing a {PDFPageProxy} object. - * @private + * @returns {Promise} */ - _ensurePdfPageLoaded(pageView) { + async #ensurePdfPageLoaded(pageView) { if (pageView.pdfPage) { - return Promise.resolve(pageView.pdfPage); + return pageView.pdfPage; } - if (this._pagesRequests.has(pageView)) { - return this._pagesRequests.get(pageView); + try { + const pdfPage = await this.pdfDocument.getPage(pageView.id); + if (!pageView.pdfPage) { + pageView.setPdfPage(pdfPage); + } + return pdfPage; + } catch (reason) { + console.error("Unable to get page for page view", reason); + return null; // Page error -- there is nothing that can be done. } - const promise = this.pdfDocument - .getPage(pageView.id) - .then(pdfPage => { - if (!pageView.pdfPage) { - pageView.setPdfPage(pdfPage); - } - this._pagesRequests.delete(pageView); - return pdfPage; - }) - .catch(reason => { - console.error("Unable to get page for page view", reason); - // Page error -- there is nothing that can be done. - this._pagesRequests.delete(pageView); - }); - this._pagesRequests.set(pageView, promise); - return promise; } #getScrollAhead(visible) { @@ -1432,7 +1421,7 @@ class BaseViewer { this.#toggleLoadingIconSpinner(visiblePages.ids); if (pageView) { - this._ensurePdfPageLoaded(pageView).then(() => { + this.#ensurePdfPageLoaded(pageView).then(() => { this.renderingQueue.renderView(pageView); }); return true; diff --git a/web/pdf_thumbnail_viewer.js b/web/pdf_thumbnail_viewer.js index 0d83ca25d..e30501455 100644 --- a/web/pdf_thumbnail_viewer.js +++ b/web/pdf_thumbnail_viewer.js @@ -166,7 +166,6 @@ class PDFThumbnailViewer { this._pageLabels = null; this._pagesRotation = 0; this._optionalContentConfigPromise = null; - this._pagesRequests = new WeakMap(); this._setImageDisabled = false; // Remove the thumbnails from the DOM. @@ -211,7 +210,7 @@ class PDFThumbnailViewer { } // Set the first `pdfPage` immediately, since it's already loaded, // rather than having to repeat the `PDFDocumentProxy.getPage` call in - // the `this._ensurePdfPageLoaded` method before rendering can start. + // the `this.#ensurePdfPageLoaded` method before rendering can start. const firstThumbnailView = this._thumbnails[0]; if (firstThumbnailView) { firstThumbnailView.setPdfPage(firstPdfPage); @@ -262,32 +261,22 @@ class PDFThumbnailViewer { /** * @param {PDFThumbnailView} thumbView - * @returns {PDFPage} - * @private + * @returns {Promise} */ - _ensurePdfPageLoaded(thumbView) { + async #ensurePdfPageLoaded(thumbView) { if (thumbView.pdfPage) { - return Promise.resolve(thumbView.pdfPage); + return thumbView.pdfPage; } - if (this._pagesRequests.has(thumbView)) { - return this._pagesRequests.get(thumbView); + try { + const pdfPage = await this.pdfDocument.getPage(thumbView.id); + if (!thumbView.pdfPage) { + thumbView.setPdfPage(pdfPage); + } + return pdfPage; + } catch (reason) { + console.error("Unable to get page for thumb view", reason); + return null; // Page error -- there is nothing that can be done. } - const promise = this.pdfDocument - .getPage(thumbView.id) - .then(pdfPage => { - if (!thumbView.pdfPage) { - thumbView.setPdfPage(pdfPage); - } - this._pagesRequests.delete(thumbView); - return pdfPage; - }) - .catch(reason => { - console.error("Unable to get page for thumb view", reason); - // Page error -- there is nothing that can be done. - this._pagesRequests.delete(thumbView); - }); - this._pagesRequests.set(thumbView, promise); - return promise; } #getScrollAhead(visible) { @@ -308,7 +297,7 @@ class PDFThumbnailViewer { scrollAhead ); if (thumbView) { - this._ensurePdfPageLoaded(thumbView).then(() => { + this.#ensurePdfPageLoaded(thumbView).then(() => { this.renderingQueue.renderView(thumbView); }); return true; From 58a27286476646ef9b2a4b10b63aea564d13a518 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sat, 20 Nov 2021 22:31:31 +0100 Subject: [PATCH 2/2] Ensure that `BaseViewer.#ensurePdfPageLoaded` updates the `PDFLinkService`-pagesRefCache if necessary The issue that this patch fixes has existed ever since the viewer was first re-factored into components, however it only really affects the `disableAutoFetch = true` mode. By default we're fetching all pages in `BaseViewer.setDocument`, and as part of the parsing/initialization we're also populating the `PDFLinkService`-pagesRefCache. The purpose of that cache is to make navigating to any internal destinations faster, by not having to (asynchronously) lookup the pageNumber via the API when handling the destination. In comparison, when the `disableAutoFetch = true` mode is being used we're instead *lazily* initializing the pages in the `BaseViewer.#ensurePdfPageLoaded`-method. For some reason, that I can only assume is a simple oversight, we're not attempting to update the `PDFLinkService`-pagesRefCache in that case. --- web/base_viewer.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/web/base_viewer.js b/web/base_viewer.js index dd8084877..074447bed 100644 --- a/web/base_viewer.js +++ b/web/base_viewer.js @@ -1366,6 +1366,9 @@ class BaseViewer { if (!pageView.pdfPage) { pageView.setPdfPage(pdfPage); } + if (!this.linkService._cachedPageNumber(pdfPage.ref)) { + this.linkService.cachePageRef(pageView.id, pdfPage.ref); + } return pdfPage; } catch (reason) { console.error("Unable to get page for page view", reason);