diff --git a/src/core/evaluator.js b/src/core/evaluator.js index e1bd19434..8a822954a 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -793,30 +793,42 @@ class PartialEvaluator { args = [objId, w, h]; operatorList.addImageOps(OPS.paintImageXObject, args, optionalContent); - // For large (at least 500x500) or more complex images that we'll cache - // globally, check if the image is still cached locally on the main-thread - // to avoid having to re-parse the image (since that can be slow). - if ( - cacheGlobally && - (w * h > 250000 || dict.has("SMask") || dict.has("Mask")) - ) { - const localLength = await this.handler.sendWithPromise("commonobj", [ - objId, - "CopyLocalImage", - { imageRef }, - ]); - - if (localLength) { + if (cacheGlobally) { + if (this.globalImageCache.hasDecodeFailed(imageRef)) { this.globalImageCache.setData(imageRef, { objId, fn: OPS.paintImageXObject, args, optionalContent, - byteSize: 0, // Temporary entry, to avoid `setData` returning early. + byteSize: 0, // Data is `null`, since decoding failed previously. }); - this.globalImageCache.addByteSize(imageRef, localLength); + + this._sendImgData(objId, /* imgData = */ null, cacheGlobally); return; } + + // For large (at least 500x500) or more complex images that we'll cache + // globally, check if the image is still cached locally on the main-thread + // to avoid having to re-parse the image (since that can be slow). + if (w * h > 250000 || dict.has("SMask") || dict.has("Mask")) { + const localLength = await this.handler.sendWithPromise("commonobj", [ + objId, + "CopyLocalImage", + { imageRef }, + ]); + + if (localLength) { + this.globalImageCache.setData(imageRef, { + objId, + fn: OPS.paintImageXObject, + args, + optionalContent, + byteSize: 0, // Temporary entry, to avoid `setData` returning early. + }); + this.globalImageCache.addByteSize(imageRef, localLength); + return; + } + } } PDFImage.buildImage({ @@ -846,6 +858,9 @@ class PartialEvaluator { .catch(reason => { warn(`Unable to decode image "${objId}": "${reason}".`); + if (imageRef) { + this.globalImageCache.addDecodeFailed(imageRef); + } return this._sendImgData(objId, /* imgData = */ null, cacheGlobally); }); diff --git a/src/core/image_utils.js b/src/core/image_utils.js index 021f4373f..110277c9e 100644 --- a/src/core/image_utils.js +++ b/src/core/image_utils.js @@ -19,7 +19,7 @@ import { unreachable, warn, } from "../shared/util.js"; -import { RefSetCache } from "./primitives.js"; +import { RefSet, RefSetCache } from "./primitives.js"; class BaseLocalCache { constructor(options) { @@ -178,6 +178,8 @@ class GlobalImageCache { static MAX_BYTE_SIZE = 5 * MAX_IMAGE_SIZE_TO_CACHE; + #decodeFailedSet = new RefSet(); + constructor() { if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { assert( @@ -189,7 +191,7 @@ class GlobalImageCache { this._imageCache = new RefSetCache(); } - get _byteSize() { + get #byteSize() { let byteSize = 0; for (const imageData of this._imageCache) { byteSize += imageData.byteSize; @@ -197,11 +199,11 @@ class GlobalImageCache { return byteSize; } - get _cacheLimitReached() { + get #cacheLimitReached() { if (this._imageCache.size < GlobalImageCache.MIN_IMAGES_TO_CACHE) { return false; } - if (this._byteSize < GlobalImageCache.MAX_BYTE_SIZE) { + if (this.#byteSize < GlobalImageCache.MAX_BYTE_SIZE) { return false; } return true; @@ -218,12 +220,20 @@ class GlobalImageCache { if (pageIndexSet.size < GlobalImageCache.NUM_PAGES_THRESHOLD) { return false; } - if (!this._imageCache.has(ref) && this._cacheLimitReached) { + if (!this._imageCache.has(ref) && this.#cacheLimitReached) { return false; } return true; } + addDecodeFailed(ref) { + this.#decodeFailedSet.put(ref); + } + + hasDecodeFailed(ref) { + return this.#decodeFailedSet.has(ref); + } + /** * PLEASE NOTE: Must be called *after* the `setData` method. */ @@ -265,7 +275,7 @@ class GlobalImageCache { if (this._imageCache.has(ref)) { return; } - if (this._cacheLimitReached) { + if (this.#cacheLimitReached) { warn("GlobalImageCache.setData - cache limit reached."); return; } @@ -274,6 +284,7 @@ class GlobalImageCache { clear(onlyData = false) { if (!onlyData) { + this.#decodeFailedSet.clear(); this._refCache.clear(); } this._imageCache.clear(); diff --git a/src/display/api.js b/src/display/api.js index 8430e0f3a..b7415600c 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -2783,7 +2783,7 @@ class WorkerTransport { for (const pageProxy of this.#pageCache.values()) { for (const [, data] of pageProxy.objs) { - if (data.ref !== imageRef) { + if (data?.ref !== imageRef) { continue; } if (!data.dataLen) { diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 1f1198aa8..e458cc908 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -55,6 +55,7 @@ !issue17679.pdf !issue17679_2.pdf !issue18030.pdf +!issue18042.pdf !issue14953.pdf !issue15367.pdf !issue15372.pdf diff --git a/test/pdfs/issue18042.pdf b/test/pdfs/issue18042.pdf new file mode 100644 index 000000000..26c3b9c39 Binary files /dev/null and b/test/pdfs/issue18042.pdf differ diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index c1ab291df..1aa43aa83 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -4203,6 +4203,65 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) firstStatsOverall = null; }); + it("caches image resources at the document/page level, with corrupt images (issue 18042)", async function () { + const { NUM_PAGES_THRESHOLD } = GlobalImageCache; + + const loadingTask = getDocument(buildGetDocumentParams("issue18042.pdf")); + const pdfDoc = await loadingTask.promise; + let checkedGlobalDecodeFailed = false; + + for (let i = 1; i <= pdfDoc.numPages; i++) { + const pdfPage = await pdfDoc.getPage(i); + const viewport = pdfPage.getViewport({ scale: 1 }); + + const canvasAndCtx = CanvasFactory.create( + viewport.width, + viewport.height + ); + const renderTask = pdfPage.render({ + canvasContext: canvasAndCtx.context, + viewport, + }); + + await renderTask.promise; + const opList = renderTask.getOperatorList(); + // The canvas is no longer necessary, since we only care about + // the image-data below. + CanvasFactory.destroy(canvasAndCtx); + + const { commonObjs, objs } = pdfPage; + const imgIndex = opList.fnArray.indexOf(OPS.paintImageXObject); + const [objId] = opList.argsArray[imgIndex]; + + if (i < NUM_PAGES_THRESHOLD) { + expect(objId).toEqual(`img_p${i - 1}_1`); + + expect(objs.has(objId)).toEqual(true); + expect(commonObjs.has(objId)).toEqual(false); + } else { + expect(objId).toEqual( + `g_${loadingTask.docId}_img_p${NUM_PAGES_THRESHOLD - 1}_1` + ); + + expect(objs.has(objId)).toEqual(false); + expect(commonObjs.has(objId)).toEqual(true); + } + + // Ensure that the actual image data is identical for all pages. + const objsPool = i >= NUM_PAGES_THRESHOLD ? commonObjs : objs; + const imgData = objsPool.get(objId); + + expect(imgData).toBe(null); + + if (i === NUM_PAGES_THRESHOLD) { + checkedGlobalDecodeFailed = true; + } + } + expect(checkedGlobalDecodeFailed).toBeTruthy(); + + await loadingTask.destroy(); + }); + it("render for printing, with `printAnnotationStorage` set", async function () { async function getPrintData(printAnnotationStorage = null) { const canvasAndCtx = CanvasFactory.create(