1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-19 14:48:08 +02:00

Avoid re-parsing global images that failed decoding (issue 18042, PR 17428 follow-up)

For images that failed to decode once we want to avoid a pointless round-trip to the main-thread, which could otherwise happen for globally cached images.
This commit is contained in:
Jonas Jenwald 2024-05-04 15:59:07 +02:00
parent bb9bb34721
commit c5f92437f7
6 changed files with 109 additions and 23 deletions

View file

@ -793,30 +793,42 @@ class PartialEvaluator {
args = [objId, w, h];
operatorList.addImageOps(OPS.paintImageXObject, args, optionalContent);
// For large (at least 500x500) or more complex images that we'll cache
// globally, check if the image is still cached locally on the main-thread
// to avoid having to re-parse the image (since that can be slow).
if (
cacheGlobally &&
(w * h > 250000 || dict.has("SMask") || dict.has("Mask"))
) {
const localLength = await this.handler.sendWithPromise("commonobj", [
objId,
"CopyLocalImage",
{ imageRef },
]);
if (localLength) {
if (cacheGlobally) {
if (this.globalImageCache.hasDecodeFailed(imageRef)) {
this.globalImageCache.setData(imageRef, {
objId,
fn: OPS.paintImageXObject,
args,
optionalContent,
byteSize: 0, // Temporary entry, to avoid `setData` returning early.
byteSize: 0, // Data is `null`, since decoding failed previously.
});
this.globalImageCache.addByteSize(imageRef, localLength);
this._sendImgData(objId, /* imgData = */ null, cacheGlobally);
return;
}
// For large (at least 500x500) or more complex images that we'll cache
// globally, check if the image is still cached locally on the main-thread
// to avoid having to re-parse the image (since that can be slow).
if (w * h > 250000 || dict.has("SMask") || dict.has("Mask")) {
const localLength = await this.handler.sendWithPromise("commonobj", [
objId,
"CopyLocalImage",
{ imageRef },
]);
if (localLength) {
this.globalImageCache.setData(imageRef, {
objId,
fn: OPS.paintImageXObject,
args,
optionalContent,
byteSize: 0, // Temporary entry, to avoid `setData` returning early.
});
this.globalImageCache.addByteSize(imageRef, localLength);
return;
}
}
}
PDFImage.buildImage({
@ -846,6 +858,9 @@ class PartialEvaluator {
.catch(reason => {
warn(`Unable to decode image "${objId}": "${reason}".`);
if (imageRef) {
this.globalImageCache.addDecodeFailed(imageRef);
}
return this._sendImgData(objId, /* imgData = */ null, cacheGlobally);
});

View file

@ -19,7 +19,7 @@ import {
unreachable,
warn,
} from "../shared/util.js";
import { RefSetCache } from "./primitives.js";
import { RefSet, RefSetCache } from "./primitives.js";
class BaseLocalCache {
constructor(options) {
@ -178,6 +178,8 @@ class GlobalImageCache {
static MAX_BYTE_SIZE = 5 * MAX_IMAGE_SIZE_TO_CACHE;
#decodeFailedSet = new RefSet();
constructor() {
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
assert(
@ -189,7 +191,7 @@ class GlobalImageCache {
this._imageCache = new RefSetCache();
}
get _byteSize() {
get #byteSize() {
let byteSize = 0;
for (const imageData of this._imageCache) {
byteSize += imageData.byteSize;
@ -197,11 +199,11 @@ class GlobalImageCache {
return byteSize;
}
get _cacheLimitReached() {
get #cacheLimitReached() {
if (this._imageCache.size < GlobalImageCache.MIN_IMAGES_TO_CACHE) {
return false;
}
if (this._byteSize < GlobalImageCache.MAX_BYTE_SIZE) {
if (this.#byteSize < GlobalImageCache.MAX_BYTE_SIZE) {
return false;
}
return true;
@ -218,12 +220,20 @@ class GlobalImageCache {
if (pageIndexSet.size < GlobalImageCache.NUM_PAGES_THRESHOLD) {
return false;
}
if (!this._imageCache.has(ref) && this._cacheLimitReached) {
if (!this._imageCache.has(ref) && this.#cacheLimitReached) {
return false;
}
return true;
}
addDecodeFailed(ref) {
this.#decodeFailedSet.put(ref);
}
hasDecodeFailed(ref) {
return this.#decodeFailedSet.has(ref);
}
/**
* PLEASE NOTE: Must be called *after* the `setData` method.
*/
@ -265,7 +275,7 @@ class GlobalImageCache {
if (this._imageCache.has(ref)) {
return;
}
if (this._cacheLimitReached) {
if (this.#cacheLimitReached) {
warn("GlobalImageCache.setData - cache limit reached.");
return;
}
@ -274,6 +284,7 @@ class GlobalImageCache {
clear(onlyData = false) {
if (!onlyData) {
this.#decodeFailedSet.clear();
this._refCache.clear();
}
this._imageCache.clear();

View file

@ -2806,7 +2806,7 @@ class WorkerTransport {
for (const pageProxy of this.#pageCache.values()) {
for (const [, data] of pageProxy.objs) {
if (data.ref !== imageRef) {
if (data?.ref !== imageRef) {
continue;
}
if (!data.dataLen) {

View file

@ -55,6 +55,7 @@
!issue17679.pdf
!issue17679_2.pdf
!issue18030.pdf
!issue18042.pdf
!issue14953.pdf
!issue15367.pdf
!issue15372.pdf

BIN
test/pdfs/issue18042.pdf Normal file

Binary file not shown.

View file

@ -4203,6 +4203,65 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
firstStatsOverall = null;
});
it("caches image resources at the document/page level, with corrupt images (issue 18042)", async function () {
const { NUM_PAGES_THRESHOLD } = GlobalImageCache;
const loadingTask = getDocument(buildGetDocumentParams("issue18042.pdf"));
const pdfDoc = await loadingTask.promise;
let checkedGlobalDecodeFailed = false;
for (let i = 1; i <= pdfDoc.numPages; i++) {
const pdfPage = await pdfDoc.getPage(i);
const viewport = pdfPage.getViewport({ scale: 1 });
const canvasAndCtx = CanvasFactory.create(
viewport.width,
viewport.height
);
const renderTask = pdfPage.render({
canvasContext: canvasAndCtx.context,
viewport,
});
await renderTask.promise;
const opList = renderTask.getOperatorList();
// The canvas is no longer necessary, since we only care about
// the image-data below.
CanvasFactory.destroy(canvasAndCtx);
const { commonObjs, objs } = pdfPage;
const imgIndex = opList.fnArray.indexOf(OPS.paintImageXObject);
const [objId] = opList.argsArray[imgIndex];
if (i < NUM_PAGES_THRESHOLD) {
expect(objId).toEqual(`img_p${i - 1}_1`);
expect(objs.has(objId)).toEqual(true);
expect(commonObjs.has(objId)).toEqual(false);
} else {
expect(objId).toEqual(
`g_${loadingTask.docId}_img_p${NUM_PAGES_THRESHOLD - 1}_1`
);
expect(objs.has(objId)).toEqual(false);
expect(commonObjs.has(objId)).toEqual(true);
}
// Ensure that the actual image data is identical for all pages.
const objsPool = i >= NUM_PAGES_THRESHOLD ? commonObjs : objs;
const imgData = objsPool.get(objId);
expect(imgData).toBe(null);
if (i === NUM_PAGES_THRESHOLD) {
checkedGlobalDecodeFailed = true;
}
}
expect(checkedGlobalDecodeFailed).toBeTruthy();
await loadingTask.destroy();
});
it("render for printing, with `printAnnotationStorage` set", async function () {
async function getPrintData(printAnnotationStorage = null) {
const canvasAndCtx = CanvasFactory.create(