mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-19 14:48:08 +02:00
Avoid re-parsing global images that failed decoding (issue 18042, PR 17428 follow-up)
For images that failed to decode once we want to avoid a pointless round-trip to the main-thread, which could otherwise happen for globally cached images.
This commit is contained in:
parent
bb9bb34721
commit
c5f92437f7
6 changed files with 109 additions and 23 deletions
|
@ -793,30 +793,42 @@ class PartialEvaluator {
|
|||
args = [objId, w, h];
|
||||
operatorList.addImageOps(OPS.paintImageXObject, args, optionalContent);
|
||||
|
||||
// For large (at least 500x500) or more complex images that we'll cache
|
||||
// globally, check if the image is still cached locally on the main-thread
|
||||
// to avoid having to re-parse the image (since that can be slow).
|
||||
if (
|
||||
cacheGlobally &&
|
||||
(w * h > 250000 || dict.has("SMask") || dict.has("Mask"))
|
||||
) {
|
||||
const localLength = await this.handler.sendWithPromise("commonobj", [
|
||||
objId,
|
||||
"CopyLocalImage",
|
||||
{ imageRef },
|
||||
]);
|
||||
|
||||
if (localLength) {
|
||||
if (cacheGlobally) {
|
||||
if (this.globalImageCache.hasDecodeFailed(imageRef)) {
|
||||
this.globalImageCache.setData(imageRef, {
|
||||
objId,
|
||||
fn: OPS.paintImageXObject,
|
||||
args,
|
||||
optionalContent,
|
||||
byteSize: 0, // Temporary entry, to avoid `setData` returning early.
|
||||
byteSize: 0, // Data is `null`, since decoding failed previously.
|
||||
});
|
||||
this.globalImageCache.addByteSize(imageRef, localLength);
|
||||
|
||||
this._sendImgData(objId, /* imgData = */ null, cacheGlobally);
|
||||
return;
|
||||
}
|
||||
|
||||
// For large (at least 500x500) or more complex images that we'll cache
|
||||
// globally, check if the image is still cached locally on the main-thread
|
||||
// to avoid having to re-parse the image (since that can be slow).
|
||||
if (w * h > 250000 || dict.has("SMask") || dict.has("Mask")) {
|
||||
const localLength = await this.handler.sendWithPromise("commonobj", [
|
||||
objId,
|
||||
"CopyLocalImage",
|
||||
{ imageRef },
|
||||
]);
|
||||
|
||||
if (localLength) {
|
||||
this.globalImageCache.setData(imageRef, {
|
||||
objId,
|
||||
fn: OPS.paintImageXObject,
|
||||
args,
|
||||
optionalContent,
|
||||
byteSize: 0, // Temporary entry, to avoid `setData` returning early.
|
||||
});
|
||||
this.globalImageCache.addByteSize(imageRef, localLength);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PDFImage.buildImage({
|
||||
|
@ -846,6 +858,9 @@ class PartialEvaluator {
|
|||
.catch(reason => {
|
||||
warn(`Unable to decode image "${objId}": "${reason}".`);
|
||||
|
||||
if (imageRef) {
|
||||
this.globalImageCache.addDecodeFailed(imageRef);
|
||||
}
|
||||
return this._sendImgData(objId, /* imgData = */ null, cacheGlobally);
|
||||
});
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@ import {
|
|||
unreachable,
|
||||
warn,
|
||||
} from "../shared/util.js";
|
||||
import { RefSetCache } from "./primitives.js";
|
||||
import { RefSet, RefSetCache } from "./primitives.js";
|
||||
|
||||
class BaseLocalCache {
|
||||
constructor(options) {
|
||||
|
@ -178,6 +178,8 @@ class GlobalImageCache {
|
|||
|
||||
static MAX_BYTE_SIZE = 5 * MAX_IMAGE_SIZE_TO_CACHE;
|
||||
|
||||
#decodeFailedSet = new RefSet();
|
||||
|
||||
constructor() {
|
||||
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
|
||||
assert(
|
||||
|
@ -189,7 +191,7 @@ class GlobalImageCache {
|
|||
this._imageCache = new RefSetCache();
|
||||
}
|
||||
|
||||
get _byteSize() {
|
||||
get #byteSize() {
|
||||
let byteSize = 0;
|
||||
for (const imageData of this._imageCache) {
|
||||
byteSize += imageData.byteSize;
|
||||
|
@ -197,11 +199,11 @@ class GlobalImageCache {
|
|||
return byteSize;
|
||||
}
|
||||
|
||||
get _cacheLimitReached() {
|
||||
get #cacheLimitReached() {
|
||||
if (this._imageCache.size < GlobalImageCache.MIN_IMAGES_TO_CACHE) {
|
||||
return false;
|
||||
}
|
||||
if (this._byteSize < GlobalImageCache.MAX_BYTE_SIZE) {
|
||||
if (this.#byteSize < GlobalImageCache.MAX_BYTE_SIZE) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
@ -218,12 +220,20 @@ class GlobalImageCache {
|
|||
if (pageIndexSet.size < GlobalImageCache.NUM_PAGES_THRESHOLD) {
|
||||
return false;
|
||||
}
|
||||
if (!this._imageCache.has(ref) && this._cacheLimitReached) {
|
||||
if (!this._imageCache.has(ref) && this.#cacheLimitReached) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
addDecodeFailed(ref) {
|
||||
this.#decodeFailedSet.put(ref);
|
||||
}
|
||||
|
||||
hasDecodeFailed(ref) {
|
||||
return this.#decodeFailedSet.has(ref);
|
||||
}
|
||||
|
||||
/**
|
||||
* PLEASE NOTE: Must be called *after* the `setData` method.
|
||||
*/
|
||||
|
@ -265,7 +275,7 @@ class GlobalImageCache {
|
|||
if (this._imageCache.has(ref)) {
|
||||
return;
|
||||
}
|
||||
if (this._cacheLimitReached) {
|
||||
if (this.#cacheLimitReached) {
|
||||
warn("GlobalImageCache.setData - cache limit reached.");
|
||||
return;
|
||||
}
|
||||
|
@ -274,6 +284,7 @@ class GlobalImageCache {
|
|||
|
||||
clear(onlyData = false) {
|
||||
if (!onlyData) {
|
||||
this.#decodeFailedSet.clear();
|
||||
this._refCache.clear();
|
||||
}
|
||||
this._imageCache.clear();
|
||||
|
|
|
@ -2806,7 +2806,7 @@ class WorkerTransport {
|
|||
|
||||
for (const pageProxy of this.#pageCache.values()) {
|
||||
for (const [, data] of pageProxy.objs) {
|
||||
if (data.ref !== imageRef) {
|
||||
if (data?.ref !== imageRef) {
|
||||
continue;
|
||||
}
|
||||
if (!data.dataLen) {
|
||||
|
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
|
@ -55,6 +55,7 @@
|
|||
!issue17679.pdf
|
||||
!issue17679_2.pdf
|
||||
!issue18030.pdf
|
||||
!issue18042.pdf
|
||||
!issue14953.pdf
|
||||
!issue15367.pdf
|
||||
!issue15372.pdf
|
||||
|
|
BIN
test/pdfs/issue18042.pdf
Normal file
BIN
test/pdfs/issue18042.pdf
Normal file
Binary file not shown.
|
@ -4203,6 +4203,65 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
|||
firstStatsOverall = null;
|
||||
});
|
||||
|
||||
it("caches image resources at the document/page level, with corrupt images (issue 18042)", async function () {
|
||||
const { NUM_PAGES_THRESHOLD } = GlobalImageCache;
|
||||
|
||||
const loadingTask = getDocument(buildGetDocumentParams("issue18042.pdf"));
|
||||
const pdfDoc = await loadingTask.promise;
|
||||
let checkedGlobalDecodeFailed = false;
|
||||
|
||||
for (let i = 1; i <= pdfDoc.numPages; i++) {
|
||||
const pdfPage = await pdfDoc.getPage(i);
|
||||
const viewport = pdfPage.getViewport({ scale: 1 });
|
||||
|
||||
const canvasAndCtx = CanvasFactory.create(
|
||||
viewport.width,
|
||||
viewport.height
|
||||
);
|
||||
const renderTask = pdfPage.render({
|
||||
canvasContext: canvasAndCtx.context,
|
||||
viewport,
|
||||
});
|
||||
|
||||
await renderTask.promise;
|
||||
const opList = renderTask.getOperatorList();
|
||||
// The canvas is no longer necessary, since we only care about
|
||||
// the image-data below.
|
||||
CanvasFactory.destroy(canvasAndCtx);
|
||||
|
||||
const { commonObjs, objs } = pdfPage;
|
||||
const imgIndex = opList.fnArray.indexOf(OPS.paintImageXObject);
|
||||
const [objId] = opList.argsArray[imgIndex];
|
||||
|
||||
if (i < NUM_PAGES_THRESHOLD) {
|
||||
expect(objId).toEqual(`img_p${i - 1}_1`);
|
||||
|
||||
expect(objs.has(objId)).toEqual(true);
|
||||
expect(commonObjs.has(objId)).toEqual(false);
|
||||
} else {
|
||||
expect(objId).toEqual(
|
||||
`g_${loadingTask.docId}_img_p${NUM_PAGES_THRESHOLD - 1}_1`
|
||||
);
|
||||
|
||||
expect(objs.has(objId)).toEqual(false);
|
||||
expect(commonObjs.has(objId)).toEqual(true);
|
||||
}
|
||||
|
||||
// Ensure that the actual image data is identical for all pages.
|
||||
const objsPool = i >= NUM_PAGES_THRESHOLD ? commonObjs : objs;
|
||||
const imgData = objsPool.get(objId);
|
||||
|
||||
expect(imgData).toBe(null);
|
||||
|
||||
if (i === NUM_PAGES_THRESHOLD) {
|
||||
checkedGlobalDecodeFailed = true;
|
||||
}
|
||||
}
|
||||
expect(checkedGlobalDecodeFailed).toBeTruthy();
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("render for printing, with `printAnnotationStorage` set", async function () {
|
||||
async function getPrintData(printAnnotationStorage = null) {
|
||||
const canvasAndCtx = CanvasFactory.create(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue