mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-26 01:58:06 +02:00
[Regression] Eagerly fetch/parse the entire /Pages-tree in corrupt documents (issue 14303, PR 14311 follow-up)
*Please note:* This is similar to the method that existed prior to PR 3848, but the new method will *only* be used as a fallback when parsing of corrupt PDF documents. The implementation in PR 14311 unfortunately turned out to be *way* too simplistic, as evidenced by the recently added test-files in issue 14303, since it may *cause* infinite loops in `PDFDocument.checkLastPage` for some corrupt PDF documents.[1] To avoid this, the easiest solution that I could come up with was to fall back to eagerly parsing the *entire* /Pages-tree when the /Count-entry validation fails during document initialization. Fixes *at least* two of the issues listed in issue 14303, namely the `poppler-395-0.pdf...` and `GHOSTSCRIPT-698804-1.pdf...` documents. --- [1] The whole point of PR 14311 was obviously to *get rid of* infinite loops during document initialization, not to introduce any more of those.
This commit is contained in:
parent
f61b74e38e
commit
1fac6371d3
7 changed files with 504 additions and 35 deletions
|
@ -50,7 +50,6 @@ import {
|
|||
getInheritableProperty,
|
||||
isWhiteSpace,
|
||||
MissingDataException,
|
||||
PageDictMissingException,
|
||||
validateCSSFont,
|
||||
XRefEntryException,
|
||||
XRefParseException,
|
||||
|
@ -1354,14 +1353,16 @@ class PDFDocument {
|
|||
}
|
||||
|
||||
async checkLastPage(recoveryMode = false) {
|
||||
this.catalog.setActualNumPages(); // Ensure that it's always reset.
|
||||
const { catalog, pdfManager } = this;
|
||||
|
||||
catalog.setActualNumPages(); // Ensure that it's always reset.
|
||||
let numPages;
|
||||
|
||||
try {
|
||||
await Promise.all([
|
||||
this.pdfManager.ensureDoc("xfaFactory"),
|
||||
this.pdfManager.ensureDoc("linearization"),
|
||||
this.pdfManager.ensureCatalog("numPages"),
|
||||
pdfManager.ensureDoc("xfaFactory"),
|
||||
pdfManager.ensureDoc("linearization"),
|
||||
pdfManager.ensureCatalog("numPages"),
|
||||
]);
|
||||
|
||||
if (this.xfaFactory) {
|
||||
|
@ -1369,13 +1370,13 @@ class PDFDocument {
|
|||
} else if (this.linearization) {
|
||||
numPages = this.linearization.numPages;
|
||||
} else {
|
||||
numPages = this.catalog.numPages;
|
||||
numPages = catalog.numPages;
|
||||
}
|
||||
|
||||
if (numPages === 1) {
|
||||
return;
|
||||
} else if (!Number.isInteger(numPages)) {
|
||||
if (!Number.isInteger(numPages)) {
|
||||
throw new FormatError("Page count is not an integer.");
|
||||
} else if (numPages <= 1) {
|
||||
return;
|
||||
}
|
||||
await this.getPage(numPages - 1);
|
||||
} catch (reason) {
|
||||
|
@ -1385,24 +1386,48 @@ class PDFDocument {
|
|||
// subsequent `this.getPage` calls.
|
||||
await this.cleanup();
|
||||
|
||||
let pageIndex = 1; // The first page was already loaded.
|
||||
while (true) {
|
||||
try {
|
||||
await this.getPage(pageIndex);
|
||||
} catch (reasonLoop) {
|
||||
if (reasonLoop instanceof PageDictMissingException) {
|
||||
break;
|
||||
}
|
||||
if (reasonLoop instanceof XRefEntryException) {
|
||||
if (!recoveryMode) {
|
||||
throw new XRefParseException();
|
||||
}
|
||||
break;
|
||||
let pagesTree;
|
||||
try {
|
||||
pagesTree = await pdfManager.ensureCatalog("getAllPageDicts");
|
||||
} catch (reasonAll) {
|
||||
if (reasonAll instanceof XRefEntryException) {
|
||||
if (!recoveryMode) {
|
||||
throw new XRefParseException();
|
||||
}
|
||||
}
|
||||
pageIndex++;
|
||||
catalog.setActualNumPages(1);
|
||||
return;
|
||||
}
|
||||
this.catalog.setActualNumPages(pageIndex);
|
||||
|
||||
for (const [pageIndex, [pageDict, ref]] of pagesTree) {
|
||||
let promise;
|
||||
if (pageDict instanceof Error) {
|
||||
promise = Promise.reject(pageDict);
|
||||
|
||||
// Prevent "uncaught exception: Object"-messages in the console.
|
||||
promise.catch(() => {});
|
||||
} else {
|
||||
promise = Promise.resolve(
|
||||
new Page({
|
||||
pdfManager,
|
||||
xref: this.xref,
|
||||
pageIndex,
|
||||
pageDict,
|
||||
ref,
|
||||
globalIdFactory: this._globalIdFactory,
|
||||
fontCache: catalog.fontCache,
|
||||
builtInCMapCache: catalog.builtInCMapCache,
|
||||
standardFontDataCache: catalog.standardFontDataCache,
|
||||
globalImageCache: catalog.globalImageCache,
|
||||
nonBlendModesSet: catalog.nonBlendModesSet,
|
||||
xfaFactory: null,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
this._pagePromises.set(pageIndex, promise);
|
||||
}
|
||||
catalog.setActualNumPages(pagesTree.size);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue