1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-20 15:18:08 +02:00

[Regression] Eagerly fetch/parse the entire /Pages-tree in corrupt documents (issue 14303, PR 14311 follow-up)

*Please note:* This is similar to the method that existed prior to PR 3848, but the new method will *only* be used as a fallback when parsing of corrupt PDF documents.

The implementation in PR 14311 unfortunately turned out to be *way* too simplistic, as evidenced by the recently added test-files in issue 14303, since it may *cause* infinite loops in `PDFDocument.checkLastPage` for some corrupt PDF documents.[1]
To avoid this, the easiest solution that I could come up with was to fall back to eagerly parsing the *entire* /Pages-tree when the /Count-entry validation fails during document initialization.

Fixes *at least* two of the issues listed in issue 14303, namely the `poppler-395-0.pdf...` and `GHOSTSCRIPT-698804-1.pdf...` documents.

---
[1] The whole point of PR 14311 was obviously to *get rid of* infinite loops during document initialization, not to introduce any more of those.
This commit is contained in:
Jonas Jenwald 2021-12-02 01:40:52 +01:00
parent f61b74e38e
commit 1fac6371d3
7 changed files with 504 additions and 35 deletions

View file

@ -492,6 +492,8 @@
!xfa_issue14315.pdf
!poppler-67295-0.pdf
!poppler-85140-0.pdf
!poppler-395-0-fuzzed.pdf
!GHOSTSCRIPT-698804-1-fuzzed.pdf
!poppler-91414-0-53.pdf
!poppler-91414-0-54.pdf
!poppler-742-0-fuzzed.pdf

View file

@ -0,0 +1,69 @@
%PDF-1.4
%âãÏÓ
1 0 obj
<<
/Type /Catalog
/Outline 2 0 R
/Pages 3 0 R
>>
endobj
2 0 obj
<<
/Type /Outlines
/Count 0
>>
endobj
3 0 obj
<<
/Type /Pages
/Kids [ 4 0 R ]
/Count 1
>>
endobj
4 0 obj
<<
/Type /Page
/Parent 3 0 R
/MediaBox [ 0 0 612 792 ]
/Contents 5 0 R
/Resources <<
/ProcSet 6 0 R
>>
>>
endobj
5 0 obj
<<
/Length 0
>>
stream
endstream
endobj
6 0 obj
[ /PDF ]
endobj
xref
0 2
0000000000 65536 f
0000000016 00000 n
00000004294967296 3
0000000138 00000 n
0000000204 00000 n
0000000342 00000 n
trailer
<<
/Size 7
/Root 1 0 R
>>
startxref
418
%%EOF

File diff suppressed because one or more lines are too long

View file

@ -495,14 +495,27 @@ describe("api", function () {
const loadingTask2 = getDocument(
buildGetDocumentParams("poppler-85140-0.pdf")
);
const loadingTask3 = getDocument(
buildGetDocumentParams("poppler-395-0-fuzzed.pdf")
);
const loadingTask4 = getDocument(
buildGetDocumentParams("GHOSTSCRIPT-698804-1-fuzzed.pdf")
);
expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true);
expect(loadingTask2 instanceof PDFDocumentLoadingTask).toEqual(true);
expect(loadingTask3 instanceof PDFDocumentLoadingTask).toEqual(true);
expect(loadingTask4 instanceof PDFDocumentLoadingTask).toEqual(true);
const pdfDocument1 = await loadingTask1.promise;
const pdfDocument2 = await loadingTask2.promise;
const pdfDocument3 = await loadingTask3.promise;
const pdfDocument4 = await loadingTask4.promise;
expect(pdfDocument1.numPages).toEqual(1);
expect(pdfDocument2.numPages).toEqual(1);
expect(pdfDocument3.numPages).toEqual(1);
expect(pdfDocument4.numPages).toEqual(1);
const pageA = await pdfDocument1.getPage(1);
expect(pageA instanceof PDFPageProxy).toEqual(true);
@ -516,6 +529,28 @@ describe("api", function () {
expect(reason instanceof UnknownErrorException).toEqual(true);
expect(reason.message).toEqual("Bad (uncompressed) XRef entry: 3R");
}
try {
await pdfDocument3.getPage(1);
// Shouldn't get here.
expect(false).toEqual(true);
} catch (reason) {
expect(reason instanceof UnknownErrorException).toEqual(true);
expect(reason.message).toEqual(
"Page dictionary kid reference points to wrong type of object."
);
}
try {
await pdfDocument4.getPage(1);
// Shouldn't get here.
expect(false).toEqual(true);
} catch (reason) {
expect(reason instanceof UnknownErrorException).toEqual(true);
expect(reason.message).toEqual(
"Page dictionary kid reference points to wrong type of object."
);
}
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
});