mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-20 15:18:08 +02:00
[Regression] Eagerly fetch/parse the entire /Pages-tree in corrupt documents (issue 14303, PR 14311 follow-up)
*Please note:* This is similar to the method that existed prior to PR 3848, but the new method will *only* be used as a fallback when parsing corrupt PDF documents. The implementation in PR 14311 unfortunately turned out to be *way* too simplistic, as evidenced by the recently added test-files in issue 14303, since it may *cause* infinite loops in `PDFDocument.checkLastPage` for some corrupt PDF documents.[1] To avoid this, the easiest solution that I could come up with was to fall back to eagerly parsing the *entire* /Pages-tree when the /Count-entry validation fails during document initialization. Fixes *at least* two of the issues listed in issue 14303, namely the `poppler-395-0.pdf...` and `GHOSTSCRIPT-698804-1.pdf...` documents. --- [1] The whole point of PR 14311 was obviously to *get rid of* infinite loops during document initialization, not to introduce any more of those.
This commit is contained in:
parent
f61b74e38e
commit
1fac6371d3
7 changed files with 504 additions and 35 deletions
2
test/pdfs/.gitignore
vendored
2
test/pdfs/.gitignore
vendored
|
@ -492,6 +492,8 @@
|
|||
!xfa_issue14315.pdf
|
||||
!poppler-67295-0.pdf
|
||||
!poppler-85140-0.pdf
|
||||
!poppler-395-0-fuzzed.pdf
|
||||
!GHOSTSCRIPT-698804-1-fuzzed.pdf
|
||||
!poppler-91414-0-53.pdf
|
||||
!poppler-91414-0-54.pdf
|
||||
!poppler-742-0-fuzzed.pdf
|
||||
|
|
69
test/pdfs/GHOSTSCRIPT-698804-1-fuzzed.pdf
Normal file
69
test/pdfs/GHOSTSCRIPT-698804-1-fuzzed.pdf
Normal file
|
@ -0,0 +1,69 @@
|
|||
%PDF-1.4
|
||||
%âãÏÓ
|
||||
|
||||
1 0 obj
|
||||
<<
|
||||
/Type /Catalog
|
||||
/Outline 2 0 R
|
||||
/Pages 3 0 R
|
||||
>>
|
||||
endobj
|
||||
|
||||
2 0 obj
|
||||
<<
|
||||
/Type /Outlines
|
||||
/Count 0
|
||||
>>
|
||||
endobj
|
||||
|
||||
3 0 obj
|
||||
<<
|
||||
/Type /Pages
|
||||
/Kids [ 4 0 R ]
|
||||
/Count 1
|
||||
>>
|
||||
endobj
|
||||
|
||||
4 0 obj
|
||||
<<
|
||||
/Type /Page
|
||||
/Parent 3 0 R
|
||||
/MediaBox [ 0 0 612 792 ]
|
||||
/Contents 5 0 R
|
||||
/Resources <<
|
||||
/ProcSet 6 0 R
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
|
||||
5 0 obj
|
||||
<<
|
||||
/Length 0
|
||||
>>
|
||||
stream
|
||||
endstream
|
||||
endobj
|
||||
|
||||
6 0 obj
|
||||
[ /PDF ]
|
||||
endobj
|
||||
|
||||
xref
|
||||
0 2
|
||||
0000000000 65536 f
|
||||
0000000016 00000 n
|
||||
00000004294967296 3
|
||||
0000000138 00000 n
|
||||
0000000204 00000 n
|
||||
0000000342 00000 n
|
||||
|
||||
|
||||
trailer
|
||||
<<
|
||||
/Size 7
|
||||
/Root 1 0 R
|
||||
>>
|
||||
|
||||
startxref
|
||||
418
|
||||
%%EOF
|
262
test/pdfs/poppler-395-0-fuzzed.pdf
Normal file
262
test/pdfs/poppler-395-0-fuzzed.pdf
Normal file
File diff suppressed because one or more lines are too long
|
@ -495,14 +495,27 @@ describe("api", function () {
|
|||
const loadingTask2 = getDocument(
|
||||
buildGetDocumentParams("poppler-85140-0.pdf")
|
||||
);
|
||||
const loadingTask3 = getDocument(
|
||||
buildGetDocumentParams("poppler-395-0-fuzzed.pdf")
|
||||
);
|
||||
const loadingTask4 = getDocument(
|
||||
buildGetDocumentParams("GHOSTSCRIPT-698804-1-fuzzed.pdf")
|
||||
);
|
||||
|
||||
expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true);
|
||||
expect(loadingTask2 instanceof PDFDocumentLoadingTask).toEqual(true);
|
||||
expect(loadingTask3 instanceof PDFDocumentLoadingTask).toEqual(true);
|
||||
expect(loadingTask4 instanceof PDFDocumentLoadingTask).toEqual(true);
|
||||
|
||||
const pdfDocument1 = await loadingTask1.promise;
|
||||
const pdfDocument2 = await loadingTask2.promise;
|
||||
const pdfDocument3 = await loadingTask3.promise;
|
||||
const pdfDocument4 = await loadingTask4.promise;
|
||||
|
||||
expect(pdfDocument1.numPages).toEqual(1);
|
||||
expect(pdfDocument2.numPages).toEqual(1);
|
||||
expect(pdfDocument3.numPages).toEqual(1);
|
||||
expect(pdfDocument4.numPages).toEqual(1);
|
||||
|
||||
const pageA = await pdfDocument1.getPage(1);
|
||||
expect(pageA instanceof PDFPageProxy).toEqual(true);
|
||||
|
@ -516,6 +529,28 @@ describe("api", function () {
|
|||
expect(reason instanceof UnknownErrorException).toEqual(true);
|
||||
expect(reason.message).toEqual("Bad (uncompressed) XRef entry: 3R");
|
||||
}
|
||||
try {
|
||||
await pdfDocument3.getPage(1);
|
||||
|
||||
// Shouldn't get here.
|
||||
expect(false).toEqual(true);
|
||||
} catch (reason) {
|
||||
expect(reason instanceof UnknownErrorException).toEqual(true);
|
||||
expect(reason.message).toEqual(
|
||||
"Page dictionary kid reference points to wrong type of object."
|
||||
);
|
||||
}
|
||||
try {
|
||||
await pdfDocument4.getPage(1);
|
||||
|
||||
// Shouldn't get here.
|
||||
expect(false).toEqual(true);
|
||||
} catch (reason) {
|
||||
expect(reason instanceof UnknownErrorException).toEqual(true);
|
||||
expect(reason.message).toEqual(
|
||||
"Page dictionary kid reference points to wrong type of object."
|
||||
);
|
||||
}
|
||||
|
||||
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
|
||||
});
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue