diff --git a/src/core/catalog.js b/src/core/catalog.js index c3911c352..2dff6d2d0 100644 --- a/src/core/catalog.js +++ b/src/core/catalog.js @@ -1191,6 +1191,8 @@ class Catalog { * @returns {Promise} */ async getAllPageDicts(recoveryMode = false) { + const { ignoreErrors } = this.pdfManager.evaluatorOptions; + const queue = [{ currentNode: this.toplevelPagesDict, posInKids: 0 }]; const visitedNodes = new RefSet(); @@ -1215,6 +1217,11 @@ class Catalog { if (error instanceof XRefEntryException && !recoveryMode) { throw error; } + if (recoveryMode && ignoreErrors && pageIndex === 0) { + // Ensure that the viewer will always load (fixes issue15590.pdf). + warn(`getAllPageDicts - Skipping invalid first page: "${error}".`); + error = Dict.empty; + } map.set(pageIndex++, [error, null]); } diff --git a/src/core/xref.js b/src/core/xref.js index e273eeeba..0f1e957ea 100644 --- a/src/core/xref.js +++ b/src/core/xref.js @@ -503,7 +503,7 @@ class XRef { // Find the next "obj" string, rather than "endobj", to ensure that // we won't skip over a new 'obj' operator in corrupt files where // 'endobj' operators are missing (fixes issue9105_reduced.pdf). - while (startPos < buffer.length) { + while (startPos < length) { const endPos = startPos + skipUntil(buffer, startPos, objBytes) + 4; contentLength = endPos - position; @@ -545,7 +545,29 @@ class XRef { (token.length === 7 || /\s/.test(token[7])) ) { trailers.push(position); - position += skipUntil(buffer, position, startxrefBytes); + + const contentLength = skipUntil(buffer, position, startxrefBytes); + // Attempt to handle (some) corrupt documents, where no 'startxref' + // operators are present (fixes issue15590.pdf). + if (position + contentLength >= length) { + const endPos = position + skipUntil(buffer, position, objBytes) + 4; + + const checkPos = Math.max(endPos - CHECK_CONTENT_LENGTH, position); + const tokenStr = bytesToString(buffer.subarray(checkPos, endPos)); + + // Find the first "obj" occurrence after the 'trailer' operator. + const objToken = nestedObjRegExp.exec(tokenStr); + + if (objToken && objToken[1]) { + warn( + 'indexObjects: Found first "obj" after "trailer", ' + + 'caused by missing "startxref" -- trying to recover.' + ); + position = endPos - objToken[1].length; + continue; + } + } + position += contentLength; } else { position += token.length + 1; } diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 87d4c0278..c0ef6527d 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -351,6 +351,7 @@ !issue9534_reduced.pdf !attachment.pdf !basicapi.pdf +!issue15590.pdf !issue15594_reduced.pdf !issue2884_reduced.pdf !mixedfonts.pdf diff --git a/test/pdfs/issue15590.pdf b/test/pdfs/issue15590.pdf new file mode 100644 index 000000000..7af8ce482 --- /dev/null +++ b/test/pdfs/issue15590.pdf @@ -0,0 +1,23 @@ +%PDF-1.7 + +trailer +<< +/Root 1 0 R +>> + +1 0 obj +<< +/Type /Catalog +/Pages 2 0 R +/OpenAction 2 0 R +>> +endobj + +2 0 obj +<< +/S /JavaScript +/JS(func=function(){app.alert(1)};func();) +>> +endobj + +%%EOF diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index cea08e235..16c12be66 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -602,27 +602,42 @@ describe("api", function () { const loadingTask2 = getDocument( buildGetDocumentParams("poppler-85140-0.pdf") ); + const loadingTask3 = getDocument( + buildGetDocumentParams("poppler-85140-0.pdf", { stopAtErrors: true }) + ); expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true); expect(loadingTask2 instanceof PDFDocumentLoadingTask).toEqual(true); + expect(loadingTask3 instanceof PDFDocumentLoadingTask).toEqual(true); const pdfDocument1 = await loadingTask1.promise; const pdfDocument2 = await loadingTask2.promise; + const pdfDocument3 = await loadingTask3.promise; expect(pdfDocument1.numPages).toEqual(1); expect(pdfDocument2.numPages).toEqual(1); + expect(pdfDocument3.numPages).toEqual(1); - const page = await pdfDocument1.getPage(1); - expect(page instanceof PDFPageProxy).toEqual(true); + const pageA = await pdfDocument1.getPage(1); + expect(pageA instanceof PDFPageProxy).toEqual(true); - const opList = await page.getOperatorList(); - expect(opList.fnArray.length).toBeGreaterThan(5); - expect(opList.argsArray.length).toBeGreaterThan(5); - expect(opList.lastChunk).toEqual(true); - expect(opList.separateAnnots).toEqual(null); + const opListA = await pageA.getOperatorList(); + expect(opListA.fnArray.length).toBeGreaterThan(5); + expect(opListA.argsArray.length).toBeGreaterThan(5); + expect(opListA.lastChunk).toEqual(true); + expect(opListA.separateAnnots).toEqual(null); + + const pageB = await pdfDocument2.getPage(1); + expect(pageB instanceof PDFPageProxy).toEqual(true); + + const opListB = await pageB.getOperatorList(); + expect(opListB.fnArray.length).toBe(0); + expect(opListB.argsArray.length).toBe(0); + expect(opListB.lastChunk).toEqual(true); + expect(opListB.separateAnnots).toEqual(null); try { - await pdfDocument2.getPage(1); + await pdfDocument3.getPage(1); // Shouldn't get here. expect(false).toEqual(true); @@ -631,7 +646,11 @@ describe("api", function () { expect(reason.message).toEqual("Bad (uncompressed) XRef entry: 3R"); } - await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); + await Promise.all([ + loadingTask1.destroy(), + loadingTask2.destroy(), + loadingTask3.destroy(), + ]); }); it("creates pdf doc from PDF files, with circular references", async function () { @@ -738,6 +757,24 @@ describe("api", function () { await loadingTask.destroy(); }); + + it("creates pdf doc from PDF file, with incomplete trailer", async function () { + const loadingTask = getDocument(buildGetDocumentParams("issue15590.pdf")); + expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true); + + const pdfDocument = await loadingTask.promise; + expect(pdfDocument.numPages).toEqual(1); + + const jsActions = await pdfDocument.getJSActions(); + expect(jsActions).toEqual({ + OpenAction: ["func=function(){app.alert(1)};func();"], + }); + + const page = await pdfDocument.getPage(1); + expect(page instanceof PDFPageProxy).toEqual(true); + + await loadingTask.destroy(); + }); }); describe("PDFWorker", function () {