diff --git a/src/core/parser.js b/src/core/parser.js index 4d9a4267c..c5e141f9c 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -632,7 +632,7 @@ class Parser { // Get the length. let length = dict.get("Length"); if (!Number.isInteger(length)) { - info(`Bad length "${length}" in stream`); + info(`Bad length "${length && length.toString()}" in stream.`); length = 0; } diff --git a/src/core/primitives.js b/src/core/primitives.js index cc37a5f68..4b5c53221 100644 --- a/src/core/primitives.js +++ b/src/core/primitives.js @@ -16,6 +16,7 @@ import { assert, shadow, unreachable } from "../shared/util.js"; import { BaseStream } from "./base_stream.js"; +const CIRCULAR_REF = Symbol("CIRCULAR_REF"); const EOF = Symbol("EOF"); const Name = (function NameClosure() { @@ -422,6 +423,7 @@ function clearPrimitiveCaches() { } export { + CIRCULAR_REF, clearPrimitiveCaches, Cmd, Dict, diff --git a/src/core/xref.js b/src/core/xref.js index 568c44cad..7c2c2172a 100644 --- a/src/core/xref.js +++ b/src/core/xref.js @@ -21,15 +21,7 @@ import { InvalidPDFException, warn, } from "../shared/util.js"; -import { - Cmd, - Dict, - isCmd, - isDict, - isRef, - isStream, - Ref, -} from "./primitives.js"; +import { CIRCULAR_REF, Cmd, Dict, isCmd, Ref, RefSet } from "./primitives.js"; import { DocStats, MissingDataException, @@ -38,6 +30,7 @@ import { XRefParseException, } from "./core_utils.js"; import { Lexer, Parser } from "./parser.js"; +import { BaseStream } from "./base_stream.js"; import { CipherTransformFactory } from "./crypto.js"; class XRef { @@ -47,6 +40,7 @@ class XRef { this.entries = []; this.xrefstms = Object.create(null); this._cacheMap = new Map(); // Prepare the XRef cache. + this._pendingRefs = new RefSet(); this.stats = new DocStats(pdfManager.msgHandler); this._newRefNum = null; } @@ -88,7 +82,7 @@ class XRef { } warn(`XRef.parse - Invalid "Encrypt" reference: "${ex}".`); } - if (isDict(encrypt)) { + if (encrypt instanceof Dict) { const ids = trailerDict.get("ID"); const fileId = ids && ids.length ? ids[0] : ""; // The 'Encrypt' dictionary itself should not be encrypted, and by @@ -113,7 +107,7 @@ class XRef { } warn(`XRef.parse - Invalid "Root" reference: "${ex}".`); } - if (isDict(root) && root.has("Pages")) { + if (root instanceof Dict && root.has("Pages")) { this.root = root; } else { if (!recoveryMode) { @@ -155,10 +149,10 @@ class XRef { let dict = parser.getObj(); // The pdflib PDF generator can generate a nested trailer dictionary - if (!isDict(dict) && dict.dict) { + if (!(dict instanceof Dict) && dict.dict) { dict = dict.dict; } - if (!isDict(dict)) { + if (!(dict instanceof Dict)) { throw new FormatError( "Invalid XRef table: could not parse trailer dictionary" ); @@ -289,19 +283,15 @@ class XRef { } readXRefStream(stream) { - let i, j; const streamState = this.streamState; stream.pos = streamState.streamPos; - const byteWidths = streamState.byteWidths; - const typeFieldWidth = byteWidths[0]; - const offsetFieldWidth = byteWidths[1]; - const generationFieldWidth = byteWidths[2]; + const [typeFieldWidth, offsetFieldWidth, generationFieldWidth] = + streamState.byteWidths; const entryRanges = streamState.entryRanges; while (entryRanges.length > 0) { - const first = entryRanges[0]; - const n = entryRanges[1]; + const [first, n] = entryRanges; if (!Number.isInteger(first) || !Number.isInteger(n)) { throw new FormatError(`Invalid XRef range fields: ${first}, ${n}`); @@ -315,14 +305,14 @@ class XRef { `Invalid XRef entry fields length: ${first}, ${n}` ); } - for (i = streamState.entryNum; i < n; ++i) { + for (let i = streamState.entryNum; i < n; ++i) { streamState.entryNum = i; streamState.streamPos = stream.pos; let type = 0, offset = 0, generation = 0; - for (j = 0; j < typeFieldWidth; ++j) { + for (let j = 0; j < typeFieldWidth; ++j) { const typeByte = stream.getByte(); if (typeByte === -1) { throw new FormatError("Invalid XRef byteWidths 'type'."); @@ -333,14 +323,14 @@ class XRef { if (typeFieldWidth === 0) { type = 1; } - for (j = 0; j < offsetFieldWidth; ++j) { + for (let j = 0; j < offsetFieldWidth; ++j) { const offsetByte = stream.getByte(); if (offsetByte === -1) { throw new FormatError("Invalid XRef byteWidths 'offset'."); } offset = (offset << 8) | offsetByte; } - for (j = 0; j < generationFieldWidth; ++j) { + for (let j = 0; j < generationFieldWidth; ++j) { const generationByte = stream.getByte(); if (generationByte === -1) { throw new FormatError("Invalid XRef byteWidths 'generation'."); @@ -568,7 +558,7 @@ class XRef { } // read the trailer dictionary const dict = parser.getObj(); - if (!isDict(dict)) { + if (!(dict instanceof Dict)) { continue; } // Do some basic validation of the trailer/root dictionary candidate. @@ -660,7 +650,7 @@ class XRef { if ( !Number.isInteger(parser.getObj()) || !isCmd(parser.getObj(), "obj") || - !isStream((obj = parser.getObj())) + !((obj = parser.getObj()) instanceof BaseStream) ) { throw new FormatError("Invalid XRef stream"); } @@ -679,7 +669,7 @@ class XRef { obj = dict.get("Prev"); if (Number.isInteger(obj)) { this.startXRefQueue.push(obj); - } else if (isRef(obj)) { + } else if (obj instanceof Ref) { // The spec says Prev must not be a reference, i.e. "/Prev NNN" // This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R" this.startXRefQueue.push(obj.num); @@ -744,15 +734,30 @@ class XRef { this._cacheMap.set(num, xrefEntry); return xrefEntry; } + // Prevent circular references, in corrupt PDF documents, from hanging the + // worker-thread. This relies, implicitly, on the parsing being synchronous. + if (this._pendingRefs.has(ref)) { + this._pendingRefs.remove(ref); - if (xrefEntry.uncompressed) { - xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption); - } else { - xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption); + warn(`Ignoring circular reference: ${ref}.`); + return CIRCULAR_REF; } - if (isDict(xrefEntry)) { + this._pendingRefs.put(ref); + + try { + if (xrefEntry.uncompressed) { + xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption); + } else { + xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption); + } + this._pendingRefs.remove(ref); + } catch (ex) { + this._pendingRefs.remove(ref); + throw ex; + } + if (xrefEntry instanceof Dict) { xrefEntry.objId = ref.toString(); - } else if (isStream(xrefEntry)) { + } else if (xrefEntry instanceof BaseStream) { xrefEntry.dict.objId = ref.toString(); } return xrefEntry; @@ -794,7 +799,7 @@ class XRef { } else { xrefEntry = parser.getObj(); } - if (!isStream(xrefEntry)) { + if (!(xrefEntry instanceof BaseStream)) { if ( typeof PDFJSDev === "undefined" || PDFJSDev.test("!PRODUCTION || TESTING") @@ -812,7 +817,7 @@ class XRef { fetchCompressed(ref, xrefEntry, suppressEncryption = false) { const tableOffset = xrefEntry.offset; const stream = this.fetch(Ref.get(tableOffset, 0)); - if (!isStream(stream)) { + if (!(stream instanceof BaseStream)) { throw new FormatError("bad ObjStm stream"); } const first = stream.dict.get("First"); @@ -863,7 +868,7 @@ class XRef { const obj = parser.getObj(); entries[i] = obj; - if (isStream(obj)) { + if (obj instanceof BaseStream) { continue; } const num = nums[i], diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index c4fcf9d30..d8681a1e7 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -492,3 +492,5 @@ !xfa_issue14315.pdf !poppler-67295-0.pdf !poppler-85140-0.pdf +!poppler-91414-0-53.pdf +!poppler-91414-0-54.pdf diff --git a/test/pdfs/poppler-91414-0-53.pdf b/test/pdfs/poppler-91414-0-53.pdf new file mode 100644 index 000000000..3d9305e76 --- /dev/null +++ b/test/pdfs/poppler-91414-0-53.pdf @@ -0,0 +1,70 @@ +%PDF-1.5 +%€€€€ +1 0 obj +<< + /Type /Catalog + /Pages 2 0 R +>> +endobj +2 0 obj +<< + /Count 6 0 R + /Kids [3 0 R] + /Type /Pages +>> +endobj +3 0 obj +<< + /Resources << + /Font << + /F1 5 0 R + >> + >> + /MediaBox [0 0 795 842] + /Parent 2 0 R + /Contents 4 0 R + /Type /Page +>> +endobj +4 0 obj +<< /Length 43 >> +stream +BT 1 Tr /F1 30 Tf 350 750 Td (foobar) Tj ET +endstream +endobj +5 0 obj +<< + /Name /F1 + /BaseFont /Helvetica + /Type /Font + /Subtype /Type1 +>> +endobj +6 0 obj +<< /Length 6 0 R >> +stream +2 +endstream +endobj +7 0 obj +<<>> +endobj +xref +0 8 +0000000000 65535 f +0000000015 00000 n +0000000066 00000 n +0000000130 00000 n +0000000269 00000 n +0000000362 00000 n +0000000446 00000 n +0000000500 00000 n +trailer +<< + /Size 8 + /Root 1 0 R + /Info 7 0 R +>> +startxref +520 +%%EOF \ No newline at end of file diff --git a/test/pdfs/poppler-91414-0-54.pdf b/test/pdfs/poppler-91414-0-54.pdf new file mode 100644 index 000000000..c6ef3a691 --- /dev/null +++ b/test/pdfs/poppler-91414-0-54.pdf @@ -0,0 +1,77 @@ +%PDF-1.5 +%€€€€ +1 0 obj +<< + /Type /Catalog + /Pages 2 0 R +>> +endobj +2 0 obj +<< + /Count 6 0 R + /Kids [3 0 R] + /Type /Pages +>> +endobj +3 0 obj +<< + /Resources << + /Font << + /F1 5 0 R + >> + >> + /MediaBox [0 0 795 842] + /Parent 2 0 R + /Contents 4 0 R + /Type /Page +>> +endobj +4 0 obj +<< /Length 43 >> +stream +BT 1 Tr /F1 30 Tf 350 750 Td (foobar) Tj ET +endstream +endobj +5 0 obj +<< + /Name /F1 + /BaseFont /Helvetica + /Type /Font + /Subtype /Type1 +>> +endobj +6 0 obj +<< /Length 7 0 R >> +stream + foobar +endstream +endobj +7 0 obj +<< /Length 6 0 R >> +stream + foobar +endstream +endobj +8 0 obj +<<>> +endobj +xref +0 9 +0000000000 65535 f +0000000015 00000 n +0000000066 00000 n +0000000130 00000 n +0000000269 00000 n +0000000362 00000 n +0000000446 00000 n +0000000506 00000 n +0000000566 00000 n +trailer +<< + /Size 9 + /Root 1 0 R + /Info 8 0 R +>> +startxref +586 +%%EOF \ No newline at end of file diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index c8d98acf3..6b8c1c4f2 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -511,6 +511,40 @@ describe("api", function () { await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); }); + + it("creates pdf doc from PDF files, with circular references", async function () { + const loadingTask1 = getDocument( + buildGetDocumentParams("poppler-91414-0-53.pdf") + ); + const loadingTask2 = getDocument( + buildGetDocumentParams("poppler-91414-0-54.pdf") + ); + expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true); + expect(loadingTask2 instanceof PDFDocumentLoadingTask).toEqual(true); + + const pdfDocument1 = await loadingTask1.promise; + const pdfDocument2 = await loadingTask2.promise; + + expect(pdfDocument1.numPages).toEqual(1); + expect(pdfDocument2.numPages).toEqual(1); + + const pageA = await pdfDocument1.getPage(1); + const pageB = await pdfDocument2.getPage(1); + + expect(pageA instanceof PDFPageProxy).toEqual(true); + expect(pageB instanceof PDFPageProxy).toEqual(true); + + for (const opList of [ + await pageA.getOperatorList(), + await pageB.getOperatorList(), + ]) { + expect(opList.fnArray.length).toBeGreaterThan(5); + expect(opList.argsArray.length).toBeGreaterThan(5); + expect(opList.lastChunk).toEqual(true); + } + + await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); + }); }); describe("PDFWorker", function () {