From e92a929a5817a9b30531242beaa9ce2a1d37138f Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 5 Nov 2024 16:42:31 +0100 Subject: [PATCH] Try to improve handling of missing trailer dictionaries in `XRef.indexObjects` (issue 18986) The problem with the referenced PDF document has nothing to do with invalid dates, as the issue seems to suggest, but rather with the fact that it has neither an XRef table nor a trailer dictionary. Given that crucial parts of the internal document structure are missing, you might argue that it's not really a PDF document. In an attempt to support this kind of corruption, we'll simply iterate through all (previously found) XRef entries and pick one that *might* be a valid /Root dictionary. There's obviously no guarantee that this works, and it might not be fast in larger PDF documents, but at least it cannot be any worse than *immediately* throwing `InvalidPDFException` as we previously did here. *Please note:* I'm totally fine with this patch being rejected, since it's somewhat questionable if we should actually attempt to support "PDF documents" with this level of corruption. --- src/core/xref.js | 25 +++++++++++++++++++++++++ test/pdfs/.gitignore | 1 + test/pdfs/issue18986.pdf | Bin 0 -> 862 bytes test/test_manifest.json | 7 +++++++ test/unit/api_spec.js | 2 +- 5 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 test/pdfs/issue18986.pdf diff --git a/src/core/xref.js b/src/core/xref.js index 313f18a71..75e5d1626 100644 --- a/src/core/xref.js +++ b/src/core/xref.js @@ -680,6 +680,31 @@ class XRef { if (this.topDict) { return this.topDict; } + + // When no trailer dictionary candidate exists, try picking the first + // dictionary that contains a /Root entry (fixes issue18986.pdf). 
+ if (!trailerDicts.length) { + for (const [num, entry] of this.entries.entries()) { + if (!entry) { + continue; + } + const ref = Ref.get(num, entry.gen); + let obj; + + try { + obj = this.fetch(ref); + } catch { + continue; + } + if (obj instanceof BaseStream) { + obj = obj.dict; + } + if (obj instanceof Dict && obj.has("Root")) { + return obj; + } + } + } + // nothing helps throw new InvalidPDFException("Invalid PDF structure."); } diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index b10be1106..4bf8b6554 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -156,6 +156,7 @@ !bug1020858.pdf !prefilled_f1040.pdf !bug1050040.pdf +!issue18986.pdf !bug1200096.pdf !bug1068432.pdf !issue12295.pdf diff --git a/test/pdfs/issue18986.pdf b/test/pdfs/issue18986.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f23047bf73c0bc81aa6d8de0c8e8ca8af43ba985 GIT binary patch literal 862 zcmZuv;cnV65Ki}Rc?$ojnrP~r&_ZECNF*U~D6A;3tr|j8N)DO|V&pg)?A@k4(q3Wb zq$Ox=tmw{npMCdzzVnIaU)mq9bemX&KRoH(E}b2AKqv7Ua_aITlf>cg^B9)b)6nTww8~?>h*FdAl~Zg+R7y_6;#Flo1zhyu>@k# zF(hg1@crCEShLe4?d!&Z&P2L>6?6sPy$ViZ0g9b!kw%d7LM$OCib90UGl|)psFqI{ zv~YXck{io-!`G!=qxirJN$)an`KH)rWpKY^?(<(Y?k4eyiPxv?x8SNUC9)`ewXm=p`V3HYD@76O9V^anok{Z|k<-e#C|mdXtn zY&ISdlpq0-W^0nbmYcL+$VV;^Qx-41iQq}G1QnbX4^Oa^=!FTYldjS-d<%!4TqL<+ z)~wn!tB7++lyDIZvlSZJ`p37Lsy?~Y8vKspN`QXm~2PPTwMgRZ+ literal 0 HcmV?d00001 diff --git a/test/test_manifest.json b/test/test_manifest.json index 7cfd0356e..65cff4331 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -4727,6 +4727,13 @@ "link": false, "type": "eq" }, + { + "id": "issue18986", + "file": "pdfs/issue18986.pdf", + "md5": "e147084fabd9677366f6ae3586dd311b", + "rounds": 1, + "type": "load" + }, { "id": "issue6652", "file": "pdfs/issue6652.pdf", diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index bb78c9c32..62380c6d3 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -623,7 +623,7 @@ describe("api", function 
() { expect(false).toEqual(true); } catch (reason) { expect(reason instanceof InvalidPDFException).toEqual(true); - expect(reason.message).toEqual("Invalid PDF structure."); + expect(reason.message).toEqual("Invalid Root reference."); } await loadingTask.destroy();