From db7cf40a307ff2c5cd90fc23258ffe7925659d8e Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 18 Feb 2025 17:53:36 +0100 Subject: [PATCH] Don't cache free/missing XRef entries (issue 19510) During XRef stream parsing we may attempt to look up an entry that hasn't been found yet, because parsing is still in progress. Given that we'd also cache free/missing XRef entries, that premature "missing" result was cached and we'd then return an incorrect value during normal PDF parsing. The simplest solution here is to just not cache free/missing XRef entries, since a properly generated PDF document shouldn't be trying to access objects it doesn't contain. Furthermore, the amount of "extra" parsing now needed for such XRef entries shouldn't be significant enough to be an issue. --- src/core/xref.js | 1 - test/pdfs/issue19510.pdf.link | 1 + test/test_manifest.json | 8 ++++++++ 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 test/pdfs/issue19510.pdf.link diff --git a/src/core/xref.js b/src/core/xref.js index 669af0ed8..4a02f5ed8 100644 --- a/src/core/xref.js +++ b/src/core/xref.js @@ -842,7 +842,6 @@ class XRef { if (xrefEntry === null) { // The referenced entry can be free. - this._cacheMap.set(num, xrefEntry); return xrefEntry; } // Prevent circular references, in corrupt PDF documents, from hanging the diff --git a/test/pdfs/issue19510.pdf.link b/test/pdfs/issue19510.pdf.link new file mode 100644 index 000000000..91015fdd2 --- /dev/null +++ b/test/pdfs/issue19510.pdf.link @@ -0,0 +1 @@ +https://github.com/user-attachments/files/18841919/geht_nicht_02.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index d4deda407..6549e49e1 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -5626,6 +5626,14 @@ "type": "eq", "lastPage": 1 }, + { + "id": "issue19510", + "file": "pdfs/issue19510.pdf", + "md5": "3ff133f633cea3e2f13f08f8c3414cc6", + "link": true, + "rounds": 1, + "type": "eq" + }, { "id": "issue11768", "file": "pdfs/issue11768_reduced.pdf",