Always parse the entire startXRefQueue in XRef.readXRef (issue 15833)

Previously we'd abort all parsing if an Error was encountered, even though multiple `startXRefQueue` entries may be available and continued parsing could thus eventually find usable data. Note that in the referenced PDF document the `startxref` operator, at the end of the file, points to a position in the middle of an arbitrary `stream`, which is why things break.
2025-04-22 16:18:08 +02:00 · 2022-12-15 13:35:39 +01:00 · 2022-12-15 13:35:39 +01:00 · 26135b0313
commit 26135b0313
parent 8587ce6afd
3 changed files with 19 additions and 12 deletions
--- a/src/core/xref.js
+++ b/src/core/xref.js
@@ -665,8 +665,8 @@ class XRef {
    // circular dependency between tables (fixes bug1393476.pdf).
    const startXRefParsedCache = new Set();

-    try {
-      while (this.startXRefQueue.length) {
+    while (this.startXRefQueue.length) {
+      try {
        const startXRef = this.startXRefQueue[0];

        if (startXRefParsedCache.has(startXRef)) {
@@ -734,20 +734,18 @@ class XRef {
          // This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R"
          this.startXRefQueue.push(obj.num);
        }
-
-        this.startXRefQueue.shift();
+      } catch (e) {
+        if (e instanceof MissingDataException) {
+          throw e;
+        }
+        info("(while reading XRef): " + e);
      }
-
-      return this.topDict;
-    } catch (e) {
-      if (e instanceof MissingDataException) {
-        throw e;
-      }
-      info("(while reading XRef): " + e);
-
      this.startXRefQueue.shift();
    }

+    if (this.topDict) {
+      return this.topDict;
+    }
    if (recoveryMode) {
      return undefined;
    }