From 65e6ea2cb257cc06fee637532be27e0ac89fc45b Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Mon, 9 Mar 2020 11:37:33 +0100 Subject: [PATCH 1/2] Prevent lookup errors in `PartialEvaluator.hasBlendModes` from breaking all parsing/rendering of a page (issue 11678) The PDF document in question is *corrupt*, since it contains an XObject with a truncated dictionary and where the stream contents start without a "stream" operator. --- src/core/evaluator.js | 46 ++++++++++++++++++++++++++++++++--- test/pdfs/issue11678.pdf.link | 1 + test/test_manifest.json | 9 +++++++ 3 files changed, 53 insertions(+), 3 deletions(-) create mode 100644 test/pdfs/issue11678.pdf.link diff --git a/src/core/evaluator.js b/src/core/evaluator.js index cdac17bf9..87cf0f70c 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -61,6 +61,7 @@ import { WinAnsiEncoding, ZapfDingbatsEncoding, } from "./encodings.js"; +import { getLookupTableFactory, MissingDataException } from "./core_utils.js"; import { getNormalizedUnicodes, getUnicodeForGlyph, @@ -77,7 +78,6 @@ import { bidi } from "./bidi.js"; import { ColorSpace } from "./colorspace.js"; import { DecodeStream } from "./stream.js"; import { getGlyphsUnicode } from "./glyphlist.js"; -import { getLookupTableFactory } from "./core_utils.js"; import { getMetrics } from "./metrics.js"; import { isPDFFunction } from "./function.js"; import { JpegStream } from "./jpeg_stream.js"; @@ -266,7 +266,27 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { if (processed[graphicState.toString()]) { continue; // The ExtGState has already been processed. } - graphicState = xref.fetch(graphicState); + try { + graphicState = xref.fetch(graphicState); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + if (this.options.ignoreErrors) { + if (graphicState instanceof Ref) { + // Avoid parsing a corrupt ExtGState more than once. 
+ processed[graphicState.toString()] = true; + } + // Error(s) in the ExtGState -- sending unsupported feature + // notification and allow parsing/rendering to continue. + this.handler.send("UnsupportedFeature", { + featureId: UNSUPPORTED_FEATURES.unknown, + }); + warn(`hasBlendModes - ignoring ExtGState: "${ex}".`); + continue; + } + throw ex; + } } if (!(graphicState instanceof Dict)) { continue; @@ -308,7 +328,27 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { // time for badly generated PDF files (fixes issue6961.pdf). continue; } - xObject = xref.fetch(xObject); + try { + xObject = xref.fetch(xObject); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + if (this.options.ignoreErrors) { + if (xObject instanceof Ref) { + // Avoid parsing a corrupt XObject more than once. + processed[xObject.toString()] = true; + } + // Error(s) in the XObject -- sending unsupported feature + // notification and allow parsing/rendering to continue. + this.handler.send("UnsupportedFeature", { + featureId: UNSUPPORTED_FEATURES.unknown, + }); + warn(`hasBlendModes - ignoring XObject: "${ex}".`); + continue; + } + throw ex; + } } if (!isStream(xObject)) { continue; diff --git a/test/pdfs/issue11678.pdf.link b/test/pdfs/issue11678.pdf.link new file mode 100644 index 000000000..c768d08ad --- /dev/null +++ b/test/pdfs/issue11678.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/4304559/default.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index b94ab450c..bc4589531 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -3064,6 +3064,15 @@ "link": false, "type": "eq" }, + { "id": "issue11678", + "file": "pdfs/issue11678.pdf", + "md5": "e2efadeb91932f4c21e4fc682cce7de9", + "rounds": 1, + "link": true, + "firstPage": 2, + "lastPage": 2, + "type": "eq" + }, { "id": "issue4890", "file": "pdfs/issue4890.pdf", "md5": "1666feb4cd26318c2bdbea6a175dce87", From 3adbba55b2c6c24ff94dfa6d53c9655ca01eb5f7 Mon 
Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Mon, 9 Mar 2020 13:24:10 +0100 Subject: [PATCH 2/2] Limit the number of warning messages printed by any one `Lexer.getHexString` invocation *This patch fixes something that's annoyed me every now and then over the years, when debugging/fixing corrupt PDF documents.* For corrupt PDF documents where `Lexer.getHexString` encounters invalid characters, there's very rarely just a handful of them. In practice it's not uncommon for there to be many hundreds, or even many thousands, of invalid hex characters found. Not only is the resulting console warning spam utterly useless in these cases, there's often enough of it that performance may even suffer; hence this patch which limits the number of messages that any one `Lexer.getHexString` invocation may print. --- src/core/parser.js | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/core/parser.js b/src/core/parser.js index bd92cf4ab..1a64455e1 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -842,6 +842,7 @@ class Lexer { // other commands or literals as a prefix. The knowCommands is optional. this.knownCommands = knownCommands; + this._hexStringNumWarn = 0; this.beginInlineImagePos = -1; } @@ -1099,12 +1100,32 @@ class Lexer { return Name.get(strBuf.join("")); } + /** + * @private + */ + _hexStringWarn(ch) { + const MAX_HEX_STRING_NUM_WARN = 5; + + if (this._hexStringNumWarn++ === MAX_HEX_STRING_NUM_WARN) { + warn("getHexString - ignoring additional invalid characters."); + return; + } + if (this._hexStringNumWarn > MAX_HEX_STRING_NUM_WARN) { + // Limit the number of warning messages printed for a `this.getHexString` + // invocation, since corrupt PDF documents may otherwise spam the console + // enough to affect general performance negatively. 
+ return; + } + warn(`getHexString - ignoring invalid character: ${ch}`); + } + getHexString() { const strBuf = this.strBuf; strBuf.length = 0; let ch = this.currentChar; let isFirstHex = true; let firstDigit, secondDigit; + this._hexStringNumWarn = 0; while (true) { if (ch < 0) { @@ -1120,14 +1141,14 @@ class Lexer { if (isFirstHex) { firstDigit = toHexDigit(ch); if (firstDigit === -1) { - warn(`Ignoring invalid character "${ch}" in hex string`); + this._hexStringWarn(ch); ch = this.nextChar(); continue; } } else { secondDigit = toHexDigit(ch); if (secondDigit === -1) { - warn(`Ignoring invalid character "${ch}" in hex string`); + this._hexStringWarn(ch); ch = this.nextChar(); continue; }