From 3adbba55b2c6c24ff94dfa6d53c9655ca01eb5f7 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Mon, 9 Mar 2020 13:24:10 +0100 Subject: [PATCH] Limit the number of warning messages printed by any one `Lexer.getHexString` invocation *This patch fixes something that's annoyed me every now and then over the years, when debugging/fixing corrupt PDF documents.* For corrupt PDF documents where `Lexer.getHexString` encounters invalid characters, there's very rarely just a handful of them. In practice it's not uncommon for there to be many hundreds, or even many thousands, of invalid hex characters found. Not only is the resulting console warning spam utterly useless in these cases, there's often enough of it that performance may even suffer; hence this patch, which limits the number of messages that any one `Lexer.getHexString` invocation may print. --- src/core/parser.js | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/core/parser.js b/src/core/parser.js index bd92cf4ab..1a64455e1 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -842,6 +842,7 @@ class Lexer { // other commands or literals as a prefix. The knowCommands is optional. this.knownCommands = knownCommands; + this._hexStringNumWarn = 0; this.beginInlineImagePos = -1; } @@ -1099,12 +1100,32 @@ class Lexer { return Name.get(strBuf.join("")); } + /** + * @private + */ + _hexStringWarn(ch) { + const MAX_HEX_STRING_NUM_WARN = 5; + + if (this._hexStringNumWarn++ === MAX_HEX_STRING_NUM_WARN) { + warn("getHexString - ignoring additional invalid characters."); + return; + } + if (this._hexStringNumWarn > MAX_HEX_STRING_NUM_WARN) { + // Limit the number of warning messages printed for a `this.getHexString` + // invocation, since corrupt PDF documents may otherwise spam the console + // enough to affect general performance negatively. 
+ return; + } + warn(`getHexString - ignoring invalid character: ${ch}`); + } + getHexString() { const strBuf = this.strBuf; strBuf.length = 0; let ch = this.currentChar; let isFirstHex = true; let firstDigit, secondDigit; + this._hexStringNumWarn = 0; while (true) { if (ch < 0) { @@ -1120,14 +1141,14 @@ class Lexer { if (isFirstHex) { firstDigit = toHexDigit(ch); if (firstDigit === -1) { - warn(`Ignoring invalid character "${ch}" in hex string`); + this._hexStringWarn(ch); ch = this.nextChar(); continue; } } else { secondDigit = toHexDigit(ch); if (secondDigit === -1) { - warn(`Ignoring invalid character "${ch}" in hex string`); + this._hexStringWarn(ch); ch = this.nextChar(); continue; }