1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-25 09:38:06 +02:00

Attempt to detect inline images which contain an "EI" sequence in the actual image data (issue 11124)

This should reduce the possibility of accidentally truncating some inline images, while *not* causing the "EI" detection to become significantly slower.[1]
There's obviously a possibility that these added checks are not sufficient to catch *every* single case of "EI" sequences within the actual inline image data, but without specific test-cases I decided against over-engineering the solution here.

*Please note:* The interpolation issues are somewhat orthogonal to the main issue here, which is the truncated image, and they're already tracked elsewhere.

---
[1] I've looked at the issue a few times, and this is the first approach that I was able to come up with that didn't cause *unacceptable* performance regressions in e.g. issue 2618.
This commit is contained in:
Jonas Jenwald 2020-06-26 12:36:28 +02:00
parent 276d917b7c
commit 28d2ada59c
4 changed files with 85 additions and 3 deletions

View file

@ -203,10 +203,11 @@ class Parser {
I = 0x49,
SPACE = 0x20,
LF = 0xa,
CR = 0xd;
const n = 10,
CR = 0xd,
NUL = 0x0;
const startPos = stream.pos;
const lexer = this.lexer,
startPos = stream.pos,
n = 10;
let state = 0,
ch,
maybeEIPos;
@ -243,6 +244,25 @@ class Parser {
break;
}
}
if (state !== 2) {
continue;
}
// Check that the "EI" sequence isn't part of the image data, since
// that would cause the image to be truncated (fixes issue11124.pdf).
if (lexer.knownCommands) {
const nextObj = lexer.peekObj();
if (nextObj instanceof Cmd && !lexer.knownCommands[nextObj.cmd]) {
// Not a valid command, i.e. the inline image data *itself*
// contains an "EI" sequence. Resetting the state.
state = 0;
}
} else {
warn(
"findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined."
);
}
if (state === 2) {
break; // Finished!
}
@ -1276,6 +1296,28 @@ class Lexer {
return Cmd.get(str);
}
peekObj() {
const streamPos = this.stream.pos,
currentChar = this.currentChar,
beginInlineImagePos = this.beginInlineImagePos;
let nextObj;
try {
nextObj = this.getObj();
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn(`peekObj: ${ex}`);
}
// Ensure that we reset *all* relevant `Lexer`-instance state.
this.stream.pos = streamPos;
this.currentChar = currentChar;
this.beginInlineImagePos = beginInlineImagePos;
return nextObj;
}
skipToNextLine() {
let ch = this.currentChar;
while (ch >= 0) {