mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-25 09:38:06 +02:00
Attempt to detect inline images which contain an "EI" sequence in the actual image data (issue 11124)
This should reduce the possibility of accidentally truncating some inline images, while *not* causing the "EI" detection to become significantly slower.[1] There's obviously a possibility that these added checks are not sufficient to catch *every* single case of "EI" sequences within the actual inline image data, but without specific test-cases I decided against over-engineering the solution here. *Please note:* The interpolation issues are somewhat orthogonal to the main issue here, which is the truncated image, and they are already tracked elsewhere. --- [1] I've looked at the issue a few times, and this is the first approach that I was able to come up with that didn't cause *unacceptable* performance regressions in e.g. issue 2618.
This commit is contained in:
parent
276d917b7c
commit
28d2ada59c
4 changed files with 85 additions and 3 deletions
|
@ -203,10 +203,11 @@ class Parser {
|
|||
I = 0x49,
|
||||
SPACE = 0x20,
|
||||
LF = 0xa,
|
||||
CR = 0xd;
|
||||
const n = 10,
|
||||
CR = 0xd,
|
||||
NUL = 0x0;
|
||||
const startPos = stream.pos;
|
||||
const lexer = this.lexer,
|
||||
startPos = stream.pos,
|
||||
n = 10;
|
||||
let state = 0,
|
||||
ch,
|
||||
maybeEIPos;
|
||||
|
@ -243,6 +244,25 @@ class Parser {
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (state !== 2) {
|
||||
continue;
|
||||
}
|
||||
// Check that the "EI" sequence isn't part of the image data, since
|
||||
// that would cause the image to be truncated (fixes issue11124.pdf).
|
||||
if (lexer.knownCommands) {
|
||||
const nextObj = lexer.peekObj();
|
||||
if (nextObj instanceof Cmd && !lexer.knownCommands[nextObj.cmd]) {
|
||||
// Not a valid command, i.e. the inline image data *itself*
|
||||
// contains an "EI" sequence. Resetting the state.
|
||||
state = 0;
|
||||
}
|
||||
} else {
|
||||
warn(
|
||||
"findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined."
|
||||
);
|
||||
}
|
||||
|
||||
if (state === 2) {
|
||||
break; // Finished!
|
||||
}
|
||||
|
@ -1276,6 +1296,28 @@ class Lexer {
|
|||
return Cmd.get(str);
|
||||
}
|
||||
|
||||
peekObj() {
|
||||
const streamPos = this.stream.pos,
|
||||
currentChar = this.currentChar,
|
||||
beginInlineImagePos = this.beginInlineImagePos;
|
||||
|
||||
let nextObj;
|
||||
try {
|
||||
nextObj = this.getObj();
|
||||
} catch (ex) {
|
||||
if (ex instanceof MissingDataException) {
|
||||
throw ex;
|
||||
}
|
||||
warn(`peekObj: ${ex}`);
|
||||
}
|
||||
// Ensure that we reset *all* relevant `Lexer`-instance state.
|
||||
this.stream.pos = streamPos;
|
||||
this.currentChar = currentChar;
|
||||
this.beginInlineImagePos = beginInlineImagePos;
|
||||
|
||||
return nextObj;
|
||||
}
|
||||
|
||||
skipToNextLine() {
|
||||
let ch = this.currentChar;
|
||||
while (ch >= 0) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue