mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-22 16:18:08 +02:00
Support corrupt PDF documents that contain "endsteam" commands (issue 18122)
This patch also re-factors the findStreamLength-helper to avoid even more code duplication.
This commit is contained in:
parent
9ee7c07b83
commit
57014d0d13
3 changed files with 64 additions and 43 deletions
|
@ -609,12 +609,27 @@ class Parser {
|
|||
return imageStream;
|
||||
}
|
||||
|
||||
_findStreamLength(startPos, signature) {
|
||||
#findStreamLength(startPos) {
|
||||
const { stream } = this.lexer;
|
||||
stream.pos = startPos;
|
||||
|
||||
const SCAN_BLOCK_LENGTH = 2048;
|
||||
const signatureLength = signature.length;
|
||||
const signatureLength = "endstream".length;
|
||||
|
||||
const END_SIGNATURE = new Uint8Array([0x65, 0x6e, 0x64]);
|
||||
const endLength = END_SIGNATURE.length;
|
||||
|
||||
// Ideally we'd directly search for "endstream", however there are corrupt
|
||||
// PDF documents where the command is incomplete; hence we search for:
|
||||
// 1. The normal case.
|
||||
// 2. The misspelled case (fixes issue18122.pdf).
|
||||
// 3. The truncated case (fixes issue10004.pdf).
|
||||
const PARTIAL_SIGNATURE = [
|
||||
new Uint8Array([0x73, 0x74, 0x72, 0x65, 0x61, 0x6d]), // "stream"
|
||||
new Uint8Array([0x73, 0x74, 0x65, 0x61, 0x6d]), // "steam",
|
||||
new Uint8Array([0x73, 0x74, 0x72, 0x65, 0x61]), // "strea"
|
||||
];
|
||||
const normalLength = signatureLength - endLength;
|
||||
|
||||
while (stream.pos < stream.end) {
|
||||
const scanBytes = stream.peekBytes(SCAN_BLOCK_LENGTH);
|
||||
|
@ -626,13 +641,43 @@ class Parser {
|
|||
let pos = 0;
|
||||
while (pos < scanLength) {
|
||||
let j = 0;
|
||||
while (j < signatureLength && scanBytes[pos + j] === signature[j]) {
|
||||
while (j < endLength && scanBytes[pos + j] === END_SIGNATURE[j]) {
|
||||
j++;
|
||||
}
|
||||
if (j >= signatureLength) {
|
||||
// `signature` found.
|
||||
stream.pos += pos;
|
||||
return stream.pos - startPos;
|
||||
if (j >= endLength) {
|
||||
// "end" found, find the complete command.
|
||||
let found = false;
|
||||
for (const part of PARTIAL_SIGNATURE) {
|
||||
const partLen = part.length;
|
||||
let k = 0;
|
||||
while (k < partLen && scanBytes[pos + j + k] === part[k]) {
|
||||
k++;
|
||||
}
|
||||
if (k >= normalLength) {
|
||||
// Found "endstream" command.
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
if (k >= partLen) {
|
||||
// Found "endsteam" or "endstea" command.
|
||||
// Ensure that the byte immediately following the corrupt
|
||||
// endstream command is a space, to prevent false positives.
|
||||
const lastByte = scanBytes[pos + j + k];
|
||||
if (isWhiteSpace(lastByte)) {
|
||||
info(
|
||||
`Found "${bytesToString([...END_SIGNATURE, ...part])}" when ` +
|
||||
"searching for endstream command."
|
||||
);
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (found) {
|
||||
stream.pos += pos;
|
||||
return stream.pos - startPos;
|
||||
}
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
|
@ -665,43 +710,9 @@ class Parser {
|
|||
this.shift(); // 'stream'
|
||||
} else {
|
||||
// Bad stream length, scanning for endstream command.
|
||||
const ENDSTREAM_SIGNATURE = new Uint8Array([
|
||||
0x65, 0x6e, 0x64, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d,
|
||||
]);
|
||||
let actualLength = this._findStreamLength(startPos, ENDSTREAM_SIGNATURE);
|
||||
const actualLength = this.#findStreamLength(startPos);
|
||||
if (actualLength < 0) {
|
||||
// Only allow limited truncation of the endstream signature,
|
||||
// to prevent false positives.
|
||||
const MAX_TRUNCATION = 1;
|
||||
// Check if the PDF generator included truncated endstream commands,
|
||||
// such as e.g. "endstrea" (fixes issue10004.pdf).
|
||||
for (let i = 1; i <= MAX_TRUNCATION; i++) {
|
||||
const end = ENDSTREAM_SIGNATURE.length - i;
|
||||
const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end);
|
||||
|
||||
const maybeLength = this._findStreamLength(
|
||||
startPos,
|
||||
TRUNCATED_SIGNATURE
|
||||
);
|
||||
if (maybeLength >= 0) {
|
||||
// Ensure that the byte immediately following the truncated
|
||||
// endstream command is a space, to prevent false positives.
|
||||
const lastByte = stream.peekBytes(end + 1)[end];
|
||||
if (!isWhiteSpace(lastByte)) {
|
||||
break;
|
||||
}
|
||||
info(
|
||||
`Found "${bytesToString(TRUNCATED_SIGNATURE)}" when ` +
|
||||
"searching for endstream command."
|
||||
);
|
||||
actualLength = maybeLength;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (actualLength < 0) {
|
||||
throw new FormatError("Missing endstream command.");
|
||||
}
|
||||
throw new FormatError("Missing endstream command.");
|
||||
}
|
||||
length = actualLength;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue