1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-22 16:18:08 +02:00

Support corrupt PDF documents that contain "endsteam" commands (issue 18122)

This patch also refactors the `findStreamLength` helper to avoid even more code duplication.
This commit is contained in:
Jonas Jenwald 2024-05-21 12:33:19 +02:00
parent 9ee7c07b83
commit 57014d0d13
3 changed files with 64 additions and 43 deletions

View file

@ -609,12 +609,27 @@ class Parser {
return imageStream;
}
_findStreamLength(startPos, signature) {
#findStreamLength(startPos) {
const { stream } = this.lexer;
stream.pos = startPos;
const SCAN_BLOCK_LENGTH = 2048;
const signatureLength = signature.length;
const signatureLength = "endstream".length;
const END_SIGNATURE = new Uint8Array([0x65, 0x6e, 0x64]);
const endLength = END_SIGNATURE.length;
// Ideally we'd directly search for "endstream", however there are corrupt
// PDF documents where the command is incomplete; hence we search for:
// 1. The normal case.
// 2. The misspelled case (fixes issue18122.pdf).
// 3. The truncated case (fixes issue10004.pdf).
const PARTIAL_SIGNATURE = [
new Uint8Array([0x73, 0x74, 0x72, 0x65, 0x61, 0x6d]), // "stream"
new Uint8Array([0x73, 0x74, 0x65, 0x61, 0x6d]), // "steam",
new Uint8Array([0x73, 0x74, 0x72, 0x65, 0x61]), // "strea"
];
const normalLength = signatureLength - endLength;
while (stream.pos < stream.end) {
const scanBytes = stream.peekBytes(SCAN_BLOCK_LENGTH);
@ -626,13 +641,43 @@ class Parser {
let pos = 0;
while (pos < scanLength) {
let j = 0;
while (j < signatureLength && scanBytes[pos + j] === signature[j]) {
while (j < endLength && scanBytes[pos + j] === END_SIGNATURE[j]) {
j++;
}
if (j >= signatureLength) {
// `signature` found.
stream.pos += pos;
return stream.pos - startPos;
if (j >= endLength) {
// "end" found, find the complete command.
let found = false;
for (const part of PARTIAL_SIGNATURE) {
const partLen = part.length;
let k = 0;
while (k < partLen && scanBytes[pos + j + k] === part[k]) {
k++;
}
if (k >= normalLength) {
// Found "endstream" command.
found = true;
break;
}
if (k >= partLen) {
// Found "endsteam" or "endstrea" command.
// Ensure that the byte immediately following the corrupt
// endstream command is whitespace, to prevent false positives.
const lastByte = scanBytes[pos + j + k];
if (isWhiteSpace(lastByte)) {
info(
`Found "${bytesToString([...END_SIGNATURE, ...part])}" when ` +
"searching for endstream command."
);
found = true;
}
break;
}
}
if (found) {
stream.pos += pos;
return stream.pos - startPos;
}
}
pos++;
}
@ -665,43 +710,9 @@ class Parser {
this.shift(); // 'stream'
} else {
// Bad stream length, scanning for endstream command.
const ENDSTREAM_SIGNATURE = new Uint8Array([
0x65, 0x6e, 0x64, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d,
]);
let actualLength = this._findStreamLength(startPos, ENDSTREAM_SIGNATURE);
const actualLength = this.#findStreamLength(startPos);
if (actualLength < 0) {
// Only allow limited truncation of the endstream signature,
// to prevent false positives.
const MAX_TRUNCATION = 1;
// Check if the PDF generator included truncated endstream commands,
// such as e.g. "endstrea" (fixes issue10004.pdf).
for (let i = 1; i <= MAX_TRUNCATION; i++) {
const end = ENDSTREAM_SIGNATURE.length - i;
const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end);
const maybeLength = this._findStreamLength(
startPos,
TRUNCATED_SIGNATURE
);
if (maybeLength >= 0) {
// Ensure that the byte immediately following the truncated
// endstream command is a space, to prevent false positives.
const lastByte = stream.peekBytes(end + 1)[end];
if (!isWhiteSpace(lastByte)) {
break;
}
info(
`Found "${bytesToString(TRUNCATED_SIGNATURE)}" when ` +
"searching for endstream command."
);
actualLength = maybeLength;
break;
}
}
if (actualLength < 0) {
throw new FormatError("Missing endstream command.");
}
throw new FormatError("Missing endstream command.");
}
length = actualLength;