mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-19 22:58:07 +02:00
Merge pull request #18125 from Snuffleupagus/issue-18122
Support corrupt PDF documents that contain "endsteam" commands (issue 18122)
This commit is contained in:
commit
dce9c6d956
3 changed files with 64 additions and 43 deletions
|
@ -609,12 +609,27 @@ class Parser {
|
|||
return imageStream;
|
||||
}
|
||||
|
||||
_findStreamLength(startPos, signature) {
|
||||
#findStreamLength(startPos) {
|
||||
const { stream } = this.lexer;
|
||||
stream.pos = startPos;
|
||||
|
||||
const SCAN_BLOCK_LENGTH = 2048;
|
||||
const signatureLength = signature.length;
|
||||
const signatureLength = "endstream".length;
|
||||
|
||||
const END_SIGNATURE = new Uint8Array([0x65, 0x6e, 0x64]);
|
||||
const endLength = END_SIGNATURE.length;
|
||||
|
||||
// Ideally we'd directly search for "endstream", however there are corrupt
|
||||
// PDF documents where the command is incomplete; hence we search for:
|
||||
// 1. The normal case.
|
||||
// 2. The misspelled case (fixes issue18122.pdf).
|
||||
// 3. The truncated case (fixes issue10004.pdf).
|
||||
const PARTIAL_SIGNATURE = [
|
||||
new Uint8Array([0x73, 0x74, 0x72, 0x65, 0x61, 0x6d]), // "stream"
|
||||
new Uint8Array([0x73, 0x74, 0x65, 0x61, 0x6d]), // "steam",
|
||||
new Uint8Array([0x73, 0x74, 0x72, 0x65, 0x61]), // "strea"
|
||||
];
|
||||
const normalLength = signatureLength - endLength;
|
||||
|
||||
while (stream.pos < stream.end) {
|
||||
const scanBytes = stream.peekBytes(SCAN_BLOCK_LENGTH);
|
||||
|
@ -626,13 +641,43 @@ class Parser {
|
|||
let pos = 0;
|
||||
while (pos < scanLength) {
|
||||
let j = 0;
|
||||
while (j < signatureLength && scanBytes[pos + j] === signature[j]) {
|
||||
while (j < endLength && scanBytes[pos + j] === END_SIGNATURE[j]) {
|
||||
j++;
|
||||
}
|
||||
if (j >= signatureLength) {
|
||||
// `signature` found.
|
||||
stream.pos += pos;
|
||||
return stream.pos - startPos;
|
||||
if (j >= endLength) {
|
||||
// "end" found, find the complete command.
|
||||
let found = false;
|
||||
for (const part of PARTIAL_SIGNATURE) {
|
||||
const partLen = part.length;
|
||||
let k = 0;
|
||||
while (k < partLen && scanBytes[pos + j + k] === part[k]) {
|
||||
k++;
|
||||
}
|
||||
if (k >= normalLength) {
|
||||
// Found "endstream" command.
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
if (k >= partLen) {
|
||||
// Found "endsteam" or "endstea" command.
|
||||
// Ensure that the byte immediately following the corrupt
|
||||
// endstream command is a space, to prevent false positives.
|
||||
const lastByte = scanBytes[pos + j + k];
|
||||
if (isWhiteSpace(lastByte)) {
|
||||
info(
|
||||
`Found "${bytesToString([...END_SIGNATURE, ...part])}" when ` +
|
||||
"searching for endstream command."
|
||||
);
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (found) {
|
||||
stream.pos += pos;
|
||||
return stream.pos - startPos;
|
||||
}
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
|
@ -665,43 +710,9 @@ class Parser {
|
|||
this.shift(); // 'stream'
|
||||
} else {
|
||||
// Bad stream length, scanning for endstream command.
|
||||
const ENDSTREAM_SIGNATURE = new Uint8Array([
|
||||
0x65, 0x6e, 0x64, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d,
|
||||
]);
|
||||
let actualLength = this._findStreamLength(startPos, ENDSTREAM_SIGNATURE);
|
||||
const actualLength = this.#findStreamLength(startPos);
|
||||
if (actualLength < 0) {
|
||||
// Only allow limited truncation of the endstream signature,
|
||||
// to prevent false positives.
|
||||
const MAX_TRUNCATION = 1;
|
||||
// Check if the PDF generator included truncated endstream commands,
|
||||
// such as e.g. "endstrea" (fixes issue10004.pdf).
|
||||
for (let i = 1; i <= MAX_TRUNCATION; i++) {
|
||||
const end = ENDSTREAM_SIGNATURE.length - i;
|
||||
const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end);
|
||||
|
||||
const maybeLength = this._findStreamLength(
|
||||
startPos,
|
||||
TRUNCATED_SIGNATURE
|
||||
);
|
||||
if (maybeLength >= 0) {
|
||||
// Ensure that the byte immediately following the truncated
|
||||
// endstream command is a space, to prevent false positives.
|
||||
const lastByte = stream.peekBytes(end + 1)[end];
|
||||
if (!isWhiteSpace(lastByte)) {
|
||||
break;
|
||||
}
|
||||
info(
|
||||
`Found "${bytesToString(TRUNCATED_SIGNATURE)}" when ` +
|
||||
"searching for endstream command."
|
||||
);
|
||||
actualLength = maybeLength;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (actualLength < 0) {
|
||||
throw new FormatError("Missing endstream command.");
|
||||
}
|
||||
throw new FormatError("Missing endstream command.");
|
||||
}
|
||||
length = actualLength;
|
||||
|
||||
|
|
1
test/pdfs/issue18122.pdf.link
Normal file
1
test/pdfs/issue18122.pdf.link
Normal file
|
@ -0,0 +1 @@
|
|||
https://github.com/mozilla/pdf.js/files/15385405/Factuur_1105490.pdf
|
|
@ -5420,6 +5420,15 @@
|
|||
"rounds": 1,
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"id": "issue18122",
|
||||
"file": "pdfs/issue18122.pdf",
|
||||
"md5": "b9be483e6bc0f35994f536d8f6a685c1",
|
||||
"rounds": 1,
|
||||
"link": true,
|
||||
"lastPage": 1,
|
||||
"type": "eq"
|
||||
},
|
||||
{
|
||||
"id": "issue4890",
|
||||
"file": "pdfs/issue4890.pdf",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue