mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-25 17:48:07 +02:00
Fall back to finding the first "obj" occurrence when the trailer-dictionary is incomplete (issue 15590)
Note that the "trailer"-case is already a fallback, since normally we're able to use the "xref"-operator even in corrupt documents. However, when a "trailer"-operator is found we still expect "startxref" to exist and be usable in order to advance the stream position. When that's not the case, as happens in the referenced issue, we use a simple fallback to find the first "obj" occurrence instead. This *partially* fixes issue 15590, since without this patch we fail to find any objects at all during `XRef.indexObjects`. However, note that the PDF document is still corrupt and won't render since there's no actual /Pages-dictionary and the /Root-entry simply points to the /OpenAction-dictionary instead.
This commit is contained in:
parent
2ae90f9615
commit
2516ffa78e
4 changed files with 75 additions and 2 deletions
|
@ -503,7 +503,7 @@ class XRef {
|
|||
// Find the next "obj" string, rather than "endobj", to ensure that
|
||||
// we won't skip over a new 'obj' operator in corrupt files where
|
||||
// 'endobj' operators are missing (fixes issue9105_reduced.pdf).
|
||||
while (startPos < buffer.length) {
|
||||
while (startPos < length) {
|
||||
const endPos = startPos + skipUntil(buffer, startPos, objBytes) + 4;
|
||||
contentLength = endPos - position;
|
||||
|
||||
|
@ -545,7 +545,29 @@ class XRef {
|
|||
(token.length === 7 || /\s/.test(token[7]))
|
||||
) {
|
||||
trailers.push(position);
|
||||
position += skipUntil(buffer, position, startxrefBytes);
|
||||
|
||||
const contentLength = skipUntil(buffer, position, startxrefBytes);
|
||||
// Attempt to handle (some) corrupt documents, where no 'startxref'
|
||||
// operators are present (fixes issue15590.pdf).
|
||||
if (position + contentLength >= length) {
|
||||
const endPos = position + skipUntil(buffer, position, objBytes) + 4;
|
||||
|
||||
const checkPos = Math.max(endPos - CHECK_CONTENT_LENGTH, position);
|
||||
const tokenStr = bytesToString(buffer.subarray(checkPos, endPos));
|
||||
|
||||
// Find the first "obj" occurrence after the 'trailer' operator.
|
||||
const objToken = nestedObjRegExp.exec(tokenStr);
|
||||
|
||||
if (objToken && objToken[1]) {
|
||||
warn(
|
||||
'indexObjects: Found first "obj" after "trailer", ' +
|
||||
'caused by missing "startxref" -- trying to recover.'
|
||||
);
|
||||
position = endPos - objToken[1].length;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
position += contentLength;
|
||||
} else {
|
||||
position += token.length + 1;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue