1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-25 09:38:06 +02:00

When parsing corrupt documents, avoid inserting obviously broken data in the XRef-table (issue 13783)

In cases where even the very *first* attempt at reading from an object will throw, simply ignoring such objects will help improve rendering of *some* corrupt documents.
Note that this will lead to more parsing in some cases, but considering that this only applies to *corrupt* documents that shouldn't be a big deal.
This commit is contained in:
Jonas Jenwald 2021-07-23 17:37:55 +02:00
parent 51f0a81085
commit b82c802dff
5 changed files with 51 additions and 10 deletions

View file

@ -33,6 +33,7 @@ import {
import { Lexer, Parser } from "./parser.js";
import {
MissingDataException,
ParserEOFException,
XRefEntryException,
XRefParseException,
} from "./core_utils.js";
@ -453,15 +454,38 @@ class XRef {
} else if ((m = objRegExp.exec(token))) {
const num = m[1] | 0,
gen = m[2] | 0;
if (!this.entries[num] || this.entries[num].gen === gen) {
let contentLength,
startPos = position + token.length,
updateEntries = false;
if (!this.entries[num]) {
updateEntries = true;
} else if (this.entries[num].gen === gen) {
// Before overwriting an existing entry, ensure that the new one won't
// cause *immediate* errors when it's accessed (fixes issue13783.pdf).
try {
const parser = new Parser({
lexer: new Lexer(stream.makeSubStream(startPos)),
});
parser.getObj();
updateEntries = true;
} catch (ex) {
if (ex instanceof ParserEOFException) {
warn(`indexObjects -- checking object (${token}): "${ex}".`);
} else {
// The error may come from the `Parser`-instance being initialized
// without an `XRef`-instance (we don't have a usable one yet).
updateEntries = true;
}
}
}
if (updateEntries) {
this.entries[num] = {
offset: position - stream.start,
gen,
uncompressed: true,
};
}
let contentLength,
startPos = position + token.length;
// Find the next "obj" string, rather than "endobj", to ensure that
// we won't skip over a new 'obj' operator in corrupt files where