1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-22 16:18:08 +02:00

Allow getOperatorList/getTextContent to skip errors when parsing broken XObjects (issue 8702, issue 8704)

This patch makes use of the existing `ignoreErrors` property in `src/core/evaluator.js`, see PRs 8240 and 8441, thus allowing us to attempt to recovery as much as possible of a page even when it contains broken XObjects.

Fixes 8702.
Fixes 8704.
This commit is contained in:
Jonas Jenwald 2017-09-17 13:35:18 +02:00
parent b3f8411264
commit b1472cddbb
5 changed files with 138 additions and 86 deletions

View file

@ -948,52 +948,65 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
case OPS.paintXObject:
// eagerly compile XForm objects
var name = args[0].name;
if (!name) {
warn('XObject must be referred to by name.');
continue;
}
if (imageCache[name] !== undefined) {
if (name && imageCache[name] !== undefined) {
operatorList.addOp(imageCache[name].fn, imageCache[name].args);
args = null;
continue;
}
var xobj = xobjs.get(name);
if (xobj) {
next(new Promise(function(resolveXObject, rejectXObject) {
if (!name) {
throw new FormatError('XObject must be referred to by name.');
}
let xobj = xobjs.get(name);
if (!xobj) {
operatorList.addOp(fn, args);
resolveXObject();
return;
}
if (!isStream(xobj)) {
throw new FormatError('XObject should be a stream');
}
var type = xobj.dict.get('Subtype');
let type = xobj.dict.get('Subtype');
if (!isName(type)) {
throw new FormatError('XObject should have a Name subtype');
}
if (type.name === 'Form') {
stateManager.save();
next(self.buildFormXObject(resources, xobj, null,
operatorList, task,
stateManager.state.clone()).
then(function () {
self.buildFormXObject(resources, xobj, null, operatorList,
task, stateManager.state.clone()).
then(function() {
stateManager.restore();
}));
resolveXObject();
}, rejectXObject);
return;
} else if (type.name === 'Image') {
self.buildPaintImageXObject(resources, xobj, false,
operatorList, name, imageCache);
args = null;
continue;
operatorList, name, imageCache);
} else if (type.name === 'PS') {
// PostScript XObjects are unused when viewing documents.
// See section 4.7.1 of Adobe's PDF reference.
info('Ignored XObject subtype PS');
continue;
} else {
throw new FormatError(
`Unhandled XObject subtype ${type.name}`);
}
}
break;
resolveXObject();
}).catch(function(reason) {
if (self.options.ignoreErrors) {
// Error(s) in the XObject -- sending unsupported feature
// notification and allow rendering to continue.
self.handler.send('UnsupportedFeature',
{ featureId: UNSUPPORTED_FEATURES.unknown, });
warn(`getOperatorList - ignoring XObject: "${reason}".`);
return;
}
throw reason;
}));
return;
case OPS.setFont:
var fontSize = args[1];
// eagerly collect all fonts
@ -1666,73 +1679,93 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
var name = args[0].name;
if (name in skipEmptyXObjs) {
if (name && skipEmptyXObjs[name] !== undefined) {
break;
}
var xobj = xobjs.get(name);
if (!xobj) {
break;
}
if (!isStream(xobj)) {
throw new FormatError('XObject should be a stream');
}
var type = xobj.dict.get('Subtype');
if (!isName(type)) {
throw new FormatError('XObject should have a Name subtype');
}
if (type.name !== 'Form') {
skipEmptyXObjs[name] = true;
break;
}
// Use a new `StateManager` to prevent incorrect positioning of
// textItems *after* the Form XObject, since errors in the data
// can otherwise prevent `restore` operators from being executed.
// NOTE: This is only an issue when `options.ignoreErrors = true`.
var currentState = stateManager.state.clone();
var xObjStateManager = new StateManager(currentState);
var matrix = xobj.dict.getArray('Matrix');
if (Array.isArray(matrix) && matrix.length === 6) {
xObjStateManager.transform(matrix);
}
// Enqueue the `textContent` chunk before parsing the /Form
// XObject.
enqueueChunk();
let sinkWrapper = {
enqueueInvoked: false,
enqueue(chunk, size) {
this.enqueueInvoked = true;
sink.enqueue(chunk, size);
},
get desiredSize() {
return sink.desiredSize;
},
get ready() {
return sink.ready;
},
};
next(self.getTextContent({
stream: xobj,
task,
resources: xobj.dict.get('Resources') || resources,
stateManager: xObjStateManager,
normalizeWhitespace,
combineTextItems,
sink: sinkWrapper,
seenStyles,
}).then(function() {
if (!sinkWrapper.enqueueInvoked) {
skipEmptyXObjs[name] = true;
next(new Promise(function(resolveXObject, rejectXObject) {
if (!name) {
throw new FormatError('XObject must be referred to by name.');
}
let xobj = xobjs.get(name);
if (!xobj) {
resolveXObject();
return;
}
if (!isStream(xobj)) {
throw new FormatError('XObject should be a stream');
}
let type = xobj.dict.get('Subtype');
if (!isName(type)) {
throw new FormatError('XObject should have a Name subtype');
}
if (type.name !== 'Form') {
skipEmptyXObjs[name] = true;
resolveXObject();
return;
}
// Use a new `StateManager` to prevent incorrect positioning of
// textItems *after* the Form XObject, since errors in the data
// can otherwise prevent `restore` operators from executing.
// NOTE: Only an issue when `options.ignoreErrors === true`.
let currentState = stateManager.state.clone();
let xObjStateManager = new StateManager(currentState);
let matrix = xobj.dict.getArray('Matrix');
if (Array.isArray(matrix) && matrix.length === 6) {
xObjStateManager.transform(matrix);
}
// Enqueue the `textContent` chunk before parsing the /Form
// XObject.
enqueueChunk();
let sinkWrapper = {
enqueueInvoked: false,
enqueue(chunk, size) {
this.enqueueInvoked = true;
sink.enqueue(chunk, size);
},
get desiredSize() {
return sink.desiredSize;
},
get ready() {
return sink.ready;
},
};
self.getTextContent({
stream: xobj,
task,
resources: xobj.dict.get('Resources') || resources,
stateManager: xObjStateManager,
normalizeWhitespace,
combineTextItems,
sink: sinkWrapper,
seenStyles,
}).then(function() {
if (!sinkWrapper.enqueueInvoked) {
skipEmptyXObjs[name] = true;
}
resolveXObject();
}, rejectXObject);
}).catch(function(reason) {
if (reason instanceof AbortException) {
return;
}
if (self.options.ignoreErrors) {
// Error(s) in the XObject -- allow text-extraction to
// continue.
warn(`getTextContent - ignoring XObject: "${reason}".`);
return;
}
throw reason;
}));
return;
case OPS.setGState: