1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-25 17:48:07 +02:00

[api-minor] Always allow e.g. rendering to continue even if there are errors, and add a stopAtErrors parameter to getDocument to opt-out of this behaviour (issue 6342, issue 3795, bug 1130815)

Other PDF readers, e.g. Adobe Reader and PDFium (in Chrome), will attempt to render as much of a page as possible even if there are errors present.
Currently we just bail as soon as the first error is hit, which means that we'll usually not render anything in these cases and just display a blank page instead.

NOTE: This patch changes the default behaviour of the PDF.js API to always attempt to recover as much data as possible, even when encountering errors during e.g. `getOperatorList`/`getTextContent`, which thus improves our handling of corrupt PDF files and allows the default viewer to handle errors slightly more gracefully.
In the event that an API consumer wishes to use the old behaviour, where we stop parsing as soon as an error is encountered, the `stopAtErrors` parameter can be set at `getDocument`.

Fixes, inasmuch as it's possible since the PDF files are corrupt, e.g. issue 6342, issue 3795, and [bug 1130815](https://bugzilla.mozilla.org/show_bug.cgi?id=1130815) (and probably others too).
This commit is contained in:
Jonas Jenwald 2017-02-19 14:03:08 +01:00
parent 10e5f766a2
commit a39d636eb8
8 changed files with 255 additions and 50 deletions

View file

@ -114,6 +114,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
maxImageSize: -1,
disableFontFace: false,
disableNativeImageDecoder: false,
ignoreErrors: false,
};
function NativeImageDecoder(xref, resources, handler, forceDataSchema) {
@ -342,9 +343,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
operatorList,
task,
initialState) {
var matrix = xobj.dict.getArray('Matrix');
var bbox = xobj.dict.getArray('BBox');
var group = xobj.dict.get('Group');
var dict = xobj.dict;
var matrix = dict.getArray('Matrix');
var bbox = dict.getArray('BBox');
var group = dict.get('Group');
if (group) {
var groupOptions = {
matrix: matrix,
@ -374,8 +376,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
operatorList.addOp(OPS.paintFormXObjectBegin, [matrix, bbox]);
return this.getOperatorList(xobj, task,
(xobj.dict.get('Resources') || resources), operatorList, initialState).
then(function () {
(dict.get('Resources') || resources),
operatorList, initialState).then(function () {
operatorList.addOp(OPS.paintFormXObjectEnd, []);
if (group) {
@ -522,7 +524,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
return this.buildFormXObject(resources, smaskContent, smaskOptions,
operatorList, task, stateManager.state.clone());
operatorList, task,
stateManager.state.clone());
},
handleTilingType:
@ -538,14 +541,14 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
return this.getOperatorList(pattern, task, patternResources,
tilingOpList).then(function () {
// Add the dependencies to the parent operator list so they are
// resolved before sub operator list is executed synchronously.
operatorList.addDependencies(tilingOpList.dependencies);
operatorList.addOp(fn, getTilingPatternIR({
fnArray: tilingOpList.fnArray,
argsArray: tilingOpList.argsArray
}, patternDict, args));
});
// Add the dependencies to the parent operator list so they are
// resolved before sub operator list is executed synchronously.
operatorList.addDependencies(tilingOpList.dependencies);
operatorList.addOp(fn, getTilingPatternIR({
fnArray: tilingOpList.fnArray,
argsArray: tilingOpList.argsArray
}, patternDict, args));
});
},
handleSetFont:
@ -899,7 +902,6 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
resources,
operatorList,
initialState) {
var self = this;
var xref = this.xref;
var imageCache = Object.create(null);
@ -913,6 +915,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager);
var timeSlotManager = new TimeSlotManager();
function closePendingRestoreOPS(argument) {
for (var i = 0, ii = preprocessor.savedStatesDepth; i < ii; i++) {
operatorList.addOp(OPS.restore, []);
}
}
return new Promise(function promiseBody(resolve, reject) {
var next = function (promise) {
promise.then(function () {
@ -1187,11 +1195,21 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
// Some PDFs don't close all restores inside object/form.
// Closing those for them.
for (i = 0, ii = preprocessor.savedStatesDepth; i < ii; i++) {
operatorList.addOp(OPS.restore, []);
}
closePendingRestoreOPS();
resolve();
});
}).catch(function(reason) {
if (this.options.ignoreErrors) {
// Error(s) in the OperatorList -- sending unsupported feature
// notification and allow rendering to continue.
this.handler.send('UnsupportedFeature',
{ featureId: UNSUPPORTED_FEATURES.unknown });
warn('getOperatorList - ignoring errors during task: ' + task.name);
closePendingRestoreOPS();
return;
}
throw reason;
}.bind(this));
},
getTextContent:
@ -1660,19 +1678,24 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
break;
}
stateManager.save();
// Use a new `StateManager` to prevent incorrect positioning of
// textItems *after* the Form XObject, since errors in the data
// can otherwise prevent `restore` operators from being executed.
// NOTE: This is only an issue when `options.ignoreErrors = true`.
var currentState = stateManager.state.clone();
var xObjStateManager = new StateManager(currentState);
var matrix = xobj.dict.getArray('Matrix');
if (isArray(matrix) && matrix.length === 6) {
stateManager.transform(matrix);
xObjStateManager.transform(matrix);
}
next(self.getTextContent(xobj, task,
xobj.dict.get('Resources') || resources, stateManager,
xobj.dict.get('Resources') || resources, xObjStateManager,
normalizeWhitespace, combineTextItems).then(
function (formTextContent) {
Util.appendToArray(textContent.items, formTextContent.items);
Util.extendObj(textContent.styles, formTextContent.styles);
stateManager.restore();
xobjsCache.key = name;
xobjsCache.texts = formTextContent;
@ -1706,7 +1729,16 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
flushTextContentItem();
resolve(textContent);
});
}).catch(function(reason) {
if (this.options.ignoreErrors) {
// Error(s) in the TextContent -- allow text-extraction to continue.
warn('getTextContent - ignoring errors during task: ' + task.name);
flushTextContentItem();
return textContent;
}
throw reason;
}.bind(this));
},
extractDataStructures: