mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-26 10:08:06 +02:00
Add local caching of non-font Graphics State (ExtGState) data in PartialEvaluator.getTextContent
It turns out that `getTextContent` suffers from problems with repeated GStates *similar* to those in `getOperatorList`; please see the previous patch. While only `/ExtGState` resources containing Fonts will actually be *parsed* by `PartialEvaluator.getTextContent`, we're still forced to fetch/validate repeated `/ExtGState` resources even though *most* of them won't affect the textContent (since they mostly contain purely graphical state). With these changes we also no longer need to immediately reset the current text-state when encountering a `setGState` operator, unless the GState actually contains a Font, which may thus improve text-selection in some cases.
This commit is contained in:
parent
90eb579713
commit
981ff41b5f
1 changed files with 53 additions and 18 deletions
|
@ -1848,6 +1848,7 @@ class PartialEvaluator {
|
||||||
// The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd.
|
// The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd.
|
||||||
var xobjs = null;
|
var xobjs = null;
|
||||||
const emptyXObjectCache = new LocalImageCache();
|
const emptyXObjectCache = new LocalImageCache();
|
||||||
|
const emptyGStateCache = new LocalGStateCache();
|
||||||
|
|
||||||
var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager);
|
var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager);
|
||||||
|
|
||||||
|
@ -2420,25 +2421,59 @@ class PartialEvaluator {
|
||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
case OPS.setGState:
|
case OPS.setGState:
|
||||||
flushTextContentItem();
|
name = args[0].name;
|
||||||
var dictName = args[0];
|
if (name && emptyGStateCache.getByName(name)) {
|
||||||
var extGState = resources.get("ExtGState");
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (!isDict(extGState) || !isName(dictName)) {
|
next(
|
||||||
break;
|
new Promise(function (resolveGState, rejectGState) {
|
||||||
}
|
if (!name) {
|
||||||
var gState = extGState.get(dictName.name);
|
throw new FormatError("GState must be referred to by name.");
|
||||||
if (!isDict(gState)) {
|
}
|
||||||
break;
|
|
||||||
}
|
const extGState = resources.get("ExtGState");
|
||||||
var gStateFont = gState.get("Font");
|
if (!(extGState instanceof Dict)) {
|
||||||
if (gStateFont) {
|
throw new FormatError("ExtGState should be a dictionary.");
|
||||||
textState.fontName = null;
|
}
|
||||||
textState.fontSize = gStateFont[1];
|
|
||||||
next(handleSetFont(null, gStateFont[0]));
|
const gState = extGState.get(name);
|
||||||
return;
|
// TODO: Attempt to lookup cached GStates by reference as well,
|
||||||
}
|
// if and only if there are PDF documents where doing so
|
||||||
break;
|
// would significantly improve performance.
|
||||||
|
if (!(gState instanceof Dict)) {
|
||||||
|
throw new FormatError("GState should be a dictionary.");
|
||||||
|
}
|
||||||
|
|
||||||
|
const gStateFont = gState.get("Font");
|
||||||
|
if (!gStateFont) {
|
||||||
|
emptyGStateCache.set(name, gState.objId, true);
|
||||||
|
|
||||||
|
resolveGState();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
flushTextContentItem();
|
||||||
|
|
||||||
|
textState.fontName = null;
|
||||||
|
textState.fontSize = gStateFont[1];
|
||||||
|
handleSetFont(null, gStateFont[0]).then(
|
||||||
|
resolveGState,
|
||||||
|
rejectGState
|
||||||
|
);
|
||||||
|
}).catch(function (reason) {
|
||||||
|
if (reason instanceof AbortException) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (self.options.ignoreErrors) {
|
||||||
|
// Error(s) in the ExtGState -- allow text-extraction to
|
||||||
|
// continue.
|
||||||
|
warn(`getTextContent - ignoring ExtGState: "${reason}".`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
throw reason;
|
||||||
|
})
|
||||||
|
);
|
||||||
|
return;
|
||||||
} // switch
|
} // switch
|
||||||
if (textContent.items.length >= sink.desiredSize) {
|
if (textContent.items.length >= sink.desiredSize) {
|
||||||
// Wait for ready, if we reach highWaterMark.
|
// Wait for ready, if we reach highWaterMark.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue