From 4ef547f4008b6dc02700768c6e038a64bc2e88f6 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 26 May 2020 09:47:59 +0200 Subject: [PATCH] Improve caching of empty `/XObject`s in the `PartialEvaluator.getTextContent` method It turns out that `getTextContent` suffers from *similar* problems with repeated images as `getOperatorList`; please see the previous patch. While only `/XObject` resources of the `Form`-type will actually be *parsed* in `PartialEvaluator.getTextContent`, since those are the only ones that may contain text, we're still forced to fetch repeated image resources where the name differs (but not the reference). Obviously it's less bad in this case, since we're not actually parsing `/XObject`s of e.g. the `Image`-type. However, you still want to avoid even fetching the data whenever possible, since `Stream`s are not cached on the `XRef` instance (given their potential size) and the lookup can thus be somewhat expensive in general. To address these issues, we can simply replace the existing name-only caching in `PartialEvaluator.getTextContent` with a new cache backed by `LocalImageCache` instead. --- src/core/evaluator.js | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 05c287f0e..05e033244 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -1724,7 +1724,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { // The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd. 
var xobjs = null; - var skipEmptyXObjs = Object.create(null); + const emptyXObjectCache = new LocalImageCache(); var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager); @@ -2200,7 +2200,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } var name = args[0].name; - if (name && skipEmptyXObjs[name] !== undefined) { + if (name && emptyXObjectCache.getByName(name)) { break; } @@ -2212,7 +2212,16 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { ); } - const xobj = xobjs.get(name); + let xobj = xobjs.getRaw(name); + if (xobj instanceof Ref) { + if (emptyXObjectCache.getByRef(xobj)) { + resolveXObject(); + return; + } + + xobj = xref.fetch(xobj); + } + if (!xobj) { resolveXObject(); return; @@ -2227,7 +2236,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } if (type.name !== "Form") { - skipEmptyXObjs[name] = true; + emptyXObjectCache.set(name, xobj.dict.objId, true); + resolveXObject(); return; } @@ -2278,7 +2288,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { }) .then(function () { if (!sinkWrapper.enqueueInvoked) { - skipEmptyXObjs[name] = true; + emptyXObjectCache.set(name, xobj.dict.objId, true); } resolveXObject(); }, rejectXObject);