From 1cd9a28c8126e8461b7fe8c8a253d6d7ee328236 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sat, 17 Aug 2019 11:44:51 +0200 Subject: [PATCH] Replace the `XRef.cache` Array with a Map instead Given that the different types of `Stream`s will never be cached, this thus implies that the `XRef.cache` Array will *always* be more-or-less sparse. Generally speaking, the longer the document the more sparse the `XRef.cache` will thus become. For example, looking at the `pdf.pdf` file from the test-suite: The length of the `XRef.cache` Array will be a few hundred thousand elements, with approximately 95% of them being empty. Hence it seems pretty clear that an Array isn't really the best data-structure for this kind of cache, and this patch thus changes it to a Map instead. This patch-series was tested using the PDF file from issue 2618, i.e. http://bugzilla-attachments.gnome.org/attachment.cgi?id=226471, with the following manifest file: ``` [ { "id": "issue2618", "file": "../web/pdfs/issue2618.pdf", "md5": "", "rounds": 200, "type": "eq" } ] ``` which gave the following results when comparing this patch-series against the `master` branch: ``` -- Grouped By browser, stat -- browser | stat | Count | Baseline(ms) | Current(ms) | +/- | % | Result(P<.05) ------- | ------------ | ----- | ------------ | ----------- | --- | ----- | ------------- Firefox | Overall | 200 | 2736 | 2736 | 1 | 0.02 | Firefox | Page Request | 200 | 2 | 2 | 0 | -8.26 | faster Firefox | Rendering | 200 | 2733 | 2734 | 1 | 0.03 | ``` --- src/core/obj.js | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/core/obj.js b/src/core/obj.js index 3446f8821..9b74c7b1b 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -1043,8 +1043,7 @@ var XRef = (function XRefClosure() { this.pdfManager = pdfManager; this.entries = []; this.xrefstms = Object.create(null); - // prepare the XRef cache - this.cache = []; + this._cacheMap = new Map(); // Prepare the XRef cache. this.stats = { streamTypes: Object.create(null), fontTypes: Object.create(null), @@ -1636,9 +1635,10 @@ var XRef = (function XRefClosure() { if (!(ref instanceof Ref)) { throw new Error('ref object is not a reference'); } - var num = ref.num; - if (num in this.cache) { - var cacheEntry = this.cache[num]; + const num = ref.num; + + if (this._cacheMap.has(num)) { + const cacheEntry = this._cacheMap.get(num); // In documents with Object Streams, it's possible that cached `Dict`s // have not been assigned an `objId` yet (see e.g. issue3115r.pdf). if (cacheEntry instanceof Dict && !cacheEntry.objId) { @@ -1646,12 +1646,11 @@ var XRef = (function XRefClosure() { } return cacheEntry; } + let xrefEntry = this.getEntry(num); - var xrefEntry = this.getEntry(num); - - // the referenced entry can be free - if (xrefEntry === null) { - return (this.cache[num] = null); + if (xrefEntry === null) { // The referenced entry can be free. + this._cacheMap.set(num, xrefEntry); + return xrefEntry; } if (xrefEntry.uncompressed) { @@ -1709,7 +1708,7 @@ var XRef = (function XRefClosure() { xrefEntry = parser.getObj(); } if (!isStream(xrefEntry)) { - this.cache[num] = xrefEntry; + this._cacheMap.set(num, xrefEntry); } return xrefEntry; }, @@ -1757,7 +1756,7 @@ var XRef = (function XRefClosure() { num = nums[i]; var entry = this.entries[num]; if (entry && entry.offset === tableOffset && entry.gen === i) { - this.cache[num] = entries[i]; + this._cacheMap.set(num, entries[i]); } } xrefEntry = entries[xrefEntry.gen];