From beecde32295ed0cbea2b4d26143379eba4a74507 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sun, 20 Feb 2022 12:17:10 +0100 Subject: [PATCH 1/3] Introduce (some) private properties/methods in the `PDFObjects` class This ensures that the underlying data cannot be accessed directly, from the outside, since that's definately not intended here. Note that we expose `PDFObjects`-instances, via the `commonObjs` and `objs` properties, on the `PDFPageProxy`-instances hence these changes really cannot hurt. --- src/display/api.js | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/display/api.js b/src/display/api.js index 5c48b6a8e..26a3d3389 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -3049,19 +3049,17 @@ class WorkerTransport { * @ignore */ class PDFObjects { - constructor() { - this._objs = Object.create(null); - } + #objs = Object.create(null); /** * Ensures there is an object defined for `objId`. - * @private */ - _ensureObj(objId) { - if (this._objs[objId]) { - return this._objs[objId]; + #ensureObj(objId) { + const obj = this.#objs[objId]; + if (obj) { + return obj; } - return (this._objs[objId] = { + return (this.#objs[objId] = { capability: createPromiseCapability(), data: null, resolved: false, @@ -3080,30 +3078,30 @@ class PDFObjects { // If there is a callback, then the get can be async and the object is // not required to be resolved right now. if (callback) { - this._ensureObj(objId).capability.promise.then(callback); + this.#ensureObj(objId).capability.promise.then(callback); return null; } // If there isn't a callback, the user expects to get the resolved data // directly. - const obj = this._objs[objId]; + const obj = this.#objs[objId]; // If there isn't an object yet or the object isn't resolved, then the // data isn't ready yet! - if (!obj || !obj.resolved) { + if (!obj?.resolved) { throw new Error(`Requesting object that isn't resolved yet ${objId}.`); } return obj.data; } has(objId) { - const obj = this._objs[objId]; + const obj = this.#objs[objId]; return obj?.resolved || false; } /** * Resolves the object `objId` with optional `data`. */ - resolve(objId, data) { - const obj = this._ensureObj(objId); + resolve(objId, data = null) { + const obj = this.#ensureObj(objId); obj.resolved = true; obj.data = data; @@ -3111,7 +3109,7 @@ class PDFObjects { } clear() { - this._objs = Object.create(null); + this.#objs = Object.create(null); } } From f4712bc0ad967af3b153e760749da935a7b1f448 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sun, 20 Feb 2022 12:33:33 +0100 Subject: [PATCH 2/3] Simplify the data stored on `PDFObjects`-instances The manually tracked `resolved`-property is no longer necessary, since the same information is now directly available on all `PromiseCapability`-instances. Furthermore, since the `PDFObjects.resolve` method is not documented as accepting e.g. only Object-data, we probably shouldn't resolve the `PromiseCapability` with the `data` and instead only store it on the `PDFObjects`-instance.[1] --- [1] While Objects are passed by reference in JavaScript, other primitives such as e.g. strings are passed by value and the current implementation *could* thus lead to increased memory usage. Given how we're using `PDFObjects` in the PDF.js code-base none of this should be an issue, but it still cannot hurt to change this. --- src/display/api.js | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/display/api.js b/src/display/api.js index 26a3d3389..e99ff17d5 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -3062,7 +3062,6 @@ class PDFObjects { return (this.#objs[objId] = { capability: createPromiseCapability(), data: null, - resolved: false, }); } @@ -3078,7 +3077,8 @@ class PDFObjects { // If there is a callback, then the get can be async and the object is // not required to be resolved right now. if (callback) { - this.#ensureObj(objId).capability.promise.then(callback); + const obj = this.#ensureObj(objId); + obj.capability.promise.then(() => callback(obj.data)); return null; } // If there isn't a callback, the user expects to get the resolved data @@ -3086,7 +3086,7 @@ class PDFObjects { const obj = this.#objs[objId]; // If there isn't an object yet or the object isn't resolved, then the // data isn't ready yet! - if (!obj?.resolved) { + if (!obj?.capability.settled) { throw new Error(`Requesting object that isn't resolved yet ${objId}.`); } return obj.data; @@ -3094,7 +3094,7 @@ class PDFObjects { has(objId) { const obj = this.#objs[objId]; - return obj?.resolved || false; + return obj?.capability.settled || false; } /** @@ -3102,10 +3102,8 @@ class PDFObjects { */ resolve(objId, data = null) { const obj = this.#ensureObj(objId); - - obj.resolved = true; obj.data = data; - obj.capability.resolve(data); + obj.capability.resolve(); } clear() { From bad15894fc0ceac0cd8823dd4b55689261d29ca1 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sun, 20 Feb 2022 12:52:28 +0100 Subject: [PATCH 3/3] Improve the JSDocs for the `PDFObjects` class Given that we expose `PDFObjects`-instances, via the `commonObjs` and `objs` properties, on the `PDFPageProxy`-instances this ought to help provide slightly better TypeScript definitions. --- src/display/api.js | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/display/api.js b/src/display/api.js index e99ff17d5..de847bb50 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -91,7 +91,7 @@ const DefaultStandardFontDataFactory = */ /** - * @type IPDFStreamFactory + * @type {IPDFStreamFactory} * @private */ let createPDFNetworkStream; @@ -1229,6 +1229,7 @@ class PDFPageProxy { this._transport = transport; this._stats = pdfBug ? new StatTimer() : null; this._pdfBug = pdfBug; + /** @type {PDFObjects} */ this.commonObjs = transport.commonObjs; this.objs = new PDFObjects(); @@ -3046,13 +3047,15 @@ class WorkerTransport { * A PDF document and page is built of many objects. E.g. there are objects for * fonts, images, rendering code, etc. These objects may get processed inside of * a worker. This class implements some basic methods to manage these objects. - * @ignore */ class PDFObjects { #objs = Object.create(null); /** * Ensures there is an object defined for `objId`. + * + * @param {string} objId + * @returns {Object} */ #ensureObj(objId) { const obj = this.#objs[objId]; @@ -3072,6 +3075,10 @@ class PDFObjects { * If called *with* a callback, the callback is called with the data of the * object once the object is resolved. That means, if you call this method * and the object is already resolved, the callback gets called right away. + * + * @param {string} objId + * @param {function} [callback] + * @returns {any} */ get(objId, callback = null) { // If there is a callback, then the get can be async and the object is @@ -3092,6 +3099,10 @@ class PDFObjects { return obj.data; } + /** + * @param {string} objId + * @returns {boolean} + */ has(objId) { const obj = this.#objs[objId]; return obj?.capability.settled || false; @@ -3099,6 +3110,9 @@ class PDFObjects { /** * Resolves the object `objId` with optional `data`. + * + * @param {string} objId + * @param {any} [data] */ resolve(objId, data = null) { const obj = this.#ensureObj(objId);