From 2643570364e1ed59a8d2d42b3f00674bd175cfbe Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 2 Feb 2024 11:05:47 +0100 Subject: [PATCH 1/2] [api-minor] Re-factor how Node.js packages/polyfills are loaded (issue 17245) *Please note:* This removes top level await from the GENERIC builds of the PDF.js library. Despite top level await being supported in all modern browsers/environments, note [the MDN compatibility data](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/await#browser_compatibility), it seems that many frameworks and build-tools unfortunately have trouble with it. Hence, in order to reduce the influx of support requests regarding top level await it thus seems that we'll have to try and fix this. Given that top level await is only needed for Node.js environments, to load packages/polyfills, we re-factor things to limit the asynchronicity to that environment. The "best" solution, with the least likelihood of causing future problems, would probably be to await the load of Node.js packages/polyfills e.g. at the top of the `getDocument`-function. Unfortunately that doesn't work though, since that's a *synchronous* function that we cannot change without breaking "the world". Hence we instead await the load of Node.js packages/polyfills together with the `PDFWorker` initialization, since that's the *first point* of asynchronicity during initialization/loading of a PDF document. The reason that this works is that the Node.js packages/polyfills are only needed during fetching of the PDF document respectively during rendering, neither of which can happen *until* the worker has been initialized. Hopefully this won't cause any future problems, since looking at the history of the PDF.js project I don't believe that we've (thus far) ever needed a Node.js dependency at an earlier point. This new pattern for accessing Node.js packages/polyfills will also require some care during development *and* importantly reviewing, to ensure that no new top level await is added in the main code-base. --- src/display/api.js | 9 +++ src/display/node_stream.js | 24 +++---- src/display/node_utils.js | 122 +++++++++++++++++++++++------------ src/display/stubs.js | 2 + test/unit/clitests_helper.js | 4 ++ 5 files changed, 103 insertions(+), 58 deletions(-) diff --git a/src/display/api.js b/src/display/api.js index 9c70a7c22..fa6fdaece 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -58,6 +58,7 @@ import { NodeCanvasFactory, NodeCMapReaderFactory, NodeFilterFactory, + NodePackages, NodeStandardFontDataFactory, } from "display-node_utils"; import { CanvasGraphics } from "./canvas.js"; @@ -2089,6 +2090,14 @@ class PDFWorker { * @type {Promise} */ get promise() { + if ( + typeof PDFJSDev !== "undefined" && + PDFJSDev.test("GENERIC") && + isNodeJS + ) { + // Ensure that all Node.js packages/polyfills have loaded. + return Promise.all([NodePackages.promise, this._readyCapability.promise]); + } return this._readyCapability.promise; } diff --git a/src/display/node_stream.js b/src/display/node_stream.js index 5c583ee2c..3ffabffe0 100644 --- a/src/display/node_stream.js +++ b/src/display/node_stream.js @@ -13,16 +13,12 @@ * limitations under the License. */ -import { - AbortException, - assert, - isNodeJS, - MissingPDFException, -} from "../shared/util.js"; +import { AbortException, assert, MissingPDFException } from "../shared/util.js"; import { extractFilenameFromHeader, validateRangeRequestCapabilities, } from "./network_utils.js"; +import { NodePackages } from "./node_utils.js"; if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) { throw new Error( @@ -30,18 +26,10 @@ if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) { ); } -let fs, http, https, url; -if (isNodeJS) { - // Native packages. - fs = await __non_webpack_import__("fs"); - http = await __non_webpack_import__("http"); - https = await __non_webpack_import__("https"); - url = await __non_webpack_import__("url"); -} - const fileUriRegex = /^file:\/\/\/[a-zA-Z]:\//; function parseUrl(sourceUrl) { + const url = NodePackages.get("url"); const parsedUrl = url.parse(sourceUrl); if (parsedUrl.protocol === "file:" || parsedUrl.host) { return parsedUrl; @@ -347,11 +335,13 @@ class PDFNodeStreamFullReader extends BaseFullReader { this._request = null; if (this._url.protocol === "http:") { + const http = NodePackages.get("http"); this._request = http.request( createRequestOptions(this._url, stream.httpHeaders), handleResponse ); } else { + const https = NodePackages.get("https"); this._request = https.request( createRequestOptions(this._url, stream.httpHeaders), handleResponse @@ -394,11 +384,13 @@ class PDFNodeStreamRangeReader extends BaseRangeReader { this._request = null; if (this._url.protocol === "http:") { + const http = NodePackages.get("http"); this._request = http.request( createRequestOptions(this._url, this._httpHeaders), handleResponse ); } else { + const https = NodePackages.get("https"); this._request = https.request( createRequestOptions(this._url, this._httpHeaders), handleResponse @@ -423,6 +415,7 @@ class PDFNodeStreamFsFullReader extends BaseFullReader { path = path.replace(/^\//, ""); } + const fs = NodePackages.get("fs"); fs.promises.lstat(path).then( stat => { // Setting right content length. @@ -453,6 +446,7 @@ class PDFNodeStreamFsRangeReader extends BaseRangeReader { path = path.replace(/^\//, ""); } + const fs = NodePackages.get("fs"); this._setReadableStream(fs.createReadStream(path, { start, end: end - 1 })); } } diff --git a/src/display/node_utils.js b/src/display/node_utils.js index 4281c1a83..aca4ad750 100644 --- a/src/display/node_utils.js +++ b/src/display/node_utils.js @@ -27,56 +27,90 @@ if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) { ); } -let fs, canvas, path2d; if (isNodeJS) { - // Native packages. - fs = await __non_webpack_import__("fs"); - // Optional, third-party, packages. - try { - canvas = await __non_webpack_import__("canvas"); - } catch {} - try { - path2d = await __non_webpack_import__("path2d"); - } catch {} + // eslint-disable-next-line no-var + var packageCapability = Promise.withResolvers(); + // eslint-disable-next-line no-var + var packageMap = null; + + const loadPackages = async () => { + // Native packages. + const fs = await __non_webpack_import__("fs"), + http = await __non_webpack_import__("http"), + https = await __non_webpack_import__("https"), + url = await __non_webpack_import__("url"); + + // Optional, third-party, packages. + let canvas, path2d; + if (typeof PDFJSDev !== "undefined" && !PDFJSDev.test("SKIP_BABEL")) { + try { + canvas = await __non_webpack_import__("canvas"); + } catch {} + try { + path2d = await __non_webpack_import__("path2d"); + } catch {} + } + + return new Map(Object.entries({ fs, http, https, url, canvas, path2d })); + }; + + loadPackages().then( + map => { + packageMap = map; + packageCapability.resolve(); + + if (typeof PDFJSDev === "undefined" || PDFJSDev.test("SKIP_BABEL")) { + return; + } + if (!globalThis.DOMMatrix) { + const DOMMatrix = map.get("canvas")?.DOMMatrix; + + if (DOMMatrix) { + globalThis.DOMMatrix = DOMMatrix; + } else { + warn("Cannot polyfill `DOMMatrix`, rendering may be broken."); + } + } + if (!globalThis.Path2D) { + const CanvasRenderingContext2D = + map.get("canvas")?.CanvasRenderingContext2D; + const applyPath2DToCanvasRenderingContext = + map.get("path2d")?.applyPath2DToCanvasRenderingContext; + const Path2D = map.get("path2d")?.Path2D; + + if ( + CanvasRenderingContext2D && + applyPath2DToCanvasRenderingContext && + Path2D + ) { + applyPath2DToCanvasRenderingContext(CanvasRenderingContext2D); + globalThis.Path2D = Path2D; + } else { + warn("Cannot polyfill `Path2D`, rendering may be broken."); + } + } + }, + reason => { + warn(`loadPackages: ${reason}`); + + packageMap = new Map(); + packageCapability.resolve(); + } + ); } -if (typeof PDFJSDev !== "undefined" && !PDFJSDev.test("SKIP_BABEL")) { - (function checkDOMMatrix() { - if (globalThis.DOMMatrix || !isNodeJS) { - return; - } - const DOMMatrix = canvas?.DOMMatrix; +class NodePackages { + static get promise() { + return packageCapability.promise; + } - if (DOMMatrix) { - globalThis.DOMMatrix = DOMMatrix; - } else { - warn("Cannot polyfill `DOMMatrix`, rendering may be broken."); - } - })(); - - (function checkPath2D() { - if (globalThis.Path2D || !isNodeJS) { - return; - } - const CanvasRenderingContext2D = canvas?.CanvasRenderingContext2D; - const applyPath2DToCanvasRenderingContext = - path2d?.applyPath2DToCanvasRenderingContext; - const Path2D = path2d?.Path2D; - - if ( - CanvasRenderingContext2D && - applyPath2DToCanvasRenderingContext && - Path2D - ) { - applyPath2DToCanvasRenderingContext(CanvasRenderingContext2D); - globalThis.Path2D = Path2D; - } else { - warn("Cannot polyfill `Path2D`, rendering may be broken."); - } - })(); + static get(name) { + return packageMap?.get(name); + } } const fetchData = function (url) { + const fs = NodePackages.get("fs"); return fs.promises.readFile(url).then(data => new Uint8Array(data)); }; @@ -87,6 +121,7 @@ class NodeCanvasFactory extends BaseCanvasFactory { * @ignore */ _createCanvas(width, height) { + const canvas = NodePackages.get("canvas"); return canvas.createCanvas(width, height); } } @@ -113,5 +148,6 @@ export { NodeCanvasFactory, NodeCMapReaderFactory, NodeFilterFactory, + NodePackages, NodeStandardFontDataFactory, }; diff --git a/src/display/stubs.js b/src/display/stubs.js index 271f0d700..0b3b9a0bb 100644 --- a/src/display/stubs.js +++ b/src/display/stubs.js @@ -16,6 +16,7 @@ const NodeCanvasFactory = null; const NodeCMapReaderFactory = null; const NodeFilterFactory = null; +const NodePackages = null; const NodeStandardFontDataFactory = null; const PDFFetchStream = null; const PDFNetworkStream = null; @@ -25,6 +26,7 @@ export { NodeCanvasFactory, NodeCMapReaderFactory, NodeFilterFactory, + NodePackages, NodeStandardFontDataFactory, PDFFetchStream, PDFNetworkStream, diff --git a/test/unit/clitests_helper.js b/test/unit/clitests_helper.js index d86eeacbb..a5a1c788e 100644 --- a/test/unit/clitests_helper.js +++ b/test/unit/clitests_helper.js @@ -18,6 +18,7 @@ import { setVerbosityLevel, VerbosityLevel, } from "../../src/shared/util.js"; +import { NodePackages } from "../../src/display/node_utils.js"; // Sets longer timeout, similar to `jasmine-boot.js`. jasmine.DEFAULT_TIMEOUT_INTERVAL = 30000; @@ -29,6 +30,9 @@ if (!isNodeJS) { ); } +// Ensure that all Node.js packages/polyfills have loaded. +await NodePackages.promise; + // Reduce the amount of console "spam", by ignoring `info`/`warn` calls, // when running the unit-tests in Node.js/Travis. setVerbosityLevel(VerbosityLevel.ERRORS); From 9418ed19e7dcd799a5bec12a0c7328a98f291bb5 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 2 Feb 2024 12:00:06 +0100 Subject: [PATCH 2/2] [api-minor] Remove the, now unused, `pdfjsLibPromise` global This global was only introduced to work-around problems caused by the GENERIC PDF.js build using top level await. Since that was removed in the previous commit, this global is now dead code. --- gulpfile.mjs | 10 ++-------- web/pdfjs.js | 9 --------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/gulpfile.mjs b/gulpfile.mjs index 4f3af9d64..55629e6d2 100644 --- a/gulpfile.mjs +++ b/gulpfile.mjs @@ -445,10 +445,8 @@ function checkChromePreferencesFile(chromePrefsPath, webPrefs) { function tweakWebpackOutput(jsName) { const replacer = [ - " __webpack_exports__ = {};", - ",__webpack_exports__={};", - " __webpack_exports__ = await __webpack_exports__;", - "\\(__webpack_exports__=await __webpack_exports__\\)", + " __webpack_exports__ = {};", // Normal builds. + ",__webpack_exports__={};", // Minified builds. ]; const regex = new RegExp(`(${replacer.join("|")})`, "gm"); @@ -458,10 +456,6 @@ function tweakWebpackOutput(jsName) { return ` __webpack_exports__ = globalThis.${jsName} = {};`; case ",__webpack_exports__={};": return `,__webpack_exports__=globalThis.${jsName}={};`; - case " __webpack_exports__ = await __webpack_exports__;": - return ` __webpack_exports__ = globalThis.${jsName} = await (globalThis.${jsName}Promise = __webpack_exports__);`; - case "(__webpack_exports__=await __webpack_exports__)": - return `(__webpack_exports__=globalThis.${jsName}=await (globalThis.${jsName}Promise=__webpack_exports__))`; } return match; }); diff --git a/web/pdfjs.js b/web/pdfjs.js index 670df4e07..4db23cc05 100644 --- a/web/pdfjs.js +++ b/web/pdfjs.js @@ -13,15 +13,6 @@ * limitations under the License. */ -// Ensure that the viewer waits for the library to complete loading, -// to avoid breaking e.g. the standalone viewer components (see issue 17228). -if ( - (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) && - !globalThis.pdfjsLib -) { - await globalThis.pdfjsLibPromise; -} - const { AbortException, AnnotationEditorLayer,