1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-19 14:48:08 +02:00

Merge pull request #18959 from Snuffleupagus/Node-20

[api-minor] Update the minimum supported Node.js version to 20, and only support the Fetch API for "remote" PDF documents in Node.js
This commit is contained in:
Jonas Jenwald 2024-11-04 10:29:55 +01:00 committed by GitHub
commit cefd1ebcd2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 145 additions and 375 deletions

View file

@ -11,7 +11,7 @@ jobs:
strategy:
fail-fast: false
matrix:
node-version: [18, lts/*, 22, 23]
node-version: [20, 22, 23]
steps:
- name: Checkout repository

View file

@ -84,7 +84,7 @@ const ENV_TARGETS = [
"Chrome >= 103",
"Firefox ESR",
"Safari >= 16.4",
"Node >= 18",
"Node >= 20",
"> 1%",
"not IE > 0",
"not dead",
@ -2271,7 +2271,7 @@ function packageJson() {
url: `git+${DIST_GIT_URL}`,
},
engines: {
node: ">=18",
node: ">=20",
},
scripts: {},
};

2
package-lock.json generated
View file

@ -64,7 +64,7 @@
"yargs": "^17.7.2"
},
"engines": {
"node": ">=18"
"node": ">=20"
}
},
"node_modules/@aashutoshrathi/word-wrap": {

View file

@ -63,7 +63,7 @@
"url": "git://github.com/mozilla/pdf.js.git"
},
"engines": {
"node": ">=18"
"node": ">=20"
},
"license": "Apache-2.0"
}

View file

@ -56,7 +56,6 @@ import {
NodeCanvasFactory,
NodeCMapReaderFactory,
NodeFilterFactory,
NodePackages,
NodeStandardFontDataFactory,
} from "display-node_utils";
import { CanvasGraphics } from "./canvas.js";
@ -451,15 +450,20 @@ function getDocument(src = {}) {
PDFJSDev.test("GENERIC") &&
isNodeJS
) {
const isFetchSupported =
typeof fetch !== "undefined" &&
typeof Response !== "undefined" &&
"body" in Response.prototype;
NetworkStream =
isFetchSupported && isValidFetchUrl(url)
? PDFFetchStream
: PDFNodeStream;
if (isValidFetchUrl(url)) {
if (
typeof fetch === "undefined" ||
typeof Response === "undefined" ||
!("body" in Response.prototype)
) {
throw new Error(
"getDocument - the Fetch API was disabled in Node.js, see `--no-experimental-fetch`."
);
}
NetworkStream = PDFFetchStream;
} else {
NetworkStream = PDFNodeStream;
}
} else {
NetworkStream = isValidFetchUrl(url)
? PDFFetchStream
@ -2137,14 +2141,6 @@ class PDFWorker {
* @type {Promise<void>}
*/
get promise() {
if (
typeof PDFJSDev !== "undefined" &&
PDFJSDev.test("GENERIC") &&
isNodeJS
) {
// Ensure that all Node.js packages/polyfills have loaded.
return Promise.all([NodePackages.promise, this._readyCapability.promise]);
}
return this._readyCapability.promise;
}

View file

@ -12,14 +12,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* globals process */
import { AbortException, assert, MissingPDFException } from "../shared/util.js";
import {
createHeaders,
extractFilenameFromHeader,
validateRangeRequestCapabilities,
} from "./network_utils.js";
import { NodePackages } from "./node_utils.js";
if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) {
throw new Error(
@ -33,28 +28,18 @@ function parseUrlOrPath(sourceUrl) {
if (urlRegex.test(sourceUrl)) {
return new URL(sourceUrl);
}
const url = NodePackages.get("url");
const url = process.getBuiltinModule("url");
return new URL(url.pathToFileURL(sourceUrl));
}
function createRequest(url, headers, callback) {
if (url.protocol === "http:") {
const http = NodePackages.get("http");
return http.request(url, { headers }, callback);
}
const https = NodePackages.get("https");
return https.request(url, { headers }, callback);
}
class PDFNodeStream {
constructor(source) {
this.source = source;
this.url = parseUrlOrPath(source.url);
this.isHttp =
this.url.protocol === "http:" || this.url.protocol === "https:";
// Check if url refers to filesystem.
this.isFsUrl = this.url.protocol === "file:";
this.headers = createHeaders(this.isHttp, source.httpHeaders);
assert(
this.url.protocol === "file:",
"PDFNodeStream only supports file:// URLs."
);
this._fullRequestReader = null;
this._rangeRequestReaders = [];
@ -69,9 +54,7 @@ class PDFNodeStream {
!this._fullRequestReader,
"PDFNodeStream.getFullReader can only be called once."
);
this._fullRequestReader = this.isFsUrl
? new PDFNodeStreamFsFullReader(this)
: new PDFNodeStreamFullReader(this);
this._fullRequestReader = new PDFNodeStreamFsFullReader(this);
return this._fullRequestReader;
}
@ -79,9 +62,7 @@ class PDFNodeStream {
if (end <= this._progressiveDataLength) {
return null;
}
const rangeReader = this.isFsUrl
? new PDFNodeStreamFsRangeReader(this, start, end)
: new PDFNodeStreamRangeReader(this, start, end);
const rangeReader = new PDFNodeStreamFsRangeReader(this, start, end);
this._rangeRequestReaders.push(rangeReader);
return rangeReader;
}
@ -95,7 +76,7 @@ class PDFNodeStream {
}
}
class BaseFullReader {
class PDFNodeStreamFsFullReader {
constructor(stream) {
this._url = stream.url;
this._done = false;
@ -118,6 +99,24 @@ class BaseFullReader {
this._readableStream = null;
this._readCapability = Promise.withResolvers();
this._headersCapability = Promise.withResolvers();
const fs = process.getBuiltinModule("fs");
fs.promises.lstat(this._url).then(
stat => {
// Setting right content length.
this._contentLength = stat.size;
this._setReadableStream(fs.createReadStream(this._url));
this._headersCapability.resolve();
},
error => {
if (error.code === "ENOENT") {
error = new MissingPDFException(`Missing PDF "${this._url}".`);
}
this._storedError = error;
this._headersCapability.reject(error);
}
);
}
get headersReady() {
@ -210,8 +209,8 @@ class BaseFullReader {
}
}
class BaseRangeReader {
constructor(stream) {
class PDFNodeStreamFsRangeReader {
constructor(stream, start, end) {
this._url = stream.url;
this._done = false;
this._storedError = null;
@ -221,6 +220,11 @@ class BaseRangeReader {
this._readCapability = Promise.withResolvers();
const source = stream.source;
this._isStreamingSupported = !source.disableStream;
const fs = process.getBuiltinModule("fs");
this._setReadableStream(
fs.createReadStream(this._url, { start, end: end - 1 })
);
}
get isStreamingSupported() {
@ -288,112 +292,4 @@ class BaseRangeReader {
}
}
class PDFNodeStreamFullReader extends BaseFullReader {
constructor(stream) {
super(stream);
// Node.js requires the `headers` to be a regular Object.
const headers = Object.fromEntries(stream.headers);
const handleResponse = response => {
if (response.statusCode === 404) {
const error = new MissingPDFException(`Missing PDF "${this._url}".`);
this._storedError = error;
this._headersCapability.reject(error);
return;
}
this._headersCapability.resolve();
this._setReadableStream(response);
const responseHeaders = new Headers(this._readableStream.headers);
const { allowRangeRequests, suggestedLength } =
validateRangeRequestCapabilities({
responseHeaders,
isHttp: stream.isHttp,
rangeChunkSize: this._rangeChunkSize,
disableRange: this._disableRange,
});
this._isRangeSupported = allowRangeRequests;
// Setting right content length.
this._contentLength = suggestedLength || this._contentLength;
this._filename = extractFilenameFromHeader(responseHeaders);
};
this._request = createRequest(this._url, headers, handleResponse);
this._request.on("error", reason => {
this._storedError = reason;
this._headersCapability.reject(reason);
});
// Note: `request.end(data)` is used to write `data` to request body
// and notify end of request. But one should always call `request.end()`
// even if there is no data to write -- (to notify the end of request).
this._request.end();
}
}
class PDFNodeStreamRangeReader extends BaseRangeReader {
constructor(stream, start, end) {
super(stream);
// Node.js requires the `headers` to be a regular Object.
const headers = Object.fromEntries(stream.headers);
headers.Range = `bytes=${start}-${end - 1}`;
const handleResponse = response => {
if (response.statusCode === 404) {
const error = new MissingPDFException(`Missing PDF "${this._url}".`);
this._storedError = error;
return;
}
this._setReadableStream(response);
};
this._request = createRequest(this._url, headers, handleResponse);
this._request.on("error", reason => {
this._storedError = reason;
});
this._request.end();
}
}
class PDFNodeStreamFsFullReader extends BaseFullReader {
constructor(stream) {
super(stream);
const fs = NodePackages.get("fs");
fs.promises.lstat(this._url).then(
stat => {
// Setting right content length.
this._contentLength = stat.size;
this._setReadableStream(fs.createReadStream(this._url));
this._headersCapability.resolve();
},
error => {
if (error.code === "ENOENT") {
error = new MissingPDFException(`Missing PDF "${this._url}".`);
}
this._storedError = error;
this._headersCapability.reject(error);
}
);
}
}
class PDFNodeStreamFsRangeReader extends BaseRangeReader {
constructor(stream, start, end) {
super(stream);
const fs = NodePackages.get("fs");
this._setReadableStream(
fs.createReadStream(this._url, { start, end: end - 1 })
);
}
}
export { PDFNodeStream };

View file

@ -12,6 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* globals process */
import { isNodeJS, warn } from "../shared/util.js";
import { BaseCanvasFactory } from "./canvas_factory.js";
@ -25,94 +26,63 @@ if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) {
);
}
if (isNodeJS) {
// eslint-disable-next-line no-var
var packageCapability = Promise.withResolvers();
// eslint-disable-next-line no-var
var packageMap = null;
if (
typeof PDFJSDev !== "undefined" &&
!PDFJSDev.test("SKIP_BABEL") &&
isNodeJS
) {
let canvas, path2d;
try {
const require = process
.getBuiltinModule("module")
.createRequire(import.meta.url);
const loadPackages = async () => {
// Native packages.
const fs = await __non_webpack_import__("fs"),
http = await __non_webpack_import__("http"),
https = await __non_webpack_import__("https"),
url = await __non_webpack_import__("url");
// Optional, third-party, packages.
let canvas, path2d;
if (typeof PDFJSDev !== "undefined" && !PDFJSDev.test("SKIP_BABEL")) {
try {
canvas = await __non_webpack_import__("canvas");
} catch {}
try {
path2d = await __non_webpack_import__("path2d");
} catch {}
try {
canvas = require("canvas");
} catch (ex) {
warn(`Cannot load "canvas" package: "${ex}".`);
}
return new Map(Object.entries({ fs, http, https, url, canvas, path2d }));
};
loadPackages().then(
map => {
packageMap = map;
packageCapability.resolve();
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("SKIP_BABEL")) {
return;
}
if (!globalThis.DOMMatrix) {
const DOMMatrix = map.get("canvas")?.DOMMatrix;
if (DOMMatrix) {
globalThis.DOMMatrix = DOMMatrix;
} else {
warn("Cannot polyfill `DOMMatrix`, rendering may be broken.");
}
}
if (!globalThis.Path2D) {
const CanvasRenderingContext2D =
map.get("canvas")?.CanvasRenderingContext2D;
const applyPath2DToCanvasRenderingContext =
map.get("path2d")?.applyPath2DToCanvasRenderingContext;
const Path2D = map.get("path2d")?.Path2D;
if (
CanvasRenderingContext2D &&
applyPath2DToCanvasRenderingContext &&
Path2D
) {
try {
applyPath2DToCanvasRenderingContext(CanvasRenderingContext2D);
} catch (ex) {
warn(`applyPath2DToCanvasRenderingContext: "${ex}".`);
}
globalThis.Path2D = Path2D;
} else {
warn("Cannot polyfill `Path2D`, rendering may be broken.");
}
}
},
reason => {
warn(`loadPackages: ${reason}`);
packageMap = new Map();
packageCapability.resolve();
try {
path2d = require("path2d");
} catch (ex) {
warn(`Cannot load "path2d" package: "${ex}".`);
}
);
}
} catch {}
class NodePackages {
static get promise() {
return packageCapability.promise;
if (!globalThis.DOMMatrix) {
const DOMMatrix = canvas?.DOMMatrix;
if (DOMMatrix) {
globalThis.DOMMatrix = DOMMatrix;
} else {
warn("Cannot polyfill `DOMMatrix`, rendering may be broken.");
}
}
if (!globalThis.Path2D) {
const CanvasRenderingContext2D = canvas?.CanvasRenderingContext2D;
const applyPath2DToCanvasRenderingContext =
path2d?.applyPath2DToCanvasRenderingContext;
const Path2D = path2d?.Path2D;
static get(name) {
return packageMap?.get(name);
if (
CanvasRenderingContext2D &&
applyPath2DToCanvasRenderingContext &&
Path2D
) {
try {
applyPath2DToCanvasRenderingContext(CanvasRenderingContext2D);
} catch (ex) {
warn(`applyPath2DToCanvasRenderingContext: "${ex}".`);
}
globalThis.Path2D = Path2D;
} else {
warn("Cannot polyfill `Path2D`, rendering may be broken.");
}
}
}
async function fetchData(url) {
const fs = NodePackages.get("fs");
const fs = process.getBuiltinModule("fs");
const data = await fs.promises.readFile(url);
return new Uint8Array(data);
}
@ -124,7 +94,10 @@ class NodeCanvasFactory extends BaseCanvasFactory {
* @ignore
*/
_createCanvas(width, height) {
const canvas = NodePackages.get("canvas");
const require = process
.getBuiltinModule("module")
.createRequire(import.meta.url);
const canvas = require("canvas");
return canvas.createCanvas(width, height);
}
}
@ -152,6 +125,5 @@ export {
NodeCanvasFactory,
NodeCMapReaderFactory,
NodeFilterFactory,
NodePackages,
NodeStandardFontDataFactory,
};

View file

@ -18,7 +18,6 @@ const DOMStandardFontDataFactory = null;
const NodeCanvasFactory = null;
const NodeCMapReaderFactory = null;
const NodeFilterFactory = null;
const NodePackages = null;
const NodeStandardFontDataFactory = null;
const PDFFetchStream = null;
const PDFNetworkStream = null;
@ -30,7 +29,6 @@ export {
NodeCanvasFactory,
NodeCMapReaderFactory,
NodeFilterFactory,
NodePackages,
NodeStandardFontDataFactory,
PDFFetchStream,
PDFNetworkStream,

View file

@ -18,7 +18,6 @@ import {
setVerbosityLevel,
VerbosityLevel,
} from "../../src/shared/util.js";
import { NodePackages } from "../../src/display/node_utils.js";
// Sets longer timeout, similar to `jasmine-boot.js`.
jasmine.DEFAULT_TIMEOUT_INTERVAL = 30000;
@ -30,9 +29,6 @@ if (!isNodeJS) {
);
}
// Ensure that all Node.js packages/polyfills have loaded.
await NodePackages.promise;
// Reduce the amount of console "spam", by ignoring `info`/`warn` calls,
// when running the unit-tests in Node.js/Travis.
setVerbosityLevel(VerbosityLevel.ERRORS);

View file

@ -14,7 +14,6 @@
*/
import { AbortException, isNodeJS } from "../../src/shared/util.js";
import { createTemporaryNodeServer } from "./test_utils.js";
import { PDFNodeStream } from "../../src/display/node_stream.js";
// Ensure that these tests only run in Node.js environments.
@ -24,98 +23,49 @@ if (!isNodeJS) {
);
}
const url = await __non_webpack_import__("url");
describe("node_stream", function () {
let tempServer = null;
const url = process.getBuiltinModule("url");
const cwdURL = url.pathToFileURL(process.cwd()) + "/";
const pdf = new URL("./test/pdfs/tracemonkey.pdf", cwdURL).href;
const pdfLength = 1016315;
beforeAll(function () {
tempServer = createTemporaryNodeServer();
});
afterAll(function () {
// Close the server from accepting new connections after all test finishes.
const { server } = tempServer;
server.close();
tempServer = null;
});
it("read both http(s) and filesystem pdf files", async function () {
const stream1 = new PDFNodeStream({
url: `http://127.0.0.1:${tempServer.port}/tracemonkey.pdf`,
rangeChunkSize: 65536,
disableStream: true,
disableRange: true,
});
const stream2 = new PDFNodeStream({
it("read filesystem pdf files", async function () {
const stream = new PDFNodeStream({
url: pdf,
rangeChunkSize: 65536,
disableStream: true,
disableRange: true,
});
const fullReader1 = stream1.getFullReader();
const fullReader2 = stream2.getFullReader();
const fullReader = stream.getFullReader();
let isStreamingSupported1, isRangeSupported1;
const promise1 = fullReader1.headersReady.then(() => {
isStreamingSupported1 = fullReader1.isStreamingSupported;
isRangeSupported1 = fullReader1.isRangeSupported;
let isStreamingSupported, isRangeSupported;
const promise = fullReader.headersReady.then(() => {
isStreamingSupported = fullReader.isStreamingSupported;
isRangeSupported = fullReader.isRangeSupported;
});
let isStreamingSupported2, isRangeSupported2;
const promise2 = fullReader2.headersReady.then(() => {
isStreamingSupported2 = fullReader2.isStreamingSupported;
isRangeSupported2 = fullReader2.isRangeSupported;
});
let len1 = 0,
len2 = 0;
const read1 = function () {
return fullReader1.read().then(function (result) {
let len = 0;
const read = function () {
return fullReader.read().then(function (result) {
if (result.done) {
return undefined;
}
len1 += result.value.byteLength;
return read1();
});
};
const read2 = function () {
return fullReader2.read().then(function (result) {
if (result.done) {
return undefined;
}
len2 += result.value.byteLength;
return read2();
len += result.value.byteLength;
return read();
});
};
await Promise.all([read1(), read2(), promise1, promise2]);
await Promise.all([read(), promise]);
expect(isStreamingSupported1).toEqual(false);
expect(isRangeSupported1).toEqual(false);
expect(isStreamingSupported2).toEqual(false);
expect(isRangeSupported2).toEqual(false);
expect(len1).toEqual(pdfLength);
expect(len1).toEqual(len2);
expect(isStreamingSupported).toEqual(false);
expect(isRangeSupported).toEqual(false);
expect(len).toEqual(pdfLength);
});
it("read custom ranges for both http(s) and filesystem urls", async function () {
it("read custom ranges for filesystem urls", async function () {
const rangeSize = 32768;
const stream1 = new PDFNodeStream({
url: `http://127.0.0.1:${tempServer.port}/tracemonkey.pdf`,
length: pdfLength,
rangeChunkSize: rangeSize,
disableStream: true,
disableRange: false,
});
const stream2 = new PDFNodeStream({
const stream = new PDFNodeStream({
url: pdf,
length: pdfLength,
rangeChunkSize: rangeSize,
@ -123,53 +73,28 @@ describe("node_stream", function () {
disableRange: false,
});
const fullReader1 = stream1.getFullReader();
const fullReader2 = stream2.getFullReader();
const fullReader = stream.getFullReader();
let isStreamingSupported1, isRangeSupported1, fullReaderCancelled1;
let isStreamingSupported2, isRangeSupported2, fullReaderCancelled2;
const promise1 = fullReader1.headersReady.then(function () {
isStreamingSupported1 = fullReader1.isStreamingSupported;
isRangeSupported1 = fullReader1.isRangeSupported;
let isStreamingSupported, isRangeSupported, fullReaderCancelled;
const promise = fullReader.headersReady.then(function () {
isStreamingSupported = fullReader.isStreamingSupported;
isRangeSupported = fullReader.isRangeSupported;
// we shall be able to close the full reader without issues
fullReader1.cancel(new AbortException("Don't need fullReader1."));
fullReaderCancelled1 = true;
});
const promise2 = fullReader2.headersReady.then(function () {
isStreamingSupported2 = fullReader2.isStreamingSupported;
isRangeSupported2 = fullReader2.isRangeSupported;
fullReader2.cancel(new AbortException("Don't need fullReader2."));
fullReaderCancelled2 = true;
fullReader.cancel(new AbortException("Don't need fullReader."));
fullReaderCancelled = true;
});
// Skipping fullReader results, requesting something from the PDF end.
const tailSize = pdfLength % rangeSize || rangeSize;
const range11Reader = stream1.getRangeReader(
const range1Reader = stream.getRangeReader(
pdfLength - tailSize - rangeSize,
pdfLength - tailSize
);
const range12Reader = stream1.getRangeReader(
pdfLength - tailSize,
pdfLength
);
const range21Reader = stream2.getRangeReader(
pdfLength - tailSize - rangeSize,
pdfLength - tailSize
);
const range22Reader = stream2.getRangeReader(
pdfLength - tailSize,
pdfLength
);
const result11 = { value: 0 },
result12 = { value: 0 };
const result21 = { value: 0 },
result22 = { value: 0 };
const range2Reader = stream.getRangeReader(pdfLength - tailSize, pdfLength);
const result1 = { value: 0 },
result2 = { value: 0 };
const read = function (reader, lenResult) {
return reader.read().then(function (result) {
if (result.done) {
@ -181,23 +106,15 @@ describe("node_stream", function () {
};
await Promise.all([
read(range11Reader, result11),
read(range12Reader, result12),
read(range21Reader, result21),
read(range22Reader, result22),
promise1,
promise2,
read(range1Reader, result1),
read(range2Reader, result2),
promise,
]);
expect(result11.value).toEqual(rangeSize);
expect(result12.value).toEqual(tailSize);
expect(result21.value).toEqual(rangeSize);
expect(result22.value).toEqual(tailSize);
expect(isStreamingSupported1).toEqual(false);
expect(isRangeSupported1).toEqual(true);
expect(fullReaderCancelled1).toEqual(true);
expect(isStreamingSupported2).toEqual(false);
expect(isRangeSupported2).toEqual(true);
expect(fullReaderCancelled2).toEqual(true);
expect(result1.value).toEqual(rangeSize);
expect(result2.value).toEqual(tailSize);
expect(isStreamingSupported).toEqual(false);
expect(isRangeSupported).toEqual(true);
expect(fullReaderCancelled).toEqual(true);
});
});

View file

@ -20,13 +20,6 @@ import { fetchData as fetchDataDOM } from "../../src/display/display_utils.js";
import { fetchData as fetchDataNode } from "../../src/display/node_utils.js";
import { Ref } from "../../src/core/primitives.js";
let fs, http;
if (isNodeJS) {
// Native packages.
fs = await __non_webpack_import__("fs");
http = await __non_webpack_import__("http");
}
const TEST_PDFS_PATH = isNodeJS ? "./test/pdfs/" : "../pdfs/";
const CMAP_URL = isNodeJS ? "./external/bcmaps/" : "../../external/bcmaps/";
@ -132,6 +125,8 @@ function createIdFactory(pageIndex) {
function createTemporaryNodeServer() {
assert(isNodeJS, "Should only be used in Node.js environments.");
const fs = process.getBuiltinModule("fs"),
http = process.getBuiltinModule("http");
// Create http server to serve pdf data for tests.
const server = http
.createServer((request, response) => {