From 28b0220bc2ababce340ee0a1e8a80026a6999446 Mon Sep 17 00:00:00 2001 From: Rob Wu Date: Sun, 24 Nov 2024 23:05:59 +0100 Subject: [PATCH 1/2] Replace createTemporaryNodeServer with TestPdfsServer Some tests rely on the presence of a server that serves PDF files. When tests are run from a web browser, the test files and PDF files are served by the same server (WebServer), but in Node.js that server is not around. Currently, the tests that depend on it start a minimal Node.js server that re-implements part of the functionality from WebServer. To avoid code duplication when tests depend on more complex behaviors, this patch replaces createTemporaryNodeServer with the existing WebServer, wrapped in a new test utility that has the same interface in Node.js and non-Node.js environments (=TestPdfsServer). This patch has been tested by running the refactored tests in the following three configurations: 1. From the browser: - http://localhost:8888/test/unit/unit_test.html?spec=api - http://localhost:8888/test/unit/unit_test.html?spec=fetch_stream 2. Run specific tests directly with jasmine without legacy bundling: `JASMINE_CONFIG_PATH=test/unit/clitests.json ./node_modules/.bin/jasmine --filter='^api|^fetch_stream'` 3. `gulp unittestcli` --- test/unit/api_spec.js | 24 ++---- test/unit/fetch_stream_spec.js | 27 ++----- test/unit/test_utils.js | 141 ++++++++++++++++++++------------- 3 files changed, 98 insertions(+), 94 deletions(-) diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index d73d23225..412ddc918 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -31,9 +31,9 @@ import { import { buildGetDocumentParams, CMAP_URL, - createTemporaryNodeServer, DefaultFileReaderFactory, TEST_PDFS_PATH, + TestPdfsServer, } from "./test_utils.js"; import { DefaultCanvasFactory, @@ -67,27 +67,17 @@ describe("api", function () { buildGetDocumentParams(tracemonkeyFileName); let CanvasFactory; - let tempServer = null; - beforeAll(function () { + beforeAll(async function () { CanvasFactory = new DefaultCanvasFactory({}); - if (isNodeJS) { - tempServer = createTemporaryNodeServer(); - } + await TestPdfsServer.ensureStarted(); }); - afterAll(function () { + afterAll(async function () { CanvasFactory = null; - if (isNodeJS) { - // Close the server from accepting new connections after all test - // finishes. - const { server } = tempServer; - server.close(); - - tempServer = null; - } + await TestPdfsServer.ensureStopped(); }); function waitSome(callback) { @@ -148,9 +138,7 @@ describe("api", function () { }); it("creates pdf doc from URL-object", async function () { - const urlObj = isNodeJS - ? new URL(`http://127.0.0.1:${tempServer.port}/${basicApiFileName}`) - : new URL(TEST_PDFS_PATH + basicApiFileName, window.location); + const urlObj = TestPdfsServer.resolveURL(basicApiFileName); const loadingTask = getDocument(urlObj); expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true); diff --git a/test/unit/fetch_stream_spec.js b/test/unit/fetch_stream_spec.js index 3fbe1efc1..cc9fcbbb5 100644 --- a/test/unit/fetch_stream_spec.js +++ b/test/unit/fetch_stream_spec.js @@ -13,35 +13,22 @@ * limitations under the License. */ -import { AbortException, isNodeJS } from "../../src/shared/util.js"; -import { createTemporaryNodeServer } from "./test_utils.js"; +import { AbortException } from "../../src/shared/util.js"; import { PDFFetchStream } from "../../src/display/fetch_stream.js"; +import { TestPdfsServer } from "./test_utils.js"; describe("fetch_stream", function () { - let tempServer = null; - function getPdfUrl() { - return isNodeJS - ? `http://127.0.0.1:${tempServer.port}/tracemonkey.pdf` - : new URL("../pdfs/tracemonkey.pdf", window.location).href; + return TestPdfsServer.resolveURL("tracemonkey.pdf").href; } const pdfLength = 1016315; - beforeAll(function () { - if (isNodeJS) { - tempServer = createTemporaryNodeServer(); - } + beforeAll(async function () { + await TestPdfsServer.ensureStarted(); }); - afterAll(function () { - if (isNodeJS) { - // Close the server from accepting new connections after all test - // finishes. - const { server } = tempServer; - server.close(); - - tempServer = null; - } + afterAll(async function () { + await TestPdfsServer.ensureStopped(); }); it("read with streaming", async function () { diff --git a/test/unit/test_utils.js b/test/unit/test_utils.js index db2fdc2fd..d08098fdc 100644 --- a/test/unit/test_utils.js +++ b/test/unit/test_utils.js @@ -122,73 +122,102 @@ function createIdFactory(pageIndex) { return page._localIdFactory; } -function createTemporaryNodeServer() { - assert(isNodeJS, "Should only be used in Node.js environments."); +// Some tests rely on special behavior from webserver.mjs. When loaded in the +// browser, the page is already served from WebServer. When running from +// Node.js, that is not the case. This helper starts the WebServer if needed, +// and offers a mechanism to resolve the URL in a uniform way. +class TestPdfsServer { + static #webServer; - const fs = process.getBuiltinModule("fs"), - http = process.getBuiltinModule("http"); - function isAcceptablePath(requestUrl) { - try { - // Reject unnormalized paths, to protect against path traversal attacks. - const url = new URL(requestUrl, "https://localhost/"); - return url.pathname === requestUrl; - } catch { - return false; + static #startCount = 0; + + static #startPromise; + + static async ensureStarted() { + if (this.#startCount++) { + // Already started before. E.g. from another beforeAll call. + return this.#startPromise; + } + if (!isNodeJS) { + // In web browsers, tests are presumably served by webserver.mjs. + return undefined; } - } - // Create http server to serve pdf data for tests. - const server = http - .createServer((request, response) => { - if (!isAcceptablePath(request.url)) { - response.writeHead(400); - response.end("Invalid path"); - return; - } - const filePath = process.cwd() + "/test/pdfs" + request.url; - fs.promises.lstat(filePath).then( - stat => { - if (!request.headers.range) { - const contentLength = stat.size; - const stream = fs.createReadStream(filePath); - response.writeHead(200, { - "Content-Type": "application/pdf", - "Content-Length": contentLength, - "Accept-Ranges": "bytes", - }); - stream.pipe(response); - } else { - const [start, end] = request.headers.range - .split("=")[1] - .split("-") - .map(x => Number(x)); - const stream = fs.createReadStream(filePath, { start, end }); - response.writeHead(206, { - "Content-Type": "application/pdf", - }); - stream.pipe(response); - } - }, - error => { - response.writeHead(404); - response.end(`File ${request.url} not found!`); - } - ); - }) - .listen(0); /* Listen on a random free port */ - return { - server, - port: server.address().port, - }; + this.#startPromise = this.#startServer().finally(() => { + this.#startPromise = null; + }); + return this.#startPromise; + } + + static async #startServer() { + // WebServer from webserver.mjs is imported dynamically instead of + // statically because we do not need it when running from the browser. + let WebServer; + if (import.meta.url.endsWith("/lib-legacy/test/unit/test_utils.js")) { + // When "gulp unittestcli" is used to run tests, the tests are run from + // pdf.js/build/lib-legacy/test/ instead of directly from pdf.js/test/. + // eslint-disable-next-line import/no-unresolved + ({ WebServer } = await import("../../../../test/webserver.mjs")); + } else { + ({ WebServer } = await import("../webserver.mjs")); + } + this.#webServer = new WebServer({ + host: "127.0.0.1", + root: TEST_PDFS_PATH, + }); + await new Promise(resolve => { + this.#webServer.start(resolve); + }); + } + + static async ensureStopped() { + assert(this.#startCount > 0, "ensureStarted() should be called first"); + assert(!this.#startPromise, "ensureStarted() should have resolved"); + if (--this.#startCount) { + // Keep server alive as long as there is an ensureStarted() that was not + // followed by an ensureStopped() call. + // This could happen if ensureStarted() was called again before + // ensureStopped() was called from afterAll(). + return; + } + if (!isNodeJS) { + // Web browsers cannot stop the server. + return; + } + + await new Promise(resolve => { + this.#webServer.stop(resolve); + this.#webServer = null; + }); + } + + /** + * @param {string} path - path to file within test/unit/pdf/ (TEST_PDFS_PATH). + * @returns {URL} + */ + static resolveURL(path) { + assert(this.#startCount > 0, "ensureStarted() should be called first"); + assert(!this.#startPromise, "ensureStarted() should have resolved"); + + if (isNodeJS) { + // Note: TestPdfsServer.ensureStarted() should be called first. + return new URL(path, `http://127.0.0.1:${this.#webServer.port}/`); + } + // When "gulp server" is used, our URL looks like + // http://localhost:8888/test/unit/unit_test.html + // The PDFs are served from: + // http://localhost:8888/test/pdfs/ + return new URL(TEST_PDFS_PATH + path, window.location); + } } export { buildGetDocumentParams, CMAP_URL, createIdFactory, - createTemporaryNodeServer, DefaultFileReaderFactory, STANDARD_FONT_DATA_URL, TEST_PDFS_PATH, + TestPdfsServer, XRefMock, }; From f97b4b9a66a04ca4dadcbba51750887f9c0e2d2d Mon Sep 17 00:00:00 2001 From: Rob Wu Date: Wed, 20 Nov 2024 02:42:13 +0100 Subject: [PATCH 2/2] Add test cases for redirected responses Regression tests for issue #12744 and PR #19028 --- test/unit/api_spec.js | 12 ++-- test/unit/common_pdfstream_tests.js | 90 +++++++++++++++++++++++++++++ test/unit/fetch_stream_spec.js | 30 ++++++++++ test/unit/network_spec.js | 39 +++++++++++++ test/unit/test_utils.js | 17 ++++++ test/webserver.mjs | 66 ++++++++++++++++++--- 6 files changed, 240 insertions(+), 14 deletions(-) create mode 100644 test/unit/common_pdfstream_tests.js diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 412ddc918..539ea6875 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -32,6 +32,7 @@ import { buildGetDocumentParams, CMAP_URL, DefaultFileReaderFactory, + getCrossOriginHostname, TEST_PDFS_PATH, TestPdfsServer, } from "./test_utils.js"; @@ -2977,17 +2978,14 @@ describe("api", function () { let loadingTask; function _checkCanLoad(expectSuccess, filename, options) { if (isNodeJS) { + // We can simulate cross-origin requests, but since Node.js does not + // enforce the Same Origin Policy, requests are expected to be allowed + // independently of withCredentials. pending("Cannot simulate cross-origin requests in Node.js"); } const params = buildGetDocumentParams(filename, options); const url = new URL(params.url); - if (url.hostname === "localhost") { - url.hostname = "127.0.0.1"; - } else if (params.url.hostname === "127.0.0.1") { - url.hostname = "localhost"; - } else { - pending("Can only run cross-origin test on localhost!"); - } + url.hostname = getCrossOriginHostname(url.hostname); params.url = url.href; loadingTask = getDocument(params); return loadingTask.promise diff --git a/test/unit/common_pdfstream_tests.js b/test/unit/common_pdfstream_tests.js new file mode 100644 index 000000000..b96620df9 --- /dev/null +++ b/test/unit/common_pdfstream_tests.js @@ -0,0 +1,90 @@ +/* Copyright 2024 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { AbortException, isNodeJS } from "../../src/shared/util.js"; +import { getCrossOriginHostname, TestPdfsServer } from "./test_utils.js"; + +// Common tests to verify behavior across implementations of the IPDFStream +// interface: +// - PDFNetworkStream by network_spec.js +// - PDFFetchStream by fetch_stream_spec.js +async function testCrossOriginRedirects({ + PDFStreamClass, + redirectIfRange, + testRangeReader, +}) { + const basicApiUrl = TestPdfsServer.resolveURL("basicapi.pdf").href; + const basicApiFileLength = 105779; + + const rangeSize = 32768; + const stream = new PDFStreamClass({ + url: getCrossOriginUrlWithRedirects(basicApiUrl, redirectIfRange), + length: basicApiFileLength, + rangeChunkSize: rangeSize, + disableStream: true, + disableRange: false, + }); + + const fullReader = stream.getFullReader(); + + await fullReader.headersReady; + // Sanity check: We can only test range requests if supported: + expect(fullReader.isRangeSupported).toEqual(true); + // ^ When range requests are supported (and streaming is disabled), the full + // initial request is aborted and we do not need to call fullReader.cancel(). + + const rangeReader = stream.getRangeReader( + basicApiFileLength - rangeSize, + basicApiFileLength + ); + + try { + await testRangeReader(rangeReader); + } finally { + rangeReader.cancel(new AbortException("Don't need rangeReader")); + } +} + +/** + * @param {string} testserverUrl - A URL handled that supports CORS and + * redirects (see crossOriginHandler and redirectHandler in webserver.mjs). + * @param {boolean} redirectIfRange - Whether Range requests should be + * redirected to a different origin compared to the initial request. + * @returns {string} A URL that will be redirected by the server. + */ +function getCrossOriginUrlWithRedirects(testserverUrl, redirectIfRange) { + const url = new URL(testserverUrl); + if (!isNodeJS) { + // The responses are going to be cross-origin. In Node.js, fetch() allows + // cross-origin requests for any request, but in browser environments we + // need to enable CORS. + // This option depends on crossOriginHandler in webserver.mjs. + url.searchParams.set("cors", "withoutCredentials"); + } + + // This redirect options depend on redirectHandler in webserver.mjs. + + // We will change the host to a cross-origin domain so that the initial + // request will be cross-origin. Set "redirectToHost" to the original host + // to force a cross-origin redirect (relative to the initial URL). + url.searchParams.set("redirectToHost", url.hostname); + url.hostname = getCrossOriginHostname(url.hostname); + if (redirectIfRange) { + url.searchParams.set("redirectIfRange", "1"); + } + return url.href; +} + +export { testCrossOriginRedirects }; diff --git a/test/unit/fetch_stream_spec.js b/test/unit/fetch_stream_spec.js index cc9fcbbb5..181818a59 100644 --- a/test/unit/fetch_stream_spec.js +++ b/test/unit/fetch_stream_spec.js @@ -15,6 +15,7 @@ import { AbortException } from "../../src/shared/util.js"; import { PDFFetchStream } from "../../src/display/fetch_stream.js"; +import { testCrossOriginRedirects } from "./common_pdfstream_tests.js"; import { TestPdfsServer } from "./test_utils.js"; describe("fetch_stream", function () { @@ -116,4 +117,33 @@ describe("fetch_stream", function () { expect(result1.value).toEqual(rangeSize); expect(result2.value).toEqual(tailSize); }); + + describe("Redirects", function () { + it("redirects allowed if all responses are same-origin", async function () { + await testCrossOriginRedirects({ + PDFStreamClass: PDFFetchStream, + redirectIfRange: false, + async testRangeReader(rangeReader) { + await expectAsync(rangeReader.read()).toBeResolved(); + }, + }); + }); + + it("redirects blocked if any response is cross-origin", async function () { + await testCrossOriginRedirects({ + PDFStreamClass: PDFFetchStream, + redirectIfRange: true, + async testRangeReader(rangeReader) { + // When read (sync), error should be reported. + await expectAsync(rangeReader.read()).toBeRejectedWithError( + /^Expected range response-origin "http:.*" to match "http:.*"\.$/ + ); + // When read again (async), error should be consistent. + await expectAsync(rangeReader.read()).toBeRejectedWithError( + /^Expected range response-origin "http:.*" to match "http:.*"\.$/ + ); + }, + }); + }); + }); }); diff --git a/test/unit/network_spec.js b/test/unit/network_spec.js index 9a55b4771..367e52560 100644 --- a/test/unit/network_spec.js +++ b/test/unit/network_spec.js @@ -15,6 +15,8 @@ import { AbortException } from "../../src/shared/util.js"; import { PDFNetworkStream } from "../../src/display/network.js"; +import { testCrossOriginRedirects } from "./common_pdfstream_tests.js"; +import { TestPdfsServer } from "./test_utils.js"; describe("network", function () { const pdf1 = new URL("../pdfs/tracemonkey.pdf", window.location).href; @@ -115,4 +117,41 @@ describe("network", function () { expect(isRangeSupported).toEqual(true); expect(fullReaderCancelled).toEqual(true); }); + + describe("Redirects", function () { + beforeAll(async function () { + await TestPdfsServer.ensureStarted(); + }); + + afterAll(async function () { + await TestPdfsServer.ensureStopped(); + }); + + it("redirects allowed if all responses are same-origin", async function () { + await testCrossOriginRedirects({ + PDFStreamClass: PDFNetworkStream, + redirectIfRange: false, + async testRangeReader(rangeReader) { + await expectAsync(rangeReader.read()).toBeResolved(); + }, + }); + }); + + it("redirects blocked if any response is cross-origin", async function () { + await testCrossOriginRedirects({ + PDFStreamClass: PDFNetworkStream, + redirectIfRange: true, + async testRangeReader(rangeReader) { + // When read (sync), error should be reported. + await expectAsync(rangeReader.read()).toBeRejectedWithError( + /^Expected range response-origin "http:.*" to match "http:.*"\.$/ + ); + // When read again (async), error should be consistent. + await expectAsync(rangeReader.read()).toBeRejectedWithError( + /^Expected range response-origin "http:.*" to match "http:.*"\.$/ + ); + }, + }); + }); + }); }); diff --git a/test/unit/test_utils.js b/test/unit/test_utils.js index d08098fdc..4361ebac6 100644 --- a/test/unit/test_utils.js +++ b/test/unit/test_utils.js @@ -51,6 +51,22 @@ function buildGetDocumentParams(filename, options) { return params; } +function getCrossOriginHostname(hostname) { + if (hostname === "localhost") { + // Note: This does not work if localhost is listening on IPv6 only. + // As a work-around, visit the IPv6 version at: + // http://[::1]:8888/test/unit/unit_test.html?spec=Cross-origin + return "127.0.0.1"; + } + + if (hostname === "127.0.0.1" || hostname === "[::1]") { + return "localhost"; + } + + // FQDN are cross-origin and browsers usually resolve them to the same server. + return hostname.endsWith(".") ? hostname.slice(0, -1) : hostname + "."; +} + class XRefMock { constructor(array) { this._map = Object.create(null); @@ -216,6 +232,7 @@ export { CMAP_URL, createIdFactory, DefaultFileReaderFactory, + getCrossOriginHostname, STANDARD_FONT_DATA_URL, TEST_PDFS_PATH, TestPdfsServer, diff --git a/test/webserver.mjs b/test/webserver.mjs index e0295f3a2..fffd8d72a 100644 --- a/test/webserver.mjs +++ b/test/webserver.mjs @@ -52,7 +52,7 @@ class WebServer { this.cacheExpirationTime = cacheExpirationTime || 0; this.disableRangeRequests = false; this.hooks = { - GET: [crossOriginHandler], + GET: [crossOriginHandler, redirectHandler], POST: [], }; } @@ -308,6 +308,11 @@ class WebServer { } #serveFileRange(response, fileURL, fileSize, start, end) { + if (end > fileSize || start > end) { + response.writeHead(416); + response.end(); + return; + } const stream = fs.createReadStream(fileURL, { flags: "rs", start, @@ -336,18 +341,65 @@ class WebServer { } // This supports the "Cross-origin" test in test/unit/api_spec.js -// It is here instead of test.js so that when the test will still complete as +// and "Redirects" in test/unit/network_spec.js and +// test/unit/fetch_stream_spec.js via test/unit/common_pdfstream_tests.js. +// It is here instead of test.mjs so that when the test will still complete as // expected if the user does "gulp server" and then visits // http://localhost:8888/test/unit/unit_test.html?spec=Cross-origin function crossOriginHandler(url, request, response) { if (url.pathname === "/test/pdfs/basicapi.pdf") { - if (url.searchParams.get("cors") === "withCredentials") { - response.setHeader("Access-Control-Allow-Origin", request.headers.origin); - response.setHeader("Access-Control-Allow-Credentials", "true"); - } else if (url.searchParams.get("cors") === "withoutCredentials") { - response.setHeader("Access-Control-Allow-Origin", request.headers.origin); + if (!url.searchParams.has("cors") || !request.headers.origin) { + return; } + response.setHeader("Access-Control-Allow-Origin", request.headers.origin); + if (url.searchParams.get("cors") === "withCredentials") { + response.setHeader("Access-Control-Allow-Credentials", "true"); + } // withoutCredentials does not include Access-Control-Allow-Credentials. + response.setHeader( + "Access-Control-Expose-Headers", + "Accept-Ranges,Content-Range" + ); + response.setHeader("Vary", "Origin"); } } +// This supports the "Redirects" test in test/unit/network_spec.js and +// test/unit/fetch_stream_spec.js via test/unit/common_pdfstream_tests.js. +// It is here instead of test.mjs so that when the test will still complete as +// expected if the user does "gulp server" and then visits +// http://localhost:8888/test/unit/unit_test.html?spec=Redirects +function redirectHandler(url, request, response) { + const redirectToHost = url.searchParams.get("redirectToHost"); + if (redirectToHost) { + // Chrome may serve byte range requests directly from the cache, potentially + // from a full request or a different range, without involving the server. + // To prevent this from happening, make sure that the response is never + // cached, so that Range requests are never served from the browser cache. + response.setHeader("Cache-Control", "no-store,max-age=0"); + + if (url.searchParams.get("redirectIfRange") && !request.headers.range) { + return false; + } + try { + const newURL = new URL(url); + newURL.hostname = redirectToHost; + // Delete test-only query parameters to avoid infinite redirects. + newURL.searchParams.delete("redirectToHost"); + newURL.searchParams.delete("redirectIfRange"); + if (newURL.hostname !== redirectToHost) { + throw new Error(`Invalid hostname: ${redirectToHost}`); + } + response.setHeader("Location", newURL.href); + } catch { + response.writeHead(500); + response.end(); + return true; + } + response.writeHead(302); + response.end(); + return true; + } + return false; +} + export { WebServer };