1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-22 16:18:08 +02:00

Introduce some URL.parse() usage in the code-base

This (fairly new) static method allows parsing URLs without having to wrap `new URL(...)` calls within `try...catch` blocks, thus simplifying the code; see https://developer.mozilla.org/en-US/docs/Web/API/URL/parse_static

For older browsers/environments the functionality will be polyfilled, but *only* in `legacy` builds, via `core-js`; see https://github.com/zloirock/core-js?tab=readme-ov-file#url-and-urlsearchparams

*Please note:* This is currently limited to the `src/` and `web/` folders, so that we don't break development/testing, since the functionality is not available in all Node.js versions that we support; see https://developer.mozilla.org/en-US/docs/Web/API/URL/parse_static#browser_compatibility
This commit is contained in:
Jonas Jenwald 2025-02-15 10:49:30 +01:00
parent 3f15e0c469
commit c2e33307b1
5 changed files with 52 additions and 72 deletions

View file

@ -523,18 +523,20 @@ function getUrlProp(val) {
if (val instanceof URL) {
return val.href;
}
try {
// The full path is required in the 'url' field.
return new URL(val, window.location).href;
} catch {
if (typeof val === "string") {
if (
typeof PDFJSDev !== "undefined" &&
PDFJSDev.test("GENERIC") &&
isNodeJS &&
typeof val === "string"
isNodeJS
) {
return val; // Use the url as-is in Node.js environments.
}
// The full path is required in the 'url' field.
const url = URL.parse(val, window.location);
if (url) {
return url.href;
}
}
throw new Error(
"Invalid PDF url data: " +
@ -2080,14 +2082,9 @@ class PDFWorker {
// Check if URLs have the same origin. For non-HTTP based URLs, returns
// false.
this._isSameOrigin = (baseUrl, otherUrl) => {
let base;
try {
base = new URL(baseUrl);
if (!base.origin || base.origin === "null") {
return false; // non-HTTP url
}
} catch {
return false;
const base = URL.parse(baseUrl);
if (!base?.origin || base.origin === "null") {
return false; // non-HTTP url
}
const other = new URL(otherUrl, base);
return base.origin === other.origin;
@ -2200,7 +2197,7 @@ class PDFWorker {
if (
typeof PDFJSDev !== "undefined" &&
PDFJSDev.test("GENERIC") &&
!PDFWorker._isSameOrigin(window.location.href, workerSrc)
!PDFWorker._isSameOrigin(window.location, workerSrc)
) {
workerSrc = PDFWorker._createCDNWrapper(
new URL(workerSrc, window.location).href

View file

@ -402,13 +402,9 @@ function isValidFetchUrl(url, baseUrl) {
if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) {
throw new Error("Not implemented: isValidFetchUrl");
}
try {
const { protocol } = baseUrl ? new URL(url, baseUrl) : new URL(url);
// The Fetch API only supports the http/https protocols, and not file/ftp.
return protocol === "http:" || protocol === "https:";
} catch {
return false; // `new URL()` will throw on incorrect data.
}
const res = baseUrl ? URL.parse(url, baseUrl) : URL.parse(url);
// The Fetch API only supports the http/https protocols, and not file/ftp.
return res?.protocol === "http:" || res?.protocol === "https:";
}
/**

View file

@ -33,13 +33,8 @@ function createHeaders(isHttp, httpHeaders) {
}
function getResponseOrigin(url) {
try {
return new URL(url).origin;
} catch {
// `new URL()` will throw on incorrect data.
}
// Notably, null is distinct from "null" string (e.g. from file:-URLs).
return null;
return URL.parse(url)?.origin ?? null;
}
function validateRangeRequestCapabilities({

View file

@ -412,35 +412,28 @@ function createValidAbsoluteUrl(url, baseUrl = null, options = null) {
if (!url) {
return null;
}
try {
if (options && typeof url === "string") {
// Let URLs beginning with "www." default to using the "http://" protocol.
if (options.addDefaultProtocol && url.startsWith("www.")) {
const dots = url.match(/\./g);
// Avoid accidentally matching a *relative* URL pointing to a file named
// e.g. "www.pdf" or similar.
if (dots?.length >= 2) {
url = `http://${url}`;
}
}
// According to ISO 32000-1:2008, section 12.6.4.7, URIs should be encoded
// in 7-bit ASCII. Some bad PDFs use UTF-8 encoding; see bug 1122280.
if (options.tryConvertEncoding) {
try {
url = stringToUTF8String(url);
} catch {}
if (options && typeof url === "string") {
// Let URLs beginning with "www." default to using the "http://" protocol.
if (options.addDefaultProtocol && url.startsWith("www.")) {
const dots = url.match(/\./g);
// Avoid accidentally matching a *relative* URL pointing to a file named
// e.g. "www.pdf" or similar.
if (dots?.length >= 2) {
url = `http://${url}`;
}
}
const absoluteUrl = baseUrl ? new URL(url, baseUrl) : new URL(url);
if (_isValidProtocol(absoluteUrl)) {
return absoluteUrl;
// According to ISO 32000-1:2008, section 12.6.4.7, URIs should be encoded
// in 7-bit ASCII. Some bad PDFs use UTF-8 encoding; see bug 1122280.
if (options.tryConvertEncoding) {
try {
url = stringToUTF8String(url);
} catch {}
}
} catch {
/* `new URL()` will throw on incorrect data. */
}
return null;
const absoluteUrl = baseUrl ? URL.parse(url, baseUrl) : URL.parse(url);
return _isValidProtocol(absoluteUrl) ? absoluteUrl : null;
}
function shadow(obj, prop, value, nonSerializable = false) {