mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-22 16:18:08 +02:00
Merge pull request #13105 from Snuffleupagus/BasePdfManager-parseDocBaseUrl
Improve memory usage around the `BasePdfManager.docBaseUrl` parameter (PR 7689 follow-up)
This commit is contained in:
commit
8269ddbd16
10 changed files with 251 additions and 244 deletions
|
@ -13,17 +13,23 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import {
|
||||
createValidAbsoluteUrl,
|
||||
shadow,
|
||||
unreachable,
|
||||
warn,
|
||||
} from "../shared/util.js";
|
||||
import { createValidAbsoluteUrl, unreachable, warn } from "../shared/util.js";
|
||||
import { ChunkedStreamManager } from "./chunked_stream.js";
|
||||
import { MissingDataException } from "./core_utils.js";
|
||||
import { PDFDocument } from "./document.js";
|
||||
import { Stream } from "./stream.js";
|
||||
|
||||
/**
 * Converts a caller-supplied document base URL into its absolute form.
 *
 * @param {string} url - The candidate base URL; any falsy value is treated
 *   as "no base URL".
 * @returns {string|null} The `href` of the object returned by
 *   `createValidAbsoluteUrl`, or `null` when `url` is falsy or is rejected by
 *   that helper (a warning is emitted in the latter case).
 */
function parseDocBaseUrl(url) {
  if (!url) {
    return null;
  }
  const parsedUrl = createValidAbsoluteUrl(url);
  if (!parsedUrl) {
    warn(`Invalid absolute docBaseUrl: "${url}".`);
    return null;
  }
  return parsedUrl.href;
}
|
||||
|
||||
class BasePdfManager {
|
||||
constructor() {
|
||||
if (this.constructor === BasePdfManager) {
|
||||
|
@ -40,16 +46,7 @@ class BasePdfManager {
|
|||
}
|
||||
|
||||
get docBaseUrl() {
|
||||
let docBaseUrl = null;
|
||||
if (this._docBaseUrl) {
|
||||
const absoluteUrl = createValidAbsoluteUrl(this._docBaseUrl);
|
||||
if (absoluteUrl) {
|
||||
docBaseUrl = absoluteUrl.href;
|
||||
} else {
|
||||
warn(`Invalid absolute docBaseUrl: "${this._docBaseUrl}".`);
|
||||
}
|
||||
}
|
||||
return shadow(this, "docBaseUrl", docBaseUrl);
|
||||
return this._docBaseUrl;
|
||||
}
|
||||
|
||||
onLoadedStream() {
|
||||
|
@ -111,7 +108,7 @@ class LocalPdfManager extends BasePdfManager {
|
|||
|
||||
this._docId = docId;
|
||||
this._password = password;
|
||||
this._docBaseUrl = docBaseUrl;
|
||||
this._docBaseUrl = parseDocBaseUrl(docBaseUrl);
|
||||
this.evaluatorOptions = evaluatorOptions;
|
||||
this.enableXfa = enableXfa;
|
||||
|
||||
|
@ -154,7 +151,7 @@ class NetworkPdfManager extends BasePdfManager {
|
|||
|
||||
this._docId = docId;
|
||||
this._password = args.password;
|
||||
this._docBaseUrl = docBaseUrl;
|
||||
this._docBaseUrl = parseDocBaseUrl(docBaseUrl);
|
||||
this.msgHandler = args.msgHandler;
|
||||
this.evaluatorOptions = evaluatorOptions;
|
||||
this.enableXfa = enableXfa;
|
||||
|
|
|
@ -40,6 +40,7 @@ import {
|
|||
deprecated,
|
||||
DOMCanvasFactory,
|
||||
DOMCMapReaderFactory,
|
||||
isDataScheme,
|
||||
loadScript,
|
||||
PageViewport,
|
||||
RenderingCancelledException,
|
||||
|
@ -288,6 +289,15 @@ function getDocument(src) {
|
|||
params.pdfBug = params.pdfBug === true;
|
||||
params.enableXfa = params.enableXfa === true;
|
||||
|
||||
if (
|
||||
typeof params.docBaseUrl !== "string" ||
|
||||
isDataScheme(params.docBaseUrl)
|
||||
) {
|
||||
// Ignore "data:"-URLs, since they can't be used to recover valid absolute
|
||||
// URLs anyway. We want to avoid sending them to the worker-thread, since
|
||||
// they contain the *entire* PDF document and can thus be arbitrarily long.
|
||||
params.docBaseUrl = null;
|
||||
}
|
||||
if (!Number.isInteger(params.maxImageSize)) {
|
||||
params.maxImageSize = -1;
|
||||
}
|
||||
|
|
|
@ -451,13 +451,23 @@ function addLinkAttributes(link, { url, target, rel, enabled = true } = {}) {
|
|||
link.rel = typeof rel === "string" ? rel : DEFAULT_LINK_REL;
|
||||
}
|
||||
|
||||
/**
 * Checks whether a URL uses the "data:" scheme, ignoring leading whitespace
 * and letter case.
 *
 * @param {string} url
 * @returns {boolean} `true` if the first non-whitespace characters of `url`
 *   are "data:" (case-insensitive); `false` otherwise, including when `url`
 *   is not a string.
 */
function isDataScheme(url) {
  // Mirror the `typeof` guard used by `isPdfFile`, so that non-string input
  // yields `false` rather than a TypeError.
  if (typeof url !== "string") {
    return false;
  }
  const ii = url.length;
  let i = 0;
  // Skip leading whitespace by index, and only ever copy the five characters
  // that are inspected; the input may be an arbitrarily long "data:"-URL.
  while (i < ii && url[i].trim() === "") {
    i++;
  }
  return url.substring(i, i + 5).toLowerCase() === "data:";
}
|
||||
|
||||
/**
 * Tells whether a filename ends with the ".pdf" extension (case-insensitive).
 *
 * @param {*} filename - The value to inspect.
 * @returns {boolean} `false` for anything that is not a string.
 */
function isPdfFile(filename) {
  if (typeof filename !== "string") {
    return false;
  }
  const pdfExtension = /\.pdf$/i;
  return pdfExtension.test(filename);
}
|
||||
|
||||
/**
|
||||
* Gets the file name from a given URL.
|
||||
* Gets the filename from a given URL.
|
||||
* @param {string} url
|
||||
* @returns {string}
|
||||
*/
|
||||
function getFilenameFromUrl(url) {
|
||||
const anchor = url.indexOf("#");
|
||||
|
@ -469,6 +479,48 @@ function getFilenameFromUrl(url) {
|
|||
return url.substring(url.lastIndexOf("/", end) + 1, end);
|
||||
}
|
||||
|
||||
/**
 * Returns the filename or guessed filename from the url (see issue 3455).
 * @param {string} url - The original PDF location.
 * @param {string} defaultFilename - The value returned if the filename is
 *   unknown, or the protocol is unsupported.
 * @returns {string} Guessed PDF filename; `defaultFilename` when `url` is not
 *   a string, is a "data:"-URL, cannot be split into path/query/ref, or
 *   contains no NAME.pdf component.
 */
function getPdfFilenameFromUrl(url, defaultFilename = "document.pdf") {
  if (typeof url !== "string") {
    return defaultFilename;
  }
  if (isDataScheme(url)) {
    warn('getPdfFilenameFromUrl: ignore "data:"-URL for performance reasons.');
    return defaultFilename;
  }
  const reURI = /^(?:(?:[^:]+:)?\/\/[^/]+)?([^?#]*)(\?[^#]*)?(#.*)?$/;
  //              SCHEME        HOST        1.PATH  2.QUERY   3.REF
  // Pattern to get last matching NAME.pdf
  const reFilename = /[^/?#=]+\.pdf\b(?!.*\.pdf\b)/i;
  const splitURI = reURI.exec(url);
  if (!splitURI) {
    // `.` does not match line terminators, so a URL whose fragment contains
    // e.g. "\n" is not matched at all; fall back rather than throwing.
    return defaultFilename;
  }
  // The QUERY and REF groups are `undefined` when absent; default them to ""
  // so that the literal string "undefined" is never searched.
  const [, path = "", query = "", ref = ""] = splitURI;
  const filenameMatch =
    reFilename.exec(path) || reFilename.exec(query) || reFilename.exec(ref);
  if (!filenameMatch) {
    return defaultFilename;
  }
  let suggestedFilename = filenameMatch[0];
  if (suggestedFilename.includes("%")) {
    // URL-encoded %2Fpath%2Fto%2Ffile.pdf should be file.pdf
    try {
      const decodedMatch = reFilename.exec(
        decodeURIComponent(suggestedFilename)
      );
      // No match after decoding (e.g. for "%2F.pdf"): keep the encoded name.
      if (decodedMatch) {
        suggestedFilename = decodedMatch[0];
      }
    } catch (ex) {
      // Deliberately ignored (extremely rare): `decodeURIComponent` throws
      // URIError "Malformed URI", e.g. for "%AA.pdf"; keep the encoded name.
    }
  }
  return suggestedFilename || defaultFilename;
}
|
||||
|
||||
class StatTimer {
|
||||
constructor() {
|
||||
this.started = Object.create(null);
|
||||
|
@ -655,6 +707,8 @@ export {
|
|||
DOMCMapReaderFactory,
|
||||
DOMSVGFactory,
|
||||
getFilenameFromUrl,
|
||||
getPdfFilenameFromUrl,
|
||||
isDataScheme,
|
||||
isFetchSupported,
|
||||
isPdfFile,
|
||||
isValidFetchUrl,
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
import {
|
||||
addLinkAttributes,
|
||||
getFilenameFromUrl,
|
||||
getPdfFilenameFromUrl,
|
||||
isFetchSupported,
|
||||
isPdfFile,
|
||||
isValidFetchUrl,
|
||||
|
@ -130,6 +131,7 @@ export {
|
|||
// From "./display/display_utils.js":
|
||||
addLinkAttributes,
|
||||
getFilenameFromUrl,
|
||||
getPdfFilenameFromUrl,
|
||||
isPdfFile,
|
||||
LinkTarget,
|
||||
loadScript,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue