1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-25 01:28:06 +02:00

Add support for the /Catalog Base-URI when resolving URLs (issue 14802)

As far as I can tell, this is actually the very first time that we've seen a PDF document with a Base-URI specified in the /Catalog; please refer to the specification:
https://web.archive.org/web/20220309040754if_/https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf#G11.2097122

To simplify the overall implementation, this new parameter is accessed via the existing `BasePdfManager.docBaseUrl`-getter and will thus override any user-specified `docBaseUrl` API-parameter.
This commit is contained in:
Jonas Jenwald 2022-04-19 16:53:44 +02:00
parent 32ae0e4867
commit 5bc7339c1b
6 changed files with 126 additions and 5 deletions

View file

@ -72,9 +72,12 @@ class AnnotationFactory {
static create(xref, ref, pdfManager, idFactory, collectFields) {
return Promise.all([
pdfManager.ensureCatalog("acroForm"),
// Only necessary to prevent the `pdfManager.docBaseUrl`-getter, used
// with certain Annotations, from throwing and thus breaking parsing:
pdfManager.ensureCatalog("baseUrl"),
pdfManager.ensureDoc("xfaDatasets"),
collectFields ? this._getPageIndex(xref, ref, pdfManager) : -1,
]).then(([acroForm, xfaDatasets, pageIndex]) =>
]).then(([acroForm, baseUrl, xfaDatasets, pageIndex]) =>
pdfManager.ensure(this, "_create", [
xref,
ref,

View file

@ -1387,6 +1387,22 @@ class Catalog {
return next(pageRef);
}
get baseUrl() {
const uri = this._catDict.get("URI");
if (uri instanceof Dict) {
const base = uri.get("Base");
if (typeof base === "string") {
const absoluteUrl = createValidAbsoluteUrl(base, null, {
tryConvertEncoding: true,
});
if (absoluteUrl) {
return shadow(this, "baseUrl", absoluteUrl.href);
}
}
}
return shadow(this, "baseUrl", null);
}
/**
* @typedef {Object} ParseDestDictionaryParameters
* @property {Dict} destDict - The dictionary containing the destination.
@ -1464,8 +1480,6 @@ class Catalog {
// Some bad PDFs do not put parentheses around relative URLs.
url = "/" + url.name;
}
// TODO: pdf spec mentions urls can be relative to a Base
// entry in the dictionary.
break;
case "GoTo":

View file

@ -13,7 +13,12 @@
* limitations under the License.
*/
import { createValidAbsoluteUrl, unreachable, warn } from "../shared/util.js";
import {
createValidAbsoluteUrl,
shadow,
unreachable,
warn,
} from "../shared/util.js";
import { ChunkedStreamManager } from "./chunked_stream.js";
import { MissingDataException } from "./core_utils.js";
import { PDFDocument } from "./document.js";
@ -46,7 +51,8 @@ class BasePdfManager {
}
get docBaseUrl() {
return this._docBaseUrl;
const catalog = this.pdfDocument.catalog;
return shadow(this, "docBaseUrl", catalog.baseUrl || this._docBaseUrl);
}
onLoadedStream() {