2021-03-31 15:07:02 -07:00
|
|
|
/* Copyright 2021 Mozilla Foundation
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2025-01-21 23:26:20 +01:00
|
|
|
/** @typedef {import("../src/display/api").PDFPageProxy} PDFPageProxy */
|
|
|
|
|
2023-08-31 15:22:51 +02:00
|
|
|
import { removeNullCharacters } from "./ui_utils.js";
|
|
|
|
|
2021-03-31 15:07:02 -07:00
|
|
|
/**
 * Lookup table from PDF standard structure types to the ARIA role that is set
 * on the generated element. A `null` value means that no `role` attribute is
 * set for that structure type.
 *
 * The table has a `null` prototype, since it is indexed with role names that
 * come from the PDF document: with a regular object, names such as
 * "constructor" or "toString" would resolve to inherited (truthy) members.
 */
const PDF_ROLE_TO_HTML_ROLE = {
  __proto__: null,
  // Document level structure types
  Document: null, // There's a "document" role, but it doesn't make sense here.
  DocumentFragment: null,
  // Grouping level structure types
  Part: "group",
  Sect: "group", // XXX: There's a "section" role, but it's abstract.
  Div: "group",
  Aside: "note",
  NonStruct: "none",
  // Block level structure types
  P: null,
  // H<n>,
  H: "heading",
  Title: null,
  FENote: "note",
  // Sub-block level structure type
  Sub: "group",
  // General inline level structure types
  Lbl: null,
  Span: null,
  Em: null,
  Strong: null,
  Link: "link",
  Annot: "note",
  Form: "form",
  // Ruby and Warichu structure types
  Ruby: null,
  RB: null,
  RT: null,
  RP: null,
  Warichu: null,
  WT: null,
  WP: null,
  // List standard structure types
  L: "list",
  LI: "listitem",
  LBody: null,
  // Table standard structure types
  Table: "table",
  TR: "row",
  TH: "columnheader",
  TD: "cell",
  THead: "columnheader",
  TBody: null,
  TFoot: null,
  // Standard structure type Caption
  Caption: null,
  // Standard structure type Figure
  Figure: "figure",
  // Standard structure type Formula
  Formula: null,
  // standard structure type Artifact
  Artifact: null,
};
|
|
|
|
|
|
|
|
// Matches the numbered heading structure types ("H" followed only by digits,
// e.g. `H1`, `H2`); the captured digits are used by `StructTreeLayerBuilder`
// as the `aria-level` value of the generated heading.
const HEADING_PATTERN = /^H(\d+)$/;
|
|
|
|
|
2025-01-21 23:26:20 +01:00
|
|
|
/**
|
|
|
|
* @typedef {Object} StructTreeLayerBuilderOptions
|
|
|
|
* @property {PDFPageProxy} pdfPage
|
|
|
|
* @property {Object} rawDims
|
|
|
|
*/
|
|
|
|
|
2021-03-31 15:07:02 -07:00
|
|
|
/**
 * Converts the structure tree of a page, as returned by
 * `pdfPage.getStructTree()`, into a tree of `<span>` elements carrying ARIA
 * roles and attributes.
 */
class StructTreeLayerBuilder {
  // Pending `getStructTree()` promise; cleared once rendering has been tried.
  #promise;

  // Root element of the rendered tree, or `null` when there's none.
  #treeDom = null;

  // Cached promise for the first `render()` call, reused by later calls.
  #treePromise;

  // Annotation id -> Map of attributes to apply in the annotation layer.
  #elementAttributes = new Map();

  #rawDims;

  // Content id -> image placeholder waiting to be added to the text layer.
  #elementsToAddToTextLayer = null;

  /**
   * @param {PDFPageProxy} pdfPage - The page whose `getStructTree()` result is
   *   rendered.
   * @param {Object} [rawDims] - Page dimensions, from which `pageHeight`,
   *   `pageX` and `pageY` are read to position figure placeholders. When it's
   *   missing, no placeholders are created for figures.
   */
  constructor(pdfPage, rawDims) {
    this.#promise = pdfPage.getStructTree();
    this.#rawDims = rawDims;
  }

  /**
   * Builds the DOM tree once; subsequent calls reuse the first result.
   *
   * @returns {Promise<HTMLElement|null>} Resolves with the root element of the
   *   tree (or `null` if the page has no structure tree), and rejects if the
   *   tree could not be fetched or converted.
   */
  async render() {
    if (this.#treePromise) {
      return this.#treePromise;
    }
    const { promise, resolve, reject } = Promise.withResolvers();
    this.#treePromise = promise;

    try {
      this.#treeDom = this.#walk(await this.#promise);
    } catch (ex) {
      reject(ex);
    }
    this.#promise = null;

    this.#treeDom?.classList.add("structTree");
    // This is a no-op if the promise was already rejected above.
    resolve(this.#treeDom);

    return promise;
  }

  /**
   * @param {string} annotationId
   * @returns {Promise<Map<string, string>|undefined|null>} The attributes
   *   collected for the annotation, `undefined` if there are none, or `null`
   *   if rendering the structure tree failed.
   */
  async getAriaAttributes(annotationId) {
    try {
      await this.render();
      return this.#elementAttributes.get(annotationId);
    } catch {
      // If the structTree cannot be fetched, parsed, and/or rendered,
      // ensure that e.g. the AnnotationLayer won't break completely.
    }
    return null;
  }

  /** Hides the rendered tree, if there is one and it's currently shown. */
  hide() {
    if (this.#treeDom && !this.#treeDom.hidden) {
      this.#treeDom.hidden = true;
    }
  }

  /** Shows the rendered tree again, if it's currently hidden. */
  show() {
    if (this.#treeDom?.hidden) {
      this.#treeDom.hidden = false;
    }
  }

  /**
   * Copies the `alt`, `id` and `lang` values of a structure node onto the
   * given element (as `aria-label`, `aria-owns` and `lang` respectively).
   */
  #setAttributes(structElement, htmlElement) {
    const { alt, id, lang } = structElement;
    if (alt !== undefined) {
      // Don't add the label in the struct tree layer but on the annotation
      // in the annotation layer.
      let added = false;
      const label = removeNullCharacters(alt);
      // `children` is optional (see `#walk`), hence don't assume it exists.
      for (const child of structElement.children ?? []) {
        if (child.type === "annotation") {
          let attrs = this.#elementAttributes.get(child.id);
          if (!attrs) {
            attrs = new Map();
            this.#elementAttributes.set(child.id, attrs);
          }
          attrs.set("aria-label", label);
          added = true;
        }
      }
      if (!added) {
        htmlElement.setAttribute("aria-label", label);
      }
    }
    if (id !== undefined) {
      htmlElement.setAttribute("aria-owns", id);
    }
    if (lang !== undefined) {
      htmlElement.setAttribute(
        "lang",
        removeNullCharacters(lang, /* replaceInvisible = */ true)
      );
    }
  }

  /**
   * Creates a `role="img"` placeholder for a figure node which has an
   * alternate text, a bounding box and a content node as its first child.
   *
   * @returns {boolean} Whether a placeholder was created for the node.
   */
  #addImageInTextLayer(node, element) {
    const { alt, bbox, children } = node;
    const child = children?.[0];
    if (!this.#rawDims || !alt || !bbox || child?.type !== "content") {
      return false;
    }

    const { id } = child;
    if (!id) {
      return false;
    }

    // We cannot add the created element to the text layer immediately, as the
    // text layer might not be ready yet. Instead, we store the element and add
    // it later in `addElementsToTextLayer`.

    element.setAttribute("aria-owns", id);
    const img = document.createElement("span");
    (this.#elementsToAddToTextLayer ||= new Map()).set(id, img);
    img.setAttribute("role", "img");
    img.setAttribute("aria-label", removeNullCharacters(alt));

    const { pageHeight, pageX, pageY } = this.#rawDims;
    const calc = "calc(var(--total-scale-factor) *";
    const { style } = img;
    style.width = `${calc}${bbox[2] - bbox[0]}px)`;
    style.height = `${calc}${bbox[3] - bbox[1]}px)`;
    style.left = `${calc}${bbox[0] - pageX}px)`;
    style.top = `${calc}${pageHeight - bbox[3] + pageY}px)`;

    return true;
  }

  /**
   * Appends the pending figure placeholders to the elements, looked up by id
   * in the document, that they belong to; placeholders whose element cannot
   * be found are dropped.
   */
  addElementsToTextLayer() {
    if (!this.#elementsToAddToTextLayer) {
      return;
    }
    for (const [id, img] of this.#elementsToAddToTextLayer) {
      document.getElementById(id)?.append(img);
    }
    this.#elementsToAddToTextLayer.clear();
    this.#elementsToAddToTextLayer = null;
  }

  /**
   * Recursively converts a structure node into a `<span>` element.
   *
   * @returns {HTMLElement|null} The element, or `null` for a missing node.
   */
  #walk(node) {
    if (!node) {
      return null;
    }

    const element = document.createElement("span");
    if ("role" in node) {
      const { role } = node;
      const match = role.match(HEADING_PATTERN);
      if (match) {
        element.setAttribute("role", "heading");
        element.setAttribute("aria-level", match[1]);
      } else {
        // The role name comes from the PDF document, hence only consider own
        // properties to avoid picking up e.g. `Object.prototype.constructor`.
        const htmlRole = Object.hasOwn(PDF_ROLE_TO_HTML_ROLE, role)
          ? PDF_ROLE_TO_HTML_ROLE[role]
          : null;
        if (htmlRole) {
          element.setAttribute("role", htmlRole);
        }
      }
      if (role === "Figure" && this.#addImageInTextLayer(node, element)) {
        return element;
      }
    }

    this.#setAttributes(node, element);

    const { children } = node;
    if (children) {
      const firstChild = children[0];
      if (children.length === 1 && firstChild && "id" in firstChild) {
        // Often there is only one content node so just set the values on the
        // parent node to avoid creating an extra span.
        this.#setAttributes(firstChild, element);
      } else {
        for (const kid of children) {
          // `append(null)` would insert the text "null", hence skip the
          // children for which no element was created.
          const kidElement = this.#walk(kid);
          if (kidElement) {
            element.append(kidElement);
          }
        }
      }
    }
    return element;
  }
}
|
|
|
|
|
2021-12-15 13:54:29 +01:00
|
|
|
export { StructTreeLayerBuilder };
|