mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-26 01:58:06 +02:00
Add support for basic structure tree for accessibility.
When a PDF is "marked" we now generate a separate DOM that represents the structure tree from the PDF. This DOM is inserted into the <canvas> element and allows screen readers to walk the tree and have more information about headings, images, links, etc. To link the structure tree DOM (which is empty) to the text layer aria-owns is used. This required modifying the text layer creation so that marked items are now tracked.
This commit is contained in:
parent
6429ccc002
commit
fc9501a637
22 changed files with 911 additions and 14 deletions
335
src/core/struct_tree.js
Normal file
335
src/core/struct_tree.js
Normal file
|
@ -0,0 +1,335 @@
|
|||
/* Copyright 2021 Mozilla Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { isDict, isName, isRef } from "./primitives.js";
|
||||
import { isString, stringToPDFString, warn } from "../shared/util.js";
|
||||
import { NumberTree } from "./obj.js";
|
||||
|
||||
const MAX_DEPTH = 40;
|
||||
|
||||
const StructElementType = {
|
||||
PAGE_CONTENT: "PAGE_CONTENT",
|
||||
STREAM_CONTENT: "STREAM_CONTENT",
|
||||
OBJECT: "OBJECT",
|
||||
ELEMENT: "ELEMENT",
|
||||
};
|
||||
|
||||
class StructTreeRoot {
|
||||
constructor(rootDict) {
|
||||
this.dict = rootDict;
|
||||
this.roleMap = new Map();
|
||||
}
|
||||
|
||||
init() {
|
||||
this.readRoleMap();
|
||||
}
|
||||
|
||||
readRoleMap() {
|
||||
const roleMapDict = this.dict.get("RoleMap");
|
||||
if (!isDict(roleMapDict)) {
|
||||
return;
|
||||
}
|
||||
roleMapDict.forEach((key, value) => {
|
||||
if (!isName(value)) {
|
||||
return;
|
||||
}
|
||||
this.roleMap.set(key, value.name);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Instead of loading the whole tree we load just the page's relevant structure
|
||||
* elements, which means we need a wrapper structure to represent the tree.
|
||||
*/
|
||||
class StructElementNode {
|
||||
constructor(tree, dict) {
|
||||
this.tree = tree;
|
||||
this.dict = dict;
|
||||
this.kids = [];
|
||||
this.parseKids();
|
||||
}
|
||||
|
||||
get role() {
|
||||
const nameObj = this.dict.get("S");
|
||||
const name = isName(nameObj) ? nameObj.name : "";
|
||||
const { root } = this.tree;
|
||||
if (root.roleMap.has(name)) {
|
||||
return root.roleMap.get(name);
|
||||
}
|
||||
return name;
|
||||
}
|
||||
|
||||
parseKids() {
|
||||
let pageObjId = null;
|
||||
const objRef = this.dict.getRaw("Pg");
|
||||
if (isRef(objRef)) {
|
||||
pageObjId = objRef.toString();
|
||||
}
|
||||
const kids = this.dict.get("K");
|
||||
if (Array.isArray(kids)) {
|
||||
for (const kid of kids) {
|
||||
const element = this.parseKid(pageObjId, kid);
|
||||
if (element) {
|
||||
this.kids.push(element);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const element = this.parseKid(pageObjId, kids);
|
||||
if (element) {
|
||||
this.kids.push(element);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parseKid(pageObjId, kid) {
|
||||
// A direct link to content, the integer is an mcid.
|
||||
if (Number.isInteger(kid)) {
|
||||
if (this.tree.pageDict.objId !== pageObjId) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return new StructElement({
|
||||
type: StructElementType.PAGE_CONTENT,
|
||||
mcid: kid,
|
||||
pageObjId,
|
||||
});
|
||||
}
|
||||
|
||||
// Find the dictionary for the kid.
|
||||
let kidDict = null;
|
||||
if (isRef(kid)) {
|
||||
kidDict = this.dict.xref.fetch(kid);
|
||||
} else if (isDict(kid)) {
|
||||
kidDict = kid;
|
||||
}
|
||||
if (!kidDict) {
|
||||
return null;
|
||||
}
|
||||
const pageRef = kidDict.getRaw("Pg");
|
||||
if (isRef(pageRef)) {
|
||||
pageObjId = pageRef.toString();
|
||||
}
|
||||
|
||||
const type = isName(kidDict.get("Type")) ? kidDict.get("Type").name : null;
|
||||
if (type === "MCR") {
|
||||
if (this.tree.pageDict.objId !== pageObjId) {
|
||||
return null;
|
||||
}
|
||||
return new StructElement({
|
||||
type: StructElementType.STREAM_CONTENT,
|
||||
refObjId: isRef(kidDict.getRaw("Stm"))
|
||||
? kidDict.getRaw("Stm").toString()
|
||||
: null,
|
||||
pageObjId,
|
||||
mcid: kidDict.get("MCID"),
|
||||
});
|
||||
}
|
||||
|
||||
if (type === "OBJR") {
|
||||
if (this.tree.pageDict.objId !== pageObjId) {
|
||||
return null;
|
||||
}
|
||||
return new StructElement({
|
||||
type: StructElementType.OBJECT,
|
||||
refObjId: isRef(kidDict.getRaw("Obj"))
|
||||
? kidDict.getRaw("Obj").toString()
|
||||
: null,
|
||||
pageObjId,
|
||||
});
|
||||
}
|
||||
|
||||
return new StructElement({
|
||||
type: StructElementType.ELEMENT,
|
||||
dict: kidDict,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
class StructElement {
|
||||
constructor({
|
||||
type,
|
||||
dict = null,
|
||||
mcid = null,
|
||||
pageObjId = null,
|
||||
refObjId = null,
|
||||
}) {
|
||||
this.type = type;
|
||||
this.dict = dict;
|
||||
this.mcid = mcid;
|
||||
this.pageObjId = pageObjId;
|
||||
this.refObjId = refObjId;
|
||||
this.parentNode = null;
|
||||
}
|
||||
}
|
||||
|
||||
class StructTreePage {
|
||||
constructor(structTreeRoot, pageDict) {
|
||||
this.root = structTreeRoot;
|
||||
this.rootDict = structTreeRoot ? structTreeRoot.dict : null;
|
||||
this.pageDict = pageDict;
|
||||
this.nodes = [];
|
||||
}
|
||||
|
||||
parse() {
|
||||
if (!this.root || !this.rootDict) {
|
||||
return;
|
||||
}
|
||||
|
||||
const parentTree = this.rootDict.get("ParentTree");
|
||||
if (!parentTree) {
|
||||
return;
|
||||
}
|
||||
const id = this.pageDict.get("StructParents");
|
||||
if (!Number.isInteger(id)) {
|
||||
return;
|
||||
}
|
||||
const numberTree = new NumberTree(parentTree, this.rootDict.xref);
|
||||
const parentArray = numberTree.get(id);
|
||||
if (!Array.isArray(parentArray)) {
|
||||
return;
|
||||
}
|
||||
const map = new Map();
|
||||
for (const ref of parentArray) {
|
||||
if (isRef(ref)) {
|
||||
this.addNode(this.rootDict.xref.fetch(ref), map);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
addNode(dict, map, level = 0) {
|
||||
if (level > MAX_DEPTH) {
|
||||
warn("StructTree MAX_DEPTH reached.");
|
||||
return null;
|
||||
}
|
||||
|
||||
if (map.has(dict)) {
|
||||
return map.get(dict);
|
||||
}
|
||||
|
||||
const element = new StructElementNode(this, dict);
|
||||
map.set(dict, element);
|
||||
|
||||
const parent = dict.get("P");
|
||||
|
||||
if (!parent || isName(parent.get("Type"), "StructTreeRoot")) {
|
||||
if (!this.addTopLevelNode(dict, element)) {
|
||||
map.delete(dict);
|
||||
}
|
||||
return element;
|
||||
}
|
||||
|
||||
const parentNode = this.addNode(parent, map, level + 1);
|
||||
if (!parentNode) {
|
||||
return element;
|
||||
}
|
||||
let save = false;
|
||||
for (const kid of parentNode.kids) {
|
||||
if (kid.type === StructElementType.ELEMENT && kid.dict === dict) {
|
||||
kid.parentNode = element;
|
||||
save = true;
|
||||
}
|
||||
}
|
||||
if (!save) {
|
||||
map.delete(dict);
|
||||
}
|
||||
return element;
|
||||
}
|
||||
|
||||
addTopLevelNode(dict, element) {
|
||||
const obj = this.rootDict.get("K");
|
||||
if (!obj) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (isDict(obj)) {
|
||||
if (obj.objId !== dict.objId) {
|
||||
return false;
|
||||
}
|
||||
this.nodes[0] = element;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!Array.isArray(obj)) {
|
||||
return true;
|
||||
}
|
||||
let save = false;
|
||||
for (let i = 0; i < obj.length; i++) {
|
||||
const kidRef = obj[i];
|
||||
if (kidRef && kidRef.toString() === dict.objId) {
|
||||
this.nodes[i] = element;
|
||||
save = true;
|
||||
}
|
||||
}
|
||||
return save;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the tree structure into a simplifed object literal that can
|
||||
* be sent to the main thread.
|
||||
* @returns {Object}
|
||||
*/
|
||||
get serializable() {
|
||||
function nodeToSerializable(node, parent, level = 0) {
|
||||
if (level > MAX_DEPTH) {
|
||||
warn("StructTree too deep to be fully serialized.");
|
||||
return;
|
||||
}
|
||||
const obj = Object.create(null);
|
||||
obj.role = node.role;
|
||||
obj.children = [];
|
||||
parent.children.push(obj);
|
||||
const alt = node.dict.get("Alt");
|
||||
if (isString(alt)) {
|
||||
obj.alt = stringToPDFString(alt);
|
||||
}
|
||||
|
||||
for (const kid of node.kids) {
|
||||
const kidElement =
|
||||
kid.type === StructElementType.ELEMENT ? kid.parentNode : null;
|
||||
if (kidElement) {
|
||||
nodeToSerializable(kidElement, obj, level + 1);
|
||||
continue;
|
||||
} else if (
|
||||
kid.type === StructElementType.PAGE_CONTENT ||
|
||||
kid.type === StructElementType.STREAM_CONTENT
|
||||
) {
|
||||
obj.children.push({
|
||||
type: "content",
|
||||
id: `page${kid.pageObjId}_mcid${kid.mcid}`,
|
||||
});
|
||||
} else if (kid.type === StructElementType.OBJECT) {
|
||||
obj.children.push({
|
||||
type: "object",
|
||||
id: kid.refObjId,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const root = Object.create(null);
|
||||
root.children = [];
|
||||
root.role = "Root";
|
||||
for (const child of this.nodes) {
|
||||
if (!child) {
|
||||
continue;
|
||||
}
|
||||
nodeToSerializable(child, root);
|
||||
}
|
||||
return root;
|
||||
}
|
||||
}
|
||||
|
||||
export { StructTreePage, StructTreeRoot };
|
Loading…
Add table
Add a link
Reference in a new issue