mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-22 16:18:08 +02:00
Merge pull request #17986 from calixteman/fix_struct_tree
Allow to insert several annotations under the same parent in the structure tree
This commit is contained in:
commit
d1f494d68c
6 changed files with 243 additions and 112 deletions
|
@ -378,6 +378,12 @@ class RefSetCache {
|
|||
clear() {
|
||||
this._map.clear();
|
||||
}
|
||||
|
||||
*items() {
|
||||
for (const [ref, value] of this._map) {
|
||||
yield [Ref.fromString(ref), value];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function isName(v, name) {
|
||||
|
|
|
@ -119,19 +119,19 @@ class StructTreeRoot {
|
|||
newRefs,
|
||||
}) {
|
||||
const root = pdfManager.catalog.cloneDict();
|
||||
const cache = new RefSetCache();
|
||||
cache.put(catalogRef, root);
|
||||
|
||||
const structTreeRootRef = xref.getNewTemporaryRef();
|
||||
root.set("StructTreeRoot", structTreeRootRef);
|
||||
|
||||
const buffer = [];
|
||||
await writeObject(catalogRef, root, buffer, xref);
|
||||
newRefs.push({ ref: catalogRef, data: buffer.join("") });
|
||||
|
||||
const structTreeRoot = new Dict(xref);
|
||||
structTreeRoot.set("Type", Name.get("StructTreeRoot"));
|
||||
const parentTreeRef = xref.getNewTemporaryRef();
|
||||
structTreeRoot.set("ParentTree", parentTreeRef);
|
||||
const kids = [];
|
||||
structTreeRoot.set("K", kids);
|
||||
cache.put(structTreeRootRef, structTreeRoot);
|
||||
|
||||
const parentTree = new Dict(xref);
|
||||
const nums = [];
|
||||
|
@ -144,18 +144,18 @@ class StructTreeRoot {
|
|||
nums,
|
||||
xref,
|
||||
pdfManager,
|
||||
newRefs,
|
||||
buffer,
|
||||
cache,
|
||||
});
|
||||
structTreeRoot.set("ParentTreeNextKey", nextKey);
|
||||
|
||||
buffer.length = 0;
|
||||
await writeObject(parentTreeRef, parentTree, buffer, xref);
|
||||
newRefs.push({ ref: parentTreeRef, data: buffer.join("") });
|
||||
cache.put(parentTreeRef, parentTree);
|
||||
|
||||
buffer.length = 0;
|
||||
await writeObject(structTreeRootRef, structTreeRoot, buffer, xref);
|
||||
newRefs.push({ ref: structTreeRootRef, data: buffer.join("") });
|
||||
const buffer = [];
|
||||
for (const [ref, obj] of cache.items()) {
|
||||
buffer.length = 0;
|
||||
await writeObject(ref, obj, buffer, xref);
|
||||
newRefs.push({ ref, data: buffer.join("") });
|
||||
}
|
||||
}
|
||||
|
||||
async canUpdateStructTree({ pdfManager, xref, newAnnotationsByPage }) {
|
||||
|
@ -232,6 +232,8 @@ class StructTreeRoot {
|
|||
const xref = this.dict.xref;
|
||||
const structTreeRoot = this.dict.clone();
|
||||
const structTreeRootRef = this.ref;
|
||||
const cache = new RefSetCache();
|
||||
cache.put(structTreeRootRef, structTreeRoot);
|
||||
|
||||
let parentTreeRef = structTreeRoot.getRaw("ParentTree");
|
||||
let parentTree;
|
||||
|
@ -243,6 +245,7 @@ class StructTreeRoot {
|
|||
structTreeRoot.set("ParentTree", parentTreeRef);
|
||||
}
|
||||
parentTree = parentTree.clone();
|
||||
cache.put(parentTreeRef, parentTree);
|
||||
|
||||
let nums = parentTree.getRaw("Nums");
|
||||
let numsRef = null;
|
||||
|
@ -255,47 +258,27 @@ class StructTreeRoot {
|
|||
parentTree.set("Nums", nums);
|
||||
}
|
||||
|
||||
let kids = structTreeRoot.getRaw("K");
|
||||
let kidsRef = null;
|
||||
if (kids instanceof Ref) {
|
||||
kidsRef = kids;
|
||||
kids = xref.fetch(kidsRef);
|
||||
} else {
|
||||
kidsRef = xref.getNewTemporaryRef();
|
||||
structTreeRoot.set("K", kidsRef);
|
||||
}
|
||||
kids = Array.isArray(kids) ? kids.slice() : [kids];
|
||||
|
||||
const buffer = [];
|
||||
const newNextkey = await StructTreeRoot.#writeKids({
|
||||
newAnnotationsByPage,
|
||||
structTreeRootRef,
|
||||
kids,
|
||||
kids: null,
|
||||
nums,
|
||||
xref,
|
||||
pdfManager,
|
||||
newRefs,
|
||||
buffer,
|
||||
cache,
|
||||
});
|
||||
structTreeRoot.set("ParentTreeNextKey", newNextkey);
|
||||
|
||||
buffer.length = 0;
|
||||
await writeObject(kidsRef, kids, buffer, xref);
|
||||
newRefs.push({ ref: kidsRef, data: buffer.join("") });
|
||||
|
||||
if (numsRef) {
|
||||
buffer.length = 0;
|
||||
await writeObject(numsRef, nums, buffer, xref);
|
||||
newRefs.push({ ref: numsRef, data: buffer.join("") });
|
||||
cache.put(numsRef, nums);
|
||||
}
|
||||
|
||||
buffer.length = 0;
|
||||
await writeObject(parentTreeRef, parentTree, buffer, xref);
|
||||
newRefs.push({ ref: parentTreeRef, data: buffer.join("") });
|
||||
|
||||
buffer.length = 0;
|
||||
await writeObject(structTreeRootRef, structTreeRoot, buffer, xref);
|
||||
newRefs.push({ ref: structTreeRootRef, data: buffer.join("") });
|
||||
const buffer = [];
|
||||
for (const [ref, obj] of cache.items()) {
|
||||
buffer.length = 0;
|
||||
await writeObject(ref, obj, buffer, xref);
|
||||
newRefs.push({ ref, data: buffer.join("") });
|
||||
}
|
||||
}
|
||||
|
||||
static async #writeKids({
|
||||
|
@ -305,8 +288,7 @@ class StructTreeRoot {
|
|||
nums,
|
||||
xref,
|
||||
pdfManager,
|
||||
newRefs,
|
||||
buffer,
|
||||
cache,
|
||||
}) {
|
||||
const objr = Name.get("OBJR");
|
||||
let nextKey = -Infinity;
|
||||
|
@ -349,19 +331,15 @@ class StructTreeRoot {
|
|||
tagDict.set("ActualText", actualText);
|
||||
}
|
||||
|
||||
if (structTreeParent) {
|
||||
await this.#updateParentTag({
|
||||
structTreeParent,
|
||||
tagDict,
|
||||
newTagRef: tagRef,
|
||||
fallbackRef: structTreeRootRef,
|
||||
xref,
|
||||
newRefs,
|
||||
buffer,
|
||||
});
|
||||
} else {
|
||||
tagDict.set("P", structTreeRootRef);
|
||||
}
|
||||
await this.#updateParentTag({
|
||||
structTreeParent,
|
||||
tagDict,
|
||||
newTagRef: tagRef,
|
||||
structTreeRootRef,
|
||||
fallbackKids: kids,
|
||||
xref,
|
||||
cache,
|
||||
});
|
||||
|
||||
const objDict = new Dict(xref);
|
||||
tagDict.set("K", objDict);
|
||||
|
@ -372,23 +350,24 @@ class StructTreeRoot {
|
|||
}
|
||||
objDict.set("Obj", ref);
|
||||
|
||||
buffer.length = 0;
|
||||
await writeObject(tagRef, tagDict, buffer, xref);
|
||||
newRefs.push({ ref: tagRef, data: buffer.join("") });
|
||||
|
||||
cache.put(tagRef, tagDict);
|
||||
nums.push(parentTreeId, tagRef);
|
||||
kids.push(tagRef);
|
||||
}
|
||||
}
|
||||
return nextKey + 1;
|
||||
}
|
||||
|
||||
static #collectParents({ elements, xref, pageDict, numberTree }) {
|
||||
const idToElement = new Map();
|
||||
const idToElements = new Map();
|
||||
for (const element of elements) {
|
||||
if (element.structTreeParentId) {
|
||||
const id = parseInt(element.structTreeParentId.split("_mc")[1], 10);
|
||||
idToElement.set(id, element);
|
||||
let elems = idToElements.get(id);
|
||||
if (!elems) {
|
||||
elems = [];
|
||||
idToElements.set(id, elems);
|
||||
}
|
||||
elems.push(element);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -400,13 +379,16 @@ class StructTreeRoot {
|
|||
const parentArray = numberTree.get(id);
|
||||
|
||||
const updateElement = (kid, pageKid, kidRef) => {
|
||||
const element = idToElement.get(kid);
|
||||
if (element) {
|
||||
const elems = idToElements.get(kid);
|
||||
if (elems) {
|
||||
const parentRef = pageKid.getRaw("P");
|
||||
const parentDict = xref.fetchIfRef(parentRef);
|
||||
if (parentRef instanceof Ref && parentDict instanceof Dict) {
|
||||
// It should always the case, but we check just in case.
|
||||
element.structTreeParent = { ref: kidRef, dict: pageKid };
|
||||
const params = { ref: kidRef, dict: pageKid };
|
||||
for (const element of elems) {
|
||||
element.structTreeParent = params;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -431,67 +413,73 @@ class StructTreeRoot {
|
|||
if (Number.isInteger(kid) && updateElement(kid, pageKid, kidRef)) {
|
||||
break;
|
||||
}
|
||||
if (!(kid instanceof Dict)) {
|
||||
continue;
|
||||
}
|
||||
if (!isName(kid.get("Type"), "MCR")) {
|
||||
break;
|
||||
}
|
||||
const mcid = kid.get("MCID");
|
||||
if (Number.isInteger(mcid) && updateElement(mcid, pageKid, kidRef)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static async #updateParentTag({
|
||||
structTreeParent: { ref, dict },
|
||||
structTreeParent,
|
||||
tagDict,
|
||||
newTagRef,
|
||||
fallbackRef,
|
||||
structTreeRootRef,
|
||||
fallbackKids,
|
||||
xref,
|
||||
newRefs,
|
||||
buffer,
|
||||
cache,
|
||||
}) {
|
||||
// We get the parent of the tag.
|
||||
const parentRef = dict.getRaw("P");
|
||||
let parentDict = xref.fetchIfRef(parentRef);
|
||||
let ref = null;
|
||||
let parentRef;
|
||||
if (structTreeParent) {
|
||||
({ ref } = structTreeParent);
|
||||
|
||||
// We get the parent of the tag.
|
||||
parentRef = structTreeParent.dict.getRaw("P") || structTreeRootRef;
|
||||
} else {
|
||||
parentRef = structTreeRootRef;
|
||||
}
|
||||
|
||||
tagDict.set("P", parentRef);
|
||||
|
||||
// We get the kids in order to insert a new tag at the right position.
|
||||
let saveParentDict = false;
|
||||
let parentKids;
|
||||
let parentKidsRef = parentDict.getRaw("K");
|
||||
if (!(parentKidsRef instanceof Ref)) {
|
||||
parentKids = parentKidsRef;
|
||||
parentKidsRef = xref.getNewTemporaryRef();
|
||||
parentDict = parentDict.clone();
|
||||
parentDict.set("K", parentKidsRef);
|
||||
saveParentDict = true;
|
||||
} else {
|
||||
parentKids = xref.fetch(parentKidsRef);
|
||||
}
|
||||
|
||||
if (Array.isArray(parentKids)) {
|
||||
const index = parentKids.indexOf(ref);
|
||||
if (index >= 0) {
|
||||
parentKids = parentKids.slice();
|
||||
parentKids.splice(index + 1, 0, newTagRef);
|
||||
} else {
|
||||
warn("Cannot update the struct tree: parent kid not found.");
|
||||
tagDict.set("P", fallbackRef);
|
||||
return;
|
||||
}
|
||||
} else if (parentKids instanceof Dict) {
|
||||
parentKids = [parentKidsRef, newTagRef];
|
||||
parentKidsRef = xref.getNewTemporaryRef();
|
||||
parentDict.set("K", parentKidsRef);
|
||||
saveParentDict = true;
|
||||
}
|
||||
|
||||
buffer.length = 0;
|
||||
await writeObject(parentKidsRef, parentKids, buffer, xref);
|
||||
newRefs.push({ ref: parentKidsRef, data: buffer.join("") });
|
||||
|
||||
if (!saveParentDict) {
|
||||
const parentDict = xref.fetchIfRef(parentRef);
|
||||
if (!parentDict) {
|
||||
fallbackKids.push(newTagRef);
|
||||
return;
|
||||
}
|
||||
|
||||
buffer.length = 0;
|
||||
await writeObject(parentRef, parentDict, buffer, xref);
|
||||
newRefs.push({ ref: parentRef, data: buffer.join("") });
|
||||
let cachedParentDict = cache.get(parentRef);
|
||||
if (!cachedParentDict) {
|
||||
cachedParentDict = parentDict.clone();
|
||||
cache.put(parentRef, cachedParentDict);
|
||||
}
|
||||
const parentKidsRaw = cachedParentDict.getRaw("K");
|
||||
let cachedParentKids =
|
||||
parentKidsRaw instanceof Ref ? cache.get(parentKidsRaw) : null;
|
||||
if (!cachedParentKids) {
|
||||
cachedParentKids = xref.fetchIfRef(parentKidsRaw);
|
||||
cachedParentKids = Array.isArray(cachedParentKids)
|
||||
? cachedParentKids.slice()
|
||||
: [parentKidsRaw];
|
||||
const parentKidsRef = xref.getNewTemporaryRef();
|
||||
cachedParentDict.set("K", parentKidsRef);
|
||||
cache.put(parentKidsRef, cachedParentKids);
|
||||
}
|
||||
|
||||
const index = cachedParentKids.indexOf(ref);
|
||||
cachedParentKids.splice(
|
||||
index >= 0 ? index + 1 : cachedParentKids.length,
|
||||
0,
|
||||
newTagRef
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue