mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-19 22:58:07 +02:00
Merge pull request #17986 from calixteman/fix_struct_tree
Allow to insert several annotations under the same parent in the structure tree
This commit is contained in:
commit
d1f494d68c
6 changed files with 243 additions and 112 deletions
|
@ -378,6 +378,12 @@ class RefSetCache {
|
|||
clear() {
|
||||
this._map.clear();
|
||||
}
|
||||
|
||||
*items() {
|
||||
for (const [ref, value] of this._map) {
|
||||
yield [Ref.fromString(ref), value];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function isName(v, name) {
|
||||
|
|
|
@ -119,19 +119,19 @@ class StructTreeRoot {
|
|||
newRefs,
|
||||
}) {
|
||||
const root = pdfManager.catalog.cloneDict();
|
||||
const cache = new RefSetCache();
|
||||
cache.put(catalogRef, root);
|
||||
|
||||
const structTreeRootRef = xref.getNewTemporaryRef();
|
||||
root.set("StructTreeRoot", structTreeRootRef);
|
||||
|
||||
const buffer = [];
|
||||
await writeObject(catalogRef, root, buffer, xref);
|
||||
newRefs.push({ ref: catalogRef, data: buffer.join("") });
|
||||
|
||||
const structTreeRoot = new Dict(xref);
|
||||
structTreeRoot.set("Type", Name.get("StructTreeRoot"));
|
||||
const parentTreeRef = xref.getNewTemporaryRef();
|
||||
structTreeRoot.set("ParentTree", parentTreeRef);
|
||||
const kids = [];
|
||||
structTreeRoot.set("K", kids);
|
||||
cache.put(structTreeRootRef, structTreeRoot);
|
||||
|
||||
const parentTree = new Dict(xref);
|
||||
const nums = [];
|
||||
|
@ -144,18 +144,18 @@ class StructTreeRoot {
|
|||
nums,
|
||||
xref,
|
||||
pdfManager,
|
||||
newRefs,
|
||||
buffer,
|
||||
cache,
|
||||
});
|
||||
structTreeRoot.set("ParentTreeNextKey", nextKey);
|
||||
|
||||
buffer.length = 0;
|
||||
await writeObject(parentTreeRef, parentTree, buffer, xref);
|
||||
newRefs.push({ ref: parentTreeRef, data: buffer.join("") });
|
||||
cache.put(parentTreeRef, parentTree);
|
||||
|
||||
buffer.length = 0;
|
||||
await writeObject(structTreeRootRef, structTreeRoot, buffer, xref);
|
||||
newRefs.push({ ref: structTreeRootRef, data: buffer.join("") });
|
||||
const buffer = [];
|
||||
for (const [ref, obj] of cache.items()) {
|
||||
buffer.length = 0;
|
||||
await writeObject(ref, obj, buffer, xref);
|
||||
newRefs.push({ ref, data: buffer.join("") });
|
||||
}
|
||||
}
|
||||
|
||||
async canUpdateStructTree({ pdfManager, xref, newAnnotationsByPage }) {
|
||||
|
@ -232,6 +232,8 @@ class StructTreeRoot {
|
|||
const xref = this.dict.xref;
|
||||
const structTreeRoot = this.dict.clone();
|
||||
const structTreeRootRef = this.ref;
|
||||
const cache = new RefSetCache();
|
||||
cache.put(structTreeRootRef, structTreeRoot);
|
||||
|
||||
let parentTreeRef = structTreeRoot.getRaw("ParentTree");
|
||||
let parentTree;
|
||||
|
@ -243,6 +245,7 @@ class StructTreeRoot {
|
|||
structTreeRoot.set("ParentTree", parentTreeRef);
|
||||
}
|
||||
parentTree = parentTree.clone();
|
||||
cache.put(parentTreeRef, parentTree);
|
||||
|
||||
let nums = parentTree.getRaw("Nums");
|
||||
let numsRef = null;
|
||||
|
@ -255,47 +258,27 @@ class StructTreeRoot {
|
|||
parentTree.set("Nums", nums);
|
||||
}
|
||||
|
||||
let kids = structTreeRoot.getRaw("K");
|
||||
let kidsRef = null;
|
||||
if (kids instanceof Ref) {
|
||||
kidsRef = kids;
|
||||
kids = xref.fetch(kidsRef);
|
||||
} else {
|
||||
kidsRef = xref.getNewTemporaryRef();
|
||||
structTreeRoot.set("K", kidsRef);
|
||||
}
|
||||
kids = Array.isArray(kids) ? kids.slice() : [kids];
|
||||
|
||||
const buffer = [];
|
||||
const newNextkey = await StructTreeRoot.#writeKids({
|
||||
newAnnotationsByPage,
|
||||
structTreeRootRef,
|
||||
kids,
|
||||
kids: null,
|
||||
nums,
|
||||
xref,
|
||||
pdfManager,
|
||||
newRefs,
|
||||
buffer,
|
||||
cache,
|
||||
});
|
||||
structTreeRoot.set("ParentTreeNextKey", newNextkey);
|
||||
|
||||
buffer.length = 0;
|
||||
await writeObject(kidsRef, kids, buffer, xref);
|
||||
newRefs.push({ ref: kidsRef, data: buffer.join("") });
|
||||
|
||||
if (numsRef) {
|
||||
buffer.length = 0;
|
||||
await writeObject(numsRef, nums, buffer, xref);
|
||||
newRefs.push({ ref: numsRef, data: buffer.join("") });
|
||||
cache.put(numsRef, nums);
|
||||
}
|
||||
|
||||
buffer.length = 0;
|
||||
await writeObject(parentTreeRef, parentTree, buffer, xref);
|
||||
newRefs.push({ ref: parentTreeRef, data: buffer.join("") });
|
||||
|
||||
buffer.length = 0;
|
||||
await writeObject(structTreeRootRef, structTreeRoot, buffer, xref);
|
||||
newRefs.push({ ref: structTreeRootRef, data: buffer.join("") });
|
||||
const buffer = [];
|
||||
for (const [ref, obj] of cache.items()) {
|
||||
buffer.length = 0;
|
||||
await writeObject(ref, obj, buffer, xref);
|
||||
newRefs.push({ ref, data: buffer.join("") });
|
||||
}
|
||||
}
|
||||
|
||||
static async #writeKids({
|
||||
|
@ -305,8 +288,7 @@ class StructTreeRoot {
|
|||
nums,
|
||||
xref,
|
||||
pdfManager,
|
||||
newRefs,
|
||||
buffer,
|
||||
cache,
|
||||
}) {
|
||||
const objr = Name.get("OBJR");
|
||||
let nextKey = -Infinity;
|
||||
|
@ -349,19 +331,15 @@ class StructTreeRoot {
|
|||
tagDict.set("ActualText", actualText);
|
||||
}
|
||||
|
||||
if (structTreeParent) {
|
||||
await this.#updateParentTag({
|
||||
structTreeParent,
|
||||
tagDict,
|
||||
newTagRef: tagRef,
|
||||
fallbackRef: structTreeRootRef,
|
||||
xref,
|
||||
newRefs,
|
||||
buffer,
|
||||
});
|
||||
} else {
|
||||
tagDict.set("P", structTreeRootRef);
|
||||
}
|
||||
await this.#updateParentTag({
|
||||
structTreeParent,
|
||||
tagDict,
|
||||
newTagRef: tagRef,
|
||||
structTreeRootRef,
|
||||
fallbackKids: kids,
|
||||
xref,
|
||||
cache,
|
||||
});
|
||||
|
||||
const objDict = new Dict(xref);
|
||||
tagDict.set("K", objDict);
|
||||
|
@ -372,23 +350,24 @@ class StructTreeRoot {
|
|||
}
|
||||
objDict.set("Obj", ref);
|
||||
|
||||
buffer.length = 0;
|
||||
await writeObject(tagRef, tagDict, buffer, xref);
|
||||
newRefs.push({ ref: tagRef, data: buffer.join("") });
|
||||
|
||||
cache.put(tagRef, tagDict);
|
||||
nums.push(parentTreeId, tagRef);
|
||||
kids.push(tagRef);
|
||||
}
|
||||
}
|
||||
return nextKey + 1;
|
||||
}
|
||||
|
||||
static #collectParents({ elements, xref, pageDict, numberTree }) {
|
||||
const idToElement = new Map();
|
||||
const idToElements = new Map();
|
||||
for (const element of elements) {
|
||||
if (element.structTreeParentId) {
|
||||
const id = parseInt(element.structTreeParentId.split("_mc")[1], 10);
|
||||
idToElement.set(id, element);
|
||||
let elems = idToElements.get(id);
|
||||
if (!elems) {
|
||||
elems = [];
|
||||
idToElements.set(id, elems);
|
||||
}
|
||||
elems.push(element);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -400,13 +379,16 @@ class StructTreeRoot {
|
|||
const parentArray = numberTree.get(id);
|
||||
|
||||
const updateElement = (kid, pageKid, kidRef) => {
|
||||
const element = idToElement.get(kid);
|
||||
if (element) {
|
||||
const elems = idToElements.get(kid);
|
||||
if (elems) {
|
||||
const parentRef = pageKid.getRaw("P");
|
||||
const parentDict = xref.fetchIfRef(parentRef);
|
||||
if (parentRef instanceof Ref && parentDict instanceof Dict) {
|
||||
// It should always be the case, but we check just in case.
|
||||
element.structTreeParent = { ref: kidRef, dict: pageKid };
|
||||
const params = { ref: kidRef, dict: pageKid };
|
||||
for (const element of elems) {
|
||||
element.structTreeParent = params;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -431,67 +413,73 @@ class StructTreeRoot {
|
|||
if (Number.isInteger(kid) && updateElement(kid, pageKid, kidRef)) {
|
||||
break;
|
||||
}
|
||||
if (!(kid instanceof Dict)) {
|
||||
continue;
|
||||
}
|
||||
if (!isName(kid.get("Type"), "MCR")) {
|
||||
break;
|
||||
}
|
||||
const mcid = kid.get("MCID");
|
||||
if (Number.isInteger(mcid) && updateElement(mcid, pageKid, kidRef)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static async #updateParentTag({
|
||||
structTreeParent: { ref, dict },
|
||||
structTreeParent,
|
||||
tagDict,
|
||||
newTagRef,
|
||||
fallbackRef,
|
||||
structTreeRootRef,
|
||||
fallbackKids,
|
||||
xref,
|
||||
newRefs,
|
||||
buffer,
|
||||
cache,
|
||||
}) {
|
||||
// We get the parent of the tag.
|
||||
const parentRef = dict.getRaw("P");
|
||||
let parentDict = xref.fetchIfRef(parentRef);
|
||||
let ref = null;
|
||||
let parentRef;
|
||||
if (structTreeParent) {
|
||||
({ ref } = structTreeParent);
|
||||
|
||||
// We get the parent of the tag.
|
||||
parentRef = structTreeParent.dict.getRaw("P") || structTreeRootRef;
|
||||
} else {
|
||||
parentRef = structTreeRootRef;
|
||||
}
|
||||
|
||||
tagDict.set("P", parentRef);
|
||||
|
||||
// We get the kids in order to insert a new tag at the right position.
|
||||
let saveParentDict = false;
|
||||
let parentKids;
|
||||
let parentKidsRef = parentDict.getRaw("K");
|
||||
if (!(parentKidsRef instanceof Ref)) {
|
||||
parentKids = parentKidsRef;
|
||||
parentKidsRef = xref.getNewTemporaryRef();
|
||||
parentDict = parentDict.clone();
|
||||
parentDict.set("K", parentKidsRef);
|
||||
saveParentDict = true;
|
||||
} else {
|
||||
parentKids = xref.fetch(parentKidsRef);
|
||||
}
|
||||
|
||||
if (Array.isArray(parentKids)) {
|
||||
const index = parentKids.indexOf(ref);
|
||||
if (index >= 0) {
|
||||
parentKids = parentKids.slice();
|
||||
parentKids.splice(index + 1, 0, newTagRef);
|
||||
} else {
|
||||
warn("Cannot update the struct tree: parent kid not found.");
|
||||
tagDict.set("P", fallbackRef);
|
||||
return;
|
||||
}
|
||||
} else if (parentKids instanceof Dict) {
|
||||
parentKids = [parentKidsRef, newTagRef];
|
||||
parentKidsRef = xref.getNewTemporaryRef();
|
||||
parentDict.set("K", parentKidsRef);
|
||||
saveParentDict = true;
|
||||
}
|
||||
|
||||
buffer.length = 0;
|
||||
await writeObject(parentKidsRef, parentKids, buffer, xref);
|
||||
newRefs.push({ ref: parentKidsRef, data: buffer.join("") });
|
||||
|
||||
if (!saveParentDict) {
|
||||
const parentDict = xref.fetchIfRef(parentRef);
|
||||
if (!parentDict) {
|
||||
fallbackKids.push(newTagRef);
|
||||
return;
|
||||
}
|
||||
|
||||
buffer.length = 0;
|
||||
await writeObject(parentRef, parentDict, buffer, xref);
|
||||
newRefs.push({ ref: parentRef, data: buffer.join("") });
|
||||
let cachedParentDict = cache.get(parentRef);
|
||||
if (!cachedParentDict) {
|
||||
cachedParentDict = parentDict.clone();
|
||||
cache.put(parentRef, cachedParentDict);
|
||||
}
|
||||
const parentKidsRaw = cachedParentDict.getRaw("K");
|
||||
let cachedParentKids =
|
||||
parentKidsRaw instanceof Ref ? cache.get(parentKidsRaw) : null;
|
||||
if (!cachedParentKids) {
|
||||
cachedParentKids = xref.fetchIfRef(parentKidsRaw);
|
||||
cachedParentKids = Array.isArray(cachedParentKids)
|
||||
? cachedParentKids.slice()
|
||||
: [parentKidsRaw];
|
||||
const parentKidsRef = xref.getNewTemporaryRef();
|
||||
cachedParentDict.set("K", parentKidsRef);
|
||||
cache.put(parentKidsRef, cachedParentKids);
|
||||
}
|
||||
|
||||
const index = cachedParentKids.indexOf(ref);
|
||||
cachedParentKids.splice(
|
||||
index >= 0 ? index + 1 : cachedParentKids.length,
|
||||
0,
|
||||
newTagRef
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
|
@ -645,3 +645,4 @@
|
|||
!issue12213.pdf
|
||||
!tracemonkey_freetext.pdf
|
||||
!issue17998.pdf
|
||||
!pdfjs_wikipedia.pdf
|
||||
|
|
BIN
test/pdfs/pdfjs_wikipedia.pdf
Executable file
BIN
test/pdfs/pdfjs_wikipedia.pdf
Executable file
Binary file not shown.
|
@ -1030,6 +1030,20 @@ describe("api", function () {
|
|||
await pdfLoadingTask.destroy();
|
||||
});
|
||||
|
||||
/**
 * Depth-first search for the first node accepted by `check`.
 *
 * @param {Object|null} parent - Node whose `children` holds `node`, or `null`
 *   when `node` is the root of the search.
 * @param {Object} node - Node to test first, then to descend into.
 * @param {number} index - Position of `node` inside `parent.children`.
 * @param {function(Object): boolean} check - Predicate selecting the node.
 * @returns {Array|null} `[predecessor, match]`, where `predecessor` is the
 *   sibling located right before the match (`undefined` when the match is a
 *   first child or the root itself), or `null` when nothing matches.
 */
function findNode(parent, node, index, check) {
  if (check(node)) {
    // `parent` is null when the root itself matches: there is no sibling to
    // report in that case, so avoid dereferencing it.
    return [parent?.children[index - 1], node];
  }
  // The parentheses matter: `<` binds tighter than `??`, so without them the
  // `?? 0` fallback would apply to the comparison result and never be used.
  for (let i = 0; i < (node.children?.length ?? 0); i++) {
    const child = node.children[i];
    const elements = findNode(node, child, i, check);
    if (elements) {
      return elements;
    }
  }
  return null;
}
|
||||
|
||||
it("gets number of pages", function () {
|
||||
expect(pdfDocument.numPages).toEqual(3);
|
||||
});
|
||||
|
@ -2396,7 +2410,22 @@ describe("api", function () {
|
|||
pdfDoc = await loadingTask.promise;
|
||||
const page = await pdfDoc.getPage(1);
|
||||
const tree = await page.getStructTree();
|
||||
const leaf = tree.children[0].children[6].children[1];
|
||||
const [predecessor, leaf] = findNode(
|
||||
null,
|
||||
tree,
|
||||
0,
|
||||
node => node.role === "Figure"
|
||||
);
|
||||
|
||||
expect(predecessor).toEqual({
|
||||
role: "Span",
|
||||
children: [
|
||||
{
|
||||
type: "content",
|
||||
id: "p3R_mc12",
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(leaf).toEqual({
|
||||
role: "Figure",
|
||||
|
@ -2412,6 +2441,104 @@ describe("api", function () {
|
|||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("write a new stamp annotation in a tagged pdf (with some MCIDs), save and check the structure tree", async function () {
|
||||
if (isNodeJS) {
|
||||
pending("Cannot create a bitmap from Node.js.");
|
||||
}
|
||||
|
||||
const TEST_IMAGES_PATH = "../images/";
|
||||
const filename = "firefox_logo.png";
|
||||
const path = new URL(TEST_IMAGES_PATH + filename, window.location).href;
|
||||
|
||||
const response = await fetch(path);
|
||||
const blob = await response.blob();
|
||||
const bitmap = await createImageBitmap(blob);
|
||||
|
||||
let loadingTask = getDocument(
|
||||
buildGetDocumentParams("pdfjs_wikipedia.pdf")
|
||||
);
|
||||
let pdfDoc = await loadingTask.promise;
|
||||
for (let i = 0; i < 2; i++) {
|
||||
pdfDoc.annotationStorage.setValue(`pdfjs_internal_editor_${i}`, {
|
||||
annotationType: AnnotationEditorType.STAMP,
|
||||
bitmapId: `im${i}`,
|
||||
pageIndex: 0,
|
||||
rect: [257 + i, 572 + i, 286 + i, 603 + i],
|
||||
rotation: 0,
|
||||
isSvg: false,
|
||||
structTreeParentId: "p2R_mc155",
|
||||
accessibilityData: {
|
||||
type: "Figure",
|
||||
alt: `Firefox logo ${i}`,
|
||||
},
|
||||
bitmap: structuredClone(bitmap),
|
||||
});
|
||||
}
|
||||
|
||||
const data = await pdfDoc.saveDocument();
|
||||
await loadingTask.destroy();
|
||||
|
||||
loadingTask = getDocument(data);
|
||||
pdfDoc = await loadingTask.promise;
|
||||
const page = await pdfDoc.getPage(1);
|
||||
const tree = await page.getStructTree();
|
||||
|
||||
let [predecessor, figure] = findNode(
|
||||
null,
|
||||
tree,
|
||||
0,
|
||||
node => node.role === "Figure" && node.alt === "Firefox logo 1"
|
||||
);
|
||||
expect(predecessor).toEqual({
|
||||
role: "NonStruct",
|
||||
children: [
|
||||
{
|
||||
type: "content",
|
||||
id: "p2R_mc155",
|
||||
},
|
||||
],
|
||||
});
|
||||
expect(figure).toEqual({
|
||||
role: "Figure",
|
||||
children: [
|
||||
{
|
||||
type: "annotation",
|
||||
id: "pdfjs_internal_id_420R",
|
||||
},
|
||||
],
|
||||
alt: "Firefox logo 1",
|
||||
});
|
||||
|
||||
[predecessor, figure] = findNode(
|
||||
null,
|
||||
tree,
|
||||
0,
|
||||
node => node.role === "Figure" && node.alt === "Firefox logo 0"
|
||||
);
|
||||
expect(predecessor).toEqual({
|
||||
role: "Figure",
|
||||
children: [
|
||||
{
|
||||
type: "annotation",
|
||||
id: "pdfjs_internal_id_420R",
|
||||
},
|
||||
],
|
||||
alt: "Firefox logo 1",
|
||||
});
|
||||
expect(figure).toEqual({
|
||||
role: "Figure",
|
||||
children: [
|
||||
{
|
||||
type: "annotation",
|
||||
id: "pdfjs_internal_id_416R",
|
||||
},
|
||||
],
|
||||
alt: "Firefox logo 0",
|
||||
});
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("write a new stamp annotation in a tagged pdf, save, repeat and check the structure tree", async function () {
|
||||
if (isNodeJS) {
|
||||
pending("Cannot create a bitmap from Node.js.");
|
||||
|
|
|
@ -498,6 +498,15 @@ describe("primitives", function () {
|
|||
cache.put(ref2, obj2);
|
||||
expect([...cache]).toEqual([obj1, obj2]);
|
||||
});
|
||||
|
||||
it("should support iteration over key-value pairs", function () {
|
||||
cache.put(ref1, obj1);
|
||||
cache.put(ref2, obj2);
|
||||
expect([...cache.items()]).toEqual([
|
||||
[ref1, obj1],
|
||||
[ref2, obj2],
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("isName", function () {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue