1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-19 14:48:08 +02:00

Merge pull request #18504 from Snuffleupagus/issue-18503

Ignore non-dictionary nodes when parsing StructTree data (issue 18503)
This commit is contained in:
Tim van der Meij 2024-07-28 13:06:08 +02:00 committed by GitHub
commit 0ef1132563
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 192 additions and 0 deletions

View file

@ -671,6 +671,9 @@ class StructTreePage {
warn("StructTree MAX_DEPTH reached.");
return null;
}
if (!(dict instanceof Dict)) {
return null;
}
if (map.has(dict)) {
return map.get(dict);

View file

@ -0,0 +1 @@
https://github.com/user-attachments/files/16402842/apl_23_003.pdf

View file

@ -15,6 +15,14 @@
"link": true,
"type": "other"
},
{
"id": "issue18503",
"file": "pdfs/issue18503.pdf",
"md5": "b6c7c8db3505d07ce8eabe6712641a94",
"rounds": 1,
"link": true,
"type": "other"
},
{
"id": "filled-background-range",
"file": "pdfs/filled-background.pdf",

View file

@ -3755,6 +3755,186 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
await loadingTask.destroy();
});
// Regression test for issue 18503: loads the linked `issue18503.pdf`
// test-case, requests the structure tree of its first page and compares it
// against the expected tree below.
it("gets corrupt structure tree with non-dictionary nodes (issue 18503)", async function () {
  if (isNodeJS) {
    pending("Linked test-cases are not supported in Node.js.");
  }

  // Expected result of `getStructTree()` for page 1 of the test file.
  const expectedTree = {
    role: "Root",
    children: [
      {
        role: "Document",
        lang: "en-US",
        children: [
          {
            role: "Sect",
            children: [
              {
                role: "P",
                children: [{ type: "content", id: "p406R_mc2" }],
              },
              {
                role: "Figure",
                children: [{ type: "content", id: "p406R_mc11" }],
                alt: "d h c s logo",
              },
              {
                role: "Figure",
                children: [{ type: "content", id: "p406R_mc1" }],
                alt: "Great Seal of the State of California",
              },
              {
                role: "P",
                children: [
                  { type: "content", id: "p406R_mc3" },
                  { type: "content", id: "p406R_mc5" },
                  { type: "content", id: "p406R_mc7" },
                ],
              },
              {
                role: "P",
                children: [
                  { type: "content", id: "p406R_mc4" },
                  { type: "content", id: "p406R_mc6" },
                ],
              },
              {
                role: "P",
                children: [{ type: "content", id: "p406R_mc12" }],
              },
              {
                role: "P",
                children: [{ type: "content", id: "p406R_mc13" }],
              },
              {
                role: "P",
                children: [
                  {
                    role: "Span",
                    children: [
                      { type: "content", id: "p406R_mc15" },
                      {
                        role: "Note",
                        children: [{ type: "content", id: "p406R_mc32" }],
                      },
                    ],
                  },
                  { type: "content", id: "p406R_mc14" },
                  { type: "content", id: "p406R_mc16" },
                ],
              },
              {
                role: "H1",
                children: [{ type: "content", id: "p406R_mc17" }],
              },
            ],
          },
          {
            role: "Sect",
            children: [
              {
                role: "H2",
                children: [{ type: "content", id: "p406R_mc18" }],
              },
              {
                role: "P",
                children: [{ type: "content", id: "p406R_mc19" }],
              },
            ],
          },
          {
            role: "Sect",
            children: [
              {
                role: "H2",
                children: [{ type: "content", id: "p406R_mc20" }],
              },
              {
                role: "P",
                children: [
                  { type: "content", id: "p406R_mc21" },
                  {
                    role: "Span",
                    children: [
                      { type: "content", id: "p406R_mc23" },
                      {
                        role: "Note",
                        children: [
                          { type: "content", id: "p406R_mc33" },
                          {
                            role: "Link",
                            children: [
                              { type: "object", id: "432R" },
                              { type: "content", id: "p406R_mc34" },
                            ],
                          },
                        ],
                      },
                    ],
                  },
                  { type: "content", id: "p406R_mc22" },
                  { type: "content", id: "p406R_mc24" },
                  { type: "content", id: "p406R_mc25" },
                  { type: "content", id: "p406R_mc26" },
                  {
                    role: "Span",
                    children: [
                      { type: "content", id: "p406R_mc28" },
                      {
                        role: "Note",
                        children: [
                          { type: "content", id: "p406R_mc35" },
                          {
                            role: "Link",
                            children: [
                              { type: "object", id: "433R" },
                              { type: "content", id: "p406R_mc36" },
                            ],
                          },
                          { type: "content", id: "p406R_mc37" },
                        ],
                      },
                    ],
                  },
                  { type: "content", id: "p406R_mc29" },
                  { type: "content", id: "p406R_mc27" },
                  { type: "content", id: "p406R_mc30" },
                ],
              },
              {
                role: "P",
                children: [{ type: "content", id: "p406R_mc31" }],
              },
              {
                role: "P",
                children: [
                  { type: "content", id: "p406R_mc8" },
                  { type: "content", id: "p406R_mc9" },
                  {
                    role: "Link",
                    children: [
                      { type: "object", id: "434R" },
                      { type: "content", id: "p406R_mc10" },
                    ],
                  },
                ],
              },
            ],
          },
        ],
      },
    ],
  };

  const loadingTask = getDocument(buildGetDocumentParams("issue18503.pdf"));
  try {
    const pdfDoc = await loadingTask.promise;
    const pdfPage = await pdfDoc.getPage(1);
    const tree = await pdfPage.getStructTree();

    expect(tree).toEqual(expectedTree);
  } finally {
    // Destroy the loading task even when one of the steps above rejects, so
    // that a failure in this spec does not leave the task alive.
    await loadingTask.destroy();
  }
});
it("gets operator list", async function () {
const operatorList = await page.getOperatorList();