1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-20 15:18:08 +02:00

Fix search in a pdf containing some UTF-32 characters (bug 1820909)

Some chars were supposed to have a length equal to 1 but UTF-32 chars
can be longer.
This commit is contained in:
Calixte Denizet 2023-03-09 14:13:28 +01:00
parent a0ef5a4ae1
commit 07b094729e
6 changed files with 66 additions and 5 deletions

View file

@ -576,3 +576,4 @@
!bug1770750.pdf
!issue16063.pdf
!issue16067.pdf
!bug1820909.1.pdf

BIN
test/pdfs/bug1820909.1.pdf Executable file

Binary file not shown.

View file

@ -0,0 +1,2 @@
https://web.archive.org/web/20221122204959/https://www.unicode.org/charts/PDF/U31350.pdf

View file

@ -7455,5 +7455,12 @@
"rounds": 1,
"link": true,
"type": "eq"
},
{
"id": "bug1820909",
"file": "pdfs/bug1820909.pdf",
"md5": "d95a83a868671a03cbf322f16b2e2b9d",
"link": true,
"type": "other"
}
]

View file

@ -854,4 +854,50 @@ describe("pdf_find_controller", function () {
pageMatchesLength: [[7]],
});
});
it("performs a search in a text with some UTF-32 chars", async function () {
  // This spec relies on a linked test-case, so it is marked as pending when
  // running under Node.js.
  if (isNodeJS) {
    pending("Linked test-cases are not supported in Node.js.");
  }

  const setup = await initPdfFindController("bug1820909.pdf");

  // Search parameters and the match expected to be selected afterwards.
  const state = { query: "31350" };
  const selectedMatch = { pageIndex: 0, matchIndex: 0 };

  // One match is expected on the first page and two on the second one; every
  // expected match length equals the length of the query (5).
  await testSearch({
    eventBus: setup.eventBus,
    pdfFindController: setup.pdfFindController,
    state,
    matchesPerPage: [1, 2],
    selectedMatch,
    pageMatches: [[41], [131, 1359]],
    pageMatchesLength: [[5], [5, 5]],
  });
});
it("performs a search in a text with some UTF-32 chars followed by a dash at the end of a line", async function () {
  const setup = await initPdfFindController("bug1820909.1.pdf");

  // Search parameters and the match expected to be selected afterwards.
  const state = { query: "abcde" };
  const selectedMatch = { pageIndex: 0, matchIndex: 0 };

  // Two matches are expected on the single page; every expected match length
  // equals the length of the query (5).
  await testSearch({
    eventBus: setup.eventBus,
    pdfFindController: setup.pdfFindController,
    state,
    matchesPerPage: [2],
    selectedMatch,
    pageMatches: [[42, 95]],
    pageMatchesLength: [[5, 5]],
  });
});
});