1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-24 09:08:07 +02:00

Allow searching for number-number on two lines

When a dash separates two digits, it is very unlikely to be a hyphen
inserted to split a word across two lines (e.g. "par-\nser"); it is much
more likely to be a minus sign, a range, or a date. For example, in the
tracemonkey PDF there is `2008-02` (a date) split across two lines.

Preserving the dash, similarly to how we do for compound words, allows
searches for "2008-02" to find a match.
This commit is contained in:
Nicolò Ribaudo 2025-01-14 17:54:37 +01:00
parent 016de74229
commit 8358ab63b3
No known key found for this signature in database
GPG key ID: AAFDA9101C58F338
2 changed files with 78 additions and 30 deletions

View file

@@ -1104,6 +1104,40 @@ describe("pdf_find_controller", function () {
});
});
it("performs a search with a dash between two digits", async () => {
  // Only the page at index 13 is expected to report a match for the query;
  // every page before it is expected to report none.
  const matchPageIndex = 13;

  // Builds a fresh list of per-page placeholders for the pages that precede
  // the matching one, so no array instance is shared between expectations.
  const pagesBeforeMatch = makeEntry =>
    Array.from({ length: matchPageIndex }, makeEntry);

  const { eventBus, pdfFindController } = await initPdfFindController();

  await testSearch({
    eventBus,
    pdfFindController,
    state: { query: "2008-02" },
    matchesPerPage: [...pagesBeforeMatch(() => 0), 1],
    selectedMatch: { pageIndex: matchPageIndex, matchIndex: 0 },
    // NOTE(review): 314 is presumably the match offset within the page
    // text; confirm against `testSearch`.
    pageMatches: [...pagesBeforeMatch(() => []), [314]],
    // The expected length, 7, equals the number of characters in the query,
    // i.e. the dash is counted as part of the match.
    pageMatchesLength: [...pagesBeforeMatch(() => []), [7]],
  });
});
describe("custom matcher", () => {
it("calls to the matcher with the right arguments", async () => {
const QUERY = "Foo bar";