1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-19 22:58:07 +02:00

Correctly compute the mapping between text and normalized text when it contains a compound word on two lines

It fixes #19120.

The original text doesn't contain the line break (`\n`) that follows the hyphen, so we must take that into account when computing the positions.
This commit is contained in:
Calixte Denizet 2024-11-28 15:56:00 +01:00
parent 22babd722f
commit aa9503e51f
4 changed files with 27 additions and 5 deletions

View file

@ -282,8 +282,7 @@ function normalize(text) {
if (p5) {
// Compound word with a line break after the hyphen.
positions.push([i - shift + 3, 1 + shift]);
shift += 1;
// Since the \n isn't in the original text, o = 3 and n = 3.
shiftOrigin += 1;
eol += 1;
return p5.replace("\n", "");