1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-19 14:48:08 +02:00

Keep the empty lines in the text fields

It fixes #18036.
This commit is contained in:
Calixte Denizet 2024-10-04 21:29:08 +02:00
parent 6aef332bcd
commit 3194f3de8b
4 changed files with 35 additions and 4 deletions

View file

@ -2299,7 +2299,7 @@ class WidgetAnnotation extends Annotation {
}
assert(typeof value === "string", "Expected `value` to be a string.");
value = value.trim();
value = value.trimEnd();
if (this.data.combo) {
// The value can be one of the exportValue or any other values.
@ -2934,6 +2934,28 @@ class TextWidgetAnnotation extends WidgetAnnotation {
return chunks;
}
async extractTextContent(evaluator, task, viewBox) {
await super.extractTextContent(evaluator, task, viewBox);
const text = this.data.textContent;
if (!text) {
return;
}
// The text extractor doesn't handle empty lines correctly, so if the
// content we get is more or less (modulo whitespaces) the same as the
// field value we just ignore it.
const allText = text.join("\n");
if (allText === this.data.fieldValue) {
return;
}
const regex = allText.replaceAll(/([.*+?^${}()|[\]\\])|(\s+)/g, (_m, p1) =>
p1 ? `\\${p1}` : "\\s+"
);
if (new RegExp(`^\\s*${regex}\\s*$`).test(this.data.fieldValue)) {
this.data.textContent = this.data.fieldValue.split("\n");
}
}
getFieldObject() {
return {
id: this.data.id,

View file

@ -674,3 +674,4 @@
!issue18072.pdf
!stamps.pdf
!issue15096.pdf
!issue18036.pdf

BIN
test/pdfs/issue18036.pdf Executable file

Binary file not shown.

View file

@ -8477,13 +8477,13 @@
"value": "Hello World"
},
"33R": {
"value": "Hello World\nDlrow Olleh\nHello World"
"value": "\nHello World\nDlrow Olleh\nHello World"
},
"36R": {
"value": "Hello World\nDlrow Olleh\nHello World"
"value": "\nHello World\nDlrow Olleh\nHello World"
},
"39R": {
"value": "Hello World\nDlrow Olleh\nHello World"
"value": "\nHello World\nDlrow Olleh\nHello World"
}
}
},
@ -10684,5 +10684,13 @@
"value": false
}
}
},
{
"id": "issue18036",
"file": "pdfs/issue18036.pdf",
"md5": "940ad97fc1a6f8d288a213af80313c7e",
"rounds": 1,
"type": "eq",
"annotations": true
}
]