1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-20 15:18:08 +02:00

Keep the empty lines in the text fields

It fixes #18036.
This commit is contained in:
Calixte Denizet 2024-10-04 21:29:08 +02:00
parent 6aef332bcd
commit 3194f3de8b
4 changed files with 35 additions and 4 deletions

View file

@ -2299,7 +2299,7 @@ class WidgetAnnotation extends Annotation {
}
assert(typeof value === "string", "Expected `value` to be a string.");
value = value.trim();
value = value.trimEnd();
if (this.data.combo) {
// The value can be one of the exportValue or any other values.
@ -2934,6 +2934,28 @@ class TextWidgetAnnotation extends WidgetAnnotation {
return chunks;
}
async extractTextContent(evaluator, task, viewBox) {
await super.extractTextContent(evaluator, task, viewBox);
const text = this.data.textContent;
if (!text) {
return;
}
// The text extractor doesn't handle empty lines correctly, so if the
// content we get is more or less (modulo whitespaces) the same as the
// field value we just ignore it.
const allText = text.join("\n");
if (allText === this.data.fieldValue) {
return;
}
const regex = allText.replaceAll(/([.*+?^${}()|[\]\\])|(\s+)/g, (_m, p1) =>
p1 ? `\\${p1}` : "\\s+"
);
if (new RegExp(`^\\s*${regex}\\s*$`).test(this.data.fieldValue)) {
this.data.textContent = this.data.fieldValue.split("\n");
}
}
getFieldObject() {
return {
id: this.data.id,