mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-22 16:18:08 +02:00
XFA - Support text search in XFA documents.
Moves the match-highlighting logic out of TextLayerBuilder into a new, separate class, `TextHighlighter`, that can be used with both regular PDFs and XFA PDFs. To mimic the current find functionality in XFA documents, two arrays are created from the XFA rendering: one to get the text content, and one to map that text content to DOM nodes. Fixes #13878
This commit is contained in:
parent
3c8ee25e05
commit
bb47128864
11 changed files with 531 additions and 238 deletions
|
@ -62,6 +62,7 @@ import { MessageHandler } from "../shared/message_handler.js";
|
|||
import { Metadata } from "./metadata.js";
|
||||
import { OptionalContentConfig } from "./optional_content_config.js";
|
||||
import { PDFDataTransportStream } from "./transport_stream.js";
|
||||
import { XfaText } from "./xfa_text.js";
|
||||
|
||||
const DEFAULT_RANGE_CHUNK_SIZE = 65536; // 2^16 = 65536
|
||||
const RENDERING_CANCELLED_TIMEOUT = 100; // ms
|
||||
|
@ -1561,6 +1562,13 @@ class PDFPageProxy {
|
|||
* {@link TextContent} object that represents the page's text content.
|
||||
*/
|
||||
getTextContent(params = {}) {
|
||||
if (this._transport._htmlForXfa) {
|
||||
// TODO: We need to revisit this once the XFA foreground patch lands and
|
||||
// only do this for non-foreground XFA.
|
||||
return this.getXfa().then(xfa => {
|
||||
return XfaText.textContent(xfa);
|
||||
});
|
||||
}
|
||||
const readableStream = this.streamTextContent(params);
|
||||
|
||||
return new Promise(function (resolve, reject) {
|
||||
|
|
|
@ -13,6 +13,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { XfaText } from "./xfa_text.js";
|
||||
|
||||
class XfaLayer {
|
||||
static setupStorage(html, id, element, storage, intent) {
|
||||
const storedData = storage.getValue(id, { value: null });
|
||||
|
@ -127,6 +129,9 @@ class XfaLayer {
|
|||
// Set defaults.
|
||||
rootDiv.setAttribute("class", "xfaLayer xfaFont");
|
||||
|
||||
// Text nodes used for the text highlighter.
|
||||
const textDivs = [];
|
||||
|
||||
while (stack.length > 0) {
|
||||
const [parent, i, html] = stack[stack.length - 1];
|
||||
if (i + 1 === parent.children.length) {
|
||||
|
@ -141,7 +146,9 @@ class XfaLayer {
|
|||
|
||||
const { name } = child;
|
||||
if (name === "#text") {
|
||||
html.appendChild(document.createTextNode(child.value));
|
||||
const node = document.createTextNode(child.value);
|
||||
textDivs.push(node);
|
||||
html.appendChild(node);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -160,7 +167,11 @@ class XfaLayer {
|
|||
if (child.children && child.children.length > 0) {
|
||||
stack.push([child, -1, childHtml]);
|
||||
} else if (child.value) {
|
||||
childHtml.appendChild(document.createTextNode(child.value));
|
||||
const node = document.createTextNode(child.value);
|
||||
if (XfaText.shouldBuildText(name)) {
|
||||
textDivs.push(node);
|
||||
}
|
||||
childHtml.appendChild(node);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -185,6 +196,10 @@ class XfaLayer {
|
|||
)) {
|
||||
el.setAttribute("readOnly", true);
|
||||
}
|
||||
|
||||
return {
|
||||
textDivs,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
79
src/display/xfa_text.js
Normal file
79
src/display/xfa_text.js
Normal file
|
@ -0,0 +1,79 @@
|
|||
/* Copyright 2021 Mozilla Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
class XfaText {
  /**
   * Walk an XFA tree and create an array of text items that is compatible
   * with a regular PDF's TextContent. Currently, only TextItem.str is
   * supported, all other fields and styles haven't been implemented.
   *
   * Nodes are visited depth-first, parents before children and children in
   * their listed order. A node whose name is rejected by
   * {@link XfaText.shouldBuildText} is skipped together with its whole
   * subtree.
   *
   * @param {Object} xfa - An XFA fake DOM object.
   *
   * @returns {TextContent}
   */
  static textContent(xfa) {
    const items = [];
    const output = {
      items,
      styles: Object.create(null),
    };

    // Use an explicit stack rather than recursion, so that a deeply nested
    // tree cannot exhaust the JavaScript call stack.
    const pending = [xfa];
    while (pending.length > 0) {
      const node = pending.pop();
      if (!node) {
        continue;
      }

      let str = null;
      const name = node.name;
      if (name === "#text") {
        str = node.value;
      } else if (!XfaText.shouldBuildText(name)) {
        // Neither this node nor any of its descendants contribute text.
        continue;
      } else if (node.attributes?.textContent) {
        // An explicit `textContent` attribute takes precedence over `value`.
        str = node.attributes.textContent;
      } else if (node.value) {
        str = node.value;
      }

      if (str !== null) {
        items.push({
          str,
        });
      }

      if (!node.children) {
        continue;
      }
      // Push the children in reverse, so that the first child is popped (and
      // hence visited) first; this preserves document order in `items`.
      const children = Array.from(node.children);
      for (let i = children.length - 1; i >= 0; i--) {
        pending.push(children[i]);
      }
    }
    return output;
  }

  /**
   * @param {string} name - DOM node name. (lower case)
   *
   * @returns {boolean} true if the DOM node should have a corresponding text
   * node; false for the form-control names "textarea", "input", "option"
   * and "select".
   */
  static shouldBuildText(name) {
    return !(
      name === "textarea" ||
      name === "input" ||
      name === "option" ||
      name === "select"
    );
  }
}
|
||||
|
||||
export { XfaText };
|
Loading…
Add table
Add a link
Reference in a new issue