1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-22 16:18:08 +02:00

Merge pull request #13908 from brendandahl/xfa-find

[api-minor] XFA - Support text search in XFA documents.
This commit is contained in:
Brendan Dahl 2021-08-23 08:53:02 -07:00 committed by GitHub
commit bf5a45ce6d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 531 additions and 238 deletions

View file

@ -62,6 +62,7 @@ import { MessageHandler } from "../shared/message_handler.js";
import { Metadata } from "./metadata.js";
import { OptionalContentConfig } from "./optional_content_config.js";
import { PDFDataTransportStream } from "./transport_stream.js";
import { XfaText } from "./xfa_text.js";
const DEFAULT_RANGE_CHUNK_SIZE = 65536; // 2^16 = 65536
const RENDERING_CANCELLED_TIMEOUT = 100; // ms
@ -1531,6 +1532,13 @@ class PDFPageProxy {
* {@link TextContent} object that represents the page's text content.
*/
getTextContent(params = {}) {
if (this._transport._htmlForXfa) {
// TODO: We need to revisit this once the XFA foreground patch lands and
// only do this for non-foreground XFA.
return this.getXfa().then(xfa => {
return XfaText.textContent(xfa);
});
}
const readableStream = this.streamTextContent(params);
return new Promise(function (resolve, reject) {

View file

@ -13,6 +13,8 @@
* limitations under the License.
*/
import { XfaText } from "./xfa_text.js";
class XfaLayer {
static setupStorage(html, id, element, storage, intent) {
const storedData = storage.getValue(id, { value: null });
@ -127,6 +129,9 @@ class XfaLayer {
// Set defaults.
rootDiv.setAttribute("class", "xfaLayer xfaFont");
// Text nodes used for the text highlighter.
const textDivs = [];
while (stack.length > 0) {
const [parent, i, html] = stack[stack.length - 1];
if (i + 1 === parent.children.length) {
@ -141,7 +146,9 @@ class XfaLayer {
const { name } = child;
if (name === "#text") {
html.appendChild(document.createTextNode(child.value));
const node = document.createTextNode(child.value);
textDivs.push(node);
html.appendChild(node);
continue;
}
@ -160,7 +167,11 @@ class XfaLayer {
if (child.children && child.children.length > 0) {
stack.push([child, -1, childHtml]);
} else if (child.value) {
childHtml.appendChild(document.createTextNode(child.value));
const node = document.createTextNode(child.value);
if (XfaText.shouldBuildText(name)) {
textDivs.push(node);
}
childHtml.appendChild(node);
}
}
@ -185,6 +196,10 @@ class XfaLayer {
)) {
el.setAttribute("readOnly", true);
}
return {
textDivs,
};
}
/**

79
src/display/xfa_text.js Normal file
View file

@ -0,0 +1,79 @@
/* Copyright 2021 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
class XfaText {
  /**
   * Collect the text found in an XFA tree into an object shaped like a
   * regular PDF's TextContent. Only `TextItem.str` is filled in for each
   * item; `styles` is returned as an empty, prototype-less dictionary.
   *
   * Nodes are visited parent first, then children in order. A "#text" node
   * contributes its `value` as-is. Any other node is skipped together with
   * its whole subtree when `shouldBuildText` rejects its name; otherwise it
   * contributes `attributes.textContent` if that is truthy, else `value` if
   * that is truthy, else nothing.
   *
   * @param {Object} xfa - An XFA fake DOM object.
   *
   * @returns {TextContent}
   */
  static textContent(xfa) {
    const items = [];
    // Explicit work list; the next node to visit is always the last entry.
    const pending = [xfa];

    while (pending.length > 0) {
      const current = pending.pop();
      if (!current) {
        continue;
      }

      const { name } = current;
      let text = null;
      if (name === "#text") {
        text = current.value;
      } else {
        if (!XfaText.shouldBuildText(name)) {
          // Form controls: ignore the node and everything below it.
          continue;
        }
        const candidate = current.attributes?.textContent || current.value;
        if (candidate) {
          text = candidate;
        }
      }

      if (text !== null) {
        items.push({ str: text });
      }

      const kids = current.children;
      if (!kids) {
        continue;
      }
      // Queue the children back to front so that the first child is popped
      // (and therefore emitted) first.
      const ordered = [...kids];
      for (let i = ordered.length - 1; i >= 0; i--) {
        pending.push(ordered[i]);
      }
    }

    return {
      items,
      styles: Object.create(null),
    };
  }

  /**
   * Tell whether a DOM node with the given name gets a text node of its own.
   *
   * @param {string} name - DOM node name. (lower case)
   *
   * @returns {boolean} false for "textarea", "input", "option" and "select";
   *   true for every other name.
   */
  static shouldBuildText(name) {
    switch (name) {
      case "textarea":
      case "input":
      case "option":
      case "select":
        return false;
      default:
        return true;
    }
  }
}
export { XfaText };