mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-22 16:18:08 +02:00
Merge pull request #13908 from brendandahl/xfa-find
[api-minor] XFA - Support text search in XFA documents.
This commit is contained in:
commit
bf5a45ce6d
11 changed files with 531 additions and 238 deletions
|
@ -62,6 +62,7 @@ import { MessageHandler } from "../shared/message_handler.js";
|
|||
import { Metadata } from "./metadata.js";
|
||||
import { OptionalContentConfig } from "./optional_content_config.js";
|
||||
import { PDFDataTransportStream } from "./transport_stream.js";
|
||||
import { XfaText } from "./xfa_text.js";
|
||||
|
||||
const DEFAULT_RANGE_CHUNK_SIZE = 65536; // 2^16 = 65536
|
||||
const RENDERING_CANCELLED_TIMEOUT = 100; // ms
|
||||
|
@ -1531,6 +1532,13 @@ class PDFPageProxy {
|
|||
* {@link TextContent} object that represents the page's text content.
|
||||
*/
|
||||
getTextContent(params = {}) {
|
||||
if (this._transport._htmlForXfa) {
|
||||
// TODO: We need to revisit this once the XFA foreground patch lands and
|
||||
// only do this for non-foreground XFA.
|
||||
return this.getXfa().then(xfa => {
|
||||
return XfaText.textContent(xfa);
|
||||
});
|
||||
}
|
||||
const readableStream = this.streamTextContent(params);
|
||||
|
||||
return new Promise(function (resolve, reject) {
|
||||
|
|
|
@ -13,6 +13,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { XfaText } from "./xfa_text.js";
|
||||
|
||||
class XfaLayer {
|
||||
static setupStorage(html, id, element, storage, intent) {
|
||||
const storedData = storage.getValue(id, { value: null });
|
||||
|
@ -127,6 +129,9 @@ class XfaLayer {
|
|||
// Set defaults.
|
||||
rootDiv.setAttribute("class", "xfaLayer xfaFont");
|
||||
|
||||
// Text nodes used for the text highlighter.
|
||||
const textDivs = [];
|
||||
|
||||
while (stack.length > 0) {
|
||||
const [parent, i, html] = stack[stack.length - 1];
|
||||
if (i + 1 === parent.children.length) {
|
||||
|
@ -141,7 +146,9 @@ class XfaLayer {
|
|||
|
||||
const { name } = child;
|
||||
if (name === "#text") {
|
||||
html.appendChild(document.createTextNode(child.value));
|
||||
const node = document.createTextNode(child.value);
|
||||
textDivs.push(node);
|
||||
html.appendChild(node);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -160,7 +167,11 @@ class XfaLayer {
|
|||
if (child.children && child.children.length > 0) {
|
||||
stack.push([child, -1, childHtml]);
|
||||
} else if (child.value) {
|
||||
childHtml.appendChild(document.createTextNode(child.value));
|
||||
const node = document.createTextNode(child.value);
|
||||
if (XfaText.shouldBuildText(name)) {
|
||||
textDivs.push(node);
|
||||
}
|
||||
childHtml.appendChild(node);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -185,6 +196,10 @@ class XfaLayer {
|
|||
)) {
|
||||
el.setAttribute("readOnly", true);
|
||||
}
|
||||
|
||||
return {
|
||||
textDivs,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
79
src/display/xfa_text.js
Normal file
79
src/display/xfa_text.js
Normal file
|
@ -0,0 +1,79 @@
|
|||
/* Copyright 2021 Mozilla Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * Helpers for extracting searchable text from an XFA "fake DOM" tree, i.e. a
 * tree of plain objects shaped like `{ name, value, attributes, children }`.
 */
class XfaText {
  /**
   * Walk an XFA tree and create an array of text items that is compatible
   * with a regular PDF's TextContent. Currently, only TextItem.str is
   * supported, all other fields and styles haven't been implemented.
   *
   * Nodes are visited depth-first, in document order. A node contributes at
   * most one item, which is emitted before the items of its children.
   *
   * @param {Object} xfa - An XFA fake DOM object.
   *
   * @returns {TextContent}
   */
  static textContent(xfa) {
    const items = [];
    const output = {
      items,
      styles: Object.create(null),
    };

    function walk(node) {
      if (!node) {
        return;
      }
      let str = null;
      const name = node.name;
      if (name === "#text") {
        str = node.value;
      } else if (!XfaText.shouldBuildText(name)) {
        // Form-control elements are skipped together with their whole subtree.
        return;
      } else if (node.attributes?.textContent) {
        // An explicit `textContent` attribute takes precedence over `value`.
        str = node.attributes.textContent;
      } else if (node.value) {
        str = node.value;
      }
      // A "#text" node without a `value` would otherwise yield
      // `{ str: undefined }`; only emit items that actually carry text.
      if (str !== null && str !== undefined) {
        items.push({
          str,
        });
      }
      if (!node.children) {
        return;
      }
      for (const child of node.children) {
        walk(child);
      }
    }

    walk(xfa);
    return output;
  }

  /**
   * @param {string} name - DOM node name. (lower case)
   *
   * @returns {boolean} true if the DOM node should have a corresponding text
   *   node, i.e. it is not one of the form-control elements
   *   (textarea/input/option/select).
   */
  static shouldBuildText(name) {
    return !(
      name === "textarea" ||
      name === "input" ||
      name === "option" ||
      name === "select"
    );
  }
}
|
||||
|
||||
export { XfaText };
|
Loading…
Add table
Add a link
Reference in a new issue