1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-25 17:48:07 +02:00

Support rich content in markup annotations

- Use the XFA parser, but in the XHTML namespace.
This commit is contained in:
Calixte Denizet 2021-10-24 17:29:30 +02:00
parent 0e7614df7f
commit cf8dc750d6
14 changed files with 188 additions and 39 deletions

View file

@ -55,6 +55,7 @@ import { ObjectLoader } from "./object_loader.js";
import { OperatorList } from "./operator_list.js";
import { StringStream } from "./stream.js";
import { writeDict } from "./writer.js";
import { XFAFactory } from "./xfa/factory.js";
class AnnotationFactory {
/**
@ -1098,6 +1099,10 @@ class MarkupAnnotation extends Annotation {
this.data.color = null;
}
}
if (dict.has("RC")) {
this.data.richText = XFAFactory.getRichTextAsHtml(dict.get("RC"));
}
}
/**
@ -2545,6 +2550,10 @@ class PopupAnnotation extends Annotation {
this.setContents(parentItem.get("Contents"));
this.data.contentsObj = this._contents;
if (parentItem.has("RC")) {
this.data.richText = XFAFactory.getRichTextAsHtml(parentItem.get("RC"));
}
}
}

View file

@ -66,7 +66,7 @@ class Empty extends XFAObject {
}
class Builder {
constructor() {
constructor(rootNameSpace = null) {
this._namespaceStack = [];
this._nsAgnosticLevel = 0;
@ -76,7 +76,8 @@ class Builder {
this._nextNsId = Math.max(
...Object.values(NamespaceIds).map(({ id }) => id)
);
this._currentNamespace = new UnknownNamespace(++this._nextNsId);
this._currentNamespace =
rootNameSpace || new UnknownNamespace(++this._nextNsId);
}
buildRoot(ids) {

View file

@ -13,13 +13,20 @@
* limitations under the License.
*/
import { $globalData, $toHTML } from "./xfa_object.js";
import {
$appendChild,
$globalData,
$nodeName,
$text,
$toHTML,
} from "./xfa_object.js";
import { Binder } from "./bind.js";
import { DataHandler } from "./data.js";
import { FontFinder } from "./fonts.js";
import { stripQuotes } from "./utils.js";
import { warn } from "../../shared/util.js";
import { XFAParser } from "./parser.js";
import { XhtmlNamespace } from "./xhtml.js";
class XFAFactory {
constructor(data) {
@ -106,6 +113,43 @@ class XFAFactory {
}
return Object.values(data).join("");
}
static getRichTextAsHtml(rc) {
if (!rc || typeof rc !== "string") {
return null;
}
try {
let root = new XFAParser(XhtmlNamespace, /* richText */ true).parse(rc);
if (!["body", "xhtml"].includes(root[$nodeName])) {
// No body, so create one.
const newRoot = XhtmlNamespace.body({});
newRoot[$appendChild](root);
root = newRoot;
}
const result = root[$toHTML]();
if (!result.success) {
return null;
}
const { html } = result;
const { attributes } = html;
if (attributes) {
if (attributes.class) {
attributes.class = attributes.class.filter(
attr => !attr.startsWith("xfa")
);
}
attributes.dir = "auto";
}
return { html, str: root[$text]() };
} catch (e) {
warn(`XFA - an error occurred during parsing of rich text: ${e}`);
}
return null;
}
}
export { XFAFactory };

View file

@ -606,10 +606,16 @@ function setPara(node, nodeStyle, value) {
}
function setFontFamily(xfaFont, node, fontFinder, style) {
const name = stripQuotes(xfaFont.typeface);
const typeface = fontFinder.find(name);
if (!fontFinder) {
// The font cannot be found in the pdf so use the default one.
delete style.fontFamily;
return;
}
const name = stripQuotes(xfaFont.typeface);
style.fontFamily = `"${name}"`;
const typeface = fontFinder.find(name);
if (typeface) {
const { fontFamily } = typeface.regular.cssFontInfo;
if (fontFamily !== name) {

View file

@ -30,9 +30,9 @@ import { Builder } from "./builder.js";
import { warn } from "../../shared/util.js";
class XFAParser extends XMLParserBase {
constructor() {
constructor(rootNameSpace = null, richText = false) {
super();
this._builder = new Builder();
this._builder = new Builder(rootNameSpace);
this._stack = [];
this._globalData = {
usedTypefaces: new Set(),
@ -42,6 +42,7 @@ class XFAParser extends XMLParserBase {
this._errorCode = XMLParserErrorCode.NoError;
this._whiteRegex = /^\s+$/;
this._nbsps = /\xa0+/g;
this._richText = richText;
}
parse(data) {
@ -60,8 +61,8 @@ class XFAParser extends XMLParserBase {
// Normally by definition a &nbsp is unbreakable
// but in real life Acrobat can break strings on &nbsp.
text = text.replace(this._nbsps, match => match.slice(1) + " ");
if (this._current[$acceptWhitespace]()) {
this._current[$onText](text);
if (this._richText || this._current[$acceptWhitespace]()) {
this._current[$onText](text, this._richText);
return;
}

View file

@ -20,6 +20,7 @@ import {
$content,
$extra,
$getChildren,
$getParent,
$globalData,
$nodeName,
$onText,
@ -38,6 +39,7 @@ import {
import { getMeasurement, HTMLResult, stripQuotes } from "./utils.js";
const XHTML_NS_ID = NamespaceIds.xhtml.id;
const $richText = Symbol();
const VALID_STYLES = new Set([
"color",
@ -109,6 +111,7 @@ const StyleMapping = new Map([
const spacesRegExp = /\s+/g;
const crlfRegExp = /[\r\n]+/g;
const crlfForRichTextRegExp = /\r\n?/g;
function mapStyle(styleStr, node) {
const style = Object.create(null);
@ -185,6 +188,7 @@ const NoWhites = new Set(["body", "html"]);
class XhtmlObject extends XmlObject {
constructor(attributes, name) {
super(XHTML_NS_ID, name);
this[$richText] = false;
this.style = attributes.style || "";
}
@ -197,11 +201,16 @@ class XhtmlObject extends XmlObject {
return !NoWhites.has(this[$nodeName]);
}
[$onText](str) {
str = str.replace(crlfRegExp, "");
if (!this.style.includes("xfa-spacerun:yes")) {
str = str.replace(spacesRegExp, " ");
[$onText](str, richText = false) {
if (!richText) {
str = str.replace(crlfRegExp, "");
if (!this.style.includes("xfa-spacerun:yes")) {
str = str.replace(spacesRegExp, " ");
}
} else {
this[$richText] = true;
}
if (str) {
this[$content] += str;
}
@ -311,6 +320,15 @@ class XhtmlObject extends XmlObject {
return HTMLResult.EMPTY;
}
let value;
if (this[$richText]) {
value = this[$content]
? this[$content].replace(crlfForRichTextRegExp, "\n")
: undefined;
} else {
value = this[$content] || undefined;
}
return HTMLResult.success({
name: this[$nodeName],
attributes: {
@ -318,7 +336,7 @@ class XhtmlObject extends XmlObject {
style: mapStyle(this.style, this),
},
children,
value: this[$content] || "",
value,
});
}
}
@ -457,6 +475,10 @@ class P extends XhtmlObject {
}
[$text]() {
const siblings = this[$getParent]()[$getChildren]();
if (siblings[siblings.length - 1] === this) {
return super[$text]();
}
return super[$text]() + "\n";
}
}