mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-22 16:18:08 +02:00
[api-minor] Change the "dc:creator" Metadata field to an Array
- Add scripting support for doc.info.authors.
- doc.info.metadata is the raw string containing the XML code.
This commit is contained in:
parent
35845d1bbb
commit
43d5512f5c
10 changed files with 97 additions and 33 deletions
|
@ -130,9 +130,7 @@ function updateXFA(datasetsRef, newRefs, xref) {
|
|||
}
|
||||
const datasets = xref.fetchIfRef(datasetsRef);
|
||||
const str = bytesToString(datasets.getBytes());
|
||||
const xml = new SimpleXMLParser(/* hasAttributes */ true).parseFromString(
|
||||
str
|
||||
);
|
||||
const xml = new SimpleXMLParser({ hasAttributes: true }).parseFromString(str);
|
||||
|
||||
for (const { xfa } of newRefs) {
|
||||
if (!xfa) {
|
||||
|
|
|
@ -24,7 +24,7 @@ class Metadata {
|
|||
data = this._repair(data);
|
||||
|
||||
// Convert the string to an XML document.
|
||||
const parser = new SimpleXMLParser();
|
||||
const parser = new SimpleXMLParser({ lowerCaseName: true });
|
||||
const xmlDocument = parser.parseFromString(data);
|
||||
|
||||
this._metadataMap = new Map();
|
||||
|
@ -32,6 +32,7 @@ class Metadata {
|
|||
if (xmlDocument) {
|
||||
this._parse(xmlDocument);
|
||||
}
|
||||
this._data = data;
|
||||
}
|
||||
|
||||
_repair(data) {
|
||||
|
@ -79,40 +80,71 @@ class Metadata {
|
|||
});
|
||||
}
|
||||
|
||||
_getSequence(entry) {
|
||||
const name = entry.nodeName;
|
||||
if (name !== "rdf:bag" && name !== "rdf:seq" && name !== "rdf:alt") {
|
||||
return null;
|
||||
}
|
||||
|
||||
return entry.childNodes.filter(node => node.nodeName === "rdf:li");
|
||||
}
|
||||
|
||||
_getCreators(entry) {
|
||||
if (entry.nodeName !== "dc:creator") {
|
||||
return false;
|
||||
}
|
||||
if (!entry.hasChildNodes()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Child must be a Bag (unordered array) or a Seq.
|
||||
const seqNode = entry.childNodes[0];
|
||||
const authors = this._getSequence(seqNode) || [];
|
||||
this._metadataMap.set(
|
||||
entry.nodeName,
|
||||
authors.map(node => node.textContent.trim())
|
||||
);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
_parse(xmlDocument) {
|
||||
let rdf = xmlDocument.documentElement;
|
||||
|
||||
if (rdf.nodeName.toLowerCase() !== "rdf:rdf") {
|
||||
if (rdf.nodeName !== "rdf:rdf") {
|
||||
// Wrapped in <xmpmeta>
|
||||
rdf = rdf.firstChild;
|
||||
while (rdf && rdf.nodeName.toLowerCase() !== "rdf:rdf") {
|
||||
while (rdf && rdf.nodeName !== "rdf:rdf") {
|
||||
rdf = rdf.nextSibling;
|
||||
}
|
||||
}
|
||||
|
||||
const nodeName = rdf ? rdf.nodeName.toLowerCase() : null;
|
||||
if (!rdf || nodeName !== "rdf:rdf" || !rdf.hasChildNodes()) {
|
||||
if (!rdf || rdf.nodeName !== "rdf:rdf" || !rdf.hasChildNodes()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const children = rdf.childNodes;
|
||||
for (let i = 0, ii = children.length; i < ii; i++) {
|
||||
const desc = children[i];
|
||||
if (desc.nodeName.toLowerCase() !== "rdf:description") {
|
||||
for (const desc of rdf.childNodes) {
|
||||
if (desc.nodeName !== "rdf:description") {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (let j = 0, jj = desc.childNodes.length; j < jj; j++) {
|
||||
if (desc.childNodes[j].nodeName.toLowerCase() !== "#text") {
|
||||
const entry = desc.childNodes[j];
|
||||
const name = entry.nodeName.toLowerCase();
|
||||
|
||||
this._metadataMap.set(name, entry.textContent.trim());
|
||||
for (const entry of desc.childNodes) {
|
||||
const name = entry.nodeName;
|
||||
if (name === "#text") {
|
||||
continue;
|
||||
}
|
||||
if (this._getCreators(entry)) {
|
||||
continue;
|
||||
}
|
||||
this._metadataMap.set(name, entry.textContent.trim());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
getRaw() {
|
||||
return this._data;
|
||||
}
|
||||
|
||||
get(name) {
|
||||
return this._metadataMap.has(name) ? this._metadataMap.get(name) : null;
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@ class Doc extends PDFObject {
|
|||
this._dirty = false;
|
||||
this._disclosed = false;
|
||||
this._media = undefined;
|
||||
this._metadata = data.metadata;
|
||||
this._metadata = data.metadata || "";
|
||||
this._noautocomplete = undefined;
|
||||
this._nocache = undefined;
|
||||
this._spellDictionaryOrder = [];
|
||||
|
@ -74,12 +74,13 @@ class Doc extends PDFObject {
|
|||
// and they're read-only.
|
||||
this._info = new Proxy(
|
||||
{
|
||||
title: this.title,
|
||||
author: this.author,
|
||||
subject: this.subject,
|
||||
keywords: this.keywords,
|
||||
creator: this.creator,
|
||||
producer: this.producer,
|
||||
title: this._title,
|
||||
author: this._author,
|
||||
authors: data.authors || [this._author],
|
||||
subject: this._subject,
|
||||
keywords: this._keywords,
|
||||
creator: this._creator,
|
||||
producer: this._producer,
|
||||
creationdate: this._creationDate,
|
||||
moddate: this._modDate,
|
||||
trapped: data.Trapped || "Unknown",
|
||||
|
|
|
@ -427,12 +427,13 @@ class SimpleDOMNode {
|
|||
}
|
||||
|
||||
class SimpleXMLParser extends XMLParserBase {
|
||||
constructor(hasAttributes = false) {
|
||||
constructor({ hasAttributes = false, lowerCaseName = false }) {
|
||||
super();
|
||||
this._currentFragment = null;
|
||||
this._stack = null;
|
||||
this._errorCode = XMLParserErrorCode.NoError;
|
||||
this._hasAttributes = hasAttributes;
|
||||
this._lowerCaseName = lowerCaseName;
|
||||
}
|
||||
|
||||
parseFromString(data) {
|
||||
|
@ -476,6 +477,9 @@ class SimpleXMLParser extends XMLParserBase {
|
|||
}
|
||||
|
||||
onBeginElement(name, attributes, isEmpty) {
|
||||
if (this._lowerCaseName) {
|
||||
name = name.toLowerCase();
|
||||
}
|
||||
const node = new SimpleDOMNode(name);
|
||||
node.childNodes = [];
|
||||
if (this._hasAttributes) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue