mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-26 10:08:06 +02:00
Merge pull request #12997 from Snuffleupagus/metadata-worker
Move the Metadata parsing to the worker-thread
This commit is contained in:
commit
4619b1b568
14 changed files with 258 additions and 209 deletions
|
@ -316,8 +316,54 @@ function collectActions(xref, dict, eventType) {
|
|||
return objectSize(actions) > 0 ? actions : null;
|
||||
}
|
||||
|
||||
// The five predefined XML entities, keyed by the code point of the character
// that has to be escaped.
const XMLEntities = {
  /* < */ 0x3c: "&lt;",
  /* > */ 0x3e: "&gt;",
  /* & */ 0x26: "&amp;",
  /* " */ 0x22: "&quot;",
  /* ' */ 0x27: "&apos;",
};

/**
 * Escape a string so that it can be written into XML text or attribute values.
 *
 * Printable ASCII (0x20-0x7E) is copied through unchanged, except for the
 * characters listed in `XMLEntities`, which are replaced by their named
 * entity. Every other code point is written as an upper-case hexadecimal
 * character reference (`&#xHHHH;`).
 *
 * @param {string} str - The string to escape.
 * @returns {string} The escaped string; `str` itself is returned when nothing
 *   had to be escaped.
 */
function encodeToXmlString(str) {
  const buffer = [];
  // Index of the first character that has not been copied to `buffer` yet.
  let start = 0;
  for (let i = 0, ii = str.length; i < ii; i++) {
    const char = str.codePointAt(i);
    if (0x20 <= char && char <= 0x7e) {
      // Printable ASCII: only the predefined entities need escaping.
      const entity = XMLEntities[char];
      if (entity) {
        if (start < i) {
          buffer.push(str.substring(start, i));
        }
        buffer.push(entity);
        start = i + 1;
      }
    } else {
      if (start < i) {
        buffer.push(str.substring(start, i));
      }
      buffer.push(`&#x${char.toString(16).toUpperCase()};`);
      // `codePointAt` only returns a value above 0xFFFF when it consumed a
      // complete surrogate pair, i.e. two UTF-16 code units. Lone surrogates
      // and U+FFFE/U+FFFF occupy a single unit, so the next character must
      // not be skipped for them.
      if (char > 0xffff) {
        i++;
      }
      start = i + 1;
    }
  }

  if (buffer.length === 0) {
    // Nothing was escaped; avoid building a copy.
    return str;
  }
  if (start < str.length) {
    buffer.push(str.substring(start, str.length));
  }
  return buffer.join("");
}
|
||||
|
||||
export {
|
||||
collectActions,
|
||||
encodeToXmlString,
|
||||
escapePDFName,
|
||||
getArrayLookupTableFactory,
|
||||
getInheritableProperty,
|
||||
|
|
146
src/core/metadata_parser.js
Normal file
146
src/core/metadata_parser.js
Normal file
|
@ -0,0 +1,146 @@
|
|||
/* Copyright 2012 Mozilla Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { SimpleXMLParser } from "./xml_parser.js";
|
||||
|
||||
/**
 * Parses an XMP metadata packet (as a string) into a map of lower-cased
 * property names to values, using `SimpleXMLParser`.
 */
class MetadataParser {
  /**
   * @param {string} data - The raw metadata string.
   */
  constructor(data) {
    // Ghostscript may produce invalid metadata, so try to repair that first.
    data = this._repair(data);

    // Convert the string to an XML document.
    const parser = new SimpleXMLParser({ lowerCaseName: true });
    const xmlDocument = parser.parseFromString(data);

    this._metadataMap = new Map();
    // Note that the *repaired* string is what gets stored and exposed.
    this._data = data;

    if (xmlDocument) {
      this._parse(xmlDocument);
    }
  }

  /**
   * Repair known kinds of broken metadata:
   *  - text before the first tag is dropped (see issue 10395);
   *  - element text written as a `\376\377`-prefixed (UTF-16BE BOM) run of
   *    octal escapes is decoded and re-emitted as plain ASCII and hexadecimal
   *    character references.
   *
   * @param {string} data
   * @returns {string} The repaired string.
   */
  _repair(data) {
    // Start by removing any "junk" before the first tag (see issue 10395).
    return data
      .replace(/^[^<]+/, "")
      .replace(/>\\376\\377([^<]+)/g, function (all, codes) {
        // Turn octal escapes and the predefined entities back into single
        // "byte" characters.
        const bytes = codes
          .replace(/\\([0-3])([0-7])([0-7])/g, function (code, d1, d2, d3) {
            return String.fromCharCode(d1 * 64 + d2 * 8 + d3 * 1);
          })
          .replace(/&(amp|apos|gt|lt|quot);/g, function (str, name) {
            switch (name) {
              case "amp":
                return "&";
              case "apos":
                return "'";
              case "gt":
                return ">";
              case "lt":
                return "<";
              case "quot":
                return '"';
            }
            throw new Error(`_repair: ${name} isn't defined.`);
          });

        const charBuf = [];
        // Combine the bytes pairwise (big-endian). A dangling final byte is
        // ignored, since reading past the end would yield NaN and produce a
        // malformed character reference.
        for (let i = 0, ii = bytes.length; i + 1 < ii; i += 2) {
          const code = bytes.charCodeAt(i) * 256 + bytes.charCodeAt(i + 1);
          if (
            code >= /* Space = */ 32 &&
            code < /* Delete = */ 127 &&
            code !== /* '<' = */ 60 &&
            code !== /* '>' = */ 62 &&
            code !== /* '&' = */ 38
          ) {
            charBuf.push(String.fromCharCode(code));
          } else {
            // Adding 0x10000 and dropping the leading digit zero-pads the
            // value to four hexadecimal digits.
            charBuf.push(
              "&#x" + (0x10000 + code).toString(16).substring(1) + ";"
            );
          }
        }
        return ">" + charBuf.join("");
      });
  }

  /**
   * @param {Object} entry - A node expected to be an RDF container.
   * @returns {Array|null} The `rdf:li` children of `entry`, or `null` when
   *   `entry` is not an `rdf:bag`, `rdf:seq` or `rdf:alt` node.
   */
  _getSequence(entry) {
    const name = entry.nodeName;
    if (name !== "rdf:bag" && name !== "rdf:seq" && name !== "rdf:alt") {
      return null;
    }
    return entry.childNodes.filter(node => node.nodeName === "rdf:li");
  }

  /**
   * Store an array-valued property (the trimmed text of each list item of
   * the first child container) under the name of `entry`.
   *
   * @param {Object} entry
   */
  _parseArray(entry) {
    if (!entry.hasChildNodes()) {
      return;
    }
    // Child must be a Bag (unordered array) or a Seq.
    const [seqNode] = entry.childNodes;
    const sequence = this._getSequence(seqNode) || [];

    this._metadataMap.set(
      entry.nodeName,
      sequence.map(node => node.textContent.trim())
    );
  }

  /**
   * Walk the `rdf:rdf` element and record every property found in its
   * `rdf:description` children.
   *
   * @param {Object} xmlDocument - An object with a `documentElement` node.
   */
  _parse(xmlDocument) {
    let rdf = xmlDocument.documentElement;

    if (rdf.nodeName !== "rdf:rdf") {
      // Wrapped in <xmpmeta>
      rdf = rdf.firstChild;
      while (rdf && rdf.nodeName !== "rdf:rdf") {
        rdf = rdf.nextSibling;
      }
    }

    if (!rdf || rdf.nodeName !== "rdf:rdf" || !rdf.hasChildNodes()) {
      return;
    }

    for (const desc of rdf.childNodes) {
      if (desc.nodeName !== "rdf:description") {
        continue;
      }

      for (const entry of desc.childNodes) {
        const name = entry.nodeName;
        switch (name) {
          case "#text":
            continue;
          case "dc:creator":
          case "dc:subject":
            // These properties are stored as arrays of strings.
            this._parseArray(entry);
            continue;
        }
        this._metadataMap.set(name, entry.textContent.trim());
      }
    }
  }

  /**
   * @returns {{parsedData: Map, rawData: string}} The parsed property map
   *   together with the (repaired) metadata string.
   */
  get serializable() {
    return {
      parsedData: this._metadataMap,
      rawData: this._data,
    };
  }
}
|
||||
|
||||
export { MetadataParser };
|
|
@ -59,6 +59,7 @@ import { Lexer, Parser } from "./parser.js";
|
|||
import { CipherTransformFactory } from "./crypto.js";
|
||||
import { ColorSpace } from "./colorspace.js";
|
||||
import { GlobalImageCache } from "./image_utils.js";
|
||||
import { MetadataParser } from "./metadata_parser.js";
|
||||
|
||||
function fetchDestination(dest) {
|
||||
return isDict(dest) ? dest.get("D") : dest;
|
||||
|
@ -131,20 +132,22 @@ class Catalog {
|
|||
this.xref.encrypt && this.xref.encrypt.encryptMetadata
|
||||
);
|
||||
const stream = this.xref.fetch(streamRef, suppressEncryption);
|
||||
let metadata;
|
||||
let metadata = null;
|
||||
|
||||
if (stream && isDict(stream.dict)) {
|
||||
if (isStream(stream) && isDict(stream.dict)) {
|
||||
const type = stream.dict.get("Type");
|
||||
const subtype = stream.dict.get("Subtype");
|
||||
|
||||
if (isName(type, "Metadata") && isName(subtype, "XML")) {
|
||||
// XXX: This should examine the charset the XML document defines,
|
||||
// however since there are currently no real means to decode
|
||||
// arbitrary charsets, let's just hope that the author of the PDF
|
||||
// was reasonable enough to stick with the XML default charset,
|
||||
// which is UTF-8.
|
||||
// however since there are currently no real means to decode arbitrary
|
||||
// charsets, let's just hope that the author of the PDF was reasonable
|
||||
// enough to stick with the XML default charset, which is UTF-8.
|
||||
try {
|
||||
metadata = stringToUTF8String(bytesToString(stream.getBytes()));
|
||||
const data = stringToUTF8String(bytesToString(stream.getBytes()));
|
||||
if (data) {
|
||||
metadata = new MetadataParser(data).serializable;
|
||||
}
|
||||
} catch (e) {
|
||||
if (e instanceof MissingDataException) {
|
||||
throw e;
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
import { bytesToString, escapeString, warn } from "../shared/util.js";
|
||||
import { Dict, isDict, isName, isRef, isStream, Name } from "./primitives.js";
|
||||
import { escapePDFName, parseXFAPath } from "./core_utils.js";
|
||||
import { SimpleDOMNode, SimpleXMLParser } from "../shared/xml_parser.js";
|
||||
import { SimpleDOMNode, SimpleXMLParser } from "./xml_parser.js";
|
||||
import { calculateMD5 } from "./crypto.js";
|
||||
|
||||
function writeDict(dict, buffer, transform) {
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
*/
|
||||
|
||||
import { $clean, $finalize, $onChild, $onText } from "./xfa_object.js";
|
||||
import { XMLParserBase, XMLParserErrorCode } from "../../shared/xml_parser.js";
|
||||
import { XMLParserBase, XMLParserErrorCode } from "../xml_parser.js";
|
||||
import { Builder } from "./builder.js";
|
||||
import { warn } from "../../shared/util.js";
|
||||
|
||||
|
|
506
src/core/xml_parser.js
Normal file
506
src/core/xml_parser.js
Normal file
|
@ -0,0 +1,506 @@
|
|||
/* Copyright 2018 Mozilla Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// The code for XMLParserBase copied from
|
||||
// https://github.com/mozilla/shumway/blob/16451d8836fa85f4b16eeda8b4bda2fa9e2b22b0/src/avm2/natives/xml.ts
|
||||
|
||||
import { encodeToXmlString } from "./core_utils.js";
|
||||
|
||||
// Status codes handed to `onError`. Zero means success, every failure is a
// distinct negative number.
const XMLParserErrorCode = {
  NoError: 0,
  EndOfDocument: -1,
  // A "<![CDATA[" section without a closing "]]>".
  UnterminatedCdat: -2,
  // A "<?" processing instruction without a closing "?>".
  UnterminatedXmlDeclaration: -3,
  // A "<!DOCTYPE" declaration without its closing ">" (or "]>").
  UnterminatedDoctypeDeclaration: -4,
  // A "<!--" comment without a closing "-->".
  UnterminatedComment: -5,
  // Unknown "<!" markup, or a start tag whose attributes cannot be parsed.
  MalformedElement: -6,
  OutOfMemory: -7,
  UnterminatedAttributeValue: -8,
  // A start or end tag that is missing its closing ">".
  UnterminatedElement: -9,
  ElementNeverBegun: -10,
};
|
||||
|
||||
/**
 * Check whether the character at `index` in `s` is XML whitespace
 * (space, line feed, carriage return or tab).
 *
 * @param {string} s
 * @param {number} index
 * @returns {boolean}
 */
function isWhitespace(s, index) {
  switch (s[index]) {
    case " ":
    case "\n":
    case "\r":
    case "\t":
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Check whether `s` consists solely of whitespace characters, as defined by
 * `isWhitespace`. The empty string counts as whitespace-only.
 *
 * @param {string} s
 * @returns {boolean}
 */
function isWhitespaceString(s) {
  let index = s.length;
  while (index-- > 0) {
    if (!isWhitespace(s, index)) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
/**
 * Minimal event-driven XML tokenizer. `parseXml` scans a string and reports
 * what it finds through the `on*` hooks, which do nothing by default (except
 * `onResolveEntity`) and are meant to be overridden by subclasses.
 */
class XMLParserBase {
  /**
   * Replace character references and the five predefined entities in `s`.
   * Any other entity is delegated to `onResolveEntity`. A numeric reference
   * that does not denote a valid code point (e.g. `&#xZZ;` or `&#x110000;`)
   * is left in the text unchanged rather than letting
   * `String.fromCodePoint` throw a RangeError.
   *
   * @param {string} s
   * @returns {string}
   */
  _resolveEntities(s) {
    return s.replace(/&([^;]+);/g, (all, entity) => {
      let code;
      if (entity.substring(0, 2) === "#x") {
        code = parseInt(entity.substring(2), 16);
      } else if (entity.substring(0, 1) === "#") {
        code = parseInt(entity.substring(1), 10);
      }
      if (code !== undefined) {
        const isValid = Number.isInteger(code) && code >= 0 && code <= 0x10ffff;
        return isValid ? String.fromCodePoint(code) : all;
      }
      switch (entity) {
        case "lt":
          return "<";
        case "gt":
          return ">";
        case "amp":
          return "&";
        case "quot":
          return '"';
        case "apos":
          return "'";
      }
      return this.onResolveEntity(entity);
    });
  }

  /**
   * Parse the name and attributes of a start tag.
   *
   * @param {string} s - The complete XML string.
   * @param {number} start - Index of the first character after "<".
   * @returns {Object|null} `{ name, attributes, parsed }`, where `parsed` is
   *   the number of characters consumed, or `null` when an attribute is
   *   malformed (no "=", no quote, or an unterminated value).
   */
  _parseContent(s, start) {
    const attributes = [];
    let pos = start;

    function skipWs() {
      while (pos < s.length && isWhitespace(s, pos)) {
        ++pos;
      }
    }

    // The element name ends at whitespace, ">" or "/".
    while (
      pos < s.length &&
      !isWhitespace(s, pos) &&
      s[pos] !== ">" &&
      s[pos] !== "/"
    ) {
      ++pos;
    }
    const name = s.substring(start, pos);
    skipWs();
    while (
      pos < s.length &&
      s[pos] !== ">" &&
      s[pos] !== "/" &&
      s[pos] !== "?"
    ) {
      skipWs();
      let attrName = "",
        attrValue = "";
      while (pos < s.length && !isWhitespace(s, pos) && s[pos] !== "=") {
        attrName += s[pos];
        ++pos;
      }
      skipWs();
      if (s[pos] !== "=") {
        return null;
      }
      ++pos;
      skipWs();
      // The value must be quoted; it ends at the next matching quote.
      const attrEndChar = s[pos];
      if (attrEndChar !== '"' && attrEndChar !== "'") {
        return null;
      }
      const attrEndIndex = s.indexOf(attrEndChar, ++pos);
      if (attrEndIndex < 0) {
        return null;
      }
      attrValue = s.substring(pos, attrEndIndex);
      attributes.push({
        name: attrName,
        value: this._resolveEntities(attrValue),
      });
      pos = attrEndIndex + 1;
      skipWs();
    }
    return {
      name,
      attributes,
      parsed: pos - start,
    };
  }

  /**
   * Parse the target name and the raw content of a processing instruction.
   *
   * @param {string} s - The complete XML string.
   * @param {number} start - Index of the first character after "<?".
   * @returns {Object} `{ name, value, parsed }`, where `parsed` is the number
   *   of characters consumed up to (not including) the closing "?>".
   */
  _parseProcessingInstruction(s, start) {
    let pos = start;

    function skipWs() {
      while (pos < s.length && isWhitespace(s, pos)) {
        ++pos;
      }
    }

    while (
      pos < s.length &&
      !isWhitespace(s, pos) &&
      s[pos] !== ">" &&
      s[pos] !== "/"
    ) {
      ++pos;
    }
    const name = s.substring(start, pos);
    skipWs();
    const attrStart = pos;
    while (pos < s.length && (s[pos] !== "?" || s[pos + 1] !== ">")) {
      ++pos;
    }
    const value = s.substring(attrStart, pos);
    return {
      name,
      value,
      parsed: pos - start,
    };
  }

  /**
   * Scan `s` and invoke the matching `on*` hook for every piece of markup or
   * text. On the first syntax error `onError` is called with an
   * `XMLParserErrorCode` value and scanning stops.
   *
   * @param {string} s - The XML string.
   */
  parseXml(s) {
    let i = 0;
    while (i < s.length) {
      const ch = s[i];
      let j = i;
      if (ch === "<") {
        ++j;
        const ch2 = s[j];
        let q;
        switch (ch2) {
          case "/": {
            // End tag.
            ++j;
            q = s.indexOf(">", j);
            if (q < 0) {
              this.onError(XMLParserErrorCode.UnterminatedElement);
              return;
            }
            this.onEndElement(s.substring(j, q));
            j = q + 1;
            break;
          }
          case "?": {
            // Processing instruction (including the XML declaration).
            ++j;
            const pi = this._parseProcessingInstruction(s, j);
            if (s.substring(j + pi.parsed, j + pi.parsed + 2) !== "?>") {
              this.onError(XMLParserErrorCode.UnterminatedXmlDeclaration);
              return;
            }
            this.onPi(pi.name, pi.value);
            j += pi.parsed + 2;
            break;
          }
          case "!": {
            if (s.substring(j + 1, j + 3) === "--") {
              // Comment.
              q = s.indexOf("-->", j + 3);
              if (q < 0) {
                this.onError(XMLParserErrorCode.UnterminatedComment);
                return;
              }
              this.onComment(s.substring(j + 3, q));
              j = q + 3;
            } else if (s.substring(j + 1, j + 8) === "[CDATA[") {
              q = s.indexOf("]]>", j + 8);
              if (q < 0) {
                this.onError(XMLParserErrorCode.UnterminatedCdat);
                return;
              }
              this.onCdata(s.substring(j + 8, q));
              j = q + 3;
            } else if (s.substring(j + 1, j + 8) === "DOCTYPE") {
              const q2 = s.indexOf("[", j + 8);
              let complexDoctype = false;
              q = s.indexOf(">", j + 8);
              if (q < 0) {
                this.onError(XMLParserErrorCode.UnterminatedDoctypeDeclaration);
                return;
              }
              if (q2 > 0 && q > q2) {
                // A "[" precedes the first ">", so the declaration has an
                // internal subset and ends at "]>" instead.
                q = s.indexOf("]>", j + 8);
                if (q < 0) {
                  this.onError(
                    XMLParserErrorCode.UnterminatedDoctypeDeclaration
                  );
                  return;
                }
                complexDoctype = true;
              }
              const doctypeContent = s.substring(
                j + 8,
                q + (complexDoctype ? 1 : 0)
              );
              this.onDoctype(doctypeContent);
              j = q + (complexDoctype ? 2 : 1);
            } else {
              this.onError(XMLParserErrorCode.MalformedElement);
              return;
            }
            break;
          }
          default: {
            // Start tag (possibly self-closing).
            const content = this._parseContent(s, j);
            if (content === null) {
              this.onError(XMLParserErrorCode.MalformedElement);
              return;
            }
            let isClosed = false;
            if (
              s.substring(j + content.parsed, j + content.parsed + 2) === "/>"
            ) {
              isClosed = true;
            } else if (
              s.substring(j + content.parsed, j + content.parsed + 1) !== ">"
            ) {
              this.onError(XMLParserErrorCode.UnterminatedElement);
              return;
            }
            this.onBeginElement(content.name, content.attributes, isClosed);
            j += content.parsed + (isClosed ? 2 : 1);
            break;
          }
        }
      } else {
        // Character data up to the next "<" (or the end of the input).
        while (j < s.length && s[j] !== "<") {
          j++;
        }
        const text = s.substring(i, j);
        this.onText(this._resolveEntities(text));
      }
      i = j;
    }
  }

  /**
   * Hook for entities that are neither numeric nor predefined.
   *
   * @param {string} name - The entity name, without "&" and ";".
   * @returns {string} The replacement; by default the entity is re-emitted
   *   unchanged.
   */
  onResolveEntity(name) {
    return `&${name};`;
  }

  onPi(name, value) {}

  onComment(text) {}

  onCdata(text) {}

  onDoctype(doctypeContent) {}

  onText(text) {}

  onBeginElement(name, attributes, isEmpty) {}

  onEndElement(name) {}

  onError(code) {}
}
|
||||
|
||||
/**
 * Lightweight DOM-like node. Element nodes get a `childNodes` array (and
 * optionally `attributes`) assigned by whoever builds the tree; text nodes
 * use the name "#text" and carry their content in `nodeValue`.
 */
class SimpleDOMNode {
  /**
   * @param {string} nodeName
   * @param {string} [nodeValue]
   */
  constructor(nodeName, nodeValue) {
    this.nodeName = nodeName;
    this.nodeValue = nodeValue;

    // Defined this way so that the back-reference is non-enumerable.
    Object.defineProperty(this, "parentNode", { value: null, writable: true });
  }

  /** The first child, or a falsy value when the node has none. */
  get firstChild() {
    return this.childNodes && this.childNodes[0];
  }

  /**
   * The node following this one in its parent's `childNodes`, or `undefined`
   * when there is none. A node without a parent (`parentNode` is still
   * `null`, e.g. the root) has no sibling rather than causing a TypeError.
   */
  get nextSibling() {
    const parent = this.parentNode;
    const childNodes = parent && parent.childNodes;
    if (!childNodes) {
      return undefined;
    }
    const index = childNodes.indexOf(this);
    if (index === -1) {
      return undefined;
    }
    return childNodes[index + 1];
  }

  /**
   * The concatenated text of all descendants, or the node's own `nodeValue`
   * (empty string if unset) when it has no `childNodes` array.
   */
  get textContent() {
    if (!this.childNodes) {
      return this.nodeValue || "";
    }
    return this.childNodes
      .map(function (child) {
        return child.textContent;
      })
      .join("");
  }

  /**
   * @returns {boolean} Whether the node has at least one child.
   */
  hasChildNodes() {
    return !!this.childNodes && this.childNodes.length > 0;
  }

  /**
   * Search a node in the tree with the given path
   * foo.bar[nnn], i.e. find the nnn-th node named
   * bar under a node named foo.
   *
   * @param {Array} paths - an array of objects as
   * returned by {parseXFAPath}.
   * @param {number} pos - the current position in
   * the paths array.
   * @returns {SimpleDOMNode} The node corresponding
   * to the path or null if not found.
   */
  searchNode(paths, pos) {
    if (pos >= paths.length) {
      return this;
    }

    const component = paths[pos];
    // Explicit stack of [parent, child index] pairs for the depth-first walk.
    const stack = [];
    let node = this;

    while (true) {
      if (component.name === node.nodeName) {
        if (component.pos === 0) {
          const res = node.searchNode(paths, pos + 1);
          if (res !== null) {
            return res;
          }
        } else if (stack.length === 0) {
          return null;
        } else {
          const [parent] = stack.pop();
          let siblingPos = 0;
          for (const child of parent.childNodes) {
            if (component.name === child.nodeName) {
              if (siblingPos === component.pos) {
                return child.searchNode(paths, pos + 1);
              }
              siblingPos++;
            }
          }
          // We didn't find the correct sibling
          // so just return the first found node
          return node.searchNode(paths, pos + 1);
        }
      }

      if (node.childNodes && node.childNodes.length !== 0) {
        // Descend into the first child.
        stack.push([node, 0]);
        node = node.childNodes[0];
      } else if (stack.length === 0) {
        return null;
      } else {
        // Move to the next sibling, climbing up while a level is exhausted.
        while (stack.length !== 0) {
          const [parent, currentPos] = stack.pop();
          const newPos = currentPos + 1;
          if (newPos < parent.childNodes.length) {
            stack.push([parent, newPos]);
            node = parent.childNodes[newPos];
            break;
          }
        }
        if (stack.length === 0) {
          return null;
        }
      }
    }
  }

  /**
   * Serialize this node and its descendants as XML.
   *
   * @param {Array<string>} buffer - Receives the output fragments.
   */
  dump(buffer) {
    if (this.nodeName === "#text") {
      buffer.push(encodeToXmlString(this.nodeValue));
      return;
    }

    buffer.push(`<${this.nodeName}`);
    if (this.attributes) {
      for (const attribute of this.attributes) {
        buffer.push(
          ` ${attribute.name}="${encodeToXmlString(attribute.value)}"`
        );
      }
    }
    if (this.hasChildNodes()) {
      buffer.push(">");
      for (const child of this.childNodes) {
        child.dump(buffer);
      }
      buffer.push(`</${this.nodeName}>`);
    } else if (this.nodeValue) {
      buffer.push(`>${encodeToXmlString(this.nodeValue)}</${this.nodeName}>`);
    } else {
      buffer.push("/>");
    }
  }
}
|
||||
|
||||
/**
 * Builds a tree of `SimpleDOMNode` objects from the events reported by
 * `XMLParserBase.parseXml`.
 */
class SimpleXMLParser extends XMLParserBase {
  /**
   * @param {Object} options
   * @param {boolean} [options.hasAttributes] - Keep element attributes.
   * @param {boolean} [options.lowerCaseName] - Lower-case element names.
   */
  constructor({ hasAttributes = false, lowerCaseName = false }) {
    super();
    this._hasAttributes = hasAttributes;
    this._lowerCaseName = lowerCaseName;
    this._errorCode = XMLParserErrorCode.NoError;
    // Both are (re)initialized by `parseFromString`.
    this._stack = null;
    this._currentFragment = null;
  }

  /**
   * Parse `data` and return the resulting document.
   *
   * @param {string} data
   * @returns {Object|undefined} `{ documentElement }`, or `undefined` when a
   *   parse error was reported or no node was produced.
   */
  parseFromString(data) {
    this._errorCode = XMLParserErrorCode.NoError;
    this._stack = [];
    this._currentFragment = [];

    this.parseXml(data);

    const failed = this._errorCode !== XMLParserErrorCode.NoError;
    // We should only have one root, so take the first node of the fragment.
    const documentElement = failed ? undefined : this._currentFragment[0];
    return documentElement ? { documentElement } : undefined;
  }

  // Whitespace-only character data is not added to the tree.
  onText(text) {
    if (!isWhitespaceString(text)) {
      this._currentFragment.push(new SimpleDOMNode("#text", text));
    }
  }

  // CDATA content always becomes a text node, even if it is only whitespace.
  onCdata(text) {
    this._currentFragment.push(new SimpleDOMNode("#text", text));
  }

  onBeginElement(name, attributes, isEmpty) {
    const nodeName = this._lowerCaseName ? name.toLowerCase() : name;
    const element = new SimpleDOMNode(nodeName);
    element.childNodes = [];
    if (this._hasAttributes) {
      element.attributes = attributes;
    }
    this._currentFragment.push(element);

    if (!isEmpty) {
      // Subsequent nodes are collected as children until the end tag.
      this._stack.push(this._currentFragment);
      this._currentFragment = element.childNodes;
    }
  }

  onEndElement(name) {
    // Return to the enclosing fragment; its last node is the element that
    // has just been closed.
    const fragment = this._stack.pop() || [];
    this._currentFragment = fragment;

    const closedElement = fragment[fragment.length - 1];
    if (closedElement) {
      for (const child of closedElement.childNodes) {
        child.parentNode = closedElement;
      }
    }
  }

  onError(code) {
    this._errorCode = code;
  }
}
|
||||
|
||||
export { SimpleDOMNode, SimpleXMLParser, XMLParserBase, XMLParserErrorCode };
|
Loading…
Add table
Add a link
Reference in a new issue