From d366bbdf5137681a492ff14495b5c56fc6bad069 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 16 Feb 2021 14:13:51 +0100 Subject: [PATCH] Move the `encodeToXmlString` helper function to `src/core/core_utils.js` With the previous patch this function is now *only* accessed on the worker-thread, hence it's no longer necessary to include it in the *built* `pdf.js` file. --- src/core/core_utils.js | 46 ++++++++++++++++++++++++++++++++++ src/core/xml_parser.js | 2 +- src/shared/util.js | 48 ------------------------------------ test/unit/core_utils_spec.js | 15 +++++++++++ test/unit/util_spec.js | 15 ----------- 5 files changed, 62 insertions(+), 64 deletions(-) diff --git a/src/core/core_utils.js b/src/core/core_utils.js index cb205f141..70309919e 100644 --- a/src/core/core_utils.js +++ b/src/core/core_utils.js @@ -316,8 +316,54 @@ function collectActions(xref, dict, eventType) { return objectSize(actions) > 0 ? actions : null; } +const XMLEntities = { + /* < */ 0x3c: "&lt;", + /* > */ 0x3e: "&gt;", + /* & */ 0x26: "&amp;", + /* " */ 0x22: "&quot;", + /* ' */ 0x27: "&apos;", +}; + +function encodeToXmlString(str) { + const buffer = []; + let start = 0; + for (let i = 0, ii = str.length; i < ii; i++) { + const char = str.codePointAt(i); + if (0x20 <= char && char <= 0x7e) { + // ascii + const entity = XMLEntities[char]; + if (entity) { + if (start < i) { + buffer.push(str.substring(start, i)); + } + buffer.push(entity); + start = i + 1; + } + } else { + if (start < i) { + buffer.push(str.substring(start, i)); + } + buffer.push(`&#x${char.toString(16).toUpperCase()};`); + if (char > 0xd7ff && (char < 0xe000 || char > 0xfffd)) { + // char is represented by two u16 + i++; + } + start = i + 1; + } + } + + if (buffer.length === 0) { + return str; + } + if (start < str.length) { + buffer.push(str.substring(start, str.length)); + } + return buffer.join(""); +} + export { collectActions, + encodeToXmlString, escapePDFName, getArrayLookupTableFactory, getInheritableProperty, diff --git 
a/src/core/xml_parser.js b/src/core/xml_parser.js index 96bf6d3df..bf053f156 100644 --- a/src/core/xml_parser.js +++ b/src/core/xml_parser.js @@ -16,7 +16,7 @@ // The code for XMLParserBase copied from // https://github.com/mozilla/shumway/blob/16451d8836fa85f4b16eeda8b4bda2fa9e2b22b0/src/avm2/natives/xml.ts -import { encodeToXmlString } from "../shared/util.js"; +import { encodeToXmlString } from "./core_utils.js"; const XMLParserErrorCode = { NoError: 0, diff --git a/src/shared/util.js b/src/shared/util.js index 391072709..e0159595a 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -967,53 +967,6 @@ const createObjectURL = (function createObjectURLClosure() { }; })(); -const XMLEntities = { - /* < */ 0x3c: "&lt;", - /* > */ 0x3e: "&gt;", - /* & */ 0x26: "&amp;", - /* " */ 0x22: "&quot;", - /* ' */ 0x27: "&apos;", -}; - -function encodeToXmlString(str) { - const buffer = []; - let start = 0; - for (let i = 0, ii = str.length; i < ii; i++) { - const char = str.codePointAt(i); - if (0x20 <= char && char <= 0x7e) { - // ascii - const entity = XMLEntities[char]; - if (entity) { - if (start < i) { - buffer.push(str.substring(start, i)); - } - buffer.push(entity); - start = i + 1; - } - } else { - if (start < i) { - buffer.push(str.substring(start, i)); - } - buffer.push(`&#x${char.toString(16).toUpperCase()};`); - if (char > 0xd7ff && (char < 0xe000 || char > 0xfffd)) { - // char is represented by two u16 - i++; - } - start = i + 1; - } - } - - if (buffer.length === 0) { - return str; - } - - if (start < str.length) { - buffer.push(str.substring(start, str.length)); - } - - return buffer.join(""); -} - export { AbortException, AnnotationActionEventType, @@ -1035,7 +988,6 @@ export { createPromiseCapability, createValidAbsoluteUrl, DocumentActionEventType, - encodeToXmlString, escapeString, FONT_IDENTITY_MATRIX, FontType, diff --git a/test/unit/core_utils_spec.js b/test/unit/core_utils_spec.js index 033df42df..4a597d075 100644 --- a/test/unit/core_utils_spec.js +++ 
b/test/unit/core_utils_spec.js @@ -15,6 +15,7 @@ import { Dict, Ref } from "../../src/core/primitives.js"; import { + encodeToXmlString, escapePDFName, getInheritableProperty, isWhiteSpace, @@ -218,4 +219,18 @@ describe("core_utils", function () { ); }); }); + + describe("encodeToXmlString", function () { + it("should get a correctly encoded string with some entities", function () { + const str = "\"\u0397ell😂' & <W😂rld>"; + expect(encodeToXmlString(str)).toEqual( + "&quot;&#x397;ell&#x1F602;&apos; &amp; &lt;W&#x1F602;rld&gt;" + ); + }); + + it("should get a correctly encoded basic ascii string", function () { + const str = "hello world"; + expect(encodeToXmlString(str)).toEqual(str); + }); + }); }); diff --git a/test/unit/util_spec.js b/test/unit/util_spec.js index 52c9f41d7..3ac30e717 100644 --- a/test/unit/util_spec.js +++ b/test/unit/util_spec.js @@ -17,7 +17,6 @@ import { bytesToString, createPromiseCapability, createValidAbsoluteUrl, - encodeToXmlString, escapeString, getModificationDate, isArrayBuffer, @@ -335,20 +334,6 @@ describe("util", function () { }); }); - describe("encodeToXmlString", function () { - it("should get a correctly encoded string with some entities", function () { - const str = "\"\u0397ell😂' & <W😂rld>"; - expect(encodeToXmlString(str)).toEqual( - "&quot;&#x397;ell&#x1F602;&apos; &amp; &lt;W&#x1F602;rld&gt;" - ); - }); - - it("should get a correctly encoded basic ascii string", function () { - const str = "hello world"; - expect(encodeToXmlString(str)).toEqual(str); - }); - }); - describe("isAscii", function () { it("handles ascii/non-ascii strings", function () { expect(isAscii("hello world")).toEqual(true);