From 2619272d73615220f9736ca83b3b3711238ae735 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 3 Apr 2020 10:19:02 +0200 Subject: [PATCH 1/2] Change the signature of `TranslatedFont`, and convert it to a proper class In preparation for the next patch, this changes the signature of `TranslatedFont` to take an object rather than individual parameters. This also, in my opinion, makes the call-sites easier to read since it essentially provides a small bit of documentation of the arguments. Finally, since it was necessary to touch `TranslatedFont` anyway it seemed like a good idea to also convert it to a proper `class`. --- src/core/evaluator.js | 213 +++++++++++++++++++++--------------------- 1 file changed, 108 insertions(+), 105 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index ec05ce27c..fe79687c5 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -803,11 +803,11 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { this.handler.send("UnsupportedFeature", { featureId: UNSUPPORTED_FEATURES.font, }); - return new TranslatedFont( - "g_font_error", - new ErrorFont("Type3 font load error: " + reason), - translated.font - ); + return new TranslatedFont({ + loadedName: "g_font_error", + font: new ErrorFont(`Type3 font load error: ${reason}`), + dict: translated.font, + }); }); }) .then(translated => { @@ -958,11 +958,11 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { loadFont: function PartialEvaluator_loadFont(fontName, font, resources) { function errorFont() { return Promise.resolve( - new TranslatedFont( - "g_font_error", - new ErrorFont("Font " + fontName + " is not available"), - font - ) + new TranslatedFont({ + loadedName: "g_font_error", + font: new ErrorFont(`Font "${fontName}" is not available.`), + dict: font, + }) ); } @@ -1103,7 +1103,11 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } fontCapability.resolve( - new TranslatedFont(font.loadedName, translatedFont, font) 
+ new TranslatedFont({ + loadedName: font.loadedName, + font: translatedFont, + dict: font, + }) ); }) .catch(reason => { @@ -1126,11 +1130,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } catch (ex) {} fontCapability.resolve( - new TranslatedFont( - font.loadedName, - new ErrorFont(reason instanceof Error ? reason.message : reason), - font - ) + new TranslatedFont({ + loadedName: font.loadedName, + font: new ErrorFont( + reason instanceof Error ? reason.message : reason + ), + dict: font, + }) ); }); return fontCapability.promise; @@ -3266,107 +3272,104 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { return PartialEvaluator; })(); -var TranslatedFont = (function TranslatedFontClosure() { - // eslint-disable-next-line no-shadow - function TranslatedFont(loadedName, font, dict) { +class TranslatedFont { + constructor({ loadedName, font, dict }) { this.loadedName = loadedName; this.font = font; this.dict = dict; this.type3Loaded = null; this.sent = false; } - TranslatedFont.prototype = { - send(handler) { - if (this.sent) { - return; - } - this.sent = true; - handler.send("commonobj", [ - this.loadedName, - "Font", - this.font.exportData(), - ]); - }, + send(handler) { + if (this.sent) { + return; + } + this.sent = true; - fallback(handler) { - if (!this.font.data) { - return; - } - // When font loading failed, fall back to the built-in font renderer. - this.font.disableFontFace = true; - // An arbitrary number of text rendering operators could have been - // encountered between the point in time when the 'Font' message was sent - // to the main-thread, and the point in time when the 'FontFallback' - // message was received on the worker-thread. - // To ensure that all 'FontPath's are available on the main-thread, when - // font loading failed, attempt to resend *all* previously parsed glyphs. 
- const glyphs = this.font.glyphCacheValues; - PartialEvaluator.buildFontPaths(this.font, glyphs, handler); - }, + handler.send("commonobj", [ + this.loadedName, + "Font", + this.font.exportData(), + ]); + } - loadType3Data(evaluator, resources, parentOperatorList, task) { - if (!this.font.isType3Font) { - throw new Error("Must be a Type3 font."); - } + fallback(handler) { + if (!this.font.data) { + return; + } + // When font loading failed, fall back to the built-in font renderer. + this.font.disableFontFace = true; + // An arbitrary number of text rendering operators could have been + // encountered between the point in time when the 'Font' message was sent + // to the main-thread, and the point in time when the 'FontFallback' + // message was received on the worker-thread. + // To ensure that all 'FontPath's are available on the main-thread, when + // font loading failed, attempt to resend *all* previously parsed glyphs. + const glyphs = this.font.glyphCacheValues; + PartialEvaluator.buildFontPaths(this.font, glyphs, handler); + } - if (this.type3Loaded) { - return this.type3Loaded; - } - // When parsing Type3 glyphs, always ignore them if there are errors. - // Compared to the parsing of e.g. an entire page, it doesn't really - // make sense to only be able to render a Type3 glyph partially. - // - // Also, ensure that any Type3 image resources (which should be very rare - // in practice) are completely decoded on the worker-thread, to simplify - // the rendering code on the main-thread (see issue10717.pdf). 
- var type3Options = Object.create(evaluator.options); - type3Options.ignoreErrors = false; - type3Options.nativeImageDecoderSupport = NativeImageDecoding.NONE; - var type3Evaluator = evaluator.clone(type3Options); - type3Evaluator.parsingType3Font = true; + loadType3Data(evaluator, resources, parentOperatorList, task) { + if (!this.font.isType3Font) { + throw new Error("Must be a Type3 font."); + } - var translatedFont = this.font; - var loadCharProcsPromise = Promise.resolve(); - var charProcs = this.dict.get("CharProcs"); - var fontResources = this.dict.get("Resources") || resources; - var charProcKeys = charProcs.getKeys(); - var charProcOperatorList = Object.create(null); - - for (var i = 0, n = charProcKeys.length; i < n; ++i) { - const key = charProcKeys[i]; - loadCharProcsPromise = loadCharProcsPromise.then(function() { - var glyphStream = charProcs.get(key); - var operatorList = new OperatorList(); - return type3Evaluator - .getOperatorList({ - stream: glyphStream, - task, - resources: fontResources, - operatorList, - }) - .then(function() { - charProcOperatorList[key] = operatorList.getIR(); - - // Add the dependencies to the parent operator list so they are - // resolved before sub operator list is executed synchronously. - parentOperatorList.addDependencies(operatorList.dependencies); - }) - .catch(function(reason) { - warn(`Type3 font resource "${key}" is not available.`); - const dummyOperatorList = new OperatorList(); - charProcOperatorList[key] = dummyOperatorList.getIR(); - }); - }); - } - this.type3Loaded = loadCharProcsPromise.then(function() { - translatedFont.charProcOperatorList = charProcOperatorList; - }); + if (this.type3Loaded) { return this.type3Loaded; - }, - }; - return TranslatedFont; -})(); + } + // When parsing Type3 glyphs, always ignore them if there are errors. + // Compared to the parsing of e.g. an entire page, it doesn't really + // make sense to only be able to render a Type3 glyph partially. 
+ // + // Also, ensure that any Type3 image resources (which should be very rare + // in practice) are completely decoded on the worker-thread, to simplify + // the rendering code on the main-thread (see issue10717.pdf). + var type3Options = Object.create(evaluator.options); + type3Options.ignoreErrors = false; + type3Options.nativeImageDecoderSupport = NativeImageDecoding.NONE; + var type3Evaluator = evaluator.clone(type3Options); + type3Evaluator.parsingType3Font = true; + + var translatedFont = this.font; + var loadCharProcsPromise = Promise.resolve(); + var charProcs = this.dict.get("CharProcs"); + var fontResources = this.dict.get("Resources") || resources; + var charProcKeys = charProcs.getKeys(); + var charProcOperatorList = Object.create(null); + + for (var i = 0, n = charProcKeys.length; i < n; ++i) { + const key = charProcKeys[i]; + loadCharProcsPromise = loadCharProcsPromise.then(function() { + var glyphStream = charProcs.get(key); + var operatorList = new OperatorList(); + return type3Evaluator + .getOperatorList({ + stream: glyphStream, + task, + resources: fontResources, + operatorList, + }) + .then(function() { + charProcOperatorList[key] = operatorList.getIR(); + + // Add the dependencies to the parent operator list so they are + // resolved before sub operator list is executed synchronously. 
+ parentOperatorList.addDependencies(operatorList.dependencies); + }) + .catch(function(reason) { + warn(`Type3 font resource "${key}" is not available.`); + const dummyOperatorList = new OperatorList(); + charProcOperatorList[key] = dummyOperatorList.getIR(); + }); + }); + } + this.type3Loaded = loadCharProcsPromise.then(function() { + translatedFont.charProcOperatorList = charProcOperatorList; + }); + return this.type3Loaded; + } +} var StateManager = (function StateManagerClosure() { // eslint-disable-next-line no-shadow From 2d46230d23dc506e43692ccef8f24468bd9c38d0 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 3 Apr 2020 11:51:46 +0200 Subject: [PATCH 2/2] [api-minor] Change `Font.exportData` to, by default, stop exporting properties which are completely unused on the main-thread and/or in the API (PR 11773 follow-up) For years now, the `Font.exportData` method has (because of its previous implementation) been exporting many properties despite them being completely unused on the main-thread and/or in the API. This is unfortunate, since among those properties there's a number of potentially very large data-structures, containing e.g. Arrays and Objects, which thus have to be first structured cloned and then stored on the main-thread. With the changes in this patch, we'll thus by default save memory for *every* `Font` instance created (there can be a lot in longer documents). The memory savings obviously depend a lot on the actual font data, but some approximate figures are: For non-embedded fonts it can save a couple of kilobytes, for simple embedded fonts a handful of kilobytes, and for composite fonts the size of this auxiliary data can even be larger than the actual font program itself. All-in-all, there's no good reason to keep exporting these properties by default when they're unused. 
However, since we cannot be sure that every property is unused in custom implementations of the PDF.js library, this patch adds a new `getDocument` option (named `fontExtraProperties`) that still allows access to the following properties: - "cMap": An internal data structure, only used with composite fonts and never really intended to be exposed on the main-thread and/or in the API. Note also that the `CMap`/`IdentityCMap` classes are a lot more complex than simple Objects, but only their "internal" properties survive the structured cloning used to send data to the main-thread. Given that CMaps can often be *very* large, not exporting them can also save a fair bit of memory. - "defaultEncoding": An internal property used with simple fonts, and used when building the glyph mapping on the worker-thread. Considering how complex that topic is, and given that not all font types are handled identically, exposing this on the main-thread and/or in the API most likely isn't useful. - "differences": An internal property used with simple fonts, and used when building the glyph mapping on the worker-thread. Considering how complex that topic is, and given that not all font types are handled identically, exposing this on the main-thread and/or in the API most likely isn't useful. - "isSymbolicFont": An internal property, used during font parsing and building of the glyph mapping on the worker-thread. - "seacMap": An internal map, only potentially used with *some* Type1/CFF fonts and never intended to be exposed in the API. The existing `Font.{charToGlyph, charToGlyphs}` functionality already takes this data into account when handling text. - "toFontChar": The glyph map, necessary for mapping characters to glyphs in the font, which is built upon the various encoding information contained in the font dictionary and/or font program. This is not directly used on the main-thread and/or in the API. 
- "toUnicode": The unicode map, necessary for text-extraction to work correctly, which is built upon the ToUnicode/CMap information contained in the font dictionary, but not directly used on the main-thread and/or in the API. - "vmetrics": An array of width data used with fonts which are composite *and* vertical, but not directly used on the main-thread and/or in the API. - "widths": An array of width data used with most fonts, but not directly used on the main-thread and/or in the API. --- src/core/evaluator.js | 16 +++++++++++----- src/core/fonts.js | 27 +++++++++++++++++---------- src/core/worker.js | 1 + src/display/api.js | 7 +++++++ web/app.js | 2 ++ web/app_options.js | 5 +++++ 6 files changed, 43 insertions(+), 15 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index fe79687c5..a39bc2055 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -94,6 +94,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { nativeImageDecoderSupport: NativeImageDecoding.DECODE, ignoreErrors: false, isEvalSupported: true, + fontExtraProperties: false, }; // eslint-disable-next-line no-shadow @@ -807,6 +808,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { loadedName: "g_font_error", font: new ErrorFont(`Type3 font load error: ${reason}`), dict: translated.font, + extraProperties: this.options.fontExtraProperties, }); }); }) @@ -956,15 +958,16 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { }, loadFont: function PartialEvaluator_loadFont(fontName, font, resources) { - function errorFont() { + const errorFont = () => { return Promise.resolve( new TranslatedFont({ loadedName: "g_font_error", font: new ErrorFont(`Font "${fontName}" is not available.`), dict: font, + extraProperties: this.options.fontExtraProperties, }) ); - } + }; var fontRef, xref = this.xref; @@ -1096,7 +1099,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } translatedPromise - .then(function(translatedFont) { + 
.then(translatedFont => { if (translatedFont.fontType !== undefined) { var xrefFontStats = xref.stats.fontTypes; xrefFontStats[translatedFont.fontType] = true; @@ -1107,6 +1110,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { loadedName: font.loadedName, font: translatedFont, dict: font, + extraProperties: this.options.fontExtraProperties, }) ); }) @@ -1136,6 +1140,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { reason instanceof Error ? reason.message : reason ), dict: font, + extraProperties: this.options.fontExtraProperties, }) ); }); @@ -3273,10 +3278,11 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { })(); class TranslatedFont { - constructor({ loadedName, font, dict }) { + constructor({ loadedName, font, dict, extraProperties = false }) { this.loadedName = loadedName; this.font = font; this.dict = dict; + this._extraProperties = extraProperties; this.type3Loaded = null; this.sent = false; } @@ -3290,7 +3296,7 @@ class TranslatedFont { handler.send("commonobj", [ this.loadedName, "Font", - this.font.exportData(), + this.font.exportData(this._extraProperties), ]); } diff --git a/src/core/fonts.js b/src/core/fonts.js index add8fd973..241e50daf 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -92,21 +92,17 @@ const EXPORT_DATA_PROPERTIES = [ "bbox", "black", "bold", - "cMap", "charProcOperatorList", "composite", "data", - "defaultEncoding", "defaultVMetrics", "defaultWidth", "descent", - "differences", "fallbackName", "fontMatrix", "fontType", "isMonospace", "isSerifFont", - "isSymbolicFont", "isType3Font", "italic", "loadedName", @@ -114,12 +110,19 @@ const EXPORT_DATA_PROPERTIES = [ "missingFile", "name", "remeasure", - "seacMap", "subtype", - "toFontChar", - "toUnicode", "type", "vertical", +]; + +const EXPORT_DATA_EXTRA_PROPERTIES = [ + "cMap", + "defaultEncoding", + "differences", + "isSymbolicFont", + "seacMap", + "toFontChar", + "toUnicode", "vmetrics", "widths", ]; @@ -1295,10 +1298,14 @@ var 
Font = (function FontClosure() { return shadow(this, "renderer", renderer); }, - exportData() { + exportData(extraProperties = false) { + const exportDataProperties = extraProperties + ? [...EXPORT_DATA_PROPERTIES, ...EXPORT_DATA_EXTRA_PROPERTIES] + : EXPORT_DATA_PROPERTIES; + const data = Object.create(null); let property, value; - for (property of EXPORT_DATA_PROPERTIES) { + for (property of exportDataProperties) { value = this[property]; // Ignore properties that haven't been explicitly set. if (value !== undefined) { @@ -3352,7 +3359,7 @@ var ErrorFont = (function ErrorFontClosure() { charsToGlyphs: function ErrorFont_charsToGlyphs() { return []; }, - exportData: function ErrorFont_exportData() { + exportData(extraProperties = false) { return { error: this.error }; }, }; diff --git a/src/core/worker.js b/src/core/worker.js index ba565a487..b9afe7bed 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -403,6 +403,7 @@ var WorkerMessageHandler = { nativeImageDecoderSupport: data.nativeImageDecoderSupport, ignoreErrors: data.ignoreErrors, isEvalSupported: data.isEvalSupported, + fontExtraProperties: data.fontExtraProperties, }; getPdfManager(data, evaluatorOptions) diff --git a/src/display/api.js b/src/display/api.js index 4f5749f35..8921cc690 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -145,6 +145,11 @@ function setPDFNetworkStreamFactory(pdfNetworkStreamFactory) { * converted to OpenType fonts and loaded via font face rules. If disabled, * fonts will be rendered using a built-in font renderer that constructs the * glyphs with primitive path commands. The default value is `false`. + * @property {boolean} [fontExtraProperties] - Include additional properties, + * which are unused during rendering of PDF documents, when exporting the + * parsed font data from the worker-thread. This may be useful for debugging + * purposes (and backwards compatibility), but note that it will lead to + * increased memory usage. The default value is `false`. 
* @property {boolean} [disableRange] - Disable range request loading * of PDF files. When enabled, and if the server supports partial content * requests, then the PDF will be fetched in chunks. @@ -251,6 +256,7 @@ function getDocument(src) { params.rangeChunkSize = params.rangeChunkSize || DEFAULT_RANGE_CHUNK_SIZE; params.CMapReaderFactory = params.CMapReaderFactory || DOMCMapReaderFactory; params.ignoreErrors = params.stopAtErrors !== true; + params.fontExtraProperties = params.fontExtraProperties === true; params.pdfBug = params.pdfBug === true; const NativeImageDecoderValues = Object.values(NativeImageDecoding); @@ -403,6 +409,7 @@ function _fetchDocument(worker, source, pdfDataRangeTransport, docId) { nativeImageDecoderSupport: source.nativeImageDecoderSupport, ignoreErrors: source.ignoreErrors, isEvalSupported: source.isEvalSupported, + fontExtraProperties: source.fontExtraProperties, }) .then(function(workerId) { if (worker.destroyed) { diff --git a/web/app.js b/web/app.js index 5ddfd8adf..135882176 100644 --- a/web/app.js +++ b/web/app.js @@ -309,6 +309,8 @@ const PDFViewerApplication = { } if ("pdfbug" in hashParams) { AppOptions.set("pdfBug", true); + AppOptions.set("fontExtraProperties", true); + const enabled = hashParams["pdfbug"].split(","); waitOn.push(loadAndEnablePDFBug(enabled)); } diff --git a/web/app_options.js b/web/app_options.js index 3fa1187a1..0f3b5af25 100644 --- a/web/app_options.js +++ b/web/app_options.js @@ -193,6 +193,11 @@ const defaultOptions = { value: "", kind: OptionKind.API, }, + fontExtraProperties: { + /** @type {boolean} */ + value: false, + kind: OptionKind.API, + }, isEvalSupported: { /** @type {boolean} */ value: true,