diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 163c10756..81a05a736 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -163,28 +163,35 @@ function normalizeBlendMode(value, parsingArray = false) { } // Trying to minimize Date.now() usage and check every 100 time. -var TIME_SLOT_DURATION_MS = 20; -var CHECK_TIME_EVERY = 100; -function TimeSlotManager() { - this.reset(); -} -TimeSlotManager.prototype = { - check: function TimeSlotManager_check() { - if (++this.checked < CHECK_TIME_EVERY) { +class TimeSlotManager { + static get TIME_SLOT_DURATION_MS() { + return shadow(this, "TIME_SLOT_DURATION_MS", 20); + } + + static get CHECK_TIME_EVERY() { + return shadow(this, "CHECK_TIME_EVERY", 100); + } + + constructor() { + this.reset(); + } + + check() { + if (++this.checked < TimeSlotManager.CHECK_TIME_EVERY) { return false; } this.checked = 0; return this.endTime <= Date.now(); - }, - reset: function TimeSlotManager_reset() { - this.endTime = Date.now() + TIME_SLOT_DURATION_MS; - this.checked = 0; - }, -}; + } -var PartialEvaluator = (function PartialEvaluatorClosure() { - // eslint-disable-next-line no-shadow - function PartialEvaluator({ + reset() { + this.endTime = Date.now() + TimeSlotManager.TIME_SLOT_DURATION_MS; + this.checked = 0; + } +} + +class PartialEvaluator { + constructor({ xref, handler, pageIndex, @@ -207,3089 +214,3047 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { this._fetchBuiltInCMapBound = this.fetchBuiltInCMap.bind(this); } - PartialEvaluator.prototype = { - /** - * Since Functions are only cached (locally) by reference, we can share one - * `PDFFunctionFactory` instance within this `PartialEvaluator` instance. - */ - get _pdfFunctionFactory() { - const pdfFunctionFactory = new PDFFunctionFactory({ - xref: this.xref, - isEvalSupported: this.options.isEvalSupported, - }); - return shadow(this, "_pdfFunctionFactory", pdfFunctionFactory); - }, + /** + * Since Functions are only cached (locally) by reference, we can share one + * `PDFFunctionFactory` instance within this `PartialEvaluator` instance. + */ + get _pdfFunctionFactory() { + const pdfFunctionFactory = new PDFFunctionFactory({ + xref: this.xref, + isEvalSupported: this.options.isEvalSupported, + }); + return shadow(this, "_pdfFunctionFactory", pdfFunctionFactory); + } - clone(newOptions = DefaultPartialEvaluatorOptions) { - var newEvaluator = Object.create(this); - newEvaluator.options = newOptions; - return newEvaluator; - }, + clone(newOptions = DefaultPartialEvaluatorOptions) { + var newEvaluator = Object.create(this); + newEvaluator.options = newOptions; + return newEvaluator; + } - hasBlendModes: function PartialEvaluator_hasBlendModes(resources) { - if (!(resources instanceof Dict)) { - return false; - } + hasBlendModes(resources) { + if (!(resources instanceof Dict)) { + return false; + } - var processed = Object.create(null); - if (resources.objId) { - processed[resources.objId] = true; - } + var processed = Object.create(null); + if (resources.objId) { + processed[resources.objId] = true; + } - var nodes = [resources], - xref = this.xref; - while (nodes.length) { - var node = nodes.shift(); - // First check the current resources for blend modes. - var graphicStates = node.get("ExtGState"); - if (graphicStates instanceof Dict) { - var graphicStatesKeys = graphicStates.getKeys(); - for (let i = 0, ii = graphicStatesKeys.length; i < ii; i++) { - const key = graphicStatesKeys[i]; + var nodes = [resources], + xref = this.xref; + while (nodes.length) { + var node = nodes.shift(); + // First check the current resources for blend modes. + var graphicStates = node.get("ExtGState"); + if (graphicStates instanceof Dict) { + var graphicStatesKeys = graphicStates.getKeys(); + for (let i = 0, ii = graphicStatesKeys.length; i < ii; i++) { + const key = graphicStatesKeys[i]; - let graphicState = graphicStates.getRaw(key); - if (graphicState instanceof Ref) { - if (processed[graphicState.toString()]) { - continue; // The ExtGState has already been processed. - } - try { - graphicState = xref.fetch(graphicState); - } catch (ex) { - if (ex instanceof MissingDataException) { - throw ex; - } - if (this.options.ignoreErrors) { - if (graphicState instanceof Ref) { - // Avoid parsing a corrupt ExtGState more than once. - processed[graphicState.toString()] = true; - } - // Error(s) in the ExtGState -- sending unsupported feature - // notification and allow parsing/rendering to continue. - this.handler.send("UnsupportedFeature", { - featureId: UNSUPPORTED_FEATURES.errorExtGState, - }); - warn(`hasBlendModes - ignoring ExtGState: "${ex}".`); - continue; - } - throw ex; - } - } - if (!(graphicState instanceof Dict)) { - continue; - } - if (graphicState.objId) { - processed[graphicState.objId] = true; - } - - const bm = graphicState.get("BM"); - if (bm instanceof Name) { - if (bm.name !== "Normal") { - return true; - } - continue; - } - if (bm !== undefined && Array.isArray(bm)) { - for (let j = 0, jj = bm.length; j < jj; j++) { - if (bm[j] instanceof Name && bm[j].name !== "Normal") { - return true; - } - } - } - } - } - // Descend into the XObjects to look for more resources and blend modes. - var xObjects = node.get("XObject"); - if (!(xObjects instanceof Dict)) { - continue; - } - var xObjectsKeys = xObjects.getKeys(); - for (let i = 0, ii = xObjectsKeys.length; i < ii; i++) { - const key = xObjectsKeys[i]; - - var xObject = xObjects.getRaw(key); - if (xObject instanceof Ref) { - if (processed[xObject.toString()]) { - // The XObject has already been processed, and by avoiding a - // redundant `xref.fetch` we can *significantly* reduce the load - // time for badly generated PDF files (fixes issue6961.pdf). - continue; + let graphicState = graphicStates.getRaw(key); + if (graphicState instanceof Ref) { + if (processed[graphicState.toString()]) { + continue; // The ExtGState has already been processed. } try { - xObject = xref.fetch(xObject); + graphicState = xref.fetch(graphicState); } catch (ex) { if (ex instanceof MissingDataException) { throw ex; } if (this.options.ignoreErrors) { - if (xObject instanceof Ref) { - // Avoid parsing a corrupt XObject more than once. - processed[xObject.toString()] = true; + if (graphicState instanceof Ref) { + // Avoid parsing a corrupt ExtGState more than once. + processed[graphicState.toString()] = true; } - // Error(s) in the XObject -- sending unsupported feature + // Error(s) in the ExtGState -- sending unsupported feature // notification and allow parsing/rendering to continue. this.handler.send("UnsupportedFeature", { - featureId: UNSUPPORTED_FEATURES.errorXObject, + featureId: UNSUPPORTED_FEATURES.errorExtGState, }); - warn(`hasBlendModes - ignoring XObject: "${ex}".`); + warn(`hasBlendModes - ignoring ExtGState: "${ex}".`); continue; } throw ex; } } - if (!isStream(xObject)) { + if (!(graphicState instanceof Dict)) { continue; } - if (xObject.dict.objId) { - if (processed[xObject.dict.objId]) { - continue; // Stream has objId and was processed already. - } - processed[xObject.dict.objId] = true; + if (graphicState.objId) { + processed[graphicState.objId] = true; } - var xResources = xObject.dict.get("Resources"); - // Checking objId to detect an infinite loop. - if ( - xResources instanceof Dict && - (!xResources.objId || !processed[xResources.objId]) - ) { - nodes.push(xResources); - if (xResources.objId) { - processed[xResources.objId] = true; + + const bm = graphicState.get("BM"); + if (bm instanceof Name) { + if (bm.name !== "Normal") { + return true; + } + continue; + } + if (bm !== undefined && Array.isArray(bm)) { + for (let j = 0, jj = bm.length; j < jj; j++) { + if (bm[j] instanceof Name && bm[j].name !== "Normal") { + return true; + } } } } } - return false; - }, - - async fetchBuiltInCMap(name) { - const cachedData = this.builtInCMapCache.get(name); - if (cachedData) { - return cachedData; + // Descend into the XObjects to look for more resources and blend modes. + var xObjects = node.get("XObject"); + if (!(xObjects instanceof Dict)) { + continue; } - const readableStream = this.handler.sendWithStream("FetchBuiltInCMap", { - name, - }); - const reader = readableStream.getReader(); + var xObjectsKeys = xObjects.getKeys(); + for (let i = 0, ii = xObjectsKeys.length; i < ii; i++) { + const key = xObjectsKeys[i]; - const data = await new Promise(function (resolve, reject) { - function pump() { - reader.read().then(function ({ value, done }) { - if (done) { - return; + var xObject = xObjects.getRaw(key); + if (xObject instanceof Ref) { + if (processed[xObject.toString()]) { + // The XObject has already been processed, and by avoiding a + // redundant `xref.fetch` we can *significantly* reduce the load + // time for badly generated PDF files (fixes issue6961.pdf). + continue; + } + try { + xObject = xref.fetch(xObject); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; } - resolve(value); - pump(); - }, reject); - } - pump(); - }); - - if (data.compressionType !== CMapCompressionType.NONE) { - // Given the size of uncompressed CMaps, only cache compressed ones. - this.builtInCMapCache.set(name, data); - } - return data; - }, - - async buildFormXObject( - resources, - xobj, - smask, - operatorList, - task, - initialState, - localColorSpaceCache - ) { - var dict = xobj.dict; - var matrix = dict.getArray("Matrix"); - var bbox = dict.getArray("BBox"); - if (Array.isArray(bbox) && bbox.length === 4) { - bbox = Util.normalizeRect(bbox); - } else { - bbox = null; - } - var group = dict.get("Group"); - if (group) { - var groupOptions = { - matrix, - bbox, - smask, - isolated: false, - knockout: false, - }; - - var groupSubtype = group.get("S"); - var colorSpace = null; - if (isName(groupSubtype, "Transparency")) { - groupOptions.isolated = group.get("I") || false; - groupOptions.knockout = group.get("K") || false; - if (group.has("CS")) { - const cs = group.getRaw("CS"); - - const cachedColorSpace = ColorSpace.getCached( - cs, - this.xref, - localColorSpaceCache - ); - if (cachedColorSpace) { - colorSpace = cachedColorSpace; - } else { - colorSpace = await this.parseColorSpace({ - cs, - resources, - localColorSpaceCache, + if (this.options.ignoreErrors) { + if (xObject instanceof Ref) { + // Avoid parsing a corrupt XObject more than once. + processed[xObject.toString()] = true; + } + // Error(s) in the XObject -- sending unsupported feature + // notification and allow parsing/rendering to continue. + this.handler.send("UnsupportedFeature", { + featureId: UNSUPPORTED_FEATURES.errorXObject, }); + warn(`hasBlendModes - ignoring XObject: "${ex}".`); + continue; } + throw ex; } } - - if (smask && smask.backdrop) { - colorSpace = colorSpace || ColorSpace.singletons.rgb; - smask.backdrop = colorSpace.getRgb(smask.backdrop, 0); + if (!isStream(xObject)) { + continue; } - - operatorList.addOp(OPS.beginGroup, [groupOptions]); - } - - operatorList.addOp(OPS.paintFormXObjectBegin, [matrix, bbox]); - - return this.getOperatorList({ - stream: xobj, - task, - resources: dict.get("Resources") || resources, - operatorList, - initialState, - }).then(function () { - operatorList.addOp(OPS.paintFormXObjectEnd, []); - - if (group) { - operatorList.addOp(OPS.endGroup, [groupOptions]); + if (xObject.dict.objId) { + if (processed[xObject.dict.objId]) { + continue; // Stream has objId and was processed already. + } + processed[xObject.dict.objId] = true; + } + var xResources = xObject.dict.get("Resources"); + // Checking objId to detect an infinite loop. + if ( + xResources instanceof Dict && + (!xResources.objId || !processed[xResources.objId]) + ) { + nodes.push(xResources); + if (xResources.objId) { + processed[xResources.objId] = true; + } } - }); - }, - - _sendImgData(objId, imgData, cacheGlobally = false) { - const transfers = imgData ? [imgData.data.buffer] : null; - - if (this.parsingType3Font) { - return this.handler.sendWithPromise( - "commonobj", - [objId, "FontType3Res", imgData], - transfers - ); } - if (cacheGlobally) { - return this.handler.send( - "commonobj", - [objId, "Image", imgData], - transfers - ); + } + return false; + } + + async fetchBuiltInCMap(name) { + const cachedData = this.builtInCMapCache.get(name); + if (cachedData) { + return cachedData; + } + const readableStream = this.handler.sendWithStream("FetchBuiltInCMap", { + name, + }); + const reader = readableStream.getReader(); + + const data = await new Promise(function (resolve, reject) { + function pump() { + reader.read().then(function ({ value, done }) { + if (done) { + return; + } + resolve(value); + pump(); + }, reject); } - return this.handler.send( - "obj", - [objId, this.pageIndex, "Image", imgData], + pump(); + }); + + if (data.compressionType !== CMapCompressionType.NONE) { + // Given the size of uncompressed CMaps, only cache compressed ones. + this.builtInCMapCache.set(name, data); + } + return data; + } + + async buildFormXObject( + resources, + xobj, + smask, + operatorList, + task, + initialState, + localColorSpaceCache + ) { + var dict = xobj.dict; + var matrix = dict.getArray("Matrix"); + var bbox = dict.getArray("BBox"); + if (Array.isArray(bbox) && bbox.length === 4) { + bbox = Util.normalizeRect(bbox); + } else { + bbox = null; + } + var group = dict.get("Group"); + if (group) { + var groupOptions = { + matrix, + bbox, + smask, + isolated: false, + knockout: false, + }; + + var groupSubtype = group.get("S"); + var colorSpace = null; + if (isName(groupSubtype, "Transparency")) { + groupOptions.isolated = group.get("I") || false; + groupOptions.knockout = group.get("K") || false; + if (group.has("CS")) { + const cs = group.getRaw("CS"); + + const cachedColorSpace = ColorSpace.getCached( + cs, + this.xref, + localColorSpaceCache + ); + if (cachedColorSpace) { + colorSpace = cachedColorSpace; + } else { + colorSpace = await this.parseColorSpace({ + cs, + resources, + localColorSpaceCache, + }); + } + } + } + + if (smask && smask.backdrop) { + colorSpace = colorSpace || ColorSpace.singletons.rgb; + smask.backdrop = colorSpace.getRgb(smask.backdrop, 0); + } + + operatorList.addOp(OPS.beginGroup, [groupOptions]); + } + + operatorList.addOp(OPS.paintFormXObjectBegin, [matrix, bbox]); + + return this.getOperatorList({ + stream: xobj, + task, + resources: dict.get("Resources") || resources, + operatorList, + initialState, + }).then(function () { + operatorList.addOp(OPS.paintFormXObjectEnd, []); + + if (group) { + operatorList.addOp(OPS.endGroup, [groupOptions]); + } + }); + } + + _sendImgData(objId, imgData, cacheGlobally = false) { + const transfers = imgData ? [imgData.data.buffer] : null; + + if (this.parsingType3Font) { + return this.handler.sendWithPromise( + "commonobj", + [objId, "FontType3Res", imgData], transfers ); - }, + } + if (cacheGlobally) { + return this.handler.send( + "commonobj", + [objId, "Image", imgData], + transfers + ); + } + return this.handler.send( + "obj", + [objId, this.pageIndex, "Image", imgData], + transfers + ); + } - async buildPaintImageXObject({ - resources, - image, - isInline = false, - operatorList, - cacheKey, - localImageCache, - localColorSpaceCache, - }) { - var dict = image.dict; - const imageRef = dict.objId; - var w = dict.get("Width", "W"); - var h = dict.get("Height", "H"); + async buildPaintImageXObject({ + resources, + image, + isInline = false, + operatorList, + cacheKey, + localImageCache, + localColorSpaceCache, + }) { + var dict = image.dict; + const imageRef = dict.objId; + var w = dict.get("Width", "W"); + var h = dict.get("Height", "H"); - if (!(w && isNum(w)) || !(h && isNum(h))) { - warn("Image dimensions are missing, or not numbers."); - return undefined; - } - var maxImageSize = this.options.maxImageSize; - if (maxImageSize !== -1 && w * h > maxImageSize) { - warn("Image exceeded maximum allowed size and was removed."); - return undefined; - } + if (!(w && isNum(w)) || !(h && isNum(h))) { + warn("Image dimensions are missing, or not numbers."); + return undefined; + } + var maxImageSize = this.options.maxImageSize; + if (maxImageSize !== -1 && w * h > maxImageSize) { + warn("Image exceeded maximum allowed size and was removed."); + return undefined; + } - var imageMask = dict.get("ImageMask", "IM") || false; - var imgData, args; - if (imageMask) { - // This depends on a tmpCanvas being filled with the - // current fillStyle, such that processing the pixel - // data can't be done here. Instead of creating a - // complete PDFImage, only read the information needed - // for later. + var imageMask = dict.get("ImageMask", "IM") || false; + var imgData, args; + if (imageMask) { + // This depends on a tmpCanvas being filled with the + // current fillStyle, such that processing the pixel + // data can't be done here. Instead of creating a + // complete PDFImage, only read the information needed + // for later. - var width = dict.get("Width", "W"); - var height = dict.get("Height", "H"); - var bitStrideLength = (width + 7) >> 3; - var imgArray = image.getBytes( - bitStrideLength * height, - /* forceClamped = */ true - ); - var decode = dict.getArray("Decode", "D"); + var width = dict.get("Width", "W"); + var height = dict.get("Height", "H"); + var bitStrideLength = (width + 7) >> 3; + var imgArray = image.getBytes( + bitStrideLength * height, + /* forceClamped = */ true + ); + var decode = dict.getArray("Decode", "D"); - imgData = PDFImage.createMask({ - imgArray, - width, - height, - imageIsFromDecodeStream: image instanceof DecodeStream, - inverseDecode: !!decode && decode[0] > 0, + imgData = PDFImage.createMask({ + imgArray, + width, + height, + imageIsFromDecodeStream: image instanceof DecodeStream, + inverseDecode: !!decode && decode[0] > 0, + }); + imgData.cached = !!cacheKey; + args = [imgData]; + + operatorList.addOp(OPS.paintImageMaskXObject, args); + if (cacheKey) { + localImageCache.set(cacheKey, imageRef, { + fn: OPS.paintImageMaskXObject, + args, }); - imgData.cached = !!cacheKey; - args = [imgData]; - - operatorList.addOp(OPS.paintImageMaskXObject, args); - if (cacheKey) { - localImageCache.set(cacheKey, imageRef, { - fn: OPS.paintImageMaskXObject, - args, - }); - } - return undefined; } + return undefined; + } - var softMask = dict.get("SMask", "SM") || false; - var mask = dict.get("Mask") || false; + var softMask = dict.get("SMask", "SM") || false; + var mask = dict.get("Mask") || false; - var SMALL_IMAGE_DIMENSIONS = 200; - // Inlining small images into the queue as RGB data - if (isInline && !softMask && !mask && w + h < SMALL_IMAGE_DIMENSIONS) { - const imageObj = new PDFImage({ - xref: this.xref, - res: resources, - image, - isInline, - pdfFunctionFactory: this._pdfFunctionFactory, - localColorSpaceCache, - }); - // We force the use of RGBA_32BPP images here, because we can't handle - // any other kind. - imgData = imageObj.createImageData(/* forceRGBA = */ true); - operatorList.addOp(OPS.paintInlineImageXObject, [imgData]); - return undefined; - } - - // If there is no imageMask, create the PDFImage and a lot - // of image processing can be done here. - let objId = `img_${this.idFactory.createObjId()}`, - cacheGlobally = false; - - if (this.parsingType3Font) { - objId = `${this.idFactory.getDocId()}_type3res_${objId}`; - } else if (imageRef) { - cacheGlobally = this.globalImageCache.shouldCache( - imageRef, - this.pageIndex - ); - - if (cacheGlobally) { - objId = `${this.idFactory.getDocId()}_${objId}`; - } - } - - // Ensure that the dependency is added before the image is decoded. - operatorList.addDependency(objId); - args = [objId, w, h]; - - const imgPromise = PDFImage.buildImage({ + var SMALL_IMAGE_DIMENSIONS = 200; + // Inlining small images into the queue as RGB data + if (isInline && !softMask && !mask && w + h < SMALL_IMAGE_DIMENSIONS) { + const imageObj = new PDFImage({ xref: this.xref, res: resources, image, isInline, pdfFunctionFactory: this._pdfFunctionFactory, localColorSpaceCache, - }) - .then(imageObj => { - imgData = imageObj.createImageData(/* forceRGBA = */ false); - - return this._sendImgData(objId, imgData, cacheGlobally); - }) - .catch(reason => { - warn(`Unable to decode image "${objId}": "${reason}".`); - - return this._sendImgData(objId, /* imgData = */ null, cacheGlobally); - }); - - if (this.parsingType3Font) { - // In the very rare case where a Type3 image resource is being parsed, - // wait for the image to be both decoded *and* sent to simplify the - // rendering code on the main-thread (see issue10717.pdf). - await imgPromise; - } - - operatorList.addOp(OPS.paintImageXObject, args); - if (cacheKey) { - localImageCache.set(cacheKey, imageRef, { - fn: OPS.paintImageXObject, - args, - }); - - if (imageRef) { - assert(!isInline, "Cannot cache an inline image globally."); - this.globalImageCache.addPageIndex(imageRef, this.pageIndex); - - if (cacheGlobally) { - this.globalImageCache.setData(imageRef, { - objId, - fn: OPS.paintImageXObject, - args, - }); - } - } - } + }); + // We force the use of RGBA_32BPP images here, because we can't handle + // any other kind. + imgData = imageObj.createImageData(/* forceRGBA = */ true); + operatorList.addOp(OPS.paintInlineImageXObject, [imgData]); return undefined; - }, + } - handleSMask: function PartialEvaluator_handleSmask( - smask, - resources, - operatorList, - task, - stateManager, - localColorSpaceCache - ) { - var smaskContent = smask.get("G"); - var smaskOptions = { - subtype: smask.get("S").name, - backdrop: smask.get("BC"), - }; + // If there is no imageMask, create the PDFImage and a lot + // of image processing can be done here. + let objId = `img_${this.idFactory.createObjId()}`, + cacheGlobally = false; - // The SMask might have a alpha/luminosity value transfer function -- - // we will build a map of integer values in range 0..255 to be fast. - var transferObj = smask.get("TR"); - if (isPDFFunction(transferObj)) { - const transferFn = this._pdfFunctionFactory.create(transferObj); - var transferMap = new Uint8Array(256); - var tmp = new Float32Array(1); - for (var i = 0; i < 256; i++) { - tmp[0] = i / 255; - transferFn(tmp, 0, tmp, 0); - transferMap[i] = (tmp[0] * 255) | 0; - } - smaskOptions.transferMap = transferMap; - } - - return this.buildFormXObject( - resources, - smaskContent, - smaskOptions, - operatorList, - task, - stateManager.state.clone(), - localColorSpaceCache + if (this.parsingType3Font) { + objId = `${this.idFactory.getDocId()}_type3res_${objId}`; + } else if (imageRef) { + cacheGlobally = this.globalImageCache.shouldCache( + imageRef, + this.pageIndex ); - }, - handleTilingType( - fn, - args, - resources, - pattern, - patternDict, - operatorList, - task - ) { - // Create an IR of the pattern code. - const tilingOpList = new OperatorList(); - // Merge the available resources, to prevent issues when the patternDict - // is missing some /Resources entries (fixes issue6541.pdf). - const resourcesArray = [patternDict.get("Resources"), resources]; - const patternResources = Dict.merge(this.xref, resourcesArray); + if (cacheGlobally) { + objId = `${this.idFactory.getDocId()}_${objId}`; + } + } - return this.getOperatorList({ - stream: pattern, - task, - resources: patternResources, - operatorList: tilingOpList, + // Ensure that the dependency is added before the image is decoded. + operatorList.addDependency(objId); + args = [objId, w, h]; + + const imgPromise = PDFImage.buildImage({ + xref: this.xref, + res: resources, + image, + isInline, + pdfFunctionFactory: this._pdfFunctionFactory, + localColorSpaceCache, + }) + .then(imageObj => { + imgData = imageObj.createImageData(/* forceRGBA = */ false); + + return this._sendImgData(objId, imgData, cacheGlobally); }) - .then(function () { - return getTilingPatternIR( - { - fnArray: tilingOpList.fnArray, - argsArray: tilingOpList.argsArray, - }, - patternDict, - args - ); - }) - .then( - function (tilingPatternIR) { - // Add the dependencies to the parent operator list so they are - // resolved before the sub operator list is executed synchronously. - operatorList.addDependencies(tilingOpList.dependencies); - operatorList.addOp(fn, tilingPatternIR); - }, - reason => { - if (reason instanceof AbortException) { - return; - } - if (this.options.ignoreErrors) { - // Error(s) in the TilingPattern -- sending unsupported feature - // notification and allow rendering to continue. - this.handler.send("UnsupportedFeature", { - featureId: UNSUPPORTED_FEATURES.errorTilingPattern, - }); - warn(`handleTilingType - ignoring pattern: "${reason}".`); - return; - } - throw reason; - } - ); - }, + .catch(reason => { + warn(`Unable to decode image "${objId}": "${reason}".`); - handleSetFont: function PartialEvaluator_handleSetFont( - resources, - fontArgs, - fontRef, - operatorList, - task, - state - ) { - // TODO(mack): Not needed? - var fontName; - if (fontArgs) { - fontArgs = fontArgs.slice(); - fontName = fontArgs[0].name; - } + return this._sendImgData(objId, /* imgData = */ null, cacheGlobally); + }); - return this.loadFont(fontName, fontRef, resources) - .then(translated => { - if (!translated.font.isType3Font) { - return translated; - } - return translated - .loadType3Data(this, resources, operatorList, task) - .then(function () { - return translated; - }) - .catch(reason => { - // Error in the font data -- sending unsupported feature - // notification. - this.handler.send("UnsupportedFeature", { - featureId: UNSUPPORTED_FEATURES.errorFontLoadType3, - }); - return new TranslatedFont({ - loadedName: "g_font_error", - font: new ErrorFont(`Type3 font load error: ${reason}`), - dict: translated.font, - extraProperties: this.options.fontExtraProperties, - }); - }); - }) - .then(translated => { - state.font = translated.font; - translated.send(this.handler); - return translated.loadedName; - }); - }, + if (this.parsingType3Font) { + // In the very rare case where a Type3 image resource is being parsed, + // wait for the image to be both decoded *and* sent to simplify the + // rendering code on the main-thread (see issue10717.pdf). + await imgPromise; + } - handleText(chars, state) { - const font = state.font; - const glyphs = font.charsToGlyphs(chars); + operatorList.addOp(OPS.paintImageXObject, args); + if (cacheKey) { + localImageCache.set(cacheKey, imageRef, { + fn: OPS.paintImageXObject, + args, + }); - if (font.data) { - const isAddToPathSet = !!( - state.textRenderingMode & TextRenderingMode.ADD_TO_PATH_FLAG - ); - if ( - isAddToPathSet || - state.fillColorSpace.name === "Pattern" || - font.disableFontFace || - this.options.disableFontFace - ) { - PartialEvaluator.buildFontPaths(font, glyphs, this.handler); + if (imageRef) { + assert(!isInline, "Cannot cache an inline image globally."); + this.globalImageCache.addPageIndex(imageRef, this.pageIndex); + + if (cacheGlobally) { + this.globalImageCache.setData(imageRef, { + objId, + fn: OPS.paintImageXObject, + args, + }); } } - return glyphs; - }, + } + return undefined; + } - ensureStateFont(state) { - if (state.font) { - return; + handleSMask( + smask, + resources, + operatorList, + task, + stateManager, + localColorSpaceCache + ) { + var smaskContent = smask.get("G"); + var smaskOptions = { + subtype: smask.get("S").name, + backdrop: smask.get("BC"), + }; + + // The SMask might have a alpha/luminosity value transfer function -- + // we will build a map of integer values in range 0..255 to be fast. + var transferObj = smask.get("TR"); + if (isPDFFunction(transferObj)) { + const transferFn = this._pdfFunctionFactory.create(transferObj); + var transferMap = new Uint8Array(256); + var tmp = new Float32Array(1); + for (var i = 0; i < 256; i++) { + tmp[0] = i / 255; + transferFn(tmp, 0, tmp, 0); + transferMap[i] = (tmp[0] * 255) | 0; } - const reason = new FormatError( - "Missing setFont (Tf) operator before text rendering operator." - ); + smaskOptions.transferMap = transferMap; + } - if (this.options.ignoreErrors) { - // Missing setFont operator before text rendering operator -- sending - // unsupported feature notification and allow rendering to continue. - this.handler.send("UnsupportedFeature", { - featureId: UNSUPPORTED_FEATURES.errorFontState, - }); - warn(`ensureStateFont: "${reason}".`); - return; - } - throw reason; - }, - - setGState: function PartialEvaluator_setGState( + return this.buildFormXObject( resources, - gState, + smaskContent, + smaskOptions, operatorList, task, - stateManager, + stateManager.state.clone(), localColorSpaceCache - ) { - // This array holds the converted/processed state data. - var gStateObj = []; - var gStateKeys = gState.getKeys(); - var promise = Promise.resolve(); - for (var i = 0, ii = gStateKeys.length; i < ii; i++) { - const key = gStateKeys[i]; - const value = gState.get(key); - switch (key) { - case "Type": + ); + } + + handleTilingType( + fn, + args, + resources, + pattern, + patternDict, + operatorList, + task + ) { + // Create an IR of the pattern code. + const tilingOpList = new OperatorList(); + // Merge the available resources, to prevent issues when the patternDict + // is missing some /Resources entries (fixes issue6541.pdf). + const resourcesArray = [patternDict.get("Resources"), resources]; + const patternResources = Dict.merge(this.xref, resourcesArray); + + return this.getOperatorList({ + stream: pattern, + task, + resources: patternResources, + operatorList: tilingOpList, + }) + .then(function () { + return getTilingPatternIR( + { + fnArray: tilingOpList.fnArray, + argsArray: tilingOpList.argsArray, + }, + patternDict, + args + ); + }) + .then( + function (tilingPatternIR) { + // Add the dependencies to the parent operator list so they are + // resolved before the sub operator list is executed synchronously. + operatorList.addDependencies(tilingOpList.dependencies); + operatorList.addOp(fn, tilingPatternIR); + }, + reason => { + if (reason instanceof AbortException) { + return; + } + if (this.options.ignoreErrors) { + // Error(s) in the TilingPattern -- sending unsupported feature + // notification and allow rendering to continue. + this.handler.send("UnsupportedFeature", { + featureId: UNSUPPORTED_FEATURES.errorTilingPattern, + }); + warn(`handleTilingType - ignoring pattern: "${reason}".`); + return; + } + throw reason; + } + ); + } + + handleSetFont(resources, fontArgs, fontRef, operatorList, task, state) { + // TODO(mack): Not needed? + var fontName; + if (fontArgs) { + fontArgs = fontArgs.slice(); + fontName = fontArgs[0].name; + } + + return this.loadFont(fontName, fontRef, resources) + .then(translated => { + if (!translated.font.isType3Font) { + return translated; + } + return translated + .loadType3Data(this, resources, operatorList, task) + .then(function () { + return translated; + }) + .catch(reason => { + // Error in the font data -- sending unsupported feature + // notification. + this.handler.send("UnsupportedFeature", { + featureId: UNSUPPORTED_FEATURES.errorFontLoadType3, + }); + return new TranslatedFont({ + loadedName: "g_font_error", + font: new ErrorFont(`Type3 font load error: ${reason}`), + dict: translated.font, + extraProperties: this.options.fontExtraProperties, + }); + }); + }) + .then(translated => { + state.font = translated.font; + translated.send(this.handler); + return translated.loadedName; + }); + } + + handleText(chars, state) { + const font = state.font; + const glyphs = font.charsToGlyphs(chars); + + if (font.data) { + const isAddToPathSet = !!( + state.textRenderingMode & TextRenderingMode.ADD_TO_PATH_FLAG + ); + if ( + isAddToPathSet || + state.fillColorSpace.name === "Pattern" || + font.disableFontFace || + this.options.disableFontFace + ) { + PartialEvaluator.buildFontPaths(font, glyphs, this.handler); + } + } + return glyphs; + } + + ensureStateFont(state) { + if (state.font) { + return; + } + const reason = new FormatError( + "Missing setFont (Tf) operator before text rendering operator." + ); + + if (this.options.ignoreErrors) { + // Missing setFont operator before text rendering operator -- sending + // unsupported feature notification and allow rendering to continue. + this.handler.send("UnsupportedFeature", { + featureId: UNSUPPORTED_FEATURES.errorFontState, + }); + warn(`ensureStateFont: "${reason}".`); + return; + } + throw reason; + } + + setGState( + resources, + gState, + operatorList, + task, + stateManager, + localColorSpaceCache + ) { + // This array holds the converted/processed state data. + var gStateObj = []; + var gStateKeys = gState.getKeys(); + var promise = Promise.resolve(); + for (var i = 0, ii = gStateKeys.length; i < ii; i++) { + const key = gStateKeys[i]; + const value = gState.get(key); + switch (key) { + case "Type": + break; + case "LW": + case "LC": + case "LJ": + case "ML": + case "D": + case "RI": + case "FL": + case "CA": + case "ca": + gStateObj.push([key, value]); + break; + case "Font": + promise = promise.then(() => { + return this.handleSetFont( + resources, + null, + value[0], + operatorList, + task, + stateManager.state + ).then(function (loadedName) { + operatorList.addDependency(loadedName); + gStateObj.push([key, [loadedName, value[1]]]); + }); + }); + break; + case "BM": + gStateObj.push([key, normalizeBlendMode(value)]); + break; + case "SMask": + if (isName(value, "None")) { + gStateObj.push([key, false]); break; - case "LW": - case "LC": - case "LJ": - case "ML": - case "D": - case "RI": - case "FL": - case "CA": - case "ca": - gStateObj.push([key, value]); - break; - case "Font": + } + if (isDict(value)) { promise = promise.then(() => { - return this.handleSetFont( + return this.handleSMask( + value, resources, - null, - value[0], operatorList, task, - stateManager.state - ).then(function (loadedName) { - operatorList.addDependency(loadedName); - gStateObj.push([key, [loadedName, value[1]]]); - }); + stateManager, + localColorSpaceCache + ); }); - break; - case "BM": - gStateObj.push([key, normalizeBlendMode(value)]); - break; - case "SMask": - if (isName(value, "None")) { - gStateObj.push([key, false]); - break; - } - if (isDict(value)) { - promise = promise.then(() => { - return this.handleSMask( - value, - resources, - operatorList, - task, - stateManager, - localColorSpaceCache - ); - }); - gStateObj.push([key, true]); - } else { - warn("Unsupported SMask type"); - } + gStateObj.push([key, true]); + } else { + warn("Unsupported SMask type"); + } - break; - // Only generate info log messages for the following since - // they are unlikely to have a big impact on the rendering. - case "OP": - case "op": - case "OPM": - case "BG": - case "BG2": - case "UCR": - case "UCR2": - case "TR": - case "TR2": - case "HT": - case "SM": - case "SA": - case "AIS": - case "TK": - // TODO implement these operators. - info("graphic state operator " + key); - break; - default: - info("Unknown graphic state operator " + key); - break; - } + break; + // Only generate info log messages for the following since + // they are unlikely to have a big impact on the rendering. + case "OP": + case "op": + case "OPM": + case "BG": + case "BG2": + case "UCR": + case "UCR2": + case "TR": + case "TR2": + case "HT": + case "SM": + case "SA": + case "AIS": + case "TK": + // TODO implement these operators. + info("graphic state operator " + key); + break; + default: + info("Unknown graphic state operator " + key); + break; } - return promise.then(function () { - if (gStateObj.length > 0) { - operatorList.addOp(OPS.setGState, [gStateObj]); - } - }); - }, + } + return promise.then(function () { + if (gStateObj.length > 0) { + operatorList.addOp(OPS.setGState, [gStateObj]); + } + }); + } - loadFont: function PartialEvaluator_loadFont(fontName, font, resources) { - const errorFont = () => { - return Promise.resolve( + loadFont(fontName, font, resources) { + const errorFont = () => { + return Promise.resolve( + new TranslatedFont({ + loadedName: "g_font_error", + font: new ErrorFont(`Font "${fontName}" is not available.`), + dict: font, + extraProperties: this.options.fontExtraProperties, + }) + ); + }; + + var fontRef, + xref = this.xref; + if (font) { + // Loading by ref. + if (!isRef(font)) { + throw new FormatError('The "font" object should be a reference.'); + } + fontRef = font; + } else { + // Loading by name. + var fontRes = resources.get("Font"); + if (fontRes) { + fontRef = fontRes.getRaw(fontName); + } + } + if (!fontRef) { + const partialMsg = `Font "${ + fontName || (font && font.toString()) + }" is not available`; + + if (!this.options.ignoreErrors && !this.parsingType3Font) { + warn(`${partialMsg}.`); + return errorFont(); + } + // Font not found -- sending unsupported feature notification. + this.handler.send("UnsupportedFeature", { + featureId: UNSUPPORTED_FEATURES.errorFontMissing, + }); + warn(`${partialMsg} -- attempting to fallback to a default font.`); + + // Falling back to a default font to avoid completely broken rendering, + // but note that there're no guarantees that things will look "correct". + fontRef = PartialEvaluator.fallbackFontDict; + } + + if (this.fontCache.has(fontRef)) { + return this.fontCache.get(fontRef); + } + + font = xref.fetchIfRef(fontRef); + if (!isDict(font)) { + return errorFont(); + } + + // We are holding `font.translated` references just for `fontRef`s that + // are not actually `Ref`s, but rather `Dict`s. See explanation below. + if (font.translated) { + return font.translated; + } + + var fontCapability = createPromiseCapability(); + + var preEvaluatedFont = this.preEvaluateFont(font); + const { descriptor, hash } = preEvaluatedFont; + + var fontRefIsRef = isRef(fontRef), + fontID; + if (fontRefIsRef) { + fontID = fontRef.toString(); + } + + if (hash && isDict(descriptor)) { + if (!descriptor.fontAliases) { + descriptor.fontAliases = Object.create(null); + } + var fontAliases = descriptor.fontAliases; + + if (fontAliases[hash]) { + var aliasFontRef = fontAliases[hash].aliasRef; + if (fontRefIsRef && aliasFontRef && this.fontCache.has(aliasFontRef)) { + this.fontCache.putAlias(fontRef, aliasFontRef); + return this.fontCache.get(fontRef); + } + } else { + fontAliases[hash] = { + fontID: Font.getFontID(), + }; + } + + if (fontRefIsRef) { + fontAliases[hash].aliasRef = fontRef; + } + fontID = fontAliases[hash].fontID; + } + + // Workaround for bad PDF generators that reference fonts incorrectly, + // where `fontRef` is a `Dict` rather than a `Ref` (fixes bug946506.pdf). + // In this case we should not put the font into `this.fontCache` (which is + // a `RefSetCache`), since it's not meaningful to use a `Dict` as a key. + // + // However, if we don't cache the font it's not possible to remove it + // when `cleanup` is triggered from the API, which causes issues on + // subsequent rendering operations (see issue7403.pdf). + // A simple workaround would be to just not hold `font.translated` + // references in this case, but this would force us to unnecessarily load + // the same fonts over and over. + // + // Instead, we cheat a bit by attempting to use a modified `fontID` as a + // key in `this.fontCache`, to allow the font to be cached. + // NOTE: This works because `RefSetCache` calls `toString()` on provided + // keys. Also, since `fontRef` is used when getting cached fonts, + // we'll not accidentally match fonts cached with the `fontID`. + if (fontRefIsRef) { + this.fontCache.put(fontRef, fontCapability.promise); + } else { + if (!fontID) { + fontID = this.idFactory.createObjId(); + } + this.fontCache.put(`id_${fontID}`, fontCapability.promise); + } + assert(fontID, 'The "fontID" must be defined.'); + + // Keep track of each font we translated so the caller can + // load them asynchronously before calling display on a page. + font.loadedName = `${this.idFactory.getDocId()}_f${fontID}`; + + font.translated = fontCapability.promise; + + // TODO move promises into translate font + var translatedPromise; + try { + translatedPromise = this.translateFont(preEvaluatedFont); + } catch (e) { + translatedPromise = Promise.reject(e); + } + + translatedPromise + .then(translatedFont => { + if (translatedFont.fontType !== undefined) { + var xrefFontStats = xref.stats.fontTypes; + xrefFontStats[translatedFont.fontType] = true; + } + + fontCapability.resolve( new TranslatedFont({ - loadedName: "g_font_error", - font: new ErrorFont(`Font "${fontName}" is not available.`), + loadedName: font.loadedName, + font: translatedFont, dict: font, extraProperties: this.options.fontExtraProperties, }) ); - }; - - var fontRef, - xref = this.xref; - if (font) { - // Loading by ref. - if (!isRef(font)) { - throw new FormatError('The "font" object should be a reference.'); - } - fontRef = font; - } else { - // Loading by name. - var fontRes = resources.get("Font"); - if (fontRes) { - fontRef = fontRes.getRaw(fontName); - } - } - if (!fontRef) { - const partialMsg = `Font "${ - fontName || (font && font.toString()) - }" is not available`; - - if (!this.options.ignoreErrors && !this.parsingType3Font) { - warn(`${partialMsg}.`); - return errorFont(); - } - // Font not found -- sending unsupported feature notification. + }) + .catch(reason => { + // TODO fontCapability.reject? + // Error in the font data -- sending unsupported feature notification. this.handler.send("UnsupportedFeature", { - featureId: UNSUPPORTED_FEATURES.errorFontMissing, + featureId: UNSUPPORTED_FEATURES.errorFontTranslate, }); - warn(`${partialMsg} -- attempting to fallback to a default font.`); - // Falling back to a default font to avoid completely broken rendering, - // but note that there're no guarantees that things will look "correct". - fontRef = PartialEvaluator.getFallbackFontDict(); - } - - if (this.fontCache.has(fontRef)) { - return this.fontCache.get(fontRef); - } - - font = xref.fetchIfRef(fontRef); - if (!isDict(font)) { - return errorFont(); - } - - // We are holding `font.translated` references just for `fontRef`s that - // are not actually `Ref`s, but rather `Dict`s. See explanation below. - if (font.translated) { - return font.translated; - } - - var fontCapability = createPromiseCapability(); - - var preEvaluatedFont = this.preEvaluateFont(font); - const { descriptor, hash } = preEvaluatedFont; - - var fontRefIsRef = isRef(fontRef), - fontID; - if (fontRefIsRef) { - fontID = fontRef.toString(); - } - - if (hash && isDict(descriptor)) { - if (!descriptor.fontAliases) { - descriptor.fontAliases = Object.create(null); - } - var fontAliases = descriptor.fontAliases; - - if (fontAliases[hash]) { - var aliasFontRef = fontAliases[hash].aliasRef; - if ( - fontRefIsRef && - aliasFontRef && - this.fontCache.has(aliasFontRef) - ) { - this.fontCache.putAlias(fontRef, aliasFontRef); - return this.fontCache.get(fontRef); - } - } else { - fontAliases[hash] = { - fontID: Font.getFontID(), - }; - } - - if (fontRefIsRef) { - fontAliases[hash].aliasRef = fontRef; - } - fontID = fontAliases[hash].fontID; - } - - // Workaround for bad PDF generators that reference fonts incorrectly, - // where `fontRef` is a `Dict` rather than a `Ref` (fixes bug946506.pdf). - // In this case we should not put the font into `this.fontCache` (which is - // a `RefSetCache`), since it's not meaningful to use a `Dict` as a key. - // - // However, if we don't cache the font it's not possible to remove it - // when `cleanup` is triggered from the API, which causes issues on - // subsequent rendering operations (see issue7403.pdf). - // A simple workaround would be to just not hold `font.translated` - // references in this case, but this would force us to unnecessarily load - // the same fonts over and over. - // - // Instead, we cheat a bit by attempting to use a modified `fontID` as a - // key in `this.fontCache`, to allow the font to be cached. - // NOTE: This works because `RefSetCache` calls `toString()` on provided - // keys. Also, since `fontRef` is used when getting cached fonts, - // we'll not accidentally match fonts cached with the `fontID`. - if (fontRefIsRef) { - this.fontCache.put(fontRef, fontCapability.promise); - } else { - if (!fontID) { - fontID = this.idFactory.createObjId(); - } - this.fontCache.put(`id_${fontID}`, fontCapability.promise); - } - assert(fontID, 'The "fontID" must be defined.'); - - // Keep track of each font we translated so the caller can - // load them asynchronously before calling display on a page. - font.loadedName = `${this.idFactory.getDocId()}_f${fontID}`; - - font.translated = fontCapability.promise; - - // TODO move promises into translate font - var translatedPromise; - try { - translatedPromise = this.translateFont(preEvaluatedFont); - } catch (e) { - translatedPromise = Promise.reject(e); - } - - translatedPromise - .then(translatedFont => { - if (translatedFont.fontType !== undefined) { - var xrefFontStats = xref.stats.fontTypes; - xrefFontStats[translatedFont.fontType] = true; - } - - fontCapability.resolve( - new TranslatedFont({ - loadedName: font.loadedName, - font: translatedFont, - dict: font, - extraProperties: this.options.fontExtraProperties, - }) + try { + // error, but it's still nice to have font type reported + var fontFile3 = descriptor && descriptor.get("FontFile3"); + var subtype = fontFile3 && fontFile3.get("Subtype"); + var fontType = getFontType( + preEvaluatedFont.type, + subtype && subtype.name ); - }) - .catch(reason => { - // TODO fontCapability.reject? - // Error in the font data -- sending unsupported feature notification. - this.handler.send("UnsupportedFeature", { - featureId: UNSUPPORTED_FEATURES.errorFontTranslate, - }); + var xrefFontStats = xref.stats.fontTypes; + xrefFontStats[fontType] = true; + } catch (ex) {} - try { - // error, but it's still nice to have font type reported - var fontFile3 = descriptor && descriptor.get("FontFile3"); - var subtype = fontFile3 && fontFile3.get("Subtype"); - var fontType = getFontType( - preEvaluatedFont.type, - subtype && subtype.name - ); - var xrefFontStats = xref.stats.fontTypes; - xrefFontStats[fontType] = true; - } catch (ex) {} - - fontCapability.resolve( - new TranslatedFont({ - loadedName: font.loadedName, - font: new ErrorFont( - reason instanceof Error ? reason.message : reason - ), - dict: font, - extraProperties: this.options.fontExtraProperties, - }) - ); - }); - return fontCapability.promise; - }, - - buildPath(operatorList, fn, args, parsingText = false) { - var lastIndex = operatorList.length - 1; - if (!args) { - args = []; - } - if ( - lastIndex < 0 || - operatorList.fnArray[lastIndex] !== OPS.constructPath - ) { - // Handle corrupt PDF documents that contains path operators inside of - // text objects, which may shift subsequent text, by enclosing the path - // operator in save/restore operators (fixes issue10542_reduced.pdf). - // - // Note that this will effectively disable the optimization in the - // `else` branch below, but given that this type of corruption is - // *extremely* rare that shouldn't really matter much in practice. - if (parsingText) { - warn(`Encountered path operator "${fn}" inside of a text object.`); - operatorList.addOp(OPS.save, null); - } - - operatorList.addOp(OPS.constructPath, [[fn], args]); - - if (parsingText) { - operatorList.addOp(OPS.restore, null); - } - } else { - var opArgs = operatorList.argsArray[lastIndex]; - opArgs[0].push(fn); - Array.prototype.push.apply(opArgs[1], args); - } - }, - - parseColorSpace({ cs, resources, localColorSpaceCache }) { - return ColorSpace.parseAsync({ - cs, - xref: this.xref, - resources, - pdfFunctionFactory: this._pdfFunctionFactory, - localColorSpaceCache, - }).catch(reason => { - if (reason instanceof AbortException) { - return null; - } - if (this.options.ignoreErrors) { - // Error(s) in the ColorSpace -- sending unsupported feature - // notification and allow rendering to continue. - this.handler.send("UnsupportedFeature", { - featureId: UNSUPPORTED_FEATURES.errorColorSpace, - }); - warn(`parseColorSpace - ignoring ColorSpace: "${reason}".`); - return null; - } - throw reason; + fontCapability.resolve( + new TranslatedFont({ + loadedName: font.loadedName, + font: new ErrorFont( + reason instanceof Error ? reason.message : reason + ), + dict: font, + extraProperties: this.options.fontExtraProperties, + }) + ); }); - }, + return fontCapability.promise; + } - async handleColorN( - operatorList, - fn, - args, - cs, - patterns, - resources, - task, - localColorSpaceCache + buildPath(operatorList, fn, args, parsingText = false) { + var lastIndex = operatorList.length - 1; + if (!args) { + args = []; + } + if ( + lastIndex < 0 || + operatorList.fnArray[lastIndex] !== OPS.constructPath ) { - // compile tiling patterns - var patternName = args[args.length - 1]; - // SCN/scn applies patterns along with normal colors - var pattern; - if (isName(patternName) && (pattern = patterns.get(patternName.name))) { - var dict = isStream(pattern) ? pattern.dict : pattern; - var typeNum = dict.get("PatternType"); - - if (typeNum === PatternType.TILING) { - var color = cs.base ? cs.base.getRgb(args, 0) : null; - return this.handleTilingType( - fn, - color, - resources, - pattern, - dict, - operatorList, - task - ); - } else if (typeNum === PatternType.SHADING) { - var shading = dict.get("Shading"); - var matrix = dict.getArray("Matrix"); - pattern = Pattern.parseShading( - shading, - matrix, - this.xref, - resources, - this.handler, - this._pdfFunctionFactory, - localColorSpaceCache - ); - operatorList.addOp(fn, pattern.getIR()); - return undefined; - } - throw new FormatError(`Unknown PatternType: ${typeNum}`); + // Handle corrupt PDF documents that contains path operators inside of + // text objects, which may shift subsequent text, by enclosing the path + // operator in save/restore operators (fixes issue10542_reduced.pdf). + // + // Note that this will effectively disable the optimization in the + // `else` branch below, but given that this type of corruption is + // *extremely* rare that shouldn't really matter much in practice. + if (parsingText) { + warn(`Encountered path operator "${fn}" inside of a text object.`); + operatorList.addOp(OPS.save, null); } - throw new FormatError(`Unknown PatternName: ${patternName}`); - }, - getOperatorList({ - stream, - task, + operatorList.addOp(OPS.constructPath, [[fn], args]); + + if (parsingText) { + operatorList.addOp(OPS.restore, null); + } + } else { + var opArgs = operatorList.argsArray[lastIndex]; + opArgs[0].push(fn); + Array.prototype.push.apply(opArgs[1], args); + } + } + + parseColorSpace({ cs, resources, localColorSpaceCache }) { + return ColorSpace.parseAsync({ + cs, + xref: this.xref, resources, - operatorList, - initialState = null, - }) { - // Ensure that `resources`/`initialState` is correctly initialized, - // even if the provided parameter is e.g. `null`. - resources = resources || Dict.empty; - initialState = initialState || new EvalState(); - - if (!operatorList) { - throw new Error('getOperatorList: missing "operatorList" parameter'); + pdfFunctionFactory: this._pdfFunctionFactory, + localColorSpaceCache, + }).catch(reason => { + if (reason instanceof AbortException) { + return null; } - - var self = this; - var xref = this.xref; - let parsingText = false; - const localImageCache = new LocalImageCache(); - const localColorSpaceCache = new LocalColorSpaceCache(); - - var xobjs = resources.get("XObject") || Dict.empty; - var patterns = resources.get("Pattern") || Dict.empty; - var stateManager = new StateManager(initialState); - var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager); - var timeSlotManager = new TimeSlotManager(); - - function closePendingRestoreOPS(argument) { - for (var i = 0, ii = preprocessor.savedStatesDepth; i < ii; i++) { - operatorList.addOp(OPS.restore, []); - } + if (this.options.ignoreErrors) { + // Error(s) in the ColorSpace -- sending unsupported feature + // notification and allow rendering to continue. + this.handler.send("UnsupportedFeature", { + featureId: UNSUPPORTED_FEATURES.errorColorSpace, + }); + warn(`parseColorSpace - ignoring ColorSpace: "${reason}".`); + return null; } + throw reason; + }); + } - return new Promise(function promiseBody(resolve, reject) { - const next = function (promise) { - Promise.all([promise, operatorList.ready]).then(function () { - try { - promiseBody(resolve, reject); - } catch (ex) { - reject(ex); - } - }, reject); - }; - task.ensureNotTerminated(); - timeSlotManager.reset(); - var stop, - operation = {}, - i, - ii, - cs; - while (!(stop = timeSlotManager.check())) { - // The arguments parsed by read() are used beyond this loop, so we - // cannot reuse the same array on each iteration. Therefore we pass - // in |null| as the initial value (see the comment on - // EvaluatorPreprocessor_read() for why). - operation.args = null; - if (!preprocessor.read(operation)) { - break; + async handleColorN( + operatorList, + fn, + args, + cs, + patterns, + resources, + task, + localColorSpaceCache + ) { + // compile tiling patterns + var patternName = args[args.length - 1]; + // SCN/scn applies patterns along with normal colors + var pattern; + if (isName(patternName) && (pattern = patterns.get(patternName.name))) { + var dict = isStream(pattern) ? pattern.dict : pattern; + var typeNum = dict.get("PatternType"); + + if (typeNum === PatternType.TILING) { + var color = cs.base ? cs.base.getRgb(args, 0) : null; + return this.handleTilingType( + fn, + color, + resources, + pattern, + dict, + operatorList, + task + ); + } else if (typeNum === PatternType.SHADING) { + var shading = dict.get("Shading"); + var matrix = dict.getArray("Matrix"); + pattern = Pattern.parseShading( + shading, + matrix, + this.xref, + resources, + this.handler, + this._pdfFunctionFactory, + localColorSpaceCache + ); + operatorList.addOp(fn, pattern.getIR()); + return undefined; + } + throw new FormatError(`Unknown PatternType: ${typeNum}`); + } + throw new FormatError(`Unknown PatternName: ${patternName}`); + } + + getOperatorList({ + stream, + task, + resources, + operatorList, + initialState = null, + }) { + // Ensure that `resources`/`initialState` is correctly initialized, + // even if the provided parameter is e.g. `null`. + resources = resources || Dict.empty; + initialState = initialState || new EvalState(); + + if (!operatorList) { + throw new Error('getOperatorList: missing "operatorList" parameter'); + } + + var self = this; + var xref = this.xref; + let parsingText = false; + const localImageCache = new LocalImageCache(); + const localColorSpaceCache = new LocalColorSpaceCache(); + + var xobjs = resources.get("XObject") || Dict.empty; + var patterns = resources.get("Pattern") || Dict.empty; + var stateManager = new StateManager(initialState); + var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager); + var timeSlotManager = new TimeSlotManager(); + + function closePendingRestoreOPS(argument) { + for (var i = 0, ii = preprocessor.savedStatesDepth; i < ii; i++) { + operatorList.addOp(OPS.restore, []); + } + } + + return new Promise(function promiseBody(resolve, reject) { + const next = function (promise) { + Promise.all([promise, operatorList.ready]).then(function () { + try { + promiseBody(resolve, reject); + } catch (ex) { + reject(ex); } - var args = operation.args; - var fn = operation.fn; + }, reject); + }; + task.ensureNotTerminated(); + timeSlotManager.reset(); + var stop, + operation = {}, + i, + ii, + cs; + while (!(stop = timeSlotManager.check())) { + // The arguments parsed by read() are used beyond this loop, so we + // cannot reuse the same array on each iteration. Therefore we pass + // in |null| as the initial value (see the comment on + // EvaluatorPreprocessor_read() for why). + operation.args = null; + if (!preprocessor.read(operation)) { + break; + } + var args = operation.args; + var fn = operation.fn; - switch (fn | 0) { - case OPS.paintXObject: - // eagerly compile XForm objects - var name = args[0].name; - if (name) { - const localImage = localImageCache.getByName(name); - if (localImage) { - operatorList.addOp(localImage.fn, localImage.args); - args = null; - continue; - } + switch (fn | 0) { + case OPS.paintXObject: + // eagerly compile XForm objects + var name = args[0].name; + if (name) { + const localImage = localImageCache.getByName(name); + if (localImage) { + operatorList.addOp(localImage.fn, localImage.args); + args = null; + continue; } + } - next( - new Promise(function (resolveXObject, rejectXObject) { - if (!name) { - throw new FormatError( - "XObject must be referred to by name." - ); - } + next( + new Promise(function (resolveXObject, rejectXObject) { + if (!name) { + throw new FormatError("XObject must be referred to by name."); + } - let xobj = xobjs.getRaw(name); - if (xobj instanceof Ref) { - const localImage = localImageCache.getByRef(xobj); - if (localImage) { - operatorList.addOp(localImage.fn, localImage.args); + let xobj = xobjs.getRaw(name); + if (xobj instanceof Ref) { + const localImage = localImageCache.getByRef(xobj); + if (localImage) { + operatorList.addOp(localImage.fn, localImage.args); - resolveXObject(); - return; - } - - const globalImage = self.globalImageCache.getData( - xobj, - self.pageIndex - ); - if (globalImage) { - operatorList.addDependency(globalImage.objId); - operatorList.addOp(globalImage.fn, globalImage.args); - - resolveXObject(); - return; - } - - xobj = xref.fetch(xobj); - } - - if (!xobj) { - operatorList.addOp(fn, args); resolveXObject(); return; } - if (!isStream(xobj)) { - throw new FormatError("XObject should be a stream"); - } - const type = xobj.dict.get("Subtype"); - if (!isName(type)) { - throw new FormatError("XObject should have a Name subtype"); - } - - if (type.name === "Form") { - stateManager.save(); - self - .buildFormXObject( - resources, - xobj, - null, - operatorList, - task, - stateManager.state.clone(), - localColorSpaceCache - ) - .then(function () { - stateManager.restore(); - resolveXObject(); - }, rejectXObject); - return; - } else if (type.name === "Image") { - self - .buildPaintImageXObject({ - resources, - image: xobj, - operatorList, - cacheKey: name, - localImageCache, - localColorSpaceCache, - }) - .then(resolveXObject, rejectXObject); - return; - } else if (type.name === "PS") { - // PostScript XObjects are unused when viewing documents. - // See section 4.7.1 of Adobe's PDF reference. - info("Ignored XObject subtype PS"); - } else { - throw new FormatError( - `Unhandled XObject subtype ${type.name}` - ); - } - resolveXObject(); - }).catch(function (reason) { - if (reason instanceof AbortException) { - return; - } - if (self.options.ignoreErrors) { - // Error(s) in the XObject -- sending unsupported feature - // notification and allow rendering to continue. - self.handler.send("UnsupportedFeature", { - featureId: UNSUPPORTED_FEATURES.errorXObject, - }); - warn(`getOperatorList - ignoring XObject: "${reason}".`); - return; - } - throw reason; - }) - ); - return; - case OPS.setFont: - var fontSize = args[1]; - // eagerly collect all fonts - next( - self - .handleSetFont( - resources, - args, - null, - operatorList, - task, - stateManager.state - ) - .then(function (loadedName) { - operatorList.addDependency(loadedName); - operatorList.addOp(OPS.setFont, [loadedName, fontSize]); - }) - ); - return; - case OPS.beginText: - parsingText = true; - break; - case OPS.endText: - parsingText = false; - break; - case OPS.endInlineImage: - var cacheKey = args[0].cacheKey; - if (cacheKey) { - const localImage = localImageCache.getByName(cacheKey); - if (localImage) { - operatorList.addOp(localImage.fn, localImage.args); - args = null; - continue; - } - } - next( - self.buildPaintImageXObject({ - resources, - image: args[0], - isInline: true, - operatorList, - cacheKey, - localImageCache, - localColorSpaceCache, - }) - ); - return; - case OPS.showText: - if (!stateManager.state.font) { - self.ensureStateFont(stateManager.state); - continue; - } - args[0] = self.handleText(args[0], stateManager.state); - break; - case OPS.showSpacedText: - if (!stateManager.state.font) { - self.ensureStateFont(stateManager.state); - continue; - } - var arr = args[0]; - var combinedGlyphs = []; - var arrLength = arr.length; - var state = stateManager.state; - for (i = 0; i < arrLength; ++i) { - var arrItem = arr[i]; - if (isString(arrItem)) { - Array.prototype.push.apply( - combinedGlyphs, - self.handleText(arrItem, state) + const globalImage = self.globalImageCache.getData( + xobj, + self.pageIndex ); - } else if (isNum(arrItem)) { - combinedGlyphs.push(arrItem); + if (globalImage) { + operatorList.addDependency(globalImage.objId); + operatorList.addOp(globalImage.fn, globalImage.args); + + resolveXObject(); + return; + } + + xobj = xref.fetch(xobj); } - } - args[0] = combinedGlyphs; - fn = OPS.showText; - break; - case OPS.nextLineShowText: - if (!stateManager.state.font) { - self.ensureStateFont(stateManager.state); - continue; - } - operatorList.addOp(OPS.nextLine); - args[0] = self.handleText(args[0], stateManager.state); - fn = OPS.showText; - break; - case OPS.nextLineSetSpacingShowText: - if (!stateManager.state.font) { - self.ensureStateFont(stateManager.state); - continue; - } - operatorList.addOp(OPS.nextLine); - operatorList.addOp(OPS.setWordSpacing, [args.shift()]); - operatorList.addOp(OPS.setCharSpacing, [args.shift()]); - args[0] = self.handleText(args[0], stateManager.state); - fn = OPS.showText; - break; - case OPS.setTextRenderingMode: - stateManager.state.textRenderingMode = args[0]; - break; - case OPS.setFillColorSpace: { - const cachedColorSpace = ColorSpace.getCached( - args[0], - xref, - localColorSpaceCache - ); - if (cachedColorSpace) { - stateManager.state.fillColorSpace = cachedColorSpace; - continue; - } + if (!xobj) { + operatorList.addOp(fn, args); + resolveXObject(); + return; + } + if (!isStream(xobj)) { + throw new FormatError("XObject should be a stream"); + } - next( - self - .parseColorSpace({ - cs: args[0], - resources, - localColorSpaceCache, - }) - .then(function (colorSpace) { - if (colorSpace) { - stateManager.state.fillColorSpace = colorSpace; - } - }) - ); - return; - } - case OPS.setStrokeColorSpace: { - const cachedColorSpace = ColorSpace.getCached( - args[0], - xref, - localColorSpaceCache - ); - if (cachedColorSpace) { - stateManager.state.strokeColorSpace = cachedColorSpace; - continue; - } + const type = xobj.dict.get("Subtype"); + if (!isName(type)) { + throw new FormatError("XObject should have a Name subtype"); + } - next( - self - .parseColorSpace({ - cs: args[0], - resources, - localColorSpaceCache, - }) - .then(function (colorSpace) { - if (colorSpace) { - stateManager.state.strokeColorSpace = colorSpace; - } - }) - ); - return; - } - case OPS.setFillColor: - cs = stateManager.state.fillColorSpace; - args = cs.getRgb(args, 0); - fn = OPS.setFillRGBColor; - break; - case OPS.setStrokeColor: - cs = stateManager.state.strokeColorSpace; - args = cs.getRgb(args, 0); - fn = OPS.setStrokeRGBColor; - break; - case OPS.setFillGray: - stateManager.state.fillColorSpace = ColorSpace.singletons.gray; - args = ColorSpace.singletons.gray.getRgb(args, 0); - fn = OPS.setFillRGBColor; - break; - case OPS.setStrokeGray: - stateManager.state.strokeColorSpace = ColorSpace.singletons.gray; - args = ColorSpace.singletons.gray.getRgb(args, 0); - fn = OPS.setStrokeRGBColor; - break; - case OPS.setFillCMYKColor: - stateManager.state.fillColorSpace = ColorSpace.singletons.cmyk; - args = ColorSpace.singletons.cmyk.getRgb(args, 0); - fn = OPS.setFillRGBColor; - break; - case OPS.setStrokeCMYKColor: - stateManager.state.strokeColorSpace = ColorSpace.singletons.cmyk; - args = ColorSpace.singletons.cmyk.getRgb(args, 0); - fn = OPS.setStrokeRGBColor; - break; - case OPS.setFillRGBColor: - stateManager.state.fillColorSpace = ColorSpace.singletons.rgb; - args = ColorSpace.singletons.rgb.getRgb(args, 0); - break; - case OPS.setStrokeRGBColor: - stateManager.state.strokeColorSpace = ColorSpace.singletons.rgb; - args = ColorSpace.singletons.rgb.getRgb(args, 0); - break; - case OPS.setFillColorN: - cs = stateManager.state.fillColorSpace; - if (cs.name === "Pattern") { - next( - self.handleColorN( - operatorList, - OPS.setFillColorN, - args, - cs, - patterns, - resources, - task, - localColorSpaceCache - ) - ); - return; - } - args = cs.getRgb(args, 0); - fn = OPS.setFillRGBColor; - break; - case OPS.setStrokeColorN: - cs = stateManager.state.strokeColorSpace; - if (cs.name === "Pattern") { - next( - self.handleColorN( - operatorList, - OPS.setStrokeColorN, - args, - cs, - patterns, - resources, - task, - localColorSpaceCache - ) - ); - return; - } - args = cs.getRgb(args, 0); - fn = OPS.setStrokeRGBColor; - break; - - case OPS.shadingFill: - var shadingRes = resources.get("Shading"); - if (!shadingRes) { - throw new FormatError("No shading resource found"); - } - - var shading = shadingRes.get(args[0].name); - if (!shading) { - throw new FormatError("No shading object found"); - } - - var shadingFill = Pattern.parseShading( - shading, - null, - xref, - resources, - self.handler, - self._pdfFunctionFactory, - localColorSpaceCache - ); - var patternIR = shadingFill.getIR(); - args = [patternIR]; - fn = OPS.shadingFill; - break; - case OPS.setGState: - var dictName = args[0]; - var extGState = resources.get("ExtGState"); - - if (!isDict(extGState) || !extGState.has(dictName.name)) { - break; - } - - var gState = extGState.get(dictName.name); - next( - self.setGState( + if (type.name === "Form") { + stateManager.save(); + self + .buildFormXObject( + resources, + xobj, + null, + operatorList, + task, + stateManager.state.clone(), + localColorSpaceCache + ) + .then(function () { + stateManager.restore(); + resolveXObject(); + }, rejectXObject); + return; + } else if (type.name === "Image") { + self + .buildPaintImageXObject({ + resources, + image: xobj, + operatorList, + cacheKey: name, + localImageCache, + localColorSpaceCache, + }) + .then(resolveXObject, rejectXObject); + return; + } else if (type.name === "PS") { + // PostScript XObjects are unused when viewing documents. + // See section 4.7.1 of Adobe's PDF reference. + info("Ignored XObject subtype PS"); + } else { + throw new FormatError( + `Unhandled XObject subtype ${type.name}` + ); + } + resolveXObject(); + }).catch(function (reason) { + if (reason instanceof AbortException) { + return; + } + if (self.options.ignoreErrors) { + // Error(s) in the XObject -- sending unsupported feature + // notification and allow rendering to continue. + self.handler.send("UnsupportedFeature", { + featureId: UNSUPPORTED_FEATURES.errorXObject, + }); + warn(`getOperatorList - ignoring XObject: "${reason}".`); + return; + } + throw reason; + }) + ); + return; + case OPS.setFont: + var fontSize = args[1]; + // eagerly collect all fonts + next( + self + .handleSetFont( resources, - gState, + args, + null, operatorList, task, - stateManager, + stateManager.state + ) + .then(function (loadedName) { + operatorList.addDependency(loadedName); + operatorList.addOp(OPS.setFont, [loadedName, fontSize]); + }) + ); + return; + case OPS.beginText: + parsingText = true; + break; + case OPS.endText: + parsingText = false; + break; + case OPS.endInlineImage: + var cacheKey = args[0].cacheKey; + if (cacheKey) { + const localImage = localImageCache.getByName(cacheKey); + if (localImage) { + operatorList.addOp(localImage.fn, localImage.args); + args = null; + continue; + } + } + next( + self.buildPaintImageXObject({ + resources, + image: args[0], + isInline: true, + operatorList, + cacheKey, + localImageCache, + localColorSpaceCache, + }) + ); + return; + case OPS.showText: + if (!stateManager.state.font) { + self.ensureStateFont(stateManager.state); + continue; + } + args[0] = self.handleText(args[0], stateManager.state); + break; + case OPS.showSpacedText: + if (!stateManager.state.font) { + self.ensureStateFont(stateManager.state); + continue; + } + var arr = args[0]; + var combinedGlyphs = []; + var arrLength = arr.length; + var state = stateManager.state; + for (i = 0; i < arrLength; ++i) { + var arrItem = arr[i]; + if (isString(arrItem)) { + Array.prototype.push.apply( + combinedGlyphs, + self.handleText(arrItem, state) + ); + } else if (isNum(arrItem)) { + combinedGlyphs.push(arrItem); + } + } + args[0] = combinedGlyphs; + fn = OPS.showText; + break; + case OPS.nextLineShowText: + if (!stateManager.state.font) { + self.ensureStateFont(stateManager.state); + continue; + } + operatorList.addOp(OPS.nextLine); + args[0] = self.handleText(args[0], stateManager.state); + fn = OPS.showText; + break; + case OPS.nextLineSetSpacingShowText: + if (!stateManager.state.font) { + self.ensureStateFont(stateManager.state); + continue; + } + operatorList.addOp(OPS.nextLine); + operatorList.addOp(OPS.setWordSpacing, [args.shift()]); + operatorList.addOp(OPS.setCharSpacing, [args.shift()]); + args[0] = self.handleText(args[0], stateManager.state); + fn = OPS.showText; + break; + case OPS.setTextRenderingMode: + stateManager.state.textRenderingMode = args[0]; + break; + + case OPS.setFillColorSpace: { + const cachedColorSpace = ColorSpace.getCached( + args[0], + xref, + localColorSpaceCache + ); + if (cachedColorSpace) { + stateManager.state.fillColorSpace = cachedColorSpace; + continue; + } + + next( + self + .parseColorSpace({ + cs: args[0], + resources, + localColorSpaceCache, + }) + .then(function (colorSpace) { + if (colorSpace) { + stateManager.state.fillColorSpace = colorSpace; + } + }) + ); + return; + } + case OPS.setStrokeColorSpace: { + const cachedColorSpace = ColorSpace.getCached( + args[0], + xref, + localColorSpaceCache + ); + if (cachedColorSpace) { + stateManager.state.strokeColorSpace = cachedColorSpace; + continue; + } + + next( + self + .parseColorSpace({ + cs: args[0], + resources, + localColorSpaceCache, + }) + .then(function (colorSpace) { + if (colorSpace) { + stateManager.state.strokeColorSpace = colorSpace; + } + }) + ); + return; + } + case OPS.setFillColor: + cs = stateManager.state.fillColorSpace; + args = cs.getRgb(args, 0); + fn = OPS.setFillRGBColor; + break; + case OPS.setStrokeColor: + cs = stateManager.state.strokeColorSpace; + args = cs.getRgb(args, 0); + fn = OPS.setStrokeRGBColor; + break; + case OPS.setFillGray: + stateManager.state.fillColorSpace = ColorSpace.singletons.gray; + args = ColorSpace.singletons.gray.getRgb(args, 0); + fn = OPS.setFillRGBColor; + break; + case OPS.setStrokeGray: + stateManager.state.strokeColorSpace = ColorSpace.singletons.gray; + args = ColorSpace.singletons.gray.getRgb(args, 0); + fn = OPS.setStrokeRGBColor; + break; + case OPS.setFillCMYKColor: + stateManager.state.fillColorSpace = ColorSpace.singletons.cmyk; + args = ColorSpace.singletons.cmyk.getRgb(args, 0); + fn = OPS.setFillRGBColor; + break; + case OPS.setStrokeCMYKColor: + stateManager.state.strokeColorSpace = ColorSpace.singletons.cmyk; + args = ColorSpace.singletons.cmyk.getRgb(args, 0); + fn = OPS.setStrokeRGBColor; + break; + case OPS.setFillRGBColor: + stateManager.state.fillColorSpace = ColorSpace.singletons.rgb; + args = ColorSpace.singletons.rgb.getRgb(args, 0); + break; + case OPS.setStrokeRGBColor: + stateManager.state.strokeColorSpace = ColorSpace.singletons.rgb; + args = ColorSpace.singletons.rgb.getRgb(args, 0); + break; + case OPS.setFillColorN: + cs = stateManager.state.fillColorSpace; + if (cs.name === "Pattern") { + next( + self.handleColorN( + operatorList, + OPS.setFillColorN, + args, + cs, + patterns, + resources, + task, localColorSpaceCache ) ); return; - case OPS.moveTo: - case OPS.lineTo: - case OPS.curveTo: - case OPS.curveTo2: - case OPS.curveTo3: - case OPS.closePath: - case OPS.rectangle: - self.buildPath(operatorList, fn, args, parsingText); - continue; - case OPS.markPoint: - case OPS.markPointProps: - case OPS.beginMarkedContent: - case OPS.beginMarkedContentProps: - case OPS.endMarkedContent: - case OPS.beginCompat: - case OPS.endCompat: - // Ignore operators where the corresponding handlers are known to - // be no-op in CanvasGraphics (display/canvas.js). This prevents - // serialization errors and is also a bit more efficient. - // We could also try to serialize all objects in a general way, - // e.g. as done in https://github.com/mozilla/pdf.js/pull/6266, - // but doing so is meaningless without knowing the semantics. - continue; - default: - // Note: Ignore the operator if it has `Dict` arguments, since - // those are non-serializable, otherwise postMessage will throw - // "An object could not be cloned.". - if (args !== null) { - for (i = 0, ii = args.length; i < ii; i++) { - if (args[i] instanceof Dict) { - break; - } - } - if (i < ii) { - warn("getOperatorList - ignoring operator: " + fn); - continue; - } - } - } - operatorList.addOp(fn, args); - } - if (stop) { - next(deferred); - return; - } - // Some PDFs don't close all restores inside object/form. - // Closing those for them. - closePendingRestoreOPS(); - resolve(); - }).catch(reason => { - if (reason instanceof AbortException) { - return; - } - if (this.options.ignoreErrors) { - // Error(s) in the OperatorList -- sending unsupported feature - // notification and allow rendering to continue. - this.handler.send("UnsupportedFeature", { - featureId: UNSUPPORTED_FEATURES.errorOperatorList, - }); - warn( - `getOperatorList - ignoring errors during "${task.name}" ` + - `task: "${reason}".` - ); - - closePendingRestoreOPS(); - return; - } - throw reason; - }); - }, - - getTextContent({ - stream, - task, - resources, - stateManager = null, - normalizeWhitespace = false, - combineTextItems = false, - sink, - seenStyles = Object.create(null), - }) { - // Ensure that `resources`/`stateManager` is correctly initialized, - // even if the provided parameter is e.g. `null`. - resources = resources || Dict.empty; - stateManager = stateManager || new StateManager(new TextState()); - - var WhitespaceRegexp = /\s/g; - - var textContent = { - items: [], - styles: Object.create(null), - }; - var textContentItem = { - initialized: false, - str: [], - width: 0, - height: 0, - vertical: false, - lastAdvanceWidth: 0, - lastAdvanceHeight: 0, - textAdvanceScale: 0, - spaceWidth: 0, - fakeSpaceMin: Infinity, - fakeMultiSpaceMin: Infinity, - fakeMultiSpaceMax: -0, - textRunBreakAllowed: false, - transform: null, - fontName: null, - }; - var SPACE_FACTOR = 0.3; - var MULTI_SPACE_FACTOR = 1.5; - var MULTI_SPACE_FACTOR_MAX = 4; - - var self = this; - var xref = this.xref; - - // The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd. - var xobjs = null; - const emptyXObjectCache = new LocalImageCache(); - - var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager); - - var textState; - - function ensureTextContentItem() { - if (textContentItem.initialized) { - return textContentItem; - } - var font = textState.font; - if (!(font.loadedName in seenStyles)) { - seenStyles[font.loadedName] = true; - textContent.styles[font.loadedName] = { - fontFamily: font.fallbackName, - ascent: font.ascent, - descent: font.descent, - vertical: font.vertical, - }; - } - textContentItem.fontName = font.loadedName; - - // 9.4.4 Text Space Details - var tsm = [ - textState.fontSize * textState.textHScale, - 0, - 0, - textState.fontSize, - 0, - textState.textRise, - ]; - - if ( - font.isType3Font && - textState.fontSize <= 1 && - !isArrayEqual(textState.fontMatrix, FONT_IDENTITY_MATRIX) - ) { - const glyphHeight = font.bbox[3] - font.bbox[1]; - if (glyphHeight > 0) { - tsm[3] *= glyphHeight * textState.fontMatrix[3]; - } - } - - var trm = Util.transform( - textState.ctm, - Util.transform(textState.textMatrix, tsm) - ); - textContentItem.transform = trm; - if (!font.vertical) { - textContentItem.width = 0; - textContentItem.height = Math.sqrt(trm[2] * trm[2] + trm[3] * trm[3]); - textContentItem.vertical = false; - } else { - textContentItem.width = Math.sqrt(trm[0] * trm[0] + trm[1] * trm[1]); - textContentItem.height = 0; - textContentItem.vertical = true; - } - - var a = textState.textLineMatrix[0]; - var b = textState.textLineMatrix[1]; - var scaleLineX = Math.sqrt(a * a + b * b); - a = textState.ctm[0]; - b = textState.ctm[1]; - var scaleCtmX = Math.sqrt(a * a + b * b); - textContentItem.textAdvanceScale = scaleCtmX * scaleLineX; - textContentItem.lastAdvanceWidth = 0; - textContentItem.lastAdvanceHeight = 0; - - var spaceWidth = (font.spaceWidth / 1000) * textState.fontSize; - if (spaceWidth) { - textContentItem.spaceWidth = spaceWidth; - textContentItem.fakeSpaceMin = spaceWidth * SPACE_FACTOR; - textContentItem.fakeMultiSpaceMin = spaceWidth * MULTI_SPACE_FACTOR; - textContentItem.fakeMultiSpaceMax = - spaceWidth * MULTI_SPACE_FACTOR_MAX; - // It's okay for monospace fonts to fake as much space as needed. - textContentItem.textRunBreakAllowed = !font.isMonospace; - } else { - textContentItem.spaceWidth = 0; - textContentItem.fakeSpaceMin = Infinity; - textContentItem.fakeMultiSpaceMin = Infinity; - textContentItem.fakeMultiSpaceMax = 0; - textContentItem.textRunBreakAllowed = false; - } - - textContentItem.initialized = true; - return textContentItem; - } - - function replaceWhitespace(str) { - // Replaces all whitespaces with standard spaces (0x20), to avoid - // alignment issues between the textLayer and the canvas if the text - // contains e.g. tabs (fixes issue6612.pdf). - var i = 0, - ii = str.length, - code; - while (i < ii && (code = str.charCodeAt(i)) >= 0x20 && code <= 0x7f) { - i++; - } - return i < ii ? str.replace(WhitespaceRegexp, " ") : str; - } - - function runBidiTransform(textChunk) { - var str = textChunk.str.join(""); - var bidiResult = bidi(str, -1, textChunk.vertical); - return { - str: normalizeWhitespace - ? replaceWhitespace(bidiResult.str) - : bidiResult.str, - dir: bidiResult.dir, - width: textChunk.width, - height: textChunk.height, - transform: textChunk.transform, - fontName: textChunk.fontName, - }; - } - - function handleSetFont(fontName, fontRef) { - return self - .loadFont(fontName, fontRef, resources) - .then(function (translated) { - textState.font = translated.font; - textState.fontMatrix = - translated.font.fontMatrix || FONT_IDENTITY_MATRIX; - }); - } - - function buildTextContentItem(chars) { - var font = textState.font; - var textChunk = ensureTextContentItem(); - var width = 0; - var height = 0; - var glyphs = font.charsToGlyphs(chars); - for (var i = 0; i < glyphs.length; i++) { - var glyph = glyphs[i]; - var glyphWidth = null; - if (font.vertical && glyph.vmetric) { - glyphWidth = glyph.vmetric[0]; - } else { - glyphWidth = glyph.width; - } - - var glyphUnicode = glyph.unicode; - var NormalizedUnicodes = getNormalizedUnicodes(); - if (NormalizedUnicodes[glyphUnicode] !== undefined) { - glyphUnicode = NormalizedUnicodes[glyphUnicode]; - } - glyphUnicode = reverseIfRtl(glyphUnicode); - - var charSpacing = textState.charSpacing; - if (glyph.isSpace) { - var wordSpacing = textState.wordSpacing; - charSpacing += wordSpacing; - if (wordSpacing > 0) { - addFakeSpaces(wordSpacing, textChunk.str); } - } - - var tx = 0; - var ty = 0; - if (!font.vertical) { - var w0 = glyphWidth * textState.fontMatrix[0]; - tx = (w0 * textState.fontSize + charSpacing) * textState.textHScale; - width += tx; - } else { - var w1 = glyphWidth * textState.fontMatrix[0]; - ty = w1 * textState.fontSize + charSpacing; - height += ty; - } - textState.translateTextMatrix(tx, ty); - - textChunk.str.push(glyphUnicode); - } - - if (!font.vertical) { - textChunk.lastAdvanceWidth = width; - textChunk.width += width; - } else { - textChunk.lastAdvanceHeight = height; - textChunk.height += Math.abs(height); - } - - return textChunk; - } - - function addFakeSpaces(width, strBuf) { - if (width < textContentItem.fakeSpaceMin) { - return; - } - if (width < textContentItem.fakeMultiSpaceMin) { - strBuf.push(" "); - return; - } - var fakeSpaces = Math.round(width / textContentItem.spaceWidth); - while (fakeSpaces-- > 0) { - strBuf.push(" "); - } - } - - function flushTextContentItem() { - if (!textContentItem.initialized) { - return; - } - - // Do final text scaling. - if (!textContentItem.vertical) { - textContentItem.width *= textContentItem.textAdvanceScale; - } else { - textContentItem.height *= textContentItem.textAdvanceScale; - } - textContent.items.push(runBidiTransform(textContentItem)); - - textContentItem.initialized = false; - textContentItem.str.length = 0; - } - - function enqueueChunk() { - const length = textContent.items.length; - if (length > 0) { - sink.enqueue(textContent, length); - textContent.items = []; - textContent.styles = Object.create(null); - } - } - - var timeSlotManager = new TimeSlotManager(); - - return new Promise(function promiseBody(resolve, reject) { - const next = function (promise) { - enqueueChunk(); - Promise.all([promise, sink.ready]).then(function () { - try { - promiseBody(resolve, reject); - } catch (ex) { - reject(ex); - } - }, reject); - }; - task.ensureNotTerminated(); - timeSlotManager.reset(); - var stop, - operation = {}, - args = []; - while (!(stop = timeSlotManager.check())) { - // The arguments parsed by read() are not used beyond this loop, so - // we can reuse the same array on every iteration, thus avoiding - // unnecessary allocations. - args.length = 0; - operation.args = args; - if (!preprocessor.read(operation)) { + args = cs.getRgb(args, 0); + fn = OPS.setFillRGBColor; break; - } - textState = stateManager.state; - var fn = operation.fn; - args = operation.args; - var advance, diff; - - switch (fn | 0) { - case OPS.setFont: - // Optimization to ignore multiple identical Tf commands. - var fontNameArg = args[0].name, - fontSizeArg = args[1]; - if ( - textState.font && - fontNameArg === textState.fontName && - fontSizeArg === textState.fontSize - ) { - break; - } - - flushTextContentItem(); - textState.fontName = fontNameArg; - textState.fontSize = fontSizeArg; - next(handleSetFont(fontNameArg, null)); - return; - case OPS.setTextRise: - flushTextContentItem(); - textState.textRise = args[0]; - break; - case OPS.setHScale: - flushTextContentItem(); - textState.textHScale = args[0] / 100; - break; - case OPS.setLeading: - flushTextContentItem(); - textState.leading = args[0]; - break; - case OPS.moveText: - // Optimization to treat same line movement as advance - var isSameTextLine = !textState.font - ? false - : (textState.font.vertical ? args[0] : args[1]) === 0; - advance = args[0] - args[1]; - if ( - combineTextItems && - isSameTextLine && - textContentItem.initialized && - advance > 0 && - advance <= textContentItem.fakeMultiSpaceMax - ) { - textState.translateTextLineMatrix(args[0], args[1]); - textContentItem.width += - args[0] - textContentItem.lastAdvanceWidth; - textContentItem.height += - args[1] - textContentItem.lastAdvanceHeight; - diff = - args[0] - - textContentItem.lastAdvanceWidth - - (args[1] - textContentItem.lastAdvanceHeight); - addFakeSpaces(diff, textContentItem.str); - break; - } - - flushTextContentItem(); - textState.translateTextLineMatrix(args[0], args[1]); - textState.textMatrix = textState.textLineMatrix.slice(); - break; - case OPS.setLeadingMoveText: - flushTextContentItem(); - textState.leading = -args[1]; - textState.translateTextLineMatrix(args[0], args[1]); - textState.textMatrix = textState.textLineMatrix.slice(); - break; - case OPS.nextLine: - flushTextContentItem(); - textState.carriageReturn(); - break; - case OPS.setTextMatrix: - // Optimization to treat same line movement as advance. - advance = textState.calcTextLineMatrixAdvance( - args[0], - args[1], - args[2], - args[3], - args[4], - args[5] - ); - if ( - combineTextItems && - advance !== null && - textContentItem.initialized && - advance.value > 0 && - advance.value <= textContentItem.fakeMultiSpaceMax - ) { - textState.translateTextLineMatrix( - advance.width, - advance.height - ); - textContentItem.width += - advance.width - textContentItem.lastAdvanceWidth; - textContentItem.height += - advance.height - textContentItem.lastAdvanceHeight; - diff = - advance.width - - textContentItem.lastAdvanceWidth - - (advance.height - textContentItem.lastAdvanceHeight); - addFakeSpaces(diff, textContentItem.str); - break; - } - - flushTextContentItem(); - textState.setTextMatrix( - args[0], - args[1], - args[2], - args[3], - args[4], - args[5] - ); - textState.setTextLineMatrix( - args[0], - args[1], - args[2], - args[3], - args[4], - args[5] - ); - break; - case OPS.setCharSpacing: - textState.charSpacing = args[0]; - break; - case OPS.setWordSpacing: - textState.wordSpacing = args[0]; - break; - case OPS.beginText: - flushTextContentItem(); - textState.textMatrix = IDENTITY_MATRIX.slice(); - textState.textLineMatrix = IDENTITY_MATRIX.slice(); - break; - case OPS.showSpacedText: - if (!stateManager.state.font) { - self.ensureStateFont(stateManager.state); - continue; - } - var items = args[0]; - var offset; - for (var j = 0, jj = items.length; j < jj; j++) { - if (typeof items[j] === "string") { - buildTextContentItem(items[j]); - } else if (isNum(items[j])) { - ensureTextContentItem(); - - // PDF Specification 5.3.2 states: - // The number is expressed in thousandths of a unit of text - // space. - // This amount is subtracted from the current horizontal or - // vertical coordinate, depending on the writing mode. - // In the default coordinate system, a positive adjustment - // has the effect of moving the next glyph painted either to - // the left or down by the given amount. - advance = (items[j] * textState.fontSize) / 1000; - var breakTextRun = false; - if (textState.font.vertical) { - offset = advance; - textState.translateTextMatrix(0, offset); - breakTextRun = - textContentItem.textRunBreakAllowed && - advance > textContentItem.fakeMultiSpaceMax; - if (!breakTextRun) { - // Value needs to be added to height to paint down. - textContentItem.height += offset; - } - } else { - advance = -advance; - offset = advance * textState.textHScale; - textState.translateTextMatrix(offset, 0); - breakTextRun = - textContentItem.textRunBreakAllowed && - advance > textContentItem.fakeMultiSpaceMax; - if (!breakTextRun) { - // Value needs to be subtracted from width to paint left. - textContentItem.width += offset; - } - } - if (breakTextRun) { - flushTextContentItem(); - } else if (advance > 0) { - addFakeSpaces(advance, textContentItem.str); - } - } - } - break; - case OPS.showText: - if (!stateManager.state.font) { - self.ensureStateFont(stateManager.state); - continue; - } - buildTextContentItem(args[0]); - break; - case OPS.nextLineShowText: - if (!stateManager.state.font) { - self.ensureStateFont(stateManager.state); - continue; - } - flushTextContentItem(); - textState.carriageReturn(); - buildTextContentItem(args[0]); - break; - case OPS.nextLineSetSpacingShowText: - if (!stateManager.state.font) { - self.ensureStateFont(stateManager.state); - continue; - } - flushTextContentItem(); - textState.wordSpacing = args[0]; - textState.charSpacing = args[1]; - textState.carriageReturn(); - buildTextContentItem(args[2]); - break; - case OPS.paintXObject: - flushTextContentItem(); - if (!xobjs) { - xobjs = resources.get("XObject") || Dict.empty; - } - - var name = args[0].name; - if (name && emptyXObjectCache.getByName(name)) { - break; - } - + case OPS.setStrokeColorN: + cs = stateManager.state.strokeColorSpace; + if (cs.name === "Pattern") { next( - new Promise(function (resolveXObject, rejectXObject) { - if (!name) { - throw new FormatError( - "XObject must be referred to by name." - ); - } - - let xobj = xobjs.getRaw(name); - if (xobj instanceof Ref) { - if (emptyXObjectCache.getByRef(xobj)) { - resolveXObject(); - return; - } - - xobj = xref.fetch(xobj); - } - - if (!xobj) { - resolveXObject(); - return; - } - if (!isStream(xobj)) { - throw new FormatError("XObject should be a stream"); - } - - const type = xobj.dict.get("Subtype"); - if (!isName(type)) { - throw new FormatError("XObject should have a Name subtype"); - } - - if (type.name !== "Form") { - emptyXObjectCache.set(name, xobj.dict.objId, true); - - resolveXObject(); - return; - } - - // Use a new `StateManager` to prevent incorrect positioning - // of textItems *after* the Form XObject, since errors in the - // data can otherwise prevent `restore` operators from - // executing. - // NOTE: Only an issue when `options.ignoreErrors === true`. - const currentState = stateManager.state.clone(); - const xObjStateManager = new StateManager(currentState); - - const matrix = xobj.dict.getArray("Matrix"); - if (Array.isArray(matrix) && matrix.length === 6) { - xObjStateManager.transform(matrix); - } - - // Enqueue the `textContent` chunk before parsing the /Form - // XObject. - enqueueChunk(); - const sinkWrapper = { - enqueueInvoked: false, - - enqueue(chunk, size) { - this.enqueueInvoked = true; - sink.enqueue(chunk, size); - }, - - get desiredSize() { - return sink.desiredSize; - }, - - get ready() { - return sink.ready; - }, - }; - - self - .getTextContent({ - stream: xobj, - task, - resources: xobj.dict.get("Resources") || resources, - stateManager: xObjStateManager, - normalizeWhitespace, - combineTextItems, - sink: sinkWrapper, - seenStyles, - }) - .then(function () { - if (!sinkWrapper.enqueueInvoked) { - emptyXObjectCache.set(name, xobj.dict.objId, true); - } - resolveXObject(); - }, rejectXObject); - }).catch(function (reason) { - if (reason instanceof AbortException) { - return; - } - if (self.options.ignoreErrors) { - // Error(s) in the XObject -- allow text-extraction to - // continue. - warn(`getTextContent - ignoring XObject: "${reason}".`); - return; - } - throw reason; - }) + self.handleColorN( + operatorList, + OPS.setStrokeColorN, + args, + cs, + patterns, + resources, + task, + localColorSpaceCache + ) ); return; - case OPS.setGState: - flushTextContentItem(); - var dictName = args[0]; - var extGState = resources.get("ExtGState"); - - if (!isDict(extGState) || !isName(dictName)) { - break; - } - var gState = extGState.get(dictName.name); - if (!isDict(gState)) { - break; - } - var gStateFont = gState.get("Font"); - if (gStateFont) { - textState.fontName = null; - textState.fontSize = gStateFont[1]; - next(handleSetFont(null, gStateFont[0])); - return; - } - break; - } // switch - if (textContent.items.length >= sink.desiredSize) { - // Wait for ready, if we reach highWaterMark. - stop = true; + } + args = cs.getRgb(args, 0); + fn = OPS.setStrokeRGBColor; break; - } - } // while - if (stop) { - next(deferred); - return; - } - flushTextContentItem(); - enqueueChunk(); - resolve(); - }).catch(reason => { - if (reason instanceof AbortException) { - return; - } - if (this.options.ignoreErrors) { - // Error(s) in the TextContent -- allow text-extraction to continue. - warn( - `getTextContent - ignoring errors during "${task.name}" ` + - `task: "${reason}".` - ); - flushTextContentItem(); - enqueueChunk(); - return; - } - throw reason; - }); - }, - - extractDataStructures: function PartialEvaluator_extractDataStructures( - dict, - baseDict, - properties - ) { - const xref = this.xref; - let cidToGidBytes; - // 9.10.2 - var toUnicode = dict.get("ToUnicode") || baseDict.get("ToUnicode"); - var toUnicodePromise = toUnicode - ? this.readToUnicode(toUnicode) - : Promise.resolve(undefined); - - if (properties.composite) { - // CIDSystemInfo helps to match CID to glyphs - var cidSystemInfo = dict.get("CIDSystemInfo"); - if (isDict(cidSystemInfo)) { - properties.cidSystemInfo = { - registry: stringToPDFString(cidSystemInfo.get("Registry")), - ordering: stringToPDFString(cidSystemInfo.get("Ordering")), - supplement: cidSystemInfo.get("Supplement"), - }; - } - - var cidToGidMap = dict.get("CIDToGIDMap"); - if (isStream(cidToGidMap)) { - cidToGidBytes = cidToGidMap.getBytes(); - } - } - - // Based on 9.6.6 of the spec the encoding can come from multiple places - // and depends on the font type. The base encoding and differences are - // read here, but the encoding that is actually used is chosen during - // glyph mapping in the font. - // TODO: Loading the built in encoding in the font would allow the - // differences to be merged in here not require us to hold on to it. - var differences = []; - var baseEncodingName = null; - var encoding; - if (dict.has("Encoding")) { - encoding = dict.get("Encoding"); - if (isDict(encoding)) { - baseEncodingName = encoding.get("BaseEncoding"); - baseEncodingName = isName(baseEncodingName) - ? baseEncodingName.name - : null; - // Load the differences between the base and original - if (encoding.has("Differences")) { - var diffEncoding = encoding.get("Differences"); - var index = 0; - for (var j = 0, jj = diffEncoding.length; j < jj; j++) { - var data = xref.fetchIfRef(diffEncoding[j]); - if (isNum(data)) { - index = data; - } else if (isName(data)) { - differences[index++] = data.name; - } else { - throw new FormatError( - `Invalid entry in 'Differences' array: ${data}` - ); - } + case OPS.shadingFill: + var shadingRes = resources.get("Shading"); + if (!shadingRes) { + throw new FormatError("No shading resource found"); } - } - } else if (isName(encoding)) { - baseEncodingName = encoding.name; - } else { - throw new FormatError("Encoding is not a Name nor a Dict"); - } - // According to table 114 if the encoding is a named encoding it must be - // one of these predefined encodings. - if ( - baseEncodingName !== "MacRomanEncoding" && - baseEncodingName !== "MacExpertEncoding" && - baseEncodingName !== "WinAnsiEncoding" - ) { - baseEncodingName = null; - } - } - if (baseEncodingName) { - properties.defaultEncoding = getEncoding(baseEncodingName).slice(); - } else { - var isSymbolicFont = !!(properties.flags & FontFlags.Symbolic); - var isNonsymbolicFont = !!(properties.flags & FontFlags.Nonsymbolic); - // According to "Table 114" in section "9.6.6.1 General" (under - // "9.6.6 Character Encoding") of the PDF specification, a Nonsymbolic - // font should use the `StandardEncoding` if no encoding is specified. - encoding = StandardEncoding; - if (properties.type === "TrueType" && !isNonsymbolicFont) { - encoding = WinAnsiEncoding; - } - // The Symbolic attribute can be misused for regular fonts - // Heuristic: we have to check if the font is a standard one also - if (isSymbolicFont) { - encoding = MacRomanEncoding; - if (!properties.file) { - if (/Symbol/i.test(properties.name)) { - encoding = SymbolSetEncoding; - } else if (/Dingbats|Wingdings/i.test(properties.name)) { - encoding = ZapfDingbatsEncoding; + var shading = shadingRes.get(args[0].name); + if (!shading) { + throw new FormatError("No shading object found"); } - } - } - properties.defaultEncoding = encoding; - } - properties.differences = differences; - properties.baseEncodingName = baseEncodingName; - properties.hasEncoding = !!baseEncodingName || differences.length > 0; - properties.dict = dict; - return toUnicodePromise - .then(readToUnicode => { - properties.toUnicode = readToUnicode; - return this.buildToUnicode(properties); - }) - .then(builtToUnicode => { - properties.toUnicode = builtToUnicode; - if (cidToGidBytes) { - properties.cidToGidMap = this.readCidToGidMap( - cidToGidBytes, - builtToUnicode + var shadingFill = Pattern.parseShading( + shading, + null, + xref, + resources, + self.handler, + self._pdfFunctionFactory, + localColorSpaceCache ); - } - return properties; - }); - }, + var patternIR = shadingFill.getIR(); + args = [patternIR]; + fn = OPS.shadingFill; + break; + case OPS.setGState: + var dictName = args[0]; + var extGState = resources.get("ExtGState"); - /** - * @returns {ToUnicodeMap} - * @private - */ - _buildSimpleFontToUnicode(properties, forceGlyphs = false) { - assert(!properties.composite, "Must be a simple font."); - - const toUnicode = []; - const encoding = properties.defaultEncoding.slice(); - const baseEncodingName = properties.baseEncodingName; - // Merge in the differences array. - const differences = properties.differences; - for (const charcode in differences) { - const glyphName = differences[charcode]; - if (glyphName === ".notdef") { - // Skip .notdef to prevent rendering errors, e.g. boxes appearing - // where there should be spaces (fixes issue5256.pdf). - continue; - } - encoding[charcode] = glyphName; - } - const glyphsUnicodeMap = getGlyphsUnicode(); - for (const charcode in encoding) { - // a) Map the character code to a character name. - let glyphName = encoding[charcode]; - // b) Look up the character name in the Adobe Glyph List (see the - // Bibliography) to obtain the corresponding Unicode value. - if (glyphName === "") { - continue; - } else if (glyphsUnicodeMap[glyphName] === undefined) { - // (undocumented) c) Few heuristics to recognize unknown glyphs - // NOTE: Adobe Reader does not do this step, but OSX Preview does - let code = 0; - switch (glyphName[0]) { - case "G": // Gxx glyph - if (glyphName.length === 3) { - code = parseInt(glyphName.substring(1), 16); - } + if (!isDict(extGState) || !extGState.has(dictName.name)) { break; - case "g": // g00xx glyph - if (glyphName.length === 5) { - code = parseInt(glyphName.substring(1), 16); - } - break; - case "C": // Cdd{d} glyph - case "c": // cdd{d} glyph - if (glyphName.length >= 3 && glyphName.length <= 4) { - const codeStr = glyphName.substring(1); + } - if (forceGlyphs) { - code = parseInt(codeStr, 16); + var gState = extGState.get(dictName.name); + next( + self.setGState( + resources, + gState, + operatorList, + task, + stateManager, + localColorSpaceCache + ) + ); + return; + case OPS.moveTo: + case OPS.lineTo: + case OPS.curveTo: + case OPS.curveTo2: + case OPS.curveTo3: + case OPS.closePath: + case OPS.rectangle: + self.buildPath(operatorList, fn, args, parsingText); + continue; + case OPS.markPoint: + case OPS.markPointProps: + case OPS.beginMarkedContent: + case OPS.beginMarkedContentProps: + case OPS.endMarkedContent: + case OPS.beginCompat: + case OPS.endCompat: + // Ignore operators where the corresponding handlers are known to + // be no-op in CanvasGraphics (display/canvas.js). This prevents + // serialization errors and is also a bit more efficient. + // We could also try to serialize all objects in a general way, + // e.g. as done in https://github.com/mozilla/pdf.js/pull/6266, + // but doing so is meaningless without knowing the semantics. + continue; + default: + // Note: Ignore the operator if it has `Dict` arguments, since + // those are non-serializable, otherwise postMessage will throw + // "An object could not be cloned.". + if (args !== null) { + for (i = 0, ii = args.length; i < ii; i++) { + if (args[i] instanceof Dict) { break; } - // Normally the Cdd{d}/cdd{d} glyphName format will contain - // regular, i.e. base 10, charCodes (see issue4550.pdf)... - code = +codeStr; - - // ... however some PDF generators violate that assumption by - // containing glyph, i.e. base 16, codes instead. - // In that case we need to re-parse the *entire* encoding to - // prevent broken text-selection (fixes issue9655_reduced.pdf). - if ( - Number.isNaN(code) && - Number.isInteger(parseInt(codeStr, 16)) - ) { - return this._buildSimpleFontToUnicode( - properties, - /* forceGlyphs */ true - ); - } } - break; - default: - // 'uniXXXX'/'uXXXX{XX}' glyphs - const unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap); - if (unicode !== -1) { - code = unicode; - } - } - if (code > 0 && code <= 0x10ffff && Number.isInteger(code)) { - // If `baseEncodingName` is one the predefined encodings, and `code` - // equals `charcode`, using the glyph defined in the baseEncoding - // seems to yield a better `toUnicode` mapping (fixes issue 5070). - if (baseEncodingName && code === +charcode) { - const baseEncoding = getEncoding(baseEncodingName); - if (baseEncoding && (glyphName = baseEncoding[charcode])) { - toUnicode[charcode] = String.fromCharCode( - glyphsUnicodeMap[glyphName] - ); + if (i < ii) { + warn("getOperatorList - ignoring operator: " + fn); continue; } } - toUnicode[charcode] = String.fromCodePoint(code); - } - continue; } - toUnicode[charcode] = String.fromCharCode(glyphsUnicodeMap[glyphName]); + operatorList.addOp(fn, args); } - return new ToUnicodeMap(toUnicode); - }, - - /** - * Builds a char code to unicode map based on section 9.10 of the spec. - * @param {Object} properties Font properties object. - * @returns {Promise} A Promise that is resolved with a - * {ToUnicodeMap|IdentityToUnicodeMap} object. - */ - buildToUnicode(properties) { - properties.hasIncludedToUnicodeMap = - !!properties.toUnicode && properties.toUnicode.length > 0; - - // Section 9.10.2 Mapping Character Codes to Unicode Values - if (properties.hasIncludedToUnicodeMap) { - // Some fonts contain incomplete ToUnicode data, causing issues with - // text-extraction. For simple fonts, containing encoding information, - // use a fallback ToUnicode map to improve this (fixes issue8229.pdf). - if (!properties.composite && properties.hasEncoding) { - properties.fallbackToUnicode = this._buildSimpleFontToUnicode( - properties - ); - } - - return Promise.resolve(properties.toUnicode); + if (stop) { + next(deferred); + return; } - - // According to the spec if the font is a simple font we should only map - // to unicode if the base encoding is MacRoman, MacExpert, or WinAnsi or - // the differences array only contains adobe standard or symbol set names, - // in pratice it seems better to always try to create a toUnicode map - // based of the default encoding. - if (!properties.composite /* is simple font */) { - return Promise.resolve(this._buildSimpleFontToUnicode(properties)); + // Some PDFs don't close all restores inside object/form. + // Closing those for them. + closePendingRestoreOPS(); + resolve(); + }).catch(reason => { + if (reason instanceof AbortException) { + return; } + if (this.options.ignoreErrors) { + // Error(s) in the OperatorList -- sending unsupported feature + // notification and allow rendering to continue. + this.handler.send("UnsupportedFeature", { + featureId: UNSUPPORTED_FEATURES.errorOperatorList, + }); + warn( + `getOperatorList - ignoring errors during "${task.name}" ` + + `task: "${reason}".` + ); + + closePendingRestoreOPS(); + return; + } + throw reason; + }); + } + + getTextContent({ + stream, + task, + resources, + stateManager = null, + normalizeWhitespace = false, + combineTextItems = false, + sink, + seenStyles = Object.create(null), + }) { + // Ensure that `resources`/`stateManager` is correctly initialized, + // even if the provided parameter is e.g. `null`. + resources = resources || Dict.empty; + stateManager = stateManager || new StateManager(new TextState()); + + var WhitespaceRegexp = /\s/g; + + var textContent = { + items: [], + styles: Object.create(null), + }; + var textContentItem = { + initialized: false, + str: [], + width: 0, + height: 0, + vertical: false, + lastAdvanceWidth: 0, + lastAdvanceHeight: 0, + textAdvanceScale: 0, + spaceWidth: 0, + fakeSpaceMin: Infinity, + fakeMultiSpaceMin: Infinity, + fakeMultiSpaceMax: -0, + textRunBreakAllowed: false, + transform: null, + fontName: null, + }; + var SPACE_FACTOR = 0.3; + var MULTI_SPACE_FACTOR = 1.5; + var MULTI_SPACE_FACTOR_MAX = 4; + + var self = this; + var xref = this.xref; + + // The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd. + var xobjs = null; + const emptyXObjectCache = new LocalImageCache(); + + var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager); + + var textState; + + function ensureTextContentItem() { + if (textContentItem.initialized) { + return textContentItem; + } + var font = textState.font; + if (!(font.loadedName in seenStyles)) { + seenStyles[font.loadedName] = true; + textContent.styles[font.loadedName] = { + fontFamily: font.fallbackName, + ascent: font.ascent, + descent: font.descent, + vertical: font.vertical, + }; + } + textContentItem.fontName = font.loadedName; + + // 9.4.4 Text Space Details + var tsm = [ + textState.fontSize * textState.textHScale, + 0, + 0, + textState.fontSize, + 0, + textState.textRise, + ]; - // If the font is a composite font that uses one of the predefined CMaps - // listed in Table 118 (except Identity–H and Identity–V) or whose - // descendant CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or - // Adobe-Korea1 character collection: if ( - properties.composite && - ((properties.cMap.builtInCMap && - !(properties.cMap instanceof IdentityCMap)) || - (properties.cidSystemInfo.registry === "Adobe" && - (properties.cidSystemInfo.ordering === "GB1" || - properties.cidSystemInfo.ordering === "CNS1" || - properties.cidSystemInfo.ordering === "Japan1" || - properties.cidSystemInfo.ordering === "Korea1"))) + font.isType3Font && + textState.fontSize <= 1 && + !isArrayEqual(textState.fontMatrix, FONT_IDENTITY_MATRIX) ) { - // Then: - // a) Map the character code to a character identifier (CID) according - // to the font’s CMap. - // b) Obtain the registry and ordering of the character collection used - // by the font’s CMap (for example, Adobe and Japan1) from its - // CIDSystemInfo dictionary. - const registry = properties.cidSystemInfo.registry; - const ordering = properties.cidSystemInfo.ordering; - // c) Construct a second CMap name by concatenating the registry and - // ordering obtained in step (b) in the format registry–ordering–UCS2 - // (for example, Adobe–Japan1–UCS2). - const ucs2CMapName = Name.get(registry + "-" + ordering + "-UCS2"); - // d) Obtain the CMap with the name constructed in step (c) (available - // from the ASN Web site; see the Bibliography). - return CMapFactory.create({ - encoding: ucs2CMapName, - fetchBuiltInCMap: this._fetchBuiltInCMapBound, - useCMap: null, - }).then(function (ucs2CMap) { - const cMap = properties.cMap; - const toUnicode = []; - cMap.forEach(function (charcode, cid) { - if (cid > 0xffff) { - throw new FormatError("Max size of CID is 65,535"); + const glyphHeight = font.bbox[3] - font.bbox[1]; + if (glyphHeight > 0) { + tsm[3] *= glyphHeight * textState.fontMatrix[3]; + } + } + + var trm = Util.transform( + textState.ctm, + Util.transform(textState.textMatrix, tsm) + ); + textContentItem.transform = trm; + if (!font.vertical) { + textContentItem.width = 0; + textContentItem.height = Math.sqrt(trm[2] * trm[2] + trm[3] * trm[3]); + textContentItem.vertical = false; + } else { + textContentItem.width = Math.sqrt(trm[0] * trm[0] + trm[1] * trm[1]); + textContentItem.height = 0; + textContentItem.vertical = true; + } + + var a = textState.textLineMatrix[0]; + var b = textState.textLineMatrix[1]; + var scaleLineX = Math.sqrt(a * a + b * b); + a = textState.ctm[0]; + b = textState.ctm[1]; + var scaleCtmX = Math.sqrt(a * a + b * b); + textContentItem.textAdvanceScale = scaleCtmX * scaleLineX; + textContentItem.lastAdvanceWidth = 0; + textContentItem.lastAdvanceHeight = 0; + + var spaceWidth = (font.spaceWidth / 1000) * textState.fontSize; + if (spaceWidth) { + textContentItem.spaceWidth = spaceWidth; + textContentItem.fakeSpaceMin = spaceWidth * SPACE_FACTOR; + textContentItem.fakeMultiSpaceMin = spaceWidth * MULTI_SPACE_FACTOR; + textContentItem.fakeMultiSpaceMax = spaceWidth * MULTI_SPACE_FACTOR_MAX; + // It's okay for monospace fonts to fake as much space as needed. + textContentItem.textRunBreakAllowed = !font.isMonospace; + } else { + textContentItem.spaceWidth = 0; + textContentItem.fakeSpaceMin = Infinity; + textContentItem.fakeMultiSpaceMin = Infinity; + textContentItem.fakeMultiSpaceMax = 0; + textContentItem.textRunBreakAllowed = false; + } + + textContentItem.initialized = true; + return textContentItem; + } + + function replaceWhitespace(str) { + // Replaces all whitespaces with standard spaces (0x20), to avoid + // alignment issues between the textLayer and the canvas if the text + // contains e.g. tabs (fixes issue6612.pdf). + var i = 0, + ii = str.length, + code; + while (i < ii && (code = str.charCodeAt(i)) >= 0x20 && code <= 0x7f) { + i++; + } + return i < ii ? str.replace(WhitespaceRegexp, " ") : str; + } + + function runBidiTransform(textChunk) { + var str = textChunk.str.join(""); + var bidiResult = bidi(str, -1, textChunk.vertical); + return { + str: normalizeWhitespace + ? replaceWhitespace(bidiResult.str) + : bidiResult.str, + dir: bidiResult.dir, + width: textChunk.width, + height: textChunk.height, + transform: textChunk.transform, + fontName: textChunk.fontName, + }; + } + + function handleSetFont(fontName, fontRef) { + return self + .loadFont(fontName, fontRef, resources) + .then(function (translated) { + textState.font = translated.font; + textState.fontMatrix = + translated.font.fontMatrix || FONT_IDENTITY_MATRIX; + }); + } + + function buildTextContentItem(chars) { + var font = textState.font; + var textChunk = ensureTextContentItem(); + var width = 0; + var height = 0; + var glyphs = font.charsToGlyphs(chars); + for (var i = 0; i < glyphs.length; i++) { + var glyph = glyphs[i]; + var glyphWidth = null; + if (font.vertical && glyph.vmetric) { + glyphWidth = glyph.vmetric[0]; + } else { + glyphWidth = glyph.width; + } + + var glyphUnicode = glyph.unicode; + var NormalizedUnicodes = getNormalizedUnicodes(); + if (NormalizedUnicodes[glyphUnicode] !== undefined) { + glyphUnicode = NormalizedUnicodes[glyphUnicode]; + } + glyphUnicode = reverseIfRtl(glyphUnicode); + + var charSpacing = textState.charSpacing; + if (glyph.isSpace) { + var wordSpacing = textState.wordSpacing; + charSpacing += wordSpacing; + if (wordSpacing > 0) { + addFakeSpaces(wordSpacing, textChunk.str); + } + } + + var tx = 0; + var ty = 0; + if (!font.vertical) { + var w0 = glyphWidth * textState.fontMatrix[0]; + tx = (w0 * textState.fontSize + charSpacing) * textState.textHScale; + width += tx; + } else { + var w1 = glyphWidth * textState.fontMatrix[0]; + ty = w1 * textState.fontSize + charSpacing; + height += ty; + } + textState.translateTextMatrix(tx, ty); + + textChunk.str.push(glyphUnicode); + } + + if (!font.vertical) { + textChunk.lastAdvanceWidth = width; + textChunk.width += width; + } else { + textChunk.lastAdvanceHeight = height; + textChunk.height += Math.abs(height); + } + + return textChunk; + } + + function addFakeSpaces(width, strBuf) { + if (width < textContentItem.fakeSpaceMin) { + return; + } + if (width < textContentItem.fakeMultiSpaceMin) { + strBuf.push(" "); + return; + } + var fakeSpaces = Math.round(width / textContentItem.spaceWidth); + while (fakeSpaces-- > 0) { + strBuf.push(" "); + } + } + + function flushTextContentItem() { + if (!textContentItem.initialized) { + return; + } + + // Do final text scaling. + if (!textContentItem.vertical) { + textContentItem.width *= textContentItem.textAdvanceScale; + } else { + textContentItem.height *= textContentItem.textAdvanceScale; + } + textContent.items.push(runBidiTransform(textContentItem)); + + textContentItem.initialized = false; + textContentItem.str.length = 0; + } + + function enqueueChunk() { + const length = textContent.items.length; + if (length > 0) { + sink.enqueue(textContent, length); + textContent.items = []; + textContent.styles = Object.create(null); + } + } + + var timeSlotManager = new TimeSlotManager(); + + return new Promise(function promiseBody(resolve, reject) { + const next = function (promise) { + enqueueChunk(); + Promise.all([promise, sink.ready]).then(function () { + try { + promiseBody(resolve, reject); + } catch (ex) { + reject(ex); + } + }, reject); + }; + task.ensureNotTerminated(); + timeSlotManager.reset(); + var stop, + operation = {}, + args = []; + while (!(stop = timeSlotManager.check())) { + // The arguments parsed by read() are not used beyond this loop, so + // we can reuse the same array on every iteration, thus avoiding + // unnecessary allocations. + args.length = 0; + operation.args = args; + if (!preprocessor.read(operation)) { + break; + } + textState = stateManager.state; + var fn = operation.fn; + args = operation.args; + var advance, diff; + + switch (fn | 0) { + case OPS.setFont: + // Optimization to ignore multiple identical Tf commands. + var fontNameArg = args[0].name, + fontSizeArg = args[1]; + if ( + textState.font && + fontNameArg === textState.fontName && + fontSizeArg === textState.fontSize + ) { + break; } - // e) Map the CID obtained in step (a) according to the CMap - // obtained in step (d), producing a Unicode value. - const ucs2 = ucs2CMap.lookup(cid); - if (ucs2) { - toUnicode[charcode] = String.fromCharCode( - (ucs2.charCodeAt(0) << 8) + ucs2.charCodeAt(1) + + flushTextContentItem(); + textState.fontName = fontNameArg; + textState.fontSize = fontSizeArg; + next(handleSetFont(fontNameArg, null)); + return; + case OPS.setTextRise: + flushTextContentItem(); + textState.textRise = args[0]; + break; + case OPS.setHScale: + flushTextContentItem(); + textState.textHScale = args[0] / 100; + break; + case OPS.setLeading: + flushTextContentItem(); + textState.leading = args[0]; + break; + case OPS.moveText: + // Optimization to treat same line movement as advance + var isSameTextLine = !textState.font + ? false + : (textState.font.vertical ? args[0] : args[1]) === 0; + advance = args[0] - args[1]; + if ( + combineTextItems && + isSameTextLine && + textContentItem.initialized && + advance > 0 && + advance <= textContentItem.fakeMultiSpaceMax + ) { + textState.translateTextLineMatrix(args[0], args[1]); + textContentItem.width += + args[0] - textContentItem.lastAdvanceWidth; + textContentItem.height += + args[1] - textContentItem.lastAdvanceHeight; + diff = + args[0] - + textContentItem.lastAdvanceWidth - + (args[1] - textContentItem.lastAdvanceHeight); + addFakeSpaces(diff, textContentItem.str); + break; + } + + flushTextContentItem(); + textState.translateTextLineMatrix(args[0], args[1]); + textState.textMatrix = textState.textLineMatrix.slice(); + break; + case OPS.setLeadingMoveText: + flushTextContentItem(); + textState.leading = -args[1]; + textState.translateTextLineMatrix(args[0], args[1]); + textState.textMatrix = textState.textLineMatrix.slice(); + break; + case OPS.nextLine: + flushTextContentItem(); + textState.carriageReturn(); + break; + case OPS.setTextMatrix: + // Optimization to treat same line movement as advance. + advance = textState.calcTextLineMatrixAdvance( + args[0], + args[1], + args[2], + args[3], + args[4], + args[5] + ); + if ( + combineTextItems && + advance !== null && + textContentItem.initialized && + advance.value > 0 && + advance.value <= textContentItem.fakeMultiSpaceMax + ) { + textState.translateTextLineMatrix(advance.width, advance.height); + textContentItem.width += + advance.width - textContentItem.lastAdvanceWidth; + textContentItem.height += + advance.height - textContentItem.lastAdvanceHeight; + diff = + advance.width - + textContentItem.lastAdvanceWidth - + (advance.height - textContentItem.lastAdvanceHeight); + addFakeSpaces(diff, textContentItem.str); + break; + } + + flushTextContentItem(); + textState.setTextMatrix( + args[0], + args[1], + args[2], + args[3], + args[4], + args[5] + ); + textState.setTextLineMatrix( + args[0], + args[1], + args[2], + args[3], + args[4], + args[5] + ); + break; + case OPS.setCharSpacing: + textState.charSpacing = args[0]; + break; + case OPS.setWordSpacing: + textState.wordSpacing = args[0]; + break; + case OPS.beginText: + flushTextContentItem(); + textState.textMatrix = IDENTITY_MATRIX.slice(); + textState.textLineMatrix = IDENTITY_MATRIX.slice(); + break; + case OPS.showSpacedText: + if (!stateManager.state.font) { + self.ensureStateFont(stateManager.state); + continue; + } + var items = args[0]; + var offset; + for (var j = 0, jj = items.length; j < jj; j++) { + if (typeof items[j] === "string") { + buildTextContentItem(items[j]); + } else if (isNum(items[j])) { + ensureTextContentItem(); + + // PDF Specification 5.3.2 states: + // The number is expressed in thousandths of a unit of text + // space. + // This amount is subtracted from the current horizontal or + // vertical coordinate, depending on the writing mode. + // In the default coordinate system, a positive adjustment + // has the effect of moving the next glyph painted either to + // the left or down by the given amount. + advance = (items[j] * textState.fontSize) / 1000; + var breakTextRun = false; + if (textState.font.vertical) { + offset = advance; + textState.translateTextMatrix(0, offset); + breakTextRun = + textContentItem.textRunBreakAllowed && + advance > textContentItem.fakeMultiSpaceMax; + if (!breakTextRun) { + // Value needs to be added to height to paint down. + textContentItem.height += offset; + } + } else { + advance = -advance; + offset = advance * textState.textHScale; + textState.translateTextMatrix(offset, 0); + breakTextRun = + textContentItem.textRunBreakAllowed && + advance > textContentItem.fakeMultiSpaceMax; + if (!breakTextRun) { + // Value needs to be subtracted from width to paint left. + textContentItem.width += offset; + } + } + if (breakTextRun) { + flushTextContentItem(); + } else if (advance > 0) { + addFakeSpaces(advance, textContentItem.str); + } + } + } + break; + case OPS.showText: + if (!stateManager.state.font) { + self.ensureStateFont(stateManager.state); + continue; + } + buildTextContentItem(args[0]); + break; + case OPS.nextLineShowText: + if (!stateManager.state.font) { + self.ensureStateFont(stateManager.state); + continue; + } + flushTextContentItem(); + textState.carriageReturn(); + buildTextContentItem(args[0]); + break; + case OPS.nextLineSetSpacingShowText: + if (!stateManager.state.font) { + self.ensureStateFont(stateManager.state); + continue; + } + flushTextContentItem(); + textState.wordSpacing = args[0]; + textState.charSpacing = args[1]; + textState.carriageReturn(); + buildTextContentItem(args[2]); + break; + case OPS.paintXObject: + flushTextContentItem(); + if (!xobjs) { + xobjs = resources.get("XObject") || Dict.empty; + } + + var name = args[0].name; + if (name && emptyXObjectCache.getByName(name)) { + break; + } + + next( + new Promise(function (resolveXObject, rejectXObject) { + if (!name) { + throw new FormatError("XObject must be referred to by name."); + } + + let xobj = xobjs.getRaw(name); + if (xobj instanceof Ref) { + if (emptyXObjectCache.getByRef(xobj)) { + resolveXObject(); + return; + } + + xobj = xref.fetch(xobj); + } + + if (!xobj) { + resolveXObject(); + return; + } + if (!isStream(xobj)) { + throw new FormatError("XObject should be a stream"); + } + + const type = xobj.dict.get("Subtype"); + if (!isName(type)) { + throw new FormatError("XObject should have a Name subtype"); + } + + if (type.name !== "Form") { + emptyXObjectCache.set(name, xobj.dict.objId, true); + + resolveXObject(); + return; + } + + // Use a new `StateManager` to prevent incorrect positioning + // of textItems *after* the Form XObject, since errors in the + // data can otherwise prevent `restore` operators from + // executing. + // NOTE: Only an issue when `options.ignoreErrors === true`. + const currentState = stateManager.state.clone(); + const xObjStateManager = new StateManager(currentState); + + const matrix = xobj.dict.getArray("Matrix"); + if (Array.isArray(matrix) && matrix.length === 6) { + xObjStateManager.transform(matrix); + } + + // Enqueue the `textContent` chunk before parsing the /Form + // XObject. + enqueueChunk(); + const sinkWrapper = { + enqueueInvoked: false, + + enqueue(chunk, size) { + this.enqueueInvoked = true; + sink.enqueue(chunk, size); + }, + + get desiredSize() { + return sink.desiredSize; + }, + + get ready() { + return sink.ready; + }, + }; + + self + .getTextContent({ + stream: xobj, + task, + resources: xobj.dict.get("Resources") || resources, + stateManager: xObjStateManager, + normalizeWhitespace, + combineTextItems, + sink: sinkWrapper, + seenStyles, + }) + .then(function () { + if (!sinkWrapper.enqueueInvoked) { + emptyXObjectCache.set(name, xobj.dict.objId, true); + } + resolveXObject(); + }, rejectXObject); + }).catch(function (reason) { + if (reason instanceof AbortException) { + return; + } + if (self.options.ignoreErrors) { + // Error(s) in the XObject -- allow text-extraction to + // continue. + warn(`getTextContent - ignoring XObject: "${reason}".`); + return; + } + throw reason; + }) + ); + return; + case OPS.setGState: + flushTextContentItem(); + var dictName = args[0]; + var extGState = resources.get("ExtGState"); + + if (!isDict(extGState) || !isName(dictName)) { + break; + } + var gState = extGState.get(dictName.name); + if (!isDict(gState)) { + break; + } + var gStateFont = gState.get("Font"); + if (gStateFont) { + textState.fontName = null; + textState.fontSize = gStateFont[1]; + next(handleSetFont(null, gStateFont[0])); + return; + } + break; + } // switch + if (textContent.items.length >= sink.desiredSize) { + // Wait for ready, if we reach highWaterMark. + stop = true; + break; + } + } // while + if (stop) { + next(deferred); + return; + } + flushTextContentItem(); + enqueueChunk(); + resolve(); + }).catch(reason => { + if (reason instanceof AbortException) { + return; + } + if (this.options.ignoreErrors) { + // Error(s) in the TextContent -- allow text-extraction to continue. + warn( + `getTextContent - ignoring errors during "${task.name}" ` + + `task: "${reason}".` + ); + + flushTextContentItem(); + enqueueChunk(); + return; + } + throw reason; + }); + } + + extractDataStructures(dict, baseDict, properties) { + const xref = this.xref; + let cidToGidBytes; + // 9.10.2 + var toUnicode = dict.get("ToUnicode") || baseDict.get("ToUnicode"); + var toUnicodePromise = toUnicode + ? this.readToUnicode(toUnicode) + : Promise.resolve(undefined); + + if (properties.composite) { + // CIDSystemInfo helps to match CID to glyphs + var cidSystemInfo = dict.get("CIDSystemInfo"); + if (isDict(cidSystemInfo)) { + properties.cidSystemInfo = { + registry: stringToPDFString(cidSystemInfo.get("Registry")), + ordering: stringToPDFString(cidSystemInfo.get("Ordering")), + supplement: cidSystemInfo.get("Supplement"), + }; + } + + var cidToGidMap = dict.get("CIDToGIDMap"); + if (isStream(cidToGidMap)) { + cidToGidBytes = cidToGidMap.getBytes(); + } + } + + // Based on 9.6.6 of the spec the encoding can come from multiple places + // and depends on the font type. The base encoding and differences are + // read here, but the encoding that is actually used is chosen during + // glyph mapping in the font. + // TODO: Loading the built in encoding in the font would allow the + // differences to be merged in here not require us to hold on to it. + var differences = []; + var baseEncodingName = null; + var encoding; + if (dict.has("Encoding")) { + encoding = dict.get("Encoding"); + if (isDict(encoding)) { + baseEncodingName = encoding.get("BaseEncoding"); + baseEncodingName = isName(baseEncodingName) + ? baseEncodingName.name + : null; + // Load the differences between the base and original + if (encoding.has("Differences")) { + var diffEncoding = encoding.get("Differences"); + var index = 0; + for (var j = 0, jj = diffEncoding.length; j < jj; j++) { + var data = xref.fetchIfRef(diffEncoding[j]); + if (isNum(data)) { + index = data; + } else if (isName(data)) { + differences[index++] = data.name; + } else { + throw new FormatError( + `Invalid entry in 'Differences' array: ${data}` ); } - }); - return new ToUnicodeMap(toUnicode); - }); + } + } + } else if (isName(encoding)) { + baseEncodingName = encoding.name; + } else { + throw new FormatError("Encoding is not a Name nor a Dict"); + } + // According to table 114 if the encoding is a named encoding it must be + // one of these predefined encodings. + if ( + baseEncodingName !== "MacRomanEncoding" && + baseEncodingName !== "MacExpertEncoding" && + baseEncodingName !== "WinAnsiEncoding" + ) { + baseEncodingName = null; + } + } + + if (baseEncodingName) { + properties.defaultEncoding = getEncoding(baseEncodingName).slice(); + } else { + var isSymbolicFont = !!(properties.flags & FontFlags.Symbolic); + var isNonsymbolicFont = !!(properties.flags & FontFlags.Nonsymbolic); + // According to "Table 114" in section "9.6.6.1 General" (under + // "9.6.6 Character Encoding") of the PDF specification, a Nonsymbolic + // font should use the `StandardEncoding` if no encoding is specified. + encoding = StandardEncoding; + if (properties.type === "TrueType" && !isNonsymbolicFont) { + encoding = WinAnsiEncoding; + } + // The Symbolic attribute can be misused for regular fonts + // Heuristic: we have to check if the font is a standard one also + if (isSymbolicFont) { + encoding = MacRomanEncoding; + if (!properties.file) { + if (/Symbol/i.test(properties.name)) { + encoding = SymbolSetEncoding; + } else if (/Dingbats|Wingdings/i.test(properties.name)) { + encoding = ZapfDingbatsEncoding; + } + } + } + properties.defaultEncoding = encoding; + } + + properties.differences = differences; + properties.baseEncodingName = baseEncodingName; + properties.hasEncoding = !!baseEncodingName || differences.length > 0; + properties.dict = dict; + return toUnicodePromise + .then(readToUnicode => { + properties.toUnicode = readToUnicode; + return this.buildToUnicode(properties); + }) + .then(builtToUnicode => { + properties.toUnicode = builtToUnicode; + if (cidToGidBytes) { + properties.cidToGidMap = this.readCidToGidMap( + cidToGidBytes, + builtToUnicode + ); + } + return properties; + }); + } + + /** + * @returns {ToUnicodeMap} + * @private + */ + _buildSimpleFontToUnicode(properties, forceGlyphs = false) { + assert(!properties.composite, "Must be a simple font."); + + const toUnicode = []; + const encoding = properties.defaultEncoding.slice(); + const baseEncodingName = properties.baseEncodingName; + // Merge in the differences array. + const differences = properties.differences; + for (const charcode in differences) { + const glyphName = differences[charcode]; + if (glyphName === ".notdef") { + // Skip .notdef to prevent rendering errors, e.g. boxes appearing + // where there should be spaces (fixes issue5256.pdf). + continue; + } + encoding[charcode] = glyphName; + } + const glyphsUnicodeMap = getGlyphsUnicode(); + for (const charcode in encoding) { + // a) Map the character code to a character name. + let glyphName = encoding[charcode]; + // b) Look up the character name in the Adobe Glyph List (see the + // Bibliography) to obtain the corresponding Unicode value. + if (glyphName === "") { + continue; + } else if (glyphsUnicodeMap[glyphName] === undefined) { + // (undocumented) c) Few heuristics to recognize unknown glyphs + // NOTE: Adobe Reader does not do this step, but OSX Preview does + let code = 0; + switch (glyphName[0]) { + case "G": // Gxx glyph + if (glyphName.length === 3) { + code = parseInt(glyphName.substring(1), 16); + } + break; + case "g": // g00xx glyph + if (glyphName.length === 5) { + code = parseInt(glyphName.substring(1), 16); + } + break; + case "C": // Cdd{d} glyph + case "c": // cdd{d} glyph + if (glyphName.length >= 3 && glyphName.length <= 4) { + const codeStr = glyphName.substring(1); + + if (forceGlyphs) { + code = parseInt(codeStr, 16); + break; + } + // Normally the Cdd{d}/cdd{d} glyphName format will contain + // regular, i.e. base 10, charCodes (see issue4550.pdf)... + code = +codeStr; + + // ... however some PDF generators violate that assumption by + // containing glyph, i.e. base 16, codes instead. + // In that case we need to re-parse the *entire* encoding to + // prevent broken text-selection (fixes issue9655_reduced.pdf). + if ( + Number.isNaN(code) && + Number.isInteger(parseInt(codeStr, 16)) + ) { + return this._buildSimpleFontToUnicode( + properties, + /* forceGlyphs */ true + ); + } + } + break; + default: + // 'uniXXXX'/'uXXXX{XX}' glyphs + const unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap); + if (unicode !== -1) { + code = unicode; + } + } + if (code > 0 && code <= 0x10ffff && Number.isInteger(code)) { + // If `baseEncodingName` is one the predefined encodings, and `code` + // equals `charcode`, using the glyph defined in the baseEncoding + // seems to yield a better `toUnicode` mapping (fixes issue 5070). + if (baseEncodingName && code === +charcode) { + const baseEncoding = getEncoding(baseEncodingName); + if (baseEncoding && (glyphName = baseEncoding[charcode])) { + toUnicode[charcode] = String.fromCharCode( + glyphsUnicodeMap[glyphName] + ); + continue; + } + } + toUnicode[charcode] = String.fromCodePoint(code); + } + continue; + } + toUnicode[charcode] = String.fromCharCode(glyphsUnicodeMap[glyphName]); + } + return new ToUnicodeMap(toUnicode); + } + + /** + * Builds a char code to unicode map based on section 9.10 of the spec. + * @param {Object} properties Font properties object. + * @returns {Promise} A Promise that is resolved with a + * {ToUnicodeMap|IdentityToUnicodeMap} object. + */ + buildToUnicode(properties) { + properties.hasIncludedToUnicodeMap = + !!properties.toUnicode && properties.toUnicode.length > 0; + + // Section 9.10.2 Mapping Character Codes to Unicode Values + if (properties.hasIncludedToUnicodeMap) { + // Some fonts contain incomplete ToUnicode data, causing issues with + // text-extraction. For simple fonts, containing encoding information, + // use a fallback ToUnicode map to improve this (fixes issue8229.pdf). + if (!properties.composite && properties.hasEncoding) { + properties.fallbackToUnicode = this._buildSimpleFontToUnicode( + properties + ); } - // The viewer's choice, just use an identity map. - return Promise.resolve( - new IdentityToUnicodeMap(properties.firstChar, properties.lastChar) - ); - }, + return Promise.resolve(properties.toUnicode); + } - readToUnicode: function PartialEvaluator_readToUnicode(toUnicode) { - var cmapObj = toUnicode; - if (isName(cmapObj)) { - return CMapFactory.create({ - encoding: cmapObj, - fetchBuiltInCMap: this._fetchBuiltInCMapBound, - useCMap: null, - }).then(function (cmap) { + // According to the spec if the font is a simple font we should only map + // to unicode if the base encoding is MacRoman, MacExpert, or WinAnsi or + // the differences array only contains adobe standard or symbol set names, + // in pratice it seems better to always try to create a toUnicode map + // based of the default encoding. + if (!properties.composite /* is simple font */) { + return Promise.resolve(this._buildSimpleFontToUnicode(properties)); + } + + // If the font is a composite font that uses one of the predefined CMaps + // listed in Table 118 (except Identity–H and Identity–V) or whose + // descendant CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or + // Adobe-Korea1 character collection: + if ( + properties.composite && + ((properties.cMap.builtInCMap && + !(properties.cMap instanceof IdentityCMap)) || + (properties.cidSystemInfo.registry === "Adobe" && + (properties.cidSystemInfo.ordering === "GB1" || + properties.cidSystemInfo.ordering === "CNS1" || + properties.cidSystemInfo.ordering === "Japan1" || + properties.cidSystemInfo.ordering === "Korea1"))) + ) { + // Then: + // a) Map the character code to a character identifier (CID) according + // to the font’s CMap. + // b) Obtain the registry and ordering of the character collection used + // by the font’s CMap (for example, Adobe and Japan1) from its + // CIDSystemInfo dictionary. + const registry = properties.cidSystemInfo.registry; + const ordering = properties.cidSystemInfo.ordering; + // c) Construct a second CMap name by concatenating the registry and + // ordering obtained in step (b) in the format registry–ordering–UCS2 + // (for example, Adobe–Japan1–UCS2). + const ucs2CMapName = Name.get(registry + "-" + ordering + "-UCS2"); + // d) Obtain the CMap with the name constructed in step (c) (available + // from the ASN Web site; see the Bibliography). + return CMapFactory.create({ + encoding: ucs2CMapName, + fetchBuiltInCMap: this._fetchBuiltInCMapBound, + useCMap: null, + }).then(function (ucs2CMap) { + const cMap = properties.cMap; + const toUnicode = []; + cMap.forEach(function (charcode, cid) { + if (cid > 0xffff) { + throw new FormatError("Max size of CID is 65,535"); + } + // e) Map the CID obtained in step (a) according to the CMap + // obtained in step (d), producing a Unicode value. + const ucs2 = ucs2CMap.lookup(cid); + if (ucs2) { + toUnicode[charcode] = String.fromCharCode( + (ucs2.charCodeAt(0) << 8) + ucs2.charCodeAt(1) + ); + } + }); + return new ToUnicodeMap(toUnicode); + }); + } + + // The viewer's choice, just use an identity map. + return Promise.resolve( + new IdentityToUnicodeMap(properties.firstChar, properties.lastChar) + ); + } + + readToUnicode(toUnicode) { + var cmapObj = toUnicode; + if (isName(cmapObj)) { + return CMapFactory.create({ + encoding: cmapObj, + fetchBuiltInCMap: this._fetchBuiltInCMapBound, + useCMap: null, + }).then(function (cmap) { + if (cmap instanceof IdentityCMap) { + return new IdentityToUnicodeMap(0, 0xffff); + } + return new ToUnicodeMap(cmap.getMap()); + }); + } else if (isStream(cmapObj)) { + return CMapFactory.create({ + encoding: cmapObj, + fetchBuiltInCMap: this._fetchBuiltInCMapBound, + useCMap: null, + }).then( + function (cmap) { if (cmap instanceof IdentityCMap) { return new IdentityToUnicodeMap(0, 0xffff); } - return new ToUnicodeMap(cmap.getMap()); - }); - } else if (isStream(cmapObj)) { - return CMapFactory.create({ - encoding: cmapObj, - fetchBuiltInCMap: this._fetchBuiltInCMapBound, - useCMap: null, - }).then( - function (cmap) { - if (cmap instanceof IdentityCMap) { - return new IdentityToUnicodeMap(0, 0xffff); - } - var map = new Array(cmap.length); - // Convert UTF-16BE - // NOTE: cmap can be a sparse array, so use forEach instead of - // `for(;;)` to iterate over all keys. - cmap.forEach(function (charCode, token) { - var str = []; - for (var k = 0; k < token.length; k += 2) { - var w1 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1); - if ((w1 & 0xf800) !== 0xd800) { - // w1 < 0xD800 || w1 > 0xDFFF - str.push(w1); - continue; - } - k += 2; - var w2 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1); - str.push(((w1 & 0x3ff) << 10) + (w2 & 0x3ff) + 0x10000); + var map = new Array(cmap.length); + // Convert UTF-16BE + // NOTE: cmap can be a sparse array, so use forEach instead of + // `for(;;)` to iterate over all keys. + cmap.forEach(function (charCode, token) { + var str = []; + for (var k = 0; k < token.length; k += 2) { + var w1 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1); + if ((w1 & 0xf800) !== 0xd800) { + // w1 < 0xD800 || w1 > 0xDFFF + str.push(w1); + continue; } - map[charCode] = String.fromCodePoint.apply(String, str); - }); - return new ToUnicodeMap(map); - }, - reason => { - if (reason instanceof AbortException) { - return null; + k += 2; + var w2 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1); + str.push(((w1 & 0x3ff) << 10) + (w2 & 0x3ff) + 0x10000); } - if (this.options.ignoreErrors) { - // Error in the ToUnicode data -- sending unsupported feature - // notification and allow font parsing to continue. - this.handler.send("UnsupportedFeature", { - featureId: UNSUPPORTED_FEATURES.errorFontToUnicode, - }); - warn(`readToUnicode - ignoring ToUnicode data: "${reason}".`); - return null; - } - throw reason; + map[charCode] = String.fromCodePoint.apply(String, str); + }); + return new ToUnicodeMap(map); + }, + reason => { + if (reason instanceof AbortException) { + return null; } - ); - } - return Promise.resolve(null); - }, - - readCidToGidMap(glyphsData, toUnicode) { - // Extract the encoding from the CIDToGIDMap - - // Set encoding 0 to later verify the font has an encoding - var result = []; - for (var j = 0, jj = glyphsData.length; j < jj; j++) { - var glyphID = (glyphsData[j++] << 8) | glyphsData[j]; - const code = j >> 1; - if (glyphID === 0 && !toUnicode.has(code)) { - continue; + if (this.options.ignoreErrors) { + // Error in the ToUnicode data -- sending unsupported feature + // notification and allow font parsing to continue. + this.handler.send("UnsupportedFeature", { + featureId: UNSUPPORTED_FEATURES.errorFontToUnicode, + }); + warn(`readToUnicode - ignoring ToUnicode data: "${reason}".`); + return null; + } + throw reason; } - result[code] = glyphID; + ); + } + return Promise.resolve(null); + } + + readCidToGidMap(glyphsData, toUnicode) { + // Extract the encoding from the CIDToGIDMap + + // Set encoding 0 to later verify the font has an encoding + var result = []; + for (var j = 0, jj = glyphsData.length; j < jj; j++) { + var glyphID = (glyphsData[j++] << 8) | glyphsData[j]; + const code = j >> 1; + if (glyphID === 0 && !toUnicode.has(code)) { + continue; } - return result; - }, + result[code] = glyphID; + } + return result; + } - extractWidths: function PartialEvaluator_extractWidths( - dict, - descriptor, - properties - ) { - var xref = this.xref; - var glyphsWidths = []; - var defaultWidth = 0; - var glyphsVMetrics = []; - var defaultVMetrics; - var i, ii, j, jj, start, code, widths; - if (properties.composite) { - defaultWidth = dict.has("DW") ? dict.get("DW") : 1000; + extractWidths(dict, descriptor, properties) { + var xref = this.xref; + var glyphsWidths = []; + var defaultWidth = 0; + var glyphsVMetrics = []; + var defaultVMetrics; + var i, ii, j, jj, start, code, widths; + if (properties.composite) { + defaultWidth = dict.has("DW") ? dict.get("DW") : 1000; - widths = dict.get("W"); - if (widths) { - for (i = 0, ii = widths.length; i < ii; i++) { - start = xref.fetchIfRef(widths[i++]); - code = xref.fetchIfRef(widths[i]); + widths = dict.get("W"); + if (widths) { + for (i = 0, ii = widths.length; i < ii; i++) { + start = xref.fetchIfRef(widths[i++]); + code = xref.fetchIfRef(widths[i]); + if (Array.isArray(code)) { + for (j = 0, jj = code.length; j < jj; j++) { + glyphsWidths[start++] = xref.fetchIfRef(code[j]); + } + } else { + var width = xref.fetchIfRef(widths[++i]); + for (j = start; j <= code; j++) { + glyphsWidths[j] = width; + } + } + } + } + + if (properties.vertical) { + var vmetrics = dict.getArray("DW2") || [880, -1000]; + defaultVMetrics = [vmetrics[1], defaultWidth * 0.5, vmetrics[0]]; + vmetrics = dict.get("W2"); + if (vmetrics) { + for (i = 0, ii = vmetrics.length; i < ii; i++) { + start = xref.fetchIfRef(vmetrics[i++]); + code = xref.fetchIfRef(vmetrics[i]); if (Array.isArray(code)) { for (j = 0, jj = code.length; j < jj; j++) { - glyphsWidths[start++] = xref.fetchIfRef(code[j]); + glyphsVMetrics[start++] = [ + xref.fetchIfRef(code[j++]), + xref.fetchIfRef(code[j++]), + xref.fetchIfRef(code[j]), + ]; } } else { - var width = xref.fetchIfRef(widths[++i]); + var vmetric = [ + xref.fetchIfRef(vmetrics[++i]), + xref.fetchIfRef(vmetrics[++i]), + xref.fetchIfRef(vmetrics[++i]), + ]; for (j = start; j <= code; j++) { - glyphsWidths[j] = width; + glyphsVMetrics[j] = vmetric; } } } } - - if (properties.vertical) { - var vmetrics = dict.getArray("DW2") || [880, -1000]; - defaultVMetrics = [vmetrics[1], defaultWidth * 0.5, vmetrics[0]]; - vmetrics = dict.get("W2"); - if (vmetrics) { - for (i = 0, ii = vmetrics.length; i < ii; i++) { - start = xref.fetchIfRef(vmetrics[i++]); - code = xref.fetchIfRef(vmetrics[i]); - if (Array.isArray(code)) { - for (j = 0, jj = code.length; j < jj; j++) { - glyphsVMetrics[start++] = [ - xref.fetchIfRef(code[j++]), - xref.fetchIfRef(code[j++]), - xref.fetchIfRef(code[j]), - ]; - } - } else { - var vmetric = [ - xref.fetchIfRef(vmetrics[++i]), - xref.fetchIfRef(vmetrics[++i]), - xref.fetchIfRef(vmetrics[++i]), - ]; - for (j = start; j <= code; j++) { - glyphsVMetrics[j] = vmetric; - } - } - } - } + } + } else { + var firstChar = properties.firstChar; + widths = dict.get("Widths"); + if (widths) { + j = firstChar; + for (i = 0, ii = widths.length; i < ii; i++) { + glyphsWidths[j++] = xref.fetchIfRef(widths[i]); } + defaultWidth = parseFloat(descriptor.get("MissingWidth")) || 0; } else { - var firstChar = properties.firstChar; - widths = dict.get("Widths"); - if (widths) { - j = firstChar; - for (i = 0, ii = widths.length; i < ii; i++) { - glyphsWidths[j++] = xref.fetchIfRef(widths[i]); - } - defaultWidth = parseFloat(descriptor.get("MissingWidth")) || 0; - } else { - // Trying get the BaseFont metrics (see comment above). - var baseFontName = dict.get("BaseFont"); - if (isName(baseFontName)) { - var metrics = this.getBaseFontMetrics(baseFontName.name); + // Trying get the BaseFont metrics (see comment above). + var baseFontName = dict.get("BaseFont"); + if (isName(baseFontName)) { + var metrics = this.getBaseFontMetrics(baseFontName.name); - glyphsWidths = this.buildCharCodeToWidth( - metrics.widths, - properties - ); - defaultWidth = metrics.defaultWidth; - } + glyphsWidths = this.buildCharCodeToWidth(metrics.widths, properties); + defaultWidth = metrics.defaultWidth; } } + } - // Heuristic: detection of monospace font by checking all non-zero widths - var isMonospace = true; - var firstWidth = defaultWidth; - for (var glyph in glyphsWidths) { - var glyphWidth = glyphsWidths[glyph]; - if (!glyphWidth) { - continue; - } - if (!firstWidth) { - firstWidth = glyphWidth; - continue; - } - if (firstWidth !== glyphWidth) { - isMonospace = false; - break; - } + // Heuristic: detection of monospace font by checking all non-zero widths + var isMonospace = true; + var firstWidth = defaultWidth; + for (var glyph in glyphsWidths) { + var glyphWidth = glyphsWidths[glyph]; + if (!glyphWidth) { + continue; } - if (isMonospace) { - properties.flags |= FontFlags.FixedPitch; + if (!firstWidth) { + firstWidth = glyphWidth; + continue; } - - properties.defaultWidth = defaultWidth; - properties.widths = glyphsWidths; - properties.defaultVMetrics = defaultVMetrics; - properties.vmetrics = glyphsVMetrics; - }, - - isSerifFont: function PartialEvaluator_isSerifFont(baseFontName) { - // Simulating descriptor flags attribute - var fontNameWoStyle = baseFontName.split("-")[0]; - return ( - fontNameWoStyle in getSerifFonts() || - fontNameWoStyle.search(/serif/gi) !== -1 - ); - }, - - getBaseFontMetrics: function PartialEvaluator_getBaseFontMetrics(name) { - var defaultWidth = 0; - var widths = []; - var monospace = false; - var stdFontMap = getStdFontMap(); - var lookupName = stdFontMap[name] || name; - var Metrics = getMetrics(); - - if (!(lookupName in Metrics)) { - // Use default fonts for looking up font metrics if the passed - // font is not a base font - if (this.isSerifFont(name)) { - lookupName = "Times-Roman"; - } else { - lookupName = "Helvetica"; - } + if (firstWidth !== glyphWidth) { + isMonospace = false; + break; } - var glyphWidths = Metrics[lookupName]; + } + if (isMonospace) { + properties.flags |= FontFlags.FixedPitch; + } - if (isNum(glyphWidths)) { - defaultWidth = glyphWidths; - monospace = true; + properties.defaultWidth = defaultWidth; + properties.widths = glyphsWidths; + properties.defaultVMetrics = defaultVMetrics; + properties.vmetrics = glyphsVMetrics; + } + + isSerifFont(baseFontName) { + // Simulating descriptor flags attribute + var fontNameWoStyle = baseFontName.split("-")[0]; + return ( + fontNameWoStyle in getSerifFonts() || + fontNameWoStyle.search(/serif/gi) !== -1 + ); + } + + getBaseFontMetrics(name) { + var defaultWidth = 0; + var widths = []; + var monospace = false; + var stdFontMap = getStdFontMap(); + var lookupName = stdFontMap[name] || name; + var Metrics = getMetrics(); + + if (!(lookupName in Metrics)) { + // Use default fonts for looking up font metrics if the passed + // font is not a base font + if (this.isSerifFont(name)) { + lookupName = "Times-Roman"; } else { - widths = glyphWidths(); // expand lazy widths array + lookupName = "Helvetica"; } + } + var glyphWidths = Metrics[lookupName]; - return { - defaultWidth, - monospace, - widths, - }; - }, + if (isNum(glyphWidths)) { + defaultWidth = glyphWidths; + monospace = true; + } else { + widths = glyphWidths(); // expand lazy widths array + } - buildCharCodeToWidth: function PartialEvaluator_bulildCharCodeToWidth( - widthsByGlyphName, - properties - ) { - var widths = Object.create(null); - var differences = properties.differences; - var encoding = properties.defaultEncoding; - for (var charCode = 0; charCode < 256; charCode++) { - if ( - charCode in differences && - widthsByGlyphName[differences[charCode]] - ) { - widths[charCode] = widthsByGlyphName[differences[charCode]]; - continue; - } - if (charCode in encoding && widthsByGlyphName[encoding[charCode]]) { - widths[charCode] = widthsByGlyphName[encoding[charCode]]; - continue; - } + return { + defaultWidth, + monospace, + widths, + }; + } + + buildCharCodeToWidth(widthsByGlyphName, properties) { + var widths = Object.create(null); + var differences = properties.differences; + var encoding = properties.defaultEncoding; + for (var charCode = 0; charCode < 256; charCode++) { + if (charCode in differences && widthsByGlyphName[differences[charCode]]) { + widths[charCode] = widthsByGlyphName[differences[charCode]]; + continue; } - return widths; - }, + if (charCode in encoding && widthsByGlyphName[encoding[charCode]]) { + widths[charCode] = widthsByGlyphName[encoding[charCode]]; + continue; + } + } + return widths; + } - preEvaluateFont: function PartialEvaluator_preEvaluateFont(dict) { - var baseDict = dict; - var type = dict.get("Subtype"); + preEvaluateFont(dict) { + var baseDict = dict; + var type = dict.get("Subtype"); + if (!isName(type)) { + throw new FormatError("invalid font Subtype"); + } + + var composite = false; + var uint8array; + if (type.name === "Type0") { + // If font is a composite + // - get the descendant font + // - set the type according to the descendant font + // - get the FontDescriptor from the descendant font + var df = dict.get("DescendantFonts"); + if (!df) { + throw new FormatError("Descendant fonts are not specified"); + } + dict = Array.isArray(df) ? this.xref.fetchIfRef(df[0]) : df; + + type = dict.get("Subtype"); if (!isName(type)) { throw new FormatError("invalid font Subtype"); } + composite = true; + } - var composite = false; - var uint8array; - if (type.name === "Type0") { - // If font is a composite - // - get the descendant font - // - set the type according to the descendant font - // - get the FontDescriptor from the descendant font - var df = dict.get("DescendantFonts"); - if (!df) { - throw new FormatError("Descendant fonts are not specified"); - } - dict = Array.isArray(df) ? this.xref.fetchIfRef(df[0]) : df; + var descriptor = dict.get("FontDescriptor"); + if (descriptor) { + var hash = new MurmurHash3_64(); + var encoding = baseDict.getRaw("Encoding"); + if (isName(encoding)) { + hash.update(encoding.name); + } else if (isRef(encoding)) { + hash.update(encoding.toString()); + } else if (isDict(encoding)) { + var keys = encoding.getKeys(); + for (var i = 0, ii = keys.length; i < ii; i++) { + var entry = encoding.getRaw(keys[i]); + if (isName(entry)) { + hash.update(entry.name); + } else if (isRef(entry)) { + hash.update(entry.toString()); + } else if (Array.isArray(entry)) { + // 'Differences' array (fixes bug1157493.pdf). + var diffLength = entry.length, + diffBuf = new Array(diffLength); - type = dict.get("Subtype"); - if (!isName(type)) { - throw new FormatError("invalid font Subtype"); - } - composite = true; - } - - var descriptor = dict.get("FontDescriptor"); - if (descriptor) { - var hash = new MurmurHash3_64(); - var encoding = baseDict.getRaw("Encoding"); - if (isName(encoding)) { - hash.update(encoding.name); - } else if (isRef(encoding)) { - hash.update(encoding.toString()); - } else if (isDict(encoding)) { - var keys = encoding.getKeys(); - for (var i = 0, ii = keys.length; i < ii; i++) { - var entry = encoding.getRaw(keys[i]); - if (isName(entry)) { - hash.update(entry.name); - } else if (isRef(entry)) { - hash.update(entry.toString()); - } else if (Array.isArray(entry)) { - // 'Differences' array (fixes bug1157493.pdf). - var diffLength = entry.length, - diffBuf = new Array(diffLength); - - for (var j = 0; j < diffLength; j++) { - var diffEntry = entry[j]; - if (isName(diffEntry)) { - diffBuf[j] = diffEntry.name; - } else if (isNum(diffEntry) || isRef(diffEntry)) { - diffBuf[j] = diffEntry.toString(); - } + for (var j = 0; j < diffLength; j++) { + var diffEntry = entry[j]; + if (isName(diffEntry)) { + diffBuf[j] = diffEntry.name; + } else if (isNum(diffEntry) || isRef(diffEntry)) { + diffBuf[j] = diffEntry.toString(); } - hash.update(diffBuf.join()); } + hash.update(diffBuf.join()); } } - - const firstChar = dict.get("FirstChar") || 0; - const lastChar = dict.get("LastChar") || (composite ? 0xffff : 0xff); - hash.update(`${firstChar}-${lastChar}`); - - var toUnicode = dict.get("ToUnicode") || baseDict.get("ToUnicode"); - if (isStream(toUnicode)) { - var stream = toUnicode.str || toUnicode; - uint8array = stream.buffer - ? new Uint8Array(stream.buffer.buffer, 0, stream.bufferLength) - : new Uint8Array( - stream.bytes.buffer, - stream.start, - stream.end - stream.start - ); - hash.update(uint8array); - } else if (isName(toUnicode)) { - hash.update(toUnicode.name); - } - - var widths = dict.get("Widths") || baseDict.get("Widths"); - if (widths) { - uint8array = new Uint8Array(new Uint32Array(widths).buffer); - hash.update(uint8array); - } } - return { - descriptor, - dict, - baseDict, - composite, - type: type.name, - hash: hash ? hash.hexdigest() : "", - }; - }, - - translateFont: function PartialEvaluator_translateFont(preEvaluatedFont) { - var baseDict = preEvaluatedFont.baseDict; - var dict = preEvaluatedFont.dict; - var composite = preEvaluatedFont.composite; - var descriptor = preEvaluatedFont.descriptor; - var type = preEvaluatedFont.type; - var maxCharIndex = composite ? 0xffff : 0xff; - var properties; const firstChar = dict.get("FirstChar") || 0; - const lastChar = dict.get("LastChar") || maxCharIndex; + const lastChar = dict.get("LastChar") || (composite ? 0xffff : 0xff); + hash.update(`${firstChar}-${lastChar}`); - if (!descriptor) { - if (type === "Type3") { - // FontDescriptor is only required for Type3 fonts when the document - // is a tagged pdf. Create a barbebones one to get by. - descriptor = new Dict(null); - descriptor.set("FontName", Name.get(type)); - descriptor.set("FontBBox", dict.getArray("FontBBox") || [0, 0, 0, 0]); - } else { - // Before PDF 1.5 if the font was one of the base 14 fonts, having a - // FontDescriptor was not required. - // This case is here for compatibility. - var baseFontName = dict.get("BaseFont"); - if (!isName(baseFontName)) { - throw new FormatError("Base font is not specified"); - } - - // Using base font name as a font name. - baseFontName = baseFontName.name.replace(/[,_]/g, "-"); - var metrics = this.getBaseFontMetrics(baseFontName); - - // Simulating descriptor flags attribute - var fontNameWoStyle = baseFontName.split("-")[0]; - var flags = - (this.isSerifFont(fontNameWoStyle) ? FontFlags.Serif : 0) | - (metrics.monospace ? FontFlags.FixedPitch : 0) | - (getSymbolsFonts()[fontNameWoStyle] - ? FontFlags.Symbolic - : FontFlags.Nonsymbolic); - - properties = { - type, - name: baseFontName, - widths: metrics.widths, - defaultWidth: metrics.defaultWidth, - flags, - firstChar, - lastChar, - }; - const widths = dict.get("Widths"); - return this.extractDataStructures(dict, dict, properties).then( - newProperties => { - if (widths) { - const glyphWidths = []; - let j = firstChar; - for (let i = 0, ii = widths.length; i < ii; i++) { - glyphWidths[j++] = this.xref.fetchIfRef(widths[i]); - } - newProperties.widths = glyphWidths; - } else { - newProperties.widths = this.buildCharCodeToWidth( - metrics.widths, - newProperties - ); - } - return new Font(baseFontName, null, newProperties); - } - ); - } + var toUnicode = dict.get("ToUnicode") || baseDict.get("ToUnicode"); + if (isStream(toUnicode)) { + var stream = toUnicode.str || toUnicode; + uint8array = stream.buffer + ? new Uint8Array(stream.buffer.buffer, 0, stream.bufferLength) + : new Uint8Array( + stream.bytes.buffer, + stream.start, + stream.end - stream.start + ); + hash.update(uint8array); + } else if (isName(toUnicode)) { + hash.update(toUnicode.name); } - // According to the spec if 'FontDescriptor' is declared, 'FirstChar', - // 'LastChar' and 'Widths' should exist too, but some PDF encoders seem - // to ignore this rule when a variant of a standard font is used. - // TODO Fill the width array depending on which of the base font this is - // a variant. - - var fontName = descriptor.get("FontName"); - var baseFont = dict.get("BaseFont"); - // Some bad PDFs have a string as the font name. - if (isString(fontName)) { - fontName = Name.get(fontName); - } - if (isString(baseFont)) { - baseFont = Name.get(baseFont); + var widths = dict.get("Widths") || baseDict.get("Widths"); + if (widths) { + uint8array = new Uint8Array(new Uint32Array(widths).buffer); + hash.update(uint8array); } + } - if (type !== "Type3") { - var fontNameStr = fontName && fontName.name; - var baseFontStr = baseFont && baseFont.name; - if (fontNameStr !== baseFontStr) { - info( - `The FontDescriptor\'s FontName is "${fontNameStr}" but ` + - `should be the same as the Font\'s BaseFont "${baseFontStr}".` - ); - // Workaround for cases where e.g. fontNameStr = 'Arial' and - // baseFontStr = 'Arial,Bold' (needed when no font file is embedded). - if ( - fontNameStr && - baseFontStr && - baseFontStr.startsWith(fontNameStr) - ) { - fontName = baseFont; - } - } - } - fontName = fontName || baseFont; + return { + descriptor, + dict, + baseDict, + composite, + type: type.name, + hash: hash ? hash.hexdigest() : "", + }; + } - if (!isName(fontName)) { - throw new FormatError("invalid font name"); - } + translateFont(preEvaluatedFont) { + var baseDict = preEvaluatedFont.baseDict; + var dict = preEvaluatedFont.dict; + var composite = preEvaluatedFont.composite; + var descriptor = preEvaluatedFont.descriptor; + var type = preEvaluatedFont.type; + var maxCharIndex = composite ? 0xffff : 0xff; + var properties; + const firstChar = dict.get("FirstChar") || 0; + const lastChar = dict.get("LastChar") || maxCharIndex; - var fontFile = descriptor.get("FontFile", "FontFile2", "FontFile3"); - if (fontFile) { - if (fontFile.dict) { - var subtype = fontFile.dict.get("Subtype"); - if (subtype) { - subtype = subtype.name; - } - var length1 = fontFile.dict.get("Length1"); - var length2 = fontFile.dict.get("Length2"); - var length3 = fontFile.dict.get("Length3"); - } - } - - properties = { - type, - name: fontName.name, - subtype, - file: fontFile, - length1, - length2, - length3, - loadedName: baseDict.loadedName, - composite, - fixedPitch: false, - fontMatrix: dict.getArray("FontMatrix") || FONT_IDENTITY_MATRIX, - firstChar: firstChar || 0, - lastChar: lastChar || maxCharIndex, - bbox: descriptor.getArray("FontBBox"), - ascent: descriptor.get("Ascent"), - descent: descriptor.get("Descent"), - xHeight: descriptor.get("XHeight"), - capHeight: descriptor.get("CapHeight"), - flags: descriptor.get("Flags"), - italicAngle: descriptor.get("ItalicAngle"), - isType3Font: false, - }; - - var cMapPromise; - if (composite) { - var cidEncoding = baseDict.get("Encoding"); - if (isName(cidEncoding)) { - properties.cidEncoding = cidEncoding.name; - } - cMapPromise = CMapFactory.create({ - encoding: cidEncoding, - fetchBuiltInCMap: this._fetchBuiltInCMapBound, - useCMap: null, - }).then(function (cMap) { - properties.cMap = cMap; - properties.vertical = properties.cMap.vertical; - }); + if (!descriptor) { + if (type === "Type3") { + // FontDescriptor is only required for Type3 fonts when the document + // is a tagged pdf. Create a barbebones one to get by. + descriptor = new Dict(null); + descriptor.set("FontName", Name.get(type)); + descriptor.set("FontBBox", dict.getArray("FontBBox") || [0, 0, 0, 0]); } else { - cMapPromise = Promise.resolve(undefined); - } + // Before PDF 1.5 if the font was one of the base 14 fonts, having a + // FontDescriptor was not required. + // This case is here for compatibility. + var baseFontName = dict.get("BaseFont"); + if (!isName(baseFontName)) { + throw new FormatError("Base font is not specified"); + } - return cMapPromise - .then(() => { - return this.extractDataStructures(dict, baseDict, properties); - }) - .then(newProperties => { - this.extractWidths(dict, descriptor, newProperties); + // Using base font name as a font name. + baseFontName = baseFontName.name.replace(/[,_]/g, "-"); + var metrics = this.getBaseFontMetrics(baseFontName); - if (type === "Type3") { - newProperties.isType3Font = true; + // Simulating descriptor flags attribute + var fontNameWoStyle = baseFontName.split("-")[0]; + var flags = + (this.isSerifFont(fontNameWoStyle) ? FontFlags.Serif : 0) | + (metrics.monospace ? FontFlags.FixedPitch : 0) | + (getSymbolsFonts()[fontNameWoStyle] + ? FontFlags.Symbolic + : FontFlags.Nonsymbolic); + + properties = { + type, + name: baseFontName, + widths: metrics.widths, + defaultWidth: metrics.defaultWidth, + flags, + firstChar, + lastChar, + }; + const widths = dict.get("Widths"); + return this.extractDataStructures(dict, dict, properties).then( + newProperties => { + if (widths) { + const glyphWidths = []; + let j = firstChar; + for (let i = 0, ii = widths.length; i < ii; i++) { + glyphWidths[j++] = this.xref.fetchIfRef(widths[i]); + } + newProperties.widths = glyphWidths; + } else { + newProperties.widths = this.buildCharCodeToWidth( + metrics.widths, + newProperties + ); + } + return new Font(baseFontName, null, newProperties); } - return new Font(fontName.name, fontFile, newProperties); - }); - }, - }; + ); + } + } - PartialEvaluator.buildFontPaths = function (font, glyphs, handler) { + // According to the spec if 'FontDescriptor' is declared, 'FirstChar', + // 'LastChar' and 'Widths' should exist too, but some PDF encoders seem + // to ignore this rule when a variant of a standard font is used. + // TODO Fill the width array depending on which of the base font this is + // a variant. + + var fontName = descriptor.get("FontName"); + var baseFont = dict.get("BaseFont"); + // Some bad PDFs have a string as the font name. + if (isString(fontName)) { + fontName = Name.get(fontName); + } + if (isString(baseFont)) { + baseFont = Name.get(baseFont); + } + + if (type !== "Type3") { + var fontNameStr = fontName && fontName.name; + var baseFontStr = baseFont && baseFont.name; + if (fontNameStr !== baseFontStr) { + info( + `The FontDescriptor\'s FontName is "${fontNameStr}" but ` + + `should be the same as the Font\'s BaseFont "${baseFontStr}".` + ); + // Workaround for cases where e.g. fontNameStr = 'Arial' and + // baseFontStr = 'Arial,Bold' (needed when no font file is embedded). + if (fontNameStr && baseFontStr && baseFontStr.startsWith(fontNameStr)) { + fontName = baseFont; + } + } + } + fontName = fontName || baseFont; + + if (!isName(fontName)) { + throw new FormatError("invalid font name"); + } + + var fontFile = descriptor.get("FontFile", "FontFile2", "FontFile3"); + if (fontFile) { + if (fontFile.dict) { + var subtype = fontFile.dict.get("Subtype"); + if (subtype) { + subtype = subtype.name; + } + var length1 = fontFile.dict.get("Length1"); + var length2 = fontFile.dict.get("Length2"); + var length3 = fontFile.dict.get("Length3"); + } + } + + properties = { + type, + name: fontName.name, + subtype, + file: fontFile, + length1, + length2, + length3, + loadedName: baseDict.loadedName, + composite, + fixedPitch: false, + fontMatrix: dict.getArray("FontMatrix") || FONT_IDENTITY_MATRIX, + firstChar: firstChar || 0, + lastChar: lastChar || maxCharIndex, + bbox: descriptor.getArray("FontBBox"), + ascent: descriptor.get("Ascent"), + descent: descriptor.get("Descent"), + xHeight: descriptor.get("XHeight"), + capHeight: descriptor.get("CapHeight"), + flags: descriptor.get("Flags"), + italicAngle: descriptor.get("ItalicAngle"), + isType3Font: false, + }; + + var cMapPromise; + if (composite) { + var cidEncoding = baseDict.get("Encoding"); + if (isName(cidEncoding)) { + properties.cidEncoding = cidEncoding.name; + } + cMapPromise = CMapFactory.create({ + encoding: cidEncoding, + fetchBuiltInCMap: this._fetchBuiltInCMapBound, + useCMap: null, + }).then(function (cMap) { + properties.cMap = cMap; + properties.vertical = properties.cMap.vertical; + }); + } else { + cMapPromise = Promise.resolve(undefined); + } + + return cMapPromise + .then(() => { + return this.extractDataStructures(dict, baseDict, properties); + }) + .then(newProperties => { + this.extractWidths(dict, descriptor, newProperties); + + if (type === "Type3") { + newProperties.isType3Font = true; + } + return new Font(fontName.name, fontFile, newProperties); + }); + } + + static buildFontPaths(font, glyphs, handler) { function buildPath(fontChar) { if (font.renderer.hasBuiltPath(fontChar)) { return; @@ -3311,25 +3276,18 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { buildPath(accent.fontChar); } } - }; + } - // TODO: Change this to a `static` getter, using shadowing, once - // `PartialEvaluator` is converted to a proper class. - PartialEvaluator.getFallbackFontDict = function () { - if (this._fallbackFontDict) { - return this._fallbackFontDict; - } + static get fallbackFontDict() { const dict = new Dict(); dict.set("BaseFont", Name.get("PDFJS-FallbackFont")); dict.set("Type", Name.get("FallbackType")); dict.set("Subtype", Name.get("FallbackType")); dict.set("Encoding", Name.get("WinAnsiEncoding")); - return (this._fallbackFontDict = dict); - }; - - return PartialEvaluator; -})(); + return shadow(this, "fallbackFontDict", dict); + } +} class TranslatedFont { constructor({ loadedName, font, dict, extraProperties = false }) { @@ -3430,34 +3388,32 @@ class TranslatedFont { } } -var StateManager = (function StateManagerClosure() { - // eslint-disable-next-line no-shadow - function StateManager(initialState) { +class StateManager { + constructor(initialState) { this.state = initialState; this.stateStack = []; } - StateManager.prototype = { - save() { - var old = this.state; - this.stateStack.push(this.state); - this.state = old.clone(); - }, - restore() { - var prev = this.stateStack.pop(); - if (prev) { - this.state = prev; - } - }, - transform(args) { - this.state.ctm = Util.transform(this.state.ctm, args); - }, - }; - return StateManager; -})(); -var TextState = (function TextStateClosure() { - // eslint-disable-next-line no-shadow - function TextState() { + save() { + var old = this.state; + this.stateStack.push(this.state); + this.state = old.clone(); + } + + restore() { + var prev = this.stateStack.pop(); + if (prev) { + this.state = prev; + } + } + + transform(args) { + this.state.ctm = Util.transform(this.state.ctm, args); + } +} + +class TextState { + constructor() { this.ctm = new Float32Array(IDENTITY_MATRIX); this.fontName = null; this.fontSize = 0; @@ -3472,245 +3428,243 @@ var TextState = (function TextStateClosure() { this.textRise = 0; } - TextState.prototype = { - setTextMatrix: function TextState_setTextMatrix(a, b, c, d, e, f) { - var m = this.textMatrix; - m[0] = a; - m[1] = b; - m[2] = c; - m[3] = d; - m[4] = e; - m[5] = f; - }, - setTextLineMatrix: function TextState_setTextMatrix(a, b, c, d, e, f) { - var m = this.textLineMatrix; - m[0] = a; - m[1] = b; - m[2] = c; - m[3] = d; - m[4] = e; - m[5] = f; - }, - translateTextMatrix: function TextState_translateTextMatrix(x, y) { - var m = this.textMatrix; - m[4] = m[0] * x + m[2] * y + m[4]; - m[5] = m[1] * x + m[3] * y + m[5]; - }, - translateTextLineMatrix: function TextState_translateTextMatrix(x, y) { - var m = this.textLineMatrix; - m[4] = m[0] * x + m[2] * y + m[4]; - m[5] = m[1] * x + m[3] * y + m[5]; - }, - calcTextLineMatrixAdvance: function TextState_calcTextLineMatrixAdvance( - a, - b, - c, - d, - e, - f - ) { - var font = this.font; - if (!font) { - return null; - } - var m = this.textLineMatrix; - if (!(a === m[0] && b === m[1] && c === m[2] && d === m[3])) { - return null; - } - var txDiff = e - m[4], - tyDiff = f - m[5]; - if ((font.vertical && txDiff !== 0) || (!font.vertical && tyDiff !== 0)) { - return null; - } - var tx, - ty, - denominator = a * d - b * c; - if (font.vertical) { - tx = (-tyDiff * c) / denominator; - ty = (tyDiff * a) / denominator; - } else { - tx = (txDiff * d) / denominator; - ty = (-txDiff * b) / denominator; - } - return { width: tx, height: ty, value: font.vertical ? ty : tx }; - }, - calcRenderMatrix: function TextState_calcRendeMatrix(ctm) { - // 9.4.4 Text Space Details - var tsm = [ - this.fontSize * this.textHScale, - 0, - 0, - this.fontSize, - 0, - this.textRise, - ]; - return Util.transform(ctm, Util.transform(this.textMatrix, tsm)); - }, - carriageReturn: function TextState_carriageReturn() { - this.translateTextLineMatrix(0, -this.leading); - this.textMatrix = this.textLineMatrix.slice(); - }, - clone: function TextState_clone() { - var clone = Object.create(this); - clone.textMatrix = this.textMatrix.slice(); - clone.textLineMatrix = this.textLineMatrix.slice(); - clone.fontMatrix = this.fontMatrix.slice(); - return clone; - }, - }; - return TextState; -})(); + setTextMatrix(a, b, c, d, e, f) { + var m = this.textMatrix; + m[0] = a; + m[1] = b; + m[2] = c; + m[3] = d; + m[4] = e; + m[5] = f; + } -var EvalState = (function EvalStateClosure() { - // eslint-disable-next-line no-shadow - function EvalState() { + setTextLineMatrix(a, b, c, d, e, f) { + var m = this.textLineMatrix; + m[0] = a; + m[1] = b; + m[2] = c; + m[3] = d; + m[4] = e; + m[5] = f; + } + + translateTextMatrix(x, y) { + var m = this.textMatrix; + m[4] = m[0] * x + m[2] * y + m[4]; + m[5] = m[1] * x + m[3] * y + m[5]; + } + + translateTextLineMatrix(x, y) { + var m = this.textLineMatrix; + m[4] = m[0] * x + m[2] * y + m[4]; + m[5] = m[1] * x + m[3] * y + m[5]; + } + + calcTextLineMatrixAdvance(a, b, c, d, e, f) { + var font = this.font; + if (!font) { + return null; + } + var m = this.textLineMatrix; + if (!(a === m[0] && b === m[1] && c === m[2] && d === m[3])) { + return null; + } + var txDiff = e - m[4], + tyDiff = f - m[5]; + if ((font.vertical && txDiff !== 0) || (!font.vertical && tyDiff !== 0)) { + return null; + } + var tx, + ty, + denominator = a * d - b * c; + if (font.vertical) { + tx = (-tyDiff * c) / denominator; + ty = (tyDiff * a) / denominator; + } else { + tx = (txDiff * d) / denominator; + ty = (-txDiff * b) / denominator; + } + return { width: tx, height: ty, value: font.vertical ? ty : tx }; + } + + calcRenderMatrix(ctm) { + // 9.4.4 Text Space Details + var tsm = [ + this.fontSize * this.textHScale, + 0, + 0, + this.fontSize, + 0, + this.textRise, + ]; + return Util.transform(ctm, Util.transform(this.textMatrix, tsm)); + } + + carriageReturn() { + this.translateTextLineMatrix(0, -this.leading); + this.textMatrix = this.textLineMatrix.slice(); + } + + clone() { + var clone = Object.create(this); + clone.textMatrix = this.textMatrix.slice(); + clone.textLineMatrix = this.textLineMatrix.slice(); + clone.fontMatrix = this.fontMatrix.slice(); + return clone; + } +} + +class EvalState { + constructor() { this.ctm = new Float32Array(IDENTITY_MATRIX); this.font = null; this.textRenderingMode = TextRenderingMode.FILL; this.fillColorSpace = ColorSpace.singletons.gray; this.strokeColorSpace = ColorSpace.singletons.gray; } - EvalState.prototype = { - clone: function CanvasExtraState_clone() { - return Object.create(this); - }, - }; - return EvalState; -})(); -var EvaluatorPreprocessor = (function EvaluatorPreprocessorClosure() { - // Specifies properties for each command - // - // If variableArgs === true: [0, `numArgs`] expected - // If variableArgs === false: exactly `numArgs` expected - var getOPMap = getLookupTableFactory(function (t) { - // Graphic state - t.w = { id: OPS.setLineWidth, numArgs: 1, variableArgs: false }; - t.J = { id: OPS.setLineCap, numArgs: 1, variableArgs: false }; - t.j = { id: OPS.setLineJoin, numArgs: 1, variableArgs: false }; - t.M = { id: OPS.setMiterLimit, numArgs: 1, variableArgs: false }; - t.d = { id: OPS.setDash, numArgs: 2, variableArgs: false }; - t.ri = { id: OPS.setRenderingIntent, numArgs: 1, variableArgs: false }; - t.i = { id: OPS.setFlatness, numArgs: 1, variableArgs: false }; - t.gs = { id: OPS.setGState, numArgs: 1, variableArgs: false }; - t.q = { id: OPS.save, numArgs: 0, variableArgs: false }; - t.Q = { id: OPS.restore, numArgs: 0, variableArgs: false }; - t.cm = { id: OPS.transform, numArgs: 6, variableArgs: false }; + clone() { + return Object.create(this); + } +} - // Path - t.m = { id: OPS.moveTo, numArgs: 2, variableArgs: false }; - t.l = { id: OPS.lineTo, numArgs: 2, variableArgs: false }; - t.c = { id: OPS.curveTo, numArgs: 6, variableArgs: false }; - t.v = { id: OPS.curveTo2, numArgs: 4, variableArgs: false }; - t.y = { id: OPS.curveTo3, numArgs: 4, variableArgs: false }; - t.h = { id: OPS.closePath, numArgs: 0, variableArgs: false }; - t.re = { id: OPS.rectangle, numArgs: 4, variableArgs: false }; - t.S = { id: OPS.stroke, numArgs: 0, variableArgs: false }; - t.s = { id: OPS.closeStroke, numArgs: 0, variableArgs: false }; - t.f = { id: OPS.fill, numArgs: 0, variableArgs: false }; - t.F = { id: OPS.fill, numArgs: 0, variableArgs: false }; - t["f*"] = { id: OPS.eoFill, numArgs: 0, variableArgs: false }; - t.B = { id: OPS.fillStroke, numArgs: 0, variableArgs: false }; - t["B*"] = { id: OPS.eoFillStroke, numArgs: 0, variableArgs: false }; - t.b = { id: OPS.closeFillStroke, numArgs: 0, variableArgs: false }; - t["b*"] = { id: OPS.closeEOFillStroke, numArgs: 0, variableArgs: false }; - t.n = { id: OPS.endPath, numArgs: 0, variableArgs: false }; +class EvaluatorPreprocessor { + static get opMap() { + // Specifies properties for each command + // + // If variableArgs === true: [0, `numArgs`] expected + // If variableArgs === false: exactly `numArgs` expected + const getOPMap = getLookupTableFactory(function (t) { + // Graphic state + t.w = { id: OPS.setLineWidth, numArgs: 1, variableArgs: false }; + t.J = { id: OPS.setLineCap, numArgs: 1, variableArgs: false }; + t.j = { id: OPS.setLineJoin, numArgs: 1, variableArgs: false }; + t.M = { id: OPS.setMiterLimit, numArgs: 1, variableArgs: false }; + t.d = { id: OPS.setDash, numArgs: 2, variableArgs: false }; + t.ri = { id: OPS.setRenderingIntent, numArgs: 1, variableArgs: false }; + t.i = { id: OPS.setFlatness, numArgs: 1, variableArgs: false }; + t.gs = { id: OPS.setGState, numArgs: 1, variableArgs: false }; + t.q = { id: OPS.save, numArgs: 0, variableArgs: false }; + t.Q = { id: OPS.restore, numArgs: 0, variableArgs: false }; + t.cm = { id: OPS.transform, numArgs: 6, variableArgs: false }; - // Clipping - t.W = { id: OPS.clip, numArgs: 0, variableArgs: false }; - t["W*"] = { id: OPS.eoClip, numArgs: 0, variableArgs: false }; + // Path + t.m = { id: OPS.moveTo, numArgs: 2, variableArgs: false }; + t.l = { id: OPS.lineTo, numArgs: 2, variableArgs: false }; + t.c = { id: OPS.curveTo, numArgs: 6, variableArgs: false }; + t.v = { id: OPS.curveTo2, numArgs: 4, variableArgs: false }; + t.y = { id: OPS.curveTo3, numArgs: 4, variableArgs: false }; + t.h = { id: OPS.closePath, numArgs: 0, variableArgs: false }; + t.re = { id: OPS.rectangle, numArgs: 4, variableArgs: false }; + t.S = { id: OPS.stroke, numArgs: 0, variableArgs: false }; + t.s = { id: OPS.closeStroke, numArgs: 0, variableArgs: false }; + t.f = { id: OPS.fill, numArgs: 0, variableArgs: false }; + t.F = { id: OPS.fill, numArgs: 0, variableArgs: false }; + t["f*"] = { id: OPS.eoFill, numArgs: 0, variableArgs: false }; + t.B = { id: OPS.fillStroke, numArgs: 0, variableArgs: false }; + t["B*"] = { id: OPS.eoFillStroke, numArgs: 0, variableArgs: false }; + t.b = { id: OPS.closeFillStroke, numArgs: 0, variableArgs: false }; + t["b*"] = { id: OPS.closeEOFillStroke, numArgs: 0, variableArgs: false }; + t.n = { id: OPS.endPath, numArgs: 0, variableArgs: false }; - // Text - t.BT = { id: OPS.beginText, numArgs: 0, variableArgs: false }; - t.ET = { id: OPS.endText, numArgs: 0, variableArgs: false }; - t.Tc = { id: OPS.setCharSpacing, numArgs: 1, variableArgs: false }; - t.Tw = { id: OPS.setWordSpacing, numArgs: 1, variableArgs: false }; - t.Tz = { id: OPS.setHScale, numArgs: 1, variableArgs: false }; - t.TL = { id: OPS.setLeading, numArgs: 1, variableArgs: false }; - t.Tf = { id: OPS.setFont, numArgs: 2, variableArgs: false }; - t.Tr = { id: OPS.setTextRenderingMode, numArgs: 1, variableArgs: false }; - t.Ts = { id: OPS.setTextRise, numArgs: 1, variableArgs: false }; - t.Td = { id: OPS.moveText, numArgs: 2, variableArgs: false }; - t.TD = { id: OPS.setLeadingMoveText, numArgs: 2, variableArgs: false }; - t.Tm = { id: OPS.setTextMatrix, numArgs: 6, variableArgs: false }; - t["T*"] = { id: OPS.nextLine, numArgs: 0, variableArgs: false }; - t.Tj = { id: OPS.showText, numArgs: 1, variableArgs: false }; - t.TJ = { id: OPS.showSpacedText, numArgs: 1, variableArgs: false }; - t["'"] = { id: OPS.nextLineShowText, numArgs: 1, variableArgs: false }; - t['"'] = { - id: OPS.nextLineSetSpacingShowText, - numArgs: 3, - variableArgs: false, - }; + // Clipping + t.W = { id: OPS.clip, numArgs: 0, variableArgs: false }; + t["W*"] = { id: OPS.eoClip, numArgs: 0, variableArgs: false }; - // Type3 fonts - t.d0 = { id: OPS.setCharWidth, numArgs: 2, variableArgs: false }; - t.d1 = { - id: OPS.setCharWidthAndBounds, - numArgs: 6, - variableArgs: false, - }; + // Text + t.BT = { id: OPS.beginText, numArgs: 0, variableArgs: false }; + t.ET = { id: OPS.endText, numArgs: 0, variableArgs: false }; + t.Tc = { id: OPS.setCharSpacing, numArgs: 1, variableArgs: false }; + t.Tw = { id: OPS.setWordSpacing, numArgs: 1, variableArgs: false }; + t.Tz = { id: OPS.setHScale, numArgs: 1, variableArgs: false }; + t.TL = { id: OPS.setLeading, numArgs: 1, variableArgs: false }; + t.Tf = { id: OPS.setFont, numArgs: 2, variableArgs: false }; + t.Tr = { id: OPS.setTextRenderingMode, numArgs: 1, variableArgs: false }; + t.Ts = { id: OPS.setTextRise, numArgs: 1, variableArgs: false }; + t.Td = { id: OPS.moveText, numArgs: 2, variableArgs: false }; + t.TD = { id: OPS.setLeadingMoveText, numArgs: 2, variableArgs: false }; + t.Tm = { id: OPS.setTextMatrix, numArgs: 6, variableArgs: false }; + t["T*"] = { id: OPS.nextLine, numArgs: 0, variableArgs: false }; + t.Tj = { id: OPS.showText, numArgs: 1, variableArgs: false }; + t.TJ = { id: OPS.showSpacedText, numArgs: 1, variableArgs: false }; + t["'"] = { id: OPS.nextLineShowText, numArgs: 1, variableArgs: false }; + t['"'] = { + id: OPS.nextLineSetSpacingShowText, + numArgs: 3, + variableArgs: false, + }; - // Color - t.CS = { id: OPS.setStrokeColorSpace, numArgs: 1, variableArgs: false }; - t.cs = { id: OPS.setFillColorSpace, numArgs: 1, variableArgs: false }; - t.SC = { id: OPS.setStrokeColor, numArgs: 4, variableArgs: true }; - t.SCN = { id: OPS.setStrokeColorN, numArgs: 33, variableArgs: true }; - t.sc = { id: OPS.setFillColor, numArgs: 4, variableArgs: true }; - t.scn = { id: OPS.setFillColorN, numArgs: 33, variableArgs: true }; - t.G = { id: OPS.setStrokeGray, numArgs: 1, variableArgs: false }; - t.g = { id: OPS.setFillGray, numArgs: 1, variableArgs: false }; - t.RG = { id: OPS.setStrokeRGBColor, numArgs: 3, variableArgs: false }; - t.rg = { id: OPS.setFillRGBColor, numArgs: 3, variableArgs: false }; - t.K = { id: OPS.setStrokeCMYKColor, numArgs: 4, variableArgs: false }; - t.k = { id: OPS.setFillCMYKColor, numArgs: 4, variableArgs: false }; + // Type3 fonts + t.d0 = { id: OPS.setCharWidth, numArgs: 2, variableArgs: false }; + t.d1 = { + id: OPS.setCharWidthAndBounds, + numArgs: 6, + variableArgs: false, + }; - // Shading - t.sh = { id: OPS.shadingFill, numArgs: 1, variableArgs: false }; + // Color + t.CS = { id: OPS.setStrokeColorSpace, numArgs: 1, variableArgs: false }; + t.cs = { id: OPS.setFillColorSpace, numArgs: 1, variableArgs: false }; + t.SC = { id: OPS.setStrokeColor, numArgs: 4, variableArgs: true }; + t.SCN = { id: OPS.setStrokeColorN, numArgs: 33, variableArgs: true }; + t.sc = { id: OPS.setFillColor, numArgs: 4, variableArgs: true }; + t.scn = { id: OPS.setFillColorN, numArgs: 33, variableArgs: true }; + t.G = { id: OPS.setStrokeGray, numArgs: 1, variableArgs: false }; + t.g = { id: OPS.setFillGray, numArgs: 1, variableArgs: false }; + t.RG = { id: OPS.setStrokeRGBColor, numArgs: 3, variableArgs: false }; + t.rg = { id: OPS.setFillRGBColor, numArgs: 3, variableArgs: false }; + t.K = { id: OPS.setStrokeCMYKColor, numArgs: 4, variableArgs: false }; + t.k = { id: OPS.setFillCMYKColor, numArgs: 4, variableArgs: false }; - // Images - t.BI = { id: OPS.beginInlineImage, numArgs: 0, variableArgs: false }; - t.ID = { id: OPS.beginImageData, numArgs: 0, variableArgs: false }; - t.EI = { id: OPS.endInlineImage, numArgs: 1, variableArgs: false }; + // Shading + t.sh = { id: OPS.shadingFill, numArgs: 1, variableArgs: false }; - // XObjects - t.Do = { id: OPS.paintXObject, numArgs: 1, variableArgs: false }; - t.MP = { id: OPS.markPoint, numArgs: 1, variableArgs: false }; - t.DP = { id: OPS.markPointProps, numArgs: 2, variableArgs: false }; - t.BMC = { id: OPS.beginMarkedContent, numArgs: 1, variableArgs: false }; - t.BDC = { - id: OPS.beginMarkedContentProps, - numArgs: 2, - variableArgs: false, - }; - t.EMC = { id: OPS.endMarkedContent, numArgs: 0, variableArgs: false }; + // Images + t.BI = { id: OPS.beginInlineImage, numArgs: 0, variableArgs: false }; + t.ID = { id: OPS.beginImageData, numArgs: 0, variableArgs: false }; + t.EI = { id: OPS.endInlineImage, numArgs: 1, variableArgs: false }; - // Compatibility - t.BX = { id: OPS.beginCompat, numArgs: 0, variableArgs: false }; - t.EX = { id: OPS.endCompat, numArgs: 0, variableArgs: false }; + // XObjects + t.Do = { id: OPS.paintXObject, numArgs: 1, variableArgs: false }; + t.MP = { id: OPS.markPoint, numArgs: 1, variableArgs: false }; + t.DP = { id: OPS.markPointProps, numArgs: 2, variableArgs: false }; + t.BMC = { id: OPS.beginMarkedContent, numArgs: 1, variableArgs: false }; + t.BDC = { + id: OPS.beginMarkedContentProps, + numArgs: 2, + variableArgs: false, + }; + t.EMC = { id: OPS.endMarkedContent, numArgs: 0, variableArgs: false }; - // (reserved partial commands for the lexer) - t.BM = null; - t.BD = null; - t.true = null; - t.fa = null; - t.fal = null; - t.fals = null; - t.false = null; - t.nu = null; - t.nul = null; - t.null = null; - }); + // Compatibility + t.BX = { id: OPS.beginCompat, numArgs: 0, variableArgs: false }; + t.EX = { id: OPS.endCompat, numArgs: 0, variableArgs: false }; - const MAX_INVALID_PATH_OPS = 20; + // (reserved partial commands for the lexer) + t.BM = null; + t.BD = null; + t.true = null; + t.fa = null; + t.fal = null; + t.fals = null; + t.false = null; + t.nu = null; + t.nul = null; + t.null = null; + }); - // eslint-disable-next-line no-shadow - function EvaluatorPreprocessor(stream, xref, stateManager) { - this.opMap = getOPMap(); + return shadow(this, "opMap", getOPMap()); + } + + static get MAX_INVALID_PATH_OPS() { + return shadow(this, "MAX_INVALID_PATH_OPS", 20); + } + + constructor(stream, xref, stateManager) { // TODO(mduan): pass array of knownCommands rather than this.opMap // dictionary this.parser = new Parser({ - lexer: new Lexer(stream, this.opMap), + lexer: new Lexer(stream, EvaluatorPreprocessor.opMap), xref, }); this.stateManager = stateManager; @@ -3718,138 +3672,133 @@ var EvaluatorPreprocessor = (function EvaluatorPreprocessorClosure() { this._numInvalidPathOPS = 0; } - EvaluatorPreprocessor.prototype = { - get savedStatesDepth() { - return this.stateManager.stateStack.length; - }, + get savedStatesDepth() { + return this.stateManager.stateStack.length; + } - // |operation| is an object with two fields: - // - // - |fn| is an out param. - // - // - |args| is an inout param. On entry, it should have one of two values. - // - // - An empty array. This indicates that the caller is providing the - // array in which the args will be stored in. The caller should use - // this value if it can reuse a single array for each call to read(). - // - // - |null|. This indicates that the caller needs this function to create - // the array in which any args are stored in. If there are zero args, - // this function will leave |operation.args| as |null| (thus avoiding - // allocations that would occur if we used an empty array to represent - // zero arguments). Otherwise, it will replace |null| with a new array - // containing the arguments. The caller should use this value if it - // cannot reuse an array for each call to read(). - // - // These two modes are present because this function is very hot and so - // avoiding allocations where possible is worthwhile. - // - read: function EvaluatorPreprocessor_read(operation) { - var args = operation.args; - while (true) { - var obj = this.parser.getObj(); - if (obj instanceof Cmd) { - var cmd = obj.cmd; - // Check that the command is valid - var opSpec = this.opMap[cmd]; - if (!opSpec) { - warn(`Unknown command "${cmd}".`); + // |operation| is an object with two fields: + // + // - |fn| is an out param. + // + // - |args| is an inout param. On entry, it should have one of two values. + // + // - An empty array. This indicates that the caller is providing the + // array in which the args will be stored in. The caller should use + // this value if it can reuse a single array for each call to read(). + // + // - |null|. This indicates that the caller needs this function to create + // the array in which any args are stored in. If there are zero args, + // this function will leave |operation.args| as |null| (thus avoiding + // allocations that would occur if we used an empty array to represent + // zero arguments). Otherwise, it will replace |null| with a new array + // containing the arguments. The caller should use this value if it + // cannot reuse an array for each call to read(). + // + // These two modes are present because this function is very hot and so + // avoiding allocations where possible is worthwhile. + // + read(operation) { + var args = operation.args; + while (true) { + var obj = this.parser.getObj(); + if (obj instanceof Cmd) { + var cmd = obj.cmd; + // Check that the command is valid + var opSpec = EvaluatorPreprocessor.opMap[cmd]; + if (!opSpec) { + warn(`Unknown command "${cmd}".`); + continue; + } + + var fn = opSpec.id; + var numArgs = opSpec.numArgs; + var argsLength = args !== null ? args.length : 0; + + if (!opSpec.variableArgs) { + // Postscript commands can be nested, e.g. /F2 /GS2 gs 5.711 Tf + if (argsLength !== numArgs) { + var nonProcessedArgs = this.nonProcessedArgs; + while (argsLength > numArgs) { + nonProcessedArgs.push(args.shift()); + argsLength--; + } + while (argsLength < numArgs && nonProcessedArgs.length !== 0) { + if (args === null) { + args = []; + } + args.unshift(nonProcessedArgs.pop()); + argsLength++; + } + } + + if (argsLength < numArgs) { + const partialMsg = + `command ${cmd}: expected ${numArgs} args, ` + + `but received ${argsLength} args.`; + + // Incomplete path operators, in particular, can result in fairly + // chaotic rendering artifacts. Hence the following heuristics is + // used to error, rather than just warn, once a number of invalid + // path operators have been encountered (fixes bug1443140.pdf). + if ( + fn >= OPS.moveTo && + fn <= OPS.endPath && // Path operator + ++this._numInvalidPathOPS > + EvaluatorPreprocessor.MAX_INVALID_PATH_OPS + ) { + throw new FormatError(`Invalid ${partialMsg}`); + } + // If we receive too few arguments, it's not possible to execute + // the command, hence we skip the command. + warn(`Skipping ${partialMsg}`); + if (args !== null) { + args.length = 0; + } continue; } - - var fn = opSpec.id; - var numArgs = opSpec.numArgs; - var argsLength = args !== null ? args.length : 0; - - if (!opSpec.variableArgs) { - // Postscript commands can be nested, e.g. /F2 /GS2 gs 5.711 Tf - if (argsLength !== numArgs) { - var nonProcessedArgs = this.nonProcessedArgs; - while (argsLength > numArgs) { - nonProcessedArgs.push(args.shift()); - argsLength--; - } - while (argsLength < numArgs && nonProcessedArgs.length !== 0) { - if (args === null) { - args = []; - } - args.unshift(nonProcessedArgs.pop()); - argsLength++; - } - } - - if (argsLength < numArgs) { - const partialMsg = - `command ${cmd}: expected ${numArgs} args, ` + - `but received ${argsLength} args.`; - - // Incomplete path operators, in particular, can result in fairly - // chaotic rendering artifacts. Hence the following heuristics is - // used to error, rather than just warn, once a number of invalid - // path operators have been encountered (fixes bug1443140.pdf). - if ( - fn >= OPS.moveTo && - fn <= OPS.endPath && // Path operator - ++this._numInvalidPathOPS > MAX_INVALID_PATH_OPS - ) { - throw new FormatError(`Invalid ${partialMsg}`); - } - // If we receive too few arguments, it's not possible to execute - // the command, hence we skip the command. - warn(`Skipping ${partialMsg}`); - if (args !== null) { - args.length = 0; - } - continue; - } - } else if (argsLength > numArgs) { - info( - `Command ${cmd}: expected [0, ${numArgs}] args, ` + - `but received ${argsLength} args.` - ); - } - - // TODO figure out how to type-check vararg functions - this.preprocessCommand(fn, args); - - operation.fn = fn; - operation.args = args; - return true; + } else if (argsLength > numArgs) { + info( + `Command ${cmd}: expected [0, ${numArgs}] args, ` + + `but received ${argsLength} args.` + ); } - if (obj === EOF) { - return false; // no more commands + + // TODO figure out how to type-check vararg functions + this.preprocessCommand(fn, args); + + operation.fn = fn; + operation.args = args; + return true; + } + if (obj === EOF) { + return false; // no more commands + } + // argument + if (obj !== null) { + if (args === null) { + args = []; } - // argument - if (obj !== null) { - if (args === null) { - args = []; - } - args.push(obj); - if (args.length > 33) { - throw new FormatError("Too many arguments"); - } + args.push(obj); + if (args.length > 33) { + throw new FormatError("Too many arguments"); } } - }, + } + } - preprocessCommand: function EvaluatorPreprocessor_preprocessCommand( - fn, - args - ) { - switch (fn | 0) { - case OPS.save: - this.stateManager.save(); - break; - case OPS.restore: - this.stateManager.restore(); - break; - case OPS.transform: - this.stateManager.transform(args); - break; - } - }, - }; - return EvaluatorPreprocessor; -})(); + preprocessCommand(fn, args) { + switch (fn | 0) { + case OPS.save: + this.stateManager.save(); + break; + case OPS.restore: + this.stateManager.restore(); + break; + case OPS.transform: + this.stateManager.transform(args); + break; + } + } +} export { PartialEvaluator };