diff --git a/src/core/core.js b/src/core/core.js index 7138ce091..bff7ab5bd 100644 --- a/src/core/core.js +++ b/src/core/core.js @@ -233,8 +233,6 @@ var Page = (function PageClosure() { var self = this; - var textContentPromise = new LegacyPromise(); - var pdfManager = this.pdfManager; var contentStreamPromise = pdfManager.ensure(this, 'getContentStream', []); @@ -247,7 +245,7 @@ var Page = (function PageClosure() { var dataPromises = Promise.all([contentStreamPromise, resourcesPromise]); - dataPromises.then(function(data) { + return dataPromises.then(function(data) { var contentStream = data[0]; var partialEvaluator = new PartialEvaluator(pdfManager, self.xref, handler, self.pageIndex, @@ -255,12 +253,9 @@ var Page = (function PageClosure() { self.idCounters, self.fontCache); - var bidiTexts = partialEvaluator.getTextContent(contentStream, - self.resources); - textContentPromise.resolve(bidiTexts); + return partialEvaluator.getTextContent(contentStream, + self.resources); }); - - return textContentPromise; }, getAnnotationsData: function Page_getAnnotationsData() { diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 6e550fd03..936cc189e 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -21,16 +21,14 @@ MurmurHash3_64, Name, Parser, Pattern, PDFImage, PDFJS, serifFonts, stdFontMap, symbolsFonts, getTilingPatternIR, warn, Util, Promise, LegacyPromise, RefSetCache, isRef, TextRenderingMode, CMapFactory, - OPS, UNSUPPORTED_FEATURES, UnsupportedManager */ + OPS, UNSUPPORTED_FEATURES, UnsupportedManager, NormalizedUnicodes, + IDENTITY_MATRIX, reverseIfRtl */ 'use strict'; var PartialEvaluator = (function PartialEvaluatorClosure() { function PartialEvaluator(pdfManager, xref, handler, pageIndex, uniquePrefix, idCounters, fontCache) { - this.state = new EvalState(); - this.stateStack = []; - this.pdfManager = pdfManager; this.xref = xref; this.handler = handler; @@ -96,7 +94,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { 
buildFormXObject: function PartialEvaluator_buildFormXObject(resources, xobj, smask, operatorList, - state) { + initialState) { var matrix = xobj.dict.get('Matrix'); var bbox = xobj.dict.get('BBox'); var group = xobj.dict.get('Group'); @@ -123,7 +121,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { operatorList.addOp(OPS.paintFormXObjectBegin, [matrix, bbox]); this.getOperatorList(xobj, (xobj.dict.get('Resources') || resources), - operatorList, state); + operatorList, initialState); operatorList.addOp(OPS.paintFormXObjectEnd, []); if (group) { @@ -221,15 +219,15 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { }, handleSMask: function PartialEvaluator_handleSmask(smask, resources, - operatorList) { + operatorList, + stateManager) { var smaskContent = smask.get('G'); var smaskOptions = { subtype: smask.get('S').name, backdrop: smask.get('BC') }; - this.buildFormXObject(resources, smaskContent, smaskOptions, - operatorList); + operatorList, stateManager.state.clone()); }, handleTilingType: @@ -250,7 +248,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { handleSetFont: function PartialEvaluator_handleSetFont(resources, fontArgs, fontRef, - operatorList) { + operatorList, state) { // TODO(mack): Not needed? 
var fontName; if (fontArgs) { @@ -260,7 +258,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var self = this; var font = this.loadFont(fontName, fontRef, this.xref, resources, operatorList); - this.state.font = font; + state.font = font; var loadedName = font.loadedName; if (!font.sent) { var fontData = font.translated.exportData(); @@ -276,10 +274,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { return loadedName; }, - handleText: function PartialEvaluator_handleText(chars) { - var font = this.state.font.translated; + handleText: function PartialEvaluator_handleText(chars, state) { + var font = state.font.translated; var glyphs = font.charsToGlyphs(chars); - var isAddToPathSet = !!(this.state.textRenderingMode & + var isAddToPathSet = !!(state.textRenderingMode & TextRenderingMode.ADD_TO_PATH_FLAG); if (font.data && (isAddToPathSet || PDFJS.disableFontFace)) { for (var i = 0; i < glyphs.length; i++) { @@ -302,7 +300,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { }, setGState: function PartialEvaluator_setGState(resources, gState, - operatorList, xref) { + operatorList, xref, + stateManager) { var self = this; // TODO(mack): This should be rewritten so that this function returns @@ -324,7 +323,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { break; case 'Font': var loadedName = self.handleSetFont(resources, null, value[0], - operatorList); + operatorList, + stateManager.state); operatorList.addDependency(loadedName); gStateObj.push([key, [loadedName, value[1]]]); break; @@ -338,7 +338,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } var dict = xref.fetchIfRef(value); if (isDict(dict)) { - self.handleSMask(dict, resources, operatorList); + self.handleSMask(dict, resources, operatorList, stateManager); gStateObj.push([key, true]); } else { warn('Unsupported SMask type'); @@ -495,7 +495,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { getOperatorList: function 
PartialEvaluator_getOperatorList(stream, resources, operatorList, - evaluatorState) { + initialState) { var self = this; var xref = this.xref; @@ -507,10 +507,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { resources = (resources || Dict.empty); var xobjs = (resources.get('XObject') || Dict.empty); var patterns = (resources.get('Pattern') || Dict.empty); - var preprocessor = new EvaluatorPreprocessor(stream, xref); - if (evaluatorState) { - preprocessor.setState(evaluatorState); - } + var stateManager = new StateManager(initialState || new EvalState()); + var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager); var promise = new LegacyPromise(); var operation; @@ -570,9 +568,11 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { 'XObject should have a Name subtype'); if ('Form' == type.name) { + stateManager.save(); self.buildFormXObject(resources, xobj, null, operatorList, - preprocessor.getState()); + stateManager.state.clone()); args = []; + stateManager.restore(); continue; } else if ('Image' == type.name) { self.buildPaintImageXObject(resources, xobj, false, @@ -587,7 +587,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { case OPS.setFont: // eagerly collect all fonts var loadedName = self.handleSetFont(resources, args, null, - operatorList); + operatorList, + stateManager.state); operatorList.addDependency(loadedName); args[0] = loadedName; break; @@ -602,37 +603,26 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { operatorList, cacheKey, imageCache); args = []; continue; - case OPS.save: - var old = this.state; - this.stateStack.push(this.state); - this.state = old.clone(); - break; - case OPS.restore: - var prev = this.stateStack.pop(); - if (prev) { - this.state = prev; - } - break; case OPS.showText: - args[0] = this.handleText(args[0]); + args[0] = this.handleText(args[0], stateManager.state); break; case OPS.showSpacedText: var arr = args[0]; var arrLength = arr.length; for 
(var i = 0; i < arrLength; ++i) { if (isString(arr[i])) { - arr[i] = this.handleText(arr[i]); + arr[i] = this.handleText(arr[i], stateManager.state); } } break; case OPS.nextLineShowText: - args[0] = this.handleText(args[0]); + args[0] = this.handleText(args[0], stateManager.state); break; case OPS.nextLineSetSpacingShowText: - args[2] = this.handleText(args[2]); + args[2] = this.handleText(args[2], stateManager.state); break; case OPS.setTextRenderingMode: - this.state.textRenderingMode = args[0]; + stateManager.state.textRenderingMode = args[0]; break; // Parse the ColorSpace data to a raw format. case OPS.setFillColorSpace: @@ -665,7 +655,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } var gState = extGState.get(dictName.name); - self.setGState(resources, gState, operatorList, xref); + self.setGState(resources, gState, operatorList, xref, + stateManager); args = []; continue; } @@ -682,40 +673,165 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { }, getTextContent: function PartialEvaluator_getTextContent(stream, resources, - textState) { + stateManager) { - textState = (textState || new TextState()); + stateManager = (stateManager || new StateManager(new TextState())); - var bidiTexts = []; + var textContent = { + items: [], + styles: Object.create(null) + }; + var bidiTexts = textContent.items; var SPACE_FACTOR = 0.35; var MULTI_SPACE_FACTOR = 1.5; var self = this; var xref = this.xref; - function handleSetFont(fontName, fontRef) { - return self.loadFont(fontName, fontRef, xref, resources, null); - } - resources = (xref.fetchIfRef(resources) || Dict.empty); + // The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd. 
var xobjs = null; var xobjsCache = {}; - var preprocessor = new EvaluatorPreprocessor(stream, xref); + var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager); var res = resources; - var chunkBuf = []; - var font = null; - var charSpace = 0, wordSpace = 0; var operation; + var textState; + + function newTextChunk() { + var font = textState.font; + if (!(font.loadedName in textContent.styles)) { + textContent.styles[font.loadedName] = { + fontFamily: font.fallbackName, + ascent: font.ascent, + descent: font.descent, + vertical: font.vertical + }; + } + return { + str: '', + dir: null, + width: 0, + height: 0, + transform: null, + fontName: font.loadedName + }; + } + + function runBidi(textChunk) { + var bidiResult = PDFJS.bidi(textChunk.str, -1, textState.font.vertical); + textChunk.str = bidiResult.str; + textChunk.dir = bidiResult.dir; + return textChunk; + } + + function handleSetFont(fontName, fontRef) { + var font = textState.font = self.loadFont(fontName, fontRef, xref, + resources, null).translated; + textState.fontMatrix = font.fontMatrix ? font.fontMatrix : + FONT_IDENTITY_MATRIX; + } + + function buildTextGeometry(chars, textChunk) { + var font = textState.font; + textChunk = textChunk || newTextChunk(); + if (!textChunk.transform) { + // 9.4.4 Text Space Details + var tsm = [textState.fontSize * textState.textHScale, 0, + 0, textState.fontSize, + 0, textState.textRise]; + var trm = textChunk.transform = Util.transform(textState.ctm, + Util.transform(textState.textMatrix, tsm)); + if (!font.vertical) { + textChunk.height = Math.sqrt(trm[2] * trm[2] + trm[3] * trm[3]); + } else { + textChunk.width = Math.sqrt(trm[0] * trm[0] + trm[1] * trm[1]); + } + } + var width = 0; + var height = 0; + var glyphs = font.charsToGlyphs(chars); + var defaultVMetrics = font.defaultVMetrics; + for (var i = 0; i < glyphs.length; i++) { + var glyph = glyphs[i]; + if (!glyph) { // Previous glyph was a space. 
+ continue; + } + var vMetricX = null; + var vMetricY = null; + var glyphWidth = null; + if (font.vertical) { + if (glyph.vmetric) { + glyphWidth = glyph.vmetric[0]; + vMetricX = glyph.vmetric[1]; + vMetricY = glyph.vmetric[2]; + } else { + glyphWidth = glyph.width; + vMetricX = glyph.width * 0.5; + vMetricY = defaultVMetrics[2]; + } + } else { + glyphWidth = glyph.width; + } + + var glyphUnicode = glyph.unicode; + if (glyphUnicode in NormalizedUnicodes) { + glyphUnicode = NormalizedUnicodes[glyphUnicode]; + } + glyphUnicode = reverseIfRtl(glyphUnicode); + + // The following will calculate the x and y of the individual glyphs. + // if (font.vertical) { + // tsm[4] -= vMetricX * Math.abs(textState.fontSize) * + // textState.fontMatrix[0]; + // tsm[5] -= vMetricY * textState.fontSize * + // textState.fontMatrix[0]; + // } + // var trm = Util.transform(textState.textMatrix, tsm); + // var pt = Util.applyTransform([trm[4], trm[5]], textState.ctm); + // var x = pt[0]; + // var y = pt[1]; + + var tx = 0; + var ty = 0; + if (!font.vertical) { + var w0 = glyphWidth * textState.fontMatrix[0]; + tx = (w0 * textState.fontSize + textState.charSpacing) * + textState.textHScale; + width += tx; + } else { + var w1 = glyphWidth * textState.fontMatrix[0]; + ty = w1 * textState.fontSize + textState.charSpacing; + height += ty; + } + textState.translateTextMatrix(tx, ty); + + textChunk.str += glyphUnicode; + } + + var a = textState.textLineMatrix[0]; + var b = textState.textLineMatrix[1]; + var scaleLineX = Math.sqrt(a * a + b * b); + a = textState.ctm[0]; + b = textState.ctm[1]; + var scaleCtmX = Math.sqrt(a * a + b * b); + if (!font.vertical) { + textChunk.width += width * scaleCtmX * scaleLineX; + } else { + textChunk.height += Math.abs(height * scaleCtmX * scaleLineX); + } + return textChunk; + } + while ((operation = preprocessor.read())) { + textState = stateManager.state; var fn = operation.fn; var args = operation.args; switch (fn) { - // TODO: Add support for SAVE/RESTORE 
and XFORM here. case OPS.setFont: - font = handleSetFont(args[0].name).translated; + handleSetFont(args[0].name); textState.fontSize = args[1]; break; case OPS.setTextRise: @@ -728,64 +844,79 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { textState.leading = args[0]; break; case OPS.moveText: - textState.translateTextMatrix(args[0], args[1]); + textState.translateTextLineMatrix(args[0], args[1]); + textState.textMatrix = textState.textLineMatrix.slice(); break; case OPS.setLeadingMoveText: textState.leading = -args[1]; - textState.translateTextMatrix(args[0], args[1]); + textState.translateTextLineMatrix(args[0], args[1]); + textState.textMatrix = textState.textLineMatrix.slice(); break; case OPS.nextLine: - textState.translateTextMatrix(0, -textState.leading); + textState.carriageReturn(); break; case OPS.setTextMatrix: - textState.setTextMatrix(args[0], args[1], - args[2], args[3], args[4], args[5]); + textState.setTextMatrix(args[0], args[1], args[2], args[3], + args[4], args[5]); + textState.setTextLineMatrix(args[0], args[1], args[2], args[3], + args[4], args[5]); break; case OPS.setCharSpacing: - charSpace = args[0]; + textState.charSpace = args[0]; break; case OPS.setWordSpacing: - wordSpace = args[0]; + textState.wordSpace = args[0]; break; case OPS.beginText: - textState.initialiseTextObj(); + textState.textMatrix = IDENTITY_MATRIX.slice(); + textState.textLineMatrix = IDENTITY_MATRIX.slice(); break; case OPS.showSpacedText: var items = args[0]; + var textChunk = newTextChunk(); for (var j = 0, jj = items.length; j < jj; j++) { if (typeof items[j] === 'string') { - chunkBuf.push(fontCharsToUnicode(items[j], font)); - } else if (items[j] < 0 && font.spaceWidth > 0) { - var fakeSpaces = -items[j] / font.spaceWidth; - if (fakeSpaces > MULTI_SPACE_FACTOR) { - fakeSpaces = Math.round(fakeSpaces); - while (fakeSpaces--) { - chunkBuf.push(' '); + buildTextGeometry(items[j], textChunk); + } else { + var val = items[j] / 1000; + if 
(!textState.font.vertical) { + var offset = -val * textState.fontSize * textState.textHScale; + textState.translateTextMatrix(offset, 0); + textChunk.width += offset; + } else { + var offset = -val * textState.fontSize; + textState.translateTextMatrix(0, offset); + textChunk.height += offset; + } + if (items[j] < 0 && textState.font.spaceWidth > 0) { + var fakeSpaces = -items[j] / textState.font.spaceWidth; + if (fakeSpaces > MULTI_SPACE_FACTOR) { + fakeSpaces = Math.round(fakeSpaces); + while (fakeSpaces--) { + textChunk.str += ' '; + } + } else if (fakeSpaces > SPACE_FACTOR) { + textChunk.str += ' '; } - } else if (fakeSpaces > SPACE_FACTOR) { - chunkBuf.push(' '); } } } + bidiTexts.push(runBidi(textChunk)); break; case OPS.showText: - chunkBuf.push(fontCharsToUnicode(args[0], font)); + bidiTexts.push(runBidi(buildTextGeometry(args[0]))); break; case OPS.nextLineShowText: - // For search, adding a extra white space for line breaks would be - // better here, but that causes too much spaces in the - // text-selection divs. - chunkBuf.push(fontCharsToUnicode(args[0], font)); + textState.carriageReturn(); + bidiTexts.push(runBidi(buildTextGeometry(args[0]))); break; case OPS.nextLineSetSpacingShowText: - // Note comment in "'" - chunkBuf.push(fontCharsToUnicode(args[2], font)); + textState.wordSpacing = args[0]; + textState.charSpacing = args[1]; + textState.carriageReturn(); + bidiTexts.push(runBidi(buildTextGeometry(args[2]))); break; case OPS.paintXObject: - // Set the chunk such that the following if won't add something - // to the state. 
- chunkBuf.length = 0; - if (args[0].code) { break; } @@ -797,7 +928,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var name = args[0].name; if (xobjsCache.key === name) { if (xobjsCache.texts) { - Util.concatenateToArray(bidiTexts, xobjsCache.texts); + Util.concatenateToArray(bidiTexts, xobjsCache.texts.items); + Util.extendObj(textContent.styles, xobjsCache.texts.styles); } break; } @@ -818,11 +950,23 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { break; } - var formTexts = this.getTextContent(xobj, - (xobj.dict.get('Resources') || resources), textState); + stateManager.save(); + var matrix = xobj.dict.get('Matrix'); + if (isArray(matrix) && matrix.length === 6) { + stateManager.transform(matrix); + } + + var formTextContent = this.getTextContent( + xobj, + xobj.dict.get('Resources') || resources, + stateManager + ); + Util.concatenateToArray(bidiTexts, formTextContent.items); + Util.extendObj(textContent.styles, formTextContent.styles); + stateManager.restore(); + xobjsCache.key = name; - xobjsCache.texts = formTexts; - Util.concatenateToArray(bidiTexts, formTexts); + xobjsCache.texts = formTextContent; break; case OPS.setGState: var dictName = args[0]; @@ -836,36 +980,14 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { for (var i = 0; i < gsState.length; i++) { if (gsState[i] === 'Font') { - font = handleSetFont(args[0].name).translated; + handleSetFont(args[0].name); } } break; - } + } // switch + } // while - if (chunkBuf.length > 0) { - var chunk = chunkBuf.join(''); - var bidiText = PDFJS.bidi(chunk, -1, font.vertical); - var renderParams = textState.calcRenderParams(preprocessor.ctm); - var fontHeight = textState.fontSize * renderParams.vScale; - var fontAscent = (font.ascent ? (font.ascent * fontHeight) : - (font.descent ? 
((1 + font.descent) * fontHeight) : fontHeight)); - bidiText.x = renderParams.renderMatrix[4] - (fontAscent * - Math.sin(renderParams.angle)); - bidiText.y = renderParams.renderMatrix[5] + (fontAscent * - Math.cos(renderParams.angle)); - if (bidiText.dir == 'ttb') { - bidiText.x += renderParams.vScale / 2; - bidiText.y -= renderParams.vScale; - } - bidiText.angle = renderParams.angle; - bidiText.size = fontHeight; - bidiTexts.push(bidiText); - - chunkBuf.length = 0; - } - } - - return bidiTexts; + return textContent; }, extractDataStructures: function @@ -1467,64 +1589,89 @@ var OperatorList = (function OperatorListClosure() { return OperatorList; })(); +var StateManager = (function StateManagerClosure() { + function StateManager(initialState) { + this.state = initialState; + this.stateStack = []; + } + StateManager.prototype = { + save: function () { + var old = this.state; + this.stateStack.push(this.state); + this.state = old.clone(); + }, + restore: function () { + var prev = this.stateStack.pop(); + if (prev) { + this.state = prev; + } + }, + transform: function (args) { + this.state.ctm = Util.transform(this.state.ctm, args); + } + }; + return StateManager; +})(); + var TextState = (function TextStateClosure() { function TextState() { + this.ctm = new Float32Array(IDENTITY_MATRIX); this.fontSize = 0; - this.textMatrix = [1, 0, 0, 1, 0, 0]; - this.stateStack = []; - //textState variables + this.font = null; + this.fontMatrix = FONT_IDENTITY_MATRIX; + this.textMatrix = IDENTITY_MATRIX.slice(); + this.textLineMatrix = IDENTITY_MATRIX.slice(); + this.charSpacing = 0; + this.wordSpacing = 0; this.leading = 0; this.textHScale = 1; this.textRise = 0; } TextState.prototype = { - initialiseTextObj: function TextState_initialiseTextObj() { - var m = this.textMatrix; - m[0] = 1; m[1] = 0; m[2] = 0; m[3] = 1; m[4] = 0; m[5] = 0; - }, setTextMatrix: function TextState_setTextMatrix(a, b, c, d, e, f) { var m = this.textMatrix; m[0] = a; m[1] = b; m[2] = c; m[3] = d; m[4] = 
e; m[5] = f; }, + setTextLineMatrix: function TextState_setTextMatrix(a, b, c, d, e, f) { + var m = this.textLineMatrix; + m[0] = a; m[1] = b; m[2] = c; m[3] = d; m[4] = e; m[5] = f; + }, translateTextMatrix: function TextState_translateTextMatrix(x, y) { var m = this.textMatrix; m[4] = m[0] * x + m[2] * y + m[4]; m[5] = m[1] * x + m[3] * y + m[5]; }, - calcRenderParams: function TextState_calcRenderingParams(cm) { - var tm = this.textMatrix; - var a = this.fontSize; - var b = a * this.textHScale; - var c = this.textRise; - var vScale = Math.sqrt((tm[2] * tm[2]) + (tm[3] * tm[3])); - var angle = Math.atan2(tm[1], tm[0]); - var m0 = tm[0] * cm[0] + tm[1] * cm[2]; - var m1 = tm[0] * cm[1] + tm[1] * cm[3]; - var m2 = tm[2] * cm[0] + tm[3] * cm[2]; - var m3 = tm[2] * cm[1] + tm[3] * cm[3]; - var m4 = tm[4] * cm[0] + tm[5] * cm[2] + cm[4]; - var m5 = tm[4] * cm[1] + tm[5] * cm[3] + cm[5]; - var renderMatrix = [ - b * m0, - b * m1, - a * m2, - a * m3, - c * m2 + m4, - c * m3 + m5 - ]; - return { - renderMatrix: renderMatrix, - vScale: vScale, - angle: angle - }; + translateTextLineMatrix: function TextState_translateTextMatrix(x, y) { + var m = this.textLineMatrix; + m[4] = m[0] * x + m[2] * y + m[4]; + m[5] = m[1] * x + m[3] * y + m[5]; }, + calcRenderMatrix: function TextState_calcRendeMatrix(ctm) { + // 9.4.4 Text Space Details + var tsm = [this.fontSize * this.textHScale, 0, + 0, this.fontSize, + 0, this.textRise]; + return Util.transform(ctm, Util.transform(this.textMatrix, tsm)); + }, + carriageReturn: function TextState_carriageReturn() { + this.translateTextLineMatrix(0, -this.leading); + this.textMatrix = this.textLineMatrix.slice(); + }, + clone: function TextState_clone() { + var clone = Object.create(this); + clone.textMatrix = this.textMatrix.slice(); + clone.textLineMatrix = this.textLineMatrix.slice(); + clone.fontMatrix = this.fontMatrix.slice(); + return clone; + } }; return TextState; })(); var EvalState = (function EvalStateClosure() { function 
EvalState() { + this.ctm = new Float32Array(IDENTITY_MATRIX); this.font = null; this.textRenderingMode = TextRenderingMode.FILL; } @@ -1650,17 +1797,16 @@ var EvaluatorPreprocessor = (function EvaluatorPreprocessor() { 'null': null }; - function EvaluatorPreprocessor(stream, xref) { + function EvaluatorPreprocessor(stream, xref, stateManager) { // TODO(mduan): pass array of knownCommands rather than OP_MAP // dictionary this.parser = new Parser(new Lexer(stream, OP_MAP), false, xref); - this.ctm = new Float32Array([1, 0, 0, 1, 0, 0]); - this.savedStates = []; + this.stateManager = stateManager; } EvaluatorPreprocessor.prototype = { get savedStatesDepth() { - return this.savedStates.length; + return this.stateManager.stateStack.length; }, read: function EvaluatorPreprocessor_read() { @@ -1717,38 +1863,17 @@ var EvaluatorPreprocessor = (function EvaluatorPreprocessor() { } }, - getState: function EvaluatorPreprocessor_getState() { - return { - ctm: this.ctm - }; - }, - - setState: function EvaluatorPreprocessor_setState(state) { - this.ctm = state.ctm; - }, - preprocessCommand: function EvaluatorPreprocessor_preprocessCommand(fn, args) { switch (fn | 0) { case OPS.save: - this.savedStates.push(this.getState()); + this.stateManager.save(); break; case OPS.restore: - var previousState = this.savedStates.pop(); - if (previousState) { - this.setState(previousState); - } + this.stateManager.restore(); break; case OPS.transform: - var ctm = this.ctm; - var m = new Float32Array(6); - m[0] = ctm[0] * args[0] + ctm[2] * args[1]; - m[1] = ctm[1] * args[0] + ctm[3] * args[1]; - m[2] = ctm[0] * args[2] + ctm[2] * args[3]; - m[3] = ctm[1] * args[2] + ctm[3] * args[3]; - m[4] = ctm[0] * args[4] + ctm[2] * args[5] + ctm[4]; - m[5] = ctm[1] * args[4] + ctm[3] * args[5] + ctm[5]; - this.ctm = m; + this.stateManager.transform(args); break; } } diff --git a/src/core/fonts.js b/src/core/fonts.js index ffcd03095..3da4cae21 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ 
-2102,23 +2102,6 @@ function reverseIfRtl(chars) { return s; } -function fontCharsToUnicode(charCodes, font) { - var glyphs = font.charsToGlyphs(charCodes); - var result = ''; - for (var i = 0, ii = glyphs.length; i < ii; i++) { - var glyph = glyphs[i]; - if (!glyph) { - continue; - } - var glyphUnicode = glyph.unicode; - if (glyphUnicode in NormalizedUnicodes) { - glyphUnicode = NormalizedUnicodes[glyphUnicode]; - } - result += reverseIfRtl(glyphUnicode); - } - return result; -} - function adjustWidths(properties) { if (properties.fontMatrix[0] === FONT_IDENTITY_MATRIX[0]) { return; diff --git a/src/display/api.js b/src/display/api.js index 320f83d4b..aff296f64 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -339,16 +339,34 @@ var PDFDocumentProxy = (function PDFDocumentProxyClosure() { return PDFDocumentProxy; })(); +/** + * Page text content. + * + * @typedef {Object} TextContent + * @property {array} items - array of {@link TextItem} + * @property {Object} styles - {@link TextStyle} objects, indexed by font + * name. + */ + /** * Page text content part. * - * @typedef {Object} BidiText + * @typedef {Object} TextItem * @property {string} str - text content. * @property {string} dir - text direction: 'ttb', 'ltr' or 'rtl'. - * @property {number} x - x position of the text on the page. - * @property {number} y - y position of the text on the page. - * @property {number} angle - text rotation. - * @property {number} size - font size. + * @property {array} transform - transformation matrix. + * @property {number} width - width in device space. + * @property {number} height - height in device space. + * @property {string} fontName - font name used by pdf.js for converted font. + */ + +/** + * Text style + * @typedef {Object} TextStyle + * @property {number} ascent - font ascent. + * @property {number} descent - font descent. + * @property {boolean} vertical - text is in vertical mode. 
+ * @property {string} fontFamily - possible font family */ /** @@ -522,8 +540,8 @@ var PDFPageProxy = (function PDFPageProxyClosure() { return renderTask; }, /** - * @return {Promise} That is resolved with the array of {@link BidiText} - * objects that represent the page text content. + * @return {Promise} That is resolved with a {@link TextContent} + * object that represents the page text content. */ getTextContent: function PDFPageProxy_getTextContent() { var promise = new PDFJS.LegacyPromise(); @@ -1210,8 +1228,7 @@ var InternalRenderTask = (function InternalRenderTaskClosure() { var params = this.params; this.gfx = new CanvasGraphics(params.canvasContext, this.commonObjs, - this.objs, params.textLayer, - params.imageLayer); + this.objs, params.imageLayer); this.gfx.beginDrawing(params.viewport, transparency); this.operatorListIdx = 0; diff --git a/src/display/canvas.js b/src/display/canvas.js index bf247df91..55d11dcf5 100644 --- a/src/display/canvas.js +++ b/src/display/canvas.js @@ -402,7 +402,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { // before it stops and shedules a continue of execution. 
var EXECUTION_TIME = 15; - function CanvasGraphics(canvasCtx, commonObjs, objs, textLayer, imageLayer) { + function CanvasGraphics(canvasCtx, commonObjs, objs, imageLayer) { this.ctx = canvasCtx; this.current = new CanvasExtraState(); this.stateStack = []; @@ -412,7 +412,6 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { this.xobjs = null; this.commonObjs = commonObjs; this.objs = objs; - this.textLayer = textLayer; this.imageLayer = imageLayer; this.groupStack = []; this.processingType3 = null; @@ -718,9 +717,6 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { this.baseTransform = this.ctx.mozCurrentTransform.slice(); - if (this.textLayer) { - this.textLayer.beginLayout(); - } if (this.imageLayer) { this.imageLayer.beginLayout(); } @@ -802,9 +798,6 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { CachedCanvases.clear(); WebGLUtils.clear(); - if (this.textLayer) { - this.textLayer.endLayout(); - } if (this.imageLayer) { this.imageLayer.endLayout(); } @@ -1234,33 +1227,6 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { ctx.scale(-current.textHScale, 1); } }, - createTextGeometry: function CanvasGraphics_createTextGeometry() { - var geometry = {}; - var ctx = this.ctx; - var font = this.current.font; - var ctxMatrix = ctx.mozCurrentTransform; - var a = ctxMatrix[0], b = ctxMatrix[1], c = ctxMatrix[2]; - var d = ctxMatrix[3], e = ctxMatrix[4], f = ctxMatrix[5]; - var sx = (a >= 0) ? - Math.sqrt((a * a) + (b * b)) : -Math.sqrt((a * a) + (b * b)); - var sy = (d >= 0) ? 
- Math.sqrt((c * c) + (d * d)) : -Math.sqrt((c * c) + (d * d)); - var angle = Math.atan2(b, a); - var x = e; - var y = f; - geometry.x = x; - geometry.y = y; - geometry.hScale = sx; - geometry.vScale = sy; - geometry.angle = angle; - geometry.spaceWidth = font.spaceWidth; - geometry.fontName = font.loadedName; - geometry.fontFamily = font.fallbackName; - geometry.fontSize = this.current.fontSize; - geometry.ascent = font.ascent; - geometry.descent = font.descent; - return geometry; - }, paintChar: function (character, x, y) { var ctx = this.ctx; @@ -1332,7 +1298,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { return shadow(this, 'isFontSubpixelAAEnabled', enabled); }, - showText: function CanvasGraphics_showText(glyphs, skipTextSelection) { + showText: function CanvasGraphics_showText(glyphs) { var ctx = this.ctx; var current = this.current; var font = current.font; @@ -1343,24 +1309,11 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { var textHScale = current.textHScale * current.fontDirection; var fontMatrix = current.fontMatrix || FONT_IDENTITY_MATRIX; var glyphsLength = glyphs.length; - var textLayer = this.textLayer; - var geom; - var textSelection = textLayer && !skipTextSelection ? 
true : false; - var canvasWidth = 0.0; var vertical = font.vertical; var defaultVMetrics = font.defaultVMetrics; if (fontSize === 0) { - if (textSelection) { - geom = this.createTextGeometry(); - geom.canvasWidth = canvasWidth; - if (vertical) { - var VERTICAL_TEXT_ROTATION = Math.PI / 2; - geom.angle += VERTICAL_TEXT_ROTATION; - } - this.textLayer.appendText(geom); - } - return canvasWidth; + return; } // Type3 fonts - each glyph is a "mini-PDF" @@ -1371,12 +1324,6 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { ctx.scale(textHScale, 1); - if (textSelection) { - this.save(); - ctx.scale(1, -1); - geom = this.createTextGeometry(); - this.restore(); - } for (var i = 0; i < glyphsLength; ++i) { var glyph = glyphs[i]; @@ -1400,8 +1347,6 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { ctx.translate(width, 0); current.x += width * textHScale; - - canvasWidth += width; } ctx.restore(); this.processingType3 = null; @@ -1418,10 +1363,6 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { lineWidth /= scale; } - if (textSelection) { - geom = this.createTextGeometry(); - } - if (fontSizeScale != 1.0) { ctx.scale(fontSizeScale, fontSizeScale); lineWidth /= fontSizeScale; @@ -1485,8 +1426,6 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { x += charWidth; - canvasWidth += charWidth; - if (restoreNeeded) { ctx.restore(); } @@ -1498,17 +1437,6 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { } ctx.restore(); } - - if (textSelection) { - geom.canvasWidth = canvasWidth; - if (vertical) { - var VERTICAL_TEXT_ROTATION = Math.PI / 2; - geom.angle += VERTICAL_TEXT_ROTATION; - } - this.textLayer.appendText(geom); - } - - return canvasWidth; }, showSpacedText: function CanvasGraphics_showSpacedText(arr) { var ctx = this.ctx; @@ -1518,19 +1446,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { // TJ array's number is independent from fontMatrix var textHScale = current.textHScale * 0.001 * current.fontDirection; var arrLength 
= arr.length; - var textLayer = this.textLayer; - var geom; - var canvasWidth = 0.0; - var textSelection = textLayer ? true : false; var vertical = font.vertical; - var spacingAccumulator = 0; - - if (textSelection) { - ctx.save(); - this.applyTextTransforms(); - geom = this.createTextGeometry(); - ctx.restore(); - } for (var i = 0; i < arrLength; ++i) { var e = arr[i]; @@ -1542,27 +1458,10 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { current.x += spacingLength; } - if (textSelection) { - spacingAccumulator += spacingLength; - } } else { - var shownCanvasWidth = this.showText(e, true); - - if (textSelection) { - canvasWidth += spacingAccumulator + shownCanvasWidth; - spacingAccumulator = 0; - } + this.showText(e); } } - - if (textSelection) { - geom.canvasWidth = canvasWidth; - if (vertical) { - var VERTICAL_TEXT_ROTATION = Math.PI / 2; - geom.angle += VERTICAL_TEXT_ROTATION; - } - this.textLayer.appendText(geom); - } }, nextLineShowText: function CanvasGraphics_nextLineShowText(text) { this.nextLine(); diff --git a/test/driver.js b/test/driver.js index f171c3614..ccc38f816 100644 --- a/test/driver.js +++ b/test/driver.js @@ -203,38 +203,39 @@ function SimpleTextLayerBuilder(ctx, viewport) { this.textCounter = 0; } SimpleTextLayerBuilder.prototype = { - beginLayout: function SimpleTextLayerBuilder_BeginLayout() { - this.ctx.save(); - }, - endLayout: function SimpleTextLayerBuilder_EndLayout() { - this.ctx.restore(); - }, - appendText: function SimpleTextLayerBuilder_AppendText(geom) { + appendText: function SimpleTextLayerBuilder_AppendText(geom, styles) { + var style = styles[geom.fontName]; var ctx = this.ctx, viewport = this.viewport; - // vScale and hScale already contain the scaling to pixel units - var fontHeight = geom.fontSize * Math.abs(geom.vScale); - var fontAscent = (geom.ascent ? geom.ascent * fontHeight : - (geom.descent ? 
(1 + geom.descent) * fontHeight : fontHeight)); + var tx = PDFJS.Util.transform(this.viewport.transform, geom.transform); + var angle = Math.atan2(tx[1], tx[0]); + var fontHeight = Math.sqrt((tx[2] * tx[2]) + (tx[3] * tx[3])); + var fontAscent = (style.ascent ? style.ascent * fontHeight : + (style.descent ? (1 + style.descent) * fontHeight : fontHeight)); ctx.save(); ctx.beginPath(); ctx.strokeStyle = 'red'; ctx.fillStyle = 'yellow'; - ctx.translate(geom.x + (fontAscent * Math.sin(geom.angle)), - geom.y - (fontAscent * Math.cos(geom.angle))); - ctx.rotate(geom.angle); - ctx.rect(0, 0, geom.canvasWidth * Math.abs(geom.hScale), fontHeight); + ctx.translate(tx[4] + (fontAscent * Math.sin(angle)), + tx[5] - (fontAscent * Math.cos(angle))); + ctx.rotate(angle); + ctx.rect(0, 0, geom.width * viewport.scale, geom.height * viewport.scale); ctx.stroke(); ctx.fill(); ctx.restore(); - var textContent = this.textContent[this.textCounter].str; - ctx.font = fontHeight + 'px ' + geom.fontFamily; + ctx.font = fontHeight + 'px ' + style.fontFamily; ctx.fillStyle = 'black'; - ctx.fillText(textContent, geom.x, geom.y); + ctx.fillText(geom.str, tx[4], tx[5]); this.textCounter++; }, setTextContent: function SimpleTextLayerBuilder_SetTextContent(textContent) { - this.textContent = textContent; + this.ctx.save(); + var textItems = textContent.items; + for (var i = 0; i < textItems.length; i++) { + this.appendText(textItems[i], textContent.styles); + } + + this.ctx.restore(); } }; diff --git a/web/page_view.js b/web/page_view.js index ac693078a..d961079e4 100644 --- a/web/page_view.js +++ b/web/page_view.js @@ -485,8 +485,8 @@ var PageView = function pageView(container, id, scale, if (!PDFJS.disableTextLayer) { textLayerDiv = document.createElement('div'); textLayerDiv.className = 'textLayer'; - textLayerDiv.style.width = canvas.width + 'px'; - textLayerDiv.style.height = canvas.height + 'px'; + textLayerDiv.style.width = canvas.style.width; + textLayerDiv.style.height = 
canvas.style.height; div.appendChild(textLayerDiv); } var textLayer = this.textLayer = @@ -503,14 +503,6 @@ var PageView = function pageView(container, id, scale, if (outputScale.scaled) { ctx.scale(outputScale.sx, outputScale.sy); } - if (outputScale.scaled && textLayerDiv) { - var cssScale = 'scale(' + (1 / outputScale.sx) + ', ' + - (1 / outputScale.sy) + ')'; - CustomStyle.setProp('transform' , textLayerDiv, cssScale); - CustomStyle.setProp('transformOrigin' , textLayerDiv, '0% 0%'); - textLayerDiv.dataset._scaleX = outputScale.sx; - textLayerDiv.dataset._scaleY = outputScale.sy; - } // Rendering area @@ -600,20 +592,19 @@ var PageView = function pageView(container, id, scale, this.renderTask.promise.then( function pdfPageRenderCallback() { pageViewDrawCallback(null); + if (textLayer) { + self.getTextContent().then( + function textContentResolved(textContent) { + textLayer.setTextContent(textContent); + } + ); + } }, function pdfPageRenderError(error) { pageViewDrawCallback(error); } ); - if (textLayer) { - this.getTextContent().then( - function textContentResolved(textContent) { - textLayer.setTextContent(textContent); - } - ); - } - setupAnnotations(div, pdfPage, this.viewport); div.setAttribute('data-loaded', true); }; diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js index 337622c8f..87c8f3935 100644 --- a/web/pdf_find_controller.js +++ b/web/pdf_find_controller.js @@ -145,11 +145,12 @@ var PDFFindController = { var self = this; function extractPageText(pageIndex) { self.pdfPageSource.pages[pageIndex].getTextContent().then( - function textContentResolved(bidiTexts) { + function textContentResolved(textContent) { + var textItems = textContent.items; var str = ''; - for (var i = 0; i < bidiTexts.length; i++) { - str += bidiTexts[i].str; + for (var i = 0; i < textItems.length; i++) { + str += textItems[i].str; } // Store the pageContent as a string. 
diff --git a/web/text_layer_builder.js b/web/text_layer_builder.js index 6829b5e34..40aa8575f 100644 --- a/web/text_layer_builder.js +++ b/web/text_layer_builder.js @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -/* globals CustomStyle, PDFFindController, scrollIntoView */ +/* globals CustomStyle, PDFFindController, scrollIntoView, PDFJS */ 'use strict'; @@ -40,6 +40,7 @@ var TextLayerBuilder = function textLayerBuilder(options) { this.lastScrollSource = options.lastScrollSource; this.viewport = options.viewport; this.isViewerInPresentationMode = options.isViewerInPresentationMode; + this.textDivs = []; if (typeof PDFFindController === 'undefined') { window.PDFFindController = null; @@ -49,16 +50,6 @@ var TextLayerBuilder = function textLayerBuilder(options) { this.lastScrollSource = null; } - this.beginLayout = function textLayerBuilderBeginLayout() { - this.textDivs = []; - this.renderingDone = false; - }; - - this.endLayout = function textLayerBuilderEndLayout() { - this.layoutDone = true; - this.insertDivContent(); - }; - this.renderLayer = function textLayerBuilderRenderLayer() { var textDivs = this.textDivs; var canvas = document.createElement('canvas'); @@ -118,70 +109,56 @@ var TextLayerBuilder = function textLayerBuilder(options) { } }; - this.appendText = function textLayerBuilderAppendText(geom) { + this.appendText = function textLayerBuilderAppendText(geom, styles) { + var style = styles[geom.fontName]; var textDiv = document.createElement('div'); + if (!/\S/.test(geom.str)) { + textDiv.dataset.isWhitespace = true; + return; + } + var tx = PDFJS.Util.transform(this.viewport.transform, geom.transform); + var angle = Math.atan2(tx[1], tx[0]); + if (style.vertical) { + angle += Math.PI / 2; + } + var fontHeight = Math.sqrt((tx[2] * tx[2]) + (tx[3] * tx[3])); + var fontAscent = (style.ascent ? style.ascent * fontHeight : + (style.descent ? 
(1 + style.descent) * fontHeight : fontHeight)); - // vScale and hScale already contain the scaling to pixel units - var fontHeight = geom.fontSize * Math.abs(geom.vScale); - textDiv.dataset.canvasWidth = geom.canvasWidth * Math.abs(geom.hScale); - textDiv.dataset.fontName = geom.fontName; - textDiv.dataset.angle = geom.angle * (180 / Math.PI); - + textDiv.style.position = 'absolute'; + textDiv.style.left = (tx[4] + (fontAscent * Math.sin(angle))) + 'px'; + textDiv.style.top = (tx[5] - (fontAscent * Math.cos(angle))) + 'px'; textDiv.style.fontSize = fontHeight + 'px'; - textDiv.style.fontFamily = geom.fontFamily; - var fontAscent = (geom.ascent ? geom.ascent * fontHeight : - (geom.descent ? (1 + geom.descent) * fontHeight : fontHeight)); - textDiv.style.left = (geom.x + (fontAscent * Math.sin(geom.angle))) + 'px'; - textDiv.style.top = (geom.y - (fontAscent * Math.cos(geom.angle))) + 'px'; + textDiv.style.fontFamily = style.fontFamily; - // The content of the div is set in the `setTextContent` function. + textDiv.textContent = geom.str; + textDiv.dataset.fontName = geom.fontName; + textDiv.dataset.angle = angle * (180 / Math.PI); + if (style.vertical) { + textDiv.dataset.canvasWidth = geom.height * this.viewport.scale; + } else { + textDiv.dataset.canvasWidth = geom.width * this.viewport.scale; + } this.textDivs.push(textDiv); }; - this.insertDivContent = function textLayerUpdateTextContent() { - // Only set the content of the divs once layout has finished, the content - // for the divs is available and content is not yet set on the divs. 
- if (!this.layoutDone || this.divContentDone || !this.textContent) { - return; - } - - this.divContentDone = true; - - var textDivs = this.textDivs; - var bidiTexts = this.textContent; - - for (var i = 0; i < bidiTexts.length; i++) { - var bidiText = bidiTexts[i]; - var textDiv = textDivs[i]; - if (!/\S/.test(bidiText.str)) { - textDiv.dataset.isWhitespace = true; - continue; - } - - textDiv.textContent = bidiText.str; - // TODO refactor text layer to use text content position - /** - * var arr = this.viewport.convertToViewportPoint(bidiText.x, bidiText.y); - * textDiv.style.left = arr[0] + 'px'; - * textDiv.style.top = arr[1] + 'px'; - */ - // bidiText.dir may be 'ttb' for vertical texts. - textDiv.dir = bidiText.dir; - } - - this.setupRenderLayoutTimer(); - }; - this.setTextContent = function textLayerBuilderSetTextContent(textContent) { this.textContent = textContent; - this.insertDivContent(); + + var textItems = textContent.items; + for (var i = 0; i < textItems.length; i++) { + this.appendText(textItems[i], textContent.styles); + } + this.divContentDone = true; + + this.setupRenderLayoutTimer(); }; this.convertMatches = function textLayerBuilderConvertMatches(matches) { var i = 0; var iIndex = 0; - var bidiTexts = this.textContent; + var bidiTexts = this.textContent.items; var end = bidiTexts.length - 1; var queryLen = (PDFFindController === null ? 0 : PDFFindController.state.query.length); @@ -240,7 +217,7 @@ var TextLayerBuilder = function textLayerBuilder(options) { return; } - var bidiTexts = this.textContent; + var bidiTexts = this.textContent.items; var textDivs = this.textDivs; var prevEnd = null; var isSelectedPage = (PDFFindController === null ? @@ -356,7 +333,7 @@ var TextLayerBuilder = function textLayerBuilder(options) { // Clear out all matches. var matches = this.matches; var textDivs = this.textDivs; - var bidiTexts = this.textContent; + var bidiTexts = this.textContent.items; var clearedUntilDivIdx = -1; // Clear out all current matches. 
diff --git a/web/viewer.css b/web/viewer.css index 7f242774d..84e3bc6e9 100644 --- a/web/viewer.css +++ b/web/viewer.css @@ -1286,7 +1286,6 @@ canvas { .textLayer > div { color: transparent; position: absolute; - line-height: 1; white-space: pre; cursor: text; }