From af4dc55019efd82cdb002dc25901d6d113406ebe Mon Sep 17 00:00:00 2001 From: calixteman Date: Fri, 30 Apr 2021 14:41:13 +0200 Subject: [PATCH] [api-minor] Fix the way to chunk the strings (#13257) - Improve chunking in order to fix some bugs where the spaces are missing: * track the last position where a glyph has been drawn; * when a new glyph (first glyph in a chunk) is added then compare its position with the last saved one and add a space or break: - there are multiple ways to move the glyphs and to avoid having to deal with all the different possibilities it's far easier to just compare positions; - and so there is now one function (i.e. "compareWithLastPosition") where all the work is done. - Add some breaks in order to get lines; - Remove the multiple white spaces: * some spaces were filled with several white spaces and so it makes it harder to find some sequences of words using the search tool; * other PDF readers replace spaces with one white space. Update src/core/evaluator.js Co-authored-by: Jonas Jenwald Co-authored-by: Jonas Jenwald --- src/core/evaluator.js | 649 +++++++++++++++++++++++--------------- src/display/text_layer.js | 42 ++- test/unit/api_spec.js | 9 +- 3 files changed, 428 insertions(+), 272 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 46c2cb3f1..eb3ec46df 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2042,7 +2042,7 @@ class PartialEvaluator { resources = resources || Dict.empty; stateManager = stateManager || new StateManager(new TextState()); - var WhitespaceRegexp = /\s/g; + const WhitespaceRegexp = /\s/g; var textContent = { items: [], @@ -2051,26 +2051,43 @@ class PartialEvaluator { var textContentItem = { initialized: false, str: [], + totalWidth: 0, + totalHeight: 0, width: 0, height: 0, vertical: false, - lastAdvanceWidth: 0, - lastAdvanceHeight: 0, + lastCharSize: 0, + prevTransform: null, textAdvanceScale: 0, spaceWidth: 0, - fakeSpaceMin: Infinity, - fakeMultiSpaceMin: 
Infinity, - fakeMultiSpaceMax: -0, - textRunBreakAllowed: false, + spaceInFlowMin: 0, + spaceInFlowMax: 0, + trackingSpaceMin: Infinity, transform: null, fontName: null, + hasEOL: false, + isLastCharWhiteSpace: false, }; - var SPACE_FACTOR = 0.3; - var MULTI_SPACE_FACTOR = 1.5; - var MULTI_SPACE_FACTOR_MAX = 4; + + // Used in addFakeSpaces. + // wsw stands for whitespace width. + + // A white <= wsw * TRACKING_SPACE_FACTOR is a tracking space + // so it doesn't count as a space. + const TRACKING_SPACE_FACTOR = 0.3; + + // A white with a width in [wsw * MIN_FACTOR; wsw * MAX_FACTOR] + // is a space which will be inserted in the current flow of words. + // If the width is outside of this range then the flow is broken + // (which means a new span in the text layer). + // It's useful to adjust the best as possible the span in the layer + // to what is displayed in the canvas. + const SPACE_IN_FLOW_MIN_FACTOR = 0.3; + const SPACE_IN_FLOW_MAX_FACTOR = 1.3; var self = this; var xref = this.xref; + const showSpacedTextBuffer = []; // The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd. 
var xobjs = null; @@ -2081,26 +2098,10 @@ class PartialEvaluator { var textState; - function ensureTextContentItem() { - if (textContentItem.initialized) { - return textContentItem; - } - const font = textState.font, - loadedName = font.loadedName; - if (!seenStyles.has(loadedName)) { - seenStyles.add(loadedName); - - textContent.styles[loadedName] = { - fontFamily: font.fallbackName, - ascent: font.ascent, - descent: font.descent, - vertical: font.vertical, - }; - } - textContentItem.fontName = loadedName; - + function getCurrentTextTransform() { // 9.4.4 Text Space Details - var tsm = [ + const font = textState.font; + const tsm = [ textState.fontSize * textState.textHScale, 0, 0, @@ -2120,18 +2121,44 @@ class PartialEvaluator { } } - var trm = Util.transform( + return Util.transform( textState.ctm, Util.transform(textState.textMatrix, tsm) ); - textContentItem.transform = trm; + } + + function ensureTextContentItem() { + if (textContentItem.initialized) { + return textContentItem; + } + const font = textState.font, + loadedName = font.loadedName; + if (!seenStyles.has(loadedName)) { + seenStyles.add(loadedName); + + textContent.styles[loadedName] = { + fontFamily: font.fallbackName, + ascent: font.ascent, + descent: font.descent, + vertical: font.vertical, + }; + } + textContentItem.fontName = loadedName; + + const trm = (textContentItem.transform = getCurrentTextTransform()); if (!font.vertical) { - textContentItem.width = 0; - textContentItem.height = Math.hypot(trm[2], trm[3]); + textContentItem.width = textContentItem.totalWidth = 0; + textContentItem.height = textContentItem.totalHeight = Math.hypot( + trm[2], + trm[3] + ); textContentItem.vertical = false; } else { - textContentItem.width = Math.hypot(trm[0], trm[1]); - textContentItem.height = 0; + textContentItem.width = textContentItem.totalWidth = Math.hypot( + trm[0], + trm[1] + ); + textContentItem.height = textContentItem.totalHeight = 0; textContentItem.vertical = true; } @@ -2141,29 +2168,53 @@ 
class PartialEvaluator { ); const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]); textContentItem.textAdvanceScale = scaleCtmX * scaleLineX; - textContentItem.lastAdvanceWidth = 0; - textContentItem.lastAdvanceHeight = 0; + textContentItem.lastCharSize = textContentItem.lastCharSize || 0; var spaceWidth = (font.spaceWidth / 1000) * textState.fontSize; if (spaceWidth) { textContentItem.spaceWidth = spaceWidth; - textContentItem.fakeSpaceMin = spaceWidth * SPACE_FACTOR; - textContentItem.fakeMultiSpaceMin = spaceWidth * MULTI_SPACE_FACTOR; - textContentItem.fakeMultiSpaceMax = spaceWidth * MULTI_SPACE_FACTOR_MAX; - // It's okay for monospace fonts to fake as much space as needed. - textContentItem.textRunBreakAllowed = !font.isMonospace; + textContentItem.trackingSpaceMin = spaceWidth * TRACKING_SPACE_FACTOR; + textContentItem.spaceInFlowMin = spaceWidth * SPACE_IN_FLOW_MIN_FACTOR; + textContentItem.spaceInFlowMax = spaceWidth * SPACE_IN_FLOW_MAX_FACTOR; } else { textContentItem.spaceWidth = 0; - textContentItem.fakeSpaceMin = Infinity; - textContentItem.fakeMultiSpaceMin = Infinity; - textContentItem.fakeMultiSpaceMax = 0; - textContentItem.textRunBreakAllowed = false; + textContentItem.trackingSpaceMin = Infinity; } + textContentItem.hasEOL = false; + textContentItem.initialized = true; return textContentItem; } + function updateAdvanceScale() { + if (!textContentItem.initialized) { + return; + } + + const scaleLineX = Math.hypot( + textState.textLineMatrix[0], + textState.textLineMatrix[1] + ); + const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]); + const scaleFactor = scaleCtmX * scaleLineX; + if (scaleFactor === textContentItem.textAdvanceScale) { + return; + } + + if (!textContentItem.vertical) { + textContentItem.totalWidth += + textContentItem.width * textContentItem.textAdvanceScale; + textContentItem.width = 0; + } else { + textContentItem.totalHeight += + textContentItem.height * textContentItem.textAdvanceScale; + 
textContentItem.height = 0; + } + + textContentItem.textAdvanceScale = scaleFactor; + } + function replaceWhitespace(str) { // Replaces all whitespaces with standard spaces (0x20), to avoid // alignment issues between the textLayer and the canvas if the text @@ -2178,17 +2229,19 @@ class PartialEvaluator { } function runBidiTransform(textChunk) { - var str = textChunk.str.join(""); - var bidiResult = bidi(str, -1, textChunk.vertical); + const text = textChunk.str.join(""); + const bidiResult = bidi(text, -1, textChunk.vertical); + const str = normalizeWhitespace + ? replaceWhitespace(bidiResult.str) + : bidiResult.str; return { - str: normalizeWhitespace - ? replaceWhitespace(bidiResult.str) - : bidiResult.str, + str, dir: bidiResult.dir, - width: textChunk.width, - height: textChunk.height, + width: textChunk.totalWidth, + height: textChunk.totalHeight, transform: textChunk.transform, fontName: textChunk.fontName, + hasEOL: textChunk.hasEOL, }; } @@ -2202,91 +2255,276 @@ class PartialEvaluator { }); } - function buildTextContentItem(chars) { - var font = textState.font; - var textChunk = ensureTextContentItem(); - var width = 0; - var height = 0; - var glyphs = font.charsToGlyphs(chars); - for (var i = 0; i < glyphs.length; i++) { - var glyph = glyphs[i]; - var glyphWidth = null; - if (font.vertical && glyph.vmetric) { - glyphWidth = glyph.vmetric[0]; - } else { - glyphWidth = glyph.width; + function compareWithLastPosition(fontSize) { + if ( + !combineTextItems || + !textState.font || + !textContentItem.prevTransform + ) { + return; + } + + const currentTransform = getCurrentTextTransform(); + const posX = currentTransform[4]; + const posY = currentTransform[5]; + const lastPosX = textContentItem.prevTransform[4]; + const lastPosY = textContentItem.prevTransform[5]; + + if (lastPosX === posX && lastPosY === posY) { + return; + } + + const advanceX = (posX - lastPosX) / textContentItem.textAdvanceScale; + const advanceY = (posY - lastPosY) / 
textContentItem.textAdvanceScale; + const HALF_LAST_CHAR = -0.5 * textContentItem.lastCharSize; + + if (textState.font.vertical) { + if ( + Math.abs(advanceX) > + textContentItem.width / + textContentItem.textAdvanceScale /* not the same column */ + ) { + appendEOL(); + return; } - var glyphUnicode = glyph.unicode; - var NormalizedUnicodes = getNormalizedUnicodes(); - if (NormalizedUnicodes[glyphUnicode] !== undefined) { - glyphUnicode = NormalizedUnicodes[glyphUnicode]; + if (HALF_LAST_CHAR > advanceY) { + return; } - glyphUnicode = reverseIfRtl(glyphUnicode); - var charSpacing = textState.charSpacing; - if (glyph.isSpace) { - var wordSpacing = textState.wordSpacing; - charSpacing += wordSpacing; - if (wordSpacing > 0) { - addFakeSpaces(wordSpacing, textChunk.str); + if (advanceY > textContentItem.trackingSpaceMin) { + textContentItem.height += advanceY; + } else if (!addFakeSpaces(advanceY, 0, textContentItem.prevTransform)) { + if (textContentItem.str.length === 0) { + textContent.items.push({ + str: " ", + dir: "ltr", + width: 0, + height: advanceY, + transform: textContentItem.prevTransform, + fontName: textContentItem.fontName, + hasEOL: false, + }); + textContentItem.isLastCharWhiteSpace = true; + } else { + textContentItem.height += advanceY; } } - var tx = 0; - var ty = 0; - if (!font.vertical) { - var w0 = glyphWidth * textState.fontMatrix[0]; - tx = (w0 * textState.fontSize + charSpacing) * textState.textHScale; - width += tx; + return; + } + + if ( + Math.abs(advanceY) > + textContentItem.height / + textContentItem.textAdvanceScale /* not the same line */ + ) { + appendEOL(); + return; + } + + if (HALF_LAST_CHAR > advanceX) { + return; + } + + if (advanceX <= textContentItem.trackingSpaceMin) { + textContentItem.width += advanceX; + } else if (!addFakeSpaces(advanceX, 0, textContentItem.prevTransform)) { + if (textContentItem.str.length === 0) { + textContent.items.push({ + str: " ", + dir: "ltr", + width: advanceX, + height: 0, + transform: 
textContentItem.prevTransform, + fontName: textContentItem.fontName, + hasEOL: false, + }); + textContentItem.isLastCharWhiteSpace = true; } else { - var w1 = glyphWidth * textState.fontMatrix[0]; - ty = w1 * textState.fontSize + charSpacing; - height += ty; + textContentItem.width += advanceX; } - textState.translateTextMatrix(tx, ty); - - textChunk.str.push(glyphUnicode); } - - if (!font.vertical) { - textChunk.lastAdvanceWidth = width; - textChunk.width += width; - } else { - textChunk.lastAdvanceHeight = height; - textChunk.height += Math.abs(height); - } - - return textChunk; } - function addFakeSpaces(width, strBuf) { - if (width < textContentItem.fakeSpaceMin) { + function buildTextContentItem({ chars, extraSpacing, isFirstChunk }) { + const font = textState.font; + if (!chars) { + // Just move according to the space we have. + const charSpacing = textState.charSpacing + extraSpacing; + if (charSpacing) { + if (!font.vertical) { + textState.translateTextMatrix( + charSpacing * textState.textHScale, + 0 + ); + } else { + textState.translateTextMatrix(0, charSpacing); + } + } + return; } - if (width < textContentItem.fakeMultiSpaceMin) { - strBuf.push(" "); - return; + + const NormalizedUnicodes = getNormalizedUnicodes(); + const glyphs = font.charsToGlyphs(chars); + const scale = textState.fontMatrix[0] * textState.fontSize; + if (isFirstChunk) { + compareWithLastPosition(scale); } - var fakeSpaces = Math.round(width / textContentItem.spaceWidth); - while (fakeSpaces-- > 0) { - strBuf.push(" "); + + let textChunk = ensureTextContentItem(); + let size = 0; + let lastCharSize = 0; + + for (let i = 0, ii = glyphs.length; i < ii; i++) { + const glyph = glyphs[i]; + let charSpacing = + textState.charSpacing + (i === ii - 1 ? 
extraSpacing : 0); + + let glyphUnicode = glyph.unicode; + if (glyph.isSpace) { + charSpacing += textState.wordSpacing; + textChunk.isLastCharWhiteSpace = true; + } else { + glyphUnicode = NormalizedUnicodes[glyphUnicode] || glyphUnicode; + glyphUnicode = reverseIfRtl(glyphUnicode); + textChunk.isLastCharWhiteSpace = false; + } + textChunk.str.push(glyphUnicode); + + const glyphWidth = + font.vertical && glyph.vmetric ? glyph.vmetric[0] : glyph.width; + + let scaledDim = glyphWidth * scale; + if (!font.vertical) { + scaledDim *= textState.textHScale; + textState.translateTextMatrix(scaledDim, 0); + } else { + textState.translateTextMatrix(0, scaledDim); + scaledDim = Math.abs(scaledDim); + } + size += scaledDim; + + if (charSpacing) { + if (!font.vertical) { + charSpacing *= textState.textHScale; + } + + scaledDim += charSpacing; + const wasSplit = + charSpacing > textContentItem.trackingSpaceMin && + addFakeSpaces(charSpacing, size); + if (!font.vertical) { + textState.translateTextMatrix(charSpacing, 0); + } else { + textState.translateTextMatrix(0, charSpacing); + } + + if (wasSplit) { + textChunk = ensureTextContentItem(); + size = 0; + } else { + size += charSpacing; + } + } + + lastCharSize = scaledDim; } + + textChunk.lastCharSize = lastCharSize; + if (!font.vertical) { + textChunk.width += size; + } else { + textChunk.height += size; + } + + textChunk.prevTransform = getCurrentTextTransform(); + } + + function appendEOL() { + if (textContentItem.initialized) { + textContentItem.hasEOL = true; + flushTextContentItem(); + } else if (textContent.items.length > 0) { + textContent.items[textContent.items.length - 1].hasEOL = true; + } else { + textContent.items.push({ + str: "", + dir: "ltr", + width: 0, + height: 0, + transform: getCurrentTextTransform(), + fontName: textState.font.loadedName, + hasEOL: true, + }); + } + + textContentItem.isLastCharWhiteSpace = false; + textContentItem.lastCharSize = 0; + } + + function addFakeSpaces(width, size, transf = null) 
{ + if ( + textContentItem.spaceInFlowMin <= width && + width <= textContentItem.spaceInFlowMax + ) { + if (textContentItem.initialized) { + textContentItem.str.push(" "); + textContentItem.isLastCharWhiteSpace = true; + } + return false; + } + + const fontName = textContentItem.fontName; + + let height = 0; + width *= textContentItem.textAdvanceScale; + if (!textContentItem.vertical) { + textContentItem.width += size; + } else { + textContentItem.height += size; + height = width; + width = 0; + } + + flushTextContentItem(); + + if (textContentItem.isLastCharWhiteSpace) { + return true; + } + + textContentItem.isLastCharWhiteSpace = true; + textContent.items.push({ + str: " ", + // TODO: check if using the orientation from last chunk is + // better or not. + dir: "ltr", + width, + height, + transform: transf ? transf : getCurrentTextTransform(), + fontName, + hasEOL: false, + }); + + return true; } function flushTextContentItem() { - if (!textContentItem.initialized) { + if (!textContentItem.initialized || !textContentItem.str) { return; } // Do final text scaling. 
if (!textContentItem.vertical) { - textContentItem.width *= textContentItem.textAdvanceScale; + textContentItem.totalWidth += + textContentItem.width * textContentItem.textAdvanceScale; } else { - textContentItem.height *= textContentItem.textAdvanceScale; + textContentItem.totalHeight += + textContentItem.height * textContentItem.textAdvanceScale; } - textContent.items.push(runBidiTransform(textContentItem)); + textContent.items.push(runBidiTransform(textContentItem)); textContentItem.initialized = false; textContentItem.str.length = 0; } @@ -2330,7 +2568,6 @@ class PartialEvaluator { textState = stateManager.state; var fn = operation.fn; args = operation.args; - var advance, diff; switch (fn | 0) { case OPS.setFont: @@ -2363,32 +2600,6 @@ class PartialEvaluator { textState.leading = args[0]; break; case OPS.moveText: - // Optimization to treat same line movement as advance - var isSameTextLine = !textState.font - ? false - : (textState.font.vertical ? args[0] : args[1]) === 0; - advance = args[0] - args[1]; - if ( - combineTextItems && - isSameTextLine && - textContentItem.initialized && - advance > 0 && - advance <= textContentItem.fakeMultiSpaceMax - ) { - textState.translateTextLineMatrix(args[0], args[1]); - textContentItem.width += - args[0] - textContentItem.lastAdvanceWidth; - textContentItem.height += - args[1] - textContentItem.lastAdvanceHeight; - diff = - args[0] - - textContentItem.lastAdvanceWidth - - (args[1] - textContentItem.lastAdvanceHeight); - addFakeSpaces(diff, textContentItem.str); - break; - } - - flushTextContentItem(); textState.translateTextLineMatrix(args[0], args[1]); textState.textMatrix = textState.textLineMatrix.slice(); break; @@ -2399,40 +2610,10 @@ class PartialEvaluator { textState.textMatrix = textState.textLineMatrix.slice(); break; case OPS.nextLine: - flushTextContentItem(); + appendEOL(); textState.carriageReturn(); break; case OPS.setTextMatrix: - // Optimization to treat same line movement as advance. 
- advance = textState.calcTextLineMatrixAdvance( - args[0], - args[1], - args[2], - args[3], - args[4], - args[5] - ); - if ( - combineTextItems && - advance !== null && - textContentItem.initialized && - advance.value > 0 && - advance.value <= textContentItem.fakeMultiSpaceMax - ) { - textState.translateTextLineMatrix(advance.width, advance.height); - textContentItem.width += - advance.width - textContentItem.lastAdvanceWidth; - textContentItem.height += - advance.height - textContentItem.lastAdvanceHeight; - diff = - advance.width - - textContentItem.lastAdvanceWidth - - (advance.height - textContentItem.lastAdvanceHeight); - addFakeSpaces(diff, textContentItem.str); - break; - } - - flushTextContentItem(); textState.setTextMatrix( args[0], args[1], @@ -2449,6 +2630,7 @@ class PartialEvaluator { args[4], args[5] ); + updateAdvanceScale(); break; case OPS.setCharSpacing: textState.charSpacing = args[0]; @@ -2466,14 +2648,16 @@ class PartialEvaluator { self.ensureStateFont(stateManager.state); continue; } - var items = args[0]; - var offset; - for (var j = 0, jj = items.length; j < jj; j++) { - if (typeof items[j] === "string") { - buildTextContentItem(items[j]); - } else if (isNum(items[j])) { - ensureTextContentItem(); + const spaceFactor = + ((textState.font.vertical ? 1 : -1) * textState.fontSize) / 1000; + const elements = args[0]; + let isFirstChunk = true; + for (let i = 0, ii = elements.length; i < ii - 1; i++) { + const item = elements[i]; + if (typeof item === "string") { + showSpacedTextBuffer.push(item); + } else if (typeof item === "number" && item !== 0) { // PDF Specification 5.3.2 states: // The number is expressed in thousandths of a unit of text // space. @@ -2482,64 +2666,75 @@ class PartialEvaluator { // In the default coordinate system, a positive adjustment // has the effect of moving the next glyph painted either to // the left or down by the given amount. 
- advance = (items[j] * textState.fontSize) / 1000; - var breakTextRun = false; - if (textState.font.vertical) { - offset = advance; - textState.translateTextMatrix(0, offset); - breakTextRun = - textContentItem.textRunBreakAllowed && - advance > textContentItem.fakeMultiSpaceMax; - if (!breakTextRun) { - // Value needs to be added to height to paint down. - textContentItem.height += offset; - } - } else { - advance = -advance; - offset = advance * textState.textHScale; - textState.translateTextMatrix(offset, 0); - breakTextRun = - textContentItem.textRunBreakAllowed && - advance > textContentItem.fakeMultiSpaceMax; - if (!breakTextRun) { - // Value needs to be subtracted from width to paint left. - textContentItem.width += offset; - } - } - if (breakTextRun) { - flushTextContentItem(); - } else if (advance > 0) { - addFakeSpaces(advance, textContentItem.str); + const str = showSpacedTextBuffer.join(""); + showSpacedTextBuffer.length = 0; + buildTextContentItem({ + chars: str, + extraSpacing: item * spaceFactor, + isFirstChunk, + }); + if (str && isFirstChunk) { + isFirstChunk = false; } } } + + const item = elements[elements.length - 1]; + if (typeof item === "string") { + showSpacedTextBuffer.push(item); + } + + if (showSpacedTextBuffer.length > 0) { + const str = showSpacedTextBuffer.join(""); + showSpacedTextBuffer.length = 0; + buildTextContentItem({ + chars: str, + extraSpacing: 0, + isFirstChunk, + }); + } break; case OPS.showText: if (!stateManager.state.font) { self.ensureStateFont(stateManager.state); continue; } - buildTextContentItem(args[0]); + + buildTextContentItem({ + chars: args[0], + extraSpacing: 0, + isFirstChunk: true, + }); break; case OPS.nextLineShowText: if (!stateManager.state.font) { self.ensureStateFont(stateManager.state); continue; } + textContentItem.hasEOL = true; flushTextContentItem(); textState.carriageReturn(); - buildTextContentItem(args[0]); + buildTextContentItem({ + chars: args[0], + extraSpacing: 0, + isFirstChunk: true, + 
}); break; case OPS.nextLineSetSpacingShowText: if (!stateManager.state.font) { self.ensureStateFont(stateManager.state); continue; } + textContentItem.hasEOL = true; flushTextContentItem(); textState.wordSpacing = args[0]; textState.charSpacing = args[1]; textState.carriageReturn(); - buildTextContentItem(args[2]); + buildTextContentItem({ + chars: args[2], + extraSpacing: 0, + isFirstChunk: true, + }); break; case OPS.paintXObject: flushTextContentItem(); @@ -3887,46 +4082,6 @@ class TextState { m[5] = m[1] * x + m[3] * y + m[5]; } - calcTextLineMatrixAdvance(a, b, c, d, e, f) { - var font = this.font; - if (!font) { - return null; - } - var m = this.textLineMatrix; - if (!(a === m[0] && b === m[1] && c === m[2] && d === m[3])) { - return null; - } - var txDiff = e - m[4], - tyDiff = f - m[5]; - if ((font.vertical && txDiff !== 0) || (!font.vertical && tyDiff !== 0)) { - return null; - } - var tx, - ty, - denominator = a * d - b * c; - if (font.vertical) { - tx = (-tyDiff * c) / denominator; - ty = (tyDiff * a) / denominator; - } else { - tx = (txDiff * d) / denominator; - ty = (-txDiff * b) / denominator; - } - return { width: tx, height: ty, value: font.vertical ? 
ty : tx }; - } - - calcRenderMatrix(ctm) { - // 9.4.4 Text Space Details - var tsm = [ - this.fontSize * this.textHScale, - 0, - 0, - this.fontSize, - 0, - this.textRise, - ]; - return Util.transform(ctm, Util.transform(this.textMatrix, tsm)); - } - carriageReturn() { this.translateTextLineMatrix(0, -this.leading); this.textMatrix = this.textLineMatrix.slice(); diff --git a/src/display/text_layer.js b/src/display/text_layer.js index 6282cfe59..356348308 100644 --- a/src/display/text_layer.js +++ b/src/display/text_layer.js @@ -57,12 +57,7 @@ const renderTextLayer = (function renderTextLayerClosure() { const DEFAULT_FONT_SIZE = 30; const DEFAULT_FONT_ASCENT = 0.8; const ascentCache = new Map(); - - const NonWhitespaceRegexp = /\S/; - - function isAllWhitespace(str) { - return !NonWhitespaceRegexp.test(str); - } + const AllWhitespaceRegexp = /^\s+$/g; function getAscent(fontFamily, ctx) { const cachedAscent = ascentCache.get(fontFamily); @@ -133,7 +128,8 @@ const renderTextLayer = (function renderTextLayerClosure() { const textDivProperties = { angle: 0, canvasWidth: 0, - isWhitespace: false, + hasText: geom.str !== "", + hasEOL: geom.hasEOL, originalTransform: null, paddingBottom: 0, paddingLeft: 0, @@ -142,12 +138,8 @@ const renderTextLayer = (function renderTextLayerClosure() { scale: 1, }; + textDiv.textContent = geom.str; task._textDivs.push(textDiv); - if (isAllWhitespace(geom.str)) { - textDivProperties.isWhitespace = true; - task._textDivProperties.set(textDiv, textDivProperties); - return; - } const tx = Util.transform(task._viewport.transform, geom.transform); let angle = Math.atan2(tx[1], tx[0]); @@ -176,7 +168,6 @@ const renderTextLayer = (function renderTextLayerClosure() { // Keeps screen readers from pausing on every new text span. textDiv.setAttribute("role", "presentation"); - textDiv.textContent = geom.str; // geom.dir may be 'ttb' for vertical texts. 
textDiv.dir = geom.dir; @@ -192,7 +183,10 @@ const renderTextLayer = (function renderTextLayerClosure() { // little effect on text highlighting. This makes scrolling on docs with // lots of such divs a lot faster. let shouldScaleText = false; - if (geom.str.length > 1) { + if ( + geom.str.length > 1 || + (task._enhanceTextSelection && AllWhitespaceRegexp.test(geom.str)) + ) { shouldScaleText = true; } else if (geom.transform[0] !== geom.transform[3]) { const absScaleX = Math.abs(geom.transform[0]), @@ -218,7 +212,7 @@ const renderTextLayer = (function renderTextLayerClosure() { task._layoutText(textDiv); } - if (task._enhanceTextSelection) { + if (task._enhanceTextSelection && textDivProperties.hasText) { let angleCos = 1, angleSin = 0; if (angle !== 0) { @@ -666,12 +660,9 @@ const renderTextLayer = (function renderTextLayerClosure() { _layoutText(textDiv) { const textDivProperties = this._textDivProperties.get(textDiv); - if (textDivProperties.isWhitespace) { - return; - } let transform = ""; - if (textDivProperties.canvasWidth !== 0) { + if (textDivProperties.canvasWidth !== 0 && textDivProperties.hasText) { const { fontSize, fontFamily } = textDiv.style; // Only build font string and set to context if different from last. 
@@ -700,8 +691,15 @@ const renderTextLayer = (function renderTextLayerClosure() { } textDiv.style.transform = transform; } - this._textDivProperties.set(textDiv, textDivProperties); - this._container.appendChild(textDiv); + + if (textDivProperties.hasText) { + this._container.appendChild(textDiv); + } + if (textDivProperties.hasEOL) { + const br = document.createElement("br"); + br.setAttribute("role", "presentation"); + this._container.appendChild(br); + } }, _render: function TextLayer_render(timeout) { @@ -778,7 +776,7 @@ const renderTextLayer = (function renderTextLayerClosure() { const div = this._textDivs[i]; const divProps = this._textDivProperties.get(div); - if (divProps.isWhitespace) { + if (!divProps.hasText) { continue; } if (expandDivs) { diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index fe7120103..454351063 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -1495,12 +1495,14 @@ describe("api", function () { }); const data = await Promise.all([defaultPromise, parametersPromise]); + expect(!!data[0].items).toEqual(true); - expect(data[0].items.length).toEqual(7); + expect(data[0].items.length).toEqual(12); expect(!!data[0].styles).toEqual(true); - // A simple check that ensures the two `textContent` object match. - expect(JSON.stringify(data[0])).toEqual(JSON.stringify(data[1])); + expect(!!data[1].items).toEqual(true); + expect(data[1].items.length).toEqual(7); + expect(!!data[1].styles).toEqual(true); }); it("gets text content, with correct properties (issue 8276)", async function () { @@ -1520,6 +1522,7 @@ describe("api", function () { str: "Issue 8276", transform: [18, 0, 0, 18, 441.81, 708.4499999999999], width: 77.49, + hasEOL: false, }); expect(styles.Times).toEqual({ fontFamily: "serif",