1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-22 16:18:08 +02:00

[api-minor] Fix the way to chunk the strings (#13257)

- Improve chunking in order to fix some bugs where spaces are missing:
    * track the last position where a glyph has been drawn;
    * when a new glyph (the first glyph in a chunk) is added, compare its position with the last saved one and add a space or a break:
      - there are multiple ways to move the glyphs, and to avoid having to deal with all the different possibilities it's much easier to just compare positions;
      - and so there is now one function (i.e. "compareWithLastPosition") where all the work is done.
  - Add some breaks in order to get lines;
  - Remove the multiple white spaces:
    * some gaps were filled with several white spaces, which makes it harder to find some sequences of words using the search tool;
    * other PDF readers replace such gaps with a single white space.

Update src/core/evaluator.js

Co-authored-by: Jonas Jenwald <jonas.jenwald@gmail.com>

Co-authored-by: Jonas Jenwald <jonas.jenwald@gmail.com>
This commit is contained in:
calixteman 2021-04-30 14:41:13 +02:00 committed by GitHub
parent e6fcb1e70b
commit af4dc55019
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 428 additions and 272 deletions

View file

@ -2042,7 +2042,7 @@ class PartialEvaluator {
resources = resources || Dict.empty;
stateManager = stateManager || new StateManager(new TextState());
var WhitespaceRegexp = /\s/g;
const WhitespaceRegexp = /\s/g;
var textContent = {
items: [],
@ -2051,26 +2051,43 @@ class PartialEvaluator {
var textContentItem = {
initialized: false,
str: [],
totalWidth: 0,
totalHeight: 0,
width: 0,
height: 0,
vertical: false,
lastAdvanceWidth: 0,
lastAdvanceHeight: 0,
lastCharSize: 0,
prevTransform: null,
textAdvanceScale: 0,
spaceWidth: 0,
fakeSpaceMin: Infinity,
fakeMultiSpaceMin: Infinity,
fakeMultiSpaceMax: -0,
textRunBreakAllowed: false,
spaceInFlowMin: 0,
spaceInFlowMax: 0,
trackingSpaceMin: Infinity,
transform: null,
fontName: null,
hasEOL: false,
isLastCharWhiteSpace: false,
};
var SPACE_FACTOR = 0.3;
var MULTI_SPACE_FACTOR = 1.5;
var MULTI_SPACE_FACTOR_MAX = 4;
// Used in addFakeSpaces.
// wsw stands for whitespace width.
// A white space whose width is <= wsw * TRACKING_SPACE_FACTOR is a tracking
// space, so it doesn't count as a space.
const TRACKING_SPACE_FACTOR = 0.3;
// A white space with a width in [wsw * MIN_FACTOR; wsw * MAX_FACTOR]
// is a space which will be inserted in the current flow of words.
// If the width is outside of this range then the flow is broken
// (which means a new span in the text layer).
// This helps the spans in the text layer match, as closely as possible,
// what is displayed in the canvas.
const SPACE_IN_FLOW_MIN_FACTOR = 0.3;
const SPACE_IN_FLOW_MAX_FACTOR = 1.3;
var self = this;
var xref = this.xref;
const showSpacedTextBuffer = [];
// The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd.
var xobjs = null;
@ -2081,26 +2098,10 @@ class PartialEvaluator {
var textState;
function ensureTextContentItem() {
if (textContentItem.initialized) {
return textContentItem;
}
const font = textState.font,
loadedName = font.loadedName;
if (!seenStyles.has(loadedName)) {
seenStyles.add(loadedName);
textContent.styles[loadedName] = {
fontFamily: font.fallbackName,
ascent: font.ascent,
descent: font.descent,
vertical: font.vertical,
};
}
textContentItem.fontName = loadedName;
function getCurrentTextTransform() {
// 9.4.4 Text Space Details
var tsm = [
const font = textState.font;
const tsm = [
textState.fontSize * textState.textHScale,
0,
0,
@ -2120,18 +2121,44 @@ class PartialEvaluator {
}
}
var trm = Util.transform(
return Util.transform(
textState.ctm,
Util.transform(textState.textMatrix, tsm)
);
textContentItem.transform = trm;
}
function ensureTextContentItem() {
if (textContentItem.initialized) {
return textContentItem;
}
const font = textState.font,
loadedName = font.loadedName;
if (!seenStyles.has(loadedName)) {
seenStyles.add(loadedName);
textContent.styles[loadedName] = {
fontFamily: font.fallbackName,
ascent: font.ascent,
descent: font.descent,
vertical: font.vertical,
};
}
textContentItem.fontName = loadedName;
const trm = (textContentItem.transform = getCurrentTextTransform());
if (!font.vertical) {
textContentItem.width = 0;
textContentItem.height = Math.hypot(trm[2], trm[3]);
textContentItem.width = textContentItem.totalWidth = 0;
textContentItem.height = textContentItem.totalHeight = Math.hypot(
trm[2],
trm[3]
);
textContentItem.vertical = false;
} else {
textContentItem.width = Math.hypot(trm[0], trm[1]);
textContentItem.height = 0;
textContentItem.width = textContentItem.totalWidth = Math.hypot(
trm[0],
trm[1]
);
textContentItem.height = textContentItem.totalHeight = 0;
textContentItem.vertical = true;
}
@ -2141,29 +2168,53 @@ class PartialEvaluator {
);
const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]);
textContentItem.textAdvanceScale = scaleCtmX * scaleLineX;
textContentItem.lastAdvanceWidth = 0;
textContentItem.lastAdvanceHeight = 0;
textContentItem.lastCharSize = textContentItem.lastCharSize || 0;
var spaceWidth = (font.spaceWidth / 1000) * textState.fontSize;
if (spaceWidth) {
textContentItem.spaceWidth = spaceWidth;
textContentItem.fakeSpaceMin = spaceWidth * SPACE_FACTOR;
textContentItem.fakeMultiSpaceMin = spaceWidth * MULTI_SPACE_FACTOR;
textContentItem.fakeMultiSpaceMax = spaceWidth * MULTI_SPACE_FACTOR_MAX;
// It's okay for monospace fonts to fake as much space as needed.
textContentItem.textRunBreakAllowed = !font.isMonospace;
textContentItem.trackingSpaceMin = spaceWidth * TRACKING_SPACE_FACTOR;
textContentItem.spaceInFlowMin = spaceWidth * SPACE_IN_FLOW_MIN_FACTOR;
textContentItem.spaceInFlowMax = spaceWidth * SPACE_IN_FLOW_MAX_FACTOR;
} else {
textContentItem.spaceWidth = 0;
textContentItem.fakeSpaceMin = Infinity;
textContentItem.fakeMultiSpaceMin = Infinity;
textContentItem.fakeMultiSpaceMax = 0;
textContentItem.textRunBreakAllowed = false;
textContentItem.trackingSpaceMin = Infinity;
}
textContentItem.hasEOL = false;
textContentItem.initialized = true;
return textContentItem;
}
/**
 * Refreshes the advance scale stored on the current text chunk.
 *
 * Does nothing while no chunk is initialized, or when the freshly computed
 * scale (length of the first CTM column pair times the length of the first
 * text-line-matrix column pair) equals the stored one. Otherwise the size
 * accumulated so far is converted with the *old* scale and folded into the
 * chunk total (`totalWidth`, or `totalHeight` for vertical chunks), the
 * pending size is reset to zero, and the new scale is stored.
 */
function updateAdvanceScale() {
  const chunk = textContentItem;
  if (chunk.initialized) {
    const lineMatrix = textState.textLineMatrix;
    const lineScale = Math.hypot(lineMatrix[0], lineMatrix[1]);
    const ctmScale = Math.hypot(textState.ctm[0], textState.ctm[1]);
    const nextScale = ctmScale * lineScale;

    if (nextScale !== chunk.textAdvanceScale) {
      // The pending size was measured under the previous scale, so it has to
      // be committed before that scale is overwritten.
      if (chunk.vertical) {
        chunk.totalHeight += chunk.height * chunk.textAdvanceScale;
        chunk.height = 0;
      } else {
        chunk.totalWidth += chunk.width * chunk.textAdvanceScale;
        chunk.width = 0;
      }
      chunk.textAdvanceScale = nextScale;
    }
  }
}
function replaceWhitespace(str) {
// Replaces all whitespaces with standard spaces (0x20), to avoid
// alignment issues between the textLayer and the canvas if the text
@ -2178,17 +2229,19 @@ class PartialEvaluator {
}
function runBidiTransform(textChunk) {
var str = textChunk.str.join("");
var bidiResult = bidi(str, -1, textChunk.vertical);
const text = textChunk.str.join("");
const bidiResult = bidi(text, -1, textChunk.vertical);
const str = normalizeWhitespace
? replaceWhitespace(bidiResult.str)
: bidiResult.str;
return {
str: normalizeWhitespace
? replaceWhitespace(bidiResult.str)
: bidiResult.str,
str,
dir: bidiResult.dir,
width: textChunk.width,
height: textChunk.height,
width: textChunk.totalWidth,
height: textChunk.totalHeight,
transform: textChunk.transform,
fontName: textChunk.fontName,
hasEOL: textChunk.hasEOL,
};
}
@ -2202,91 +2255,276 @@ class PartialEvaluator {
});
}
function buildTextContentItem(chars) {
var font = textState.font;
var textChunk = ensureTextContentItem();
var width = 0;
var height = 0;
var glyphs = font.charsToGlyphs(chars);
for (var i = 0; i < glyphs.length; i++) {
var glyph = glyphs[i];
var glyphWidth = null;
if (font.vertical && glyph.vmetric) {
glyphWidth = glyph.vmetric[0];
} else {
glyphWidth = glyph.width;
function compareWithLastPosition(fontSize) {
if (
!combineTextItems ||
!textState.font ||
!textContentItem.prevTransform
) {
return;
}
const currentTransform = getCurrentTextTransform();
const posX = currentTransform[4];
const posY = currentTransform[5];
const lastPosX = textContentItem.prevTransform[4];
const lastPosY = textContentItem.prevTransform[5];
if (lastPosX === posX && lastPosY === posY) {
return;
}
const advanceX = (posX - lastPosX) / textContentItem.textAdvanceScale;
const advanceY = (posY - lastPosY) / textContentItem.textAdvanceScale;
const HALF_LAST_CHAR = -0.5 * textContentItem.lastCharSize;
if (textState.font.vertical) {
if (
Math.abs(advanceX) >
textContentItem.width /
textContentItem.textAdvanceScale /* not the same column */
) {
appendEOL();
return;
}
var glyphUnicode = glyph.unicode;
var NormalizedUnicodes = getNormalizedUnicodes();
if (NormalizedUnicodes[glyphUnicode] !== undefined) {
glyphUnicode = NormalizedUnicodes[glyphUnicode];
if (HALF_LAST_CHAR > advanceY) {
return;
}
glyphUnicode = reverseIfRtl(glyphUnicode);
var charSpacing = textState.charSpacing;
if (glyph.isSpace) {
var wordSpacing = textState.wordSpacing;
charSpacing += wordSpacing;
if (wordSpacing > 0) {
addFakeSpaces(wordSpacing, textChunk.str);
if (advanceY > textContentItem.trackingSpaceMin) {
textContentItem.height += advanceY;
} else if (!addFakeSpaces(advanceY, 0, textContentItem.prevTransform)) {
if (textContentItem.str.length === 0) {
textContent.items.push({
str: " ",
dir: "ltr",
width: 0,
height: advanceY,
transform: textContentItem.prevTransform,
fontName: textContentItem.fontName,
hasEOL: false,
});
textContentItem.isLastCharWhiteSpace = true;
} else {
textContentItem.height += advanceY;
}
}
var tx = 0;
var ty = 0;
if (!font.vertical) {
var w0 = glyphWidth * textState.fontMatrix[0];
tx = (w0 * textState.fontSize + charSpacing) * textState.textHScale;
width += tx;
return;
}
if (
Math.abs(advanceY) >
textContentItem.height /
textContentItem.textAdvanceScale /* not the same line */
) {
appendEOL();
return;
}
if (HALF_LAST_CHAR > advanceX) {
return;
}
if (advanceX <= textContentItem.trackingSpaceMin) {
textContentItem.width += advanceX;
} else if (!addFakeSpaces(advanceX, 0, textContentItem.prevTransform)) {
if (textContentItem.str.length === 0) {
textContent.items.push({
str: " ",
dir: "ltr",
width: advanceX,
height: 0,
transform: textContentItem.prevTransform,
fontName: textContentItem.fontName,
hasEOL: false,
});
textContentItem.isLastCharWhiteSpace = true;
} else {
var w1 = glyphWidth * textState.fontMatrix[0];
ty = w1 * textState.fontSize + charSpacing;
height += ty;
textContentItem.width += advanceX;
}
textState.translateTextMatrix(tx, ty);
textChunk.str.push(glyphUnicode);
}
if (!font.vertical) {
textChunk.lastAdvanceWidth = width;
textChunk.width += width;
} else {
textChunk.lastAdvanceHeight = height;
textChunk.height += Math.abs(height);
}
return textChunk;
}
function addFakeSpaces(width, strBuf) {
if (width < textContentItem.fakeSpaceMin) {
function buildTextContentItem({ chars, extraSpacing, isFirstChunk }) {
const font = textState.font;
if (!chars) {
// Just move according to the space we have.
const charSpacing = textState.charSpacing + extraSpacing;
if (charSpacing) {
if (!font.vertical) {
textState.translateTextMatrix(
charSpacing * textState.textHScale,
0
);
} else {
textState.translateTextMatrix(0, charSpacing);
}
}
return;
}
if (width < textContentItem.fakeMultiSpaceMin) {
strBuf.push(" ");
return;
const NormalizedUnicodes = getNormalizedUnicodes();
const glyphs = font.charsToGlyphs(chars);
const scale = textState.fontMatrix[0] * textState.fontSize;
if (isFirstChunk) {
compareWithLastPosition(scale);
}
var fakeSpaces = Math.round(width / textContentItem.spaceWidth);
while (fakeSpaces-- > 0) {
strBuf.push(" ");
let textChunk = ensureTextContentItem();
let size = 0;
let lastCharSize = 0;
for (let i = 0, ii = glyphs.length; i < ii; i++) {
const glyph = glyphs[i];
let charSpacing =
textState.charSpacing + (i === ii - 1 ? extraSpacing : 0);
let glyphUnicode = glyph.unicode;
if (glyph.isSpace) {
charSpacing += textState.wordSpacing;
textChunk.isLastCharWhiteSpace = true;
} else {
glyphUnicode = NormalizedUnicodes[glyphUnicode] || glyphUnicode;
glyphUnicode = reverseIfRtl(glyphUnicode);
textChunk.isLastCharWhiteSpace = false;
}
textChunk.str.push(glyphUnicode);
const glyphWidth =
font.vertical && glyph.vmetric ? glyph.vmetric[0] : glyph.width;
let scaledDim = glyphWidth * scale;
if (!font.vertical) {
scaledDim *= textState.textHScale;
textState.translateTextMatrix(scaledDim, 0);
} else {
textState.translateTextMatrix(0, scaledDim);
scaledDim = Math.abs(scaledDim);
}
size += scaledDim;
if (charSpacing) {
if (!font.vertical) {
charSpacing *= textState.textHScale;
}
scaledDim += charSpacing;
const wasSplit =
charSpacing > textContentItem.trackingSpaceMin &&
addFakeSpaces(charSpacing, size);
if (!font.vertical) {
textState.translateTextMatrix(charSpacing, 0);
} else {
textState.translateTextMatrix(0, charSpacing);
}
if (wasSplit) {
textChunk = ensureTextContentItem();
size = 0;
} else {
size += charSpacing;
}
}
lastCharSize = scaledDim;
}
textChunk.lastCharSize = lastCharSize;
if (!font.vertical) {
textChunk.width += size;
} else {
textChunk.height += size;
}
textChunk.prevTransform = getCurrentTextTransform();
}
/**
 * Records an end-of-line in the extracted text content.
 *
 * - If a chunk is currently being built, it is flagged with `hasEOL` and
 *   flushed.
 * - Otherwise, if items were already emitted, the last one is flagged.
 * - Otherwise an empty item carrying `hasEOL: true` is emitted, using the
 *   current text transform and the current font's loaded name.
 *
 * In every case the white-space/last-char tracking on the chunk is reset.
 */
function appendEOL() {
  const chunk = textContentItem;

  if (chunk.initialized) {
    chunk.hasEOL = true;
    flushTextContentItem();
  } else {
    const emitted = textContent.items;
    const count = emitted.length;
    if (count > 0) {
      emitted[count - 1].hasEOL = true;
    } else {
      const transform = getCurrentTextTransform();
      const fontName = textState.font.loadedName;
      emitted.push({
        str: "",
        dir: "ltr",
        width: 0,
        height: 0,
        transform,
        fontName,
        hasEOL: true,
      });
    }
  }

  chunk.isLastCharWhiteSpace = false;
  chunk.lastCharSize = 0;
}
/**
 * Handles a gap of `width` found between glyphs.
 *
 * When `width` lies within [spaceInFlowMin, spaceInFlowMax] of the current
 * chunk, a single " " is appended to the chunk (only if the chunk is
 * initialized) and the flow is kept.
 *
 * Otherwise `size` is added to the chunk's pending width (or height for a
 * vertical chunk), the chunk is flushed and, unless the last character was
 * already a white space, a standalone " " item is emitted whose extent is the
 * gap multiplied by the chunk's advance scale.
 *
 * @param {number} width - Size of the gap, compared as-is with the chunk's
 *   in-flow bounds.
 * @param {number} size - Amount added to the chunk's pending size before the
 *   chunk is flushed.
 * @param {Array|null} [transf] - Transform for the emitted space item; when
 *   falsy, the current text transform is used.
 * @returns {boolean} `false` if the flow was kept, `true` if it was broken.
 */
function addFakeSpaces(width, size, transf = null) {
  const chunk = textContentItem;
  const staysInFlow =
    chunk.spaceInFlowMin <= width && width <= chunk.spaceInFlowMax;

  if (staysInFlow) {
    if (chunk.initialized) {
      chunk.str.push(" ");
      chunk.isLastCharWhiteSpace = true;
    }
    return false;
  }

  // Everything needed from the chunk is read before it gets flushed.
  const fontName = chunk.fontName;
  const isVertical = chunk.vertical;
  const scaledGap = width * chunk.textAdvanceScale;

  if (isVertical) {
    chunk.height += size;
  } else {
    chunk.width += size;
  }
  flushTextContentItem();

  if (!chunk.isLastCharWhiteSpace) {
    chunk.isLastCharWhiteSpace = true;
    textContent.items.push({
      str: " ",
      // TODO: check if using the orientation from last chunk is
      // better or not.
      dir: "ltr",
      width: isVertical ? 0 : scaledGap,
      height: isVertical ? scaledGap : 0,
      transform: transf || getCurrentTextTransform(),
      fontName,
      hasEOL: false,
    });
  }
  return true;
}
function flushTextContentItem() {
if (!textContentItem.initialized) {
if (!textContentItem.initialized || !textContentItem.str) {
return;
}
// Do final text scaling.
if (!textContentItem.vertical) {
textContentItem.width *= textContentItem.textAdvanceScale;
textContentItem.totalWidth +=
textContentItem.width * textContentItem.textAdvanceScale;
} else {
textContentItem.height *= textContentItem.textAdvanceScale;
textContentItem.totalHeight +=
textContentItem.height * textContentItem.textAdvanceScale;
}
textContent.items.push(runBidiTransform(textContentItem));
textContent.items.push(runBidiTransform(textContentItem));
textContentItem.initialized = false;
textContentItem.str.length = 0;
}
@ -2330,7 +2568,6 @@ class PartialEvaluator {
textState = stateManager.state;
var fn = operation.fn;
args = operation.args;
var advance, diff;
switch (fn | 0) {
case OPS.setFont:
@ -2363,32 +2600,6 @@ class PartialEvaluator {
textState.leading = args[0];
break;
case OPS.moveText:
// Optimization to treat same line movement as advance
var isSameTextLine = !textState.font
? false
: (textState.font.vertical ? args[0] : args[1]) === 0;
advance = args[0] - args[1];
if (
combineTextItems &&
isSameTextLine &&
textContentItem.initialized &&
advance > 0 &&
advance <= textContentItem.fakeMultiSpaceMax
) {
textState.translateTextLineMatrix(args[0], args[1]);
textContentItem.width +=
args[0] - textContentItem.lastAdvanceWidth;
textContentItem.height +=
args[1] - textContentItem.lastAdvanceHeight;
diff =
args[0] -
textContentItem.lastAdvanceWidth -
(args[1] - textContentItem.lastAdvanceHeight);
addFakeSpaces(diff, textContentItem.str);
break;
}
flushTextContentItem();
textState.translateTextLineMatrix(args[0], args[1]);
textState.textMatrix = textState.textLineMatrix.slice();
break;
@ -2399,40 +2610,10 @@ class PartialEvaluator {
textState.textMatrix = textState.textLineMatrix.slice();
break;
case OPS.nextLine:
flushTextContentItem();
appendEOL();
textState.carriageReturn();
break;
case OPS.setTextMatrix:
// Optimization to treat same line movement as advance.
advance = textState.calcTextLineMatrixAdvance(
args[0],
args[1],
args[2],
args[3],
args[4],
args[5]
);
if (
combineTextItems &&
advance !== null &&
textContentItem.initialized &&
advance.value > 0 &&
advance.value <= textContentItem.fakeMultiSpaceMax
) {
textState.translateTextLineMatrix(advance.width, advance.height);
textContentItem.width +=
advance.width - textContentItem.lastAdvanceWidth;
textContentItem.height +=
advance.height - textContentItem.lastAdvanceHeight;
diff =
advance.width -
textContentItem.lastAdvanceWidth -
(advance.height - textContentItem.lastAdvanceHeight);
addFakeSpaces(diff, textContentItem.str);
break;
}
flushTextContentItem();
textState.setTextMatrix(
args[0],
args[1],
@ -2449,6 +2630,7 @@ class PartialEvaluator {
args[4],
args[5]
);
updateAdvanceScale();
break;
case OPS.setCharSpacing:
textState.charSpacing = args[0];
@ -2466,14 +2648,16 @@ class PartialEvaluator {
self.ensureStateFont(stateManager.state);
continue;
}
var items = args[0];
var offset;
for (var j = 0, jj = items.length; j < jj; j++) {
if (typeof items[j] === "string") {
buildTextContentItem(items[j]);
} else if (isNum(items[j])) {
ensureTextContentItem();
const spaceFactor =
((textState.font.vertical ? 1 : -1) * textState.fontSize) / 1000;
const elements = args[0];
let isFirstChunk = true;
for (let i = 0, ii = elements.length; i < ii - 1; i++) {
const item = elements[i];
if (typeof item === "string") {
showSpacedTextBuffer.push(item);
} else if (typeof item === "number" && item !== 0) {
// PDF Specification 5.3.2 states:
// The number is expressed in thousandths of a unit of text
// space.
@ -2482,64 +2666,75 @@ class PartialEvaluator {
// In the default coordinate system, a positive adjustment
// has the effect of moving the next glyph painted either to
// the left or down by the given amount.
advance = (items[j] * textState.fontSize) / 1000;
var breakTextRun = false;
if (textState.font.vertical) {
offset = advance;
textState.translateTextMatrix(0, offset);
breakTextRun =
textContentItem.textRunBreakAllowed &&
advance > textContentItem.fakeMultiSpaceMax;
if (!breakTextRun) {
// Value needs to be added to height to paint down.
textContentItem.height += offset;
}
} else {
advance = -advance;
offset = advance * textState.textHScale;
textState.translateTextMatrix(offset, 0);
breakTextRun =
textContentItem.textRunBreakAllowed &&
advance > textContentItem.fakeMultiSpaceMax;
if (!breakTextRun) {
// Value needs to be subtracted from width to paint left.
textContentItem.width += offset;
}
}
if (breakTextRun) {
flushTextContentItem();
} else if (advance > 0) {
addFakeSpaces(advance, textContentItem.str);
const str = showSpacedTextBuffer.join("");
showSpacedTextBuffer.length = 0;
buildTextContentItem({
chars: str,
extraSpacing: item * spaceFactor,
isFirstChunk,
});
if (str && isFirstChunk) {
isFirstChunk = false;
}
}
}
const item = elements[elements.length - 1];
if (typeof item === "string") {
showSpacedTextBuffer.push(item);
}
if (showSpacedTextBuffer.length > 0) {
const str = showSpacedTextBuffer.join("");
showSpacedTextBuffer.length = 0;
buildTextContentItem({
chars: str,
extraSpacing: 0,
isFirstChunk,
});
}
break;
case OPS.showText:
if (!stateManager.state.font) {
self.ensureStateFont(stateManager.state);
continue;
}
buildTextContentItem(args[0]);
buildTextContentItem({
chars: args[0],
extraSpacing: 0,
isFirstChunk: true,
});
break;
case OPS.nextLineShowText:
if (!stateManager.state.font) {
self.ensureStateFont(stateManager.state);
continue;
}
textContentItem.hasEOL = true;
flushTextContentItem();
textState.carriageReturn();
buildTextContentItem(args[0]);
buildTextContentItem({
chars: args[0],
extraSpacing: 0,
isFirstChunk: true,
});
break;
case OPS.nextLineSetSpacingShowText:
if (!stateManager.state.font) {
self.ensureStateFont(stateManager.state);
continue;
}
textContentItem.hasEOL = true;
flushTextContentItem();
textState.wordSpacing = args[0];
textState.charSpacing = args[1];
textState.carriageReturn();
buildTextContentItem(args[2]);
buildTextContentItem({
chars: args[2],
extraSpacing: 0,
isFirstChunk: true,
});
break;
case OPS.paintXObject:
flushTextContentItem();
@ -3887,46 +4082,6 @@ class TextState {
m[5] = m[1] * x + m[3] * y + m[5];
}
/**
 * Expresses a move from the current text line matrix to the matrix
 * [a, b, c, d, e, f] as an advance, when the two matrices differ only by a
 * translation.
 *
 * @returns {Object|null} `null` when no font is set, when a/b/c/d are not
 *   strictly equal to the first four entries of the current text line
 *   matrix, or when the translation changes x for a vertical font (y for a
 *   non-vertical one). Otherwise `{ width, height, value }`, where `value` is
 *   `height` for vertical fonts and `width` otherwise.
 */
calcTextLineMatrixAdvance(a, b, c, d, e, f) {
var font = this.font;
if (!font) {
return null;
}
var m = this.textLineMatrix;
// Only a pure translation qualifies: the linear part must be unchanged.
if (!(a === m[0] && b === m[1] && c === m[2] && d === m[3])) {
return null;
}
var txDiff = e - m[4],
tyDiff = f - m[5];
// Reject moves that have a component on the other axis.
if ((font.vertical && txDiff !== 0) || (!font.vertical && tyDiff !== 0)) {
return null;
}
// Map the translation back through the inverse of the 2x2 part
// [a b; c d]; `denominator` is its determinant.
// NOTE(review): a zero determinant is not guarded against here and would
// yield non-finite values; confirm callers never pass a singular matrix.
var tx,
ty,
denominator = a * d - b * c;
if (font.vertical) {
tx = (-tyDiff * c) / denominator;
ty = (tyDiff * a) / denominator;
} else {
tx = (txDiff * d) / denominator;
ty = (-txDiff * b) / denominator;
}
return { width: tx, height: ty, value: font.vertical ? ty : tx };
}
/**
 * Builds the matrix obtained by composing `ctm`, the current text matrix and
 * a text-state matrix made from the font size, horizontal scale and text
 * rise.
 *
 * @param {Array} ctm - Matrix passed as the outermost operand to
 *   `Util.transform`.
 * @returns {Array} Result of
 *   `Util.transform(ctm, Util.transform(this.textMatrix, tsm))`.
 */
calcRenderMatrix(ctm) {
// 9.4.4 Text Space Details
// tsm scales x by fontSize * textHScale, y by fontSize, and offsets y by
// textRise.
var tsm = [
this.fontSize * this.textHScale,
0,
0,
this.fontSize,
0,
this.textRise,
];
return Util.transform(ctm, Util.transform(this.textMatrix, tsm));
}
carriageReturn() {
this.translateTextLineMatrix(0, -this.leading);
this.textMatrix = this.textLineMatrix.slice();

View file

@ -57,12 +57,7 @@ const renderTextLayer = (function renderTextLayerClosure() {
const DEFAULT_FONT_SIZE = 30;
const DEFAULT_FONT_ASCENT = 0.8;
const ascentCache = new Map();
const NonWhitespaceRegexp = /\S/;
function isAllWhitespace(str) {
return !NonWhitespaceRegexp.test(str);
}
// Matches strings that consist solely of white space.
// Deliberately not a global (`g`) regexp: this object is shared and used with
// `.test()`, and a global regexp remembers `lastIndex` after a successful
// match, which would make the next all-white-space string fail to match.
const AllWhitespaceRegexp = /^\s+$/;
function getAscent(fontFamily, ctx) {
const cachedAscent = ascentCache.get(fontFamily);
@ -133,7 +128,8 @@ const renderTextLayer = (function renderTextLayerClosure() {
const textDivProperties = {
angle: 0,
canvasWidth: 0,
isWhitespace: false,
hasText: geom.str !== "",
hasEOL: geom.hasEOL,
originalTransform: null,
paddingBottom: 0,
paddingLeft: 0,
@ -142,12 +138,8 @@ const renderTextLayer = (function renderTextLayerClosure() {
scale: 1,
};
textDiv.textContent = geom.str;
task._textDivs.push(textDiv);
if (isAllWhitespace(geom.str)) {
textDivProperties.isWhitespace = true;
task._textDivProperties.set(textDiv, textDivProperties);
return;
}
const tx = Util.transform(task._viewport.transform, geom.transform);
let angle = Math.atan2(tx[1], tx[0]);
@ -176,7 +168,6 @@ const renderTextLayer = (function renderTextLayerClosure() {
// Keeps screen readers from pausing on every new text span.
textDiv.setAttribute("role", "presentation");
textDiv.textContent = geom.str;
// geom.dir may be 'ttb' for vertical texts.
textDiv.dir = geom.dir;
@ -192,7 +183,10 @@ const renderTextLayer = (function renderTextLayerClosure() {
// little effect on text highlighting. This makes scrolling on docs with
// lots of such divs a lot faster.
let shouldScaleText = false;
if (geom.str.length > 1) {
if (
geom.str.length > 1 ||
(task._enhanceTextSelection && AllWhitespaceRegexp.test(geom.str))
) {
shouldScaleText = true;
} else if (geom.transform[0] !== geom.transform[3]) {
const absScaleX = Math.abs(geom.transform[0]),
@ -218,7 +212,7 @@ const renderTextLayer = (function renderTextLayerClosure() {
task._layoutText(textDiv);
}
if (task._enhanceTextSelection) {
if (task._enhanceTextSelection && textDivProperties.hasText) {
let angleCos = 1,
angleSin = 0;
if (angle !== 0) {
@ -666,12 +660,9 @@ const renderTextLayer = (function renderTextLayerClosure() {
_layoutText(textDiv) {
const textDivProperties = this._textDivProperties.get(textDiv);
if (textDivProperties.isWhitespace) {
return;
}
let transform = "";
if (textDivProperties.canvasWidth !== 0) {
if (textDivProperties.canvasWidth !== 0 && textDivProperties.hasText) {
const { fontSize, fontFamily } = textDiv.style;
// Only build font string and set to context if different from last.
@ -700,8 +691,15 @@ const renderTextLayer = (function renderTextLayerClosure() {
}
textDiv.style.transform = transform;
}
this._textDivProperties.set(textDiv, textDivProperties);
this._container.appendChild(textDiv);
if (textDivProperties.hasText) {
this._container.appendChild(textDiv);
}
if (textDivProperties.hasEOL) {
const br = document.createElement("br");
br.setAttribute("role", "presentation");
this._container.appendChild(br);
}
},
_render: function TextLayer_render(timeout) {
@ -778,7 +776,7 @@ const renderTextLayer = (function renderTextLayerClosure() {
const div = this._textDivs[i];
const divProps = this._textDivProperties.get(div);
if (divProps.isWhitespace) {
if (!divProps.hasText) {
continue;
}
if (expandDivs) {