mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-22 16:18:08 +02:00
[api-minor] Fix the way to chunk the strings (#13257)
- Improve chunking in order to fix some bugs where spaces are missing: * track the last position where a glyph has been drawn; * when a new glyph (first glyph in a chunk) is added, compare its position with the last saved one and add a space or a break: - there are multiple ways to move the glyphs, and to avoid having to deal with all the different possibilities it's much easier to just compare positions; - and so there is now one function (i.e. "compareWithLastPosition") where all the work is done. - Add some breaks in order to get lines; - Remove the multiple white spaces: * some gaps were filled with several white spaces, which makes it harder to find some sequences of words using the search tool; * other PDF readers replace such gaps with a single white space. Update src/core/evaluator.js Co-authored-by: Jonas Jenwald <jonas.jenwald@gmail.com> Co-authored-by: Jonas Jenwald <jonas.jenwald@gmail.com>
This commit is contained in:
parent
e6fcb1e70b
commit
af4dc55019
3 changed files with 428 additions and 272 deletions
|
@ -2042,7 +2042,7 @@ class PartialEvaluator {
|
|||
resources = resources || Dict.empty;
|
||||
stateManager = stateManager || new StateManager(new TextState());
|
||||
|
||||
var WhitespaceRegexp = /\s/g;
|
||||
const WhitespaceRegexp = /\s/g;
|
||||
|
||||
var textContent = {
|
||||
items: [],
|
||||
|
@ -2051,26 +2051,43 @@ class PartialEvaluator {
|
|||
var textContentItem = {
|
||||
initialized: false,
|
||||
str: [],
|
||||
totalWidth: 0,
|
||||
totalHeight: 0,
|
||||
width: 0,
|
||||
height: 0,
|
||||
vertical: false,
|
||||
lastAdvanceWidth: 0,
|
||||
lastAdvanceHeight: 0,
|
||||
lastCharSize: 0,
|
||||
prevTransform: null,
|
||||
textAdvanceScale: 0,
|
||||
spaceWidth: 0,
|
||||
fakeSpaceMin: Infinity,
|
||||
fakeMultiSpaceMin: Infinity,
|
||||
fakeMultiSpaceMax: -0,
|
||||
textRunBreakAllowed: false,
|
||||
spaceInFlowMin: 0,
|
||||
spaceInFlowMax: 0,
|
||||
trackingSpaceMin: Infinity,
|
||||
transform: null,
|
||||
fontName: null,
|
||||
hasEOL: false,
|
||||
isLastCharWhiteSpace: false,
|
||||
};
|
||||
var SPACE_FACTOR = 0.3;
|
||||
var MULTI_SPACE_FACTOR = 1.5;
|
||||
var MULTI_SPACE_FACTOR_MAX = 4;
|
||||
|
||||
// Used in addFakeSpaces.
|
||||
// wsw stands for whitespace width.
|
||||
|
||||
// A white <= wsw * TRACKING_SPACE_FACTOR is a tracking space
|
||||
// so it doesn't count as a space.
|
||||
const TRACKING_SPACE_FACTOR = 0.3;
|
||||
|
||||
// A white with a width in [wsw * MIN_FACTOR; wsw * MAX_FACTOR]
|
||||
// is a space which will be inserted in the current flow of words.
|
||||
// If the width is outside of this range then the flow is broken
|
||||
// (which means a new span in the text layer).
|
||||
// It's useful to adjust the best as possible the span in the layer
|
||||
// to what is displayed in the canvas.
|
||||
const SPACE_IN_FLOW_MIN_FACTOR = 0.3;
|
||||
const SPACE_IN_FLOW_MAX_FACTOR = 1.3;
|
||||
|
||||
var self = this;
|
||||
var xref = this.xref;
|
||||
const showSpacedTextBuffer = [];
|
||||
|
||||
// The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd.
|
||||
var xobjs = null;
|
||||
|
@ -2081,26 +2098,10 @@ class PartialEvaluator {
|
|||
|
||||
var textState;
|
||||
|
||||
function ensureTextContentItem() {
|
||||
if (textContentItem.initialized) {
|
||||
return textContentItem;
|
||||
}
|
||||
const font = textState.font,
|
||||
loadedName = font.loadedName;
|
||||
if (!seenStyles.has(loadedName)) {
|
||||
seenStyles.add(loadedName);
|
||||
|
||||
textContent.styles[loadedName] = {
|
||||
fontFamily: font.fallbackName,
|
||||
ascent: font.ascent,
|
||||
descent: font.descent,
|
||||
vertical: font.vertical,
|
||||
};
|
||||
}
|
||||
textContentItem.fontName = loadedName;
|
||||
|
||||
function getCurrentTextTransform() {
|
||||
// 9.4.4 Text Space Details
|
||||
var tsm = [
|
||||
const font = textState.font;
|
||||
const tsm = [
|
||||
textState.fontSize * textState.textHScale,
|
||||
0,
|
||||
0,
|
||||
|
@ -2120,18 +2121,44 @@ class PartialEvaluator {
|
|||
}
|
||||
}
|
||||
|
||||
var trm = Util.transform(
|
||||
return Util.transform(
|
||||
textState.ctm,
|
||||
Util.transform(textState.textMatrix, tsm)
|
||||
);
|
||||
textContentItem.transform = trm;
|
||||
}
|
||||
|
||||
function ensureTextContentItem() {
|
||||
if (textContentItem.initialized) {
|
||||
return textContentItem;
|
||||
}
|
||||
const font = textState.font,
|
||||
loadedName = font.loadedName;
|
||||
if (!seenStyles.has(loadedName)) {
|
||||
seenStyles.add(loadedName);
|
||||
|
||||
textContent.styles[loadedName] = {
|
||||
fontFamily: font.fallbackName,
|
||||
ascent: font.ascent,
|
||||
descent: font.descent,
|
||||
vertical: font.vertical,
|
||||
};
|
||||
}
|
||||
textContentItem.fontName = loadedName;
|
||||
|
||||
const trm = (textContentItem.transform = getCurrentTextTransform());
|
||||
if (!font.vertical) {
|
||||
textContentItem.width = 0;
|
||||
textContentItem.height = Math.hypot(trm[2], trm[3]);
|
||||
textContentItem.width = textContentItem.totalWidth = 0;
|
||||
textContentItem.height = textContentItem.totalHeight = Math.hypot(
|
||||
trm[2],
|
||||
trm[3]
|
||||
);
|
||||
textContentItem.vertical = false;
|
||||
} else {
|
||||
textContentItem.width = Math.hypot(trm[0], trm[1]);
|
||||
textContentItem.height = 0;
|
||||
textContentItem.width = textContentItem.totalWidth = Math.hypot(
|
||||
trm[0],
|
||||
trm[1]
|
||||
);
|
||||
textContentItem.height = textContentItem.totalHeight = 0;
|
||||
textContentItem.vertical = true;
|
||||
}
|
||||
|
||||
|
@ -2141,29 +2168,53 @@ class PartialEvaluator {
|
|||
);
|
||||
const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]);
|
||||
textContentItem.textAdvanceScale = scaleCtmX * scaleLineX;
|
||||
textContentItem.lastAdvanceWidth = 0;
|
||||
textContentItem.lastAdvanceHeight = 0;
|
||||
textContentItem.lastCharSize = textContentItem.lastCharSize || 0;
|
||||
|
||||
var spaceWidth = (font.spaceWidth / 1000) * textState.fontSize;
|
||||
if (spaceWidth) {
|
||||
textContentItem.spaceWidth = spaceWidth;
|
||||
textContentItem.fakeSpaceMin = spaceWidth * SPACE_FACTOR;
|
||||
textContentItem.fakeMultiSpaceMin = spaceWidth * MULTI_SPACE_FACTOR;
|
||||
textContentItem.fakeMultiSpaceMax = spaceWidth * MULTI_SPACE_FACTOR_MAX;
|
||||
// It's okay for monospace fonts to fake as much space as needed.
|
||||
textContentItem.textRunBreakAllowed = !font.isMonospace;
|
||||
textContentItem.trackingSpaceMin = spaceWidth * TRACKING_SPACE_FACTOR;
|
||||
textContentItem.spaceInFlowMin = spaceWidth * SPACE_IN_FLOW_MIN_FACTOR;
|
||||
textContentItem.spaceInFlowMax = spaceWidth * SPACE_IN_FLOW_MAX_FACTOR;
|
||||
} else {
|
||||
textContentItem.spaceWidth = 0;
|
||||
textContentItem.fakeSpaceMin = Infinity;
|
||||
textContentItem.fakeMultiSpaceMin = Infinity;
|
||||
textContentItem.fakeMultiSpaceMax = 0;
|
||||
textContentItem.textRunBreakAllowed = false;
|
||||
textContentItem.trackingSpaceMin = Infinity;
|
||||
}
|
||||
|
||||
textContentItem.hasEOL = false;
|
||||
|
||||
textContentItem.initialized = true;
|
||||
return textContentItem;
|
||||
}
|
||||
|
||||
// Recomputes the advance scale (the product of the x-scale of the CTM and the
// x-scale of the text line matrix) for the text item currently being built.
// When the scale changed, the extent accumulated so far is converted with the
// OLD scale and folded into totalWidth/totalHeight, the running width/height
// is reset, and the new scale is stored for subsequent glyphs.
// Does nothing when no text item is in progress.
function updateAdvanceScale() {
|
||||
if (!textContentItem.initialized) {
|
||||
return;
|
||||
}
|
||||
|
||||
const scaleLineX = Math.hypot(
|
||||
textState.textLineMatrix[0],
|
||||
textState.textLineMatrix[1]
|
||||
);
|
||||
const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]);
|
||||
const scaleFactor = scaleCtmX * scaleLineX;
|
||||
// Unchanged scale: the running width/height can keep accumulating as is.
if (scaleFactor === textContentItem.textAdvanceScale) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Bank what was accumulated under the previous scale before switching.
if (!textContentItem.vertical) {
|
||||
textContentItem.totalWidth +=
|
||||
textContentItem.width * textContentItem.textAdvanceScale;
|
||||
textContentItem.width = 0;
|
||||
} else {
|
||||
textContentItem.totalHeight +=
|
||||
textContentItem.height * textContentItem.textAdvanceScale;
|
||||
textContentItem.height = 0;
|
||||
}
|
||||
|
||||
textContentItem.textAdvanceScale = scaleFactor;
|
||||
}
|
||||
|
||||
function replaceWhitespace(str) {
|
||||
// Replaces all whitespaces with standard spaces (0x20), to avoid
|
||||
// alignment issues between the textLayer and the canvas if the text
|
||||
|
@ -2178,17 +2229,19 @@ class PartialEvaluator {
|
|||
}
|
||||
|
||||
function runBidiTransform(textChunk) {
|
||||
var str = textChunk.str.join("");
|
||||
var bidiResult = bidi(str, -1, textChunk.vertical);
|
||||
const text = textChunk.str.join("");
|
||||
const bidiResult = bidi(text, -1, textChunk.vertical);
|
||||
const str = normalizeWhitespace
|
||||
? replaceWhitespace(bidiResult.str)
|
||||
: bidiResult.str;
|
||||
return {
|
||||
str: normalizeWhitespace
|
||||
? replaceWhitespace(bidiResult.str)
|
||||
: bidiResult.str,
|
||||
str,
|
||||
dir: bidiResult.dir,
|
||||
width: textChunk.width,
|
||||
height: textChunk.height,
|
||||
width: textChunk.totalWidth,
|
||||
height: textChunk.totalHeight,
|
||||
transform: textChunk.transform,
|
||||
fontName: textChunk.fontName,
|
||||
hasEOL: textChunk.hasEOL,
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -2202,91 +2255,276 @@ class PartialEvaluator {
|
|||
});
|
||||
}
|
||||
|
||||
function buildTextContentItem(chars) {
|
||||
var font = textState.font;
|
||||
var textChunk = ensureTextContentItem();
|
||||
var width = 0;
|
||||
var height = 0;
|
||||
var glyphs = font.charsToGlyphs(chars);
|
||||
for (var i = 0; i < glyphs.length; i++) {
|
||||
var glyph = glyphs[i];
|
||||
var glyphWidth = null;
|
||||
if (font.vertical && glyph.vmetric) {
|
||||
glyphWidth = glyph.vmetric[0];
|
||||
} else {
|
||||
glyphWidth = glyph.width;
|
||||
function compareWithLastPosition(fontSize) {
|
||||
if (
|
||||
!combineTextItems ||
|
||||
!textState.font ||
|
||||
!textContentItem.prevTransform
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
const currentTransform = getCurrentTextTransform();
|
||||
const posX = currentTransform[4];
|
||||
const posY = currentTransform[5];
|
||||
const lastPosX = textContentItem.prevTransform[4];
|
||||
const lastPosY = textContentItem.prevTransform[5];
|
||||
|
||||
if (lastPosX === posX && lastPosY === posY) {
|
||||
return;
|
||||
}
|
||||
|
||||
const advanceX = (posX - lastPosX) / textContentItem.textAdvanceScale;
|
||||
const advanceY = (posY - lastPosY) / textContentItem.textAdvanceScale;
|
||||
const HALF_LAST_CHAR = -0.5 * textContentItem.lastCharSize;
|
||||
|
||||
if (textState.font.vertical) {
|
||||
if (
|
||||
Math.abs(advanceX) >
|
||||
textContentItem.width /
|
||||
textContentItem.textAdvanceScale /* not the same column */
|
||||
) {
|
||||
appendEOL();
|
||||
return;
|
||||
}
|
||||
|
||||
var glyphUnicode = glyph.unicode;
|
||||
var NormalizedUnicodes = getNormalizedUnicodes();
|
||||
if (NormalizedUnicodes[glyphUnicode] !== undefined) {
|
||||
glyphUnicode = NormalizedUnicodes[glyphUnicode];
|
||||
if (HALF_LAST_CHAR > advanceY) {
|
||||
return;
|
||||
}
|
||||
glyphUnicode = reverseIfRtl(glyphUnicode);
|
||||
|
||||
var charSpacing = textState.charSpacing;
|
||||
if (glyph.isSpace) {
|
||||
var wordSpacing = textState.wordSpacing;
|
||||
charSpacing += wordSpacing;
|
||||
if (wordSpacing > 0) {
|
||||
addFakeSpaces(wordSpacing, textChunk.str);
|
||||
if (advanceY > textContentItem.trackingSpaceMin) {
|
||||
textContentItem.height += advanceY;
|
||||
} else if (!addFakeSpaces(advanceY, 0, textContentItem.prevTransform)) {
|
||||
if (textContentItem.str.length === 0) {
|
||||
textContent.items.push({
|
||||
str: " ",
|
||||
dir: "ltr",
|
||||
width: 0,
|
||||
height: advanceY,
|
||||
transform: textContentItem.prevTransform,
|
||||
fontName: textContentItem.fontName,
|
||||
hasEOL: false,
|
||||
});
|
||||
textContentItem.isLastCharWhiteSpace = true;
|
||||
} else {
|
||||
textContentItem.height += advanceY;
|
||||
}
|
||||
}
|
||||
|
||||
var tx = 0;
|
||||
var ty = 0;
|
||||
if (!font.vertical) {
|
||||
var w0 = glyphWidth * textState.fontMatrix[0];
|
||||
tx = (w0 * textState.fontSize + charSpacing) * textState.textHScale;
|
||||
width += tx;
|
||||
return;
|
||||
}
|
||||
|
||||
if (
|
||||
Math.abs(advanceY) >
|
||||
textContentItem.height /
|
||||
textContentItem.textAdvanceScale /* not the same line */
|
||||
) {
|
||||
appendEOL();
|
||||
return;
|
||||
}
|
||||
|
||||
if (HALF_LAST_CHAR > advanceX) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (advanceX <= textContentItem.trackingSpaceMin) {
|
||||
textContentItem.width += advanceX;
|
||||
} else if (!addFakeSpaces(advanceX, 0, textContentItem.prevTransform)) {
|
||||
if (textContentItem.str.length === 0) {
|
||||
textContent.items.push({
|
||||
str: " ",
|
||||
dir: "ltr",
|
||||
width: advanceX,
|
||||
height: 0,
|
||||
transform: textContentItem.prevTransform,
|
||||
fontName: textContentItem.fontName,
|
||||
hasEOL: false,
|
||||
});
|
||||
textContentItem.isLastCharWhiteSpace = true;
|
||||
} else {
|
||||
var w1 = glyphWidth * textState.fontMatrix[0];
|
||||
ty = w1 * textState.fontSize + charSpacing;
|
||||
height += ty;
|
||||
textContentItem.width += advanceX;
|
||||
}
|
||||
textState.translateTextMatrix(tx, ty);
|
||||
|
||||
textChunk.str.push(glyphUnicode);
|
||||
}
|
||||
|
||||
if (!font.vertical) {
|
||||
textChunk.lastAdvanceWidth = width;
|
||||
textChunk.width += width;
|
||||
} else {
|
||||
textChunk.lastAdvanceHeight = height;
|
||||
textChunk.height += Math.abs(height);
|
||||
}
|
||||
|
||||
return textChunk;
|
||||
}
|
||||
|
||||
function addFakeSpaces(width, strBuf) {
|
||||
if (width < textContentItem.fakeSpaceMin) {
|
||||
function buildTextContentItem({ chars, extraSpacing, isFirstChunk }) {
|
||||
const font = textState.font;
|
||||
if (!chars) {
|
||||
// Just move according to the space we have.
|
||||
const charSpacing = textState.charSpacing + extraSpacing;
|
||||
if (charSpacing) {
|
||||
if (!font.vertical) {
|
||||
textState.translateTextMatrix(
|
||||
charSpacing * textState.textHScale,
|
||||
0
|
||||
);
|
||||
} else {
|
||||
textState.translateTextMatrix(0, charSpacing);
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
if (width < textContentItem.fakeMultiSpaceMin) {
|
||||
strBuf.push(" ");
|
||||
return;
|
||||
|
||||
const NormalizedUnicodes = getNormalizedUnicodes();
|
||||
const glyphs = font.charsToGlyphs(chars);
|
||||
const scale = textState.fontMatrix[0] * textState.fontSize;
|
||||
if (isFirstChunk) {
|
||||
compareWithLastPosition(scale);
|
||||
}
|
||||
var fakeSpaces = Math.round(width / textContentItem.spaceWidth);
|
||||
while (fakeSpaces-- > 0) {
|
||||
strBuf.push(" ");
|
||||
|
||||
let textChunk = ensureTextContentItem();
|
||||
let size = 0;
|
||||
let lastCharSize = 0;
|
||||
|
||||
for (let i = 0, ii = glyphs.length; i < ii; i++) {
|
||||
const glyph = glyphs[i];
|
||||
let charSpacing =
|
||||
textState.charSpacing + (i === ii - 1 ? extraSpacing : 0);
|
||||
|
||||
let glyphUnicode = glyph.unicode;
|
||||
if (glyph.isSpace) {
|
||||
charSpacing += textState.wordSpacing;
|
||||
textChunk.isLastCharWhiteSpace = true;
|
||||
} else {
|
||||
glyphUnicode = NormalizedUnicodes[glyphUnicode] || glyphUnicode;
|
||||
glyphUnicode = reverseIfRtl(glyphUnicode);
|
||||
textChunk.isLastCharWhiteSpace = false;
|
||||
}
|
||||
textChunk.str.push(glyphUnicode);
|
||||
|
||||
const glyphWidth =
|
||||
font.vertical && glyph.vmetric ? glyph.vmetric[0] : glyph.width;
|
||||
|
||||
let scaledDim = glyphWidth * scale;
|
||||
if (!font.vertical) {
|
||||
scaledDim *= textState.textHScale;
|
||||
textState.translateTextMatrix(scaledDim, 0);
|
||||
} else {
|
||||
textState.translateTextMatrix(0, scaledDim);
|
||||
scaledDim = Math.abs(scaledDim);
|
||||
}
|
||||
size += scaledDim;
|
||||
|
||||
if (charSpacing) {
|
||||
if (!font.vertical) {
|
||||
charSpacing *= textState.textHScale;
|
||||
}
|
||||
|
||||
scaledDim += charSpacing;
|
||||
const wasSplit =
|
||||
charSpacing > textContentItem.trackingSpaceMin &&
|
||||
addFakeSpaces(charSpacing, size);
|
||||
if (!font.vertical) {
|
||||
textState.translateTextMatrix(charSpacing, 0);
|
||||
} else {
|
||||
textState.translateTextMatrix(0, charSpacing);
|
||||
}
|
||||
|
||||
if (wasSplit) {
|
||||
textChunk = ensureTextContentItem();
|
||||
size = 0;
|
||||
} else {
|
||||
size += charSpacing;
|
||||
}
|
||||
}
|
||||
|
||||
lastCharSize = scaledDim;
|
||||
}
|
||||
|
||||
textChunk.lastCharSize = lastCharSize;
|
||||
if (!font.vertical) {
|
||||
textChunk.width += size;
|
||||
} else {
|
||||
textChunk.height += size;
|
||||
}
|
||||
|
||||
textChunk.prevTransform = getCurrentTextTransform();
|
||||
}
|
||||
|
||||
// Records an end-of-line in the extracted text content:
//  - if a text item is being built, it is marked with `hasEOL` and flushed;
//  - otherwise, if items were already emitted, the last one is marked;
//  - otherwise an empty item carrying only `hasEOL: true` is emitted.
// In every case the "last char was whitespace" flag and the last char size
// of the shared text item are reset afterwards.
function appendEOL() {
|
||||
if (textContentItem.initialized) {
|
||||
textContentItem.hasEOL = true;
|
||||
flushTextContentItem();
|
||||
} else if (textContent.items.length > 0) {
|
||||
textContent.items[textContent.items.length - 1].hasEOL = true;
|
||||
} else {
|
||||
// Nothing emitted yet: push a placeholder item so the EOL is not lost.
textContent.items.push({
|
||||
str: "",
|
||||
dir: "ltr",
|
||||
width: 0,
|
||||
height: 0,
|
||||
transform: getCurrentTextTransform(),
|
||||
fontName: textState.font.loadedName,
|
||||
hasEOL: true,
|
||||
});
|
||||
}
|
||||
|
||||
textContentItem.isLastCharWhiteSpace = false;
|
||||
textContentItem.lastCharSize = 0;
|
||||
}
|
||||
|
||||
// Handles a gap of `width` between glyphs.
//
// `width`  - size of the gap; compared against the spaceInFlowMin/Max
//            thresholds of the current text item.
// `size`   - extent added to the current item's width (or height, for
//            vertical text) before it is flushed when the flow is broken.
// `transf` - transform to attach to the standalone space item; when null the
//            current text transform is used.
//
// Returns false when the gap lies within [spaceInFlowMin, spaceInFlowMax]
// (a single " " is appended to the current item if one is in progress), and
// true when the gap is outside that range, in which case the current item is
// flushed (i.e. the flow of text is broken).
function addFakeSpaces(width, size, transf = null) {
|
||||
if (
|
||||
textContentItem.spaceInFlowMin <= width &&
|
||||
width <= textContentItem.spaceInFlowMax
|
||||
) {
|
||||
if (textContentItem.initialized) {
|
||||
textContentItem.str.push(" ");
|
||||
textContentItem.isLastCharWhiteSpace = true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Read before flushing, for use in the standalone space item below.
const fontName = textContentItem.fontName;
|
||||
|
||||
let height = 0;
|
||||
width *= textContentItem.textAdvanceScale;
|
||||
if (!textContentItem.vertical) {
|
||||
textContentItem.width += size;
|
||||
} else {
|
||||
textContentItem.height += size;
|
||||
// For vertical text the gap extends along the height, not the width.
height = width;
|
||||
width = 0;
|
||||
}
|
||||
|
||||
flushTextContentItem();
|
||||
|
||||
// Avoid emitting several consecutive whitespace items.
if (textContentItem.isLastCharWhiteSpace) {
|
||||
return true;
|
||||
}
|
||||
|
||||
textContentItem.isLastCharWhiteSpace = true;
|
||||
textContent.items.push({
|
||||
str: " ",
|
||||
// TODO: check if using the orientation from last chunk is
|
||||
// better or not.
|
||||
dir: "ltr",
|
||||
width,
|
||||
height,
|
||||
transform: transf ? transf : getCurrentTextTransform(),
|
||||
fontName,
|
||||
hasEOL: false,
|
||||
});
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
function flushTextContentItem() {
|
||||
if (!textContentItem.initialized) {
|
||||
if (!textContentItem.initialized || !textContentItem.str) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Do final text scaling.
|
||||
if (!textContentItem.vertical) {
|
||||
textContentItem.width *= textContentItem.textAdvanceScale;
|
||||
textContentItem.totalWidth +=
|
||||
textContentItem.width * textContentItem.textAdvanceScale;
|
||||
} else {
|
||||
textContentItem.height *= textContentItem.textAdvanceScale;
|
||||
textContentItem.totalHeight +=
|
||||
textContentItem.height * textContentItem.textAdvanceScale;
|
||||
}
|
||||
textContent.items.push(runBidiTransform(textContentItem));
|
||||
|
||||
textContent.items.push(runBidiTransform(textContentItem));
|
||||
textContentItem.initialized = false;
|
||||
textContentItem.str.length = 0;
|
||||
}
|
||||
|
@ -2330,7 +2568,6 @@ class PartialEvaluator {
|
|||
textState = stateManager.state;
|
||||
var fn = operation.fn;
|
||||
args = operation.args;
|
||||
var advance, diff;
|
||||
|
||||
switch (fn | 0) {
|
||||
case OPS.setFont:
|
||||
|
@ -2363,32 +2600,6 @@ class PartialEvaluator {
|
|||
textState.leading = args[0];
|
||||
break;
|
||||
case OPS.moveText:
|
||||
// Optimization to treat same line movement as advance
|
||||
var isSameTextLine = !textState.font
|
||||
? false
|
||||
: (textState.font.vertical ? args[0] : args[1]) === 0;
|
||||
advance = args[0] - args[1];
|
||||
if (
|
||||
combineTextItems &&
|
||||
isSameTextLine &&
|
||||
textContentItem.initialized &&
|
||||
advance > 0 &&
|
||||
advance <= textContentItem.fakeMultiSpaceMax
|
||||
) {
|
||||
textState.translateTextLineMatrix(args[0], args[1]);
|
||||
textContentItem.width +=
|
||||
args[0] - textContentItem.lastAdvanceWidth;
|
||||
textContentItem.height +=
|
||||
args[1] - textContentItem.lastAdvanceHeight;
|
||||
diff =
|
||||
args[0] -
|
||||
textContentItem.lastAdvanceWidth -
|
||||
(args[1] - textContentItem.lastAdvanceHeight);
|
||||
addFakeSpaces(diff, textContentItem.str);
|
||||
break;
|
||||
}
|
||||
|
||||
flushTextContentItem();
|
||||
textState.translateTextLineMatrix(args[0], args[1]);
|
||||
textState.textMatrix = textState.textLineMatrix.slice();
|
||||
break;
|
||||
|
@ -2399,40 +2610,10 @@ class PartialEvaluator {
|
|||
textState.textMatrix = textState.textLineMatrix.slice();
|
||||
break;
|
||||
case OPS.nextLine:
|
||||
flushTextContentItem();
|
||||
appendEOL();
|
||||
textState.carriageReturn();
|
||||
break;
|
||||
case OPS.setTextMatrix:
|
||||
// Optimization to treat same line movement as advance.
|
||||
advance = textState.calcTextLineMatrixAdvance(
|
||||
args[0],
|
||||
args[1],
|
||||
args[2],
|
||||
args[3],
|
||||
args[4],
|
||||
args[5]
|
||||
);
|
||||
if (
|
||||
combineTextItems &&
|
||||
advance !== null &&
|
||||
textContentItem.initialized &&
|
||||
advance.value > 0 &&
|
||||
advance.value <= textContentItem.fakeMultiSpaceMax
|
||||
) {
|
||||
textState.translateTextLineMatrix(advance.width, advance.height);
|
||||
textContentItem.width +=
|
||||
advance.width - textContentItem.lastAdvanceWidth;
|
||||
textContentItem.height +=
|
||||
advance.height - textContentItem.lastAdvanceHeight;
|
||||
diff =
|
||||
advance.width -
|
||||
textContentItem.lastAdvanceWidth -
|
||||
(advance.height - textContentItem.lastAdvanceHeight);
|
||||
addFakeSpaces(diff, textContentItem.str);
|
||||
break;
|
||||
}
|
||||
|
||||
flushTextContentItem();
|
||||
textState.setTextMatrix(
|
||||
args[0],
|
||||
args[1],
|
||||
|
@ -2449,6 +2630,7 @@ class PartialEvaluator {
|
|||
args[4],
|
||||
args[5]
|
||||
);
|
||||
updateAdvanceScale();
|
||||
break;
|
||||
case OPS.setCharSpacing:
|
||||
textState.charSpacing = args[0];
|
||||
|
@ -2466,14 +2648,16 @@ class PartialEvaluator {
|
|||
self.ensureStateFont(stateManager.state);
|
||||
continue;
|
||||
}
|
||||
var items = args[0];
|
||||
var offset;
|
||||
for (var j = 0, jj = items.length; j < jj; j++) {
|
||||
if (typeof items[j] === "string") {
|
||||
buildTextContentItem(items[j]);
|
||||
} else if (isNum(items[j])) {
|
||||
ensureTextContentItem();
|
||||
|
||||
const spaceFactor =
|
||||
((textState.font.vertical ? 1 : -1) * textState.fontSize) / 1000;
|
||||
const elements = args[0];
|
||||
let isFirstChunk = true;
|
||||
for (let i = 0, ii = elements.length; i < ii - 1; i++) {
|
||||
const item = elements[i];
|
||||
if (typeof item === "string") {
|
||||
showSpacedTextBuffer.push(item);
|
||||
} else if (typeof item === "number" && item !== 0) {
|
||||
// PDF Specification 5.3.2 states:
|
||||
// The number is expressed in thousandths of a unit of text
|
||||
// space.
|
||||
|
@ -2482,64 +2666,75 @@ class PartialEvaluator {
|
|||
// In the default coordinate system, a positive adjustment
|
||||
// has the effect of moving the next glyph painted either to
|
||||
// the left or down by the given amount.
|
||||
advance = (items[j] * textState.fontSize) / 1000;
|
||||
var breakTextRun = false;
|
||||
if (textState.font.vertical) {
|
||||
offset = advance;
|
||||
textState.translateTextMatrix(0, offset);
|
||||
breakTextRun =
|
||||
textContentItem.textRunBreakAllowed &&
|
||||
advance > textContentItem.fakeMultiSpaceMax;
|
||||
if (!breakTextRun) {
|
||||
// Value needs to be added to height to paint down.
|
||||
textContentItem.height += offset;
|
||||
}
|
||||
} else {
|
||||
advance = -advance;
|
||||
offset = advance * textState.textHScale;
|
||||
textState.translateTextMatrix(offset, 0);
|
||||
breakTextRun =
|
||||
textContentItem.textRunBreakAllowed &&
|
||||
advance > textContentItem.fakeMultiSpaceMax;
|
||||
if (!breakTextRun) {
|
||||
// Value needs to be subtracted from width to paint left.
|
||||
textContentItem.width += offset;
|
||||
}
|
||||
}
|
||||
if (breakTextRun) {
|
||||
flushTextContentItem();
|
||||
} else if (advance > 0) {
|
||||
addFakeSpaces(advance, textContentItem.str);
|
||||
const str = showSpacedTextBuffer.join("");
|
||||
showSpacedTextBuffer.length = 0;
|
||||
buildTextContentItem({
|
||||
chars: str,
|
||||
extraSpacing: item * spaceFactor,
|
||||
isFirstChunk,
|
||||
});
|
||||
if (str && isFirstChunk) {
|
||||
isFirstChunk = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const item = elements[elements.length - 1];
|
||||
if (typeof item === "string") {
|
||||
showSpacedTextBuffer.push(item);
|
||||
}
|
||||
|
||||
if (showSpacedTextBuffer.length > 0) {
|
||||
const str = showSpacedTextBuffer.join("");
|
||||
showSpacedTextBuffer.length = 0;
|
||||
buildTextContentItem({
|
||||
chars: str,
|
||||
extraSpacing: 0,
|
||||
isFirstChunk,
|
||||
});
|
||||
}
|
||||
break;
|
||||
case OPS.showText:
|
||||
if (!stateManager.state.font) {
|
||||
self.ensureStateFont(stateManager.state);
|
||||
continue;
|
||||
}
|
||||
buildTextContentItem(args[0]);
|
||||
|
||||
buildTextContentItem({
|
||||
chars: args[0],
|
||||
extraSpacing: 0,
|
||||
isFirstChunk: true,
|
||||
});
|
||||
break;
|
||||
case OPS.nextLineShowText:
|
||||
if (!stateManager.state.font) {
|
||||
self.ensureStateFont(stateManager.state);
|
||||
continue;
|
||||
}
|
||||
textContentItem.hasEOL = true;
|
||||
flushTextContentItem();
|
||||
textState.carriageReturn();
|
||||
buildTextContentItem(args[0]);
|
||||
buildTextContentItem({
|
||||
chars: args[0],
|
||||
extraSpacing: 0,
|
||||
isFirstChunk: true,
|
||||
});
|
||||
break;
|
||||
case OPS.nextLineSetSpacingShowText:
|
||||
if (!stateManager.state.font) {
|
||||
self.ensureStateFont(stateManager.state);
|
||||
continue;
|
||||
}
|
||||
textContentItem.hasEOL = true;
|
||||
flushTextContentItem();
|
||||
textState.wordSpacing = args[0];
|
||||
textState.charSpacing = args[1];
|
||||
textState.carriageReturn();
|
||||
buildTextContentItem(args[2]);
|
||||
buildTextContentItem({
|
||||
chars: args[2],
|
||||
extraSpacing: 0,
|
||||
isFirstChunk: true,
|
||||
});
|
||||
break;
|
||||
case OPS.paintXObject:
|
||||
flushTextContentItem();
|
||||
|
@ -3887,46 +4082,6 @@ class TextState {
|
|||
m[5] = m[1] * x + m[3] * y + m[5];
|
||||
}
|
||||
|
||||
calcTextLineMatrixAdvance(a, b, c, d, e, f) {
|
||||
var font = this.font;
|
||||
if (!font) {
|
||||
return null;
|
||||
}
|
||||
var m = this.textLineMatrix;
|
||||
if (!(a === m[0] && b === m[1] && c === m[2] && d === m[3])) {
|
||||
return null;
|
||||
}
|
||||
var txDiff = e - m[4],
|
||||
tyDiff = f - m[5];
|
||||
if ((font.vertical && txDiff !== 0) || (!font.vertical && tyDiff !== 0)) {
|
||||
return null;
|
||||
}
|
||||
var tx,
|
||||
ty,
|
||||
denominator = a * d - b * c;
|
||||
if (font.vertical) {
|
||||
tx = (-tyDiff * c) / denominator;
|
||||
ty = (tyDiff * a) / denominator;
|
||||
} else {
|
||||
tx = (txDiff * d) / denominator;
|
||||
ty = (-txDiff * b) / denominator;
|
||||
}
|
||||
return { width: tx, height: ty, value: font.vertical ? ty : tx };
|
||||
}
|
||||
|
||||
// Returns the matrix obtained by composing `ctm` with this state's
// `textMatrix` and a text-space matrix built from the font size, the
// horizontal scale and the text rise.
calcRenderMatrix(ctm) {
|
||||
// 9.4.4 Text Space Details
|
||||
// Entries are [fontSize * textHScale, 0, 0, fontSize, 0, textRise].
var tsm = [
|
||||
this.fontSize * this.textHScale,
|
||||
0,
|
||||
0,
|
||||
this.fontSize,
|
||||
0,
|
||||
this.textRise,
|
||||
];
|
||||
return Util.transform(ctm, Util.transform(this.textMatrix, tsm));
|
||||
}
|
||||
|
||||
carriageReturn() {
|
||||
this.translateTextLineMatrix(0, -this.leading);
|
||||
this.textMatrix = this.textLineMatrix.slice();
|
||||
|
|
|
@ -57,12 +57,7 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
const DEFAULT_FONT_SIZE = 30;
|
||||
const DEFAULT_FONT_ASCENT = 0.8;
|
||||
const ascentCache = new Map();
|
||||
|
||||
const NonWhitespaceRegexp = /\S/;
|
||||
|
||||
function isAllWhitespace(str) {
|
||||
return !NonWhitespaceRegexp.test(str);
|
||||
}
|
||||
// Matches strings made up exclusively of whitespace characters.
// NOTE: deliberately NOT global. This regexp is shared and used with
// `.test()`; with the `g` flag `lastIndex` would persist between calls, so
// every second all-whitespace string would wrongly be reported as
// non-matching.
const AllWhitespaceRegexp = /^\s+$/;
|
||||
|
||||
function getAscent(fontFamily, ctx) {
|
||||
const cachedAscent = ascentCache.get(fontFamily);
|
||||
|
@ -133,7 +128,8 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
const textDivProperties = {
|
||||
angle: 0,
|
||||
canvasWidth: 0,
|
||||
isWhitespace: false,
|
||||
hasText: geom.str !== "",
|
||||
hasEOL: geom.hasEOL,
|
||||
originalTransform: null,
|
||||
paddingBottom: 0,
|
||||
paddingLeft: 0,
|
||||
|
@ -142,12 +138,8 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
scale: 1,
|
||||
};
|
||||
|
||||
textDiv.textContent = geom.str;
|
||||
task._textDivs.push(textDiv);
|
||||
if (isAllWhitespace(geom.str)) {
|
||||
textDivProperties.isWhitespace = true;
|
||||
task._textDivProperties.set(textDiv, textDivProperties);
|
||||
return;
|
||||
}
|
||||
|
||||
const tx = Util.transform(task._viewport.transform, geom.transform);
|
||||
let angle = Math.atan2(tx[1], tx[0]);
|
||||
|
@ -176,7 +168,6 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
// Keeps screen readers from pausing on every new text span.
|
||||
textDiv.setAttribute("role", "presentation");
|
||||
|
||||
textDiv.textContent = geom.str;
|
||||
// geom.dir may be 'ttb' for vertical texts.
|
||||
textDiv.dir = geom.dir;
|
||||
|
||||
|
@ -192,7 +183,10 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
// little effect on text highlighting. This makes scrolling on docs with
|
||||
// lots of such divs a lot faster.
|
||||
let shouldScaleText = false;
|
||||
if (geom.str.length > 1) {
|
||||
if (
|
||||
geom.str.length > 1 ||
|
||||
(task._enhanceTextSelection && AllWhitespaceRegexp.test(geom.str))
|
||||
) {
|
||||
shouldScaleText = true;
|
||||
} else if (geom.transform[0] !== geom.transform[3]) {
|
||||
const absScaleX = Math.abs(geom.transform[0]),
|
||||
|
@ -218,7 +212,7 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
task._layoutText(textDiv);
|
||||
}
|
||||
|
||||
if (task._enhanceTextSelection) {
|
||||
if (task._enhanceTextSelection && textDivProperties.hasText) {
|
||||
let angleCos = 1,
|
||||
angleSin = 0;
|
||||
if (angle !== 0) {
|
||||
|
@ -666,12 +660,9 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
|
||||
_layoutText(textDiv) {
|
||||
const textDivProperties = this._textDivProperties.get(textDiv);
|
||||
if (textDivProperties.isWhitespace) {
|
||||
return;
|
||||
}
|
||||
|
||||
let transform = "";
|
||||
if (textDivProperties.canvasWidth !== 0) {
|
||||
if (textDivProperties.canvasWidth !== 0 && textDivProperties.hasText) {
|
||||
const { fontSize, fontFamily } = textDiv.style;
|
||||
|
||||
// Only build font string and set to context if different from last.
|
||||
|
@ -700,8 +691,15 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
}
|
||||
textDiv.style.transform = transform;
|
||||
}
|
||||
this._textDivProperties.set(textDiv, textDivProperties);
|
||||
this._container.appendChild(textDiv);
|
||||
|
||||
if (textDivProperties.hasText) {
|
||||
this._container.appendChild(textDiv);
|
||||
}
|
||||
if (textDivProperties.hasEOL) {
|
||||
const br = document.createElement("br");
|
||||
br.setAttribute("role", "presentation");
|
||||
this._container.appendChild(br);
|
||||
}
|
||||
},
|
||||
|
||||
_render: function TextLayer_render(timeout) {
|
||||
|
@ -778,7 +776,7 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
const div = this._textDivs[i];
|
||||
const divProps = this._textDivProperties.get(div);
|
||||
|
||||
if (divProps.isWhitespace) {
|
||||
if (!divProps.hasText) {
|
||||
continue;
|
||||
}
|
||||
if (expandDivs) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue