mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-22 16:18:08 +02:00
Merge pull request #9986 from Snuffleupagus/issue-9984
Attempt to combine separate beginText/endText sequences in `getTextContent` (issue 9984)
This commit is contained in:
commit
c94df0fef3
4 changed files with 47 additions and 7 deletions
|
@ -1510,6 +1510,17 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||
textContentItem.str.length = 0;
|
||||
}
|
||||
|
||||
function isIdenticalSetFont(name, size) {
|
||||
return (textState.font &&
|
||||
name === textState.fontName && size === textState.fontSize);
|
||||
}
|
||||
|
||||
function handleBeginText() {
|
||||
flushTextContentItem();
|
||||
textState.textMatrix = IDENTITY_MATRIX.slice();
|
||||
textState.textLineMatrix = IDENTITY_MATRIX.slice();
|
||||
}
|
||||
|
||||
function enqueueChunk() {
|
||||
let length = textContent.items.length;
|
||||
if (length > 0) {
|
||||
|
@ -1535,6 +1546,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||
task.ensureNotTerminated();
|
||||
timeSlotManager.reset();
|
||||
var stop, operation = {}, args = [];
|
||||
let pendingBeginText = false;
|
||||
while (!(stop = timeSlotManager.check())) {
|
||||
// The arguments parsed by read() are not used beyond this loop, so
|
||||
// we can reuse the same array on every iteration, thus avoiding
|
||||
|
@ -1545,16 +1557,30 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||
break;
|
||||
}
|
||||
textState = stateManager.state;
|
||||
var fn = operation.fn;
|
||||
var fn = operation.fn | 0;
|
||||
args = operation.args;
|
||||
var advance, diff;
|
||||
|
||||
switch (fn | 0) {
|
||||
if (pendingBeginText) {
|
||||
if (fn === OPS.setFont) {
|
||||
const fontNameArg = args[0].name, fontSizeArg = args[1];
|
||||
// For multiple identical Tf (setFont) commands, first check if
|
||||
// the following command is Tm (setTextMatrix) before continuing.
|
||||
if (isIdenticalSetFont(fontNameArg, fontSizeArg)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (fn !== OPS.setTextMatrix) {
|
||||
handleBeginText();
|
||||
}
|
||||
pendingBeginText = false;
|
||||
}
|
||||
|
||||
switch (fn) {
|
||||
case OPS.setFont:
|
||||
// Optimization to ignore multiple identical Tf commands.
|
||||
var fontNameArg = args[0].name, fontSizeArg = args[1];
|
||||
if (textState.font && fontNameArg === textState.fontName &&
|
||||
fontSizeArg === textState.fontSize) {
|
||||
if (isIdenticalSetFont(fontNameArg, fontSizeArg)) {
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1642,9 +1668,15 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||
textState.wordSpacing = args[0];
|
||||
break;
|
||||
case OPS.beginText:
|
||||
flushTextContentItem();
|
||||
textState.textMatrix = IDENTITY_MATRIX.slice();
|
||||
textState.textLineMatrix = IDENTITY_MATRIX.slice();
|
||||
// Optimization to attempt to combine separate BT/ET sequences,
|
||||
// by checking the next operator(s) before flushing text content
|
||||
// and resetting the text/textLine matrices (see above).
|
||||
if (combineTextItems) {
|
||||
pendingBeginText = true;
|
||||
break;
|
||||
}
|
||||
|
||||
handleBeginText();
|
||||
break;
|
||||
case OPS.showSpacedText:
|
||||
var items = args[0];
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue