1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-28 23:28:16 +02:00

Merge pull request #7475 from Snuffleupagus/api-getTextContent-combineTextItems

[api-minor] Add a parameter to `PDFPageProxy_getTextContent` that controls whether `PartialEvaluator_getTextContent` will attempt to combine same-line text items
This commit is contained in:
Yury Delendik 2016-07-27 08:34:24 -05:00 committed by GitHub
commit a02e2686b9
8 changed files with 42 additions and 27 deletions

View file

@ -265,7 +265,8 @@ var Page = (function PageClosure() {
},
extractTextContent: function Page_extractTextContent(task,
normalizeWhitespace) {
normalizeWhitespace,
combineTextItems) {
var handler = {
on: function nullHandlerOn() {},
send: function nullHandlerSend() {}
@ -298,7 +299,8 @@ var Page = (function PageClosure() {
task,
self.resources,
/* stateManager = */ null,
normalizeWhitespace);
normalizeWhitespace,
combineTextItems);
});
},

View file

@ -1132,7 +1132,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
getTextContent:
function PartialEvaluator_getTextContent(stream, task, resources,
stateManager,
normalizeWhitespace) {
normalizeWhitespace,
combineTextItems) {
stateManager = (stateManager || new StateManager(new TextState()));
@ -1443,7 +1444,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var isSameTextLine = !textState.font ? false :
((textState.font.vertical ? args[0] : args[1]) === 0);
advance = args[0] - args[1];
if (isSameTextLine && textContentItem.initialized &&
if (combineTextItems &&
isSameTextLine && textContentItem.initialized &&
advance > 0 &&
advance <= textContentItem.fakeMultiSpaceMax) {
textState.translateTextLineMatrix(args[0], args[1]);
@ -1475,7 +1477,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
// Optimization to treat same line movement as advance.
advance = textState.calcTextLineMatrixAdvance(
args[0], args[1], args[2], args[3], args[4], args[5]);
if (advance !== null && textContentItem.initialized &&
if (combineTextItems &&
advance !== null && textContentItem.initialized &&
advance.value > 0 &&
advance.value <= textContentItem.fakeMultiSpaceMax) {
textState.translateTextLineMatrix(advance.width,
@ -1616,7 +1619,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
next(self.getTextContent(xobj, task,
xobj.dict.get('Resources') || resources, stateManager,
normalizeWhitespace).then(function (formTextContent) {
normalizeWhitespace, combineTextItems).then(
function (formTextContent) {
Util.appendToArray(textContent.items, formTextContent.items);
Util.extendObj(textContent.styles, formTextContent.styles);
stateManager.restore();

View file

@ -891,12 +891,14 @@ var WorkerMessageHandler = {
handler.on('GetTextContent', function wphExtractText(data) {
var pageIndex = data.pageIndex;
var normalizeWhitespace = data.normalizeWhitespace;
var combineTextItems = data.combineTextItems;
return pdfManager.getPage(pageIndex).then(function(page) {
var task = new WorkerTask('GetTextContent: page ' + pageIndex);
startWorkerTask(task);
var pageNum = pageIndex + 1;
var start = Date.now();
return page.extractTextContent(task, normalizeWhitespace).then(
return page.extractTextContent(task, normalizeWhitespace,
combineTextItems).then(
function(textContent) {
finishWorkerTask(task);
info('text indexing: page=' + pageNum + ' - time=' +