From a38c4bc72903073727119c29331bb3ed7abf6ce5 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Tue, 11 Sep 2012 15:10:34 -0700 Subject: [PATCH 01/28] Make getTextContent return offset array and improve the algorithm. Make parts in viewer.js work again. --- src/evaluator.js | 67 ++++++++++++++++++++++++++++++++++++++++++++---- web/viewer.html | 2 +- web/viewer.js | 2 +- 3 files changed, 64 insertions(+), 7 deletions(-) diff --git a/src/evaluator.js b/src/evaluator.js index f0e775cdb..1ac32f781 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -505,7 +505,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { return queue; }, - getTextContent: function partialEvaluatorGetIRQueue(stream, resources) { + getTextContent: function partialEvaluatorGetIRQueue(stream, resources, state) { + if (!state) { + state = { + text: '', + mapping: [] + }; + } var self = this; var xref = this.xref; @@ -515,18 +521,22 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } resources = xref.fetchIfRef(resources) || new Dict(); + // The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd. + var xobjs = null; var parser = new Parser(new Lexer(stream), false); var res = resources; var args = [], obj; - var text = ''; + var text = state.text; var chunk = ''; + var commandOffset = state.mapping; var font = null; while (!isEOF(obj = parser.getObj())) { if (isCmd(obj)) { var cmd = obj.cmd; switch (cmd) { + // TODO: Add support for SAVE/RESTORE and XFORM here. 
case 'Tf': font = handleSetFont(args[0].name).translated; break; @@ -536,9 +546,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { if (typeof items[j] === 'string') { chunk += fontCharsToUnicode(items[j], font); } else if (items[j] < 0) { - // making all negative offsets a space - better to have - // a space in incorrect place than not have them at all chunk += ' '; + } else if (items[j] < 0 && font.spacedWidth > 0) { + var numFakeSpaces = Math.round(-e / font.spacedWidth); + if (numFakeSpaces > 0) { + chunk += ' '; + } } } break; @@ -551,8 +564,49 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { case '"': chunk += fontCharsToUnicode(args[2], font) + ' '; break; + case 'Do': + // Set the chunk such that the following if won't add something + // to the state. + chunk = ''; + + if (args[0].code) { + break; + } + + if (!xobjs) { + xobjs = resources.get('XObject') || new Dict(); + } + + var name = args[0].name; + var xobj = xobjs.get(name); + if (!xobj) + break; + assertWellFormed(isStream(xobj), 'XObject should be a stream'); + + var type = xobj.dict.get('Subtype'); + assertWellFormed( + isName(type), + 'XObject should have a Name subtype' + ); + + if ('Form' !== type.name) + break; + + // Add some spacing between the text here and the text of the + // xForm. + text = text + ' '; + + state.text = text; + state = this.getTextContent( + xobj, + xobj.dict.get('Resources') || resources, + state + ); + text = state.text; + break; } // switch if (chunk !== '') { + commandOffset.push(text.length); text += chunk; chunk = ''; } @@ -564,7 +618,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } } - return text; + return { + text: text, + mapping: commandOffset + }; }, extractDataStructures: function diff --git a/web/viewer.html b/web/viewer.html index 5a2f4f28c..813484c6c 100644 --- a/web/viewer.html +++ b/web/viewer.html @@ -88,7 +88,7 @@ limitations under the License. 
- diff --git a/web/viewer.js b/web/viewer.js index 5f8ded0ec..be512736f 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -1043,7 +1043,7 @@ var PDFView = { function extractPageText(pageIndex) { self.pages[pageIndex].pdfPage.getTextContent().then( function textContentResolved(textContent) { - self.pageText[pageIndex] = textContent; + self.pageText[pageIndex] = textContent.text; self.search(); if ((pageIndex + 1) < self.pages.length) extractPageText(pageIndex + 1); From eced7fbb5795d806ada44ccfdee7ee619bd04b62 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Tue, 11 Sep 2012 16:05:43 -0700 Subject: [PATCH 02/28] Backup work before redoing the textContent format --- web/viewer.js | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/web/viewer.js b/web/viewer.js index be512736f..8d7c51f24 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -1231,6 +1231,8 @@ var PageView = function pageView(container, pdfPage, id, scale, this.renderingState = RenderingStates.INITIAL; this.resume = null; + this.textContent = null; + var anchor = document.createElement('a'); anchor.name = '' + this.id; @@ -1485,6 +1487,22 @@ var PageView = function pageView(container, pdfPage, id, scale, var self = this; function pageViewDrawCallback(error) { + var visiblePages = PDFView.getVisiblePages(); + var pageView = PDFView.getHighestPriority(visiblePages, PDFView.pages, + PDFView.pageViewScroll.down); + + if (pageView === self) { + if (!self.textContent) { + self.textContent = {}; + self.pdfPage.getTextContent().then( + function textContentResolved(textContent) { + self.textContent = textContent; + textLayer.setTextContent(textContent); + } + ); + } + } + self.renderingState = RenderingStates.FINISHED; if (self.loadingIconDiv) { @@ -1890,10 +1908,16 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { textDiv.style.fontFamily = fontName; textDiv.style.left = text.geom.x + 'px'; textDiv.style.top = (text.geom.y - fontHeight) + 'px'; - 
textDiv.textContent = PDFJS.bidi(text, -1); - textDiv.dir = text.direction; + + // The `text.direction` is added inside the PDFJS.bidi function. + // textDiv.textContent = PDFJS.bidi(text, -1); + // textDiv.dir = text.direction; this.textDivs.push(textDiv); }; + + this.setTextContent = function textLayerBuilderSetTextContent(textContent) { + // When calling this function, we assume rendering the textDivs has finished + }; }; document.addEventListener('DOMContentLoaded', function webViewerLoad(evt) { From 668c2867d473ff95e3f7124215e57ade489eec2d Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Tue, 11 Sep 2012 16:14:18 -0700 Subject: [PATCH 03/28] Change the format of textContent to be an array --- src/evaluator.js | 18 +++--------------- web/viewer.js | 2 +- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/src/evaluator.js b/src/evaluator.js index 1ac32f781..6afcccea8 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -507,10 +507,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { getTextContent: function partialEvaluatorGetIRQueue(stream, resources, state) { if (!state) { - state = { - text: '', - mapping: [] - }; + state = []; } var self = this; @@ -528,9 +525,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var res = resources; var args = [], obj; - var text = state.text; var chunk = ''; - var commandOffset = state.mapping; var font = null; while (!isEOF(obj = parser.getObj())) { if (isCmd(obj)) { @@ -594,20 +589,16 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { // Add some spacing between the text here and the text of the // xForm. 
- text = text + ' '; - state.text = text; state = this.getTextContent( xobj, xobj.dict.get('Resources') || resources, state ); - text = state.text; break; } // switch if (chunk !== '') { - commandOffset.push(text.length); - text += chunk; + state.push(chunk); chunk = ''; } @@ -618,10 +609,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } } - return { - text: text, - mapping: commandOffset - }; + return state; }, extractDataStructures: function diff --git a/web/viewer.js b/web/viewer.js index 8d7c51f24..12ca1aa35 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -1043,7 +1043,7 @@ var PDFView = { function extractPageText(pageIndex) { self.pages[pageIndex].pdfPage.getTextContent().then( function textContentResolved(textContent) { - self.pageText[pageIndex] = textContent.text; + self.pageText[pageIndex] = textContent.join(''); self.search(); if ((pageIndex + 1) < self.pages.length) extractPageText(pageIndex + 1); From 3db4e7266e74385be011ca3084a4f8dea706c9a2 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Tue, 11 Sep 2012 16:26:29 -0700 Subject: [PATCH 04/28] Make the textLayer use extracted textContent --- web/viewer.js | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/web/viewer.js b/web/viewer.js index 12ca1aa35..b0bb0e475 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -1847,6 +1847,8 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { this.textLayerQueue = []; }; + this.endLayout = function textLayerBuilderEndLayout() { }; + this.renderLayer = function textLayerBuilderRenderLayer() { var self = this; var textDivs = this.textDivs; @@ -1878,7 +1880,7 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { textLayerDiv.appendChild(textLayerFrag); }; - this.endLayout = function textLayerBuilderEndLayout() { + this.setupRenderLayoutTimer = function textLayerSetupRenderLayoutTimer() { // Schedule renderLayout() if user has been scrolling, otherwise // run it right away var 
kRenderDelay = 200; // in ms @@ -1891,10 +1893,10 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { if (this.renderTimer) clearTimeout(this.renderTimer); this.renderTimer = setTimeout(function() { - self.endLayout(); + self.setupRenderLayoutTimer(); }, kRenderDelay); } - }; // endLayout + }; this.appendText = function textLayerBuilderAppendText(text, fontName, fontSize) { @@ -1917,6 +1919,14 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { this.setTextContent = function textLayerBuilderSetTextContent(textContent) { // When calling this function, we assume rendering the textDivs has finished + + var textDivs = this.textDivs; + + for (var i = 0; i < textContent.length; i++) { + textDivs[i].textContent = textContent[i]; + } + + this.setupRenderLayoutTimer(); }; }; From 83c499595c2720a90fe9409c0a3fd1e08c16745c Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Tue, 11 Sep 2012 16:42:24 -0700 Subject: [PATCH 05/28] Change the PDFJS.bidi function calls slightly to avoid creating a separate object to pass to PDF.JS bidi and just pass in a string --- src/bidi.js | 22 ++++++++++++++-------- web/viewer.js | 11 +++++++---- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/bidi.js b/src/bidi.js index cf0a3a4cc..ed7b3f35c 100644 --- a/src/bidi.js +++ b/src/bidi.js @@ -138,11 +138,16 @@ var bidi = PDFJS.bidi = (function bidiClosure() { } } - function bidi(text, startLevel) { - var str = text.str; + function bidiResult(content, direction) { + this.content = content; + this.direction = direction; + } + + function bidi(str, startLevel) { + var direction = ''; var strLength = str.length; if (strLength == 0) - return str; + return new bidiResult(str, direction); // get types, fill arrays @@ -176,16 +181,16 @@ var bidi = PDFJS.bidi = (function bidiClosure() { // if less than 30% chars are rtl then string is primarily ltr // if more than 30% chars are rtl then string is primarily rtl if (numBidi == 0) { - text.direction = 'ltr'; - 
return str; + direction = 'ltr'; + return new bidiResult(str, direction); } if (startLevel == -1) { if ((strLength / numBidi) < 0.3) { - text.direction = 'ltr'; + direction = 'ltr'; startLevel = 0; } else { - text.direction = 'rtl'; + direction = 'rtl'; startLevel = 1; } } @@ -438,7 +443,8 @@ var bidi = PDFJS.bidi = (function bidiClosure() { if (ch != '<' && ch != '>') result += ch; } - return result; + + return new bidiResult(str, direction); } return bidi; diff --git a/web/viewer.js b/web/viewer.js index b0bb0e475..d87c804ae 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -1911,9 +1911,8 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { textDiv.style.left = text.geom.x + 'px'; textDiv.style.top = (text.geom.y - fontHeight) + 'px'; - // The `text.direction` is added inside the PDFJS.bidi function. - // textDiv.textContent = PDFJS.bidi(text, -1); - // textDiv.dir = text.direction; + // The content of the div is set in the `setTextContent` function. + this.textDivs.push(textDiv); }; @@ -1923,7 +1922,11 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { var textDivs = this.textDivs; for (var i = 0; i < textContent.length; i++) { - textDivs[i].textContent = textContent[i]; + var textDiv = textDivs[i]; + var bidiText = PDFJS.bidi(textContent[i], -1); + + textDiv.textContent = bidiText.content; + textDiv.dir = bidiText.direction; } this.setupRenderLayoutTimer(); From f0f16a6a4da1376575a26af4d6603cb76cf93ae5 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Wed, 12 Sep 2012 10:11:55 -0700 Subject: [PATCH 06/28] Do some logging of the outputted text --- web/viewer.js | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/web/viewer.js b/web/viewer.js index d87c804ae..653773521 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -1912,8 +1912,13 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { textDiv.style.top = (text.geom.y - fontHeight) + 'px'; // The content of the div is set in the 
`setTextContent` function. + // For debug reasons, do the bidi thing here to compare it later once the + // text from the getTextContent function comes in. + var bidiText = PDFJS.bidi(text.str, -1); + textDiv.textContent = bidiText.content; + textDiv.dir = bidiText.direction; - this.textDivs.push(textDiv); + var idx = this.textDivs.push(textDiv) - 1; }; this.setTextContent = function textLayerBuilderSetTextContent(textContent) { @@ -1925,8 +1930,11 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { var textDiv = textDivs[i]; var bidiText = PDFJS.bidi(textContent[i], -1); + console.log("divL #%d: text=%s, bidi=%s, dir=%s", i, textContent[i], textDiv.textContent, textDiv.dir); + textDiv.textContent = bidiText.content; textDiv.dir = bidiText.direction; + console.log("divC #%d: text=%s, bidi=%s, dir=%s", i, textContent[i], bidiText.content, bidiText.direction); } this.setupRenderLayoutTimer(); From 3a8426e1792ec3cfef2f93588c7cd50d04e4c8cd Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Wed, 12 Sep 2012 10:42:43 -0700 Subject: [PATCH 07/28] Fix typo in new return value of bidi function --- src/bidi.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bidi.js b/src/bidi.js index ed7b3f35c..0c6eea9c2 100644 --- a/src/bidi.js +++ b/src/bidi.js @@ -444,7 +444,7 @@ var bidi = PDFJS.bidi = (function bidiClosure() { result += ch; } - return new bidiResult(str, direction); + return new bidiResult(result, direction); } return bidi; From a5520216fc2da0af5ce3dd42c16428e1fd20adee Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Wed, 12 Sep 2012 10:56:10 -0700 Subject: [PATCH 08/28] Some comment stuff --- web/viewer.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/web/viewer.js b/web/viewer.js index 653773521..a8ded4263 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -1926,15 +1926,17 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { var textDivs = this.textDivs; + console.log(textContent); + 
for (var i = 0; i < textContent.length; i++) { var textDiv = textDivs[i]; var bidiText = PDFJS.bidi(textContent[i], -1); - console.log("divL #%d: text=%s, bidi=%s, dir=%s", i, textContent[i], textDiv.textContent, textDiv.dir); + // console.log("divL #%d: text=%s, bidi=%s, dir=%s", i, textContent[i], textDiv.textContent, textDiv.dir); textDiv.textContent = bidiText.content; textDiv.dir = bidiText.direction; - console.log("divC #%d: text=%s, bidi=%s, dir=%s", i, textContent[i], bidiText.content, bidiText.direction); + // console.log("divC #%d: text=%s, bidi=%s, dir=%s", i, textContent[i], bidiText.content, bidiText.direction); } this.setupRenderLayoutTimer(); From 58ed7fc35cb15257a111db8b22e465529a69ba26 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Wed, 12 Sep 2012 11:05:34 -0700 Subject: [PATCH 09/28] Make the textLayer divs prevent whitespaces --- web/viewer.css | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/web/viewer.css b/web/viewer.css index c1e7aaf3d..0f615094a 100644 --- a/web/viewer.css +++ b/web/viewer.css @@ -121,7 +121,7 @@ html[dir='rtl'] .innerCenter { -o-transition-timing-function: ease; transition-duration: 200ms; transition-timing-function: ease; - + } html[dir='ltr'] #sidebarContainer { -webkit-transition-property: left; @@ -629,7 +629,7 @@ html[dir='rtl'] .toolbarButton:first-child { display: inline-block; content: url(images/toolbarButton-sidebarToggle.png); } - + html[dir='ltr'] .toolbarButton.pageUp::before { display: inline-block; content: url(images/toolbarButton-pageUp.png); @@ -639,7 +639,7 @@ html[dir='rtl'] .toolbarButton.pageUp::before { display: inline-block; content: url(images/toolbarButton-pageUp-rtl.png); } - + html[dir='ltr'] .toolbarButton.pageDown::before { display: inline-block; content: url(images/toolbarButton-pageDown.png); @@ -654,7 +654,7 @@ html[dir='rtl'] .toolbarButton.pageDown::before { display: inline-block; content: url(images/toolbarButton-zoomOut.png); } - + 
.toolbarButton.zoomIn::before { display: inline-block; content: url(images/toolbarButton-zoomIn.png); @@ -691,12 +691,12 @@ html[dir='rtl'] .toolbarButton.pageDown::before { .toolbarButton.bookmark::before { content: url(images/toolbarButton-bookmark.png); } - + #viewThumbnail.toolbarButton::before { display: inline-block; content: url(images/toolbarButton-viewThumbnail.png); } - + #viewOutline.toolbarButton::before { display: inline-block; content: url(images/toolbarButton-viewOutline.png); @@ -797,7 +797,7 @@ html[dir='rtl'] .toolbarButton.pageDown::before { padding: 7px; -moz-transition-duration: 150ms; } - + a:focus > .thumbnail > .thumbnailSelectionRing > .thumbnailImage, .thumbnail:hover > .thumbnailSelectionRing > .thumbnailImage { opacity: .9; @@ -1016,7 +1016,7 @@ canvas { background: -moz-linear-gradient(top, #b2b2b2 0%,#898989 100%); background: -ms-linear-gradient(top, #b2b2b2 0%,#898989 100%); background: -o-linear-gradient(top, #b2b2b2 0%,#898989 100%); - background: linear-gradient(top, #b2b2b2 0%,#898989 100%); + background: linear-gradient(top, #b2b2b2 0%,#898989 100%); border-top-left-radius: 2px; border-bottom-left-radius: 2px; @@ -1066,6 +1066,7 @@ canvas { color: transparent; position: absolute; line-height:1.3; + white-space:pre; } /* TODO: file FF bug to support ::-moz-selection:window-inactive @@ -1202,7 +1203,7 @@ canvas { @page { margin: 0; -} +} #printContainer { display: none; From d26969a85fc1cd43d7d623515605463afa19c25f Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Wed, 12 Sep 2012 15:26:47 -0700 Subject: [PATCH 10/28] Remove some debugging informations --- web/viewer.js | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/web/viewer.js b/web/viewer.js index a8ded4263..bcbed3cf6 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -1912,31 +1912,20 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { textDiv.style.top = (text.geom.y - fontHeight) + 'px'; // The content of the div is set in 
the `setTextContent` function. - // For debug reasons, do the bidi thing here to compare it later once the - // text from the getTextContent function comes in. - var bidiText = PDFJS.bidi(text.str, -1); - textDiv.textContent = bidiText.content; - textDiv.dir = bidiText.direction; - var idx = this.textDivs.push(textDiv) - 1; + this.textDivs.push(textDiv); }; this.setTextContent = function textLayerBuilderSetTextContent(textContent) { // When calling this function, we assume rendering the textDivs has finished - var textDivs = this.textDivs; - console.log(textContent); - for (var i = 0; i < textContent.length; i++) { var textDiv = textDivs[i]; var bidiText = PDFJS.bidi(textContent[i], -1); - // console.log("divL #%d: text=%s, bidi=%s, dir=%s", i, textContent[i], textDiv.textContent, textDiv.dir); - textDiv.textContent = bidiText.content; textDiv.dir = bidiText.direction; - // console.log("divC #%d: text=%s, bidi=%s, dir=%s", i, textContent[i], bidiText.content, bidiText.direction); } this.setupRenderLayoutTimer(); From 9b9de87ac432e6bdcae8ce080bd56da44038999e Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Wed, 12 Sep 2012 15:34:09 -0700 Subject: [PATCH 11/28] Undo some changes for now until there is support for font.spacedWidth --- src/evaluator.js | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/evaluator.js b/src/evaluator.js index 6afcccea8..57dbb2138 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -541,12 +541,17 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { if (typeof items[j] === 'string') { chunk += fontCharsToUnicode(items[j], font); } else if (items[j] < 0) { + // making all negative offsets a space - better to have + // a space in incorrect place than not have them at all chunk += ' '; - } else if (items[j] < 0 && font.spacedWidth > 0) { - var numFakeSpaces = Math.round(-e / font.spacedWidth); - if (numFakeSpaces > 0) { - chunk += ' '; - } + // This is a better way to detect spacing in the 
future. + // However, for now let's keep it simple (also, font.spacedWidth) + // is not available. + // } else if (items[j] < 0 && font.spacedWidth > 0) { + // var numFakeSpaces = Math.round(-e / font.spacedWidth); + // if (numFakeSpaces > 0) { + // chunk += ' '; + // } } } break; From ee0c2e2ab75cb0621baae8c0b4f6a91d9a34e8b8 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Wed, 12 Sep 2012 15:37:29 -0700 Subject: [PATCH 12/28] Make the search button hidden again --- web/viewer.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/viewer.html b/web/viewer.html index 813484c6c..5a2f4f28c 100644 --- a/web/viewer.html +++ b/web/viewer.html @@ -88,7 +88,7 @@ limitations under the License. - From 7432e596a198306b56d6117054a3874e76522abf Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Fri, 14 Sep 2012 10:53:06 -0700 Subject: [PATCH 13/28] Improve the space detection for the getTextContent function --- src/evaluator.js | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/src/evaluator.js b/src/evaluator.js index 57dbb2138..1fa732f03 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -533,36 +533,29 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { switch (cmd) { // TODO: Add support for SAVE/RESTORE and XFORM here. case 'Tf': - font = handleSetFont(args[0].name).translated; + font = handleSetFont(args[0].name); break; case 'TJ': var items = args[0]; for (var j = 0, jj = items.length; j < jj; j++) { if (typeof items[j] === 'string') { - chunk += fontCharsToUnicode(items[j], font); - } else if (items[j] < 0) { - // making all negative offsets a space - better to have - // a space in incorrect place than not have them at all - chunk += ' '; - // This is a better way to detect spacing in the future. - // However, for now let's keep it simple (also, font.spacedWidth) - // is not available. 
- // } else if (items[j] < 0 && font.spacedWidth > 0) { - // var numFakeSpaces = Math.round(-e / font.spacedWidth); - // if (numFakeSpaces > 0) { - // chunk += ' '; - // } + chunk += fontCharsToUnicode(items[j], font.translated); + } else if (items[j] < 0 && font.spacedWidth > 0) { + var numFakeSpaces = Math.round(-e / font.spacedWidth); + if (numFakeSpaces > 0) { + chunk += ' '; + } } } break; case 'Tj': - chunk += fontCharsToUnicode(args[0], font); + chunk += fontCharsToUnicode(args[0], font.translated); break; case "'": - chunk += fontCharsToUnicode(args[0], font) + ' '; + chunk += fontCharsToUnicode(args[0], font.translated) + ' '; break; case '"': - chunk += fontCharsToUnicode(args[2], font) + ' '; + chunk += fontCharsToUnicode(args[2], font.translated) + ' '; break; case 'Do': // Set the chunk such that the following if won't add something From 32d14f457580bdb1c8647099e0a486fee75ed115 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Fri, 14 Sep 2012 11:24:49 -0700 Subject: [PATCH 14/28] Ensure textLayers content is always fetched and set on the textLayer --- web/viewer.js | 54 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/web/viewer.js b/web/viewer.js index bcbed3cf6..09c5c4fe7 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -1453,6 +1453,13 @@ var PageView = function pageView(container, pdfPage, id, scale, }, 0); }; + this.getTextContent = function pageviewGetTextContent() { + if (!this.textContent) { + this.textContent = this.pdfPage.getTextContent(); + } + return this.textContent; + }; + this.draw = function pageviewDraw(callback) { if (this.renderingState !== RenderingStates.INITIAL) error('Must be in new state before drawing'); @@ -1487,22 +1494,6 @@ var PageView = function pageView(container, pdfPage, id, scale, var self = this; function pageViewDrawCallback(error) { - var visiblePages = PDFView.getVisiblePages(); - var pageView = PDFView.getHighestPriority(visiblePages, 
PDFView.pages, - PDFView.pageViewScroll.down); - - if (pageView === self) { - if (!self.textContent) { - self.textContent = {}; - self.pdfPage.getTextContent().then( - function textContentResolved(textContent) { - self.textContent = textContent; - textLayer.setTextContent(textContent); - } - ); - } - } - self.renderingState = RenderingStates.FINISHED; if (self.loadingIconDiv) { @@ -1549,6 +1540,14 @@ var PageView = function pageView(container, pdfPage, id, scale, } ); + if (textLayer) { + this.getTextContent().then( + function textContentResolved(textContent) { + textLayer.setTextContent(textContent); + } + ); + } + setupAnnotations(this.pdfPage, this.viewport); div.setAttribute('data-loaded', true); }; @@ -1841,13 +1840,18 @@ var CustomStyle = (function CustomStyleClosure() { var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { var textLayerFrag = document.createDocumentFragment(); this.textLayerDiv = textLayerDiv; + this.layoutDone = false; + this.divContentDone = false; this.beginLayout = function textLayerBuilderBeginLayout() { this.textDivs = []; this.textLayerQueue = []; }; - this.endLayout = function textLayerBuilderEndLayout() { }; + this.endLayout = function textLayerBuilderEndLayout() { + this.layoutDone = true; + this.insertDivContent(); + }, this.renderLayer = function textLayerBuilderRenderLayer() { var self = this; @@ -1916,9 +1920,16 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { this.textDivs.push(textDiv); }; - this.setTextContent = function textLayerBuilderSetTextContent(textContent) { - // When calling this function, we assume rendering the textDivs has finished + this.insertDivContent = function textLayerUpdateTextContent() { + // Only set the content of the divs once layout has finished, the content + // for the divs is available and content is not yet set on the divs. 
+ if (!this.layoutDone || this.divContentDone || !this.textContent) + return; + + this.divContentDone = true; + var textDivs = this.textDivs; + var textContent = this.textContent; for (var i = 0; i < textContent.length; i++) { var textDiv = textDivs[i]; @@ -1930,6 +1941,11 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { this.setupRenderLayoutTimer(); }; + + this.setTextContent = function textLayerBuilderSetTextContent(textContent) { + this.textContent = textContent; + this.insertDivContent(); + }; }; document.addEventListener('DOMContentLoaded', function webViewerLoad(evt) { From 897b99500e0abfa055ea8077acfd8cd494b62ab0 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Fri, 14 Sep 2012 11:35:21 -0700 Subject: [PATCH 15/28] Fix typos that prevent space insertion for showSpaced in getTextContent --- src/evaluator.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/evaluator.js b/src/evaluator.js index 1fa732f03..dd36aa5e4 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -533,15 +533,15 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { switch (cmd) { // TODO: Add support for SAVE/RESTORE and XFORM here. 
case 'Tf': - font = handleSetFont(args[0].name); + font = handleSetFont(args[0].name).translated; break; case 'TJ': var items = args[0]; for (var j = 0, jj = items.length; j < jj; j++) { if (typeof items[j] === 'string') { - chunk += fontCharsToUnicode(items[j], font.translated); - } else if (items[j] < 0 && font.spacedWidth > 0) { - var numFakeSpaces = Math.round(-e / font.spacedWidth); + chunk += fontCharsToUnicode(items[j], font); + } else if (items[j] < 0 && font.spaceWidth > 0) { + var numFakeSpaces = Math.round(-items[j] / font.spaceWidth); if (numFakeSpaces > 0) { chunk += ' '; } @@ -549,13 +549,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } break; case 'Tj': - chunk += fontCharsToUnicode(args[0], font.translated); + chunk += fontCharsToUnicode(args[0], font); break; case "'": - chunk += fontCharsToUnicode(args[0], font.translated) + ' '; + chunk += fontCharsToUnicode(args[0], font) + ' '; break; case '"': - chunk += fontCharsToUnicode(args[2], font.translated) + ' '; + chunk += fontCharsToUnicode(args[2], font) + ' '; break; case 'Do': // Set the chunk such that the following if won't add something From 8d6565d1a8cc6c0d797db4363e831677a6caecbb Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Fri, 14 Sep 2012 15:03:27 -0700 Subject: [PATCH 16/28] Change the format of the BidiResult object. 
--- src/bidi.js | 20 ++++++++++---------- web/viewer.js | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/bidi.js b/src/bidi.js index 0c6eea9c2..5eb66bfd9 100644 --- a/src/bidi.js +++ b/src/bidi.js @@ -138,16 +138,16 @@ var bidi = PDFJS.bidi = (function bidiClosure() { } } - function bidiResult(content, direction) { - this.content = content; - this.direction = direction; + function BidiResult(str, isLTR) { + this.str = str; + this.ltr = isLTR; } function bidi(str, startLevel) { - var direction = ''; + var isLTR = true; var strLength = str.length; if (strLength == 0) - return new bidiResult(str, direction); + return new BidiResult(str, ltr); // get types, fill arrays @@ -181,16 +181,16 @@ var bidi = PDFJS.bidi = (function bidiClosure() { // if less than 30% chars are rtl then string is primarily ltr // if more than 30% chars are rtl then string is primarily rtl if (numBidi == 0) { - direction = 'ltr'; - return new bidiResult(str, direction); + isLTR = true; + return new BidiResult(str, isLTR); } if (startLevel == -1) { if ((strLength / numBidi) < 0.3) { - direction = 'ltr'; + isLTR = true; startLevel = 0; } else { - direction = 'rtl'; + isLTR = false; startLevel = 1; } } @@ -444,7 +444,7 @@ var bidi = PDFJS.bidi = (function bidiClosure() { result += ch; } - return new bidiResult(result, direction); + return new BidiResult(result, isLTR); } return bidi; diff --git a/web/viewer.js b/web/viewer.js index 09c5c4fe7..2764a3648 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -1935,8 +1935,8 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { var textDiv = textDivs[i]; var bidiText = PDFJS.bidi(textContent[i], -1); - textDiv.textContent = bidiText.content; - textDiv.dir = bidiText.direction; + textDiv.textContent = bidiText.str; + textDiv.dir = bidiText.ltr ? 
'ltr' : 'rtl'; } this.setupRenderLayoutTimer(); From bd4434a7ea7be878360997849a2079e0ca39bec7 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Fri, 14 Sep 2012 19:34:54 -0700 Subject: [PATCH 17/28] Apply bidi algorithm to the text in the worker --- src/evaluator.js | 15 ++++++++++----- web/viewer.js | 9 +++++---- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/evaluator.js b/src/evaluator.js index dd36aa5e4..f4fdd812e 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -507,7 +507,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { getTextContent: function partialEvaluatorGetIRQueue(stream, resources, state) { if (!state) { - state = []; + var text = []; + var dirs = []; + state = { + text: text, + dirs: dirs + }; } var self = this; @@ -585,9 +590,6 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { if ('Form' !== type.name) break; - // Add some spacing between the text here and the text of the - // xForm. - state = this.getTextContent( xobj, xobj.dict.get('Resources') || resources, @@ -596,7 +598,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { break; } // switch if (chunk !== '') { - state.push(chunk); + var bidiText = PDFJS.bidi(chunk, -1); + text.push(bidiText.str); + dirs.push(bidiText.ltr); + chunk = ''; } diff --git a/web/viewer.js b/web/viewer.js index 2764a3648..060f2eae4 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -1930,13 +1930,14 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { var textDivs = this.textDivs; var textContent = this.textContent; + var text = textContent.text; + var dirs = textContent.dirs; - for (var i = 0; i < textContent.length; i++) { + for (var i = 0; i < text.length; i++) { var textDiv = textDivs[i]; - var bidiText = PDFJS.bidi(textContent[i], -1); - textDiv.textContent = bidiText.str; - textDiv.dir = bidiText.ltr ? 'ltr' : 'rtl'; + textDiv.textContent = text[i]; + textDiv.dir = dirs[i] ? 
'ltr' : 'rtl'; } this.setupRenderLayoutTimer(); From e22fc80d3e42dc71a2137ba1a34e8d8ab01da7cf Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Fri, 14 Sep 2012 19:52:37 -0700 Subject: [PATCH 18/28] Add handling for gs in getTextContent --- src/evaluator.js | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/evaluator.js b/src/evaluator.js index f4fdd812e..ee8451d7a 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -596,7 +596,23 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { state ); break; + case 'gs': + var dictName = args[0]; + var extGState = resources.get('ExtGState'); + + if (!isDict(extGState) || !extGState.has(dictName.name)) + break; + + var gsState = extGState.get(dictName.name); + + for (var i = 0; i < gsState.length; i++) { + if (gsState[i] === 'Font') { + font = handleSetFont(args[0].name).translated; + } + } + break; } // switch + if (chunk !== '') { var bidiText = PDFJS.bidi(chunk, -1); text.push(bidiText.str); @@ -610,7 +626,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { assertWellFormed(args.length <= 33, 'Too many arguments'); args.push(obj); } - } + } // while return state; }, From 6bc2a0b84af44399f8c6ee7ac8817cefc3707483 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Fri, 14 Sep 2012 20:04:45 -0700 Subject: [PATCH 19/28] Remove spaces that don't make sense when using the text in divs --- src/evaluator.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/evaluator.js b/src/evaluator.js index ee8451d7a..912e48edd 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -557,10 +557,14 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { chunk += fontCharsToUnicode(args[0], font); break; case "'": - chunk += fontCharsToUnicode(args[0], font) + ' '; + // For search, adding a extra white space for line breaks would be + // better here, but that causes too much spaces in the + // text-selection divs. 
+ chunk += fontCharsToUnicode(args[0], font); break; case '"': - chunk += fontCharsToUnicode(args[2], font) + ' '; + // Note comment in "'" + chunk += fontCharsToUnicode(args[2], font); break; case 'Do': // Set the chunk such that the following if won't add something From a525492f65951025233b04c0da5f7b1d8967e288 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Fri, 14 Sep 2012 20:07:46 -0700 Subject: [PATCH 20/28] Make lint happy --- src/evaluator.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/evaluator.js b/src/evaluator.js index 912e48edd..0b3c941f0 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -505,7 +505,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { return queue; }, - getTextContent: function partialEvaluatorGetIRQueue(stream, resources, state) { + getTextContent: function partialEvaluatorGetIRQueue( + stream, resources, state) { if (!state) { var text = []; var dirs = []; From fdb6a013c9878c3a086ef30141990d0adc3e4b31 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Sun, 16 Sep 2012 07:45:07 -0700 Subject: [PATCH 21/28] Fix unitialized text/dirs array --- src/evaluator.js | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/evaluator.js b/src/evaluator.js index 0b3c941f0..2bf261913 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -507,13 +507,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { getTextContent: function partialEvaluatorGetIRQueue( stream, resources, state) { + var text; + var dirs; + if (!state) { - var text = []; - var dirs = []; + text = []; + dirs = []; state = { text: text, dirs: dirs }; + } else { + text = state.text; + dirs = state.dirs; } var self = this; From 36d358fff862afe2f1f3d016218617817196f338 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Sun, 16 Sep 2012 07:46:13 -0700 Subject: [PATCH 22/28] Don't use shadow() for spaceWidth as otherwise sendering the font over to the mainthread causes issues if the worker used 
the property and the main thread tries to assign the spaceWidth to the getter function --- src/fonts.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/fonts.js b/src/fonts.js index e344bbf4b..0fee585c4 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -3181,6 +3181,10 @@ var Font = (function FontClosure() { }, get spaceWidth() { + if (this._shadowWidth !== undefined) { + return this._shadowWidth; + } + // trying to estimate space character width var possibleSpaceReplacements = ['space', 'minus', 'one', 'i']; var width; @@ -3208,7 +3212,8 @@ var Font = (function FontClosure() { break; // the non-zero width found } width = (width || this.defaultWidth) * this.widthMultiplier; - return shadow(this, 'spaceWidth', width); + this._shadowWidth = width; + return width; }, charToGlyph: function Font_charToGlyph(charcode) { From e7894ca07d107e03e473a76c7b8506b3a0f4620c Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Sun, 16 Sep 2012 07:46:29 -0700 Subject: [PATCH 23/28] Fix the driver.js file to handle new TextLayer infrastructure --- test/driver.js | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/test/driver.js b/test/driver.js index 998527807..1b690811b 100644 --- a/test/driver.js +++ b/test/driver.js @@ -159,6 +159,7 @@ NullTextLayerBuilder.prototype = { function SimpleTextLayerBuilder(ctx, viewport) { this.ctx = ctx; this.viewport = viewport; + this.textCounter = 0; } SimpleTextLayerBuilder.prototype = { beginLayout: function SimpleTextLayerBuilder_BeginLayout() { @@ -180,14 +181,18 @@ SimpleTextLayerBuilder.prototype = { ctx.stroke(); ctx.fill(); - var textContent = bidi(text, -1); + var textContent = this.textContent.text[this.textCounter]; ctx.font = fontHeight + 'px ' + fontName; ctx.fillStyle = 'black'; ctx.fillText(textContent, text.geom.x, text.geom.y); + + this.textCounter ++; + }, + setTextContent: function SimpleTextLayerBuilder_SetTextContent(textContent) { + this.textContent = textContent; } }; - 
function nextPage(task, loadError) { var failure = loadError || ''; @@ -245,6 +250,10 @@ function nextPage(task, loadError) { drawContext = dummyCanvas.getContext('2d'); // ... text builder will draw its content on the test canvas textLayerBuilder = new SimpleTextLayerBuilder(ctx, viewport); + + page.getTextContent().then(function(textContent) { + textLayerBuilder.setTextContent(textContent); + }); } else { drawContext = ctx; textLayerBuilder = new NullTextLayerBuilder(); From 3d0e6da2ffd6423a73b984031f17d056175b4eb1 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Sun, 16 Sep 2012 09:18:46 -0700 Subject: [PATCH 24/28] Move some more font conversion into loadFont function to make type3 fonts work for now --- src/evaluator.js | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/evaluator.js b/src/evaluator.js index 2bf261913..39c92a59f 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -164,6 +164,21 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { translated = { error: e }; } font.translated = translated; + + var data = translated; + if (data.loadCharProcs) { + delete data.loadCharProcs; + + var charProcs = font.get('CharProcs').getAll(); + var fontResources = font.get('Resources') || resources; + var charProcOperatorList = {}; + for (var key in charProcs) { + var glyphStream = charProcs[key]; + charProcOperatorList[key] = + this.getOperatorList(glyphStream, fontResources, dependency); + } + data.charProcOperatorList = charProcOperatorList; + } } return font; }, @@ -195,19 +210,6 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var loadedName = font.loadedName; if (!font.sent) { var data = font.translated; - if (data.loadCharProcs) { - delete data.loadCharProcs; - - var charProcs = font.get('CharProcs').getAll(); - var fontResources = font.get('Resources') || resources; - var charProcOperatorList = {}; - for (var key in charProcs) { - var glyphStream = charProcs[key]; - 
charProcOperatorList[key] = - self.getOperatorList(glyphStream, fontResources, dependency); - } - data.charProcOperatorList = charProcOperatorList; - } if (data instanceof Font) data = data.export(); From 049b19264c490e7d34c10e4666cf197a7432ce7b Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Mon, 17 Sep 2012 17:19:27 +0200 Subject: [PATCH 25/28] Make lint happy again --- test/driver.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/driver.js b/test/driver.js index 1b690811b..a493edb00 100644 --- a/test/driver.js +++ b/test/driver.js @@ -186,7 +186,7 @@ SimpleTextLayerBuilder.prototype = { ctx.fillStyle = 'black'; ctx.fillText(textContent, text.geom.x, text.geom.y); - this.textCounter ++; + this.textCounter++; }, setTextContent: function SimpleTextLayerBuilder_SetTextContent(textContent) { this.textContent = textContent; From a33ba145bf168264fe1ccd9e56d4d34e90c59a43 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Wed, 19 Sep 2012 22:17:01 +0200 Subject: [PATCH 26/28] Don't compute the string for the TextLayer in the canvas backend anymore and change the syntax of appendText --- src/canvas.js | 58 +++++++++++++++++++++------------------------------ web/viewer.js | 13 ++++++------ 2 files changed, 31 insertions(+), 40 deletions(-) diff --git a/src/canvas.js b/src/canvas.js index 5f05604b9..8ee20a0ae 100644 --- a/src/canvas.js +++ b/src/canvas.js @@ -677,9 +677,10 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { var textHScale2 = textHScale * fontMatrix[0]; var glyphsLength = glyphs.length; var textLayer = this.textLayer; - var text = {str: '', length: 0, canvasWidth: 0, geom: {}}; + var geom; var textSelection = textLayer && !skipTextSelection ? 
true : false; var textRenderingMode = current.textRenderingMode; + var canvasWidth = 0.0; // Type3 fonts - each glyph is a "mini-PDF" if (font.coded) { @@ -692,7 +693,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { if (textSelection) { this.save(); ctx.scale(1, -1); - text.geom = this.getTextGeometry(); + geom = this.getTextGeometry(); this.restore(); } for (var i = 0; i < glyphsLength; ++i) { @@ -718,9 +719,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { ctx.translate(width, 0); current.x += width * textHScale; - text.str += glyph.unicode; - text.length++; - text.canvasWidth += width; + canvasWidth += width; } ctx.restore(); } else { @@ -735,7 +734,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { lineWidth /= scale; if (textSelection) - text.geom = this.getTextGeometry(); + geom = this.getTextGeometry(); if (fontSizeScale != 1.0) { ctx.scale(fontSizeScale, fontSizeScale); @@ -784,17 +783,19 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { var glyphUnicode = glyph.unicode === ' ' ? '\u00A0' : glyph.unicode; if (glyphUnicode in NormalizedUnicodes) glyphUnicode = NormalizedUnicodes[glyphUnicode]; - text.str += reverseIfRtl(glyphUnicode); - text.canvasWidth += charWidth; + + canvasWidth += charWidth; } current.x += x * textHScale2; ctx.restore(); } - if (textSelection) - this.textLayer.appendText(text, font.fallbackName, fontSize); + if (textSelection) { + geom.canvasWidth = canvasWidth; + this.textLayer.appendText(font.fallbackName, fontSize, geom); + }` - return text; + return canvasWidth; }, showSpacedText: function CanvasGraphics_showSpacedText(arr) { var ctx = this.ctx; @@ -806,7 +807,8 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { textHScale *= (current.fontMatrix || IDENTITY_MATRIX)[0]; var arrLength = arr.length; var textLayer = this.textLayer; - var text = {str: '', length: 0, canvasWidth: 0, geom: {}}; + var geom; + var canvasWidth = 0.0; var textSelection = textLayer ? 
true : false; if (textSelection) { @@ -819,7 +821,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { ctx.scale(textHScale, 1); } else this.applyTextTransforms(); - text.geom = this.getTextGeometry(); + geom = this.getTextGeometry(); ctx.restore(); } @@ -829,34 +831,22 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { var spacingLength = -e * 0.001 * fontSize * textHScale; current.x += spacingLength; - if (textSelection) { - // Emulate precise spacing via HTML spaces - text.canvasWidth += spacingLength; - if (e < 0 && text.geom.spaceWidth > 0) { // avoid div by zero - var numFakeSpaces = Math.round(-e / text.geom.spaceWidth); - if (numFakeSpaces > 0) { - text.str += '\u00A0'; - } - } - } + if (textSelection) + canvasWidth += spacingLength; } else if (isString(e)) { - var shownText = this.showText(e, true); + var shownCanvasWidth = this.showText(e, true); - if (textSelection) { - if (shownText.str === ' ') { - text.str += '\u00A0'; - } else { - text.str += shownText.str; - } - text.canvasWidth += shownText.canvasWidth; - } + if (textSelection) + canvasWidth += shownCanvasWidth; } else { error('TJ array element ' + e + ' is not string or num'); } } - if (textSelection) - this.textLayer.appendText(text, font.fallbackName, fontSize); + if (textSelection) { + geom.canvasWidth = canvasWidth; + this.textLayer.appendText(font.fallbackName, fontSize, geom); + } }, nextLineShowText: function CanvasGraphics_nextLineShowText(text) { this.nextLine(); diff --git a/web/viewer.js b/web/viewer.js index 060f2eae4..23155e14f 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -1902,18 +1902,19 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { } }; - this.appendText = function textLayerBuilderAppendText(text, - fontName, fontSize) { + this.appendText = function textLayerBuilderAppendText(fontName, fontSize, + geom) { var textDiv = document.createElement('div'); // vScale and hScale already contain the scaling to pixel units - var fontHeight = 
fontSize * text.geom.vScale; - textDiv.dataset.canvasWidth = text.canvasWidth * text.geom.hScale; + var fontHeight = fontSize * geom.vScale; + textDiv.dataset.canvasWidth = geom.canvasWidth * geom.hScale; + textDiv.dataset.fontName = fontName; textDiv.style.fontSize = fontHeight + 'px'; textDiv.style.fontFamily = fontName; - textDiv.style.left = text.geom.x + 'px'; - textDiv.style.top = (text.geom.y - fontHeight) + 'px'; + textDiv.style.left = geom.x + 'px'; + textDiv.style.top = (geom.y - fontHeight) + 'px'; // The content of the div is set in the `setTextContent` function. From e48530d3913132ed0d369cf50639820060640201 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Thu, 20 Sep 2012 21:48:18 +0200 Subject: [PATCH 27/28] Fix AppendText after API changes --- src/canvas.js | 2 +- test/driver.js | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/canvas.js b/src/canvas.js index 8ee20a0ae..9ccc7317e 100644 --- a/src/canvas.js +++ b/src/canvas.js @@ -793,7 +793,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { if (textSelection) { geom.canvasWidth = canvasWidth; this.textLayer.appendText(font.fallbackName, fontSize, geom); - }` + } return canvasWidth; }, diff --git a/test/driver.js b/test/driver.js index a493edb00..d10cda2a0 100644 --- a/test/driver.js +++ b/test/driver.js @@ -168,23 +168,23 @@ SimpleTextLayerBuilder.prototype = { endLayout: function SimpleTextLayerBuilder_EndLayout() { this.ctx.restore(); }, - appendText: function SimpleTextLayerBuilder_AppendText(text, fontName, - fontSize) { + appendText: function SimpleTextLayerBuilder_AppendText(fontName, fontSize, + geom) { var ctx = this.ctx, viewport = this.viewport; // vScale and hScale already contain the scaling to pixel units - var fontHeight = fontSize * text.geom.vScale; + var fontHeight = fontSize * geom.vScale; ctx.beginPath(); ctx.strokeStyle = 'red'; ctx.fillStyle = 'yellow'; - ctx.rect(text.geom.x, text.geom.y - fontHeight, - text.canvasWidth * 
text.geom.hScale, fontHeight); + ctx.rect(geom.x, geom.y - fontHeight, + geom.canvasWidth * geom.hScale, fontHeight); ctx.stroke(); ctx.fill(); var textContent = this.textContent.text[this.textCounter]; ctx.font = fontHeight + 'px ' + fontName; ctx.fillStyle = 'black'; - ctx.fillText(textContent, text.geom.x, text.geom.y); + ctx.fillText(textContent, geom.x, geom.y); this.textCounter++; }, From f1e0edbaa9ff852428e8b9f20f52ea92cb6b0bc3 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Sat, 22 Sep 2012 11:18:26 +0200 Subject: [PATCH 28/28] Address Yurys review comments --- src/evaluator.js | 16 +++++----------- src/fonts.js | 4 +++- test/driver.js | 2 +- web/viewer.js | 11 +++++------ 4 files changed, 14 insertions(+), 19 deletions(-) diff --git a/src/evaluator.js b/src/evaluator.js index 39c92a59f..2a04c7bc0 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -509,19 +509,15 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { getTextContent: function partialEvaluatorGetIRQueue( stream, resources, state) { - var text; - var dirs; + var bidiTexts; if (!state) { - text = []; - dirs = []; + bidiTexts = []; state = { - text: text, - dirs: dirs + bidiTexts: bidiTexts }; } else { - text = state.text; - dirs = state.dirs; + bidiTexts = state.bidiTexts; } var self = this; @@ -627,9 +623,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } // switch if (chunk !== '') { - var bidiText = PDFJS.bidi(chunk, -1); - text.push(bidiText.str); - dirs.push(bidiText.ltr); + bidiTexts.push(PDFJS.bidi(chunk, -1)); chunk = ''; } diff --git a/src/fonts.js b/src/fonts.js index 0fee585c4..0c74de0f9 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -3181,7 +3181,7 @@ var Font = (function FontClosure() { }, get spaceWidth() { - if (this._shadowWidth !== undefined) { + if ('_shadowWidth' in this) { return this._shadowWidth; } @@ -3212,6 +3212,8 @@ var Font = (function FontClosure() { break; // the non-zero width found } width = (width || this.defaultWidth) * 
this.widthMultiplier; + // Do not shadow the property here. See discussion: + // https://github.com/mozilla/pdf.js/pull/2127#discussion_r1662280 this._shadowWidth = width; return width; }, diff --git a/test/driver.js b/test/driver.js index d10cda2a0..0997c7485 100644 --- a/test/driver.js +++ b/test/driver.js @@ -181,7 +181,7 @@ SimpleTextLayerBuilder.prototype = { ctx.stroke(); ctx.fill(); - var textContent = this.textContent.text[this.textCounter]; + var textContent = this.textContent.bidiTexts[this.textCounter].str; ctx.font = fontHeight + 'px ' + fontName; ctx.fillStyle = 'black'; ctx.fillText(textContent, geom.x, geom.y); diff --git a/web/viewer.js b/web/viewer.js index 23155e14f..6ba49db4b 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -1930,15 +1930,14 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { this.divContentDone = true; var textDivs = this.textDivs; - var textContent = this.textContent; - var text = textContent.text; - var dirs = textContent.dirs; + var bidiTexts = this.textContent.bidiTexts; - for (var i = 0; i < text.length; i++) { + for (var i = 0; i < bidiTexts.length; i++) { + var bidiText = bidiTexts[i]; var textDiv = textDivs[i]; - textDiv.textContent = text[i]; - textDiv.dir = dirs[i] ? 'ltr' : 'rtl'; + textDiv.textContent = bidiText.str; + textDiv.dir = bidiText.ltr ? 'ltr' : 'rtl'; } this.setupRenderLayoutTimer();