diff --git a/src/bidi.js b/src/bidi.js index cf0a3a4cc..5eb66bfd9 100644 --- a/src/bidi.js +++ b/src/bidi.js @@ -138,11 +138,16 @@ var bidi = PDFJS.bidi = (function bidiClosure() { } } - function bidi(text, startLevel) { - var str = text.str; + function BidiResult(str, isLTR) { + this.str = str; + this.ltr = isLTR; + } + + function bidi(str, startLevel) { + var isLTR = true; var strLength = str.length; if (strLength == 0) - return str; + return new BidiResult(str, ltr); // get types, fill arrays @@ -176,16 +181,16 @@ var bidi = PDFJS.bidi = (function bidiClosure() { // if less than 30% chars are rtl then string is primarily ltr // if more than 30% chars are rtl then string is primarily rtl if (numBidi == 0) { - text.direction = 'ltr'; - return str; + isLTR = true; + return new BidiResult(str, isLTR); } if (startLevel == -1) { if ((strLength / numBidi) < 0.3) { - text.direction = 'ltr'; + isLTR = true; startLevel = 0; } else { - text.direction = 'rtl'; + isLTR = false; startLevel = 1; } } @@ -438,7 +443,8 @@ var bidi = PDFJS.bidi = (function bidiClosure() { if (ch != '<' && ch != '>') result += ch; } - return result; + + return new BidiResult(result, isLTR); } return bidi; diff --git a/src/canvas.js b/src/canvas.js index 5f05604b9..9ccc7317e 100644 --- a/src/canvas.js +++ b/src/canvas.js @@ -677,9 +677,10 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { var textHScale2 = textHScale * fontMatrix[0]; var glyphsLength = glyphs.length; var textLayer = this.textLayer; - var text = {str: '', length: 0, canvasWidth: 0, geom: {}}; + var geom; var textSelection = textLayer && !skipTextSelection ? true : false; var textRenderingMode = current.textRenderingMode; + var canvasWidth = 0.0; // Type3 fonts - each glyph is a "mini-PDF" if (font.coded) { @@ -692,7 +693,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { if (textSelection) { this.save(); ctx.scale(1, -1); - text.geom = this.getTextGeometry(); + geom = this.getTextGeometry(); this.restore(); } for (var i = 0; i < glyphsLength; ++i) { @@ -718,9 +719,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { ctx.translate(width, 0); current.x += width * textHScale; - text.str += glyph.unicode; - text.length++; - text.canvasWidth += width; + canvasWidth += width; } ctx.restore(); } else { @@ -735,7 +734,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { lineWidth /= scale; if (textSelection) - text.geom = this.getTextGeometry(); + geom = this.getTextGeometry(); if (fontSizeScale != 1.0) { ctx.scale(fontSizeScale, fontSizeScale); @@ -784,17 +783,19 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { var glyphUnicode = glyph.unicode === ' ' ? '\u00A0' : glyph.unicode; if (glyphUnicode in NormalizedUnicodes) glyphUnicode = NormalizedUnicodes[glyphUnicode]; - text.str += reverseIfRtl(glyphUnicode); - text.canvasWidth += charWidth; + + canvasWidth += charWidth; } current.x += x * textHScale2; ctx.restore(); } - if (textSelection) - this.textLayer.appendText(text, font.fallbackName, fontSize); + if (textSelection) { + geom.canvasWidth = canvasWidth; + this.textLayer.appendText(font.fallbackName, fontSize, geom); + } - return text; + return canvasWidth; }, showSpacedText: function CanvasGraphics_showSpacedText(arr) { var ctx = this.ctx; @@ -806,7 +807,8 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { textHScale *= (current.fontMatrix || IDENTITY_MATRIX)[0]; var arrLength = arr.length; var textLayer = this.textLayer; - var text = {str: '', length: 0, canvasWidth: 0, geom: {}}; + var geom; + var canvasWidth = 0.0; var textSelection = textLayer ? true : false; if (textSelection) { @@ -819,7 +821,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { ctx.scale(textHScale, 1); } else this.applyTextTransforms(); - text.geom = this.getTextGeometry(); + geom = this.getTextGeometry(); ctx.restore(); } @@ -829,34 +831,22 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { var spacingLength = -e * 0.001 * fontSize * textHScale; current.x += spacingLength; - if (textSelection) { - // Emulate precise spacing via HTML spaces - text.canvasWidth += spacingLength; - if (e < 0 && text.geom.spaceWidth > 0) { // avoid div by zero - var numFakeSpaces = Math.round(-e / text.geom.spaceWidth); - if (numFakeSpaces > 0) { - text.str += '\u00A0'; - } - } - } + if (textSelection) + canvasWidth += spacingLength; } else if (isString(e)) { - var shownText = this.showText(e, true); + var shownCanvasWidth = this.showText(e, true); - if (textSelection) { - if (shownText.str === ' ') { - text.str += '\u00A0'; - } else { - text.str += shownText.str; - } - text.canvasWidth += shownText.canvasWidth; - } + if (textSelection) + canvasWidth += shownCanvasWidth; } else { error('TJ array element ' + e + ' is not string or num'); } } - if (textSelection) - this.textLayer.appendText(text, font.fallbackName, fontSize); + if (textSelection) { + geom.canvasWidth = canvasWidth; + this.textLayer.appendText(font.fallbackName, fontSize, geom); + } }, nextLineShowText: function CanvasGraphics_nextLineShowText(text) { this.nextLine(); diff --git a/src/evaluator.js b/src/evaluator.js index 3228043de..0b9bdf42d 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -164,6 +164,21 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { translated = { error: e }; } font.translated = translated; + + var data = translated; + if (data.loadCharProcs) { + delete data.loadCharProcs; + + var charProcs = font.get('CharProcs').getAll(); + var fontResources = font.get('Resources') || resources; + var charProcOperatorList = {}; + for (var key in charProcs) { + var glyphStream = charProcs[key]; + charProcOperatorList[key] = + this.getOperatorList(glyphStream, fontResources, dependency); + } + data.charProcOperatorList = charProcOperatorList; + } } return font; }, @@ -195,19 +210,6 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var loadedName = font.loadedName; if (!font.sent) { var data = font.translated; - if (data.loadCharProcs) { - delete data.loadCharProcs; - - var charProcs = font.get('CharProcs').getAll(); - var fontResources = font.get('Resources') || resources; - var charProcOperatorList = {}; - for (var key in charProcs) { - var glyphStream = charProcs[key]; - charProcOperatorList[key] = - self.getOperatorList(glyphStream, fontResources, dependency); - } - data.charProcOperatorList = charProcOperatorList; - } if (data instanceof Font) data = data.exportData(); @@ -505,7 +507,18 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { return queue; }, - getTextContent: function partialEvaluatorGetIRQueue(stream, resources) { + getTextContent: function partialEvaluatorGetIRQueue( + stream, resources, state) { + var bidiTexts; + + if (!state) { + bidiTexts = []; + state = { + bidiTexts: bidiTexts + }; + } else { + bidiTexts = state.bidiTexts; + } var self = this; var xref = this.xref; @@ -515,18 +528,20 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } resources = xref.fetchIfRef(resources) || new Dict(); + // The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd. + var xobjs = null; var parser = new Parser(new Lexer(stream), false); var res = resources; var args = [], obj; - var text = ''; var chunk = ''; var font = null; while (!isEOF(obj = parser.getObj())) { if (isCmd(obj)) { var cmd = obj.cmd; switch (cmd) { + // TODO: Add support for SAVE/RESTORE and XFORM here. case 'Tf': font = handleSetFont(args[0].name).translated; break; @@ -535,10 +550,11 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { for (var j = 0, jj = items.length; j < jj; j++) { if (typeof items[j] === 'string') { chunk += fontCharsToUnicode(items[j], font); - } else if (items[j] < 0) { - // making all negative offsets a space - better to have - // a space in incorrect place than not have them at all - chunk += ' '; + } else if (items[j] < 0 && font.spaceWidth > 0) { + var numFakeSpaces = Math.round(-items[j] / font.spaceWidth); + if (numFakeSpaces > 0) { + chunk += ' '; + } } } break; @@ -546,14 +562,69 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { chunk += fontCharsToUnicode(args[0], font); break; case "'": - chunk += fontCharsToUnicode(args[0], font) + ' '; + // For search, adding a extra white space for line breaks would be + // better here, but that causes too much spaces in the + // text-selection divs. + chunk += fontCharsToUnicode(args[0], font); break; case '"': - chunk += fontCharsToUnicode(args[2], font) + ' '; + // Note comment in "'" + chunk += fontCharsToUnicode(args[2], font); + break; + case 'Do': + // Set the chunk such that the following if won't add something + // to the state. + chunk = ''; + + if (args[0].code) { + break; + } + + if (!xobjs) { + xobjs = resources.get('XObject') || new Dict(); + } + + var name = args[0].name; + var xobj = xobjs.get(name); + if (!xobj) + break; + assertWellFormed(isStream(xobj), 'XObject should be a stream'); + + var type = xobj.dict.get('Subtype'); + assertWellFormed( + isName(type), + 'XObject should have a Name subtype' + ); + + if ('Form' !== type.name) + break; + + state = this.getTextContent( + xobj, + xobj.dict.get('Resources') || resources, + state + ); + break; + case 'gs': + var dictName = args[0]; + var extGState = resources.get('ExtGState'); + + if (!isDict(extGState) || !extGState.has(dictName.name)) + break; + + var gsState = extGState.get(dictName.name); + + for (var i = 0; i < gsState.length; i++) { + if (gsState[i] === 'Font') { + font = handleSetFont(args[0].name).translated; + } + } break; } // switch + if (chunk !== '') { - text += chunk; + bidiTexts.push(PDFJS.bidi(chunk, -1)); + chunk = ''; } @@ -562,9 +633,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { assertWellFormed(args.length <= 33, 'Too many arguments'); args.push(obj); } - } + } // while - return text; + return state; }, extractDataStructures: function diff --git a/src/fonts.js b/src/fonts.js index b9adc48b2..17d847a8d 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -3886,6 +3886,10 @@ var Font = (function FontClosure() { }, get spaceWidth() { + if ('_shadowWidth' in this) { + return this._shadowWidth; + } + // trying to estimate space character width var possibleSpaceReplacements = ['space', 'minus', 'one', 'i']; var width; @@ -3913,7 +3917,10 @@ var Font = (function FontClosure() { break; // the non-zero width found } width = (width || this.defaultWidth) * this.widthMultiplier; - return shadow(this, 'spaceWidth', width); + // Do not shadow the property here. See discussion: + // https://github.com/mozilla/pdf.js/pull/2127#discussion_r1662280 + this._shadowWidth = width; + return width; }, charToGlyph: function Font_charToGlyph(charcode) { diff --git a/test/driver.js b/test/driver.js index 998527807..0997c7485 100644 --- a/test/driver.js +++ b/test/driver.js @@ -159,6 +159,7 @@ NullTextLayerBuilder.prototype = { function SimpleTextLayerBuilder(ctx, viewport) { this.ctx = ctx; this.viewport = viewport; + this.textCounter = 0; } SimpleTextLayerBuilder.prototype = { beginLayout: function SimpleTextLayerBuilder_BeginLayout() { @@ -167,27 +168,31 @@ SimpleTextLayerBuilder.prototype = { endLayout: function SimpleTextLayerBuilder_EndLayout() { this.ctx.restore(); }, - appendText: function SimpleTextLayerBuilder_AppendText(text, fontName, - fontSize) { + appendText: function SimpleTextLayerBuilder_AppendText(fontName, fontSize, + geom) { var ctx = this.ctx, viewport = this.viewport; // vScale and hScale already contain the scaling to pixel units - var fontHeight = fontSize * text.geom.vScale; + var fontHeight = fontSize * geom.vScale; ctx.beginPath(); ctx.strokeStyle = 'red'; ctx.fillStyle = 'yellow'; - ctx.rect(text.geom.x, text.geom.y - fontHeight, - text.canvasWidth * text.geom.hScale, fontHeight); + ctx.rect(geom.x, geom.y - fontHeight, + geom.canvasWidth * geom.hScale, fontHeight); ctx.stroke(); ctx.fill(); - var textContent = bidi(text, -1); + var textContent = this.textContent.bidiTexts[this.textCounter].str; ctx.font = fontHeight + 'px ' + fontName; ctx.fillStyle = 'black'; - ctx.fillText(textContent, text.geom.x, text.geom.y); + ctx.fillText(textContent, geom.x, geom.y); + + this.textCounter++; + }, + setTextContent: function SimpleTextLayerBuilder_SetTextContent(textContent) { + this.textContent = textContent; } }; - function nextPage(task, loadError) { var failure = loadError || ''; @@ -245,6 +250,10 @@ function nextPage(task, loadError) { drawContext = dummyCanvas.getContext('2d'); // ... text builder will draw its content on the test canvas textLayerBuilder = new SimpleTextLayerBuilder(ctx, viewport); + + page.getTextContent().then(function(textContent) { + textLayerBuilder.setTextContent(textContent); + }); } else { drawContext = ctx; textLayerBuilder = new NullTextLayerBuilder(); diff --git a/web/viewer.css b/web/viewer.css index c1e7aaf3d..0f615094a 100644 --- a/web/viewer.css +++ b/web/viewer.css @@ -121,7 +121,7 @@ html[dir='rtl'] .innerCenter { -o-transition-timing-function: ease; transition-duration: 200ms; transition-timing-function: ease; - + } html[dir='ltr'] #sidebarContainer { -webkit-transition-property: left; @@ -629,7 +629,7 @@ html[dir='rtl'] .toolbarButton:first-child { display: inline-block; content: url(images/toolbarButton-sidebarToggle.png); } - + html[dir='ltr'] .toolbarButton.pageUp::before { display: inline-block; content: url(images/toolbarButton-pageUp.png); @@ -639,7 +639,7 @@ html[dir='rtl'] .toolbarButton.pageUp::before { display: inline-block; content: url(images/toolbarButton-pageUp-rtl.png); } - + html[dir='ltr'] .toolbarButton.pageDown::before { display: inline-block; content: url(images/toolbarButton-pageDown.png); @@ -654,7 +654,7 @@ html[dir='rtl'] .toolbarButton.pageDown::before { display: inline-block; content: url(images/toolbarButton-zoomOut.png); } - + .toolbarButton.zoomIn::before { display: inline-block; content: url(images/toolbarButton-zoomIn.png); @@ -691,12 +691,12 @@ html[dir='rtl'] .toolbarButton.pageDown::before { .toolbarButton.bookmark::before { content: url(images/toolbarButton-bookmark.png); } - + #viewThumbnail.toolbarButton::before { display: inline-block; content: url(images/toolbarButton-viewThumbnail.png); } - + #viewOutline.toolbarButton::before { display: inline-block; content: url(images/toolbarButton-viewOutline.png); @@ -797,7 +797,7 @@ html[dir='rtl'] .toolbarButton.pageDown::before { padding: 7px; -moz-transition-duration: 150ms; } - + a:focus > .thumbnail > .thumbnailSelectionRing > .thumbnailImage, .thumbnail:hover > .thumbnailSelectionRing > .thumbnailImage { opacity: .9; @@ -1016,7 +1016,7 @@ canvas { background: -moz-linear-gradient(top, #b2b2b2 0%,#898989 100%); background: -ms-linear-gradient(top, #b2b2b2 0%,#898989 100%); background: -o-linear-gradient(top, #b2b2b2 0%,#898989 100%); - background: linear-gradient(top, #b2b2b2 0%,#898989 100%); + background: linear-gradient(top, #b2b2b2 0%,#898989 100%); border-top-left-radius: 2px; border-bottom-left-radius: 2px; @@ -1066,6 +1066,7 @@ canvas { color: transparent; position: absolute; line-height:1.3; + white-space:pre; } /* TODO: file FF bug to support ::-moz-selection:window-inactive @@ -1202,7 +1203,7 @@ canvas { @page { margin: 0; -} +} #printContainer { display: none; diff --git a/web/viewer.js b/web/viewer.js index dbf7585b8..2662adff7 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -1040,7 +1040,7 @@ var PDFView = { function extractPageText(pageIndex) { self.pages[pageIndex].pdfPage.getTextContent().then( function textContentResolved(textContent) { - self.pageText[pageIndex] = textContent; + self.pageText[pageIndex] = textContent.join(''); self.search(); if ((pageIndex + 1) < self.pages.length) extractPageText(pageIndex + 1); @@ -1228,6 +1228,8 @@ var PageView = function pageView(container, pdfPage, id, scale, this.renderingState = RenderingStates.INITIAL; this.resume = null; + this.textContent = null; + var anchor = document.createElement('a'); anchor.name = '' + this.id; @@ -1448,6 +1450,13 @@ var PageView = function pageView(container, pdfPage, id, scale, }, 0); }; + this.getTextContent = function pageviewGetTextContent() { + if (!this.textContent) { + this.textContent = this.pdfPage.getTextContent(); + } + return this.textContent; + }; + this.draw = function pageviewDraw(callback) { if (this.renderingState !== RenderingStates.INITIAL) error('Must be in new state before drawing'); @@ -1528,6 +1537,14 @@ var PageView = function pageView(container, pdfPage, id, scale, } ); + if (textLayer) { + this.getTextContent().then( + function textContentResolved(textContent) { + textLayer.setTextContent(textContent); + } + ); + } + setupAnnotations(this.pdfPage, this.viewport); div.setAttribute('data-loaded', true); }; @@ -1820,12 +1837,19 @@ var CustomStyle = (function CustomStyleClosure() { var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { var textLayerFrag = document.createDocumentFragment(); this.textLayerDiv = textLayerDiv; + this.layoutDone = false; + this.divContentDone = false; this.beginLayout = function textLayerBuilderBeginLayout() { this.textDivs = []; this.textLayerQueue = []; }; + this.endLayout = function textLayerBuilderEndLayout() { + this.layoutDone = true; + this.insertDivContent(); + }, + this.renderLayer = function textLayerBuilderRenderLayer() { var self = this; var textDivs = this.textDivs; @@ -1857,7 +1881,7 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { textLayerDiv.appendChild(textLayerFrag); }; - this.endLayout = function textLayerBuilderEndLayout() { + this.setupRenderLayoutTimer = function textLayerSetupRenderLayoutTimer() { // Schedule renderLayout() if user has been scrolling, otherwise // run it right away var kRenderDelay = 200; // in ms @@ -1870,27 +1894,56 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { if (this.renderTimer) clearTimeout(this.renderTimer); this.renderTimer = setTimeout(function() { - self.endLayout(); + self.setupRenderLayoutTimer(); }, kRenderDelay); } - }; // endLayout + }; - this.appendText = function textLayerBuilderAppendText(text, - fontName, fontSize) { + this.appendText = function textLayerBuilderAppendText(fontName, fontSize, + geom) { var textDiv = document.createElement('div'); // vScale and hScale already contain the scaling to pixel units - var fontHeight = fontSize * text.geom.vScale; - textDiv.dataset.canvasWidth = text.canvasWidth * text.geom.hScale; + var fontHeight = fontSize * geom.vScale; + textDiv.dataset.canvasWidth = geom.canvasWidth * geom.hScale; + textDiv.dataset.fontName = fontName; textDiv.style.fontSize = fontHeight + 'px'; textDiv.style.fontFamily = fontName; - textDiv.style.left = text.geom.x + 'px'; - textDiv.style.top = (text.geom.y - fontHeight) + 'px'; - textDiv.textContent = PDFJS.bidi(text, -1); - textDiv.dir = text.direction; + textDiv.style.left = geom.x + 'px'; + textDiv.style.top = (geom.y - fontHeight) + 'px'; + + // The content of the div is set in the `setTextContent` function. + this.textDivs.push(textDiv); }; + + this.insertDivContent = function textLayerUpdateTextContent() { + // Only set the content of the divs once layout has finished, the content + // for the divs is available and content is not yet set on the divs. + if (!this.layoutDone || this.divContentDone || !this.textContent) + return; + + this.divContentDone = true; + + var textDivs = this.textDivs; + var bidiTexts = this.textContent.bidiTexts; + + for (var i = 0; i < bidiTexts.length; i++) { + var bidiText = bidiTexts[i]; + var textDiv = textDivs[i]; + + textDiv.textContent = bidiText.str; + textDiv.dir = bidiText.ltr ? 'ltr' : 'rtl'; + } + + this.setupRenderLayoutTimer(); + }; + + this.setTextContent = function textLayerBuilderSetTextContent(textContent) { + this.textContent = textContent; + this.insertDivContent(); + }; }; document.addEventListener('DOMContentLoaded', function webViewerLoad(evt) {