diff --git a/README.md b/README.md index f3500ae4d..7e5d2eeb3 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # pdf.js - + ## Overview @@ -205,3 +205,4 @@ a "PDF Reference" from Adobe: Recommended chapters to read: "2. Overview", "3.4 File Structure", "4.1 Graphics Objects" that lists the PDF commands. + diff --git a/src/canvas.js b/src/canvas.js index fdb537372..e056fe0f2 100644 --- a/src/canvas.js +++ b/src/canvas.js @@ -638,17 +638,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { geometry.hScale = tr[0] - bl[0]; geometry.vScale = tr[1] - bl[1]; } - var spaceGlyph = font.charsToGlyphs(' '); - - // Hack (sometimes space is not encoded) - if (spaceGlyph.length === 0 || spaceGlyph[0].width === 0) - spaceGlyph = font.charsToGlyphs('i'); - - // Fallback - if (spaceGlyph.length === 0 || spaceGlyph[0].width === 0) - spaceGlyph = [{width: 0}]; - - geometry.spaceWidth = spaceGlyph[0].width; + geometry.spaceWidth = font.spaceWidth; return geometry; }, @@ -687,13 +677,6 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { var textSelection = textLayer && !skipTextSelection ? true : false; var textRenderingMode = current.textRenderingMode; - if (textSelection) { - ctx.save(); - this.applyTextTransforms(); - text.geom = this.getTextGeometry(); - ctx.restore(); - } - // Type3 fonts - each glyph is a "mini-PDF" if (font.coded) { ctx.save(); @@ -701,6 +684,13 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { ctx.translate(current.x, current.y); ctx.scale(textHScale, 1); + + if (textSelection) { + this.save(); + ctx.scale(1, -1); + text.geom = this.getTextGeometry(); + this.restore(); + } for (var i = 0; i < glyphsLength; ++i) { var glyph = glyphs[i]; @@ -720,7 +710,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { var width = transformed[0] * fontSize + charSpacing; ctx.translate(width, 0); - current.x += width * textHScale2; + current.x += width * textHScale; text.str += glyph.unicode; text.length++; @@ -730,6 +720,8 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { } else { ctx.save(); this.applyTextTransforms(); + if (textSelection) + text.geom = this.getTextGeometry(); var width = 0; for (var i = 0; i < glyphsLength; ++i) { @@ -780,18 +772,26 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { showSpacedText: function canvasGraphicsShowSpacedText(arr) { var ctx = this.ctx; var current = this.current; + var font = current.font; var fontSize = current.fontSize; - var textHScale2 = current.textHScale * - (current.font.fontMatrix || IDENTITY_MATRIX)[0]; + var textHScale = current.textHScale; + if (!font.coded) + textHScale *= (font.fontMatrix || IDENTITY_MATRIX)[0]; var arrLength = arr.length; var textLayer = this.textLayer; - var font = current.font; var text = {str: '', length: 0, canvasWidth: 0, geom: {}}; var textSelection = textLayer ? true : false; if (textSelection) { ctx.save(); - this.applyTextTransforms(); + // Type3 fonts - each glyph is a "mini-PDF" (see also showText) + if (font.coded) { + ctx.transform.apply(ctx, current.textMatrix); + ctx.scale(1, -1); + ctx.translate(current.x, -1 * current.y); + ctx.scale(textHScale, 1); + } else + this.applyTextTransforms(); text.geom = this.getTextGeometry(); ctx.restore(); } @@ -799,7 +799,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { for (var i = 0; i < arrLength; ++i) { var e = arr[i]; if (isNum(e)) { - var spacingLength = -e * 0.001 * fontSize * textHScale2; + var spacingLength = -e * 0.001 * fontSize * textHScale; current.x += spacingLength; if (textSelection) { @@ -807,9 +807,10 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { text.canvasWidth += spacingLength; if (e < 0 && text.geom.spaceWidth > 0) { // avoid div by zero var numFakeSpaces = Math.round(-e / text.geom.spaceWidth); - for (var j = 0; j < numFakeSpaces; ++j) + if (numFakeSpaces > 0) { text.str += ' '; - text.length += numFakeSpaces > 0 ? 1 : 0; + text.length++; + } } } } else if (isString(e)) { @@ -1103,7 +1104,11 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { this.restore(); }, - paintImageXObject: function canvasGraphicsPaintImageXObject(imgData) { + paintImageXObject: function canvasGraphicsPaintImageXObject(objId) { + var imgData = this.objs.get(objId); + if (!imgData) { + error('Dependent image isn\'t ready yet'); + } this.save(); var ctx = this.ctx; var w = imgData.width; diff --git a/src/core.js b/src/core.js index 6a932f127..b498401d1 100644 --- a/src/core.js +++ b/src/core.js @@ -602,6 +602,10 @@ var PDFDoc = (function PDFDocClosure() { var imageData = data[2]; loadJpegStream(id, imageData, this.objs); break; + case 'Image': + var imageData = data[2]; + this.objs.resolve(id, imageData); + break; case 'Font': var name = data[2]; var file = data[3]; @@ -647,11 +651,46 @@ var PDFDoc = (function PDFDocClosure() { }, this); messageHandler.on('text_extracted', function pdfDocError(data) { - var index = data.index; + var index = data[0]; if (this.textExtracted) this.textExtracted(index); }, this); + messageHandler.on('jpeg_decode', function(data, promise) { + var imageData = data[0]; + var components = data[1]; + if (components != 3 && components != 1) + error('Only 3 component or 1 component can be returned'); + + var img = new Image(); + img.onload = (function jpegImageLoaderOnload() { + var width = img.width; + var height = img.height; + var size = width * height; + var rgbaLength = size * 4; + var buf = new Uint8Array(size * components); + var tmpCanvas = new ScratchCanvas(width, height); + var tmpCtx = tmpCanvas.getContext('2d'); + tmpCtx.drawImage(img, 0, 0); + var data = tmpCtx.getImageData(0, 0, width, height).data; + + if (components == 3) { + for (var i = 0, j = 0; i < rgbaLength; i += 4, j += 3) { + buf[j] = data[i]; + buf[j + 1] = data[i + 1]; + buf[j + 2] = data[i + 2]; + } + } else if (components == 1) { + for (var i = 0, j = 0; i < rgbaLength; i += 4, j++) { + buf[j] = data[i]; + } + } + promise.resolve({ data: buf, width: width, height: height}); + }).bind(this); + var src = 'data:image/jpeg;base64,' + window.btoa(imageData); + img.src = src; + }); + setTimeout(function pdfDocFontReadySetTimeout() { messageHandler.send('doc', this.data); this.workerReadyPromise.resolve(true); diff --git a/src/evaluator.js b/src/evaluator.js index 553a04364..7e1dd3083 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -184,62 +184,52 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var w = dict.get('Width', 'W'); var h = dict.get('Height', 'H'); - if (image instanceof JpegStream && image.isNative) { - var objId = 'img_' + uniquePrefix + (++self.objIdCounter); - handler.send('obj', [objId, 'JpegStream', image.getIR()]); + var imageMask = dict.get('ImageMask', 'IM') || false; + if (imageMask) { + // This depends on a tmpCanvas beeing filled with the + // current fillStyle, such that processing the pixel + // data can't be done here. Instead of creating a + // complete PDFImage, only read the information needed + // for later. - // Add the dependency on the image object. - insertDependency([objId]); - - // The normal fn. - fn = 'paintJpegXObject'; - args = [objId, w, h]; + var width = dict.get('Width', 'W'); + var height = dict.get('Height', 'H'); + var bitStrideLength = (width + 7) >> 3; + var imgArray = image.getBytes(bitStrideLength * height); + var decode = dict.get('Decode', 'D'); + var inverseDecode = !!decode && decode[0] > 0; + fn = 'paintImageMaskXObject'; + args = [imgArray, inverseDecode, width, height]; return; } - // Needs to be rendered ourself. - - // Figure out if the image has an imageMask. - var imageMask = dict.get('ImageMask', 'IM') || false; - // If there is no imageMask, create the PDFImage and a lot // of image processing can be done here. - if (!imageMask) { - var imageObj = new PDFImage(xref, resources, image, inline); + var objId = 'img_' + uniquePrefix + (++self.objIdCounter); + insertDependency([objId]); + args = [objId, w, h]; - if (imageObj.imageMask) { - throw 'Can\'t handle this in the web worker :/'; - } - - var imgData = { - width: w, - height: h, - data: new Uint8Array(w * h * 4) - }; - var pixels = imgData.data; - imageObj.fillRgbaBuffer(pixels, imageObj.decode); - - fn = 'paintImageXObject'; - args = [imgData]; + var softMask = dict.get('SMask', 'IM') || false; + if (!softMask && image instanceof JpegStream && image.isNative) { + // These JPEGs don't need any more processing so we can just send it. + fn = 'paintJpegXObject'; + handler.send('obj', [objId, 'JpegStream', image.getIR()]); return; } - // This depends on a tmpCanvas beeing filled with the - // current fillStyle, such that processing the pixel - // data can't be done here. Instead of creating a - // complete PDFImage, only read the information needed - // for later. - fn = 'paintImageMaskXObject'; + fn = 'paintImageXObject'; - var width = dict.get('Width', 'W'); - var height = dict.get('Height', 'H'); - var bitStrideLength = (width + 7) >> 3; - var imgArray = image.getBytes(bitStrideLength * height); - var decode = dict.get('Decode', 'D'); - var inverseDecode = !!decode && decode[0] > 0; - - args = [imgArray, inverseDecode, width, height]; + PDFImage.buildImage(function(imageObj) { + var imgData = { + width: w, + height: h, + data: new Uint8Array(w * h * 4) + }; + var pixels = imgData.data; + imageObj.fillRgbaBuffer(pixels, imageObj.decode); + handler.send('obj', [objId, 'Image', imgData]); + }, handler, xref, resources, image, inline); } uniquePrefix = uniquePrefix || ''; diff --git a/src/fonts.js b/src/fonts.js index 3c65a1a07..6bbbaf014 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -1833,6 +1833,10 @@ var Font = (function FontClosure() { var i = unassignedUnicodeItems[j]; while (unusedUnicode in usedUnicodes) unusedUnicode++; + var cid = i + 1; + // override only if unicode mapping is not specified + if (!(cid in toUnicode)) + toUnicode[cid] = unusedUnicode; glyphs[i].unicode = unusedUnicode++; } this.useToUnicode = true; @@ -2139,6 +2143,37 @@ var Font = (function FontClosure() { return rule; }, + get spaceWidth() { + // trying to estimate space character width + var possibleSpaceReplacements = ['space', 'minus', 'one', 'i']; + var width; + for (var i = 0, ii = possibleSpaceReplacements.length; i < ii; i++) { + var glyphName = possibleSpaceReplacements[i]; + // if possible, getting width by glyph name + if (glyphName in this.widths) { + width = this.widths[glyphName]; + break; + } + var glyphUnicode = GlyphsUnicode[glyphName]; + // finding the charcode via unicodeToCID map + var charcode = 0; + if (this.composite) + charcode = this.unicodeToCID[glyphUnicode]; + // ... via toUnicode map + if (!charcode && 'toUnicode' in this) + charcode = this.toUnicode.indexOf(glyphUnicode); + // setting it to unicode if negative or undefined + if (!(charcode > 0)) + charcode = glyphUnicode; + // trying to get width via charcode + width = this.widths[charcode]; + if (width) + break; // the non-zero width found + } + width = (width || this.defaultWidth) * this.widthMultiplier; + return shadow(this, 'spaceWidth', width); + }, + charToGlyph: function fonts_charToGlyph(charcode) { var fontChar, width, codeIRQueue; diff --git a/src/image.js b/src/image.js index 15c31b034..987542c58 100644 --- a/src/image.js +++ b/src/image.js @@ -4,7 +4,27 @@ 'use strict'; var PDFImage = (function PDFImageClosure() { - function PDFImage(xref, res, image, inline) { + /** + * Decode the image in the main thread if it supported. Resovles the promise + * when the image data is ready. + */ + function handleImageData(handler, xref, res, image, promise) { + if (image instanceof JpegStream && image.isNative) { + // For natively supported jpegs send them to the main thread for decoding. + var dict = image.dict; + var colorSpace = dict.get('ColorSpace', 'CS'); + colorSpace = ColorSpace.parse(colorSpace, xref, res); + var numComps = colorSpace.numComps; + handler.send('jpeg_decode', [image.getIR(), numComps], function(message) { + var data = message.data; + var stream = new Stream(data, 0, data.length, image.dict); + promise.resolve(stream); + }); + } else { + promise.resolve(image); + } + } + function PDFImage(xref, res, image, inline, smask) { this.image = image; if (image.getParams) { // JPX/JPEG2000 streams directly contain bits per component @@ -51,14 +71,37 @@ var PDFImage = (function PDFImageClosure() { this.decode = dict.get('Decode', 'D'); var mask = xref.fetchIfRef(dict.get('Mask')); - var smask = xref.fetchIfRef(dict.get('SMask')); if (mask) { TODO('masked images'); } else if (smask) { - this.smask = new PDFImage(xref, res, smask); + this.smask = new PDFImage(xref, res, smask, false); } } + /** + * Handles processing of image data and calls the callback with an argument + * of a PDFImage when the image is ready to be used. + */ + PDFImage.buildImage = function buildImage(callback, handler, xref, res, + image, inline) { + var imageDataPromise = new Promise(); + var smaskPromise = new Promise(); + // The image data and smask data may not be ready yet, wait till both are + // resolved. + Promise.all([imageDataPromise, smaskPromise]).then(function(results) { + var imageData = results[0], smaskData = results[1]; + var image = new PDFImage(xref, res, imageData, inline, smaskData); + callback(image); + }); + + handleImageData(handler, xref, res, image, imageDataPromise); + + var smask = xref.fetchIfRef(image.dict.get('SMask')); + if (smask) + handleImageData(handler, xref, res, smask, smaskPromise); + else + smaskPromise.resolve(null); + }; PDFImage.prototype = { getComponents: function getComponents(buffer, decodeMap) { @@ -130,18 +173,6 @@ var PDFImage = (function PDFImageClosure() { var buf = new Uint8Array(width * height); if (smask) { - if (smask.image.src) { - // smask is a DOM image - var tempCanvas = new ScratchCanvas(width, height); - var tempCtx = tempCanvas.getContext('2d'); - var domImage = smask.image; - tempCtx.drawImage(domImage, 0, 0, domImage.width, domImage.height, - 0, 0, width, height); - var data = tempCtx.getImageData(0, 0, width, height).data; - for (var i = 0, j = 0, ii = width * height; i < ii; ++i, j += 4) - buf[i] = data[j]; // getting first component value - return buf; - } var sw = smask.width; var sh = smask.height; if (sw != this.width || sh != this.height) @@ -159,8 +190,7 @@ var PDFImage = (function PDFImageClosure() { applyStencilMask: function applyStencilMask(buffer, inverseDecode) { var width = this.width, height = this.height; var bitStrideLength = (width + 7) >> 3; - this.image.reset(); - var imgArray = this.image.getBytes(bitStrideLength * height); + var imgArray = this.getImageBytes(bitStrideLength * height); var imgArrayPos = 0; var i, j, mask, buf; // removing making non-masked pixels transparent @@ -188,8 +218,7 @@ var PDFImage = (function PDFImageClosure() { // rows start at byte boundary; var rowBytes = (width * numComps * bpc + 7) >> 3; - this.image.reset(); - var imgArray = this.image.getBytes(height * rowBytes); + var imgArray = this.getImageBytes(height * rowBytes); var comps = this.colorSpace.getRgbBuffer( this.getComponents(imgArray, decodeMap), bpc); @@ -216,14 +245,17 @@ var PDFImage = (function PDFImageClosure() { // rows start at byte boundary; var rowBytes = (width * numComps * bpc + 7) >> 3; - this.image.reset(); - var imgArray = this.image.getBytes(height * rowBytes); + var imgArray = this.getImageBytes(height * rowBytes); var comps = this.getComponents(imgArray); var length = width * height; for (var i = 0; i < length; ++i) buffer[i] = comps[i]; + }, + getImageBytes: function getImageBytes(length) { + this.image.reset(); + return this.image.getBytes(length); } }; return PDFImage; diff --git a/src/parser.js b/src/parser.js index 6ffae0b1c..695438379 100644 --- a/src/parser.js +++ b/src/parser.js @@ -249,7 +249,7 @@ var Parser = (function ParserClosure() { if (name == 'CCITTFaxDecode' || name == 'CCF') { return new CCITTFaxStream(stream, params); } - TODO('filter "' + name + '" not supported yet'); + warn('filter "' + name + '" not supported yet'); return stream; } }; diff --git a/src/util.js b/src/util.js index 88b9f0b45..57dbca4bb 100644 --- a/src/util.js +++ b/src/util.js @@ -217,7 +217,33 @@ var Promise = (function PromiseClosure() { } this.callbacks = []; }; - + /** + * Builds a promise that is resolved when all the passed in promises are + * resolved. + * @param {Promise[]} promises Array of promises to wait for. + * @return {Promise} New dependant promise. + */ + Promise.all = function(promises) { + var deferred = new Promise(); + var unresolved = promises.length; + var results = []; + if (unresolved === 0) { + deferred.resolve(results); + return deferred; + } + for (var i = 0; i < unresolved; ++i) { + var promise = promises[i]; + promise.then((function(i) { + return function(value) { + results[i] = value; + unresolved--; + if (unresolved === 0) + deferred.resolve(results); + }; + })(i)); + } + return deferred; + }; Promise.prototype = { hasData: false, diff --git a/src/worker.js b/src/worker.js index 3cc91d07e..dea6339d1 100644 --- a/src/worker.js +++ b/src/worker.js @@ -6,6 +6,8 @@ function MessageHandler(name, comObj) { this.name = name; this.comObj = comObj; + this.callbackIndex = 1; + var callbacks = this.callbacks = {}; var ah = this.actionHandler = {}; ah['console_log'] = [function ahConsoleLog(data) { @@ -14,11 +16,33 @@ function MessageHandler(name, comObj) { ah['console_error'] = [function ahConsoleError(data) { console.error.apply(console, data); }]; + comObj.onmessage = function messageHandlerComObjOnMessage(event) { var data = event.data; - if (data.action in ah) { + if (data.isReply) { + var callbackId = data.callbackId; + if (data.callbackId in callbacks) { + var callback = callbacks[callbackId]; + delete callbacks[callbackId]; + callback(data.data); + } else { + throw 'Cannot resolve callback ' + callbackId; + } + } else if (data.action in ah) { var action = ah[data.action]; - action[0].call(action[1], data.data); + if (data.callbackId) { + var promise = new Promise(); + promise.then(function(resolvedData) { + comObj.postMessage({ + isReply: true, + callbackId: data.callbackId, + data: resolvedData + }); + }); + action[0].call(action[1], data.data, promise); + } else { + action[0].call(action[1], data.data); + } } else { throw 'Unkown action from worker: ' + data.action; } @@ -33,12 +57,23 @@ MessageHandler.prototype = { } ah[actionName] = [handler, scope]; }, - - send: function messageHandlerSend(actionName, data) { - this.comObj.postMessage({ + /** + * Sends a message to the comObj to invoke the action with the supplied data. + * @param {String} actionName Action to call. + * @param {JSON} data JSON data to send. + * @param {function} [callback] Optional callback that will handle a reply. + */ + send: function messageHandlerSend(actionName, data, callback) { + var message = { action: actionName, data: data - }); + }; + if (callback) { + var callbackId = this.callbackIndex++; + this.callbacks[callbackId] = callback; + message.callbackId = callbackId; + } + this.comObj.postMessage(message); } }; @@ -170,7 +205,7 @@ var WorkerMessageHandler = { if (pageNum > numPages) { console.log('text indexing=: time=%dms', Date.now() - start); - handler.send('text_extracted', { index: index }); + handler.send('text_extracted', [index]); return; } diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index d3caa968a..23ba6340e 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -19,3 +19,4 @@ !issue840.pdf !scan-bad.pdf !freeculture.pdf +!issue918.pdf diff --git a/test/pdfs/aboutstacks.pdf.link b/test/pdfs/aboutstacks.pdf.link new file mode 100644 index 000000000..8b04ec042 --- /dev/null +++ b/test/pdfs/aboutstacks.pdf.link @@ -0,0 +1 @@ +http://greenhousechallenge.org/media/item/313/38/About-Stacks.pdf diff --git a/test/pdfs/issue918.pdf b/test/pdfs/issue918.pdf new file mode 100644 index 000000000..ac1a9c37f Binary files /dev/null and b/test/pdfs/issue918.pdf differ diff --git a/test/pdfs/issue919.pdf.link b/test/pdfs/issue919.pdf.link new file mode 100644 index 000000000..683001139 --- /dev/null +++ b/test/pdfs/issue919.pdf.link @@ -0,0 +1 @@ +http://agb.traviangames.com/Travian_AR_Terms.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index 4d55ac0f7..7469db678 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -340,5 +340,26 @@ "link": true, "pageLimit": 3, "type": "eq" + }, + { "id": "issue919", + "file": "pdfs/issue919.pdf", + "md5": "3a1716a512aca4d7a8d6106bd4885d14", + "rounds": 1, + "link": true, + "pageLimit": 3, + "type": "eq" + }, + { "id": "issue918", + "file": "pdfs/issue918.pdf", + "md5": "d582cc0f2592ae82936589ced2a47e55", + "rounds": 1, + "type": "eq" + }, + { "id": "aboutstacks", + "file": "pdfs/aboutstacks.pdf", + "md5": "6e7c8416a293ba2d83bc8dd20c6ccf51", + "rounds": 1, + "link": true, + "type": "eq" } ] diff --git a/web/viewer.js b/web/viewer.js index 340586a94..e75eb2c32 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -612,7 +612,7 @@ var PageView = function pageView(container, content, id, pageWidth, pageHeight, ]; if (scale && scale !== PDFView.currentScale) - PDFView.setScale(scale, true); + PDFView.parseScale(scale, true); setTimeout(function pageViewScrollIntoViewRelayout() { // letting page to re-layout before scrolling