diff --git a/src/core/fonts.js b/src/core/fonts.js index 6f934484f..a721b0b17 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -2203,6 +2203,10 @@ var ToUnicodeMap = (function ToUnicodeMapClosure() { } }, + has: function(i) { + return this._map[i] !== undefined; + }, + get: function(i) { return this._map[i]; }, @@ -2232,6 +2236,10 @@ var IdentityToUnicodeMap = (function IdentityToUnicodeMapClosure() { } }, + has: function (i) { + return this.firstChar <= i && i <= this.lastChar; + }, + get: function (i) { if (this.firstChar <= i && i <= this.lastChar) { return String.fromCharCode(i); @@ -2664,7 +2672,6 @@ var Font = (function FontClosure() { var isSymbolic = !!(properties.flags & FontFlags.Symbolic); var isIdentityUnicode = properties.toUnicode instanceof IdentityToUnicodeMap; - var isCidFontType2 = (properties.type === 'CIDFontType2'); var newMap = Object.create(null); var toFontChar = []; var usedFontCharCodes = []; @@ -2675,17 +2682,11 @@ var Font = (function FontClosure() { var fontCharCode = originalCharCode; // First try to map the value to a unicode position if a non identity map // was created. - if (!isIdentityUnicode) { - if (toUnicode.get(originalCharCode) !== undefined) { - var unicode = toUnicode.get(fontCharCode); - // TODO: Try to map ligatures to the correct spot. - if (unicode.length === 1) { - fontCharCode = unicode.charCodeAt(0); - } - } else if (isCidFontType2) { - // For CIDFontType2, move characters not present in toUnicode - // to the private use area (fixes bug 1028735 and issue 4881). - fontCharCode = nextAvailableFontCharCode; + if (!isIdentityUnicode && toUnicode.has(originalCharCode)) { + var unicode = toUnicode.get(fontCharCode); + // TODO: Try to map ligatures to the correct spot. + if (unicode.length === 1) { + fontCharCode = unicode.charCodeAt(0); } } // Try to move control characters, special characters and already mapped @@ -3530,6 +3531,7 @@ var Font = (function FontClosure() { var newGlyfData = new Uint8Array(oldGlyfDataLength); var startOffset = itemDecode(locaData, 0); var writeOffset = 0; + var missingGlyphData = {}; itemEncode(locaData, 0, writeOffset); var i, j; for (i = 0, j = itemSize; i < numGlyphs; i++, j += itemSize) { @@ -3547,6 +3549,10 @@ var Font = (function FontClosure() { continue; } + if (startOffset === endOffset) { + missingGlyphData[i] = true; + } + var newLength = sanitizeGlyph(oldGlyfData, startOffset, endOffset, newGlyfData, writeOffset, hintsValid); writeOffset += newLength; @@ -3563,7 +3569,7 @@ var Font = (function FontClosure() { itemEncode(locaData, j, simpleGlyph.length); } glyf.data = simpleGlyph; - return; + return missingGlyphData; } if (dupFirstEntry) { @@ -3580,6 +3586,7 @@ var Font = (function FontClosure() { } else { glyf.data = newGlyfData.subarray(0, writeOffset); } + return missingGlyphData; } function readPostScriptTable(post, properties, maxpNumGlyphs) { @@ -4039,11 +4046,13 @@ var Font = (function FontClosure() { sanitizeHead(tables.head, numGlyphs, isTrueType ? tables.loca.length : 0); + var missingGlyphs = {}; if (isTrueType) { var isGlyphLocationsLong = int16(tables.head.data[50], tables.head.data[51]); - sanitizeGlyphLocations(tables.loca, tables.glyf, numGlyphs, - isGlyphLocationsLong, hintsValid, dupFirstEntry); + missingGlyphs = sanitizeGlyphLocations(tables.loca, tables.glyf, + numGlyphs, isGlyphLocationsLong, + hintsValid, dupFirstEntry); } if (!tables.hhea) { @@ -4065,19 +4074,33 @@ var Font = (function FontClosure() { } } - var charCodeToGlyphId = [], charCode; + var charCodeToGlyphId = [], charCode, toUnicode = properties.toUnicode; + + function hasGlyph(glyphId, charCode) { + if (!missingGlyphs[glyphId]) { + return true; + } + if (charCode >= 0 && toUnicode.has(charCode)) { + return true; + } + return false; + } + if (properties.type === 'CIDFontType2') { var cidToGidMap = properties.cidToGidMap || []; - var cidToGidMapLength = cidToGidMap.length; + var isCidToGidMapEmpty = cidToGidMap.length === 0; + properties.cMap.forEach(function(charCode, cid) { assert(cid <= 0xffff, 'Max size of CID is 65,535'); var glyphId = -1; - if (cidToGidMapLength === 0) { + if (isCidToGidMapEmpty) { glyphId = charCode; } else if (cidToGidMap[cid] !== undefined) { glyphId = cidToGidMap[cid]; } - if (glyphId >= 0 && glyphId < numGlyphs) { + + if (glyphId >= 0 && glyphId < numGlyphs && + hasGlyph(glyphId, charCode)) { charCodeToGlyphId[charCode] = glyphId; } }); @@ -4137,7 +4160,8 @@ var Font = (function FontClosure() { var found = false; for (i = 0; i < cmapMappingsLength; ++i) { - if (cmapMappings[i].charCode === unicodeOrCharCode) { + if (cmapMappings[i].charCode === unicodeOrCharCode && + hasGlyph(cmapMappings[i].glyphId, unicodeOrCharCode)) { charCodeToGlyphId[charCode] = cmapMappings[i].glyphId; found = true; break; @@ -4147,7 +4171,7 @@ var Font = (function FontClosure() { // Try to map using the post table. There are currently no known // pdfs that this fixes. var glyphId = properties.glyphNames.indexOf(glyphName); - if (glyphId > 0) { + if (glyphId > 0 && hasGlyph(glyphId, -1)) { charCodeToGlyphId[charCode] = glyphId; } } diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index d1d03af43..2ad755ea7 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -18,6 +18,7 @@ !sizes.pdf !close-path-bug.pdf !issue4630.pdf +!issue5202.pdf !issue5280.pdf !alphatrans.pdf !devicen.pdf @@ -58,10 +59,13 @@ !zerowidthline.pdf !bug868745.pdf !mmtype1.pdf +!issue5704.pdf +!bug893730.pdf !bug864847.pdf !issue1002.pdf !issue925.pdf !issue2840.pdf +!issue4061.pdf !issue4668.pdf !issue5039.pdf !issue5070.pdf diff --git a/test/pdfs/bug893730.pdf b/test/pdfs/bug893730.pdf new file mode 100644 index 000000000..8c6f81862 Binary files /dev/null and b/test/pdfs/bug893730.pdf differ diff --git a/test/pdfs/issue4061.pdf b/test/pdfs/issue4061.pdf new file mode 100644 index 000000000..ace045d5c Binary files /dev/null and b/test/pdfs/issue4061.pdf differ diff --git a/test/pdfs/issue5202.pdf b/test/pdfs/issue5202.pdf new file mode 100644 index 000000000..7196c55e2 Binary files /dev/null and b/test/pdfs/issue5202.pdf differ diff --git a/test/pdfs/issue5704.pdf b/test/pdfs/issue5704.pdf new file mode 100644 index 000000000..585a7d1dd Binary files /dev/null and b/test/pdfs/issue5704.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index b92a41cbb..938089c91 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -485,6 +485,20 @@ "lastPage": 1, "type": "eq" }, + { "id": "issue4061", + "file": "pdfs/issue4061.pdf", + "md5": "236aaa8840a47c3c061f8e3034549764", + "rounds": 1, + "link": false, + "type": "eq" + }, + { "id": "issue5202", + "file": "pdfs/issue5202.pdf", + "md5": "bb9cc69211112e66aab40828086a4e5a", + "rounds": 1, + "link": false, + "type": "eq" + }, { "id": "issue5238", "file": "pdfs/issue5238.pdf", "md5": "6ddecda00893be1793de20a70c83a3c2", @@ -1015,6 +1029,20 @@ "link": true, "type": "eq" }, + { "id": "issue5704", + "file": "pdfs/issue5704.pdf", + "md5": "6e0b62585feef24dff2d7e7687cd8128", + "rounds": 1, + "link": false, + "type": "eq" + }, + { "id": "bug893730", + "file": "pdfs/bug893730.pdf", + "md5": "2587379fb1b3bbff89c14f0863e78383", + "rounds": 1, + "link": false, + "type": "eq" + }, { "id": "pdfkit_compressed", "file": "pdfs/pdfkit_compressed.pdf", "md5": "ffe9c571d0a1572e234253e6c7cdee6c",