diff --git a/fonts.js b/fonts.js index 2d7ff94f5..c40b9f192 100755 --- a/fonts.js +++ b/fonts.js @@ -638,30 +638,28 @@ var Font = (function Font() { var ulUnicodeRange3 = 0; var ulUnicodeRange4 = 0; - var charset = properties.charset; - if (charset && charset.length) { - var firstCharIndex = null; - var lastCharIndex = 0; + var firstCharIndex = null; + var lastCharIndex = 0; - for (var i = 0; i < charset.length; i++) { - var code = GlyphsUnicode[charset[i]]; - if (firstCharIndex > code || !firstCharIndex) - firstCharIndex = code; - if (lastCharIndex < code) - lastCharIndex = code; + var encoding = properties.encoding; + for (var index in encoding) { + var code = encoding[index]; + if (firstCharIndex > code || !firstCharIndex) + firstCharIndex = code; + if (lastCharIndex < code) + lastCharIndex = code; - var position = getUnicodeRangeFor(code); - if (position < 32) { - ulUnicodeRange1 |= 1 << position; - } else if (position < 64) { - ulUnicodeRange2 |= 1 << position - 32; - } else if (position < 96) { - ulUnicodeRange3 |= 1 << position - 64; - } else if (position < 123) { - ulUnicodeRange4 |= 1 << position - 96; - } else { - error('Unicode ranges Bits > 123 are reserved for internal usage'); - } + var position = getUnicodeRangeFor(code); + if (position < 32) { + ulUnicodeRange1 |= 1 << position; + } else if (position < 64) { + ulUnicodeRange2 |= 1 << position - 32; + } else if (position < 96) { + ulUnicodeRange3 |= 1 << position - 64; + } else if (position < 123) { + ulUnicodeRange4 |= 1 << position - 96; + } else { + error('Unicode ranges Bits > 123 are reserved for internal usage'); } } @@ -847,7 +845,6 @@ var Font = (function Font() { } var encoding = properties.encoding; - var charset = properties.charset; for (var i = 0; i < numRecords; i++) { var table = records[i]; font.pos = start + table.offset; @@ -856,7 +853,9 @@ var Font = (function Font() { var length = int16(font.getBytes(2)); var language = int16(font.getBytes(2)); - if (format == 0) { + if (format == 4) { + return; + } else if (format == 0) { // Characters below 0x20 are controls characters that are hardcoded // into the platform so if some characters in the font are assigned // under this limit they will not be displayed so let's rewrite the @@ -871,35 +870,15 @@ var Font = (function Font() { } } - var rewrite = false; - for (var code in encoding) { - if (code < 0x20 && encoding[code]) - rewrite = true; - - if (rewrite) - encoding[code] = parseInt(code) + 0x1F; - } - - if (rewrite) { + if (properties.firstChar < 0x20) + var code = 0; for (var j = 0; j < glyphs.length; j++) { + var glyph = glyphs[j]; glyphs[j].unicode += 0x1F; - } + properties.glyphs[glyph.glyph] = encoding[++code] = glyph.unicode; } - cmap.data = createCMapTable(glyphs, deltas); - } else if (format == 6 && numRecords == 1 && !encoding.empty) { - // Format 0 alone is not allowed by the sanitizer so let's rewrite - // that to a 3-1-4 Unicode BMP table - TODO('Use an other source of informations than ' + - 'charset here, it is not reliable'); - var glyphs = []; - for (var j = 0; j < charset.length; j++) { - glyphs.push({ - unicode: GlyphsUnicode[charset[j]] || 0 - }); - } - - cmap.data = createCMapTable(glyphs); - } else if (format == 6 && numRecords == 1) { + return cmap.data = createCMapTable(glyphs, deltas); + } else if (format == 6) { // Format 6 is a 2-bytes dense mapping, which means the font data // lives glue together even if they are pretty far in the unicode // table. (This looks weird, so I can have missed something), this @@ -912,6 +891,8 @@ var Font = (function Font() { var min = 0xffff, max = 0; for (var j = 0; j < entryCount; j++) { var charcode = int16(font.getBytes(2)); + if (!charcode) + continue; glyphs.push(charcode); if (charcode < min) @@ -939,7 +920,7 @@ var Font = (function Font() { var index = firstCode; for (var j = start; j <= end; j++) encoding[index++] = glyphs[j - firstCode - 1].unicode; - cmap.data = createCMapTable(glyphs); + return cmap.data = createCMapTable(glyphs); } } }; @@ -1288,10 +1269,6 @@ var Font = (function Font() { unicode = charcode; } - // Check if the glyph has already been converted - if (!IsNum(unicode)) - unicode = encoding[unicode] = GlyphsUnicode[unicode.name]; - // Handle surrogate pairs if (unicode > 0xFFFF) { str += String.fromCharCode(unicode & 0xFFFF); @@ -1715,9 +1692,6 @@ var Type1Parser = function() { properties.textMatrix = matrix; break; case '/Encoding': - if (!properties.builtInEncoding) - break; - var size = parseInt(getToken()); getToken(); // read in 'array' @@ -1726,9 +1700,12 @@ var Type1Parser = function() { if (token == 'dup') { var index = parseInt(getToken()); var glyph = getToken(); - properties.encoding[index] = GlyphsUnicode[glyph]; + + if (!properties.differences[j]) { + var code = GlyphsUnicode[glyph]; + properties.glyphs[glyph] = properties.encoding[index] = code; + } getToken(); // read the in 'put' - j = index; } } break; @@ -1903,7 +1880,7 @@ CFF.prototype = { missings.push(glyph.glyph); } else { charstrings.push({ - glyph: glyph, + glyph: glyph.glyph, unicode: unicode, charstring: glyph.data, width: glyph.width, @@ -2079,7 +2056,7 @@ CFF.prototype = { var count = glyphs.length; for (var i = 0; i < count; i++) { - var index = CFFStrings.indexOf(charstrings[i].glyph.glyph); + var index = CFFStrings.indexOf(charstrings[i].glyph); // Some characters like asterikmath && circlecopyrt are // missing from the original strings, for the moment let's // map them to .notdef and see later if it cause any @@ -2176,7 +2153,6 @@ var Type2CFF = (function() { var stringIndex = this.parseIndex(dictIndex.endPos); var gsubrIndex = this.parseIndex(stringIndex.endPos); - var strings = this.getStrings(stringIndex); var baseDict = this.parseDict(dictIndex.get(0)); @@ -2219,7 +2195,7 @@ var Type2CFF = (function() { var charstrings = []; for (var i = 0, ii = charsets.length; i < ii; ++i) { var charName = charsets[i]; - var charCode = GlyphsUnicode[charName]; + var charCode = properties.glyphs[charName]; if (charCode) { var width = widths[charCode] || defaultWidth; charstrings.push({unicode: charCode, width: width, gid: i}); diff --git a/pdf.js b/pdf.js index e7095b692..32e13817f 100644 --- a/pdf.js +++ b/pdf.js @@ -4199,8 +4199,6 @@ var PartialEvaluator = (function() { var builtInEncoding = false; var encodingMap = {}; - var glyphMap = {}; - var charset = []; if (compositeFont) { // Special CIDFont support // XXX only CIDFontType2 supported for now @@ -4242,69 +4240,61 @@ var PartialEvaluator = (function() { if (fontDict.has('Encoding')) { var encoding = xref.fetchIfRef(fontDict.get('Encoding')); if (IsDict(encoding)) { - // Build a map of between codes and glyphs - // Load the base encoding var baseName = encoding.get('BaseEncoding'); - if (baseName) { + if (baseName) baseEncoding = Encodings[baseName.name].slice(); - } // Load the differences between the base and original var differences = encoding.get('Differences'); var index = 0; for (var j = 0; j < differences.length; j++) { var data = differences[j]; - if (IsNum(data)) { + if (IsNum(data)) index = data; - } else { + else diffEncoding[index++] = data.name; - } } } else if (IsName(encoding)) { baseEncoding = Encodings[encoding.name].slice(); + } else { + error("Encoding is not a Name nor a Dict"); } } + var fontType = subType.name; if (!baseEncoding) { - var type = subType.name; - if (type == 'TrueType') { - baseEncoding = Encodings.WinAnsiEncoding.slice(); - } else if (type == 'Type1') { - baseEncoding = Encodings.StandardEncoding.slice(); - if (!diffEncoding.length) - builtInEncoding = true; - } else { - error('Unknown type of font'); + switch (fontType) { + case 'TrueType': + baseEncoding = Encodings.WinAnsiEncoding.slice(); + break; + case 'Type1': + baseEncoding = Encodings.StandardEncoding.slice(); + break; + default: + warn('Unknown type of font: ' + fontType); + break; } } + // firstChar and width are required + // (except for 14 standard fonts) + var firstChar = xref.fetchIfRef(fontDict.get('FirstChar')) || 0; + var lastChar = xref.fetchIfRef(fontDict.get('LastChar')) || 0; + var widths = xref.fetchIfRef(fontDict.get('Widths')) || []; + // merge in the differences - var length = baseEncoding.length > diffEncoding.length ? - baseEncoding.length : diffEncoding.length; - for (var i = 0, ii = length; i < ii; ++i) { - var diffGlyph = diffEncoding[i]; - var baseGlyph = baseEncoding[i]; - if (diffGlyph) { - glyphMap[i] = diffGlyph; - encodingMap[i] = GlyphsUnicode[diffGlyph]; - } else if (baseGlyph) { - glyphMap[i] = baseGlyph; - encodingMap[i] = GlyphsUnicode[baseGlyph]; - } + var glyphsMap = {}; + for (var i = firstChar; i <= lastChar; i++) { + var glyph = diffEncoding[i] || baseEncoding[i]; + if (glyph) + glyphsMap[glyph] = encodingMap[i] = GlyphsUnicode[glyph]; } - if (fontDict.has('ToUnicode')) { - encodingMap['empty'] = true; - var glyphsMap = {}; - for (var p in glyphMap) - glyphsMap[glyphMap[p]] = encodingMap[p]; - + if (fontDict.has('ToUnicode') && differences) { var cmapObj = xref.fetchIfRef(fontDict.get('ToUnicode')); if (IsName(cmapObj)) { error('ToUnicode file cmap translation not implemented'); } else if (IsStream(cmapObj)) { - var firstChar = xref.fetchIfRef(fontDict.get('FirstChar')); - var tokens = []; var token = ''; @@ -4334,6 +4324,8 @@ var PartialEvaluator = (function() { var startRange = parseInt('0x' + tokens[j]); var endRange = parseInt('0x' + tokens[j + 1]); var code = parseInt('0x' + tokens[j + 2]); + for (var k = startRange; k < endRange; k++) + encodingMap[k] = code++; } break; @@ -4360,15 +4352,6 @@ var PartialEvaluator = (function() { } } } - - // firstChar and width are required - // (except for 14 standard fonts) - var firstChar = xref.fetchIfRef(fontDict.get('FirstChar')); - var widths = xref.fetchIfRef(fontDict.get('Widths')) || []; - for (var j = 0; j < widths.length; j++) { - if (widths[j]) - charset.push(glyphMap[j + firstChar]); - } } if (!fd) { @@ -4396,7 +4379,6 @@ var PartialEvaluator = (function() { } var descriptor = xref.fetch(fd); - var fontName = fontDict.get('Name'); if (!fontName) fontName = xref.fetchIfRef(descriptor.get('FontName'));; @@ -4414,14 +4396,6 @@ var PartialEvaluator = (function() { } } - if (descriptor.has('CharSet')) { - // Get the font charset if any (meaningful only in Type 1) - charset = descriptor.get('CharSet'); - assertWellFormed(IsString(charset), 'invalid charset'); - charset = charset.split('/'); - charset.shift(); - } - var widths = fontDict.get('Widths'); if (widths) { var glyphWidths = {}; @@ -4435,9 +4409,8 @@ var PartialEvaluator = (function() { subtype: fileType, widths: glyphWidths, encoding: encodingMap, + differences: diffEncoding, glyphs: glyphsMap || GlyphsUnicode, - builtInEncoding: builtInEncoding, - charset: charset, firstChar: fontDict.get('FirstChar'), lastChar: fontDict.get('LastChar'), bbox: descriptor.get('FontBBox'),