From 567be2972025e209cccaaa3d74c32486a21cc6e5 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Thu, 8 Sep 2011 03:16:33 +0200 Subject: [PATCH] Add more glue between glyph mapping and code mapping --- fonts.js | 81 ++++++++++++++++++++++++++++---------------------------- pdf.js | 46 +++++++++++++++++++++----------- 2 files changed, 71 insertions(+), 56 deletions(-) diff --git a/fonts.js b/fonts.js index 659c5157e..bfdbb0b4a 100755 --- a/fonts.js +++ b/fonts.js @@ -711,7 +711,7 @@ var Font = (function Font() { var encoding = properties.encoding; for (var index in encoding) { - var code = encoding[index]; + var code = encoding[index].unicode; if (firstCharIndex > code || !firstCharIndex) firstCharIndex = code; if (lastCharIndex < code) @@ -970,15 +970,9 @@ var Font = (function Font() { if (index) { deltas.push(index); - var code = encoding[index]; - for (var glyph in properties.glyphs) { - if (properties.glyphs[glyph] == code) - break; - } - var unicode = j + kCmapGlyphOffset; - properties.glyphs[glyph] = encoding[j] = unicode; - glyphs.push({ glyph: glyph, unicode: unicode }); + encoding[j].unicode = unicode; + glyphs.push({ unicode: unicode }); } } @@ -1023,8 +1017,10 @@ var Font = (function Font() { var start = denseRange[0]; var end = denseRange[1]; var index = firstCode; - for (var j = start; j <= end; j++) - encoding[index++] = glyphs[j - firstCode - 1].unicode; + for (var j = start; j <= end; j++) { + var code = j - firstCode - 1; + encoding[index++] = { unicode: glyphs[code].unicode }; + } return cmap.data = createCMapTable(glyphs); } } @@ -1118,23 +1114,6 @@ var Font = (function Font() { // U+00AD (soft hyphen) is not drawn. // So, offset all the glyphs by 0xFF to avoid these cases and use // the encoding to map incoming characters to the new glyph positions - - var glyphs = []; - var encoding = properties.encoding; - - for (var i = 1; i < numGlyphs; i++) - glyphs.push({ unicode: i + kCmapGlyphOffset }); - - if ('undefined' == typeof(encoding[0])) { - // the font is directly characters to glyphs with no encoding - // so create an identity encoding - for (i = 0; i < numGlyphs; i++) - encoding[i] = i + kCmapGlyphOffset; - } else { - for (var code in encoding) - encoding[code] += kCmapGlyphOffset; - } - if (!cmap) { cmap = { tag: 'cmap', @@ -1142,6 +1121,21 @@ var Font = (function Font() { }; tables.push(cmap); } + + var encoding = properties.encoding; + if (!encoding[0]) { + // the font is directly characters to glyphs with no encoding + // so create an identity encoding + for (i = 0; i < numGlyphs; i++) + encoding[i] = { unicode: i + kCmapGlyphOffset }; + } else { + for (var code in encoding) + encoding[code].unicode += kCmapGlyphOffset; + } + + var glyphs = []; + for (var i = 1; i < numGlyphs; i++) + glyphs.push({ unicode: i + kCmapGlyphOffset }); cmap.data = createCMapTable(glyphs); } else { replaceCMapTable(cmap, font, properties); @@ -1361,14 +1355,14 @@ var Font = (function Font() { // loop should never end on the last byte for (var i = 0; i < length; i++) { var charcode = int16([chars.charCodeAt(i++), chars.charCodeAt(i)]); - var unicode = encoding[charcode]; + var unicode = encoding[charcode].unicode; str += String.fromCharCode(unicode); } } else { for (var i = 0; i < chars.length; ++i) { var charcode = chars.charCodeAt(i); - var unicode = encoding[charcode]; + var unicode = encoding[charcode].unicode; if ('undefined' == typeof(unicode)) { warn('Unencoded charcode ' + charcode); unicode = charcode; @@ -1376,7 +1370,7 @@ var Font = (function Font() { // Check if the glyph has already been converted if (!IsNum(unicode)) - unicode = encoding[charcode] = this.glyphs[unicode]; + unicode = encoding[charcode].unicode = this.glyphs[unicode].unicode; // Handle surrogate pairs if (unicode > 0xFFFF) { @@ -1830,8 +1824,8 @@ var Type1Parser = function() { var glyph = getToken(); if ('undefined' == typeof(properties.differences[index])) { - properties.encoding[index] = glyph; - properties.glyphs[glyph] = GlyphsUnicode[glyph] || index; + var mapping = { unicode: GlyphsUnicode[glyph] || j }; + properties.glyphs[glyph] = properties.encoding[index] = mapping; } getToken(); // read the in 'put' } @@ -2000,14 +1994,14 @@ CFF.prototype = { for (var i = 0; i < glyphs.length; i++) { var glyph = glyphs[i]; - var unicode = properties.glyphs[glyph.glyph]; - if (!unicode) { + var mapping = properties.glyphs[glyph.glyph]; + if (!mapping) { if (glyph.glyph != '.notdef') missings.push(glyph.glyph); } else { charstrings.push({ glyph: glyph.glyph, - unicode: unicode, + unicode: mapping.unicode, charstring: glyph.data, width: glyph.width, lsb: glyph.lsb @@ -2340,17 +2334,24 @@ var Type2CFF = (function() { } } - if (code == -1) - index = code = properties.glyphs[glyph] || index; + if (code == -1) { + var mapping = properties.glyphs[glyph] || {}; + index = code = mapping.unicode || index; + } var width = widths[code] || defaultWidth; if (code <= 0x1f || (code >= 127 && code <= 255)) code += kCmapGlyphOffset; - properties.encoding[index] = code; + properties.glyphs[glyph] = properties.encoding[index] = { + unicode: code, + width: width + }; + charstrings.push({ unicode: code, - width: width, gid: i + width: width, + gid: i }); index++; } diff --git a/pdf.js b/pdf.js index 5b0558940..73575a4f3 100644 --- a/pdf.js +++ b/pdf.js @@ -4194,13 +4194,19 @@ var PartialEvaluator = (function() { var glyphsData = glyphsStream.getBytes(0); // Glyph ids are big-endian 2-byte values - // Set this to 0 to verify the font has an encoding. var encoding = properties.encoding; - encoding[0] = 0; + + // Set encoding 0 to later verify the font has an encoding + encoding[0] = { unicode: 0 }; for (var j = 0; j < glyphsData.length; j++) { var glyphID = (glyphsData[j++] << 8) | glyphsData[j]; - if (glyphID != 0) - encoding[j >> 1] = glyphID; + if (glyphID == 0) + continue; + + encoding[j >> 1] = { + unicode: glyphID, + width: 0 + }; } } else if (type == 'CIDFontType0') { var encoding = xref.fetchIfRef(dict.get('Encoding')); @@ -4269,7 +4275,10 @@ var PartialEvaluator = (function() { var glyph = differences[i] || baseEncoding[i]; if (glyph) { var index = GlyphsUnicode[glyph] || i; - glyphs[glyph] = map[i] = index; + glyphs[glyph] = map[i] = { + unicode: index, + width: properties.widths[i - firstChar] || properties.defaultWidth + }; // If there is no file, the character mapping can't be modified // but this is unlikely that there is any standard encoding with @@ -4278,7 +4287,7 @@ var PartialEvaluator = (function() { continue; if (index <= 0x1f || (index >= 127 && index <= 255)) - glyphs[glyph] = map[i] += kCmapGlyphOffset; + map[i].unicode += kCmapGlyphOffset; } } @@ -4316,7 +4325,10 @@ var PartialEvaluator = (function() { var endRange = tokens[j + 1]; var code = tokens[j + 2]; while (startRange < endRange) { - map[startRange] = code++; + map[startRange] = { + unicode: code++, + width: 0 + } ++startRange; } } @@ -4327,7 +4339,10 @@ var PartialEvaluator = (function() { for (var j = 0; j < tokens.length; j += 2) { var index = tokens[j]; var code = tokens[j + 1]; - map[index] = code; + map[index] = { + unicode: code, + width: 0 + }; } break; @@ -4478,19 +4493,18 @@ var PartialEvaluator = (function() { descent: descriptor.get('Descent'), xHeight: descriptor.get('XHeight'), capHeight: descriptor.get('CapHeight'), + defaultWidth: descriptor.get('MissingWidth') || 0, flags: descriptor.get('Flags'), italicAngle: descriptor.get('ItalicAngle'), differences: [], - widths: [], + widths: (function() { + var glyphWidths = {}; + for (var i = 0; i <= widths.length; i++) + glyphWidths[firstChar++] = widths[i]; + return glyphWidths; + })(), encoding: {} }; - - // XXX Encoding and Glyphs should point to the same object so it will - // be hard to be out of sync. The object could contains the unicode and - // the width of the glyph. - for (var i = 0; i <= widths.length; i++) - properties.widths[firstChar++] = widths[i]; - properties.glyphs = this.extractEncoding(dict, xref, properties); return {