From aeba156d6beafc53b1207caab62232e4c5bde39a Mon Sep 17 00:00:00 2001 From: sbarman Date: Tue, 23 Aug 2011 18:08:47 -0700 Subject: [PATCH 1/3] Redid encoding --- pdf.js | 229 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 123 insertions(+), 106 deletions(-) diff --git a/pdf.js b/pdf.js index 1a0327819..410cb5e3a 100644 --- a/pdf.js +++ b/pdf.js @@ -4213,118 +4213,135 @@ var PartialEvaluator = (function() { '9.7.5.3'); } } - } else if (fontDict.has('Encoding')) { - var encoding = xref.fetchIfRef(fontDict.get('Encoding')); - if (IsDict(encoding)) { - // Build a map of between codes and glyphs - // Load the base encoding - var baseName = encoding.get('BaseEncoding'); - if (baseName) { - var base = Encodings[baseName.name]; - for (var j = 0, end = base.length; j < end; j++) - encodingMap[j] = GlyphsUnicode[base[j]] || 0; - } else { - TODO('need to load default encoding'); - } - - // Load the differences between the base and original - var differences = encoding.get('Differences'); - var index = 0; - for (var j = 0; j < differences.length; j++) { - var data = differences[j]; - if (IsNum(data)) { - index = data; - } else { - encodingMap[index++] = (subType.name == 'TrueType') ? j : - GlyphsUnicode[data.name]; + } else { + var baseEncoding = null, diffEncoding = []; + if (fontDict.has('Encoding')) { + var encoding = xref.fetchIfRef(fontDict.get('Encoding')); + if (IsDict(encoding)) { + // Build a map of between codes and glyphs + // Load the base encoding + var baseName = encoding.get('BaseEncoding'); + if (baseName) { + baseEncoding = Encodings[baseName.name].slice(); } - } - } else if (IsName(encoding)) { - var encoding = Encodings[encoding.name]; - if (!encoding) - error('Unknown font encoding'); - var index = 0; - for (var j = 0; j < encoding.length; j++) - encodingMap[index++] = GlyphsUnicode[encoding[j]]; - - // firstChar and width are required - // (except for 14 standard fonts) - var firstChar = xref.fetchIfRef(fontDict.get('FirstChar')); - var widths = xref.fetchIfRef(fontDict.get('Widths')) || []; - for (var j = 0; j < widths.length; j++) { - if (widths[j]) - charset.push(encoding[j + firstChar]); - } - } - } else if (fontDict.has('ToUnicode')) { - encodingMap = {empty: true}; - var cmapObj = xref.fetchIfRef(fontDict.get('ToUnicode')); - if (IsName(cmapObj)) { - error('ToUnicode file cmap translation not implemented'); - } else if (IsStream(cmapObj)) { - var encoding = Encodings['WinAnsiEncoding']; - var firstChar = xref.fetchIfRef(fontDict.get('FirstChar')); - - var tokens = []; - var token = ''; - - var cmap = cmapObj.getBytes(cmapObj.length); - for (var i = 0; i < cmap.length; i++) { - var byte = cmap[i]; - if (byte == 0x20 || byte == 0x0A || byte == 0x3C || byte == 0x3E) { - switch (token) { - case 'useCMap': - error('useCMap is not implemented'); - break; - - case 'beginbfchar': - case 'beginbfrange': - case 'begincodespacerange': - token = ''; - tokens = []; - break; - - case 'endcodespacerange': - TODO('Support CMap ranges'); - break; - - case 'endbfrange': - for (var j = 0; j < tokens.length; j += 3) { - var startRange = parseInt('0x' + tokens[j]); - var endRange = parseInt('0x' + tokens[j + 1]); - var code = parseInt('0x' + tokens[j + 2]); - - for (var k = startRange; k <= endRange; k++) { - charset.push(encoding[code++] || '.notdef'); - } - } - break; - - case 'endbfchar': - for (var j = 0; j < tokens.length; j += 2) { - var index = parseInt('0x' + tokens[j]); - var code = parseInt('0x' + tokens[j + 1]); - encodingMap[index] = GlyphsUnicode[encoding[code]]; - charset.push(encoding[code] || '.notdef'); - } - break; - - default: - if (token.length) { - tokens.push(token); - token = ''; - } - break; + // Load the differences between the base and original + var differences = encoding.get('Differences'); + var index = 0; + for (var j = 0; j < differences.length; j++) { + var data = differences[j]; + if (IsNum(data)) { + index = data; + } else { + diffEncoding[index++] = data.name; + } + } + } else if (IsName(encoding)) { + baseEncoding = Encodings[encoding.name].slice(); + } + } + + if (!baseEncoding) { + var type = subType.name; + if (type == 'TrueType') { + baseEncoding = Encodings.WinAnsiEncoding.slice(0); + } else if (type == 'Type1') { + baseEncoding = Encodings.StandardEncoding.slice(0); + } else { + error('Unknown type of font'); + } + } + + // merge in the differences + var length = baseEncoding.length > diffEncoding.length ? + baseEncoding.length : diffEncoding.length; + for (var i = 0, ii = length; i < ii; ++i) { + var diffGlyph = diffEncoding[i]; + var baseGlyph = baseEncoding[i]; + if (diffGlyph) + encodingMap[i] = GlyphsUnicode[diffGlyph]; + else if (baseGlyph) + encodingMap[i] = GlyphsUnicode[baseGlyph]; + } + + if (fontDict.has('ToUnicode')) { + encodingMap = {empty: true}; + var cmapObj = xref.fetchIfRef(fontDict.get('ToUnicode')); + if (IsName(cmapObj)) { + error('ToUnicode file cmap translation not implemented'); + } else if (IsStream(cmapObj)) { + var encoding = Encodings['WinAnsiEncoding']; + var firstChar = xref.fetchIfRef(fontDict.get('FirstChar')); + + var tokens = []; + var token = ''; + + var cmap = cmapObj.getBytes(cmapObj.length); + for (var i = 0; i < cmap.length; i++) { + var byte = cmap[i]; + if (byte == 0x20 || byte == 0x0A || byte == 0x3C || byte == 0x3E) { + switch (token) { + case 'useCMap': + error('useCMap is not implemented'); + break; + + case 'beginbfchar': + case 'beginbfrange': + case 'begincodespacerange': + token = ''; + tokens = []; + break; + + case 'endcodespacerange': + TODO('Support CMap ranges'); + break; + + case 'endbfrange': + for (var j = 0; j < tokens.length; j += 3) { + var startRange = parseInt('0x' + tokens[j]); + var endRange = parseInt('0x' + tokens[j + 1]); + var code = parseInt('0x' + tokens[j + 2]); + + for (var k = startRange; k <= endRange; k++) { + charset.push(encoding[code++] || '.notdef'); + } + } + break; + + case 'endbfchar': + for (var j = 0; j < tokens.length; j += 2) { + var index = parseInt('0x' + tokens[j]); + var code = parseInt('0x' + tokens[j + 1]); + encodingMap[index] = GlyphsUnicode[encoding[code]]; + charset.push(encoding[code] || '.notdef'); + } + break; + + default: + if (token.length) { + tokens.push(token); + token = ''; + } + break; + } + } else if (byte == 0x5B || byte == 0x5D) { + error('CMAP list parsing is not implemented'); + } else { + token += String.fromCharCode(byte); } - } else if (byte == 0x5B || byte == 0x5D) { - error('CMAP list parsing is not implemented'); - } else { - token += String.fromCharCode(byte); } } } - } + +/* // firstChar and width are required + // (except for 14 standard fonts) + var firstChar = xref.fetchIfRef(fontDict.get('FirstChar')); + var widths = xref.fetchIfRef(fontDict.get('Widths')) || []; + for (var j = 0; j < widths.length; j++) { + if (widths[j]) + charset.push(encoding[j + firstChar]); + } +*/ } if (!fd) { var baseFontName = fontDict.get('BaseFont'); From 5499cbc1b0a7d6bcbbf2dbd098c3dbd3e9572981 Mon Sep 17 00:00:00 2001 From: sbarman Date: Tue, 23 Aug 2011 18:17:30 -0700 Subject: [PATCH 2/3] figuring out charset --- pdf.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pdf.js b/pdf.js index 410cb5e3a..ed28fa4d9 100644 --- a/pdf.js +++ b/pdf.js @@ -4264,7 +4264,7 @@ var PartialEvaluator = (function() { encodingMap[i] = GlyphsUnicode[baseGlyph]; } - if (fontDict.has('ToUnicode')) { + if (fontDict.has('ToUnicode') && false) { encodingMap = {empty: true}; var cmapObj = xref.fetchIfRef(fontDict.get('ToUnicode')); if (IsName(cmapObj)) { @@ -4333,15 +4333,15 @@ var PartialEvaluator = (function() { } } -/* // firstChar and width are required + // firstChar and width are required // (except for 14 standard fonts) var firstChar = xref.fetchIfRef(fontDict.get('FirstChar')); var widths = xref.fetchIfRef(fontDict.get('Widths')) || []; for (var j = 0; j < widths.length; j++) { if (widths[j]) - charset.push(encoding[j + firstChar]); + charset.push(encodingMap[j + firstChar]); } -*/ } + } if (!fd) { var baseFontName = fontDict.get('BaseFont'); From 51fa9f579c62c38c0885d6b72111d2570c482feb Mon Sep 17 00:00:00 2001 From: sbarman Date: Tue, 23 Aug 2011 18:33:17 -0700 Subject: [PATCH 3/3] final fix for issue #215 --- pdf.js | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/pdf.js b/pdf.js index ed28fa4d9..11519c891 100644 --- a/pdf.js +++ b/pdf.js @@ -4176,6 +4176,7 @@ var PartialEvaluator = (function() { } var encodingMap = {}; + var glyphMap = {}; var charset = []; if (compositeFont) { // Special CIDFont support @@ -4258,19 +4259,20 @@ var PartialEvaluator = (function() { for (var i = 0, ii = length; i < ii; ++i) { var diffGlyph = diffEncoding[i]; var baseGlyph = baseEncoding[i]; - if (diffGlyph) + if (diffGlyph) { + glyphMap[i] = diffGlyph; encodingMap[i] = GlyphsUnicode[diffGlyph]; - else if (baseGlyph) + } else if (baseGlyph) { + glyphMap[i] = baseGlyph; encodingMap[i] = GlyphsUnicode[baseGlyph]; + } } - if (fontDict.has('ToUnicode') && false) { - encodingMap = {empty: true}; + if (fontDict.has('ToUnicode')) { var cmapObj = xref.fetchIfRef(fontDict.get('ToUnicode')); if (IsName(cmapObj)) { error('ToUnicode file cmap translation not implemented'); } else if (IsStream(cmapObj)) { - var encoding = Encodings['WinAnsiEncoding']; var firstChar = xref.fetchIfRef(fontDict.get('FirstChar')); var tokens = []; @@ -4279,7 +4281,8 @@ var PartialEvaluator = (function() { var cmap = cmapObj.getBytes(cmapObj.length); for (var i = 0; i < cmap.length; i++) { var byte = cmap[i]; - if (byte == 0x20 || byte == 0x0A || byte == 0x3C || byte == 0x3E) { + if (byte == 0x20 || byte == 0x0A || byte == 0x3C || + byte == 0x3E) { switch (token) { case 'useCMap': error('useCMap is not implemented'); @@ -4301,10 +4304,6 @@ var PartialEvaluator = (function() { var startRange = parseInt('0x' + tokens[j]); var endRange = parseInt('0x' + tokens[j + 1]); var code = parseInt('0x' + tokens[j + 2]); - - for (var k = startRange; k <= endRange; k++) { - charset.push(encoding[code++] || '.notdef'); - } } break; @@ -4312,8 +4311,7 @@ var PartialEvaluator = (function() { for (var j = 0; j < tokens.length; j += 2) { var index = parseInt('0x' + tokens[j]); var code = parseInt('0x' + tokens[j + 1]); - encodingMap[index] = GlyphsUnicode[encoding[code]]; - charset.push(encoding[code] || '.notdef'); + encodingMap[index] = code; } break; @@ -4339,7 +4337,7 @@ var PartialEvaluator = (function() { var widths = xref.fetchIfRef(fontDict.get('Widths')) || []; for (var j = 0; j < widths.length; j++) { if (widths[j]) - charset.push(encodingMap[j + firstChar]); + charset.push(glyphMap[j + firstChar]); } }