diff --git a/make.js b/make.js index eee901647..a37d37889 100644 --- a/make.js +++ b/make.js @@ -105,6 +105,7 @@ target.generic = function() { copy: [ [COMMON_WEB_FILES, GENERIC_DIR + '/web'], ['external/webL10n/l10n.js', GENERIC_DIR + '/web'], + ['external/cmaps/', GENERIC_DIR + '/web/cmaps'], ['web/viewer.css', GENERIC_DIR + '/web'], ['web/compatibility.js', GENERIC_DIR + '/web'], ['web/compressed.tracemonkey-pldi-09.pdf', GENERIC_DIR + '/web'], @@ -489,6 +490,7 @@ target.firefox = function() { defines: defines, copy: [ [COMMON_WEB_FILES, FIREFOX_BUILD_CONTENT_DIR + '/web'], + ['external/cmaps/', FIREFOX_BUILD_CONTENT_DIR + '/web/cmaps'], [FIREFOX_EXTENSION_DIR + 'tools/l10n.js', FIREFOX_BUILD_CONTENT_DIR + '/web'], ['web/default_preferences.js', FIREFOX_BUILD_CONTENT_DIR] @@ -604,6 +606,7 @@ target.mozcentral = function() { defines: defines, copy: [ [COMMON_WEB_FILES, MOZCENTRAL_CONTENT_DIR + '/web'], + ['external/cmaps/', MOZCENTRAL_CONTENT_DIR + '/web/cmaps'], ['extensions/firefox/tools/l10n.js', MOZCENTRAL_CONTENT_DIR + '/web'], ['web/default_preferences.js', MOZCENTRAL_CONTENT_DIR] ], @@ -673,6 +676,7 @@ target.b2g = function() { var setup = { defines: defines, copy: [ + ['external/cmaps/', B2G_BUILD_CONTENT_DIR + '/web/cmaps'], ['extensions/b2g/images', B2G_BUILD_CONTENT_DIR + '/web'], ['extensions/b2g/viewer.html', B2G_BUILD_CONTENT_DIR + '/web'], ['extensions/b2g/viewer.css', B2G_BUILD_CONTENT_DIR + '/web'], @@ -713,6 +717,7 @@ target.chromium = function() { var setup = { defines: defines, copy: [ + ['external/cmaps/', CHROME_BUILD_CONTENT_DIR + '/web/cmaps'], [COMMON_WEB_FILES, CHROME_BUILD_CONTENT_DIR + '/web'], [['extensions/chromium/*.json', 'extensions/chromium/*.html', diff --git a/src/core/cmap.js b/src/core/cmap.js index aa76128f7..2b45dad58 100644 --- a/src/core/cmap.js +++ b/src/core/cmap.js @@ -15,184 +15,199 @@ * limitations under the License. 
*/ /* globals Util, isString, isInt, warn, error, isCmd, isEOF, isName, Lexer, - isStream */ + isStream, StringStream */ 'use strict'; -var CMAP_CODESPACES = { - 'Adobe-CNS1-0': [[], [0, 14335]], - 'Adobe-CNS1-1': [[], [0, 17407]], - 'Adobe-CNS1-2': [[], [0, 17663]], - 'Adobe-CNS1-3': [[], [0, 18943]], - 'Adobe-CNS1-4': [[], [0, 19199]], - 'Adobe-CNS1-5': [[], [0, 19199]], - 'Adobe-CNS1-6': [[], [0, 19199]], - 'Adobe-CNS1-UCS2': [[], [0, 65535]], - 'B5-H': [[0, 128], [41280, 65278]], - 'B5-V': [[0, 128], [41280, 65278]], - 'B5pc-H': [[0, 128, 253, 255], [41280, 64766]], - 'B5pc-V': [[0, 128, 253, 255], [41280, 64766]], - 'CNS-EUC-H': [[0, 128], [41377, 65278], [], - [2392957345, 2392981246, 2393022881, 2393046782, 2393088417, 2393112318]], - 'CNS-EUC-V': [[0, 128], [41377, 65278], [], - [2392957345, 2392981246, 2393022881, 2393046782, 2393088417, 2393112318]], - 'CNS1-H': [[], [8481, 32382]], - 'CNS1-V': [[], [8481, 32382]], - 'CNS2-H': [[], [8481, 32382]], - 'CNS2-V': [[], [8481, 32382]], - 'ETen-B5-H': [[0, 128], [41280, 65278]], - 'ETen-B5-V': [[0, 128], [41280, 65278]], - 'ETenms-B5-H': [[0, 128], [41280, 65278]], - 'ETenms-B5-V': [[0, 128], [41280, 65278]], - 'ETHK-B5-H': [[0, 128], [34624, 65278]], - 'ETHK-B5-V': [[0, 128], [34624, 65278]], - 'HKdla-B5-H': [[0, 128], [41280, 65278]], - 'HKdla-B5-V': [[0, 128], [41280, 65278]], - 'HKdlb-B5-H': [[0, 128], [36416, 65278]], - 'HKdlb-B5-V': [[0, 128], [36416, 65278]], - 'HKgccs-B5-H': [[0, 128], [35392, 65278]], - 'HKgccs-B5-V': [[0, 128], [35392, 65278]], - 'HKm314-B5-H': [[0, 128], [41280, 65278]], - 'HKm314-B5-V': [[0, 128], [41280, 65278]], - 'HKm471-B5-H': [[0, 128], [41280, 65278]], - 'HKm471-B5-V': [[0, 128], [41280, 65278]], - 'HKscs-B5-H': [[0, 128], [34624, 65278]], - 'HKscs-B5-V': [[0, 128], [34624, 65278]], - 'UniCNS-UCS2-H': [[], [0, 55295, 57344, 65535]], - 'UniCNS-UCS2-V': [[], [0, 55295, 57344, 65535]], - 'UniCNS-UTF16-H': [[], [0, 55295, 57344, 65535], [], - [3623934976, 3690979327]], - 
'UniCNS-UTF16-V': [[], [0, 55295, 57344, 65535], [], - [3623934976, 3690979327]], - 'Adobe-GB1-0': [[], [0, 7935]], - 'Adobe-GB1-1': [[], [0, 9983]], - 'Adobe-GB1-2': [[], [0, 22271]], - 'Adobe-GB1-3': [[], [0, 22527]], - 'Adobe-GB1-4': [[], [0, 29183]], - 'Adobe-GB1-5': [[], [0, 30463]], - 'Adobe-GB1-UCS2': [[], [0, 65535]], - 'GB-EUC-H': [[0, 128], [41377, 65278]], - 'GB-EUC-V': [[0, 128], [41377, 65278]], - 'GB-H': [[], [8481, 32382]], - 'GB-V': [[], [8481, 32382]], - 'GBK-EUC-H': [[0, 128], [33088, 65278]], - 'GBK-EUC-V': [[0, 128], [33088, 65278]], - 'GBK2K-H': [[0, 127], [33088, 65278], [], [2167439664, 4265213497]], - 'GBK2K-V': [[0, 127], [33088, 65278], [], [2167439664, 4265213497]], - 'GBKp-EUC-H': [[0, 128], [33088, 65278]], - 'GBKp-EUC-V': [[0, 128], [33088, 65278]], - 'GBpc-EUC-H': [[0, 128, 253, 255], [41377, 64766]], - 'GBpc-EUC-V': [[0, 128, 253, 255], [41377, 64766]], - 'GBT-EUC-H': [[0, 128], [41377, 65278]], - 'GBT-EUC-V': [[0, 128], [41377, 65278]], - 'GBT-H': [[], [8481, 32382]], - 'GBT-V': [[], [8481, 32382]], - 'GBTpc-EUC-H': [[0, 128, 253, 255], [41377, 64766]], - 'GBTpc-EUC-V': [[0, 128, 253, 255], [41377, 64766]], - 'UniGB-UCS2-H': [[], [0, 55295, 57344, 65535]], - 'UniGB-UCS2-V': [[], [0, 55295, 57344, 65535]], - 'UniGB-UTF16-H': [[], [0, 55295, 57344, 65535], [], [3623934976, 3690979327]], - 'UniGB-UTF16-V': [[], [0, 55295, 57344, 65535], [], [3623934976, 3690979327]], - '78-EUC-H': [[0, 128], [36512, 36575, 41377, 65278]], - '78-EUC-V': [[0, 128], [36512, 36575, 41377, 65278]], - '78-H': [[], [8481, 32382]], - '78-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], - '78-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], - '78-V': [[], [8481, 32382]], - '78ms-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], - '78ms-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], - '83pv-RKSJ-H': [[0, 128, 160, 223, 253, 255], [33088, 40956, 57408, 64764]], - '90ms-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 
57408, 64764]], - '90ms-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], - '90msp-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], - '90msp-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], - '90pv-RKSJ-H': [[0, 128, 160, 223, 253, 255], [33088, 40956, 57408, 64764]], - '90pv-RKSJ-V': [[0, 128, 160, 223, 253, 255], [33088, 40956, 57408, 64764]], - 'Add-H': [[], [8481, 32382]], - 'Add-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], - 'Add-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], - 'Add-V': [[], [8481, 32382]], - 'Adobe-Japan1-0': [[], [0, 8447]], - 'Adobe-Japan1-1': [[], [0, 8447]], - 'Adobe-Japan1-2': [[], [0, 8959]], - 'Adobe-Japan1-3': [[], [0, 9471]], - 'Adobe-Japan1-4': [[], [0, 15615]], - 'Adobe-Japan1-5': [[], [0, 20479]], - 'Adobe-Japan1-6': [[], [0, 23295]], - 'Adobe-Japan1-UCS2': [[], [0, 65535]], - 'Adobe-Japan2-0': [[], [0, 6143]], - 'EUC-H': [[0, 128], [36512, 36575, 41377, 65278]], - 'EUC-V': [[0, 128], [36512, 36575, 41377, 65278]], - 'Ext-H': [[], [8481, 32382]], - 'Ext-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], - 'Ext-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], - 'Ext-V': [[], [8481, 32382]], - 'H': [[], [8481, 32382]], - 'Hankaku': [[0, 255], []], - 'Hiragana': [[0, 255], []], - 'Hojo-EUC-H': [[], [], [9413025, 9436926], []], - 'Hojo-EUC-V': [[], [], [9413025, 9436926], []], - 'Hojo-H': [[], [8481, 32382]], - 'Hojo-V': [[], [8481, 32382]], - 'Katakana': [[0, 255], []], - 'NWP-H': [[], [8481, 32382]], - 'NWP-V': [[], [8481, 32382]], - 'RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], - 'RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], - 'Roman': [[0, 255], []], - 'UniHojo-UCS2-H': [[], [0, 55295, 57344, 65535]], - 'UniHojo-UCS2-V': [[], [0, 55295, 57344, 65535]], - 'UniHojo-UTF16-H': [[], [0, 55295, 57344, 65535], [], - [3623934976, 3690979327]], - 'UniHojo-UTF16-V': [[], [0, 55295, 57344, 65535], [], - [3623934976, 
3690979327]], - 'UniJIS-UCS2-H': [[], [0, 55295, 57344, 65535]], - 'UniJIS-UCS2-HW-H': [[], [0, 55295, 57344, 65535]], - 'UniJIS-UCS2-HW-V': [[], [0, 55295, 57344, 65535]], - 'UniJIS-UCS2-V': [[], [0, 55295, 57344, 65535]], - 'UniJIS-UTF16-H': [[], [0, 55295, 57344, 65535], [], - [3623934976, 3690979327]], - 'UniJIS-UTF16-V': [[], [0, 55295, 57344, 65535], [], - [3623934976, 3690979327]], - 'UniJISPro-UCS2-HW-V': [[], [0, 55295, 57344, 65535]], - 'UniJISPro-UCS2-V': [[], [0, 55295, 57344, 65535]], - 'V': [[], [8481, 32382]], - 'WP-Symbol': [[0, 255], []], - 'Adobe-Korea1-0': [[], [0, 9471]], - 'Adobe-Korea1-1': [[], [0, 18175]], - 'Adobe-Korea1-2': [[], [0, 18431]], - 'Adobe-Korea1-UCS2': [[], [0, 65535]], - 'KSC-EUC-H': [[0, 128], [41377, 65278]], - 'KSC-EUC-V': [[0, 128], [41377, 65278]], - 'KSC-H': [[], [8481, 32382]], - 'KSC-Johab-H': [[0, 128], [33857, 54270, 55345, 57086, 57393, 63998]], - 'KSC-Johab-V': [[0, 128], [33857, 54270, 55345, 57086, 57393, 63998]], - 'KSC-V': [[], [8481, 32382]], - 'KSCms-UHC-H': [[0, 128], [33089, 65278]], - 'KSCms-UHC-HW-H': [[0, 128], [33089, 65278]], - 'KSCms-UHC-HW-V': [[0, 128], [33089, 65278]], - 'KSCms-UHC-V': [[0, 128], [33089, 65278]], - 'KSCpc-EUC-H': [[0, 132, 254, 255], [41281, 65022]], - 'KSCpc-EUC-V': [[0, 132, 254, 255], [41281, 65022]], - 'UniKS-UCS2-H': [[], [0, 55295, 57344, 65535]], - 'UniKS-UCS2-V': [[], [0, 55295, 57344, 65535]], - 'UniKS-UTF16-H': [[], [0, 55295, 57344, 65535], [], [3623934976, 3690979327]], - 'UniKS-UTF16-V': [[], [0, 55295, 57344, 65535], [], [3623934976, 3690979327]] -}; +var BUILT_IN_CMAPS = [ +// << Start unicode maps. +'Adobe-GB1-UCS2', +'Adobe-CNS1-UCS2', +'Adobe-Japan1-UCS2', +'Adobe-Korea1-UCS2', +// >> End unicode maps. 
+'78-EUC-H', +'78-EUC-V', +'78-H', +'78-RKSJ-H', +'78-RKSJ-V', +'78-V', +'78ms-RKSJ-H', +'78ms-RKSJ-V', +'83pv-RKSJ-H', +'90ms-RKSJ-H', +'90ms-RKSJ-V', +'90msp-RKSJ-H', +'90msp-RKSJ-V', +'90pv-RKSJ-H', +'90pv-RKSJ-V', +'Add-H', +'Add-RKSJ-H', +'Add-RKSJ-V', +'Add-V', +'Adobe-CNS1-0', +'Adobe-CNS1-1', +'Adobe-CNS1-2', +'Adobe-CNS1-3', +'Adobe-CNS1-4', +'Adobe-CNS1-5', +'Adobe-CNS1-6', +'Adobe-GB1-0', +'Adobe-GB1-1', +'Adobe-GB1-2', +'Adobe-GB1-3', +'Adobe-GB1-4', +'Adobe-GB1-5', +'Adobe-Japan1-0', +'Adobe-Japan1-1', +'Adobe-Japan1-2', +'Adobe-Japan1-3', +'Adobe-Japan1-4', +'Adobe-Japan1-5', +'Adobe-Japan1-6', +'Adobe-Korea1-0', +'Adobe-Korea1-1', +'Adobe-Korea1-2', +'B5-H', +'B5-V', +'B5pc-H', +'B5pc-V', +'CNS-EUC-H', +'CNS-EUC-V', +'CNS1-H', +'CNS1-V', +'CNS2-H', +'CNS2-V', +'ETHK-B5-H', +'ETHK-B5-V', +'ETen-B5-H', +'ETen-B5-V', +'ETenms-B5-H', +'ETenms-B5-V', +'EUC-H', +'EUC-V', +'Ext-H', +'Ext-RKSJ-H', +'Ext-RKSJ-V', +'Ext-V', +'GB-EUC-H', +'GB-EUC-V', +'GB-H', +'GB-V', +'GBK-EUC-H', +'GBK-EUC-V', +'GBK2K-H', +'GBK2K-V', +'GBKp-EUC-H', +'GBKp-EUC-V', +'GBT-EUC-H', +'GBT-EUC-V', +'GBT-H', +'GBT-V', +'GBTpc-EUC-H', +'GBTpc-EUC-V', +'GBpc-EUC-H', +'GBpc-EUC-V', +'H', +'HKdla-B5-H', +'HKdla-B5-V', +'HKdlb-B5-H', +'HKdlb-B5-V', +'HKgccs-B5-H', +'HKgccs-B5-V', +'HKm314-B5-H', +'HKm314-B5-V', +'HKm471-B5-H', +'HKm471-B5-V', +'HKscs-B5-H', +'HKscs-B5-V', +'Hankaku', +'Hiragana', +'KSC-EUC-H', +'KSC-EUC-V', +'KSC-H', +'KSC-Johab-H', +'KSC-Johab-V', +'KSC-V', +'KSCms-UHC-H', +'KSCms-UHC-HW-H', +'KSCms-UHC-HW-V', +'KSCms-UHC-V', +'KSCpc-EUC-H', +'KSCpc-EUC-V', +'Katakana', +'NWP-H', +'NWP-V', +'RKSJ-H', +'RKSJ-V', +'Roman', +'UniCNS-UCS2-H', +'UniCNS-UCS2-V', +'UniCNS-UTF16-H', +'UniCNS-UTF16-V', +'UniCNS-UTF32-H', +'UniCNS-UTF32-V', +'UniCNS-UTF8-H', +'UniCNS-UTF8-V', +'UniGB-UCS2-H', +'UniGB-UCS2-V', +'UniGB-UTF16-H', +'UniGB-UTF16-V', +'UniGB-UTF32-H', +'UniGB-UTF32-V', +'UniGB-UTF8-H', +'UniGB-UTF8-V', +'UniJIS-UCS2-H', +'UniJIS-UCS2-HW-H', +'UniJIS-UCS2-HW-V', 
+'UniJIS-UCS2-V', +'UniJIS-UTF16-H', +'UniJIS-UTF16-V', +'UniJIS-UTF32-H', +'UniJIS-UTF32-V', +'UniJIS-UTF8-H', +'UniJIS-UTF8-V', +'UniJIS2004-UTF16-H', +'UniJIS2004-UTF16-V', +'UniJIS2004-UTF32-H', +'UniJIS2004-UTF32-V', +'UniJIS2004-UTF8-H', +'UniJIS2004-UTF8-V', +'UniJISPro-UCS2-HW-V', +'UniJISPro-UCS2-V', +'UniJISPro-UTF8-V', +'UniJISX0213-UTF32-H', +'UniJISX0213-UTF32-V', +'UniJISX02132004-UTF32-H', +'UniJISX02132004-UTF32-V', +'UniKS-UCS2-H', +'UniKS-UCS2-V', +'UniKS-UTF16-H', +'UniKS-UTF16-V', +'UniKS-UTF32-H', +'UniKS-UTF32-V', +'UniKS-UTF8-H', +'UniKS-UTF8-V', +'V', +'WP-Symbol']; // CMap, not to be confused with TrueType's cmap. var CMap = (function CMapClosure() { - function CMap() { + function CMap(builtInCMap) { // Codespace ranges are stored as follows: // [[1BytePairs], [2BytePairs], [3BytePairs], [4BytePairs]] // where nBytePairs are ranges e.g. [low1, high1, low2, high2, ...] this.codespaceRanges = [[], [], [], []]; + this.numCodespaceRanges = 0; this.map = []; this.vertical = false; + this.useCMap = null; + this.builtInCMap = builtInCMap; } CMap.prototype = { addCodespaceRange: function(n, low, high) { this.codespaceRanges[n - 1].push(low, high); + this.numCodespaceRanges++; }, mapRange: function(low, high, dstLow) { @@ -395,17 +410,33 @@ var CMapFactory = (function CMapFactoryClosure() { error('Invalid codespace range.'); } - function parseCmap(cMap, lexer) { + function parseWMode(cMap, lexer) { + var obj = lexer.getObj(); + if (isInt(obj)) { + cMap.vertical = !!obj; + } + } + + function parseCMap(cMap, lexer, builtInCMapUrl, useCMap) { + var previous; + var embededUseCMap; objLoop: while (true) { var obj = lexer.getObj(); if (isEOF(obj)) { break; + } else if (isName(obj)) { + if (obj.name === 'WMode') { + parseWMode(cMap, lexer); + } + previous = obj; } else if (isCmd(obj)) { switch (obj.cmd) { - case 'endcMap': + case 'endcmap': break objLoop; - case 'usecMap': - // TODO + case 'usecmap': + if (isName(previous)) { + embededUseCMap = 
previous.name; + } break; case 'begincodespacerange': parseCodespaceRange(cMap, lexer); @@ -425,30 +456,66 @@ var CMapFactory = (function CMapFactoryClosure() { } } } - } - return { - create: function (encoding) { - if (isName(encoding)) { - switch (encoding.name) { - case 'Identity-H': - return new IdentityCMap(false, 2); - case 'Identity-V': - return new IdentityCMap(true, 2); - default: - if (encoding.name in CMAP_CODESPACES) { - // XXX: Temporary hack so the correct amount of bytes are read in - // CMap.readCharCode. - var cMap = new CMap(); - cMap.codespaceRanges = CMAP_CODESPACES[encoding.name]; - return cMap; - } - return null; + + if (!useCMap && embededUseCMap) { + // Load the usecmap definition from the file only if there wasn't one + // specified. + useCMap = embededUseCMap; + } + if (useCMap) { + cMap.useCMap = createBuiltInCMap(useCMap, builtInCMapUrl); + // If there aren't any code space ranges defined clone all the parent ones + // into this cMap. + if (cMap.numCodespaceRanges === 0) { + var useCodespaceRanges = cMap.useCMap.codespaceRanges; + for (var i = 0; i < useCodespaceRanges.length; i++) { + cMap.codespaceRanges[i] = useCodespaceRanges[i].slice(); } + cMap.numCodespaceRanges = cMap.useCMap.numCodespaceRanges; + } + // Merge the map into the current one, making sure not to override + // any previously defined entries. 
+ for (var key in cMap.useCMap.map) { + if (key in cMap.map) { + continue; + } + cMap.map[key] = cMap.useCMap.map[key]; + } + } + } + + function createBuiltInCMap(name, builtInCMapUrl) { + if (name === 'Identity-H') { + return new IdentityCMap(false, 2); + } else if (name === 'Identity-V') { + return new IdentityCMap(true, 2); + } + if (BUILT_IN_CMAPS.indexOf(name) === -1) { + error('Unknown cMap name: ' + name); + } + + var request = new XMLHttpRequest(); + var url = builtInCMapUrl + name; + request.open('GET', url, false); + request.send(null); + if (request.status === 0 && /^https?:/i.test(url)) { + error('Unable to get cMap at: ' + url); + } + var cMap = new CMap(true); + var lexer = new Lexer(new StringStream(request.responseText)); + parseCMap(cMap, lexer, builtInCMapUrl, null); + return cMap; + } + + return { + create: function (encoding, builtInCMapUrl, useCMap) { + if (isName(encoding)) { + return createBuiltInCMap(encoding.name, builtInCMapUrl); } else if (isStream(encoding)) { var cMap = new CMap(); var lexer = new Lexer(encoding); try { - parseCmap(cMap, lexer); + parseCMap(cMap, lexer, builtInCMapUrl, useCMap); } catch (e) { warn('Invalid CMap data. ' + e); } diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 2e85cbd16..fd685800f 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -828,41 +828,21 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } // Based on 9.6.6 of the spec the encoding can come from multiple places - // but should be prioritized in the following order: - // 1. Encoding dictionary - // 2. Encoding within font file (Type1 or Type1C) - // 3. Default (depends on font type) - // Differences applied to the above. - // Note: we don't fill in the encoding from the font file(2) here but use - // the flag overridableEncoding to signal that the font can override the - // encoding if it has one built in. 
- var overridableEncoding = true; - var hasEncoding = false; - var flags = properties.flags; + // and depends on the font type. The base encoding and differences are + // read here, but the encoding that is actually used is chosen during + // glyph mapping in the font. + // TODO: Loading the built in encoding in the font would allow the + // differences to be merged in here not require us to hold on to it. var differences = []; - var baseEncoding = properties.type === 'TrueType' ? - Encodings.WinAnsiEncoding : - Encodings.StandardEncoding; - // The Symbolic attribute can be misused for regular fonts - // Heuristic: we have to check if the font is a standard one and has - // Symbolic font name - if (!!(flags & FontFlags.Symbolic)) { - baseEncoding = !properties.file && /Symbol/i.test(properties.name) ? - Encodings.SymbolSetEncoding : Encodings.MacRomanEncoding; - } + var baseEncodingName = null; if (dict.has('Encoding')) { var encoding = dict.get('Encoding'); if (isDict(encoding)) { - var baseName = encoding.get('BaseEncoding'); - if (baseName) { - overridableEncoding = false; - hasEncoding = true; - baseEncoding = Encodings[baseName.name]; - } - + baseEncodingName = encoding.get('BaseEncoding'); + baseEncodingName = isName(baseEncodingName) ? baseEncodingName.name : + null; // Load the differences between the base and original if (encoding.has('Differences')) { - hasEncoding = true; var diffEncoding = encoding.get('Differences'); var index = 0; for (var j = 0, jj = diffEncoding.length; j < jj; j++) { @@ -874,38 +854,44 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } } } else if (isName(encoding)) { - overridableEncoding = false; - hasEncoding = true; - var currentEncoding = Encodings[encoding.name]; - - // Some bad PDF files contain fonts whose encoding name is not among - // the predefined encodings, causing baseEncoding to be undefined. 
- // In this case, fallback to using the baseEncoding as defined above - // and let the font override the encoding if one is available. - if (currentEncoding) { - baseEncoding = currentEncoding; - } else { - overridableEncoding = true; - } + baseEncodingName = encoding.name; } else { error('Encoding is not a Name nor a Dict'); } + // According to table 114 if the encoding is a named encoding it must be + // one of these predefined encodings. + if ((baseEncodingName !== 'MacRomanEncoding' && + baseEncodingName !== 'MacExpertEncoding' && + baseEncodingName !== 'WinAnsiEncoding')) { + baseEncodingName = null; + } + } + + if (baseEncodingName) { + properties.defaultEncoding = Encodings[baseEncodingName].slice(); + } else { + var encoding = properties.type === 'TrueType' ? + Encodings.WinAnsiEncoding : + Encodings.StandardEncoding; + // The Symbolic attribute can be misused for regular fonts + // Heuristic: we have to check if the font is a standard one also + if (!!(properties.flags & FontFlags.Symbolic)) { + encoding = !properties.file && /Symbol/i.test(properties.name) ? 
+ Encodings.SymbolSetEncoding : Encodings.MacRomanEncoding; + } + properties.defaultEncoding = encoding; } properties.differences = differences; - properties.baseEncoding = baseEncoding; - properties.hasEncoding = hasEncoding; - properties.overridableEncoding = overridableEncoding; + properties.baseEncodingName = baseEncodingName; + properties.dict = dict; }, - readToUnicode: function PartialEvaluator_readToUnicode(toUnicode, xref, - properties) { + readToUnicode: function PartialEvaluator_readToUnicode(toUnicode) { var cmapObj = toUnicode; var charToUnicode = []; if (isName(cmapObj)) { - var isIdentityMap = cmapObj.name.substr(0, 9) == 'Identity-'; - if (!isIdentityMap) - error('ToUnicode file cmap translation not implemented'); + return CMapFactory.create(cmapObj).map; } else if (isStream(cmapObj)) { var cmap = CMapFactory.create(cmapObj).map; // Convert UTF-16BE @@ -927,7 +913,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { }); return cmap; } - return charToUnicode; + return null; }, readCidToGidMap: function PartialEvaluator_readCidToGidMap(cidToGidStream) { // Extract the encoding from the CIDToGIDMap @@ -1006,7 +992,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { if (isName(baseFontName)) { var metrics = this.getBaseFontMetrics(baseFontName.name); - glyphsWidths = metrics.widths; + glyphsWidths = this.buildCharCodeToWidth(metrics.widths, + properties); defaultWidth = metrics.defaultWidth; } } @@ -1074,6 +1061,25 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { }; }, + buildCharCodeToWidth: function PartialEvaluator_bulildCharCodeToWidth( + widthsByGlyphName, properties) { + var widths = Object.create(null); + var differences = properties.differences; + var encoding = properties.defaultEncoding; + for (var charCode = 0; charCode < 256; charCode++) { + if (charCode in differences && + widthsByGlyphName[differences[charCode]]) { + widths[charCode] = widthsByGlyphName[differences[charCode]]; + continue; + } + if 
(charCode in encoding && widthsByGlyphName[encoding[charCode]]) { + widths[charCode] = widthsByGlyphName[encoding[charCode]]; + continue; + } + } + return widths; + }, + translateFont: function PartialEvaluator_translateFont(dict, xref) { var baseDict = dict; @@ -1135,6 +1141,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { lastChar: maxCharIndex }; this.extractDataStructures(dict, dict, xref, properties); + properties.widths = this.buildCharCodeToWidth(metrics.widths, + properties); return new Font(baseFontName, null, properties); } @@ -1212,12 +1220,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var cidEncoding = baseDict.get('Encoding'); if (isName(cidEncoding)) { properties.cidEncoding = cidEncoding.name; - properties.vertical = /-V$/.test(cidEncoding.name); } - properties.cmap = CMapFactory.create(cidEncoding); + properties.cMap = CMapFactory.create(cidEncoding, PDFJS.cMapUrl, null); + properties.vertical = properties.cMap.vertical; } - this.extractWidths(dict, xref, descriptor, properties); this.extractDataStructures(dict, baseDict, xref, properties); + this.extractWidths(dict, xref, descriptor, properties); if (type.name === 'Type3') { properties.coded = true; diff --git a/src/core/fonts.js b/src/core/fonts.js index 1d92856fa..e6bf1f633 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -18,14 +18,15 @@ ExpertSubsetCharset, FileReaderSync, GlyphsUnicode, info, isArray, isNum, ISOAdobeCharset, Stream, stringToBytes, TextDecoder, warn, Lexer, Util, - FONT_IDENTITY_MATRIX, FontRendererFactory, shadow, isString */ + FONT_IDENTITY_MATRIX, FontRendererFactory, shadow, isString, + IdentityCMap, Name, CMapFactory, PDFJS */ 'use strict'; // Unicode Private Use Area -var CMAP_GLYPH_OFFSET = 0xE000; -var GLYPH_AREA_SIZE = 0x1900; -var SYMBOLIC_FONT_GLYPH_OFFSET = 0xF000; +var PRIVATE_USE_OFFSET_START = 0xE000; +var PRIVATE_USE_OFFSET_END = 0xF8FF; +var SKIP_PRIVATE_USE_RANGE_F000_TO_F01F = false; // PDF Glyph Space Units are 
one Thousandth of a TextSpace Unit // except for Type 3 fonts @@ -425,37 +426,6 @@ var symbolsFonts = { 'Dingbats': true, 'Symbol': true, 'ZapfDingbats': true }; -var CMapConverterList = { - 'H': jis7ToUnicode, - 'V': jis7ToUnicode, - 'EUC-H': eucjpToUnicode, - 'EUC-V': eucjpToUnicode, - '83pv-RKSJ-H': sjis83pvToUnicode, - '90pv-RKSJ-H': sjis90pvToUnicode, - '90ms-RKSJ-H': sjisToUnicode, - '90ms-RKSJ-V': sjisToUnicode, - '90msp-RKSJ-H': sjisToUnicode, - '90msp-RKSJ-V': sjisToUnicode, - 'GBK-EUC-H': gbkToUnicode, - 'GBKp-EUC-H': gbkToUnicode, - 'B5pc-H': big5ToUnicode, - 'ETenms-B5-H': big5ToUnicode, - 'ETenms-B5-V': big5ToUnicode, -}; - -// CMaps using Hankaku (Halfwidth) Latin glyphs instead of proportional one. -// We need to distinguish them to get correct widths from CIDFont dicts. -var HalfwidthCMaps = { - 'H': true, - 'V': true, - 'EUC-H': true, - 'EUC-V': true, - '90ms-RKSJ-H': true, - '90ms-RKSJ-V': true, - 'UniJIS-UCS2-HW-H': true, - 'UniJIS-UCS2-HW-V': true -}; - // Glyph map for well-known standard fonts. Sometimes Ghostscript uses CID fonts // but does not embed the CID to GID mapping. The mapping is incomplete for all // glyphs, but common for some set of the standard fonts. @@ -526,75 +496,13 @@ var GlyphMapForStandardFonts = { '3316': 578, '3379': 42785, '3393': 1159, '3416': 8377 }; -var decodeBytes; -if (typeof TextDecoder !== 'undefined') { - // The encodings supported by TextDecoder can be found at: - // http://encoding.spec.whatwg.org/#concept-encoding-get - decodeBytes = function(bytes, encoding, fatal) { - return new TextDecoder(encoding, {fatal: !!fatal}).decode(bytes); - }; -} else if (typeof FileReaderSync !== 'undefined') { - decodeBytes = function(bytes, encoding) { - return new FileReaderSync().readAsText(new Blob([bytes]), encoding); - }; -} else { - // Clear the list so that decodeBytes will never be called. 
- CMapConverterList = {}; -} - -function jis7ToUnicode(str) { - var bytes = stringToBytes(str); - var length = bytes.length; - for (var i = 0; i < length; ++i) { - bytes[i] |= 0x80; - } - return decodeBytes(bytes, 'euc-jp'); -} - -function eucjpToUnicode(str) { - return decodeBytes(stringToBytes(str), 'euc-jp'); -} - -function sjisToUnicode(str) { - return decodeBytes(stringToBytes(str), 'shift_jis'); -} - -function sjis83pvToUnicode(str) { - var bytes = stringToBytes(str); - try { - // TODO: 83pv has incompatible mappings in ed40..ee9c range. - return decodeBytes(bytes, 'shift_jis', true); - } catch (e) { - warn('Unsupported 83pv character found'); - // Just retry without checking errors for now. - return decodeBytes(bytes, 'shift_jis'); - } -} - -function sjis90pvToUnicode(str) { - var bytes = stringToBytes(str); - try { - // TODO: 90pv has incompatible mappings in 8740..879c and eb41..ee9c. - return decodeBytes(bytes, 'shift_jis', true); - } catch (e) { - warn('Unsupported 90pv character found'); - // Just retry without checking errors for now. - return decodeBytes(bytes, 'shift_jis'); - } -} - -function gbkToUnicode(str) { - return decodeBytes(stringToBytes(str), 'gbk'); -} - -function big5ToUnicode(str) { - return decodeBytes(stringToBytes(str), 'big5'); -} - // Some characters, e.g. copyrightserif, mapped to the private use area and // might not be displayed using standard fonts. Mapping/hacking well-known chars // to the similar equivalents in the normal characters range. -function mapPrivateUseChars(code) { +function mapSpecialUnicodeValues(code) { + if (code >= 0xFFF0 && code <= 0xFFFF) { // Specials unicode block. 
+ return 0; + } switch (code) { case 0xF8E9: // copyrightsans case 0xF6D9: // copyrightserif @@ -787,12 +695,6 @@ function isRTLRangeFor(value) { return false; } -function isSpecialUnicode(unicode) { - return (unicode <= 0x1F || (unicode >= 127 && unicode < GLYPH_AREA_SIZE)) || - (unicode >= CMAP_GLYPH_OFFSET && - unicode < CMAP_GLYPH_OFFSET + GLYPH_AREA_SIZE); -} - // The normalization table is obtained by filtering the Unicode characters // database with entries. var NormalizedUnicodes = { @@ -2251,19 +2153,25 @@ var Font = (function FontClosure() { this.defaultWidth = properties.defaultWidth; this.composite = properties.composite; this.wideChars = properties.wideChars; - this.hasEncoding = properties.hasEncoding; - this.cmap = properties.cmap; + this.cMap = properties.cMap; this.ascent = properties.ascent / PDF_GLYPH_SPACE_UNITS; this.descent = properties.descent / PDF_GLYPH_SPACE_UNITS; - this.fontMatrix = properties.fontMatrix; + + var unicode = this.buildToUnicode(properties); + this.toUnicode = properties.toUnicode = unicode.toUnicode; + this.isIdentityUnicode = properties.isIdentityUnicode = unicode.isIdentity; + + this.toFontChar = []; + if (properties.type == 'Type3') { - this.encoding = properties.baseEncoding; + for (var charCode = 0; charCode < 256; charCode++) { + this.toFontChar[charCode] = this.differences[charCode] || + properties.defaultEncoding[charCode]; + } return; } - // Trying to fix encoding using glyph CIDSystemInfo. 
- this.loadCidToUnicode(properties); this.cidEncoding = properties.cidEncoding; this.vertical = properties.vertical; if (this.vertical) { @@ -2271,14 +2179,8 @@ var Font = (function FontClosure() { this.defaultVMetrics = properties.defaultVMetrics; } - if (properties.toUnicode && properties.toUnicode.length > 0) - this.toUnicode = properties.toUnicode; - else - this.rebuildToUnicode(properties); - - this.toFontChar = this.buildToFontChar(this.toUnicode); - if (!file) { + this.missingFile = true; // The file data is not specified. Trying to fix the font name // to be used with the canvas.font. var fontName = name.replace(/[,_]/g, '-'); @@ -2295,9 +2197,6 @@ var Font = (function FontClosure() { // if at least one width is present, remeasure all chars when exists this.remeasure = Object.keys(this.widths).length > 0; - - this.encoding = properties.baseEncoding; - this.noUnicodeAdaptation = true; if (isStandardFont && type === 'CIDFontType2' && properties.cidEncoding.indexOf('Identity-') === 0) { // Standard fonts might be embedded as CID font without glyph mapping. @@ -2308,6 +2207,10 @@ var Font = (function FontClosure() { } this.toFontChar = map; this.toUnicode = map; + } else { + for (var charCode in this.toUnicode) { + this.toFontChar[charCode] = this.toUnicode[charCode].charCodeAt(0); + } } this.loadedName = fontName.split('-')[0]; this.loading = false; @@ -2368,11 +2271,6 @@ var Font = (function FontClosure() { this.loading = true; } - var numFonts = 0; - function getUniqueName() { - return 'pdfFont' + numFonts++; - } - function stringToArray(str) { var array = []; for (var i = 0, ii = str.length; i < ii; ++i) @@ -2483,26 +2381,95 @@ var Font = (function FontClosure() { file.virtualOffset += data.length; } - function getRanges(glyphs, deltas) { + /** + * Rebuilds the char code to glyph ID map by trying to replace the char codes + * with their unicode value. It also moves char codes that are in known + * problematic locations. 
+ * @return {Object} Two properties: + * 'toFontChar' - maps original char codes(the value that will be read + * from commands such as show text) to the char codes that will be used in the + * font that we build + * 'charCodeToGlyphId' - maps the new font char codes to glyph ids + */ + function adjustMapping(charCodeToGlyphId, properties) { + var toUnicode = properties.toUnicode; + var isSymbolic = !!(properties.flags & FontFlags.Symbolic); + var isIdentityUnicode = properties.isIdentityUnicode; + var newMap = Object.create(null); + var toFontChar = []; + var usedCharCodes = []; + var usedFontCharCodes = []; + var nextAvailableFontCharCode = PRIVATE_USE_OFFSET_START; + for (var originalCharCode in charCodeToGlyphId) { + originalCharCode |= 0; + var glyphId = charCodeToGlyphId[originalCharCode]; + var fontCharCode = originalCharCode; + // First try to map the value to a unicode position if a non identity map + // was created. + if (!isIdentityUnicode && originalCharCode in toUnicode) { + var unicode = toUnicode[fontCharCode]; + // TODO: Try to map ligatures to the correct spot. + if (unicode.length === 1) { + fontCharCode = unicode.charCodeAt(0); + } + } + // Try to move control characters, special characters and already mapped + // characters to the private use area since they will not be drawn by + // canvas if left in their current position. Also, move characters if the + // font was symbolic and there is only an identity unicode map since the + // characters probably aren't in the correct position (fixes an issue + // with firefox and thuluthfont). + if ((fontCharCode in usedFontCharCodes || + fontCharCode <= 0x1f || // Control chars + fontCharCode === 0x7F || // Control char + fontCharCode === 0xAD || // Soft hyphen + (fontCharCode >= 0x80 && fontCharCode <= 0x9F) || // Control chars + (isSymbolic && isIdentityUnicode)) && + nextAvailableFontCharCode <= PRIVATE_USE_OFFSET_END) { // Room left. + // Loop to try and find a free spot in the private use area. 
+ do { + fontCharCode = nextAvailableFontCharCode++; + + if (SKIP_PRIVATE_USE_RANGE_F000_TO_F01F && fontCharCode === 0xF000) { + fontCharCode = 0xF020; + nextAvailableFontCharCode = fontCharCode + 1; + } + + } while (fontCharCode in usedFontCharCodes && + nextAvailableFontCharCode <= PRIVATE_USE_OFFSET_END); + } + + newMap[fontCharCode] = glyphId; + toFontChar[originalCharCode] = fontCharCode; + usedFontCharCodes[fontCharCode] = true; + } + return { + toFontChar: toFontChar, + charCodeToGlyphId: newMap + }; + } + + function getRanges(glyphs) { // Array.sort() sorts by characters, not numerically, so convert to an // array of characters. var codes = []; - var length = glyphs.length; - for (var n = 0; n < length; ++n) - codes.push({ unicode: glyphs[n].unicode, code: n }); + for (var charCode in glyphs) { + codes.push({ fontCharCode: charCode | 0, glyphId: glyphs[charCode] }); + } codes.sort(function fontGetRangesSort(a, b) { - return a.unicode - b.unicode; + return a.fontCharCode - b.fontCharCode; }); // Split the sorted codes into ranges. var ranges = []; + var length = codes.length; for (var n = 0; n < length; ) { - var start = codes[n].unicode; - var codeIndices = [deltas ? deltas[codes[n].code] : codes[n].code + 1]; + var start = codes[n].fontCharCode; + var codeIndices = [codes[n].glyphId]; ++n; var end = start; - while (n < length && end + 1 == codes[n].unicode) { - codeIndices.push(deltas ? deltas[codes[n].code] : codes[n].code + 1); + while (n < length && end + 1 == codes[n].fontCharCode) { + codeIndices.push(codes[n].glyphId); ++end; ++n; if (end === 0xFFFF) { break; } @@ -2513,9 +2480,8 @@ var Font = (function FontClosure() { return ranges; } - function createCmapTable(glyphs, deltas) { - var ranges = getRanges(glyphs, deltas); - + function createCmapTable(glyphs) { + var ranges = getRanges(glyphs); var numTables = ranges[ranges.length - 1][1] > 0xFFFF ? 
2 : 1; var cmap = '\x00\x00' + // version string16(numTables) + // numTables @@ -2677,8 +2643,8 @@ var Font = (function FontClosure() { var lastCharIndex = 0; if (charstrings) { - for (var i = 0; i < charstrings.length; ++i) { - var code = charstrings[i].unicode; + for (var code in charstrings) { + code |= 0; if (firstCharIndex > code || !firstCharIndex) firstCharIndex = code; if (lastCharIndex < code) @@ -2840,37 +2806,6 @@ var Font = (function FontClosure() { return nameTable; } - // Normalize the charcodes in the cmap table into unicode values - // that will work with the (3, 1) cmap table we will write out. - function cmapCharcodeToUnicode(charcode, symbolic, platformId, encodingId) { - var unicode; - if (symbolic) { - // These codes will be shifted into the range - // SYMBOLIC_FONT_GLYPH_OFFSET to (SYMBOLIC_FONT_GLYPH_OFFSET + 0xFF) - // so that they are not in the control character range that could - // be displayed as spaces by browsers. - if (platformId === 3 && encodingId === 0 || - platformId === 1 && encodingId === 0) { - unicode = SYMBOLIC_FONT_GLYPH_OFFSET | (charcode & 0xFF); - } - } else { - if (platformId === 3 && encodingId === 1) { - // A (3, 1) table is alredy unicode (Microsoft Unicode format) - unicode = charcode; - } else if (platformId === 1 && encodingId === 0) { - // TODO(mack): Should apply the changes to convert the - // MacRomanEncoding to Mac OS Roman encoding in 9.6.6.4 - // table 115 of the pdf spec - var glyphName = Encodings.MacRomanEncoding[charcode]; - if (glyphName) { - unicode = GlyphsUnicode[glyphName]; - } - } - } - return unicode; - } - - Font.prototype = { name: null, font: null, @@ -2934,34 +2869,11 @@ var Font = (function FontClosure() { }; } - function createGlyphNameMap(glyphs, ids, properties) { - var glyphNames = properties.glyphNames; - if (!glyphNames) { - properties.glyphNameMap = {}; - return; - } - var glyphsLength = glyphs.length; - var glyphNameMap = {}; - var encoding = []; - for (var i = 0; i < glyphsLength; 
++i) { - var glyphName = glyphNames[ids[i]]; - if (!glyphName) - continue; - var unicode = glyphs[i].unicode; - glyphNameMap[glyphName] = unicode; - var code = glyphs[i].code; - encoding[code] = glyphName; - } - properties.glyphNameMap = glyphNameMap; - if (properties.overridableEncoding) - properties.baseEncoding = encoding; - } - /** * Read the appropriate subtable from the cmap according to 9.6.6.4 from * PDF spec */ - function readCmapTable(cmap, font, hasEncoding, isSymbolicFont) { + function readCmapTable(cmap, font, isSymbolicFont) { var start = (font.start ? font.start : 0) + cmap.offset; font.pos = start; @@ -2969,73 +2881,36 @@ var Font = (function FontClosure() { var numTables = int16(font.getBytes(2)); var potentialTable; - var foundPreferredTable; - // There's an order of preference in terms of which cmap subtable we - // want to use. So scan through them to find our preferred table. + var canBreak = false; + // There's an order of preference in terms of which cmap subtable to + // use: + // - non-symbolic fonts the preference is a 3,1 table then a 1,0 table + // - symbolic fonts the preference is a 3,0 table then a 1,0 table + // The following takes advantage of the fact that the tables are sorted + // to work. for (var i = 0; i < numTables; i++) { var platformId = int16(font.getBytes(2)); var encodingId = int16(font.getBytes(2)); var offset = int32(font.getBytes(4)); var useTable = false; - var canBreak = false; - // The following block implements the following from the spec: - // - // When the font has no Encoding entry, or the font descriptor’s - // Symbolic flag is set (in which case the Encoding entry - // is ignored), this shall occur: - // - If the font contains a (3, 0) subtable, the range of - // - Otherwise, the (1, 0) subtable will be used. 
- // Otherwise, if the font does have an encoding: - // - Use the (3, 1) cmap subtable - // - Otherwise, use the (1, 0) subtable if present - // - // The following diverges slightly from the above spec in order - // to handle the case that hasEncoding and isSymbolicFont are both - // true. In this, based on the ordering of the rules in the spec, - // my interpretation is that we should be acting as if the font is - // symbolic. - // - // However, in this case, the test pdf 'preistabelle.pdf' - // is interpreting this case as a non-symbolic font. In this case - // though, 'presitabelle.pdf' does contain a (3, 1) table and does - // not contain a (3, 0) table which indicates it is non-symbolic. - // - // Thus, I am using this heurisitic of looking at which table is - // found to truly determine whether or not the font is symbolic. - // That is, if the specific symbolic/non-symbolic font specific - // tables (3, 0) or (3, 1) is found, that information is used for - // deciding if the font is symbolic or not. - // - // TODO(mack): This section needs some more thought on whether the - // heuristic is good enough. For now, it passes all the regression - // tests. - if (isSymbolicFont && platformId === 3 && encodingId === 0) { + if (platformId == 1 && encodingId === 0) { + useTable = true; + // Continue the loop since there still may be a higher priority + // table. 
+ } else if (!isSymbolicFont && platformId === 3 && encodingId === 1) { useTable = true; canBreak = true; - foundPreferredTable = true; - } else if (hasEncoding && platformId === 3 && encodingId === 1) { + } else if (isSymbolicFont && platformId === 3 && encodingId === 0) { useTable = true; canBreak = true; - foundPreferredTable = true; - // Update the isSymbolicFont based on this heuristic - isSymbolicFont = false; - } else if (platformId === 1 && encodingId === 0 && - !foundPreferredTable) { - useTable = true; - foundPreferredTable = true; - } else if (!potentialTable) { - // We will use an arbitrary table if we cannot find a preferred - // table - useTable = true; } if (useTable) { potentialTable = { platformId: platformId, encodingId: encodingId, - offset: offset, - isSymbolicFont: isSymbolicFont + offset: offset }; } if (canBreak) { @@ -3044,16 +2919,8 @@ var Font = (function FontClosure() { } if (!potentialTable) { - error('Could not find a cmap table'); - return; - } - - if (!foundPreferredTable) { - warn('Did not find a cmap of suitable format. 
Interpreting (' + - potentialTable.platformId + ', ' + potentialTable.encodingId + - ') as (3, 1) table'); - potentialTable.platformId = 3; - potentialTable.encodingId = 1; + warn('Could not find a preferred cmap table.'); + return []; } font.pos = start + potentialTable.offset; @@ -3072,7 +2939,7 @@ var Font = (function FontClosure() { continue; } mappings.push({ - charcode: j, + charCode: j, glyphId: index }); } @@ -3132,7 +2999,7 @@ var Font = (function FontClosure() { continue; } mappings.push({ - charcode: j, + charCode: j, glyphId: glyphId }); } @@ -3150,10 +3017,10 @@ var Font = (function FontClosure() { var ids = []; for (var j = 0; j < entryCount; j++) { var glyphId = int16(font.getBytes(2)); - var charcode = firstCode + j; + var charCode = firstCode + j; mappings.push({ - charcode: charcode, + charCode: charCode, glyphId: glyphId }); } @@ -3163,10 +3030,10 @@ var Font = (function FontClosure() { // removing duplicate entries mappings.sort(function (a, b) { - return a.charcode - b.charcode; + return a.charCode - b.charCode; }); for (var i = 1; i < mappings.length; i++) { - if (mappings[i - 1].charcode === mappings[i].charcode) { + if (mappings[i - 1].charCode === mappings[i].charCode) { mappings.splice(i, 1); i--; } @@ -3175,7 +3042,6 @@ var Font = (function FontClosure() { return { platformId: potentialTable.platformId, encodingId: potentialTable.encodingId, - isSymbolicFont: potentialTable.isSymbolicFont, mappings: mappings, hasShortCmap: hasShortCmap }; @@ -3901,229 +3767,121 @@ var Font = (function FontClosure() { } } - var glyphs, ids; + var charCodeToGlyphId = []; if (properties.type == 'CIDFontType2') { - // Replace the old CMAP table with a shiny new one - // Type2 composite fonts map characters directly to glyphs so the cmap - // table must be replaced. - // canvas fillText will reencode some characters even if the font has a - // glyph at that position - e.g. newline is converted to a space and - // U+00AD (soft hyphen) is not drawn. 
- // So, offset all the glyphs by 0xFF to avoid these cases and use - // the encoding to map incoming characters to the new glyph positions - if (!tables.cmap) { - tables.cmap = { - tag: 'cmap', - data: null - }; - } - var cidToGidMap = properties.cidToGidMap || []; - var gidToCidMap = [0]; - if (cidToGidMap.length > 0) { - for (var j = cidToGidMap.length - 1; j >= 0; j--) { - var gid = cidToGidMap[j]; - if (gid) - gidToCidMap[gid] = j; + var cMap = properties.cMap.map; + for (var charCode in cMap) { + charCode |= 0; + var cid = cMap[charCode]; + assert(cid.length === 1, 'Max size of CID is 65,535'); + cid = cid.charCodeAt(0); + var glyphId = -1; + if (cidToGidMap.length === 0) { + glyphId = charCode; + } else if (cid in cidToGidMap) { + glyphId = cidToGidMap[cid]; } - // filling the gaps using CID above the CIDs currently used in font - var nextCid = cidToGidMap.length; - for (var i = 1; i < numGlyphs; i++) { - if (!gidToCidMap[i]) - gidToCidMap[i] = nextCid++; - } - } else { - for (var i = 1; i < numGlyphs; i++) { - gidToCidMap[i] = i; - } - if (dupFirstEntry) { - gidToCidMap[numGlyphs - 1] = 0; + if (glyphId >= 0 && glyphId < numGlyphs) { + charCodeToGlyphId[charCode] = glyphId; } } - - glyphs = []; - ids = []; - - var usedUnicodes = []; - var unassignedUnicodeItems = []; - var toFontChar = this.cidToFontChar || this.toFontChar; - for (var i = 1; i < numGlyphs; i++) { - var cid = gidToCidMap[i]; - var unicode = toFontChar[cid]; - if (!unicode || typeof unicode !== 'number' || - isSpecialUnicode(unicode) || unicode in usedUnicodes) { - unassignedUnicodeItems.push(i); - continue; - } - usedUnicodes[unicode] = true; - glyphs.push({ unicode: unicode, code: cid }); - ids.push(i); - } - - // unassigned codepoints will never be used for non-Identity CMap - // because the input will be Unicode - if (!this.cidToFontChar) { - // trying to fit as many unassigned symbols as we can - // in the range allocated for the user defined symbols - var unusedUnicode = 
CMAP_GLYPH_OFFSET; - for (var j = 0, jj = unassignedUnicodeItems.length; j < jj; j++) { - var i = unassignedUnicodeItems[j]; - var cid = gidToCidMap[i]; - while (unusedUnicode in usedUnicodes) - unusedUnicode++; - if (unusedUnicode >= CMAP_GLYPH_OFFSET + GLYPH_AREA_SIZE) - break; - var unicode = unusedUnicode++; - this.toFontChar[cid] = unicode; - usedUnicodes[unicode] = true; - glyphs.push({ unicode: unicode, code: cid }); - ids.push(i); - } + if (dupFirstEntry) { + charCodeToGlyphId[0] = numGlyphs - 1; } } else { - this.useToFontChar = true; // Most of the following logic in this code branch is based on the // 9.6.6.4 of the PDF spec. - - // TODO(mack): - // We are using this.hasEncoding to mean that the encoding is either - // MacRomanEncoding or WinAnsiEncoding (following spec in 9.6.6.4), - // but this.hasEncoding is currently true for any encodings on the - // Encodings object (e.g. MacExpertEncoding). So should consider using - // better check for this. - var cmapTable = readCmapTable(tables.cmap, font, this.hasEncoding, - this.isSymbolicFont); - - // TODO(mack): If the (3, 0) cmap table used, then the font is - // symbolic. 
The range of charcodes in the cmap table should be - // one of the following: - // -> 0x0000 - 0x00FF - // -> 0xF000 - 0xF0FF - // -> 0xF100 - 0xF1FF - // -> 0xF200 - 0xF2FF - // If it is not, we should change not consider this a symbolic font - this.isSymbolicFont = cmapTable.isSymbolicFont; - + var cmapTable = readCmapTable(tables.cmap, font, this.isSymbolicFont); var cmapPlatformId = cmapTable.platformId; var cmapEncodingId = cmapTable.encodingId; var cmapMappings = cmapTable.mappings; var cmapMappingsLength = cmapMappings.length; - var glyphs = []; - var ids = []; - for (var i = 0; i < cmapMappingsLength; ++i) { - var cmapMapping = cmapMappings[i]; - var charcode = cmapMapping.charcode; - var unicode = cmapCharcodeToUnicode(charcode, this.isSymbolicFont, - cmapPlatformId, cmapEncodingId); + var hasEncoding = properties.differences.length || + !!properties.baseEncodingName; - if (!unicode) { - // TODO(mack): gotta check if skipping mappings where we cannot find - // a unicode is the correct behaviour - continue; + // The spec seems to imply that if the font is symbolic the encoding + // should be ignored, this doesn't appear to work for 'preistabelle.pdf' + // where the the font is symbolic and it has an encoding. + if (hasEncoding && + (cmapPlatformId === 3 && cmapEncodingId === 1 || + cmapPlatformId === 1 && cmapEncodingId === 0)) { + var baseEncoding = []; + if (properties.baseEncodingName === 'MacRomanEncoding' || + properties.baseEncodingName === 'WinAnsiEncoding') { + baseEncoding = Encodings[properties.baseEncodingName]; } - glyphs.push({ - code: charcode, - unicode: unicode - }); - ids.push(cmapMapping.glyphId); - } - - var hasShortCmap = cmapTable.hasShortCmap; - var toFontChar = this.toFontChar; - - if (hasShortCmap && ids.length == numGlyphs) { - // Fixes the short cmap tables -- some generators use incorrect - // glyph id. 
- for (var i = 0, ii = ids.length; i < ii; i++) { - ids[i] = i; - } - } - - // Rewrite the whole toFontChar dictionary with a new one using the - // information from the mappings in the cmap table. - var newToFontChar = []; - if (this.isSymbolicFont) { - for (var i = 0, ii = glyphs.length; i < ii; i++) { - var glyph = glyphs[i]; - // For (3, 0) cmap tables: - // The charcode key being stored in toFontChar is the lower byte - // of the two-byte charcodes of the cmap table since according to - // the spec: 'each byte from the string shall be prepended with the - // high byte of the range [of charcodes in the cmap table], to form - // a two-byte character, which shall be used to select the - // associated glyph description from the subtable'. - // - // For (1, 0) cmap tables: - // 'single bytes from the string shall be used to look up the - // associated glyph descriptions from the subtable'. This means - // charcodes in the cmap will be single bytes, so no-op since - // glyph.code & 0xFF === glyph.code - newToFontChar[glyph.code & 0xFF] = glyph.unicode; - } - } else { - - var encoding = properties.baseEncoding; - var differences = properties.differences; - - // TODO(mack): check if it is necessary to shift control characters - // for non-symbolic fonts so that browsers dont't render them using - // space characters - - var glyphCodeMapping = cmapTable.glyphCodeMapping; - for (var charcode = 0; charcode < encoding.length; ++charcode) { - if (!encoding.hasOwnProperty(charcode)) { + for (var charCode = 0; charCode < 256; charCode++) { + var glyphName; + if (this.differences && charCode in this.differences) { + glyphName = this.differences[charCode]; + } else if (charCode in baseEncoding && + baseEncoding[charCode] !== '') { + glyphName = baseEncoding[charCode]; + } else { + glyphName = Encodings.StandardEncoding[charCode]; + } + if (!glyphName) { continue; } - - // Since the cmap table that we will be writing out is a (3, 1) - // unicode table, in this section we will 
rewrites the charcodes - // in the pdf into unicodes - - var glyphName = encoding[charcode]; - // A nonsymbolic font should not have a Differences array, but - // if it does have one, we should still use it - if (charcode in differences) { - glyphName = differences[charcode]; + var unicodeOrCharCode; + if (cmapPlatformId === 3 && cmapEncodingId === 1) { + unicodeOrCharCode = GlyphsUnicode[glyphName]; + } else if (cmapPlatformId === 1 && cmapEncodingId === 0) { + // TODO: the encoding needs to be updated with mac os table. + unicodeOrCharCode = Encodings.MacRomanEncoding.indexOf(glyphName); } - // Finally, any undefined entries in the table shall be filled - // using StandardEncoding - if (!glyphName) { - glyphName = Encodings.StandardEncoding[charcode]; + var found = false; + for (var i = 0; i < cmapMappingsLength; ++i) { + if (cmapMappings[i].charCode === unicodeOrCharCode) { + charCodeToGlyphId[charCode] = cmapMappings[i].glyphId; + found = true; + break; + } } - - // TODO(mack): Handle the case that the glyph name cannot be - // mapped as specified, in which case the glyph name shall be - // looked up in the font program's 'post' table (if one is - // present) and the associated glyph id shall be used. - // - // For now, we're just using the '.notdef' glyph name in this - // case. - glyphName = glyphName || '.notdef'; - - var unicode = GlyphsUnicode[glyphName]; - newToFontChar[charcode] = unicode; + if (!found && properties.glyphNames) { + // Try to map using the post table. There are currently no known + // pdfs that this fixes. 
+ var glyphId = properties.glyphNames.indexOf(glyphName); + if (glyphId > 0) { + charCodeToGlyphId[charCode] = glyphId; + } + } + } + } else { + // For (3, 0) cmap tables: + // The charcode key being stored in charCodeToGlyphId is the lower + // byte of the two-byte charcodes of the cmap table since according to + // the spec: 'each byte from the string shall be prepended with the + // high byte of the range [of charcodes in the cmap table], to form + // a two-byte character, which shall be used to select the + // associated glyph description from the subtable'. + // + // For (1, 0) cmap tables: + // 'single bytes from the string shall be used to look up the + // associated glyph descriptions from the subtable'. This means + // charcodes in the cmap will be single bytes, so no-op since + // glyph.charCode & 0xFF === glyph.charCode + for (var i = 0; i < cmapMappingsLength; ++i) { + var charCode = cmapMappings[i].charCode & 0xFF; + charCodeToGlyphId[charCode] = cmapMappings[i].glyphId; } } - this.toFontChar = toFontChar = newToFontChar; - - createGlyphNameMap(glyphs, ids, properties); - this.glyphNameMap = properties.glyphNameMap; } - if (glyphs.length === 0) { + if (charCodeToGlyphId.length === 0) { // defines at least one glyph - glyphs.push({ unicode: 0xF000, code: 0xF000, glyph: '.notdef' }); - ids.push(0); + charCodeToGlyphId[0] = 0; } // Converting glyphs and ids into font's cmap table - tables.cmap.data = createCmapTable(glyphs, ids); - var unicodeIsEnabled = []; - for (var i = 0, ii = glyphs.length; i < ii; i++) { - unicodeIsEnabled[glyphs[i].unicode] = true; - } - this.unicodeIsEnabled = unicodeIsEnabled; + var newMapping = adjustMapping(charCodeToGlyphId, properties); + this.toFontChar = newMapping.toFontChar; + tables.cmap = { + tag: 'cmap', + data: createCmapTable(newMapping.charCodeToGlyphId) + }; if (!tables['OS/2'] || !validateOS2Table(tables['OS/2'])) { // extract some more font properties from the OpenType head and @@ -4138,7 +3896,9 @@ var Font = 
(function FontClosure() { tables['OS/2'] = { tag: 'OS/2', - data: stringToArray(createOS2Table(properties, glyphs, override)) + data: stringToArray(createOS2Table(properties, + newMapping.charCodeToGlyphId, + override)) }; } @@ -4222,63 +3982,49 @@ var Font = (function FontClosure() { createOpenTypeHeader('\x4F\x54\x54\x4F', otf, 9); - var charstrings = font.charstrings; - properties.fixedPitch = isFixedPitch(charstrings); + properties.fixedPitch = false; //isFixedPitch(charstrings); - var glyphNameMap = {}; - for (var i = 0; i < charstrings.length; ++i) { - var charstring = charstrings[i]; - glyphNameMap[charstring.glyph] = charstring.unicode; - } - this.glyphNameMap = glyphNameMap; + var mapping = font.getGlyphMapping(properties); + var newMapping = adjustMapping(mapping, properties); + this.toFontChar = newMapping.toFontChar; + var numGlyphs = font.numGlyphs; var seacs = font.seacs; - if (SEAC_ANALYSIS_ENABLED && seacs) { - var seacMap = []; + if (SEAC_ANALYSIS_ENABLED && seacs && seacs.length) { var matrix = properties.fontMatrix || FONT_IDENTITY_MATRIX; - for (var i = 0; i < charstrings.length; ++i) { - var charstring = charstrings[i]; - var seac = seacs[charstring.gid]; - if (!seac) { - continue; - } + var charset = font.getCharset(); + var charCodeToGlyphId = mapping; + var toFontChar = newMapping.toFontChar; + var seacs = font.seacs; + var seacMap = Object.create(null); + var glyphIdToCharCode = Object.create(null); + for (var charCode in charCodeToGlyphId) { + glyphIdToCharCode[charCodeToGlyphId[charCode]] = charCode | 0; + } + for (var glyphId in seacs) { + glyphId |= 0; + var seac = seacs[glyphId]; var baseGlyphName = Encodings.StandardEncoding[seac[2]]; - var baseUnicode = glyphNameMap[baseGlyphName]; var accentGlyphName = Encodings.StandardEncoding[seac[3]]; - var accentUnicode = glyphNameMap[accentGlyphName]; - if (!baseUnicode || !accentUnicode) { + var baseGlyphId = charset.indexOf(baseGlyphName); + var accentGlyphId = 
charset.indexOf(accentGlyphName); + if (baseGlyphId < 0 || accentGlyphId < 0) { continue; } var accentOffset = { x: seac[0] * matrix[0] + seac[1] * matrix[2] + matrix[4], y: seac[0] * matrix[1] + seac[1] * matrix[3] + matrix[5] }; - seacMap[charstring.unicode] = { - baseUnicode: baseUnicode, - accentUnicode: accentUnicode, + var charCode = glyphIdToCharCode[glyphId]; + seacMap[charCode] = { + baseFontCharCode: toFontChar[glyphIdToCharCode[baseGlyphId]], + accentFontCharCode: toFontChar[glyphIdToCharCode[accentGlyphId]], accentOffset: accentOffset }; } properties.seacMap = seacMap; } - if (properties.overridableEncoding && (properties.subtype == 'Type1C' || - properties.subtype == 'CIDFontType0C')) { - var encoding = []; - for (var i = 0; i < charstrings.length; ++i) { - var charstring = charstrings[i]; - encoding[charstring.code] = charstring.glyph; - } - properties.baseEncoding = encoding; - } - if (properties.subtype == 'CIDFontType0C') { - var toFontChar = []; - for (var i = 0; i < charstrings.length; ++i) { - var charstring = charstrings[i]; - toFontChar[charstring.code] = charstring.unicode; - } - this.toFontChar = toFontChar; - } var unitsPerEm = 1 / (properties.fontMatrix || FONT_IDENTITY_MATRIX)[0]; var fields = { @@ -4286,11 +4032,11 @@ var Font = (function FontClosure() { 'CFF ': font.data, // OS/2 and Windows Specific metrics - 'OS/2': stringToArray(createOS2Table(properties, charstrings)), + 'OS/2': stringToArray(createOS2Table(properties, + newMapping.charCodeToGlyphId)), // Character to glyphs mapping - 'cmap': createCmapTable(charstrings.slice(), - ('glyphIds' in font) ? 
font.glyphIds : null), + 'cmap': createCmapTable(newMapping.charCodeToGlyphId), // Font header 'head': (function fontFieldsHead() { @@ -4334,14 +4080,17 @@ var Font = (function FontClosure() { '\x00\x00' + // -reserved- '\x00\x00' + // -reserved- '\x00\x00' + // metricDataFormat - string16(charstrings.length + 1)); // Number of HMetrics + string16(numGlyphs + 1)); // Number of HMetrics })(), // Horizontal metrics 'hmtx': (function fontFieldsHmtx() { + var charstrings = font.charstrings; var hmtx = '\x00\x00\x00\x00'; // Fake .notdef - for (var i = 0, ii = charstrings.length; i < ii; i++) { - var charstring = charstrings[i]; + for (var i = 0, ii = numGlyphs; i < ii; i++) { + // TODO: For CFF fonts the width should technically match th x in + // the glyph, but it doesn't seem to matter. + var charstring = charstrings ? charstrings[i] : {}; var width = 'width' in charstring ? charstring.width : 0; hmtx += string16(width) + string16(0); } @@ -4352,7 +4101,7 @@ var Font = (function FontClosure() { 'maxp': (function fontFieldsMaxp() { return stringToArray( '\x00\x00\x50\x00' + // Version number - string16(charstrings.length + 1)); // Num of glyphs + string16(numGlyphs + 1)); // Num of glyphs })(), // Naming tables @@ -4373,124 +4122,103 @@ var Font = (function FontClosure() { return stringToArray(otf.file); }, - buildToFontChar: function Font_buildToFontChar(toUnicode) { - var result = []; - var unusedUnicode = CMAP_GLYPH_OFFSET; - for (var i = 0, ii = toUnicode.length; i < ii; i++) { - var unicode = toUnicode[i]; - var fontCharCode = typeof unicode === 'object' ? 
unusedUnicode++ : - unicode; - if (typeof unicode !== 'undefined') { - if (isString(fontCharCode) && fontCharCode.length === 1) { - fontCharCode = fontCharCode.charCodeAt(0); - } - result[i] = fontCharCode; - } - } - return result; - }, - - rebuildToUnicode: function Font_rebuildToUnicode(properties) { - var firstChar = properties.firstChar, lastChar = properties.lastChar; - var map = []; - var toUnicode = this.toUnicode || this.cidToUnicode; - if (toUnicode) { - var isIdentityMap = toUnicode.length === 0; - for (var i = firstChar, ii = lastChar; i <= ii; i++) { - // TODO missing map the character according font's CMap - map[i] = isIdentityMap ? String.fromCharCode(i) : toUnicode[i]; - } - } else { - for (var i = firstChar, ii = lastChar; i <= ii; i++) { - var glyph = properties.differences[i]; - if (!glyph) - glyph = properties.baseEncoding[i]; - if (!!glyph && (glyph in GlyphsUnicode)) - map[i] = String.fromCharCode(GlyphsUnicode[glyph]); - } - } - this.toUnicode = map; - }, - - loadCidToUnicode: function Font_loadCidToUnicode(properties) { - if (!properties.cidSystemInfo) - return; - - var cidToUnicodeMap = [], unicodeToCIDMap = []; - this.cidToUnicode = cidToUnicodeMap; - this.unicodeToCID = unicodeToCIDMap; - - var cidEncoding = properties.cidEncoding; + /** + * Builds a char code to unicode map based on section 9.10 of the spec. + * @param {Object} properties Font properties object. + * @return {Object} Has two properties: 'toUnicode' which maps char codes to + * unicode (string) values and 'isIdentity' which is true if an identity map + * is used. 
+ */ + buildToUnicode: function Font_buildToUnicode(properties) { + var map = { + isIdentity: false, + toUnicode: null + }; + // Section 9.10.2 Mapping Character Codes to Unicode Values if (properties.toUnicode) { - if (cidEncoding && cidEncoding.indexOf('Identity-') !== 0) { - warn('Need to create a reverse mapping from \'ToUnicode\' CMap'); + map.toUnicode = properties.toUnicode; + return map; + } + // According to the spec if the font is a simple font we should only map + // to unicode if the base encoding is MacRoman, MacExpert, or WinAnsi or + // the differences array only contains adobe standard or symbol set names, + // in pratice it seems better to always try to create a toUnicode + // map based of the default encoding. + if (!properties.composite /* is simple font */) { + var toUnicode = []; + var encoding = properties.defaultEncoding.slice(); + // Merge in the differences array. + var differences = properties.differences; + for (var charcode in differences) { + encoding[charcode] = differences[charcode]; } - return; // 'ToUnicode' CMap will be used - } - - var cidSystemInfo = properties.cidSystemInfo; - var cidToUnicode; - if (cidSystemInfo) { - cidToUnicode = CIDToUnicodeMaps[ - cidSystemInfo.registry + '-' + cidSystemInfo.ordering]; - } - - if (!cidToUnicode) - return; // identity encoding - - var overwrite = HalfwidthCMaps[cidEncoding]; - var cid = 1, i, j, k, ii; - for (i = 0, ii = cidToUnicode.length; i < ii; ++i) { - var unicode = cidToUnicode[i]; - if (isArray(unicode)) { - var length = unicode.length; - for (j = 0; j < length; j++) { - cidToUnicodeMap[cid] = k = unicode[j]; - if (!unicodeToCIDMap[k] || overwrite) { - unicodeToCIDMap[k] = cid; - } + for (var charcode in encoding) { + // a) Map the character code to a character name. + var glyphName = encoding[charcode]; + // b) Look up the character name in the Adobe Glyph List (see the + // Bibliography) to obtain the corresponding Unicode value. 
+ if (glyphName === '' || !(glyphName in GlyphsUnicode)) { + continue; } - cid++; - } else if (typeof unicode === 'object') { - var fillLength = unicode.f; - if (fillLength) { - k = unicode.c; - for (j = 0; j < fillLength; ++j) { - cidToUnicodeMap[cid] = k; - if (!unicodeToCIDMap[k] || overwrite) { - unicodeToCIDMap[k] = cid; - } - cid++; - k++; - } - } else - cid += unicode.s; - } else if (unicode) { - cidToUnicodeMap[cid] = unicode; - if (!unicodeToCIDMap[unicode] || overwrite) { - unicodeToCIDMap[unicode] = cid; - } - cid++; - } else - cid++; - } - - if (!cidEncoding) { - return; - } - if (cidEncoding.indexOf('Identity-') !== 0) { - // input is already Unicode for non-Identity CMap encodings. - this.cidToUnicode = []; - // For CIDFontType2, however, we need cid-to-Unicode conversion - // to rebuild cmap. - if (properties.type == 'CIDFontType2') { - this.cidToFontChar = cidToUnicodeMap; + toUnicode[charcode] = String.fromCharCode(GlyphsUnicode[glyphName]); } - } else { - // We don't have to do reverse conversions if the string is - // already CID. - this.unicodeToCID = []; + map.toUnicode = toUnicode; + return map; } + // If the font is a composite font that uses one of the predefined CMaps + // listed in Table 118 (except Identity–H and Identity–V) or whose + // descendant CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or + // Adobe-Korea1 character collection: + if (properties.composite && ( + (properties.cMap.builtInCMap && + !(properties.cMap instanceof IdentityCMap)) || + (properties.cidSystemInfo.registry === 'Adobe' && + (properties.cidSystemInfo.ordering === 'GB1' || + properties.cidSystemInfo.ordering === 'CNS1' || + properties.cidSystemInfo.ordering === 'Japan1' || + properties.cidSystemInfo.ordering === 'Korea1')))) { + // Then: + // a) Map the character code to a character identifier (CID) according + // to the font’s CMap. 
+ // b) Obtain the registry and ordering of the character collection used + // by the font’s CMap (for example, Adobe and Japan1) from its + // CIDSystemInfo dictionary. + var registry = properties.cidSystemInfo.registry; + var ordering = properties.cidSystemInfo.ordering; + // c) Construct a second CMap name by concatenating the registry and + // ordering obtained in step (b) in the format registry–ordering–UCS2 + // (for example, Adobe–Japan1–UCS2). + var ucs2CMapName = new Name(registry + '-' + ordering + '-UCS2'); + // d) Obtain the CMap with the name constructed in step (c) (available + // from the ASN Web site; see the Bibliography). + var ucs2CMap = CMapFactory.create(ucs2CMapName, PDFJS.cMapUrl, null); + var cMap = properties.cMap; + var toUnicode = []; + for (var charcode in cMap.map) { + var cid = cMap.map[charcode]; + assert(cid.length === 1, 'Max size of CID is 65,535'); + // e) Map the CID obtained in step (a) according to the CMap obtained + // in step (d), producing a Unicode value. + var ucs2 = ucs2CMap.map[cid.charCodeAt(0)]; + if (!ucs2) { + continue; + } + toUnicode[charcode] = String.fromCharCode((ucs2.charCodeAt(0) << 8) + + ucs2.charCodeAt(1)); + } + map.toUnicode = toUnicode; + return map; + } + + // The viewer's choice, just use an identity map. + var toUnicode = []; + var firstChar = properties.firstChar, lastChar = properties.lastChar; + for (var i = firstChar, ii = lastChar; i <= ii; i++) { + toUnicode[i] = String.fromCharCode(i); + } + map.isIdentity = true; + map.toUnicode = toUnicode; + return map; }, get spaceWidth() { @@ -4511,8 +4239,11 @@ var Font = (function FontClosure() { var glyphUnicode = GlyphsUnicode[glyphName]; // finding the charcode via unicodeToCID map var charcode = 0; - if (this.composite) - charcode = this.unicodeToCID[glyphUnicode]; + if (this.composite) { + if (glyphUnicode in this.cMap.map) { + charcode = this.cMap.lookup(glyphUnicode).charCodeAt(0); + } + } // ... 
via toUnicode map if (!charcode && 'toUnicode' in this) charcode = this.toUnicode.indexOf(glyphUnicode); @@ -4532,99 +4263,39 @@ var Font = (function FontClosure() { }, charToGlyph: function Font_charToGlyph(charcode) { - var fontCharCode, width, operatorList, disabled; + var fontCharCode, width, operatorList; - var width = this.widths[charcode]; - var vmetric = this.vmetrics && this.vmetrics[charcode]; - - switch (this.type) { - case 'CIDFontType0': - var cid = this.unicodeToCID[charcode] || charcode; - if (this.unicodeToCID.length > 0) { - width = this.widths[cid]; - vmetric = this.vmetrics && this.vmetrics[cid]; - } - if (this.noUnicodeAdaptation) { - fontCharCode = this.toFontChar[charcode] || charcode; - break; - } - // CIDFontType0 is not encoded in Unicode. - fontCharCode = this.toFontChar[cid] || cid; - break; - case 'CIDFontType2': - if (this.unicodeToCID.length > 0) { - var cid = this.unicodeToCID[charcode] || charcode; - width = this.widths[cid]; - vmetric = this.vmetrics && this.vmetrics[cid]; - fontCharCode = charcode; - break; - } - fontCharCode = this.toFontChar[charcode] || charcode; - break; - case 'MMType1': // XXX at the moment only "standard" fonts are supported - case 'Type1': - var glyphName = this.differences[charcode] || this.encoding[charcode]; - if (!isNum(width)) - width = this.widths[glyphName]; - if (this.noUnicodeAdaptation) { - fontCharCode = mapPrivateUseChars(GlyphsUnicode[glyphName] || - charcode); - break; - } - fontCharCode = this.glyphNameMap[glyphName] || - GlyphsUnicode[glyphName] || charcode; - break; - case 'Type3': - var glyphName = this.differences[charcode] || this.encoding[charcode]; - operatorList = this.charProcOperatorList[glyphName]; - fontCharCode = charcode; - break; - case 'TrueType': - if (this.useToFontChar) { - fontCharCode = this.toFontChar[charcode] || charcode; - break; - } - var glyphName = this.differences[charcode] || this.encoding[charcode]; - if (!glyphName) - glyphName = 
Encodings.StandardEncoding[charcode]; - if (!isNum(width)) - width = this.widths[glyphName]; - if (this.noUnicodeAdaptation) { - fontCharCode = GlyphsUnicode[glyphName] || charcode; - break; - } - if (!this.hasEncoding || this.isSymbolicFont) { - fontCharCode = this.useToFontChar ? this.toFontChar[charcode] : - charcode; - break; - } - - // MacRoman encoding address by re-encoding the cmap table - - fontCharCode = glyphName in this.glyphNameMap ? - this.glyphNameMap[glyphName] : GlyphsUnicode[glyphName]; - break; - default: - warn('Unsupported font type: ' + this.type); - break; + var widthCode = charcode; + if (this.cMap && charcode in this.cMap.map) { + widthCode = this.cMap.map[charcode].charCodeAt(0); } + var width = this.widths[widthCode]; + width = isNum(width) ? width : this.defaultWidth; + var vmetric = this.vmetrics && this.vmetrics[widthCode]; - var unicodeChars = !('toUnicode' in this) ? charcode : - this.toUnicode[charcode] || charcode; + var unicodeChars = this.toUnicode[charcode] || charcode; if (typeof unicodeChars === 'number') { unicodeChars = String.fromCharCode(unicodeChars); } - width = isNum(width) ? width : this.defaultWidth; - disabled = this.unicodeIsEnabled ? - !this.unicodeIsEnabled[fontCharCode] : false; + // First try the toFontChar map, if it's not there then try falling + // back to the char code. + fontCharCode = this.toFontChar[charcode] || charcode; + if (this.missingFile) { + fontCharCode = mapSpecialUnicodeValues(fontCharCode); + } + + if (this.type === 'Type3') { + // Font char code in this case is actually a glyph name. 
+ operatorList = this.charProcOperatorList[fontCharCode]; + } var accent = null; - if (this.seacMap && this.seacMap[fontCharCode]) { - var seac = this.seacMap[fontCharCode]; - fontCharCode = seac.baseUnicode; + if (this.seacMap && this.seacMap[charcode]) { + var seac = this.seacMap[charcode]; + fontCharCode = seac.baseFontCharCode; accent = { - fontChar: String.fromCharCode(seac.accentUnicode), + fontChar: String.fromCharCode(seac.accentFontCharCode), offset: seac.accentOffset }; } @@ -4635,7 +4306,6 @@ var Font = (function FontClosure() { accent: accent, width: width, vmetric: vmetric, - disabled: disabled, operatorList: operatorList }; }, @@ -4658,23 +4328,12 @@ var Font = (function FontClosure() { glyphs = []; var charsCacheKey = chars; - var converter; - var cidEncoding = this.cidEncoding; - if (cidEncoding) { - converter = CMapConverterList[cidEncoding]; - if (converter) { - chars = converter(chars); - } else if (cidEncoding.indexOf('Uni') !== 0 && - cidEncoding.indexOf('Identity-') !== 0) { - warn('Unsupported CMap: ' + cidEncoding); - } - } - if (!converter && this.cmap) { + if (this.cMap) { var i = 0; // composite fonts have multi-byte strings convert the string from // single-byte to multi-byte while (i < chars.length) { - var c = this.cmap.readCharCode(chars, i); + var c = this.cMap.readCharCode(chars, i); var charcode = c[0]; var length = c[1]; i += length; @@ -4686,8 +4345,7 @@ var Font = (function FontClosure() { glyphs.push(null); } } - } - else { + } else { for (var i = 0, ii = chars.length; i < ii; ++i) { var charcode = chars.charCodeAt(i); var glyph = this.charToGlyph(charcode); @@ -4722,6 +4380,59 @@ var ErrorFont = (function ErrorFontClosure() { return ErrorFont; })(); +/** + * Shared logic for building a char code to glyph id mapping for Type1 and + * simple CFF fonts. See section 9.6.6.2 of the spec. + * @param {Object} properties Font properties object. + * @param {Object} builtInEncoding The encoding contained within the actual font + * data. 
+ * @param {Array} glyphNames Glyph names, where the index is the glyph ID. + * @returns {Object} A char code to glyph ID map. + */ +function type1FontGlyphMapping(properties, builtInEncoding, glyphNames) { + var charCodeToGlyphId = Object.create(null); + if (properties.baseEncodingName) { + // If a valid base encoding name was used, the mapping is initialized with + // that. + var baseEncoding = Encodings[properties.baseEncodingName]; + for (var charCode = 0; charCode < baseEncoding.length; charCode++) { + var glyphId = glyphNames.indexOf(baseEncoding[charCode]); + if (glyphId >= 0) { + charCodeToGlyphId[charCode] = glyphId; + } + } + } else if (!!(properties.flags & FontFlags.Symbolic)) { + // For a symbolic font the encoding should be the font's built-in + // encoding. + for (var charCode in builtInEncoding) { + charCodeToGlyphId[charCode] = builtInEncoding[charCode]; + } + } else { + // For non-symbolic fonts that don't have a base encoding the standard + // encoding should be used. + var baseEncoding = Encodings.StandardEncoding; + for (var charCode = 0; charCode < baseEncoding.length; charCode++) { + var glyphId = glyphNames.indexOf(baseEncoding[charCode]); + if (glyphId >= 0) { + charCodeToGlyphId[charCode] = glyphId; + } + } + } + + // Lastly, merge in the differences. + var differences = properties.differences; + if (differences) { + for (var charCode in differences) { + var glyphName = differences[charCode]; + var glyphId = glyphNames.indexOf(glyphName); + if (glyphId >= 0) { + charCodeToGlyphId[charCode] = glyphId; + } + } + } + return charCodeToGlyphId; +} + /* * CharStrings are encoded following the the CharString Encoding sequence * describe in Chapter 6 of the "Adobe Type1 Font Format" specification. 
@@ -5290,11 +5001,11 @@ var Type1Parser = (function Type1ParserClosure() { output = [14]; } program.charstrings.push({ - glyph: glyph, - data: output, - seac: charString.seac, + glyphName: glyph, + charstring: output, + width: charString.width, lsb: charString.lsb, - width: charString.width + seac: charString.seac }); } @@ -5343,10 +5054,7 @@ var Type1Parser = (function Type1ParserClosure() { this.getToken(); // read the in 'put' } } - if (properties.overridableEncoding && encoding) { - properties.baseEncoding = encoding; - break; - } + properties.builtInEncoding = encoding; break; case 'FontBBox': var fontBBox = this.readNumberArray(); @@ -5469,7 +5177,7 @@ var Type1Font = function Type1Font(name, file, properties) { for (var info in data.properties) properties[info] = data.properties[info]; - var charstrings = this.getOrderedCharStrings(data.charstrings, properties); + var charstrings = data.charstrings; var type2Charstrings = this.getType2Charstrings(charstrings); var subrs = this.getType2Subrs(data.subrs); @@ -5480,35 +5188,37 @@ var Type1Font = function Type1Font(name, file, properties) { }; Type1Font.prototype = { - getOrderedCharStrings: function Type1Font_getOrderedCharStrings(glyphs, - properties) { - var charstrings = []; - var usedUnicodes = []; - var i, length, glyphName; - var unusedUnicode = CMAP_GLYPH_OFFSET; - for (i = 0, length = glyphs.length; i < length; i++) { - var item = glyphs[i]; - var glyphName = item.glyph; - var unicode = glyphName in GlyphsUnicode ? 
- GlyphsUnicode[glyphName] : unusedUnicode++; - while (usedUnicodes[unicode]) { - unicode = unusedUnicode++; + get numGlyphs() { + return this.charstrings.length; + }, + + getCharset: function Type1Font_getCharset() { + var charset = ['.notdef']; + var charstrings = this.charstrings; + for (var glyphId = 0; glyphId < charstrings.length; glyphId++) { + charset.push(charstrings[glyphId].glyphName); + } + return charset; + }, + + getGlyphMapping: function Type1Font_getGlyphMapping(properties) { + var charstrings = this.charstrings; + var glyphNames = ['.notdef']; + for (var glyphId = 0; glyphId < charstrings.length; glyphId++) { + glyphNames.push(charstrings[glyphId].glyphName); + } + var encoding = properties.builtInEncoding; + if (encoding) { + var builtInEncoding = {}; + for (var charCode in encoding) { + var glyphId = glyphNames.indexOf(encoding[charCode]); + if (glyphId >= 0) { + builtInEncoding[charCode] = glyphId; + } } - usedUnicodes[unicode] = true; - charstrings.push({ - glyph: glyphName, - unicode: unicode, - gid: i, - charstring: item.data, - width: item.width, - lsb: item.lsb - }); } - charstrings.sort(function charstrings_sort(a, b) { - return a.unicode - b.unicode; - }); - return charstrings; + return type1FontGlyphMapping(properties, builtInEncoding, glyphNames); }, getSeacs: function Type1Font_getSeacs(charstrings) { @@ -5517,7 +5227,8 @@ Type1Font.prototype = { for (i = 0, ii = charstrings.length; i < ii; i++) { var charstring = charstrings[i]; if (charstring.seac) { - seacMap[i] = charstring.seac; + // Offset by 1 for .notdef + seacMap[i + 1] = charstring.seac; } } return seacMap; @@ -5589,11 +5300,11 @@ Type1Font.prototype = { var count = glyphs.length; var charsetArray = [0]; for (var i = 0; i < count; i++) { - var index = CFFStandardStrings.indexOf(charstrings[i].glyph); - // Some characters like asterikmath && circlecopyrt are - // missing from the original strings, for the moment let's - // map them to .notdef and see later if it cause any - // 
problems + var index = CFFStandardStrings.indexOf(charstrings[i].glyphName); + // TODO: Insert the string and correctly map it. Previously it was + // thought mapping names that aren't in the standard strings to .notdef + // was fine, however in issue818 when mapping them all to .notdef the + // adieresis glyph no longer worked. if (index == -1) index = 0; @@ -5660,7 +5371,7 @@ var CFFFont = (function CFFFontClosure() { var parser = new CFFParser(file, properties); this.cff = parser.parse(); var compiler = new CFFCompiler(this.cff); - this.readExtra(); + this.seacs = this.cff.seacs; try { this.data = compiler.compile(); } catch (e) { @@ -5672,111 +5383,38 @@ var CFFFont = (function CFFFontClosure() { } CFFFont.prototype = { - readExtra: function CFFFont_readExtra() { - // charstrings contains info about glyphs (one element per glyph - // containing mappings for {unicode, width}) - var charstrings = this.getCharStrings(); - - // create the mapping between charstring and glyph id - var glyphIds = []; - for (var i = 0, ii = charstrings.length; i < ii; i++) - glyphIds.push(charstrings[i].gid); - - this.charstrings = charstrings; - this.glyphIds = glyphIds; - this.seacs = this.cff.seacs; + get numGlyphs() { + return this.cff.charStrings.count; }, - getCharStrings: function CFFFont_getCharStrings() { + getCharset: function CFFFont_getCharset() { + return this.cff.charset.charset; + }, + getGlyphMapping: function CFFFont_getGlyphMapping() { var cff = this.cff; var charsets = cff.charset.charset; - var encoding = cff.encoding ? cff.encoding.encoding : null; - var charstrings = []; - var unicodeUsed = []; - var unassignedUnicodeItems = []; - var inverseEncoding = []; - var gidStart = 0; - if (charsets[0] === '.notdef') { - gidStart = 1; - } - // According to section 9.7.4.2 CIDFontType0C glyph selection should be - // handled differently. 
- if (this.properties.subtype === 'CIDFontType0C') { + var charCodeToGlyphId = Object.create(null); + + if (this.properties.composite) { if (this.cff.isCIDFont) { // If the font is actually a CID font then we should use the charset // to map CIDs to GIDs. - inverseEncoding = charsets; + for (var glyphId = 0; glyphId < charsets.length; glyphId++) { + var cidString = String.fromCharCode(charsets[glyphId]); + var charCode = this.properties.cMap.map.indexOf(cidString); + charCodeToGlyphId[charCode] = glyphId; + } } else { // If it is NOT actually a CID font then CIDs should be mapped // directly to GIDs. - inverseEncoding = []; - for (var i = 0, ii = cff.charStrings.count; i < ii; i++) { - inverseEncoding.push(i); - } - // Use the identity map for charsets as well. - charsets = inverseEncoding; - } - } else { - for (var charcode in encoding) { - var gid = encoding[charcode]; - if (gid in inverseEncoding) { - // Glyphs can be multiply-encoded if there was an encoding - // supplement. Convert to an array and append the charcode. 
- var previousCharcode = inverseEncoding[gid]; - if (!isArray(previousCharcode)) { - inverseEncoding[gid] = [previousCharcode]; - } - inverseEncoding[gid].push(charcode | 0); - } else { - inverseEncoding[gid] = charcode | 0; + for (var glyphId = 0; glyphId < cff.charStrings.count; glyphId++) { + charCodeToGlyphId[glyphId] = glyphId; } } + return charCodeToGlyphId; } - for (var i = gidStart, ii = charsets.length; i < ii; i++) { - var glyph = charsets[i]; - - var codes = inverseEncoding[i]; - if (!isArray(codes)) { - codes = [codes]; - } - - for (var j = 0; j < codes.length; j++) { - var code = codes[j]; - - if (!code || isSpecialUnicode(code)) { - unassignedUnicodeItems.push(i, code); - continue; - } - charstrings.push({ - unicode: code, - code: code, - gid: i, - glyph: glyph - }); - unicodeUsed[code] = true; - } - } - - var nextUnusedUnicode = CMAP_GLYPH_OFFSET; - for (var j = 0, jj = unassignedUnicodeItems.length; j < jj; j += 2) { - var i = unassignedUnicodeItems[j]; - // giving unicode value anyway - while (nextUnusedUnicode in unicodeUsed) - nextUnusedUnicode++; - var unicode = nextUnusedUnicode++; - charstrings.push({ - unicode: unicode, - code: unassignedUnicodeItems[j + 1] || 0, - gid: i, - glyph: charsets[i] - }); - } - - // sort the array by the unicode value (again) - charstrings.sort(function getCharStringsSort(a, b) { - return a.unicode - b.unicode; - }); - return charstrings; + var encoding = cff.encoding ? 
cff.encoding.encoding : null; + return type1FontGlyphMapping(this.properties, encoding, charsets); } }; @@ -7163,6 +6801,6 @@ var CFFCompiler = (function CFFCompilerClosure() { // https://github.com/mozilla/pdf.js/issues/1689 (function checkChromeWindows() { if (/Windows.*Chrome/.test(navigator.userAgent)) { - SYMBOLIC_FONT_GLYPH_OFFSET = 0xF100; + SKIP_PRIVATE_USE_RANGE_F000_TO_F01F = true; } })(); diff --git a/src/core/worker.js b/src/core/worker.js index b72d54529..307e18490 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -239,6 +239,8 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = { PDFJS.disableFontFace = data.disableFontFace; PDFJS.disableCreateObjectURL = data.disableCreateObjectURL; PDFJS.verbosity = data.verbosity; + PDFJS.cMapUrl = data.cMapUrl === undefined ? + null : data.cMapUrl; getPdfManager(data).then(function () { pdfManager.onLoadedStream().then(function(stream) { diff --git a/src/display/api.js b/src/display/api.js index 74f0bec6a..ece581605 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -30,6 +30,13 @@ PDFJS.maxImageSize = PDFJS.maxImageSize === undefined ? -1 : PDFJS.maxImageSize; /** + * The URL where the predefined Adobe CMaps are located. Include the trailing + * slash. + * @var {string} + */ +PDFJS.cMapUrl = PDFJS.cMapUrl === undefined ? null : PDFJS.cMapUrl; + +/** * By default fonts are converted to OpenType fonts and loaded via font face * rules. If disabled, the font will be rendered using a built in font renderer * that constructs the glyphs with primitive path commands. 
@@ -917,6 +924,7 @@ var WorkerTransport = (function WorkerTransportClosure() { source: source, disableRange: PDFJS.disableRange, maxImageSize: PDFJS.maxImageSize, + cMapUrl: PDFJS.cMapUrl, disableFontFace: PDFJS.disableFontFace, disableCreateObjectURL: PDFJS.disableCreateObjectURL, verbosity: PDFJS.verbosity diff --git a/src/display/canvas.js b/src/display/canvas.js index 7381aa691..e1348a42a 100644 --- a/src/display/canvas.js +++ b/src/display/canvas.js @@ -1383,38 +1383,37 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { var accent = glyph.accent; var scaledX, scaledY, scaledAccentX, scaledAccentY; - if (!glyph.disabled) { - if (vertical) { - scaledX = vx / fontSizeScale; - scaledY = (x + vy) / fontSizeScale; - } else { - scaledX = x / fontSizeScale; - scaledY = 0; - } - if (font.remeasure && width > 0 && this.isFontSubpixelAAEnabled) { - // some standard fonts may not have the exact width, trying to - // rescale per character - var measuredWidth = ctx.measureText(character).width * 1000 / - current.fontSize * current.fontSizeScale; - var characterScaleX = width / measuredWidth; - restoreNeeded = true; - ctx.save(); - ctx.scale(characterScaleX, 1); - scaledX /= characterScaleX; - if (accent) { - scaledAccentX /= characterScaleX; - } - } + if (vertical) { + scaledX = vx / fontSizeScale; + scaledY = (x + vy) / fontSizeScale; + } else { + scaledX = x / fontSizeScale; + scaledY = 0; + } - this.paintChar(character, scaledX, scaledY); + if (font.remeasure && width > 0 && this.isFontSubpixelAAEnabled) { + // some standard fonts may not have the exact width, trying to + // rescale per character + var measuredWidth = ctx.measureText(character).width * 1000 / + current.fontSize * current.fontSizeScale; + var characterScaleX = width / measuredWidth; + restoreNeeded = true; + ctx.save(); + ctx.scale(characterScaleX, 1); + scaledX /= characterScaleX; if (accent) { - scaledAccentX = scaledX + accent.offset.x / fontSizeScale; - scaledAccentY = scaledY - 
accent.offset.y / fontSizeScale; - this.paintChar(accent.fontChar, scaledAccentX, scaledAccentY); + scaledAccentX /= characterScaleX; } } + this.paintChar(character, scaledX, scaledY); + if (accent) { + scaledAccentX = scaledX + accent.offset.x / fontSizeScale; + scaledAccentY = scaledY - accent.offset.y / fontSizeScale; + this.paintChar(accent.fontChar, scaledAccentX, scaledAccentY); + } + x += charWidth; canvasWidth += charWidth; diff --git a/test/driver.js b/test/driver.js index 3d69dc4c2..464eb2aa3 100644 --- a/test/driver.js +++ b/test/driver.js @@ -28,6 +28,7 @@ // "firefox-bin: Fatal IO error 12 (Cannot allocate memory) on X server :1." // PDFJS.disableWorker = true; PDFJS.enableStats = true; +PDFJS.cMapUrl = '../external/cmaps/'; var appPath, masterMode, browser, canvas, dummyCanvas, currentTaskIdx, manifest, stdout; diff --git a/test/font/font_fpgm_spec.js b/test/font/font_fpgm_spec.js index 088cfa060..c0408992a 100644 --- a/test/font/font_fpgm_spec.js +++ b/test/font/font_fpgm_spec.js @@ -9,7 +9,9 @@ describe('font_fpgm', function() { var font = new Font("font", new Stream(font2324), { loadedName: 'font', type: 'CIDFontType2', - differences: [] + differences: [], + defaultEncoding: [], + cMap: CMapFactory.create(new Name('Identity-H')) }); ttx(font.data, function(result) { output = result; }); runs(function() { diff --git a/test/font/font_os2_spec.js b/test/font/font_os2_spec.js index 320881bbc..70e87d0bd 100644 --- a/test/font/font_os2_spec.js +++ b/test/font/font_os2_spec.js @@ -11,7 +11,7 @@ describe('font_post', function() { loadedName: 'font', type: 'TrueType', differences: [], - baseEncoding: [] + defaultEncoding: [] }); ttx(font.data, function(result) { output = result; }); runs(function() { @@ -26,7 +26,9 @@ describe('font_post', function() { var font = new Font("font", new Stream(font1282), { loadedName: 'font', type: 'CIDFontType2', - differences: [] + differences: [], + defaultEncoding: [], + cMap: CMapFactory.create(new Name('Identity-H')) }); 
ttx(font.data, function(result) { output = result; }); runs(function() { diff --git a/test/font/font_post_spec.js b/test/font/font_post_spec.js index acd07b823..3de7209cb 100644 --- a/test/font/font_post_spec.js +++ b/test/font/font_post_spec.js @@ -12,7 +12,8 @@ describe('font_post', function() { loadedName: 'font', type: 'CIDFontType2', differences: [], - baseEncoding: [] + defaultEncoding: [], + cMap: CMapFactory.create(new Name('Identity-H')) }); ttx(font.data, function(result) { output = result; }); runs(function() { @@ -28,7 +29,7 @@ describe('font_post', function() { loadedName: 'font', type: 'TrueType', differences: [], - baseEncoding: [] + defaultEncoding: [] }); ttx(font.data, function(result) { output = result; }); runs(function() { @@ -44,7 +45,7 @@ describe('font_post', function() { loadedName: 'font', type: 'TrueType', differences: [], - baseEncoding: [] + defaultEncoding: [] }); ttx(font.data, function(result) { output = result; }); runs(function() { diff --git a/test/font/font_test.html b/test/font/font_test.html index 4fc43a783..12f1819cc 100644 --- a/test/font/font_test.html +++ b/test/font/font_test.html @@ -28,6 +28,7 @@ + diff --git a/test/test.py b/test/test.py index 51f1d616e..8ddbf13cc 100644 --- a/test/test.py +++ b/test/test.py @@ -220,6 +220,11 @@ class TestHandlerBase(BaseHTTPRequestHandler): self.sendIndex(url.path, url.query) return + pieces = path.split(os.sep); + if pieces[len(pieces) - 2] == 'cmaps': + self.sendFile(path, '.properties'); + return + if not (prefix == DOC_ROOT and os.path.isfile(path) and ext in MIMEs): diff --git a/test/unit/cmap_spec.js b/test/unit/cmap_spec.js index be0e41268..6d8350b48 100644 --- a/test/unit/cmap_spec.js +++ b/test/unit/cmap_spec.js @@ -1,6 +1,6 @@ /* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */ -/* globals expect, it, describe, StringStream, Lexer, CMapFactory */ +/* globals expect, it, describe, 
StringStream, Lexer, CMapFactory, Name */ 'use strict'; @@ -82,5 +82,22 @@ describe('cmap', function() { expect(c[0]).toEqual(0x8EA1A1A1); expect(c[1]).toEqual(4); }); + it('read usecmap', function() { + var str = '/Adobe-Japan1-1 usecmap\n'; + var stream = new StringStream(str); + var cmap = CMapFactory.create(stream, null, '../../external/cmaps/'); + expect(cmap.useCMap).toBeDefined(); + }); + it('parses wmode', function() { + var str = '/WMode 1 def\n'; + var stream = new StringStream(str); + var cmap = CMapFactory.create(stream); + expect(cmap.vertical).toEqual(true); + }); + it('loads built in cmap', function() { + var cmap = CMapFactory.create(new Name('Adobe-Japan1-1'), + '../../external/cmaps/', + null); + }); }); diff --git a/test/unit/font_spec.js b/test/unit/font_spec.js index 4efccb979..aa50cc532 100644 --- a/test/unit/font_spec.js +++ b/test/unit/font_spec.js @@ -386,7 +386,7 @@ describe('font', function() { var parser = new Type1Parser(stream); var props = { overridableEncoding: true }; var program = parser.extractFontHeader(props); - expect(props.baseEncoding[33]).toEqual('arrowright'); + expect(props.builtInEncoding[33]).toEqual('arrowright'); }); }); }); diff --git a/web/viewer.js b/web/viewer.js index a7b4d259b..a1dc43aad 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -62,6 +62,11 @@ PDFJS.imageResourcesPath = './images/'; //#if (FIREFOX || MOZCENTRAL || B2G || GENERIC || CHROME) //PDFJS.workerSrc = '../build/pdf.worker.js'; //#endif +//#if !PRODUCTION +PDFJS.cMapUrl = '../external/cmaps/'; +//#else +//PDFJS.cMapUrl = '../web/cmaps/'; +//#endif var mozL10n = document.mozL10n || document.webL10n;