diff --git a/src/fonts.js b/src/fonts.js
index dda0c0df3..928c63b0a 100644
--- a/src/fonts.js
+++ b/src/fonts.js
@@ -401,6 +401,54 @@ var symbolsFonts = {
   'Dingbats': true, 'Symbol': true, 'ZapfDingbats': true
 };
 
+// Maps a CMap encoding name to the function that converts a string encoded
+// with that CMap into Unicode (via decodeBytes below).
+var CMapConverterList = {
+  'H': jis7ToUnicode,
+  'V': jis7ToUnicode,
+  'EUC-H': eucjpToUnicode,
+  'EUC-V': eucjpToUnicode,
+  '90ms-RKSJ-H': sjisToUnicode,
+  '90ms-RKSJ-V': sjisToUnicode,
+  '90msp-RKSJ-H': sjisToUnicode,
+  '90msp-RKSJ-V': sjisToUnicode
+};
+
+// decodeBytes(bytes, encoding) decodes a byte array in the given encoding
+// using whichever platform decoder is available. It stays undefined when
+// neither TextDecoder nor FileReaderSync exists.
+var decodeBytes;
+if (typeof TextDecoder !== 'undefined') {
+  decodeBytes = function(bytes, encoding) {
+    return new TextDecoder(encoding).decode(bytes);
+  };
+} else if (typeof FileReaderSync !== 'undefined') {
+  decodeBytes = function(bytes, encoding) {
+    return new FileReaderSync().readAsText(new Blob([bytes]), encoding);
+  };
+} else {
+  // Clear the list so that decodeBytes will never be called.
+  CMapConverterList = {};
+}
+
+// Sets the high bit of every byte and decodes the result as EUC-JP.
+function jis7ToUnicode(str) {
+  var bytes = stringToBytes(str);
+  var length = bytes.length;
+  for (var i = 0; i < length; ++i) {
+    bytes[i] |= 0x80;
+  }
+  return decodeBytes(bytes, 'euc-jp');
+}
+
+function eucjpToUnicode(str) {
+  return decodeBytes(stringToBytes(str), 'euc-jp');
+}
+
+function sjisToUnicode(str) {
+  return decodeBytes(stringToBytes(str), 'shift_jis');
+}
+
 // Some characters, e.g. copyrightserif, mapped to the private use area and
 // might not be displayed using standard fonts. Mapping/hacking well-known chars
 // to the similar equivalents in the normal characters range.
@@ -2282,6 +2330,7 @@ var Font = (function FontClosure() {
 
       // Trying to fix encoding using glyph CIDSystemInfo.
       this.loadCidToUnicode(properties);
+      this.cidEncoding = properties.cidEncoding;
 
       if (properties.toUnicode)
         this.toUnicode = properties.toUnicode;
@@ -4128,8 +4177,8 @@ var Font = (function FontClosure() {
       }
 
       var cidEncoding = properties.cidEncoding;
-      if (cidEncoding && cidEncoding.indexOf('Uni') === 0) {
-        // input is already Unicode for Uni* CMap encodings.
+      if (cidEncoding && cidEncoding.indexOf('Identity-') !== 0) {
+        // input is already Unicode for non-Identity CMap encodings.
         // However, Unicode-to-CID conversion is needed
         // regardless of the CMap encoding. So we can't reset
         // unicodeToCID.
@@ -4304,8 +4353,25 @@ var Font = (function FontClosure() {
         charsCache = this.charsCache = Object.create(null);
 
       glyphs = [];
+      var charsCacheKey = chars;
 
-      if (this.wideChars) {
+      var converter;
+      var cidEncoding = this.cidEncoding;
+      if (cidEncoding) {
+        // Only own keys are converters; an encoding named e.g. 'constructor'
+        // must not resolve to an inherited Object.prototype member.
+        if (Object.prototype.hasOwnProperty.call(CMapConverterList,
+                                                 cidEncoding)) {
+          converter = CMapConverterList[cidEncoding];
+        }
+        if (converter) {
+          chars = converter(chars);
+        } else if (cidEncoding.indexOf('Uni') !== 0 &&
+                   cidEncoding.indexOf('Identity-') !== 0) {
+          warn('Unsupported CMap: ' + cidEncoding);
+        }
+      }
+      if (!converter && this.wideChars) {
         // composite fonts have multi-byte strings convert the string from
         // single-byte to multi-byte
         // XXX assuming CIDFonts are two-byte - later need to extract the
@@ -4332,7 +4398,7 @@ var Font = (function FontClosure() {
       }
 
       // Enter the translated string into the cache
-      return (charsCache[chars] = glyphs);
+      return (charsCache[charsCacheKey] = glyphs);
     }
   };
 
diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore
index 050b533fe..8c908244f 100644
--- a/test/pdfs/.gitignore
+++ b/test/pdfs/.gitignore
@@ -39,4 +39,7 @@
 !mixedfonts.pdf
 !shading_extend.pdf
 !noembed-identity.pdf
+!noembed-jis7.pdf
+!noembed-eucjp.pdf
+!noembed-sjis.pdf
 !issue2099-1.pdf
diff --git a/test/pdfs/noembed-eucjp.pdf b/test/pdfs/noembed-eucjp.pdf
new file mode 100644
index 000000000..7702977d1
Binary files /dev/null and b/test/pdfs/noembed-eucjp.pdf differ
diff --git a/test/pdfs/noembed-jis7.pdf b/test/pdfs/noembed-jis7.pdf
new file mode 100644
index 000000000..fa2a95f2b
Binary files /dev/null and b/test/pdfs/noembed-jis7.pdf differ
diff --git a/test/pdfs/noembed-sjis.pdf b/test/pdfs/noembed-sjis.pdf
new file mode 100644
index 000000000..f7dd3860a
Binary files /dev/null and b/test/pdfs/noembed-sjis.pdf differ
diff --git a/test/test_manifest.json b/test/test_manifest.json
index e200d8067..4b15cefac 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -817,6 +817,24 @@
       "rounds": 1,
       "type": "eq"
     },
+    {  "id": "noembed-jis7",
+       "file": "pdfs/noembed-jis7.pdf",
+       "md5": "a0f6cf5a830f23d0c35994a6aaf92b3d",
+       "rounds": 1,
+       "type": "eq"
+    },
+    {  "id": "noembed-eucjp",
+       "file": "pdfs/noembed-eucjp.pdf",
+       "md5": "d270f2d46db99b70235b4d37cbc313ad",
+       "rounds": 1,
+       "type": "eq"
+    },
+    {  "id": "noembed-sjis",
+       "file": "pdfs/noembed-sjis.pdf",
+       "md5": "51f9d150bf4afe498019b3029d451072",
+       "rounds": 1,
+       "type": "eq"
+    },
     {  "id": "issue2099-1",
        "file": "pdfs/issue2099-1.pdf",
        "md5": "c7eca682d70a976dfc4b7e64d3e9f1ce",