mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-22 16:18:08 +02:00
Convert `uniXXXX` glyph names to proper ones when building the `charCodeToGlyphId` map for TrueType fonts (bug 1132849, issue 6893, issue 6894)
This patch adds a `getUnicodeForGlyph` helper function, which is used to recover Unicode values for non-standard glyph names. Some PDF generators, e.g. Scribus PDF, use improper `uniXXXX` glyph names, which breaks the glyph mapping. We can avoid this by converting them to "standard" glyph names instead. Fixes https://bugzilla.mozilla.org/show_bug.cgi?id=1132849. Fixes issue 6893. Fixes issue 6894.
This commit is contained in:
parent
147598417c
commit
dfe9015a43
6 changed files with 96 additions and 10 deletions
|
@ -80,6 +80,7 @@ var getSupplementalGlyphMapForArialBlack =
|
|||
coreStandardFonts.getSupplementalGlyphMapForArialBlack;
|
||||
var getUnicodeRangeFor = coreUnicode.getUnicodeRangeFor;
|
||||
var mapSpecialUnicodeValues = coreUnicode.mapSpecialUnicodeValues;
|
||||
var getUnicodeForGlyph = coreUnicode.getUnicodeForGlyph;
|
||||
|
||||
// Unicode Private Use Area
|
||||
var PRIVATE_USE_OFFSET_START = 0xE000;
|
||||
|
@ -465,7 +466,7 @@ var ProblematicCharRanges = new Int32Array([
|
|||
*/
|
||||
var Font = (function FontClosure() {
|
||||
function Font(name, file, properties) {
|
||||
var charCode, glyphName, fontChar;
|
||||
var charCode, glyphName, unicode, fontChar;
|
||||
|
||||
this.name = name;
|
||||
this.loadedName = properties.loadedName;
|
||||
|
@ -609,21 +610,25 @@ var Font = (function FontClosure() {
|
|||
this.toFontChar[charCode] = fontChar;
|
||||
}
|
||||
} else if (isStandardFont) {
|
||||
this.toFontChar = [];
|
||||
glyphsUnicodeMap = getGlyphsUnicode();
|
||||
for (charCode in properties.defaultEncoding) {
|
||||
glyphName = (properties.differences[charCode] ||
|
||||
properties.defaultEncoding[charCode]);
|
||||
this.toFontChar[charCode] = glyphsUnicodeMap[glyphName];
|
||||
unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||
if (unicode !== -1) {
|
||||
this.toFontChar[charCode] = unicode;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
var unicodeCharCode, notCidFont = (type.indexOf('CIDFontType') === -1);
|
||||
glyphsUnicodeMap = getGlyphsUnicode();
|
||||
this.toUnicode.forEach(function(charCode, unicodeCharCode) {
|
||||
if (notCidFont) {
|
||||
if (!this.composite) {
|
||||
glyphName = (properties.differences[charCode] ||
|
||||
properties.defaultEncoding[charCode]);
|
||||
unicodeCharCode = (glyphsUnicodeMap[glyphName] || unicodeCharCode);
|
||||
unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||
if (unicode !== -1) {
|
||||
unicodeCharCode = unicode;
|
||||
}
|
||||
}
|
||||
this.toFontChar[charCode] = unicodeCharCode;
|
||||
}.bind(this));
|
||||
|
@ -722,7 +727,7 @@ var Font = (function FontClosure() {
|
|||
function int16(b0, b1) {
|
||||
return (b0 << 8) + b1;
|
||||
}
|
||||
|
||||
|
||||
function signedInt16(b0, b1) {
|
||||
var value = (b0 << 8) + b1;
|
||||
return value & (1 << 15) ? value - 0x10000 : value;
|
||||
|
@ -2283,6 +2288,26 @@ var Font = (function FontClosure() {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Some bad PDF generators, e.g. Scribus PDF, include glyph names
|
||||
// in a 'uniXXXX' format -- attempting to recover proper ones.
|
||||
/**
 * Maps a glyph name onto a key of `glyphsUnicodeMap`, when possible.
 *
 * A name that is already a key of the map is returned untouched. Otherwise
 * the name is passed to `getUnicodeForGlyph`, and if that yields a value
 * other than -1, the first key of the map (in `for...in` order) holding that
 * same value is returned.
 *
 * @param {string} name - The glyph name to normalise.
 * @param {Object} glyphsUnicodeMap - Lookup from glyph names to Unicode
 *   values.
 * @returns {string} A key of `glyphsUnicodeMap` if one could be found;
 *   otherwise the original `name` (a warning is emitted in that case).
 */
function recoverGlyphName(name, glyphsUnicodeMap) {
  var isKnownName = glyphsUnicodeMap[name] !== undefined;

  if (!isKnownName) {
    // Not a key of the map: resolve the name to a Unicode value first, then
    // search the map in reverse for a key with that value.
    var codePoint = getUnicodeForGlyph(name, glyphsUnicodeMap);

    if (codePoint !== -1) {
      for (var candidate in glyphsUnicodeMap) {
        if (glyphsUnicodeMap[candidate] === codePoint) {
          return candidate;
        }
      }
    }
    // Either no Unicode value could be derived, or no key maps to it.
    warn('Unable to recover a standard glyph name for: ' + name);
  }
  return name;
}
|
||||
|
||||
|
||||
if (properties.type === 'CIDFontType2') {
|
||||
var cidToGidMap = properties.cidToGidMap || [];
|
||||
var isCidToGidMapEmpty = cidToGidMap.length === 0;
|
||||
|
@ -2337,7 +2362,7 @@ var Font = (function FontClosure() {
|
|||
}
|
||||
var glyphsUnicodeMap = getGlyphsUnicode();
|
||||
for (charCode = 0; charCode < 256; charCode++) {
|
||||
var glyphName;
|
||||
var glyphName, standardGlyphName;
|
||||
if (this.differences && charCode in this.differences) {
|
||||
glyphName = this.differences[charCode];
|
||||
} else if (charCode in baseEncoding &&
|
||||
|
@ -2349,13 +2374,16 @@ var Font = (function FontClosure() {
|
|||
if (!glyphName) {
|
||||
continue;
|
||||
}
|
||||
// Ensure that non-standard glyph names are resolved to valid ones.
|
||||
standardGlyphName = recoverGlyphName(glyphName, glyphsUnicodeMap);
|
||||
|
||||
var unicodeOrCharCode, isUnicode = false;
|
||||
if (cmapPlatformId === 3 && cmapEncodingId === 1) {
|
||||
unicodeOrCharCode = glyphsUnicodeMap[glyphName];
|
||||
unicodeOrCharCode = glyphsUnicodeMap[standardGlyphName];
|
||||
isUnicode = true;
|
||||
} else if (cmapPlatformId === 1 && cmapEncodingId === 0) {
|
||||
// TODO: the encoding needs to be updated with mac os table.
|
||||
unicodeOrCharCode = MacRomanEncoding.indexOf(glyphName);
|
||||
unicodeOrCharCode = MacRomanEncoding.indexOf(standardGlyphName);
|
||||
}
|
||||
|
||||
var found = false;
|
||||
|
@ -2373,6 +2401,11 @@ var Font = (function FontClosure() {
|
|||
if (!found && properties.glyphNames) {
|
||||
// Try to map using the post table.
|
||||
var glyphId = properties.glyphNames.indexOf(glyphName);
|
||||
// The post table ought to use the same kind of glyph names as the
|
||||
// `differences` array, but check the standard ones as a fallback.
|
||||
if (glyphId === -1 && standardGlyphName !== glyphName) {
|
||||
glyphId = properties.glyphNames.indexOf(standardGlyphName);
|
||||
}
|
||||
if (glyphId > 0 && hasGlyph(glyphId, -1, -1)) {
|
||||
charCodeToGlyphId[charCode] = glyphId;
|
||||
found = true;
|
||||
|
@ -2686,6 +2719,12 @@ var Font = (function FontClosure() {
|
|||
code = +glyphName.substr(1);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// 'uniXXXX'/'uXXXX{XX}' glyphs
|
||||
var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||
if (unicode !== -1) {
|
||||
code = unicode;
|
||||
}
|
||||
}
|
||||
if (code) {
|
||||
// If |baseEncodingName| is one of the predefined encodings,
|
||||
|
|
|
@ -65,6 +65,36 @@
|
|||
return code;
|
||||
}
|
||||
|
||||
/**
 * Resolves a glyph name to a Unicode value.
 *
 * The name is first looked up in `glyphsUnicodeMap`. If it is not found
 * there, names of the form 'uniXXXX' (exactly four hex digits) or
 * 'uXXXX{XX}' (four to six hex digits) are decoded from their hexadecimal
 * suffix.
 *
 * @param {string} name - The glyph name to resolve.
 * @param {Object} glyphsUnicodeMap - Lookup from glyph names to Unicode
 *   values.
 * @returns {number} The value found in the map, the decoded code point, or
 *   -1 when the name is empty or cannot be resolved.
 */
function getUnicodeForGlyph(name, glyphsUnicodeMap) {
  var unicode = glyphsUnicodeMap[name];
  if (unicode !== undefined) {
    return unicode;
  }
  if (!name) {
    return -1;
  }
  // Try to recover valid Unicode values from 'uniXXXX'/'uXXXX{XX}' glyphs.
  if (name[0] === 'u') {
    var nameLen = name.length, hexStr;

    if (nameLen === 7 && name[1] === 'n' && name[2] === 'i') { // 'uniXXXX'
      hexStr = name.substring(3);
    } else if (nameLen >= 5 && nameLen <= 7) { // 'uXXXX{XX}'
      hexStr = name.substring(1);
    } else {
      return -1;
    }
    // Require the *entire* suffix to consist of upper-case hexadecimal
    // digits. Comparing against `toUpperCase()` alone is not enough, since
    // `parseInt` silently stops at the first non-hex character and accepts
    // signs and a '0X' prefix (e.g. 'u12G4' would be decoded as 0x12).
    if (/^[0-9A-F]+$/.test(hexStr)) {
      unicode = parseInt(hexStr, 16);
      // Six hex digits can exceed the last Unicode code point (U+10FFFF).
      if (unicode <= 0x10FFFF) {
        return unicode;
      }
    }
  }
  return -1;
}
|
||||
|
||||
var UnicodeRanges = [
|
||||
{ 'begin': 0x0000, 'end': 0x007F }, // Basic Latin
|
||||
{ 'begin': 0x0080, 'end': 0x00FF }, // Latin-1 Supplement
|
||||
|
@ -1612,4 +1642,5 @@
|
|||
exports.reverseIfRtl = reverseIfRtl;
|
||||
exports.getUnicodeRangeFor = getUnicodeRangeFor;
|
||||
exports.getNormalizedUnicodes = getNormalizedUnicodes;
|
||||
exports.getUnicodeForGlyph = getUnicodeForGlyph;
|
||||
}));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue