mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-26 10:08:06 +02:00
Convert `uniXXXX` glyph names to proper ones when building the `charCodeToGlyphId` map for TrueType fonts (bug 1132849, issue 6893, issue 6894)
This patch adds a `getUnicodeForGlyph` helper function, which is used to recover Unicode values for non-standard glyph names. Some PDF generators, e.g. Scribus PDF, use improper `uniXXXX` glyph names, which break the glyph mapping. We can avoid this by converting them to "standard" glyph names instead. Fixes https://bugzilla.mozilla.org/show_bug.cgi?id=1132849. Fixes #6893. Fixes #6894.
This commit is contained in:
parent
147598417c
commit
dfe9015a43
6 changed files with 96 additions and 10 deletions
|
@ -80,6 +80,7 @@ var getSupplementalGlyphMapForArialBlack =
|
|||
coreStandardFonts.getSupplementalGlyphMapForArialBlack;
|
||||
var getUnicodeRangeFor = coreUnicode.getUnicodeRangeFor;
|
||||
var mapSpecialUnicodeValues = coreUnicode.mapSpecialUnicodeValues;
|
||||
var getUnicodeForGlyph = coreUnicode.getUnicodeForGlyph;
|
||||
|
||||
// Unicode Private Use Area
|
||||
var PRIVATE_USE_OFFSET_START = 0xE000;
|
||||
|
@ -465,7 +466,7 @@ var ProblematicCharRanges = new Int32Array([
|
|||
*/
|
||||
var Font = (function FontClosure() {
|
||||
function Font(name, file, properties) {
|
||||
var charCode, glyphName, fontChar;
|
||||
var charCode, glyphName, unicode, fontChar;
|
||||
|
||||
this.name = name;
|
||||
this.loadedName = properties.loadedName;
|
||||
|
@ -609,21 +610,25 @@ var Font = (function FontClosure() {
|
|||
this.toFontChar[charCode] = fontChar;
|
||||
}
|
||||
} else if (isStandardFont) {
|
||||
this.toFontChar = [];
|
||||
glyphsUnicodeMap = getGlyphsUnicode();
|
||||
for (charCode in properties.defaultEncoding) {
|
||||
glyphName = (properties.differences[charCode] ||
|
||||
properties.defaultEncoding[charCode]);
|
||||
this.toFontChar[charCode] = glyphsUnicodeMap[glyphName];
|
||||
unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||
if (unicode !== -1) {
|
||||
this.toFontChar[charCode] = unicode;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
var unicodeCharCode, notCidFont = (type.indexOf('CIDFontType') === -1);
|
||||
glyphsUnicodeMap = getGlyphsUnicode();
|
||||
this.toUnicode.forEach(function(charCode, unicodeCharCode) {
|
||||
if (notCidFont) {
|
||||
if (!this.composite) {
|
||||
glyphName = (properties.differences[charCode] ||
|
||||
properties.defaultEncoding[charCode]);
|
||||
unicodeCharCode = (glyphsUnicodeMap[glyphName] || unicodeCharCode);
|
||||
unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||
if (unicode !== -1) {
|
||||
unicodeCharCode = unicode;
|
||||
}
|
||||
}
|
||||
this.toFontChar[charCode] = unicodeCharCode;
|
||||
}.bind(this));
|
||||
|
@ -722,7 +727,7 @@ var Font = (function FontClosure() {
|
|||
// Combines two values into a single number: `b0` shifted left by 8 bits,
// plus `b1`. No masking or sign handling is applied here.
// NOTE(review): presumably b0/b1 are single bytes (0-255) with b0 the
// high-order byte -- confirm against callers.
function int16(b0, b1) {
|
||||
return (b0 << 8) + b1;
|
||||
}
|
||||
|
||||
|
||||
function signedInt16(b0, b1) {
|
||||
var value = (b0 << 8) + b1;
|
||||
return value & (1 << 15) ? value - 0x10000 : value;
|
||||
|
@ -2283,6 +2288,26 @@ var Font = (function FontClosure() {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Some bad PDF generators, e.g. Scribus PDF, include glyph names
|
||||
// in a 'uniXXXX' format -- attempting to recover proper ones.
|
||||
// Returns `name` unchanged when `glyphsUnicodeMap[name]` is defined.
// Otherwise asks `getUnicodeForGlyph` for a Unicode value and, unless that
// returns -1, returns the first key of `glyphsUnicodeMap` (in `for...in`
// order) whose value equals it. If nothing matches, a warning is emitted
// and the original `name` is returned.
function recoverGlyphName(name, glyphsUnicodeMap) {
|
||||
// Fast path: the name already has an entry in the map.
if (glyphsUnicodeMap[name] !== undefined) {
|
||||
return name;
|
||||
}
|
||||
// The glyph name is non-standard, trying to recover.
|
||||
var unicode = getUnicodeForGlyph(name, glyphsUnicodeMap);
|
||||
// -1 is treated as "no Unicode value could be determined".
if (unicode !== -1) {
|
||||
// Reverse lookup: linear scan for a key that maps to this value.
for (var key in glyphsUnicodeMap) {
|
||||
if (glyphsUnicodeMap[key] === unicode) {
|
||||
return key;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Reached when no value was recovered or no key maps to it.
warn('Unable to recover a standard glyph name for: ' + name);
|
||||
return name;
|
||||
}
|
||||
|
||||
|
||||
if (properties.type === 'CIDFontType2') {
|
||||
var cidToGidMap = properties.cidToGidMap || [];
|
||||
var isCidToGidMapEmpty = cidToGidMap.length === 0;
|
||||
|
@ -2337,7 +2362,7 @@ var Font = (function FontClosure() {
|
|||
}
|
||||
var glyphsUnicodeMap = getGlyphsUnicode();
|
||||
for (charCode = 0; charCode < 256; charCode++) {
|
||||
var glyphName;
|
||||
var glyphName, standardGlyphName;
|
||||
if (this.differences && charCode in this.differences) {
|
||||
glyphName = this.differences[charCode];
|
||||
} else if (charCode in baseEncoding &&
|
||||
|
@ -2349,13 +2374,16 @@ var Font = (function FontClosure() {
|
|||
if (!glyphName) {
|
||||
continue;
|
||||
}
|
||||
// Ensure that non-standard glyph names are resolved to valid ones.
|
||||
standardGlyphName = recoverGlyphName(glyphName, glyphsUnicodeMap);
|
||||
|
||||
var unicodeOrCharCode, isUnicode = false;
|
||||
if (cmapPlatformId === 3 && cmapEncodingId === 1) {
|
||||
unicodeOrCharCode = glyphsUnicodeMap[glyphName];
|
||||
unicodeOrCharCode = glyphsUnicodeMap[standardGlyphName];
|
||||
isUnicode = true;
|
||||
} else if (cmapPlatformId === 1 && cmapEncodingId === 0) {
|
||||
// TODO: the encoding needs to be updated with mac os table.
|
||||
unicodeOrCharCode = MacRomanEncoding.indexOf(glyphName);
|
||||
unicodeOrCharCode = MacRomanEncoding.indexOf(standardGlyphName);
|
||||
}
|
||||
|
||||
var found = false;
|
||||
|
@ -2373,6 +2401,11 @@ var Font = (function FontClosure() {
|
|||
if (!found && properties.glyphNames) {
|
||||
// Try to map using the post table.
|
||||
var glyphId = properties.glyphNames.indexOf(glyphName);
|
||||
// The post table ought to use the same kind of glyph names as the
|
||||
// `differences` array, but check the standard ones as a fallback.
|
||||
if (glyphId === -1 && standardGlyphName !== glyphName) {
|
||||
glyphId = properties.glyphNames.indexOf(standardGlyphName);
|
||||
}
|
||||
if (glyphId > 0 && hasGlyph(glyphId, -1, -1)) {
|
||||
charCodeToGlyphId[charCode] = glyphId;
|
||||
found = true;
|
||||
|
@ -2686,6 +2719,12 @@ var Font = (function FontClosure() {
|
|||
code = +glyphName.substr(1);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// 'uniXXXX'/'uXXXX{XX}' glyphs
|
||||
var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||
if (unicode !== -1) {
|
||||
code = unicode;
|
||||
}
|
||||
}
|
||||
if (code) {
|
||||
// If |baseEncodingName| is one the predefined encodings,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue