mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-19 14:48:08 +02:00
Second pass CIDFont support - make Arial Unicode in OpenOffice PDF
- supports PDF fonts with CIDtoGIDMap and no cmap
This commit is contained in:
parent
ea2d651709
commit
7b8542c6a7
2 changed files with 125 additions and 43 deletions
108
fonts.js
108
fonts.js
|
@ -404,12 +404,21 @@ var Font = (function() {
|
|||
data = this.checkAndRepair(name, file, properties);
|
||||
break;
|
||||
|
||||
case 'Type0':
|
||||
//this is a Truetype font
|
||||
this.mimetype = 'font/opentype';
|
||||
|
||||
// Repair the TrueType file, since it can look damaged from the point of
|
||||
// view of the sanitizer
|
||||
data = this.checkAndRepair(name, file, properties);
|
||||
break;
|
||||
|
||||
default:
|
||||
warn('Font ' + properties.type + ' is not supported');
|
||||
break;
|
||||
}
|
||||
this.data = data;
|
||||
|
||||
this.type = properties.type; //use the type to test if the string is single or multi-byte
|
||||
this.id = Fonts.registerFont(name, data, properties);
|
||||
this.loadedName = 'pdfFont' + this.id;
|
||||
};
|
||||
|
@ -856,8 +865,26 @@ var Font = (function() {
|
|||
data: stringToArray(createOS2Table(properties))
|
||||
});
|
||||
|
||||
// Replace the old CMAP table with a shiny new one
|
||||
replaceCMapTable(cmap, font, properties);
|
||||
if (!cmap) {
|
||||
var glyphs = [];
|
||||
var charset = properties.charset;
|
||||
for (var i=1; i < charset.length; i++) {
|
||||
if (charset.indexOf(i) != -1) {
|
||||
glyphs.push({
|
||||
unicode: charset.indexOf(i)
|
||||
});
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
tables.push({
|
||||
tag: 'cmap',
|
||||
data: createCMapTable(glyphs)
|
||||
})
|
||||
} else {
|
||||
// Replace the old CMAP table with a shiny new one
|
||||
replaceCMapTable(cmap, font, properties);
|
||||
}
|
||||
|
||||
// Rewrite the 'post' table if needed
|
||||
if (!post) {
|
||||
|
@ -1110,44 +1137,63 @@ var Font = (function() {
|
|||
|
||||
charsToUnicode: function fonts_chars2Unicode(chars) {
|
||||
var charsCache = this.charsCache;
|
||||
|
||||
var str;
|
||||
|
||||
// if we translated this string before, just grab it from the cache
|
||||
if (charsCache) {
|
||||
var str = charsCache[chars];
|
||||
str = charsCache[chars];
|
||||
if (str)
|
||||
return str;
|
||||
}
|
||||
|
||||
// translate the string using the font's encoding
|
||||
var encoding = this.encoding;
|
||||
if (!encoding)
|
||||
return chars;
|
||||
|
||||
|
||||
// lazily create the translation cache
|
||||
if (!charsCache)
|
||||
charsCache = this.charsCache = Object.create(null);
|
||||
|
||||
str = '';
|
||||
for (var i = 0; i < chars.length; ++i) {
|
||||
var charcode = chars.charCodeAt(i);
|
||||
var unicode = encoding[charcode];
|
||||
if ('undefined' == typeof(unicode)) {
|
||||
// FIXME/issue 233: we're hitting this in test/pdf/sizes.pdf
|
||||
// at the moment, for unknown reasons.
|
||||
warn('Unencoded charcode '+ charcode);
|
||||
unicode = charcode;
|
||||
|
||||
if (this.type == "Type0") {
|
||||
//string needs to be converted from byte to multi-byte assume for now two-byte
|
||||
str = '';
|
||||
var multiByteStr = "";
|
||||
var length = chars.length;
|
||||
for (var i = 0; i < length; i++) {
|
||||
var byte1 = chars.charCodeAt(i++) & 0xFF;
|
||||
var byte2;
|
||||
if (i == length)
|
||||
byte2 = 0;
|
||||
else
|
||||
byte2 = chars.charCodeAt(i) & 0xFF;
|
||||
multiByteStr += String.fromCharCode((byte1<<8) | byte2);
|
||||
}
|
||||
|
||||
// Check if the glyph has already been converted
|
||||
if (!IsNum(unicode))
|
||||
unicode = encoding[unicode] = GlyphsUnicode[unicode.name];
|
||||
|
||||
// Handle surrogate pairs
|
||||
if (unicode > 0xFFFF) {
|
||||
str += String.fromCharCode(unicode & 0xFFFF);
|
||||
unicode >>= 16;
|
||||
str = multiByteStr;
|
||||
}
|
||||
else {
|
||||
// translate the string using the font's encoding
|
||||
var encoding = this.encoding;
|
||||
if (!encoding)
|
||||
return chars;
|
||||
|
||||
str = '';
|
||||
for (var i = 0; i < chars.length; ++i) {
|
||||
var charcode = chars.charCodeAt(i);
|
||||
var unicode = encoding[charcode];
|
||||
if ('undefined' == typeof(unicode)) {
|
||||
// FIXME/issue 233: we're hitting this in test/pdf/sizes.pdf
|
||||
// at the moment, for unknown reasons.
|
||||
warn('Unencoded charcode '+ charcode);
|
||||
unicode = charcode;
|
||||
}
|
||||
|
||||
// Check if the glyph has already been converted
|
||||
if (!IsNum(unicode))
|
||||
unicode = encoding[unicode] = GlyphsUnicode[unicode.name];
|
||||
|
||||
// Handle surrogate pairs
|
||||
if (unicode > 0xFFFF) {
|
||||
str += String.fromCharCode(unicode & 0xFFFF);
|
||||
unicode >>= 16;
|
||||
}
|
||||
str += String.fromCharCode(unicode);
|
||||
}
|
||||
str += String.fromCharCode(unicode);
|
||||
}
|
||||
|
||||
// Enter the translated string into the cache
|
||||
|
|
60
pdf.js
60
pdf.js
|
@ -64,6 +64,14 @@ function stringToBytes(str) {
|
|||
return bytes;
|
||||
}
|
||||
|
||||
/**
 * Packs a string of single-byte characters into a string of two-byte
 * (16-bit) characters, big-endian: each consecutive pair of input
 * characters becomes one output character whose code is
 * (first << 8) | second.
 *
 * Only the low 8 bits of every input char code are used. If the input has
 * an odd number of characters, the final character is treated as a high
 * byte paired with a low byte of 0.
 *
 * @param {string} str - String whose characters each carry one byte.
 * @returns {string} String with one 16-bit character per input byte pair.
 */
function singleByteToMultiByteString (str) {
  var multiByteStr = "";
  var length = str.length;
  for (var j = 0; j < length; j += 2) {
    var highByte = str.charCodeAt(j) & 0xFF;
    // Guard the trailing byte of an odd-length input instead of reading
    // past the end of the string.
    var lowByte = (j + 1 < length) ? (str.charCodeAt(j + 1) & 0xFF) : 0;
    // The shift must be 8, not 16: String.fromCharCode keeps only the low
    // 16 bits of its argument, so a 16-bit shift would drop the high byte.
    multiByteStr += String.fromCharCode((highByte << 8) | lowByte);
  }
  return multiByteStr;
}
|
||||
var Stream = (function() {
|
||||
function constructor(arrayBuffer, start, length, dict) {
|
||||
this.bytes = new Uint8Array(arrayBuffer);
|
||||
|
@ -3624,19 +3632,26 @@ var PartialEvaluator = (function() {
|
|||
},
|
||||
|
||||
translateFont: function(fontDict, xref, resources) {
|
||||
var fd = fontDict.get('FontDescriptor');
|
||||
if (!fd)
|
||||
var fd;
|
||||
var descendant = [];
|
||||
var subType = fontDict.get('Subtype');
|
||||
assertWellFormed(IsName(subType), 'invalid font Subtype');
|
||||
|
||||
//If font is a composite get the FontDescriptor from the descendant font
|
||||
if (subType.name == "Type0")
|
||||
{
|
||||
//If font is a composite get the FontDescriptor from the descendant
|
||||
var df = fontDict.get("DescendantFonts");
|
||||
if (!df)
|
||||
return null;
|
||||
var descendant = xref.fetch(df[0]);
|
||||
descendant = xref.fetch(df[0]);
|
||||
fd = descendant.get("FontDescriptor");
|
||||
if (!fd)
|
||||
return null;
|
||||
fontDict.set("FontDescriptor", fd);
|
||||
} else {
|
||||
fd = fontDict.get('FontDescriptor');
|
||||
}
|
||||
|
||||
if (!fd)
|
||||
return null;
|
||||
|
||||
var descriptor = xref.fetch(fd);
|
||||
|
||||
var fontName = descriptor.get('FontName');
|
||||
|
@ -3650,7 +3665,32 @@ var PartialEvaluator = (function() {
|
|||
|
||||
var encodingMap = {};
|
||||
var charset = [];
|
||||
if (fontDict.has('Encoding')) {
|
||||
if (subType.name == 'Type0') {
|
||||
//XXX CIDFont support - only identity CID Encoding for now
|
||||
var encoding = xref.fetchIfRef(fontDict.get('Encoding'));
|
||||
if (IsName(encoding)) {
|
||||
//Encoding is a predefined CMap
|
||||
if (encoding.name == 'Identity-H') {
|
||||
if (descendant.get('Subtype').name == 'CIDFontType2')
|
||||
{
|
||||
//Extract an encoding from the CIDToGIDMap
|
||||
var glyphsStream = xref.fetchIfRef(descendant.get('CIDToGIDMap'));
|
||||
var glyphsData = glyphsStream.getBytes(0);
|
||||
var i = 0;
|
||||
for (var j=0; j<glyphsData.length; j++) {
|
||||
var glyphID = (glyphsData[j++]*0x100)+glyphsData[j];
|
||||
//encodingMap[glyphID] = i++;
|
||||
charset.push(glyphID);
|
||||
}
|
||||
encoding[0] = 0;
|
||||
}
|
||||
} else {
|
||||
TODO ('Need to support predefined CMaps see PDF 32000-1:2008 9.7.5.2 Predefined CMaps')
|
||||
}
|
||||
} else {
|
||||
TODO ('Need to support encoding streams see PDF 32000-1:2008 9.7.5.3');
|
||||
}
|
||||
} else if (fontDict.has('Encoding')) {
|
||||
var encoding = xref.fetchIfRef(fontDict.get('Encoding'));
|
||||
if (IsDict(encoding)) {
|
||||
// Build a map of between codes and glyphs
|
||||
|
@ -3682,7 +3722,6 @@ var PartialEvaluator = (function() {
|
|||
}
|
||||
} else if (IsName(encoding)) {
|
||||
var encoding = Encodings[encoding.name];
|
||||
//XXX CIDFont support - get the CID Encoding especially support japan1 and identity
|
||||
if (!encoding)
|
||||
error('Unknown font encoding');
|
||||
|
||||
|
@ -3767,9 +3806,6 @@ var PartialEvaluator = (function() {
|
|||
}
|
||||
}
|
||||
|
||||
var subType = fontDict.get('Subtype');
|
||||
assertWellFormed(IsName(subType), 'invalid font Subtype');
|
||||
|
||||
var properties = {
|
||||
type: subType.name,
|
||||
encoding: encodingMap,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue