1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-19 14:48:08 +02:00

Second pass CIDFont support - make Arial Unicode in OpenOffice PDF

- supports PDF fonts with CIDtoGIDMap and no cmap
This commit is contained in:
Adil Allawi 2011-07-11 17:41:47 +01:00
parent ea2d651709
commit 7b8542c6a7
2 changed files with 125 additions and 43 deletions

108
fonts.js
View file

@ -404,12 +404,21 @@ var Font = (function() {
data = this.checkAndRepair(name, file, properties);
break;
case 'Type0':
//this is a Truetype font
this.mimetype = 'font/opentype';
// Repair the TrueType file if it could be considered damaged from the
// point of view of the sanitizer
data = this.checkAndRepair(name, file, properties);
break;
default:
warn('Font ' + properties.type + ' is not supported');
break;
}
this.data = data;
this.type = properties.type; //use the type to test if the string is single or multi-byte
this.id = Fonts.registerFont(name, data, properties);
this.loadedName = 'pdfFont' + this.id;
};
@ -856,8 +865,26 @@ var Font = (function() {
data: stringToArray(createOS2Table(properties))
});
// Replace the old CMAP table with a shiny new one
replaceCMapTable(cmap, font, properties);
if (!cmap) {
var glyphs = [];
var charset = properties.charset;
for (var i=1; i < charset.length; i++) {
if (charset.indexOf(i) != -1) {
glyphs.push({
unicode: charset.indexOf(i)
});
} else {
break;
}
}
tables.push({
tag: 'cmap',
data: createCMapTable(glyphs)
})
} else {
// Replace the old CMAP table with a shiny new one
replaceCMapTable(cmap, font, properties);
}
// Rewrite the 'post' table if needed
if (!post) {
@ -1110,44 +1137,63 @@ var Font = (function() {
charsToUnicode: function fonts_chars2Unicode(chars) {
var charsCache = this.charsCache;
var str;
// if we translated this string before, just grab it from the cache
if (charsCache) {
var str = charsCache[chars];
str = charsCache[chars];
if (str)
return str;
}
// translate the string using the font's encoding
var encoding = this.encoding;
if (!encoding)
return chars;
// lazily create the translation cache
if (!charsCache)
charsCache = this.charsCache = Object.create(null);
str = '';
for (var i = 0; i < chars.length; ++i) {
var charcode = chars.charCodeAt(i);
var unicode = encoding[charcode];
if ('undefined' == typeof(unicode)) {
// FIXME/issue 233: we're hitting this in test/pdf/sizes.pdf
// at the moment, for unknown reasons.
warn('Unencoded charcode '+ charcode);
unicode = charcode;
if (this.type == "Type0") {
// The string needs to be converted from single-byte to multi-byte; assume two bytes per character for now
str = '';
var multiByteStr = "";
var length = chars.length;
for (var i = 0; i < length; i++) {
var byte1 = chars.charCodeAt(i++) & 0xFF;
var byte2;
if (i == length)
byte2 = 0;
else
byte2 = chars.charCodeAt(i) & 0xFF;
multiByteStr += String.fromCharCode((byte1<<8) | byte2);
}
// Check if the glyph has already been converted
if (!IsNum(unicode))
unicode = encoding[unicode] = GlyphsUnicode[unicode.name];
// Handle surrogate pairs
if (unicode > 0xFFFF) {
str += String.fromCharCode(unicode & 0xFFFF);
unicode >>= 16;
str = multiByteStr;
}
else {
// translate the string using the font's encoding
var encoding = this.encoding;
if (!encoding)
return chars;
str = '';
for (var i = 0; i < chars.length; ++i) {
var charcode = chars.charCodeAt(i);
var unicode = encoding[charcode];
if ('undefined' == typeof(unicode)) {
// FIXME/issue 233: we're hitting this in test/pdf/sizes.pdf
// at the moment, for unknown reasons.
warn('Unencoded charcode '+ charcode);
unicode = charcode;
}
// Check if the glyph has already been converted
if (!IsNum(unicode))
unicode = encoding[unicode] = GlyphsUnicode[unicode.name];
// Handle surrogate pairs
if (unicode > 0xFFFF) {
str += String.fromCharCode(unicode & 0xFFFF);
unicode >>= 16;
}
str += String.fromCharCode(unicode);
}
str += String.fromCharCode(unicode);
}
// Enter the translated string into the cache

60
pdf.js
View file

@ -64,6 +64,14 @@ function stringToBytes(str) {
return bytes;
}
/**
 * Packs a string of single-byte characters into a string of two-byte
 * (big-endian) characters: each consecutive pair of bytes becomes one
 * UTF-16 code unit, with the first byte as the high byte.
 *
 * @param {string} str - Input string; it is converted to bytes with
 *   stringToBytes() before packing.
 * @returns {string} A string with one character per input byte pair. If the
 *   input has an odd number of bytes, the trailing byte is paired with a
 *   zero low byte.
 */
function singleByteToMultiByteString(str) {
var multiByteStr = '';
// Convert the argument itself (the original referenced an undefined name).
var bytes = stringToBytes(str);
var length = bytes.length;
for (var j = 0; j < length; j += 2) {
var highByte = bytes[j];
// A dangling final byte has no partner; pad with zero.
var lowByte = (j + 1 < length) ? bytes[j + 1] : 0;
// Shift by 8, not 16: String.fromCharCode keeps only the low 16 bits,
// so a 16-bit shift would throw the high byte away.
multiByteStr += String.fromCharCode((highByte << 8) | lowByte);
}
return multiByteStr;
}
var Stream = (function() {
function constructor(arrayBuffer, start, length, dict) {
this.bytes = new Uint8Array(arrayBuffer);
@ -3624,19 +3632,26 @@ var PartialEvaluator = (function() {
},
translateFont: function(fontDict, xref, resources) {
var fd = fontDict.get('FontDescriptor');
if (!fd)
var fd;
var descendant = [];
var subType = fontDict.get('Subtype');
assertWellFormed(IsName(subType), 'invalid font Subtype');
//If font is a composite get the FontDescriptor from the descendant font
if (subType.name == "Type0")
{
//If font is a composite get the FontDescriptor from the descendant
var df = fontDict.get("DescendantFonts");
if (!df)
return null;
var descendant = xref.fetch(df[0]);
descendant = xref.fetch(df[0]);
fd = descendant.get("FontDescriptor");
if (!fd)
return null;
fontDict.set("FontDescriptor", fd);
} else {
fd = fontDict.get('FontDescriptor');
}
if (!fd)
return null;
var descriptor = xref.fetch(fd);
var fontName = descriptor.get('FontName');
@ -3650,7 +3665,32 @@ var PartialEvaluator = (function() {
var encodingMap = {};
var charset = [];
if (fontDict.has('Encoding')) {
if (subType.name == 'Type0') {
//XXX CIDFont support - only identity CID Encoding for now
var encoding = xref.fetchIfRef(fontDict.get('Encoding'));
if (IsName(encoding)) {
//Encoding is a predefined CMap
if (encoding.name == 'Identity-H') {
if (descendant.get('Subtype').name == 'CIDFontType2')
{
//Extract an encoding from the CIDToGIDMap
var glyphsStream = xref.fetchIfRef(descendant.get('CIDToGIDMap'));
var glyphsData = glyphsStream.getBytes(0);
var i = 0;
for (var j=0; j<glyphsData.length; j++) {
var glyphID = (glyphsData[j++]*0x100)+glyphsData[j];
//encodingMap[glyphID] = i++;
charset.push(glyphID);
}
encoding[0] = 0;
}
} else {
TODO ('Need to support predefined CMaps see PDF 32000-1:2008 9.7.5.2 Predefined CMaps')
}
} else {
TODO ('Need to support encoding streams see PDF 32000-1:2008 9.7.5.3');
}
} else if (fontDict.has('Encoding')) {
var encoding = xref.fetchIfRef(fontDict.get('Encoding'));
if (IsDict(encoding)) {
// Build a map of between codes and glyphs
@ -3682,7 +3722,6 @@ var PartialEvaluator = (function() {
}
} else if (IsName(encoding)) {
var encoding = Encodings[encoding.name];
//XXX CIDFont support - get the CID Encoding especially support japan1 and identity
if (!encoding)
error('Unknown font encoding');
@ -3767,9 +3806,6 @@ var PartialEvaluator = (function() {
}
}
var subType = fontDict.get('Subtype');
assertWellFormed(IsName(subType), 'invalid font Subtype');
var properties = {
type: subType.name,
encoding: encodingMap,