mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-26 01:58:06 +02:00
Refactors CMapFactory.create to make it async
This commit is contained in:
parent
c6d2b7f9d9
commit
f6d28ca323
8 changed files with 559 additions and 473 deletions
|
@ -24,7 +24,7 @@
|
|||
'pdfjs/core/fonts', 'pdfjs/core/function', 'pdfjs/core/pattern',
|
||||
'pdfjs/core/cmap', 'pdfjs/core/metrics', 'pdfjs/core/bidi',
|
||||
'pdfjs/core/encodings', 'pdfjs/core/standard_fonts',
|
||||
'pdfjs/core/unicode'], factory);
|
||||
'pdfjs/core/unicode', 'pdfjs/core/glyphlist'], factory);
|
||||
} else if (typeof exports !== 'undefined') {
|
||||
factory(exports, require('../shared/util.js'), require('./primitives.js'),
|
||||
require('./stream.js'), require('./parser.js'), require('./image.js'),
|
||||
|
@ -32,7 +32,7 @@
|
|||
require('./fonts.js'), require('./function.js'), require('./pattern.js'),
|
||||
require('./cmap.js'), require('./metrics.js'), require('./bidi.js'),
|
||||
require('./encodings.js'), require('./standard_fonts.js'),
|
||||
require('./unicode.js'));
|
||||
require('./unicode.js'), require('./glyphlist.js'));
|
||||
} else {
|
||||
factory((root.pdfjsCoreEvaluator = {}), root.pdfjsSharedUtil,
|
||||
root.pdfjsCorePrimitives, root.pdfjsCoreStream, root.pdfjsCoreParser,
|
||||
|
@ -40,12 +40,13 @@
|
|||
root.pdfjsCoreFonts, root.pdfjsCoreFunction, root.pdfjsCorePattern,
|
||||
root.pdfjsCoreCMap, root.pdfjsCoreMetrics, root.pdfjsCoreBidi,
|
||||
root.pdfjsCoreEncodings, root.pdfjsCoreStandardFonts,
|
||||
root.pdfjsCoreUnicode);
|
||||
root.pdfjsCoreUnicode, root.pdfjsCoreGlyphList);
|
||||
}
|
||||
}(this, function (exports, sharedUtil, corePrimitives, coreStream, coreParser,
|
||||
coreImage, coreColorSpace, coreMurmurHash3, coreFonts,
|
||||
coreFunction, corePattern, coreCMap, coreMetrics, coreBidi,
|
||||
coreEncodings, coreStandardFonts, coreUnicode) {
|
||||
coreEncodings, coreStandardFonts, coreUnicode,
|
||||
coreGlyphList) {
|
||||
|
||||
var FONT_IDENTITY_MATRIX = sharedUtil.FONT_IDENTITY_MATRIX;
|
||||
var IDENTITY_MATRIX = sharedUtil.IDENTITY_MATRIX;
|
||||
|
@ -103,6 +104,8 @@ var getSerifFonts = coreStandardFonts.getSerifFonts;
|
|||
var getSymbolsFonts = coreStandardFonts.getSymbolsFonts;
|
||||
var getNormalizedUnicodes = coreUnicode.getNormalizedUnicodes;
|
||||
var reverseIfRtl = coreUnicode.reverseIfRtl;
|
||||
var getUnicodeForGlyph = coreUnicode.getUnicodeForGlyph;
|
||||
var getGlyphsUnicode = coreGlyphList.getGlyphsUnicode;
|
||||
|
||||
var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
function PartialEvaluator(pdfManager, xref, handler, pageIndex,
|
||||
|
@ -651,8 +654,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||
// TODO move promises into translate font
|
||||
var translatedPromise;
|
||||
try {
|
||||
translatedPromise = Promise.resolve(
|
||||
this.translateFont(preEvaluatedFont, xref));
|
||||
translatedPromise = this.translateFont(preEvaluatedFont, xref);
|
||||
} catch (e) {
|
||||
translatedPromise = Promise.reject(e);
|
||||
}
|
||||
|
@ -1550,9 +1552,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||
xref, properties) {
|
||||
// 9.10.2
|
||||
var toUnicode = (dict.get('ToUnicode') || baseDict.get('ToUnicode'));
|
||||
if (toUnicode) {
|
||||
properties.toUnicode = this.readToUnicode(toUnicode);
|
||||
}
|
||||
var toUnicodePromise = toUnicode ?
|
||||
this.readToUnicode(toUnicode) : Promise.resolve(undefined);
|
||||
|
||||
if (properties.composite) {
|
||||
// CIDSystemInfo helps to match CID to glyphs
|
||||
var cidSystemInfo = dict.get('CIDSystemInfo');
|
||||
|
@ -1637,44 +1639,189 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||
properties.differences = differences;
|
||||
properties.baseEncodingName = baseEncodingName;
|
||||
properties.dict = dict;
|
||||
return toUnicodePromise.then(function(toUnicode) {
|
||||
properties.toUnicode = toUnicode;
|
||||
return this.buildToUnicode(properties);
|
||||
}.bind(this)).then(function (toUnicode) {
|
||||
properties.toUnicode = toUnicode;
|
||||
return properties;
|
||||
});
|
||||
},
|
||||
|
||||
/**
|
||||
* Builds a char code to unicode map based on section 9.10 of the spec.
|
||||
* @param {Object} properties Font properties object.
|
||||
* @return {Promise} A Promise resolving to ToUnicodeMap object.
|
||||
*/
|
||||
buildToUnicode: function partialEvaluator_buildToUnicode(properties) {
|
||||
// Section 9.10.2 Mapping Character Codes to Unicode Values
|
||||
if (properties.toUnicode && properties.toUnicode.length !== 0) {
|
||||
return Promise.resolve(properties.toUnicode);
|
||||
}
|
||||
// According to the spec if the font is a simple font we should only map
|
||||
// to unicode if the base encoding is MacRoman, MacExpert, or WinAnsi or
|
||||
// the differences array only contains adobe standard or symbol set names,
|
||||
// in pratice it seems better to always try to create a toUnicode
|
||||
// map based of the default encoding.
|
||||
var toUnicode, charcode;
|
||||
if (!properties.composite /* is simple font */) {
|
||||
toUnicode = [];
|
||||
var encoding = properties.defaultEncoding.slice();
|
||||
var baseEncodingName = properties.baseEncodingName;
|
||||
// Merge in the differences array.
|
||||
var differences = properties.differences;
|
||||
for (charcode in differences) {
|
||||
encoding[charcode] = differences[charcode];
|
||||
}
|
||||
var glyphsUnicodeMap = getGlyphsUnicode();
|
||||
for (charcode in encoding) {
|
||||
// a) Map the character code to a character name.
|
||||
var glyphName = encoding[charcode];
|
||||
// b) Look up the character name in the Adobe Glyph List (see the
|
||||
// Bibliography) to obtain the corresponding Unicode value.
|
||||
if (glyphName === '') {
|
||||
continue;
|
||||
} else if (glyphsUnicodeMap[glyphName] === undefined) {
|
||||
// (undocumented) c) Few heuristics to recognize unknown glyphs
|
||||
// NOTE: Adobe Reader does not do this step, but OSX Preview does
|
||||
var code = 0;
|
||||
switch (glyphName[0]) {
|
||||
case 'G': // Gxx glyph
|
||||
if (glyphName.length === 3) {
|
||||
code = parseInt(glyphName.substr(1), 16);
|
||||
}
|
||||
break;
|
||||
case 'g': // g00xx glyph
|
||||
if (glyphName.length === 5) {
|
||||
code = parseInt(glyphName.substr(1), 16);
|
||||
}
|
||||
break;
|
||||
case 'C': // Cddd glyph
|
||||
case 'c': // cddd glyph
|
||||
if (glyphName.length >= 3) {
|
||||
code = +glyphName.substr(1);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// 'uniXXXX'/'uXXXX{XX}' glyphs
|
||||
var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||
if (unicode !== -1) {
|
||||
code = unicode;
|
||||
}
|
||||
}
|
||||
if (code) {
|
||||
// If |baseEncodingName| is one the predefined encodings,
|
||||
// and |code| equals |charcode|, using the glyph defined in the
|
||||
// baseEncoding seems to yield a better |toUnicode| mapping
|
||||
// (fixes issue 5070).
|
||||
if (baseEncodingName && code === +charcode) {
|
||||
var baseEncoding = getEncoding(baseEncodingName);
|
||||
if (baseEncoding && (glyphName = baseEncoding[charcode])) {
|
||||
toUnicode[charcode] =
|
||||
String.fromCharCode(glyphsUnicodeMap[glyphName]);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
toUnicode[charcode] = String.fromCharCode(code);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
toUnicode[charcode] =
|
||||
String.fromCharCode(glyphsUnicodeMap[glyphName]);
|
||||
}
|
||||
return Promise.resolve(new ToUnicodeMap(toUnicode));
|
||||
}
|
||||
// If the font is a composite font that uses one of the predefined CMaps
|
||||
// listed in Table 118 (except Identity–H and Identity–V) or whose
|
||||
// descendant CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or
|
||||
// Adobe-Korea1 character collection:
|
||||
if (properties.composite && (
|
||||
(properties.cMap.builtInCMap &&
|
||||
!(properties.cMap instanceof IdentityCMap)) ||
|
||||
(properties.cidSystemInfo.registry === 'Adobe' &&
|
||||
(properties.cidSystemInfo.ordering === 'GB1' ||
|
||||
properties.cidSystemInfo.ordering === 'CNS1' ||
|
||||
properties.cidSystemInfo.ordering === 'Japan1' ||
|
||||
properties.cidSystemInfo.ordering === 'Korea1')))) {
|
||||
// Then:
|
||||
// a) Map the character code to a character identifier (CID) according
|
||||
// to the font’s CMap.
|
||||
// b) Obtain the registry and ordering of the character collection used
|
||||
// by the font’s CMap (for example, Adobe and Japan1) from its
|
||||
// CIDSystemInfo dictionary.
|
||||
var registry = properties.cidSystemInfo.registry;
|
||||
var ordering = properties.cidSystemInfo.ordering;
|
||||
// c) Construct a second CMap name by concatenating the registry and
|
||||
// ordering obtained in step (b) in the format registry–ordering–UCS2
|
||||
// (for example, Adobe–Japan1–UCS2).
|
||||
var ucs2CMapName = new Name(registry + '-' + ordering + '-UCS2');
|
||||
// d) Obtain the CMap with the name constructed in step (c) (available
|
||||
// from the ASN Web site; see the Bibliography).
|
||||
return CMapFactory.create(ucs2CMapName,
|
||||
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null).then(
|
||||
function (ucs2CMap) {
|
||||
var cMap = properties.cMap;
|
||||
toUnicode = [];
|
||||
cMap.forEach(function(charcode, cid) {
|
||||
assert(cid <= 0xffff, 'Max size of CID is 65,535');
|
||||
// e) Map the CID obtained in step (a) according to the CMap
|
||||
// obtained in step (d), producing a Unicode value.
|
||||
var ucs2 = ucs2CMap.lookup(cid);
|
||||
if (ucs2) {
|
||||
toUnicode[charcode] =
|
||||
String.fromCharCode((ucs2.charCodeAt(0) << 8) +
|
||||
ucs2.charCodeAt(1));
|
||||
}
|
||||
});
|
||||
return new ToUnicodeMap(toUnicode);
|
||||
});
|
||||
}
|
||||
|
||||
// The viewer's choice, just use an identity map.
|
||||
return Promise.resolve(new IdentityToUnicodeMap(properties.firstChar,
|
||||
properties.lastChar));
|
||||
},
|
||||
|
||||
readToUnicode: function PartialEvaluator_readToUnicode(toUnicode) {
|
||||
var cmap, cmapObj = toUnicode;
|
||||
var cmapObj = toUnicode;
|
||||
if (isName(cmapObj)) {
|
||||
cmap = CMapFactory.create(cmapObj,
|
||||
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null);
|
||||
if (cmap instanceof IdentityCMap) {
|
||||
return new IdentityToUnicodeMap(0, 0xFFFF);
|
||||
}
|
||||
return new ToUnicodeMap(cmap.getMap());
|
||||
} else if (isStream(cmapObj)) {
|
||||
cmap = CMapFactory.create(cmapObj,
|
||||
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null);
|
||||
if (cmap instanceof IdentityCMap) {
|
||||
return new IdentityToUnicodeMap(0, 0xFFFF);
|
||||
}
|
||||
var map = new Array(cmap.length);
|
||||
// Convert UTF-16BE
|
||||
// NOTE: cmap can be a sparse array, so use forEach instead of for(;;)
|
||||
// to iterate over all keys.
|
||||
cmap.forEach(function(charCode, token) {
|
||||
var str = [];
|
||||
for (var k = 0; k < token.length; k += 2) {
|
||||
var w1 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1);
|
||||
if ((w1 & 0xF800) !== 0xD800) { // w1 < 0xD800 || w1 > 0xDFFF
|
||||
str.push(w1);
|
||||
continue;
|
||||
}
|
||||
k += 2;
|
||||
var w2 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1);
|
||||
str.push(((w1 & 0x3ff) << 10) + (w2 & 0x3ff) + 0x10000);
|
||||
return CMapFactory.create(cmapObj,
|
||||
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null).then(
|
||||
function (cmap) {
|
||||
if (cmap instanceof IdentityCMap) {
|
||||
return new IdentityToUnicodeMap(0, 0xFFFF);
|
||||
}
|
||||
map[charCode] = String.fromCharCode.apply(String, str);
|
||||
return new ToUnicodeMap(cmap.getMap());
|
||||
});
|
||||
} else if (isStream(cmapObj)) {
|
||||
return CMapFactory.create(cmapObj,
|
||||
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null).then(
|
||||
function (cmap) {
|
||||
if (cmap instanceof IdentityCMap) {
|
||||
return new IdentityToUnicodeMap(0, 0xFFFF);
|
||||
}
|
||||
var map = new Array(cmap.length);
|
||||
// Convert UTF-16BE
|
||||
// NOTE: cmap can be a sparse array, so use forEach instead of for(;;)
|
||||
// to iterate over all keys.
|
||||
cmap.forEach(function(charCode, token) {
|
||||
var str = [];
|
||||
for (var k = 0; k < token.length; k += 2) {
|
||||
var w1 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1);
|
||||
if ((w1 & 0xF800) !== 0xD800) { // w1 < 0xD800 || w1 > 0xDFFF
|
||||
str.push(w1);
|
||||
continue;
|
||||
}
|
||||
k += 2;
|
||||
var w2 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1);
|
||||
str.push(((w1 & 0x3ff) << 10) + (w2 & 0x3ff) + 0x10000);
|
||||
}
|
||||
map[charCode] = String.fromCharCode.apply(String, str);
|
||||
});
|
||||
return new ToUnicodeMap(map);
|
||||
});
|
||||
return new ToUnicodeMap(map);
|
||||
}
|
||||
return null;
|
||||
return Promise.resolve(null);
|
||||
},
|
||||
|
||||
readCidToGidMap: function PartialEvaluator_readCidToGidMap(cidToGidStream) {
|
||||
|
@ -1978,10 +2125,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||
firstChar: 0,
|
||||
lastChar: maxCharIndex
|
||||
};
|
||||
this.extractDataStructures(dict, dict, xref, properties);
|
||||
properties.widths = this.buildCharCodeToWidth(metrics.widths,
|
||||
properties);
|
||||
return new Font(baseFontName, null, properties);
|
||||
return this.extractDataStructures(dict, dict, xref, properties).then(
|
||||
function (properties) {
|
||||
properties.widths = this.buildCharCodeToWidth(metrics.widths,
|
||||
properties);
|
||||
return new Font(baseFontName, null, properties);
|
||||
}.bind(this));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2058,23 +2207,33 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||
coded: false
|
||||
};
|
||||
|
||||
var cMapPromise;
|
||||
if (composite) {
|
||||
var cidEncoding = baseDict.get('Encoding');
|
||||
if (isName(cidEncoding)) {
|
||||
properties.cidEncoding = cidEncoding.name;
|
||||
}
|
||||
properties.cMap = CMapFactory.create(cidEncoding,
|
||||
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null);
|
||||
properties.vertical = properties.cMap.vertical;
|
||||
}
|
||||
this.extractDataStructures(dict, baseDict, xref, properties);
|
||||
this.extractWidths(dict, xref, descriptor, properties);
|
||||
|
||||
if (type === 'Type3') {
|
||||
properties.isType3Font = true;
|
||||
cMapPromise = CMapFactory.create(cidEncoding,
|
||||
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null).then(
|
||||
function (cMap) {
|
||||
properties.cMap = cMap;
|
||||
properties.vertical = properties.cMap.vertical;
|
||||
});
|
||||
} else {
|
||||
cMapPromise = Promise.resolve(undefined);
|
||||
}
|
||||
|
||||
return new Font(fontName.name, fontFile, properties);
|
||||
return cMapPromise.then(function () {
|
||||
return this.extractDataStructures(dict, baseDict, xref, properties);
|
||||
}.bind(this)).then(function (properties) {
|
||||
this.extractWidths(dict, xref, descriptor, properties);
|
||||
|
||||
if (type === 'Type3') {
|
||||
properties.isType3Font = true;
|
||||
}
|
||||
|
||||
return new Font(fontName.name, fontFile, properties);
|
||||
}.bind(this));
|
||||
}
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue