mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-23 00:28:06 +02:00
Merge remote-tracking branch 'mozilla/master' into textsearch
Conflicts: extensions/firefox/components/PdfStreamConverter.js
This commit is contained in:
commit
d370f69978
17 changed files with 351 additions and 107 deletions
|
@ -108,39 +108,21 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||
|
||||
// Compatibility
|
||||
BX: 'beginCompat',
|
||||
EX: 'endCompat'
|
||||
EX: 'endCompat',
|
||||
|
||||
// (reserved partial commands for the lexer)
|
||||
BM: null,
|
||||
BD: null,
|
||||
'true': null,
|
||||
fa: null,
|
||||
fal: null,
|
||||
fals: null,
|
||||
'false': null,
|
||||
nu: null,
|
||||
nul: null,
|
||||
'null': null
|
||||
};
|
||||
|
||||
function splitCombinedOperations(operations) {
|
||||
// Two or more operations can be combined together, trying to find which
|
||||
// operations were concatenated.
|
||||
var result = [];
|
||||
var opIndex = 0;
|
||||
|
||||
if (!operations) {
|
||||
return null;
|
||||
}
|
||||
|
||||
while (opIndex < operations.length) {
|
||||
var currentOp = '';
|
||||
for (var op in OP_MAP) {
|
||||
if (op == operations.substr(opIndex, op.length) &&
|
||||
op.length > currentOp.length) {
|
||||
currentOp = op;
|
||||
}
|
||||
}
|
||||
|
||||
if (currentOp.length > 0) {
|
||||
result.push(operations.substr(opIndex, currentOp.length));
|
||||
opIndex += currentOp.length;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
PartialEvaluator.prototype = {
|
||||
getOperatorList: function PartialEvaluator_getOperatorList(stream,
|
||||
resources,
|
||||
|
@ -285,39 +267,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||
resources = resources || new Dict();
|
||||
var xobjs = resources.get('XObject') || new Dict();
|
||||
var patterns = resources.get('Pattern') || new Dict();
|
||||
var parser = new Parser(new Lexer(stream), false, xref);
|
||||
var parser = new Parser(new Lexer(stream, OP_MAP), false, xref);
|
||||
var res = resources;
|
||||
var hasNextObj = false, nextObjs;
|
||||
var args = [], obj;
|
||||
var TILING_PATTERN = 1, SHADING_PATTERN = 2;
|
||||
|
||||
while (true) {
|
||||
if (hasNextObj) {
|
||||
obj = nextObjs.pop();
|
||||
hasNextObj = (nextObjs.length > 0);
|
||||
} else {
|
||||
obj = parser.getObj();
|
||||
if (isEOF(obj))
|
||||
break;
|
||||
}
|
||||
obj = parser.getObj();
|
||||
if (isEOF(obj))
|
||||
break;
|
||||
|
||||
if (isCmd(obj)) {
|
||||
var cmd = obj.cmd;
|
||||
var fn = OP_MAP[cmd];
|
||||
if (!fn) {
|
||||
// invalid content command, trying to recover
|
||||
var cmds = splitCombinedOperations(cmd);
|
||||
if (cmds) {
|
||||
cmd = cmds[0];
|
||||
fn = OP_MAP[cmd];
|
||||
// feeding other command on the next iteration
|
||||
hasNextObj = true;
|
||||
nextObjs = [];
|
||||
for (var idx = 1; idx < cmds.length; idx++) {
|
||||
nextObjs.push(Cmd.get(cmds[idx]));
|
||||
}
|
||||
}
|
||||
}
|
||||
assertWellFormed(fn, 'Unknown command "' + cmd + '"');
|
||||
// TODO figure out how to type-check vararg functions
|
||||
|
||||
|
@ -725,8 +687,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||
}
|
||||
} else if (octet == 0x3E) {
|
||||
if (token.length) {
|
||||
// XXX guessing chars size by checking number size in the CMap
|
||||
if (token.length <= 2 && properties.composite)
|
||||
// Heuristic: guessing chars size by checking numbers sizes
|
||||
// in the CMap entries.
|
||||
if (token.length == 2 && properties.composite)
|
||||
properties.wideChars = false;
|
||||
|
||||
if (token.length <= 4) {
|
||||
|
|
25
src/fonts.js
25
src/fonts.js
|
@ -2706,9 +2706,9 @@ var Font = (function FontClosure() {
|
|||
this.isSymbolicFont = false;
|
||||
}
|
||||
|
||||
// heuristics: if removed more than 2 glyphs encoding WinAnsiEncoding
|
||||
// does not set properly
|
||||
if (glyphsRemoved > 2) {
|
||||
// heuristics: if more than 10 glyphs were removed, the WinAnsiEncoding
|
||||
// was not set properly (broken PDFs have about 100 removed glyphs)
|
||||
if (glyphsRemoved > 10) {
|
||||
warn('Switching TrueType encoding to MacRomanEncoding for ' +
|
||||
this.name + ' font');
|
||||
encoding = Encodings.MacRomanEncoding;
|
||||
|
@ -4208,7 +4208,7 @@ var CFFFont = (function CFFFontClosure() {
|
|||
this.properties = properties;
|
||||
|
||||
var parser = new CFFParser(file, properties);
|
||||
var cff = parser.parse();
|
||||
var cff = parser.parse(true);
|
||||
var compiler = new CFFCompiler(cff);
|
||||
this.readExtra(cff);
|
||||
try {
|
||||
|
@ -4299,7 +4299,7 @@ var CFFParser = (function CFFParserClosure() {
|
|||
this.properties = properties;
|
||||
}
|
||||
CFFParser.prototype = {
|
||||
parse: function CFFParser_parse() {
|
||||
parse: function CFFParser_parse(normalizeCIDData) {
|
||||
var properties = this.properties;
|
||||
var cff = new CFF();
|
||||
this.cff = cff;
|
||||
|
@ -4354,6 +4354,21 @@ var CFFParser = (function CFFParserClosure() {
|
|||
cff.charset = charset;
|
||||
cff.encoding = encoding;
|
||||
|
||||
if (!cff.isCIDFont || !normalizeCIDData)
|
||||
return cff;
|
||||
|
||||
// DirectWrite does not like CID fonts data. Trying to convert/flatten
|
||||
// the font data and remove CID properties.
|
||||
if (cff.fdArray.length !== 1)
|
||||
error('Unable to normalize CID font in CFF data');
|
||||
|
||||
var fontDict = cff.fdArray[0];
|
||||
fontDict.setByKey(17, topDict.getByName('CharStrings'));
|
||||
cff.topDict = fontDict;
|
||||
cff.isCIDFont = false;
|
||||
delete cff.fdArray;
|
||||
delete cff.fdSelect;
|
||||
|
||||
return cff;
|
||||
},
|
||||
parseHeader: function CFFParser_parseHeader() {
|
||||
|
|
|
@ -264,8 +264,16 @@ var Parser = (function ParserClosure() {
|
|||
})();
|
||||
|
||||
var Lexer = (function LexerClosure() {
|
||||
function Lexer(stream) {
|
||||
function Lexer(stream, knownCommands) {
|
||||
this.stream = stream;
|
||||
// The PDFs might have "glued" commands with other commands, operands or
|
||||
// literals, e.g. "q1". The knownCommands is a dictionary of the valid
|
||||
// commands and their prefixes. The prefixes are built the following way:
|
||||
// if there is a command that is a prefix of another valid command or
|
||||
// literal (e.g. 'f' and 'false') the following prefixes must be included,
|
||||
// 'fa', 'fal', 'fals'. The prefixes are not needed, if the command has no
|
||||
// other commands or literals as a prefix. The knownCommands is optional.
|
||||
this.knownCommands = knownCommands;
|
||||
}
|
||||
|
||||
Lexer.isSpace = function Lexer_isSpace(ch) {
|
||||
|
@ -529,12 +537,18 @@ var Lexer = (function LexerClosure() {
|
|||
|
||||
// command
|
||||
var str = ch;
|
||||
var knownCommands = this.knownCommands;
|
||||
var knownCommandFound = knownCommands && (str in knownCommands);
|
||||
while (!!(ch = stream.lookChar()) && !specialChars[ch.charCodeAt(0)]) {
|
||||
// stop if known command is found and next character does not make
|
||||
// the str a command
|
||||
if (knownCommandFound && !((str + ch) in knownCommands))
|
||||
break;
|
||||
stream.skip();
|
||||
if (str.length == 128)
|
||||
error('Command token too long: ' + str.length);
|
||||
|
||||
str += ch;
|
||||
knownCommandFound = knownCommands && (str in knownCommands);
|
||||
}
|
||||
if (str == 'true')
|
||||
return true;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue