1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-23 00:28:06 +02:00

Merge remote-tracking branch 'mozilla/master' into textsearch

Conflicts:
	extensions/firefox/components/PdfStreamConverter.js
This commit is contained in:
Yury Delendik 2012-05-24 16:04:09 -05:00
commit d370f69978
17 changed files with 351 additions and 107 deletions

View file

@ -108,39 +108,21 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
// Compatibility
BX: 'beginCompat',
EX: 'endCompat'
EX: 'endCompat',
// (reserved partial commands for the lexer)
BM: null,
BD: null,
'true': null,
fa: null,
fal: null,
fals: null,
'false': null,
nu: null,
nul: null,
'null': null
};
// Splits a token that consists of several operators written without any
// separator (e.g. "qQ") into the individual operator names. At every position
// the longest OP_MAP key matching the input is taken.
// Returns an array with the operator names in order, or null when the input
// is empty/missing or some part of it does not start with any OP_MAP key.
function splitCombinedOperations(operations) {
  if (!operations) {
    return null;
  }
  var pieces = [];
  var pos = 0;
  var total = operations.length;
  while (pos < total) {
    // Greedy scan: remember the longest key that matches at `pos`; on equal
    // lengths the key enumerated first is kept.
    var longest = '';
    for (var name in OP_MAP) {
      if (name.length <= longest.length) {
        continue;
      }
      if (operations.substr(pos, name.length) == name) {
        longest = name;
      }
    }
    if (longest.length === 0) {
      // Nothing known starts here, so the token cannot be split.
      return null;
    }
    pieces.push(operations.substr(pos, longest.length));
    pos += longest.length;
  }
  return pieces;
}
PartialEvaluator.prototype = {
getOperatorList: function PartialEvaluator_getOperatorList(stream,
resources,
@ -285,39 +267,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
resources = resources || new Dict();
var xobjs = resources.get('XObject') || new Dict();
var patterns = resources.get('Pattern') || new Dict();
var parser = new Parser(new Lexer(stream), false, xref);
var parser = new Parser(new Lexer(stream, OP_MAP), false, xref);
var res = resources;
var hasNextObj = false, nextObjs;
var args = [], obj;
var TILING_PATTERN = 1, SHADING_PATTERN = 2;
while (true) {
if (hasNextObj) {
obj = nextObjs.pop();
hasNextObj = (nextObjs.length > 0);
} else {
obj = parser.getObj();
if (isEOF(obj))
break;
}
obj = parser.getObj();
if (isEOF(obj))
break;
if (isCmd(obj)) {
var cmd = obj.cmd;
var fn = OP_MAP[cmd];
if (!fn) {
// invalid content command, trying to recover
var cmds = splitCombinedOperations(cmd);
if (cmds) {
cmd = cmds[0];
fn = OP_MAP[cmd];
// feeding other command on the next iteration
hasNextObj = true;
nextObjs = [];
for (var idx = 1; idx < cmds.length; idx++) {
nextObjs.push(Cmd.get(cmds[idx]));
}
}
}
assertWellFormed(fn, 'Unknown command "' + cmd + '"');
// TODO figure out how to type-check vararg functions
@ -725,8 +687,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
} else if (octet == 0x3E) {
if (token.length) {
// XXX guessing chars size by checking number size in the CMap
if (token.length <= 2 && properties.composite)
// Heuristic: guessing chars size by checking numbers sizes
// in the CMap entries.
if (token.length == 2 && properties.composite)
properties.wideChars = false;
if (token.length <= 4) {

View file

@ -2706,9 +2706,9 @@ var Font = (function FontClosure() {
this.isSymbolicFont = false;
}
// heuristics: if removed more than 2 glyphs encoding WinAnsiEncoding
// does not set properly
if (glyphsRemoved > 2) {
// heuristic: if more than 10 glyphs were removed, the WinAnsiEncoding
// was not set properly (broken PDFs have about 100 removed glyphs)
if (glyphsRemoved > 10) {
warn('Switching TrueType encoding to MacRomanEncoding for ' +
this.name + ' font');
encoding = Encodings.MacRomanEncoding;
@ -4208,7 +4208,7 @@ var CFFFont = (function CFFFontClosure() {
this.properties = properties;
var parser = new CFFParser(file, properties);
var cff = parser.parse();
var cff = parser.parse(true);
var compiler = new CFFCompiler(cff);
this.readExtra(cff);
try {
@ -4299,7 +4299,7 @@ var CFFParser = (function CFFParserClosure() {
this.properties = properties;
}
CFFParser.prototype = {
parse: function CFFParser_parse() {
parse: function CFFParser_parse(normalizeCIDData) {
var properties = this.properties;
var cff = new CFF();
this.cff = cff;
@ -4354,6 +4354,21 @@ var CFFParser = (function CFFParserClosure() {
cff.charset = charset;
cff.encoding = encoding;
if (!cff.isCIDFont || !normalizeCIDData)
return cff;
// DirectWrite does not like CID fonts data. Trying to convert/flatten
// the font data and remove CID properties.
if (cff.fdArray.length !== 1)
error('Unable to normalize CID font in CFF data');
var fontDict = cff.fdArray[0];
fontDict.setByKey(17, topDict.getByName('CharStrings'));
cff.topDict = fontDict;
cff.isCIDFont = false;
delete cff.fdArray;
delete cff.fdSelect;
return cff;
},
parseHeader: function CFFParser_parseHeader() {

View file

@ -264,8 +264,16 @@ var Parser = (function ParserClosure() {
})();
var Lexer = (function LexerClosure() {
function Lexer(stream) {
function Lexer(stream, knownCommands) {
this.stream = stream;
// The PDFs might have "glued" commands with other commands, operands or
// literals, e.g. "q1". The knownCommands is a dictionary of the valid
// commands and their prefixes. The prefixes are built the following way:
// if there is a command that is a prefix of another valid command or
// literal (e.g. 'f' and 'false'), the following prefixes must be included:
// 'fa', 'fal', 'fals'. The prefixes are not needed if the command has no
// other commands or literals as a prefix. The knownCommands is optional.
this.knownCommands = knownCommands;
}
Lexer.isSpace = function Lexer_isSpace(ch) {
@ -529,12 +537,18 @@ var Lexer = (function LexerClosure() {
// command
var str = ch;
var knownCommands = this.knownCommands;
var knownCommandFound = knownCommands && (str in knownCommands);
while (!!(ch = stream.lookChar()) && !specialChars[ch.charCodeAt(0)]) {
// stop if known command is found and next character does not make
// the str a command
if (knownCommandFound && !((str + ch) in knownCommands))
break;
stream.skip();
if (str.length == 128)
error('Command token too long: ' + str.length);
str += ch;
knownCommandFound = knownCommands && (str in knownCommands);
}
if (str == 'true')
return true;