mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-25 17:48:07 +02:00
Splitting files
This commit is contained in:
parent
b428552e87
commit
a7278b7fbc
25 changed files with 7581 additions and 7588 deletions
630
src/parser.js
Normal file
630
src/parser.js
Normal file
|
@ -0,0 +1,630 @@
|
|||
// Unique sentinel object marking end of input: Lexer.getObj returns it when
// the underlying stream has no more characters, and isEOF tests for it.
var EOF = {};
|
||||
|
||||
// Returns true only when `v` is the EOF sentinel itself.
//
// The comparison must be strict: with loose equality the sentinel object is
// coerced to the string "[object Object]" when compared against a primitive,
// so a string token with exactly that text would be mistaken for end of
// input.
function isEOF(v) {
  return v === EOF;
}
|
||||
|
||||
// Parser builds objects (arrays, dictionaries, references, streams, inline
// images and simple values) from the tokens handed out by a lexer. It keeps a
// two-token lookahead in `buf1` (current token) and `buf2` (next token).
var Parser = (function parserParser() {
  // TODO improve the small images performance to remove the limit
  var INLINE_IMAGE_LIMIT = 500;

  // lexer        - token source; must provide getObj(), skip() and `stream`.
  // allowStreams - when falsy, a dictionary followed by 'stream' is returned
  //                as a plain dictionary instead of a stream object.
  // xref         - optional; used to resolve an indirect stream 'Length'.
  function constructor(lexer, allowStreams, xref) {
    this.lexer = lexer;
    this.allowStreams = allowStreams;
    this.xref = xref;
    this.inlineImg = 0;
    this.refill();
  }

  // Reads `name value` pairs into `dict` until `endCmd` or EOF is the current
  // token. Shared by regular dictionaries ('>>') and inline images ('ID').
  function readDictEntries(parser, dict, endCmd, cipherTransform) {
    while (!isCmd(parser.buf1, endCmd) && !isEOF(parser.buf1)) {
      if (!isName(parser.buf1)) {
        error('Dictionary key must be a name object');
        // NOTE(review): error() presumably throws; drop the offending token
        // anyway so this loop always makes progress if it ever returns.
        parser.shift();
        continue;
      }
      var key = parser.buf1.name;
      parser.shift();
      if (isEOF(parser.buf1))
        break;
      dict.set(key, parser.getObj(cipherTransform));
    }
  }

  constructor.prototype = {
    // Loads both lookahead slots from the lexer.
    refill: function parserRefill() {
      this.buf1 = this.lexer.getObj();
      this.buf2 = this.lexer.getObj();
    },
    // Advances the lookahead by one token. After an 'ID' command the lexer is
    // not asked for another token (raw image data follows); `buf2` is left
    // null and a single byte is skipped instead.
    shift: function parserShift() {
      this.buf1 = this.buf2;
      if (isCmd(this.buf2, 'ID')) {
        this.buf2 = null;
        // skip byte after ID
        this.lexer.skip();
      } else {
        this.buf2 = this.lexer.getObj();
      }
    },
    // Parses and returns the next complete object. When `cipherTransform` is
    // given, strings are passed through its decryptString() and streams
    // through its createStream().
    getObj: function parserGetObj(cipherTransform) {
      if (isCmd(this.buf1, 'BI')) { // inline image
        this.shift();
        return this.makeInlineImage(cipherTransform);
      }
      if (isCmd(this.buf1, '[')) { // array
        this.shift();
        var array = [];
        // The cipher transform is forwarded so that strings nested inside
        // arrays are decrypted just like strings nested inside dictionaries.
        while (!isCmd(this.buf1, ']') && !isEOF(this.buf1))
          array.push(this.getObj(cipherTransform));
        if (isEOF(this.buf1))
          error('End of file inside array');
        this.shift();
        return array;
      }
      if (isCmd(this.buf1, '<<')) { // dictionary or stream
        this.shift();
        var dict = new Dict();
        readDictEntries(this, dict, '>>', cipherTransform);
        if (isEOF(this.buf1))
          error('End of file inside dictionary');

        // stream objects are not allowed inside content streams or
        // object streams
        if (isCmd(this.buf2, 'stream')) {
          return this.allowStreams ?
            this.makeStream(dict, cipherTransform) : dict;
        }
        this.shift();
        return dict;
      }
      if (isInt(this.buf1)) { // indirect reference or integer
        var num = this.buf1;
        this.shift();
        if (isInt(this.buf1) && isCmd(this.buf2, 'R')) {
          var ref = new Ref(num, this.buf1);
          this.shift();
          this.shift();
          return ref;
        }
        return num;
      }
      if (isString(this.buf1)) { // string
        var str = this.buf1;
        this.shift();
        if (cipherTransform)
          str = cipherTransform.decryptString(str);
        return str;
      }

      // simple object
      var obj = this.buf1;
      this.shift();
      return obj;
    },
    // Parses an inline image: the dictionary up to 'ID', then the raw bytes
    // up to the whitespace-delimited 'EI' marker. Returns the (filtered)
    // image stream, or null once the inline image limit has been reached.
    makeInlineImage: function parserMakeInlineImage(cipherTransform) {
      var lexer = this.lexer;
      var stream = lexer.stream;

      // parse dictionary
      var dict = new Dict();
      readDictEntries(this, dict, 'ID', cipherTransform);

      // parse image stream
      var startPos = stream.pos;

      // searching for the /\sEI\s/
      // state: 0 = nothing matched, 1 = leading whitespace, 2 = 'E',
      //        3 = 'I', 4 = trailing whitespace (marker complete)
      var state = 0, ch;
      while (state !== 4 && (ch = stream.getByte()) != null) {
        switch (ch) {
          case 0x20:
          case 0x0D:
          case 0x0A:
            state = state === 3 ? 4 : 1;
            break;
          case 0x45:
            state = state === 1 ? 2 : 0;
            break;
          case 0x49:
            state = state === 2 ? 3 : 0;
            break;
          default:
            state = 0;
            break;
        }
      }

      if (++this.inlineImg >= INLINE_IMAGE_LIMIT) {
        if (this.inlineImg === INLINE_IMAGE_LIMIT)
          warn('Too many inline images');
        this.shift();
        return null;
      }

      // The four marker bytes are only subtracted when the marker was really
      // found; if the data ran out first, everything up to the end is image
      // data and the length must not go negative.
      var markerFound = state === 4;
      if (!markerFound)
        warn('Inline image is missing the EI end marker');
      var length = stream.pos - startPos - (markerFound ? 4 : 0);
      var imageStream = stream.makeSubStream(startPos, length, dict);
      if (cipherTransform)
        imageStream = cipherTransform.createStream(imageStream);
      imageStream = this.filter(imageStream, dict, length);
      imageStream.parameters = dict;

      // Re-prime the lookahead: 'EI' becomes the current token and the next
      // token is read from the lexer again.
      this.buf2 = new Cmd('EI');
      this.shift();

      return imageStream;
    },
    // Builds a stream object for `dict`. Expects the current token to be
    // '>>' and the next one 'stream'.
    makeStream: function parserMakeStream(dict, cipherTransform) {
      var lexer = this.lexer;
      var stream = lexer.stream;

      // get stream start position
      lexer.skipToNextLine();
      var pos = stream.pos;

      // get length
      var length = dict.get('Length');
      var xref = this.xref;
      if (xref)
        length = xref.fetchIfRef(length);
      if (!isInt(length)) {
        error('Bad \'Length\' attribute in stream: ' + length);
        length = 0;
      }

      // skip over the stream data
      stream.pos = pos + length;
      this.shift(); // '>>'
      this.shift(); // 'stream'
      if (!isCmd(this.buf1, 'endstream'))
        error('Missing endstream');
      this.shift();

      stream = stream.makeSubStream(pos, length, dict);
      if (cipherTransform)
        stream = cipherTransform.createStream(stream);
      stream = this.filter(stream, dict, length);
      stream.parameters = dict;
      return stream;
    },
    // Wraps `stream` in the decoder(s) named by the dictionary's 'Filter'
    // (or 'F') entry, pairing each with its 'DecodeParms' (or 'DP') entry.
    filter: function parserFilter(stream, dict, length) {
      var filter = dict.get('Filter', 'F');
      var params = dict.get('DecodeParms', 'DP');
      if (isName(filter))
        return this.makeFilter(stream, filter.name, length, params);
      if (isArray(filter)) {
        var filterArray = filter;
        var paramsArray = params;
        for (var i = 0, ii = filterArray.length; i < ii; ++i) {
          filter = filterArray[i];
          if (!isName(filter)) {
            error('Bad filter name: ' + filter);
          } else {
            params = null;
            if (isArray(paramsArray) && (i in paramsArray))
              params = paramsArray[i];
            stream = this.makeFilter(stream, filter.name, length, params);
            // after the first stream the length variable is invalid
            length = null;
          }
        }
      }
      return stream;
    },
    // Returns `stream` wrapped in the decoder for filter `name` (full or
    // abbreviated form).
    makeFilter: function parserMakeFilter(stream, name, length, params) {
      if (name === 'FlateDecode' || name === 'Fl') {
        if (params) {
          return new PredictorStream(new FlateStream(stream), params);
        }
        return new FlateStream(stream);
      } else if (name === 'LZWDecode' || name === 'LZW') {
        var earlyChange = 1;
        if (params) {
          if (params.has('EarlyChange'))
            earlyChange = params.get('EarlyChange');
          return new PredictorStream(
            new LZWStream(stream, earlyChange), params);
        }
        return new LZWStream(stream, earlyChange);
      } else if (name === 'DCTDecode' || name === 'DCT') {
        var bytes = stream.getBytes(length);
        return new JpegStream(bytes, stream.dict);
      } else if (name === 'ASCII85Decode' || name === 'A85') {
        return new Ascii85Stream(stream);
      } else if (name === 'ASCIIHexDecode' || name === 'AHx') {
        return new AsciiHexStream(stream);
      } else if (name === 'CCITTFaxDecode' || name === 'CCF') {
        return new CCITTFaxStream(stream, params);
      } else {
        error('filter "' + name + '" not supported yet');
      }
      return stream;
    }
  };

  return constructor;
})();
|
||||
|
||||
// Lexer splits a character stream into tokens: numbers, strings, names,
// commands (Cmd), booleans, null and the EOF sentinel. The stream must
// provide getChar(), lookChar() and skip(); end of data is recognised for
// any falsy return value (null or undefined).
var Lexer = (function lexer() {
  function constructor(stream) {
    this.stream = stream;
  }

  // True for the space, tab, carriage return and line feed characters.
  constructor.isSpace = function lexerIsSpace(ch) {
    return ch === ' ' || ch === '\t' || ch === '\x0d' || ch === '\x0a';
  };

  // A '1' in this array means the character is white space. A '1' or
  // '2' means the character ends a name or command.
  var specialChars = [
    1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,   // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 1x
    1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2,   // 2x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,   // 3x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 4x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 5x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 6x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 7x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 8x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 9x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ax
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // bx
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // cx
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // dx
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ex
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0    // fx
  ];

  // Returns the value (0-15) of a hexadecimal digit character, or -1 for
  // anything else. End-of-data values are rejected up front because
  // `null >= '0'` is true in JavaScript.
  function toHexDigit(ch) {
    if (!ch)
      return -1;
    if (ch >= '0' && ch <= '9')
      return ch.charCodeAt(0) - 48;
    ch = ch.toUpperCase();
    if (ch >= 'A' && ch <= 'F')
      return ch.charCodeAt(0) - 55;
    return -1;
  }

  // True when `ch` is one of the characters '0'..'7' (false at end of data).
  function isOctalDigit(ch) {
    return !!ch && ch >= '0' && ch <= '7';
  }

  constructor.prototype = {
    // Reads the rest of a number whose first character `ch` has already
    // been consumed and returns it as a JavaScript number.
    getNumber: function lexerGetNumber(ch) {
      var floating = false;
      var str = ch;
      var stream = this.stream;
      for (;;) {
        ch = stream.lookChar();
        if (!ch) {
          // end of data; must be tested first, since null would otherwise
          // pass the digit range check below and never be consumed
          break;
        } else if (ch === '.' && !floating) {
          str += ch;
          floating = true;
        } else if (ch === '-') {
          // ignore minus signs in the middle of numbers to match
          // Adobe's behavior
          warn('Badly formated number');
        } else if (ch >= '0' && ch <= '9') {
          str += ch;
        } else if (ch === 'e' || ch === 'E') {
          // the exponent marker is consumed but not added to the number
          floating = true;
        } else {
          // the last character doesn't belong to us
          break;
        }
        stream.skip();
      }
      var value = parseFloat(str);
      if (isNaN(value))
        error('Invalid floating point number: ' + str);
      return value;
    },
    // Reads a literal string; the opening '(' has already been consumed.
    // Handles balanced parentheses, backslash escapes, octal escapes and
    // line continuations. Returns the decoded string.
    getString: function lexerGetString() {
      var numParen = 1;
      var done = false;
      var str = '';
      var stream = this.stream;
      var ch;
      do {
        ch = stream.getChar();
        switch (ch) {
          case null:
          case undefined:
            warn('Unterminated string');
            done = true;
            break;
          case '(':
            ++numParen;
            str += ch;
            break;
          case ')':
            if (--numParen === 0) {
              done = true;
            } else {
              str += ch;
            }
            break;
          case '\\':
            ch = stream.getChar();
            switch (ch) {
              case null:
              case undefined:
                warn('Unterminated string');
                done = true;
                break;
              case 'n':
                str += '\n';
                break;
              case 'r':
                str += '\r';
                break;
              case 't':
                str += '\t';
                break;
              case 'b':
                str += '\b';
                break;
              case 'f':
                str += '\f';
                break;
              case '\\':
              case '(':
              case ')':
                str += ch;
                break;
              case '0': case '1': case '2': case '3':
              case '4': case '5': case '6': case '7':
                // up to three octal digits
                var x = ch.charCodeAt(0) - 48;
                ch = stream.lookChar();
                if (isOctalDigit(ch)) {
                  stream.skip();
                  x = (x << 3) + (ch.charCodeAt(0) - 48);
                  ch = stream.lookChar();
                  if (isOctalDigit(ch)) {
                    stream.skip();
                    x = (x << 3) + (ch.charCodeAt(0) - 48);
                  }
                }
                // an escape denotes one byte; high-order overflow is ignored
                str += String.fromCharCode(x & 0xFF);
                break;
              case '\r':
                // backslash + end of line: line continuation
                ch = stream.lookChar();
                if (ch === '\n')
                  stream.skip();
                break;
              case '\n':
                break;
              default:
                str += ch;
            }
            break;
          default:
            str += ch;
        }
      } while (!done);
      return str;
    },
    // Reads a name; the leading '/' has already been consumed. `#xx`
    // sequences are decoded; a '#' not followed by a hex digit is kept
    // literally. The incoming `ch` value is not used.
    getName: function lexerGetName(ch) {
      var str = '';
      var stream = this.stream;
      while (!!(ch = stream.lookChar()) && !specialChars[ch.charCodeAt(0)]) {
        stream.skip();
        if (ch === '#') {
          var x = toHexDigit(stream.lookChar());
          if (x !== -1) {
            stream.skip();
            var second = stream.getChar();
            var x2 = toHexDigit(second);
            if (x2 === -1)
              error('Illegal digit in hex char in name: ' + second);
            str += String.fromCharCode((x << 4) | x2);
          } else {
            // The following character has only been peeked at; the next
            // loop iteration decides what to do with it, so it must not be
            // appended here as well.
            str += '#';
          }
        } else {
          str += ch;
        }
      }
      // Over-long names are still usable, so this is reported as a warning.
      if (str.length > 128)
        warn('Warning: name token is longer than allowed by the spec: ' +
             str.length);
      return new Name(str);
    },
    // Reads a hexadecimal string; the opening '<' has already been
    // consumed. White space between digits is ignored and a missing final
    // digit is taken to be 0. The incoming `ch` value is not used.
    getHexString: function lexerGetHexString(ch) {
      var str = '';
      var stream = this.stream;
      var high = -1; // pending high nibble, -1 when none
      for (;;) {
        ch = stream.getChar();
        if (ch === '>')
          break;
        if (!ch) {
          warn('Unterminated hex string');
          break;
        }
        if (specialChars[ch.charCodeAt(0)] === 1)
          continue; // white space
        var digit = toHexDigit(ch);
        if (digit === -1) {
          error('Illegal character in hex string: ' + ch);
          continue;
        }
        if (high === -1) {
          high = digit;
        } else {
          str += String.fromCharCode((high << 4) | digit);
          high = -1;
        }
      }
      if (high !== -1)
        str += String.fromCharCode(high << 4);
      return str;
    },
    // Skips white space and comments, then returns the next token: a
    // number, string, Name, Cmd, true, false, null, or EOF when the stream
    // has no more characters.
    getObj: function lexerGetObj() {
      // skip whitespace and comments
      var comment = false;
      var stream = this.stream;
      var ch;
      while (true) {
        if (!(ch = stream.getChar()))
          return EOF;
        if (comment) {
          if (ch === '\r' || ch === '\n')
            comment = false;
        } else if (ch === '%') {
          comment = true;
        } else if (specialChars[ch.charCodeAt(0)] !== 1) {
          break;
        }
      }

      // start reading token
      switch (ch) {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case '+': case '-': case '.':
          return this.getNumber(ch);
        case '(':
          return this.getString();
        case '/':
          return this.getName(ch);
        // array punctuation
        case '[':
        case ']':
          return new Cmd(ch);
        // hex string or dict punctuation
        case '<':
          ch = stream.lookChar();
          if (ch === '<') {
            // dict punctuation
            stream.skip();
            return new Cmd('<<');
          }
          return this.getHexString(ch);
        // dict punctuation
        case '>':
          if (stream.lookChar() === '>') {
            stream.skip();
            return new Cmd('>>');
          }
          // a lone '>' is its own command; the peeked character is left in
          // the stream for the next token
          return new Cmd('>');
        case '{':
        case '}':
          return new Cmd(ch);
        case ')':
          error('Illegal character: ' + ch);
          // NOTE(review): only reached if error() returns; this hands back
          // the global Error constructor, kept as in the original.
          return Error;
      }

      // command
      var str = ch;
      while (!!(ch = stream.lookChar()) && !specialChars[ch.charCodeAt(0)]) {
        stream.skip();
        if (str.length === 128) {
          error('Command token too long: ' + str.length);
          break;
        }
        str += ch;
      }
      if (str === 'true')
        return true;
      if (str === 'false')
        return false;
      if (str === 'null')
        return null;
      return new Cmd(str);
    },
    // Consumes characters up to and including the next line ending
    // (LF, CR or CR LF), or until the stream runs out.
    skipToNextLine: function lexerSkipToNextLine() {
      var stream = this.stream;
      while (true) {
        var ch = stream.getChar();
        if (!ch || ch === '\n')
          return;
        if (ch === '\r') {
          if (stream.lookChar() === '\n')
            stream.skip();
          return;
        }
      }
    },
    // Skips one character of the underlying stream.
    skip: function lexerSkip() {
      this.stream.skip();
    }
  };

  return constructor;
})();
|
||||
|
||||
// Linearization reads the first four objects from `stream` (expected to be
// `<int> <int> obj <dictionary>`) and exposes entries of that dictionary
// through read-only accessors.
var Linearization = (function linearizationLinearization() {
  function constructor(stream) {
    this.parser = new Parser(new Lexer(stream), false);
    var obj1 = this.parser.getObj();
    var obj2 = this.parser.getObj();
    var obj3 = this.parser.getObj();
    this.linDict = this.parser.getObj();
    if (isInt(obj1) && isInt(obj2) && isCmd(obj3, 'obj') &&
        isDict(this.linDict)) {
      // the dictionary only counts when 'Linearized' is a positive number
      var obj = this.linDict.get('Linearized');
      if (!(isNum(obj) && obj > 0))
        this.linDict = null;
    }
  }

  constructor.prototype = {
    // Returns the positive integer stored under `name`; otherwise reports
    // an error and returns 0.
    getInt: function linearizationGetInt(name) {
      var linDict = this.linDict;
      var obj;
      if (isDict(linDict) &&
          isInt(obj = linDict.get(name)) &&
          obj > 0) {
        return obj;
      }
      error('"' + name + '" field in linearization table is invalid');
      return 0;
    },
    // Returns element `index` of the 'H' array when the array has at least
    // two elements and the element is a positive integer; otherwise reports
    // an error and returns 0.
    getHint: function linearizationGetHint(index) {
      var linDict = this.linDict;
      var obj1, obj2;
      if (isDict(linDict) &&
          isArray(obj1 = linDict.get('H')) &&
          obj1.length >= 2 &&
          isInt(obj2 = obj1[index]) &&
          obj2 > 0) {
        return obj2;
      }
      error('Hints table in linearization table is invalid: ' + index);
      return 0;
    },
    // 'L' entry, or 0 without reporting an error when there is no
    // dictionary.
    get length() {
      if (!isDict(this.linDict))
        return 0;
      return this.getInt('L');
    },
    get hintsOffset() {
      return this.getHint(0);
    },
    get hintsLength() {
      return this.getHint(1);
    },
    get hintsOffset2() {
      return this.getHint(2);
    },
    // Correctly spelled accessor for the fourth 'H' element.
    get hintsLength2() {
      return this.getHint(3);
    },
    // Misspelled original name, kept so existing callers keep working.
    get hintsLenth2() {
      return this.getHint(3);
    },
    get objectNumberFirst() {
      return this.getInt('O');
    },
    get endFirst() {
      return this.getInt('E');
    },
    get numPages() {
      return this.getInt('N');
    },
    get mainXRefEntriesOffset() {
      return this.getInt('T');
    },
    get pageFirst() {
      return this.getInt('P');
    }
  };

  return constructor;
})();
|
Loading…
Add table
Add a link
Reference in a new issue