From 8068ff242d0d4c067743f9d55ae3886a240e733c Mon Sep 17 00:00:00 2001 From: Artur Adib Date: Tue, 31 Jan 2012 09:01:04 -0500 Subject: [PATCH 1/5] readXRefTable rewrite, progress --- src/obj.js | 71 +++++++++++++++++++++++++++++------------------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/src/obj.js b/src/obj.js index ef7932546..0bf29347c 100644 --- a/src/obj.js +++ b/src/obj.js @@ -287,44 +287,49 @@ var XRef = (function XRefClosure() { XRef.prototype = { readXRefTable: function readXRefTable(parser) { - var obj; - while (true) { - if (isCmd(obj = parser.getObj(), 'trailer')) - break; - if (!isInt(obj)) + // Example of cross-reference table: + // xref + // 0 1 <-- subsection header (first obj #, obj count) + // 0000000000 65535 f <-- actual object (offset, generation #, f/n) + // 23 2 <-- subsection header ... and so on ... + // 0000025518 00002 n + // 0000025635 00000 n + // trailer + // ... + + // Outer loop is over subsection headers + var first; + while (!isCmd(first = parser.getObj(), 'trailer')) { + var count = parser.getObj(); + + if (!isInt(first) || !isInt(count)) error('Invalid XRef table'); - var first = obj; - if (!isInt(obj = parser.getObj())) - error('Invalid XRef table'); - var n = obj; - if (first < 0 || n < 0 || (first + n) != ((first + n) | 0)) - error('Invalid XRef table: ' + first + ', ' + n); - for (var i = first; i < first + n; ++i) { - var entry = {}; - if (!isInt(obj = parser.getObj())) - error('Invalid XRef table: ' + first + ', ' + n); - entry.offset = obj; - if (!isInt(obj = parser.getObj())) - error('Invalid XRef table: ' + first + ', ' + n); - entry.gen = obj; - obj = parser.getObj(); - if (isCmd(obj, 'n')) { - entry.uncompressed = true; - } else if (isCmd(obj, 'f')) { + + // Inner loop is over objects themselves + for (var i = first; i < first + count; ++i) { + var entry = {}; + entry.offset = parser.getObj(); + entry.gen = parser.getObj(); + var type = parser.getObj(); + + if (type === 'f') entry.free = true; - } else { - error('Invalid XRef table: ' + first + ', ' + n); + else if (type === 'n') + entry.uncompressed = true; + + // Validate entry obj + if ( !isInt(entry.offset) || !isInt(entry.gen) || + !(('free' in entry) || ('uncompressed' in entry)) ) { + error('Invalid XRef table: ' + first + ', ' + count); } - if (!this.entries[i]) { - // In some buggy PDF files the xref table claims to start at 1 - // instead of 0. - if (i == 1 && first == 1 && - entry.offset == 0 && entry.gen == 65535 && entry.free) { - i = first = 0; - } + + if (!this.entries[i]) this.entries[i] = entry; - } } + + // No objects added? + if (i - first <= 0) + error('Invalid XRef table: ' + first + ', ' + count); } // read the trailer dictionary From 4375bd22194d78e5e123c100a7c5e93d325d36ab Mon Sep 17 00:00:00 2001 From: Artur Adib Date: Tue, 31 Jan 2012 09:57:12 -0500 Subject: [PATCH 2/5] progress --- src/obj.js | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/obj.js b/src/obj.js index 0bf29347c..0db057b68 100644 --- a/src/obj.js +++ b/src/obj.js @@ -303,32 +303,32 @@ var XRef = (function XRefClosure() { var count = parser.getObj(); if (!isInt(first) || !isInt(count)) - error('Invalid XRef table'); + error('Invalid XRef table: wrong types in subsection header'); // Inner loop is over objects themselves - for (var i = first; i < first + count; ++i) { + for (var i = 0; i < count; i++) { var entry = {}; entry.offset = parser.getObj(); entry.gen = parser.getObj(); var type = parser.getObj(); - if (type === 'f') + if (isCmd(type, 'f')) entry.free = true; - else if (type === 'n') + else if (isCmd(type, 'n')) entry.uncompressed = true; // Validate entry obj if ( !isInt(entry.offset) || !isInt(entry.gen) || - !(('free' in entry) || ('uncompressed' in entry)) ) { + !(entry.free || entry.uncompressed) ) { error('Invalid XRef table: ' + first + ', ' + count); } - if (!this.entries[i]) - this.entries[i] = entry; + if (!this.entries[i + first]) + this.entries[i + first] = entry; } // No objects added? - if (i - first <= 0) + if (!(i > 0)) error('Invalid XRef table: ' + first + ', ' + count); } @@ -339,7 +339,7 @@ var XRef = (function XRefClosure() { // get the 'Prev' pointer var prev; - obj = dict.get('Prev'); + var obj = dict.get('Prev'); if (isInt(obj)) { prev = obj; } else if (isRef(obj)) { From 0959cd35172558b5a22647e846d43ece40e56c0c Mon Sep 17 00:00:00 2001 From: Artur Adib Date: Tue, 31 Jan 2012 10:49:06 -0500 Subject: [PATCH 3/5] New readXRefTable, working --- src/obj.js | 101 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 57 insertions(+), 44 deletions(-) diff --git a/src/obj.js b/src/obj.js index 0db057b68..d03ee4889 100644 --- a/src/obj.js +++ b/src/obj.js @@ -298,9 +298,10 @@ var XRef = (function XRefClosure() { // ... // Outer loop is over subsection headers - var first; - while (!isCmd(first = parser.getObj(), 'trailer')) { - var count = parser.getObj(); + var obj; + while (!isCmd(obj = parser.getObj(), 'trailer')) { + var first = obj, + count = parser.getObj(); if (!isInt(first) || !isInt(count)) error('Invalid XRef table: wrong types in subsection header'); @@ -320,46 +321,35 @@ var XRef = (function XRefClosure() { // Validate entry obj if ( !isInt(entry.offset) || !isInt(entry.gen) || !(entry.free || entry.uncompressed) ) { - error('Invalid XRef table: ' + first + ', ' + count); + error('Invalid entry in XRef subsection: ' + first + ', ' + count); } if (!this.entries[i + first]) this.entries[i + first] = entry; } - - // No objects added? - if (!(i > 0)) - error('Invalid XRef table: ' + first + ', ' + count); } - // read the trailer dictionary - var dict; - if (!isDict(dict = parser.getObj())) - error('Invalid XRef table'); + // Sanity check: as per spec, first object must have these properties + if ( this.entries[0] && + !(this.entries[0].gen === 65535 && this.entries[0].free) ) + error('Invalid XRef table: unexpected first object'); - // get the 'Prev' pointer - var prev; - var obj = dict.get('Prev'); - if (isInt(obj)) { - prev = obj; - } else if (isRef(obj)) { - // certain buggy PDF generators generate "/Prev NNN 0 R" instead - // of "/Prev NNN" - prev = obj.num; - } - if (prev) { - this.readXRef(prev); - } + // Sanity check + if (!isCmd(obj, 'trailer')) + error('Invalid XRef table: could not find trailer dictionary'); - // check for 'XRefStm' key - if (isInt(obj = dict.get('XRefStm'))) { - var pos = obj; - // ignore previously loaded xref streams (possible infinite recursion) - if (!(pos in this.xrefstms)) { - this.xrefstms[pos] = 1; - this.readXRef(pos); - } - } + // Read trailer dictionary, e.g. + // trailer + // << /Size 22 + // /Root 20R + // /Info 10R + // /ID [ <81b14aafa313db63dbd6f981e49f94f4> ] + // >> + // The parser goes through the entire stream << ... >> and provides + // a getter interface for the key-value table + var dict = parser.getObj(); + if (!isDict(dict)) + error('Invalid XRef table: could not parse trailer dictionary'); return dict; }, @@ -412,9 +402,6 @@ var XRef = (function XRefClosure() { } range.splice(0, 2); } - var prev = streamParameters.get('Prev'); - if (isInt(prev)) - this.readXRef(prev); return streamParameters; }, indexObjects: function indexObjects() { @@ -534,22 +521,48 @@ var XRef = (function XRefClosure() { try { var parser = new Parser(new Lexer(stream), true); var obj = parser.getObj(); + var dict; - // parse an old-style xref table - if (isCmd(obj, 'xref')) - return this.readXRefTable(parser); + // Get dictionary + if (isCmd(obj, 'xref')) { + // Parse end-of-file XRef + dict = this.readXRefTable(parser); - // parse an xref stream - if (isInt(obj)) { + // Recursively get other XRefs 'XRefStm', if any + obj = dict.get('XRefStm'); + if (isInt(obj)) { + var pos = obj; + // ignore previously loaded xref streams + // (possible infinite recursion) + if (!(pos in this.xrefstms)) { + this.xrefstms[pos] = 1; + this.readXRef(pos); + } + } + } else if (isInt(obj)) { + // Parse in-stream XRef if (!isInt(parser.getObj()) || !isCmd(parser.getObj(), 'obj') || !isStream(obj = parser.getObj())) { error('Invalid XRef stream'); } - return this.readXRefStream(obj); + dict = this.readXRefStream(obj); } + + // Recursively get previous dictionary, if any + obj = dict.get('Prev'); + if (isInt(obj)) + this.readXRef(obj); + else if (isRef(obj)) { + // The spec says Prev must not be a reference, i.e. "/Prev NNN" + // This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R" + this.readXRef(obj.num); + } + + return dict; } catch (e) { - log('Reading of the xref table/stream failed: ' + e); + // log('(while reading XRef): ' + e); +error('(while reading XRef): ' + e); } warn('Indexing all PDF objects'); From 9e9674d45c025d20ed1480d485e905b51aedef98 Mon Sep 17 00:00:00 2001 From: Artur Adib Date: Tue, 31 Jan 2012 10:50:30 -0500 Subject: [PATCH 4/5] Remove debugging line --- src/obj.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/obj.js b/src/obj.js index d03ee4889..2088839c0 100644 --- a/src/obj.js +++ b/src/obj.js @@ -561,8 +561,7 @@ var XRef = (function XRefClosure() { return dict; } catch (e) { - // log('(while reading XRef): ' + e); -error('(while reading XRef): ' + e); + log('(while reading XRef): ' + e); } warn('Indexing all PDF objects'); From 775290d69806726652c4c1f3fa1184f40e7c0492 Mon Sep 17 00:00:00 2001 From: Artur Adib Date: Tue, 31 Jan 2012 10:57:32 -0500 Subject: [PATCH 5/5] Lint --- src/obj.js | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/obj.js b/src/obj.js index 2088839c0..8ac4706c0 100644 --- a/src/obj.js +++ b/src/obj.js @@ -292,11 +292,11 @@ var XRef = (function XRefClosure() { // 0 1 <-- subsection header (first obj #, obj count) // 0000000000 65535 f <-- actual object (offset, generation #, f/n) // 23 2 <-- subsection header ... and so on ... - // 0000025518 00002 n + // 0000025518 00002 n // 0000025635 00000 n // trailer // ... - + // Outer loop is over subsection headers var obj; while (!isCmd(obj = parser.getObj(), 'trailer')) { @@ -308,7 +308,7 @@ var XRef = (function XRefClosure() { // Inner loop is over objects themselves for (var i = 0; i < count; i++) { - var entry = {}; + var entry = {}; entry.offset = parser.getObj(); entry.gen = parser.getObj(); var type = parser.getObj(); @@ -319,19 +319,19 @@ var XRef = (function XRefClosure() { entry.uncompressed = true; // Validate entry obj - if ( !isInt(entry.offset) || !isInt(entry.gen) || - !(entry.free || entry.uncompressed) ) { + if (!isInt(entry.offset) || !isInt(entry.gen) || + !(entry.free || entry.uncompressed)) { error('Invalid entry in XRef subsection: ' + first + ', ' + count); } - + if (!this.entries[i + first]) this.entries[i + first] = entry; } } // Sanity check: as per spec, first object must have these properties - if ( this.entries[0] && - !(this.entries[0].gen === 65535 && this.entries[0].free) ) + if (this.entries[0] && + !(this.entries[0].gen === 65535 && this.entries[0].free)) error('Invalid XRef table: unexpected first object'); // Sanity check @@ -532,7 +532,7 @@ var XRef = (function XRefClosure() { obj = dict.get('XRefStm'); if (isInt(obj)) { var pos = obj; - // ignore previously loaded xref streams + // ignore previously loaded xref streams // (possible infinite recursion) if (!(pos in this.xrefstms)) { this.xrefstms[pos] = 1;