1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-22 16:18:08 +02:00

Implement progressive loading of PDFs

This commit is contained in:
Mack Duan 2013-02-06 15:19:29 -08:00
parent added3da8f
commit ef423ef30c
25 changed files with 2110 additions and 586 deletions

View file

@ -35,9 +35,13 @@
* - httpHeaders - Basic authentication headers.
* - password - For decrypting password-protected PDFs.
*
* @param {object} pdfDataRangeTransport is optional. It is used if you want
* to manually serve range requests for data in the PDF. See viewer.js for
* an example of pdfDataRangeTransport's interface.
*
* @return {Promise} A promise that is resolved with {PDFDocumentProxy} object.
*/
PDFJS.getDocument = function getDocument(source) {
PDFJS.getDocument = function getDocument(source, pdfDataRangeTransport) {
var workerInitializedPromise, workerReadyPromise, transport;
if (typeof source === 'string') {
@ -64,7 +68,8 @@ PDFJS.getDocument = function getDocument(source) {
workerInitializedPromise = new PDFJS.Promise();
workerReadyPromise = new PDFJS.Promise();
transport = new WorkerTransport(workerInitializedPromise, workerReadyPromise);
transport = new WorkerTransport(workerInitializedPromise,
workerReadyPromise, pdfDataRangeTransport);
workerInitializedPromise.then(function transportInitialized() {
transport.fetchDocument(params);
});
@ -114,10 +119,7 @@ var PDFDocumentProxy = (function PDFDocumentProxyClosure() {
* mapping named destinations to reference numbers.
*/
getDestinations: function PDFDocumentProxy_getDestinations() {
var promise = new PDFJS.Promise();
var destinations = this.pdfInfo.destinations;
promise.resolve(destinations);
return promise;
return this.transport.getDestinations();
},
/**
* @return {Promise} A promise that is resolved with an array of all the
@ -180,6 +182,13 @@ var PDFDocumentProxy = (function PDFDocumentProxyClosure() {
this.transport.getData(promise);
return promise;
},
/**
* @return {Promise} A promise that is resolved when the document's data
* is loaded
*/
dataLoaded: function PDFDocumentProxy_dataLoaded() {
return this.transport.dataLoaded();
},
destroy: function PDFDocumentProxy_destroy() {
this.transport.destroy();
}
@ -462,7 +471,10 @@ var PDFPageProxy = (function PDFPageProxyClosure() {
* For internal use only.
*/
var WorkerTransport = (function WorkerTransportClosure() {
function WorkerTransport(workerInitializedPromise, workerReadyPromise) {
function WorkerTransport(workerInitializedPromise, workerReadyPromise,
pdfDataRangeTransport) {
this.pdfDataRangeTransport = pdfDataRangeTransport;
this.workerReadyPromise = workerReadyPromise;
this.commonObjs = new PDFObjects();
@ -544,6 +556,21 @@ var WorkerTransport = (function WorkerTransportClosure() {
function WorkerTransport_setupMessageHandler(messageHandler) {
this.messageHandler = messageHandler;
var pdfDataRangeTransport = this.pdfDataRangeTransport;
if (pdfDataRangeTransport) {
pdfDataRangeTransport.addListener(function(begin, chunk) {
messageHandler.send('OnDataRange', {
begin: begin,
chunk: chunk
});
});
messageHandler.on('RequestDataRange',
function transportDataRange(data) {
pdfDataRangeTransport.requestDataRange(data.begin, data.end);
}, this);
}
messageHandler.on('GetDoc', function transportDoc(data) {
var pdfInfo = data.pdfInfo;
var pdfDocument = new PDFDocumentProxy(pdfInfo, this);
@ -647,6 +674,10 @@ var WorkerTransport = (function WorkerTransportClosure() {
}, this);
messageHandler.on('DocProgress', function transportDocProgress(data) {
// TODO(mack): The progress event should be resolved on a different
// promise that tracks progress of whole file, since workerReadyPromise
// is for file being ready to render, not for when file is fully
// downloaded
this.workerReadyPromise.progress({
loaded: data.loaded,
total: data.total
@ -702,7 +733,11 @@ var WorkerTransport = (function WorkerTransportClosure() {
},
fetchDocument: function WorkerTransport_fetchDocument(source) {
this.messageHandler.send('GetDocRequest', {source: source});
source.chunkedViewerLoading = !!this.pdfDataRangeTransport;
this.messageHandler.send('GetDocRequest', {
source: source,
disableRange: PDFJS.disableRange
});
},
getData: function WorkerTransport_getData(promise) {
@ -711,6 +746,14 @@ var WorkerTransport = (function WorkerTransportClosure() {
});
},
dataLoaded: function WorkerTransport_dataLoaded() {
var promise = new PDFJS.Promise();
this.messageHandler.send('DataLoaded', null, function(args) {
promise.resolve(args);
});
return promise;
},
getPage: function WorkerTransport_getPage(pageNumber, promise) {
var pageIndex = pageNumber - 1;
if (pageIndex in this.pagePromises)
@ -724,6 +767,16 @@ var WorkerTransport = (function WorkerTransportClosure() {
getAnnotations: function WorkerTransport_getAnnotations(pageIndex) {
this.messageHandler.send('GetAnnotationsRequest',
{ pageIndex: pageIndex });
},
getDestinations: function WorkerTransport_getDestinations() {
var promise = new PDFJS.Promise();
this.messageHandler.send('GetDestinations', null,
function transportDestinations(destinations) {
promise.resolve(destinations);
}
);
return promise;
}
};
return WorkerTransport;

440
src/chunked_stream.js Normal file
View file

@ -0,0 +1,440 @@
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* globals assert, MissingDataException, isInt, NetworkManager, PDFJS,
isEmptyObj */
'use strict';
var ChunkedStream = (function ChunkedStreamClosure() {
  /**
   * A stream over a fixed-size byte buffer that is filled in chunk by chunk.
   * Reads that touch a chunk which has not been received yet throw a
   * MissingDataException instead of returning bytes that were never loaded.
   *
   * @param {number} length Total number of bytes of the underlying data.
   * @param {number} chunkSize Number of bytes per chunk; the final chunk may
   *   be shorter.
   */
  function ChunkedStream(length, chunkSize) {
    this.bytes = new Uint8Array(length);
    this.start = 0;
    this.pos = 0;
    this.end = length;
    this.chunkSize = chunkSize;
    // Sparse array: index i is set once chunk i has been received
    this.loadedChunks = [];
    this.numChunksLoaded = 0;
    this.numChunks = Math.ceil(length / chunkSize);
  }

  // required methods for a stream. if a particular stream does not
  // implement these, an error should be thrown
  ChunkedStream.prototype = {

    /**
     * @return {Array} Indices of all chunks not received yet, ascending.
     */
    getMissingChunks: function ChunkedStream_getMissingChunks() {
      var missing = [];
      for (var chunk = 0, n = this.numChunks; chunk < n; ++chunk) {
        if (!(chunk in this.loadedChunks)) {
          missing.push(chunk);
        }
      }
      return missing;
    },

    /**
     * @return {boolean} True once every chunk has been received.
     */
    allChunksLoaded: function ChunkedStream_allChunksLoaded() {
      return this.numChunksLoaded === this.numChunks;
    },

    /**
     * Copies received data into the buffer and marks the covered chunks as
     * loaded.
     *
     * @param {number} begin Byte offset of the data; must be chunk aligned.
     * @param {ArrayBuffer} chunk The received bytes; must end on a chunk
     *   boundary or at the end of the buffer.
     */
    onReceiveData: function ChunkedStream_onReceiveData(begin, chunk) {
      var chunkSize = this.chunkSize;
      var end = begin + chunk.byteLength;

      assert(begin % chunkSize === 0, 'Bad begin offset: ' + begin);
      // Using this.length is inaccurate here since this.start can be moved
      // See ChunkedStream.moveStart()
      var length = this.bytes.length;
      assert(end % chunkSize === 0 || end === length,
             'Bad end offset: ' + end);

      this.bytes.set(new Uint8Array(chunk), begin);

      var beginChunk = Math.floor(begin / chunkSize);
      var endChunk = Math.floor((end - 1) / chunkSize) + 1;
      // A dedicated index variable keeps the `chunk` data parameter intact
      for (var curChunk = beginChunk; curChunk < endChunk; ++curChunk) {
        if (!(curChunk in this.loadedChunks)) {
          this.loadedChunks[curChunk] = true;
          ++this.numChunksLoaded;
        }
      }
    },

    /**
     * Verifies that every chunk overlapping [begin, end) has been received.
     * An empty range is always considered available.
     *
     * @throws {MissingDataException} Carrying the requested begin/end when at
     *   least one overlapping chunk is not loaded.
     */
    ensureRange: function ChunkedStream_ensureRange(begin, end) {
      if (begin >= end) {
        return;
      }

      var chunkSize = this.chunkSize;
      var beginChunk = Math.floor(begin / chunkSize);
      var endChunk = Math.floor((end - 1) / chunkSize) + 1;
      for (var chunk = beginChunk; chunk < endChunk; ++chunk) {
        if (!(chunk in this.loadedChunks)) {
          throw new MissingDataException(begin, end);
        }
      }
    },

    /**
     * Finds the first chunk at or after beginChunk that is not loaded,
     * wrapping around to the start of the stream when none is found.
     *
     * @return {number|null} The chunk index, or null if nothing is missing.
     */
    nextEmptyChunk: function ChunkedStream_nextEmptyChunk(beginChunk) {
      var chunk, n;
      for (chunk = beginChunk, n = this.numChunks; chunk < n; ++chunk) {
        if (!(chunk in this.loadedChunks)) {
          return chunk;
        }
      }
      // Wrap around to beginning
      for (chunk = 0; chunk < beginChunk; ++chunk) {
        if (!(chunk in this.loadedChunks)) {
          return chunk;
        }
      }
      return null;
    },

    /**
     * @return {boolean} Whether the chunk with the given index is loaded.
     */
    hasChunk: function ChunkedStream_hasChunk(chunk) {
      return chunk in this.loadedChunks;
    },

    // Number of bytes between the (movable) start and the end of the stream
    get length() {
      return this.end - this.start;
    },

    /**
     * Reads one byte and advances the position.
     *
     * @return {number|null} The byte, or null at the end of the stream.
     */
    getByte: function ChunkedStream_getByte() {
      var pos = this.pos;
      if (pos >= this.end) {
        return null;
      }
      this.ensureRange(pos, pos + 1);
      return this.bytes[this.pos++];
    },

    // returns subarray of original buffer
    // should only be read
    // Without a length the rest of the stream is returned and the position
    // is left untouched; with a length the position advances past the data.
    getBytes: function ChunkedStream_getBytes(length) {
      var bytes = this.bytes;
      var pos = this.pos;
      var strEnd = this.end;

      if (!length) {
        this.ensureRange(pos, strEnd);
        return bytes.subarray(pos, strEnd);
      }

      var end = pos + length;
      if (end > strEnd) {
        end = strEnd;
      }
      this.ensureRange(pos, end);

      this.pos = end;
      return bytes.subarray(pos, end);
    },

    /**
     * Returns the bytes in [begin, end) without touching the position.
     */
    getByteRange: function ChunkedStream_getByteRange(begin, end) {
      this.ensureRange(begin, end);
      return this.bytes.subarray(begin, end);
    },

    /**
     * @return {string|null} The character at the current position without
     *   consuming it, or null at the end of the stream.
     */
    lookChar: function ChunkedStream_lookChar() {
      var pos = this.pos;
      if (pos >= this.end) {
        return null;
      }
      this.ensureRange(pos, pos + 1);
      return String.fromCharCode(this.bytes[pos]);
    },

    /**
     * @return {string|null} The character at the current position, which is
     *   then advanced, or null at the end of the stream.
     */
    getChar: function ChunkedStream_getChar() {
      var pos = this.pos;
      if (pos >= this.end) {
        return null;
      }
      this.ensureRange(pos, pos + 1);
      return String.fromCharCode(this.bytes[this.pos++]);
    },

    // Advances the position by n bytes (1 when n is omitted or 0) without
    // checking that the skipped data is available
    skip: function ChunkedStream_skip(n) {
      if (!n) {
        n = 1;
      }
      this.pos += n;
    },

    reset: function ChunkedStream_reset() {
      this.pos = this.start;
    },

    moveStart: function ChunkedStream_moveStart() {
      this.start = this.pos;
    },

    /**
     * Creates a stream over [start, start + length) that inherits from this
     * stream, so it sees the same buffer and the same loaded-chunk state.
     * When start + length is falsy (e.g. no length given) the sub-stream
     * runs to the end of this stream.
     */
    makeSubStream: function ChunkedStream_makeSubStream(start, length, dict) {
      function ChunkedStreamSubstream() {}
      ChunkedStreamSubstream.prototype = Object.create(this);
      var subStream = new ChunkedStreamSubstream();
      subStream.pos = subStream.start = start;
      subStream.end = start + length || this.end;
      subStream.dict = dict;
      return subStream;
    },

    isStream: true
  };

  return ChunkedStream;
})();
var ChunkedStreamManager = (function ChunkedStreamManagerClosure() {

  /**
   * Owns a ChunkedStream and fetches its missing chunks, either with XHR
   * range requests or by asking the other side of the message handler.
   *
   * @param {number} length Total size of the document in bytes.
   * @param {number} chunkSize Size of one chunk in bytes.
   * @param {string} url Location of the document (used for XHR loading).
   * @param {Object} args Reads `msgHandler`, `chunkedViewerLoading` and
   *   `httpHeaders`.
   */
  function ChunkedStreamManager(length, chunkSize, url, args) {
    this.stream = new ChunkedStream(length, chunkSize);
    this.length = length;
    this.chunkSize = chunkSize;
    this.url = url;

    var msgHandler = this.msgHandler = args.msgHandler;
    var onReceiveData = this.onReceiveData.bind(this);

    if (args.chunkedViewerLoading) {
      // Data is served through the message handler: ranges are requested
      // with 'RequestDataRange' and delivered back as 'OnDataRange'
      msgHandler.on('OnDataRange', onReceiveData);
      this.sendRequest = function ChunkedStreamManager_sendRequest(begin, end) {
        msgHandler.send('RequestDataRange', { begin: begin, end: end });
      };
    } else {

      var getXhr = function getXhr() {
//#if B2G
//      return new XMLHttpRequest({ mozSystem: true });
//#else
        return new XMLHttpRequest();
//#endif
      };
      this.networkManager = new NetworkManager(this.url, {
        getXhr: getXhr,
        httpHeaders: args.httpHeaders
      });
      this.sendRequest = function ChunkedStreamManager_sendRequest(begin, end) {
        this.networkManager.requestRange(begin, end, {
          onDone: onReceiveData
        });
      };
    }

    this.currRequestId = 0;

    // requestId -> set of chunks the request is still waiting for
    this.chunksNeededByRequest = {};
    // chunk -> ids of the requests waiting for it; a key exists for every
    // chunk that has been asked for and not yet received
    this.requestsByChunk = {};
    // requestId -> callback to run once all its chunks have arrived
    this.callbacksByRequest = {};

    this.loadedStream = new PDFJS.Promise();
  }

  // Sends one range request per contiguous run of the given sorted chunks
  function sendGroupedRequests(manager, chunksToRequest) {
    var groups = manager.groupChunks(chunksToRequest);
    for (var i = 0, n = groups.length; i < n; ++i) {
      var group = groups[i];
      var begin = group.beginChunk * manager.chunkSize;
      // The last chunk may be partial; never ask for bytes past the end
      var end = Math.min(group.endChunk * manager.chunkSize, manager.length);
      manager.sendRequest(begin, end);
    }
  }

  ChunkedStreamManager.prototype = {
    /**
     * @return {Promise} Resolved with the stream once all chunks are loaded.
     */
    onLoadedStream: function ChunkedStreamManager_onLoadedStream() {
      return this.loadedStream;
    },

    // Get all the chunks that are not yet loaded and groups them into
    // contiguous ranges to load in as few requests as possible
    requestAllChunks: function ChunkedStreamManager_requestAllChunks() {
      var missingChunks = this.stream.getMissingChunks();
      var chunksToRequest = [];
      for (var i = 0, n = missingChunks.length; i < n; ++i) {
        var chunk = missingChunks[i];
        // Skip chunks that are already in flight
        if (!(chunk in this.requestsByChunk)) {
          this.requestsByChunk[chunk] = [];
          chunksToRequest.push(chunk);
        }
      }
      sendGroupedRequests(this, chunksToRequest);
      return this.loadedStream;
    },

    getStream: function ChunkedStreamManager_getStream() {
      return this.stream;
    },

    /**
     * Loads any chunks in the requested byte range that are not yet loaded.
     *
     * @param {number} begin First byte of the range.
     * @param {number} end One past the last byte; clamped to the length.
     * @param {function} callback Called without arguments once every chunk
     *   of the range is available (synchronously if that is already true).
     */
    requestRange: function ChunkedStreamManager_requestRange(
                      begin, end, callback) {

      end = Math.min(end, this.length);

      var beginChunk = this.getBeginChunk(begin);
      var endChunk = this.getEndChunk(end);
      var requestId = this.currRequestId++;

      var chunksNeeded = {};
      var hasMissingChunk = false;
      var chunk;
      for (chunk = beginChunk; chunk < endChunk; ++chunk) {
        if (!this.stream.hasChunk(chunk)) {
          chunksNeeded[chunk] = true;
          hasMissingChunk = true;
        }
      }

      if (!hasMissingChunk) {
        // Nothing to wait for, so no bookkeeping is stored for this id
        callback();
        return;
      }

      this.chunksNeededByRequest[requestId] = chunksNeeded;
      this.callbacksByRequest[requestId] = callback;

      // Walk the range in ascending order so the list handed to
      // groupChunks() is sorted
      var chunksToRequest = [];
      for (chunk = beginChunk; chunk < endChunk; ++chunk) {
        if (!(chunk in chunksNeeded)) {
          continue;
        }
        if (!(chunk in this.requestsByChunk)) {
          this.requestsByChunk[chunk] = [];
          chunksToRequest.push(chunk);
        }
        this.requestsByChunk[chunk].push(requestId);
      }

      if (!chunksToRequest.length) {
        // Everything needed is already in flight for another request
        return;
      }

      sendGroupedRequests(this, chunksToRequest);
    },

    /**
     * Groups a sorted array of chunks into as few contiguous larger
     * chunks as possible.
     *
     * @param {Array} chunks Ascending chunk indices.
     * @return {Array} Objects {beginChunk, endChunk} with endChunk exclusive.
     */
    groupChunks: function ChunkedStreamManager_groupChunks(chunks) {
      var groupedChunks = [];
      // -1 marks "not set yet"; 0 is a valid chunk index and must not be
      // confused with an unset value
      var beginChunk = -1;
      var prevChunk = -1;
      for (var i = 0, n = chunks.length; i < n; ++i) {
        var chunk = chunks[i];

        if (beginChunk < 0) {
          beginChunk = chunk;
        }

        if (prevChunk >= 0 && prevChunk + 1 !== chunk) {
          groupedChunks.push({
            beginChunk: beginChunk, endChunk: prevChunk + 1});
          beginChunk = chunk;
        }
        if (i + 1 === n) {
          groupedChunks.push({
            beginChunk: beginChunk, endChunk: chunk + 1});
        }

        prevChunk = chunk;
      }
      return groupedChunks;
    },

    /**
     * Handles a received range: stores it in the stream, completes the
     * requests that were waiting for it, keeps the background fetch going
     * and reports progress.
     *
     * @param {Object} args `begin` is the byte offset of the data and
     *   `chunk` the received bytes (read through `byteLength`).
     */
    onReceiveData: function ChunkedStreamManager_onReceiveData(args) {
      var data = args.chunk;
      var begin = args.begin;
      var end = begin + data.byteLength;

      var beginChunk = this.getBeginChunk(begin);
      var endChunk = this.getEndChunk(end);

      this.stream.onReceiveData(begin, data);
      if (this.stream.allChunksLoaded()) {
        this.loadedStream.resolve(this.stream);
      }

      var loadedRequests = [];
      var i, requestId;
      for (var chunk = beginChunk; chunk < endChunk; ++chunk) {

        // Data can arrive for chunks nobody registered interest in
        var requestIds = this.requestsByChunk[chunk] || [];
        delete this.requestsByChunk[chunk];

        for (i = 0; i < requestIds.length; ++i) {
          requestId = requestIds[i];
          var chunksNeeded = this.chunksNeededByRequest[requestId];
          if (chunk in chunksNeeded) {
            delete chunksNeeded[chunk];
          }

          if (!isEmptyObj(chunksNeeded)) {
            continue;
          }

          loadedRequests.push(requestId);
        }
      }

      // If there are no pending requests, automatically fetch the next
      // unfetched chunk of the PDF
      if (isEmptyObj(this.requestsByChunk)) {
        var nextEmptyChunk;
        if (this.stream.numChunksLoaded === 1) {
          // This is a special optimization so that after fetching the first
          // chunk, rather than fetching the second chunk, we fetch the last
          // chunk.
          var lastChunk = this.stream.numChunks - 1;
          if (!this.stream.hasChunk(lastChunk)) {
            nextEmptyChunk = lastChunk;
          }
        } else {
          nextEmptyChunk = this.stream.nextEmptyChunk(endChunk);
        }
        if (isInt(nextEmptyChunk)) {
          var nextEmptyByte = nextEmptyChunk * this.chunkSize;
          this.requestRange(nextEmptyByte, nextEmptyByte + this.chunkSize,
                            function() {});
        }
      }

      for (i = 0; i < loadedRequests.length; ++i) {
        requestId = loadedRequests[i];
        var callback = this.callbacksByRequest[requestId];
        // Release all bookkeeping of the finished request
        delete this.callbacksByRequest[requestId];
        delete this.chunksNeededByRequest[requestId];
        callback();
      }

      this.msgHandler.send('DocProgress', {
        // A partial last chunk must not push the count above the total
        loaded: Math.min(this.stream.numChunksLoaded * this.chunkSize,
                         this.length),
        total: this.length
      });
    },

    // Index of the chunk that contains byte `begin`
    getBeginChunk: function ChunkedStreamManager_getBeginChunk(begin) {
      var chunk = Math.floor(begin / this.chunkSize);
      return chunk;
    },

    // Exclusive index of the last chunk touched by a range ending at `end`
    getEndChunk: function ChunkedStreamManager_getEndChunk(end) {
      if (end % this.chunkSize === 0) {
        return end / this.chunkSize;
      }

      // With a chunk size of 100:
      // 0 -> 0
      // 1 -> 1
      // 99 -> 1
      // 100 -> 1
      // 101 -> 2
      var chunk = Math.floor((end - 1) / this.chunkSize) + 1;
      return chunk;
    }
  };

  return ChunkedStreamManager;
})();

View file

@ -17,7 +17,8 @@
/* globals assertWellFormed, calculateMD5, Catalog, error, info, isArray,
isArrayBuffer, isDict, isName, isStream, isString, Lexer,
Linearization, NullStream, PartialEvaluator, shadow, Stream,
StreamsSequenceStream, stringToPDFString, TODO, Util, warn, XRef */
StreamsSequenceStream, stringToPDFString, TODO, Util, warn, XRef,
MissingDataException */
'use strict';
@ -35,69 +36,6 @@ if (!globalScope.PDFJS) {
globalScope.PDFJS = {};
}
// getPdf()
// Convenience function to perform binary Ajax GET
// Usage: getPdf('http://...', callback)
// getPdf({
// url:String ,
// [,progress:Function, error:Function]
// },
// callback)
function getPdf(arg, callback) {
var params = arg;
if (typeof arg === 'string')
params = { url: arg };
//#if !B2G
var xhr = new XMLHttpRequest();
//#else
//var xhr = new XMLHttpRequest({mozSystem: true});
//#endif
xhr.open('GET', params.url);
var headers = params.headers;
if (headers) {
for (var property in headers) {
if (typeof headers[property] === 'undefined')
continue;
xhr.setRequestHeader(property, params.headers[property]);
}
}
xhr.mozResponseType = xhr.responseType = 'arraybuffer';
var protocol = params.url.substring(0, params.url.indexOf(':') + 1);
xhr.expected = (protocol === 'http:' || protocol === 'https:') ? 200 : 0;
if ('progress' in params)
xhr.onprogress = params.progress || undefined;
var calledErrorBack = false;
if ('error' in params) {
xhr.onerror = function errorBack() {
if (!calledErrorBack) {
calledErrorBack = true;
params.error();
}
};
}
xhr.onreadystatechange = function getPdfOnreadystatechange(e) {
if (xhr.readyState === 4) {
if (xhr.status === xhr.expected) {
var data = (xhr.mozResponseArrayBuffer || xhr.mozResponse ||
xhr.responseArrayBuffer || xhr.response);
callback(data);
} else if (params.error && !calledErrorBack) {
calledErrorBack = true;
params.error(e);
}
}
};
xhr.send(null);
}
globalScope.PDFJS.getPdf = getPdf;
globalScope.PDFJS.pdfBug = false;
@ -496,8 +434,8 @@ var PDFDocument = (function PDFDocumentClosure() {
function init(stream, password) {
assertWellFormed(stream.length > 0, 'stream must have data');
this.stream = stream;
this.setup(password);
this.acroForm = this.catalog.catDict.get('AcroForm');
var xref = new XRef(this.stream, password);
this.xref = xref;
}
function find(stream, needle, limit, backwards) {
@ -535,15 +473,25 @@ var PDFDocument = (function PDFDocumentClosure() {
};
PDFDocument.prototype = {
parse: function PDFDocument_parse(recoveryMode) {
this.setup(recoveryMode);
this.acroForm = this.catalog.catDict.get('AcroForm');
},
get linearization() {
var length = this.stream.length;
var linearization = false;
if (length) {
try {
linearization = new Linearization(this.stream);
if (linearization.length != length)
if (linearization.length != length) {
linearization = false;
}
} catch (err) {
if (err instanceof MissingDataException) {
throw err;
}
warn('The linearization data is not available ' +
'or unreadable pdf data is found');
linearization = false;
@ -622,14 +570,13 @@ var PDFDocument = (function PDFDocumentClosure() {
}
// May not be a PDF file, continue anyway.
},
setup: function PDFDocument_setup(password) {
this.checkHeader();
var xref = new XRef(this.stream,
this.startXRef,
this.mainXRefEntriesOffset,
password);
this.xref = xref;
this.catalog = new Catalog(xref);
parseStartXRef: function PDFDocument_parseStartXRef() {
var startXRef = this.startXRef;
this.xref.setStartXRef(startXRef);
},
setup: function PDFDocument_setup(recoveryMode) {
this.xref.parse(recoveryMode);
this.catalog = new Catalog(this.xref);
},
get numPages() {
var linearization = this.linearization;
@ -637,7 +584,7 @@ var PDFDocument = (function PDFDocumentClosure() {
// shadow the prototype getter
return shadow(this, 'numPages', num);
},
getDocumentInfo: function PDFDocument_getDocumentInfo() {
get documentInfo() {
var docInfo = {
PDFFormatVersion: this.pdfFormatVersion,
IsAcroFormPresent: !!this.acroForm
@ -660,9 +607,9 @@ var PDFDocument = (function PDFDocumentClosure() {
}
}
}
return shadow(this, 'getDocumentInfo', docInfo);
return shadow(this, 'documentInfo', docInfo);
},
getFingerprint: function PDFDocument_getFingerprint() {
get fingerprint() {
var xref = this.xref, fileID;
if (xref.trailer.has('ID')) {
fileID = '';
@ -681,10 +628,15 @@ var PDFDocument = (function PDFDocumentClosure() {
}
}
return shadow(this, 'getFingerprint', fileID);
return shadow(this, 'fingerprint', fileID);
},
getPage: function PDFDocument_getPage(n) {
return this.catalog.getPage(n);
traversePages: function PDFDocument_traversePages() {
this.catalog.traversePages();
},
getPage: function PDFDocument_getPage(pageIndex) {
return this.catalog.getPage(pageIndex);
}
};

View file

@ -19,7 +19,7 @@
IDENTITY_MATRIX, info, isArray, isCmd, isDict, isEOF, isName, isNum,
isStream, isString, JpegStream, Lexer, Metrics, Name, Parser,
Pattern, PDFImage, PDFJS, serifFonts, stdFontMap, symbolsFonts,
TilingPattern, TODO, warn, Util */
TilingPattern, TODO, warn, Util, MissingDataException */
'use strict';
@ -155,10 +155,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
assert(fontRes, 'fontRes not available');
++this.fontIdCounter;
font = xref.fetchIfRef(font) || fontRes.get(fontName);
if (!isDict(font)) {
++this.fontIdCounter;
return {
translated: new ErrorFont('Font ' + fontName + ' is not available'),
loadedName: 'g_font_' + this.uniquePrefix + this.fontIdCounter
@ -169,7 +168,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
if (!loadedName) {
// keep track of each font we translated so the caller can
// load them asynchronously before calling display on a page
loadedName = 'g_font_' + this.uniquePrefix + this.fontIdCounter;
loadedName = 'g_font_' + this.uniquePrefix + (this.fontIdCounter + 1);
font.loadedName = loadedName;
var translated;
@ -177,6 +176,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
translated = this.translateFont(font, xref, resources,
dependency);
} catch (e) {
if (e instanceof MissingDataException) {
font.loadedName = undefined;
throw e;
}
translated = new ErrorFont(e instanceof Error ? e.message : e);
}
font.translated = translated;
@ -196,6 +199,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
data.charProcOperatorList = charProcOperatorList;
}
}
++this.fontIdCounter;
return font;
},

224
src/network.js Normal file
View file

@ -0,0 +1,224 @@
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// NOTE: Be careful what goes in this file, as it is also used from the context
// of the addon. So using warn/error in here will break the addon.
'use strict';
//#if (FIREFOX || MOZCENTRAL)
//
//Components.utils.import('resource://gre/modules/Services.jsm');
//
//var EXPORTED_SYMBOLS = ['NetworkManager'];
//
//function log(aMsg) {
// var msg = 'network.js: ' + (aMsg.join ? aMsg.join('') : aMsg);
// Services.console.logStringMessage(msg);
// // TODO(mack): dump() doesn't seem to work here...
// dump(msg + '\n');
//}
//#else
// Logging helper for the non-addon build (the #else branch of the
// preprocessor block): forwards the message to the console.
function log(aMsg) {
console.log(aMsg);
}
//#endif
var NetworkManager = (function NetworkManagerClosure() {
  /**
   * Issues full and ranged GET requests for a single URL and keeps track of
   * which requests are pending or have completed.
   *
   * @param {string} url The URL every request is sent to.
   * @param {Object} args Optional. `httpHeaders` is a map of extra request
   *   headers; `getXhr` is a factory returning the XHR object to use.
   */
  function NetworkManager(url, args) {
    this.url = url;
    args = args || {};
    this.httpHeaders = args.httpHeaders || {};
    this.getXhr = args.getXhr ||
      function NetworkManager_getXhr() {
        return new XMLHttpRequest();
      };

    this.currXhrId = 0;
    // xhrId -> { xhr, expectedStatus, onHeadersReceived, onDone, onError }
    this.pendingRequests = {};
    // xhrId -> true for every request whose response was delivered
    this.loadedRequests = {};
  }

  // Returns the response body; a string response is converted byte by byte
  // into a Uint8Array, anything else is passed through untouched.
  function getArrayBuffer(xhr) {
    var data = (xhr.mozResponseArrayBuffer || xhr.mozResponse ||
                xhr.responseArrayBuffer || xhr.response);
    if (typeof data !== 'string') {
      return data;
    }
    var length = data.length;
    var buffer = new Uint8Array(length);
    for (var i = 0; i < length; i++) {
      buffer[i] = data.charCodeAt(i) & 0xFF;
    }
    return buffer;
  }

  // Reports a failure to the request's onError listener at most once. A
  // network failure surfaces both as readyState 4 with status 0 and as an
  // `error` event; without the guard the listener would run twice.
  function notifyError(pendingRequest, status) {
    if (pendingRequest.errorReported || !pendingRequest.onError) {
      return;
    }
    pendingRequest.errorReported = true;
    pendingRequest.onError(status);
  }

  NetworkManager.prototype = {
    /**
     * Requests the bytes [begin, end) of the URL.
     *
     * @param {Object} listeners Optional callbacks, see request().
     * @return {number} The id of the request.
     */
    requestRange: function NetworkManager_requestRange(begin, end, listeners) {
      var args = {
        begin: begin,
        end: end
      };
      for (var prop in listeners) {
        args[prop] = listeners[prop];
      }
      return this.request(args);
    },

    /**
     * Requests the whole resource.
     *
     * @param {Object} listeners Optional callbacks, see request().
     * @return {number} The id of the request.
     */
    requestFull: function NetworkManager_requestFull(listeners) {
      return this.request(listeners);
    },

    /**
     * Sends a GET request. When both `begin` and `end` are present a Range
     * header is sent and status 206 is expected, otherwise status 200.
     *
     * @param {Object} args Optional. May contain `begin`, `end` and the
     *   callbacks `onProgress`, `onHeadersReceived`, `onDone`, `onError`.
     *   onDone receives {begin, end, chunk} for ranged requests and
     *   {chunk} for full ones; onError receives the XHR status.
     * @return {number} The id of the request.
     */
    request: function NetworkManager_request(args) {
      args = args || {};

      var xhr = this.getXhr();
      var xhrId = this.currXhrId++;
      var pendingRequest = this.pendingRequests[xhrId] = {
        xhr: xhr
      };

      xhr.open('GET', this.url);
      for (var property in this.httpHeaders) {
        var value = this.httpHeaders[property];
        if (typeof value === 'undefined') {
          continue;
        }
        xhr.setRequestHeader(property, value);
      }
      if ('begin' in args && 'end' in args) {
        // HTTP byte ranges are inclusive on both ends
        var rangeStr = args.begin + '-' + (args.end - 1);
        xhr.setRequestHeader('Range', 'bytes=' + rangeStr);
        pendingRequest.expectedStatus = 206;
      } else {
        pendingRequest.expectedStatus = 200;
      }

      xhr.mozResponseType = xhr.responseType = 'arraybuffer';

      pendingRequest.onHeadersReceived = args.onHeadersReceived;
      pendingRequest.onDone = args.onDone;
      pendingRequest.onError = args.onError;

      if (args.onProgress) {
        xhr.onprogress = args.onProgress;
      }
      if (args.onError) {
        xhr.onerror = function(evt) {
          notifyError(pendingRequest, xhr.status);
        };
      }
      xhr.onreadystatechange = this.onStateChange.bind(this, xhrId);

      xhr.send(null);

      return xhrId;
    },

    /**
     * readystatechange handler, bound to the id of its request. Fires
     * onHeadersReceived once headers are in, and onDone or onError when
     * the request has finished.
     */
    onStateChange: function NetworkManager_onStateChange(xhrId, evt) {
      var pendingRequest = this.pendingRequests[xhrId];
      if (!pendingRequest) {
        // Maybe abortRequest was called...
        return;
      }

      var xhr = pendingRequest.xhr;
      if (xhr.readyState >= 2 && pendingRequest.onHeadersReceived) {
        pendingRequest.onHeadersReceived();
        delete pendingRequest.onHeadersReceived;
      }

      if (xhr.readyState !== 4) {
        return;
      }

      if (!(xhrId in this.pendingRequests)) {
        // The XHR request might have been aborted in onHeadersReceived()
        // callback, in which case we should abort request
        return;
      }

      delete this.pendingRequests[xhrId];

      // Covers status 0 (network failure) as well as any unexpected status
      if (xhr.status !== pendingRequest.expectedStatus) {
        notifyError(pendingRequest, xhr.status);
        return;
      }

      var chunk = getArrayBuffer(xhr);
      var result = { chunk: chunk };
      if (pendingRequest.expectedStatus === 206) {
        var rangeHeader = xhr.getResponseHeader('Content-Range');
        // The complete length may be given as '*' when it is unknown
        var matches = /bytes (\d+)-(\d+)\/(\d+|\*)/.exec(rangeHeader);
        if (!matches) {
          // Header missing or malformed: the position of the data is
          // unknown, so report a failure instead of crashing
          notifyError(pendingRequest, xhr.status);
          return;
        }
        result = {
          begin: parseInt(matches[1], 10),
          end: parseInt(matches[2], 10) + 1,
          chunk: chunk
        };
      }

      this.loadedRequests[xhrId] = true;
      if (pendingRequest.onDone) {
        pendingRequest.onDone(result);
      }
    },

    /**
     * @return {boolean} True while at least one request is pending.
     */
    hasPendingRequests: function NetworkManager_hasPendingRequests() {
      for (var xhrId in this.pendingRequests) {
        return true;
      }
      return false;
    },

    // Only valid for ids of requests that are still pending
    getRequestXhr: function NetworkManager_getRequestXhr(xhrId) {
      return this.pendingRequests[xhrId].xhr;
    },

    isPendingRequest: function NetworkManager_isPendingRequest(xhrId) {
      return xhrId in this.pendingRequests;
    },

    isLoadedRequest: function NetworkManager_isLoadedRequest(xhrId) {
      return xhrId in this.loadedRequests;
    },

    abortAllRequests: function NetworkManager_abortAllRequests() {
      for (var xhrId in this.pendingRequests) {
        this.abortRequest(xhrId | 0);
      }
    },

    /**
     * Aborts a pending request. Ids that are unknown or already finished
     * are ignored.
     */
    abortRequest: function NetworkManager_abortRequest(xhrId) {
      var pendingRequest = this.pendingRequests[xhrId];
      if (!pendingRequest) {
        return;
      }
      // Forget the request first so the state change caused by abort()
      // is ignored by onStateChange()
      delete this.pendingRequests[xhrId];
      pendingRequest.xhr.abort();
    }
  };

  return NetworkManager;
})();

View file

@ -17,7 +17,8 @@
/* globals assertWellFormed, bytesToString, CipherTransformFactory, error, info,
InvalidPDFException, isArray, isCmd, isDict, isInt, isName, isRef,
isStream, JpegStream, Lexer, log, Page, Parser, Promise, shadow,
stringToPDFString, stringToUTF8String, warn, isString */
stringToPDFString, stringToUTF8String, warn, isString, assert, PDFJS,
MissingDataException, XRefParseException */
'use strict';
@ -152,9 +153,18 @@ var RefSet = (function RefSetClosure() {
var Catalog = (function CatalogClosure() {
function Catalog(xref) {
this.xref = xref;
var obj = xref.getCatalogObj();
assertWellFormed(isDict(obj), 'catalog object is not a dictionary');
this.catDict = obj;
this.catDict = xref.getCatalogObj();
assertWellFormed(isDict(this.catDict),
'catalog object is not a dictionary');
// Stores state as we traverse the pages catalog so that we can resume
// parsing if an exception is thrown
this.traversePagesQueue = [{
pagesDict: this.toplevelPagesDict,
posInKids: 0
}];
this.pagePromises = [];
this.currPageIndex = 0;
}
Catalog.prototype = {
@ -258,27 +268,6 @@ var Catalog = (function CatalogClosure() {
// shadow the prototype getter
return shadow(this, 'num', obj);
},
traverseKids: function Catalog_traverseKids(pagesDict) {
var pageCache = this.pageCache;
var kids = pagesDict.get('Kids');
assertWellFormed(isArray(kids),
'page dictionary kids object is not an array');
for (var i = 0, ii = kids.length; i < ii; ++i) {
var kid = kids[i];
assertWellFormed(isRef(kid),
'page dictionary kid is not a reference');
var obj = this.xref.fetch(kid);
if (isDict(obj, 'Page') || (isDict(obj) && !obj.has('Kids'))) {
pageCache.push(new Page(this.xref, pageCache.length, obj, kid));
} else { // must be a child page dictionary
assertWellFormed(
isDict(obj),
'page dictionary kid reference points to wrong type of object'
);
this.traverseKids(obj);
}
}
},
get destinations() {
function fetchDestination(dest) {
return isDict(dest) ? dest.get('D') : dest;
@ -346,13 +335,53 @@ var Catalog = (function CatalogClosure() {
}
return shadow(this, 'javaScript', javaScript);
},
getPage: function Catalog_getPage(n) {
var pageCache = this.pageCache;
if (!pageCache) {
pageCache = this.pageCache = [];
this.traverseKids(this.toplevelPagesDict);
getPage: function Catalog_getPage(pageIndex) {
if (!(pageIndex in this.pagePromises)) {
this.pagePromises[pageIndex] = new PDFJS.Promise();
}
return this.pagePromises[pageIndex];
},
// Traverses pages in DFS order so that pages are processed in increasing
// order
traversePages: function Catalog_traversePages() {
// The traversal state lives in this.traversePagesQueue (a stack of
// { pagesDict, posInKids } items) and this.currPageIndex rather than in
// locals, so a later call continues from the stored position.
var queue = this.traversePagesQueue;
while (queue.length) {
// Always work on the most recently pushed dictionary (depth first)
var queueItem = queue[queue.length - 1];
var pagesDict = queueItem.pagesDict;
var kids = pagesDict.get('Kids');
assert(isArray(kids), 'page dictionary kids object is not an array');
// All kids of this dictionary have been handled: go back to its parent
if (queueItem.posInKids >= kids.length) {
queue.pop();
continue;
}
var kidRef = kids[queueItem.posInKids];
assert(isRef(kidRef), 'page dictionary kid is not a reference');
// posInKids is only advanced at the bottom of the loop, so if this fetch
// throws, the same kid is looked at again on the next call
var kid = this.xref.fetch(kidRef);
// A dictionary typed 'Page', or any dictionary without 'Kids', is a leaf
if (isDict(kid, 'Page') || (isDict(kid) && !kid.has('Kids'))) {
var pageIndex = this.currPageIndex++;
var page = new Page(this.xref, pageIndex, kid, kidRef);
// The promise may already have been created by an earlier getPage() call
if (!(pageIndex in this.pagePromises)) {
this.pagePromises[pageIndex] = new PDFJS.Promise();
}
this.pagePromises[pageIndex].resolve(page);
} else { // must be a child page dictionary
assert(
isDict(kid),
'page dictionary kid reference points to wrong type of object'
);
// Descend: the child dictionary is processed on the next iteration
queue.push({
pagesDict: kid,
posInKids: 0
});
}
// queueItem still refers to the parent entry, even after a push
++queueItem.posInKids;
}
return this.pageCache[n - 1];
}
};
@ -360,75 +389,60 @@ var Catalog = (function CatalogClosure() {
})();
var XRef = (function XRefClosure() {
function XRef(stream, startXRef, mainXRefEntriesOffset, password) {
function XRef(stream, password) {
this.stream = stream;
this.entries = [];
this.xrefstms = {};
var trailerDict = this.readXRef(startXRef);
trailerDict.assignXref(this);
this.trailer = trailerDict;
// prepare the XRef cache
this.cache = [];
var encrypt = trailerDict.get('Encrypt');
if (encrypt) {
var ids = trailerDict.get('ID');
var fileId = (ids && ids.length) ? ids[0] : '';
this.encrypt = new CipherTransformFactory(encrypt, fileId, password);
}
// get the root dictionary (catalog) object
if (!(this.root = trailerDict.get('Root')))
error('Invalid root reference');
this.password = password;
}
XRef.prototype = {
readXRefTable: function XRef_readXRefTable(parser) {
// Example of cross-reference table:
// xref
// 0 1 <-- subsection header (first obj #, obj count)
// 0000000000 65535 f <-- actual object (offset, generation #, f/n)
// 23 2 <-- subsection header ... and so on ...
// 0000025518 00002 n
// 0000025635 00000 n
// trailer
// ...
/**
 * Initialises the queue of pending xref positions with a single entry.
 *
 * readXRef consumes this queue from the front, seeking the stream to each
 * position, and appends further positions ('XRefStm' / 'Prev') as it finds
 * them.
 *
 * @param {number} startXRef Stream position of the first xref section.
 */
setStartXRef: function XRef_setStartXRef(startXRef) {
// Store the starting positions of xref tables as we process them
// so we can recover from missing data errors
this.startXRefQueue = [startXRef];
},
// Outer loop is over subsection headers
var obj;
while (!isCmd(obj = parser.getObj(), 'trailer')) {
var first = obj,
count = parser.getObj();
if (!isInt(first) || !isInt(count))
error('Invalid XRef table: wrong types in subsection header');
// Inner loop is over objects themselves
for (var i = 0; i < count; i++) {
var entry = {};
entry.offset = parser.getObj();
entry.gen = parser.getObj();
var type = parser.getObj();
if (isCmd(type, 'f'))
entry.free = true;
else if (isCmd(type, 'n'))
entry.uncompressed = true;
// Validate entry obj
if (!isInt(entry.offset) || !isInt(entry.gen) ||
!(entry.free || entry.uncompressed)) {
error('Invalid entry in XRef subsection: ' + first + ', ' + count);
}
if (!this.entries[i + first])
this.entries[i + first] = entry;
}
parse: function XRef_parse(recoveryMode) {
var trailerDict;
if (!recoveryMode) {
trailerDict = this.readXRef();
} else {
warn('Indexing all PDF objects');
trailerDict = this.indexObjects();
}
trailerDict.assignXref(this);
this.trailer = trailerDict;
var encrypt = trailerDict.get('Encrypt');
if (encrypt) {
var ids = trailerDict.get('ID');
var fileId = (ids && ids.length) ? ids[0] : '';
this.encrypt = new CipherTransformFactory(
encrypt, fileId, this.password);
}
// Sanity check: as per spec, first object must be free
if (this.entries[0] && !this.entries[0].free)
error('Invalid XRef table: unexpected first object');
// get the root dictionary (catalog) object
if (!(this.root = trailerDict.get('Root'))) {
error('Invalid root reference');
}
},
processXRefTable: function XRef_processXRefTable(parser) {
if (!('tableState' in this)) {
// Stores state of the table as we process it so we can resume
// from middle of table in case of missing data error
this.tableState = {
entryNum: 0,
streamPos: parser.lexer.stream.pos,
parserBuf1: parser.buf1,
parserBuf2: parser.buf2
};
}
var obj = this.readXRefTable(parser);
// Sanity check
if (!isCmd(obj, 'trailer'))
@ -447,27 +461,140 @@ var XRef = (function XRefClosure() {
if (!isDict(dict))
error('Invalid XRef table: could not parse trailer dictionary');
delete this.tableState;
return dict;
},
readXRefTable: function XRef_readXRefTable(parser) {
// Example of cross-reference table:
// xref
// 0 1 <-- subsection header (first obj #, obj count)
// 0000000000 65535 f <-- actual object (offset, generation #, f/n)
// 23 2 <-- subsection header ... and so on ...
// 0000025518 00002 n
// 0000025635 00000 n
// trailer
// ...
var stream = parser.lexer.stream;
var tableState = this.tableState;
stream.pos = tableState.streamPos;
parser.buf1 = tableState.parserBuf1;
parser.buf2 = tableState.parserBuf2;
// Outer loop is over subsection headers
var obj;
while (true) {
if (!('firstEntryNum' in tableState) || !('entryCount' in tableState)) {
if (isCmd(obj = parser.getObj(), 'trailer')) {
break;
}
tableState.firstEntryNum = obj;
tableState.entryCount = parser.getObj();
}
var first = tableState.firstEntryNum;
var count = tableState.entryCount;
if (!isInt(first) || !isInt(count))
error('Invalid XRef table: wrong types in subsection header');
// Inner loop is over objects themselves
for (var i = tableState.entryNum; i < count; i++) {
tableState.streamPos = stream.pos;
tableState.entryNum = i;
tableState.parserBuf1 = parser.buf1;
tableState.parserBuf2 = parser.buf2;
var entry = {};
entry.offset = parser.getObj();
entry.gen = parser.getObj();
var type = parser.getObj();
if (isCmd(type, 'f'))
entry.free = true;
else if (isCmd(type, 'n'))
entry.uncompressed = true;
// Validate entry obj
if (!isInt(entry.offset) || !isInt(entry.gen) ||
!(entry.free || entry.uncompressed)) {
console.log(entry.offset, entry.gen, entry.free,
entry.uncompressed);
error('Invalid entry in XRef subsection: ' + first + ', ' + count);
}
if (!this.entries[i + first])
this.entries[i + first] = entry;
}
tableState.entryNum = 0;
tableState.streamPos = stream.pos;
tableState.parserBuf1 = parser.buf1;
tableState.parserBuf2 = parser.buf2;
delete tableState.firstEntryNum;
delete tableState.entryCount;
}
// Sanity check: as per spec, first object must be free
if (this.entries[0] && !this.entries[0].free)
error('Invalid XRef table: unexpected first object');
return obj;
},
processXRefStream: function XRef_processXRefStream(stream) {
if (!('streamState' in this)) {
// Stores state of the stream as we process it so we can resume
// from middle of stream in case of missing data error
var streamParameters = stream.parameters;
var byteWidths = streamParameters.get('W');
var range = streamParameters.get('Index');
if (!range) {
range = [0, streamParameters.get('Size')];
}
this.streamState = {
entryRanges: range,
byteWidths: byteWidths,
entryNum: 0,
streamPos: stream.pos
};
}
this.readXRefStream(stream);
delete this.streamState;
return stream.parameters;
},
readXRefStream: function XRef_readXRefStream(stream) {
var streamParameters = stream.parameters;
var byteWidths = streamParameters.get('W');
var range = streamParameters.get('Index');
if (!range)
range = [0, streamParameters.get('Size')];
var i, j;
while (range.length > 0) {
var first = range[0], n = range[1];
var streamState = this.streamState;
stream.pos = streamState.streamPos;
var byteWidths = streamState.byteWidths;
var typeFieldWidth = byteWidths[0];
var offsetFieldWidth = byteWidths[1];
var generationFieldWidth = byteWidths[2];
var entryRanges = streamState.entryRanges;
while (entryRanges.length > 0) {
var first = entryRanges[0];
var n = entryRanges[1];
if (!isInt(first) || !isInt(n))
error('Invalid XRef range fields: ' + first + ', ' + n);
var typeFieldWidth = byteWidths[0];
var offsetFieldWidth = byteWidths[1];
var generationFieldWidth = byteWidths[2];
if (!isInt(typeFieldWidth) || !isInt(offsetFieldWidth) ||
!isInt(generationFieldWidth)) {
error('Invalid XRef entry fields length: ' + first + ', ' + n);
}
for (i = 0; i < n; ++i) {
for (i = streamState.entryNum; i < n; ++i) {
streamState.entryNum = i;
streamState.streamPos = stream.pos;
var type = 0, offset = 0, generation = 0;
for (j = 0; j < typeFieldWidth; ++j)
type = (type << 8) | stream.getByte();
@ -496,9 +623,11 @@ var XRef = (function XRefClosure() {
if (!this.entries[first + i])
this.entries[first + i] = entry;
}
range.splice(0, 2);
streamState.entryNum = 0;
streamState.streamPos = stream.pos;
entryRanges.splice(0, 2);
}
return streamParameters;
},
indexObjects: function XRef_indexObjects() {
// Simple scan through the PDF content to find objects,
@ -586,7 +715,8 @@ var XRef = (function XRefClosure() {
}
// reading XRef streams
for (var i = 0, ii = xrefStms.length; i < ii; ++i) {
this.readXRef(xrefStms[i], true);
this.startXRefQueue.push(xrefStms[i]);
this.readXRef(/* recoveryMode */ true);
}
// finding main trailer
var dict;
@ -610,64 +740,84 @@ var XRef = (function XRefClosure() {
// calling error() would reject worker with an UnknownErrorException.
throw new InvalidPDFException('Invalid PDF structure');
},
readXRef: function XRef_readXRef(startXRef, recoveryMode) {
readXRef: function XRef_readXRef(recoveryMode) {
var stream = this.stream;
stream.pos = startXRef;
try {
var parser = new Parser(new Lexer(stream), true, null);
var obj = parser.getObj();
var dict;
while (this.startXRefQueue.length) {
var startXRef = this.startXRefQueue[0];
// Get dictionary
if (isCmd(obj, 'xref')) {
// Parse end-of-file XRef
dict = this.readXRefTable(parser);
stream.pos = startXRef;
// Recursively get other XRefs 'XRefStm', if any
obj = dict.get('XRefStm');
if (isInt(obj)) {
var pos = obj;
// ignore previously loaded xref streams
// (possible infinite recursion)
if (!(pos in this.xrefstms)) {
this.xrefstms[pos] = 1;
this.readXRef(pos);
var parser = new Parser(new Lexer(stream), true, null);
var obj = parser.getObj();
var dict;
// Get dictionary
if (isCmd(obj, 'xref')) {
// Parse end-of-file XRef
dict = this.processXRefTable(parser);
if (!this.topDict) {
this.topDict = dict;
}
// Recursively get other XRefs 'XRefStm', if any
obj = dict.get('XRefStm');
if (isInt(obj)) {
var pos = obj;
// ignore previously loaded xref streams
// (possible infinite recursion)
if (!(pos in this.xrefstms)) {
this.xrefstms[pos] = 1;
this.startXRefQueue.push(pos);
}
}
} else if (isInt(obj)) {
// Parse in-stream XRef
if (!isInt(parser.getObj()) ||
!isCmd(parser.getObj(), 'obj') ||
!isStream(obj = parser.getObj())) {
error('Invalid XRef stream');
}
dict = this.processXRefStream(obj);
if (!this.topDict) {
this.topDict = dict;
}
if (!dict)
error('Failed to read XRef stream');
}
} else if (isInt(obj)) {
// Parse in-stream XRef
if (!isInt(parser.getObj()) ||
!isCmd(parser.getObj(), 'obj') ||
!isStream(obj = parser.getObj())) {
error('Invalid XRef stream');
// Recursively get previous dictionary, if any
obj = dict.get('Prev');
if (isInt(obj)) {
this.startXRefQueue.push(obj);
} else if (isRef(obj)) {
// The spec says Prev must not be a reference, i.e. "/Prev NNN"
// This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R"
this.startXRefQueue.push(obj.num);
}
dict = this.readXRefStream(obj);
if (!dict)
error('Failed to read XRef stream');
this.startXRefQueue.shift();
}
// Recursively get previous dictionary, if any
obj = dict.get('Prev');
if (isInt(obj))
this.readXRef(obj, recoveryMode);
else if (isRef(obj)) {
// The spec says Prev must not be a reference, i.e. "/Prev NNN"
// This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R"
this.readXRef(obj.num, recoveryMode);
}
return dict;
return this.topDict;
} catch (e) {
if (e instanceof MissingDataException) {
throw e;
}
log('(while reading XRef): ' + e);
}
if (recoveryMode)
return;
warn('Indexing all PDF objects');
return this.indexObjects();
throw new XRefParseException();
},
getEntry: function XRef_getEntry(i) {
var e = this.entries[i];
if (e === null)

180
src/pdf_manager.js Normal file
View file

@ -0,0 +1,180 @@
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* globals NotImplementedException, MissingDataException, PDFJS, Stream,
PDFDocument, ChunkedStream, ChunkedStreamManager */
'use strict';
// TODO(mack): Make use of PDFJS.Util.inherit() when it becomes available
/**
 * Abstract base for the PDF managers. It provides the ensureModel /
 * ensureXRef / ensureCatalog conveniences, which forward to `ensure` with
 * the matching target object taken from this.pdfModel. Subclasses set
 * this.pdfModel and implement ensure, requestLoadedStream and
 * onLoadedStream.
 */
var BasePdfManager = (function BasePdfManagerClosure() {
  function BasePdfManager() {
    // Not meant to be instantiated directly; subclasses do not call it.
    throw new Error('Cannot initialize BasePdfManager');
  }

  BasePdfManager.prototype = {
    /**
     * Abstract.
     * @throws {NotImplementedException} Always, unless overridden.
     */
    onLoadedStream: function BasePdfManager_onLoadedStream() {
      throw new NotImplementedException();
    },

    /**
     * Calls this.ensure(this.pdfModel, prop, ...extraArgs).
     * @param {string} prop Property or method name on the document model.
     * @return The value returned by this.ensure.
     */
    ensureModel: function BasePdfManager_ensureModel(prop) {
      var args = [].slice.call(arguments);
      args.unshift(this.pdfModel);
      return this.ensure.apply(this, args);
    },

    /**
     * Calls this.ensure(this.pdfModel.xref, prop, ...extraArgs).
     * @param {string} prop Property or method name on the xref.
     * @return The value returned by this.ensure.
     */
    ensureXRef: function BasePdfManager_ensureXRef(prop) {
      var args = [].slice.call(arguments);
      args.unshift(this.pdfModel.xref);
      return this.ensure.apply(this, args);
    },

    /**
     * Calls this.ensure(this.pdfModel.catalog, prop, ...extraArgs).
     * @param {string} prop Property or method name on the catalog.
     * @return The value returned by this.ensure.
     */
    ensureCatalog: function BasePdfManager_ensureCatalog(prop) {
      var args = [].slice.call(arguments);
      args.unshift(this.pdfModel.catalog);
      return this.ensure.apply(this, args);
    },

    /**
     * Forwards to this.pdfModel.getPage(pageIndex).
     */
    getPage: function BasePdfManager_getPage(pageIndex) {
      return this.pdfModel.getPage(pageIndex);
    },

    /**
     * Abstract. Throws (rather than returning the exception object) so that
     * a missing override fails loudly, consistent with onLoadedStream.
     * @throws {NotImplementedException} Always, unless overridden.
     */
    ensure: function BasePdfManager_ensure(obj, prop) {
      throw new NotImplementedException();
    },

    /**
     * Abstract.
     * @throws {NotImplementedException} Always, unless overridden.
     */
    requestLoadedStream: function BasePdfManager_requestLoadedStream() {
      throw new NotImplementedException();
    }
  };

  return BasePdfManager;
})();
/**
 * PDF manager for documents whose bytes are fully available up front.
 * The data is wrapped in a Stream, handed to a PDFDocument, and the
 * "loaded stream" promise is resolved immediately with that stream.
 */
var LocalPdfManager = (function LocalPdfManagerClosure() {
  /**
   * @param data Document bytes, passed to Stream.
   * @param password Passed through to PDFDocument.
   */
  function LocalPdfManager(data, password) {
    var stream = new Stream(data);
    this.pdfModel = new PDFDocument(stream, password);
    this.loadedStream = new PDFJS.Promise();
    this.loadedStream.resolve(stream);
  }

  LocalPdfManager.prototype = Object.create(BasePdfManager.prototype);
  LocalPdfManager.prototype.constructor = LocalPdfManager;

  /**
   * Reads obj[prop], calling it with any extra arguments when it is a
   * function, and reports the outcome through a promise.
   *
   * @param {Object} obj Target object.
   * @param {string} prop Property or method name on obj.
   * @return {Promise} Resolved with the value or call result; rejected with
   *   whatever was thrown while reading or calling the property.
   */
  LocalPdfManager.prototype.ensure =
      function LocalPdfManager_ensure(obj, prop) {
    var promise = new PDFJS.Promise();
    try {
      // The property read is inside the try block: it may be a getter that
      // throws, and that failure must reject the promise instead of
      // escaping to the caller.
      var value = obj[prop];
      var result;
      if (typeof(value) === 'function') {
        var args = [].slice.call(arguments, 2);
        result = value.apply(obj, args);
      } else {
        result = value;
      }
      promise.resolve(result);
    } catch (e) {
      console.log(e.stack);
      promise.reject(e);
    }
    return promise;
  };

  /**
   * Nothing to request: the whole stream is already in memory.
   */
  LocalPdfManager.prototype.requestLoadedStream =
      function LocalPdfManager_requestLoadedStream() {
  };

  /**
   * @return {Promise} The promise resolved in the constructor with the
   *   stream.
   */
  LocalPdfManager.prototype.onLoadedStream =
      function LocalPdfManager_getLoadedStream() {
    return this.loadedStream;
  };

  return LocalPdfManager;
})();
/**
 * PDF manager backed by a ChunkedStreamManager. Property reads and method
 * calls on the document model are retried after the byte range named by a
 * MissingDataException has been requested from the stream manager.
 */
var NetworkPdfManager = (function NetworkPdfManagerClosure() {

  // Chunk size handed to ChunkedStreamManager.
  var CHUNK_SIZE = 64000;

  /**
   * @param {Object} args Reads length, url, httpHeaders,
   *   chunkedViewerLoading and password.
   * @param msgHandler Stored on the instance and forwarded to the stream
   *   manager.
   */
  function NetworkPdfManager(args, msgHandler) {
    this.msgHandler = msgHandler;

    this.streamManager = new ChunkedStreamManager(
      args.length, CHUNK_SIZE, args.url, {
        msgHandler: msgHandler,
        httpHeaders: args.httpHeaders,
        chunkedViewerLoading: args.chunkedViewerLoading
      });

    var modelStream = this.streamManager.getStream();
    this.pdfModel = new PDFDocument(modelStream, args.password);
  }

  var proto = Object.create(BasePdfManager.prototype);
  proto.constructor = NetworkPdfManager;
  NetworkPdfManager.prototype = proto;

  /**
   * Starts an ensureHelper attempt for obj[prop] (plus any extra call
   * arguments) and returns the promise that attempt will settle.
   */
  proto.ensure = function NetworkPdfManager_ensure(obj, prop) {
    var deferred = new PDFJS.Promise();
    var helperArgs = [deferred].concat([].slice.call(arguments));
    this.ensureHelper.apply(this, helperArgs);
    return deferred;
  };

  /**
   * One attempt at reading/calling obj[prop]. Resolves `promise` on
   * success, rejects it on an ordinary error, and on MissingDataException
   * requests the missing range and re-runs itself with the same arguments
   * once the stream manager calls back.
   */
  proto.ensureHelper =
      function NetworkPdfManager_ensureHelper(promise, obj, prop) {
    var outcome;
    try {
      var member = obj[prop];
      outcome = (typeof(member) === 'function') ?
        member.apply(obj, [].slice.call(arguments, 3)) : member;
      promise.resolve(outcome);
    } catch (ex) {
      if (ex instanceof MissingDataException) {
        var self = this;
        var retryArgs = Array.prototype.slice.call(arguments);
        this.streamManager.requestRange(ex.begin, ex.end, function() {
          self.ensureHelper.apply(self, retryArgs);
        });
        return;
      }
      console.log(ex.stack);
      promise.reject(ex);
    }
  };

  /**
   * Asks the stream manager to fetch every chunk.
   */
  proto.requestLoadedStream =
      function NetworkPdfManager_requestLoadedStream() {
    this.streamManager.requestAllChunks();
  };

  /**
   * @return Whatever the stream manager's onLoadedStream() returns.
   */
  proto.onLoadedStream = function NetworkPdfManager_getLoadedStream() {
    return this.streamManager.onLoadedStream();
  };

  return NetworkPdfManager;
})();

View file

@ -189,6 +189,45 @@ var MissingPDFException = (function MissingPDFExceptionClosure() {
return MissingPDFException;
})();
/**
 * Error type thrown by abstract methods that a subclass was expected to
 * override.
 */
var NotImplementedException = (function NotImplementedExceptionClosure() {
  /**
   * @param {string} msg Stored as the `message` property.
   */
  function NotImplementedException(msg) {
    this.message = msg;
  }

  NotImplementedException.prototype = new Error();
  NotImplementedException.prototype.name = 'NotImplementedException';
  // `constructor` must be set on the prototype; assigning it to the function
  // itself leaves instances reporting Error as their constructor.
  NotImplementedException.prototype.constructor = NotImplementedException;

  return NotImplementedException;
})();
/**
 * Error type carrying the byte range that was not available. Handlers read
 * `begin` and `end` to request exactly that range before retrying.
 */
var MissingDataException = (function MissingDataExceptionClosure() {
  /**
   * @param {number} begin Start of the missing range.
   * @param {number} end End of the missing range; the message renders the
   *   range as half-open, "[begin, end)".
   */
  function MissingDataException(begin, end) {
    this.begin = begin;
    this.end = end;
    // Report the actual range instead of the placeholder words.
    this.message = 'Missing data [' + begin + ', ' + end + ')';
  }

  MissingDataException.prototype = new Error();
  MissingDataException.prototype.name = 'MissingDataException';
  // `constructor` must be set on the prototype; assigning it to the function
  // itself leaves instances reporting Error as their constructor.
  MissingDataException.prototype.constructor = MissingDataException;

  return MissingDataException;
})();
/**
 * Error type thrown when the cross-reference data could not be parsed.
 * The worker checks for it with `instanceof` to decide whether to retry
 * loading in recovery mode.
 */
var XRefParseException = (function XRefParseExceptionClosure() {
  /**
   * @param {string} msg Stored as the `message` property.
   */
  function XRefParseException(msg) {
    this.message = msg;
  }

  XRefParseException.prototype = new Error();
  XRefParseException.prototype.name = 'XRefParseException';
  // `constructor` must be set on the prototype; assigning it to the function
  // itself leaves instances reporting Error as their constructor.
  XRefParseException.prototype.constructor = XRefParseException;

  return XRefParseException;
})();
function bytesToString(bytes) {
var str = '';
var length = bytes.length;
@ -482,6 +521,13 @@ function stringToUTF8String(str) {
return decodeURIComponent(escape(str));
}
/**
 * Tells whether a value has no enumerable properties. Inherited enumerable
 * properties count, because the check uses for...in.
 *
 * @param obj Value to inspect; null and undefined yield true.
 * @return {boolean} true when for...in visits nothing.
 */
function isEmptyObj(obj) {
  var empty = true;
  for (var anyKey in obj) {
    // One enumerable key is enough to decide.
    empty = false;
    break;
  }
  return empty;
}
/**
 * Tells whether a value is a primitive boolean.
 *
 * @param v Value to test.
 * @return {boolean} true only for the primitives true and false; Boolean
 *   wrapper objects yield false.
 */
function isBool(v) {
  return v === true || v === false;
}

View file

@ -16,7 +16,9 @@
*/
/* globals error, globalScope, InvalidPDFException, log,
MissingPDFException, PasswordException, PDFDocument, PDFJS, Promise,
Stream, UnknownErrorException, warn */
Stream, UnknownErrorException, warn, NetworkManager, LocalPdfManager,
NetworkPdfManager, XRefParseException, NotImplementedException,
isInt */
'use strict';
@ -107,58 +109,124 @@ MessageHandler.prototype = {
var WorkerMessageHandler = {
setup: function wphSetup(handler) {
var pdfModel = null;
var pdfManager;
function loadDocument(pdfData, pdfModelSource) {
// Create only the model of the PDFDoc, which is enough for
// processing the content of the pdf.
var pdfPassword = pdfModelSource.password;
try {
pdfModel = new PDFDocument(new Stream(pdfData), pdfPassword);
} catch (e) {
if (e instanceof PasswordException) {
if (e.code === 'needpassword') {
handler.send('NeedPassword', {
exception: e
});
} else if (e.code === 'incorrectpassword') {
handler.send('IncorrectPassword', {
exception: e
});
/**
 * Parses the document through pdfManager and collects the summary that is
 * sent to the main thread.
 *
 * The steps run in order: 'checkHeader', 'parseStartXRef', then 'parse'
 * (given recoveryMode); afterwards the individual document properties are
 * gathered in parallel. A failure at any step rejects the returned promise,
 * so the caller can always react (for example by retrying in recovery mode)
 * instead of waiting on a promise that never settles.
 *
 * @param {boolean} recoveryMode Forwarded to the model's 'parse' step.
 * @return {Promise} Resolved with { numPages, fingerprint, outline, info,
 *   metadata, encrypted, javaScript }; rejected with the first failure.
 */
function loadDocument(recoveryMode) {
  var loadDocumentPromise = new PDFJS.Promise();

  var parseFailure = function parseFailure(e) {
    loadDocumentPromise.reject(e);
  };

  var parseSuccess = function parseSuccess() {
    var numPagesPromise = pdfManager.ensureModel('numPages');
    var fingerprintPromise = pdfManager.ensureModel('fingerprint');
    var outlinePromise = pdfManager.ensureCatalog('documentOutline');
    var infoPromise = pdfManager.ensureModel('documentInfo');
    var metadataPromise = pdfManager.ensureCatalog('metadata');
    var encryptedPromise = pdfManager.ensureXRef('encrypt');
    var javaScriptPromise = pdfManager.ensureCatalog('javaScript');

    PDFJS.Promise.all([numPagesPromise, fingerprintPromise, outlinePromise,
                       infoPromise, metadataPromise, encryptedPromise,
                       javaScriptPromise]).then(
      function onDocReady(results) {
        var doc = {
          numPages: results[0],
          fingerprint: results[1],
          outline: results[2],
          info: results[3],
          metadata: results[4],
          // Only the presence of an encrypt entry is reported.
          encrypted: !!results[5],
          javaScript: results[6]
        };
        loadDocumentPromise.resolve(doc);
      },
      // Propagate a failure of any of the property lookups.
      parseFailure);
  };

  // Each stage gets the failure handler: the stages are mutually exclusive,
  // so the returned promise is rejected at most once.
  pdfManager.ensureModel('checkHeader').then(function() {
    pdfManager.ensureModel('parseStartXRef').then(function() {
      pdfManager.ensureModel('parse', recoveryMode).then(
        parseSuccess, parseFailure);
    }, parseFailure);
  }, parseFailure);

  return loadDocumentPromise;
}
function getPdfManager(data) {
var pdfManagerPromise = new PDFJS.Promise();
var source = data.source;
var disableRange = data.disableRange;
if (source.data) {
pdfManager = new LocalPdfManager(source.data, source.password);
pdfManagerPromise.resolve();
return pdfManagerPromise;
} else if (source.chunkedViewerLoading) {
pdfManager = new NetworkPdfManager(source, handler);
pdfManagerPromise.resolve();
return pdfManagerPromise;
}
var networkManager = new NetworkManager(source.url, {
httpHeaders: source.httpHeaders
});
var fullRequestXhrId = networkManager.requestFull({
onHeadersReceived: function onHeadersReceived() {
if (disableRange) {
return;
}
return;
} else if (e instanceof InvalidPDFException) {
handler.send('InvalidPDF', {
exception: e
});
var fullRequestXhr = networkManager.getRequestXhr(fullRequestXhrId);
if (fullRequestXhr.getResponseHeader('Accept-Ranges') !== 'bytes') {
return;
}
return;
} else if (e instanceof MissingPDFException) {
handler.send('MissingPDF', {
exception: e
});
var length = fullRequestXhr.getResponseHeader('Content-Length');
length = parseInt(length, 10);
if (!isInt(length)) {
return;
}
return;
} else {
handler.send('UnknownError', {
exception: new UnknownErrorException(e.message, e.toString())
});
// NOTE: by cancelling the full request, and then issuing range
// requests, there will be an issue for sites where you can only
// request the pdf once. However, if this is the case, then the
// server should not be returning that it can support range requests.
networkManager.abortRequest(fullRequestXhrId);
return;
source.length = length;
pdfManager = new NetworkPdfManager(source, handler);
pdfManagerPromise.resolve(pdfManager);
},
onDone: function onDone(args) {
// the data is array, instantiating directly from it
pdfManager = new LocalPdfManager(args.chunk, source.password);
pdfManagerPromise.resolve();
},
onError: function onError(status) {
if (status == 404) {
var exception = new MissingPDFException( 'Missing PDF "' +
source.url + '".');
handler.send('MissingPDF', { exception: exception });
} else {
handler.send('DocError', 'Unexpected server response (' +
status + ') while retrieving PDF "' +
source.url + '".');
}
},
onProgress: function onProgress(evt) {
handler.send('DocProgress', {
loaded: evt.loaded,
total: evt.lengthComputable ? evt.total : void(0)
});
}
}
var doc = {
numPages: pdfModel.numPages,
fingerprint: pdfModel.getFingerprint(),
destinations: pdfModel.catalog.destinations,
javaScript: pdfModel.catalog.javaScript,
outline: pdfModel.catalog.documentOutline,
info: pdfModel.getDocumentInfo(),
metadata: pdfModel.catalog.metadata,
encrypted: !!pdfModel.xref.encrypt
};
handler.send('GetDoc', {pdfInfo: doc});
});
return pdfManagerPromise;
}
handler.on('test', function wphSetupTest(data) {
@ -184,140 +252,183 @@ var WorkerMessageHandler = {
});
handler.on('GetDocRequest', function wphSetupDoc(data) {
var source = data.source;
if (source.data) {
// the data is array, instantiating directly from it
loadDocument(source.data, source);
return;
}
PDFJS.getPdf(
{
url: source.url,
progress: function getPDFProgress(evt) {
handler.send('DocProgress', {
loaded: evt.loaded,
total: evt.lengthComputable ? evt.total : void(0)
var onSuccess = function(doc) {
handler.send('GetDoc', { pdfInfo: doc });
pdfManager.ensureModel('traversePages');
};
var onFailure = function(e) {
if (e instanceof PasswordException) {
if (e.code === 'needpassword') {
handler.send('NeedPassword', {
exception: e
});
},
error: function getPDFError(e) {
if (e.target.status == 404) {
handler.send('MissingPDF', {
exception: new MissingPDFException(
'Missing PDF \"' + source.url + '\".')});
} else {
handler.send('DocError', 'Unexpected server response (' +
e.target.status + ') while retrieving PDF \"' +
source.url + '\".');
}
},
headers: source.httpHeaders
},
function getPDFLoad(data) {
loadDocument(data, source);
} else if (e.code === 'incorrectpassword') {
handler.send('IncorrectPassword', {
exception: e
});
}
} else if (e instanceof InvalidPDFException) {
handler.send('InvalidPDF', {
exception: e
});
} else if (e instanceof MissingPDFException) {
handler.send('MissingPDF', {
exception: e
});
} else {
handler.send('UnknownError', {
exception: new UnknownErrorException(e.message, e.toString())
});
}
};
getPdfManager(data).then(function() {
loadDocument(false).then(onSuccess, function(ex) {
// Try again with recoveryMode == true
if (!(ex instanceof XRefParseException)) {
onFailure(ex);
return;
}
pdfManager.onLoadedStream().then(function() {
loadDocument(true).then(onSuccess, onFailure);
});
});
});
});
handler.on('GetPageRequest', function wphSetupGetPage(data) {
var pageNumber = data.pageIndex + 1;
var pdfPage = pdfModel.getPage(pageNumber);
var page = {
pageIndex: data.pageIndex,
rotate: pdfPage.rotate,
ref: pdfPage.ref,
view: pdfPage.view
};
handler.send('GetPage', {pageInfo: page});
var pageIndex = data.pageIndex;
pdfManager.getPage(pageIndex).then(function(page) {
var rotatePromise = pdfManager.ensure(page, 'rotate');
var refPromise = pdfManager.ensure(page, 'ref');
var viewPromise = pdfManager.ensure(page, 'view');
PDFJS.Promise.all([rotatePromise, refPromise, viewPromise]).then(
function(results) {
var page = {
pageIndex: data.pageIndex,
rotate: results[0],
ref: results[1],
view: results[2]
};
handler.send('GetPage', { pageInfo: page });
});
});
});
handler.on('GetDestinations',
function wphSetupGetDestinations(data, promise) {
pdfManager.ensureCatalog('destinations').then(function(destinations) {
promise.resolve(destinations);
});
}
);
handler.on('GetData', function wphSetupGetData(data, promise) {
promise.resolve(pdfModel.stream.bytes);
pdfManager.requestLoadedStream();
pdfManager.onLoadedStream().then(function(stream) {
promise.resolve(stream.bytes);
});
});
handler.on('DataLoaded', function wphSetupDataLoaded(data, promise) {
pdfManager.onLoadedStream().then(function(stream) {
promise.resolve({ length: stream.bytes.byteLength });
});
});
handler.on('GetAnnotationsRequest', function wphSetupGetAnnotations(data) {
var pdfPage = pdfModel.getPage(data.pageIndex + 1);
handler.send('GetAnnotations', {
pageIndex: data.pageIndex,
annotations: pdfPage.getAnnotations()
pdfManager.getPage(data.pageIndex).then(function(page) {
pdfManager.ensure(page, 'getAnnotations').then(function(annotations) {
handler.send('GetAnnotations', {
pageIndex: data.pageIndex,
annotations: annotations
});
});
});
});
handler.on('RenderPageRequest', function wphSetupRenderPage(data) {
var pageNum = data.pageIndex + 1;
pdfManager.getPage(data.pageIndex).then(function(page) {
var start = Date.now();
var dependency = [];
var operatorList = null;
try {
var page = pdfModel.getPage(pageNum);
var pageNum = data.pageIndex + 1;
var start = Date.now();
var dependency = [];
// Pre compile the pdf page and fetch the fonts/images.
operatorList = page.getOperatorList(handler, dependency);
} catch (e) {
var minimumStackMessage =
'worker.js: while trying to getPage() and getOperatorList()';
pdfManager.ensure(page, 'getOperatorList', handler,
dependency).then(function(operatorList) {
var wrappedException;
// The following code does quite the same as
// Page.prototype.startRendering, but stops at one point and sends the
// result back to the main thread.
// Turn the error into an obj that can be serialized
if (typeof e === 'string') {
wrappedException = {
message: e,
stack: minimumStackMessage
};
} else if (typeof e === 'object') {
wrappedException = {
message: e.message || e.toString(),
stack: e.stack || minimumStackMessage
};
} else {
wrappedException = {
message: 'Unknown exception type: ' + (typeof e),
stack: minimumStackMessage
};
}
log('page=%d - getOperatorList: time=%dms, len=%d', pageNum,
Date.now() - start, operatorList.fnArray.length);
handler.send('PageError', {
pageNum: pageNum,
error: wrappedException
// Filter the dependecies for fonts.
var fonts = {};
for (var i = 0, ii = dependency.length; i < ii; i++) {
var dep = dependency[i];
if (dep.indexOf('g_font_') === 0) {
fonts[dep] = true;
}
}
handler.send('RenderPage', {
pageIndex: data.pageIndex,
operatorList: operatorList,
depFonts: Object.keys(fonts)
});
}, function(e) {
var minimumStackMessage =
'worker.js: while trying to getPage() and getOperatorList()';
var wrappedException;
// Turn the error into an obj that can be serialized
if (typeof e === 'string') {
wrappedException = {
message: e,
stack: minimumStackMessage
};
} else if (typeof e === 'object') {
wrappedException = {
message: e.message || e.toString(),
stack: e.stack || minimumStackMessage
};
} else {
wrappedException = {
message: 'Unknown exception type: ' + (typeof e),
stack: minimumStackMessage
};
}
handler.send('PageError', {
pageNum: pageNum,
error: wrappedException
});
});
return;
}
log('page=%d - getOperatorList: time=%dms, len=%d', pageNum,
Date.now() - start, operatorList.fnArray.length);
// Filter the dependecies for fonts.
var fonts = {};
for (var i = 0, ii = dependency.length; i < ii; i++) {
var dep = dependency[i];
if (dep.indexOf('g_font_') === 0) {
fonts[dep] = true;
}
}
handler.send('RenderPage', {
pageIndex: data.pageIndex,
operatorList: operatorList,
depFonts: Object.keys(fonts)
});
}, this);
handler.on('GetTextContent', function wphExtractText(data, promise) {
var pageNum = data.pageIndex + 1;
var start = Date.now();
var textContent = '';
try {
var page = pdfModel.getPage(pageNum);
textContent = page.extractTextContent();
promise.resolve(textContent);
} catch (e) {
// Skip errored pages
promise.reject(e);
}
log('text indexing: page=%d - time=%dms',
pageNum, Date.now() - start);
pdfManager.getPage(data.pageIndex).then(function(page) {
var pageNum = data.pageIndex + 1;
var start = Date.now();
pdfManager.ensure(page,
'extractTextContent').then(function(textContent) {
promise.resolve(textContent);
log('text indexing: page=%d - time=%dms', pageNum,
Date.now() - start);
}, function (e) {
// Skip errored pages
promise.reject(e);
});
});
});
}
};

View file

@ -19,6 +19,9 @@
// List of files to include;
var files = [
'network.js',
'chunked_stream.js',
'pdf_manager.js',
'core.js',
'util.js',
'canvas.js',