1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-26 01:58:06 +02:00

Implement progressive loading of PDFs

This commit is contained in:
Mack Duan 2013-02-06 15:19:29 -08:00
parent added3da8f
commit ef423ef30c
25 changed files with 2110 additions and 586 deletions

View file

@ -16,7 +16,9 @@
*/
/* globals error, globalScope, InvalidPDFException, log,
MissingPDFException, PasswordException, PDFDocument, PDFJS, Promise,
Stream, UnknownErrorException, warn */
Stream, UnknownErrorException, warn, NetworkManager, LocalPdfManager,
NetworkPdfManager, XRefParseException, NotImplementedException,
isInt */
'use strict';
@ -107,58 +109,124 @@ MessageHandler.prototype = {
var WorkerMessageHandler = {
setup: function wphSetup(handler) {
var pdfModel = null;
var pdfManager;
function loadDocument(pdfData, pdfModelSource) {
// Create only the model of the PDFDoc, which is enough for
// processing the content of the pdf.
var pdfPassword = pdfModelSource.password;
try {
pdfModel = new PDFDocument(new Stream(pdfData), pdfPassword);
} catch (e) {
if (e instanceof PasswordException) {
if (e.code === 'needpassword') {
handler.send('NeedPassword', {
exception: e
});
} else if (e.code === 'incorrectpassword') {
handler.send('IncorrectPassword', {
exception: e
});
/**
 * Parses the document owned by the enclosing `pdfManager` and collects the
 * document-level summary.
 *
 * The steps 'checkHeader', 'parseStartXRef' and 'parse' are requested one
 * after another through pdfManager.ensureModel(); once 'parse' succeeds the
 * individual document properties are requested and combined.
 *
 * @param {boolean} recoveryMode - passed through as the extra argument of the
 *   'parse' request.
 * @returns {PDFJS.Promise} resolved with an object holding `numPages`,
 *   `fingerprint`, `outline`, `info`, `metadata`, `encrypted` and
 *   `javaScript`; rejected with the error of whichever step failed.
 */
function loadDocument(recoveryMode) {
  var loadDocumentPromise = new PDFJS.Promise();

  // Single rejection path: every step below routes its failure here so the
  // returned promise always settles instead of staying pending forever.
  var parseFailure = function parseFailure(e) {
    loadDocumentPromise.reject(e);
  };

  var parseSuccess = function parseSuccess() {
    var numPagesPromise = pdfManager.ensureModel('numPages');
    var fingerprintPromise = pdfManager.ensureModel('fingerprint');
    var outlinePromise = pdfManager.ensureCatalog('documentOutline');
    var infoPromise = pdfManager.ensureModel('documentInfo');
    var metadataPromise = pdfManager.ensureCatalog('metadata');
    var encryptedPromise = pdfManager.ensureXRef('encrypt');
    var javaScriptPromise = pdfManager.ensureCatalog('javaScript');
    PDFJS.Promise.all([numPagesPromise, fingerprintPromise, outlinePromise,
                       infoPromise, metadataPromise, encryptedPromise,
                       javaScriptPromise]).then(
      function onDocReady(results) {
        // The indices follow the order of the array passed to all() above.
        var doc = {
          numPages: results[0],
          fingerprint: results[1],
          outline: results[2],
          info: results[3],
          metadata: results[4],
          encrypted: !!results[5],
          javaScript: results[6]
        };
        loadDocumentPromise.resolve(doc);
      },
      // A property that cannot be read must fail the load as well.
      parseFailure);
  };

  pdfManager.ensureModel('checkHeader').then(function() {
    pdfManager.ensureModel('parseStartXRef').then(function() {
      pdfManager.ensureModel('parse', recoveryMode).then(
        parseSuccess, parseFailure);
    }, parseFailure);
  }, parseFailure);

  return loadDocumentPromise;
}
function getPdfManager(data) {
var pdfManagerPromise = new PDFJS.Promise();
var source = data.source;
var disableRange = data.disableRange;
if (source.data) {
pdfManager = new LocalPdfManager(source.data, source.password);
pdfManagerPromise.resolve();
return pdfManagerPromise;
} else if (source.chunkedViewerLoading) {
pdfManager = new NetworkPdfManager(source, handler);
pdfManagerPromise.resolve();
return pdfManagerPromise;
}
var networkManager = new NetworkManager(source.url, {
httpHeaders: source.httpHeaders
});
var fullRequestXhrId = networkManager.requestFull({
onHeadersReceived: function onHeadersReceived() {
if (disableRange) {
return;
}
return;
} else if (e instanceof InvalidPDFException) {
handler.send('InvalidPDF', {
exception: e
});
var fullRequestXhr = networkManager.getRequestXhr(fullRequestXhrId);
if (fullRequestXhr.getResponseHeader('Accept-Ranges') !== 'bytes') {
return;
}
return;
} else if (e instanceof MissingPDFException) {
handler.send('MissingPDF', {
exception: e
});
var length = fullRequestXhr.getResponseHeader('Content-Length');
length = parseInt(length, 10);
if (!isInt(length)) {
return;
}
return;
} else {
handler.send('UnknownError', {
exception: new UnknownErrorException(e.message, e.toString())
});
// NOTE: by cancelling the full request, and then issuing range
// requests, there will be an issue for sites where you can only
// request the pdf once. However, if this is the case, then the
// server should not be returning that it can support range requests.
networkManager.abortRequest(fullRequestXhrId);
return;
source.length = length;
pdfManager = new NetworkPdfManager(source, handler);
pdfManagerPromise.resolve(pdfManager);
},
onDone: function onDone(args) {
// the data is array, instantiating directly from it
pdfManager = new LocalPdfManager(args.chunk, source.password);
pdfManagerPromise.resolve();
},
onError: function onError(status) {
if (status == 404) {
var exception = new MissingPDFException( 'Missing PDF "' +
source.url + '".');
handler.send('MissingPDF', { exception: exception });
} else {
handler.send('DocError', 'Unexpected server response (' +
status + ') while retrieving PDF "' +
source.url + '".');
}
},
onProgress: function onProgress(evt) {
handler.send('DocProgress', {
loaded: evt.loaded,
total: evt.lengthComputable ? evt.total : void(0)
});
}
}
var doc = {
numPages: pdfModel.numPages,
fingerprint: pdfModel.getFingerprint(),
destinations: pdfModel.catalog.destinations,
javaScript: pdfModel.catalog.javaScript,
outline: pdfModel.catalog.documentOutline,
info: pdfModel.getDocumentInfo(),
metadata: pdfModel.catalog.metadata,
encrypted: !!pdfModel.xref.encrypt
};
handler.send('GetDoc', {pdfInfo: doc});
});
return pdfManagerPromise;
}
handler.on('test', function wphSetupTest(data) {
@ -184,140 +252,183 @@ var WorkerMessageHandler = {
});
handler.on('GetDocRequest', function wphSetupDoc(data) {
var source = data.source;
if (source.data) {
// the data is array, instantiating directly from it
loadDocument(source.data, source);
return;
}
PDFJS.getPdf(
{
url: source.url,
progress: function getPDFProgress(evt) {
handler.send('DocProgress', {
loaded: evt.loaded,
total: evt.lengthComputable ? evt.total : void(0)
var onSuccess = function(doc) {
handler.send('GetDoc', { pdfInfo: doc });
pdfManager.ensureModel('traversePages');
};
var onFailure = function(e) {
if (e instanceof PasswordException) {
if (e.code === 'needpassword') {
handler.send('NeedPassword', {
exception: e
});
},
error: function getPDFError(e) {
if (e.target.status == 404) {
handler.send('MissingPDF', {
exception: new MissingPDFException(
'Missing PDF \"' + source.url + '\".')});
} else {
handler.send('DocError', 'Unexpected server response (' +
e.target.status + ') while retrieving PDF \"' +
source.url + '\".');
}
},
headers: source.httpHeaders
},
function getPDFLoad(data) {
loadDocument(data, source);
} else if (e.code === 'incorrectpassword') {
handler.send('IncorrectPassword', {
exception: e
});
}
} else if (e instanceof InvalidPDFException) {
handler.send('InvalidPDF', {
exception: e
});
} else if (e instanceof MissingPDFException) {
handler.send('MissingPDF', {
exception: e
});
} else {
handler.send('UnknownError', {
exception: new UnknownErrorException(e.message, e.toString())
});
}
};
getPdfManager(data).then(function() {
loadDocument(false).then(onSuccess, function(ex) {
// Try again with recoveryMode == true
if (!(ex instanceof XRefParseException)) {
onFailure(ex);
return;
}
pdfManager.onLoadedStream().then(function() {
loadDocument(true).then(onSuccess, onFailure);
});
});
});
});
handler.on('GetPageRequest', function wphSetupGetPage(data) {
var pageNumber = data.pageIndex + 1;
var pdfPage = pdfModel.getPage(pageNumber);
var page = {
pageIndex: data.pageIndex,
rotate: pdfPage.rotate,
ref: pdfPage.ref,
view: pdfPage.view
};
handler.send('GetPage', {pageInfo: page});
var pageIndex = data.pageIndex;
pdfManager.getPage(pageIndex).then(function(page) {
var rotatePromise = pdfManager.ensure(page, 'rotate');
var refPromise = pdfManager.ensure(page, 'ref');
var viewPromise = pdfManager.ensure(page, 'view');
PDFJS.Promise.all([rotatePromise, refPromise, viewPromise]).then(
function(results) {
var page = {
pageIndex: data.pageIndex,
rotate: results[0],
ref: results[1],
view: results[2]
};
handler.send('GetPage', { pageInfo: page });
});
});
});
// Answers a 'GetDestinations' request with the catalog's 'destinations'
// value. A failure while obtaining it is forwarded to the reply promise, the
// same way the 'GetTextContent' handler reports its errors.
handler.on('GetDestinations',
  function wphSetupGetDestinations(data, promise) {
    pdfManager.ensureCatalog('destinations').then(function(destinations) {
      promise.resolve(destinations);
    }, function(reason) {
      promise.reject(reason);
    });
  }
);
handler.on('GetData', function wphSetupGetData(data, promise) {
promise.resolve(pdfModel.stream.bytes);
pdfManager.requestLoadedStream();
pdfManager.onLoadedStream().then(function(stream) {
promise.resolve(stream.bytes);
});
});
// Answers a 'DataLoaded' request with the byte length of the loaded stream
// once pdfManager.onLoadedStream() resolves. A failure is forwarded to the
// reply promise rather than being dropped.
handler.on('DataLoaded', function wphSetupDataLoaded(data, promise) {
  pdfManager.onLoadedStream().then(function(stream) {
    promise.resolve({ length: stream.bytes.byteLength });
  }, function(reason) {
    promise.reject(reason);
  });
});
handler.on('GetAnnotationsRequest', function wphSetupGetAnnotations(data) {
var pdfPage = pdfModel.getPage(data.pageIndex + 1);
handler.send('GetAnnotations', {
pageIndex: data.pageIndex,
annotations: pdfPage.getAnnotations()
pdfManager.getPage(data.pageIndex).then(function(page) {
pdfManager.ensure(page, 'getAnnotations').then(function(annotations) {
handler.send('GetAnnotations', {
pageIndex: data.pageIndex,
annotations: annotations
});
});
});
});
handler.on('RenderPageRequest', function wphSetupRenderPage(data) {
var pageNum = data.pageIndex + 1;
pdfManager.getPage(data.pageIndex).then(function(page) {
var start = Date.now();
var dependency = [];
var operatorList = null;
try {
var page = pdfModel.getPage(pageNum);
var pageNum = data.pageIndex + 1;
var start = Date.now();
var dependency = [];
// Pre compile the pdf page and fetch the fonts/images.
operatorList = page.getOperatorList(handler, dependency);
} catch (e) {
var minimumStackMessage =
'worker.js: while trying to getPage() and getOperatorList()';
pdfManager.ensure(page, 'getOperatorList', handler,
dependency).then(function(operatorList) {
var wrappedException;
// The following code does quite the same as
// Page.prototype.startRendering, but stops at one point and sends the
// result back to the main thread.
// Turn the error into an obj that can be serialized
if (typeof e === 'string') {
wrappedException = {
message: e,
stack: minimumStackMessage
};
} else if (typeof e === 'object') {
wrappedException = {
message: e.message || e.toString(),
stack: e.stack || minimumStackMessage
};
} else {
wrappedException = {
message: 'Unknown exception type: ' + (typeof e),
stack: minimumStackMessage
};
}
log('page=%d - getOperatorList: time=%dms, len=%d', pageNum,
Date.now() - start, operatorList.fnArray.length);
handler.send('PageError', {
pageNum: pageNum,
error: wrappedException
// Filter the dependencies for fonts.
var fonts = {};
for (var i = 0, ii = dependency.length; i < ii; i++) {
var dep = dependency[i];
if (dep.indexOf('g_font_') === 0) {
fonts[dep] = true;
}
}
handler.send('RenderPage', {
pageIndex: data.pageIndex,
operatorList: operatorList,
depFonts: Object.keys(fonts)
});
}, function(e) {
var minimumStackMessage =
'worker.js: while trying to getPage() and getOperatorList()';
var wrappedException;
// Turn the error into an obj that can be serialized
if (typeof e === 'string') {
wrappedException = {
message: e,
stack: minimumStackMessage
};
} else if (typeof e === 'object') {
wrappedException = {
message: e.message || e.toString(),
stack: e.stack || minimumStackMessage
};
} else {
wrappedException = {
message: 'Unknown exception type: ' + (typeof e),
stack: minimumStackMessage
};
}
handler.send('PageError', {
pageNum: pageNum,
error: wrappedException
});
});
return;
}
log('page=%d - getOperatorList: time=%dms, len=%d', pageNum,
Date.now() - start, operatorList.fnArray.length);
// Filter the dependencies for fonts.
var fonts = {};
for (var i = 0, ii = dependency.length; i < ii; i++) {
var dep = dependency[i];
if (dep.indexOf('g_font_') === 0) {
fonts[dep] = true;
}
}
handler.send('RenderPage', {
pageIndex: data.pageIndex,
operatorList: operatorList,
depFonts: Object.keys(fonts)
});
}, this);
handler.on('GetTextContent', function wphExtractText(data, promise) {
var pageNum = data.pageIndex + 1;
var start = Date.now();
var textContent = '';
try {
var page = pdfModel.getPage(pageNum);
textContent = page.extractTextContent();
promise.resolve(textContent);
} catch (e) {
// Skip errored pages
promise.reject(e);
}
log('text indexing: page=%d - time=%dms',
pageNum, Date.now() - start);
pdfManager.getPage(data.pageIndex).then(function(page) {
var pageNum = data.pageIndex + 1;
var start = Date.now();
pdfManager.ensure(page,
'extractTextContent').then(function(textContent) {
promise.resolve(textContent);
log('text indexing: page=%d - time=%dms', pageNum,
Date.now() - start);
}, function (e) {
// Skip errored pages
promise.reject(e);
});
});
});
}
};