1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-22 16:18:08 +02:00

Extract one page after the other and not all pages at once

This commit is contained in:
Julian Viereck 2012-04-08 16:18:43 -07:00
parent 3c77291013
commit c9fb5637c3
3 changed files with 49 additions and 49 deletions

View file

@ -698,6 +698,9 @@ var PDFDoc = (function PDFDocClosure() {
this.fontsLoading = {};
this.workerReadyPromise = new Promise('workerReady');
this.pageText = [];
this.startedTextExtraction = false;
// If worker support isn't explicitly disabled and the browser has worker
// support, create a new web worker and test if it/the browser fulfills
// all requirements to run parts of pdf.js in a web worker.
@ -769,7 +772,6 @@ var PDFDoc = (function PDFDocClosure() {
WorkerMessageHandler.setup(messageHandler);
},
setupMessageHandler: function PDFDoc_setupMessageHandler(messageHandler) {
this.messageHandler = messageHandler;
@ -825,9 +827,18 @@ var PDFDoc = (function PDFDocClosure() {
}, this);
// Handles the text of one page coming back with a 'text_extracted' message.
// `data` is a two-element array: data[0] is the 1-based page number and
// data[1] is the text content extracted for that page.
messageHandler.on('text_extracted', function pdfTextExtracted(data) {
  var pageNum = data[0];
  var content = data[1];

  // Results are appended to this.pageText in page order, so the page that
  // just arrived has to be exactly the next slot; anything else means the
  // request/response sequence got out of step.
  if (pageNum !== this.pageText.length + 1)
    error('pdfTextExtracted: pageIdx and pageText length got to fit');
  this.pageText.push(content);

  // The textExtracted callback is optional: only call it when it is set,
  // and only once per page.
  if (this.textExtracted)
    this.textExtracted(pageNum, content);

  // Request the following page until the last one has been received.
  if (pageNum < this.numPages)
    this.extractTextPage(pageNum + 1);
}, this);
messageHandler.on('jpeg_decode', function(data, promise) {
@ -895,9 +906,19 @@ var PDFDoc = (function PDFDocClosure() {
return (this.pageCache[n] = page);
},
extractTextPage: function PDFDoc_extractTextPage(pageNum) {
this.messageHandler.send('extract_text', pageNum);
},
extractText: function PDFDoc_extractText() {
if (this.startedTextExtraction)
return;
this.startedTextExtraction = true;
this.workerReadyPromise.then(function pdfDocStartRenderingThen() {
this.messageHandler.send('extract_text');
// Start the text extraction process.
this.extractTextPage(1);
}.bind(this));
},

View file

@ -94,7 +94,6 @@ var WorkerMessageHandler = {
handler.on('page_request', function wphSetupPageRequest(pageNum) {
pageNum = parseInt(pageNum);
// The following code does quite the same as
// Page.prototype.startRendering, but stops at one point and sends the
// result back to the main thread.
@ -156,37 +155,20 @@ var WorkerMessageHandler = {
});
}, this);
// Extracts the text of exactly one page per 'extract_text' request and
// answers with a 'text_extracted' message carrying [pageNum, textContent].
// Handling a single page per message means the handler returns after each
// page instead of walking the whole document in one go.
handler.on('extract_text', function wphExtractText(pageNum) {
  var start = Date.now();

  var textContent = '';
  try {
    var page = pdfModel.getPage(pageNum);
    textContent = page.extractTextContent();
  } catch (e) {
    // Skip errored pages: textContent stays '' and a reply is still sent
    // below, so every request gets an answer.
  }

  console.log('text indexing: page=%d - time=%dms',
              pageNum, Date.now() - start);
  handler.send('text_extracted', [pageNum, textContent]);
});
}
};