1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-26 01:58:06 +02:00

Adds Streams API in getTextContent to stream data.

This patch adds Streams API support to getTextContent
so that we can stream data in chunks instead of transferring
all of the data at once from the worker thread to the main
thread. This patch supports the Streams API without changing
the core functionality of getTextContent.

Enqueues textContent directly in getTextContent in partialEvaluator.

Adds desiredSize and ready properties to streamSink.
This commit is contained in:
Mukul Mishra 2017-04-17 18:16:53 +05:30
parent 209751346c
commit 0c13d0ff46
8 changed files with 275 additions and 114 deletions

View file

@ -874,30 +874,35 @@ var WorkerMessageHandler = {
});
}, this);
handler.on('GetTextContent', function wphExtractText(data) {
handler.on('GetTextContent', function wphExtractText(data, sink) {
var pageIndex = data.pageIndex;
return pdfManager.getPage(pageIndex).then(function(page) {
sink.onPull = function (desiredSize) { };
sink.onCancel = function (reason) { };
pdfManager.getPage(pageIndex).then(function(page) {
var task = new WorkerTask('GetTextContent: page ' + pageIndex);
startWorkerTask(task);
var pageNum = pageIndex + 1;
var start = Date.now();
return page.extractTextContent({
page.extractTextContent({
handler,
task,
sink,
normalizeWhitespace: data.normalizeWhitespace,
combineTextItems: data.combineTextItems,
}).then(function(textContent) {
}).then(function() {
finishWorkerTask(task);
info('text indexing: page=' + pageNum + ' - time=' +
(Date.now() - start) + 'ms');
return textContent;
sink.close();
}, function (reason) {
finishWorkerTask(task);
if (task.terminated) {
return; // ignoring errors from the terminated thread
}
sink.error(reason);
throw reason;
});
});