1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-21 23:58:07 +02:00

Adds Streams API in getTextContent to stream data.

This patch adds Streams API support in getTextContent
so that we can stream data in chunks instead of fetching
whole data from worker thread to main thread. This patch
supports Streams API without changing the core functionality
of getTextContent.

Enqueue textContent directly at getTextContent in partialEvaluator.

Adds desiredSize and ready property in streamSink.
This commit is contained in:
Mukul Mishra 2017-04-17 18:16:53 +05:30
parent 209751346c
commit 0c13d0ff46
8 changed files with 275 additions and 114 deletions

View file

@ -24,8 +24,6 @@ import {
import { getGlobalEventBus } from './dom_events';
import { RenderingStates } from './pdf_rendering_queue';
const TEXT_LAYER_RENDER_DELAY = 200; // ms
/**
* @typedef {Object} PDFPageViewOptions
* @property {HTMLDivElement} container - The viewer element.
@ -444,12 +442,11 @@ class PDFPageView {
let resultPromise = paintTask.promise.then(function() {
return finishPaintTask(null).then(function () {
if (textLayer) {
pdfPage.getTextContent({
let readableStream = pdfPage.streamTextContent({
normalizeWhitespace: true,
}).then(function textContentResolved(textContent) {
textLayer.setTextContent(textContent);
textLayer.render(TEXT_LAYER_RENDER_DELAY);
});
textLayer.setTextContentStream(readableStream);
textLayer.render();
}
});
}, function(reason) {

View file

@ -41,6 +41,8 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
this.textLayerDiv = options.textLayerDiv;
this.eventBus = options.eventBus || getGlobalEventBus();
this.textContent = null;
this.textContentItemsStr = [];
this.textContentStream = null;
this.renderingDone = false;
this.pageIdx = options.pageIndex;
this.pageNumber = this.pageIdx + 1;
@ -79,7 +81,7 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
* for specified amount of ms.
*/
render: function TextLayerBuilder_render(timeout) {
if (!this.textContent || this.renderingDone) {
if (!(this.textContent || this.textContentStream) || this.renderingDone) {
return;
}
this.cancel();
@ -88,9 +90,11 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
var textLayerFrag = document.createDocumentFragment();
this.textLayerRenderTask = renderTextLayer({
textContent: this.textContent,
textContentStream: this.textContentStream,
container: textLayerFrag,
viewport: this.viewport,
textDivs: this.textDivs,
textContentItemsStr: this.textContentItemsStr,
timeout,
enhanceTextSelection: this.enhanceTextSelection,
});
@ -113,6 +117,11 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
}
},
setTextContentStream(readableStream) {
this.cancel();
this.textContentStream = readableStream;
},
setTextContent: function TextLayerBuilder_setTextContent(textContent) {
this.cancel();
this.textContent = textContent;
@ -122,8 +131,8 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
matchesLength) {
var i = 0;
var iIndex = 0;
var bidiTexts = this.textContent.items;
var end = bidiTexts.length - 1;
let textContentItemsStr = this.textContentItemsStr;
var end = textContentItemsStr.length - 1;
var queryLen = (this.findController === null ?
0 : this.findController.state.query.length);
var ret = [];
@ -135,12 +144,13 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
var matchIdx = matches[m];
// Loop over the divIdxs.
while (i !== end && matchIdx >= (iIndex + bidiTexts[i].str.length)) {
iIndex += bidiTexts[i].str.length;
while (i !== end && matchIdx >=
(iIndex + textContentItemsStr[i].length)) {
iIndex += textContentItemsStr[i].length;
i++;
}
if (i === bidiTexts.length) {
if (i === textContentItemsStr.length) {
console.error('Could not find a matching mapping');
}
@ -160,8 +170,9 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
// Somewhat the same array as above, but use > instead of >= to get
// the end position right.
while (i !== end && matchIdx > (iIndex + bidiTexts[i].str.length)) {
iIndex += bidiTexts[i].str.length;
while (i !== end && matchIdx >
(iIndex + textContentItemsStr[i].length)) {
iIndex += textContentItemsStr[i].length;
i++;
}
@ -181,7 +192,7 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
return;
}
var bidiTexts = this.textContent.items;
let textContentItemsStr = this.textContentItemsStr;
var textDivs = this.textDivs;
var prevEnd = null;
var pageIdx = this.pageIdx;
@ -204,7 +215,8 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
function appendTextToDiv(divIdx, fromOffset, toOffset, className) {
var div = textDivs[divIdx];
var content = bidiTexts[divIdx].str.substring(fromOffset, toOffset);
var content =
textContentItemsStr[divIdx].substring(fromOffset, toOffset);
var node = document.createTextNode(content);
if (className) {
var span = document.createElement('span');
@ -277,7 +289,7 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
// Clear all matches.
var matches = this.matches;
var textDivs = this.textDivs;
var bidiTexts = this.textContent.items;
let textContentItemsStr = this.textContentItemsStr;
var clearedUntilDivIdx = -1;
// Clear all current matches.
@ -286,7 +298,7 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
var begin = Math.max(clearedUntilDivIdx, match.begin.divIdx);
for (var n = begin, end = match.end.divIdx; n <= end; n++) {
var div = textDivs[n];
div.textContent = bidiTexts[n].str;
div.textContent = textContentItemsStr[n];
div.className = '';
}
clearedUntilDivIdx = match.end.divIdx + 1;