mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-26 10:08:06 +02:00
Fix stream reset; yield the thread between pages while indexing text
This commit is contained in:
parent
94cc2cdb75
commit
4d44eb6184
2 changed files with 19 additions and 8 deletions
|
@ -205,7 +205,7 @@ var Page = (function PageClosure() {
|
||||||
streams.push(xref.fetchIfRef(content[i]));
|
streams.push(xref.fetchIfRef(content[i]));
|
||||||
content = new StreamsSequenceStream(streams);
|
content = new StreamsSequenceStream(streams);
|
||||||
} else if (isStream(content))
|
} else if (isStream(content))
|
||||||
content.pos = 0;
|
content.reset();
|
||||||
|
|
||||||
var pe = this.pe = new PartialEvaluator(
|
var pe = this.pe = new PartialEvaluator(
|
||||||
xref, handler, 'p' + this.pageNumber + '_');
|
xref, handler, 'p' + this.pageNumber + '_');
|
||||||
|
@ -236,7 +236,7 @@ var Page = (function PageClosure() {
|
||||||
streams.push(xref.fetchIfRef(content[i]));
|
streams.push(xref.fetchIfRef(content[i]));
|
||||||
content = new StreamsSequenceStream(streams);
|
content = new StreamsSequenceStream(streams);
|
||||||
} else if (isStream(content))
|
} else if (isStream(content))
|
||||||
content.pos = 0;
|
content.reset();
|
||||||
|
|
||||||
var pe = new PartialEvaluator(
|
var pe = new PartialEvaluator(
|
||||||
xref, handler, 'p' + this.pageNumber + '_');
|
xref, handler, 'p' + this.pageNumber + '_');
|
||||||
|
|
|
@ -164,23 +164,34 @@ var WorkerMessageHandler = {
|
||||||
handler.on('extract_text', function wphExtractText() {
|
handler.on('extract_text', function wphExtractText() {
|
||||||
var numPages = pdfDoc.numPages;
|
var numPages = pdfDoc.numPages;
|
||||||
var index = [];
|
var index = [];
|
||||||
for (var i = 0; i < numPages; i++) {
|
var start = Date.now();
|
||||||
var start = Date.now();
|
|
||||||
|
function indexPage(pageNum) {
|
||||||
|
if (pageNum > numPages) {
|
||||||
|
console.log('text indexing=: time=%dms', Date.now() - start);
|
||||||
|
|
||||||
|
handler.send('text_extracted', { index: index });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
var textContent = '';
|
var textContent = '';
|
||||||
try {
|
try {
|
||||||
var page = pdfDoc.getPage(i + 1);
|
var page = pdfDoc.getPage(pageNum);
|
||||||
textContent = page.extractTextContent();
|
textContent = page.extractTextContent();
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
// Skip errored pages
|
// Skip errored pages
|
||||||
}
|
}
|
||||||
|
|
||||||
index.push(textContent);
|
index.push(textContent);
|
||||||
|
|
||||||
|
// processing one page, interrupting thread to process
|
||||||
|
// other requests
|
||||||
|
setTimeout(function extractTextNextPage() {
|
||||||
|
indexPage(pageNum + 1);
|
||||||
|
}, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('text indexing=: time=%dms', Date.now() - start);
|
indexPage(1);
|
||||||
|
|
||||||
handler.send('text_extracted', { index: index });
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue