1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-26 01:58:06 +02:00

Text char codes extraction

This commit is contained in:
notmasteryet 2011-12-10 17:24:54 -06:00
parent 853f16085f
commit 3b72c6063c
4 changed files with 152 additions and 13 deletions

View file

@ -160,6 +160,28 @@ var WorkerMessageHandler = {
handler.send('font_ready', [objId, obj]);
});
// Handles the 'extract_text' message: collects the extracted text content of
// every page of pdfDoc and replies with a 'text_extracted' message carrying
// { index: [...] }, where index[i] is the entry for page i + 1.
handler.on('extract_text', function wphExtractText() {
  var numPages = pdfDoc.numPages;
  var index = [];
  // Start the timer once, before the loop, so the logged duration covers the
  // whole indexing pass (and is a valid number even when there are no pages).
  var start = Date.now();
  for (var i = 0; i < numPages; i++) {
    // Default to an empty string so that index positions stay aligned with
    // page numbers even when extraction of a page fails.
    var textContent = '';
    try {
      // getPage() is called with 1-based page numbers.
      var page = pdfDoc.getPage(i + 1);
      textContent = page.extractTextContent();
    } catch (e) {
      // Best effort: keep the empty placeholder for this page, but report the
      // failure instead of swallowing it silently.
      console.log('text indexing: skipped page %d: %s', i + 1, e);
    }
    index.push(textContent);
  }
  console.log('text indexing=: time=%dms', Date.now() - start);
  handler.send('text_extracted', { index: index });
});
}
};