mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-26 01:58:06 +02:00
Merge text search with current master
This commit is contained in:
commit
3c77291013
89 changed files with 11599 additions and 3372 deletions
110
src/worker.js
110
src/worker.js
|
@ -26,7 +26,7 @@ function MessageHandler(name, comObj) {
|
|||
delete callbacks[callbackId];
|
||||
callback(data.data);
|
||||
} else {
|
||||
throw 'Cannot resolve callback ' + callbackId;
|
||||
error('Cannot resolve callback ' + callbackId);
|
||||
}
|
||||
} else if (data.action in ah) {
|
||||
var action = ah[data.action];
|
||||
|
@ -44,7 +44,7 @@ function MessageHandler(name, comObj) {
|
|||
action[0].call(action[1], data.data);
|
||||
}
|
||||
} else {
|
||||
throw 'Unkown action from worker: ' + data.action;
|
||||
error('Unkown action from worker: ' + data.action);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -53,7 +53,7 @@ MessageHandler.prototype = {
|
|||
on: function messageHandlerOn(actionName, handler, scope) {
|
||||
var ah = this.actionHandler;
|
||||
if (ah[actionName]) {
|
||||
throw 'There is already an actionName called "' + actionName + '"';
|
||||
error('There is already an actionName called "' + actionName + '"');
|
||||
}
|
||||
ah[actionName] = [handler, scope];
|
||||
},
|
||||
|
@ -79,7 +79,7 @@ MessageHandler.prototype = {
|
|||
|
||||
var WorkerMessageHandler = {
|
||||
setup: function wphSetup(handler) {
|
||||
var pdfDoc = null;
|
||||
var pdfModel = null;
|
||||
|
||||
handler.on('test', function wphSetupTest(data) {
|
||||
handler.send('test', data instanceof Uint8Array);
|
||||
|
@ -88,7 +88,7 @@ var WorkerMessageHandler = {
|
|||
handler.on('doc', function wphSetupDoc(data) {
|
||||
// Create only the model of the PDFDoc, which is enough for
|
||||
// processing the content of the pdf.
|
||||
pdfDoc = new PDFDocModel(new Stream(data));
|
||||
pdfModel = new PDFDocModel(new Stream(data));
|
||||
});
|
||||
|
||||
handler.on('page_request', function wphSetupPageRequest(pageNum) {
|
||||
|
@ -103,17 +103,33 @@ var WorkerMessageHandler = {
|
|||
var start = Date.now();
|
||||
|
||||
var dependency = [];
|
||||
var IRQueue = null;
|
||||
var operatorList = null;
|
||||
try {
|
||||
var page = pdfDoc.getPage(pageNum);
|
||||
var page = pdfModel.getPage(pageNum);
|
||||
// Pre compile the pdf page and fetch the fonts/images.
|
||||
IRQueue = page.getIRQueue(handler, dependency);
|
||||
operatorList = page.getOperatorList(handler, dependency);
|
||||
} catch (e) {
|
||||
var minimumStackMessage =
|
||||
'worker.js: while trying to getPage() and getOperatorList()';
|
||||
|
||||
// Turn the error into an obj that can be serialized
|
||||
e = {
|
||||
message: typeof e === 'object' ? e.message : e,
|
||||
stack: typeof e === 'object' ? e.stack : null
|
||||
};
|
||||
if (typeof e === 'string') {
|
||||
e = {
|
||||
message: e,
|
||||
stack: minimumStackMessage
|
||||
};
|
||||
} else if (typeof e === 'object') {
|
||||
e = {
|
||||
message: e.message || e.toString(),
|
||||
stack: e.stack || minimumStackMessage
|
||||
};
|
||||
} else {
|
||||
e = {
|
||||
message: 'Unknown exception type: ' + (typeof e),
|
||||
stack: minimumStackMessage
|
||||
};
|
||||
}
|
||||
|
||||
handler.send('page_error', {
|
||||
pageNum: pageNum,
|
||||
error: e
|
||||
|
@ -121,8 +137,8 @@ var WorkerMessageHandler = {
|
|||
return;
|
||||
}
|
||||
|
||||
console.log('page=%d - getIRQueue: time=%dms, len=%d', pageNum,
|
||||
Date.now() - start, IRQueue.fnArray.length);
|
||||
console.log('page=%d - getOperatorList: time=%dms, len=%d', pageNum,
|
||||
Date.now() - start, operatorList.fnArray.length);
|
||||
|
||||
// Filter the dependencies for fonts.
|
||||
var fonts = {};
|
||||
|
@ -135,62 +151,13 @@ var WorkerMessageHandler = {
|
|||
|
||||
handler.send('page', {
|
||||
pageNum: pageNum,
|
||||
IRQueue: IRQueue,
|
||||
operatorList: operatorList,
|
||||
depFonts: Object.keys(fonts)
|
||||
});
|
||||
}, this);
|
||||
|
||||
handler.on('font', function wphSetupFont(data) {
|
||||
var objId = data[0];
|
||||
var name = data[1];
|
||||
var file = data[2];
|
||||
var properties = data[3];
|
||||
|
||||
var font = {
|
||||
name: name,
|
||||
file: file,
|
||||
properties: properties
|
||||
};
|
||||
|
||||
// Some fonts don't have a file, e.g. the built-in ones like Arial.
|
||||
if (file) {
|
||||
var fontFileDict = new Dict();
|
||||
fontFileDict.map = file.dict.map;
|
||||
|
||||
var fontFile = new Stream(file.bytes, file.start,
|
||||
file.end - file.start, fontFileDict);
|
||||
|
||||
// Check if this is a FlateStream. Otherwise just use the created
|
||||
// Stream one. This makes complex_ttf_font.pdf work.
|
||||
var cmf = file.bytes[0];
|
||||
if ((cmf & 0x0f) == 0x08) {
|
||||
font.file = new FlateStream(fontFile);
|
||||
} else {
|
||||
font.file = fontFile;
|
||||
}
|
||||
}
|
||||
|
||||
var obj = new Font(font.name, font.file, font.properties);
|
||||
|
||||
var str = '';
|
||||
var objData = obj.data;
|
||||
if (objData) {
|
||||
var length = objData.length;
|
||||
for (var j = 0; j < length; ++j)
|
||||
str += String.fromCharCode(objData[j]);
|
||||
}
|
||||
|
||||
obj.str = str;
|
||||
|
||||
// Remove the data array from the font object, as it's not needed
|
||||
// anymore as we sent over the ready str.
|
||||
delete obj.data;
|
||||
|
||||
handler.send('font_ready', [objId, obj]);
|
||||
});
|
||||
|
||||
handler.on('extract_text', function wphExtractText() {
|
||||
var numPages = pdfDoc.numPages;
|
||||
var numPages = pdfModel.numPages;
|
||||
var index = [];
|
||||
var start = Date.now();
|
||||
|
||||
|
@ -203,12 +170,12 @@ var WorkerMessageHandler = {
|
|||
}
|
||||
|
||||
var textContent = '';
|
||||
try {
|
||||
var page = pdfDoc.getPage(pageNum);
|
||||
// try {
|
||||
var page = pdfModel.getPage(pageNum);
|
||||
textContent = page.extractTextContent();
|
||||
} catch (e) {
|
||||
// Skip errored pages
|
||||
}
|
||||
// } catch (e) {
|
||||
// // Skip errored pages
|
||||
// }
|
||||
|
||||
index.push(textContent);
|
||||
|
||||
|
@ -241,6 +208,7 @@ var workerConsole = {
|
|||
action: 'console_error',
|
||||
data: args
|
||||
});
|
||||
throw 'pdf.js execution error';
|
||||
},
|
||||
|
||||
time: function time(name) {
|
||||
|
@ -250,7 +218,7 @@ var workerConsole = {
|
|||
timeEnd: function timeEnd(name) {
|
||||
var time = consoleTimer[name];
|
||||
if (time == null) {
|
||||
throw 'Unkown timer name ' + name;
|
||||
error('Unkown timer name ' + name);
|
||||
}
|
||||
this.log('Timer:', name, Date.now() - time);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue