1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-26 01:58:06 +02:00

Merge text search with current master

This commit is contained in:
Julian Viereck 2012-04-08 08:57:55 -07:00
commit 3c77291013
89 changed files with 11599 additions and 3372 deletions

View file

@ -26,7 +26,7 @@ function MessageHandler(name, comObj) {
delete callbacks[callbackId];
callback(data.data);
} else {
throw 'Cannot resolve callback ' + callbackId;
error('Cannot resolve callback ' + callbackId);
}
} else if (data.action in ah) {
var action = ah[data.action];
@ -44,7 +44,7 @@ function MessageHandler(name, comObj) {
action[0].call(action[1], data.data);
}
} else {
throw 'Unkown action from worker: ' + data.action;
error('Unkown action from worker: ' + data.action);
}
};
}
@ -53,7 +53,7 @@ MessageHandler.prototype = {
on: function messageHandlerOn(actionName, handler, scope) {
var ah = this.actionHandler;
if (ah[actionName]) {
throw 'There is already an actionName called "' + actionName + '"';
error('There is already an actionName called "' + actionName + '"');
}
ah[actionName] = [handler, scope];
},
@ -79,7 +79,7 @@ MessageHandler.prototype = {
var WorkerMessageHandler = {
setup: function wphSetup(handler) {
var pdfDoc = null;
var pdfModel = null;
handler.on('test', function wphSetupTest(data) {
handler.send('test', data instanceof Uint8Array);
@ -88,7 +88,7 @@ var WorkerMessageHandler = {
handler.on('doc', function wphSetupDoc(data) {
// Create only the model of the PDFDoc, which is enough for
// processing the content of the pdf.
pdfDoc = new PDFDocModel(new Stream(data));
pdfModel = new PDFDocModel(new Stream(data));
});
handler.on('page_request', function wphSetupPageRequest(pageNum) {
@ -103,17 +103,33 @@ var WorkerMessageHandler = {
var start = Date.now();
var dependency = [];
var IRQueue = null;
var operatorList = null;
try {
var page = pdfDoc.getPage(pageNum);
var page = pdfModel.getPage(pageNum);
// Pre compile the pdf page and fetch the fonts/images.
IRQueue = page.getIRQueue(handler, dependency);
operatorList = page.getOperatorList(handler, dependency);
} catch (e) {
var minimumStackMessage =
'worker.js: while trying to getPage() and getOperatorList()';
// Turn the error into an obj that can be serialized
e = {
message: typeof e === 'object' ? e.message : e,
stack: typeof e === 'object' ? e.stack : null
};
if (typeof e === 'string') {
e = {
message: e,
stack: minimumStackMessage
};
} else if (typeof e === 'object') {
e = {
message: e.message || e.toString(),
stack: e.stack || minimumStackMessage
};
} else {
e = {
message: 'Unknown exception type: ' + (typeof e),
stack: minimumStackMessage
};
}
handler.send('page_error', {
pageNum: pageNum,
error: e
@ -121,8 +137,8 @@ var WorkerMessageHandler = {
return;
}
console.log('page=%d - getIRQueue: time=%dms, len=%d', pageNum,
Date.now() - start, IRQueue.fnArray.length);
console.log('page=%d - getOperatorList: time=%dms, len=%d', pageNum,
Date.now() - start, operatorList.fnArray.length);
// Filter the dependencies for fonts.
var fonts = {};
@ -135,62 +151,13 @@ var WorkerMessageHandler = {
handler.send('page', {
pageNum: pageNum,
IRQueue: IRQueue,
operatorList: operatorList,
depFonts: Object.keys(fonts)
});
}, this);
handler.on('font', function wphSetupFont(data) {
var objId = data[0];
var name = data[1];
var file = data[2];
var properties = data[3];
var font = {
name: name,
file: file,
properties: properties
};
// Some fonts don't have a file, e.g. the built-in ones like Arial.
if (file) {
var fontFileDict = new Dict();
fontFileDict.map = file.dict.map;
var fontFile = new Stream(file.bytes, file.start,
file.end - file.start, fontFileDict);
// Check if this is a FlateStream. Otherwise just use the created
// Stream one. This makes complex_ttf_font.pdf work.
var cmf = file.bytes[0];
if ((cmf & 0x0f) == 0x08) {
font.file = new FlateStream(fontFile);
} else {
font.file = fontFile;
}
}
var obj = new Font(font.name, font.file, font.properties);
var str = '';
var objData = obj.data;
if (objData) {
var length = objData.length;
for (var j = 0; j < length; ++j)
str += String.fromCharCode(objData[j]);
}
obj.str = str;
// Remove the data array from the font object, as it's not needed
// anymore as we sent over the ready str.
delete obj.data;
handler.send('font_ready', [objId, obj]);
});
handler.on('extract_text', function wphExtractText() {
var numPages = pdfDoc.numPages;
var numPages = pdfModel.numPages;
var index = [];
var start = Date.now();
@ -203,12 +170,12 @@ var WorkerMessageHandler = {
}
var textContent = '';
try {
var page = pdfDoc.getPage(pageNum);
// try {
var page = pdfModel.getPage(pageNum);
textContent = page.extractTextContent();
} catch (e) {
// Skip errored pages
}
// } catch (e) {
// // Skip errored pages
// }
index.push(textContent);
@ -241,6 +208,7 @@ var workerConsole = {
action: 'console_error',
data: args
});
throw 'pdf.js execution error';
},
time: function time(name) {
@ -250,7 +218,7 @@ var workerConsole = {
timeEnd: function timeEnd(name) {
var time = consoleTimer[name];
if (time == null) {
throw 'Unkown timer name ' + name;
error('Unkown timer name ' + name);
}
this.log('Timer:', name, Date.now() - time);
}