1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-22 16:18:08 +02:00

Make getTextContent return offset array and improve the algorithm. Make parts in viewer.js work again.
This commit is contained in:
Julian Viereck 2012-09-11 15:10:34 -07:00
parent e13846821c
commit a38c4bc729
3 changed files with 64 additions and 7 deletions

View file

@ -505,7 +505,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
return queue;
},
getTextContent: function partialEvaluatorGetIRQueue(stream, resources) {
getTextContent: function partialEvaluatorGetIRQueue(stream, resources, state) {
if (!state) {
state = {
text: '',
mapping: []
};
}
var self = this;
var xref = this.xref;
@ -515,18 +521,22 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
resources = xref.fetchIfRef(resources) || new Dict();
// The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd.
var xobjs = null;
var parser = new Parser(new Lexer(stream), false);
var res = resources;
var args = [], obj;
var text = '';
var text = state.text;
var chunk = '';
var commandOffset = state.mapping;
var font = null;
while (!isEOF(obj = parser.getObj())) {
if (isCmd(obj)) {
var cmd = obj.cmd;
switch (cmd) {
// TODO: Add support for SAVE/RESTORE and XFORM here.
case 'Tf':
font = handleSetFont(args[0].name).translated;
break;
@ -536,9 +546,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
if (typeof items[j] === 'string') {
chunk += fontCharsToUnicode(items[j], font);
} else if (items[j] < 0) {
// making all negative offsets a space - better to have
// a space in incorrect place than not have them at all
chunk += ' ';
} else if (items[j] < 0 && font.spacedWidth > 0) {
var numFakeSpaces = Math.round(-e / font.spacedWidth);
if (numFakeSpaces > 0) {
chunk += ' ';
}
}
}
break;
@ -551,8 +564,49 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
case '"':
chunk += fontCharsToUnicode(args[2], font) + ' ';
break;
case 'Do':
// Set the chunk such that the following if won't add something
// to the state.
chunk = '';
if (args[0].code) {
break;
}
if (!xobjs) {
xobjs = resources.get('XObject') || new Dict();
}
var name = args[0].name;
var xobj = xobjs.get(name);
if (!xobj)
break;
assertWellFormed(isStream(xobj), 'XObject should be a stream');
var type = xobj.dict.get('Subtype');
assertWellFormed(
isName(type),
'XObject should have a Name subtype'
);
if ('Form' !== type.name)
break;
// Add some spacing between the text here and the text of the
// xForm.
text = text + ' ';
state.text = text;
state = this.getTextContent(
xobj,
xobj.dict.get('Resources') || resources,
state
);
text = state.text;
break;
} // switch
if (chunk !== '') {
commandOffset.push(text.length);
text += chunk;
chunk = '';
}
@ -564,7 +618,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
}
return text;
return {
text: text,
mapping: commandOffset
};
},
extractDataStructures: function