mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-22 16:18:08 +02:00
Make getTextContent return offset array and improve the algorithm. Make
parts in viewer.js work again.
This commit is contained in:
parent
e13846821c
commit
a38c4bc729
3 changed files with 64 additions and 7 deletions
|
@ -505,7 +505,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||
return queue;
|
||||
},
|
||||
|
||||
getTextContent: function partialEvaluatorGetIRQueue(stream, resources) {
|
||||
getTextContent: function partialEvaluatorGetIRQueue(stream, resources, state) {
|
||||
if (!state) {
|
||||
state = {
|
||||
text: '',
|
||||
mapping: []
|
||||
};
|
||||
}
|
||||
|
||||
var self = this;
|
||||
var xref = this.xref;
|
||||
|
@ -515,18 +521,22 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||
}
|
||||
|
||||
resources = xref.fetchIfRef(resources) || new Dict();
|
||||
// The xobj is parsed iff it's needed, e.g. if there is a `Do` cmd.
|
||||
var xobjs = null;
|
||||
|
||||
var parser = new Parser(new Lexer(stream), false);
|
||||
var res = resources;
|
||||
var args = [], obj;
|
||||
|
||||
var text = '';
|
||||
var text = state.text;
|
||||
var chunk = '';
|
||||
var commandOffset = state.mapping;
|
||||
var font = null;
|
||||
while (!isEOF(obj = parser.getObj())) {
|
||||
if (isCmd(obj)) {
|
||||
var cmd = obj.cmd;
|
||||
switch (cmd) {
|
||||
// TODO: Add support for SAVE/RESTORE and XFORM here.
|
||||
case 'Tf':
|
||||
font = handleSetFont(args[0].name).translated;
|
||||
break;
|
||||
|
@ -536,9 +546,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||
if (typeof items[j] === 'string') {
|
||||
chunk += fontCharsToUnicode(items[j], font);
|
||||
} else if (items[j] < 0) {
|
||||
// making all negative offsets a space - better to have
|
||||
// a space in incorrect place than not have them at all
|
||||
chunk += ' ';
|
||||
} else if (items[j] < 0 && font.spacedWidth > 0) {
|
||||
var numFakeSpaces = Math.round(-e / font.spacedWidth);
|
||||
if (numFakeSpaces > 0) {
|
||||
chunk += ' ';
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -551,8 +564,49 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||
case '"':
|
||||
chunk += fontCharsToUnicode(args[2], font) + ' ';
|
||||
break;
|
||||
case 'Do':
|
||||
// Clear the chunk so that the `if` after the switch won't add anything
|
||||
// to the state.
|
||||
chunk = '';
|
||||
|
||||
if (args[0].code) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (!xobjs) {
|
||||
xobjs = resources.get('XObject') || new Dict();
|
||||
}
|
||||
|
||||
var name = args[0].name;
|
||||
var xobj = xobjs.get(name);
|
||||
if (!xobj)
|
||||
break;
|
||||
assertWellFormed(isStream(xobj), 'XObject should be a stream');
|
||||
|
||||
var type = xobj.dict.get('Subtype');
|
||||
assertWellFormed(
|
||||
isName(type),
|
||||
'XObject should have a Name subtype'
|
||||
);
|
||||
|
||||
if ('Form' !== type.name)
|
||||
break;
|
||||
|
||||
// Add some spacing between the text here and the text of the
|
||||
// xForm.
|
||||
text = text + ' ';
|
||||
|
||||
state.text = text;
|
||||
state = this.getTextContent(
|
||||
xobj,
|
||||
xobj.dict.get('Resources') || resources,
|
||||
state
|
||||
);
|
||||
text = state.text;
|
||||
break;
|
||||
} // switch
|
||||
if (chunk !== '') {
|
||||
commandOffset.push(text.length);
|
||||
text += chunk;
|
||||
chunk = '';
|
||||
}
|
||||
|
@ -564,7 +618,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||
}
|
||||
}
|
||||
|
||||
return text;
|
||||
return {
|
||||
text: text,
|
||||
mapping: commandOffset
|
||||
};
|
||||
},
|
||||
|
||||
extractDataStructures: function
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue