1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-20 23:28:06 +02:00

[api-minor] Add a parameter to `PDFPageProxy_getTextContent` that controls whether `PartialEvaluator_getTextContent` will attempt to combine same-line text items

From the discussion in issue 7445, there appear to be cases where an API consumer would want to get the text content as is, i.e. without same-line text items being combined.
This commit is contained in:
Jonas Jenwald 2016-07-03 18:29:47 +02:00
parent 9228a04061
commit f297e4d17c
8 changed files with 42 additions and 27 deletions

View file

@ -332,7 +332,7 @@ var Driver = (function DriverClosure() {
this._log('Loading file "' + task.file + '"\n');
var absoluteUrl = new URL(task.file, window.location).href;
var absoluteUrl = new URL(task.file, window.location).href;
PDFJS.disableRange = task.disableRange;
PDFJS.disableAutoFetch = !task.enableAutoFetch;
try {
@ -469,12 +469,12 @@ var Driver = (function DriverClosure() {
textLayerContext.clearRect(0, 0,
textLayerCanvas.width, textLayerCanvas.height);
// The text builder will draw its content on the test canvas
initPromise =
page.getTextContent({ normalizeWhitespace: true }).then(
function(textContent) {
return rasterizeTextLayer(textLayerContext, viewport,
textContent);
});
initPromise = page.getTextContent({
normalizeWhitespace: true,
}).then(function(textContent) {
return rasterizeTextLayer(textLayerContext, viewport,
textContent);
});
} else {
textLayerCanvas = null;

View file

@ -771,12 +771,14 @@ describe('api', function() {
});
it('gets text content', function (done) {
var defaultPromise = page.getTextContent();
var normalizeWhitespacePromise = page.getTextContent({
normalizeWhitespace: true });
var parametersPromise = page.getTextContent({
normalizeWhitespace: true,
disableCombineTextItems: true,
});
var promises = [
defaultPromise,
normalizeWhitespacePromise
parametersPromise,
];
Promise.all(promises).then(function (data) {
expect(!!data[0].items).toEqual(true);