1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-20 23:28:06 +02:00

[api-minor] Add a parameter to PDFPageProxy_getTextContent that enables replacing all whitespace with standard spaces in the textLayer (issue 6612)

This patch goes a bit further than issue 6612 requires, and replaces all kinds of whitespace with standard spaces.

In local testing, this change actually seemed to slightly improve two existing test cases (`tracemonkey-text` and `taro-text`).

Fixes 6612.
This commit is contained in:
Jonas Jenwald 2015-11-23 16:57:43 +01:00
parent c2dfe9e9a9
commit 6dfe53b976
12 changed files with 75 additions and 24 deletions

View file

@ -334,10 +334,12 @@ var Driver = (function DriverClosure() {
textLayerContext.clearRect(0, 0,
textLayerCanvas.width, textLayerCanvas.height);
// The text builder will draw its content on the test canvas
initPromise = page.getTextContent().then(function(textContent) {
return rasterizeTextLayer(textLayerContext, viewport,
textContent);
});
initPromise =
page.getTextContent({ normalizeWhitespace: true }).then(
function(textContent) {
return rasterizeTextLayer(textLayerContext, viewport,
textContent);
});
} else {
textLayerCanvas = null;
initPromise = Promise.resolve();

View file

@ -49,6 +49,7 @@
!issue5280.pdf
!issue5677.pdf
!issue5954.pdf
!issue6612.pdf
!alphatrans.pdf
!devicen.pdf
!cmykjpeg.pdf

BIN
test/pdfs/issue6612.pdf Normal file

Binary file not shown.

View file

@ -1271,6 +1271,13 @@
"link": false,
"type": "eq"
},
{ "id": "issue6612-text",
"file": "pdfs/issue6612.pdf",
"md5": "657f33236496916597cd70ef1222509a",
"rounds": 1,
"link": false,
"type": "text"
},
{ "id": "zerowidthline",
"file": "pdfs/zerowidthline.pdf",
"md5": "295d26e61a85635433f8e4b768953f60",

View file

@ -482,11 +482,21 @@ describe('api', function() {
});
});
it('gets text content', function () {
var promise = page.getTextContent();
waitsForPromiseResolved(promise, function (data) {
expect(!!data.items).toEqual(true);
expect(data.items.length).toEqual(7);
expect(!!data.styles).toEqual(true);
var defaultPromise = page.getTextContent();
var normalizeWhitespacePromise = page.getTextContent({
normalizeWhitespace: true });
var promises = [
defaultPromise,
normalizeWhitespacePromise
];
waitsForPromiseResolved(Promise.all(promises), function (data) {
expect(!!data[0].items).toEqual(true);
expect(data[0].items.length).toEqual(7);
expect(!!data[0].styles).toEqual(true);
// A simple check that ensures the two `textContent` objects match.
expect(JSON.stringify(data[0])).toEqual(JSON.stringify(data[1]));
});
});
it('gets operator list', function() {