mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-20 23:28:06 +02:00
[api-minor] Add a parameter to PDFPageProxy_getTextContent
that enables replacing all whitespace with standard spaces in the textLayer (issue 6612)
This patch goes a bit further than issue 6612 requires, and replaces all kinds of whitespace with standard spaces. In local testing, this change actually seemed to slightly improve two existing test-cases (`tracemonkey-text` and `taro-text`). Fixes issue 6612.
This commit is contained in:
parent
c2dfe9e9a9
commit
6dfe53b976
12 changed files with 75 additions and 24 deletions
|
@ -334,10 +334,12 @@ var Driver = (function DriverClosure() {
|
|||
textLayerContext.clearRect(0, 0,
|
||||
textLayerCanvas.width, textLayerCanvas.height);
|
||||
// The text builder will draw its content on the test canvas
|
||||
initPromise = page.getTextContent().then(function(textContent) {
|
||||
return rasterizeTextLayer(textLayerContext, viewport,
|
||||
textContent);
|
||||
});
|
||||
initPromise =
|
||||
page.getTextContent({ normalizeWhitespace: true }).then(
|
||||
function(textContent) {
|
||||
return rasterizeTextLayer(textLayerContext, viewport,
|
||||
textContent);
|
||||
});
|
||||
} else {
|
||||
textLayerCanvas = null;
|
||||
initPromise = Promise.resolve();
|
||||
|
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
|
@ -49,6 +49,7 @@
|
|||
!issue5280.pdf
|
||||
!issue5677.pdf
|
||||
!issue5954.pdf
|
||||
!issue6612.pdf
|
||||
!alphatrans.pdf
|
||||
!devicen.pdf
|
||||
!cmykjpeg.pdf
|
||||
|
|
BIN
test/pdfs/issue6612.pdf
Normal file
BIN
test/pdfs/issue6612.pdf
Normal file
Binary file not shown.
|
@ -1271,6 +1271,13 @@
|
|||
"link": false,
|
||||
"type": "eq"
|
||||
},
|
||||
{ "id": "issue6612-text",
|
||||
"file": "pdfs/issue6612.pdf",
|
||||
"md5": "657f33236496916597cd70ef1222509a",
|
||||
"rounds": 1,
|
||||
"link": false,
|
||||
"type": "text"
|
||||
},
|
||||
{ "id": "zerowidthline",
|
||||
"file": "pdfs/zerowidthline.pdf",
|
||||
"md5": "295d26e61a85635433f8e4b768953f60",
|
||||
|
|
|
@ -482,11 +482,21 @@ describe('api', function() {
|
|||
});
|
||||
});
|
||||
it('gets text content', function () {
|
||||
var promise = page.getTextContent();
|
||||
waitsForPromiseResolved(promise, function (data) {
|
||||
expect(!!data.items).toEqual(true);
|
||||
expect(data.items.length).toEqual(7);
|
||||
expect(!!data.styles).toEqual(true);
|
||||
var defaultPromise = page.getTextContent();
|
||||
var normalizeWhitespacePromise = page.getTextContent({
|
||||
normalizeWhitespace: true });
|
||||
|
||||
var promises = [
|
||||
defaultPromise,
|
||||
normalizeWhitespacePromise
|
||||
];
|
||||
waitsForPromiseResolved(Promise.all(promises), function (data) {
|
||||
expect(!!data[0].items).toEqual(true);
|
||||
expect(data[0].items.length).toEqual(7);
|
||||
expect(!!data[0].styles).toEqual(true);
|
||||
|
||||
// A simple check that ensures the two `textContent` objects match.
|
||||
expect(JSON.stringify(data[0])).toEqual(JSON.stringify(data[1]));
|
||||
});
|
||||
});
|
||||
it('gets operator list', function() {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue