1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-21 07:38:07 +02:00

[api-minor] Add a parameter to PDFPageProxy_getTextContent that enables replacing of all whitespace with standard spaces in the textLayer (issue 6612)

This patch goes a bit further than issue 6612 requires, and replaces all kinds of whitespace with standard spaces.

When testing this locally, it actually seemed to slightly improve two existing test-cases (`tracemonkey-text` and `taro-text`).

Fixes 6612.
This commit is contained in:
Jonas Jenwald 2015-11-23 16:57:43 +01:00
parent c2dfe9e9a9
commit 6dfe53b976
12 changed files with 75 additions and 24 deletions

View file

@ -66,7 +66,6 @@ var PDFFindController = (function PDFFindControllerClosure() {
'\u00BC': '1/4', // Vulgar fraction one quarter
'\u00BD': '1/2', // Vulgar fraction one half
'\u00BE': '3/4', // Vulgar fraction three quarters
'\u00A0': ' ' // No-break space
};
this.findBar = options.findBar || null;

View file

@ -489,7 +489,7 @@ var PDFPageView = (function PDFPageViewClosure() {
function pdfPageRenderCallback() {
pageViewDrawCallback(null);
if (textLayer) {
self.pdfPage.getTextContent().then(
self.pdfPage.getTextContent({ normalizeWhitespace: true }).then(
function textContentResolved(textContent) {
textLayer.setTextContent(textContent);
textLayer.render(TEXT_LAYER_RENDER_DELAY);

View file

@ -471,7 +471,7 @@ var PDFViewer = (function pdfViewer() {
if (!this.pdfDocument) {
return;
}
var pageView = this._pages[pageNumber - 1];
if (this.isInPresentationMode) {
@ -729,7 +729,7 @@ var PDFViewer = (function pdfViewer() {
getPageTextContent: function (pageIndex) {
return this.pdfDocument.getPage(pageIndex + 1).then(function (page) {
return page.getTextContent();
return page.getTextContent({ normalizeWhitespace: true });
});
},