mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-21 07:38:07 +02:00
[api-minor] Add a parameter to PDFPageProxy_getTextContent
that enables replacing all whitespace with standard spaces in the textLayer (issue 6612)
This patch goes a bit further than issue 6612 requires and replaces all kinds of whitespace with standard spaces. When tested locally, it also seemed to slightly improve two existing test cases (`tracemonkey-text` and `taro-text`). Fixes #6612.
This commit is contained in:
parent
c2dfe9e9a9
commit
6dfe53b976
12 changed files with 75 additions and 24 deletions
|
@ -66,7 +66,6 @@ var PDFFindController = (function PDFFindControllerClosure() {
|
|||
'\u00BC': '1/4', // Vulgar fraction one quarter
|
||||
'\u00BD': '1/2', // Vulgar fraction one half
|
||||
'\u00BE': '3/4', // Vulgar fraction three quarters
|
||||
'\u00A0': ' ' // No-break space
|
||||
};
|
||||
this.findBar = options.findBar || null;
|
||||
|
||||
|
|
|
@ -489,7 +489,7 @@ var PDFPageView = (function PDFPageViewClosure() {
|
|||
function pdfPageRenderCallback() {
|
||||
pageViewDrawCallback(null);
|
||||
if (textLayer) {
|
||||
self.pdfPage.getTextContent().then(
|
||||
self.pdfPage.getTextContent({ normalizeWhitespace: true }).then(
|
||||
function textContentResolved(textContent) {
|
||||
textLayer.setTextContent(textContent);
|
||||
textLayer.render(TEXT_LAYER_RENDER_DELAY);
|
||||
|
|
|
@ -471,7 +471,7 @@ var PDFViewer = (function pdfViewer() {
|
|||
if (!this.pdfDocument) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
var pageView = this._pages[pageNumber - 1];
|
||||
|
||||
if (this.isInPresentationMode) {
|
||||
|
@ -729,7 +729,7 @@ var PDFViewer = (function pdfViewer() {
|
|||
|
||||
getPageTextContent: function (pageIndex) {
|
||||
return this.pdfDocument.getPage(pageIndex + 1).then(function (page) {
|
||||
return page.getTextContent();
|
||||
return page.getTextContent({ normalizeWhitespace: true });
|
||||
});
|
||||
},
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue