From 12d8b52c499da7f7c2eb003c3213eb6ce77b906b Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 26 Oct 2018 18:22:32 +0200 Subject: [PATCH 1/3] Move the `normalize` helper function out of `PDFFindController` In the event that multiple instances of `PDFFindController` ever exist simultaneously, they will all be able to share just one `normalize` function in this way. Furthermore, the regular expression is now created lazily rather than at class construction time. --- web/pdf_find_controller.js | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js index 5c19b4435..904d1f813 100644 --- a/web/pdf_find_controller.js +++ b/web/pdf_find_controller.js @@ -40,6 +40,18 @@ const CHARACTERS_TO_NORMALIZE = { '\u00BE': '3/4', // Vulgar fraction three quarters }; +let normalizationRegex = null; +function normalize(text) { + if (!normalizationRegex) { + // Compile the regular expression for text normalization once. + const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join(''); + normalizationRegex = new RegExp(`[${replace}]`, 'g'); + } + return text.replace(normalizationRegex, function(ch) { + return CHARACTERS_TO_NORMALIZE[ch]; + }); +} + /** * @typedef {Object} PDFFindControllerOptions * @property {IPDFLinkService} linkService - The navigation/linking service. @@ -59,10 +71,6 @@ class PDFFindController { this._reset(); eventBus.on('findbarclose', this._onFindBarClose.bind(this)); - - // Compile the regular expression for text normalization once. 
- const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join(''); - this._normalizationRegex = new RegExp(`[${replace}]`, 'g'); } get highlightMatches() { @@ -164,12 +172,6 @@ class PDFFindController { this._firstPageCapability = createPromiseCapability(); } - _normalize(text) { - return text.replace(this._normalizationRegex, function(ch) { - return CHARACTERS_TO_NORMALIZE[ch]; - }); - } - /** * Helper for multi-term search that fills the `matchesWithLength` array * and handles cases where one search term includes another search term (for @@ -304,8 +306,8 @@ class PDFFindController { } _calculateMatch(pageIndex) { - let pageContent = this._normalize(this._pageContents[pageIndex]); - let query = this._normalize(this._state.query); + let pageContent = normalize(this._pageContents[pageIndex]); + let query = normalize(this._state.query); const { caseSensitive, entireWord, phraseSearch, } = this._state; if (query.length === 0) { From 84ae4f9a5ee4c737397a819cdbb56339d1a72fdb Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 26 Oct 2018 20:23:32 +0200 Subject: [PATCH 2/3] Only normalize the text-content once, in `PDFFindController`, and not on every new search operation Currently the text-content is normalized every time that a new search operation is started, which seems completely useless considering that the "raw" text-content is never used for anything. For a short document, such as e.g. the `tracemonkey` file, this repeated normalization won't matter much, but for documents with a couple of thousand pages it seems completely unnecessary (and wasteful) to keep repeating the normalization whenever e.g. a new search operation starts. 
--- web/pdf_find_controller.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js index 904d1f813..1db2e32bd 100644 --- a/web/pdf_find_controller.js +++ b/web/pdf_find_controller.js @@ -160,7 +160,7 @@ class PDFFindController { matchIdx: null, }; this._extractTextPromises = []; - this._pageContents = []; // Stores the text for each page. + this._pageContents = []; // Stores the normalized text for each page. this._matchesCountTotal = 0; this._pagesToSearch = null; this._pendingFindMatches = Object.create(null); @@ -306,7 +306,7 @@ class PDFFindController { } _calculateMatch(pageIndex) { - let pageContent = normalize(this._pageContents[pageIndex]); + let pageContent = this._pageContents[pageIndex]; let query = normalize(this._state.query); const { caseSensitive, entireWord, phraseSearch, } = this._state; @@ -364,8 +364,8 @@ class PDFFindController { strBuf.push(textItems[j].str); } - // Store the page content (text items) as one string. - this._pageContents[i] = strBuf.join(''); + // Store the normalized page content (text items) as one string. + this._pageContents[i] = normalize(strBuf.join('')); extractTextCapability.resolve(i); }, (reason) => { console.error(`Unable to get text content for page ${i + 1}`, reason); From 5dc12f9a6de58ae0db4c24fb3dc8723ae12131e5 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 26 Oct 2018 20:53:42 +0200 Subject: [PATCH 3/3] Only normalize the search query once, in `PDFFindController`, for every page being searched For a short document, such as e.g. the `tracemonkey` file, this repeated normalization won't matter much, but for documents with a couple of thousand pages it seems completely unnecessary (and wasteful) to keep repeating the normalization for every single page. 
--- web/pdf_find_controller.js | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js index 1db2e32bd..a7b2eb0a6 100644 --- a/web/pdf_find_controller.js +++ b/web/pdf_find_controller.js @@ -172,6 +172,17 @@ class PDFFindController { this._firstPageCapability = createPromiseCapability(); } + /** + * @return {string} The (current) normalized search query. + */ + get _query() { + if (this._state.query !== this._rawQuery) { + this._rawQuery = this._state.query; + this._normalizedQuery = normalize(this._state.query); + } + return this._normalizedQuery; + } + /** * Helper for multi-term search that fills the `matchesWithLength` array * and handles cases where one search term includes another search term (for @@ -307,7 +318,7 @@ class PDFFindController { _calculateMatch(pageIndex) { let pageContent = this._pageContents[pageIndex]; - let query = normalize(this._state.query); + let query = this._query; const { caseSensitive, entireWord, phraseSearch, } = this._state; if (query.length === 0) { @@ -425,7 +436,7 @@ class PDFFindController { } // If there's no query there's no point in searching. - if (this._state.query === '') { + if (this._query === '') { this._updateUIState(FindState.FOUND); return; }