From 84ae4f9a5ee4c737397a819cdbb56339d1a72fdb Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 26 Oct 2018 20:23:32 +0200 Subject: [PATCH] Only normalize the text-content once, in `PDFFindController`, and not on every new search operation Currently the text-content is normalized every time that a new search operation is started, which seems completely useless considering that the "raw" text-content is never used for anything. For a short document, such as e.g. the `tracemonkey` file, this repeated normalization won't matter much, but for documents with a couple of thousand pages it seems completely unnecessary (and wasteful) to keep repeating the normalization whenever e.g. a new search operation starts. --- web/pdf_find_controller.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js index 904d1f813..1db2e32bd 100644 --- a/web/pdf_find_controller.js +++ b/web/pdf_find_controller.js @@ -160,7 +160,7 @@ class PDFFindController { matchIdx: null, }; this._extractTextPromises = []; - this._pageContents = []; // Stores the text for each page. + this._pageContents = []; // Stores the normalized text for each page. this._matchesCountTotal = 0; this._pagesToSearch = null; this._pendingFindMatches = Object.create(null); @@ -306,7 +306,7 @@ class PDFFindController { } _calculateMatch(pageIndex) { - let pageContent = normalize(this._pageContents[pageIndex]); + let pageContent = this._pageContents[pageIndex]; let query = normalize(this._state.query); const { caseSensitive, entireWord, phraseSearch, } = this._state; @@ -364,8 +364,8 @@ class PDFFindController { strBuf.push(textItems[j].str); } - // Store the page content (text items) as one string. - this._pageContents[i] = strBuf.join(''); + // Store the normalized page content (text items) as one string. + this._pageContents[i] = normalize(strBuf.join('')); extractTextCapability.resolve(i); }, (reason) => { console.error(`Unable to get text content for page ${i + 1}`, reason);