Merge pull request #18549 from nicolo-ribaudo/custom-find-matcher-subclass

[api-minor] Allow specifying custom match logic in PDFFindController
2025-04-19 22:58:07 +02:00 · 2024-08-13 12:17:27 +02:00 · 2024-08-13 12:17:27 +02:00 · a999b346d0
commit a999b346d0
parent 17b24639b9 f051597e23
2 changed files with 157 additions and 59 deletions
--- a/web/pdf_find_controller.js
+++ b/web/pdf_find_controller.js
@ -670,37 +670,6 @@ class PDFFindController {
    return true;
  }

-  #calculateRegExpMatch(query, entireWord, pageIndex, pageContent) {
-    const matches = (this._pageMatches[pageIndex] = []);
-    const matchesLength = (this._pageMatchesLength[pageIndex] = []);
-    if (!query) {
-      // The query can be empty because some chars like diacritics could have
-      // been stripped out.
-      return;
-    }
-    const diffs = this._pageDiffs[pageIndex];
-    let match;
-    while ((match = query.exec(pageContent)) !== null) {
-      if (
-        entireWord &&
-        !this.#isEntireWord(pageContent, match.index, match[0].length)
-      ) {
-        continue;
-      }
-
-      const [matchPos, matchLen] = getOriginalIndex(
-        diffs,
-        match.index,
-        match[0].length
-      );
-
-      if (matchLen) {
-        matches.push(matchPos);
-        matchesLength.push(matchLen);
-      }
-    }
-  }
-
  #convertToRegExpString(query, hasDiacritics) {
    const { matchDiacritics } = this.#state;
    let isUnicode = false;
@ -772,12 +741,64 @@ class PDFFindController {
  }

  #calculateMatch(pageIndex) {
-    let query = this.#query;
+    const query = this.#query;
    if (query.length === 0) {
      return; // Do nothing: the matches should be wiped out already.
    }
-    const { caseSensitive, entireWord } = this.#state;
    const pageContent = this._pageContents[pageIndex];
+    const matcherResult = this.match(query, pageContent, pageIndex);
+
+    const matches = (this._pageMatches[pageIndex] = []);
+    const matchesLength = (this._pageMatchesLength[pageIndex] = []);
+    const diffs = this._pageDiffs[pageIndex];
+
+    matcherResult?.forEach(({ index, length }) => {
+      const [matchPos, matchLen] = getOriginalIndex(diffs, index, length);
+      if (matchLen) {
+        matches.push(matchPos);
+        matchesLength.push(matchLen);
+      }
+    });
+
+    // When `highlightAll` is set, ensure that the matches on previously
+    // rendered (and still active) pages are correctly highlighted.
+    if (this.#state.highlightAll) {
+      this.#updatePage(pageIndex);
+    }
+    if (this._resumePageIdx === pageIndex) {
+      this._resumePageIdx = null;
+      this.#nextPageMatch();
+    }
+
+    // Update the match count.
+    const pageMatchesCount = matches.length;
+    this._matchesCountTotal += pageMatchesCount;
+    if (this.#updateMatchesCountOnProgress) {
+      if (pageMatchesCount > 0) {
+        this.#updateUIResultsCount();
+      }
+    } else if (++this.#visitedPagesCount === this._linkService.pagesCount) {
+      // For example, in GeckoView we want to have only the final update because
+      // the Java side provides only one object to update the counts.
+      this.#updateUIResultsCount();
+    }
+  }
+
+  /**
+   * @typedef {Object} FindMatch
+   * @property {number} index - The start of the matched text in the page's
+   *   string contents.
+   * @property {number} length - The length of the matched text.
+   */
+
+  /**
+   * @param {string | string[]} query - The search query.
+   * @param {string} pageContent - The text content of the page to search in.
+   * @param {number} pageIndex - The index of the page that is being processed.
+   * @returns {FindMatch[] | undefined} An array of matches in the provided
+   *   page, or `undefined` when the processed query ends up empty.
+   */
+  match(query, pageContent, pageIndex) {
    const hasDiacritics = this._hasDiacritics[pageIndex];

    let isUnicode = false;
@ -799,34 +820,28 @@ class PDFFindController {
        })
        .join("|");
    }
+    if (!query) {
+      // The query can be empty because some chars like diacritics could have
+      // been stripped out.
+      return undefined;
+    }

+    const { caseSensitive, entireWord } = this.#state;
    const flags = `g${isUnicode ? "u" : ""}${caseSensitive ? "" : "i"}`;
-    query = query ? new RegExp(query, flags) : null;
+    query = new RegExp(query, flags);

-    this.#calculateRegExpMatch(query, entireWord, pageIndex, pageContent);
-
-    // When `highlightAll` is set, ensure that the matches on previously
-    // rendered (and still active) pages are correctly highlighted.
-    if (this.#state.highlightAll) {
-      this.#updatePage(pageIndex);
-    }
-    if (this._resumePageIdx === pageIndex) {
-      this._resumePageIdx = null;
-      this.#nextPageMatch();
-    }
-
-    // Update the match count.
-    const pageMatchesCount = this._pageMatches[pageIndex].length;
-    this._matchesCountTotal += pageMatchesCount;
-    if (this.#updateMatchesCountOnProgress) {
-      if (pageMatchesCount > 0) {
-        this.#updateUIResultsCount();
+    const matches = [];
+    let match;
+    while ((match = query.exec(pageContent)) !== null) {
+      if (
+        entireWord &&
+        !this.#isEntireWord(pageContent, match.index, match[0].length)
+      ) {
+        continue;
      }
-    } else if (++this.#visitedPagesCount === this._linkService.pagesCount) {
-      // For example, in GeckoView we want to have only the final update because
-      // the Java side provides only one object to update the counts.
-      this.#updateUIResultsCount();
+      matches.push({ index: match.index, length: match[0].length });
    }
+    return matches;
  }

  #extractText() {
@ -1103,7 +1118,7 @@ class PDFFindController {
      current += matchIdx + 1;
    }
    // When searching starts, this method may be called before the `pageMatches`
-    // have been counted (in `_calculateMatch`). Ensure that the UI won't show
+    // have been counted (in `#calculateMatch`). Ensure that the UI won't show
    // temporarily broken state when the active find result doesn't make sense.
    if (current < 1 || current > total) {
      current = total = 0;