1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-19 22:58:07 +02:00

Merge pull request #18549 from nicolo-ribaudo/custom-find-matcher-subclass

[api-minor] Allow specifying custom match logic in PDFFindController
This commit is contained in:
Jonas Jenwald 2024-08-13 12:17:27 +02:00 committed by GitHub
commit a999b346d0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 157 additions and 59 deletions

View file

@ -670,37 +670,6 @@ class PDFFindController {
return true;
}
#calculateRegExpMatch(query, entireWord, pageIndex, pageContent) {
const matches = (this._pageMatches[pageIndex] = []);
const matchesLength = (this._pageMatchesLength[pageIndex] = []);
if (!query) {
// The query can be empty because some chars like diacritics could have
// been stripped out.
return;
}
const diffs = this._pageDiffs[pageIndex];
let match;
while ((match = query.exec(pageContent)) !== null) {
if (
entireWord &&
!this.#isEntireWord(pageContent, match.index, match[0].length)
) {
continue;
}
const [matchPos, matchLen] = getOriginalIndex(
diffs,
match.index,
match[0].length
);
if (matchLen) {
matches.push(matchPos);
matchesLength.push(matchLen);
}
}
}
#convertToRegExpString(query, hasDiacritics) {
const { matchDiacritics } = this.#state;
let isUnicode = false;
@ -772,12 +741,64 @@ class PDFFindController {
}
#calculateMatch(pageIndex) {
let query = this.#query;
const query = this.#query;
if (query.length === 0) {
return; // Do nothing: the matches should be wiped out already.
}
const { caseSensitive, entireWord } = this.#state;
const pageContent = this._pageContents[pageIndex];
const matcherResult = this.match(query, pageContent, pageIndex);
const matches = (this._pageMatches[pageIndex] = []);
const matchesLength = (this._pageMatchesLength[pageIndex] = []);
const diffs = this._pageDiffs[pageIndex];
matcherResult?.forEach(({ index, length }) => {
const [matchPos, matchLen] = getOriginalIndex(diffs, index, length);
if (matchLen) {
matches.push(matchPos);
matchesLength.push(matchLen);
}
});
// When `highlightAll` is set, ensure that the matches on previously
// rendered (and still active) pages are correctly highlighted.
if (this.#state.highlightAll) {
this.#updatePage(pageIndex);
}
if (this._resumePageIdx === pageIndex) {
this._resumePageIdx = null;
this.#nextPageMatch();
}
// Update the match count.
const pageMatchesCount = matches.length;
this._matchesCountTotal += pageMatchesCount;
if (this.#updateMatchesCountOnProgress) {
if (pageMatchesCount > 0) {
this.#updateUIResultsCount();
}
} else if (++this.#visitedPagesCount === this._linkService.pagesCount) {
// For example, in GeckoView we want to have only the final update because
// the Java side provides only one object to update the counts.
this.#updateUIResultsCount();
}
}
/**
* @typedef {Object} FindMatch
* @property {number} index - The start of the matched text in the page's
* string contents.
* @property {number} length - The length of the matched text.
*/
/**
* @param {string | string[]} query - The search query.
* @param {string} pageContent - The text content of the page to search in.
* @param {number} pageIndex - The index of the page that is being processed.
* @returns {FindMatch[] | undefined} An array of matches in the provided
* page.
*/
match(query, pageContent, pageIndex) {
const hasDiacritics = this._hasDiacritics[pageIndex];
let isUnicode = false;
@ -799,34 +820,28 @@ class PDFFindController {
})
.join("|");
}
if (!query) {
// The query can be empty because some chars like diacritics could have
// been stripped out.
return undefined;
}
const { caseSensitive, entireWord } = this.#state;
const flags = `g${isUnicode ? "u" : ""}${caseSensitive ? "" : "i"}`;
query = query ? new RegExp(query, flags) : null;
query = new RegExp(query, flags);
this.#calculateRegExpMatch(query, entireWord, pageIndex, pageContent);
// When `highlightAll` is set, ensure that the matches on previously
// rendered (and still active) pages are correctly highlighted.
if (this.#state.highlightAll) {
this.#updatePage(pageIndex);
}
if (this._resumePageIdx === pageIndex) {
this._resumePageIdx = null;
this.#nextPageMatch();
}
// Update the match count.
const pageMatchesCount = this._pageMatches[pageIndex].length;
this._matchesCountTotal += pageMatchesCount;
if (this.#updateMatchesCountOnProgress) {
if (pageMatchesCount > 0) {
this.#updateUIResultsCount();
const matches = [];
let match;
while ((match = query.exec(pageContent)) !== null) {
if (
entireWord &&
!this.#isEntireWord(pageContent, match.index, match[0].length)
) {
continue;
}
} else if (++this.#visitedPagesCount === this._linkService.pagesCount) {
// For example, in GeckoView we want to have only the final update because
// the Java side provides only one object to update the counts.
this.#updateUIResultsCount();
matches.push({ index: match.index, length: match[0].length });
}
return matches;
}
#extractText() {
@ -1103,7 +1118,7 @@ class PDFFindController {
current += matchIdx + 1;
}
// When searching starts, this method may be called before the `pageMatches`
// have been counted (in `_calculateMatch`). Ensure that the UI won't show
// have been counted (in `#calculateMatch`). Ensure that the UI won't show
// temporarily broken state when the active find result doesn't make sense.
if (current < 1 || current > total) {
current = total = 0;