mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-19 22:58:07 +02:00
Allow specifying custom match logic in PDFFindController
This patch allows embedders of PDF.js to provide custom match logic for searching in PDFs. This is done by subclassing the PDFFindController class and overriding the `match` method. `match` is called once per PDF page, receives as parameters the search query, the page contents, and the page index, and returns an array of { index, length } objects representing the search results.
This commit is contained in:
parent
b80e552760
commit
f051597e23
2 changed files with 157 additions and 59 deletions
|
@ -670,37 +670,6 @@ class PDFFindController {
|
|||
return true;
|
||||
}
|
||||
|
||||
#calculateRegExpMatch(query, entireWord, pageIndex, pageContent) {
|
||||
const matches = (this._pageMatches[pageIndex] = []);
|
||||
const matchesLength = (this._pageMatchesLength[pageIndex] = []);
|
||||
if (!query) {
|
||||
// The query can be empty because some chars like diacritics could have
|
||||
// been stripped out.
|
||||
return;
|
||||
}
|
||||
const diffs = this._pageDiffs[pageIndex];
|
||||
let match;
|
||||
while ((match = query.exec(pageContent)) !== null) {
|
||||
if (
|
||||
entireWord &&
|
||||
!this.#isEntireWord(pageContent, match.index, match[0].length)
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const [matchPos, matchLen] = getOriginalIndex(
|
||||
diffs,
|
||||
match.index,
|
||||
match[0].length
|
||||
);
|
||||
|
||||
if (matchLen) {
|
||||
matches.push(matchPos);
|
||||
matchesLength.push(matchLen);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#convertToRegExpString(query, hasDiacritics) {
|
||||
const { matchDiacritics } = this.#state;
|
||||
let isUnicode = false;
|
||||
|
@ -772,12 +741,64 @@ class PDFFindController {
|
|||
}
|
||||
|
||||
#calculateMatch(pageIndex) {
|
||||
let query = this.#query;
|
||||
const query = this.#query;
|
||||
if (query.length === 0) {
|
||||
return; // Do nothing: the matches should be wiped out already.
|
||||
}
|
||||
const { caseSensitive, entireWord } = this.#state;
|
||||
const pageContent = this._pageContents[pageIndex];
|
||||
const matcherResult = this.match(query, pageContent, pageIndex);
|
||||
|
||||
const matches = (this._pageMatches[pageIndex] = []);
|
||||
const matchesLength = (this._pageMatchesLength[pageIndex] = []);
|
||||
const diffs = this._pageDiffs[pageIndex];
|
||||
|
||||
matcherResult?.forEach(({ index, length }) => {
|
||||
const [matchPos, matchLen] = getOriginalIndex(diffs, index, length);
|
||||
if (matchLen) {
|
||||
matches.push(matchPos);
|
||||
matchesLength.push(matchLen);
|
||||
}
|
||||
});
|
||||
|
||||
// When `highlightAll` is set, ensure that the matches on previously
|
||||
// rendered (and still active) pages are correctly highlighted.
|
||||
if (this.#state.highlightAll) {
|
||||
this.#updatePage(pageIndex);
|
||||
}
|
||||
if (this._resumePageIdx === pageIndex) {
|
||||
this._resumePageIdx = null;
|
||||
this.#nextPageMatch();
|
||||
}
|
||||
|
||||
// Update the match count.
|
||||
const pageMatchesCount = matches.length;
|
||||
this._matchesCountTotal += pageMatchesCount;
|
||||
if (this.#updateMatchesCountOnProgress) {
|
||||
if (pageMatchesCount > 0) {
|
||||
this.#updateUIResultsCount();
|
||||
}
|
||||
} else if (++this.#visitedPagesCount === this._linkService.pagesCount) {
|
||||
// For example, in GeckoView we want to have only the final update because
|
||||
// the Java side provides only one object to update the counts.
|
||||
this.#updateUIResultsCount();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @typedef {Object} FindMatch
|
||||
* @property {number} index - The start of the matched text in the page's
|
||||
* string contents.
|
||||
* @property {number} length - The length of the matched text.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @param {string | string[]} query - The search query.
|
||||
* @param {string} pageContent - The text content of the page to search in.
|
||||
* @param {number} pageIndex - The index of the page that is being processed.
|
||||
* @returns {FindMatch[] | undefined} An array of matches in the provided
|
||||
* page.
|
||||
*/
|
||||
match(query, pageContent, pageIndex) {
|
||||
const hasDiacritics = this._hasDiacritics[pageIndex];
|
||||
|
||||
let isUnicode = false;
|
||||
|
@ -799,34 +820,28 @@ class PDFFindController {
|
|||
})
|
||||
.join("|");
|
||||
}
|
||||
if (!query) {
|
||||
// The query can be empty because some chars like diacritics could have
|
||||
// been stripped out.
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const { caseSensitive, entireWord } = this.#state;
|
||||
const flags = `g${isUnicode ? "u" : ""}${caseSensitive ? "" : "i"}`;
|
||||
query = query ? new RegExp(query, flags) : null;
|
||||
query = new RegExp(query, flags);
|
||||
|
||||
this.#calculateRegExpMatch(query, entireWord, pageIndex, pageContent);
|
||||
|
||||
// When `highlightAll` is set, ensure that the matches on previously
|
||||
// rendered (and still active) pages are correctly highlighted.
|
||||
if (this.#state.highlightAll) {
|
||||
this.#updatePage(pageIndex);
|
||||
}
|
||||
if (this._resumePageIdx === pageIndex) {
|
||||
this._resumePageIdx = null;
|
||||
this.#nextPageMatch();
|
||||
}
|
||||
|
||||
// Update the match count.
|
||||
const pageMatchesCount = this._pageMatches[pageIndex].length;
|
||||
this._matchesCountTotal += pageMatchesCount;
|
||||
if (this.#updateMatchesCountOnProgress) {
|
||||
if (pageMatchesCount > 0) {
|
||||
this.#updateUIResultsCount();
|
||||
const matches = [];
|
||||
let match;
|
||||
while ((match = query.exec(pageContent)) !== null) {
|
||||
if (
|
||||
entireWord &&
|
||||
!this.#isEntireWord(pageContent, match.index, match[0].length)
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
} else if (++this.#visitedPagesCount === this._linkService.pagesCount) {
|
||||
// For example, in GeckoView we want to have only the final update because
|
||||
// the Java side provides only one object to update the counts.
|
||||
this.#updateUIResultsCount();
|
||||
matches.push({ index: match.index, length: match[0].length });
|
||||
}
|
||||
return matches;
|
||||
}
|
||||
|
||||
#extractText() {
|
||||
|
@ -1103,7 +1118,7 @@ class PDFFindController {
|
|||
current += matchIdx + 1;
|
||||
}
|
||||
// When searching starts, this method may be called before the `pageMatches`
|
||||
// have been counted (in `_calculateMatch`). Ensure that the UI won't show
|
||||
// have been counted (in `#calculateMatch`). Ensure that the UI won't show
|
||||
// temporarily broken state when the active find result doesn't make sense.
|
||||
if (current < 1 || current > total) {
|
||||
current = total = 0;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue