mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-19 14:48:08 +02:00
Merge pull request #18549 from nicolo-ribaudo/custom-find-matcher-subclass
[api-minor] Allow specifying custom match logic in PDFFindController
This commit is contained in:
commit
a999b346d0
2 changed files with 157 additions and 59 deletions
|
@ -51,7 +51,8 @@ class MockLinkService extends SimpleLinkService {
|
|||
|
||||
async function initPdfFindController(
|
||||
filename,
|
||||
updateMatchesCountOnProgress = true
|
||||
updateMatchesCountOnProgress = true,
|
||||
matcher = undefined
|
||||
) {
|
||||
const loadingTask = getDocument(
|
||||
buildGetDocumentParams(filename || tracemonkeyFileName, {
|
||||
|
@ -65,7 +66,13 @@ async function initPdfFindController(
|
|||
const linkService = new MockLinkService();
|
||||
linkService.setDocument(pdfDocument);
|
||||
|
||||
const pdfFindController = new PDFFindController({
|
||||
let FindControllerClass = PDFFindController;
|
||||
if (matcher !== undefined) {
|
||||
FindControllerClass = class extends PDFFindController {};
|
||||
FindControllerClass.prototype.match = matcher;
|
||||
}
|
||||
|
||||
const pdfFindController = new FindControllerClass({
|
||||
linkService,
|
||||
eventBus,
|
||||
updateMatchesCountOnProgress,
|
||||
|
@ -1054,4 +1061,80 @@ describe("pdf_find_controller", function () {
|
|||
const { eventBus } = await initPdfFindController();
|
||||
await testOnFind({ eventBus });
|
||||
});
|
||||
|
||||
describe("custom matcher", () => {
|
||||
it("calls to the matcher with the right arguments", async () => {
|
||||
const QUERY = "Foo bar";
|
||||
|
||||
const spy = jasmine
|
||||
.createSpy("custom find matcher")
|
||||
.and.callFake(() => [{ index: 0, length: 1 }]);
|
||||
|
||||
const { eventBus, pdfFindController } = await initPdfFindController(
|
||||
null,
|
||||
false,
|
||||
spy
|
||||
);
|
||||
|
||||
const PAGES_COUNT = 14;
|
||||
|
||||
await testSearch({
|
||||
eventBus,
|
||||
pdfFindController,
|
||||
state: { query: QUERY },
|
||||
selectedMatch: { pageIndex: 0, matchIndex: 0 },
|
||||
matchesPerPage: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
|
||||
});
|
||||
|
||||
expect(spy).toHaveBeenCalledTimes(PAGES_COUNT);
|
||||
|
||||
for (let i = 0; i < PAGES_COUNT; i++) {
|
||||
const args = spy.calls.argsFor(i);
|
||||
expect(args[0]).withContext(`page ${i}`).toBe(QUERY);
|
||||
expect(args[2]).withContext(`page ${i}`).toBe(i);
|
||||
}
|
||||
|
||||
expect(spy.calls.argsFor(0)[1]).toMatch(/^Trace-based /);
|
||||
expect(spy.calls.argsFor(1)[1]).toMatch(/^Hence, recording and /);
|
||||
expect(spy.calls.argsFor(12)[1]).toMatch(/Figure 12. Fraction of time /);
|
||||
expect(spy.calls.argsFor(13)[1]).toMatch(/^not be interpreted as /);
|
||||
});
|
||||
|
||||
it("uses the results returned by the custom matcher", async () => {
|
||||
const QUERY = "Foo bar";
|
||||
|
||||
// prettier-ignore
|
||||
const spy = jasmine.createSpy("custom find matcher")
|
||||
.and.returnValue(undefined)
|
||||
.withArgs(QUERY, jasmine.anything(), 0)
|
||||
.and.returnValue([
|
||||
{ index: 20, length: 3 },
|
||||
{ index: 50, length: 8 },
|
||||
])
|
||||
.withArgs(QUERY, jasmine.anything(), 2)
|
||||
.and.returnValue([
|
||||
{ index: 7, length: 19 }
|
||||
])
|
||||
.withArgs(QUERY, jasmine.anything(), 13)
|
||||
.and.returnValue([
|
||||
{ index: 50, length: 2 },
|
||||
{ index: 54, length: 9 },
|
||||
{ index: 80, length: 4 },
|
||||
]);
|
||||
|
||||
const { eventBus, pdfFindController } = await initPdfFindController(
|
||||
null,
|
||||
false,
|
||||
spy
|
||||
);
|
||||
|
||||
await testSearch({
|
||||
eventBus,
|
||||
pdfFindController,
|
||||
state: { query: QUERY },
|
||||
selectedMatch: { pageIndex: 0, matchIndex: 0 },
|
||||
matchesPerPage: [2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3],
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
@ -670,37 +670,6 @@ class PDFFindController {
|
|||
return true;
|
||||
}
|
||||
|
||||
#calculateRegExpMatch(query, entireWord, pageIndex, pageContent) {
|
||||
const matches = (this._pageMatches[pageIndex] = []);
|
||||
const matchesLength = (this._pageMatchesLength[pageIndex] = []);
|
||||
if (!query) {
|
||||
// The query can be empty because some chars like diacritics could have
|
||||
// been stripped out.
|
||||
return;
|
||||
}
|
||||
const diffs = this._pageDiffs[pageIndex];
|
||||
let match;
|
||||
while ((match = query.exec(pageContent)) !== null) {
|
||||
if (
|
||||
entireWord &&
|
||||
!this.#isEntireWord(pageContent, match.index, match[0].length)
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const [matchPos, matchLen] = getOriginalIndex(
|
||||
diffs,
|
||||
match.index,
|
||||
match[0].length
|
||||
);
|
||||
|
||||
if (matchLen) {
|
||||
matches.push(matchPos);
|
||||
matchesLength.push(matchLen);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#convertToRegExpString(query, hasDiacritics) {
|
||||
const { matchDiacritics } = this.#state;
|
||||
let isUnicode = false;
|
||||
|
@ -772,12 +741,64 @@ class PDFFindController {
|
|||
}
|
||||
|
||||
#calculateMatch(pageIndex) {
|
||||
let query = this.#query;
|
||||
const query = this.#query;
|
||||
if (query.length === 0) {
|
||||
return; // Do nothing: the matches should be wiped out already.
|
||||
}
|
||||
const { caseSensitive, entireWord } = this.#state;
|
||||
const pageContent = this._pageContents[pageIndex];
|
||||
const matcherResult = this.match(query, pageContent, pageIndex);
|
||||
|
||||
const matches = (this._pageMatches[pageIndex] = []);
|
||||
const matchesLength = (this._pageMatchesLength[pageIndex] = []);
|
||||
const diffs = this._pageDiffs[pageIndex];
|
||||
|
||||
matcherResult?.forEach(({ index, length }) => {
|
||||
const [matchPos, matchLen] = getOriginalIndex(diffs, index, length);
|
||||
if (matchLen) {
|
||||
matches.push(matchPos);
|
||||
matchesLength.push(matchLen);
|
||||
}
|
||||
});
|
||||
|
||||
// When `highlightAll` is set, ensure that the matches on previously
|
||||
// rendered (and still active) pages are correctly highlighted.
|
||||
if (this.#state.highlightAll) {
|
||||
this.#updatePage(pageIndex);
|
||||
}
|
||||
if (this._resumePageIdx === pageIndex) {
|
||||
this._resumePageIdx = null;
|
||||
this.#nextPageMatch();
|
||||
}
|
||||
|
||||
// Update the match count.
|
||||
const pageMatchesCount = matches.length;
|
||||
this._matchesCountTotal += pageMatchesCount;
|
||||
if (this.#updateMatchesCountOnProgress) {
|
||||
if (pageMatchesCount > 0) {
|
||||
this.#updateUIResultsCount();
|
||||
}
|
||||
} else if (++this.#visitedPagesCount === this._linkService.pagesCount) {
|
||||
// For example, in GeckoView we want to have only the final update because
|
||||
// the Java side provides only one object to update the counts.
|
||||
this.#updateUIResultsCount();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @typedef {Object} FindMatch
|
||||
* @property {number} index - The start of the matched text in the page's
|
||||
* string contents.
|
||||
* @property {number} length - The length of the matched text.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @param {string | string[]} query - The search query.
|
||||
* @param {string} pageContent - The text content of the page to search in.
|
||||
* @param {number} pageIndex - The index of the page that is being processed.
|
||||
* @returns {FindMatch[] | undefined} An array of matches in the provided
|
||||
* page.
|
||||
*/
|
||||
match(query, pageContent, pageIndex) {
|
||||
const hasDiacritics = this._hasDiacritics[pageIndex];
|
||||
|
||||
let isUnicode = false;
|
||||
|
@ -799,34 +820,28 @@ class PDFFindController {
|
|||
})
|
||||
.join("|");
|
||||
}
|
||||
if (!query) {
|
||||
// The query can be empty because some chars like diacritics could have
|
||||
// been stripped out.
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const { caseSensitive, entireWord } = this.#state;
|
||||
const flags = `g${isUnicode ? "u" : ""}${caseSensitive ? "" : "i"}`;
|
||||
query = query ? new RegExp(query, flags) : null;
|
||||
query = new RegExp(query, flags);
|
||||
|
||||
this.#calculateRegExpMatch(query, entireWord, pageIndex, pageContent);
|
||||
|
||||
// When `highlightAll` is set, ensure that the matches on previously
|
||||
// rendered (and still active) pages are correctly highlighted.
|
||||
if (this.#state.highlightAll) {
|
||||
this.#updatePage(pageIndex);
|
||||
}
|
||||
if (this._resumePageIdx === pageIndex) {
|
||||
this._resumePageIdx = null;
|
||||
this.#nextPageMatch();
|
||||
}
|
||||
|
||||
// Update the match count.
|
||||
const pageMatchesCount = this._pageMatches[pageIndex].length;
|
||||
this._matchesCountTotal += pageMatchesCount;
|
||||
if (this.#updateMatchesCountOnProgress) {
|
||||
if (pageMatchesCount > 0) {
|
||||
this.#updateUIResultsCount();
|
||||
const matches = [];
|
||||
let match;
|
||||
while ((match = query.exec(pageContent)) !== null) {
|
||||
if (
|
||||
entireWord &&
|
||||
!this.#isEntireWord(pageContent, match.index, match[0].length)
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
} else if (++this.#visitedPagesCount === this._linkService.pagesCount) {
|
||||
// For example, in GeckoView we want to have only the final update because
|
||||
// the Java side provides only one object to update the counts.
|
||||
this.#updateUIResultsCount();
|
||||
matches.push({ index: match.index, length: match[0].length });
|
||||
}
|
||||
return matches;
|
||||
}
|
||||
|
||||
#extractText() {
|
||||
|
@ -1103,7 +1118,7 @@ class PDFFindController {
|
|||
current += matchIdx + 1;
|
||||
}
|
||||
// When searching starts, this method may be called before the `pageMatches`
|
||||
// have been counted (in `_calculateMatch`). Ensure that the UI won't show
|
||||
// have been counted (in `#calculateMatch`). Ensure that the UI won't show
|
||||
// temporarily broken state when the active find result doesn't make sense.
|
||||
if (current < 1 || current > total) {
|
||||
current = total = 0;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue