mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-19 14:48:08 +02:00
Merge pull request #19123 from calixteman/improve_search_perf
Very slightly improve the performance when searching in a pdf
This commit is contained in:
commit
308ca2a16f
1 changed files with 26 additions and 19 deletions
|
@ -184,7 +184,7 @@ function normalize(text) {
|
|||
}
|
||||
|
||||
let normalized = text.normalize("NFD");
|
||||
const positions = [[0, 0]];
|
||||
const positions = [0, 0];
|
||||
let rawDiacriticsIndex = 0;
|
||||
let syllableIndex = 0;
|
||||
let shift = 0;
|
||||
|
@ -201,7 +201,7 @@ function normalize(text) {
|
|||
const replacement = CHARACTERS_TO_NORMALIZE[p1];
|
||||
const jj = replacement.length;
|
||||
for (let j = 1; j < jj; j++) {
|
||||
positions.push([i - shift + j, shift - j]);
|
||||
positions.push(i - shift + j, shift - j);
|
||||
}
|
||||
shift -= jj - 1;
|
||||
return replacement;
|
||||
|
@ -216,7 +216,7 @@ function normalize(text) {
|
|||
}
|
||||
const jj = replacement.length;
|
||||
for (let j = 1; j < jj; j++) {
|
||||
positions.push([i - shift + j, shift - j]);
|
||||
positions.push(i - shift + j, shift - j);
|
||||
}
|
||||
shift -= jj - 1;
|
||||
return replacement;
|
||||
|
@ -233,13 +233,13 @@ function normalize(text) {
|
|||
} else {
|
||||
// i is the position of the first diacritic
|
||||
// so (i - 1) is the position for the letter before.
|
||||
positions.push([i - 1 - shift + 1, shift - 1]);
|
||||
positions.push(i - 1 - shift + 1, shift - 1);
|
||||
shift -= 1;
|
||||
shiftOrigin += 1;
|
||||
}
|
||||
|
||||
// End-of-line.
|
||||
positions.push([i - shift + 1, shift]);
|
||||
positions.push(i - shift + 1, shift);
|
||||
shiftOrigin += 1;
|
||||
eol += 1;
|
||||
|
||||
|
@ -261,7 +261,7 @@ function normalize(text) {
|
|||
for (let j = 1; j <= jj; j++) {
|
||||
// i is the position of the first diacritic
|
||||
// so (i - 1) is the position for the letter before.
|
||||
positions.push([i - 1 - shift + j, shift - j]);
|
||||
positions.push(i - 1 - shift + j, shift - j);
|
||||
}
|
||||
shift -= jj;
|
||||
shiftOrigin += jj;
|
||||
|
@ -270,7 +270,7 @@ function normalize(text) {
|
|||
// Diacritics are followed by a -\n.
|
||||
// See comments in `if (p6)` block.
|
||||
i += len - 1;
|
||||
positions.push([i - shift + 1, 1 + shift]);
|
||||
positions.push(i - shift + 1, 1 + shift);
|
||||
shift += 1;
|
||||
shiftOrigin += 1;
|
||||
eol += 1;
|
||||
|
@ -296,7 +296,7 @@ function normalize(text) {
|
|||
// The \n isn't in the original text so here y = i, n = X.len - 2 and
|
||||
// o = X.len - 1.
|
||||
const len = p6.length - 2;
|
||||
positions.push([i - shift + len, 1 + shift]);
|
||||
positions.push(i - shift + len, 1 + shift);
|
||||
shift += 1;
|
||||
shiftOrigin += 1;
|
||||
eol += 1;
|
||||
|
@ -308,7 +308,7 @@ function normalize(text) {
|
|||
// white space.
|
||||
// A CJK can be encoded in UTF-32, hence their length isn't always 1.
|
||||
const len = p7.length - 1;
|
||||
positions.push([i - shift + len, shift]);
|
||||
positions.push(i - shift + len, shift);
|
||||
shiftOrigin += 1;
|
||||
eol += 1;
|
||||
return p7.slice(0, -1);
|
||||
|
@ -317,7 +317,7 @@ function normalize(text) {
|
|||
if (p8) {
|
||||
// eol is replaced by space: "foo\nbar" is likely equivalent to
|
||||
// "foo bar".
|
||||
positions.push([i - shift + 1, shift - 1]);
|
||||
positions.push(i - shift + 1, shift - 1);
|
||||
shift -= 1;
|
||||
shiftOrigin += 1;
|
||||
eol += 1;
|
||||
|
@ -331,7 +331,7 @@ function normalize(text) {
|
|||
const newCharLen = syllablePositions[syllableIndex][0] - 1;
|
||||
++syllableIndex;
|
||||
for (let j = 1; j <= newCharLen; j++) {
|
||||
positions.push([i - (shift - j), shift - j]);
|
||||
positions.push(i - (shift - j), shift - j);
|
||||
}
|
||||
shift -= newCharLen;
|
||||
shiftOrigin += newCharLen;
|
||||
|
@ -340,9 +340,15 @@ function normalize(text) {
|
|||
}
|
||||
);
|
||||
|
||||
positions.push([normalized.length, shift]);
|
||||
positions.push(normalized.length, shift);
|
||||
const starts = new Uint32Array(positions.length >> 1);
|
||||
const shifts = new Int32Array(positions.length >> 1);
|
||||
for (let i = 0, ii = positions.length; i < ii; i += 2) {
|
||||
starts[i >> 1] = positions[i];
|
||||
shifts[i >> 1] = positions[i + 1];
|
||||
}
|
||||
|
||||
return [normalized, positions, hasDiacritics];
|
||||
return [normalized, [starts, shifts], hasDiacritics];
|
||||
}
|
||||
|
||||
// Determine the original, non-normalized, match index such that highlighting of
|
||||
|
@ -353,25 +359,26 @@ function getOriginalIndex(diffs, pos, len) {
|
|||
return [pos, len];
|
||||
}
|
||||
|
||||
const [starts, shifts] = diffs;
|
||||
// First char in the new string.
|
||||
const start = pos;
|
||||
// Last char in the new string.
|
||||
const end = pos + len - 1;
|
||||
let i = binarySearchFirstItem(diffs, x => x[0] >= start);
|
||||
if (diffs[i][0] > start) {
|
||||
let i = binarySearchFirstItem(starts, x => x >= start);
|
||||
if (starts[i] > start) {
|
||||
--i;
|
||||
}
|
||||
|
||||
let j = binarySearchFirstItem(diffs, x => x[0] >= end, i);
|
||||
if (diffs[j][0] > end) {
|
||||
let j = binarySearchFirstItem(starts, x => x >= end, i);
|
||||
if (starts[j] > end) {
|
||||
--j;
|
||||
}
|
||||
|
||||
// First char in the old string.
|
||||
const oldStart = start + diffs[i][1];
|
||||
const oldStart = start + shifts[i];
|
||||
|
||||
// Last char in the old string.
|
||||
const oldEnd = end + diffs[j][1];
|
||||
const oldEnd = end + shifts[j];
|
||||
const oldLen = oldEnd + 1 - oldStart;
|
||||
|
||||
return [oldStart, oldLen];
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue