mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-19 22:58:07 +02:00
Very slightly improve the performance when searching in a pdf
It helps to slightly decrease memory use by reducing the number of created arrays. When searching for "a" in pdf.pdf, the time spent in getOriginalIndex is decreased by around 30%.
This commit is contained in:
parent
65f20b0392
commit
94d53d5b45
1 changed file with 26 additions and 19 deletions
|
@ -184,7 +184,7 @@ function normalize(text) {
|
|||
}
|
||||
|
||||
let normalized = text.normalize("NFD");
|
||||
const positions = [[0, 0]];
|
||||
const positions = [0, 0];
|
||||
let rawDiacriticsIndex = 0;
|
||||
let syllableIndex = 0;
|
||||
let shift = 0;
|
||||
|
@ -201,7 +201,7 @@ function normalize(text) {
|
|||
const replacement = CHARACTERS_TO_NORMALIZE[p1];
|
||||
const jj = replacement.length;
|
||||
for (let j = 1; j < jj; j++) {
|
||||
positions.push([i - shift + j, shift - j]);
|
||||
positions.push(i - shift + j, shift - j);
|
||||
}
|
||||
shift -= jj - 1;
|
||||
return replacement;
|
||||
|
@ -216,7 +216,7 @@ function normalize(text) {
|
|||
}
|
||||
const jj = replacement.length;
|
||||
for (let j = 1; j < jj; j++) {
|
||||
positions.push([i - shift + j, shift - j]);
|
||||
positions.push(i - shift + j, shift - j);
|
||||
}
|
||||
shift -= jj - 1;
|
||||
return replacement;
|
||||
|
@ -233,13 +233,13 @@ function normalize(text) {
|
|||
} else {
|
||||
// i is the position of the first diacritic
|
||||
// so (i - 1) is the position for the letter before.
|
||||
positions.push([i - 1 - shift + 1, shift - 1]);
|
||||
positions.push(i - 1 - shift + 1, shift - 1);
|
||||
shift -= 1;
|
||||
shiftOrigin += 1;
|
||||
}
|
||||
|
||||
// End-of-line.
|
||||
positions.push([i - shift + 1, shift]);
|
||||
positions.push(i - shift + 1, shift);
|
||||
shiftOrigin += 1;
|
||||
eol += 1;
|
||||
|
||||
|
@ -261,7 +261,7 @@ function normalize(text) {
|
|||
for (let j = 1; j <= jj; j++) {
|
||||
// i is the position of the first diacritic
|
||||
// so (i - 1) is the position for the letter before.
|
||||
positions.push([i - 1 - shift + j, shift - j]);
|
||||
positions.push(i - 1 - shift + j, shift - j);
|
||||
}
|
||||
shift -= jj;
|
||||
shiftOrigin += jj;
|
||||
|
@ -270,7 +270,7 @@ function normalize(text) {
|
|||
// Diacritics are followed by a -\n.
|
||||
// See comments in `if (p6)` block.
|
||||
i += len - 1;
|
||||
positions.push([i - shift + 1, 1 + shift]);
|
||||
positions.push(i - shift + 1, 1 + shift);
|
||||
shift += 1;
|
||||
shiftOrigin += 1;
|
||||
eol += 1;
|
||||
|
@ -296,7 +296,7 @@ function normalize(text) {
|
|||
// The \n isn't in the original text so here y = i, n = X.len - 2 and
|
||||
// o = X.len - 1.
|
||||
const len = p6.length - 2;
|
||||
positions.push([i - shift + len, 1 + shift]);
|
||||
positions.push(i - shift + len, 1 + shift);
|
||||
shift += 1;
|
||||
shiftOrigin += 1;
|
||||
eol += 1;
|
||||
|
@ -308,7 +308,7 @@ function normalize(text) {
|
|||
// white space.
|
||||
// A CJK can be encoded in UTF-32, hence their length isn't always 1.
|
||||
const len = p7.length - 1;
|
||||
positions.push([i - shift + len, shift]);
|
||||
positions.push(i - shift + len, shift);
|
||||
shiftOrigin += 1;
|
||||
eol += 1;
|
||||
return p7.slice(0, -1);
|
||||
|
@ -317,7 +317,7 @@ function normalize(text) {
|
|||
if (p8) {
|
||||
// eol is replaced by space: "foo\nbar" is likely equivalent to
|
||||
// "foo bar".
|
||||
positions.push([i - shift + 1, shift - 1]);
|
||||
positions.push(i - shift + 1, shift - 1);
|
||||
shift -= 1;
|
||||
shiftOrigin += 1;
|
||||
eol += 1;
|
||||
|
@ -331,7 +331,7 @@ function normalize(text) {
|
|||
const newCharLen = syllablePositions[syllableIndex][0] - 1;
|
||||
++syllableIndex;
|
||||
for (let j = 1; j <= newCharLen; j++) {
|
||||
positions.push([i - (shift - j), shift - j]);
|
||||
positions.push(i - (shift - j), shift - j);
|
||||
}
|
||||
shift -= newCharLen;
|
||||
shiftOrigin += newCharLen;
|
||||
|
@ -340,9 +340,15 @@ function normalize(text) {
|
|||
}
|
||||
);
|
||||
|
||||
positions.push([normalized.length, shift]);
|
||||
positions.push(normalized.length, shift);
|
||||
const starts = new Uint32Array(positions.length >> 1);
|
||||
const shifts = new Int32Array(positions.length >> 1);
|
||||
for (let i = 0, ii = positions.length; i < ii; i += 2) {
|
||||
starts[i >> 1] = positions[i];
|
||||
shifts[i >> 1] = positions[i + 1];
|
||||
}
|
||||
|
||||
return [normalized, positions, hasDiacritics];
|
||||
return [normalized, [starts, shifts], hasDiacritics];
|
||||
}
|
||||
|
||||
// Determine the original, non-normalized, match index such that highlighting of
|
||||
|
@ -353,25 +359,26 @@ function getOriginalIndex(diffs, pos, len) {
|
|||
return [pos, len];
|
||||
}
|
||||
|
||||
const [starts, shifts] = diffs;
|
||||
// First char in the new string.
|
||||
const start = pos;
|
||||
// Last char in the new string.
|
||||
const end = pos + len - 1;
|
||||
let i = binarySearchFirstItem(diffs, x => x[0] >= start);
|
||||
if (diffs[i][0] > start) {
|
||||
let i = binarySearchFirstItem(starts, x => x >= start);
|
||||
if (starts[i] > start) {
|
||||
--i;
|
||||
}
|
||||
|
||||
let j = binarySearchFirstItem(diffs, x => x[0] >= end, i);
|
||||
if (diffs[j][0] > end) {
|
||||
let j = binarySearchFirstItem(starts, x => x >= end, i);
|
||||
if (starts[j] > end) {
|
||||
--j;
|
||||
}
|
||||
|
||||
// First char in the old string.
|
||||
const oldStart = start + diffs[i][1];
|
||||
const oldStart = start + shifts[i];
|
||||
|
||||
// Last char in the old string.
|
||||
const oldEnd = end + diffs[j][1];
|
||||
const oldEnd = end + shifts[j];
|
||||
const oldLen = oldEnd + 1 - oldStart;
|
||||
|
||||
return [oldStart, oldLen];
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue