Merge pull request #19123 from calixteman/improve_search_perf

Very slightly improve the performance when searching in a pdf
2024-11-28 17:44:14 +01:00 · 2024-11-28 17:44:14 +01:00 · 308ca2a16f
commit 308ca2a16f
parent 65f20b0392 94d53d5b45
1 changed file with 26 additions and 19 deletions
--- a/web/pdf_find_controller.js
+++ b/web/pdf_find_controller.js
@@ -184,7 +184,7 @@ function normalize(text) {
  }
  let normalized = text.normalize("NFD");
-  const positions = [[0, 0]];
+  const positions = [0, 0];
  let rawDiacriticsIndex = 0;
  let syllableIndex = 0;
  let shift = 0;
@@ -201,7 +201,7 @@ function normalize(text) {
        const replacement = CHARACTERS_TO_NORMALIZE[p1];
        const jj = replacement.length;
        for (let j = 1; j < jj; j++) {
-          positions.push([i - shift + j, shift - j]);
+          positions.push(i - shift + j, shift - j);
        }
        shift -= jj - 1;
        return replacement;
@@ -216,7 +216,7 @@ function normalize(text) {
        }
        const jj = replacement.length;
        for (let j = 1; j < jj; j++) {
-          positions.push([i - shift + j, shift - j]);
+          positions.push(i - shift + j, shift - j);
        }
        shift -= jj - 1;
        return replacement;
@@ -233,13 +233,13 @@ function normalize(text) {
        } else {
          // i is the position of the first diacritic
          // so (i - 1) is the position for the letter before.
-          positions.push([i - 1 - shift + 1, shift - 1]);
+          positions.push(i - 1 - shift + 1, shift - 1);
          shift -= 1;
          shiftOrigin += 1;
        }
        // End-of-line.
-        positions.push([i - shift + 1, shift]);
+        positions.push(i - shift + 1, shift);
        shiftOrigin += 1;
        eol += 1;
@@ -261,7 +261,7 @@ function normalize(text) {
        for (let j = 1; j <= jj; j++) {
          // i is the position of the first diacritic
          // so (i - 1) is the position for the letter before.
-          positions.push([i - 1 - shift + j, shift - j]);
+          positions.push(i - 1 - shift + j, shift - j);
        }
        shift -= jj;
        shiftOrigin += jj;
@@ -270,7 +270,7 @@ function normalize(text) {
          // Diacritics are followed by a -\n.
          // See comments in `if (p6)` block.
          i += len - 1;
-          positions.push([i - shift + 1, 1 + shift]);
+          positions.push(i - shift + 1, 1 + shift);
          shift += 1;
          shiftOrigin += 1;
          eol += 1;
@@ -296,7 +296,7 @@ function normalize(text) {
        // The \n isn't in the original text so here y = i, n = X.len - 2 and
        // o = X.len - 1.
        const len = p6.length - 2;
-        positions.push([i - shift + len, 1 + shift]);
+        positions.push(i - shift + len, 1 + shift);
        shift += 1;
        shiftOrigin += 1;
        eol += 1;
@@ -308,7 +308,7 @@ function normalize(text) {
        // white space.
        // A CJK can be encoded in UTF-32, hence their length isn't always 1.
        const len = p7.length - 1;
-        positions.push([i - shift + len, shift]);
+        positions.push(i - shift + len, shift);
        shiftOrigin += 1;
        eol += 1;
        return p7.slice(0, -1);
@@ -317,7 +317,7 @@ function normalize(text) {
      if (p8) {
        // eol is replaced by space: "foo\nbar" is likely equivalent to
        // "foo bar".
-        positions.push([i - shift + 1, shift - 1]);
+        positions.push(i - shift + 1, shift - 1);
        shift -= 1;
        shiftOrigin += 1;
        eol += 1;
@@ -331,7 +331,7 @@ function normalize(text) {
        const newCharLen = syllablePositions[syllableIndex][0] - 1;
        ++syllableIndex;
        for (let j = 1; j <= newCharLen; j++) {
-          positions.push([i - (shift - j), shift - j]);
+          positions.push(i - (shift - j), shift - j);
        }
        shift -= newCharLen;
        shiftOrigin += newCharLen;
@@ -340,9 +340,15 @@ function normalize(text) {
    }
  );
-  positions.push([normalized.length, shift]);
+  positions.push(normalized.length, shift);
+  const starts = new Uint32Array(positions.length >> 1);
+  const shifts = new Int32Array(positions.length >> 1);
+  for (let i = 0, ii = positions.length; i < ii; i += 2) {
+    starts[i >> 1] = positions[i];
+    shifts[i >> 1] = positions[i + 1];
+  }
-  return [normalized, positions, hasDiacritics];
+  return [normalized, [starts, shifts], hasDiacritics];
 }
 // Determine the original, non-normalized, match index such that highlighting of
@@ -353,25 +359,26 @@ function getOriginalIndex(diffs, pos, len) {
    return [pos, len];
  }
+  const [starts, shifts] = diffs;
  // First char in the new string.
  const start = pos;
  // Last char in the new string.
  const end = pos + len - 1;
-  let i = binarySearchFirstItem(diffs, x => x[0] >= start);
+  let i = binarySearchFirstItem(starts, x => x >= start);
-  if (diffs[i][0] > start) {
+  if (starts[i] > start) {
    --i;
  }
-  let j = binarySearchFirstItem(diffs, x => x[0] >= end, i);
+  let j = binarySearchFirstItem(starts, x => x >= end, i);
-  if (diffs[j][0] > end) {
+  if (starts[j] > end) {
    --j;
  }
  // First char in the old string.
-  const oldStart = start + diffs[i][1];
+  const oldStart = start + shifts[i];
  // Last char in the old string.
-  const oldEnd = end + diffs[j][1];
+  const oldEnd = end + shifts[j];
  const oldLen = oldEnd + 1 - oldStart;
  return [oldStart, oldLen];