Fix the regex string used to find the chars to normalize with NFKC when searching
ICU has been updated in Firefox (see https://bugzilla.mozilla.org/show_bug.cgi?id=2000225). The character `0xA7F1` is now alphabetic and can be normalized to an "S".
This commit is contained in:
parent
ec71e4ed65
commit
eee20cf138
@ -13,6 +13,8 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { FeatureTest } from "pdfjs-lib";
|
||||
|
||||
const CharacterType = {
|
||||
SPACE: 0,
|
||||
ALPHA_LETTER: 1,
|
||||
@ -114,8 +116,13 @@ function getCharacterType(charCode) {
|
||||
|
||||
let NormalizeWithNFKC;
|
||||
function getNormalizeWithNFKC() {
|
||||
if (
|
||||
(typeof PDFJSDev === "undefined" && FeatureTest.platform.isFirefox) ||
|
||||
(typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL"))
|
||||
) {
|
||||
/* eslint-disable no-irregular-whitespace */
|
||||
NormalizeWithNFKC ||= ` ¨ª¯²-µ¸-º¼-¾IJ-ijĿ-ŀʼnſDŽ-njDZ-dzʰ-ʸ˘-˝ˠ-ˤʹͺ;΄-΅·ϐ-ϖϰ-ϲϴ-ϵϹևٵ-ٸक़-य़ড়-ঢ়য়ਲ਼ਸ਼ਖ਼-ਜ਼ਫ਼ଡ଼-ଢ଼ำຳໜ-ໝ༌གྷཌྷདྷབྷཛྷཀྵჼᴬ-ᴮᴰ-ᴺᴼ-ᵍᵏ-ᵪᵸᶛ-ᶿẚ-ẛάέήίόύώΆ᾽-῁ΈΉ῍-῏ΐΊ῝-῟ΰΎ῭-`ΌΏ´-῾ - ‑‗․-… ″-‴‶-‷‼‾⁇-⁉⁗ ⁰-ⁱ⁴-₎ₐ-ₜ₨℀-℃℅-ℇ℉-ℓℕ-№ℙ-ℝ℠-™ℤΩℨK-ℭℯ-ℱℳ-ℹ℻-⅀ⅅ-ⅉ⅐-ⅿ↉∬-∭∯-∰〈-〉①-⓪⨌⩴-⩶⫝̸ⱼ-ⱽⵯ⺟⻳⼀-⿕ 〶〸-〺゛-゜ゟヿㄱ-ㆎ㆒-㆟㈀-㈞㈠-㉇㉐-㉾㊀-㏿ꚜ-ꚝꝰꟲ-ꟴꟸ-ꟹꭜ-ꭟꭩ豈-嗀塚晴凞-羽蘒諸逸-都飯-舘並-龎ff-stﬓ-ﬗיִײַ-זּטּ-לּמּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-﷼︐-︙︰-﹄﹇-﹒﹔-﹦﹨-﹫ﹰ-ﹲﹴﹶ-ﻼ!-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ¢-₩`;
|
||||
NormalizeWithNFKC ||= `\xA0¨ª¯²-µ¸-º¼-¾IJ-ijĿ-ŀʼnſDŽ-njDZ-dzʰ-ʸ˘-˝ˠ-ˤʹͺ;΄-΅·ϐ-ϖϰ-ϲϴ-ϵϹևٵ-ٸक़-य़ড়-ঢ়য়ਲ਼ਸ਼ਖ਼-ਜ਼ਫ਼ଡ଼-ଢ଼ำຳໜ-ໝ༌གྷཌྷདྷབྷཛྷཀྵჼᴬ-ᴮᴰ-ᴺᴼ-ᵍᵏ-ᵪᵸᶛ-ᶿẚ-ẛάέήίόύώΆ᾽-῁ΈΉ῍-῏ΐΊ῝-῟ΰΎ῭-`ΌΏ´-῾ - ‑‗․-… ″-‴‶-‷‼‾⁇-⁉⁗ ⁰-ⁱ⁴-₎ₐ-ₜ₨℀-℃℅-ℇ℉-ℓℕ-№ℙ-ℝ℠-™ℤΩℨK-ℭℯ-ℱℳ-ℹ℻-⅀ⅅ-ⅉ⅐-ⅿ↉∬-∭∯-∰〈-〉①-⓪⨌⩴-⩶⫝̸ⱼ-ⱽⵯ⺟⻳⼀-⿕ 〶〸-〺゛-゜ゟヿㄱ-ㆎ㆒-㆟㈀-㈞㈠-㉇㉐-㉾㊀-㏿ꚜ-ꚝꝰ-ꟴꟸ-ꟹꭜ-ꭟꭩ豈-嗀塚晴凞-羽蘒諸逸-都飯-舘並-龎ff-stﬓ-ﬗיִײַ-זּטּ-לּמּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-﷼︐-︙︰-﹄﹇-﹒﹔-﹦﹨-﹫ﹰ-ﹲﹴﹶ-ﻼ!-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ¢-₩`;
|
||||
}
|
||||
|
||||
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
|
||||
const ranges = [];
|
||||
@ -145,7 +152,24 @@ function getNormalizeWithNFKC() {
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ranges.join("") !== NormalizeWithNFKC) {
|
||||
|
||||
const rangesStr = ranges.join("");
|
||||
if (!NormalizeWithNFKC) {
|
||||
NormalizeWithNFKC = rangesStr;
|
||||
} else if (rangesStr !== NormalizeWithNFKC) {
|
||||
for (let i = 1; i < rangesStr.length; i++) {
|
||||
if (rangesStr[i] !== NormalizeWithNFKC[i]) {
|
||||
console.log(
|
||||
`Difference at index ${i}: ` +
|
||||
`U+${rangesStr.charCodeAt(i).toString(16).toUpperCase().padStart(4, "0")}` +
|
||||
`!== U+${NormalizeWithNFKC.charCodeAt(i)
|
||||
.toString(16)
|
||||
.toUpperCase()
|
||||
.padStart(4, "0")}`
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
throw new Error(
|
||||
"getNormalizeWithNFKC - update the `NormalizeWithNFKC` string."
|
||||
);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user