Merge pull request #20456 from calixteman/issue20225
When searching for a group of punctuation signs, only add extra spaces around the group
This commit is contained in:
commit
d4b6464675
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -762,3 +762,4 @@
|
||||
!extract_link.pdf
|
||||
!two_paragraphs.pdf
|
||||
!paragraph_and_link.pdf
|
||||
!issue20225.pdf
|
||||
|
||||
BIN
test/pdfs/issue20225.pdf
Executable file
BIN
test/pdfs/issue20225.pdf
Executable file
Binary file not shown.
@ -634,8 +634,8 @@ describe("pdf_find_controller", function () {
|
||||
pageIndex: 0,
|
||||
matchIndex: 0,
|
||||
},
|
||||
pageMatches: [[1497]],
|
||||
pageMatchesLength: [[25]],
|
||||
pageMatches: [[1498]],
|
||||
pageMatchesLength: [[24]],
|
||||
});
|
||||
});
|
||||
|
||||
@ -1138,6 +1138,26 @@ describe("pdf_find_controller", function () {
|
||||
});
|
||||
});
|
||||
|
||||
it("performs a search with a group of punctuation signs", async () => {
  // Load the fixture PDF used by this test case.
  const controller = await initPdfFindController("issue20225.pdf");
  const { eventBus, pdfFindController } = controller;

  // The query is a run of four dots; the expectation is exactly one match
  // on the first page, selected by default.
  // NOTE(review): presumably `pageMatches` holds the match offset (8) and
  // `pageMatchesLength` its length (4) in the page text; confirm against
  // `testSearch`.
  const expected = {
    matchesPerPage: [1],
    selectedMatch: { pageIndex: 0, matchIndex: 0 },
    pageMatches: [[8]],
    pageMatchesLength: [[4]],
  };

  await testSearch({
    eventBus,
    pdfFindController,
    state: { query: "...." },
    ...expected,
  });
});
|
||||
|
||||
describe("custom matcher", () => {
|
||||
it("calls to the matcher with the right arguments", async () => {
|
||||
const QUERY = "Foo bar";
|
||||
|
||||
@ -78,7 +78,7 @@ let DIACRITICS_EXCEPTION_STR; // Lazily initialized, see below.
|
||||
|
||||
const DIACRITICS_REG_EXP = /\p{M}+/gu;
|
||||
const SPECIAL_CHARS_REG_EXP =
|
||||
/([.*+?^${}()|[\]\\])|(\p{P})|(\s+)|(\p{M})|(\p{L})/gu;
|
||||
/([*+^${}()|[\]\\])|(\p{P}+)|(\s+)|(\p{M})|(\p{L})/gu;
|
||||
const NOT_DIACRITIC_FROM_END_REG_EXP = /([^\p{M}])\p{M}*$/u;
|
||||
const NOT_DIACRITIC_FROM_START_REG_EXP = /^\p{M}*([^\p{M}])/u;
|
||||
|
||||
@ -708,6 +708,18 @@ class PDFFindController {
|
||||
#convertToRegExpString(query, hasDiacritics) {
|
||||
const { matchDiacritics } = this.#state;
|
||||
let isUnicode = false;
|
||||
/**
 * Wrap an already-escaped fragment with optional-space patterns (`[ ]*`),
 * except on the side(s) where the fragment touches an edge of the query.
 *
 * Reads `query` from the enclosing scope.
 *
 * @param {string} original - The raw text matched in the query.
 * @param {string} fixed - The regexp-safe replacement for `original`.
 * @returns {string} `fixed`, with `[ ]*` added before and/or after it.
 */
const addExtraWhitespaces = (original, fixed) => {
  const OPTIONAL_SPACES = "[ ]*";

  // The fragment is the whole query: nothing to pad.
  if (original === query) {
    return fixed;
  }

  // NOTE(review): these checks compare text, not the match position, so a
  // fragment whose text also occurs at the start (or end) of the query is
  // treated as leading (or trailing) even when matched elsewhere; confirm
  // whether that is intended.
  const isLeading = query.startsWith(original);
  // A leading fragment always gets trailing padding, so the suffix check
  // only matters when the fragment is not leading.
  const isTrailing = !isLeading && query.endsWith(original);

  const before = isLeading ? "" : OPTIONAL_SPACES;
  const after = isTrailing ? "" : OPTIONAL_SPACES;
  return `${before}${fixed}${after}`;
};
|
||||
query = query.replaceAll(
|
||||
SPECIAL_CHARS_REG_EXP,
|
||||
(
|
||||
@ -723,11 +735,11 @@ class PDFFindController {
|
||||
|
||||
if (p1) {
|
||||
// Escape characters like *+?... to not interfere with regexp syntax.
|
||||
return `[ ]*\\${p1}[ ]*`;
|
||||
return addExtraWhitespaces(p1, `\\${p1}`);
|
||||
}
|
||||
if (p2) {
|
||||
// Allow whitespaces around punctuation signs.
|
||||
return `[ ]*${p2}[ ]*`;
|
||||
// Allow whitespaces around group of punctuation signs.
|
||||
return addExtraWhitespaces(p2, p2.replaceAll(/[.?]/g, "\\$&"));
|
||||
}
|
||||
if (p3) {
|
||||
// Replace spaces by \s+ to be sure to match any spaces.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user