When searching for a group of punctuation signs, only add extra spaces around the group

It fixes #20225.

Also, only add the extra spaces where they are required, depending on the position of the group in the query string.
This commit is contained in:
Calixte Denizet 2025-11-21 18:59:32 +01:00
parent 8f0c62990e
commit 039b9e4df3
4 changed files with 39 additions and 6 deletions

View File

@ -761,3 +761,4 @@
!extract_link.pdf
!two_paragraphs.pdf
!paragraph_and_link.pdf
!issue20225.pdf

BIN
test/pdfs/issue20225.pdf Executable file

Binary file not shown.

View File

@ -634,8 +634,8 @@ describe("pdf_find_controller", function () {
pageIndex: 0,
matchIndex: 0,
},
pageMatches: [[1497]],
pageMatchesLength: [[25]],
pageMatches: [[1498]],
pageMatchesLength: [[24]],
});
});
@ -1138,6 +1138,26 @@ describe("pdf_find_controller", function () {
});
});
// Regression test for issue #20225: a query made only of punctuation signs
// ("....") is searched in the dedicated `issue20225.pdf` fixture.
it("performs a search with a group of punctuation signs", async () => {
  const controller = await initPdfFindController("issue20225.pdf");
  const firstMatchOnFirstPage = { pageIndex: 0, matchIndex: 0 };

  await testSearch({
    eventBus: controller.eventBus,
    pdfFindController: controller.pdfFindController,
    state: { query: "...." },
    // Exactly one match is expected, and it is the selected one.
    matchesPerPage: [1],
    selectedMatch: firstMatchOnFirstPage,
    // The expected length (4) equals the number of characters in the query.
    pageMatches: [[8]],
    pageMatchesLength: [[4]],
  });
});
describe("custom matcher", () => {
it("calls to the matcher with the right arguments", async () => {
const QUERY = "Foo bar";

View File

@ -78,7 +78,7 @@ let DIACRITICS_EXCEPTION_STR; // Lazily initialized, see below.
const DIACRITICS_REG_EXP = /\p{M}+/gu;
const SPECIAL_CHARS_REG_EXP =
/([.*+?^${}()|[\]\\])|(\p{P})|(\s+)|(\p{M})|(\p{L})/gu;
/([*+^${}()|[\]\\])|(\p{P}+)|(\s+)|(\p{M})|(\p{L})/gu;
const NOT_DIACRITIC_FROM_END_REG_EXP = /([^\p{M}])\p{M}*$/u;
const NOT_DIACRITIC_FROM_START_REG_EXP = /^\p{M}*([^\p{M}])/u;
@ -708,6 +708,18 @@ class PDFFindController {
#convertToRegExpString(query, hasDiacritics) {
const { matchDiacritics } = this.#state;
let isUnicode = false;
// Wraps `fixed` (the regexp-ready form of `original`) with optional-space
// patterns, but only on the sides where other query text can follow or
// precede it:
//  - `original` is the whole query: no padding at all;
//  - the query starts with `original`: padding on the right only;
//  - the query ends with `original`: padding on the left only;
//  - otherwise: padding on both sides.
// NOTE(review): the position is inferred by comparing text against the
// query, not from the match offset, so a group whose text also begins (or
// ends) the query is treated as the leading (or trailing) one even when it
// was matched in the middle.
const addExtraWhitespaces = (original, fixed) => {
  const OPTIONAL_SPACES = "[ ]*";

  if (query === original) {
    return fixed;
  }
  // The "starts with" check wins when both checks would succeed.
  const isLeading = query.startsWith(original);
  const isTrailing = !isLeading && query.endsWith(original);

  const before = isLeading ? "" : OPTIONAL_SPACES;
  const after = isTrailing ? "" : OPTIONAL_SPACES;
  return `${before}${fixed}${after}`;
};
query = query.replaceAll(
SPECIAL_CHARS_REG_EXP,
(
@ -723,11 +735,11 @@ class PDFFindController {
if (p1) {
// Escape characters like *+?... to not interfere with regexp syntax.
return `[ ]*\\${p1}[ ]*`;
return addExtraWhitespaces(p1, `\\${p1}`);
}
if (p2) {
// Allow whitespaces around punctuation signs.
return `[ ]*${p2}[ ]*`;
// Allow whitespaces around group of punctuation signs.
return addExtraWhitespaces(p2, p2.replaceAll(/[.?]/g, "\\$&"));
}
if (p3) {
// Replace spaces by \s+ to be sure to match any spaces.