Merge pull request #20456 from calixteman/issue20225

When searching for a group of punctuation signs, only add extra spaces around the group
This commit is contained in:
calixteman 2025-11-22 15:25:25 +01:00 committed by GitHub
commit d4b6464675
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 39 additions and 6 deletions

View File

@ -762,3 +762,4 @@
!extract_link.pdf !extract_link.pdf
!two_paragraphs.pdf !two_paragraphs.pdf
!paragraph_and_link.pdf !paragraph_and_link.pdf
!issue20225.pdf

BIN
test/pdfs/issue20225.pdf Executable file

Binary file not shown.

View File

@ -634,8 +634,8 @@ describe("pdf_find_controller", function () {
pageIndex: 0, pageIndex: 0,
matchIndex: 0, matchIndex: 0,
}, },
pageMatches: [[1497]], pageMatches: [[1498]],
pageMatchesLength: [[25]], pageMatchesLength: [[24]],
}); });
}); });
@ -1138,6 +1138,26 @@ describe("pdf_find_controller", function () {
}); });
}); });
// Regression test for a query made only of punctuation signs ("...."):
// loads issue20225.pdf, searches for the four-dot group and expects exactly
// one match on the first page, selected as match 0.
it("performs a search with a group of punctuation signs", async () => {
const { eventBus, pdfFindController } =
await initPdfFindController("issue20225.pdf");
await testSearch({
eventBus,
pdfFindController,
state: {
query: "....",
},
matchesPerPage: [1],
selectedMatch: {
pageIndex: 0,
matchIndex: 0,
},
// Expected per-page match data: a single entry of value 8 with length 4,
// i.e. the length equals the four characters of the query
// (presumably 8 is the match offset in the page text; verify against
// testSearch).
pageMatches: [[8]],
pageMatchesLength: [[4]],
});
});
describe("custom matcher", () => { describe("custom matcher", () => {
it("calls to the matcher with the right arguments", async () => { it("calls to the matcher with the right arguments", async () => {
const QUERY = "Foo bar"; const QUERY = "Foo bar";

View File

@ -78,7 +78,7 @@ let DIACRITICS_EXCEPTION_STR; // Lazily initialized, see below.
const DIACRITICS_REG_EXP = /\p{M}+/gu; const DIACRITICS_REG_EXP = /\p{M}+/gu;
const SPECIAL_CHARS_REG_EXP = const SPECIAL_CHARS_REG_EXP =
/([.*+?^${}()|[\]\\])|(\p{P})|(\s+)|(\p{M})|(\p{L})/gu; /([*+^${}()|[\]\\])|(\p{P}+)|(\s+)|(\p{M})|(\p{L})/gu;
const NOT_DIACRITIC_FROM_END_REG_EXP = /([^\p{M}])\p{M}*$/u; const NOT_DIACRITIC_FROM_END_REG_EXP = /([^\p{M}])\p{M}*$/u;
const NOT_DIACRITIC_FROM_START_REG_EXP = /^\p{M}*([^\p{M}])/u; const NOT_DIACRITIC_FROM_START_REG_EXP = /^\p{M}*([^\p{M}])/u;
@ -708,6 +708,18 @@ class PDFFindController {
#convertToRegExpString(query, hasDiacritics) { #convertToRegExpString(query, hasDiacritics) {
const { matchDiacritics } = this.#state; const { matchDiacritics } = this.#state;
let isUnicode = false; let isUnicode = false;
// Surround an already-escaped fragment with optional-space matchers.
//
// `original` is the raw text taken from the query and `fixed` is its
// regexp-safe form. No optional spaces are emitted on a side where the
// fragment touches the edge of the query:
//   - fragment is the whole query    -> `fixed`
//   - query starts with the fragment -> `fixed` + trailing spaces
//   - query ends with the fragment   -> leading spaces + `fixed`
//   - otherwise                      -> spaces on both sides
//
// NOTE(review): the edge detection compares text, not match position, so a
// fragment found in the middle of the query that happens to equal the
// query's prefix (or suffix) is handled as if it were at that edge.
const addExtraWhitespaces = (original, fixed) => {
  const optionalSpaces = "[ ]*";
  if (original === query) {
    return fixed;
  }
  const isLeading = query.startsWith(original);
  // The start check takes precedence, mirroring an if / else-if chain.
  const isTrailing = !isLeading && query.endsWith(original);
  const before = isLeading ? "" : optionalSpaces;
  const after = isTrailing ? "" : optionalSpaces;
  return `${before}${fixed}${after}`;
};
query = query.replaceAll( query = query.replaceAll(
SPECIAL_CHARS_REG_EXP, SPECIAL_CHARS_REG_EXP,
( (
@ -723,11 +735,11 @@ class PDFFindController {
if (p1) { if (p1) {
// Escape characters like *+?... to not interfere with regexp syntax. // Escape characters like *+?... to not interfere with regexp syntax.
return `[ ]*\\${p1}[ ]*`; return addExtraWhitespaces(p1, `\\${p1}`);
} }
if (p2) { if (p2) {
// Allow whitespaces around punctuation signs. // Allow whitespaces around group of punctuation signs.
return `[ ]*${p2}[ ]*`; return addExtraWhitespaces(p2, p2.replaceAll(/[.?]/g, "\\$&"));
} }
if (p3) { if (p3) {
// Replace spaces by \s+ to be sure to match any spaces. // Replace spaces by \s+ to be sure to match any spaces.