diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index cb41f1d0a..2303ff0ec 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -761,3 +761,4 @@ !extract_link.pdf !two_paragraphs.pdf !paragraph_and_link.pdf +!issue20225.pdf diff --git a/test/pdfs/issue20225.pdf b/test/pdfs/issue20225.pdf new file mode 100755 index 000000000..135c1b475 Binary files /dev/null and b/test/pdfs/issue20225.pdf differ diff --git a/test/unit/pdf_find_controller_spec.js b/test/unit/pdf_find_controller_spec.js index c4d18925c..25f940861 100644 --- a/test/unit/pdf_find_controller_spec.js +++ b/test/unit/pdf_find_controller_spec.js @@ -634,8 +634,8 @@ describe("pdf_find_controller", function () { pageIndex: 0, matchIndex: 0, }, - pageMatches: [[1497]], - pageMatchesLength: [[25]], + pageMatches: [[1498]], + pageMatchesLength: [[24]], }); }); @@ -1138,6 +1138,26 @@ describe("pdf_find_controller", function () { }); }); + it("performs a search with a group of punctuation signs", async () => { + const { eventBus, pdfFindController } = + await initPdfFindController("issue20225.pdf"); + + await testSearch({ + eventBus, + pdfFindController, + state: { + query: "....", + }, + matchesPerPage: [1], + selectedMatch: { + pageIndex: 0, + matchIndex: 0, + }, + pageMatches: [[8]], + pageMatchesLength: [[4]], + }); + }); + describe("custom matcher", () => { it("calls to the matcher with the right arguments", async () => { const QUERY = "Foo bar"; diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js index 19abf3fca..eaa4456f0 100644 --- a/web/pdf_find_controller.js +++ b/web/pdf_find_controller.js @@ -78,7 +78,7 @@ let DIACRITICS_EXCEPTION_STR; // Lazily initialized, see below. const DIACRITICS_REG_EXP = /\p{M}+/gu; const SPECIAL_CHARS_REG_EXP = - /([.*+?^${}()|[\]\\])|(\p{P})|(\s+)|(\p{M})|(\p{L})/gu; + /([*+^${}()|[\]\\])|(\p{P}+)|(\s+)|(\p{M})|(\p{L})/gu; const NOT_DIACRITIC_FROM_END_REG_EXP = /([^\p{M}])\p{M}*$/u; const NOT_DIACRITIC_FROM_START_REG_EXP = /^\p{M}*([^\p{M}])/u; @@ -708,6 +708,18 @@ class PDFFindController { #convertToRegExpString(query, hasDiacritics) { const { matchDiacritics } = this.#state; let isUnicode = false; + const addExtraWhitespaces = (original, fixed) => { + if (original === query) { + return fixed; + } + if (query.startsWith(original)) { + return `${fixed}[ ]*`; + } + if (query.endsWith(original)) { + return `[ ]*${fixed}`; + } + return `[ ]*${fixed}[ ]*`; + }; query = query.replaceAll( SPECIAL_CHARS_REG_EXP, ( @@ -723,11 +735,11 @@ class PDFFindController { if (p1) { // Escape characters like *+?... to not interfere with regexp syntax. - return `[ ]*\\${p1}[ ]*`; + return addExtraWhitespaces(p1, `\\${p1}`); } if (p2) { - // Allow whitespaces around punctuation signs. - return `[ ]*${p2}[ ]*`; + // Allow whitespaces around group of punctuation signs. + return addExtraWhitespaces(p2, p2.replaceAll(/[.?]/g, "\\$&")); } if (p3) { // Replace spaces by \s+ to be sure to match any spaces.