From 039b9e4df3ce433fe2dc585f2741048091cff2c6 Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Fri, 21 Nov 2025 18:59:32 +0100 Subject: [PATCH] When searching for a group of punctuation signs, only add extraspaces around the group It fixes #20225. And only add the extra spaces where it's required depending on their position in the query string. --- test/pdfs/.gitignore | 1 + test/pdfs/issue20225.pdf | Bin 0 -> 9288 bytes test/unit/pdf_find_controller_spec.js | 24 ++++++++++++++++++++++-- web/pdf_find_controller.js | 20 ++++++++++++++++---- 4 files changed, 39 insertions(+), 6 deletions(-) create mode 100755 test/pdfs/issue20225.pdf diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index cb41f1d0a..2303ff0ec 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -761,3 +761,4 @@ !extract_link.pdf !two_paragraphs.pdf !paragraph_and_link.pdf +!issue20225.pdf diff --git a/test/pdfs/issue20225.pdf b/test/pdfs/issue20225.pdf new file mode 100755 index 0000000000000000000000000000000000000000..135c1b475545792413db59d03eb68e46f35d5612 GIT binary patch literal 9288 zcmeHNc{G%L-$z-Jy^=&tvWA)Kntg^W4YKb^Su$qEZe|)|%T0<>N+cna6rn{ZQAE;$ z>`N*kMM@#ritt{e&GX#e=XvgP-shb6k9*FUIdfgtcl~_7zw4au=l4^v)H6^A;aFjX z*KZnIg#|$n2EZ7uUc#E12vbiwjm+}gN29_(gei=G#N!bbFc5{%hhac88etCIwuRvd zcnrc2<^V%u0T_q@owT%sX>=;b25hMhpwcYKEMGs!BZ=YT?@RZCp%IoW z8r74+X0Tu&QcDYA;OWDru@DA6WH#-)3c^UwK@-qNlhC?IG#0I|XMjW#2nGZLGz!p1 zA^{Qxk0NPl;z0l;VX$aC3eZE5(A&0wC_M}cqmRVma5#)EQp*WpM5oe%9H7KO4Clg) z0RsUjKx>34jqc9&fMM{QXaYEiViDF11{(@#;kgY24Gf7z*akCc2zzUq8)Bye2t&d+ z5csiKG_tR-phvf>D^<+J#nnZSPeMpb=isQ=5qmEAph6wj1F&7YV551d2acX3V-1fV z5I_5+v1CeAC!CvWNP~Y!SP(+-hnXLE`mxC@b`Xo^CM*aD3o0n+TNnrn{=`^V5Q+JX z>r#Nhp!)sD35$a&i9;f6U?}`Q1nc7BLUD14cj4lxH086sC-6W`NJf~)(ACGX?w(ln zy(;*t{Y;*Sx&tAW?Q#;9PJ#D`>k?j`|E9{t&BwJ;j^AmCs29-s(NbnKHknFhlOdEi z2wV8GeV``ugJBmgEy?cCe@=H;e+QKWiTW_ye|Hp!`kf+)L1)wGY(E%?UeF-17|d;q zAO}1Fj>Vxs7#0BFcpMU{5C#qqkT5JB2S?(NP6#uowa6Sv01g!z`eH#O^ot`vzfK6K z7a^-ZqOhj_Wd2YM|as72ZqdJ@k8^q zn#KZ=LxUm94s+E+yJ|gHA+KQ!B=hfn(X`SsQztL{UC+QD?Kk2h?&+NSeD6&6xpLc% z{I!fWt}?E0DNE7Bq;B7bCxJb(bDux;eqr7z4Q5UB1_f%JtQ>Gar*6TvGQ)4dyxJ)Zg|6)IC+v2S8B$oZ<*6skJkMLPJ8wO4_PE%#@BjNt&A9o&He#+bPAVI@i?we}vACvS<2h z(k%kBXG;68i%wa%PraW^mh>(cmDP6MueqVy(P{S0fc7j|3HZZ!rL z2kp|Iu)1d6*dKE|)LUk=-b9s^yh(I0G`C1i{d86vJhlbbI<5TX|&D6!#lU5>|>`>LmZMK z-rn4mI2Yd9Im~Nk zw=3+};fwrJ;ZnY*`-`QxD?LLKETl4466T^Oj%kX;RweJsSVPNKW*CSfn`#MBZ+8T{ z#rO`2S-dUIK=si-Q-zN8t`Jv2e+akA+ljvIA}|&C8lJ)oQOUJaG`vVsdf(zzRcmPL z8y2T+#Aq=iMd=z0eNM~+hEiT{xJ0`7bYR*#jC}Oqz1`Nzz~kQ4)_fb38{Y)V+)BK( zOjOpkFkrP-W>eSbMWN;u8tcx3+W{TJjB2@;4YJvnyDgHHRA9AnSso|u-8}@#RzE&y zKs#I@_kgr|m#RqaMD*7e@2(Dp?rMAt*oo`!Pec|@j_6qSnYPPv^)FLcLva_{sd!aD z$mD)7?WHMjAhhh%dXp0Pt|={Ei|$Pg`)?$T_amqJDvojQ*YlJTDHTYQ;7`o1RgMgh zyw#u4gVdpO`A_W~CKC-kgKt^E1TQBEKSR@v_-exJXK$5Aj2bnk7(Xu*IXBAM*lL%M z5J|%xkx?nH zQP`i#Kb&}LYKkDLdAc{$nRjZ+KQ%NpD&BwY`JT6Bhd+0wbu>LJr;qf!*8(Dks zizYj|3Mm>IYqEMvVSz61?yYNm6%27+A?7Q0l z8L%mZH`qG4_`p?q>UO;rX?d#)fOws+L3V=kY-!k@w+Hxwr#_~!rG_@?FMBQJRPUPX1_>_0Gve zspW{C0M~~#jXTYn9yw|+`xN`Jh8(5}6A^k~bnqHaUj4;(tC)lglzdHRbE~N^FF)_+ z{s*+1?f%C^s!R#dul$u(JDhIa87DXDWQg{;(y}&gWlauWZq7MtJ$QoK;d1Y*V8~$CzZ5j^ANDRbnI5Ns$`%%Kmv01YLtJ!0AvvKq@TyrMWxXdc&8_wS%>HtPpwBB{n24mNR> zr*vM}-QPlezPs||6UXaqJ5Ch#KErE!TxR!ui~MFGogONPCcI%-rC?qOCV9Wy*6N+@-F0UZWsN zVYN0T$y7FBOiQO;GbG9~hFb0hYFjIBJ?n=w2S0eCte4#obZ<7fI3g zPNEynD++L#j*sZRjGuX$C7mN}5?5>Pye8=0sy}#V-LEfyxwiI1!@C{j&j?iw`XOe5 zpDt-}s}`KUeU~SG{JoE_ovPEFk(R2sygNv*5x@HGTyrAAX9qwVN%2ggXG+?1V^FLIqTFJC1W zC7d;anFlnOS*_VrlyH7G_uH$!5qDO#WY$czHR(=tjg_@rBTAX_#0aF$Hiz!OxU|Vv zy%K(UJ$O&WXOA0N&z={J2{VAQo!ZBwoogmSqlPCOp9P-(v?3GPQ#pZwYY6Rc3+1m( z0pJ>zGL{C@B5m;o1@APZOwJ5hr(Shy+wVv1J9%FV35u?oF^pC^_2$d4;^}u*Z^PcM z_sY$|XzC?sq)rU6&#r;FBI%TIHsa^Pz zd_jqfNxMaOff<%5V znMn60KaN;-S540@|E(iMETl%XMSmZWdT0}{R4@H0uZk{_040)dfdUEmoo~GdecYz&On6;jo9s z$;{BeZ9T0nqkNk;X&z;yRonEmm6g-9tuiWc@RH>Ci`t50%LsJ)hjo0*h%ub#NASxm~Z<(jPzW&i}1%Kp&t!JoyM!AN;e0m5p2K8zWfL` z?>{bATWdajd|PKuLzm({d@O2Gei67Nuhv8?5a)7Ls08pA!!A zGq{9CPIuSmK0X(z95bFgZWdF%G11Yg3LP)~Xme@q@yIKWlZQ!WncLHjrVdsQ>W{P$ z4Zt^BE~bFnYIj&$xlgQbG$qCBTE7kJJ76sSUL&Ev*=40xZ-+Ro!f<66Uxj^YO5)^} zj^y?msv-Ac9yb)LYI?tbJ(@Mhd=a+ZdTM<^K4SxFN2hYEcmd2I`%q@O>+rcCq(|dA+qH~gi>%4Q;XckEUkgN%2os5?xf#@G6@ z(ukmStKNNeZkRa)q;y6<+bp3cpJ+Qdl21=>bu+pD;*o)84XeK3W65w(XF=iee(T&m z`v6D&B;JYgII4w?*a*-~(@c=xec+lozvE3V^X0u3AN$ffxz_09_=t;Ss9$;Pdj@3)i=twVn$LV^7b$F-$A!JXq|I%|fT&(ALZv7_Aqb z{`h%}BAX_&A@?C}`{Q(x*}nEppV6+#V^Kj~N3yrCtuULHbm4M**Klg7=D`0a&4EOr z7Po<8T%}hPAar;nOi;FyRB*}P!gyAZtu9REe6jRBc1LsS6orUEN)nrjTN#&3s+mv8_w9_ z%SxWTv8rBAkl$#4|0BJ0b2Z)QJ-?FBy44=SBK#Z1=`HQ10);NwPxtl=OgyMdf8%W? z;NRT7;gLay?V_y~8>w;k~kNnO1C~E6=4{N@2WGT&mqHho#{9muP)w0l;4@00^;Q<}lR%Mgkyh z083yeP+*&HnUA*4FK7;IK-8;R0d0X#)V9F6jn^qJe?KFC|IDV-yt&hJSqg+R!MP6u z0(68==v;%H34d_G_PjajNMT)knyqkl+JiEmL*Wm`ZO!tnlRQY2e6}8Sb+z;PnBXh| zp!NZ8AJ^7 zW%@$Y7c9uvhweuV+A2qe2vQmm`p>zIkb^CVu)Vj+>2e;x?9D7;BnFEHBfvp*3IKo* zI|Z>?7%Jd~Q-dJ^Bnkmw5g@1z5{O8U2x5P-fO1+KLQs=Mbt78q8T{}EU2T=~V6&M- z1R^jn5FUtvGg$5jkU$_H03-s5REH$g{etOia*#URZ}WE}Kkewz{3t9>Cfk!ihjHwZ zT^auDt#Wd|m`9H9ABp)ge>tN2!55N*Qy9L8ATkpH!U4omN>s`(9i~6aXCX8y1wr$n z`EuwzNDo|655m;U?3e12?tFch)MPHg;~(^(2ar1y;uioM3?M&5GX~YuEm#j?^|T=H zG3pTVNYEC<6H#a)h*t$bA^>P2{z>vTKnpfV5F-q+*Gt$y5kcf)8;d0W$;Qu?T*}Iy zBKg6!F3A3qodpjJmMw$fqxHSxbSW%`E13;b#-i0-J=rQSBnD#LkN^RN`<~chl||O5 z6rvl0%PYRjSCkO~gO~mgaG+JtB%4!HVJN20EMUhw7W=mkzFVgB1H01{V6ooe*)D%$p5gTiQuf8MeFGM zB3-x(ty1U>f%x@`Sa`%)QUBG??*#vgZvJ-im%#lSuD{{>O9=cW;lI1_RxXHY;8ny!DGLir!1Ex3dL literal 0 HcmV?d00001 diff --git a/test/unit/pdf_find_controller_spec.js b/test/unit/pdf_find_controller_spec.js index c4d18925c..25f940861 100644 --- a/test/unit/pdf_find_controller_spec.js +++ b/test/unit/pdf_find_controller_spec.js @@ -634,8 +634,8 @@ describe("pdf_find_controller", function () { pageIndex: 0, matchIndex: 0, }, - pageMatches: [[1497]], - pageMatchesLength: [[25]], + pageMatches: [[1498]], + pageMatchesLength: [[24]], }); }); @@ -1138,6 +1138,26 @@ describe("pdf_find_controller", function () { }); }); + it("performs a search with a group of punctuation signs", async () => { + const { eventBus, pdfFindController } = + await initPdfFindController("issue20225.pdf"); + + await testSearch({ + eventBus, + pdfFindController, + state: { + query: "....", + }, + matchesPerPage: [1], + selectedMatch: { + pageIndex: 0, + matchIndex: 0, + }, + pageMatches: [[8]], + pageMatchesLength: [[4]], + }); + }); + describe("custom matcher", () => { it("calls to the matcher with the right arguments", async () => { const QUERY = "Foo bar"; diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js index 19abf3fca..eaa4456f0 100644 --- a/web/pdf_find_controller.js +++ b/web/pdf_find_controller.js @@ -78,7 +78,7 @@ let DIACRITICS_EXCEPTION_STR; // Lazily initialized, see below. const DIACRITICS_REG_EXP = /\p{M}+/gu; const SPECIAL_CHARS_REG_EXP = - /([.*+?^${}()|[\]\\])|(\p{P})|(\s+)|(\p{M})|(\p{L})/gu; + /([*+^${}()|[\]\\])|(\p{P}+)|(\s+)|(\p{M})|(\p{L})/gu; const NOT_DIACRITIC_FROM_END_REG_EXP = /([^\p{M}])\p{M}*$/u; const NOT_DIACRITIC_FROM_START_REG_EXP = /^\p{M}*([^\p{M}])/u; @@ -708,6 +708,18 @@ class PDFFindController { #convertToRegExpString(query, hasDiacritics) { const { matchDiacritics } = this.#state; let isUnicode = false; + const addExtraWhitespaces = (original, fixed) => { + if (original === query) { + return fixed; + } + if (query.startsWith(original)) { + return `${fixed}[ ]*`; + } + if (query.endsWith(original)) { + return `[ ]*${fixed}`; + } + return `[ ]*${fixed}[ ]*`; + }; query = query.replaceAll( SPECIAL_CHARS_REG_EXP, ( @@ -723,11 +735,11 @@ class PDFFindController { if (p1) { // Escape characters like *+?... to not interfere with regexp syntax. - return `[ ]*\\${p1}[ ]*`; + return addExtraWhitespaces(p1, `\\${p1}`); } if (p2) { - // Allow whitespaces around punctuation signs. - return `[ ]*${p2}[ ]*`; + // Allow whitespaces around group of punctuation signs. + return addExtraWhitespaces(p2, p2.replaceAll(/[.?]/g, "\\$&")); } if (p3) { // Replace spaces by \s+ to be sure to match any spaces.