From 4d15bfec0d83e695f18f6201508fbbdb0c558c71 Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Fri, 3 Oct 2025 22:17:57 +0200 Subject: [PATCH] Only apply word spacing when there is a 0x20 in the text chunk Fixes #20319. --- src/core/evaluator.js | 10 +++-- test/pdfs/.gitignore | 2 + test/pdfs/issue20319_1.pdf | Bin 0 -> 49916 bytes test/pdfs/issue20319_2.pdf | 80 +++++++++++++++++++++++++++++++++++++ test/unit/api_spec.js | 31 ++++++++++++++ 5 files changed, 120 insertions(+), 3 deletions(-) create mode 100644 test/pdfs/issue20319_1.pdf create mode 100644 test/pdfs/issue20319_2.pdf diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 256a50626..d38fab6b0 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2927,7 +2927,7 @@ class PartialEvaluator { for (let i = 0, ii = glyphs.length; i < ii; i++) { const glyph = glyphs[i]; - const { category } = glyph; + const { category, originalCharCode } = glyph; if (category.isInvisibleFormatMark) { continue; @@ -2941,6 +2941,10 @@ class PartialEvaluator { } let scaledDim = glyphWidth * scale; + if (originalCharCode === 0x20) { + charSpacing += textState.wordSpacing; + } + if (!keepWhiteSpace && category.isWhitespace) { // Don't push a " " in the textContentItem // (except when it's between two non-spaces chars), @@ -2948,13 +2952,13 @@ class PartialEvaluator { // compareWithLastPosition. // This way we can merge real spaces and spaces due to cursor moves. if (!font.vertical) { - charSpacing += scaledDim + textState.wordSpacing; + charSpacing += scaledDim; textState.translateTextMatrix( charSpacing * textState.textHScale, 0 ); } else { - charSpacing += -scaledDim + textState.wordSpacing; + charSpacing += -scaledDim; textState.translateTextMatrix(0, -charSpacing); } saveLastChar(" "); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 6b1ede9ae..45530519a 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -746,3 +746,5 @@ !issue20232.pdf !bug1989304.pdf !comments.pdf +!issue20319_1.pdf +!issue20319_2.pdf diff --git a/test/pdfs/issue20319_1.pdf b/test/pdfs/issue20319_1.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0fe77c69957d908b4634028bd615fc62d642aef0 GIT binary patch literal 49916 zcmeIb3!GHdmG8f+PMxam;{8y>XMqxN1iIc2Kx`g@C@2wv1V!!cs)k1BZu+4R2BT4< zMvbD0F`9=_CuWRMG{!J7*Jxgv>v(g$>O_;tsFV1a$qdOD<2Xj4>wbUxoKutyi2na` zKcD&B+qA3JKKr@$dhNCMK4nc=wqW7()Qm#>{ul4LC!S6u6J4$AST zWKyMcylHuVYu}dcN}_4i(k1bx6*jrF($`XM>1&D4nG^5r>#4MCjL$xKb9YPIg_XWU zYh`Um=Sh?Q?a?1hPIQz{ntXO{X>w`z{K~qHQ$NvDS^kMLR*D-6b|}+|b!ObMr})TgqLnm6@cK-84C2tMpxX(&V`+NUU1AEHS^Ur;;emNKJ1` zCR2$*X-2+~D<|6v(-P@qI@6TQH>Fb3Q>B^d)XY>q;s2XFCr)a1Pq}^OSqm2WAhPz`&u?n@9aI+>m=xBL8Z5? zr=z>CqpLHaep_0*`uk3rJXr(P{`nKz*j*c_vv-D_onl1=+-RBo%a z>F)2@U`Z>tHB~lLHdZ?OdTE#1Z##IpbZM>Weuvw*alb9Q>-h1Wb`&sNrm4mN<~N&h zW-je2ceHO=(9&0#1M_p!lcnkD(u!1eW;Qi5nVp`@%}gd|Hw~(}mqE6V`KVA|Wj}pn z@-x%P`abGv4(cP&$^G=QFD1VI+PZ`Ku_JW#tmx|6FvrulxotgNtu1|tsrl^m){edt z66xHG)Qog8U&?qBt8Y@*Te)p!dsolKmcBVH-Q62H+FGP1O)zqH)83-G`c`T)7j(7t zOS+2}%<1p%D9_BdW(%pdLhJN=E?u0SO{LqXm$Lcf^h~mtZLPGXS}UoXC1dH*`nLP@ zySTHrucfoC67<_zX{)s5N*VRmGCiBg6sH%Hne_CQOtzFSmy)TLd^za1u5Dev%w*=W zp009#TcziNu7S+t^|5Re{1piNsK)*wSd?@A) zv1^E3ABqDXs`8L_4YBJ(ao|H$9@4J=G z6&-yWDv80~ZRs*umksq0B?gPs;7&WWcoUh`ESUmZ`z3v9AF`!{Te zuVJs1Xj;^@qN}cLsjh8pGN*Txd-wK3J8u%pTGqa22B~CyV};nnp9*;)Ih$$8q%*{O znH--ba4D0@5CvxP%1@Wc&7})`CewvE@g_gSrDZCg&9~>%M59U4nL^q=r3zW1|01ok z=`y7yo=a)lf4Y)M0U9mOC>eTV&u|brvQU#$UxVlf;?4gYLiS2KRlolYwh{?tBIiIm z0=Zj8CnOpvf!z_$XE(f|Yi*t6tds+>&+qE&(~d2X%6YBk_jGm7>)MPIlleJ)SSk$R%>w z+zjNGSW%AW$i2AE{^Kdirx&s+o1ILiv&keH2A;EGef*Ra@qMkC{rf&-RIN`@5Uo0LCn{Yg;K%4EL)mMzXy^G3 z(`w7#ah|b%w>i&*FE(LknRA=7%(l40>~v;DBF-}&*j7)Qlg#(by-K^!m|LiKmiYyL z^XUCF9T&E?7X`@>LAL zf68XGQx;6uR0eK|w7Yj>GBI;_^YF-%Ci-Nk!Ce!Mn5G{;zbj=%KfmkwUCD7{CXJag zY0RYdh}qri9I^Ygfm>oD-+rSfdV&d8F)H_0h$(Ewnr}6K)Rd*Dh= zY}RNKjy1$WqZ?u)fnmeLBStqg#FmDmBfHG#;SEltE7}@&#*7{{a>THP7=47yh;fb1 zxY5u5?U)Irlxdp%+g%gV&+i&rN-~+)X!rLM(lPh<`fqdtV3TC|4a?gb%rS&#PbGw&%KScZZJ(|McCX*1H$J^ZE7bL#GbxTrc&M!wH|av$FKwbI%_& z``^tm4azmo{qV%M1N`R~_sksqV#6yut57X~*v5gQj2Uy(pI_YFJ^DqPOF{5VNe z8~?)h0@vb{3ZV(k{pM%Uhs{^q3FiLTZDv#K7;|$tV}9lwW9|gDV=$8_e~J31g&#Jn z!B2!ALN*i3RlqyIwZNBvlY#qzYk>QFd>M6L08UdM0YpwS*FyGH?vvFw-B-<~=rN`@ ziVJan0bI{>ndf)RCg%hoS6$(fc9)q=4Jp$eGo~E9!SuSV)B{JOlx=0am1cbOF>?WZ z?rJERxciV9?w(rxoqK~hmwD_E&ET(y1~~!X-pEf(yL*h871?TWd*u7L#kb692(Tb7G;IVmDP8mFPTY^%gsckBL#Do zJ0WDk4`BdcpO=>R@aQ(5PwU(X)z?)Ay3PfD)?h+MIM0}EcxFqpc&|LlUI2ciew>_H z4U{7$bPbEbEc*Hi@_CHsW68hS`H?w~_D{r~FgN>97VjKj%T zH<(877|VZO4ocU9mG84=iDXd^l7*GEF!Q#uAA~h`jekHa9YyAvzts`=BRa*W_rua> z+wMSE`h9%$N78xe7d%Gys(Zj4;OD?=nDc>K+)c zHY>xon(30G#vF{Jf55(EPn1cv;_+bIIMG}j`yKh%pzL5UmR-s=4v4W?Yd3?iKW^cJ zV%fWFPC91g?AyJyJ?y_8`}RG{eYM&w-y@v}u=PWM4j|*``n`3m&LL;ZgWYDEdwn$* zeycjm%Jv~MHrDUS1=+0i<*?B5f2>&{zY#OlOu$R}=%l~4hL2)5{pe3C<3LB?%Ph~n zVU~w)taM+5%&^z1Y4AUh27JrY89?m_?lglzExvHMvk0nHDco`FxBf6Yvv@=deEc?4d(YSu!}O4SYgnDr-DqT7Vf)^A14TG@APJa669+F4sa?HW3r z=Zp6G?&uH9=ExLNiHtStqD|m`W__h!_hQ?wJ#rUypJtuB+}wz~#^x3vuiemo3%>g)WO*C&{0Q^AR^onh2l2qOHRw0}VZj{APo}O3 z4fDV}K}=Bt;=kv;I3sio_}<`|80tj}K|fz39t5cKB>h|q{sVXk_z~bu=WMn#t{ENr zBjY_}3QpM@*LlPI-C&^p6VA)#Sz#aO%RFMt?j6SLxd!}%m)`vXmx$%GWd?{F#vdPV+8Ug z&!5TG6(`0rW>mv6Gb-{)eB1+ORBQ(Lf0FPVwah32y8r-_@7 zHHX-CiXRzMd3N3TYufFk?@P={(E&3`*zGe*DW4YE1MPti_TvQc$9cHAD6-RBh^_ws zpZTT8mx(#38_*%2Y}@%V!KQ88p?E{QbMG~6)csfxe+UP$ceGr6&|Lywl}9`!zH42Q zk8b#u$u_(}**nz-qbXtfeaakWWi-khX5$*kQ0*^c9M+YFJ?5~;ZqsG!NS11+exSqV z)ydC}Dfy|-yG_63XX`YX@r{#+7l7zFW?}4Q#we49#>VTw7aSkSc#WP~kQ@&&$3;uXRJtn}s?6V)S^0g3@#mW}wWo5YQ13Lx>?WoHh;L>` z&oi?l=KzY`{{x>n+|1Qj7WSAE?fBxo{Jr*c>>;d7e!#f@$XLgl6RkdJe8zH{>GLqs z&zaec4b100GfR9E?Q!PXz*-c$p66oq@2p%|!=tazZX1MU$ptv1ehL&_J0bSpR4#{+qS=e|KbUX{boA~A`>cMNPUR^nIe%gFEVvBQDtQ|^`YDNJl6JqqUJ$6K@PSmM3vfZ@}uD0(DW1~`bNf>^=M zA6^GI=utFXJySh#p}skFKu%JlbHsNgTQ`dV%>#w8~k;-R3v3u{3HgF zLB5BYjn0nhKe{R6tzG80*r#c8iRr?BDh4YN|Cg|@l61vA&U8Bez&=LLha#6(OlNcp zygQS=c5=RRAND+fbB?FTe+Kv}@Upqw{k7?g{SsJi4sZCmxjb^UIXr53{}vB_ z_YKq8@I|vFHjVtRQRkzk)BOkF9P*DL{}S+2vjE<8sxS1|md6_DgEIFK#)nyx)=cGWa3t-EKY`SQGSr&PNIsAs_PXOncbi>u)9Tu)5 z2J4JypY~Vu^9As);MH#C`FYx0$@Bf>LsR5h`gt5V?SRK?dHyzV5BrSU(f!wGb1{D4 zX7Zi`M1#^dLSG8nCV(BDzd;Q7C-8RodABROmlgx&3xIN zV?JGrd9_z4n01kp@k?*eUcOv$u+~}naKCC!m%TZ6Ro`}pRsY3#*en9(M&jUac@T-3 zQzI3#D742c3cJnZ@K*C;S$5{W|QI3-U3Vqm_x` z6RltHH1byU750+95^bU*phfhCerlRR&sVRobD{sx%RK)H5PsR58hWvMMQAPbwpV`_ ze%<6mtDVkXi;b5&eC?=MIucrn3dAnBNe+J=UgPygPu~t>z2J4(` zbg-W7gZG|^ajs^L@XyGO@^Pb$W7$^@#x|~ojss!4_F+>8!aA$AeDv`?=LhdSKRZfs zo??~#;uGugS^I-?q5WZc{?)CSh?Rx%&hn?(7$^8Pk#P^u+Ajv>%ZUgp!1&h=;bc@QTc%|v5sOV zYv(@RuiRhr_2EF+(-(UVNc;E4`^wQf0}Rdr4}fd2U?7)4IQ9kR`E~4@J=psKd=+n_ ze=?0goV9g0anA_lS)QNH{%t=Hea?)CK59lZTw@v=zHH(RV@NN;uWKLoKEPSXh}i39 zMB_uIvGKo~c;iCS7t!`9GqT}_oP*b8PkZq?>_>Kp1c%Co;xF=zG;Py8i^d zU8Mhtygg=Qza7%lhE-9?Q{n8J`nvMw7HY{t)~7RW~BC# z?+bS_&RfiBIO80~I5*LbK4O=ekul~M$fCwmpI83;ygjk#_h|~o3fdhIA54z#BNNH% zU~&oQI3WIj^WHE0KJ9_dS-mr%%j{W)#rDkUk^Sja>~zy$&pIwO4La}8`axWMJ@D7S zBk=V|U=q5t8$1R$+yd(fw%)-$q(NsS)V&(GQf&g>*RakU1inn{*#`C&JCCt-u5uQ7 zq&r8>UxfF^qu7I-tLq%*6)@*8Iw$eYY&1{rsyD#lyST$8FSOF$#oGJa5U+TczXkU5 z&)(f~he({R^E&PlEg1UOouk_8oNzm!JKpV)37o}E08gR5uwj27*cr+Qi20x8Z0A|} zd3Ha;8PO8|zS%T_{aNT)Gc~-&OpTmirq-v!kC5{;Q_1<-#5bzbyg5@%aKCq2peqS;QnvU>(&Jh?xL4xGnbX;yI_I0q~M zrUGaB&zVTcToXX(KTJDtjy;2=9d&L7>^-bs|AVzxOsn`+aVh7bHqE)NVppA)+Bi?= zyu_=_v7oq)xfi)7e!HQc*yZc&C+FyH+1|B}2cJjTyI}2yS2p~~oZrx5&X3(-V}MrT zfVS8&_L4W5B~hKhy=_)TziuvoXK%3HEwT4W=$m3DYh%}QC-5uwtEZZ@d%w9L_F4LQ z#N?tGa~FH31|Sys4Qu)9)t|e!nnM|PffKJj5PogozSuFQC32KW1^3SJw`%v#tNlFT zGLZA17x-9tN}tKT>SnF8Za3#0-vtINUAr2FS9isiS9cA+t-33=lRRRN*yCn+JjQ)L z@ttB*#iNShv_`u}et={(EUQA zcuZdw`{xqsANsP~GcSog9NG!)_p$#Rn$GihA7*&9&Q*N=}HYf{d z3i=6X6%J|6U-ZDH)t~0?V_Qe#^ZcIns^izI&$sojp>6;ggS4%C5BR3N{aE_z#u`j- zK!aHldEPWej%8naw)snedyC0f*u@KLb$~791nB-~#;U!;f=wE9^CgG60(T94c6VTs_0^*gX=>IfLjs4)dF+OOT`=gfxAGE&4 ztiexcR}b}NA2$6eeF2;$S%_T2yEcIG0%$mSZ(4bJ<^{n|h5sA*N1B@gpC4e&iMs}G44wxuXfW4$9MCV^@TTdmFH^eKUqD-Zzjsp( zaL#N&eQ-AYP2euz-g@8;=K=R656FKYHZ8Oqc%1ax;0Uk_n9uXyfLDQk3!VplBzSgy zO6+!oxsT_~gids&zf7m#%fJv9oGAZ?6;5O zZe*?QLxMY<+MSJmcc6C=wU^hKgwlG~wK2|_=@q>1yNh$Y9?mHAZjave)0yZ--f!c* zIPN3=r27-STgf{Eo4Fg`?2a~L-TCIfxg$7Fc*tyy4rl-H0PiTi&fV>aq`%EOJR{AI z{q$z;CpSlb&whUgZAl9_OV}K{(cB;XXP%D-XlKjP<{R9}KjV%g7WkFU0tZ*oS|-uQFyZQwNT_HN;QpC2H%Q|j=ydA^mqk>EVVf8Ql~ zI`iDZ`y74Mx7;1ow}N@;ecs4F+VQ*e4kvn((m4%ub;1ABpv&gd-(2X+@|~6=cwg~n zdQT*{ujBowX5Gz3-Zjn0;xyjbT8Z8*mR^a!jkj};F^jwGvyct%6vRGfK1q6myAb$W zlaKw@Tx!cGkB>4}^Iq9z-U+)c_EqlK)us=5?jij)_akj)tNX5*3okw?9f)lMZ=%j+ zyq7{7_j$^x&-3l>3h)b@504`*`zYzZ0+s{nQ}9#Xfq2kd8k@zwRJv_+9&Da;A7BnE zJ>ZV+)P_0ujA`b~*hQv%;8V{3ZzT;`b2PTs#{2TGz~lA26aR|)3*dU5%lOrIs;}_A z^eawo;2xK>yR7<3L#p};@4M^WsP(+t{t9?&G)mc4?wVF|_V}22nRg1;LHnoOhpJmS zW30G0RG;R~c>(vNXCudz;HkX7bvEzjUPk&K6+Bc=C)^-lc_$o~fTqPnYq zo#@96=;8$KLc1xuE^;OB?Y+c%QkQclbY=D7NTK?#(OmU)-o3dX`Z_R?GfmDmqc511 z-2Y$T{H(e@`X+gIa4z~Q&PO>Th0d$oH_Zg>VIghbY}j4>Lo~;o(j(PBxJ|q(b1Q!I zWs6sE2D*axa8D+0h5M{I*@<&6{aSUMd#zchbi>8?D&FCO2ezH+u?L#PJ94Wy8%j~u z6#07fSJB^8{{Sxx>DM?1)!c7p?%#2@L)&(~bG4oCSZ!zi+v#UJerG$nx82+ctOvSy zXH@UY{>;aEH}x&x2_ML}WqOxY@5|cvtr_TtH}iF;9leHgUwvQT2fSAWZVZ{z1r6iO1oyXQUgMo+-iY(fyfH_ad3v@mF><&I zg!FzD7blS!+?~{=-PJYwSU~E(5QLhcbO?rcFr))RIxwUILpm^|14BA6qys}bFr))R zIxwUILpm^|14BA6qys}bFr))RIxwUILpm^|14BA6qys}bFr))RIxwUILpm^|14BA6 zqys}bFr))RIxwUILpm^|14BA6qys}bFr))RIxwUILpm^|14BA6qys}bFr))RIxwUI zLpt#OIuJ5u#8cr$bE3JG|50Dqj5c31e336QjQ?|S1OGcg{` z^(`y=6K(5SI{Cfa1i$E;=;%yz_qT57XiJp4Hnwzh#+R;Iy?pM%6TPa-D?J_UA0)@j z=PQ}rW{c@D9cHarXZlRSOf_xh1aR6U`OA=MrAC5Zt?Zz@7w9n+(_%K7Y2+?8oz$N} zYOdM9U&5SK>#Ns3RcKS8%|(3jzY|0V3rMY}hn1$E8f}c*LN9B{Rf`tee}Y~*dFrN4 zEB$s*FG2e*#%-}>akJE{GONvUMqY@}>w8^pN9-`|P*b7jetJ;9AEd>9Qk|XQOPgPL za=`iNIOiunKF<9~+4)IxB`rXy?l_ zoG)E{wELw2=l0u1ySE?Z+&0?z;;kdyFHUi8rQWS0om-nDU!dU^#yelQG;+&G=a%Nk z=chQIyCUg+Zos+urg84gQ=FT&k8p1q=iIb2)ZE+{*?wc(-9EzEzB6Q+7e{W4J2yTa zxuI*Dd&8s7XD=J(es+xW+2+Xgq^_UoTzBoG?sWsswVzqzUi+wX?WK{=Tr)M>zXOf)mM#iuRhwjdS~_V=4#}sG0v3?eP!9XBI#UysPm~?oXdtemzK-!r320< zH;i^aInlXf+er74w6krb^Osvkxqms%**ePk#4XO2G0x_=bMZw9_u_Y)i~5grFG@K5 zM>>5p>O0!$9dLREoK4*$+)V>c_XwxEInuSw>D)NS?Yz+0ILFy=;c$1uXlKKvkqd`A z7dA)MGvxYroQ`#mx*Y?~y0vTEb&ophE{&{hpW?1v-rAx`Un z(=y;(aQ+PUf&u6J8P0hF&YA(|+|!+NwmGW@oR2SXRt-334>&6ybv`!WEH67}jdwnp zc9xwv%3Ze2IdhbA#@yz4&XQryX=P{e1r&ROc5G<&>z(&^6Z zS#fvvcxQHVWLDgnSsLrkT;r6AW8BhMr#QwbjBxS;PA)s%%?&u&%s4kY-pQonZf2a5 zPBpsexRdtelWKI5O^3P31y0k9L)@mroTkSkGmehCGY)ZPTpF3)Sazr1;!HbnygTi5 z=R_uP;&|u8=Ez?_`d^ft6Q(BJ6XrToA#7^WIUaQUfb)@=&Xf^{xl`6S#~nM?J#M*k zEX|G`>m1u0ncUz^N*v}+TH_>+8RI4na}tk7j$yE4#yH1Z8aX=d9Nio_>R9KwmRA_yO%)y6*R~o8~WGu(-3MFH!rE@%E0+a*zMJ%XgZ4qtlnD^Q|FeuS}P~;yX!q=mfp^_iK!o(zx;$m z)AIiA?hTcV{E&JgnV2&tQLePp(bAUgGx&MzM3ZeceQ}xcj=n9^X|uP6{grlTp+(9g zrmd@7>FsW5tMn)i(65mvlXDWY7xHh8*fFR)?X_xcCvNDmwKLS_r^}Obyk9c6PuY}h z>(^2*Og2#?v8}(S2kIf>wh4{1$pT$-IM~WUn!2cD9ty=5zT< zA)7B1vW1d<+dbgr>}-m{_Ix^@BfFk~k|e{ZyL76q-?_bQVsI*zEO}Dm*W>Mvu=88@ z^IN)4t=OomkfwA&#j86#lP&NY^cz~%_9pV)kI~Qbe!_lwp^%K*;&dvRi`!25k$e4I z{lb=w9UHbJW@klvOFos$7jo%jE|bp{()m(854pBl$A(Hekk>_8boFd(+2CcK?Geqd&BvP-_q7n{&Fx&fp_1TnIV!j^Q7p!rtcutXG_RhH z?!K;`c+)C>!nstbo@41cj?HW7t%$Hh)BcEhkD?Q)dJ`dcMc1Om3sBxfQ=qbN^z1|m zJ@lyFOGd&$n7YsF?;W?F|X^y616VIVYYi6yrHKC8nxUPV>v67u&w1JPMd?lDWxPeq?aP+9H^0y zn4}7e@l=*4RmxH!E9OIZRuzj$WaMcS{7bQ1NP;OO=n<+X`p8i~SG3Yhv zE6VfqE8!utl$Bn|mmFn~pF%Svzf=L0DWHHF1CdDQJ%LcKNS4W%c2g5Q3q>ngK%x&fPjNw>o{bp(l{%1-+@?@g`@R2@DF&i zG+q=Q$eCr*GCNPgp4O-wM~MkZH!{dw8s%w%=k~mM;*7Mxa)%z}3N?n@p`2NX9RYp8 z+l+5j8EaM6J^FqrpUKAMn`C9;IQ3y=CWk!WeFn?&tN_R2t0Kl?iS^y6xLiO*HJ!m6 zt#Q*%MJkNRSCm6>>igz4wDFsE>QBCA{|7o0p-@<1{tpPUh*JC>(f1cTb5|qfv>eeeC=Hm z&mxH|wxda9;W#duvO&|3*whVv7{YzeCz%A3*gLS_aZpghgIq)~>i`cXY_Qb?vkq|0 zdIBq99m=IsHP@kfMXesVlpM}Ekn3Q229G=VIWQhCPJmTXWL(S|RvE2d*t*Qiu3_ps zaG7#3TE?Y=GPE4)YL+~im1whdeV%x33wK=f+-flQ+Ss^g#v}NUlam~B2r5TUGCi$i zGExN07^=q#m?>F@oTDzw5##0Tx`^?~MxZIfimLjyH)%N%mI}}wSDBLqA>JI~_7ozc zAVOwG-7L8oY^A`8PW?Rfd6KEB2hqaZ;SV=pk(mZiX#;(q3+NEJznlY#+(XcK`ofR@sW@DH7=m$S#8QHA zX$zGF!a>#4&dm$q#lI3ch)(vPcvK=I)MG%g-m_Z%EJGgqEE(@bCXG{Za{||soJsYK;w&k%Od85@hV0};*y_bWI>Z* z0#p{SAwflU3q@5MY|uSo6f=;VO0`N*AEhcXyjWXg8W<8Pz`lc+T002!SD-(OF)NE< z7cF6Y5jC}5n+ZXdbOH<4c&QX~$FJFgs)C}_V=%uwr6lgr_| zBui@}!VpwsCsjZ$lD0f8gF*}=r8w1Lcy9-3Els4uPFX+@g9}UGVr_{=1%y+8nRzV5 zVn~y!*2oBgVmlli+Dc$?tst|8{{gNAV!>X_GC^uAW`JwGN^w06VK)v#lQ>zCM8a;i zH8%zc23310+F)Gsuj)=pGZ-u2K+G~6313<)g$ah23>8SD(dZS2E_MJkv9^CHN+noe zi)`YHC}7dXu~G%{$S>i^i>Q4;6l3O`=1~dVFXDC!>QNn_`Btwev&(|~H#4>y64l4G z$`Xr~JlZhT0(vabTYlU2)RAs#O(34pBB-T0ZQVA{q%%YyT2f>kd3+1oa~&+$U7^+q z5_b14RfAhONb^hx4q4N|151a*HB>RLdI~Ba&0W}tCpuk2oL%WQX0z5=|MV*&1uFR#tv`e^8&mG~|ayaJ#6=bzm z(djsq6!Va&)-SsoQ?tCzT9R^QSP;TQSH(|Ctip?*EdDAiFwp`^X9r_Rhi7SM!|O@k zU}^#Cic~BqG{w4X2npp9strFiV>xoTVVw(sCp#~D!jr~cQ)yI3u^|aLBE?}j)_jGO zMRYQcAPd?tDMo{0#aBfL$)Q;}6h%$#+N;fFOmom0$RQQh$LX~lpM;h{>x*HAY(1-$O{Bx~dfy#1XBlLR$uuw$ z65^l^B63iw-kB^PqE?Q|u7m2`7HME6t;v$84NR6cG9|Ra2F=i>69qM4{o#YTz)=!% z8QP%~pw08~G?i~+gn~T*BxFU#Qe>J-$N7)~(~k^^=H%HTj?<*y+ES;q9;k?3VXAke z!!AsbKLXF&6^=^KpwLah4hb7m;t?{apyJLfyjCzT0>vDSq7%Ukw98;g*fU6lLY~d) zWFwWKy3Wx_z%hHgi~^8=Nf~^A42OicWvgKYvO~MQLR1;Jr&A4$CM&V09psS^x6~vJ zF&V1bxv+9&F#9Y@XFUnx%pk-p3a|5rG}foBfa-a7EYuVYUdgeNe9 zgmx?$RuHw)!jn7GlFB0VEFM||*eYTz1}Gv(NQM$i8V?_^DI;$s_E^?f)m1O4 ztPBKUaat>F@%*q=rYyQoZh=h%NW>~Iw+ki$wDIL&Ec?SA_nvWLF|hDJ&%ru-FkF8E zX8W_TgBNtlLwm8rzF3ifCrS?o!N6dA;4@8FjM!})s@QGcL~y`%Kuq)fE&V|#pz8}w zDZs(`#es0`>;_3I4t&qE&Iy8ilxOg{UH!AxV#~UNU@LOO8sJ)F@j((nq!!>>B!^t; zPf&vcCJ`hey-N~SKU^2;t&SX#@Ikl(C%J}EnL-0aFp4Pg8hAT}jcTfGqg2$djQ7mr>$FcGp~^ZdDB_U` zLC8xo3Q@TX4_BiOaB|%3XtkyP0*j!IiwJ(@B-3(OY1S-`S}9GjOcT$lG5SVh#Y(Ay zf=|^J39ODhYLU11k2=TJOz2koEy-2Q5HTOPJT3kOyJm^qit*SHP)NXFgE21HZ3A6+ z*}zwnE8^3)?$5V)5tw6N?(G!5+x!@6$*-Z zB?1NSe6Y-mkBYka2w0@0ssJ0kIZIdtsPLvDVd?x#XB8yW3p`U4M?$?I8*c>5Fm2`~ zWNoPVAfAzswIO+rkPB8u$j4SLDhSYJmz9{d-5)|?+M1+v0%&PcxcLDYR8-Vqukys8 zB3n#u`F!$=2!JOd9!naE&9wr|lSUPh2=t*4-bt1sEFEWQg@Nx@`xgNsG0!k|mN* z_)|h@b+D$xVRda=k@mVqXsM#4BROhoK?7~~&|qyKt%cz|X{gf{KA5|vQ^KqiF4{gz zHmp=Ar)xkA+-{i^C-M?4Jn<^Ypi1PaHIZXVX9;=}fJAWV6ZEI8L`gBd9ZG?(_EYMR zgvdZ*IpP%(I?J%fb3BSl2*i#?IQb1`l#9x9qHD&7FL7W0x70NzKXoNkS zlS5?I=!0TFUCQ%8R0>x}XfPd?bIF?|gro=JW?Kho@&uo4GXg)J=)g9!`vL{98RA!k znc5pDt*#Wg>#SM}u04lmY|Tfpr-nhad{EBxNyx-FDM5d6v<1}O*3ob{ey<-GgRoSoy12r5l17sIwhz#lWCNzifrQwrI8aW? z9!Nu^Osqf{tV1xnvBpZ(6tVK54Vj2Uq(~Colwk8`y= z>D3s8tcoe6zH%7${swGM*m&ClAAldwd6h7^UK2&`wwVH5rfI`=o2Z>rF|>wda$22O zYMC&mNJ3hbr=uKdLvb2m@qsQ;g#>&{)2)_o5==R*!#ijsAr(hbk~RtXAn{(EAT2&P zSFmGPW0YEnK`O?n@u7P~^15=$F334qm`4wIOs5vuUN)D+Cbk#M z$uHAsihN+fMkrK4JQ)Ncw?#s}No05@%jpdB^D>DT2=GPOfR{4%wJZm$gg!Bfx<-^^3c4#nUpXvgHx zP90c_Y8&eosjq8p_>#l6(mE7U9tkx?9kiN|u(yjk<_Qh81IRb( z3c_nAz00F_I@~3}e017uO+!8pLuKabQYM#|GmyUPyi^X-a>06O9X06ylOmPNE!m35 zinRz{=g~d&gShx$K(AQtTg4e9t2<7qsI4Z3F|^fQZAYl)Ftx1bDoUnODO~M9+NSZfW^im2RUY{h?r@|^*gh3@4 zT*HyDD=l=TJhLK4O{G4FfCLMvJ#M$PQ3B5EWbstOjq5|^!YuW+AA?X(JE)*r1^!ARDxX#XRjfBWs0vvhO=+--8O+ z%WF9#L{y-y-rSIuNutWj!?s!)N_7#bs1vp@eSN50Q-f?KgAQmyMd*@>*w-C2L0pl9 zV&7j<2n^G$0BNq+%o|4aAfLJ=v`n#?rx~1OVmEAXtoTW2$+hnlXcffY+TFNfB_awI zKZH*NX+s9ymc(mn3&5MucIg%67$}P=!49=Z;6qu*S?3W-8t0YaZDs6NW!4t0IM61$ z)xb&F*BW3{4o_>(Pcbis?#O(78UwslXAn+KW*u2^96SK8~5{ zJiA~+FzUho0z}lvxA)|XB^vZvNT52w0YH!+bf|L|yYQh}dk?cv3`v9?nB_jotb-~q z%FrsO_%pAWLY7u-h1^QZ#`UH~iHH$K>Oh?XNvM(a=rkQe#FHXl0oEi1^qT1zG^ z=fDu!Me8U&E7~y@U1DHzx+!Fq;s%{*=4#C839}&~{mF|c*rGeOAhgz-J4zQAEeNyo zGDvg*9|;F2@V0-1IMm6)6*VAH+rQyw6y~X_y}80Q+0_`mvT?W19E6zF{Ik+}m`R|d zCu&;+i}WIzWhLK7fJJ(rNsT~!Ft|F>+=-uX_j=!1#mIngOZrM{#jASxUYu4XYpU|W z^ceVJxeV*<=uhmY69p@LprIH72}wu-ltmYQ>be1RuBrEKMoJbZp#VB<*d~ z&nA}Tk_!W`KGbX5tzr`JPn%?|?IdKey4Fx=mqUW}X)VQT_^bP-NXGx(gBsayGOVe6 zi6r}hzY^hnZ=~G9mq7RqLvJFjUio&zDV==jM&I|TeIsJ}sde9Z(3c(fYQ&1Jk9BtF z+c}ApzN19Dz2DrZBXx5R-}Ol*5OD2Zg8RBmF43Nd^No^PehR|%FVUHZ=My@DyI;%H=VUMiQ!`jxr5;hQk;r7BC9 zw@)6> +endobj + +2 0 obj +<< /Type /Pages + /Kids [3 0 R] + /Count 1 +>> +endobj + +3 0 obj +<< /Type /Page + /Parent 2 0 R + /MediaBox [0 0 200 200] + /Resources << + /Font << /F1 4 0 R >> + >> + /Contents 5 0 R +>> +endobj + +4 0 obj +<< /Type /Font + /Subtype /Type3 + /Name /F1 + /FontBBox [0 0 500 500] + /FontMatrix [0.001 0 0 0.001 0 0] + /Encoding << /Differences [32 /A 33 /A] >> + /CharProcs << /A 6 0 R >> + /FirstChar 32 + /LastChar 33 + /Widths [500 500] + /Resources << >> +>> +endobj + +5 0 obj +<< /Length 45 >> +stream +BT +/F1 20 Tf +50 Tw +100 100 Td +<212021> Tj +ET +endstream +endobj + +6 0 obj +<< /Length 77 >> +stream +500 0 d0 +50 50 50 400 re +400 50 50 400 re +50 400 400 50 re +50 200 400 50 re +f +endstream +endobj + +xref +0 7 +0000000000 65535 f +0000000009 00000 n +0000000062 00000 n +0000000126 00000 n +0000000275 00000 n +0000000554 00000 n +0000000650 00000 n +trailer +<< /Size 7 + /Root 1 0 R +>> +startxref +778 +%%EOF diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 648fe309e..a006bb226 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -4027,6 +4027,37 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) expect(items[1].fontName).not.toEqual(items[0].fontName); }); + it("gets text content with word spacing (issue 20319)", async function () { + const loadingTask = getDocument( + buildGetDocumentParams("issue20319_1.pdf") + ); + const pdfDoc = await loadingTask.promise; + const pdfPage = await pdfDoc.getPage(1); + const { items } = await pdfPage.getTextContent({ + disableNormalization: true, + }); + const text = mergeText(items); + + expect(text).toEqual("A A"); + + await loadingTask.destroy(); + }); + + it("gets text content with word spacing and a fake space (issue 20319)", async function () { + const loadingTask = getDocument( + buildGetDocumentParams("issue20319_2.pdf") + ); + const pdfDoc = await loadingTask.promise; + const pdfPage = await pdfDoc.getPage(1); + const { items } = await pdfPage.getTextContent({ + disableNormalization: true, + }); + const text = mergeText(items); + expect(text).toEqual("AA A"); + + await loadingTask.destroy(); + }); + it("gets empty structure tree", async function () { const tree = await page.getStructTree();