From b97cee2ab54e61f9a916ff96929da3848c67f93f Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Thu, 5 Jun 2025 17:03:06 +0200 Subject: [PATCH] Correctly handle files with a hash sign in their names (bug 1894166) It fixes #19990. --- src/display/display_utils.js | 91 ++++++++++++++++++++++++------- test/integration/viewer_spec.mjs | 24 ++++++++ test/pdfs/.gitignore | 1 + test/pdfs/empty#hash.pdf | Bin 0 -> 4920 bytes test/unit/display_utils_spec.js | 14 +++++ web/app.js | 3 +- 6 files changed, 112 insertions(+), 21 deletions(-) create mode 100755 test/pdfs/empty#hash.pdf diff --git a/src/display/display_utils.js b/src/display/display_utils.js index c724b16e3..d6ed53da3 100644 --- a/src/display/display_utils.js +++ b/src/display/display_utils.js @@ -338,31 +338,82 @@ function getPdfFilenameFromUrl(url, defaultFilename = "document.pdf") { warn('getPdfFilenameFromUrl: ignore "data:"-URL for performance reasons.'); return defaultFilename; } - const reURI = /^(?:(?:[^:]+:)?\/\/[^/]+)?([^?#]*)(\?[^#]*)?(#.*)?$/; - // SCHEME HOST 1.PATH 2.QUERY 3.REF - // Pattern to get last matching NAME.pdf - const reFilename = /[^/?#=]+\.pdf\b(?!.*\.pdf\b)/i; - const splitURI = reURI.exec(url); - let suggestedFilename = - reFilename.exec(splitURI[1]) || - reFilename.exec(splitURI[2]) || - reFilename.exec(splitURI[3]); - if (suggestedFilename) { - suggestedFilename = suggestedFilename[0]; - if (suggestedFilename.includes("%")) { - // URL-encoded %2Fpath%2Fto%2Ffile.pdf should be file.pdf + + const getURL = urlString => { + try { + return new URL(urlString); + } catch { try { - suggestedFilename = reFilename.exec( - decodeURIComponent(suggestedFilename) - )[0]; + return new URL(decodeURIComponent(urlString)); } catch { - // Possible (extremely rare) errors: - // URIError "Malformed URI", e.g. for "%AA.pdf" - // TypeError "null has no properties", e.g. for "%2F.pdf" + try { + // Attempt to parse the URL using the document's base URI. + return new URL(urlString, "https://foo.bar"); + } catch { + try { + return new URL(decodeURIComponent(urlString), "https://foo.bar"); + } catch { + return null; + } + } + } + } + }; + + const newURL = getURL(url); + if (!newURL) { + // If the URL is invalid, return the default filename. + return defaultFilename; + } + + const decode = name => { + try { + let decoded = decodeURIComponent(name); + if (decoded.includes("/")) { + decoded = decoded.split("/").at(-1); + if (decoded.test(/^\.pdf$/i)) { + return decoded; + } + return name; + } + return decoded; + } catch { + return name; + } + }; + + const pdfRegex = /\.pdf$/i; + const filename = newURL.pathname.split("/").at(-1); + if (pdfRegex.test(filename)) { + return decode(filename); + } + + if (newURL.searchParams.size > 0) { + const values = Array.from(newURL.searchParams.values()).reverse(); + for (const value of values) { + if (pdfRegex.test(value)) { + // If any of the search parameters ends with ".pdf", return it. + return decode(value); + } + } + const keys = Array.from(newURL.searchParams.keys()).reverse(); + for (const key of keys) { + if (pdfRegex.test(key)) { + // If any of the search parameter keys ends with ".pdf", return it. + return decode(key); } } } - return suggestedFilename || defaultFilename; + + if (newURL.hash) { + const reFilename = /[^/?#=]+\.pdf\b(?!.*\.pdf\b)/i; + const hashFilename = reFilename.exec(newURL.hash); + if (hashFilename) { + return decode(hashFilename[0]); + } + } + + return defaultFilename; } class StatTimer { diff --git a/test/integration/viewer_spec.mjs b/test/integration/viewer_spec.mjs index 40634dc0d..507d15c41 100644 --- a/test/integration/viewer_spec.mjs +++ b/test/integration/viewer_spec.mjs @@ -1220,4 +1220,28 @@ describe("PDF viewer", () => { ); }); }); + + describe("Filename with a hash sign", () => { + let pages; + + beforeEach(async () => { + pages = await loadAndWait("empty%23hash.pdf", ".textLayer .endOfContent"); + }); + + afterEach(async () => { + await closePages(pages); + }); + + it("must extract the filename correctly", async () => { + await Promise.all( + pages.map(async ([browserName, page]) => { + const filename = await page.evaluate(() => document.title); + + expect(filename) + .withContext(`In ${browserName}`) + .toBe("empty#hash.pdf"); + }) + ); + }); + }); }); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index e1a6e57ad..cac180c7f 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -726,3 +726,4 @@ !chrome-text-selection-markedContent.pdf !bug1963407.pdf !issue19517.pdf +!empty#hash.pdf diff --git a/test/pdfs/empty#hash.pdf b/test/pdfs/empty#hash.pdf new file mode 100755 index 0000000000000000000000000000000000000000..d6d144f529bc8bc26eb2e3248acd70a0ba3bf67f GIT binary patch literal 4920 zcmeHLZEO=|93L=VEUUr9Aexvw7N#J(KF@P^z3xg^=-M@GuB>d;p_90KdAb$aUUR*& zZoxQ=L`ZplP1-}C!F?f+jN3r8627M%5$e?I?}(}6KSK$EvP{eIY<%n(UW4iOb#*baD+MX&>4 z4z>WVz{3snZ5;3d_QLg`2k;!GYj|@Y;3OHVvQP2opt9NlenyIqET_rSj=a z4sbB06E&$Anhr1<2*5}(We^=kQj$SRDzG)&XaFM`UnguQ znLeW*aL5d1o0(`M?9?=aMpRtyqR0R&!1!R6z)hVb0lRxJplcL@oS_pb?R4~Cl4W&C zuiV=y%e}u!^6!&-ckdW~2z*j=ADTPnbWo(qqGil-hNK$>og|zN;jx{OF+H7y|ssatOK#nhws_6tnrn?o>*P?;w$Lp4#%Z= ztNWJlJAOMlr*>J**bnm;*hE;&zRZv)F(g$oBr|J@SV!JSQO@LmQ2ZK``slM*d@h0% zqM|9SugVb2RnrOSTDDm$^dL8dpMC_)(z{or_suX$*&y9Qm3Il9+;1y9U2Qp$5BLml##E+cJ)mF+??)QyhCM`9!V zQzOU52hPE>t2d&FfiD{#ch6~iaAoZ;H&z_q|HvEjkNol1+x6o9!6PTPZCmHux9)wg zfB3|q_`wa_uF*5E9fR{@ahrWIx1>_YWhS!-!LGb)6bTz`FQro1!)h}AwS}ytY$XPe zNnbM4swLU0Aln(*st!q*v&l>O0fO|E{!$oe207#MI5aBfZSgIE+rh*-$743%JV zgBe3AFqz!)l97s?Fv%%;GHWEY3^46VvX(cRT&}6&Gkup6OJ}DXWpeIfl5Ryy!-ABB z*o~l_lB!JUWb=Be7@Dd;k|Jr!-5k}!wt5uPXmm=|)?GSntC_Xnaf=>Zpzb(01%QbG z_2Y|bYBDhxrUer)!Do=bU?Gk~ABTNBw;HiNg#2)tWEG&IjSxNf=oxKmgB7hdERxe~ zR48e0Wm+WVBd91l%}&vSrpGlc6(}(-sOXw386c)>V5Pt_a?)4@Sk8^zEW%#Cl$=$~ zVo_Cm2~AH+MnKADQ%OZKxdJJeepp?y$QO6c7uJ-#xx=mDKt7*TeF-_ysLGPSsF)}W z{m~?hgo%eC3NIrpvXY2|Vi?h=RrfZ#tr_}Lkx@v+u81)bJ*<~e6NoT8!W<(aSz)Av zz$-{4s)v^=b}hOVyEGFY#YN34ME|#N-J(;r<%eedSn8=%ti`vqTIopvr;du^l37`I z^{OU$7v0P{nGv|NaLvLsBLrq7Jj&4H_H6Q8`>K5czreBJaK+(wmVW=8*#ZCL zS$xzc7|ZzLj=^}UZ6p4tXjnd03dh|RZ8&x2e6F>j_3P|0mj3@15XW;Pyugo62&mJG zV&A^>?rC@-+kUE}?a6Gr`*cTJ!^TA`Pj~McN8EVybi?lPIAXuyCO8)H-tmdavmKwx zhj!zO;e(f6zi=gfdGCiG%v*$NmYhP**<@vzEZ7fa`2Tk(bK4+>w;R9E5!AHti#dU^ zj$dl#J{mc^wywQy&*X6JwWpW7_dxjBd2M4i?o}~D#}^%bVcm