diff --git a/src/display/display_utils.js b/src/display/display_utils.js index c724b16e3..d6ed53da3 100644 --- a/src/display/display_utils.js +++ b/src/display/display_utils.js @@ -338,31 +338,82 @@ function getPdfFilenameFromUrl(url, defaultFilename = "document.pdf") { warn('getPdfFilenameFromUrl: ignore "data:"-URL for performance reasons.'); return defaultFilename; } - const reURI = /^(?:(?:[^:]+:)?\/\/[^/]+)?([^?#]*)(\?[^#]*)?(#.*)?$/; - // SCHEME HOST 1.PATH 2.QUERY 3.REF - // Pattern to get last matching NAME.pdf - const reFilename = /[^/?#=]+\.pdf\b(?!.*\.pdf\b)/i; - const splitURI = reURI.exec(url); - let suggestedFilename = - reFilename.exec(splitURI[1]) || - reFilename.exec(splitURI[2]) || - reFilename.exec(splitURI[3]); - if (suggestedFilename) { - suggestedFilename = suggestedFilename[0]; - if (suggestedFilename.includes("%")) { - // URL-encoded %2Fpath%2Fto%2Ffile.pdf should be file.pdf + + const getURL = urlString => { + try { + return new URL(urlString); + } catch { try { - suggestedFilename = reFilename.exec( - decodeURIComponent(suggestedFilename) - )[0]; + return new URL(decodeURIComponent(urlString)); } catch { - // Possible (extremely rare) errors: - // URIError "Malformed URI", e.g. for "%AA.pdf" - // TypeError "null has no properties", e.g. for "%2F.pdf" + try { + // Not an absolute URL: retry with a placeholder base so that it parses. + return new URL(urlString, "https://foo.bar"); + } catch { + try { + return new URL(decodeURIComponent(urlString), "https://foo.bar"); + } catch { + return null; + } + } + } + } + }; + + const newURL = getURL(url); + if (!newURL) { + // If the URL is invalid, return the default filename. 
+ return defaultFilename; + } + + const decode = name => { + try { + let decoded = decodeURIComponent(name); + if (decoded.includes("/")) { + decoded = decoded.split("/").at(-1); + if (/\.pdf$/i.test(decoded)) { + return decoded; + } + return name; + } + return decoded; + } catch { + return name; + } + }; + + const pdfRegex = /\.pdf$/i; + const filename = newURL.pathname.split("/").at(-1); + if (pdfRegex.test(filename)) { + return decode(filename); + } + + if (newURL.searchParams.size > 0) { + const values = Array.from(newURL.searchParams.values()).reverse(); + for (const value of values) { + if (pdfRegex.test(value)) { + // If any of the search parameters ends with ".pdf", return it. + return decode(value); + } + } + const keys = Array.from(newURL.searchParams.keys()).reverse(); + for (const key of keys) { + if (pdfRegex.test(key)) { + // If any of the search parameter keys ends with ".pdf", return it. + return decode(key); } } } - return suggestedFilename || defaultFilename; + + if (newURL.hash) { + const reFilename = /[^/?#=]+\.pdf\b(?!.*\.pdf\b)/i; + const hashFilename = reFilename.exec(newURL.hash); + if (hashFilename) { + return decode(hashFilename[0]); + } + } + + return defaultFilename; } class StatTimer { diff --git a/test/integration/viewer_spec.mjs b/test/integration/viewer_spec.mjs index 40634dc0d..507d15c41 100644 --- a/test/integration/viewer_spec.mjs +++ b/test/integration/viewer_spec.mjs @@ -1220,4 +1220,28 @@ describe("PDF viewer", () => { ); }); }); + + describe("Filename with a hash sign", () => { + let pages; + + beforeEach(async () => { + pages = await loadAndWait("empty%23hash.pdf", ".textLayer .endOfContent"); + }); + + afterEach(async () => { + await closePages(pages); + }); + + it("must extract the filename correctly", async () => { + await Promise.all( + pages.map(async ([browserName, page]) => { + const filename = await page.evaluate(() => document.title); + + expect(filename) + .withContext(`In ${browserName}`) + 
.toBe("empty#hash.pdf"); + }) + ); + }); + }); }); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index e1a6e57ad..cac180c7f 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -726,3 +726,4 @@ !chrome-text-selection-markedContent.pdf !bug1963407.pdf !issue19517.pdf +!empty#hash.pdf diff --git a/test/pdfs/empty#hash.pdf b/test/pdfs/empty#hash.pdf new file mode 100755 index 000000000..d6d144f52 Binary files /dev/null and b/test/pdfs/empty#hash.pdf differ diff --git a/test/unit/display_utils_spec.js b/test/unit/display_utils_spec.js index eef734a83..8429c1129 100644 --- a/test/unit/display_utils_spec.js +++ b/test/unit/display_utils_spec.js @@ -193,6 +193,20 @@ describe("display_utils", function () { "document.pdf" ); }); + + it("gets PDF filename with a hash sign", function () { + expect(getPdfFilenameFromUrl("/foo.html?file=foo%23.pdf")).toEqual( + "foo#.pdf" + ); + + expect(getPdfFilenameFromUrl("/foo.html?file=%23.pdf")).toEqual("#.pdf"); + + expect(getPdfFilenameFromUrl("/foo.html?foo%23.pdf")).toEqual("foo#.pdf"); + + expect(getPdfFilenameFromUrl("/foo%23.pdf?a=b#c")).toEqual("foo#.pdf"); + + expect(getPdfFilenameFromUrl("foo.html#%23.pdf")).toEqual("#.pdf"); + }); }); describe("isValidFetchUrl", function () { diff --git a/web/app.js b/web/app.js index dea04724d..5978a5afc 100644 --- a/web/app.js +++ b/web/app.js @@ -727,6 +727,15 @@ const PDFViewerApplication = { const queryString = document.location.search.substring(1); const params = parseQueryString(queryString); file = params.get("file") ?? AppOptions.get("defaultUrl"); + try { + // An absolute URL must be kept intact: percent-encoding it as a whole + // would also escape its scheme, query string and hash. + file = new URL(file).href; + } catch { + // Not an absolute URL, hence a relative path: escape characters such as + // "#" or "?" that belong to the filename, but keep the "/" separators. + file = encodeURIComponent(file).replaceAll("%2F", "/"); + } validateFileURL(file); } else if (PDFJSDev.test("MOZCENTRAL")) { file = window.location.href; @@ -2336,7 +2345,7 @@ if (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) { this.open({ url: URL.createObjectURL(file), - originalUrl: file.name, + originalUrl: encodeURIComponent(file.name), }); };