From abc9522886d230f983208b586ddb09cdaa260844 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 25 Apr 2025 18:14:22 +0200 Subject: [PATCH] Avoid (most) string parsing when removing/replacing the hash property of a URL --- extensions/chromium/extension-router.js | 1 + src/core/catalog.js | 3 +++ src/display/filter_factory.js | 4 ++-- src/pdf.js | 3 +++ src/shared/util.js | 23 +++++++++++++++++++++++ test/unit/pdf_spec.js | 2 ++ web/app.js | 15 ++++++++++++--- web/app_options.js | 8 +++++++- web/generic_scripting.js | 2 +- web/pdf_history.js | 8 ++++---- web/pdfjs.js | 2 ++ 11 files changed, 60 insertions(+), 11 deletions(-) diff --git a/extensions/chromium/extension-router.js b/extensions/chromium/extension-router.js index 5bbb0d1a5..f9720e12b 100644 --- a/extensions/chromium/extension-router.js +++ b/extensions/chromium/extension-router.js @@ -46,6 +46,7 @@ limitations under the License. } var scheme = url.slice(0, schemeIndex).toLowerCase(); if (schemes.includes(scheme)) { + // NOTE: We cannot use the `updateUrlHash` function in this context. url = url.split("#", 1)[0]; if (url.charAt(schemeIndex) === ":") { url = encodeURIComponent(url); diff --git a/src/core/catalog.js b/src/core/catalog.js index e8181d224..6245bfae3 100644 --- a/src/core/catalog.js +++ b/src/core/catalog.js @@ -1607,6 +1607,9 @@ class Catalog { // NOTE: the destination is relative to the *remote* document. const remoteDest = fetchRemoteDest(action); if (remoteDest && typeof url === "string") { + // NOTE: We don't use the `updateUrlHash` function here, since + // the `createValidAbsoluteUrl` function (see below) already + // handles parsing and validation of the final URL. url = /* baseUrl = */ url.split("#", 1)[0] + "#" + remoteDest; } // The 'NewWindow' property, equal to `LinkTarget.BLANK`. diff --git a/src/display/filter_factory.js b/src/display/filter_factory.js index 6b844db35..acec5d409 100644 --- a/src/display/filter_factory.js +++ b/src/display/filter_factory.js @@ -14,7 +14,7 @@ */ import { getRGB, isDataScheme, SVG_NS } from "./display_utils.js"; -import { unreachable, Util, warn } from "../shared/util.js"; +import { unreachable, updateUrlHash, Util, warn } from "../shared/util.js"; class BaseFilterFactory { constructor() { @@ -143,7 +143,7 @@ class DOMFilterFactory extends BaseFilterFactory { if (isDataScheme(url)) { warn('#createUrl: ignore "data:"-URL for performance reasons.'); } else { - this.#baseUrl = url.split("#", 1)[0]; + this.#baseUrl = updateUrlHash(url, ""); } } } diff --git a/src/pdf.js b/src/pdf.js index 2a3acbaaf..d707b01ce 100644 --- a/src/pdf.js +++ b/src/pdf.js @@ -40,6 +40,7 @@ import { PermissionFlag, ResponseException, shadow, + updateUrlHash, Util, VerbosityLevel, } from "./shared/util.js"; @@ -140,6 +141,7 @@ globalThis.pdfjsLib = { SupportedImageMimeTypes, TextLayer, TouchManager, + updateUrlHash, Util, VerbosityLevel, version, @@ -193,6 +195,7 @@ export { SupportedImageMimeTypes, TextLayer, TouchManager, + updateUrlHash, Util, VerbosityLevel, version, diff --git a/src/shared/util.js b/src/shared/util.js index 5d3f10f25..a0eaf7c01 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -445,6 +445,28 @@ function createValidAbsoluteUrl(url, baseUrl = null, options = null) { return _isValidProtocol(absoluteUrl) ? absoluteUrl : null; } +/** + * Remove, or replace, the hash property of the URL. + * + * @param {URL|string} url - The absolute, or relative, URL. + * @param {string} hash - The hash property (use an empty string to remove it). + * @param {boolean} [allowRel] - Allow relative URLs. + * @returns {string} The resulting URL string. + */ +function updateUrlHash(url, hash, allowRel = false) { + const res = URL.parse(url); + if (res) { + res.hash = hash; + return res.href; + } + // Support well-formed relative URLs, necessary for `web/app.js` in GENERIC + // builds, by optionally falling back to string parsing. + if (allowRel && createValidAbsoluteUrl(url, "http://example.com")) { + return url.split("#", 1)[0] + `${hash ? `#${hash}` : ""}`; + } + return ""; +} + function shadow(obj, prop, value, nonSerializable = false) { if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { assert( @@ -1319,6 +1341,7 @@ export { toHexUtil, UnknownErrorException, unreachable, + updateUrlHash, utf8StringToString, Util, VerbosityLevel, diff --git a/test/unit/pdf_spec.js b/test/unit/pdf_spec.js index 714753502..b594eb83d 100644 --- a/test/unit/pdf_spec.js +++ b/test/unit/pdf_spec.js @@ -31,6 +31,7 @@ import { PermissionFlag, ResponseException, shadow, + updateUrlHash, Util, VerbosityLevel, } from "../../src/shared/util.js"; @@ -117,6 +118,7 @@ const expectedAPI = Object.freeze({ SupportedImageMimeTypes, TextLayer, TouchManager, + updateUrlHash, Util, VerbosityLevel, version, diff --git a/web/app.js b/web/app.js index 597365a7b..1e245ec38 100644 --- a/web/app.js +++ b/web/app.js @@ -57,6 +57,7 @@ import { shadow, stopEvent, TouchManager, + updateUrlHash, version, } from "pdfjs-lib"; import { AppOptions, OptionKind } from "./app_options.js"; @@ -943,10 +944,18 @@ const PDFViewerApplication = { setTitleUsingUrl(url = "", downloadUrl = null) { this.url = url; - this.baseUrl = url.split("#", 1)[0]; + this.baseUrl = + typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC") + ? updateUrlHash(url, "", /* allowRel = */ true) + : updateUrlHash(url, ""); if (downloadUrl) { this._downloadUrl = - downloadUrl === url ? this.baseUrl : downloadUrl.split("#", 1)[0]; + // eslint-disable-next-line no-nested-ternary + downloadUrl === url + ? this.baseUrl + : typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC") + ? updateUrlHash(downloadUrl, "", /* allowRel = */ true) + : updateUrlHash(downloadUrl, ""); } if (isDataScheme(url)) { this._hideViewBookmark(); @@ -1309,7 +1318,7 @@ const PDFViewerApplication = { this.secondaryToolbar?.setPagesCount(pdfDocument.numPages); if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("CHROME")) { - const baseUrl = location.href.split("#", 1)[0]; + const baseUrl = updateUrlHash(location, ""); // Ignore "data:"-URLs for performance reasons, even though it may cause // internal links to not work perfectly in all cases (see bug 1803050). this.pdfLinkService.setDocument( diff --git a/web/app_options.js b/web/app_options.js index 133df8dae..df4b6a57b 100644 --- a/web/app_options.js +++ b/web/app_options.js @@ -393,7 +393,13 @@ const defaultOptions = { }, docBaseUrl: { /** @type {string} */ - value: typeof PDFJSDev === "undefined" ? document.URL.split("#", 1)[0] : "", + value: + typeof PDFJSDev === "undefined" + ? // NOTE: We cannot use the `updateUrlHash` function here, because of + // the default preferences generation (see `gulpfile.mjs`). + // However, the following line is *only* used in development mode. + document.URL.split("#", 1)[0] + : "", kind: OptionKind.API, }, enableHWA: { diff --git a/web/generic_scripting.js b/web/generic_scripting.js index fe3f4261a..cf40f541e 100644 --- a/web/generic_scripting.js +++ b/web/generic_scripting.js @@ -17,7 +17,7 @@ import { getPdfFilenameFromUrl } from "pdfjs-lib"; async function docProperties(pdfDocument) { const url = "", - baseUrl = url.split("#", 1)[0]; + baseUrl = ""; const { info, metadata, contentDispositionFilename, contentLength } = await pdfDocument.getMetadata(); diff --git a/web/pdf_history.js b/web/pdf_history.js index 6c965ffbb..bed96f370 100644 --- a/web/pdf_history.js +++ b/web/pdf_history.js @@ -17,6 +17,7 @@ /** @typedef {import("./interfaces").IPDFLinkService} IPDFLinkService */ import { isValidRotation, parseQueryString } from "./ui_utils.js"; +import { updateUrlHash } from "pdfjs-lib"; import { waitOnEventOrTimeout } from "./event_utils.js"; // Heuristic value used when force-resetting `this._blockHashChange`. @@ -383,10 +384,9 @@ class PDFHistory { let newUrl; if (this._updateUrl && destination?.hash) { - const baseUrl = document.location.href.split("#", 1)[0]; - // Prevent errors in Firefox. - if (!baseUrl.startsWith("file://")) { - newUrl = `${baseUrl}#${destination.hash}`; + const { href, protocol } = document.location; + if (protocol !== "file:") { + newUrl = updateUrlHash(href, destination.hash); } } if (shouldReplace) { diff --git a/web/pdfjs.js b/web/pdfjs.js index 5ba83c141..7634d2c14 100644 --- a/web/pdfjs.js +++ b/web/pdfjs.js @@ -60,6 +60,7 @@ const { SupportedImageMimeTypes, TextLayer, TouchManager, + updateUrlHash, Util, VerbosityLevel, version, @@ -113,6 +114,7 @@ export { SupportedImageMimeTypes, TextLayer, TouchManager, + updateUrlHash, Util, VerbosityLevel, version,