From 2761f78768600c7dc3feef395c462f1a60afae1a Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Fri, 7 Mar 2025 21:00:30 +0100 Subject: [PATCH] [enh] Static file with hashes --- client/simple/src/js/main/mapresult.js | 4 +- client/simple/src/less/index.less | 2 +- client/simple/vite.config.js | 171 ++++++++++++++++++++++++- searx/settings.yml | 2 - searx/settings_defaults.py | 1 - searx/webapp.py | 31 +++-- searx/webutils.py | 55 +++++--- tests/unit/test_webapp.py | 3 - utils/lib_sxng_static.sh | 1 + 9 files changed, 226 insertions(+), 44 deletions(-) diff --git a/client/simple/src/js/main/mapresult.js b/client/simple/src/js/main/mapresult.js index 2c3777678..9b703d804 100644 --- a/client/simple/src/js/main/mapresult.js +++ b/client/simple/src/js/main/mapresult.js @@ -16,8 +16,8 @@ var map_boundingbox = JSON.parse(this.dataset.mapBoundingbox); var map_geojson = JSON.parse(this.dataset.mapGeojson); - searxng.loadStyle('css/leaflet.css'); - searxng.loadScript('js/leaflet.js', function () { + searxng.loadStyle('css/leaflet.SEARXNG_HASH.css'); + searxng.loadScript('js/leaflet.SEARXNG_HASH.js', function () { var map_bounds = null; if (map_boundingbox) { var southWest = L.latLng(map_boundingbox[0], map_boundingbox[2]); diff --git a/client/simple/src/less/index.less b/client/simple/src/less/index.less index 6316b4ed6..1ebe8997f 100644 --- a/client/simple/src/less/index.less +++ b/client/simple/src/less/index.less @@ -6,7 +6,7 @@ text-align: center; .title { - background: url("../img/searxng.png") no-repeat; + background: url("../img/searxng.SEARXNG_HASH.png") no-repeat; min-height: 4rem; margin: 4rem auto; background-position: center; diff --git a/client/simple/vite.config.js b/client/simple/vite.config.js index f8d3e6757..162999e6f 100644 --- a/client/simple/vite.config.js +++ b/client/simple/vite.config.js @@ -2,12 +2,16 @@ * CONFIG: https://vite.dev/config/ */ -import { resolve } from "node:path"; +import { resolve, relative } from "node:path"; +import { Buffer } 
/**
 * Vite plugin that fingerprints every file of the build output directory.
 *
 * After the bundle is closed the plugin
 *   1. lists all files below `build.outDir` (minus the excluded ones),
 *   2. derives a short SHA-256 based suffix (`.xxxxxxxx`) for each file,
 *   3. replaces `<name>.SEARXNG_HASH<ext>` placeholders and `sourceMappingURL`
 *      references inside `.css`, `.js` and `.html` files by the hashed names,
 *   4. renames the files, and
 *   5. writes a JSON manifest mapping the logical relative path to the hashed
 *      relative path (both with `/` separators).
 *
 * The suffix of a text file covers its own content AND the suffixes of the
 * files it references through placeholders, so the name of a referencing file
 * changes whenever one of its dependencies is renamed.
 *
 * Placeholders are matched by file name only (not by directory).  If two files
 * share a name but end up with different hashes, a reference to that
 * placeholder is ambiguous and the build fails with an `Error`.
 *
 * @param {object} [options]
 * @param {string} [options.fileName="hashes.json"] - manifest file name, relative to `outDir`.
 * @param {string[]} [options.exclude=[]] - relative paths (with `/`) that are left untouched
 *   and not listed in the manifest.
 * @param {string[]} [options.include_without_hashes=[]] - relative paths (with `/`) that are
 *   listed in the manifest but keep their original name.
 * @returns {object} a plugin object with `name`, `apply`, `configResolved` and `closeBundle`.
 */
function AddSearxNGHashes(options = {}) {
  const {
    fileName = "hashes.json",
    exclude = [],
    include_without_hashes = [],
  } = options;

  const PLACEHOLDER = "SEARXNG_HASH";
  const HASH_LENGTH = 8;
  const TEXT_EXTENSIONS = new Set([".css", ".js", ".html"]);
  // For these the hash goes in front of the *double* extension: foo.<hash>.js.map
  const COMPOUND_MAP_TAILS = [".js.map", ".css.map"];

  let outDir = null;

  // Manifest keys and the option lists always use "/", whatever the OS separator is.
  const toPosix = (somePath) => somePath.split(path.sep).join("/");
  const sha256 = (data) => crypto.createHash("sha256").update(data).digest("hex");
  // Locale independent ordering, keeps the manifest reproducible.
  const compare = (a, b) => (a < b ? -1 : a > b ? 1 : 0);
  const isMap = (rec) => rec.filePath.endsWith(".map");
  const hashedName = (rec) => `${rec.stem}${rec.suffix}${rec.tail}`;

  // Helper: recursively get all files (not directories) within `dir`.
  async function getAllFiles(dir) {
    const entries = await fs.readdir(dir, { withFileTypes: true });
    const files = [];
    for (const entry of entries) {
      const fullPath = resolve(dir, entry.name);
      if (entry.isDirectory()) {
        files.push(...(await getAllFiles(fullPath)));
      } else {
        files.push(fullPath);
      }
    }
    return files;
  }

  // Helper: split a file name into the part before and after the hash position.
  function splitName(filePath) {
    const tail = COMPOUND_MAP_TAILS.find((candidate) => filePath.endsWith(candidate)) ?? path.extname(filePath);
    return { stem: path.basename(filePath, tail), tail };
  }

  return {
    name: "recursive-hash-manifest-plugin",
    apply: "build",

    // Capture the final "outDir" from the resolved Vite config
    configResolved(config) {
      outDir = config.build.outDir;
    },

    // "closeBundle" is called after everything (including other async tasks) is done writing
    async closeBundle() {
      // Nothing to do if configResolved() never ran
      if (outDir === null) {
        return;
      }

      const skipped = new Set([...exclude, fileName]);
      const unhashed = new Set(include_without_hashes);

      // Step 1: collect the files; regular files first, `.map` files last, each group sorted
      // (readdir order is not guaranteed, sorting keeps the manifest stable).
      const records = [];
      for (const filePath of await getAllFiles(outDir)) {
        const relPath = toPosix(relative(outDir, filePath));
        if (!skipped.has(relPath)) {
          records.push({ filePath, relPath, ...splitName(filePath) });
        }
      }
      records.sort((a, b) => Number(isMap(a)) - Number(isMap(b)) || compare(a.relPath, b.relPath));

      // Step 2: read every file once: content digest, text (for rewritable files),
      // the file a `.map` belongs to and the placeholder other files use to reference it.
      const byPath = new Map(records.map((rec) => [rec.filePath, rec]));
      const targets = new Map(); // placeholder -> records carrying that file name
      for (const rec of records) {
        const body = await fs.readFile(rec.filePath);
        rec.rawHash = sha256(body);
        rec.text = TEXT_EXTENSIONS.has(path.extname(rec.filePath)) ? body.toString("utf-8") : null;
        rec.parent = isMap(rec) ? byPath.get(rec.filePath.slice(0, -".map".length)) : undefined;
        rec.placeholder = COMPOUND_MAP_TAILS.includes(rec.tail) ? null : `${rec.stem}.${PLACEHOLDER}${rec.tail}`;
        if (rec.placeholder !== null) {
          targets.set(rec.placeholder, [...(targets.get(rec.placeholder) ?? []), rec]);
        }
      }

      // Longest placeholder first: "xdep.SEARXNG_HASH.js" has to be handled before
      // "dep.SEARXNG_HASH.js", which is a substring of it.
      const placeholders = [...targets.keys()].sort((a, b) => b.length - a.length || compare(a, b));
      for (const rec of records) {
        rec.refs =
          rec.text === null ? [] : placeholders.filter((item) => item !== rec.placeholder && rec.text.includes(item));
      }

      // Step 3: compute the suffixes.  A file without references keeps the plain content hash.
      // A file with references additionally hashes the suffixes of the referenced files; this
      // is iterated until stable (at most one round per file, which also bounds reference cycles).
      const suffixFrom = (rec, digests) => {
        if (unhashed.has(rec.relPath)) {
          return "";
        }
        if (rec.parent !== undefined) {
          // a source map always follows the name of the file it describes
          return suffixFrom(rec.parent, digests);
        }
        return `.${digests.get(rec).slice(0, HASH_LENGTH)}`;
      };

      let digests = new Map(records.map((rec) => [rec, rec.rawHash]));
      for (let round = 0; round < records.length; round += 1) {
        const current = digests;
        const next = new Map();
        let changed = false;
        for (const rec of records) {
          let value = rec.rawHash;
          if (rec.refs.length > 0) {
            const deps = rec.refs.map(
              (item) => `${item}=${targets.get(item).map((target) => suffixFrom(target, current)).join(",")}`
            );
            value = sha256([rec.rawHash, ...deps].join("\n"));
          }
          if (value !== current.get(rec)) {
            changed = true;
          }
          next.set(rec, value);
        }
        digests = next;
        if (!changed) {
          break;
        }
      }
      for (const rec of records) {
        rec.suffix = suffixFrom(rec, digests);
        rec.newFilePath = path.join(path.dirname(rec.filePath), hashedName(rec));
      }

      // Step 4: replace placeholders and source map references in the text files.
      for (const rec of records) {
        if (rec.text === null) {
          continue;
        }
        let content = rec.text;
        for (const item of placeholders) {
          if (!content.includes(item)) {
            continue;
          }
          const names = new Set(targets.get(item).map(hashedName));
          if (names.size > 1) {
            throw new Error(
              `${rec.relPath}: placeholder "${item}" is ambiguous, it matches ` +
                targets.get(item).map((target) => target.relPath).join(", ")
            );
          }
          const [replacement] = names;
          // function replacer: a "$" in a file name must not be read as a replacement pattern
          content = content.replaceAll(item, () => replacement);
        }

        // Point the file's own sourceMappingURL (JS and CSS notation alike) to the renamed map.
        const mapRec = byPath.get(`${rec.filePath}.map`);
        if (mapRec !== undefined) {
          content = content.replaceAll(
            `sourceMappingURL=${mapRec.stem}${mapRec.tail}`,
            () => `sourceMappingURL=${hashedName(mapRec)}`
          );
        }

        // Untouched files are not rewritten (avoids a needless UTF-8 round trip).
        if (content !== rec.text) {
          await fs.writeFile(rec.filePath, content, "utf-8");
        }
      }

      // Step 5: rename the original files to their hashed filenames
      for (const rec of records) {
        if (rec.newFilePath !== rec.filePath) {
          await fs.rename(rec.filePath, rec.newFilePath);
        }
      }

      // Step 6: write the manifest (logical relative path -> hashed relative path)
      const assets = Object.fromEntries(
        records.map((rec) => [rec.relPath, toPosix(relative(outDir, rec.newFilePath))])
      );
      await fs.writeFile(resolve(outDir, fileName), JSON.stringify(assets, null, 2), "utf-8");
    },
  };
}
- static_use_hash: false # Custom templates path - leave it blank if you didn't change templates_path: "" # query_in_title: When true, the result page's titles contains the query diff --git a/searx/settings_defaults.py b/searx/settings_defaults.py index 1bafa749a..55930e664 100644 --- a/searx/settings_defaults.py +++ b/searx/settings_defaults.py @@ -190,7 +190,6 @@ SCHEMA = { }, 'ui': { 'static_path': SettingsDirectoryValue(str, os.path.join(searx_dir, 'static')), - 'static_use_hash': SettingsValue(bool, False, 'SEARXNG_STATIC_USE_HASH'), 'templates_path': SettingsDirectoryValue(str, os.path.join(searx_dir, 'templates')), 'default_theme': SettingsValue(str, 'simple'), 'default_locale': SettingsValue(str, ''), diff --git a/searx/webapp.py b/searx/webapp.py index 7104853e8..766e7890e 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -16,6 +16,7 @@ import base64 from timeit import default_timer from html import escape from io import StringIO +from pathlib import Path import typing import urllib @@ -63,7 +64,7 @@ from searx.botdetection import link_token from searx.data import ENGINE_DESCRIPTIONS from searx.result_types import Answer from searx.settings_defaults import OUTPUT_FORMATS -from searx.settings_loader import DEFAULT_SETTINGS_FILE +from searx.settings_loader import DEFAULT_SETTINGS_FILE, searx_dir from searx.exceptions import SearxParameterException from searx.engines import ( DEFAULT_CATEGORY, @@ -244,24 +245,21 @@ def get_result_template(theme_name: str, template_name: str): def custom_url_for(endpoint: str, **values): - suffix = "" if endpoint == 'static' and values.get('filename'): - file_hash = static_files.get(values['filename']) - if not file_hash: + actual_filename = static_files.get(values['filename']) + if not actual_filename: # try file in the current theme theme_name = sxng_request.preferences.get_value('theme') - filename_with_theme = "themes/{}/{}".format(theme_name, values['filename']) - file_hash = static_files.get(filename_with_theme) - if 
file_hash: - values['filename'] = filename_with_theme - if get_setting('ui.static_use_hash') and file_hash: - suffix = "?" + file_hash + logical_filename = "themes/{}/{}".format(theme_name, values['filename']) + actual_filename = static_files.get(logical_filename) + if actual_filename: + values['filename'] = actual_filename if endpoint == 'info' and 'locale' not in values: locale = sxng_request.preferences.get_value('locale') if infopage.INFO_PAGES.get_page(values['pagename'], locale) is None: locale = infopage.INFO_PAGES.locale_default values['locale'] = locale - return url_for(endpoint, **values) + suffix + return url_for(endpoint, **values) def morty_proxify(url: str): @@ -1250,9 +1248,11 @@ def opensearch(): @app.route('/favicon.ico') def favicon(): theme = sxng_request.preferences.get_value("theme") + logical_file_name = 'themes/' + theme + '/img/favicon.png' + actual_file_name = static_files.get(logical_file_name, logical_file_name) return send_from_directory( - os.path.join(app.root_path, settings['ui']['static_path'], 'themes', theme, 'img'), # type: ignore - 'favicon.png', + os.path.join(app.root_path, settings['ui']['static_path']), # type: ignore + actual_file_name, mimetype='image/vnd.microsoft.icon', ) @@ -1365,7 +1365,10 @@ def run(): port=settings['server']['port'], host=settings['server']['bind_address'], threaded=True, - extra_files=[DEFAULT_SETTINGS_FILE], + extra_files=[ + DEFAULT_SETTINGS_FILE, + Path(searx_dir) / "static/themes/simple/assets.json", + ], ) diff --git a/searx/webutils.py b/searx/webutils.py index 6e49e3830..d44c5ade9 100644 --- a/searx/webutils.py +++ b/searx/webutils.py @@ -178,30 +178,45 @@ def get_themes(templates_path): return os.listdir(templates_path) -def get_hash_for_file(file: pathlib.Path) -> str: - m = hashlib.sha1() - with file.open('rb') as f: - m.update(f.read()) - return m.hexdigest() +def get_static_files_legacy(static_path: str, path: pathlib.Path): + result: list[str] = [] + for file in path.iterdir(): + if 
file.name.startswith('.'): + # ignore hidden file + continue + if file.is_file(): + result.append(str(file.relative_to(static_path))) + if file.is_dir() and file.name not in ('node_modules', 'src'): + # ignore "src" and "node_modules" directories + result.extend(get_static_files_legacy(static_path, file)) + return result def get_static_files(static_path: str) -> Dict[str, str]: - static_files: Dict[str, str] = {} - static_path_path = pathlib.Path(static_path) + results = {} + themes_dir = pathlib.Path(static_path) / "themes" + for theme_dir in themes_dir.iterdir(): + if not theme_dir.is_dir(): + continue + assets_file = theme_dir / "assets.json" + if assets_file.is_file(): + # assets.json exist + with assets_file.open("r", encoding="utf-8") as f: + assets = json.load(f) + for rel_logical_filename, rel_actual_filename in assets.items(): + logical_filename = f"themes/{theme_dir.name}/{rel_logical_filename}" + actual_filename = f"themes/{theme_dir.name}/{rel_actual_filename}" + results[logical_filename] = actual_filename + else: + # assets.json does not exist + results.update( + { + logical_filename: logical_filename + for logical_filename in get_static_files_legacy(static_path, theme_dir) + } + ) - def walk(path: pathlib.Path): - for file in path.iterdir(): - if file.name.startswith('.'): - # ignore hidden file - continue - if file.is_file(): - static_files[str(file.relative_to(static_path_path))] = get_hash_for_file(file) - if file.is_dir() and file.name not in ('node_modules', 'src'): - # ignore "src" and "node_modules" directories - walk(file) - - walk(static_path_path) - return static_files + return results def get_result_templates(templates_path): diff --git a/tests/unit/test_webapp.py b/tests/unit/test_webapp.py index e8f4fcdcd..0bcd4b461 100644 --- a/tests/unit/test_webapp.py +++ b/tests/unit/test_webapp.py @@ -25,9 +25,6 @@ class ViewsTestCase(SearxTestCase): # pylint: disable=too-many-public-methods pass self.setattr4test(searx.search.processors, 
'initialize_processor', dummy) - # remove sha for the static file so the tests don't have to care about - # the changing URLs - self.setattr4test(searx.webapp, 'static_files', {}) # set some defaults test_results = [ diff --git a/utils/lib_sxng_static.sh b/utils/lib_sxng_static.sh index 9c9ea085c..3d6c2dc48 100755 --- a/utils/lib_sxng_static.sh +++ b/utils/lib_sxng_static.sh @@ -122,4 +122,5 @@ static.build.restore() { build_msg STATIC "git-restore of the built files (/static)" git restore --staged "${STATIC_BUILT_PATHS[@]}" git restore --worktree "${STATIC_BUILT_PATHS[@]}" + git clean --force -d "${STATIC_BUILT_PATHS[@]}" }