From 96a6e3dcb2283fa7ad9db4172a00582073a166d7 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 21 Mar 2025 08:04:10 +0100 Subject: [PATCH] [fix] Results.url: don't normalize www.example.com to example.com Hostname "www" in URL results can't be normalized to an empty string: - https://www.tu-darmstadt.de/ - https://tu-darmstadt.de/ Reported-By: @Bnyro Signed-off-by: Markus Heiser --- searx/result_types/_base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/searx/result_types/_base.py b/searx/result_types/_base.py index 220a4ff12..1cd4e4d2d 100644 --- a/searx/result_types/_base.py +++ b/searx/result_types/_base.py @@ -51,7 +51,7 @@ def _normalize_url_fields(result: Result | LegacyResult): # if the result has no scheme, use http as default scheme=result.parsed_url.scheme or "http", # normalize ``www.example.com`` to ``example.com`` - netloc=result.parsed_url.netloc.replace("www.", ""), + # netloc=result.parsed_url.netloc.replace("www.", ""), # normalize ``example.com/path/`` to ``example.com/path`` path=result.parsed_url.path.rstrip("/"), ) @@ -69,7 +69,7 @@ def _normalize_url_fields(result: Result | LegacyResult): _url = urllib.parse.urlparse(_url) item["url"] = _url._replace( scheme=_url.scheme or "http", - netloc=_url.netloc.replace("www.", ""), + # netloc=_url.netloc.replace("www.", ""), path=_url.path.rstrip("/"), ).geturl() @@ -78,7 +78,7 @@ def _normalize_url_fields(result: Result | LegacyResult): _url = urllib.parse.urlparse(infobox_id) result.id = _url._replace( scheme=_url.scheme or "http", - netloc=_url.netloc.replace("www.", ""), + # netloc=_url.netloc.replace("www.", ""), path=_url.path.rstrip("/"), ).geturl()