[fix] Results.url: don't normalize www.example.com to example.com
The "www." hostname prefix in URL results can't be normalized to an empty string:

- https://www.tu-darmstadt.de/
- https://tu-darmstadt.de/

Reported-By: @Bnyro <bnyro@tutanota.com>
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
ef99cc472d
commit
96a6e3dcb2
@ -51,7 +51,7 @@ def _normalize_url_fields(result: Result | LegacyResult):
|
|||||||
# if the result has no scheme, use http as default
|
# if the result has no scheme, use http as default
|
||||||
scheme=result.parsed_url.scheme or "http",
|
scheme=result.parsed_url.scheme or "http",
|
||||||
# normalize ``www.example.com`` to ``example.com``
|
# normalize ``www.example.com`` to ``example.com``
|
||||||
netloc=result.parsed_url.netloc.replace("www.", ""),
|
# netloc=result.parsed_url.netloc.replace("www.", ""),
|
||||||
# normalize ``example.com/path/`` to ``example.com/path``
|
# normalize ``example.com/path/`` to ``example.com/path``
|
||||||
path=result.parsed_url.path.rstrip("/"),
|
path=result.parsed_url.path.rstrip("/"),
|
||||||
)
|
)
|
||||||
@ -69,7 +69,7 @@ def _normalize_url_fields(result: Result | LegacyResult):
|
|||||||
_url = urllib.parse.urlparse(_url)
|
_url = urllib.parse.urlparse(_url)
|
||||||
item["url"] = _url._replace(
|
item["url"] = _url._replace(
|
||||||
scheme=_url.scheme or "http",
|
scheme=_url.scheme or "http",
|
||||||
netloc=_url.netloc.replace("www.", ""),
|
# netloc=_url.netloc.replace("www.", ""),
|
||||||
path=_url.path.rstrip("/"),
|
path=_url.path.rstrip("/"),
|
||||||
).geturl()
|
).geturl()
|
||||||
|
|
||||||
@ -78,7 +78,7 @@ def _normalize_url_fields(result: Result | LegacyResult):
|
|||||||
_url = urllib.parse.urlparse(infobox_id)
|
_url = urllib.parse.urlparse(infobox_id)
|
||||||
result.id = _url._replace(
|
result.id = _url._replace(
|
||||||
scheme=_url.scheme or "http",
|
scheme=_url.scheme or "http",
|
||||||
netloc=_url.netloc.replace("www.", ""),
|
# netloc=_url.netloc.replace("www.", ""),
|
||||||
path=_url.path.rstrip("/"),
|
path=_url.path.rstrip("/"),
|
||||||
).geturl()
|
).geturl()
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user