From 8ee51cc0f3ae97658fad85ff91bdb03ca6be51c1 Mon Sep 17 00:00:00 2001
From: grasdk <115414609+grasdk@users.noreply.github.com>
Date: Fri, 11 Apr 2025 00:24:10 +0200
Subject: [PATCH] [fix] engine dokuwiki: basedir duplication

DokuWiki searches behind a reverse proxy had a duplicated base path in
the URL, producing broken links. This patch replaces string
concatenation of URLs with urljoin [1] from urllib.parse. This
eliminates the duplication while retaining the original behavior of
joining the protocol, hostname and port (as base_url) with the result
path.

[1] https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin

Closes: https://github.com/searxng/searxng/issues/4598
---
 searx/engines/doku.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/searx/engines/doku.py b/searx/engines/doku.py
index dfe643c9e..43e6a4a9a 100644
--- a/searx/engines/doku.py
+++ b/searx/engines/doku.py
@@ -4,6 +4,7 @@
 """

 from urllib.parse import urlencode
+from urllib.parse import urljoin
 from lxml.html import fromstring

 from searx.utils import extract_text, eval_xpath
@@ -63,7 +64,7 @@ def response(resp):
         title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title'))

         # append result
-        results.append({'title': title, 'content': "", 'url': base_url + res_url})
+        results.append({'title': title, 'content': "", 'url': urljoin(base_url, res_url)})

     # Search results
     for r in eval_xpath(doc, '//dl[@class="search_results"]/*'):
@@ -75,7 +76,7 @@ def response(resp):
             content = extract_text(eval_xpath(r, '.'))

             # append result
-            results.append({'title': title, 'content': content, 'url': base_url + res_url})
+            results.append({'title': title, 'content': content, 'url': urljoin(base_url, res_url)})

         except:  # pylint: disable=bare-except
             continue
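
A minimal standalone sketch of the behavior the patch relies on (the
host name, the /wiki proxy prefix, and the page id below are made up
for illustration): when base_url already carries the reverse-proxy
prefix and DokuWiki returns an absolute path that repeats it, plain
concatenation duplicates the prefix, while urljoin resolves the path
against the host so the prefix appears only once.

    from urllib.parse import urljoin

    # Hypothetical setup: the instance is served behind a reverse proxy
    # under /wiki, so base_url ends with that prefix and DokuWiki's
    # result hrefs are absolute paths that include it again.
    base_url = "https://example.org/wiki"
    res_url = "/wiki/doku.php?id=start"

    # Old behavior: naive concatenation repeats the prefix.
    print(base_url + res_url)
    # -> https://example.org/wiki/wiki/doku.php?id=start

    # New behavior: urljoin treats a path starting with "/" as rooted
    # at the host, so the prefix is kept exactly once.
    print(urljoin(base_url, res_url))
    # -> https://example.org/wiki/doku.php?id=start

Note that when base_url is just protocol, hostname and port (no path),
urljoin("https://example.org", "/doku.php?id=start") yields the same
result as concatenation, which is how the old functionality is
retained for instances that are not behind a path-prefixing proxy.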