grasdk 8ee51cc0f3 [fix] engine dokuwiki: basedir duplication
DokuWiki searches behind a reverse proxy had the base path duplicated in the
result URL, producing a wrong URL.

This patch replaces string concatenation of URLs with urljoin [1] from
urllib.parse.  This eliminates the duplication problem, while retaining the old
functionality designed to concatenate protocol, hostname and port (as base_url)
with the path.

[1] https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin

Closes: https://github.com/searxng/searxng/issues/4598
2025-04-11 09:47:25 +02:00

88 lines
2.2 KiB
Python

# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Doku Wiki
"""
from urllib.parse import urlencode
from urllib.parse import urljoin
from lxml.html import fromstring
from searx.utils import extract_text, eval_xpath
# about: metadata on the upstream project and on how this engine reads it
# (HTML results, no official API, no API key)
about = {
    "website": "https://www.dokuwiki.org/",
    "wikidata_id": "Q851864",
    "official_api_documentation": "https://www.dokuwiki.org/devel:xmlrpc",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}
# engine dependent config
categories = ["general"]  # 'images', 'music', 'videos', 'files'
paging = False
number_of_results = 5

# search-url
# Doku is OpenSearch compatible
base_url = "http://localhost:8090"

# Path and query template appended to ``base_url``; ``{query}`` is replaced by
# the already URL-encoded query parameter.
search_url = "/?do=search&{query}"
# '&startRecord={offset}'
# '&maximumRecords={limit}'
# do search-request
def request(query, params):
    """Fill ``params['url']`` with the DokuWiki search URL for *query*.

    The query is URL-encoded as the ``id`` parameter and substituted into
    ``search_url``; the resulting path is appended to ``base_url`` by plain
    string concatenation.  The same ``params`` mapping is returned.
    """
    encoded_query = urlencode({'id': query})
    search_path = search_url.format(query=encoded_query)
    params['url'] = base_url + search_path
    return params
def _extract_wikilink(node):
    """Return ``(url, title)`` for the last ``a.wikilink1`` anchor below *node*.

    The ``href`` is resolved against ``base_url`` with ``urljoin``.  Returns
    ``None`` when *node* contains no such anchor or its ``href`` is empty, so
    callers can skip the entry without relying on exceptions.
    """
    hrefs = eval_xpath(node, './/a[@class="wikilink1"]/@href')
    if not hrefs or not hrefs[-1]:
        return None
    title = extract_text(eval_xpath(node, './/a[@class="wikilink1"]/@title'))
    return urljoin(base_url, hrefs[-1]), title


# get response from search-request
def response(resp):
    """Parse a DokuWiki search page into a list of result dicts.

    ``resp.text`` is parsed as HTML and two sections are read:

    * quick hits (``div.search_quickresult``): one result per ``<li>`` with an
      empty ``content``;
    * full-text hits (``dl.search_results``): a ``<dt>`` supplies link and
      title, each following ``<dd>`` supplies the ``content`` snippet.

    Every result is a dict with the keys ``title``, ``content`` and ``url``.
    Entries without a usable link are skipped.
    """
    results = []
    doc = fromstring(resp.text)

    # Quickhits
    for item in eval_xpath(doc, '//div[@class="search_quickresult"]/ul/li'):
        link = _extract_wikilink(item)
        if link is None:
            continue
        url, title = link
        results.append({'title': title, 'content': "", 'url': url})

    # Search results
    # ``pending`` holds the link of the most recent <dt>.  It is reset on every
    # <dt> so that a <dd> is never paired with the link of an earlier entry (or
    # with leftovers from the quickhits loop) when its own <dt> has no link.
    pending = None
    for node in eval_xpath(doc, '//dl[@class="search_results"]/*'):
        if node.tag == "dt":
            pending = _extract_wikilink(node)
        elif node.tag == "dd" and pending is not None:
            url, title = pending
            content = extract_text(eval_xpath(node, '.'))
            results.append({'title': title, 'content': content, 'url': url})

    return results