[fix] brave.news engine: response is HTML and no longer JSON

The response from brave.com for news is no longer a JSON string.

Closes: https://github.com/searxng/searxng/issues/4352
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2025-02-20 09:51:16 +01:00 committed by Markus Heiser
parent c2804c51e2
commit feb15e3878

View File

@ -254,14 +254,14 @@ def response(resp) -> EngineResults:
if brave_category in ('search', 'goggles'):
return _parse_search(resp)
if brave_category in ('news'):
return _parse_news(resp)
datastr = extr(resp.text, "const data = ", ";\n").strip()
json_data = js_variable_to_python(datastr)
json_resp = json_data[1]['data']['body']['response']
if brave_category == 'news':
return _parse_news(json_resp['news'])
if brave_category == 'images':
return _parse_images(json_resp)
if brave_category == 'videos':
@ -339,18 +339,31 @@ def _parse_search(resp) -> EngineResults:
return result_list
def _parse_news(resp) -> EngineResults:
    """Build the news result list from the HTML body of a Brave response.

    The response text is parsed as an HTML document and every
    ``div[@data-type="news"]`` found below an element whose class contains
    ``results`` is turned into one result item.

    :param resp: response object; only ``resp.text`` is read.
    :return: an ``EngineResults`` holding one dict per news entry with the
        keys ``url``, ``title``, ``content`` and ``thumbnail``.  No
        ``publishedDate`` is set by this parser.
    """
    result_list = EngineResults()
    dom = html.fromstring(resp.text)

    news_xpath = '//div[contains(@class, "results")]//div[@data-type="news"]'
    for result in eval_xpath_list(dom, news_xpath):

        url = eval_xpath_getindex(result, './/a[contains(@class, "result-header")]/@href', 0, default=None)
        if url is None:
            # An entry without a header link has nothing to point at; skip it.
            continue

        title = extract_text(eval_xpath_list(result, './/span[contains(@class, "snippet-title")]'))
        content = extract_text(eval_xpath_list(result, './/p[contains(@class, "desc")]'))
        # The thumbnail is optional: fall back to an empty string when the
        # entry carries no image.
        thumbnail = eval_xpath_getindex(result, './/div[contains(@class, "image-wrapper")]//img/@src', 0, default='')

        result_list.append(
            {
                "url": url,
                "title": title,
                "content": content,
                "thumbnail": thumbnail,
            }
        )

    return result_list