[fix] brave.news engine: response is HTML and no longer JSON
The response from brave.com for news is no longer a JSON string.

Closes: https://github.com/searxng/searxng/issues/4352
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
c2804c51e2
commit
feb15e3878
@ -254,14 +254,14 @@ def response(resp) -> EngineResults:
|
|||||||
if brave_category in ('search', 'goggles'):
|
if brave_category in ('search', 'goggles'):
|
||||||
return _parse_search(resp)
|
return _parse_search(resp)
|
||||||
|
|
||||||
|
if brave_category in ('news'):
|
||||||
|
return _parse_news(resp)
|
||||||
|
|
||||||
datastr = extr(resp.text, "const data = ", ";\n").strip()
|
datastr = extr(resp.text, "const data = ", ";\n").strip()
|
||||||
|
|
||||||
json_data = js_variable_to_python(datastr)
|
json_data = js_variable_to_python(datastr)
|
||||||
json_resp = json_data[1]['data']['body']['response']
|
json_resp = json_data[1]['data']['body']['response']
|
||||||
|
|
||||||
if brave_category == 'news':
|
|
||||||
return _parse_news(json_resp['news'])
|
|
||||||
|
|
||||||
if brave_category == 'images':
|
if brave_category == 'images':
|
||||||
return _parse_images(json_resp)
|
return _parse_images(json_resp)
|
||||||
if brave_category == 'videos':
|
if brave_category == 'videos':
|
||||||
@ -339,18 +339,31 @@ def _parse_search(resp) -> EngineResults:
|
|||||||
return result_list
|
return result_list
|
||||||
|
|
||||||
|
|
||||||
def _parse_news(json_resp) -> EngineResults:
|
def _parse_news(resp) -> EngineResults:
|
||||||
result_list = EngineResults()
|
|
||||||
|
result_list = EngineResults()
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
|
for result in eval_xpath_list(dom, '//div[contains(@class, "results")]//div[@data-type="news"]'):
|
||||||
|
|
||||||
|
# import pdb
|
||||||
|
# pdb.set_trace()
|
||||||
|
|
||||||
|
url = eval_xpath_getindex(result, './/a[contains(@class, "result-header")]/@href', 0, default=None)
|
||||||
|
if url is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
title = extract_text(eval_xpath_list(result, './/span[contains(@class, "snippet-title")]'))
|
||||||
|
content = extract_text(eval_xpath_list(result, './/p[contains(@class, "desc")]'))
|
||||||
|
thumbnail = eval_xpath_getindex(result, './/div[contains(@class, "image-wrapper")]//img/@src', 0, default='')
|
||||||
|
|
||||||
for result in json_resp["results"]:
|
|
||||||
item = {
|
item = {
|
||||||
'url': result['url'],
|
"url": url,
|
||||||
'title': result['title'],
|
"title": title,
|
||||||
'content': result['description'],
|
"content": content,
|
||||||
'publishedDate': _extract_published_date(result['age']),
|
"thumbnail": thumbnail,
|
||||||
}
|
}
|
||||||
if result['thumbnail'] is not None:
|
|
||||||
item['thumbnail'] = result['thumbnail']['src']
|
|
||||||
result_list.append(item)
|
result_list.append(item)
|
||||||
|
|
||||||
return result_list
|
return result_list
|
||||||
|
Loading…
x
Reference in New Issue
Block a user