From 556db857aad658a752e6ded3ac7d8b56b385e992 Mon Sep 17 00:00:00 2001 From: Aadniz <8147434+Aadniz@users.noreply.github.com> Date: Thu, 20 Mar 2025 16:19:24 +0100 Subject: [PATCH] [fix] presearch engine: News and Videos formatted incorrectly --- searx/engines/presearch.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/searx/engines/presearch.py b/searx/engines/presearch.py index 870f2383b..1940cc7ad 100644 --- a/searx/engines/presearch.py +++ b/searx/engines/presearch.py @@ -58,6 +58,12 @@ have to set these values in both requests we send to Presearch; in the first request to get the request-ID from Presearch and in the final request to get the result list (see ``send_accept_language_header``). +The time format returned by Presearch varies depending on the language set. +Multiple different formats can be supported by using the ``dateutil`` parser, but +it doesn't support formats such as "N time ago", "vor N time" (German), or +"Hace N time" (Spanish). Because of this, the dates are simply joined together +with the rest of the metadata. 
+ Implementations =============== @@ -246,7 +252,7 @@ def response(resp): results.append( { 'template': 'images.html', - 'title': item['title'], + 'title': html_to_text(item['title']), 'url': item.get('link'), 'img_src': item.get('image'), 'thumbnail_src': item.get('thumbnail'), @@ -261,7 +267,7 @@ def response(resp): metadata = [x for x in [item.get('description'), item.get('duration')] if x] results.append( { - 'title': item['title'], + 'title': html_to_text(item['title']), 'url': item.get('link'), 'content': '', 'metadata': ' / '.join(metadata), @@ -271,12 +277,18 @@ def response(resp): elif search_type == 'news': for item in json_resp.get('news', []): - metadata = [x for x in [item.get('source'), item.get('time')] if x] + source = item.get('source') + # Bug on their end, time sometimes returns "" + time = html_to_text(item.get('time')).strip() + metadata = [source] + if time != "": + metadata.append(time) + results.append( { - 'title': item['title'], + 'title': html_to_text(item['title']), 'url': item.get('link'), - 'content': item.get('description', ''), + 'content': html_to_text(item.get('description', '')), 'metadata': ' / '.join(metadata), 'thumbnail': item.get('image'), }