From d8a4d589eb3520a4f163ed7d6c77945d14df618d Mon Sep 17 00:00:00 2001 From: GenericMale Date: Tue, 21 Jan 2025 20:34:01 +0100 Subject: [PATCH 1/5] [feat] new engine: tavily.com --- searx/engines/tavily.py | 89 +++++++++++++++++++++++++++++++++++++++++ searx/settings.yml | 16 ++++++++ 2 files changed, 105 insertions(+) create mode 100644 searx/engines/tavily.py diff --git a/searx/engines/tavily.py b/searx/engines/tavily.py new file mode 100644 index 000000000..9a5b079dc --- /dev/null +++ b/searx/engines/tavily.py @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +""" +Tavily AI Engine +""" + +from json import dumps +from datetime import datetime +from searx.exceptions import SearxEngineAPIException + +# about +about = { + "website": 'https://tavily.com/', + "wikidata_id": None, + "official_api_documentation": 'https://docs.tavily.com/docs/rest-api/api-reference', + "use_official_api": True, + "require_api_key": True, + "results": 'JSON', +} + +search_url = 'https://api.tavily.com/search' +paging = False +time_range_support = True + +search_type = 'search' # possible values: search, news +api_key = 'unset' +max_results = 20 +search_depth = 'basic' # The depth of the search. It can be "basic" or "advanced". +include_images = False # Include query-related images. Turns answer into infobox with first image. +include_domains = [] # A list of domains to specifically include in the search results. +exclude_domains = [] # A list of domains to specifically exclude from the search results. + + +def request(query, params): + if api_key == 'unset': + raise SearxEngineAPIException('missing Tavily API key') + + data = { + 'query': query, + 'api_key': api_key, + 'search_depth': 'basic', + 'time_range': params["time_range"], + 'max_results': max_results, + 'include_images': include_images, + 'include_domains': include_domains, + 'exclude_domains': exclude_domains, + } + if search_type == 'search': + data['include_answer'] = True + elif search_type == 'news': + data['topic'] = 'news' + else: + raise ValueError(f"Invalid search type {search_type}") + + params['url'] = search_url + params['method'] = 'POST' + params['headers']['content-type'] = 'application/json' + params['data'] = dumps(data) + return params + + +def response(resp): + results = [] + json_resp = resp.json() + + for result in json_resp.get('results', []): + results.append( + { + 'title': result['title'], + 'url': result['url'], + 'content': result['content'], + 'publishedDate': _parse_date(result.get('published_date')), + } + ) + + if json_resp['images']: + results.append({'infobox': 'Tavily', 'img_src': json_resp['images'][0], 'content': json_resp['answer']}) + elif json_resp['answer']: + results.append({'answer': json_resp['answer']}) + + return results + + +def _parse_date(pubDate): + if pubDate is not None: + try: + return datetime.strptime(pubDate, '%a, %d %b %Y %H:%M:%S %Z') + except (ValueError, TypeError) as e: + logger.debug("ignore exception (publishedDate): %s", e) + return None diff --git a/searx/settings.yml b/searx/settings.yml index 46de5d855..8d016610c 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1828,6 +1828,22 @@ engines: shortcut: tm disabled: true + # - name: tavily + # engine: tavily + # shortcut: tav + # categories: general + # # API key required, see: https://docs.tavily.com/docs/welcome#getting-started + # api_key: 'unset' + # include_images: false + # timeout: 15.0 + # - name: tavily news + # engine: tavily + # shortcut: tavnews + # categories: news + # api_key: 'unset' + # search_type: news + # timeout: 15.0 + # Requires Tor - name: torch engine: xpath From 1273ed7f7d2e80389a972f7e989300cb096c8f9d Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Wed, 22 Jan 2025 13:52:37 +0100 Subject: [PATCH 2/5] [mod] add doc to tavily and slightly improve the engine - Config options like ``search_type`` renamed to follow the upstream API (``topic``). - Default ``max_results`` is set to 5 - use image description if one exists - add an init function to check engine's settings - settings example: additional category 'ai' To review the added documentation of this path:: make docs.live and jump to: http://0.0.0.0:8000/dev/engines/online/tavily.html Signed-off-by: Markus Heiser --- docs/dev/engines/online/tavily.rst | 8 + searx/engines/tavily.py | 237 +++++++++++++++++++++++------ searx/settings.yml | 25 +-- 3 files changed, 218 insertions(+), 52 deletions(-) create mode 100644 docs/dev/engines/online/tavily.rst diff --git a/docs/dev/engines/online/tavily.rst b/docs/dev/engines/online/tavily.rst new file mode 100644 index 000000000..8d7c5fdba --- /dev/null +++ b/docs/dev/engines/online/tavily.rst @@ -0,0 +1,8 @@ +.. _tavily engine: + +====== +Tavily +====== + +.. automodule:: searx.engines.tavily + :members: diff --git a/searx/engines/tavily.py b/searx/engines/tavily.py index 9a5b079dc..3913d67e9 100644 --- a/searx/engines/tavily.py +++ b/searx/engines/tavily.py @@ -1,81 +1,213 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """ -Tavily AI Engine + +.. sidebar:: info + + Before reporting an issue with this engine, + please consult `API error codes`_. + +Tavily_ search API (AI engine). This engine implements the REST API +(`POST /search`_) and does not make use of the `Tavily Python Wrapper`_. + +From the API response this engine generates *result items* (shown in the main +result list) and an *answer result* (shown on top of the main result list). +If the *answer* from Tavily contains an image, the *answer result* is turned +into a *infobox result*. + +.. attention:: + + AI queries take considerably longer to process than queries to conventional + search engines. The ``timeout`` should therefore also be set considerably + higher, but it is not recommended to activate AI queries by default + (set ``disabled: true``), as otherwise all user searches will have to wait + for the AI. + +.. _Tavily: https://tavily.com/ +.. _Tavily Python Wrapper: https://pypi.org/project/tavily-python/ +.. _POST /search: https://docs.tavily.com/docs/rest-api/api-reference#endpoint-post-search +.. _Tavily API Credit Deduction: + https://docs.tavily.com/docs/rest-api/api-reference#tavily-api-credit-deduction-overview +.. _Getting started: https://docs.tavily.com/docs/welcome#getting-started +.. _API error codes: https://docs.tavily.com/docs/rest-api/api-reference#error-codes + +Configuration +============= + +The engine has the following mandatory setting: + +- :py:obj:`api_key` +- :py:obj:`topic` + +Optional settings are: + +- :py:obj:`days` +- :py:obj:`search_depth` +- :py:obj:`max_results` +- :py:obj:`include_images` +- :py:obj:`include_domains` +- :py:obj:`exclude_domains` + +Example configuration for general search queries: + +.. code:: yaml + + - name: tavily + engine: tavily + shortcut: tav + categories: [general, ai] + api_key: xxxxxxxx + topic: general + include_images: true + timeout: 15 + disabled: true + +Example configuration for news search: + +.. code:: yaml + + - name: tavily news + engine: tavily + shortcut: tavnews + categories: [news, ai] + api_key: xxxxxxxx + topic: news + timeout: 15 + disabled: true + + +Implementation +============== + """ from json import dumps from datetime import datetime -from searx.exceptions import SearxEngineAPIException +from flask_babel import gettext # about about = { - "website": 'https://tavily.com/', + "website": "https://tavily.com/", "wikidata_id": None, - "official_api_documentation": 'https://docs.tavily.com/docs/rest-api/api-reference', + "official_api_documentation": "https://docs.tavily.com/docs/rest-api/api-reference", "use_official_api": True, "require_api_key": True, "results": 'JSON', } -search_url = 'https://api.tavily.com/search' +search_url = "https://api.tavily.com/search" paging = False time_range_support = True -search_type = 'search' # possible values: search, news -api_key = 'unset' -max_results = 20 -search_depth = 'basic' # The depth of the search. It can be "basic" or "advanced". -include_images = False # Include query-related images. Turns answer into infobox with first image. -include_domains = [] # A list of domains to specifically include in the search results. -exclude_domains = [] # A list of domains to specifically exclude from the search results. +api_key: str = "unset" +"""Tavily API Key (`Getting started`_).""" + +search_depth: str = "basic" +"""The depth of the search. It can be ``basic`` or ``advanced``. Default is +``basic`` unless specified otherwise in a given method. + +- have an eye on your `Tavily API Credit Deduction`_! +""" + +topic: str = "" +"""The category of the search. This will determine which of tavily's agents +will be used for the search. Currently: only ``general`` and ``news`` are +supported and ``general`` will implicitly activate ``include_answer`` in the +`POST /search`_ API.""" + +days: int = 3 +"""The number of days back from the current date to include in the search results. +This specifies the time frame of data to be retrieved. Please note that this +feature is only available when using the ``news`` search topic. Default is 3.""" + +max_results: int = 5 +"""The maximum number of search results to return. Default is 5.""" + +include_images: bool = False +"""Include a list of query-related images in the response. Turns answer into +infobox with first image (as far there are any images in the response). Will +implicitly activate ``include_image_descriptions`` in the `POST /search`_ API +(adds descriptive text for each image). +""" + +include_domains: list[str] = [] +"""A list of domains to specifically include in the search results. Default +is ``[]```, which includes all domains.""" + +exclude_domains: list[str] = [] +"""A list of domains to specifically exclude from the search results. Default +is ``[]``, which doesn't exclude any domains. +""" def request(query, params): - if api_key == 'unset': - raise SearxEngineAPIException('missing Tavily API key') data = { - 'query': query, - 'api_key': api_key, - 'search_depth': 'basic', - 'time_range': params["time_range"], - 'max_results': max_results, - 'include_images': include_images, - 'include_domains': include_domains, - 'exclude_domains': exclude_domains, + "query": query, + "api_key": api_key, + "search_depth": search_depth, + "topic": topic, + "time_range": params["time_range"], + "max_results": max_results, + "include_images": include_images, + "include_domains": include_domains, + "exclude_domains": exclude_domains, } - if search_type == 'search': - data['include_answer'] = True - elif search_type == 'news': - data['topic'] = 'news' - else: - raise ValueError(f"Invalid search type {search_type}") - params['url'] = search_url - params['method'] = 'POST' - params['headers']['content-type'] = 'application/json' - params['data'] = dumps(data) + if include_images: + data["include_image_descriptions"] = True + + if topic == "general": + data["include_answer"] = True + + elif topic == "news": + data["topic"] = "news" + data["days"] = days + + params["url"] = search_url + params["method"] = "POST" + params["headers"]["Content-type"] = "application/json" + params["data"] = dumps(data) + return params def response(resp): results = [] - json_resp = resp.json() + data = resp.json() - for result in json_resp.get('results', []): + for result in data.get("results", []): results.append( { - 'title': result['title'], - 'url': result['url'], - 'content': result['content'], - 'publishedDate': _parse_date(result.get('published_date')), + "title": f"[{gettext('ai')}] {result['title']}", + "url": result["url"], + "content": result["content"], + "publishedDate": _parse_date(result.get("published_date")), } ) - if json_resp['images']: - results.append({'infobox': 'Tavily', 'img_src': json_resp['images'][0], 'content': json_resp['answer']}) - elif json_resp['answer']: - results.append({'answer': json_resp['answer']}) + img_list = data.get("images") + if img_list: + content = data.get("answer") + img_src = img_list[0] + if isinstance(img_list[0], dict): + img_src = img_list[0]["url"] + img_caption = gettext("Image caption") + ": " + img_list[0]["description"] + if not content: + gettext("Image caption") + content = img_caption + else: + content += "//" + img_caption + + results.append( + { + "infobox": f"Tavily [{gettext('ai')}]", + "img_src": img_src, + "content": content, + } + ) + + elif data["answer"]: + results.append({"answer": data["answer"]}) return results @@ -83,7 +215,26 @@ def response(resp): def _parse_date(pubDate): if pubDate is not None: try: - return datetime.strptime(pubDate, '%a, %d %b %Y %H:%M:%S %Z') + return datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S %Z") except (ValueError, TypeError) as e: logger.debug("ignore exception (publishedDate): %s", e) return None + + +def init(engine_settings: dict): + msg = [] + + val = engine_settings.get("api_key") or api_key + if not val or val == "unset": + msg.append("missing api_key") + + val = engine_settings.get("topic") or topic + if val not in ["general", "news"]: + msg.append(f"invalid topic: '{val}'") + + val = engine_settings.get("search_depth") or search_depth + if val not in ["basic", "advanced"]: + msg.append(f"invalid search_depth: '{val}'") + + if msg: + raise ValueError(f"[{engine_settings['name']}] engine's settings: {' / '.join(msg)}") diff --git a/searx/settings.yml b/searx/settings.yml index 8d016610c..7ec4135ca 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1828,21 +1828,28 @@ engines: shortcut: tm disabled: true + # Tavily requires an API key as well as other configurations. Before you + # activate these engines you should read the documentation. + # --> https://docs.searxng.org/dev/engines/online/tavily.html + # # - name: tavily # engine: tavily # shortcut: tav - # categories: general - # # API key required, see: https://docs.tavily.com/docs/welcome#getting-started - # api_key: 'unset' - # include_images: false - # timeout: 15.0 + # categories: [general, ai] + # api_key: unset + # topic: general + # include_images: true + # timeout: 15 + # disabled: true + # # - name: tavily news # engine: tavily # shortcut: tavnews - # categories: news - # api_key: 'unset' - # search_type: news - # timeout: 15.0 + # categories: [news, ai] + # api_key: unset + # topic: news + # timeout: 15 + # disabled: true # Requires Tor - name: torch From 8ab16bb419d54733f9dec0ae792b8ac739fe3f0e Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Wed, 22 Jan 2025 14:00:13 +0100 Subject: [PATCH 3/5] [mod] update intersphinx links and add missing CATEGORY_GROUPS To get translations for, the missed CATEGORY_GROUPS has been added: - ai - movies - translate - wikimedia Signed-off-by: Markus Heiser --- docs/conf.py | 4 +-- searx/searxng.msg | 67 +++++++++++++++++++++++++---------------------- 2 files changed, 38 insertions(+), 33 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index fec9eb64b..464bc6ae5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -142,10 +142,10 @@ suppress_warnings = ['myst.domains'] intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), "babel" : ("https://babel.readthedocs.io/en/latest/", None), - "flask": ("https://flask.palletsprojects.com/", None), + "flask": ("https://flask.palletsprojects.com/en/stable/", None), "flask_babel": ("https://python-babel.github.io/flask-babel/", None), # "werkzeug": ("https://werkzeug.palletsprojects.com/", None), - "jinja": ("https://jinja.palletsprojects.com/", None), + "jinja": ("https://jinja.palletsprojects.com/en/stable/", None), "linuxdoc" : ("https://return42.github.io/linuxdoc/", None), "sphinx" : ("https://www.sphinx-doc.org/en/master/", None), "redis": ('https://redis.readthedocs.io/en/stable/', None), diff --git a/searx/searxng.msg b/searx/searxng.msg index a4bfb038a..2992b169e 100644 --- a/searx/searxng.msg +++ b/searx/searxng.msg @@ -7,72 +7,77 @@ from searx import webutils from searx import engines __all__ = [ - 'CONSTANT_NAMES', - 'CATEGORY_NAMES', - 'CATEGORY_GROUPS', - 'STYLE_NAMES', 'BRAND_CUSTOM_LINKS', - 'WEATHER_TERMS', + 'CATEGORY_GROUPS', + 'CATEGORY_NAMES', + 'CONSTANT_NAMES', 'SOCIAL_MEDIA_TERMS', + 'STYLE_NAMES', + 'WEATHER_TERMS', ] CONSTANT_NAMES = { # Constants defined in other modules - 'NO_SUBGROUPING': webutils.NO_SUBGROUPING, 'DEFAULT_CATEGORY': engines.DEFAULT_CATEGORY, + 'NO_SUBGROUPING': webutils.NO_SUBGROUPING, } CATEGORY_NAMES = { 'FILES': 'files', 'GENERAL': 'general', - 'MUSIC': 'music', - 'SOCIAL_MEDIA': 'social media', 'IMAGES': 'images', - 'VIDEOS': 'videos', - 'RADIO': 'radio', - 'TV': 'tv', 'IT': 'it', - 'NEWS': 'news', 'MAP': 'map', + 'MUSIC': 'music', + 'NEWS': 'news', 'ONIONS': 'onions', + 'RADIO': 'radio', 'SCIENCE': 'science', + 'SOCIAL_MEDIA': 'social media', + 'TV': 'tv', + 'VIDEOS': 'videos', } CATEGORY_GROUPS = { # non-tab categories + 'AI': 'ai', 'APPS': 'apps', 'DICTIONARIES': 'dictionaries', 'LYRICS': 'lyrics', + 'MOVIES': 'movies', 'PACKAGES': 'packages', 'Q_A': 'q&a', 'REPOS': 'repos', + 'SCIENTIFIC_PUBLICATIONS': 'scientific publications', 'SOFTWARE_WIKIS': 'software wikis', + 'TRANSLATE': 'translate', + 'WEATHER': 'weather', 'WEB': 'web', - 'SCIENTIFIC PUBLICATIONS': 'scientific publications', + 'WIKIMEDIA': 'wikimedia', } STYLE_NAMES = { 'AUTO': 'auto', - 'LIGHT': 'light', - 'DARK': 'dark', 'BLACK': 'black', + 'DARK': 'dark', + 'LIGHT': 'light', } BRAND_CUSTOM_LINKS = { - 'UPTIME': 'Uptime', 'ABOUT': 'About', + 'UPTIME': 'Uptime', } WEATHER_TERMS = { - 'AVERAGE TEMP.': 'Average temp.', - 'CLOUD COVER': 'Cloud cover', + 'AVERAGE_TEMP.': 'Average temp.', + 'CLOUD_COVER': 'Cloud cover', 'CONDITION': 'Condition', - 'CURRENT CONDITION': 'Current condition', + 'CURRENT_CONDITION': 'Current condition', 'EVENING': 'Evening', - 'FEELS LIKE': 'Feels like', + 'FEELS_LIKE': 'Feels like', 'HUMIDITY': 'Humidity', - 'MAX TEMP.': 'Max temp.', - 'MIN TEMP.': 'Min temp.', + 'MAX_TEMP.': 'Max temp.', + 'MIN_TEMP.': 'Min temp.', 'MORNING': 'Morning', 'NIGHT': 'Night', 'NOON': 'Noon', @@ -80,22 +85,22 @@ WEATHER_TERMS = { 'SUNRISE': 'Sunrise', 'SUNSET': 'Sunset', 'TEMPERATURE': 'Temperature', - 'UV INDEX': 'UV index', + 'UV_INDEX': 'UV index', 'VISIBILITY': 'Visibility', 'WIND': 'Wind', } SOCIAL_MEDIA_TERMS = { - 'SUBSCRIBERS': 'subscribers', - 'POSTS': 'posts', - 'ACTIVE USERS': 'active users', + 'ACTIVE_USERS': 'active users', + 'AUTHOR': 'author', 'COMMENTS': 'comments', - 'USER': 'user', 'COMMUNITY': 'community', 'POINTS': 'points', + 'POSTS': 'posts', + 'SUBSCRIBERS': 'subscribers', + 'THREAD_ANSWERED': 'answered', + 'THREAD_CLOSED': 'closed', + 'THREAD_OPEN': 'open', 'TITLE': 'title', - 'AUTHOR': 'author', - 'THREAD OPEN': 'open', - 'THREAD CLOSED': 'closed', - 'THREAD ANSWERED': 'answered', + 'USER': 'user', } From 1a3ffdb4ea206077ff51aac0e60ec7f215af07d9 Mon Sep 17 00:00:00 2001 From: GenericMale Date: Thu, 23 Jan 2025 03:10:17 +0100 Subject: [PATCH 4/5] [mod] small tavily engine changes - add include_image_descriptions & include_answer to engine settings - move [ai] prefix in results from title to content content - minor doc fixes --- searx/engines/tavily.py | 82 ++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/searx/engines/tavily.py b/searx/engines/tavily.py index 3913d67e9..e4c77fe83 100644 --- a/searx/engines/tavily.py +++ b/searx/engines/tavily.py @@ -6,18 +6,18 @@ Before reporting an issue with this engine, please consult `API error codes`_. -Tavily_ search API (AI engine). This engine implements the REST API +Tavily_ search API (AI engine). This engine implements the REST API (`POST /search`_) and does not make use of the `Tavily Python Wrapper`_. -From the API response this engine generates *result items* (shown in the main +From the API response, this engine generates *result items* (shown in the main result list) and an *answer result* (shown on top of the main result list). If the *answer* from Tavily contains an image, the *answer result* is turned -into a *infobox result*. +into an *infobox result*. .. attention:: AI queries take considerably longer to process than queries to conventional - search engines. The ``timeout`` should therefore also be set considerably + search engines. The ``timeout`` should therefore also be set considerably higher, but it is not recommended to activate AI queries by default (set ``disabled: true``), as otherwise all user searches will have to wait for the AI. @@ -43,7 +43,9 @@ Optional settings are: - :py:obj:`days` - :py:obj:`search_depth` - :py:obj:`max_results` +- :py:obj:`include_answer` - :py:obj:`include_images` +- :py:obj:`include_image_descriptions` - :py:obj:`include_domains` - :py:obj:`exclude_domains` @@ -102,36 +104,42 @@ api_key: str = "unset" """Tavily API Key (`Getting started`_).""" search_depth: str = "basic" -"""The depth of the search. It can be ``basic`` or ``advanced``. Default is +"""The depth of the search. It can be ``basic`` or ``advanced``. Default is ``basic`` unless specified otherwise in a given method. - have an eye on your `Tavily API Credit Deduction`_! """ topic: str = "" -"""The category of the search. This will determine which of tavily's agents -will be used for the search. Currently: only ``general`` and ``news`` are -supported and ``general`` will implicitly activate ``include_answer`` in the -`POST /search`_ API.""" +"""The category of the search. This will determine which of Tavily's agents +will be used for the search. Currently, only ``general`` and ``news`` are +supported.""" days: int = 3 """The number of days back from the current date to include in the search results. -This specifies the time frame of data to be retrieved. Please note that this +This specifies the time frame of data to be retrieved. Please note that this feature is only available when using the ``news`` search topic. Default is 3.""" max_results: int = 5 -"""The maximum number of search results to return. Default is 5.""" +"""The maximum number of search results to return. Default is 5.""" + +include_answer: bool = True +"""Include a short answer to the original query, generated by an LLM based on Tavily's +search results.""" include_images: bool = False -"""Include a list of query-related images in the response. Turns answer into -infobox with first image (as far there are any images in the response). Will -implicitly activate ``include_image_descriptions`` in the `POST /search`_ API -(adds descriptive text for each image). +"""Include a list of query-related images in the response. Creates an infobox +with the first image (as far as there are any images in the response) and the answer, +if ``include_answer`` is also enabled. """ +include_image_descriptions: bool = False +"""When ``include_images`` is set to True, this option adds descriptive text for +each image.""" + include_domains: list[str] = [] """A list of domains to specifically include in the search results. Default -is ``[]```, which includes all domains.""" +is ``[]``, which includes all domains.""" exclude_domains: list[str] = [] """A list of domains to specifically exclude from the search results. Default @@ -154,13 +162,12 @@ def request(query, params): } if include_images: - data["include_image_descriptions"] = True + data["include_image_descriptions"] = include_image_descriptions if topic == "general": - data["include_answer"] = True + data["include_answer"] = include_answer elif topic == "news": - data["topic"] = "news" data["days"] = days params["url"] = search_url @@ -178,33 +185,32 @@ def response(resp): for result in data.get("results", []): results.append( { - "title": f"[{gettext('ai')}] {result['title']}", + "title": result['title'], "url": result["url"], - "content": result["content"], + "content": f"[{gettext('ai')}] {result['content']}", "publishedDate": _parse_date(result.get("published_date")), } ) img_list = data.get("images") if img_list: - content = data.get("answer") - img_src = img_list[0] - if isinstance(img_list[0], dict): - img_src = img_list[0]["url"] - img_caption = gettext("Image caption") + ": " + img_list[0]["description"] - if not content: - gettext("Image caption") - content = img_caption - else: - content += "//" + img_caption + result = { + "infobox": f"Tavily [{gettext('ai')}]", + "img_src": img_list[0], + } - results.append( - { - "infobox": f"Tavily [{gettext('ai')}]", - "img_src": img_src, - "content": content, - } - ) + content = data.get("answer") + if isinstance(img_list[0], dict): + result["img_src"] = img_list[0]["url"] + img_caption = f"{gettext('Image caption')}: {img_list[0]['description']}" + if not content: + result["content"] = img_caption + else: + result["content"] = f"{content}
{img_caption}" + elif content: + result["content"] = content + + results.append(result) elif data["answer"]: results.append({"answer": data["answer"]}) From e4e6f214946003795dd063b7bee36abe80e8f786 Mon Sep 17 00:00:00 2001 From: GenericMale Date: Thu, 23 Jan 2025 11:02:44 +0100 Subject: [PATCH 5/5] [mod] tavily engine: revert double punctuation & remove f-stringed gettexts --- searx/engines/tavily.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/searx/engines/tavily.py b/searx/engines/tavily.py index e4c77fe83..1abd1dee0 100644 --- a/searx/engines/tavily.py +++ b/searx/engines/tavily.py @@ -6,7 +6,7 @@ Before reporting an issue with this engine, please consult `API error codes`_. -Tavily_ search API (AI engine). This engine implements the REST API +Tavily_ search API (AI engine). This engine implements the REST API (`POST /search`_) and does not make use of the `Tavily Python Wrapper`_. From the API response, this engine generates *result items* (shown in the main @@ -17,7 +17,7 @@ into an *infobox result*. .. attention:: AI queries take considerably longer to process than queries to conventional - search engines. The ``timeout`` should therefore also be set considerably + search engines. The ``timeout`` should therefore also be set considerably higher, but it is not recommended to activate AI queries by default (set ``disabled: true``), as otherwise all user searches will have to wait for the AI. @@ -93,7 +93,7 @@ about = { "official_api_documentation": "https://docs.tavily.com/docs/rest-api/api-reference", "use_official_api": True, "require_api_key": True, - "results": 'JSON', + "results": "JSON", } search_url = "https://api.tavily.com/search" @@ -104,31 +104,31 @@ api_key: str = "unset" """Tavily API Key (`Getting started`_).""" search_depth: str = "basic" -"""The depth of the search. It can be ``basic`` or ``advanced``. Default is +"""The depth of the search. It can be ``basic`` or ``advanced``. Default is ``basic`` unless specified otherwise in a given method. - have an eye on your `Tavily API Credit Deduction`_! """ topic: str = "" -"""The category of the search. This will determine which of Tavily's agents -will be used for the search. Currently, only ``general`` and ``news`` are +"""The category of the search. This will determine which of Tavily's agents +will be used for the search. Currently, only ``general`` and ``news`` are supported.""" days: int = 3 """The number of days back from the current date to include in the search results. -This specifies the time frame of data to be retrieved. Please note that this +This specifies the time frame of data to be retrieved. Please note that this feature is only available when using the ``news`` search topic. Default is 3.""" max_results: int = 5 -"""The maximum number of search results to return. Default is 5.""" +"""The maximum number of search results to return. Default is 5.""" include_answer: bool = True """Include a short answer to the original query, generated by an LLM based on Tavily's search results.""" include_images: bool = False -"""Include a list of query-related images in the response. Creates an infobox +"""Include a list of query-related images in the response. Creates an infobox with the first image (as far as there are any images in the response) and the answer, if ``include_answer`` is also enabled. """ @@ -185,9 +185,9 @@ def response(resp): for result in data.get("results", []): results.append( { - "title": result['title'], + "title": result["title"], "url": result["url"], - "content": f"[{gettext('ai')}] {result['content']}", + "content": "[" + gettext("ai") + "] " + result["content"], "publishedDate": _parse_date(result.get("published_date")), } ) @@ -195,18 +195,18 @@ def response(resp): img_list = data.get("images") if img_list: result = { - "infobox": f"Tavily [{gettext('ai')}]", + "infobox": "Tavily [" + gettext("ai") + "]", "img_src": img_list[0], } content = data.get("answer") if isinstance(img_list[0], dict): result["img_src"] = img_list[0]["url"] - img_caption = f"{gettext('Image caption')}: {img_list[0]['description']}" + img_caption = gettext("Image caption") + ": " + img_list[0]["description"] if not content: result["content"] = img_caption else: - result["content"] = f"{content}
{img_caption}" + result["content"] = content + "//" + img_caption elif content: result["content"] = content