[mod] add doc to tavily and slightly improve the engine

- Config options like ``search_type`` renamed to follow the upstream API (``topic``). - Default ``max_results`` is set to 5 - use image description if one exists - add an init function to check engine's settings - settings example: additional category 'ai' To review the added documentation of this path:: make docs.live and jump to: http://0.0.0.0:8000/dev/engines/online/tavily.html Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2025-01-22 13:52:37 +01:00 · 2025-01-22 13:52:37 +01:00 · 1273ed7f7d
commit 1273ed7f7d
parent d8a4d589eb
3 changed files with 218 additions and 52 deletions
--- a/docs/dev/engines/online/tavily.rst
+++ b/docs/dev/engines/online/tavily.rst
@ -0,0 +1,8 @@
 .. _tavily engine:
 ======
 Tavily
 ======
 .. automodule:: searx.engines.tavily
   :members:
--- a/searx/engines/tavily.py
+++ b/searx/engines/tavily.py
@ -1,81 +1,213 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """
-Tavily AI Engine
+
 .. sidebar:: info
   Before reporting an issue with this engine,
   please consult `API error codes`_.
 Tavily_ search API (AI engine).  This engine implements the REST API
 (`POST /search`_) and does not make use of the `Tavily Python Wrapper`_.
 From the API response this engine generates *result items* (shown in the main
 result list) and an *answer result* (shown on top of the main result list).
 If the *answer* from Tavily contains an image, the *answer result* is turned
 into an *infobox result*.
 .. attention::
   AI queries take considerably longer to process than queries to conventional
   search engines.  The ``timeout`` should therefore also be set considerably
   higher, but it is not recommended to activate AI queries by default
   (set ``disabled: true``), as otherwise all user searches will have to wait
   for the AI.
 .. _Tavily: https://tavily.com/
 .. _Tavily Python Wrapper: https://pypi.org/project/tavily-python/
 .. _POST /search: https://docs.tavily.com/docs/rest-api/api-reference#endpoint-post-search
 .. _Tavily API Credit Deduction:
   https://docs.tavily.com/docs/rest-api/api-reference#tavily-api-credit-deduction-overview
 .. _Getting started: https://docs.tavily.com/docs/welcome#getting-started
 .. _API error codes: https://docs.tavily.com/docs/rest-api/api-reference#error-codes
 Configuration
 =============
 The engine has the following mandatory settings:
 - :py:obj:`api_key`
 - :py:obj:`topic`
 Optional settings are:
 - :py:obj:`days`
 - :py:obj:`search_depth`
 - :py:obj:`max_results`
 - :py:obj:`include_images`
 - :py:obj:`include_domains`
 - :py:obj:`exclude_domains`
 Example configuration for general search queries:
 .. code:: yaml
  - name: tavily
    engine: tavily
    shortcut: tav
    categories: [general, ai]
    api_key: xxxxxxxx
    topic: general
    include_images: true
    timeout: 15
    disabled: true
 Example configuration for news search:
 .. code:: yaml
  - name: tavily news
    engine: tavily
    shortcut: tavnews
    categories: [news, ai]
    api_key: xxxxxxxx
    topic: news
    timeout: 15
    disabled: true
 Implementation
 ==============
 """
 from json import dumps
 from datetime import datetime
-from searx.exceptions import SearxEngineAPIException
+from flask_babel import gettext
 # about
 about = {
-    "website": 'https://tavily.com/',
+    "website": "https://tavily.com/",
    "wikidata_id": None,
-    "official_api_documentation": 'https://docs.tavily.com/docs/rest-api/api-reference',
+    "official_api_documentation": "https://docs.tavily.com/docs/rest-api/api-reference",
    "use_official_api": True,
    "require_api_key": True,
    "results": 'JSON',
 }
-search_url = 'https://api.tavily.com/search'
+search_url = "https://api.tavily.com/search"
 paging = False
 time_range_support = True
-search_type = 'search'  # possible values: search, news
+api_key: str = "unset"
-api_key = 'unset'
+"""Tavily API Key (`Getting started`_)."""
-max_results = 20
+
-search_depth = 'basic'  # The depth of the search. It can be "basic" or "advanced".
+search_depth: str = "basic"
-include_images = False  # Include query-related images. Turns answer into infobox with first image.
+"""The depth of the search.  It can be ``basic`` or ``advanced``.  Default is
-include_domains = []  # A list of domains to specifically include in the search results.
+``basic`` unless specified otherwise in a given method.
-exclude_domains = []  # A list of domains to specifically exclude from the search results.
+
 - have an eye on your `Tavily API Credit Deduction`_!
 """
 topic: str = ""
 """The category of the search.  This will determine which of tavily's agents
 will be used for the search.  Currently: only ``general`` and ``news`` are
 supported and ``general`` will implicitly activate ``include_answer`` in the
 `POST /search`_ API."""
 days: int = 3
 """The number of days back from the current date to include in the search results.
 This specifies the time frame of data to be retrieved.  Please note that this
 feature is only available when using the ``news`` search topic. Default is 3."""
 max_results: int = 5
 """The maximum number of search results to return.  Default is 5."""
 include_images: bool = False
 """Include a list of query-related images in the response.  Turns answer into
 infobox with first image (as far as there are any images in the response).  Will
 implicitly activate ``include_image_descriptions`` in the `POST /search`_ API
 (adds descriptive text for each image).
 """
 include_domains: list[str] = []
 """A list of domains to specifically include in the search results. Default
 is ``[]``, which includes all domains."""
 exclude_domains: list[str] = []
 """A list of domains to specifically exclude from the search results. Default
 is ``[]``, which doesn't exclude any domains.
 """
 def request(query, params):
    if api_key == 'unset':
        raise SearxEngineAPIException('missing Tavily API key')
    data = {
-        'query': query,
+        "query": query,
-        'api_key': api_key,
+        "api_key": api_key,
-        'search_depth': 'basic',
+        "search_depth": search_depth,
-        'time_range': params["time_range"],
+        "topic": topic,
-        'max_results': max_results,
+        "time_range": params["time_range"],
-        'include_images': include_images,
+        "max_results": max_results,
-        'include_domains': include_domains,
+        "include_images": include_images,
-        'exclude_domains': exclude_domains,
+        "include_domains": include_domains,
        "exclude_domains": exclude_domains,
    }
    if search_type == 'search':
        data['include_answer'] = True
    elif search_type == 'news':
        data['topic'] = 'news'
    else:
        raise ValueError(f"Invalid search type {search_type}")
-    params['url'] = search_url
+    if include_images:
-    params['method'] = 'POST'
+        data["include_image_descriptions"] = True
-    params['headers']['content-type'] = 'application/json'
+
-    params['data'] = dumps(data)
+    if topic == "general":
        data["include_answer"] = True
    elif topic == "news":
        data["topic"] = "news"
        data["days"] = days
    params["url"] = search_url
    params["method"] = "POST"
    params["headers"]["Content-type"] = "application/json"
    params["data"] = dumps(data)
    return params
 def response(resp):
    results = []
-    json_resp = resp.json()
+    data = resp.json()
-    for result in json_resp.get('results', []):
+    for result in data.get("results", []):
        results.append(
            {
-                'title': result['title'],
+                "title": f"[{gettext('ai')}] {result['title']}",
-                'url': result['url'],
+                "url": result["url"],
-                'content': result['content'],
+                "content": result["content"],
-                'publishedDate': _parse_date(result.get('published_date')),
+                "publishedDate": _parse_date(result.get("published_date")),
            }
        )
-    if json_resp['images']:
+    img_list = data.get("images")
-        results.append({'infobox': 'Tavily', 'img_src': json_resp['images'][0], 'content': json_resp['answer']})
+    if img_list:
-    elif json_resp['answer']:
+        content = data.get("answer")
-        results.append({'answer': json_resp['answer']})
+        img_src = img_list[0]
        if isinstance(img_list[0], dict):
            img_src = img_list[0]["url"]
            img_caption = gettext("Image caption") + ": " + img_list[0]["description"]
            if not content:
                gettext("Image caption")
                content = img_caption
            else:
                content += "//" + img_caption
        results.append(
            {
                "infobox": f"Tavily [{gettext('ai')}]",
                "img_src": img_src,
                "content": content,
            }
        )
    elif data["answer"]:
        results.append({"answer": data["answer"]})
    return results
@ -83,7 +215,26 @@ def response(resp):
 def _parse_date(pubDate):
    if pubDate is not None:
        try:
-            return datetime.strptime(pubDate, '%a, %d %b %Y %H:%M:%S %Z')
+            return datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S %Z")
        except (ValueError, TypeError) as e:
            logger.debug("ignore exception (publishedDate): %s", e)
    return None
 def init(engine_settings: dict):
    """Check the engine's settings and reject an unusable configuration.

    Each option is looked up in ``engine_settings``; when the entry is missing
    or empty, the module-level default of the same name is used instead.  All
    problems are collected first, so that one exception reports every
    misconfigured option at once.

    :param engine_settings: settings of this engine instance; the ``name``
        entry is used to build the error message.
    :raises ValueError: if ``api_key`` is empty or still ``unset``, or if
        ``topic`` / ``search_depth`` hold a value that is not supported.
    """
    problems = []

    key = engine_settings.get("api_key") or api_key
    if key == "unset" or not key:
        problems.append("missing api_key")

    chosen_topic = engine_settings.get("topic") or topic
    if chosen_topic not in ("general", "news"):
        problems.append(f"invalid topic: '{chosen_topic}'")

    depth = engine_settings.get("search_depth") or search_depth
    if depth not in ("basic", "advanced"):
        problems.append(f"invalid search_depth: '{depth}'")

    # Nothing to complain about: the configuration is usable.
    if not problems:
        return

    details = " / ".join(problems)
    raise ValueError(f"[{engine_settings['name']}] engine's settings: {details}")
--- a/searx/settings.yml
+++ b/searx/settings.yml
@ -1828,21 +1828,28 @@ engines:
    shortcut: tm
    disabled: true
  # Tavily requires an API key as well as other configurations. Before you
  # activate these engines you should read the documentation.
  # --> https://docs.searxng.org/dev/engines/online/tavily.html
  #
  # - name: tavily
  #   engine: tavily
  #   shortcut: tav
-  #   categories: general
+  #   categories: [general, ai]
-  #   # API key required, see: https://docs.tavily.com/docs/welcome#getting-started
+  #   api_key: unset
-  #   api_key: 'unset'
+  #   topic: general
-  #   include_images: false
+  #   include_images: true
-  #   timeout: 15.0
+  #   timeout: 15
  #   disabled: true
  #
  # - name: tavily news
  #   engine: tavily
  #   shortcut: tavnews
-  #   categories: news
+  #   categories: [news, ai]
-  #   api_key: 'unset'
+  #   api_key: unset
-  #   search_type: news
+  #   topic: news
-  #   timeout: 15.0
+  #   timeout: 15
  #   disabled: true
  # Requires Tor
  - name: torch