[mod] add doc to tavily and slightly improve the engine
- Config options like ``search_type`` renamed to follow the upstream API (``topic``). - Default ``max_results`` is set to 5 - use image description if one exists - add an init function to check engine's settings - settings example: additional category 'ai' To review the added documentation of this patch:: make docs.live and jump to: http://0.0.0.0:8000/dev/engines/online/tavily.html Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
d8a4d589eb
commit
1273ed7f7d
8
docs/dev/engines/online/tavily.rst
Normal file
8
docs/dev/engines/online/tavily.rst
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
.. _tavily engine:
|
||||||
|
|
||||||
|
======
|
||||||
|
Tavily
|
||||||
|
======
|
||||||
|
|
||||||
|
.. automodule:: searx.engines.tavily
|
||||||
|
:members:
|
@ -1,81 +1,213 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
"""
|
"""
|
||||||
Tavily AI Engine
|
|
||||||
|
.. sidebar:: info
|
||||||
|
|
||||||
|
Before reporting an issue with this engine,
|
||||||
|
please consult `API error codes`_.
|
||||||
|
|
||||||
|
Tavily_ search API (AI engine). This engine implements the REST API
|
||||||
|
(`POST /search`_) and does not make use of the `Tavily Python Wrapper`_.
|
||||||
|
|
||||||
|
From the API response this engine generates *result items* (shown in the main
|
||||||
|
result list) and an *answer result* (shown on top of the main result list).
|
||||||
|
If the *answer* from Tavily contains an image, the *answer result* is turned
|
||||||
|
into an *infobox result*.
|
||||||
|
|
||||||
|
.. attention::
|
||||||
|
|
||||||
|
AI queries take considerably longer to process than queries to conventional
|
||||||
|
search engines. The ``timeout`` should therefore also be set considerably
|
||||||
|
higher, but it is not recommended to activate AI queries by default
|
||||||
|
(set ``disabled: true``), as otherwise all user searches will have to wait
|
||||||
|
for the AI.
|
||||||
|
|
||||||
|
.. _Tavily: https://tavily.com/
|
||||||
|
.. _Tavily Python Wrapper: https://pypi.org/project/tavily-python/
|
||||||
|
.. _POST /search: https://docs.tavily.com/docs/rest-api/api-reference#endpoint-post-search
|
||||||
|
.. _Tavily API Credit Deduction:
|
||||||
|
https://docs.tavily.com/docs/rest-api/api-reference#tavily-api-credit-deduction-overview
|
||||||
|
.. _Getting started: https://docs.tavily.com/docs/welcome#getting-started
|
||||||
|
.. _API error codes: https://docs.tavily.com/docs/rest-api/api-reference#error-codes
|
||||||
|
|
||||||
|
Configuration
|
||||||
|
=============
|
||||||
|
|
||||||
|
The engine has the following mandatory settings:
|
||||||
|
|
||||||
|
- :py:obj:`api_key`
|
||||||
|
- :py:obj:`topic`
|
||||||
|
|
||||||
|
Optional settings are:
|
||||||
|
|
||||||
|
- :py:obj:`days`
|
||||||
|
- :py:obj:`search_depth`
|
||||||
|
- :py:obj:`max_results`
|
||||||
|
- :py:obj:`include_images`
|
||||||
|
- :py:obj:`include_domains`
|
||||||
|
- :py:obj:`exclude_domains`
|
||||||
|
|
||||||
|
Example configuration for general search queries:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
- name: tavily
|
||||||
|
engine: tavily
|
||||||
|
shortcut: tav
|
||||||
|
categories: [general, ai]
|
||||||
|
api_key: xxxxxxxx
|
||||||
|
topic: general
|
||||||
|
include_images: true
|
||||||
|
timeout: 15
|
||||||
|
disabled: true
|
||||||
|
|
||||||
|
Example configuration for news search:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
- name: tavily news
|
||||||
|
engine: tavily
|
||||||
|
shortcut: tavnews
|
||||||
|
categories: [news, ai]
|
||||||
|
api_key: xxxxxxxx
|
||||||
|
topic: news
|
||||||
|
timeout: 15
|
||||||
|
disabled: true
|
||||||
|
|
||||||
|
|
||||||
|
Implementation
|
||||||
|
==============
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from json import dumps
|
from json import dumps
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from searx.exceptions import SearxEngineAPIException
|
from flask_babel import gettext
|
||||||
|
|
||||||
# about
|
# about
|
||||||
about = {
|
about = {
|
||||||
"website": 'https://tavily.com/',
|
"website": "https://tavily.com/",
|
||||||
"wikidata_id": None,
|
"wikidata_id": None,
|
||||||
"official_api_documentation": 'https://docs.tavily.com/docs/rest-api/api-reference',
|
"official_api_documentation": "https://docs.tavily.com/docs/rest-api/api-reference",
|
||||||
"use_official_api": True,
|
"use_official_api": True,
|
||||||
"require_api_key": True,
|
"require_api_key": True,
|
||||||
"results": 'JSON',
|
"results": 'JSON',
|
||||||
}
|
}
|
||||||
|
|
||||||
search_url = 'https://api.tavily.com/search'
|
search_url = "https://api.tavily.com/search"
|
||||||
paging = False
|
paging = False
|
||||||
time_range_support = True
|
time_range_support = True
|
||||||
|
|
||||||
search_type = 'search' # possible values: search, news
|
api_key: str = "unset"
|
||||||
api_key = 'unset'
|
"""Tavily API Key (`Getting started`_)."""
|
||||||
max_results = 20
|
|
||||||
search_depth = 'basic' # The depth of the search. It can be "basic" or "advanced".
|
search_depth: str = "basic"
|
||||||
include_images = False # Include query-related images. Turns answer into infobox with first image.
|
"""The depth of the search. It can be ``basic`` or ``advanced``. Default is
|
||||||
include_domains = [] # A list of domains to specifically include in the search results.
|
``basic`` unless specified otherwise in a given method.
|
||||||
exclude_domains = [] # A list of domains to specifically exclude from the search results.
|
|
||||||
|
- have an eye on your `Tavily API Credit Deduction`_!
|
||||||
|
"""
|
||||||
|
|
||||||
|
topic: str = ""
|
||||||
|
"""The category of the search. This will determine which of tavily's agents
|
||||||
|
will be used for the search. Currently: only ``general`` and ``news`` are
|
||||||
|
supported and ``general`` will implicitly activate ``include_answer`` in the
|
||||||
|
`POST /search`_ API."""
|
||||||
|
|
||||||
|
days: int = 3
|
||||||
|
"""The number of days back from the current date to include in the search results.
|
||||||
|
This specifies the time frame of data to be retrieved. Please note that this
|
||||||
|
feature is only available when using the ``news`` search topic. Default is 3."""
|
||||||
|
|
||||||
|
max_results: int = 5
|
||||||
|
"""The maximum number of search results to return. Default is 5."""
|
||||||
|
|
||||||
|
include_images: bool = False
|
||||||
|
"""Include a list of query-related images in the response. Turns answer into
|
||||||
|
infobox with first image (as far as there are any images in the response). Will
|
||||||
|
implicitly activate ``include_image_descriptions`` in the `POST /search`_ API
|
||||||
|
(adds descriptive text for each image).
|
||||||
|
"""
|
||||||
|
|
||||||
|
include_domains: list[str] = []
|
||||||
|
"""A list of domains to specifically include in the search results. Default
|
||||||
|
is ``[]``, which includes all domains."""
|
||||||
|
|
||||||
|
exclude_domains: list[str] = []
|
||||||
|
"""A list of domains to specifically exclude from the search results. Default
|
||||||
|
is ``[]``, which doesn't exclude any domains.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
if api_key == 'unset':
|
|
||||||
raise SearxEngineAPIException('missing Tavily API key')
|
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
'query': query,
|
"query": query,
|
||||||
'api_key': api_key,
|
"api_key": api_key,
|
||||||
'search_depth': 'basic',
|
"search_depth": search_depth,
|
||||||
'time_range': params["time_range"],
|
"topic": topic,
|
||||||
'max_results': max_results,
|
"time_range": params["time_range"],
|
||||||
'include_images': include_images,
|
"max_results": max_results,
|
||||||
'include_domains': include_domains,
|
"include_images": include_images,
|
||||||
'exclude_domains': exclude_domains,
|
"include_domains": include_domains,
|
||||||
|
"exclude_domains": exclude_domains,
|
||||||
}
|
}
|
||||||
if search_type == 'search':
|
|
||||||
data['include_answer'] = True
|
|
||||||
elif search_type == 'news':
|
|
||||||
data['topic'] = 'news'
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Invalid search type {search_type}")
|
|
||||||
|
|
||||||
params['url'] = search_url
|
if include_images:
|
||||||
params['method'] = 'POST'
|
data["include_image_descriptions"] = True
|
||||||
params['headers']['content-type'] = 'application/json'
|
|
||||||
params['data'] = dumps(data)
|
if topic == "general":
|
||||||
|
data["include_answer"] = True
|
||||||
|
|
||||||
|
elif topic == "news":
|
||||||
|
data["topic"] = "news"
|
||||||
|
data["days"] = days
|
||||||
|
|
||||||
|
params["url"] = search_url
|
||||||
|
params["method"] = "POST"
|
||||||
|
params["headers"]["Content-type"] = "application/json"
|
||||||
|
params["data"] = dumps(data)
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
def response(resp):
|
||||||
results = []
|
results = []
|
||||||
json_resp = resp.json()
|
data = resp.json()
|
||||||
|
|
||||||
for result in json_resp.get('results', []):
|
for result in data.get("results", []):
|
||||||
results.append(
|
results.append(
|
||||||
{
|
{
|
||||||
'title': result['title'],
|
"title": f"[{gettext('ai')}] {result['title']}",
|
||||||
'url': result['url'],
|
"url": result["url"],
|
||||||
'content': result['content'],
|
"content": result["content"],
|
||||||
'publishedDate': _parse_date(result.get('published_date')),
|
"publishedDate": _parse_date(result.get("published_date")),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
if json_resp['images']:
|
img_list = data.get("images")
|
||||||
results.append({'infobox': 'Tavily', 'img_src': json_resp['images'][0], 'content': json_resp['answer']})
|
if img_list:
|
||||||
elif json_resp['answer']:
|
content = data.get("answer")
|
||||||
results.append({'answer': json_resp['answer']})
|
img_src = img_list[0]
|
||||||
|
if isinstance(img_list[0], dict):
|
||||||
|
img_src = img_list[0]["url"]
|
||||||
|
img_caption = gettext("Image caption") + ": " + img_list[0]["description"]
|
||||||
|
if not content:
|
||||||
|
gettext("Image caption")
|
||||||
|
content = img_caption
|
||||||
|
else:
|
||||||
|
content += "//" + img_caption
|
||||||
|
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
"infobox": f"Tavily [{gettext('ai')}]",
|
||||||
|
"img_src": img_src,
|
||||||
|
"content": content,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
elif data["answer"]:
|
||||||
|
results.append({"answer": data["answer"]})
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
@ -83,7 +215,26 @@ def response(resp):
|
|||||||
def _parse_date(pubDate):
|
def _parse_date(pubDate):
|
||||||
if pubDate is not None:
|
if pubDate is not None:
|
||||||
try:
|
try:
|
||||||
return datetime.strptime(pubDate, '%a, %d %b %Y %H:%M:%S %Z')
|
return datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S %Z")
|
||||||
except (ValueError, TypeError) as e:
|
except (ValueError, TypeError) as e:
|
||||||
logger.debug("ignore exception (publishedDate): %s", e)
|
logger.debug("ignore exception (publishedDate): %s", e)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def init(engine_settings: dict):
|
||||||
|
msg = []
|
||||||
|
|
||||||
|
val = engine_settings.get("api_key") or api_key
|
||||||
|
if not val or val == "unset":
|
||||||
|
msg.append("missing api_key")
|
||||||
|
|
||||||
|
val = engine_settings.get("topic") or topic
|
||||||
|
if val not in ["general", "news"]:
|
||||||
|
msg.append(f"invalid topic: '{val}'")
|
||||||
|
|
||||||
|
val = engine_settings.get("search_depth") or search_depth
|
||||||
|
if val not in ["basic", "advanced"]:
|
||||||
|
msg.append(f"invalid search_depth: '{val}'")
|
||||||
|
|
||||||
|
if msg:
|
||||||
|
raise ValueError(f"[{engine_settings['name']}] engine's settings: {' / '.join(msg)}")
|
||||||
|
@ -1828,21 +1828,28 @@ engines:
|
|||||||
shortcut: tm
|
shortcut: tm
|
||||||
disabled: true
|
disabled: true
|
||||||
|
|
||||||
|
# Tavily requires an API key as well as other configurations. Before you
|
||||||
|
# activate these engines you should read the documentation.
|
||||||
|
# --> https://docs.searxng.org/dev/engines/online/tavily.html
|
||||||
|
#
|
||||||
# - name: tavily
|
# - name: tavily
|
||||||
# engine: tavily
|
# engine: tavily
|
||||||
# shortcut: tav
|
# shortcut: tav
|
||||||
# categories: general
|
# categories: [general, ai]
|
||||||
# # API key required, see: https://docs.tavily.com/docs/welcome#getting-started
|
# api_key: unset
|
||||||
# api_key: 'unset'
|
# topic: general
|
||||||
# include_images: false
|
# include_images: true
|
||||||
# timeout: 15.0
|
# timeout: 15
|
||||||
|
# disabled: true
|
||||||
|
#
|
||||||
# - name: tavily news
|
# - name: tavily news
|
||||||
# engine: tavily
|
# engine: tavily
|
||||||
# shortcut: tavnews
|
# shortcut: tavnews
|
||||||
# categories: news
|
# categories: [news, ai]
|
||||||
# api_key: 'unset'
|
# api_key: unset
|
||||||
# search_type: news
|
# topic: news
|
||||||
# timeout: 15.0
|
# timeout: 15
|
||||||
|
# disabled: true
|
||||||
|
|
||||||
# Requires Tor
|
# Requires Tor
|
||||||
- name: torch
|
- name: torch
|
||||||
|
Loading…
x
Reference in New Issue
Block a user