[feat] new engine: tavily.com

This commit is contained in:
GenericMale 2025-01-21 20:34:01 +01:00 committed by Markus Heiser
parent e7081bb2c1
commit d8a4d589eb
2 changed files with 105 additions and 0 deletions

89
searx/engines/tavily.py Normal file
View File

@ -0,0 +1,89 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Tavily AI Engine
"""
from json import dumps
from datetime import datetime
from searx.exceptions import SearxEngineAPIException
# Engine metadata.
about = dict(
    website='https://tavily.com/',
    wikidata_id=None,
    official_api_documentation='https://docs.tavily.com/docs/rest-api/api-reference',
    use_official_api=True,
    require_api_key=True,
    results='JSON',
)

# Endpoint the JSON search request is POSTed to.
search_url = 'https://api.tavily.com/search'

# Engine capabilities.
paging = False
time_range_support = True

# Engine settings (module-level defaults).

# Possible values: search, news
search_type = 'search'
# 'unset' is the placeholder value that request() rejects.
api_key = 'unset'
# Sent to the API as 'max_results'.
max_results = 20
# The depth of the search. It can be "basic" or "advanced".
search_depth = 'basic'
# Include query-related images. Turns answer into infobox with first image.
include_images = False
# A list of domains to specifically include in the search results.
include_domains = []
# A list of domains to specifically exclude from the search results.
exclude_domains = []
def request(query, params):
    """Build the POST request sent to the Tavily search API.

    Args:
        query: The search query string.
        params: The request parameter dict. ``params['time_range']`` and
            ``params['headers']`` are read; ``url``, ``method``, ``data`` and
            the ``content-type`` header are set.

    Returns:
        The same ``params`` dict, updated in place.

    Raises:
        SearxEngineAPIException: If ``api_key`` still has the placeholder
            value ``'unset'``.
        ValueError: If ``search_type`` is neither ``'search'`` nor ``'news'``.
    """
    if api_key == 'unset':
        raise SearxEngineAPIException('missing Tavily API key')

    data = {
        'query': query,
        'api_key': api_key,
        # Send the configured depth; a hard-coded 'basic' here would make the
        # module-level ``search_depth`` setting a no-op.
        'search_depth': search_depth,
        'time_range': params["time_range"],
        'max_results': max_results,
        'include_images': include_images,
        'include_domains': include_domains,
        'exclude_domains': exclude_domains,
    }

    if search_type == 'search':
        # Request an answer; response() turns it into an answer/infobox result.
        data['include_answer'] = True
    elif search_type == 'news':
        data['topic'] = 'news'
    else:
        raise ValueError(f"Invalid search type {search_type}")

    params['url'] = search_url
    params['method'] = 'POST'
    params['headers']['content-type'] = 'application/json'
    params['data'] = dumps(data)

    return params
def response(resp):
    """Convert a Tavily JSON response into a list of result dicts.

    Every entry of the ``results`` array becomes a result with ``title``,
    ``url``, ``content`` and ``publishedDate``. After those, at most one extra
    item is appended:

    * an infobox carrying the first image and the answer, when the response
      contains a non-empty ``images`` list (the answer may be ``None``);
    * otherwise a plain answer, when ``answer`` is non-empty.

    Args:
        resp: Response object exposing a ``json()`` method.

    Returns:
        The list of result dicts.
    """
    json_resp = resp.json()

    results = [
        {
            'title': result['title'],
            'url': result['url'],
            'content': result['content'],
            'publishedDate': _parse_date(result.get('published_date')),
        }
        for result in json_resp.get('results', [])
    ]

    # Read the optional keys with .get(), as is done for 'results' above, so a
    # payload that omits them does not raise KeyError.
    images = json_resp.get('images')
    answer = json_resp.get('answer')

    if images:
        results.append({'infobox': 'Tavily', 'img_src': images[0], 'content': answer})
    elif answer:
        results.append({'answer': answer})

    return results
def _parse_date(pubDate):
if pubDate is not None:
try:
return datetime.strptime(pubDate, '%a, %d %b %Y %H:%M:%S %Z')
except (ValueError, TypeError) as e:
logger.debug("ignore exception (publishedDate): %s", e)
return None

View File

@ -1828,6 +1828,22 @@ engines:
shortcut: tm
disabled: true
# - name: tavily
# engine: tavily
# shortcut: tav
# categories: general
# # API key required, see: https://docs.tavily.com/docs/welcome#getting-started
# api_key: 'unset'
# include_images: false
# timeout: 15.0
# - name: tavily news
# engine: tavily
# shortcut: tavnews
# categories: news
# api_key: 'unset'
# search_type: news
# timeout: 15.0
# Requires Tor
- name: torch
engine: xpath