[feat] add Sogou engine for searxng

Co-authored-by: Bnyro <bnyro@tutanota.com>
Authored by Zhijie He (2025-02-23 13:47:03 +08:00), committed by Bnyro
parent 71d1504e57
commit 97aa5a779b
6 changed files with 225 additions and 1 deletion


@@ -42,6 +42,7 @@
- ``mwmbl``
- ``qwant``
- ``seznam``
- ``sogou``
- ``stract``
- ``swisscows``
- ``wikipedia``

searx/autocomplete.py

@@ -20,6 +20,7 @@ from searx.engines import (
)
from searx.network import get as http_get, post as http_post
from searx.exceptions import SearxEngineResponseException
from searx.utils import extr


def update_kwargs(**kwargs):
@@ -186,6 +187,23 @@ def seznam(query, _lang):
    ]


def sogou(query, _lang):
    # Sogou search autocompleter
    base_url = "https://sor.html5.qq.com/api/getsug?"
    response = get(base_url + urlencode({'m': 'searxng', 'key': query}))

    if response.ok:
        # extract the bracketed payload and re-close it so it parses as
        # ["<query>", [<suggestions>]]
        raw_json = extr(response.text, "[", "]", default="")
        try:
            data = json.loads(f"[{raw_json}]]")
            return data[1]
        except json.JSONDecodeError:
            return []

    return []


def stract(query, _lang):
    # stract autocompleter (beta)
    url = f"https://stract.com/beta/api/autosuggest?q={quote_plus(query)}"
@@ -270,6 +288,7 @@ backends = {
    'mwmbl': mwmbl,
    'qwant': qwant,
    'seznam': seznam,
    'sogou': sogou,
    'stract': stract,
    'swisscows': swisscows,
    'wikipedia': wikipedia,

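The bracket handling in the new `sogou()` autocompleter is easier to follow with a concrete payload. The sketch below replays the same steps standalone; the sample string is an assumed JSONP-style response from the `getsug` endpoint (inferred from how the code rebuilds the brackets), not captured traffic.

```python
import json

# Hypothetical JSONP-style payload, assumed to resemble what
# https://sor.html5.qq.com/api/getsug returns for key="searxng".
sample = 'window.sogou.sug(["searxng",["searxng docker","searxng settings"]],-1);'

# Mirrors extr(response.text, "[", "]"): take everything between the first "["
# and the first "]", which drops the two closing brackets ...
raw_json = sample[sample.index("[") + 1 : sample.index("]")]

# ... then re-close them so the fragment parses as ["<query>", [suggestions]].
data = json.loads(f"[{raw_json}]]")
print(data[1])  # -> ['searxng docker', 'searxng settings']
```

In a development checkout the registered backend can also be exercised directly via `searx.autocomplete.backends['sogou']('searxng', 'en')`, which performs the live HTTP request.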
searx/engines/sogou.py (new file, 68 lines)

@@ -0,0 +1,68 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Sogou search engine for searxng"""

from urllib.parse import urlencode
from lxml import html

from searx.utils import extract_text

# Metadata
about = {
    "website": "https://www.sogou.com/",
    "wikidata_id": "Q7554565",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

# Engine Configuration
categories = ["general"]
paging = True
max_page = 10
time_range_support = True
time_range_dict = {'day': 'inttime_day', 'week': 'inttime_week', 'month': 'inttime_month', 'year': 'inttime_year'}

# Base URL
base_url = "https://www.sogou.com"


def request(query, params):
    query_params = {
        "query": query,
        "page": params["pageno"],
    }

    if time_range_dict.get(params['time_range']):
        query_params["s_from"] = time_range_dict.get(params['time_range'])
        query_params["tsn"] = 1

    params["url"] = f"{base_url}/web?{urlencode(query_params)}"
    return params


def response(resp):
    dom = html.fromstring(resp.text)
    results = []

    for item in dom.xpath('//div[contains(@class, "vrwrap")]'):
        title = extract_text(item.xpath('.//h3[contains(@class, "vr-title")]/a'))
        url = extract_text(item.xpath('.//h3[contains(@class, "vr-title")]/a/@href'))

        if url.startswith("/link?url="):
            url = f"{base_url}{url}"

        content = extract_text(item.xpath('.//div[contains(@class, "text-layout")]//p[contains(@class, "star-wiki")]'))
        if not content:
            content = extract_text(item.xpath('.//div[contains(@class, "fz-mid space-txt")]'))

        if title and url:
            results.append(
                {
                    "title": title,
                    "url": url,
                    "content": content,
                }
            )

    return results

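Both hooks of the new web engine can be driven without a running SearXNG instance. The sketch below assumes a development checkout where `searx.engines.sogou` is importable; the HTML fragment is fabricated to match the XPath expressions above, not a real Sogou response.

```python
from searx.engines import sogou

# request(): build the outgoing URL for page 2, restricted to the last week.
params = sogou.request("paris", {"pageno": 2, "time_range": "week"})
print(params["url"])
# e.g. https://www.sogou.com/web?query=paris&page=2&s_from=inttime_week&tsn=1


class FakeResponse:  # minimal stand-in for the response object the engine receives
    text = """
    <div class="vrwrap">
      <h3 class="vr-title"><a href="/link?url=abc123">Paris travel guide</a></h3>
      <div class="text-layout"><p class="star-wiki">Capital of France.</p></div>
    </div>
    """


# response(): parse results; relative /link?url=... hrefs get the base_url prefix.
for result in sogou.response(FakeResponse()):
    print(result["title"], result["url"], result["content"])
```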
searx/engines/sogou_images.py (new file, 49 lines)

@@ -0,0 +1,49 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Sogou-Images: A search engine for retrieving images from Sogou."""

import json
import re
from urllib.parse import quote_plus

# about
about = {
    "website": "https://pic.sogou.com/",
    "wikidata_id": "Q7554565",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

# engine dependent config
categories = ["images"]
base_url = "https://pic.sogou.com"


def request(query, params):
    params["url"] = f"{base_url}/pics?query={quote_plus(query)}"
    return params


def response(resp):
    results = []

    match = re.search(r'window\.__INITIAL_STATE__\s*=\s*({.*?});', resp.text, re.S)
    if not match:
        return results

    data = json.loads(match.group(1))
    if "searchList" in data and "searchList" in data["searchList"]:
        for item in data["searchList"]["searchList"]:
            results.append(
                {
                    "template": "images.html",
                    "url": item.get("url", ""),
                    "thumbnail_src": item.get("picUrl", ""),
                    "img_src": item.get("picUrl", ""),
                    "content": item.get("content_major", ""),
                    "title": item.get("title", ""),
                    "source": item.get("ch_site_name", ""),
                }
            )

    return results

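Unlike the web engine, the images engine does not walk an HTML result list: it pulls a JSON blob the page embeds as `window.__INITIAL_STATE__`. The standalone sketch below illustrates that extraction step; the object layout (nested `searchList` keys and the item fields) is an assumption read back from the keys the engine accesses, not a recorded page.

```python
import json
import re

# Fabricated page fragment shaped like the keys sogou_images expects.
page = """
<script>
window.__INITIAL_STATE__ = {"searchList": {"searchList": [
  {"title": "Eiffel Tower", "url": "https://pic.sogou.com/d?query=eiffel",
   "picUrl": "https://img02.sogoucdn.com/app/a/example.jpg",
   "content_major": "Eiffel Tower at night", "ch_site_name": "example.com"}
]}};
</script>
"""

# Same non-greedy, DOTALL regex the engine uses to pull out the JSON object.
match = re.search(r'window\.__INITIAL_STATE__\s*=\s*({.*?});', page, re.S)
data = json.loads(match.group(1))
for item in data["searchList"]["searchList"]:
    print(item["title"], item["picUrl"])
```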
searx/engines/sogou_videos.py (new file, 72 lines)

@@ -0,0 +1,72 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Sogou-Videos: A search engine for retrieving videos from Sogou."""

from urllib.parse import urlencode
from datetime import datetime

from searx.exceptions import SearxEngineAPIException

about = {
    "website": "https://v.sogou.com/",
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
}

categories = ["videos"]
paging = True
results_per_page = 10

# Base URL
base_url = "https://v.sogou.com"


def request(query, params):
    query_params = {
        "page": params["pageno"],
        "pagesize": 10,
        "query": query,
    }

    params["url"] = f"{base_url}/api/video/shortVideoV2?{urlencode(query_params)}"
    return params


def response(resp):
    try:
        data = resp.json()
    except Exception as e:
        raise SearxEngineAPIException(f"Invalid response: {e}") from e

    results = []
    if not data.get("data", {}).get("list"):
        raise SearxEngineAPIException("Invalid response")

    for entry in data["data"]["list"]:
        if not entry.get("titleEsc") or not entry.get("url"):
            continue

        video_url = entry.get("url")
        if video_url.startswith("/vc/np"):
            video_url = f"{base_url}{video_url}"

        published_date = None
        if entry.get("date") and entry.get("duration"):
            try:
                date_time_str = f"{entry['date']} {entry['duration']}"
                published_date = datetime.strptime(date_time_str, "%Y-%m-%d %H:%M")
            except (ValueError, TypeError):
                published_date = None

        results.append(
            {
                'url': video_url,
                'title': entry["titleEsc"],
                'content': f"{entry['site']} | {entry['duration']}",
                'template': 'videos.html',
                'publishedDate': published_date,
                'thumbnail': entry["picurl"],
            }
        )

    return results

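The video engine's `response()` can likewise be smoke-tested against a hand-made payload, again assuming a development checkout where `searx.engines.sogou_videos` is importable. The dictionary below only mirrors the keys the code reads (`titleEsc`, `url`, `date`, `duration`, `site`, `picurl`); the real `shortVideoV2` schema is an assumption. It also shows the slightly unusual date handling: `date` and `duration` are concatenated so a single `%Y-%m-%d %H:%M` parse yields `publishedDate`.

```python
from searx.engines import sogou_videos


class FakeResponse:  # minimal stand-in exposing only the .json() the engine calls
    @staticmethod
    def json():
        return {
            "data": {
                "list": [
                    {
                        "titleEsc": "Tour de France highlights",
                        "url": "/vc/np?vid=123",  # relative -> gets base_url prefixed
                        "date": "2025-02-20",     # combined with duration below
                        "duration": "03:15",      # parsed as %H:%M
                        "site": "example.com",
                        "picurl": "https://v.sogou.com/example.jpg",
                    }
                ]
            }
        }


for video in sogou_videos.response(FakeResponse()):
    print(video["url"], video["publishedDate"], video["content"])
# -> https://v.sogou.com/vc/np?vid=123  2025-02-20 03:15:00  example.com | 03:15
```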
searx/settings.yml

@@ -34,7 +34,7 @@ search:
  # Filter results. 0: None, 1: Moderate, 2: Strict
  safe_search: 0
  # Existing autocomplete backends: "360search", "baidu", "brave", "dbpedia", "duckduckgo", "google", "yandex",
  # "mwmbl", "seznam", "sogou", "stract", "swisscows", "qwant", "wikipedia" -
  # leave blank to turn it off by default.
  autocomplete: ""
  # minimum characters to type before autocompleter starts
@@ -1711,6 +1711,21 @@ engines:
  - name: sepiasearch
    engine: sepiasearch
    shortcut: sep

  - name: sogou
    engine: sogou
    shortcut: sogou
    disabled: true

  - name: sogou images
    engine: sogou_images
    shortcut: sogoui
    disabled: true

  - name: sogou videos
    engine: sogou_videos
    shortcut: sogouv
    disabled: true

  - name: soundcloud
    engine: soundcloud
    shortcut: sc