[mod] engines: migration of the individual cache solutions to EngineCache
The EngineCache class replaces all previously individual solutions for caches
in the context of the engines.

- demo_offline.py
- duckduckgo.py
- radio_browser.py
- soundcloud.py
- startpage.py
- wolframalpha_api.py
- wolframalpha_noapi.py

Search term to test most of the modified engines::

    !ddg !rb !sc !sp !wa test
    !ddg !rb !sc !sp !wa foo

For introspection of the DB, jump into the developer environment and run the
command to show the cache state::

    $ ./manage pyenv.cmd bash --norc --noprofile
    (py3) python -m searx.enginelib cache state

    cache tables and key/values
    ===========================
    [demo_offline  ] 2025-04-22 11:32:50 count           --> (int) 4
    [startpage     ] 2025-04-22 12:32:30 SC_CODE         --> (str) fSOBnhEMlDfE20
    [duckduckgo    ] 2025-04-22 12:32:31 4dff493e....    --> (str) 4-128634958369380006627592672385352473325
    [duckduckgo    ] 2025-04-22 12:40:06 3e2583e2....    --> (str) 4-263126175288871260472289814259666848451
    [radio_browser ] 2025-04-23 11:33:08 servers         --> (list) ['https://de2.api.radio-browser.info', ...]
    [soundcloud    ] 2025-04-29 11:40:06 guest_client_id --> (str) EjkRJG0BLNEZquRiPZYdNtJdyGtTuHdp
    [wolframalpha  ] 2025-04-22 12:40:06 code            --> (str) 5aa79f86205ad26188e0e26e28fb7ae7
    number of tables: 6
    number of key/value pairs: 7

In the "cache tables and key/values" section, the table name (engine name) is
in the first position, the calculated expire date in the second, and the
key/value pair in the third and fourth positions.

About duckduckgo: the *vqd code* of DDG depends on the query term, therefore
the key is a hash value of the query term (so the raw query term is not
stored).

In the "properties of ENGINES_CACHE" section, all properties of the
SQLiteAppl / ExpireCache and their last modification date are shown::

    properties of ENGINES_CACHE
    ===========================
    [last modified: 2025-04-22 11:32:27] DB_SCHEMA        : 1
    [last modified: 2025-04-22 11:32:27] LAST_MAINTENANCE :
    [last modified: 2025-04-22 11:32:27] crypt_hash       : ca612e3566fdfd7cf7efe2b1c9349f461158d07cb78a3750e5c5be686aa8ebdc
    [last modified: 2025-04-22 11:32:30] CACHE-TABLE--demo_offline: demo_offline
    [last modified: 2025-04-22 11:32:30] CACHE-TABLE--startpage: startpage
    [last modified: 2025-04-22 11:32:31] CACHE-TABLE--duckduckgo: duckduckgo
    [last modified: 2025-04-22 11:33:08] CACHE-TABLE--radio_browser: radio_browser
    [last modified: 2025-04-22 11:40:06] CACHE-TABLE--soundcloud: soundcloud
    [last modified: 2025-04-22 11:40:06] CACHE-TABLE--wolframalpha: wolframalpha

These properties provide information about the state of the ExpireCache and
control its behavior. For example, the maintenance intervals are controlled by
the last modification date of the LAST_MAINTENANCE property, and the hash value
of the password can be used to detect whether the password has been changed (in
that case the DB entries can no longer be decrypted and the entire cache must
be discarded).

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
parent 4cbfba9d7b
commit bdfe1c2a15
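All engines in this diff follow the same pattern: a module-global ``CACHE`` is
created in the engine's ``init()`` hook and then used via ``get()`` and
``set(..., expire=...)``. A minimal sketch of that pattern — identifiers are
taken from the hunks below, while the concrete key name and expire value are
only illustrative::

    from searx.enginelib import EngineCache

    CACHE: EngineCache
    """Persistent (SQLite) key/value cache that deletes its values after
    ``expire`` seconds."""


    def init(engine_settings):
        global CACHE  # pylint: disable=global-statement
        CACHE = EngineCache(engine_settings["name"])


    def request(query, params):
        value = CACHE.get("some_key")  # None when missing or expired
        if value is None:
            value = "... fetch the value from its origin ..."  # placeholder
            CACHE.set(key="some_key", value=value, expire=3600)  # keep for 1h
        ...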
searx/engines/demo_offline.py

@@ -15,6 +15,7 @@ close to the implementation, its just a simple example. To get in use of this
 import json
 
 from searx.result_types import EngineResults
+from searx.enginelib import EngineCache
 
 engine_type = 'offline'
 categories = ['general']
@@ -32,14 +33,18 @@ about = {
 # if there is a need for globals, use a leading underline
 _my_offline_engine: str = ""
 
+CACHE: EngineCache
+"""Persistent (SQLite) key/value cache that deletes its values after ``expire``
+seconds."""
+
 
-def init(engine_settings=None):
+def init(engine_settings):
     """Initialization of the (offline) engine. The origin of this demo engine is a
     simple json string which is loaded in this example while the engine is
-    initialized.
+    initialized."""
+    global _my_offline_engine, CACHE  # pylint: disable=global-statement
 
-    """
-    global _my_offline_engine  # pylint: disable=global-statement
+    CACHE = EngineCache(engine_settings["name"])  # type:ignore
 
     _my_offline_engine = (
         '[ {"value": "%s"}'
@@ -57,8 +62,8 @@ def search(query, request_params) -> EngineResults:
     results.
     """
     res = EngineResults()
+    count = CACHE.get("count", 0)
 
-    count = 0
     for row in json.loads(_my_offline_engine):
         count += 1
         kvmap = {
@@ -75,4 +80,7 @@ def search(query, request_params) -> EngineResults:
             )
         )
     res.add(res.types.LegacyResult(number_of_results=count))
+
+    # cache counter value for 20sec
+    CACHE.set("count", count, expire=20)
     return res
searx/engines/duckduckgo.py

@@ -6,16 +6,17 @@ DuckDuckGo WEB
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
-import re
-from urllib.parse import quote_plus
 import json
+import re
+import typing
+
+from urllib.parse import quote_plus
 
 import babel
 import lxml.html
 
 from searx import (
     locales,
-    redislib,
     external_bang,
 )
 from searx.utils import (
@@ -25,12 +26,12 @@ from searx.utils import (
     extract_text,
 )
 from searx.network import get  # see https://github.com/searxng/searxng/issues/762
-from searx import redisdb
 from searx.enginelib.traits import EngineTraits
+from searx.enginelib import EngineCache
 from searx.exceptions import SearxEngineCaptchaException
 from searx.result_types import EngineResults
 
-if TYPE_CHECKING:
+if typing.TYPE_CHECKING:
     import logging
 
     logger: logging.Logger
@@ -61,28 +62,18 @@ url = "https://html.duckduckgo.com/html"
 
 time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
 form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
-__CACHE = []
 
+CACHE: EngineCache
+"""Persistent (SQLite) key/value cache that deletes its values after ``expire``
+seconds."""
 
-def _cache_key(query: str, region: str):
-    return 'SearXNG_ddg_web_vqd' + redislib.secret_hash(f"{query}//{region}")
 
+def init(_):  # pylint: disable=unused-argument
+    global CACHE  # pylint: disable=global-statement
+    CACHE = EngineCache("duckduckgo")  # type:ignore
 
-def cache_vqd(query: str, region: str, value: str):
-    """Caches a ``vqd`` value from a query."""
-    c = redisdb.client()
-    if c:
-        logger.debug("VALKEY cache vqd value: %s (%s)", value, region)
-        c.set(_cache_key(query, region), value, ex=600)
 
-    else:
-        logger.debug("MEM cache vqd value: %s (%s)", value, region)
-        if len(__CACHE) > 100:  # cache vqd from last 100 queries
-            __CACHE.pop(0)
-        __CACHE.append((_cache_key(query, region), value))
-
-
-def get_vqd(query: str, region: str, force_request: bool = False):
+def get_vqd(query: str, region: str, force_request: bool = False) -> str:
     """Returns the ``vqd`` that fits to the *query*.
 
     :param query: The query term
@@ -114,31 +105,34 @@ def get_vqd(query: str, region: str, force_request: bool = False):
     seems the block list is a sliding window: to get my IP rid from the bot list
     I had to cool down my IP for 1h (send no requests from that IP to DDG).
     """
-    key = _cache_key(query, region)
-
-    c = redisdb.client()
-    if c:
-        value = c.get(key)
-        if value or value == b'':
-            value = value.decode('utf-8')  # type: ignore
-            logger.debug("re-use CACHED vqd value: %s", value)
-            return value
-
-    for k, value in __CACHE:
-        if k == key:
-            logger.debug("MEM re-use CACHED vqd value: %s", value)
-            return value
-
-    if force_request:
-        resp = get(f'https://duckduckgo.com/?q={quote_plus(query)}')
-        if resp.status_code == 200:  # type: ignore
-            value = extr(resp.text, 'vqd="', '"')  # type: ignore
-            if value:
-                logger.debug("vqd value from DDG request: %s", value)
-                cache_vqd(query, region, value)
-                return value
-
-    return None
+    key = CACHE.secret_hash(f"{query}//{region}")
+    value = CACHE.get(key=key)
+    if value is not None and not force_request:
+        logger.debug("vqd: re-use cached value: %s", value)
+        return value
+
+    logger.debug("vqd: request value from from duckduckgo.com")
+    resp = get(f'https://duckduckgo.com/?q={quote_plus(query)}')
+    if resp.status_code == 200:  # type: ignore
+        value = extr(resp.text, 'vqd="', '"')  # type: ignore
+        if value:
+            logger.debug("vqd value from duckduckgo.com request: '%s'", value)
+        else:
+            logger.error("vqd: can't parse value from ddg response (return empty string)")
+            return ""
+    else:
+        logger.error("vqd: got HTTP %s from duckduckgo.com", resp.status_code)
+
+    if value:
+        CACHE.set(key=key, value=value)
+    else:
+        logger.error("vqd value from duckduckgo.com ", resp.status_code)
+    return value
+
+
+def set_vqd(query: str, region: str, value: str):
+    key = CACHE.secret_hash(f"{query}//{region}")
+    CACHE.set(key=key, value=value, expire=3600)
 
 
 def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'):
@@ -373,8 +367,11 @@ def response(resp) -> EngineResults:
         # some locales (at least China) does not have a "next page" button
         form = form[0]
         form_vqd = eval_xpath(form, '//input[@name="vqd"]/@value')[0]
-        cache_vqd(resp.search_params['data']['q'], resp.search_params['data']['kl'], form_vqd)
+        set_vqd(
+            query=resp.search_params['data']['q'],
+            region=resp.search_params['data']['kl'],
+            value=str(form_vqd),
+        )
 
     # just select "web-result" and ignore results of class "result--ad result--ad--small"
     for div_result in eval_xpath(doc, '//div[@id="links"]/div[contains(@class, "web-result")]'):
@@ -401,7 +398,7 @@ def response(resp) -> EngineResults:
         results.add(
             results.types.Answer(
                 answer=zero_click,
-                url=eval_xpath_getindex(doc, '//div[@id="zero_click_abstract"]/a/@href', 0),
+                url=eval_xpath_getindex(doc, '//div[@id="zero_click_abstract"]/a/@href', 0),  # type: ignore
             )
         )
 
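For query-dependent values like DuckDuckGo's vqd, the cache key is derived with
``CACHE.secret_hash()`` so that the raw query term is never written to the DB.
A condensed sketch of the flow the duckduckgo.py hunks above implement (the
actual HTTP request is only indicated by a placeholder)::

    def get_vqd(query: str, region: str, force_request: bool = False) -> str:
        key = CACHE.secret_hash(f"{query}//{region}")
        value = CACHE.get(key=key)
        if value is not None and not force_request:
            return value  # re-use the cached vqd
        value = "... vqd parsed from a duckduckgo.com response ..."  # placeholder
        CACHE.set(key=key, value=value)
        return value


    def set_vqd(query: str, region: str, value: str):
        # called from response() with the vqd found in the result page
        key = CACHE.secret_hash(f"{query}//{region}")
        CACHE.set(key=key, value=value, expire=3600)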
searx/engines/radio_browser.py

@@ -5,7 +5,9 @@
 https://de1.api.radio-browser.info/#Advanced_station_search
 
 """
+from __future__ import annotations
 
+import typing
 import random
 import socket
 from urllib.parse import urlencode
@@ -13,9 +15,15 @@ import babel
 from flask_babel import gettext
 
 from searx.network import get
+from searx.enginelib import EngineCache
 from searx.enginelib.traits import EngineTraits
 from searx.locales import language_tag
 
+if typing.TYPE_CHECKING:
+    import logging
+
+    logger = logging.getLogger()
+
 traits: EngineTraits
 
 about = {
@@ -52,11 +60,24 @@ none filters are applied. Valid filters are:
 
 """
 
-servers = []
+CACHE: EngineCache
+"""Persistent (SQLite) key/value cache that deletes its values after ``expire``
+seconds."""
 
 
 def init(_):
-    # see https://api.radio-browser.info
+    global CACHE  # pylint: disable=global-statement
+    CACHE = EngineCache("radio_browser")
+    server_list()
+
+
+def server_list() -> list[str]:
+
+    servers = CACHE.get("servers", [])
+    if servers:
+        return servers
+
+    # hint: can take up to 40sec!
     ips = socket.getaddrinfo("all.api.radio-browser.info", 80, 0, 0, socket.IPPROTO_TCP)
     for ip_tuple in ips:
         _ip: str = ip_tuple[4][0]  # type: ignore
@@ -65,8 +86,22 @@ def init(_):
         if srv not in servers:
             servers.append(srv)
+
+    # update server list once in 24h
+    CACHE.set(key="servers", value=servers, expire=60 * 60 * 24)
+
+    return servers
 
 
 def request(query, params):
+
+    servers = server_list()
+    if not servers:
+        logger.error("Fetched server list is empty!")
+        params["url"] = None
+        return
+
+    server = random.choice(servers)
+
     args = {
         'name': query,
         'order': 'votes',
@@ -87,8 +122,7 @@ def request(query, params):
         if countrycode in traits.custom['countrycodes']:  # type: ignore
             args['countrycode'] = countrycode
 
-    params['url'] = f"{random.choice(servers)}/json/stations/search?{urlencode(args)}"
-    return params
+    params['url'] = f"{server}/json/stations/search?{urlencode(args)}"
 
 
 def response(resp):
@@ -154,8 +188,9 @@ def fetch_traits(engine_traits: EngineTraits):
 
     babel_reg_list = get_global("territory_languages").keys()
 
-    language_list = get(f'{servers[0]}/json/languages').json()  # type: ignore
-    country_list = get(f'{servers[0]}/json/countries').json()  # type: ignore
+    server = server_list()[0]
+    language_list = get(f'{server}/json/languages').json()  # type: ignore
+    country_list = get(f'{server}/json/countries').json()  # type: ignore
 
     for lang in language_list:
 
searx/engines/soundcloud.py

@@ -1,14 +1,23 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """SoundCloud is a German audio streaming service."""
+from __future__ import annotations
 
 import re
-from urllib.parse import quote_plus, urlencode
+import typing
 import datetime
 
+from urllib.parse import quote_plus, urlencode
 
 from dateutil import parser
 from lxml import html
 
 from searx.network import get as http_get
+from searx.enginelib import EngineCache
+
+if typing.TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
 about = {
     "website": "https://soundcloud.com",
@@ -28,7 +37,6 @@ HTML frontend of the common WEB site.
 """
 
 cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
-guest_client_id = ""
 results_per_page = 10
 
 soundcloud_facet = "model"
@@ -48,6 +56,10 @@ app_locale_map = {
     "sv": "sv",
 }
 
+CACHE: EngineCache
+"""Persistent (SQLite) key/value cache that deletes its values after ``expire``
+seconds."""
+
 
 def request(query, params):
 
@@ -55,6 +67,12 @@ def request(query, params):
     # - user_id=451561-497874-703312-310156
     # - app_version=1740727428
 
+    guest_client_id = CACHE.get("guest_client_id")
+    if guest_client_id is None:
+        guest_client_id = get_client_id()
+        if guest_client_id:
+            CACHE.set(key="guest_client_id", value=guest_client_id)
+
     args = {
         "q": query,
         "offset": (params['pageno'] - 1) * results_per_page,
@@ -104,12 +122,12 @@ def response(resp):
     return results
 
 
-def init(engine_settings=None):  # pylint: disable=unused-argument
-    global guest_client_id  # pylint: disable=global-statement
-    guest_client_id = get_client_id()
+def init(engine_settings):  # pylint: disable=unused-argument
+    global CACHE  # pylint: disable=global-statement
+    CACHE = EngineCache(engine_settings["name"])  # type:ignore
 
 
-def get_client_id() -> str:
+def get_client_id() -> str | None:
 
     client_id = ""
     url = "https://soundcloud.com"
@@ -143,4 +161,4 @@ def get_client_id() -> str:
         logger.info("using client_id '%s' for soundclud queries", client_id)
     else:
         logger.warning("missing valid client_id for soundclud queries")
-    return client_id
+    return client_id or None
searx/engines/startpage.py

@@ -84,7 +84,6 @@ from typing import TYPE_CHECKING, Any
 from collections import OrderedDict
 import re
 from unicodedata import normalize, combining
-from time import time
 from datetime import datetime, timedelta
 from json import loads
 
@@ -97,6 +96,7 @@ from searx.network import get  # see https://github.com/searxng/searxng/issues/762
 from searx.exceptions import SearxEngineCaptchaException
 from searx.locales import region_tag
 from searx.enginelib.traits import EngineTraits
+from searx.enginelib import EngineCache
 
 if TYPE_CHECKING:
     import logging
@@ -159,10 +159,21 @@ search_form_xpath = '//form[@id="search"]'
 </form>
 """
 
-# timestamp of the last fetch of 'sc' code
-sc_code_ts = 0
-sc_code = ''
-sc_code_cache_sec = 30
+CACHE: EngineCache
+"""Persistent (SQLite) key/value cache that deletes its values after ``expire``
+seconds."""
+
+
+def init(_):
+    global CACHE  # pylint: disable=global-statement
+
+    # hint: all three startpage engines (WEB, Images & News) can/should use the
+    # same sc_code ..
+    CACHE = EngineCache("startpage")  # type:ignore
+
+
+sc_code_cache_sec = 3600
 """Time in seconds the sc-code is cached in memory :py:obj:`get_sc_code`."""
 
 
@@ -176,14 +187,10 @@ def get_sc_code(searxng_locale, params):
 
     Startpage's search form generates a new sc-code on each request. This
     function scrap a new sc-code from Startpage's home page every
-    :py:obj:`sc_code_cache_sec` seconds.
-
-    """
-    global sc_code_ts, sc_code  # pylint: disable=global-statement
-
-    if sc_code and (time() < (sc_code_ts + sc_code_cache_sec)):
-        logger.debug("get_sc_code: reuse '%s'", sc_code)
+    :py:obj:`sc_code_cache_sec` seconds."""
+    sc_code = CACHE.get("SC_CODE", "")
+    if sc_code:
         return sc_code
 
     headers = {**params['headers']}
@@ -233,8 +240,9 @@ def get_sc_code(searxng_locale, params):
             message="get_sc_code: [PR-695] query new sc time-stamp failed! (%s)" % resp.url,  # type: ignore
         ) from exc
 
-    sc_code_ts = time()
+    sc_code = str(sc_code)
     logger.debug("get_sc_code: new value is: %s", sc_code)
+    CACHE.set(key="SC_CODE", value=sc_code, expire=sc_code_cache_sec)
     return sc_code
 
 
searx/engines/wolframalpha_api.py

@@ -5,7 +5,7 @@
 
 from urllib.parse import urlencode
 
-from lxml import etree
+import lxml.etree
 
 # about
 about = {
@@ -72,7 +72,7 @@ def replace_pua_chars(text):
 def response(resp):
     results = []
 
-    search_results = etree.XML(resp.content)
+    search_results = lxml.etree.XML(resp.content)
 
     # return empty array if there are no results
     if search_results.xpath(failure_xpath):
searx/engines/wolframalpha_noapi.py

@@ -3,11 +3,13 @@
 Wolfram|Alpha (Science)
 """
 
+from __future__ import annotations
+
 from json import loads
-from time import time
 from urllib.parse import urlencode
 
 from searx.network import get as http_get
+from searx.enginelib import EngineCache
 
 # about
 about = {
@@ -40,41 +42,39 @@ search_url = (
 
 referer_url = url + 'input/?{query}'
 
-token = {'value': '', 'last_updated': None}
-
 # pods to display as image in infobox
 # this pods do return a plaintext, but they look better and are more useful as images
 image_pods = {'VisualRepresentation', 'Illustration', 'Symbol'}
 
 
-# seems, wolframalpha resets its token in every hour
-def obtain_token():
-    update_time = time() - (time() % 3600)
-    try:
-        token_response = http_get('https://www.wolframalpha.com/input/api/v1/code?ts=9999999999999999999', timeout=2.0)
-        token['value'] = loads(token_response.text)['code']
-        token['last_updated'] = update_time
-    except:  # pylint: disable=bare-except
-        pass
+CACHE: EngineCache
+"""Persistent (SQLite) key/value cache that deletes its values after ``expire``
+seconds."""
+
+
+def init(engine_settings):
+    global CACHE  # pylint: disable=global-statement
+    CACHE = EngineCache(engine_settings["name"])  # type:ignore
+
+
+def obtain_token() -> str:
+    token = CACHE.get(key="token")
+    if token is None:
+        resp = http_get('https://www.wolframalpha.com/input/api/v1/code?ts=9999999999999999999', timeout=2.0)
+        token = resp.json()["code"]
+        # seems, wolframalpha resets its token in every hour
+        CACHE.set(key="code", value=token, expire=3600)
     return token
 
 
-def init(engine_settings=None):  # pylint: disable=unused-argument
-    obtain_token()
-
-
-# do search-request
 def request(query, params):
-    # obtain token if last update was more than an hour
-    if time() - (token['last_updated'] or 0) > 3600:
-        obtain_token()
-    params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
+    token = obtain_token()
+    params['url'] = search_url.format(query=urlencode({'input': query}), token=token)
     params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query}))
 
     return params
 
 
-# get response from search-request
 def response(resp):
     results = []
 
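Expire times used by the modified engines (as set in the hunks above): the
demo_offline counter is kept for 20 seconds, the DuckDuckGo vqd for one hour
(set_vqd), the radio_browser server list for 24 hours, the Startpage SC_CODE
for sc_code_cache_sec (3600 seconds) and the Wolfram|Alpha token for one hour;
soundcloud stores its guest_client_id with the cache's default expiry.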