Merge 7d40733afe90916ab3bbaa5ea131d9eed7e03ecc into 590b21165288127c6f942fe43d261d57d9a6d5c3
This commit is contained in:
commit
de8b705340
69
searx/engine_cache.py
Normal file
69
searx/engine_cache.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""This provides an easy to use interface for engine implementations to store and read key-value pairs.
|
||||||
|
|
||||||
|
For instance, this can be used to remember programmatically extracted API keys or other kinds of secret tokens.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
from searx import redisdb, redislib
|
||||||
|
|
||||||
|
|
||||||
|
class EngineCache:
    """Minimal key-value cache interface for engine implementations.

    This base implementation keeps no state: :py:obj:`EngineCache.store`
    discards its arguments and :py:obj:`EngineCache.get` always returns
    ``None``.  Subclasses override both methods with a real backend.
    """

    def store(self, key: str, value: str):
        """Store ``value`` under ``key`` (does nothing in the base class)."""
        return None

    def get(self, key: str) -> Optional[str]:
        """Return the value stored under ``key`` (always ``None`` in the base class)."""
        return None
|
||||||
|
|
||||||
|
|
||||||
|
class MemoryEngineCache(EngineCache):
    """In-process cache that keeps the most recently stored key-value pairs.

    Entries live in a plain ``dict``.  Dictionaries preserve insertion order
    (guaranteed since Python 3.7), so the first key is always the oldest entry
    and the last key the newest one; that order is used for eviction.
    """

    def __init__(self, max_size: int = 100):
        """
        :param max_size: Maximum number of entries kept in the cache.  With a
            value below one, the cache still keeps the single most recently
            stored entry.
        """
        self.__STORAGE = {}
        self.max_size = max_size

    def store(self, key: str, value: str):
        """Store the provided key-value pair in the cache.

        Storing an existing key replaces its value and makes it the newest
        entry.  When the cache is full, the oldest entries are evicted to make
        room for the new one.
        """
        # Remove a previous value first: the key is then re-inserted at the end
        # of the dictionary (newest position) and a mere update of an existing
        # key does not count against the size limit.
        self.__STORAGE.pop(key, None)

        # Evict from the *front* of the dictionary.  ``dict.popitem()`` can't be
        # used here, it removes the most recently inserted entry (LIFO) and
        # would throw away the newest value instead of the oldest one.
        while self.__STORAGE and len(self.__STORAGE) >= self.max_size:
            oldest_key = next(iter(self.__STORAGE))
            del self.__STORAGE[oldest_key]

        self.__STORAGE[key] = value

    def get(self, key: str) -> Optional[str]:
        """Return the value stored under ``key`` or ``None`` if there is none."""
        return self.__STORAGE.get(key)
|
||||||
|
|
||||||
|
|
||||||
|
class RedisEngineCache(EngineCache):
    """Cache that stores its key-value pairs through the client returned by
    ``redisdb.client()``.

    Every entry is written with an expiration time, eviction of old entries is
    left to the database.
    """

    def __init__(self, key_prefix: str, expiration_seconds: int = 600):
        """
        :param key_prefix: String prepended to every database key of this cache.
        :param expiration_seconds: Passed as ``ex`` argument to the client's
            ``set`` call, i.e. the lifetime of an entry in seconds.
        """
        self.key_prefix = key_prefix
        self.expiration_seconds = expiration_seconds

    def _get_cache_key(self, key: str):
        """Build the database key: ``key_prefix`` followed by
        ``redislib.secret_hash(key)``, so the plain ``key`` itself is not used
        as database key."""
        return self.key_prefix + redislib.secret_hash(key)

    def store(self, key: str, value: str):
        """Store the provided key-value pair, expiring after
        ``expiration_seconds``."""
        c = redisdb.client()

        cache_key = self._get_cache_key(key)
        c.set(cache_key, value, ex=self.expiration_seconds)

    def get(self, key: str) -> Optional[str]:
        """Return the value stored under ``key`` as ``str`` or ``None`` if
        there is no (unexpired) entry."""
        c = redisdb.client()

        cache_key = self._get_cache_key(key)
        value = c.get(cache_key)
        if value is None:
            return None

        # A client that is not configured to decode responses returns ``bytes``.
        # Decode them, so callers get a ``str`` as promised by the EngineCache
        # interface, no matter which cache backend is in use.
        if isinstance(value, bytes):
            value = value.decode('utf-8')
        return value
|
||||||
|
|
||||||
|
|
||||||
|
def get_or_create_cache(database_prefix: str) -> EngineCache:
    """Return a cache instance for an engine.

    If ``redisdb.client()`` returns a usable client, a
    :py:obj:`RedisEngineCache` with ``database_prefix`` as key prefix is
    returned, otherwise a :py:obj:`MemoryEngineCache` with default settings.
    """
    if not redisdb.client():
        # no database client available: keep the values in process memory
        return MemoryEngineCache()

    return RedisEngineCache(database_prefix)
|
@ -15,7 +15,6 @@ import lxml.html
|
|||||||
|
|
||||||
from searx import (
|
from searx import (
|
||||||
locales,
|
locales,
|
||||||
redislib,
|
|
||||||
external_bang,
|
external_bang,
|
||||||
)
|
)
|
||||||
from searx.utils import (
|
from searx.utils import (
|
||||||
@ -25,7 +24,7 @@ from searx.utils import (
|
|||||||
extract_text,
|
extract_text,
|
||||||
)
|
)
|
||||||
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
||||||
from searx import redisdb
|
from searx.engine_cache import get_or_create_cache, EngineCache
|
||||||
from searx.enginelib.traits import EngineTraits
|
from searx.enginelib.traits import EngineTraits
|
||||||
from searx.exceptions import SearxEngineCaptchaException
|
from searx.exceptions import SearxEngineCaptchaException
|
||||||
from searx.result_types import EngineResults
|
from searx.result_types import EngineResults
|
||||||
@ -61,25 +60,23 @@ url = "https://html.duckduckgo.com/html"
|
|||||||
|
|
||||||
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
|
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
|
||||||
form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
|
form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
|
||||||
__CACHE = []
|
|
||||||
|
__CACHE: EngineCache = get_or_create_cache('SearXNG_ddg_web_vqd')
|
||||||
|
|
||||||
|
|
||||||
def _cache_key(query: str, region: str):
|
def init(_):
|
||||||
return 'SearXNG_ddg_web_vqd' + redislib.secret_hash(f"{query}//{region}")
|
global __CACHE # pylint: disable=global-statement
|
||||||
|
__CACHE = get_or_create_cache('SearXNG_ddg_web_vqd')
|
||||||
|
# TODO: why is the __CACHE always None if initialized as None,
|
||||||
|
# even though it should be changed here and this method is
|
||||||
|
# confirmed to be called? ...
|
||||||
|
|
||||||
|
|
||||||
def cache_vqd(query: str, region: str, value: str):
|
def cache_vqd(query: str, region: str, value: str):
|
||||||
"""Caches a ``vqd`` value from a query."""
|
"""Caches a ``vqd`` value from a query."""
|
||||||
c = redisdb.client()
|
|
||||||
if c:
|
|
||||||
logger.debug("VALKEY cache vqd value: %s (%s)", value, region)
|
|
||||||
c.set(_cache_key(query, region), value, ex=600)
|
|
||||||
|
|
||||||
else:
|
__CACHE.store(f"{query}//{region}", value)
|
||||||
logger.debug("MEM cache vqd value: %s (%s)", value, region)
|
logger.debug("cached vqd value: %s (%s)", value, region)
|
||||||
if len(__CACHE) > 100: # cache vqd from last 100 queries
|
|
||||||
__CACHE.pop(0)
|
|
||||||
__CACHE.append((_cache_key(query, region), value))
|
|
||||||
|
|
||||||
|
|
||||||
def get_vqd(query: str, region: str, force_request: bool = False):
|
def get_vqd(query: str, region: str, force_request: bool = False):
|
||||||
@ -114,20 +111,10 @@ def get_vqd(query: str, region: str, force_request: bool = False):
|
|||||||
seems the block list is a sliding window: to get my IP rid from the bot list
|
seems the block list is a sliding window: to get my IP rid from the bot list
|
||||||
I had to cool down my IP for 1h (send no requests from that IP to DDG).
|
I had to cool down my IP for 1h (send no requests from that IP to DDG).
|
||||||
"""
|
"""
|
||||||
key = _cache_key(query, region)
|
value = __CACHE.get(f"{query}//{region}")
|
||||||
|
if value is not None:
|
||||||
c = redisdb.client()
|
logger.debug("re-use CACHED vqd value: %s", value)
|
||||||
if c:
|
return value
|
||||||
value = c.get(key)
|
|
||||||
if value or value == b'':
|
|
||||||
value = value.decode('utf-8') # type: ignore
|
|
||||||
logger.debug("re-use CACHED vqd value: %s", value)
|
|
||||||
return value
|
|
||||||
|
|
||||||
for k, value in __CACHE:
|
|
||||||
if k == key:
|
|
||||||
logger.debug("MEM re-use CACHED vqd value: %s", value)
|
|
||||||
return value
|
|
||||||
|
|
||||||
if force_request:
|
if force_request:
|
||||||
resp = get(f'https://duckduckgo.com/?q={quote_plus(query)}')
|
resp = get(f'https://duckduckgo.com/?q={quote_plus(query)}')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user