# SPDX-License-Identifier: AGPL-3.0-or-later
"""This provides an easy to use interface for engine implementations to store
and read key-value pairs.

For instance, this can be used to remember programmatically extracted API keys
or other kinds of secret tokens.
"""

from abc import ABC, abstractmethod
from typing import Dict, Optional


def _redis_modules():
    """Return the ``(redisdb, redislib)`` helper modules of ``searx``.

    The import is done on first use instead of at module import time so that
    the in-memory backend can be imported and exercised on its own.
    """
    from searx import redisdb, redislib  # pylint: disable=import-outside-toplevel

    return redisdb, redislib


class EngineCache(ABC):
    """Interface of a small string key-value store for engine implementations."""

    @abstractmethod
    def store(self, key: str, value: str) -> None:
        """Store ``value`` under ``key``, replacing any previous value."""

    @abstractmethod
    def get(self, key: str) -> Optional[str]:
        """Return the value stored under ``key``, or ``None`` if there is none."""


class MemoryEngineCache(EngineCache):
    """Process-local cache that keeps at most ``max_size`` entries.

    When the cache is full, the entry that was stored (or refreshed) least
    recently is evicted first.
    """

    def __init__(self, max_size: int = 100):
        self._storage: Dict[str, str] = {}
        self.max_size = max_size

    def store(self, key: str, value: str) -> None:
        """Store the provided key-value pair in the cache."""
        # Remove a previous value first: dictionaries keep insertion order
        # (Python 3.7+), so re-inserting moves the key to the "newest" end.
        # Doing this before the size check also means that overwriting an
        # existing key never evicts an unrelated entry.
        self._storage.pop(key, None)

        # Evict from the *oldest* end.  ``dict.popitem()`` must not be used
        # here, it removes the most recently inserted item.
        while self._storage and len(self._storage) >= self.max_size:
            del self._storage[next(iter(self._storage))]

        self._storage[key] = value

    def get(self, key: str) -> Optional[str]:
        """Return the cached value for ``key``, or ``None`` if it is unknown."""
        return self._storage.get(key)


class RedisEngineCache(EngineCache):
    """Cache backed by the database client returned by ``redisdb.client()``.

    Keys are stored as ``key_prefix`` followed by ``redislib.secret_hash(key)``
    and expire after ``expiration_seconds`` seconds.
    """

    def __init__(self, key_prefix: str, expiration_seconds: int = 600):
        self.key_prefix = key_prefix
        self.expiration_seconds = expiration_seconds

    def _get_cache_key(self, key: str) -> str:
        """Build the database key for ``key``."""
        _, redislib = _redis_modules()
        return self.key_prefix + redislib.secret_hash(key)

    def store(self, key: str, value: str) -> None:
        """Store ``value`` under ``key`` with the configured expiration time."""
        redisdb, _ = _redis_modules()
        client = redisdb.client()
        client.set(self._get_cache_key(key), value, ex=self.expiration_seconds)

    def get(self, key: str) -> Optional[str]:
        """Return the value stored under ``key``, or ``None`` if it is missing.

        The client may hand back ``bytes``; those are decoded as UTF-8 so that
        callers always receive ``str`` as promised by :py:obj:`EngineCache`.
        """
        redisdb, _ = _redis_modules()
        value = redisdb.client().get(self._get_cache_key(key))
        if value is None:
            return None
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value


def get_or_create_cache(database_prefix: str) -> EngineCache:
    """Return a cache for ``database_prefix``.

    A :py:obj:`RedisEngineCache` is returned when ``redisdb.client()`` yields a
    client, otherwise a :py:obj:`MemoryEngineCache` is used as fallback.
    """
    redisdb, _ = _redis_modules()
    if redisdb.client():
        return RedisEngineCache(database_prefix)

    return MemoryEngineCache()
a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 450cd9cf8..4a03daa10 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -15,7 +15,6 @@ import lxml.html from searx import ( locales, - redislib, external_bang, ) from searx.utils import ( @@ -25,7 +24,7 @@ from searx.utils import ( extract_text, ) from searx.network import get # see https://github.com/searxng/searxng/issues/762 -from searx import redisdb +from searx.engine_cache import get_or_create_cache, EngineCache from searx.enginelib.traits import EngineTraits from searx.exceptions import SearxEngineCaptchaException from searx.result_types import EngineResults @@ -61,25 +60,23 @@ url = "https://html.duckduckgo.com/html" time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'} -__CACHE = [] + +__CACHE: EngineCache = get_or_create_cache('SearXNG_ddg_web_vqd') -def _cache_key(query: str, region: str): - return 'SearXNG_ddg_web_vqd' + redislib.secret_hash(f"{query}//{region}") +def init(_): + global __CACHE # pylint: disable=global-statement + __CACHE = get_or_create_cache('SearXNG_ddg_web_vqd') + # TODO: why is the __CACHE always None if initialized as None, + # even though it should be changed here and this method is + # confirmed to be called? ... 
def cache_vqd(query: str, region: str, value: str): """Caches a ``vqd`` value from a query.""" - c = redisdb.client() - if c: - logger.debug("VALKEY cache vqd value: %s (%s)", value, region) - c.set(_cache_key(query, region), value, ex=600) - else: - logger.debug("MEM cache vqd value: %s (%s)", value, region) - if len(__CACHE) > 100: # cache vqd from last 100 queries - __CACHE.pop(0) - __CACHE.append((_cache_key(query, region), value)) + __CACHE.store(f"{query}//{region}", value) + logger.debug("cached vqd value: %s (%s)", value, region) def get_vqd(query: str, region: str, force_request: bool = False): @@ -114,20 +111,10 @@ def get_vqd(query: str, region: str, force_request: bool = False): seems the block list is a sliding window: to get my IP rid from the bot list I had to cool down my IP for 1h (send no requests from that IP to DDG). """ - key = _cache_key(query, region) - - c = redisdb.client() - if c: - value = c.get(key) - if value or value == b'': - value = value.decode('utf-8') # type: ignore - logger.debug("re-use CACHED vqd value: %s", value) - return value - - for k, value in __CACHE: - if k == key: - logger.debug("MEM re-use CACHED vqd value: %s", value) - return value + value = __CACHE.get(f"{query}//{region}") + if value is not None: + logger.debug("re-use CACHED vqd value: %s", value) + return value if force_request: resp = get(f'https://duckduckgo.com/?q={quote_plus(query)}')