Merge 7d40733afe90916ab3bbaa5ea131d9eed7e03ecc into 590b21165288127c6f942fe43d261d57d9a6d5c3
This commit is contained in:
		
						commit
						de8b705340
					
				
							
								
								
									
										69
									
								
								searx/engine_cache.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								searx/engine_cache.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,69 @@
 | 
			
		||||
# SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
"""This module provides an easy-to-use interface for engine implementations to store and read key-value pairs.
 | 
			
		||||
 | 
			
		||||
For instance, this can be used to remember programmatically extracted API keys or other kinds of secret tokens.
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
from typing import Optional
 | 
			
		||||
from searx import redisdb, redislib
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class EngineCache:
    """Common interface of the key-value caches available to engines.

    The methods of this base class are no-ops: :py:obj:`EngineCache.store`
    discards the value and :py:obj:`EngineCache.get` always returns ``None``.
    Subclasses override both methods to provide real storage.
    """

    def store(self, key: str, value: str) -> None:
        """Store ``value`` under ``key``.

        :param key: name under which the value is remembered
        :param value: value to remember
        """

    def get(self, key: str) -> Optional[str]:
        """Return the value stored under ``key`` or ``None`` if there is none.

        :param key: name the value has been stored under
        """
        return None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MemoryEngineCache(EngineCache):
    """Cache that keeps its key-value pairs in a dictionary of this process.

    At most ``max_size`` entries are kept.  When the cache is full, the entry
    that has been stored the longest time ago is evicted to make room for the
    new one.  Storing a key again counts as a fresh store.
    """

    def __init__(self, max_size: int = 100):
        """
        :param max_size: upper bound for the number of entries kept; the most
            recently stored entry is always kept, even if ``max_size`` is
            smaller than one
        """
        # dictionaries are ordered since Python 3.7: the first key is the
        # oldest entry, the last key is the one stored most recently
        self.__STORAGE = {}
        self.max_size = max_size

    def store(self, key, value):
        """Store the provided key-value pair in the cache.

        :param key: name under which the value is remembered
        :param value: value to remember
        """
        # remove the old value first: the key is re-added at the end of the
        # dictionary (newest position), and an update of an existing key
        # never evicts an unrelated entry
        self.__STORAGE.pop(key, None)

        # make room by dropping the *oldest* entries; dict.popitem() is not
        # usable here, it removes the most recently inserted entry (LIFO)
        while self.__STORAGE and len(self.__STORAGE) >= self.max_size:
            oldest_key = next(iter(self.__STORAGE))
            del self.__STORAGE[oldest_key]

        self.__STORAGE[key] = value

    def get(self, key):
        """Return the value stored under ``key`` or ``None`` if the key is
        unknown (never stored or already evicted)."""
        return self.__STORAGE.get(key)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RedisEngineCache(EngineCache):
    """Cache that keeps its key-value pairs in the database reached through
    ``redisdb.client()``.

    Entries are written with an expiration time, see ``expiration_seconds``.
    """

    def __init__(self, key_prefix: str, expiration_seconds: int = 600):
        """
        :param key_prefix: string prepended to every database key of this cache
        :param expiration_seconds: passed as ``ex`` to the client's ``set``
            call when a value is stored
        """
        self.key_prefix = key_prefix
        self.expiration_seconds = expiration_seconds

    def _get_cache_key(self, key):
        """Build the database key: the prefix followed by
        ``redislib.secret_hash(key)``."""
        return self.key_prefix + redislib.secret_hash(key)

    def store(self, key, value):
        """Store the provided key-value pair in the database.

        :param key: name under which the value is remembered
        :param value: value to remember
        """
        c = redisdb.client()
        c.set(self._get_cache_key(key), value, ex=self.expiration_seconds)

    def get(self, key):
        """Return the value stored under ``key`` as ``str`` or ``None`` if the
        database has no value for the key.

        :param key: name the value has been stored under
        """
        c = redisdb.client()
        value = c.get(self._get_cache_key(key))

        if value is None:
            return None

        # The client may hand out raw bytes.  Decode them, so that callers get
        # a ``str`` as promised by EngineCache.get() and as returned by the
        # in-memory cache.
        if isinstance(value, bytes):
            return value.decode('utf-8')

        return value
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_or_create_cache(database_prefix: str) -> EngineCache:
    """Build the cache matching the current setup.

    A :py:obj:`RedisEngineCache` is returned when ``redisdb.client()`` yields
    a client, otherwise a :py:obj:`MemoryEngineCache` with its default size.
    Every call constructs a new cache object.

    :param database_prefix: key prefix handed to the
        :py:obj:`RedisEngineCache`; not used by the in-memory cache
    """
    if not redisdb.client():
        return MemoryEngineCache()

    return RedisEngineCache(database_prefix)
 | 
			
		||||
@ -15,7 +15,6 @@ import lxml.html
 | 
			
		||||
 | 
			
		||||
from searx import (
 | 
			
		||||
    locales,
 | 
			
		||||
    redislib,
 | 
			
		||||
    external_bang,
 | 
			
		||||
)
 | 
			
		||||
from searx.utils import (
 | 
			
		||||
@ -25,7 +24,7 @@ from searx.utils import (
 | 
			
		||||
    extract_text,
 | 
			
		||||
)
 | 
			
		||||
from searx.network import get  # see https://github.com/searxng/searxng/issues/762
 | 
			
		||||
from searx import redisdb
 | 
			
		||||
from searx.engine_cache import get_or_create_cache, EngineCache
 | 
			
		||||
from searx.enginelib.traits import EngineTraits
 | 
			
		||||
from searx.exceptions import SearxEngineCaptchaException
 | 
			
		||||
from searx.result_types import EngineResults
 | 
			
		||||
@ -61,25 +60,23 @@ url = "https://html.duckduckgo.com/html"
 | 
			
		||||
 | 
			
		||||
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
 | 
			
		||||
form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
 | 
			
		||||
__CACHE = []
 | 
			
		||||
 | 
			
		||||
__CACHE: EngineCache = get_or_create_cache('SearXNG_ddg_web_vqd')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _cache_key(query: str, region: str):
 | 
			
		||||
    return 'SearXNG_ddg_web_vqd' + redislib.secret_hash(f"{query}//{region}")
 | 
			
		||||
def init(_):
 | 
			
		||||
    global __CACHE  # pylint: disable=global-statement
 | 
			
		||||
    __CACHE = get_or_create_cache('SearXNG_ddg_web_vqd')
 | 
			
		||||
    # TODO: why is the __CACHE always None if initialized as None,
 | 
			
		||||
    # even though it should be changed here and this method is
 | 
			
		||||
    # confirmed to be called? ...
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cache_vqd(query: str, region: str, value: str):
 | 
			
		||||
    """Caches a ``vqd`` value from a query."""
 | 
			
		||||
    c = redisdb.client()
 | 
			
		||||
    if c:
 | 
			
		||||
        logger.debug("VALKEY cache vqd value: %s (%s)", value, region)
 | 
			
		||||
        c.set(_cache_key(query, region), value, ex=600)
 | 
			
		||||
 | 
			
		||||
    else:
 | 
			
		||||
        logger.debug("MEM cache vqd value: %s (%s)", value, region)
 | 
			
		||||
        if len(__CACHE) > 100:  # cache vqd from last 100 queries
 | 
			
		||||
            __CACHE.pop(0)
 | 
			
		||||
        __CACHE.append((_cache_key(query, region), value))
 | 
			
		||||
    __CACHE.store(f"{query}//{region}", value)
 | 
			
		||||
    logger.debug("cached vqd value: %s (%s)", value, region)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_vqd(query: str, region: str, force_request: bool = False):
 | 
			
		||||
@ -114,20 +111,10 @@ def get_vqd(query: str, region: str, force_request: bool = False):
 | 
			
		||||
    seems the block list is a sliding window: to get my IP rid from the bot list
 | 
			
		||||
    I had to cool down my IP for 1h (send no requests from that IP to DDG).
 | 
			
		||||
    """
 | 
			
		||||
    key = _cache_key(query, region)
 | 
			
		||||
 | 
			
		||||
    c = redisdb.client()
 | 
			
		||||
    if c:
 | 
			
		||||
        value = c.get(key)
 | 
			
		||||
        if value or value == b'':
 | 
			
		||||
            value = value.decode('utf-8')  # type: ignore
 | 
			
		||||
            logger.debug("re-use CACHED vqd value: %s", value)
 | 
			
		||||
            return value
 | 
			
		||||
 | 
			
		||||
    for k, value in __CACHE:
 | 
			
		||||
        if k == key:
 | 
			
		||||
            logger.debug("MEM re-use CACHED vqd value: %s", value)
 | 
			
		||||
            return value
 | 
			
		||||
    value = __CACHE.get(f"{query}//{region}")
 | 
			
		||||
    if value is not None:
 | 
			
		||||
        logger.debug("re-use CACHED vqd value: %s", value)
 | 
			
		||||
        return value
 | 
			
		||||
 | 
			
		||||
    if force_request:
 | 
			
		||||
        resp = get(f'https://duckduckgo.com/?q={quote_plus(query)}')
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user