[mod] ExpireCache - sqlite based key/value cache with expire time
This commit is contained in:
parent
4a594f1b53
commit
4cbfba9d7b
@ -29,6 +29,8 @@
|
|||||||
directly using ``python searx/webapp.py``. Doesn't apply to a SearXNG
|
directly using ``python searx/webapp.py``. Doesn't apply to a SearXNG
|
||||||
services running behind a proxy and using socket communications.
|
services running behind a proxy and using socket communications.
|
||||||
|
|
||||||
|
.. _server.secret_key:
|
||||||
|
|
||||||
``secret_key`` : ``$SEARXNG_SECRET``
|
``secret_key`` : ``$SEARXNG_SECRET``
|
||||||
Used for cryptography purpose.
|
Used for cryptography purpose.
|
||||||
|
|
||||||
|
@ -4,19 +4,13 @@
|
|||||||
Engine Library
|
Engine Library
|
||||||
==============
|
==============
|
||||||
|
|
||||||
.. contents::
|
|
||||||
:depth: 2
|
|
||||||
:local:
|
|
||||||
:backlinks: entry
|
|
||||||
|
|
||||||
.. automodule:: searx.enginelib
|
.. automodule:: searx.enginelib
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
.. _searx.enginelib.traits:
|
.. _searx.enginelib.traits:
|
||||||
|
|
||||||
|
|
||||||
Engine traits
|
Engine traits
|
||||||
=============
|
=============
|
||||||
|
|
||||||
.. automodule:: searx.enginelib.traits
|
.. automodule:: searx.enginelib.traits
|
||||||
:members:
|
:members:
|
||||||
|
8
docs/src/searx.cache.rst
Normal file
8
docs/src/searx.cache.rst
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
.. _searx.cache:
|
||||||
|
|
||||||
|
======
|
||||||
|
Caches
|
||||||
|
======
|
||||||
|
|
||||||
|
.. automodule:: searx.cache
|
||||||
|
:members:
|
@ -1,5 +1,6 @@
|
|||||||
certifi==2025.4.26
|
certifi==2025.4.26
|
||||||
babel==2.17.0
|
babel==2.17.0
|
||||||
|
cryptography==44.0.2
|
||||||
flask-babel==4.0.0
|
flask-babel==4.0.0
|
||||||
flask==3.1.0
|
flask==3.1.0
|
||||||
jinja2==3.1.6
|
jinja2==3.1.6
|
||||||
|
461
searx/cache.py
Normal file
461
searx/cache.py
Normal file
@ -0,0 +1,461 @@
|
|||||||
|
"""Implementation of caching solutions.
|
||||||
|
|
||||||
|
- :py:obj:`searx.cache.ExpireCache` and its :py:obj:`searx.cache.ExpireCacheCfg`
|
||||||
|
|
||||||
|
----
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
__all__ = ["ExpireCacheCfg", "ExpireCacheStats", "ExpireCache", "ExpireCacheSQLite"]
|
||||||
|
|
||||||
|
import abc
|
||||||
|
import dataclasses
|
||||||
|
import datetime
|
||||||
|
import hashlib
|
||||||
|
import hmac
|
||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
import secrets
|
||||||
|
import sqlite3
|
||||||
|
import string
|
||||||
|
import tempfile
|
||||||
|
import time
|
||||||
|
import typing
|
||||||
|
|
||||||
|
from base64 import urlsafe_b64encode, urlsafe_b64decode
|
||||||
|
|
||||||
|
import msgspec
|
||||||
|
|
||||||
|
from cryptography.fernet import Fernet
|
||||||
|
from cryptography.hazmat.primitives import hashes
|
||||||
|
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
|
||||||
|
|
||||||
|
from searx import sqlitedb
|
||||||
|
from searx import logger
|
||||||
|
from searx import get_setting
|
||||||
|
|
||||||
|
log = logger.getChild("cache")
|
||||||
|
|
||||||
|
|
||||||
|
class ExpireCacheCfg(msgspec.Struct):  # pylint: disable=too-few-public-methods
    """Configuration of a :py:obj:`ExpireCache` cache."""

    name: str
    """Name of the cache."""

    db_url: str = ""
    """URL of the SQLite DB, the path to the database file.  If unset, a default
    DB is created in the system's temporary directory (see
    :py:func:`tempfile.gettempdir`), e.g. ``/tmp/sxng_cache_{name}.db`` where
    ``{name}`` is the normalized :py:obj:`ExpireCacheCfg.name` (see
    :py:obj:`ExpireCache.normalize_name`)."""

    MAX_VALUE_LEN: int = 1024 * 10
    """Max length of a *serialized* value."""

    MAXHOLD_TIME: int = 60 * 60 * 24 * 7  # 7 days
    """Hold time (default in sec.), after which a value is removed from the cache."""

    MAINTENANCE_PERIOD: int = 60 * 60  # 1h
    """Maintenance period in seconds / when :py:obj:`MAINTENANCE_MODE` is set to
    ``auto``."""

    MAINTENANCE_MODE: typing.Literal["auto", "off"] = "auto"
    """Type of maintenance mode

    ``auto``:
      Maintenance is carried out automatically as part of the maintenance
      intervals (:py:obj:`MAINTENANCE_PERIOD`); no external process is required.

    ``off``:
      Maintenance is switched off and must be carried out by an external process
      if required.
    """

    # encryption of the values stored in the DB

    # NOTE: the default is evaluated once, when this class is defined (import
    # time), i.e. the settings must be available at that point.
    password: bytes = get_setting("server.secret_key").encode()  # type: ignore
    """Password used in case of :py:obj:`ExpireCacheCfg.ENCRYPT_VALUE` is
    ``True``.

    The default password is taken from :ref:`secret_key <server.secret_key>`.
    When the password is changed, the values in the cache can no longer be
    decrypted, which is why all values in the cache are deleted when the
    password is changed.
    """

    ENCRYPT_VALUE: bool = True
    """Encrypting the values before they are written to the DB (see:
    :py:obj:`ExpireCacheCfg.password`)."""

    def __post_init__(self):
        """Fill in the default :py:obj:`db_url` when none was configured."""
        # if db_url is unset, use a default DB <tempdir>/sxng_cache_{name}.db
        if not self.db_url:
            file_name = f"sxng_cache_{ExpireCache.normalize_name(self.name)}.db"
            self.db_url = os.path.join(tempfile.gettempdir(), file_name)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass
class ExpireCacheStats:
    """Dataclass which provides information on the status of the cache."""

    cached_items: dict[str, list[tuple[str, typing.Any, int]]]
    """Values in the cache mapped by table name.

    .. code:: python

       {
           "table name": [
               ("foo key", "foo value", <expire>),
               ("bar key", "bar value", <expire>),
               # ...
           ],
           # ...
       }
    """

    def report(self):
        """Return a human readable, multi-line summary of :py:obj:`cached_items`.

        One line is emitted per key/value pair (or one ``empty`` line for a
        table without items), followed by the number of tables and the total
        number of key/value pairs.  The expire time stamps are rendered in the
        local time zone.
        """
        time_fmt = "%Y-%m-%d %H:%M:%S"
        report_lines = []
        pair_count = 0

        for tbl, items in self.cached_items.items():
            if not items:
                report_lines.append(f"[{tbl:20s}] empty")
                continue

            for item_key, item_value, item_expire in items:
                when = datetime.datetime.fromtimestamp(item_expire).strftime(time_fmt)
                pair_count += 1
                type_name = type(item_value).__name__
                report_lines.append(f"[{tbl:20s}] {when} {item_key:12} --> ({type_name}) {item_value} ")

        report_lines.append(f"number of tables: {len(self.cached_items)}")
        report_lines.append(f"number of key/value pairs: {pair_count}")
        return "\n".join(report_lines)
|
||||||
|
|
||||||
|
|
||||||
|
class ExpireCache(abc.ABC):
    """Interface (abstract base class) of a key/value cache whose entries carry
    an expire date.  Besides the abstract cache API, this class provides the
    (de)serialization and encryption helpers shared by the implementations."""

    cfg: ExpireCacheCfg

    hmac_iterations: int = 10_000
    crypt_hash_property = "crypt_hash"

    @abc.abstractmethod
    def set(self, key: str, value: typing.Any, expire: int | None) -> bool:
        """Store *value* under *key*.  The argument ``expire`` is the life time
        of the key in seconds; if it is unset, the default is taken from
        :py:obj:`ExpireCacheCfg.MAXHOLD_TIME`.  Once the life time has elapsed,
        the key will automatically be deleted.
        """

    @abc.abstractmethod
    def get(self, key: str, default=None) -> typing.Any:
        """Return the *value* stored under *key*.  If the key is unset, the
        ``default`` (``None`` if not given) is returned."""

    @abc.abstractmethod
    def maintenance(self, force: bool = False, drop_crypted: bool = False) -> bool:
        """Performs maintenance on the cache.

        ``force``:
          Maintenance should be carried out even if the maintenance interval has
          not yet been reached.

        ``drop_crypted``:
          The encrypted values can no longer be decrypted (if the password is
          changed), they must be removed from the cache.
        """

    @abc.abstractmethod
    def state(self) -> ExpireCacheStats:
        """Returns a :py:obj:`ExpireCacheStats`, which provides information
        about the status of the cache."""

    @staticmethod
    def build_cache(cfg: ExpireCacheCfg) -> ExpireCache:
        """Factory to build a caching instance.

        .. note::

           Currently, only the SQLite adapter is available, but other database
           types could be implemented in the future, e.g. a Valkey (Redis)
           adapter.
        """
        return ExpireCacheSQLite(cfg)

    @staticmethod
    def normalize_name(name: str) -> str:
        """Returns a normalized name that can be used as a file name or as a SQL
        table name.  Every character that is not an ASCII letter, a digit or
        one of ``-``, ``_``, ``.`` is dropped."""

        allowed = frozenset("-_." + string.ascii_letters + string.digits)
        return "".join(ch for ch in name if ch in allowed)

    def derive_key(self, password: bytes, salt: bytes, iterations: int) -> bytes:
        """Derive a secret-key (URL-safe base64 encoded, 32 bytes raw) from a
        given password and salt by PBKDF2-HMAC-SHA256."""
        pbkdf2 = PBKDF2HMAC(algorithm=hashes.SHA256(), length=32, salt=salt, iterations=iterations)
        raw_key = pbkdf2.derive(password)
        return urlsafe_b64encode(raw_key)

    def serialize(self, value: typing.Any) -> bytes:
        """Pickle ``value`` and, if :py:obj:`ExpireCacheCfg.ENCRYPT_VALUE` is
        set, encrypt the pickled bytes."""
        pickled: bytes = pickle.dumps(value)
        if not self.cfg.ENCRYPT_VALUE:
            return pickled
        return self.encrypt(pickled)

    def deserialize(self, value: bytes) -> typing.Any:
        """Inverse of :py:obj:`serialize`: decrypt (if
        :py:obj:`ExpireCacheCfg.ENCRYPT_VALUE` is set) and unpickle."""
        raw = self.decrypt(value) if self.cfg.ENCRYPT_VALUE else value
        # NOTE(review): pickle.loads executes code embedded in the data, only
        # feed it with bytes this application wrote itself.  With encryption
        # disabled, nothing in this method authenticates the stored bytes.
        return pickle.loads(raw)

    def encrypt(self, message: bytes) -> bytes:
        """Encode and decode values by a method using `Fernet with password`_ where
        the key is derived from the password (PBKDF2HMAC_).  The *password* for
        encryption is taken from the :ref:`server.secret_key`

        .. _Fernet with password: https://stackoverflow.com/a/55147077
        .. _PBKDF2HMAC: https://cryptography.io/en/latest/hazmat/primitives/key-derivation-functions/#pbkdf2
        """

        # A fresh random salt per message: since the salt is shipped with the
        # token, it does not need to be remembered elsewhere, and the output
        # differs even if password and message are repeated.
        salt = secrets.token_bytes(16)

        # The iteration count is shipped with the token as well, so it can be
        # raised later without losing the ability to decrypt older tokens.
        iterations = int(self.hmac_iterations)

        fernet = Fernet(self.derive_key(self.cfg.password, salt, iterations))
        crypted_msg = fernet.encrypt(message)

        # token layout: 16 bytes salt | 4 bytes iteration count | b64(payload)
        token = salt + iterations.to_bytes(4, "big") + urlsafe_b64encode(crypted_msg)
        return urlsafe_b64encode(token)

    def decrypt(self, token: bytes) -> bytes:
        """Decrypt a token that has been created by :py:obj:`encrypt`."""
        raw = urlsafe_b64decode(token)

        # split the token: 16 bytes salt | 4 bytes iteration count | b64(payload)
        salt, count, payload = raw[:16], raw[16:20], raw[20:]

        key = self.derive_key(self.cfg.password, salt, int.from_bytes(count, "big"))
        crypted_msg = urlsafe_b64decode(payload)
        return Fernet(key).decrypt(crypted_msg)

    def secret_hash(self, name: str | bytes) -> str:
        """Creates a hash of the argument ``name``.  The hash value is formed
        from the ``name`` combined with the :py:obj:`password
        <ExpireCacheCfg.password>`.  Can be used, for example, to make the
        ``key`` stored in the DB unreadable for third parties."""

        name_bytes = name.encode("utf-8") if isinstance(name, str) else name
        # NOTE(review): name + password is passed as the HMAC *key* and the
        # message stays empty.  Left as is: changing it would change every
        # hash that has already been stored.
        return hmac.new(name_bytes + self.cfg.password, digestmod='sha256').hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
    """Cache that manages key/value pairs in a SQLite DB.  The DB model in the
    SQLite DB is implemented in abstract class :py:obj:`SQLiteAppl
    <searx.sqlitedb.SQLiteAppl>`.

    The following configurations are required / supported:

    - :py:obj:`ExpireCacheCfg.db_url`
    - :py:obj:`ExpireCacheCfg.MAXHOLD_TIME`
    - :py:obj:`ExpireCacheCfg.MAINTENANCE_PERIOD`
    - :py:obj:`ExpireCacheCfg.MAINTENANCE_MODE`
    - :py:obj:`ExpireCacheCfg.ENCRYPT_VALUE`
    """

    DB_SCHEMA = 1

    # The key/value tables will be created on demand by self.create_table
    DDL_CREATE_TABLES = {}

    CACHE_TABLE_PREFIX = "CACHE-TABLE-"

    def __init__(self, cfg: ExpireCacheCfg):
        """An instance of the SQLite expire cache is build up from a
        :py:obj:`config <ExpireCacheCfg>`."""

        self.cfg = cfg
        if cfg.db_url == ":memory:":
            log.critical("don't use SQLite DB in :memory: in production!!")
        super().__init__(cfg.db_url)

    @staticmethod
    def _sql_ident(name: str) -> str:
        """Return ``name`` as a double-quoted SQL identifier.

        Table names may contain characters such as ``-`` or ``.`` (both are
        accepted by :py:obj:`ExpireCache.normalize_name`); unquoted they are
        not valid identifiers in a SQL statement.  An embedded double quote is
        escaped by doubling it.
        """
        return '"' + name.replace('"', '""') + '"'

    def init(self, conn: sqlite3.Connection) -> bool:
        """Initialize the DB application.  Returns ``False`` if the base class
        reports that no initialization took place.

        If values are encrypted, the hash of the password is compared with the
        one stored in the DB properties: when they differ, the values in the
        cache can't be decrypted any longer and all cache tables are
        truncated.
        """
        ret_val = super().init(conn)
        if not ret_val:
            return False

        if self.cfg.ENCRYPT_VALUE:
            new = hashlib.sha256(self.cfg.password).hexdigest()
            old = self.properties(self.crypt_hash_property)
            if old != new:
                if old is not None:
                    log.warning("[%s] crypt token changed: drop all cache tables", self.cfg.name)
                self.maintenance(force=True, drop_crypted=True)
                self.properties.set(self.crypt_hash_property, new)

        return True

    def maintenance(self, force: bool = False, drop_crypted: bool = False) -> bool:
        """Remove expired items from all cache tables.  Returns ``True`` if a
        maintenance cycle has been carried out, otherwise ``False``.

        ``force``:
          Run the maintenance even if :py:obj:`ExpireCacheCfg.MAINTENANCE_MODE`
          is ``off`` or the maintenance interval has not yet been reached.

        ``drop_crypted``:
          Remove *all* items from all cache tables (the encrypted values can no
          longer be decrypted after the password has been changed).
        """

        if not force:
            if self.cfg.MAINTENANCE_MODE == "off":
                # automatic maintenance is switched off, only an explicit
                # (forced) call carries out the maintenance
                return False
            if int(time.time()) < self.next_maintenance_time:
                # log.debug("no maintenance required yet, next maintenance interval is in the future")
                return False

        # Prevent parallel DB maintenance cycles from other DB connections
        # (e.g. in multi thread or process environments).
        self.properties.set("LAST_MAINTENANCE", "")  # hint: this (also) sets the m_time of the property!

        if drop_crypted:
            self.truncate_tables(self.table_names)
            return True

        # drop items by expire time stamp ..
        expire = int(time.time())

        with self.connect() as conn:
            for table in self.table_names:
                res = conn.execute(f"DELETE FROM {self._sql_ident(table)} WHERE expire < ?", (expire,))
                log.debug("deleted %s keys from table %s (expire date reached)", res.rowcount, table)

        # Vacuuming the WALs (after the transaction above has been committed)
        # https://www.theunterminatedstring.com/sqlite-vacuuming/

        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
        conn.close()

        return True

    def create_table(self, table: str) -> bool:
        """Create DB ``table`` if it has not yet been created, no recreates are
        initiated if the table already exists.  Returns ``True`` if the table
        has been created, otherwise ``False``.
        """
        if table in self.table_names:
            # log.debug("key/value table %s exists in DB (no need to recreate)", table)
            return False

        log.info("key/value table '%s' NOT exists in DB -> create DB table ..", table)
        sql_table = "\n".join(
            [
                f"CREATE TABLE IF NOT EXISTS {self._sql_ident(table)} (",
                "  key        TEXT,",
                "  value      BLOB,",
                f"  expire     INTEGER DEFAULT (strftime('%s', 'now') + {self.cfg.MAXHOLD_TIME}),",
                "PRIMARY KEY (key))",
            ]
        )
        sql_index = (
            f"CREATE INDEX IF NOT EXISTS {self._sql_ident('index_expire_' + table)}"
            f" ON {self._sql_ident(table)}(expire);"
        )
        with self.connect() as conn:
            conn.execute(sql_table)
            conn.execute(sql_index)
        conn.close()

        # register the table, see property self.table_names
        self.properties.set(f"{self.CACHE_TABLE_PREFIX}-{table}", table)
        return True

    @property
    def table_names(self) -> list[str]:
        """List of key/value tables already created in the DB."""
        sql = f"SELECT value FROM properties WHERE name LIKE '{self.CACHE_TABLE_PREFIX}%%'"
        rows = self.DB.execute(sql).fetchall() or []
        return [r[0] for r in rows]

    def truncate_tables(self, table_names: list[str]):
        """Delete all rows from the tables listed in ``table_names``."""
        log.debug("truncate table: %s", ",".join(table_names))
        with self.connect() as conn:
            for table in table_names:
                conn.execute(f"DELETE FROM {self._sql_ident(table)}")
        conn.close()
        return True

    @property
    def next_maintenance_time(self) -> int:
        """Returns (unix epoch) time of the next maintenance."""

        return self.cfg.MAINTENANCE_PERIOD + self.properties.m_time("LAST_MAINTENANCE", int(time.time()))

    # implement ABC methods of ExpireCache

    def set(self, key: str, value: typing.Any, expire: int | None, table: str | None = None) -> bool:
        """Set key/value in ``table``.  If expire is unset the default is taken
        from :py:obj:`ExpireCacheCfg.MAXHOLD_TIME`.  If ``table`` argument is
        ``None`` (the default), a table name is generated from the
        :py:obj:`ExpireCacheCfg.name`.  If DB ``table`` does not exists, it will be
        created (on demand) by :py:obj:`self.create_table
        <ExpireCacheSQLite.create_table>`.

        Returns ``False`` (and stores nothing) if the serialized value is longer
        than :py:obj:`ExpireCacheCfg.MAX_VALUE_LEN`, otherwise ``True``.
        """
        self.maintenance()

        table_name = table
        if not table_name:
            table_name = self.normalize_name(self.cfg.name)

        value = self.serialize(value=value)
        if len(value) > self.cfg.MAX_VALUE_LEN:
            # log the key, never the (possibly secret and large) value
            log.warning(
                "ExpireCache.set(): %s.key='%s' - value too big to cache (len: %s) ", table_name, key, len(value)
            )
            return False

        if not expire:
            expire = self.cfg.MAXHOLD_TIME
        expire = int(time.time()) + expire

        self.create_table(table_name)

        sql = (
            f"INSERT INTO {self._sql_ident(table_name)} (key, value, expire) VALUES (?, ?, ?)"
            f"    ON CONFLICT DO "
            f"UPDATE SET value=?, expire=?"
        )

        if table:
            with self.DB:
                self.DB.execute(sql, (key, value, expire, value, expire))
        else:
            with self.connect() as conn:
                conn.execute(sql, (key, value, expire, value, expire))
            conn.close()

        return True

    def get(self, key: str, default=None, table: str | None = None) -> typing.Any:
        """Get value of ``key`` from ``table``.  If ``table`` argument is
        ``None`` (the default), a table name is generated from the
        :py:obj:`ExpireCacheCfg.name`.  If ``key`` not exists (in table) or its
        expire time has been reached, the ``default`` value is returned.
        """
        self.maintenance()

        if not table:
            table = self.normalize_name(self.cfg.name)

        if table not in self.table_names:
            return default

        # Expired rows are physically removed by the maintenance only; filter
        # them out here so that a value is never served after its expire time
        # (same boundary as in the DELETE of the maintenance).
        sql = f"SELECT value FROM {self._sql_ident(table)} WHERE key = ? AND expire >= ?"
        row = self.DB.execute(sql, (key, int(time.time()))).fetchone()
        if row is None:
            return default

        return self.deserialize(row[0])

    def state(self) -> ExpireCacheStats:
        """Returns a :py:obj:`ExpireCacheStats` with all (deserialized) items
        currently stored in the cache tables."""
        cached_items = {}
        for table in self.table_names:
            cached_items[table] = []
            for row in self.DB.execute(f"SELECT key, value, expire FROM {self._sql_ident(table)}"):
                cached_items[table].append((row[0], self.deserialize(row[1]), row[2]))
        return ExpireCacheStats(cached_items=cached_items)
|
@ -1,6 +1,16 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
"""Implementations of the framework for the SearXNG engines.
|
"""Implementations of the framework for the SearXNG engines.
|
||||||
|
|
||||||
|
- :py:obj:`searx.enginelib.EngineCache`
|
||||||
|
- :py:obj:`searx.enginelib.Engine`
|
||||||
|
- :py:obj:`searx.enginelib.traits`
|
||||||
|
|
||||||
|
There is a command line for developer purposes and for deeper analysis. Here is
|
||||||
|
an example in which the command line is called in the development environment::
|
||||||
|
|
||||||
|
$ ./manage pyenv.cmd bash --norc --noprofile
|
||||||
|
(py3) python -m searx.enginelib --help
|
||||||
|
|
||||||
.. hint::
|
.. hint::
|
||||||
|
|
||||||
The long term goal is to modularize all implementations of the engine
|
The long term goal is to modularize all implementations of the engine
|
||||||
@ -9,16 +19,158 @@
|
|||||||
- move implementations of the :ref:`searx.engines loader` to a new module in
|
- move implementations of the :ref:`searx.engines loader` to a new module in
|
||||||
the :py:obj:`searx.enginelib` namespace.
|
the :py:obj:`searx.enginelib` namespace.
|
||||||
|
|
||||||
|
-----
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from typing import List, Callable, TYPE_CHECKING
|
|
||||||
|
__all__ = ["EngineCache", "Engine", "ENGINES_CACHE"]
|
||||||
|
|
||||||
|
from typing import List, Callable, TYPE_CHECKING, Any
|
||||||
|
import string
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from ..cache import ExpireCache, ExpireCacheCfg
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from searx.enginelib import traits
|
from searx.enginelib import traits
|
||||||
|
|
||||||
|
|
||||||
|
ENGINES_CACHE = ExpireCache.build_cache(
    ExpireCacheCfg(
        name="ENGINES_CACHE",
        MAXHOLD_TIME=60 * 60 * 24 * 7,  # 7 days
        MAINTENANCE_PERIOD=60 * 60,  # 1h
    )
)
"""Global :py:obj:`searx.cache.ExpireCacheSQLite` instance where the cached
values from all engines are stored.  The `MAXHOLD_TIME` is 7 days and the
`MAINTENANCE_PERIOD` is set to one hour."""
|
||||||
|
|
||||||
|
app = typer.Typer()
|
||||||
|
|
||||||
|
|
||||||
|
@app.command()
def state():
    """Show state for the caches of the engines."""

    def headline(text: str) -> None:
        # print a section title, underlined by '=' characters
        print(text)
        print("=" * len(text))

    # section 1: the cached key/value pairs of all tables
    headline("cache tables and key/values")
    print(ENGINES_CACHE.state().report())
    print()

    # section 2: the properties of the cache DB
    headline(f"properties of {ENGINES_CACHE.cfg.name}")
    print(str(ENGINES_CACHE.properties))  # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
@app.command()
def maintenance(force: bool = True):
    """Carry out maintenance on cache of the engines."""
    # ``force`` is passed through to the cache; it defaults to True here, so
    # the maintenance is requested regardless of the maintenance interval.
    ENGINES_CACHE.maintenance(force=force)
|
||||||
|
|
||||||
|
|
||||||
|
class EngineCache:
    """Persistent (SQLite) key/value cache that deletes its values again after
    ``expire`` seconds (default/max: :py:obj:`MAXHOLD_TIME
    <searx.cache.ExpireCacheCfg.MAXHOLD_TIME>`).  This class is a wrapper around
    :py:obj:`ENGINES_CACHE` (:py:obj:`ExpireCacheSQLite
    <searx.cache.ExpireCacheSQLite>`).

    In the :origin:`searx/engines/demo_offline.py` engine you can find an
    exemplary implementation of such a cache, other examples are implemented
    in:

    - :origin:`searx/engines/radio_browser.py`
    - :origin:`searx/engines/soundcloud.py`
    - :origin:`searx/engines/startpage.py`

    .. code:: python

       from searx.enginelib import EngineCache
       CACHE: EngineCache

       def init(engine_settings):
           global CACHE
           CACHE = EngineCache(engine_settings["name"])

       def request(query, params):
           token = CACHE.get(key="token")
           if token is None:
               token = get_token()
               # cache token of this engine for 1h
               CACHE.set(key="token", value=token, expire=3600)
           ...

    For introspection of the DB, jump into developer environment and run command to
    show cache state::

        $ ./manage pyenv.cmd bash --norc --noprofile
        (py3) python -m searx.enginelib cache state

        cache tables and key/values
        ===========================
        [demo_offline        ] 2025-04-22 11:32:50 count        --> (int) 4
        [startpage           ] 2025-04-22 12:32:30 SC_CODE      --> (str) fSOBnhEMlDfE20
        [duckduckgo          ] 2025-04-22 12:32:31 4dff493e.... --> (str) 4-128634958369380006627592672385352473325
        [duckduckgo          ] 2025-04-22 12:40:06 3e2583e2.... --> (str) 4-263126175288871260472289814259666848451
        [radio_browser       ] 2025-04-23 11:33:08 servers      --> (list) ['https://de2.api.radio-browser.info', ...]
        [soundcloud          ] 2025-04-29 11:40:06 guest_client_id --> (str) EjkRJG0BLNEZquRiPZYdNtJdyGtTuHdp
        [wolframalpha        ] 2025-04-22 12:40:06 code         --> (str) 5aa79f86205ad26188e0e26e28fb7ae7
        number of tables: 6
        number of key/value pairs: 7

    In the "cache tables and key/values" section, the table name (engine name) is at
    first position on the second there is the calculated expire date and on the
    third and fourth position the key/value is shown.

    About duckduckgo: The *vqd code* of ddg depends on the query term and therefore
    the key is a hash value of the query term (to not to store the raw query term).

    In the "properties of ENGINES_CACHE" section all properties of the SQLiteAppl /
    ExpireCache and their last modification date are shown::

        properties of ENGINES_CACHE
        ===========================
        [last modified: 2025-04-22 11:32:27] DB_SCHEMA           : 1
        [last modified: 2025-04-22 11:32:27] LAST_MAINTENANCE    :
        [last modified: 2025-04-22 11:32:27] crypt_hash          : ca612e3566fdfd7cf7efe...
        [last modified: 2025-04-22 11:32:30] CACHE-TABLE--demo_offline: demo_offline
        [last modified: 2025-04-22 11:32:30] CACHE-TABLE--startpage: startpage
        [last modified: 2025-04-22 11:32:31] CACHE-TABLE--duckduckgo: duckduckgo
        [last modified: 2025-04-22 11:33:08] CACHE-TABLE--radio_browser: radio_browser
        [last modified: 2025-04-22 11:40:06] CACHE-TABLE--soundcloud: soundcloud
        [last modified: 2025-04-22 11:40:06] CACHE-TABLE--wolframalpha: wolframalpha

    These properties provide information about the state of the ExpireCache and
    control the behavior.  For example, the maintenance intervals are controlled by
    the last modification date of the LAST_MAINTENANCE property and the hash value
    of the password can be used to detect whether the password has been changed (in
    this case the DB entries can no longer be decrypted and the entire cache must be
    discarded).
    """

    def __init__(self, engine_name: str, expire: int | None = None):
        """Bind the cache to the table of the engine ``engine_name``.

        ``expire`` is the default life time (in sec.) of the values set by this
        instance; if unset, the ``MAXHOLD_TIME`` of :py:obj:`ENGINES_CACHE` is
        used.  The table name is the engine name in which every character that
        is not an ASCII letter, a digit or one of ``-``, ``_``, ``.`` is
        replaced by an underscore.
        """
        self.expire = expire or ENGINES_CACHE.cfg.MAXHOLD_TIME
        allowed = frozenset("-_." + string.ascii_letters + string.digits)
        self.table_name = "".join(ch if ch in allowed else "_" for ch in engine_name)

    def set(self, key: str, value: Any, expire: int | None = None) -> bool:
        """Store ``value`` under ``key`` in the table of the engine.  If
        ``expire`` is unset, the default life time of this instance is used."""
        hold_time = expire or self.expire
        return ENGINES_CACHE.set(key=key, value=value, expire=hold_time, table=self.table_name)

    def get(self, key: str, default=None) -> Any:
        """Return the value of ``key`` from the table of the engine, or
        ``default`` if there is no such key."""
        return ENGINES_CACHE.get(key, default=default, table=self.table_name)

    def secret_hash(self, name: str | bytes) -> str:
        """Return the secret hash of ``name`` as computed by
        :py:obj:`ENGINES_CACHE`."""
        return ENGINES_CACHE.secret_hash(name=name)
|
||||||
|
|
||||||
|
|
||||||
class Engine: # pylint: disable=too-few-public-methods
|
class Engine: # pylint: disable=too-few-public-methods
|
||||||
"""Class of engine instances build from YAML settings.
|
"""Class of engine instances build from YAML settings.
|
||||||
|
|
||||||
|
21
searx/enginelib/__main__.py
Normal file
21
searx/enginelib/__main__.py
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
"""Implementation of a command line for development purposes. To start a
|
||||||
|
command, switch to the environment and run library module as a script::
|
||||||
|
|
||||||
|
$ ./manage pyenv.cmd bash --norc --noprofile
|
||||||
|
(py3) python -m searx.enginelib --help
|
||||||
|
|
||||||
|
The following commands can be used for maintenance and introspection
|
||||||
|
(development) of the engine cache::
|
||||||
|
|
||||||
|
(py3) python -m searx.enginelib cache state
|
||||||
|
(py3) python -m searx.enginelib cache maintenance
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from .. import enginelib
|
||||||
|
|
||||||
|
# Root command line application of ``python -m searx.enginelib``.
app = typer.Typer()
# Mount the cache commands defined in the enginelib package as sub-command
# group ``cache``.
app.add_typer(enginelib.app, name="cache", help="Commands related to the cache of the engines.")
# Run the command line; this is executed whenever this module is loaded.
app()
|
Loading…
x
Reference in New Issue
Block a user