[fix] ResourceWarning: unclosed database in sqlite3

Reported:

- https://github.com/inetol-infrastructure/searxng-container/issues/5

Related:

- https://github.com/searxng/searxng/issues/4405#issuecomment-2692352352

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2025-04-16 18:59:06 +02:00 committed by Markus Heiser
parent 590b211652
commit 4a594f1b53
4 changed files with 200 additions and 73 deletions

View File

@ -29,7 +29,7 @@ Programming Interface
parameter. This function can be omitted, if there is no need to setup anything parameter. This function can be omitted, if there is no need to setup anything
in advance. in advance.
:py:func:`search(query, params) <searx.engines.demo_offline.searc>` :py:func:`search(query, params) <searx.engines.demo_offline.search>`
Each offline engine has a function named ``search``. This function is Each offline engine has a function named ``search``. This function is
responsible to perform a search and return the results in a presentable responsible to perform a search and return the results in a presentable
format. (Where *presentable* means presentable by the selected result format. (Where *presentable* means presentable by the selected result

View File

@ -1,5 +1,12 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""Implementations for providing the favicons in SearXNG""" """Implementations for providing the favicons in SearXNG.
There is a command line for developer purposes and for deeper analysis. Here is
an example in which the command line is called in the development environment::
$ ./manage pyenv.cmd bash --norc --noprofile
(py3) python -m searx.favicons --help
"""
from __future__ import annotations from __future__ import annotations

View File

@ -236,6 +236,12 @@ class FaviconCacheSQLite(sqlitedb.SQLiteAppl, FaviconCache):
model in the SQLite DB is implemented using the abstract class model in the SQLite DB is implemented using the abstract class
:py:obj:`sqlitedb.SQLiteAppl`. :py:obj:`sqlitedb.SQLiteAppl`.
For introspection of the DB, jump into developer environment and run command
to show cache state::
$ ./manage pyenv.cmd bash --norc --noprofile
(py3) python -m searx.favicons cache state
The following configurations are required / supported: The following configurations are required / supported:
- :py:obj:`FaviconCacheConfig.db_url` - :py:obj:`FaviconCacheConfig.db_url`
@ -357,6 +363,10 @@ CREATE TABLE IF NOT EXISTS blob_map (
if sha256 != FALLBACK_ICON: if sha256 != FALLBACK_ICON:
conn.execute(self.SQL_INSERT_BLOBS, (sha256, bytes_c, mime, data)) conn.execute(self.SQL_INSERT_BLOBS, (sha256, bytes_c, mime, data))
conn.execute(self.SQL_INSERT_BLOB_MAP, (sha256, resolver, authority)) conn.execute(self.SQL_INSERT_BLOB_MAP, (sha256, resolver, authority))
# hint: the with context of the connection object closes the transaction
# but not the DB connection. The connection has to be closed by the
# caller of self.connect()!
conn.close()
return True return True
@ -376,7 +386,8 @@ CREATE TABLE IF NOT EXISTS blob_map (
return return
self.properties.set("LAST_MAINTENANCE", "") # hint: this (also) sets the m_time of the property! self.properties.set("LAST_MAINTENANCE", "") # hint: this (also) sets the m_time of the property!
# do maintenance tasks # Do maintenance tasks. This can be take a little more time, to avoid
# DB locks, etablish a new DB connecton.
with self.connect() as conn: with self.connect() as conn:
@ -407,6 +418,12 @@ CREATE TABLE IF NOT EXISTS blob_map (
conn.execute("DELETE FROM blob_map WHERE sha256 IN ('%s')" % "','".join(sha_list)) conn.execute("DELETE FROM blob_map WHERE sha256 IN ('%s')" % "','".join(sha_list))
logger.debug("dropped %s blobs with total size of %s bytes", len(sha_list), c) logger.debug("dropped %s blobs with total size of %s bytes", len(sha_list), c)
# Vacuuming the WALs
# https://www.theunterminatedstring.com/sqlite-vacuuming/
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
conn.close()
def _query_val(self, sql, default=None): def _query_val(self, sql, default=None):
val = self.DB.execute(sql).fetchone() val = self.DB.execute(sql).fetchone()
if val is not None: if val is not None:

View File

@ -7,20 +7,83 @@
:py:obj:`SQLiteProperties`: :py:obj:`SQLiteProperties`:
Class to manage properties stored in a database. Class to manage properties stored in a database.
---- Examplarical implementations based on :py:obj:`SQLiteAppl`:
:py:obj:`searx.cache.ExpireCacheSQLite` :
Cache that manages key/value pairs in a SQLite DB, in which the key/value
pairs are deleted after an "expire" time. This type of cache is used, for
example, for the engines, see :py:obj:`searx.enginelib.EngineCache`.
:py:obj:`searx.favicons.cache.FaviconCacheSQLite` :
Favicon cache that manages the favicon BLOBs in a SQLite DB.
----
""" """
from __future__ import annotations from __future__ import annotations
import sys import abc
import datetime
import re import re
import sqlite3 import sqlite3
import sys
import threading import threading
import abc import uuid
from searx import logger from searx import logger
logger = logger.getChild('sqlitedb') logger = logger.getChild("sqlitedb")
THREAD_LOCAL = threading.local()
class DBSession:
"""A *thead-local* DB session"""
@classmethod
def get_connect(cls, app: SQLiteAppl) -> sqlite3.Connection:
"""Returns a thread local DB connection. The connection is only
established once per thread.
"""
if getattr(THREAD_LOCAL, "DBSession_map", None) is None:
THREAD_LOCAL.DBSession_map = {}
session = THREAD_LOCAL.DBSession_map.get(app.db_url)
if session is None:
session = cls(app)
return session.conn
def __init__(self, app: SQLiteAppl):
self.uuid = uuid.uuid4()
self.app = app
self._conn = None
# self.__del__ will be called, when thread ends
if getattr(THREAD_LOCAL, "DBSession_map", None) is None:
THREAD_LOCAL.DBSession_map = {}
THREAD_LOCAL.DBSession_map[self.app.db_url] = self
@property
def conn(self) -> sqlite3.Connection:
msg = f"[{threading.current_thread().ident}] DBSession: " f"{self.app.__class__.__name__}({self.app.db_url})"
if self._conn is None:
self._conn = self.app.connect()
logger.debug("%s --> created new connection", msg)
# else:
# logger.debug("%s --> already connected", msg)
return self._conn
def __del__(self):
try:
if self._conn is not None:
# HINT: Don't use Python's logging facility in a destructor, it
# will produce error reports when python aborts the process or
# thread, because at this point objects that the logging module
# needs, do not exist anymore.
# msg = f"DBSession: close [{self.uuid}] {self.app.__class__.__name__}({self.app.db_url})"
# logger.debug(msg)
self._conn.close()
except Exception: # pylint: disable=broad-exception-caught
pass
class SQLiteAppl(abc.ABC): class SQLiteAppl(abc.ABC):
@ -51,13 +114,18 @@ class SQLiteAppl(abc.ABC):
""" """
SQLITE_JOURNAL_MODE = "WAL" SQLITE_JOURNAL_MODE = "WAL"
"""``SQLiteAppl`` applications are optimzed for WAL_ mode, its not recommend
to change the journal mode (see :py:obj:`SQLiteAppl.tear_down`).
.. _WAL: https://sqlite.org/wal.html
"""
SQLITE_CONNECT_ARGS = { SQLITE_CONNECT_ARGS = {
# "timeout": 5.0, # "timeout": 5.0,
# "detect_types": 0, # "detect_types": 0,
"check_same_thread": bool(SQLITE_THREADING_MODE != "serialized"), "check_same_thread": bool(SQLITE_THREADING_MODE != "serialized"),
"cached_statements": 0, # https://github.com/python/cpython/issues/118172 "cached_statements": 0, # https://github.com/python/cpython/issues/118172
# "uri": False, # "uri": False,
"autocommit": False, "isolation_level": None,
} # fmt:skip } # fmt:skip
"""Connection arguments (:py:obj:`sqlite3.connect`) """Connection arguments (:py:obj:`sqlite3.connect`)
@ -66,10 +134,6 @@ class SQLiteAppl(abc.ABC):
``serialized``. The check is more of a hindrance in this case because it ``serialized``. The check is more of a hindrance in this case because it
would prevent a DB connector from being used in multiple threads. would prevent a DB connector from being used in multiple threads.
``autocommit``:
Is disabled by default. Note: autocommit option has been added in Python
3.12.
``cached_statements``: ``cached_statements``:
Is set to ``0`` by default. Note: Python 3.12+ fetch result are not Is set to ``0`` by default. Note: Python 3.12+ fetch result are not
consistent in multi-threading application and causing an API misuse error. consistent in multi-threading application and causing an API misuse error.
@ -89,9 +153,17 @@ class SQLiteAppl(abc.ABC):
self.db_url = db_url self.db_url = db_url
self.properties = SQLiteProperties(db_url) self.properties = SQLiteProperties(db_url)
self.thread_local = threading.local()
self._init_done = False self._init_done = False
self._compatibility() self._compatibility()
# atexit.register(self.tear_down)
# def tear_down(self):
# """:ref:`Vacuuming the WALs` upon normal interpreter termination
# (:py:obj:`atexit.register`).
# .. _SQLite: Vacuuming the WALs: https://www.theunterminatedstring.com/sqlite-vacuuming/
# """
# self.DB.execute("PRAGMA wal_checkpoint(TRUNCATE)")
def _compatibility(self): def _compatibility(self):
@ -113,19 +185,31 @@ class SQLiteAppl(abc.ABC):
"SQLite runtime library version %s is not supported (require >= 3.35)", sqlite3.sqlite_version "SQLite runtime library version %s is not supported (require >= 3.35)", sqlite3.sqlite_version
) )
def _connect(self) -> sqlite3.Connection:
conn = sqlite3.Connection(self.db_url, **self.SQLITE_CONNECT_ARGS) # type: ignore
conn.execute(f"PRAGMA journal_mode={self.SQLITE_JOURNAL_MODE}")
self.register_functions(conn)
return conn
def connect(self) -> sqlite3.Connection: def connect(self) -> sqlite3.Connection:
"""Creates a new DB connection (:py:obj:`SQLITE_CONNECT_ARGS`). If not """Creates a new DB connection (:py:obj:`SQLITE_CONNECT_ARGS`). If not
already done, the DB schema is set up already done, the DB schema is set up. The caller must take care of
closing the resource. Alternatively, :py:obj:`SQLiteAppl.DB` can also
be used (the resource behind `self.DB` is automatically closed when the
process or thread is terminated).
""" """
if sys.version_info < (3, 12): if sys.version_info < (3, 12):
# Prior Python 3.12 there is no "autocommit" option # Prior Python 3.12 there is no "autocommit" option
self.SQLITE_CONNECT_ARGS.pop("autocommit", None) self.SQLITE_CONNECT_ARGS.pop("autocommit", None)
self.init() msg = (
logger.debug("%s: connect to DB: %s // %s", self.__class__.__name__, self.db_url, self.SQLITE_CONNECT_ARGS) f"[{threading.current_thread().ident}] {self.__class__.__name__}({self.db_url})"
conn = sqlite3.Connection(self.db_url, **self.SQLITE_CONNECT_ARGS) # type: ignore f" {self.SQLITE_CONNECT_ARGS} // {self.SQLITE_JOURNAL_MODE}"
conn.execute(f"PRAGMA journal_mode={self.SQLITE_JOURNAL_MODE}") )
self.register_functions(conn) logger.debug(msg)
with self._connect() as conn:
self.init(conn)
return conn return conn
def register_functions(self, conn): def register_functions(self, conn):
@ -150,7 +234,7 @@ class SQLiteAppl(abc.ABC):
.. _re.search: https://docs.python.org/3/library/re.html#re.search .. _re.search: https://docs.python.org/3/library/re.html#re.search
""" """
conn.create_function('regexp', 2, lambda x, y: 1 if re.search(x, y) else 0, deterministic=True) conn.create_function("regexp", 2, lambda x, y: 1 if re.search(x, y) else 0, deterministic=True)
@property @property
def DB(self) -> sqlite3.Connection: def DB(self) -> sqlite3.Connection:
@ -168,57 +252,66 @@ class SQLiteAppl(abc.ABC):
https://docs.python.org/3/library/sqlite3.html#sqlite3-controlling-transactions https://docs.python.org/3/library/sqlite3.html#sqlite3-controlling-transactions
""" """
if getattr(self.thread_local, 'DB', None) is None: conn = None
self.thread_local.DB = self.connect()
# Theoretically it is possible to reuse the DB cursor across threads as if self.SQLITE_THREADING_MODE == "serialized":
# of Python 3.12, in practice the threading of the cursor seems to me to # Theoretically it is possible to reuse the DB cursor across threads
# be so faulty that I prefer to establish one connection per thread # as of Python 3.12, in practice the threading of the cursor seems
# to me a little faulty that I prefer to establish one connection
self.thread_local.DB.commit() # per thread.
return self.thread_local.DB
# In "serialized" mode, SQLite can be safely used by multiple threads
# with no restriction.
#
# if self.SQLITE_THREADING_MODE != "serialized":
# if getattr(self.thread_local, 'DB', None) is None:
# self.thread_local.DB = self.connect()
# return self.thread_local.DB
# #
# may we can activate this code one day ..
# if self._DB is None: # if self._DB is None:
# self._DB = self.connect() # pylint: disable=attribute-defined-outside-init # self._DB = self.connect()
# return self._DB # conn = self._DB
conn = DBSession.get_connect(self)
else:
conn = DBSession.get_connect(self)
def init(self): # Since more than one instance of SQLiteAppl share the same DB
# connection, we need to make sure that each SQLiteAppl instance has run
# its init method at least once.
self.init(conn)
return conn
def init(self, conn: sqlite3.Connection) -> bool:
"""Initializes the DB schema and properties, is only executed once even """Initializes the DB schema and properties, is only executed once even
if called several times.""" if called several times.
If the initialization has not yet taken place, it is carried out and a
`True` is returned to the caller at the end. If the initialization has
already been carried out in the past, `False` is returned.
"""
if self._init_done: if self._init_done:
return return False
self._init_done = True self._init_done = True
logger.debug("init DB: %s", self.db_url) logger.debug("init DB: %s", self.db_url)
self.properties.init() self.properties.init(conn)
ver = self.properties("DB_SCHEMA") ver = self.properties("DB_SCHEMA")
if ver is None: if ver is None:
with self.properties.DB: with conn:
self.create_schema(self.properties.DB) self.create_schema(conn)
else: else:
ver = int(ver) ver = int(ver)
if ver != self.DB_SCHEMA: if ver != self.DB_SCHEMA:
raise sqlite3.DatabaseError("Expected DB schema v%s, DB schema is v%s" % (self.DB_SCHEMA, ver)) raise sqlite3.DatabaseError("Expected DB schema v%s, DB schema is v%s" % (self.DB_SCHEMA, ver))
logger.debug("DB_SCHEMA = %s", ver) logger.debug("DB_SCHEMA = %s", ver)
def create_schema(self, conn): return True
def create_schema(self, conn: sqlite3.Connection):
logger.debug("create schema ..") logger.debug("create schema ..")
self.properties.set("DB_SCHEMA", self.DB_SCHEMA)
self.properties.set("LAST_MAINTENANCE", "")
with conn: with conn:
for table_name, sql in self.DDL_CREATE_TABLES.items(): for table_name, sql in self.DDL_CREATE_TABLES.items():
conn.execute(sql) conn.execute(sql)
self.properties.set(f"Table {table_name} created", table_name) self.properties.set(f"Table {table_name} created", table_name)
self.properties.set("DB_SCHEMA", self.DB_SCHEMA)
self.properties.set("LAST_MAINTENANCE", "")
class SQLiteProperties(SQLiteAppl): class SQLiteProperties(SQLiteAppl):
@ -253,33 +346,32 @@ CREATE TABLE IF NOT EXISTS properties (
" ON CONFLICT(name) DO UPDATE" " ON CONFLICT(name) DO UPDATE"
" SET value=excluded.value, m_time=strftime('%s', 'now')" " SET value=excluded.value, m_time=strftime('%s', 'now')"
) )
SQL_DELETE = "DELETE FROM properties WHERE name = ?"
SQL_TABLE_EXISTS = ( SQL_TABLE_EXISTS = (
"SELECT name FROM sqlite_master" "SELECT name FROM sqlite_master"
" WHERE type='table' AND name='properties'" " WHERE type='table' AND name='properties'"
) # fmt:skip ) # fmt:skip
SQLITE_CONNECT_ARGS = dict(SQLiteAppl.SQLITE_CONNECT_ARGS) SQLITE_CONNECT_ARGS = dict(SQLiteAppl.SQLITE_CONNECT_ARGS)
SQLITE_CONNECT_ARGS["autocommit"] = True # This option has no effect before Python 3.12
def __init__(self, db_url: str): # pylint: disable=super-init-not-called def __init__(self, db_url: str): # pylint: disable=super-init-not-called
self.db_url = db_url self.db_url = db_url
self.thread_local = threading.local()
self._init_done = False self._init_done = False
self._compatibility() self._compatibility()
def init(self): def init(self, conn: sqlite3.Connection) -> bool:
"""Initializes DB schema of the properties in the DB.""" """Initializes DB schema of the properties in the DB."""
if self._init_done: if self._init_done:
return return False
self._init_done = True self._init_done = True
logger.debug("init properties of DB: %s", self.db_url) logger.debug("init properties of DB: %s", self.db_url)
with self.DB as conn:
res = conn.execute(self.SQL_TABLE_EXISTS) res = conn.execute(self.SQL_TABLE_EXISTS)
if res.fetchone() is None: # DB schema needs to be be created if res.fetchone() is None: # DB schema needs to be be created
self.create_schema(conn) self.create_schema(conn)
return True
def __call__(self, name, default=None): def __call__(self, name: str, default=None):
"""Returns the value of the property ``name`` or ``default`` if property """Returns the value of the property ``name`` or ``default`` if property
not exists in DB.""" not exists in DB."""
@ -288,36 +380,47 @@ CREATE TABLE IF NOT EXISTS properties (
return default return default
return res[0] return res[0]
def set(self, name, value): def set(self, name: str, value: str | int):
"""Set ``value`` of property ``name`` in DB. If property already """Set ``value`` of property ``name`` in DB. If property already
exists, update the ``m_time`` (and the value).""" exists, update the ``m_time`` (and the value)."""
with self.DB:
self.DB.execute(self.SQL_SET, (name, value)) self.DB.execute(self.SQL_SET, (name, value))
if sys.version_info <= (3, 12): def delete(self, name: str) -> int:
# Prior Python 3.12 there is no "autocommit" option / lets commit """Delete of property ``name`` from DB."""
# explicitely. with self.DB:
self.DB.commit() cur = self.DB.execute(self.SQL_DELETE, (name,))
return cur.rowcount
def row(self, name, default=None): def row(self, name: str, default=None):
"""Returns the DB row of property ``name`` or ``default`` if property """Returns the DB row of property ``name`` or ``default`` if property
not exists in DB.""" not exists in DB."""
cur = self.DB.cursor() res = self.DB.execute("SELECT * FROM properties WHERE name = ?", (name,))
cur.execute("SELECT * FROM properties WHERE name = ?", (name,)) row = res.fetchone()
res = cur.fetchone() if row is None:
if res is None:
return default return default
col_names = [column[0] for column in cur.description]
return dict(zip(col_names, res))
def m_time(self, name, default: int = 0) -> int: col_names = [column[0] for column in row.description]
return dict(zip(col_names, row))
def m_time(self, name: str, default: int = 0) -> int:
"""Last modification time of this property.""" """Last modification time of this property."""
res = self.DB.execute(self.SQL_M_TIME, (name,)).fetchone() res = self.DB.execute(self.SQL_M_TIME, (name,))
if res is None: row = res.fetchone()
if row is None:
return default return default
return int(res[0]) return int(row[0])
def create_schema(self, conn): def create_schema(self, conn):
with conn: with conn:
conn.execute(self.DDL_PROPERTIES) conn.execute(self.DDL_PROPERTIES)
def __str__(self) -> str:
lines = []
for row in self.DB.execute("SELECT name, value, m_time FROM properties"):
name, value, m_time = row
m_time = datetime.datetime.fromtimestamp(m_time).strftime("%Y-%m-%d %H:%M:%S")
lines.append(f"[last modified: {m_time}] {name:20s}: {value}")
return "\n".join(lines)