searxng/searx/plugins/_core.py
Markus Heiser 50f92779bd [refactor] migrate plugins from "module" to class SXNGPlugin
This patch brings two major changes:

- ``Result.filter_urls(..)`` to pass a filter function for URL fields
- The ``enabled_plugins:`` section in SearXNG's settings no longer exists.

To understand plugin development, build the documentation:

    $ make docs.clean docs.live

and read http://0.0.0.0:8000/dev/plugins/development.html

There is no longer a distinction between built-in and external plugins; all
plugins are registered via the settings in the ``plugins:`` section.

In SearXNG, plugins can be registered via a fully qualified class name.  A
configuration (`PluginCfg`) can be transferred to the plugin, e.g. to activate
it by default / *opt-in* or *opt-out* from user's point of view.

built-in plugins
================

The built-in plugins are all located in the namespace `searx.plugins`.

.. code:: yaml

    plugins:

      searx.plugins.calculator.SXNGPlugin:
        active: true

      searx.plugins.hash_plugin.SXNGPlugin:
        active: true

      searx.plugins.self_info.SXNGPlugin:
        active: true

      searx.plugins.tracker_url_remover.SXNGPlugin:
        active: true

      searx.plugins.unit_converter.SXNGPlugin:
        active: true

      searx.plugins.ahmia_filter.SXNGPlugin:
        active: true

      searx.plugins.hostnames.SXNGPlugin:
        active: true

      searx.plugins.oa_doi_rewrite.SXNGPlugin:
        active: false

      searx.plugins.tor_check.SXNGPlugin:
        active: false

external plugins
================

SearXNG supports *external plugins*, but there is no need to install one:
SearXNG runs out of the box.

- Only show green hosted results: https://github.com/return42/tgwf-searx-plugins/

To get a developer installation in a SearXNG developer environment:

.. code:: sh

   $ git clone git@github.com:return42/tgwf-searx-plugins.git
   $ ./manage pyenv.cmd python -m \
         pip install -e tgwf-searx-plugins

To register the plugin in SearXNG add ``only_show_green_results.SXNGPlugin`` to
the ``plugins:`` section:

.. code:: yaml

    plugins:
      # ...
      only_show_green_results.SXNGPlugin:
        active: false

Result.filter_urls(..)
======================

The ``Result.filter_urls(..)`` can be used to filter and/or modify URL fields.
In the following example, the filter function ``my_url_filter``:

.. code:: python

   def my_url_filter(result, field_name, url_src) -> bool | str:
       if "google" in url_src:
           return False              # remove URL field from result
       if "facebook" in url_src:
           new_url = url_src.replace("facebook", "fb-dummy")
           return new_url            # return modified URL
       return True                   # leave URL in field unchanged

is applied to all URL fields in the :py:obj:`Plugin.on_result` hook:

.. code:: python

   class MyUrlFilter(Plugin):
       ...
       def on_result(self, request, search, result) -> bool:
           result.filter_urls(my_url_filter)
           return True

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2025-03-29 10:16:43 +01:00

305 lines
10 KiB
Python

# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=too-few-public-methods,missing-module-docstring
from __future__ import annotations
__all__ = ["PluginInfo", "Plugin", "PluginCfg", "PluginStorage"]
import abc
import importlib
import inspect
import logging
import re
import typing
from dataclasses import dataclass, field
from searx.extended_types import SXNG_Request
from searx.result_types import Result
if typing.TYPE_CHECKING:
from searx.search import SearchWithPlugins
import flask
# Module-level logger named "searx.plugins"; used by PluginStorage.load_settings
# to log exceptions raised while importing a plugin module.
log: logging.Logger = logging.getLogger("searx.plugins")
@dataclass
class PluginInfo:
    """Descriptive metadata of a *plugin*; this is what the user gets to see
    in the Preferences menu.

    The texts must be written in English and translated with
    :py:obj:`flask_babel.gettext` so that they can be shown in other
    languages.
    """

    id: str
    """ID-selector used in HTML/CSS (`#<id>`)."""

    name: str
    """Display name of the *plugin*."""

    description: str
    """Brief description of the *plugin*."""

    preference_section: typing.Literal["general", "ui", "privacy", "query"] | None = "general"
    """Tab/group of the preferences in which the plugin is presented to the
    user.

    ``query`` is reserved for plugins that are triggered by a *keyword* in the
    search query, see:

    - :py:obj:`PluginInfo.examples`
    - :py:obj:`Plugin.keywords`

    Such plugins are listed in the preferences tab *Special Queries*.
    """

    examples: list[str] = field(default_factory=list)
    """Short usage examples / example query terms."""

    keywords: list[str] = field(default_factory=list)
    """See :py:obj:`Plugin.keywords`"""
# Pattern applied with ``ID_REGXP.match`` to a plugin's ``id`` in
# ``Plugin.__init__``: one lowercase ASCII letter, then one lowercase ASCII
# letter or digit, then any characters.  IDs shorter than two characters do
# not match.
# NOTE(review): the trailing ``.*`` accepts arbitrary characters, while the
# error message raised on mismatch asks for lowercase ASCII -- confirm whether
# a stricter pattern was intended before tightening it.
ID_REGXP = re.compile("[a-z][a-z0-9].*")
class Plugin(abc.ABC):
    """Abstract base class of all Plugins."""

    id: str = ""
    """The ID (suffix) in the HTML form."""

    active: typing.ClassVar[bool]
    """Plugin is enabled/disabled by default (:py:obj:`PluginCfg.active`)."""

    keywords: list[str] = []
    """Keywords in the search query that activate the plugin.  The *keyword*
    is the first word of a search query.  A plugin that should run regardless
    of the search query leaves this list empty (the default of the base
    class)."""

    log: logging.Logger
    """Logger of the plugin; created by the constructor unless the subclass
    already provides one."""

    info: PluginInfo
    """Information about the *plugin*, see :py:obj:`PluginInfo`."""

    fqn: str = ""

    def __init__(self, plg_cfg: PluginCfg) -> None:
        """Apply the configuration ``plg_cfg`` to the instance and validate it.

        Every attribute of ``plg_cfg`` is copied onto the plugin instance.

        :raises NotImplementedError: a mandatory attribute is ``None``
        :raises ValueError: the plugin ``id`` does not match ``ID_REGXP``
        """
        super().__init__()

        if not self.fqn:
            # defaults to the name of the module that defines the class
            self.fqn = self.__class__.__mro__[0].__module__

        # take over the names from the configuration
        for cfg_name, cfg_value in vars(plg_cfg).items():
            setattr(self, cfg_name, cfg_value)

        # names the plugin implementation has to provide
        mandatory = ("id",)
        for attr in mandatory:
            if getattr(self, attr, None) is None:
                raise NotImplementedError(f"plugin {self} is missing attribute {attr}")

        if ID_REGXP.match(self.id) is None:
            raise ValueError(f"plugin ID {self.id} contains invalid character (use lowercase ASCII)")

        if getattr(self, "log", None):
            # the subclass already brought its own logger
            return

        pkg_name = inspect.getmodule(self.__class__).__package__  # type: ignore
        self.log = logging.getLogger(f"{pkg_name}.{self.id}")

    def __hash__(self) -> int:
        """The hash value is the identity of the object.  It is used, for
        example, when the plugin is added to a :py:obj:`set`, and by
        :py:obj:`Plugin.__eq__` when plugins are compared."""
        return id(self)

    def __eq__(self, other):
        """Two :py:obj:`Plugin` objects are equal if their hash values are
        equal."""
        return hash(self) == hash(other)

    def init(self, app: "flask.Flask") -> bool:  # pylint: disable=unused-argument
        """Initialization hook of the plugin; the return value decides whether
        the plugin is active.  It is called only once, while the WEB
        application is set up.  The base implementation always returns
        ``True``; subclasses may override it:

        - ``True`` plugin is active
        - ``False`` plugin is inactive
        """
        return True

    # pylint: disable=unused-argument
    def pre_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> bool:
        """Hook that runs BEFORE the search request, returns a boolean:

        - ``True`` to continue the search
        - ``False`` to stop the search
        """
        return True

    def on_result(self, request: SXNG_Request, search: "SearchWithPlugins", result: Result) -> bool:
        """Hook that runs for each result of each engine, returns a boolean:

        - ``True`` to keep the result
        - ``False`` to remove the result from the result list

        The ``result`` can be modified as needed.

        .. hint::

           If :py:obj:`Result.url <searx.result_types._base.Result.url>` is modified,
           :py:obj:`Result.parsed_url <searx.result_types._base.Result.parsed_url>` must
           be changed accordingly:

           .. code:: python

              result["parsed_url"] = urlparse(result["url"])
        """
        return True

    def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None | typing.Sequence[Result]:
        """Hook that runs AFTER the search request.  May return a list of
        :py:obj:`Result <searx.result_types._base.Result>` objects that are
        added to the final result list."""
        return None
@dataclass
class PluginCfg:
    """Configuration of a single plugin, as given in the settings.

    .. code:: yaml

       mypackage.mymodule.MyPlugin:
         active: true
    """

    active: bool = False
    """If true, the plugin is active by default and the user can *opt-out*
    in the preferences."""
class PluginStorage:
    """A storage for managing the *plugins* of SearXNG."""

    plugin_list: set[Plugin]
    """The set of :py:obj:`Plugin` objects in this storage."""

    def __init__(self):
        self.plugin_list = set()

    def __iter__(self):
        yield from self.plugin_list

    def __len__(self):
        return len(self.plugin_list)

    @property
    def info(self) -> list[PluginInfo]:
        """The :py:obj:`PluginInfo` of every plugin in this storage."""
        return [p.info for p in self.plugin_list]

    def _user_plugins(self, search: "SearchWithPlugins") -> list[Plugin]:
        """Plugins of this storage whose ID is listed in
        ``search.user_plugins``."""
        return [p for p in self.plugin_list if p.id in search.user_plugins]

    def load_settings(self, cfg: dict[str, dict]):
        """Load plugins configured in SearXNG's settings :ref:`settings
        plugins`.

        ``cfg`` maps the fully qualified class name of a plugin to the
        keyword arguments of its :py:obj:`PluginCfg`.  A settings value of
        ``None`` (e.g. an empty YAML mapping) is treated as *no settings*.

        :raises ValueError: the class named by the key can't be resolved
        """
        for fqn, plg_settings in cfg.items():
            cls = None
            mod_name, _, cls_name = fqn.rpartition(".")
            # A name without a dot has no module part and can't be imported;
            # it is reported below like any other unresolvable name.
            if mod_name:
                try:
                    mod = importlib.import_module(mod_name)
                    cls = getattr(mod, cls_name, None)
                except Exception as exc:  # pylint: disable=broad-exception-caught
                    log.exception(exc)

            if cls is None:
                msg = f"plugin {fqn} is not implemented"
                raise ValueError(msg)
            plg = cls(PluginCfg(**(plg_settings or {})))
            self.register(plg)

    def register(self, plugin: Plugin):
        """Register a :py:obj:`Plugin`.  In case of name collision (if two
        plugins have same ID) a :py:obj:`KeyError` exception is raised.
        """
        # Compare the IDs: testing the plugin object itself against the
        # registered ID strings never matches, so collisions went unnoticed.
        if any(p.id == plugin.id for p in self.plugin_list):
            msg = f"name collision '{plugin.id}'"
            plugin.log.critical(msg)
            raise KeyError(msg)

        self.plugin_list.add(plugin)
        plugin.log.debug("plugin has been loaded")

    def init(self, app: "flask.Flask") -> None:
        """Calls the method :py:obj:`Plugin.init` of each plugin in this
        storage.  Depending on its return value, the plugin is removed from
        *this* storage or not."""
        # iterate over a copy, the set is modified inside the loop
        for plg in self.plugin_list.copy():
            if not plg.init(app):
                self.plugin_list.remove(plg)

    def pre_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> bool:
        """Call :py:obj:`Plugin.pre_search` of the plugins listed in
        ``search.user_plugins``.

        Returns ``False`` as soon as one plugin returns a false value (the
        remaining plugins are not called), otherwise ``True``.  A plugin that
        raises an exception is logged and skipped.
        """
        ret = True
        for plugin in self._user_plugins(search):
            try:
                ret = bool(plugin.pre_search(request=request, search=search))
            except Exception:  # pylint: disable=broad-except
                plugin.log.exception("Exception while calling pre_search")
                continue
            if not ret:
                # skip this search on the first False from a plugin
                break
        return ret

    def on_result(self, request: SXNG_Request, search: "SearchWithPlugins", result: Result) -> bool:
        """Call :py:obj:`Plugin.on_result` of the plugins listed in
        ``search.user_plugins``.

        Returns ``False`` as soon as one plugin returns a false value (the
        remaining plugins are not called), otherwise ``True``.  A plugin that
        raises an exception is logged and skipped.
        """
        ret = True
        for plugin in self._user_plugins(search):
            try:
                ret = bool(plugin.on_result(request=request, search=search, result=result))
            except Exception:  # pylint: disable=broad-except
                plugin.log.exception("Exception while calling on_result")
                continue
            if not ret:
                # ignore this result item on the first False from a plugin
                break
        return ret

    def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None:
        """Extend :py:obj:`search.result_container
        <searx.results.ResultContainer`> with result items from plugins listed
        in :py:obj:`search.user_plugins <SearchWithPlugins.user_plugins>`.
        """
        # the keyword is the first word of the query (None for an empty query)
        words = search.search_query.query.split()
        keyword = words[0] if words else None

        for plugin in self._user_plugins(search):

            if plugin.keywords:
                # plugin with keywords: skip plugin if no keyword match; with
                # an empty query (no keyword at all) the plugin is still run
                if keyword and keyword not in plugin.keywords:
                    continue
            try:
                results = plugin.post_search(request=request, search=search) or []
            except Exception:  # pylint: disable=broad-except
                plugin.log.exception("Exception while calling post_search")
                continue

            # In case of *plugins* prefix ``plugin:`` is set, see searx.result_types.Result
            search.result_container.extend(f"plugin: {plugin.id}", results)