
This patch brings two major changes: - ``Result.filter_urls(..)`` to pass a filter function for URL fields - The ``enabled_plugins:`` section in SearXNG's settings no longer exists. To understand plugin development, build the documentation: $ make docs.clean docs.live and read http://0.0.0.0:8000/dev/plugins/development.html There is no longer a distinction between built-in and external plugins; all plugins are registered via the settings in the ``plugins:`` section. In SearXNG, plugins can be registered via a fully qualified class name. A configuration (`PluginCfg`) can be transferred to the plugin, e.g. to activate it by default / *opt-in* or *opt-out* from user's point of view. built-in plugins ================ The built-in plugins are all located in the namespace `searx.plugins`. .. code:: yaml plugins: searx.plugins.calculator.SXNGPlugin: active: true searx.plugins.hash_plugin.SXNGPlugin: active: true searx.plugins.self_info.SXNGPlugin: active: true searx.plugins.tracker_url_remover.SXNGPlugin: active: true searx.plugins.unit_converter.SXNGPlugin: active: true searx.plugins.ahmia_filter.SXNGPlugin: active: true searx.plugins.hostnames.SXNGPlugin: active: true searx.plugins.oa_doi_rewrite.SXNGPlugin: active: false searx.plugins.tor_check.SXNGPlugin: active: false external plugins ================ SearXNG supports *external plugins* / there is no need to install one, SearXNG runs out of the box. - Only show green hosted results: https://github.com/return42/tgwf-searx-plugins/ To get a developer installation in a SearXNG developer environment: .. code:: sh $ git clone git@github.com:return42/tgwf-searx-plugins.git $ ./manage pyenv.cmd python -m \ pip install -e tgwf-searx-plugins To register the plugin in SearXNG add ``only_show_green_results.SXNGPlugin`` to the ``plugins:`` section: .. code:: yaml plugins: # ... only_show_green_results.SXNGPlugin: active: false Result.filter_urls(..) 
====================== The ``Result.filter_urls(..)`` can be used to filter and/or modify URL fields. In the following example, the filter function ``my_url_filter``: .. code:: python def my_url_filter(result, field_name, url_src) -> bool | str: if "google" in url_src: return False # remove URL field from result if "facebook" in url_src: new_url = url_src.replace("facebook", "fb-dummy") return new_url # return modified URL return True # leave URL in field unchanged is applied to all URL fields in the :py:obj:`Plugin.on_result` hook: .. code:: python class MyUrlFilter(Plugin): ... def on_result(self, request, search, result) -> bool: result.filter_urls(my_url_filter) return True Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
305 lines
10 KiB
Python
305 lines
10 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
# pylint: disable=too-few-public-methods,missing-module-docstring
|
|
|
|
from __future__ import annotations
|
|
|
|
# Public API of this module: the names exported by a star-import.
__all__ = ["PluginInfo", "Plugin", "PluginCfg", "PluginStorage"]
|
|
|
|
import abc
|
|
import importlib
|
|
import inspect
|
|
import logging
|
|
import re
|
|
import typing
|
|
|
|
from dataclasses import dataclass, field
|
|
|
|
from searx.extended_types import SXNG_Request
|
|
from searx.result_types import Result
|
|
|
|
if typing.TYPE_CHECKING:
|
|
from searx.search import SearchWithPlugins
|
|
import flask
|
|
|
|
# Module-level logger of the plugin framework (logger name "searx.plugins").
log: logging.Logger = logging.getLogger("searx.plugins")
|
|
|
|
|
|
@dataclass
class PluginInfo:
    """Descriptive metadata of a *plugin*; these details are presented to the
    user in the Preferences menu.

    So that the texts can be translated into other languages, they must be
    written in English and translated with :py:obj:`flask_babel.gettext`.
    """

    id: str
    """ID-selector of the plugin in HTML/CSS (``#<id>``)."""

    name: str
    """Display name of the *plugin*."""

    description: str
    """Short description of the *plugin*."""

    preference_section: typing.Literal["general", "ui", "privacy", "query"] | None = "general"
    """Section (tab/group) of the preferences in which the plugin is listed.

    The value ``query`` is reserved for plugins that are triggered by a
    *keyword* within the search query, see:

    - :py:obj:`PluginInfo.examples`
    - :py:obj:`Plugin.keywords`

    Such plugins are listed in the preferences tab *Special Queries*.
    """

    examples: list[str] = field(default_factory=list)
    """Short usage examples / example query terms."""

    keywords: list[str] = field(default_factory=list)
    """See :py:obj:`Plugin.keywords`"""
|
|
|
|
|
|
# Pattern a plugin ID has to match (applied with ``match()``, i.e. anchored at
# the start only): one lowercase ASCII letter, then one lowercase letter or
# digit, then arbitrary trailing characters -- the ``.*`` puts no restriction
# on the rest of the ID.
ID_REGXP = re.compile("[a-z][a-z0-9].*")
|
|
|
|
|
|
class Plugin(abc.ABC):
    """Abstract base class of all Plugins."""

    id: str = ""
    """The ID (suffix) in the HTML form."""

    active: typing.ClassVar[bool]
    """Plugin is enabled/disabled by default (:py:obj:`PluginCfg.active`)."""

    keywords: list[str] = []
    """Keywords in the search query that activate the plugin.  The *keyword* is
    the first word in a search query.  If a plugin should be executed regardless
    of the search query, the list of keywords should be empty (which is also the
    default in the base class for Plugins)."""

    log: logging.Logger
    """A logger object, is automatically initialized when calling the
    constructor (if not already set in the subclass)."""

    info: PluginInfo
    """Information about the *plugin*, see :py:obj:`PluginInfo`."""

    fqn: str = ""
    """If left empty, the constructor sets it to the name of the module in
    which the plugin class is defined."""

    def __init__(self, plg_cfg: PluginCfg) -> None:
        """Set up the plugin instance from its configuration.

        Every attribute of ``plg_cfg`` is copied onto the instance, the plugin
        ID is validated and a logger is created if the subclass did not
        provide one.

        :raises NotImplementedError: if a mandatory attribute is ``None``.
        :raises ValueError: if the ID does not match :py:obj:`ID_REGXP`.
        """
        super().__init__()
        if not self.fqn:
            self.fqn = self.__class__.__module__

        # names from the configuration
        for n, v in plg_cfg.__dict__.items():
            setattr(self, n, v)

        # names that must be set by the plugin implementation
        for attr in ("id",):
            if getattr(self, attr, None) is None:
                raise NotImplementedError(f"plugin {self} is missing attribute {attr}")

        if not ID_REGXP.match(self.id):
            raise ValueError(f"plugin ID {self.id} contains invalid character (use lowercase ASCII)")

        if not getattr(self, "log", None):
            pkg_name = inspect.getmodule(self.__class__).__package__  # type: ignore
            self.log = logging.getLogger(f"{pkg_name}.{self.id}")

    def __hash__(self) -> int:
        """The hash value is used in :py:obj:`set`, for example, when an object
        is added to the set.  The hash value is also used in other contexts,
        e.g. when checking for equality to identify identical plugins from
        different sources (name collisions)."""

        # ``id`` is the builtin here (class attributes are not visible in
        # method scope), i.e. the hash is the identity of the object.
        return id(self)

    def __eq__(self, other: object) -> bool:
        """:py:obj:`Plugin` objects are equal if the hash values of the two
        objects are equal.  Objects that are not plugins never compare equal."""

        if not isinstance(other, Plugin):
            # Without this guard, ``hash(other)`` raises a TypeError for
            # unhashable operands (e.g. a list) and an integer that happens to
            # equal ``id(self)`` would compare equal to the plugin.
            return NotImplemented
        return hash(self) == hash(other)

    def init(self, app: "flask.Flask") -> bool:  # pylint: disable=unused-argument
        """Initialization of the plugin, the return value decides whether this
        plugin is active or not.  Initialization only takes place once, at the
        time the WEB application is set up.  The base method always returns
        ``True``, the method can be overwritten in the inheritances,

        - ``True`` plugin is active
        - ``False`` plugin is inactive
        """
        return True

    # pylint: disable=unused-argument
    def pre_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> bool:
        """Runs BEFORE the search request and returns a boolean:

        - ``True`` to continue the search
        - ``False`` to stop the search
        """
        return True

    def on_result(self, request: SXNG_Request, search: "SearchWithPlugins", result: Result) -> bool:
        """Runs for each result of each engine and returns a boolean:

        - ``True`` to keep the result
        - ``False`` to remove the result from the result list

        The ``result`` can be modified to the needs.

        .. hint::

           If :py:obj:`Result.url <searx.result_types._base.Result.url>` is modified,
           :py:obj:`Result.parsed_url <searx.result_types._base.Result.parsed_url>` must
           be changed accordingly:

           .. code:: python

              result["parsed_url"] = urlparse(result["url"])
        """
        return True

    def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None | typing.Sequence[Result]:
        """Runs AFTER the search request.  Can return a list of
        :py:obj:`Result <searx.result_types._base.Result>` objects to be added to the
        final result list."""
        return
|
|
|
|
|
|
@dataclass
class PluginCfg:
    """Configuration of a single plugin as given in the settings.

    .. code:: yaml

       mypackage.mymodule.MyPlugin:
         active: true
    """

    active: bool = False
    """If ``True``, the plugin is active by default and the user can *opt-out*
    in the preferences."""
|
|
|
|
|
|
class PluginStorage:
    """A storage for managing the *plugins* of SearXNG."""

    plugin_list: set[Plugin]
    """The set of :py:obj:`Plugin` objects in this storage."""

    def __init__(self):
        self.plugin_list = set()

    def __iter__(self):
        yield from self.plugin_list

    def __len__(self):
        return len(self.plugin_list)

    @property
    def info(self) -> list[PluginInfo]:
        """The :py:obj:`PluginInfo` of every plugin in this storage."""
        return [p.info for p in self.plugin_list]

    def _selected_plugins(self, search: "SearchWithPlugins") -> list[Plugin]:
        """Plugins of this storage whose ID is listed in ``search.user_plugins``."""
        return [p for p in self.plugin_list if p.id in search.user_plugins]

    def load_settings(self, cfg: dict[str, dict]):
        """Load plugins configured in SearXNG's settings :ref:`settings
        plugins`.

        The keys of ``cfg`` are fully qualified class names
        (``<module>.<class>``), the values are the keyword arguments of
        :py:obj:`PluginCfg`.

        :raises ValueError: if a name is not of the form ``<module>.<class>``
            or the class can't be imported.
        """

        for fqn, plg_settings in cfg.items():
            mod_name, _, cls_name = fqn.rpartition(".")
            if not mod_name:
                # no dot in the name: there is no module to import from
                raise ValueError(f"plugin {fqn} is not a fully qualified class name (<module>.<class>)")

            cls = None
            try:
                mod = importlib.import_module(mod_name)
                cls = getattr(mod, cls_name, None)
            except Exception as exc:  # pylint: disable=broad-exception-caught
                # the import error is logged, the missing class is reported below
                log.exception(exc)

            if cls is None:
                msg = f"plugin {fqn} is not implemented"
                raise ValueError(msg)
            plg = cls(PluginCfg(**plg_settings))
            self.register(plg)

    def register(self, plugin: Plugin):
        """Register a :py:obj:`Plugin`.  In case of name collision (if two
        plugins have same ID) a :py:obj:`KeyError` exception is raised.
        """

        # Compare IDs with IDs: comparing the plugin object itself with the ID
        # strings would never match and collisions would go unnoticed.
        if any(plugin.id == p.id for p in self.plugin_list):
            msg = f"name collision '{plugin.id}'"
            plugin.log.critical(msg)
            raise KeyError(msg)

        self.plugin_list.add(plugin)
        plugin.log.debug("plugin has been loaded")

    def init(self, app: "flask.Flask") -> None:
        """Calls the method :py:obj:`Plugin.init` of each plugin in this
        storage.  Depending on its return value, the plugin is removed from
        *this* storage or not."""

        # iterate over a copy, the set is modified while looping
        for plg in self.plugin_list.copy():
            if not plg.init(app):
                self.plugin_list.remove(plg)

    def pre_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> bool:
        """Calls :py:obj:`Plugin.pre_search` of the plugins listed in
        ``search.user_plugins``.  Returns ``False`` as soon as one plugin
        returns a false value, otherwise ``True``.  A plugin that raises an
        exception is logged and skipped."""

        for plugin in self._selected_plugins(search):
            try:
                ret = bool(plugin.pre_search(request=request, search=search))
            except Exception:  # pylint: disable=broad-except
                plugin.log.exception("Exception while calling pre_search")
                continue
            if not ret:
                # skip this search on the first False from a plugin
                return False
        return True

    def on_result(self, request: SXNG_Request, search: "SearchWithPlugins", result: Result) -> bool:
        """Calls :py:obj:`Plugin.on_result` of the plugins listed in
        ``search.user_plugins``.  Returns ``False`` as soon as one plugin
        returns a false value, otherwise ``True``.  A plugin that raises an
        exception is logged and skipped."""

        for plugin in self._selected_plugins(search):
            try:
                ret = bool(plugin.on_result(request=request, search=search, result=result))
            except Exception:  # pylint: disable=broad-except
                plugin.log.exception("Exception while calling on_result")
                continue
            if not ret:
                # ignore this result item on the first False from a plugin
                return False
        return True

    def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None:
        """Extend :py:obj:`search.result_container
        <searx.results.ResultContainer>` with result items from plugins listed
        in :py:obj:`search.user_plugins <SearchWithPlugins.user_plugins>`.
        """

        # the keyword is the first word of the query (None for an empty query)
        words = search.search_query.query.split()
        keyword = words[0] if words else None

        for plugin in self._selected_plugins(search):

            # plugin with keywords: skip plugin if no keyword match
            if plugin.keywords and keyword and keyword not in plugin.keywords:
                continue
            try:
                results = plugin.post_search(request=request, search=search) or []
            except Exception:  # pylint: disable=broad-except
                plugin.log.exception("Exception while calling post_search")
                continue

            # In case of *plugins* prefix ``plugin:`` is set, see searx.result_types.Result
            search.result_container.extend(f"plugin: {plugin.id}", results)
|