[fix] rerank plugin: adapt to #4183

This commit is contained in:
GenericMale 2025-01-29 18:17:46 +01:00
parent 7fa0fbd93d
commit ab83de9332
4 changed files with 96 additions and 72 deletions

View File

@ -1,7 +1,7 @@
.. _rerank plugin:
================
Rerank plugin
Rerank
================
.. automodule:: searx.plugins.rerank

View File

@ -68,6 +68,9 @@ class PluginInfo:
keywords: list[str] = field(default_factory=list)
"""See :py:obj:`Plugin.keywords`"""
is_allowed: bool = True
"""Switch to disable the plugin completely, regardless of the user preference."""
class Plugin(abc.ABC):
"""Abstract base class of all Plugins."""

View File

@ -1,4 +1,26 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, missing-class-docstring
from __future__ import annotations
import typing
from flask_babel import gettext
from searx import settings
from searx.plugins import Plugin, PluginInfo
from searx.result_types import EngineResults
if typing.TYPE_CHECKING:
from searx.search import SearchWithPlugins
from searx.extended_types import SXNG_Request
try:
import bm25s
except ImportError:
# Import error is ignored because the admin has to install bm25s manually to use the plugin
bm25s = None
class SXNGPlugin(Plugin):
"""Plugin which reranks the search results using the Okapi BM25 algorithm.
This plugin utilizes the `bm25s` library to reorder search results based on their relevance to the search query,
@ -31,28 +53,28 @@ of results from different engines.
"""
from searx import settings
try:
import bm25s
except ImportError:
# Import error is ignored because the admin has to install bm25s manually to use the plugin
bm25s = None
name = 'Rerank plugin'
description = 'Rerank search results, ignoring original engine ranking'
id = "rerank"
default_on = False
preference_section = 'general'
# Supported stopwords for bm25s. Default is 'en'
stopword_langs = ['en', 'de', 'nl', 'fr', 'es', 'pt', 'it', 'ru', 'sv', 'no', 'zh']
def __init__(self):
super().__init__()
remove_extra_engines = settings.get('rerank', {}).get('remove_extra_engines')
self.stopword_langs = ['en', 'de', 'nl', 'fr', 'es', 'pt', 'it', 'ru', 'sv', 'no', 'zh']
self.remove_extra_engines = settings.get('rerank', {}).get('remove_extra_engines')
self.info = PluginInfo(
id=self.id,
name=gettext("Rerank plugin"),
description=gettext("""Rerank search results, ignoring original engine ranking"""),
preference_section="general",
is_allowed=bm25s is not None,
)
def post_search(self, request: "SXNG_Request", search: "SearchWithPlugins") -> EngineResults:
results = EngineResults()
def post_search(_request, search):
if not bm25s:
return True
return results
# pylint: disable=protected-access
results = search.result_container._merged_results
@ -60,11 +82,14 @@ def post_search(_request, search):
locale = search.search_query.locale
# Determine the stopwords based on the selected locale
stopwords = locale.language if locale and locale.language in stopword_langs else 'en'
stopwords = locale.language if locale and locale.language in self.stopword_langs else 'en'
retriever = bm25s.BM25()
result_tokens = bm25s.tokenize(
[f"{result.get('title', '')} | {result.get('content', '')} | {result.get('url', '')}" for result in results],
[
f"{result.get('title', '')} | {result.get('content', '')} | {result.get('url', '')}"
for result in results
],
stopwords=stopwords,
)
retriever.index(result_tokens)
@ -74,7 +99,7 @@ def post_search(_request, search):
# Retrieve ranked indices of results based on the query tokens
indices = retriever.retrieve(query_tokens, k=len(results), return_as='documents', show_progress=False)
if remove_extra_engines:
if self.remove_extra_engines:
# Only keep the main engine and set our ranking
for position, index in enumerate(indices[0]):
if 'positions' in results[index]:
@ -87,8 +112,4 @@ def post_search(_request, search):
if 'positions' in results[index]:
results[index]['positions'] = [position + 1] * len(results[index]['positions'])
return True
def is_allowed():
return bm25s is not None
return results

View File

@ -38,7 +38,7 @@
{%- macro plugin_preferences(section) -%}
{%- for plugin in plugins_storage -%}
{%- if plugin.preference_section == section -%}
{%- if plugin.preference_section == section and plugin.is_allowed -%}
<fieldset>{{- '' -}}
<legend>{{ _(plugin.name) }}</legend>{{- '' -}}
<div class="value">