 50f92779bd
			
		
	
	
		50f92779bd
		
	
	
	
	
		
			
			This patch brings two major changes:
- ``Result.filter_urls(..)`` to pass a filter function for URL fields
- The ``enabled_plugins:`` section in SearXNG's settings do no longer exists.
To understand plugin development compile documentation:
    $ make docs.clean docs.live
and read http://0.0.0.0:8000/dev/plugins/development.html
There is no longer a distinction between built-in and external plugin, all
plugins are registered via the settings in the ``plugins:`` section.
In SearXNG, plugins can be registered via a fully qualified class name.  A
configuration (`PluginCfg`) can be transferred to the plugin, e.g. to activate
it by default / *opt-in* or *opt-out* from user's point of view.
built-in plugins
================
The built-in plugins are all located in the namespace `searx.plugins`.
.. code:: yaml
    plugins:
      searx.plugins.calculator.SXNGPlugin:
        active: true
      searx.plugins.hash_plugin.SXNGPlugin:
        active: true
      searx.plugins.self_info.SXNGPlugin:
        active: true
      searx.plugins.tracker_url_remover.SXNGPlugin:
        active: true
      searx.plugins.unit_converter.SXNGPlugin:
        active: true
      searx.plugins.ahmia_filter.SXNGPlugin:
        active: true
      searx.plugins.hostnames.SXNGPlugin:
        active: true
      searx.plugins.oa_doi_rewrite.SXNGPlugin:
        active: false
      searx.plugins.tor_check.SXNGPlugin:
        active: false
external plugins
================
SearXNG supports *external plugins* / there is no need to install one, SearXNG
runs out of the box.
- Only show green hosted results: https://github.com/return42/tgwf-searx-plugins/
To get a developer installation in a SearXNG developer environment:
.. code:: sh
   $ git clone git@github.com:return42/tgwf-searx-plugins.git
   $ ./manage pyenv.cmd python -m \
         pip install -e tgwf-searx-plugins
To register the plugin in SearXNG add ``only_show_green_results.SXNGPlugin`` to
the ``plugins:``:
.. code:: yaml
    plugins:
      # ...
      only_show_green_results.SXNGPlugin:
        active: false
Result.filter_urls(..)
======================
The ``Result.filter_urls(..)`` can be used to filter and/or modify URL fields.
In the following example, the filter function ``my_url_filter``:
.. code:: python
   def my_url_filter(result, field_name, url_src) -> bool | str:
       if "google" in url_src:
           return False              # remove URL field from result
       if "facebook" in url_src:
           new_url = url_src.replace("facebook", "fb-dummy")
           return new_url            # return modified URL
       return True                   # leave URL in field unchanged
is applied to all URL fields in the :py:obj:`Plugin.on_result` hook:
.. code:: python
   class MyUrlFilter(Plugin):
       ...
       def on_result(self, request, search, result) -> bool:
           result.filter_urls(my_url_filter)
           return True
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
		
	
			
		
			
				
	
	
		
			284 lines
		
	
	
		
			8.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			284 lines
		
	
	
		
			8.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # SPDX-License-Identifier: AGPL-3.0-or-later
 | |
| """A plugin for converting measured values from one unit to another unit (a
 | |
| unit converter).
 | |
| 
 | |
| The plugin looks up the symbols (given in the query term) in a list of
 | |
| converters, each converter is one item in the list (compare
 | |
| :py:obj:`ADDITIONAL_UNITS`).  If the symbols are ambiguous, the matching units
 | |
| of measurement are evaluated.  The weighting in the evaluation results from the
 | |
| sorting of the :py:obj:`list of unit converters<symbol_to_si>`.
 | |
| """
 | |
| from __future__ import annotations
 | |
| import typing
 | |
| import re
 | |
| import babel.numbers
 | |
| 
 | |
| from flask_babel import gettext, get_locale
 | |
| 
 | |
| from searx import data
 | |
| from searx.plugins import Plugin, PluginInfo
 | |
| from searx.result_types import EngineResults
 | |
| 
 | |
| if typing.TYPE_CHECKING:
 | |
|     from searx.search import SearchWithPlugins
 | |
|     from searx.extended_types import SXNG_Request
 | |
|     from searx.plugins import PluginCfg
 | |
| 
 | |
| 
 | |
| name = ""
 | |
| description = gettext("")
 | |
| 
 | |
| plugin_id = ""
 | |
| preference_section = ""
 | |
| 
 | |
| CONVERT_KEYWORDS = ["in", "to", "as"]
 | |
| 
 | |
| 
 | |
| class SXNGPlugin(Plugin):
 | |
|     """Convert between units.  The result is displayed in area for the
 | |
|     "answers".
 | |
|     """
 | |
| 
 | |
|     id = "unit_converter"
 | |
| 
 | |
|     def __init__(self, plg_cfg: "PluginCfg") -> None:
 | |
|         super().__init__(plg_cfg)
 | |
| 
 | |
|         self.info = PluginInfo(
 | |
|             id=self.id,
 | |
|             name=gettext("Unit converter plugin"),
 | |
|             description=gettext("Convert between units"),
 | |
|             preference_section="general",
 | |
|         )
 | |
| 
 | |
|     def post_search(self, request: "SXNG_Request", search: "SearchWithPlugins") -> EngineResults:
 | |
|         results = EngineResults()
 | |
| 
 | |
|         # only convert between units on the first page
 | |
|         if search.search_query.pageno > 1:
 | |
|             return results
 | |
| 
 | |
|         query = search.search_query.query
 | |
|         query_parts = query.split(" ")
 | |
| 
 | |
|         if len(query_parts) < 3:
 | |
|             return results
 | |
| 
 | |
|         for query_part in query_parts:
 | |
|             for keyword in CONVERT_KEYWORDS:
 | |
|                 if query_part == keyword:
 | |
|                     from_query, to_query = query.split(keyword, 1)
 | |
|                     target_val = _parse_text_and_convert(from_query.strip(), to_query.strip())
 | |
|                     if target_val:
 | |
|                         results.add(results.types.Answer(answer=target_val))
 | |
| 
 | |
|         return results
 | |
| 
 | |
| 
 | |
| # inspired from https://stackoverflow.com/a/42475086
 | |
| RE_MEASURE = r'''
 | |
| (?P<sign>[-+]?)         # +/- or nothing for positive
 | |
| (\s*)                   # separator: white space or nothing
 | |
| (?P<number>[\d\.,]*)    # number: 1,000.00 (en) or 1.000,00 (de)
 | |
| (?P<E>[eE][-+]?\d+)?    # scientific notation: e(+/-)2 (*10^2)
 | |
| (\s*)                   # separator: white space or nothing
 | |
| (?P<unit>\S+)           # unit of measure
 | |
| '''
 | |
| 
 | |
| 
 | |
| ADDITIONAL_UNITS = [
 | |
|     {
 | |
|         "si_name": "Q11579",
 | |
|         "symbol": "°C",
 | |
|         "to_si": lambda val: val + 273.15,
 | |
|         "from_si": lambda val: val - 273.15,
 | |
|     },
 | |
|     {
 | |
|         "si_name": "Q11579",
 | |
|         "symbol": "°F",
 | |
|         "to_si": lambda val: (val + 459.67) * 5 / 9,
 | |
|         "from_si": lambda val: (val * 9 / 5) - 459.67,
 | |
|     },
 | |
| ]
 | |
| """Additional items to convert from a measure unit to a SI unit (vice versa).
 | |
| 
 | |
| .. code:: python
 | |
| 
 | |
|     {
 | |
|         "si_name": "Q11579",                 # Wikidata item ID of the SI unit (Kelvin)
 | |
|         "symbol": "°C",                      # symbol of the measure unit
 | |
|         "to_si": lambda val: val + 273.15,   # convert measure value (val) to SI unit
 | |
|         "from_si": lambda val: val - 273.15, # convert SI value (val) measure unit
 | |
|     },
 | |
|     {
 | |
|         "si_name": "Q11573",
 | |
|         "symbol": "mi",
 | |
|         "to_si": 1609.344,                   # convert measure value (val) to SI unit
 | |
|         "from_si": 1 / 1609.344              # convert SI value (val) measure unit
 | |
|     },
 | |
| 
 | |
| The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)
 | |
| or a callable_ (val in / converted value returned).
 | |
| 
 | |
| .. _callable: https://docs.python.org/3/glossary.html#term-callable
 | |
| """
 | |
| 
 | |
| 
 | |
| ALIAS_SYMBOLS = {
 | |
|     '°C': ('C',),
 | |
|     '°F': ('F',),
 | |
|     'mi': ('L',),
 | |
| }
 | |
| """Alias symbols for known unit of measure symbols / by example::
 | |
| 
 | |
|     '°C': ('C', ...),  # list of alias symbols for °C (Q69362731)
 | |
|     '°F': ('F', ...),  # list of alias symbols for °F (Q99490479)
 | |
|     'mi': ('L',),      # list of alias symbols for mi (Q253276)
 | |
| """
 | |
| 
 | |
| 
 | |
| SYMBOL_TO_SI = []
 | |
| 
 | |
| 
 | |
| def symbol_to_si():
 | |
|     """Generates a list of tuples, each tuple is a measure unit and the fields
 | |
|     in the tuple are:
 | |
| 
 | |
|     0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)
 | |
| 
 | |
|     1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')
 | |
| 
 | |
|     2. Factor to get SI value from measure unit (e.g. 1mi is equal to SI 1m
 | |
|        multiplied by 1609.344)
 | |
| 
 | |
|     3. Factor to get measure value from from SI value (e.g. SI 100m is equal to
 | |
|        100mi divided by 1609.344)
 | |
| 
 | |
|     The returned list is sorted, the first items are created from
 | |
|     ``WIKIDATA_UNITS``, the second group of items is build from
 | |
|     :py:obj:`ADDITIONAL_UNITS` and items created from :py:obj:`ALIAS_SYMBOLS`.
 | |
| 
 | |
|     If you search this list for a symbol, then a match with a symbol from
 | |
|     Wikidata has the highest weighting (first hit in the list), followed by the
 | |
|     symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is
 | |
|     given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.
 | |
| 
 | |
|     """
 | |
| 
 | |
|     global SYMBOL_TO_SI  # pylint: disable=global-statement
 | |
|     if SYMBOL_TO_SI:
 | |
|         return SYMBOL_TO_SI
 | |
| 
 | |
|     # filter out units which can't be normalized to a SI unit and filter out
 | |
|     # units without a symbol / arcsecond does not have a symbol
 | |
|     # https://www.wikidata.org/wiki/Q829073
 | |
| 
 | |
|     for item in data.WIKIDATA_UNITS.values():
 | |
|         if item['to_si_factor'] and item['symbol']:
 | |
|             SYMBOL_TO_SI.append(
 | |
|                 (
 | |
|                     item['symbol'],
 | |
|                     item['si_name'],
 | |
|                     1 / item['to_si_factor'],  # from_si
 | |
|                     item['to_si_factor'],  # to_si
 | |
|                     item['symbol'],
 | |
|                 )
 | |
|             )
 | |
| 
 | |
|     for item in ADDITIONAL_UNITS:
 | |
|         SYMBOL_TO_SI.append(
 | |
|             (
 | |
|                 item['symbol'],
 | |
|                 item['si_name'],
 | |
|                 item['from_si'],
 | |
|                 item['to_si'],
 | |
|                 item['symbol'],
 | |
|             )
 | |
|         )
 | |
| 
 | |
|     alias_items = []
 | |
|     for item in SYMBOL_TO_SI:
 | |
|         for alias in ALIAS_SYMBOLS.get(item[0], ()):
 | |
|             alias_items.append(
 | |
|                 (
 | |
|                     alias,
 | |
|                     item[1],
 | |
|                     item[2],  # from_si
 | |
|                     item[3],  # to_si
 | |
|                     item[0],  # origin unit
 | |
|                 )
 | |
|             )
 | |
|     SYMBOL_TO_SI = SYMBOL_TO_SI + alias_items
 | |
|     return SYMBOL_TO_SI
 | |
| 
 | |
| 
 | |
| def _parse_text_and_convert(from_query, to_query) -> str | None:
 | |
| 
 | |
|     # pylint: disable=too-many-branches, too-many-locals
 | |
| 
 | |
|     if not (from_query and to_query):
 | |
|         return None
 | |
| 
 | |
|     measured = re.match(RE_MEASURE, from_query, re.VERBOSE)
 | |
|     if not (measured and measured.group('number'), measured.group('unit')):
 | |
|         return None
 | |
| 
 | |
|     # Symbols are not unique, if there are several hits for the from-unit, then
 | |
|     # the correct one must be determined by comparing it with the to-unit
 | |
|     # https://github.com/searxng/searxng/pull/3378#issuecomment-2080974863
 | |
| 
 | |
|     # first: collecting possible units
 | |
| 
 | |
|     source_list, target_list = [], []
 | |
| 
 | |
|     for symbol, si_name, from_si, to_si, orig_symbol in symbol_to_si():
 | |
| 
 | |
|         if symbol == measured.group('unit'):
 | |
|             source_list.append((si_name, to_si))
 | |
|         if symbol == to_query:
 | |
|             target_list.append((si_name, from_si, orig_symbol))
 | |
| 
 | |
|     if not (source_list and target_list):
 | |
|         return None
 | |
| 
 | |
|     source_to_si = target_from_si = target_symbol = None
 | |
| 
 | |
|     # second: find the right unit by comparing list of from-units with list of to-units
 | |
| 
 | |
|     for source in source_list:
 | |
|         for target in target_list:
 | |
|             if source[0] == target[0]:  # compare si_name
 | |
|                 source_to_si = source[1]
 | |
|                 target_from_si = target[1]
 | |
|                 target_symbol = target[2]
 | |
| 
 | |
|     if not (source_to_si and target_from_si):
 | |
|         return None
 | |
| 
 | |
|     _locale = get_locale() or 'en_US'
 | |
| 
 | |
|     value = measured.group('sign') + measured.group('number') + (measured.group('E') or '')
 | |
|     value = babel.numbers.parse_decimal(value, locale=_locale)
 | |
| 
 | |
|     # convert value to SI unit
 | |
| 
 | |
|     if isinstance(source_to_si, (float, int)):
 | |
|         value = float(value) * source_to_si
 | |
|     else:
 | |
|         value = source_to_si(float(value))
 | |
| 
 | |
|     # convert value from SI unit to target unit
 | |
| 
 | |
|     if isinstance(target_from_si, (float, int)):
 | |
|         value = float(value) * target_from_si
 | |
|     else:
 | |
|         value = target_from_si(float(value))
 | |
| 
 | |
|     if measured.group('E'):
 | |
|         # when incoming notation is scientific, outgoing notation is scientific
 | |
|         result = babel.numbers.format_scientific(value, locale=_locale)
 | |
|     else:
 | |
|         result = babel.numbers.format_decimal(value, locale=_locale, format='#,##0.##########;-#')
 | |
| 
 | |
|     return f'{result} {target_symbol}'
 |