[mod] replace engines_languages.json by engines_traits.json
Implementations of the *traits* of the engines.
Engine's traits are fetched from the origin engine and stored in a JSON file in
the *data folder*.  Most often traits are languages and region codes and their
mapping from SearXNG's representation to the representation in the origin search
engine.
To load traits from the persistence::
    searx.enginelib.traits.EngineTraitsMap.from_data()
For new traits new properties can be added to the class::
    searx.enginelib.traits.EngineTraits
.. hint::
   Implementation is downward compatible to the deprecated *supported_languages
   method* from the vintage implementation.
   The vintage code is tagged as *deprecated* and can be removed when all engines
   have been ported to the *traits method*.
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
			
			
This commit is contained in:
		
							parent
							
								
									64fea2f9cb
								
							
						
					
					
						commit
						6e5f22e558
					
				
							
								
								
									
										2
									
								
								.github/workflows/data-update.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/data-update.yml
									
									
									
									
										vendored
									
									
								
							| @ -17,7 +17,7 @@ jobs: | |||||||
|           - update_currencies.py |           - update_currencies.py | ||||||
|           - update_external_bangs.py |           - update_external_bangs.py | ||||||
|           - update_firefox_version.py |           - update_firefox_version.py | ||||||
|           - update_languages.py |           - update_engine_traits.py | ||||||
|           - update_wikidata_units.py |           - update_wikidata_units.py | ||||||
|           - update_engine_descriptions.py |           - update_engine_descriptions.py | ||||||
|     steps: |     steps: | ||||||
|  | |||||||
| @ -42,7 +42,7 @@ Explanation of the :ref:`general engine configuration` shown in the table | |||||||
|         - Timeout |         - Timeout | ||||||
|         - Weight |         - Weight | ||||||
|         - Paging |         - Paging | ||||||
|         - Language |         - Language, Region | ||||||
|         - Safe search |         - Safe search | ||||||
|         - Time range |         - Time range | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -569,10 +569,13 @@ engine is shown.  Most of the options have a default value or even are optional. | |||||||
|   To disable by default the engine, but not deleting it.  It will allow the user |   To disable by default the engine, but not deleting it.  It will allow the user | ||||||
|   to manually activate it in the settings. |   to manually activate it in the settings. | ||||||
| 
 | 
 | ||||||
|  | ``inactive``: optional | ||||||
|  |   Remove the engine from the settings (*disabled & removed*). | ||||||
|  | 
 | ||||||
| ``language`` : optional | ``language`` : optional | ||||||
|   If you want to use another language for a specific engine, you can define it |   If you want to use another language for a specific engine, you can define it | ||||||
|   by using the full ISO code of language and country, like ``fr_FR``, ``en_US``, |   by using the ISO code of language (and region), like ``fr``, ``en-US``, | ||||||
|   ``de_DE``. |   ``de-DE``. | ||||||
| 
 | 
 | ||||||
| ``tokens`` : optional | ``tokens`` : optional | ||||||
|   A list of secret tokens to make this engine *private*, more details see |   A list of secret tokens to make this engine *private*, more details see | ||||||
|  | |||||||
| @ -127,6 +127,10 @@ extensions = [ | |||||||
|     'notfound.extension',  # https://github.com/readthedocs/sphinx-notfound-page |     'notfound.extension',  # https://github.com/readthedocs/sphinx-notfound-page | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
|  | autodoc_default_options = { | ||||||
|  |     'member-order': 'groupwise', | ||||||
|  | } | ||||||
|  | 
 | ||||||
| myst_enable_extensions = [ | myst_enable_extensions = [ | ||||||
|   "replacements", "smartquotes" |   "replacements", "smartquotes" | ||||||
| ] | ] | ||||||
| @ -135,6 +139,7 @@ suppress_warnings = ['myst.domains'] | |||||||
| 
 | 
 | ||||||
| intersphinx_mapping = { | intersphinx_mapping = { | ||||||
|     "python": ("https://docs.python.org/3/", None), |     "python": ("https://docs.python.org/3/", None), | ||||||
|  |     "babel" : ("https://babel.readthedocs.io/en/latest/", None), | ||||||
|     "flask": ("https://flask.palletsprojects.com/", None), |     "flask": ("https://flask.palletsprojects.com/", None), | ||||||
|     "flask_babel": ("https://python-babel.github.io/flask-babel/", None), |     "flask_babel": ("https://python-babel.github.io/flask-babel/", None), | ||||||
|     # "werkzeug": ("https://werkzeug.palletsprojects.com/", None), |     # "werkzeug": ("https://werkzeug.palletsprojects.com/", None), | ||||||
|  | |||||||
| @ -54,6 +54,7 @@ Engine File | |||||||
|                                        - ``offline`` :ref:`[ref] <offline engines>` |                                        - ``offline`` :ref:`[ref] <offline engines>` | ||||||
|                                        - ``online_dictionary`` |                                        - ``online_dictionary`` | ||||||
|                                        - ``online_currency`` |                                        - ``online_currency`` | ||||||
|  |                                        - ``online_url_search`` | ||||||
|    ======================= =========== ======================================================== |    ======================= =========== ======================================================== | ||||||
| 
 | 
 | ||||||
| .. _engine settings: | .. _engine settings: | ||||||
| @ -131,8 +132,10 @@ Passed Arguments (request) | |||||||
| These arguments can be used to construct the search query.  Furthermore, | These arguments can be used to construct the search query.  Furthermore, | ||||||
| parameters with default value can be redefined for special purposes. | parameters with default value can be redefined for special purposes. | ||||||
| 
 | 
 | ||||||
|  | .. _engine request online: | ||||||
| 
 | 
 | ||||||
| .. table:: If the ``engine_type`` is ``online`` | .. table:: If the ``engine_type`` is :py:obj:`online | ||||||
|  |            <searx.search.processors.online.OnlineProcessor.get_params>` | ||||||
|    :width: 100% |    :width: 100% | ||||||
| 
 | 
 | ||||||
|    ====================== ============== ======================================================================== |    ====================== ============== ======================================================================== | ||||||
| @ -149,12 +152,16 @@ parameters with default value can be redefined for special purposes. | |||||||
|    safesearch             int            ``0``, between ``0`` and ``2`` (normal, moderate, strict) |    safesearch             int            ``0``, between ``0`` and ``2`` (normal, moderate, strict) | ||||||
|    time_range             Optional[str]  ``None``, can be ``day``, ``week``, ``month``, ``year`` |    time_range             Optional[str]  ``None``, can be ``day``, ``week``, ``month``, ``year`` | ||||||
|    pageno                 int            current pagenumber |    pageno                 int            current pagenumber | ||||||
|    language               str            specific language code like ``'en_US'``, or ``'all'`` if unspecified |    searxng_locale         str            SearXNG's locale selected by user.  Specific language code like | ||||||
|  |                                          ``'en'``, ``'en-US'``, or ``'all'`` if unspecified. | ||||||
|    ====================== ============== ======================================================================== |    ====================== ============== ======================================================================== | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| .. table:: If the ``engine_type`` is ``online_dictionary``, in addition to the | .. _engine request online_dictionary: | ||||||
|            ``online`` arguments: | 
 | ||||||
|  | .. table:: If the ``engine_type`` is :py:obj:`online_dictionary | ||||||
|  |            <searx.search.processors.online_dictionary.OnlineDictionaryProcessor.get_params>`, | ||||||
|  |            in addition to the :ref:`online <engine request online>` arguments: | ||||||
|    :width: 100% |    :width: 100% | ||||||
| 
 | 
 | ||||||
|    ====================== ============== ======================================================================== |    ====================== ============== ======================================================================== | ||||||
| @ -165,8 +172,11 @@ parameters with default value can be redefined for special purposes. | |||||||
|    query                  str            the text query without the languages |    query                  str            the text query without the languages | ||||||
|    ====================== ============== ======================================================================== |    ====================== ============== ======================================================================== | ||||||
| 
 | 
 | ||||||
| .. table:: If the ``engine_type`` is ``online_currency```, in addition to the | .. _engine request online_currency: | ||||||
|            ``online`` arguments: | 
 | ||||||
|  | .. table:: If the ``engine_type`` is :py:obj:`online_currency | ||||||
|  |            <searx.search.processors.online_currency.OnlineCurrencyProcessor.get_params>`, | ||||||
|  |            in addition to the :ref:`online <engine request online>` arguments: | ||||||
|    :width: 100% |    :width: 100% | ||||||
| 
 | 
 | ||||||
|    ====================== ============== ======================================================================== |    ====================== ============== ======================================================================== | ||||||
| @ -179,6 +189,26 @@ parameters with default value can be redefined for special purposes. | |||||||
|    to_name                str            currency name |    to_name                str            currency name | ||||||
|    ====================== ============== ======================================================================== |    ====================== ============== ======================================================================== | ||||||
| 
 | 
 | ||||||
|  | .. _engine request online_url_search: | ||||||
|  | 
 | ||||||
|  | .. table:: If the ``engine_type`` is :py:obj:`online_url_search | ||||||
|  |            <searx.search.processors.online_url_search.OnlineUrlSearchProcessor.get_params>`, | ||||||
|  |            in addition to the :ref:`online <engine request online>` arguments: | ||||||
|  |    :width: 100% | ||||||
|  | 
 | ||||||
|  |    ====================== ============== ======================================================================== | ||||||
|  |    argument               type           default-value, information | ||||||
|  |    ====================== ============== ======================================================================== | ||||||
|  |    search_url             dict           URLs from the search query: | ||||||
|  | 
 | ||||||
|  |                                          .. code:: python | ||||||
|  | 
 | ||||||
|  |                                             { | ||||||
|  |                                               'http': str, | ||||||
|  |                                               'ftp': str, | ||||||
|  |                                               'data:image': str | ||||||
|  |                                             } | ||||||
|  |    ====================== ============== ======================================================================== | ||||||
| 
 | 
 | ||||||
| Specify Request | Specify Request | ||||||
| --------------- | --------------- | ||||||
|  | |||||||
| @ -52,12 +52,12 @@ Scripts to update static data in :origin:`searx/data/` | |||||||
|   :members: |   :members: | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| ``update_languages.py`` | ``update_engine_traits.py`` | ||||||
| ======================= | =========================== | ||||||
| 
 | 
 | ||||||
| :origin:`[source] <searxng_extra/update/update_languages.py>` | :origin:`[source] <searxng_extra/update/update_engine_traits.py>` | ||||||
| 
 | 
 | ||||||
| .. automodule:: searxng_extra.update.update_languages | .. automodule:: searxng_extra.update.update_engine_traits | ||||||
|   :members: |   :members: | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | |||||||
							
								
								
									
										17
									
								
								docs/src/searx.enginelib.rst
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								docs/src/searx.enginelib.rst
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,17 @@ | |||||||
|  | .. _searx.enginelib: | ||||||
|  | 
 | ||||||
|  | ============ | ||||||
|  | Engine model | ||||||
|  | ============ | ||||||
|  | 
 | ||||||
|  | .. automodule:: searx.enginelib | ||||||
|  |   :members: | ||||||
|  | 
 | ||||||
|  | .. _searx.enginelib.traits: | ||||||
|  | 
 | ||||||
|  | ============= | ||||||
|  | Engine traits | ||||||
|  | ============= | ||||||
|  | 
 | ||||||
|  | .. automodule:: searx.enginelib.traits | ||||||
|  |   :members: | ||||||
| @ -1,8 +1,8 @@ | |||||||
| .. _load_engines: | .. _searx.engines: | ||||||
| 
 | 
 | ||||||
| ============ | ================= | ||||||
| Load Engines | SearXNG's engines | ||||||
| ============ | ================= | ||||||
| 
 | 
 | ||||||
| .. automodule:: searx.engines | .. automodule:: searx.engines | ||||||
|   :members: |   :members: | ||||||
|  | |||||||
							
								
								
									
										47
									
								
								docs/src/searx.search.processors.rst
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								docs/src/searx.search.processors.rst
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,47 @@ | |||||||
|  | .. _searx.search.processors: | ||||||
|  | 
 | ||||||
|  | ================= | ||||||
|  | Search processors | ||||||
|  | ================= | ||||||
|  | 
 | ||||||
|  | .. contents:: Contents | ||||||
|  |    :depth: 2 | ||||||
|  |    :local: | ||||||
|  |    :backlinks: entry | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | Abstract processor class | ||||||
|  | ======================== | ||||||
|  | 
 | ||||||
|  | .. automodule:: searx.search.processors.abstract | ||||||
|  |   :members: | ||||||
|  | 
 | ||||||
|  | Offline processor | ||||||
|  | ================= | ||||||
|  | 
 | ||||||
|  | .. automodule:: searx.search.processors.offline | ||||||
|  |   :members: | ||||||
|  | 
 | ||||||
|  | Online processor | ||||||
|  | ================ | ||||||
|  | 
 | ||||||
|  | .. automodule:: searx.search.processors.online | ||||||
|  |   :members: | ||||||
|  | 
 | ||||||
|  | Online currency processor | ||||||
|  | ========================= | ||||||
|  | 
 | ||||||
|  | .. automodule:: searx.search.processors.online_currency | ||||||
|  |   :members: | ||||||
|  | 
 | ||||||
|  | Online Dictionary processor | ||||||
|  | =========================== | ||||||
|  | 
 | ||||||
|  | .. automodule:: searx.search.processors.online_dictionary | ||||||
|  |   :members: | ||||||
|  | 
 | ||||||
|  | Online URL search processor | ||||||
|  | =========================== | ||||||
|  | 
 | ||||||
|  | .. automodule:: searx.search.processors.online_url_search | ||||||
|  |   :members: | ||||||
							
								
								
									
										2
									
								
								manage
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								manage
									
									
									
									
									
								
							| @ -63,7 +63,7 @@ PYLINT_SEARXNG_DISABLE_OPTION="\ | |||||||
| I,C,R,\ | I,C,R,\ | ||||||
| W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401,\ | W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401,\ | ||||||
| E1136" | E1136" | ||||||
| PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="supported_languages,language_aliases,logger,categories" | PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="traits,supported_languages,language_aliases,logger,categories" | ||||||
| PYLINT_OPTIONS="-m pylint -j 0 --rcfile .pylintrc" | PYLINT_OPTIONS="-m pylint -j 0 --rcfile .pylintrc" | ||||||
| 
 | 
 | ||||||
| help() { | help() { | ||||||
|  | |||||||
| @ -12,7 +12,7 @@ from lxml import etree | |||||||
| from httpx import HTTPError | from httpx import HTTPError | ||||||
| 
 | 
 | ||||||
| from searx import settings | from searx import settings | ||||||
| from searx.data import ENGINES_LANGUAGES | from searx.engines import engines | ||||||
| from searx.network import get as http_get | from searx.network import get as http_get | ||||||
| from searx.exceptions import SearxEngineResponseException | from searx.exceptions import SearxEngineResponseException | ||||||
| 
 | 
 | ||||||
| @ -111,7 +111,7 @@ def seznam(query, _lang): | |||||||
| 
 | 
 | ||||||
| def startpage(query, lang): | def startpage(query, lang): | ||||||
|     # startpage autocompleter |     # startpage autocompleter | ||||||
|     lui = ENGINES_LANGUAGES['startpage'].get(lang, 'english') |     lui = engines['startpage'].supported_languages.get(lang, 'english')  # vintage / deprecated | ||||||
|     url = 'https://startpage.com/suggestions?{query}' |     url = 'https://startpage.com/suggestions?{query}' | ||||||
|     resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui}))) |     resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui}))) | ||||||
|     data = resp.json() |     data = resp.json() | ||||||
| @ -177,12 +177,19 @@ backends = { | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def search_autocomplete(backend_name, query, lang): | def search_autocomplete(backend_name, query, sxng_locale): | ||||||
|     backend = backends.get(backend_name) |     backend = backends.get(backend_name) | ||||||
|     if backend is None: |     if backend is None: | ||||||
|         return [] |         return [] | ||||||
| 
 | 
 | ||||||
|  |     if engines[backend_name].traits.data_type != "traits_v1": | ||||||
|  |         # vintage / deprecated | ||||||
|  |         if not sxng_locale or sxng_locale == 'all': | ||||||
|  |             sxng_locale = 'en' | ||||||
|  |         else: | ||||||
|  |             sxng_locale = sxng_locale.split('-')[0] | ||||||
|  | 
 | ||||||
|     try: |     try: | ||||||
|         return backend(query, lang) |         return backend(query, sxng_locale) | ||||||
|     except (HTTPError, SearxEngineResponseException): |     except (HTTPError, SearxEngineResponseException): | ||||||
|         return [] |         return [] | ||||||
|  | |||||||
| @ -7,7 +7,7 @@ | |||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| __all__ = [ | __all__ = [ | ||||||
|     'ENGINES_LANGUAGES', |     'ENGINE_TRAITS', | ||||||
|     'CURRENCIES', |     'CURRENCIES', | ||||||
|     'USER_AGENTS', |     'USER_AGENTS', | ||||||
|     'EXTERNAL_URLS', |     'EXTERNAL_URLS', | ||||||
| @ -42,7 +42,6 @@ def ahmia_blacklist_loader(): | |||||||
|         return f.read().split() |         return f.read().split() | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| ENGINES_LANGUAGES = _load('engines_languages.json') |  | ||||||
| CURRENCIES = _load('currencies.json') | CURRENCIES = _load('currencies.json') | ||||||
| USER_AGENTS = _load('useragents.json') | USER_AGENTS = _load('useragents.json') | ||||||
| EXTERNAL_URLS = _load('external_urls.json') | EXTERNAL_URLS = _load('external_urls.json') | ||||||
| @ -50,3 +49,4 @@ WIKIDATA_UNITS = _load('wikidata_units.json') | |||||||
| EXTERNAL_BANGS = _load('external_bangs.json') | EXTERNAL_BANGS = _load('external_bangs.json') | ||||||
| OSM_KEYS_TAGS = _load('osm_keys_tags.json') | OSM_KEYS_TAGS = _load('osm_keys_tags.json') | ||||||
| ENGINE_DESCRIPTIONS = _load('engine_descriptions.json') | ENGINE_DESCRIPTIONS = _load('engine_descriptions.json') | ||||||
|  | ENGINE_TRAITS = _load('engine_traits.json') | ||||||
|  | |||||||
							
								
								
									
										4534
									
								
								searx/data/engine_traits.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4534
									
								
								searx/data/engine_traits.json
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										143
									
								
								searx/enginelib/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										143
									
								
								searx/enginelib/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,143 @@ | |||||||
|  | # SPDX-License-Identifier: AGPL-3.0-or-later | ||||||
|  | # lint: pylint | ||||||
|  | """Engine related implementations | ||||||
|  | 
 | ||||||
|  | .. note:: | ||||||
|  | 
 | ||||||
|  |    The long term goal is to modularize all relevant implementations to the | ||||||
|  |    engines here in this Python package.  In addition to improved modularization, | ||||||
|  |    this will also be necessary in part because the probability of circular | ||||||
|  |    imports will increase due to the increased typification of implementations in | ||||||
|  |    the future. | ||||||
|  | 
 | ||||||
|  |    ToDo: | ||||||
|  | 
 | ||||||
|  |    - move :py:obj:`searx.engines.load_engine` to a new module `searx.enginelib`. | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | from __future__ import annotations | ||||||
|  | from typing import Union, Dict, List, Callable, TYPE_CHECKING | ||||||
|  | 
 | ||||||
|  | if TYPE_CHECKING: | ||||||
|  |     from searx.enginelib import traits | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class Engine:  # pylint: disable=too-few-public-methods | ||||||
|  |     """Class of engine instances built from YAML settings. | ||||||
|  | 
 | ||||||
|  |     Further documentation see :ref:`general engine configuration`. | ||||||
|  | 
 | ||||||
|  |     .. hint:: | ||||||
|  | 
 | ||||||
|  |        This class is currently never initialized and only used for type hinting. | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     # Common options in the engine module | ||||||
|  | 
 | ||||||
|  |     engine_type: str | ||||||
|  |     """Type of the engine (:origin:`searx/search/processors`)""" | ||||||
|  | 
 | ||||||
|  |     paging: bool | ||||||
|  |     """Engine supports multiple pages.""" | ||||||
|  | 
 | ||||||
|  |     time_range_support: bool | ||||||
|  |     """Engine supports search time range.""" | ||||||
|  | 
 | ||||||
|  |     safesearch: bool | ||||||
|  |     """Engine supports SafeSearch""" | ||||||
|  | 
 | ||||||
|  |     language_support: bool | ||||||
|  |     """Engine supports languages (locales) search.""" | ||||||
|  | 
 | ||||||
|  |     language: str | ||||||
|  |     """For an engine, when there is ``language: ...`` in the YAML settings the engine | ||||||
|  |     does support only this one language: | ||||||
|  | 
 | ||||||
|  |     .. code:: yaml | ||||||
|  | 
 | ||||||
|  |       - name: google french | ||||||
|  |         engine: google | ||||||
|  |         language: fr | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     region: str | ||||||
|  |     """For an engine, when there is ``region: ...`` in the YAML settings the engine | ||||||
|  |     does support only this one region: | ||||||
|  | 
 | ||||||
|  |     .. code:: yaml | ||||||
|  | 
 | ||||||
|  |       - name: google belgium | ||||||
|  |         engine: google | ||||||
|  |         region: fr-BE | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     fetch_traits: Callable | ||||||
|  |     """Function to fetch engine's traits from origin.""" | ||||||
|  | 
 | ||||||
|  |     traits: traits.EngineTraits | ||||||
|  |     """Traits of the engine.""" | ||||||
|  | 
 | ||||||
|  |     # settings.yml | ||||||
|  | 
 | ||||||
|  |     categories: List[str] | ||||||
|  |     """Tabs, in which the engine is working.""" | ||||||
|  | 
 | ||||||
|  |     name: str | ||||||
|  |     """Name that will be used across SearXNG to define this engine.  In settings, on | ||||||
|  |     the result page ..""" | ||||||
|  | 
 | ||||||
|  |     engine: str | ||||||
|  |     """Name of the python file used to handle requests and responses to and from | ||||||
|  |     this search engine (file name from :origin:`searx/engines` without | ||||||
|  |     ``.py``).""" | ||||||
|  | 
 | ||||||
|  |     enable_http: bool | ||||||
|  |     """Enable HTTP (by default only HTTPS is enabled).""" | ||||||
|  | 
 | ||||||
|  |     shortcut: str | ||||||
|  |     """Code used to execute bang requests (``!foo``)""" | ||||||
|  | 
 | ||||||
|  |     timeout: float | ||||||
|  |     """Specific timeout for search-engine.""" | ||||||
|  | 
 | ||||||
|  |     display_error_messages: bool | ||||||
|  |     """Display error messages on the web UI.""" | ||||||
|  | 
 | ||||||
|  |     proxies: dict | ||||||
|  |     """Set proxies for a specific engine (YAML): | ||||||
|  | 
 | ||||||
|  |     .. code:: yaml | ||||||
|  | 
 | ||||||
|  |        proxies : | ||||||
|  |          http:  socks5://proxy:port | ||||||
|  |          https: socks5://proxy:port | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     disabled: bool | ||||||
|  |     """To disable by default the engine, but not deleting it.  It will allow the | ||||||
|  |     user to manually activate it in the settings.""" | ||||||
|  | 
 | ||||||
|  |     inactive: bool | ||||||
|  |     """Remove the engine from the settings (*disabled & removed*).""" | ||||||
|  | 
 | ||||||
|  |     about: dict | ||||||
|  |     """Additional fields describing the engine. | ||||||
|  | 
 | ||||||
|  |     .. code:: yaml | ||||||
|  | 
 | ||||||
|  |        about: | ||||||
|  |           website: https://example.com | ||||||
|  |           wikidata_id: Q306656 | ||||||
|  |           official_api_documentation: https://example.com/api-doc | ||||||
|  |           use_official_api: true | ||||||
|  |           require_api_key: true | ||||||
|  |           results: HTML | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     # deprecated properties | ||||||
|  | 
 | ||||||
|  |     _fetch_supported_languages: Callable  # deprecated use fetch_traits | ||||||
|  |     supported_languages: Union[List[str], Dict[str, str]]  # deprecated use traits | ||||||
|  |     language_aliases: Dict[str, str]  # deprecated not needed when using traits | ||||||
|  |     supported_languages_url: str  # deprecated not needed when using traits | ||||||
							
								
								
									
										387
									
								
								searx/enginelib/traits.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										387
									
								
								searx/enginelib/traits.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,387 @@ | |||||||
|  | # SPDX-License-Identifier: AGPL-3.0-or-later | ||||||
|  | # lint: pylint | ||||||
|  | """Engine's traits are fetched from the origin engines and stored in a JSON file | ||||||
|  | in the *data folder*.  Most often traits are languages and region codes and | ||||||
|  | their mapping from SearXNG's representation to the representation in the origin | ||||||
|  | search engine.  For new traits new properties can be added to the class | ||||||
|  | :py:class:`EngineTraits`. | ||||||
|  | 
 | ||||||
|  | To load traits from the persistence :py:obj:`EngineTraitsMap.from_data` can be | ||||||
|  | used. | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | from __future__ import annotations | ||||||
|  | import json | ||||||
|  | import dataclasses | ||||||
|  | from typing import Dict, Union, List, Callable, Optional, TYPE_CHECKING | ||||||
|  | from typing_extensions import Literal, Self | ||||||
|  | 
 | ||||||
|  | from babel.localedata import locale_identifiers | ||||||
|  | 
 | ||||||
|  | from searx import locales | ||||||
|  | from searx.data import data_dir, ENGINE_TRAITS | ||||||
|  | 
 | ||||||
|  | if TYPE_CHECKING: | ||||||
|  |     from . import Engine | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class EngineTraitsEncoder(json.JSONEncoder): | ||||||
|  |     """Encodes :class:`EngineTraits` to a serializable object, see | ||||||
|  |     :class:`json.JSONEncoder`.""" | ||||||
|  | 
 | ||||||
|  |     def default(self, o): | ||||||
|  |         """Return dictionary of a :class:`EngineTraits` object.""" | ||||||
|  |         if isinstance(o, EngineTraits): | ||||||
|  |             return o.__dict__ | ||||||
|  |         return super().default(o) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @dataclasses.dataclass | ||||||
|  | class EngineTraits: | ||||||
|  |     """The class is intended to be instantiated for each engine.""" | ||||||
|  | 
 | ||||||
|  |     regions: Dict[str, str] = dataclasses.field(default_factory=dict) | ||||||
|  |     """Maps SearXNG's internal representation of a region to the one of the engine. | ||||||
|  | 
 | ||||||
|  |     SearXNG's internal representation can be parsed by babel and the value is | ||||||
|  |     sent to the engine: | ||||||
|  | 
 | ||||||
|  |     .. code:: python | ||||||
|  | 
 | ||||||
|  |        regions ={ | ||||||
|  |            'fr-BE' : <engine's region name>, | ||||||
|  |        } | ||||||
|  | 
 | ||||||
|  |        for key, engine_region in regions.items(): | ||||||
|  |           searxng_region = babel.Locale.parse(key, sep='-') | ||||||
|  |           ... | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     languages: Dict[str, str] = dataclasses.field(default_factory=dict) | ||||||
|  |     """Maps SearXNG's internal representation of a language to the one of the engine. | ||||||
|  | 
 | ||||||
|  |     SearXNG's internal representation can be parsed by babel and the value is | ||||||
|  |     sent to the engine: | ||||||
|  | 
 | ||||||
|  |     .. code:: python | ||||||
|  | 
 | ||||||
|  |        languages = { | ||||||
|  |            'ca' : <engine's language name>, | ||||||
|  |        } | ||||||
|  | 
 | ||||||
|  |        for key, engine_lang in languages.items(): | ||||||
|  |           searxng_lang = babel.Locale.parse(key) | ||||||
|  |           ... | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     all_locale: Optional[str] = None | ||||||
|  |     """To which locale value SearXNG's ``all`` language is mapped (shown as "Default | ||||||
|  |     language"). | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     data_type: Literal['traits_v1', 'supported_languages'] = 'traits_v1' | ||||||
|  |     """Data type, default is 'traits_v1'; for vintage use 'supported_languages'. | ||||||
|  | 
 | ||||||
|  |     .. hint:: | ||||||
|  | 
 | ||||||
|  |        For the transition period until the *fetch* functions of all the engines | ||||||
|  |        are converted there will be the data_type 'supported_languages', which | ||||||
|  |        maps the old logic unchanged 1:1. | ||||||
|  | 
 | ||||||
|  |        Instances of data_type 'supported_languages' do not implement methods | ||||||
|  |        like ``self.get_language(..)`` and ``self.get_region(..)`` | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     custom: Dict[str, Dict] = dataclasses.field(default_factory=dict) | ||||||
|  |     """A place to store engine's custom traits, not related to the SearXNG core | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     def get_language(self, searxng_locale: str, default=None): | ||||||
|  |         """Return engine's language string that *best fits* to SearXNG's locale. | ||||||
|  | 
 | ||||||
|  |         :param searxng_locale: SearXNG's internal representation of locale | ||||||
|  |           selected by the user. | ||||||
|  | 
 | ||||||
|  |         :param default: engine's default language | ||||||
|  | 
 | ||||||
|  |         The *best fits* rules are implemented in | ||||||
|  |         :py:obj:`locales.get_engine_locale`.  Except for the special value ``all`` | ||||||
|  |         which is determined from :py:obj`EngineTraits.all_language`. | ||||||
|  |         """ | ||||||
|  |         if searxng_locale == 'all' and self.all_locale is not None: | ||||||
|  |             return self.all_locale | ||||||
|  |         return locales.get_engine_locale(searxng_locale, self.languages, default=default) | ||||||
|  | 
 | ||||||
|  |     def get_region(self, searxng_locale: str, default=None): | ||||||
|  |         """Return engine's region string that best fits to SearXNG's locale. | ||||||
|  | 
 | ||||||
|  |         :param searxng_locale: SearXNG's internal representation of locale | ||||||
|  |           selected by the user. | ||||||
|  | 
 | ||||||
|  |         :param default: engine's default region | ||||||
|  | 
 | ||||||
|  |         The *best fits* rules are implemented in | ||||||
|  |         :py:obj:`locales.get_engine_locale`.  Except for the special value ``all`` | ||||||
|  |         which is determined from :py:obj`EngineTraits.all_language`. | ||||||
|  |         """ | ||||||
|  |         if searxng_locale == 'all' and self.all_locale is not None: | ||||||
|  |             return self.all_locale | ||||||
|  |         return locales.get_engine_locale(searxng_locale, self.regions, default=default) | ||||||
|  | 
 | ||||||
|  |     def is_locale_supported(self, searxng_locale: str) -> bool: | ||||||
|  |         """A *locale* (SearXNG's internal representation) is considered to be supported | ||||||
|  |         by the engine if the *region* or the *language* is supported by the | ||||||
|  |         engine.  For verification the functions :py:func:`self.get_region` and | ||||||
|  |         :py:func:`self.get_region` are used. | ||||||
|  |         """ | ||||||
|  |         if self.data_type == 'traits_v1': | ||||||
|  |             return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale)) | ||||||
|  | 
 | ||||||
|  |         if self.data_type == 'supported_languages':  # vintage / deprecated | ||||||
|  |             # pylint: disable=import-outside-toplevel | ||||||
|  |             from searx.utils import match_language | ||||||
|  | 
 | ||||||
|  |             if searxng_locale == 'all': | ||||||
|  |                 return True | ||||||
|  |             x = match_language(searxng_locale, self.supported_languages, self.language_aliases, None) | ||||||
|  |             return bool(x) | ||||||
|  | 
 | ||||||
|  |             # return bool(self.get_supported_language(searxng_locale)) | ||||||
|  |         raise TypeError('engine traits of type %s is unknown' % self.data_type) | ||||||
|  | 
 | ||||||
|  |     def copy(self): | ||||||
|  |         """Create a copy of the dataclass object.""" | ||||||
|  |         return EngineTraits(**dataclasses.asdict(self)) | ||||||
|  | 
 | ||||||
|  |     @classmethod | ||||||
|  |     def fetch_traits(cls, engine: Engine) -> Union[Self, None]: | ||||||
|  |         """Call a function ``fetch_traits(engine_traits)`` from engines namespace to fetch | ||||||
|  |         and set properties from the origin engine in the object ``engine_traits``.  If | ||||||
|  |         function does not exists, ``None`` is returned. | ||||||
|  |         """ | ||||||
|  | 
 | ||||||
|  |         fetch_traits = getattr(engine, 'fetch_traits', None) | ||||||
|  |         engine_traits = None | ||||||
|  | 
 | ||||||
|  |         if fetch_traits: | ||||||
|  |             engine_traits = cls() | ||||||
|  |             fetch_traits(engine_traits) | ||||||
|  |         return engine_traits | ||||||
|  | 
 | ||||||
|  |     def set_traits(self, engine: Engine): | ||||||
|  |         """Set traits from self object in a :py:obj:`.Engine` namespace. | ||||||
|  | 
 | ||||||
|  |         :param engine: engine instance build by :py:func:`searx.engines.load_engine` | ||||||
|  |         """ | ||||||
|  | 
 | ||||||
|  |         if self.data_type == 'traits_v1': | ||||||
|  |             self._set_traits_v1(engine) | ||||||
|  | 
 | ||||||
|  |         elif self.data_type == 'supported_languages':  # vintage / deprecated | ||||||
|  |             self._set_supported_languages(engine) | ||||||
|  | 
 | ||||||
|  |         else: | ||||||
|  |             raise TypeError('engine traits of type %s is unknown' % self.data_type) | ||||||
|  | 
 | ||||||
|  |     def _set_traits_v1(self, engine: Engine): | ||||||
|  |         # For an engine, when there is `language: ...` in the YAML settings the engine | ||||||
|  |         # does support only this one language (region):: | ||||||
|  |         # | ||||||
|  |         #   - name: google italian | ||||||
|  |         #     engine: google | ||||||
|  |         #     language: it | ||||||
|  |         #     region: it-IT | ||||||
|  | 
 | ||||||
|  |         traits = self.copy() | ||||||
|  | 
 | ||||||
|  |         _msg = "settings.yml - engine: '%s' / %s: '%s' not supported" | ||||||
|  | 
 | ||||||
|  |         languages = traits.languages | ||||||
|  |         if hasattr(engine, 'language'): | ||||||
|  |             if engine.language not in languages: | ||||||
|  |                 raise ValueError(_msg % (engine.name, 'language', engine.language)) | ||||||
|  |             traits.languages = {engine.language: languages[engine.language]} | ||||||
|  | 
 | ||||||
|  |         regions = traits.regions | ||||||
|  |         if hasattr(engine, 'region'): | ||||||
|  |             if engine.region not in regions: | ||||||
|  |                 raise ValueError(_msg % (engine.name, 'region', engine.region)) | ||||||
|  |             traits.regions = {engine.region: regions[engine.region]} | ||||||
|  | 
 | ||||||
|  |         engine.language_support = bool(traits.languages or traits.regions) | ||||||
|  | 
 | ||||||
|  |         # set the copied & modified traits in engine's namespace | ||||||
|  |         engine.traits = traits | ||||||
|  | 
 | ||||||
|  |     # ------------------------------------------------------------------------- | ||||||
|  |     # The code below is deprecated and can hopefully be deleted one day | ||||||
|  |     # ------------------------------------------------------------------------- | ||||||
|  | 
 | ||||||
|  |     supported_languages: Union[List[str], Dict[str, str]] = dataclasses.field(default_factory=dict) | ||||||
|  |     """deprecated: does not work for engines that do support languages based on a | ||||||
|  |     region.  With this type it is not guaranteed that the key values can be | ||||||
|  |     parsed by :py:obj:`babel.Locale.parse`! | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     # language_aliases: Dict[str, str] = dataclasses.field(default_factory=dict) | ||||||
|  |     # """depricated: does not work for engines that do support languages based on a | ||||||
|  |     # region.  With this type it is not guaranteed that the key values can be | ||||||
|  |     # parsed by :py:obj:`babel.Locale.parse`! | ||||||
|  |     # """ | ||||||
|  | 
 | ||||||
|  |     BABEL_LANGS = [ | ||||||
|  |         lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0] | ||||||
|  |         for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers()) | ||||||
|  |     ] | ||||||
|  | 
 | ||||||
|  |     # def get_supported_language(self, searxng_locale, default=None):  # vintage / deprecated | ||||||
|  |     #     """Return engine's language string that *best fits* to SearXNG's locale.""" | ||||||
|  |     #     if searxng_locale == 'all' and self.all_locale is not None: | ||||||
|  |     #         return self.all_locale | ||||||
|  |     #     return locales.get_engine_locale(searxng_locale, self.supported_languages, default=default) | ||||||
|  | 
 | ||||||
|  |     @classmethod  # vintage / deprecated | ||||||
|  |     def fetch_supported_languages(cls, engine: Engine) -> Union[Self, None]: | ||||||
|  |         """DEPRECATED: Calls a function ``_fetch_supported_languages`` from engine's | ||||||
|  |         namespace to fetch languages from the origin engine.  If function does | ||||||
|  |         not exists, ``None`` is returned. | ||||||
|  |         """ | ||||||
|  | 
 | ||||||
|  |         # pylint: disable=import-outside-toplevel | ||||||
|  |         from searx import network | ||||||
|  |         from searx.utils import gen_useragent | ||||||
|  | 
 | ||||||
|  |         fetch_languages = getattr(engine, '_fetch_supported_languages', None) | ||||||
|  |         if fetch_languages is None: | ||||||
|  |             return None | ||||||
|  | 
 | ||||||
|  |         # The headers has been moved here from commit 9b6ffed06: Some engines (at | ||||||
|  |         # least bing and startpage) return a different result list of supported | ||||||
|  |         # languages depending on the IP location where the HTTP request comes from. | ||||||
|  |         # The IP based results (from bing) can be avoided by setting a | ||||||
|  |         # 'Accept-Language' in the HTTP request. | ||||||
|  | 
 | ||||||
|  |         headers = { | ||||||
|  |             'User-Agent': gen_useragent(), | ||||||
|  |             'Accept-Language': "en-US,en;q=0.5",  # bing needs to set the English language | ||||||
|  |         } | ||||||
|  |         resp = network.get(engine.supported_languages_url, headers=headers) | ||||||
|  |         supported_languages = fetch_languages(resp) | ||||||
|  |         if isinstance(supported_languages, list): | ||||||
|  |             supported_languages.sort() | ||||||
|  | 
 | ||||||
|  |         engine_traits = cls() | ||||||
|  |         engine_traits.data_type = 'supported_languages' | ||||||
|  |         engine_traits.supported_languages = supported_languages | ||||||
|  |         return engine_traits | ||||||
|  | 
 | ||||||
|  |     def _set_supported_languages(self, engine: Engine):  # vintage / deprecated | ||||||
|  |         traits = self.copy() | ||||||
|  | 
 | ||||||
|  |         # pylint: disable=import-outside-toplevel | ||||||
|  |         from searx.utils import match_language | ||||||
|  | 
 | ||||||
|  |         _msg = "settings.yml - engine: '%s' / %s: '%s' not supported" | ||||||
|  | 
 | ||||||
|  |         if hasattr(engine, 'language'): | ||||||
|  |             if engine.language not in self.supported_languages: | ||||||
|  |                 raise ValueError(_msg % (engine.name, 'language', engine.language)) | ||||||
|  | 
 | ||||||
|  |             if isinstance(self.supported_languages, dict): | ||||||
|  |                 traits.supported_languages = {engine.language: self.supported_languages[engine.language]} | ||||||
|  |             else: | ||||||
|  |                 traits.supported_languages = [engine.language] | ||||||
|  | 
 | ||||||
|  |         engine.language_support = bool(traits.supported_languages) | ||||||
|  |         engine.supported_languages = traits.supported_languages | ||||||
|  | 
 | ||||||
|  |         # find custom aliases for non standard language codes | ||||||
|  |         traits.language_aliases = {}  # pylint: disable=attribute-defined-outside-init | ||||||
|  | 
 | ||||||
|  |         for engine_lang in getattr(engine, 'language_aliases', {}): | ||||||
|  |             iso_lang = match_language(engine_lang, self.BABEL_LANGS, fallback=None) | ||||||
|  |             if ( | ||||||
|  |                 iso_lang | ||||||
|  |                 and iso_lang != engine_lang | ||||||
|  |                 and not engine_lang.startswith(iso_lang) | ||||||
|  |                 and iso_lang not in self.supported_languages | ||||||
|  |             ): | ||||||
|  |                 traits.language_aliases[iso_lang] = engine_lang | ||||||
|  | 
 | ||||||
|  |         engine.language_aliases = traits.language_aliases | ||||||
|  | 
 | ||||||
|  |         # set the copied & modified traits in engine's namespace | ||||||
|  |         engine.traits = traits | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
class EngineTraitsMap(Dict[str, EngineTraits]):
    """A python dictionary to map :class:`EngineTraits` by engine name."""

    ENGINE_TRAITS_FILE = (data_dir / 'engine_traits.json').resolve()
    """File with persistence of the :py:obj:`EngineTraitsMap`."""

    def save_data(self):
        """Store EngineTraitsMap in file :py:obj:`self.ENGINE_TRAITS_FILE`"""
        with open(self.ENGINE_TRAITS_FILE, 'w', encoding='utf-8') as f:
            json.dump(self, f, indent=2, sort_keys=True, cls=EngineTraitsEncoder)

    @classmethod
    def from_data(cls) -> Self:
        """Instantiate :class:`EngineTraitsMap` object from :py:obj:`ENGINE_TRAITS`"""
        obj = cls()
        for k, v in ENGINE_TRAITS.items():
            obj[k] = EngineTraits(**v)
        return obj

    @classmethod
    def fetch_traits(cls, log: Callable) -> Self:
        """Fetch the traits of all engines in ``searx.engines.engines``.

        The engines are processed in the order of their sorted names.  For each
        engine :py:obj:`EngineTraits.fetch_traits` and the deprecated
        :py:obj:`EngineTraits.fetch_supported_languages` are called.  If both
        deliver a result, the ``supported_languages`` of the vintage result are
        stored in the traits object.  Engines for which neither delivers a
        result get no entry in the map.

        :param log: callable that is called with one progress message (a
          string) per fetched result.
        """
        from searx import engines  # pylint: disable=cyclic-import, import-outside-toplevel

        obj = cls()

        for engine_name in sorted(engines.engines):
            engine = engines.engines[engine_name]

            traits = EngineTraits.fetch_traits(engine)
            if traits is not None:
                log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages)))
                log("%-20s: SearXNG regions   --> %s" % (engine_name, len(traits.regions)))
                obj[engine_name] = traits

            # vintage / deprecated
            _traits = EngineTraits.fetch_supported_languages(engine)
            if _traits is None:
                continue

            log("%-20s: %s supported_languages (deprecated)" % (engine_name, len(_traits.supported_languages)))
            if traits is None:
                obj[engine_name] = _traits
            else:
                # ``traits`` is already stored in ``obj``, just add the vintage data
                traits.supported_languages = _traits.supported_languages

        return obj

    def set_traits(self, engine: Engine):
        """Set traits in a :py:obj:`Engine` namespace.

        The traits are looked up by ``engine.name`` first and by
        ``engine.engine`` second.  If there is no entry for either, a default
        :class:`EngineTraits` of data type ``traits_v1`` is used.

        :param engine: engine instance build by :py:func:`searx.engines.load_engine`
        """
        if engine.name in self:
            engine_traits = self[engine.name]

        elif engine.engine in self:
            # The key of the dictionary traits_map is the *engine name*
            # configured in settings.yml.  When multiple engines are configured
            # in settings.yml to use the same origin engine (python module)
            # these additional engines can use the languages from the origin
            # engine.  For this use the configured ``engine: ...`` from
            # settings.yml
            engine_traits = self[engine.engine]

        else:
            engine_traits = EngineTraits(data_type='traits_v1')

        engine_traits.set_traits(engine)
| @ -11,24 +11,22 @@ usage:: | |||||||
| 
 | 
 | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
|  | from __future__ import annotations | ||||||
|  | 
 | ||||||
| import sys | import sys | ||||||
| import copy | import copy | ||||||
| from typing import Dict, List, Optional |  | ||||||
| 
 |  | ||||||
| from os.path import realpath, dirname | from os.path import realpath, dirname | ||||||
| from babel.localedata import locale_identifiers |  | ||||||
| from searx import logger, settings |  | ||||||
| from searx.data import ENGINES_LANGUAGES |  | ||||||
| from searx.network import get |  | ||||||
| from searx.utils import load_module, match_language, gen_useragent |  | ||||||
| 
 | 
 | ||||||
|  | from typing import TYPE_CHECKING, Dict, Optional | ||||||
|  | 
 | ||||||
|  | from searx import logger, settings | ||||||
|  | from searx.utils import load_module | ||||||
|  | 
 | ||||||
|  | if TYPE_CHECKING: | ||||||
|  |     from searx.enginelib import Engine | ||||||
| 
 | 
 | ||||||
| logger = logger.getChild('engines') | logger = logger.getChild('engines') | ||||||
| ENGINE_DIR = dirname(realpath(__file__)) | ENGINE_DIR = dirname(realpath(__file__)) | ||||||
| BABEL_LANGS = [ |  | ||||||
|     lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0] |  | ||||||
|     for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers()) |  | ||||||
| ] |  | ||||||
| ENGINE_DEFAULT_ARGS = { | ENGINE_DEFAULT_ARGS = { | ||||||
|     "engine_type": "online", |     "engine_type": "online", | ||||||
|     "inactive": False, |     "inactive": False, | ||||||
| @ -36,8 +34,6 @@ ENGINE_DEFAULT_ARGS = { | |||||||
|     "timeout": settings["outgoing"]["request_timeout"], |     "timeout": settings["outgoing"]["request_timeout"], | ||||||
|     "shortcut": "-", |     "shortcut": "-", | ||||||
|     "categories": ["general"], |     "categories": ["general"], | ||||||
|     "supported_languages": [], |  | ||||||
|     "language_aliases": {}, |  | ||||||
|     "paging": False, |     "paging": False, | ||||||
|     "safesearch": False, |     "safesearch": False, | ||||||
|     "time_range_support": False, |     "time_range_support": False, | ||||||
| @ -47,29 +43,13 @@ ENGINE_DEFAULT_ARGS = { | |||||||
|     "send_accept_language_header": False, |     "send_accept_language_header": False, | ||||||
|     "tokens": [], |     "tokens": [], | ||||||
|     "about": {}, |     "about": {}, | ||||||
|  |     "supported_languages": [],  # deprecated use traits | ||||||
|  |     "language_aliases": {},  # deprecated not needed when using traits | ||||||
| } | } | ||||||
| # set automatically when an engine does not have any tab category | # set automatically when an engine does not have any tab category | ||||||
| OTHER_CATEGORY = 'other' | OTHER_CATEGORY = 'other' | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class Engine:  # pylint: disable=too-few-public-methods |  | ||||||
|     """This class is currently never initialized and only used for type hinting.""" |  | ||||||
| 
 |  | ||||||
|     name: str |  | ||||||
|     engine: str |  | ||||||
|     shortcut: str |  | ||||||
|     categories: List[str] |  | ||||||
|     supported_languages: List[str] |  | ||||||
|     about: dict |  | ||||||
|     inactive: bool |  | ||||||
|     disabled: bool |  | ||||||
|     language_support: bool |  | ||||||
|     paging: bool |  | ||||||
|     safesearch: bool |  | ||||||
|     time_range_support: bool |  | ||||||
|     timeout: float |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # Defaults for the namespace of an engine module, see :py:func:`load_engine` | # Defaults for the namespace of an engine module, see :py:func:`load_engine` | ||||||
| 
 | 
 | ||||||
| categories = {'general': []} | categories = {'general': []} | ||||||
| @ -136,9 +116,15 @@ def load_engine(engine_data: dict) -> Optional[Engine]: | |||||||
|         return None |         return None | ||||||
| 
 | 
 | ||||||
|     update_engine_attributes(engine, engine_data) |     update_engine_attributes(engine, engine_data) | ||||||
|     set_language_attributes(engine) |  | ||||||
|     update_attributes_for_tor(engine) |     update_attributes_for_tor(engine) | ||||||
| 
 | 
 | ||||||
|  |     # avoid cyclic imports | ||||||
|  |     # pylint: disable=import-outside-toplevel | ||||||
|  |     from searx.enginelib.traits import EngineTraitsMap | ||||||
|  | 
 | ||||||
|  |     trait_map = EngineTraitsMap.from_data() | ||||||
|  |     trait_map.set_traits(engine) | ||||||
|  | 
 | ||||||
|     if not is_engine_active(engine): |     if not is_engine_active(engine): | ||||||
|         return None |         return None | ||||||
| 
 | 
 | ||||||
| @ -190,60 +176,6 @@ def update_engine_attributes(engine: Engine, engine_data): | |||||||
|             setattr(engine, arg_name, copy.deepcopy(arg_value)) |             setattr(engine, arg_name, copy.deepcopy(arg_value)) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def set_language_attributes(engine: Engine): |  | ||||||
|     # assign supported languages from json file |  | ||||||
|     if engine.name in ENGINES_LANGUAGES: |  | ||||||
|         engine.supported_languages = ENGINES_LANGUAGES[engine.name] |  | ||||||
| 
 |  | ||||||
|     elif engine.engine in ENGINES_LANGUAGES: |  | ||||||
|         # The key of the dictionary ENGINES_LANGUAGES is the *engine name* |  | ||||||
|         # configured in settings.xml.  When multiple engines are configured in |  | ||||||
|         # settings.yml to use the same origin engine (python module) these |  | ||||||
|         # additional engines can use the languages from the origin engine. |  | ||||||
|         # For this use the configured ``engine: ...`` from settings.yml |  | ||||||
|         engine.supported_languages = ENGINES_LANGUAGES[engine.engine] |  | ||||||
| 
 |  | ||||||
|     if hasattr(engine, 'language'): |  | ||||||
|         # For an engine, when there is `language: ...` in the YAML settings, the |  | ||||||
|         # engine supports only one language, in this case |  | ||||||
|         # engine.supported_languages should contains this value defined in |  | ||||||
|         # settings.yml |  | ||||||
|         if engine.language not in engine.supported_languages: |  | ||||||
|             raise ValueError( |  | ||||||
|                 "settings.yml - engine: '%s' / language: '%s' not supported" % (engine.name, engine.language) |  | ||||||
|             ) |  | ||||||
| 
 |  | ||||||
|         if isinstance(engine.supported_languages, dict): |  | ||||||
|             engine.supported_languages = {engine.language: engine.supported_languages[engine.language]} |  | ||||||
|         else: |  | ||||||
|             engine.supported_languages = [engine.language] |  | ||||||
| 
 |  | ||||||
|     # find custom aliases for non standard language codes |  | ||||||
|     for engine_lang in engine.supported_languages: |  | ||||||
|         iso_lang = match_language(engine_lang, BABEL_LANGS, fallback=None) |  | ||||||
|         if ( |  | ||||||
|             iso_lang |  | ||||||
|             and iso_lang != engine_lang |  | ||||||
|             and not engine_lang.startswith(iso_lang) |  | ||||||
|             and iso_lang not in engine.supported_languages |  | ||||||
|         ): |  | ||||||
|             engine.language_aliases[iso_lang] = engine_lang |  | ||||||
| 
 |  | ||||||
|     # language_support |  | ||||||
|     engine.language_support = len(engine.supported_languages) > 0 |  | ||||||
| 
 |  | ||||||
|     # assign language fetching method if auxiliary method exists |  | ||||||
|     if hasattr(engine, '_fetch_supported_languages'): |  | ||||||
|         headers = { |  | ||||||
|             'User-Agent': gen_useragent(), |  | ||||||
|             'Accept-Language': "en-US,en;q=0.5",  # bing needs to set the English language |  | ||||||
|         } |  | ||||||
|         engine.fetch_supported_languages = ( |  | ||||||
|             # pylint: disable=protected-access |  | ||||||
|             lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers)) |  | ||||||
|         ) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def update_attributes_for_tor(engine: Engine) -> bool: | def update_attributes_for_tor(engine: Engine) -> bool: | ||||||
|     if using_tor_proxy(engine) and hasattr(engine, 'onion_url'): |     if using_tor_proxy(engine) and hasattr(engine, 'onion_url'): | ||||||
|         engine.search_url = engine.onion_url + getattr(engine, 'search_path', '') |         engine.search_url = engine.onion_url + getattr(engine, 'search_path', '') | ||||||
|  | |||||||
| @ -63,7 +63,7 @@ def search(query, request_params): | |||||||
|     for row in result_list: |     for row in result_list: | ||||||
|         entry = { |         entry = { | ||||||
|             'query': query, |             'query': query, | ||||||
|             'language': request_params['language'], |             'language': request_params['searxng_locale'], | ||||||
|             'value': row.get("value"), |             'value': row.get("value"), | ||||||
|             # choose a result template or comment out to use the *default* |             # choose a result template or comment out to use the *default* | ||||||
|             'template': 'key-value.html', |             'template': 'key-value.html', | ||||||
|  | |||||||
| @ -8,7 +8,7 @@ from typing import Set | |||||||
| import os | import os | ||||||
| import pathlib | import pathlib | ||||||
| 
 | 
 | ||||||
| from babel import Locale | import babel | ||||||
| from babel.support import Translations | from babel.support import Translations | ||||||
| import babel.languages | import babel.languages | ||||||
| import babel.core | import babel.core | ||||||
| @ -134,7 +134,7 @@ def locales_initialize(directory=None): | |||||||
|     flask_babel.get_translations = get_translations |     flask_babel.get_translations = get_translations | ||||||
| 
 | 
 | ||||||
|     for tag, descr in ADDITIONAL_TRANSLATIONS.items(): |     for tag, descr in ADDITIONAL_TRANSLATIONS.items(): | ||||||
|         locale = Locale.parse(LOCALE_BEST_MATCH[tag], sep='-') |         locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-') | ||||||
|         LOCALE_NAMES[tag] = descr |         LOCALE_NAMES[tag] = descr | ||||||
|         if locale.text_direction == 'rtl': |         if locale.text_direction == 'rtl': | ||||||
|             RTL_LOCALES.add(tag) |             RTL_LOCALES.add(tag) | ||||||
| @ -142,7 +142,7 @@ def locales_initialize(directory=None): | |||||||
|     for tag in LOCALE_BEST_MATCH: |     for tag in LOCALE_BEST_MATCH: | ||||||
|         descr = LOCALE_NAMES.get(tag) |         descr = LOCALE_NAMES.get(tag) | ||||||
|         if not descr: |         if not descr: | ||||||
|             locale = Locale.parse(tag, sep='-') |             locale = babel.Locale.parse(tag, sep='-') | ||||||
|             LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_')) |             LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_')) | ||||||
|             if locale.text_direction == 'rtl': |             if locale.text_direction == 'rtl': | ||||||
|                 RTL_LOCALES.add(tag) |                 RTL_LOCALES.add(tag) | ||||||
| @ -154,12 +154,66 @@ def locales_initialize(directory=None): | |||||||
|         tag = dirname.replace('_', '-') |         tag = dirname.replace('_', '-') | ||||||
|         descr = LOCALE_NAMES.get(tag) |         descr = LOCALE_NAMES.get(tag) | ||||||
|         if not descr: |         if not descr: | ||||||
|             locale = Locale.parse(dirname) |             locale = babel.Locale.parse(dirname) | ||||||
|             LOCALE_NAMES[tag] = get_locale_descr(locale, dirname) |             LOCALE_NAMES[tag] = get_locale_descr(locale, dirname) | ||||||
|             if locale.text_direction == 'rtl': |             if locale.text_direction == 'rtl': | ||||||
|                 RTL_LOCALES.add(tag) |                 RTL_LOCALES.add(tag) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
def region_tag(locale: babel.Locale) -> str:
    """Returns SearXNG's region tag from the locale (e.g. zh-TW , en-US).

    :param locale: the locale to build the tag from; its ``language`` and
      ``territory`` are joined by a hyphen.
    :raises ValueError: if the locale has no territory.
    """
    if not locale.territory:
        # name the offending locale in the message
        raise ValueError(f'{locale} missed a territory')
    return locale.language + '-' + locale.territory
|  | 
 | ||||||
|  | 
 | ||||||
def language_tag(locale: babel.Locale) -> str:
    """Returns SearXNG's language tag from the locale.  If the locale has a
    script, the tag is the language and the script name joined by an underscore
    (e.g. ``zh_Hant``), otherwise it is just the language (e.g. ``en``).
    """
    if not locale.script:
        return locale.language
    return locale.language + '_' + locale.script
|  | 
 | ||||||
|  | 
 | ||||||
def get_offical_locales(
    territory: str, languages=None, regional: bool = False, de_facto: bool = True
) -> Set[babel.Locale]:
    """Returns a set of :py:obj:`babel.Locale` with languages from
    :py:obj:`babel.languages.get_official_languages`.

    :param territory: The territory (country or region) code.

    :param languages: A list of language codes the languages from
      :py:obj:`babel.languages.get_official_languages` should be in
      (intersection, compared case-insensitively).  If this argument is
      ``None`` (or empty), all official languages in this territory are used.

    :param regional: If the regional flag is set, then languages which are
      regionally official are also returned.

    :param de_facto: If the de_facto flag is set to `False`, then languages
      which are “de facto” official are not returned.

    Language / territory combinations unknown to babel are skipped.
    """
    official = babel.languages.get_official_languages(territory, regional=regional, de_facto=de_facto)

    if languages:
        wanted = [code.lower() for code in languages]
        official = {code for code in official if code.lower() in wanted}

    found = set()
    for code in official:
        try:
            found.add(babel.Locale.parse(code + '_' + territory))
        except babel.UnknownLocaleError:
            continue

    return found
|  | 
 | ||||||
|  | 
 | ||||||
| def get_engine_locale(searxng_locale, engine_locales, default=None): | def get_engine_locale(searxng_locale, engine_locales, default=None): | ||||||
|     """Return engine's language (aka locale) string that best fits to argument |     """Return engine's language (aka locale) string that best fits to argument | ||||||
|     ``searxng_locale``. |     ``searxng_locale``. | ||||||
| @ -177,6 +231,10 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): | |||||||
|           ... |           ... | ||||||
|           'pl-PL'          : 'pl_PL', |           'pl-PL'          : 'pl_PL', | ||||||
|           'pt-PT'          : 'pt_PT' |           'pt-PT'          : 'pt_PT' | ||||||
|  |           .. | ||||||
|  |           'zh'             : 'zh' | ||||||
|  |           'zh_Hans'        : 'zh' | ||||||
|  |           'zh_Hant'        : 'zh-classical' | ||||||
|       } |       } | ||||||
| 
 | 
 | ||||||
|     .. hint:: |     .. hint:: | ||||||
| @ -210,13 +268,13 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): | |||||||
|       engine. |       engine. | ||||||
| 
 | 
 | ||||||
|     """ |     """ | ||||||
|     # pylint: disable=too-many-branches |     # pylint: disable=too-many-branches, too-many-return-statements | ||||||
| 
 | 
 | ||||||
|     engine_locale = engine_locales.get(searxng_locale) |     engine_locale = engine_locales.get(searxng_locale) | ||||||
| 
 | 
 | ||||||
|     if engine_locale is not None: |     if engine_locale is not None: | ||||||
|         # There was a 1:1 mapping (e.g. "fr-BE --> fr_BE" or "fr --> fr_FR"), no |         # There was a 1:1 mapping (e.g. a region "fr-BE --> fr_BE" or a language | ||||||
|         # need to narrow language nor territory. |         # "zh --> zh"), no need to narrow language-script nor territory. | ||||||
|         return engine_locale |         return engine_locale | ||||||
| 
 | 
 | ||||||
|     try: |     try: | ||||||
| @ -227,6 +285,12 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): | |||||||
|         except babel.core.UnknownLocaleError: |         except babel.core.UnknownLocaleError: | ||||||
|             return default |             return default | ||||||
| 
 | 
 | ||||||
|  |     searxng_lang = language_tag(locale) | ||||||
|  |     engine_locale = engine_locales.get(searxng_lang) | ||||||
|  |     if engine_locale is not None: | ||||||
|  |         # There was a 1:1 mapping (e.g. "zh-HK --> zh_Hant" or "zh-CN --> zh_Hans") | ||||||
|  |         return engine_locale | ||||||
|  | 
 | ||||||
|     # SearXNG's selected locale is not supported by the engine .. |     # SearXNG's selected locale is not supported by the engine .. | ||||||
| 
 | 
 | ||||||
|     if locale.territory: |     if locale.territory: | ||||||
| @ -247,10 +311,6 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): | |||||||
| 
 | 
 | ||||||
|     if locale.language: |     if locale.language: | ||||||
| 
 | 
 | ||||||
|         searxng_lang = locale.language |  | ||||||
|         if locale.script: |  | ||||||
|             searxng_lang += '_' + locale.script |  | ||||||
| 
 |  | ||||||
|         terr_lang_dict = {} |         terr_lang_dict = {} | ||||||
|         for territory, langs in babel.core.get_global("territory_languages").items(): |         for territory, langs in babel.core.get_global("territory_languages").items(): | ||||||
|             if not langs.get(searxng_lang, {}).get('official_status'): |             if not langs.get(searxng_lang, {}).get('official_status'): | ||||||
|  | |||||||
| @ -13,7 +13,7 @@ from typing import Iterable, Dict, List | |||||||
| import flask | import flask | ||||||
| 
 | 
 | ||||||
| from searx import settings, autocomplete | from searx import settings, autocomplete | ||||||
| from searx.engines import Engine | from searx.enginelib import Engine | ||||||
| from searx.plugins import Plugin | from searx.plugins import Plugin | ||||||
| from searx.locales import LOCALE_NAMES | from searx.locales import LOCALE_NAMES | ||||||
| from searx.webutils import VALID_LANGUAGE_CODE | from searx.webutils import VALID_LANGUAGE_CODE | ||||||
|  | |||||||
| @ -30,7 +30,10 @@ from .abstract import EngineProcessor | |||||||
| 
 | 
 | ||||||
| logger = logger.getChild('search.processors') | logger = logger.getChild('search.processors') | ||||||
| PROCESSORS: Dict[str, EngineProcessor] = {} | PROCESSORS: Dict[str, EngineProcessor] = {} | ||||||
| """Cache request processors, stored by *engine-name* (:py:func:`initialize`)""" | """Cache request processors, stored by *engine-name* (:py:func:`initialize`) | ||||||
|  | 
 | ||||||
|  | :meta hide-value: | ||||||
|  | """ | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def get_processor_class(engine_type): | def get_processor_class(engine_type): | ||||||
|  | |||||||
| @ -138,7 +138,8 @@ class EngineProcessor(ABC): | |||||||
|         return False |         return False | ||||||
| 
 | 
 | ||||||
|     def get_params(self, search_query, engine_category): |     def get_params(self, search_query, engine_category): | ||||||
|         """Returns a set of *request params* or ``None`` if request is not supported. |         """Returns a set of :ref:`request params <engine request arguments>` or | ||||||
|  |         ``None`` if request is not supported. | ||||||
| 
 | 
 | ||||||
|         Not supported conditions (``None`` is returned): |         Not supported conditions (``None`` is returned): | ||||||
| 
 | 
 | ||||||
| @ -159,11 +160,20 @@ class EngineProcessor(ABC): | |||||||
|         params['safesearch'] = search_query.safesearch |         params['safesearch'] = search_query.safesearch | ||||||
|         params['time_range'] = search_query.time_range |         params['time_range'] = search_query.time_range | ||||||
|         params['engine_data'] = search_query.engine_data.get(self.engine_name, {}) |         params['engine_data'] = search_query.engine_data.get(self.engine_name, {}) | ||||||
|  |         params['searxng_locale'] = search_query.lang | ||||||
|  | 
 | ||||||
|  |         # deprecated / vintage --> use params['searxng_locale'] | ||||||
|  |         # | ||||||
|  |         # Conditions related to engine's traits are implemented in engine.traits | ||||||
|  |         # module. Don't do 'locale' decisions here in the abstract layer of the | ||||||
|  |         # search processor, just pass the value from user's choice unchanged to | ||||||
|  |         # the engine request. | ||||||
| 
 | 
 | ||||||
|         if hasattr(self.engine, 'language') and self.engine.language: |         if hasattr(self.engine, 'language') and self.engine.language: | ||||||
|             params['language'] = self.engine.language |             params['language'] = self.engine.language | ||||||
|         else: |         else: | ||||||
|             params['language'] = search_query.lang |             params['language'] = search_query.lang | ||||||
|  | 
 | ||||||
|         return params |         return params | ||||||
| 
 | 
 | ||||||
|     @abstractmethod |     @abstractmethod | ||||||
|  | |||||||
| @ -51,6 +51,9 @@ class OnlineProcessor(EngineProcessor): | |||||||
|         super().initialize() |         super().initialize() | ||||||
| 
 | 
 | ||||||
|     def get_params(self, search_query, engine_category): |     def get_params(self, search_query, engine_category): | ||||||
|  |         """Returns a set of :ref:`request params <engine request online>` or ``None`` | ||||||
|  |         if request is not supported. | ||||||
|  |         """ | ||||||
|         params = super().get_params(search_query, engine_category) |         params = super().get_params(search_query, engine_category) | ||||||
|         if params is None: |         if params is None: | ||||||
|             return None |             return None | ||||||
|  | |||||||
| @ -38,8 +38,8 @@ class OnlineCurrencyProcessor(OnlineProcessor): | |||||||
|     engine_type = 'online_currency' |     engine_type = 'online_currency' | ||||||
| 
 | 
 | ||||||
|     def get_params(self, search_query, engine_category): |     def get_params(self, search_query, engine_category): | ||||||
|         """Returns a set of *request params* or ``None`` if search query does not match |         """Returns a set of :ref:`request params <engine request online_currency>` | ||||||
|         to :py:obj:`parser_re`.""" |         or ``None`` if search query does not match to :py:obj:`parser_re`.""" | ||||||
| 
 | 
 | ||||||
|         params = super().get_params(search_query, engine_category) |         params = super().get_params(search_query, engine_category) | ||||||
|         if params is None: |         if params is None: | ||||||
|  | |||||||
| @ -18,8 +18,9 @@ class OnlineDictionaryProcessor(OnlineProcessor): | |||||||
|     engine_type = 'online_dictionary' |     engine_type = 'online_dictionary' | ||||||
| 
 | 
 | ||||||
|     def get_params(self, search_query, engine_category): |     def get_params(self, search_query, engine_category): | ||||||
|         """Returns a set of *request params* or ``None`` if search query does not match |         """Returns a set of :ref:`request params <engine request online_dictionary>` or | ||||||
|         to :py:obj:`parser_re`.""" |         ``None`` if search query does not match to :py:obj:`parser_re`. | ||||||
|  |         """ | ||||||
|         params = super().get_params(search_query, engine_category) |         params = super().get_params(search_query, engine_category) | ||||||
|         if params is None: |         if params is None: | ||||||
|             return None |             return None | ||||||
|  | |||||||
| @ -20,9 +20,10 @@ class OnlineUrlSearchProcessor(OnlineProcessor): | |||||||
|     engine_type = 'online_url_search' |     engine_type = 'online_url_search' | ||||||
| 
 | 
 | ||||||
|     def get_params(self, search_query, engine_category): |     def get_params(self, search_query, engine_category): | ||||||
|         """Returns a set of *request params* or ``None`` if search query does not match |         """Returns a set of :ref:`request params <engine request online>` or ``None`` if | ||||||
|         to at least one of :py:obj:`re_search_urls`. |         search query does not match to :py:obj:`re_search_urls`. | ||||||
|         """ |         """ | ||||||
|  | 
 | ||||||
|         params = super().get_params(search_query, engine_category) |         params = super().get_params(search_query, engine_category) | ||||||
|         if params is None: |         if params is None: | ||||||
|             return None |             return None | ||||||
|  | |||||||
| @ -907,16 +907,11 @@ def autocompleter(): | |||||||
|     # and there is a query part |     # and there is a query part | ||||||
|     if len(raw_text_query.autocomplete_list) == 0 and len(sug_prefix) > 0: |     if len(raw_text_query.autocomplete_list) == 0 and len(sug_prefix) > 0: | ||||||
| 
 | 
 | ||||||
|         # get language from cookie |         # get SearXNG's locale and autocomplete backend from cookie | ||||||
|         language = request.preferences.get_value('language') |         sxng_locale = request.preferences.get_value('language') | ||||||
|         if not language or language == 'all': |         backend_name = request.preferences.get_value('autocomplete') | ||||||
|             language = 'en' |  | ||||||
|         else: |  | ||||||
|             language = language.split('-')[0] |  | ||||||
| 
 | 
 | ||||||
|         # run autocompletion |         for result in search_autocomplete(backend_name, sug_prefix, sxng_locale): | ||||||
|         raw_results = search_autocomplete(request.preferences.get_value('autocomplete'), sug_prefix, language) |  | ||||||
|         for result in raw_results: |  | ||||||
|             # attention: this loop will change raw_text_query object and this is |             # attention: this loop will change raw_text_query object and this is | ||||||
|             # the reason why the sug_prefix was stored before (see above) |             # the reason why the sug_prefix was stored before (see above) | ||||||
|             if result != sug_prefix: |             if result != sug_prefix: | ||||||
| @ -1001,7 +996,9 @@ def preferences(): | |||||||
|             'rate80': rate80, |             'rate80': rate80, | ||||||
|             'rate95': rate95, |             'rate95': rate95, | ||||||
|             'warn_timeout': e.timeout > settings['outgoing']['request_timeout'], |             'warn_timeout': e.timeout > settings['outgoing']['request_timeout'], | ||||||
|             'supports_selected_language': _is_selected_language_supported(e, request.preferences), |             'supports_selected_language': e.traits.is_locale_supported( | ||||||
|  |                 str(request.preferences.get_value('language') or 'all') | ||||||
|  |             ), | ||||||
|             'result_count': result_count, |             'result_count': result_count, | ||||||
|         } |         } | ||||||
|     # end of stats |     # end of stats | ||||||
| @ -1052,7 +1049,9 @@ def preferences(): | |||||||
|     # supports |     # supports | ||||||
|     supports = {} |     supports = {} | ||||||
|     for _, e in filtered_engines.items(): |     for _, e in filtered_engines.items(): | ||||||
|         supports_selected_language = _is_selected_language_supported(e, request.preferences) |         supports_selected_language = e.traits.is_locale_supported( | ||||||
|  |             str(request.preferences.get_value('language') or 'all') | ||||||
|  |         ) | ||||||
|         safesearch = e.safesearch |         safesearch = e.safesearch | ||||||
|         time_range_support = e.time_range_support |         time_range_support = e.time_range_support | ||||||
|         for checker_test_name in checker_results.get(e.name, {}).get('errors', {}): |         for checker_test_name in checker_results.get(e.name, {}).get('errors', {}): | ||||||
| @ -1099,16 +1098,6 @@ def preferences(): | |||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _is_selected_language_supported(engine, preferences: Preferences):  # pylint: disable=redefined-outer-name |  | ||||||
|     language = preferences.get_value('language') |  | ||||||
|     if language == 'all': |  | ||||||
|         return True |  | ||||||
|     x = match_language( |  | ||||||
|         language, getattr(engine, 'supported_languages', []), getattr(engine, 'language_aliases', {}), None |  | ||||||
|     ) |  | ||||||
|     return bool(x) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| @app.route('/image_proxy', methods=['GET']) | @app.route('/image_proxy', methods=['GET']) | ||||||
| def image_proxy(): | def image_proxy(): | ||||||
|     # pylint: disable=too-many-return-statements, too-many-branches |     # pylint: disable=too-many-return-statements, too-many-branches | ||||||
| @ -1327,9 +1316,11 @@ def config(): | |||||||
|         if not request.preferences.validate_token(engine): |         if not request.preferences.validate_token(engine): | ||||||
|             continue |             continue | ||||||
| 
 | 
 | ||||||
|         supported_languages = engine.supported_languages |         _languages = engine.traits.languages.keys() | ||||||
|         if isinstance(engine.supported_languages, dict): |         if engine.traits.data_type == 'supported_languages':  # vintage / deprecated | ||||||
|             supported_languages = list(engine.supported_languages.keys()) |             _languages = engine.traits.supported_languages | ||||||
|  |             if isinstance(_languages, dict): | ||||||
|  |                 _languages = _languages.keys() | ||||||
| 
 | 
 | ||||||
|         _engines.append( |         _engines.append( | ||||||
|             { |             { | ||||||
| @ -1339,7 +1330,8 @@ def config(): | |||||||
|                 'enabled': not engine.disabled, |                 'enabled': not engine.disabled, | ||||||
|                 'paging': engine.paging, |                 'paging': engine.paging, | ||||||
|                 'language_support': engine.language_support, |                 'language_support': engine.language_support, | ||||||
|                 'supported_languages': supported_languages, |                 'languages': list(_languages), | ||||||
|  |                 'regions': list(engine.traits.regions.keys()), | ||||||
|                 'safesearch': engine.safesearch, |                 'safesearch': engine.safesearch, | ||||||
|                 'time_range_support': engine.time_range_support, |                 'time_range_support': engine.time_range_support, | ||||||
|                 'timeout': engine.timeout, |                 'timeout': engine.timeout, | ||||||
|  | |||||||
| @ -1,4 +1,6 @@ | |||||||
| # -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||||
|  | from __future__ import annotations | ||||||
|  | 
 | ||||||
| import os | import os | ||||||
| import pathlib | import pathlib | ||||||
| import csv | import csv | ||||||
| @ -8,7 +10,7 @@ import re | |||||||
| import inspect | import inspect | ||||||
| import itertools | import itertools | ||||||
| from datetime import datetime, timedelta | from datetime import datetime, timedelta | ||||||
| from typing import Iterable, List, Tuple, Dict | from typing import Iterable, List, Tuple, Dict, TYPE_CHECKING | ||||||
| 
 | 
 | ||||||
| from io import StringIO | from io import StringIO | ||||||
| from codecs import getincrementalencoder | from codecs import getincrementalencoder | ||||||
| @ -16,7 +18,10 @@ from codecs import getincrementalencoder | |||||||
| from flask_babel import gettext, format_date | from flask_babel import gettext, format_date | ||||||
| 
 | 
 | ||||||
| from searx import logger, settings | from searx import logger, settings | ||||||
| from searx.engines import Engine, OTHER_CATEGORY | from searx.engines import OTHER_CATEGORY | ||||||
|  | 
 | ||||||
|  | if TYPE_CHECKING: | ||||||
|  |     from searx.enginelib import Engine | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$') | VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$') | ||||||
|  | |||||||
| @ -1,19 +1,21 @@ | |||||||
| #!/usr/bin/env python | #!/usr/bin/env python | ||||||
| # lint: pylint | # lint: pylint | ||||||
| 
 |  | ||||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | # SPDX-License-Identifier: AGPL-3.0-or-later | ||||||
| """This script generates languages.py from intersecting each engine's supported | """Update :py:obj:`searx.enginelib.traits.EngineTraitsMap` and :origin:`searx/languages.py` | ||||||
| languages. |  | ||||||
| 
 | 
 | ||||||
| Output files: :origin:`searx/data/engines_languages.json` and | :py:obj:`searx.enginelib.traits.EngineTraitsMap.ENGINE_TRAITS_FILE`: | ||||||
| :origin:`searx/languages.py` (:origin:`CI Update data ... |   Persistence of engines traits, fetched from the engines. | ||||||
| <.github/workflows/data-update.yml>`). | 
 | ||||||
|  | :origin:`searx/languages.py` | ||||||
|  |   Is generated from intersecting each engine's supported traits. | ||||||
|  | 
 | ||||||
|  | The script :origin:`searxng_extra/update/update_engine_traits.py` is called in | ||||||
|  | the :origin:`CI Update data ... <.github/workflows/data-update.yml>` | ||||||
| 
 | 
 | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| # pylint: disable=invalid-name | # pylint: disable=invalid-name | ||||||
| from unicodedata import lookup | from unicodedata import lookup | ||||||
| import json |  | ||||||
| from pathlib import Path | from pathlib import Path | ||||||
| from pprint import pformat | from pprint import pformat | ||||||
| from babel import Locale, UnknownLocaleError | from babel import Locale, UnknownLocaleError | ||||||
| @ -21,36 +23,26 @@ from babel.languages import get_global | |||||||
| from babel.core import parse_locale | from babel.core import parse_locale | ||||||
| 
 | 
 | ||||||
| from searx import settings, searx_dir | from searx import settings, searx_dir | ||||||
|  | from searx import network | ||||||
| from searx.engines import load_engines, engines | from searx.engines import load_engines, engines | ||||||
| from searx.network import set_timeout_for_thread | from searx.enginelib.traits import EngineTraitsMap | ||||||
| 
 | 
 | ||||||
| # Output files. | # Output files. | ||||||
| engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json' |  | ||||||
| languages_file = Path(searx_dir) / 'languages.py' | languages_file = Path(searx_dir) / 'languages.py' | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # Fetches supported languages for each engine and writes json file with those. | def fetch_traits_map(): | ||||||
| def fetch_supported_languages(): |     """Fetches supported languages for each engine and writes json file with those.""" | ||||||
|     set_timeout_for_thread(10.0) |     network.set_timeout_for_thread(10.0) | ||||||
| 
 | 
 | ||||||
|     engines_languages = {} |     def log(msg): | ||||||
|     names = list(engines) |         print(msg) | ||||||
|     names.sort() |  | ||||||
| 
 | 
 | ||||||
|     for engine_name in names: |     traits_map = EngineTraitsMap.fetch_traits(log=log) | ||||||
|         if hasattr(engines[engine_name], 'fetch_supported_languages'): |     print("fetched properties from %s engines" % len(traits_map)) | ||||||
|             engines_languages[engine_name] = engines[engine_name].fetch_supported_languages() |     print("write json file: %s" % traits_map.ENGINE_TRAITS_FILE) | ||||||
|             print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name)) |     traits_map.save_data() | ||||||
|             if type(engines_languages[engine_name]) == list:  # pylint: disable=unidiomatic-typecheck |     return traits_map | ||||||
|                 engines_languages[engine_name] = sorted(engines_languages[engine_name]) |  | ||||||
| 
 |  | ||||||
|     print("fetched languages from %s engines" % len(engines_languages)) |  | ||||||
| 
 |  | ||||||
|     # write json file |  | ||||||
|     with open(engines_languages_file, 'w', encoding='utf-8') as f: |  | ||||||
|         json.dump(engines_languages, f, indent=2, sort_keys=True) |  | ||||||
| 
 |  | ||||||
|     return engines_languages |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # Get babel Locale object from lang_code if possible. | # Get babel Locale object from lang_code if possible. | ||||||
| @ -124,17 +116,43 @@ def get_territory_name(lang_code): | |||||||
|     return country_name |     return country_name | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # Join all language lists. | def join_language_lists(traits_map: EngineTraitsMap): | ||||||
| def join_language_lists(engines_languages): |     """Join all languages of the engines into one list.  The returned language list | ||||||
|     language_list = {} |     contains language codes (``zh``) and region codes (``zh-TW``).  The codes can | ||||||
|     for engine_name in engines_languages: |     be parsed by babel:: | ||||||
|         for lang_code in engines_languages[engine_name]: |  | ||||||
| 
 | 
 | ||||||
|             # apply custom fixes if necessary |       babel.Locale.parse(language_list[n]) | ||||||
|             if lang_code in getattr(engines[engine_name], 'language_aliases', {}).values(): |     """ | ||||||
|                 lang_code = next( |     # pylint: disable=too-many-branches | ||||||
|                     lc for lc, alias in engines[engine_name].language_aliases.items() if lang_code == alias |     language_list = {} | ||||||
|                 ) | 
 | ||||||
|  |     for eng_name, eng_traits in traits_map.items(): | ||||||
|  |         eng = engines[eng_name] | ||||||
|  |         eng_codes = set() | ||||||
|  | 
 | ||||||
|  |         if eng_traits.data_type == 'traits_v1': | ||||||
|  |             # items of type 'engine_traits' do have regions & languages, the | ||||||
|  |             # list of eng_codes should contain both. | ||||||
|  |             eng_codes.update(eng_traits.regions.keys()) | ||||||
|  |             eng_codes.update(eng_traits.languages.keys()) | ||||||
|  | 
 | ||||||
|  |         elif eng_traits.data_type == 'supported_languages': | ||||||
|  |             # vintage / deprecated | ||||||
|  |             _codes = set() | ||||||
|  |             if isinstance(eng_traits.supported_languages, dict): | ||||||
|  |                 _codes.update(eng_traits.supported_languages.keys()) | ||||||
|  |             elif isinstance(eng_traits.supported_languages, list): | ||||||
|  |                 _codes.update(eng_traits.supported_languages) | ||||||
|  |             else: | ||||||
|  |                 raise TypeError('engine.supported_languages type %s is unknown' % type(eng_traits.supported_languages)) | ||||||
|  | 
 | ||||||
|  |             for lang_code in _codes: | ||||||
|  |                 # apply custom fixes if necessary | ||||||
|  |                 if lang_code in getattr(eng, 'language_aliases', {}).values(): | ||||||
|  |                     lang_code = next(lc for lc, alias in eng.language_aliases.items() if lang_code == alias) | ||||||
|  |                 eng_codes.add(lang_code) | ||||||
|  | 
 | ||||||
|  |         for lang_code in eng_codes: | ||||||
| 
 | 
 | ||||||
|             locale = get_locale(lang_code) |             locale = get_locale(lang_code) | ||||||
| 
 | 
 | ||||||
| @ -149,10 +167,10 @@ def join_language_lists(engines_languages): | |||||||
|                     # get language's data from babel's Locale object |                     # get language's data from babel's Locale object | ||||||
|                     language_name = locale.get_language_name().title() |                     language_name = locale.get_language_name().title() | ||||||
|                     english_name = locale.english_name.split(' (')[0] |                     english_name = locale.english_name.split(' (')[0] | ||||||
|                 elif short_code in engines_languages['wikipedia']: |                 elif short_code in traits_map['wikipedia'].supported_languages: | ||||||
|                     # get language's data from wikipedia if not known by babel |                     # get language's data from wikipedia if not known by babel | ||||||
|                     language_name = engines_languages['wikipedia'][short_code]['name'] |                     language_name = traits_map['wikipedia'].supported_languages[short_code]['name'] | ||||||
|                     english_name = engines_languages['wikipedia'][short_code]['english_name'] |                     english_name = traits_map['wikipedia'].supported_languages[short_code]['english_name'] | ||||||
|                 else: |                 else: | ||||||
|                     language_name = None |                     language_name = None | ||||||
|                     english_name = None |                     english_name = None | ||||||
| @ -182,15 +200,15 @@ def join_language_lists(engines_languages): | |||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|             # count engine for both language_country combination and language alone |             # count engine for both language_country combination and language alone | ||||||
|             language_list[short_code]['counter'].add(engine_name) |             language_list[short_code]['counter'].add(eng_name) | ||||||
|             if lang_code != short_code: |             if lang_code != short_code: | ||||||
|                 language_list[short_code]['countries'][lang_code]['counter'].add(engine_name) |                 language_list[short_code]['countries'][lang_code]['counter'].add(eng_name) | ||||||
| 
 | 
 | ||||||
|     return language_list |     return language_list | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # Filter language list so it only includes the most supported languages and countries | # Filter language list so it only includes the most supported languages and countries | ||||||
| def filter_language_list(all_languages): | def filter_language_list(joined_languages_map): | ||||||
|     min_engines_per_lang = 12 |     min_engines_per_lang = 12 | ||||||
|     min_engines_per_country = 7 |     min_engines_per_country = 7 | ||||||
|     # pylint: disable=consider-using-dict-items, consider-iterating-dictionary |     # pylint: disable=consider-using-dict-items, consider-iterating-dictionary | ||||||
| @ -198,6 +216,7 @@ def filter_language_list(all_languages): | |||||||
|         engine_name |         engine_name | ||||||
|         for engine_name in engines.keys() |         for engine_name in engines.keys() | ||||||
|         if 'general' in engines[engine_name].categories |         if 'general' in engines[engine_name].categories | ||||||
|  |         and hasattr(engines[engine_name], 'supported_languages') | ||||||
|         and engines[engine_name].supported_languages |         and engines[engine_name].supported_languages | ||||||
|         and not engines[engine_name].disabled |         and not engines[engine_name].disabled | ||||||
|     ] |     ] | ||||||
| @ -205,7 +224,7 @@ def filter_language_list(all_languages): | |||||||
|     # filter list to include only languages supported by most engines or all default general engines |     # filter list to include only languages supported by most engines or all default general engines | ||||||
|     filtered_languages = { |     filtered_languages = { | ||||||
|         code: lang |         code: lang | ||||||
|         for code, lang in all_languages.items() |         for code, lang in joined_languages_map.items() | ||||||
|         if ( |         if ( | ||||||
|             len(lang['counter']) >= min_engines_per_lang |             len(lang['counter']) >= min_engines_per_lang | ||||||
|             or all(main_engine in lang['counter'] for main_engine in main_engines) |             or all(main_engine in lang['counter'] for main_engine in main_engines) | ||||||
| @ -214,8 +233,8 @@ def filter_language_list(all_languages): | |||||||
| 
 | 
 | ||||||
|     def _copy_lang_data(lang, country_name=None): |     def _copy_lang_data(lang, country_name=None): | ||||||
|         new_dict = {} |         new_dict = {} | ||||||
|         new_dict['name'] = all_languages[lang]['name'] |         new_dict['name'] = joined_languages_map[lang]['name'] | ||||||
|         new_dict['english_name'] = all_languages[lang]['english_name'] |         new_dict['english_name'] = joined_languages_map[lang]['english_name'] | ||||||
|         if country_name: |         if country_name: | ||||||
|             new_dict['country_name'] = country_name |             new_dict['country_name'] = country_name | ||||||
|         return new_dict |         return new_dict | ||||||
| @ -305,9 +324,13 @@ def write_languages_file(languages): | |||||||
|         new_file.close() |         new_file.close() | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| if __name__ == "__main__": | def main(): | ||||||
|     load_engines(settings['engines']) |     load_engines(settings['engines']) | ||||||
|     _engines_languages = fetch_supported_languages() |     traits_map = fetch_traits_map() | ||||||
|     _all_languages = join_language_lists(_engines_languages) |     joined_languages_map = join_language_lists(traits_map) | ||||||
|     _filtered_languages = filter_language_list(_all_languages) |     filtered_languages = filter_language_list(joined_languages_map) | ||||||
|     write_languages_file(_filtered_languages) |     write_languages_file(filtered_languages) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | if __name__ == "__main__": | ||||||
|  |     main() | ||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Markus Heiser
						Markus Heiser