diff --git a/.github/workflows/data-update.yml b/.github/workflows/data-update.yml index d20cd6c63..80fcaad12 100644 --- a/.github/workflows/data-update.yml +++ b/.github/workflows/data-update.yml @@ -17,7 +17,7 @@ jobs: - update_currencies.py - update_external_bangs.py - update_firefox_version.py - - update_languages.py + - update_engine_locales.py - update_wikidata_units.py - update_engine_descriptions.py steps: diff --git a/docs/admin/engines/configured_engines.rst b/docs/admin/engines/configured_engines.rst index c7b6a1f52..fa1e5a4b0 100644 --- a/docs/admin/engines/configured_engines.rst +++ b/docs/admin/engines/configured_engines.rst @@ -42,7 +42,7 @@ Explanation of the :ref:`general engine configuration` shown in the table - Timeout - Weight - Paging - - Language + - Language, Region - Safe search - Time range diff --git a/manage b/manage index c887826e2..c228d530a 100755 --- a/manage +++ b/manage @@ -57,7 +57,7 @@ PYLINT_SEARXNG_DISABLE_OPTION="\ I,C,R,\ W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401,\ E1136" -PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="supported_languages,language_aliases,logger,categories" +PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="engine_locales,supported_languages,language_aliases,logger,categories" PYLINT_OPTIONS="-m pylint -j 0 --rcfile .pylintrc" help() { @@ -698,6 +698,7 @@ test.pyright() { | grep -v '/engines/.*.py.* - warning: "logger" is not defined'\ | grep -v '/plugins/.*.py.* - error: "logger" is not defined'\ | grep -v '/engines/.*.py.* - warning: "supported_languages" is not defined' \ + | grep -v '/engines/.*.py.* - warning: "engine_locales" is not defined' \ | grep -v '/engines/.*.py.* - warning: "language_aliases" is not defined' \ | grep -v '/engines/.*.py.* - warning: "categories" is not defined' dump_return $? 
diff --git a/searx/autocomplete.py b/searx/autocomplete.py index 6fb5537a2..51b333c58 100644 --- a/searx/autocomplete.py +++ b/searx/autocomplete.py @@ -11,9 +11,10 @@ from lxml import etree from httpx import HTTPError from searx import settings -from searx.data import ENGINES_LANGUAGES +from searx.locales import get_engine_locale from searx.network import get as http_get from searx.exceptions import SearxEngineResponseException +from searx.engines import engines # a fetch_supported_languages() for XPath engines isn't available right now # _brave = ENGINES_LANGUAGES['brave'].keys() @@ -110,9 +111,12 @@ def seznam(query, _lang): def startpage(query, lang): # startpage autocompleter - lui = ENGINES_LANGUAGES['startpage'].get(lang, 'english') + engine_language = default_language = 'english_uk' + if 'startpage' in engines: + engine_language = get_engine_locale(lang, engines['startpage'].engine_locales.languages, default=default_language) + url = 'https://startpage.com/suggestions?{query}' - resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui}))) + resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': engine_language}))) data = resp.json() return [e['text'] for e in data.get('suggestions', []) if 'text' in e] diff --git a/searx/data/__init__.py b/searx/data/__init__.py index 424440a71..67052b23b 100644 --- a/searx/data/__init__.py +++ b/searx/data/__init__.py @@ -8,6 +8,7 @@ __all__ = [ 'ENGINES_LANGUAGES', + 'ENGINES_LOCALES', 'CURRENCIES', 'USER_AGENTS', 'EXTERNAL_URLS', @@ -43,6 +44,7 @@ def ahmia_blacklist_loader(): ENGINES_LANGUAGES = _load('engines_languages.json') +ENGINES_LOCALES = _load('engine_locales.json') CURRENCIES = _load('currencies.json') USER_AGENTS = _load('useragents.json') EXTERNAL_URLS = _load('external_urls.json') diff --git a/searx/data/engine_locales.json b/searx/data/engine_locales.json new file mode 100644 index 000000000..fba8af3d3 --- /dev/null +++ 
b/searx/data/engine_locales.json @@ -0,0 +1,299 @@ +{ + "qwant": { + "languages": {}, + "regions": { + "bg-BG": "bg_BG", + "ca-ES": "ca_ES", + "cs-CZ": "cs_CZ", + "da-DK": "da_DK", + "de-AT": "de_AT", + "de-CH": "de_CH", + "de-DE": "de_DE", + "el-GR": "el_GR", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "en-IE": "en_IE", + "en-MY": "en_MY", + "en-NZ": "en_NZ", + "en-US": "en_US", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-ES": "es_ES", + "es-MX": "es_MX", + "et-EE": "et_EE", + "fi-FI": "fi_FI", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "fr-FR": "fr_FR", + "hu-HU": "hu_HU", + "it-CH": "it_CH", + "it-IT": "it_IT", + "ko-KR": "ko_KR", + "nb-NO": "nb_NO", + "nl-BE": "nl_BE", + "nl-NL": "nl_NL", + "pl-PL": "pl_PL", + "pt-PT": "pt_PT", + "ro-RO": "ro_RO", + "sv-SE": "sv_SE", + "th-TH": "th_TH", + "zh-CN": "zh_CN", + "zh-HK": "zh_HK" + } + }, + "qwant images": { + "languages": {}, + "regions": { + "bg-BG": "bg_BG", + "ca-ES": "ca_ES", + "cs-CZ": "cs_CZ", + "da-DK": "da_DK", + "de-AT": "de_AT", + "de-CH": "de_CH", + "de-DE": "de_DE", + "el-GR": "el_GR", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "en-IE": "en_IE", + "en-MY": "en_MY", + "en-NZ": "en_NZ", + "en-US": "en_US", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-ES": "es_ES", + "es-MX": "es_MX", + "et-EE": "et_EE", + "fi-FI": "fi_FI", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "fr-FR": "fr_FR", + "hu-HU": "hu_HU", + "it-CH": "it_CH", + "it-IT": "it_IT", + "ko-KR": "ko_KR", + "nb-NO": "nb_NO", + "nl-BE": "nl_BE", + "nl-NL": "nl_NL", + "pl-PL": "pl_PL", + "pt-PT": "pt_PT", + "ro-RO": "ro_RO", + "sv-SE": "sv_SE", + "th-TH": "th_TH", + "zh-CN": "zh_CN", + "zh-HK": "zh_HK" + } + }, + "qwant news": { + "languages": {}, + "regions": { + "ca-ES": "ca_ES", + "de-AT": "de_AT", + "de-CH": "de_CH", + "de-DE": "de_DE", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "en-IE": "en_IE", + "en-MY": "en_MY", + "en-NZ": "en_NZ", + "en-US": 
"en_US", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-ES": "es_ES", + "es-MX": "es_MX", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "fr-FR": "fr_FR", + "it-CH": "it_CH", + "it-IT": "it_IT", + "nl-BE": "nl_BE", + "nl-NL": "nl_NL", + "pt-PT": "pt_PT" + } + }, + "qwant videos": { + "languages": {}, + "regions": { + "bg-BG": "bg_BG", + "ca-ES": "ca_ES", + "cs-CZ": "cs_CZ", + "da-DK": "da_DK", + "de-AT": "de_AT", + "de-CH": "de_CH", + "de-DE": "de_DE", + "el-GR": "el_GR", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "en-IE": "en_IE", + "en-MY": "en_MY", + "en-NZ": "en_NZ", + "en-US": "en_US", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-ES": "es_ES", + "es-MX": "es_MX", + "et-EE": "et_EE", + "fi-FI": "fi_FI", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "fr-FR": "fr_FR", + "hu-HU": "hu_HU", + "it-CH": "it_CH", + "it-IT": "it_IT", + "ko-KR": "ko_KR", + "nb-NO": "nb_NO", + "nl-BE": "nl_BE", + "nl-NL": "nl_NL", + "pl-PL": "pl_PL", + "pt-PT": "pt_PT", + "ro-RO": "ro_RO", + "sv-SE": "sv_SE", + "th-TH": "th_TH", + "zh-CN": "zh_CN", + "zh-HK": "zh_HK" + } + }, + "startpage": { + "languages": { + "af": "afrikaans", + "am": "amharic", + "ar": "arabic", + "az": "azerbaijani", + "be": "belarusian", + "bg": "bulgarian", + "bn": "bengali", + "bs": "bosnian", + "ca": "catalan", + "cs": "czech", + "cy": "welsh", + "da": "dansk", + "de": "deutsch", + "el": "greek", + "en": "english_uk", + "eo": "esperanto", + "es": "espanol", + "et": "estonian", + "eu": "basque", + "fa": "persian", + "fi": "suomi", + "fo": "faroese", + "fr": "francais", + "fy": "frisian", + "ga": "irish", + "gd": "gaelic", + "gl": "galician", + "gu": "gujarati", + "he": "hebrew", + "hi": "hindi", + "hr": "croatian", + "hu": "hungarian", + "ia": "interlingua", + "id": "indonesian", + "is": "icelandic", + "it": "italiano", + "ja": "nihongo", + "jv": "javanese", + "ka": "georgian", + "kn": "kannada", + "ko": "hangul", + "la": "latin", + "lt": "lithuanian", + "lv": 
"latvian", + "mai": "bihari", + "mk": "macedonian", + "ml": "malayalam", + "mr": "marathi", + "ms": "malay", + "mt": "maltese", + "nb": "norsk", + "ne": "nepali", + "nl": "nederlands", + "oc": "occitan", + "pa": "punjabi", + "pl": "polski", + "pt": "portugues", + "ro": "romanian", + "ru": "russian", + "si": "sinhalese", + "sk": "slovak", + "sl": "slovenian", + "sq": "albanian", + "sr": "serbian", + "su": "sudanese", + "sv": "svenska", + "sw": "swahili", + "ta": "tamil", + "te": "telugu", + "th": "thai", + "ti": "tigrinya", + "tl": "tagalog", + "tr": "turkce", + "uk": "ukrainian", + "ur": "urdu", + "uz": "uzbek", + "vi": "vietnamese", + "xh": "xhosa", + "zh": "jiantizhongwen", + "zh_Hant": "fantizhengwen", + "zu": "zulu" + }, + "regions": { + "ar-EG": "ar_EG", + "bg-BG": "bg_BG", + "ca-ES": "ca_ES", + "cs-CZ": "cs_CZ", + "da-DK": "da_DK", + "de-AT": "de_AT", + "de-CH": "de_CH", + "de-DE": "de_DE", + "el-GR": "el_GR", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en-GB_GB", + "en-IE": "en_IE", + "en-MY": "en_MY", + "en-NZ": "en_NZ", + "en-US": "en_US", + "en-ZA": "en_ZA", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-ES": "es_ES", + "es-US": "es_US", + "es-UY": "es_UY", + "fi-FI": "fi_FI", + "fil-PH": "fil_PH", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "fr-FR": "fr_FR", + "hi-IN": "hi_IN", + "it-CH": "it_CH", + "it-IT": "it_IT", + "ja-JP": "ja_JP", + "ko-KR": "ko_KR", + "ms-MY": "ms_MY", + "nb-NO": "no_NO", + "nl-BE": "nl_BE", + "nl-NL": "nl_NL", + "pl-PL": "pl_PL", + "pt-BR": "pt-BR_BR", + "pt-PT": "pt_PT", + "ro-RO": "ro_RO", + "ru-BY": "ru_BY", + "ru-RU": "ru_RU", + "sv-SE": "sv_SE", + "tr-TR": "tr_TR", + "uk-UA": "uk_UA", + "zh-CN": "zh-CN_CN", + "zh-HK": "zh-TW_HK", + "zh-TW": "zh-TW_TW" + } + } +} \ No newline at end of file diff --git a/searx/data/engines_languages.json b/searx/data/engines_languages.json index acd36439c..c33a4650a 100644 --- a/searx/data/engines_languages.json +++ b/searx/data/engines_languages.json @@ -1396,406 
+1396,6 @@ "sv", "zh" ], - "qwant": { - "bg-BG": "bg_BG", - "ca-ES": "ca_ES", - "cs-CZ": "cs_CZ", - "da-DK": "da_DK", - "de-AT": "de_AT", - "de-CH": "de_CH", - "de-DE": "de_DE", - "el-GR": "el_GR", - "en-AU": "en_AU", - "en-CA": "en_CA", - "en-GB": "en_GB", - "en-IE": "en_IE", - "en-MY": "en_MY", - "en-NZ": "en_NZ", - "en-US": "en_US", - "es-AR": "es_AR", - "es-CL": "es_CL", - "es-ES": "es_ES", - "es-MX": "es_MX", - "et-EE": "et_EE", - "fi-FI": "fi_FI", - "fr-BE": "fr_BE", - "fr-CA": "fr_CA", - "fr-CH": "fr_CH", - "fr-FR": "fr_FR", - "hu-HU": "hu_HU", - "it-CH": "it_CH", - "it-IT": "it_IT", - "ko-KR": "ko_KR", - "nb-NO": "nb_NO", - "nl-BE": "nl_BE", - "nl-NL": "nl_NL", - "pl-PL": "pl_PL", - "pt-PT": "pt_PT", - "ro-RO": "ro_RO", - "sv-SE": "sv_SE", - "th-TH": "th_TH", - "zh-CN": "zh_CN", - "zh-HK": "zh_HK" - }, - "qwant images": { - "bg-BG": "bg_BG", - "ca-ES": "ca_ES", - "cs-CZ": "cs_CZ", - "da-DK": "da_DK", - "de-AT": "de_AT", - "de-CH": "de_CH", - "de-DE": "de_DE", - "el-GR": "el_GR", - "en-AU": "en_AU", - "en-CA": "en_CA", - "en-GB": "en_GB", - "en-IE": "en_IE", - "en-MY": "en_MY", - "en-NZ": "en_NZ", - "en-US": "en_US", - "es-AR": "es_AR", - "es-CL": "es_CL", - "es-ES": "es_ES", - "es-MX": "es_MX", - "et-EE": "et_EE", - "fi-FI": "fi_FI", - "fr-BE": "fr_BE", - "fr-CA": "fr_CA", - "fr-CH": "fr_CH", - "fr-FR": "fr_FR", - "hu-HU": "hu_HU", - "it-CH": "it_CH", - "it-IT": "it_IT", - "ko-KR": "ko_KR", - "nb-NO": "nb_NO", - "nl-BE": "nl_BE", - "nl-NL": "nl_NL", - "pl-PL": "pl_PL", - "pt-PT": "pt_PT", - "ro-RO": "ro_RO", - "sv-SE": "sv_SE", - "th-TH": "th_TH", - "zh-CN": "zh_CN", - "zh-HK": "zh_HK" - }, - "qwant news": { - "ca-ES": "ca_ES", - "de-AT": "de_AT", - "de-CH": "de_CH", - "de-DE": "de_DE", - "en-AU": "en_AU", - "en-CA": "en_CA", - "en-GB": "en_GB", - "en-IE": "en_IE", - "en-MY": "en_MY", - "en-NZ": "en_NZ", - "en-US": "en_US", - "es-AR": "es_AR", - "es-CL": "es_CL", - "es-ES": "es_ES", - "es-MX": "es_MX", - "fr-BE": "fr_BE", - "fr-CA": "fr_CA", - "fr-CH": 
"fr_CH", - "fr-FR": "fr_FR", - "it-CH": "it_CH", - "it-IT": "it_IT", - "nl-BE": "nl_BE", - "nl-NL": "nl_NL", - "pt-PT": "pt_PT" - }, - "qwant videos": { - "bg-BG": "bg_BG", - "ca-ES": "ca_ES", - "cs-CZ": "cs_CZ", - "da-DK": "da_DK", - "de-AT": "de_AT", - "de-CH": "de_CH", - "de-DE": "de_DE", - "el-GR": "el_GR", - "en-AU": "en_AU", - "en-CA": "en_CA", - "en-GB": "en_GB", - "en-IE": "en_IE", - "en-MY": "en_MY", - "en-NZ": "en_NZ", - "en-US": "en_US", - "es-AR": "es_AR", - "es-CL": "es_CL", - "es-ES": "es_ES", - "es-MX": "es_MX", - "et-EE": "et_EE", - "fi-FI": "fi_FI", - "fr-BE": "fr_BE", - "fr-CA": "fr_CA", - "fr-CH": "fr_CH", - "fr-FR": "fr_FR", - "hu-HU": "hu_HU", - "it-CH": "it_CH", - "it-IT": "it_IT", - "ko-KR": "ko_KR", - "nb-NO": "nb_NO", - "nl-BE": "nl_BE", - "nl-NL": "nl_NL", - "pl-PL": "pl_PL", - "pt-PT": "pt_PT", - "ro-RO": "ro_RO", - "sv-SE": "sv_SE", - "th-TH": "th_TH", - "zh-CN": "zh_CN", - "zh-HK": "zh_HK" - }, - "startpage": { - "af": { - "alias": "afrikaans" - }, - "am": { - "alias": "amharic" - }, - "ar": { - "alias": "arabic" - }, - "az": { - "alias": "azerbaijani" - }, - "be": { - "alias": "belarusian" - }, - "bg": { - "alias": "bulgarian" - }, - "bn": { - "alias": "bengali" - }, - "bs": { - "alias": "bosnian" - }, - "ca": { - "alias": "catalan" - }, - "cs": { - "alias": "czech" - }, - "cy": { - "alias": "welsh" - }, - "da": { - "alias": "dansk" - }, - "de": { - "alias": "deutsch" - }, - "el": { - "alias": "greek" - }, - "en": { - "alias": "english" - }, - "en-GB": { - "alias": "english_uk" - }, - "eo": { - "alias": "esperanto" - }, - "es": { - "alias": "espanol" - }, - "et": { - "alias": "estonian" - }, - "eu": { - "alias": "basque" - }, - "fa": { - "alias": "persian" - }, - "fi": { - "alias": "suomi" - }, - "fo": { - "alias": "faroese" - }, - "fr": { - "alias": "francais" - }, - "fy": { - "alias": "frisian" - }, - "ga": { - "alias": "irish" - }, - "gd": { - "alias": "gaelic" - }, - "gl": { - "alias": "galician" - }, - "gu": { - "alias": 
"gujarati" - }, - "he": { - "alias": "hebrew" - }, - "hi": { - "alias": "hindi" - }, - "hr": { - "alias": "croatian" - }, - "hu": { - "alias": "hungarian" - }, - "ia": { - "alias": "interlingua" - }, - "id": { - "alias": "indonesian" - }, - "is": { - "alias": "icelandic" - }, - "it": { - "alias": "italiano" - }, - "ja": { - "alias": "nihongo" - }, - "jv": { - "alias": "javanese" - }, - "ka": { - "alias": "georgian" - }, - "kn": { - "alias": "kannada" - }, - "ko": { - "alias": "hangul" - }, - "la": { - "alias": "latin" - }, - "lt": { - "alias": "lithuanian" - }, - "lv": { - "alias": "latvian" - }, - "mai": { - "alias": "bihari" - }, - "mk": { - "alias": "macedonian" - }, - "ml": { - "alias": "malayalam" - }, - "mr": { - "alias": "marathi" - }, - "ms": { - "alias": "malay" - }, - "mt": { - "alias": "maltese" - }, - "ne": { - "alias": "nepali" - }, - "nl": { - "alias": "nederlands" - }, - "no": { - "alias": "norsk" - }, - "oc": { - "alias": "occitan" - }, - "pa": { - "alias": "punjabi" - }, - "pl": { - "alias": "polski" - }, - "pt": { - "alias": "portugues" - }, - "ro": { - "alias": "romanian" - }, - "ru": { - "alias": "russian" - }, - "si": { - "alias": "sinhalese" - }, - "sk": { - "alias": "slovak" - }, - "sl": { - "alias": "slovenian" - }, - "sq": { - "alias": "albanian" - }, - "sr": { - "alias": "serbian" - }, - "su": { - "alias": "sudanese" - }, - "sv": { - "alias": "svenska" - }, - "sw": { - "alias": "swahili" - }, - "ta": { - "alias": "tamil" - }, - "te": { - "alias": "telugu" - }, - "th": { - "alias": "thai" - }, - "ti": { - "alias": "tigrinya" - }, - "tl": { - "alias": "tagalog" - }, - "tr": { - "alias": "turkce" - }, - "uk": { - "alias": "ukrainian" - }, - "ur": { - "alias": "urdu" - }, - "uz": { - "alias": "uzbek" - }, - "vi": { - "alias": "vietnamese" - }, - "xh": { - "alias": "xhosa" - }, - "zh": { - "alias": "jiantizhongwen" - }, - "zh-HK": { - "alias": "fantizhengwen" - }, - "zh-TW": { - "alias": "fantizhengwen" - }, - "zu": { - "alias": "zulu" - } - }, 
"wikidata": { "ab": { "english_name": "Abkhazian", diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index c61f50d4b..17f67db1a 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -13,14 +13,14 @@ usage:: import sys import copy -from typing import Dict, List, Optional +import dataclasses +from typing import Dict, List, Optional, Any from os.path import realpath, dirname from babel.localedata import locale_identifiers from searx import logger, settings -from searx.data import ENGINES_LANGUAGES -from searx.network import get -from searx.utils import load_module, match_language, gen_useragent +from searx.data import ENGINES_LANGUAGES, ENGINES_LOCALES +from searx.utils import load_module, match_language logger = logger.getChild('engines') @@ -52,6 +52,27 @@ ENGINE_DEFAULT_ARGS = { OTHER_CATEGORY = 'other' +@dataclasses.dataclass +class EngineLocales: + """The class is intended to be instanciated for each engine.""" + + regions: Dict[str, str] = dataclasses.field(default_factory=dict) + """ + .. code:: python + { + 'fr-BE' : , + } + """ + + languages: Dict[str, str] = dataclasses.field(default_factory=dict) + """ + .. code:: python + { + 'ca' : , + } + """ + + class Engine: # pylint: disable=too-few-public-methods """This class is currently never initialized and only used for type hinting.""" @@ -59,15 +80,17 @@ class Engine: # pylint: disable=too-few-public-methods engine: str shortcut: str categories: List[str] - supported_languages: List[str] about: dict inactive: bool disabled: bool - language_support: bool paging: bool safesearch: bool time_range_support: bool timeout: float + language_support: bool + engine_locales: EngineLocales + supported_languages: List[str] + language_aliases: Dict[str, str] # Defaults for the namespace of an engine module, see :py:func:`load_engine` @@ -85,15 +108,15 @@ engine_shortcuts = {} """ -def load_engine(engine_data: dict) -> Optional[Engine]: - """Load engine from ``engine_data``. 
+def load_engine(engine_setting: Dict[str, Any]) -> Optional[Engine]: + """Load engine from ``engine_setting``. - :param dict engine_data: Attributes from YAML ``settings:engines/`` + :param dict engine_setting: Attributes from YAML ``settings:engines/`` :return: initialized namespace of the ````. 1. create a namespace and load module of the ```` 2. update namespace with the defaults from :py:obj:`ENGINE_DEFAULT_ARGS` - 3. update namespace with values from ``engine_data`` + 3. update namespace with values from ``engine_setting`` If engine *is active*, return namespace of the engine, otherwise return ``None``. @@ -107,7 +130,7 @@ def load_engine(engine_data: dict) -> Optional[Engine]: """ - engine_name = engine_data['name'] + engine_name = engine_setting['name'] if '_' in engine_name: logger.error('Engine name contains underscore: "{}"'.format(engine_name)) return None @@ -115,10 +138,10 @@ def load_engine(engine_data: dict) -> Optional[Engine]: if engine_name.lower() != engine_name: logger.warn('Engine name is not lowercase: "{}", converting to lowercase'.format(engine_name)) engine_name = engine_name.lower() - engine_data['name'] = engine_name + engine_setting['name'] = engine_name # load_module - engine_module = engine_data['engine'] + engine_module = engine_setting['engine'] try: engine = load_module(engine_module + '.py', ENGINE_DIR) except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError, ImportError, RuntimeError): @@ -128,9 +151,10 @@ def load_engine(engine_data: dict) -> Optional[Engine]: logger.exception('Cannot load engine "{}"'.format(engine_module)) return None - update_engine_attributes(engine, engine_data) - set_language_attributes(engine) + update_engine_attributes(engine, engine_setting) update_attributes_for_tor(engine) + if not set_engine_locales(engine): + set_language_attributes(engine) if not is_engine_active(engine): return None @@ -165,15 +189,15 @@ def set_loggers(engine, engine_name): module.logger = 
logger.getChild(module_engine_name) -def update_engine_attributes(engine: Engine, engine_data): - # set engine attributes from engine_data - for param_name, param_value in engine_data.items(): +def update_engine_attributes(engine: Engine, engine_setting: Dict[str, Any]): + # set engine attributes from engine_setting + for param_name, param_value in engine_setting.items(): if param_name == 'categories': if isinstance(param_value, str): param_value = list(map(str.strip, param_value.split(','))) engine.categories = param_value elif hasattr(engine, 'about') and param_name == 'about': - engine.about = {**engine.about, **engine_data['about']} + engine.about = {**engine.about, **engine_setting['about']} else: setattr(engine, param_name, param_value) @@ -183,6 +207,28 @@ def update_engine_attributes(engine: Engine): + setattr(engine, arg_name, copy.deepcopy(arg_value)) + +def set_engine_locales(engine: Engine): + engine_locales_key = None + + if engine.name in ENGINES_LOCALES: + engine_locales_key = engine.name + elif engine.engine in ENGINES_LOCALES: + # The key of the dictionary ENGINES_LOCALES is the *engine name* + # configured in settings.yml. When multiple engines are configured in + # settings.yml to use the same origin engine (python module) these + # additional engines can use the languages from the origin engine. 
+ # For this use the configured ``engine: ...`` from settings.yml + engine_locales_key = engine.engine + else: + return False + + logger.debug('engine %s: uses the locales of %s', engine.name, engine_locales_key) + engine.engine_locales = EngineLocales(**ENGINES_LOCALES[engine_locales_key]) + # language_support + engine.language_support = len(engine.engine_locales.regions) > 0 or len(engine.engine_locales.languages) > 0 + return True + + def set_language_attributes(engine: Engine): # assign supported languages from json file if engine.name in ENGINES_LANGUAGES: @@ -225,17 +271,6 @@ # language_support engine.language_support = len(engine.supported_languages) > 0 - # assign language fetching method if auxiliary method exists - if hasattr(engine, '_fetch_supported_languages'): - headers = { - 'User-Agent': gen_useragent(), - 'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language - } - engine.fetch_supported_languages = ( - # pylint: disable=protected-access - lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers)) - ) - def update_attributes_for_tor(engine: Engine) -> bool: if using_tor_proxy(engine) and hasattr(engine, 'onion_url'): @@ -294,8 +329,8 @@ engine_shortcuts.clear() categories.clear() categories['general'] = [] - for engine_data in engine_list: - engine = load_engine(engine_data) + for engine_setting in engine_list: + engine = load_engine(engine_setting) if engine: register_engine(engine) return engines diff --git a/searx/engines/google.py b/searx/engines/google.py index 2f894b21f..1020d5071 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -136,7 +136,7 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): :param dict param: request parameters of the engine :param list lang_list: list of supported languages of the engine - :py:obj:`ENGINES_LANGUAGES[engine-name] ` + 
:py:obj:`ENGINES_DATAS[engine-name].languages ` :param dict lang_list: custom aliases for non standard language codes (used when calling :py:func:`searx.utils.match_language`) diff --git a/searx/engines/google_scholar.py b/searx/engines/google_scholar.py index c07cd4cea..59475ea2b 100644 --- a/searx/engines/google_scholar.py +++ b/searx/engines/google_scholar.py @@ -50,7 +50,6 @@ about = { # engine dependent config categories = ['science', 'scientific publications'] paging = True -language_support = True use_locale_domain = True time_range_support = True safesearch = False diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py index fc574bd48..2e22079ef 100644 --- a/searx/engines/google_videos.py +++ b/searx/engines/google_videos.py @@ -56,7 +56,6 @@ about = { categories = ['videos', 'web'] paging = False -language_support = True use_locale_domain = True time_range_support = True safesearch = True diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 6de2176d0..18256ec5a 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -49,7 +49,7 @@ about = { # engine dependent config categories = [] paging = True -supported_languages_url = about['website'] +engine_locales_url = about['website'] qwant_categ = None # web|news|inages|videos safesearch = True @@ -95,7 +95,7 @@ def request(query, params): ) # add quant's locale - q_locale = get_engine_locale(params['language'], supported_languages, default='en_US') + q_locale = get_engine_locale(params['language'], engine_locales.regions, default='en_US') params['url'] += '&locale=' + q_locale # add safesearch option @@ -243,7 +243,7 @@ def response(resp): return results -def _fetch_supported_languages(resp): +def _fetch_engine_locales(resp, engine_locales): text = resp.text text = text[text.find('INITIAL_PROPS') :] @@ -263,8 +263,6 @@ def _fetch_supported_languages(resp): q_valid_locales.append(_locale) - supported_languages = {} - for q_locale in q_valid_locales: try: locale = 
babel.Locale.parse(q_locale, sep='_') @@ -272,7 +270,7 @@ def _fetch_supported_languages(resp): print("ERROR: can't determine babel locale of quant's locale %s" % q_locale) continue - # note: supported_languages (dict) + # note: engine_data.regions (dict) # # dict's key is a string build up from a babel.Locale object / the # notation 'xx-XX' (and 'xx') conforms to SearXNG's locale (and @@ -280,6 +278,6 @@ def _fetch_supported_languages(resp): # the engine. searxng_locale = locale.language + '-' + locale.territory # --> params['language'] - supported_languages[searxng_locale] = q_locale + engine_locales.regions[searxng_locale] = q_locale - return supported_languages + return engine_locales diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 24aa59d03..739a36b56 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -7,17 +7,18 @@ import re from time import time -from urllib.parse import urlencode from unicodedata import normalize, combining from datetime import datetime, timedelta +from collections import OrderedDict from dateutil import parser from lxml import html -from babel import Locale -from babel.localedata import locale_identifiers + +import babel from searx.network import get -from searx.utils import extract_text, eval_xpath, match_language +from searx.locales import get_engine_locale +from searx.utils import extract_text, eval_xpath from searx.exceptions import ( SearxEngineResponseException, SearxEngineCaptchaException, @@ -36,16 +37,22 @@ about = { # engine dependent config categories = ['general', 'web'] -# there is a mechanism to block "bot" search -# (probably the parameter qid), require -# storing of qid's between mulitble search-calls paging = True -supported_languages_url = 'https://www.startpage.com/do/settings' +number_of_results = 5 +send_accept_language_header = True + +safesearch = True +filter_mapping = {0: '0', 1: '1', 2: '1'} + +time_range_support = True +time_range_dict = {'day': 'd', 'week': 
'w', 'month': 'm', 'year': 'y'} + +engine_locales_url = 'https://www.startpage.com/do/settings' # search-url -base_url = 'https://startpage.com/' -search_url = base_url + 'sp/search?' +base_url = 'https://www.startpage.com/' +search_url = base_url + 'sp/search' # specific xpath variables # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"] @@ -103,42 +110,83 @@ def get_sc_code(headers): return sc_code -# do search-request def request(query, params): - # pylint: disable=line-too-long - # The format string from Startpage's FFox add-on [1]:: - # - # https://www.startpage.com/do/dsearch?query={searchTerms}&cat=web&pl=ext-ff&language=__MSG_extensionUrlLanguage__&extVersion=1.3.0 - # - # [1] https://addons.mozilla.org/en-US/firefox/addon/startpage-private-search/ + # Startpage supports a region value: 'all' + engine_region = 'all' + engine_language = 'english_uk' + if params['language'] != 'all': + engine_region = get_engine_locale(params['language'], engine_locales.regions, default='all') + engine_language = get_engine_locale( + params['language'].split('-')[0], engine_locales.languages, default='english_uk' + ) + logger.debug( + 'selected language %s --> engine_language: %s // engine_region: %s', + params['language'], + engine_language, + engine_region, + ) + # The Accept header is also needed by the get_sc_code(..) call below. 
+ params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' + + # build arguments args = { 'query': query, - 'page': params['pageno'], 'cat': 'web', - # 'pl': 'ext-ff', - # 'extVersion': '1.3.0', - # 'abp': "-1", - 'sc': get_sc_code(params['headers']), + 't': 'device', + 'sc': get_sc_code(params['headers']), # hint: this func needs HTTP headers + 'with_date': time_range_dict.get(params['time_range'], ''), } - # set language if specified - if params['language'] != 'all': - lang_code = match_language(params['language'], supported_languages, fallback=None) - if lang_code: - language_name = supported_languages[lang_code]['alias'] - args['language'] = language_name - args['lui'] = language_name + if engine_language: + args['language'] = engine_language + args['lui'] = engine_language + + if params['pageno'] == 1: + args['abp'] = ['-1', '-1'] + + else: + args['page'] = params['pageno'] + args['abp'] = '-1' + + # build cookie + lang_homepage = 'english' + cookie = OrderedDict() + cookie['date_time'] = 'world' + cookie['disable_family_filter'] = filter_mapping[params['safesearch']] + cookie['disable_open_in_new_window'] = '0' + cookie['enable_post_method'] = '1' # hint: POST + cookie['enable_proxy_safety_suggest'] = '1' + cookie['enable_stay_control'] = '1' + cookie['instant_answers'] = '1' + cookie['lang_homepage'] = 's/device/%s/' % lang_homepage + cookie['num_of_results'] = '10' + cookie['suggestions'] = '1' + cookie['wt_unit'] = 'celsius' + + if engine_language: + cookie['language'] = engine_language + cookie['language_ui'] = engine_language + + if engine_region: + cookie['search_results_region'] = engine_region + + params['cookies']['preferences'] = 'N1N'.join(["%sEEE%s" % x for x in cookie.items()]) + logger.debug('cookie preferences: %s', params['cookies']['preferences']) + params['method'] = 'POST' + + logger.debug("data: %s", args) + params['data'] = args + + params['url'] = search_url - params['url'] = search_url + 
urlencode(args) return params # get response from search-request def response(resp): results = [] - dom = html.fromstring(resp.text) # parse results @@ -200,62 +248,142 @@ def response(resp): return results -# get supported languages from their site -def _fetch_supported_languages(resp): - # startpage's language selector is a mess each option has a displayed name - # and a value, either of which may represent the language name in the native - # script, the language name in English, an English transliteration of the - # native name, the English name of the writing script used by the language, - # or occasionally something else entirely. +def _fetch_engine_locales(resp, engine_locales): - # this cases are so special they need to be hardcoded, a couple of them are misspellings - language_names = { - 'english_uk': 'en-GB', - 'fantizhengwen': ['zh-TW', 'zh-HK'], - 'hangul': 'ko', - 'malayam': 'ml', - 'norsk': 'nb', - 'sinhalese': 'si', - 'sudanese': 'su', - } + # startpage's language & region selectors are a mess. + # + # regions: + # in the list of regions there are tags we need to map to common + # region tags: + # - pt-BR_BR --> pt_BR + # - zh-CN_CN --> zh_Hans_CN + # - zh-TW_TW --> zh_Hant_TW + # - zh-TW_HK --> zh_Hant_HK + # - en-GB_GB --> en_GB + # and there is at least one tag with a three letter language tag (ISO 639-2) + # - fil_PH --> fil_PH + # + # regions + # ------- + # + # The locale code 'no_NO' from startpage does not exists and is mapped to + # nb-NO:: + # + # babel.core.UnknownLocaleError: unknown locale 'no_NO' + # + # For reference see languages-subtag at iana [1], `no` is the + # macrolanguage:: + # + # type: language + # Subtag: nb + # Description: Norwegian Bokmål + # Added: 2005-10-16 + # Suppress-Script: Latn + # Macrolanguage: no + # + # W3C recommends subtag over macrolanguage [2]: + # + # Use macrolanguages with care. Some language subtags have a Scope field set to + # macrolanguage, ie. 
this primary language subtag encompasses a number of more + # specific primary language subtags in the registry. + # ... + # As we recommended for the collection subtags mentioned above, in most cases + # you should try to use the more specific subtags ... + # + # [1] https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry + # [2] https://www.w3.org/International/questions/qa-choosing-language-tags#langsubtag + # + # languages + # --------- + # + # The displayed name in startpage's settings page depend on the location + # of the IP when the 'Accept-Language' HTTP header is unset (in tha + # language update script we use "en-US,en;q=0.5" to get uniform names + # independent from the IP). + # + # Each option has a displayed name and a value, either of which + # may represent the language name in the native script, the language name + # in English, an English transliteration of the native name, the English + # name of the writing script used by the language, or occasionally + # something else entirely. 
- # get the English name of every language known by babel - language_names.update( - { - # fmt: off - name.lower(): lang_code - # pylint: disable=protected-access - for lang_code, name in Locale('en')._data['languages'].items() - # fmt: on - } - ) + dom = html.fromstring(resp.text) + + # regions + + sp_region_names = [] + for option in dom.xpath('//form[@name="settings"]//select[@name="search_results_region"]/option'): + sp_region_names.append(option.get('value')) + + for engine_region_tag in sp_region_names: + if engine_region_tag == 'all': + # 'all' does not fit to a babel locale + continue + + locale = None + babel_region_tag = {'no_NO': 'nb_NO'}.get(engine_region_tag, engine_region_tag) # norway + + if '-' in babel_region_tag: + # pt-XY_BR --> l=pt, r=BR --> pt-BR + l, r = babel_region_tag.split('-') + r = r.split('_')[-1] + locale = babel.Locale.parse(l + '_' + r, sep='_') + else: + try: + locale = babel.Locale.parse(babel_region_tag, sep='_') + except babel.core.UnknownLocaleError: + print("ERROR: can't determine babel locale of startpage's locale %s" % engine_region_tag) + continue + + if locale is None: + continue + + region_tag = locale.language + '-' + locale.territory + # print("SearXNG locale tag: %s --> Engine tag: %s" % (region_tag, engine_region_tag)) + engine_locales.regions[region_tag] = engine_region_tag + + # languages + + catalog_engine2code = {name.lower(): lang_code for lang_code, name in babel.Locale('en').languages.items()} # get the native name of every language known by babel - for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, locale_identifiers()): - native_name = Locale(lang_code).get_language_name().lower() + + for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, babel.localedata.locale_identifiers()): + native_name = babel.Locale(lang_code).get_language_name().lower() # add native name exactly as it is - language_names[native_name] = lang_code + catalog_engine2code[native_name] = lang_code # add 
"normalized" language name (i.e. français becomes francais and español becomes espanol) unaccented_name = ''.join(filter(lambda c: not combining(c), normalize('NFKD', native_name))) if len(unaccented_name) == len(unaccented_name.encode()): # add only if result is ascii (otherwise "normalization" didn't work) - language_names[unaccented_name] = lang_code + catalog_engine2code[unaccented_name] = lang_code + + # values that can't be determined by babel's languages names + + catalog_engine2code.update( + { + 'english_uk': 'en', + # traditional chinese used in .. + 'fantizhengwen': 'zh_Hant', + # Korean alphabet + 'hangul': 'ko', + # Malayalam is one of 22 scheduled languages of India. + 'malayam': 'ml', + 'norsk': 'nb', + 'sinhalese': 'si', + } + ) - dom = html.fromstring(resp.text) - sp_lang_names = [] for option in dom.xpath('//form[@name="settings"]//select[@name="language"]/option'): - sp_lang_names.append((option.get('value'), extract_text(option).lower())) + engine_lang = option.get('value') + name = extract_text(option).lower() - supported_languages = {} - for sp_option_value, sp_option_text in sp_lang_names: - lang_code = language_names.get(sp_option_value) or language_names.get(sp_option_text) - if isinstance(lang_code, str): - supported_languages[lang_code] = {'alias': sp_option_value} - elif isinstance(lang_code, list): - for _lc in lang_code: - supported_languages[_lc] = {'alias': sp_option_value} - else: - print('Unknown language option in Startpage: {} ({})'.format(sp_option_value, sp_option_text)) + lang_code = catalog_engine2code.get(engine_lang) + if lang_code is None: + lang_code = catalog_engine2code[name] - return supported_languages + # print("SearXNG language tag: %s --> Engine tag: %s" % (lang_code, engine_lang)) + engine_locales.languages[lang_code] = engine_lang + + return engine_locales diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py index 00f208b17..ccb2f464e 100644 --- a/searx/engines/yahoo_news.py +++ 
b/searx/engines/yahoo_news.py @@ -32,7 +32,6 @@ about = { "results": 'HTML', } -language_support = False time_range_support = False safesearch = False paging = True diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py index 7992adf82..c398ddffc 100644 --- a/searx/engines/youtube_noapi.py +++ b/searx/engines/youtube_noapi.py @@ -20,7 +20,6 @@ about = { # engine dependent config categories = ['videos', 'music'] paging = True -language_support = False time_range_support = True # search-url diff --git a/searx/languages.py b/searx/languages.py index 377e7495b..fa5b0d4ef 100644 --- a/searx/languages.py +++ b/searx/languages.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- # list of language codes -# this file is generated automatically by utils/fetch_languages.py +# this file is generated automatically by: +# +# ./manage pyenv.cmd searxng_extra/update/update_languages.py language_codes = ( ('af-ZA', 'Afrikaans', 'Suid-Afrika', 'Afrikaans', '\U0001f1ff\U0001f1e6'), ('ar-EG', 'العربية', 'مصر', 'Arabic', '\U0001f1ea\U0001f1ec'), diff --git a/searx/locales.py b/searx/locales.py index 620132340..7a96df6f8 100644 --- a/searx/locales.py +++ b/searx/locales.py @@ -4,7 +4,7 @@ """Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`. """ -from typing import Set +from typing import Set, Optional, Dict import os import pathlib @@ -24,11 +24,8 @@ logger = logger.getChild('locales') _flask_babel_get_translations = flask_babel.get_translations LOCALE_NAMES = {} -"""Mapping of locales and their description. Locales e.g. 'fr' or 'pt-BR' (see -:py:obj:`locales_initialize`). - -:meta hide-value: -""" +"""Mapping of locales and their description. Locales e.g. ``fr`` or ``pt-BR`` +(see :py:obj:`locales_initialize`).""" RTL_LOCALES: Set[str] = set() """List of *Right-To-Left* locales e.g. 
'he' or 'fa-IR' (see @@ -157,13 +154,17 @@ def locales_initialize(directory=None): RTL_LOCALES.add(tag) -def get_engine_locale(searxng_locale, engine_locales, default=None): +def get_engine_locale( + searxng_locale: str, engine_locales: Dict[str, str], default: Optional[str] = None +) -> Optional[str]: """Return engine's language (aka locale) string that best fits to argument ``searxng_locale``. Argument ``engine_locales`` is a python dict that maps *SearXNG locales* to corresponding *engine locales*:: + .. code:: python + : { # SearXNG string : engine-string 'ca-ES' : 'ca_ES', diff --git a/searx/templates/simple/preferences.html b/searx/templates/simple/preferences.html index 4aef7f986..43026cac6 100644 --- a/searx/templates/simple/preferences.html +++ b/searx/templates/simple/preferences.html @@ -307,7 +307,7 @@ {{ _("Allow") }}{{- "" -}} {{ _("Engine name") }}{{- "" -}} {{ _("Shortcut") }}{{- "" -}} - {{ _("Supports selected language") }}{{- "" -}} + {{ _("Language / Region") }}{{- "" -}} {{ _("SafeSearch") }}{{- "" -}} {{ _("Time range") }}{{- "" -}} {%- if enable_metrics %}{{ _("Response time") }}{% endif -%} @@ -333,7 +333,7 @@ {{- engine_about(search_engine) -}} {{- "" -}} {{ shortcuts[search_engine.name] }}{{- "" -}} - {{ checkbox(None, supports[search_engine.name]['supports_selected_language'], true) }}{{- "" -}} + {{ checkbox(None, supports[search_engine.name]['language_support'], true) }}{{- "" -}} {{ checkbox(None, supports[search_engine.name]['safesearch'], true) }}{{- "" -}} {{ checkbox(None, supports[search_engine.name]['time_range_support'], true) }}{{- "" -}} {%- if enable_metrics %}{{- engine_time(search_engine.name) -}}{% endif -%} diff --git a/searx/webapp.py b/searx/webapp.py index 5c3fbae8b..6d088d81f 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -990,7 +990,6 @@ def preferences(): 'rate80': rate80, 'rate95': rate95, 'warn_timeout': e.timeout > settings['outgoing']['request_timeout'], - 'supports_selected_language': 
_is_selected_language_supported(e, request.preferences), 'result_count': result_count, } # end of stats @@ -1041,18 +1040,18 @@ def preferences(): # supports supports = {} for _, e in filtered_engines.items(): - supports_selected_language = _is_selected_language_supported(e, request.preferences) + language_support = e.language_support safesearch = e.safesearch time_range_support = e.time_range_support for checker_test_name in checker_results.get(e.name, {}).get('errors', {}): - if supports_selected_language and checker_test_name.startswith('lang_'): - supports_selected_language = '?' + if language_support and checker_test_name.startswith('lang_'): + language_support = '?' elif safesearch and checker_test_name == 'safesearch': safesearch = '?' elif time_range_support and checker_test_name == 'time_range': time_range_support = '?' supports[e.name] = { - 'supports_selected_language': supports_selected_language, + 'language_support': language_support, 'safesearch': safesearch, 'time_range_support': time_range_support, } @@ -1088,16 +1087,6 @@ def preferences(): ) -def _is_selected_language_supported(engine, preferences: Preferences): # pylint: disable=redefined-outer-name - language = preferences.get_value('language') - if language == 'all': - return True - x = match_language( - language, getattr(engine, 'supported_languages', []), getattr(engine, 'language_aliases', {}), None - ) - return bool(x) - - @app.route('/image_proxy', methods=['GET']) def image_proxy(): # pylint: disable=too-many-return-statements, too-many-branches @@ -1316,10 +1305,6 @@ def config(): if not request.preferences.validate_token(engine): continue - supported_languages = engine.supported_languages - if isinstance(engine.supported_languages, dict): - supported_languages = list(engine.supported_languages.keys()) - _engines.append( { 'name': name, @@ -1328,7 +1313,6 @@ def config(): 'enabled': not engine.disabled, 'paging': engine.paging, 'language_support': engine.language_support, - 
'supported_languages': supported_languages, 'safesearch': engine.safesearch, 'time_range_support': engine.time_range_support, 'timeout': engine.timeout, diff --git a/searxng_extra/update/update_engine_locales.py b/searxng_extra/update/update_engine_locales.py new file mode 100755 index 000000000..ccc0a0237 --- /dev/null +++ b/searxng_extra/update/update_engine_locales.py @@ -0,0 +1,500 @@ +#!/usr/bin/env python +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +# pyright: basic +"""This script fetches engine data from engines `engine_data_url`` and updates: + +- :py:obj:`write_languages_file` updates :origin:`searx/languages.py` +- :py:obj:`fetch_engine_data` updates :origin:`searx/data/engines_datas.json` + +This script is triggered by CI in job :origin:`updateData +<.github/workflows/data-update.yml>`. +""" + +# pylint: disable=invalid-name +import json +from unicodedata import lookup +from pprint import pformat +from pathlib import Path +from typing import Dict, Generator, List, Set, Tuple, Union, Optional +from typing_extensions import TypedDict, NotRequired + +from babel import Locale, UnknownLocaleError +from babel.languages import get_global # type: ignore +from babel.core import parse_locale + +from searx import settings, searx_dir +from searx import network +from searx.data import data_dir +from searx.engines import ( + load_engines, + engines, + EngineLocales, +) +from searx.utils import gen_useragent + + +class EngineLanguageDescDict(TypedDict): + """In data/engines_languages.json, for google, wikipedia and wikidata engines: + value of the dictionnaries""" + + name: str + english_name: NotRequired[str] + + +EngineLanguageDesc = Union[List[str], Dict[str, EngineLanguageDescDict]] +"""In data/engines_languages.json, type for a engine: + +* either it is a list +* or a dictionnary""" + +EngineLanguageDict = Dict[str, EngineLanguageDesc] +"""Type description for data/engines_languages.json""" + +EngineLocalesDict = Dict[str, EngineLocales] 
+"""Type description for data/engine_data.json""" + + +def fetch_engine_locales() -> Tuple[EngineLocalesDict, EngineLanguageDict]: + """Fetch :class:`EngineData` for each engine and persist JSON in file. + + The script checks all engines about a function:: + + def _fetch_engine_data(resp, engine_data): + ... + + and a variable named ``engine_locales_url``. The HTTP GET response of + ``engine_locales_url`` is passed to the ``_fetch_engine_data`` function including a + instance of :py:obj:`searx.engines.EngineData`. + + .. hint:: + + This implementation is backward compatible and supports the (depricated) + ``_fetch_supported_languages`` interface. + + On the long term the depricated implementations in the engines will be + replaced by ``_fetch_engine_data``.""" + + network.set_timeout_for_thread(10.0) + engine_locales_dict: EngineLocalesDict = {} + engines_languages: EngineLanguageDict = {} + names = list(engines) + names.sort() + + # The headers has been moved here from commit 9b6ffed06: Some engines (at + # least bing and startpage) return a different result list of supported + # languages depending on the IP location where the HTTP request comes from. + # The IP based results (from bing) can be avoided by setting a + # 'Accept-Language' in the HTTP request. 
+ + headers = { + 'User-Agent': gen_useragent(), + 'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language + } + + for engine_name in names: + engine = engines[engine_name] + + fetch_locales = getattr(engine, '_fetch_engine_locales', None) + # deprecated: _fetch_supported_languages + fetch_languages = getattr(engine, '_fetch_supported_languages', None) + + if fetch_locales is not None: + resp = network.get(engine.engine_locales_url, headers=headers) # type: ignore + engine_data = EngineLocales() + fetch_locales(resp, engine_data) + engine_locales_dict[engine_name] = engine_data + print( + "%-20s: %3s language(s), %3s region(s)" + % (engine_name, len(engine_data.languages), len(engine_data.regions)) + ) + elif fetch_languages is not None: + print(engine_name) + resp = network.get(engine.supported_languages_url, headers=headers) # type: ignore + engines_languages[engine_name] = fetch_languages(resp) + print( + "%-20s: %3s languages using deprecated _fetch_supported_languages" + % (engine_name, len(engines_languages[engine_name])) + ) + if type(engines_languages[engine_name]) == list: # pylint: disable=unidiomatic-typecheck + engines_languages[engine_name] = sorted(engines_languages[engine_name]) + + return engine_locales_dict, engines_languages + + +# Get babel Locale object from lang_code if possible. 
+def get_locale(lang_code: str) -> Optional[Locale]: + try: + locale = Locale.parse(lang_code, sep='-') + return locale + except (UnknownLocaleError, ValueError): + return None + + +lang2emoji = { + 'ha': '\U0001F1F3\U0001F1EA', # Hausa / Niger + 'bs': '\U0001F1E7\U0001F1E6', # Bosnian / Bosnia & Herzegovina + 'jp': '\U0001F1EF\U0001F1F5', # Japanese + 'ua': '\U0001F1FA\U0001F1E6', # Ukrainian + 'he': '\U0001F1EE\U0001F1F7', # Hebrew +} + + +def get_unicode_flag(lang_code: str) -> Optional[str]: + """Determine a unicode flag (emoji) that fits to the ``lang_code``""" + + emoji = lang2emoji.get(lang_code.lower()) + if emoji: + return emoji + + if len(lang_code) == 2: + return '\U0001F310' + + language = territory = script = variant = '' + try: + language, territory, script, variant = parse_locale(lang_code, '-') + except ValueError as exc: + print(exc) + + # https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2 + if not territory: + # https://www.unicode.org/emoji/charts/emoji-list.html#country-flag + emoji = lang2emoji.get(language) + if not emoji: + print( + "%s --> language: %s / territory: %s / script: %s / variant: %s" + % (lang_code, language, territory, script, variant) + ) + return emoji + + emoji = lang2emoji.get(territory.lower()) + if emoji: + return emoji + + try: + c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[0]) + c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[1]) + # print("%s --> territory: %s --> %s%s" %(lang_code, territory, c1, c2 )) + except KeyError as exc: + print("%s --> territory: %s --> %s" % (lang_code, territory, exc)) + return None + + return c1 + c2 + + +def get_territory_name(lang_code: str) -> Optional[str]: + country_name = None + locale = get_locale(lang_code) + try: + if locale is not None: + country_name = locale.get_territory_name() + except FileNotFoundError as exc: + print("ERROR: %s --> %s" % (locale, exc)) + return country_name + + +def iter_engine_codes( + engine_data_dict: EngineLocalesDict, 
engines_languages: EngineLanguageDict +) -> Generator[Tuple[str, List[str]], None, None]: + """Iterator returning tuples: + + - first element is the engine name + - second element is a list of language code (the one from the engines) + + The function iterates first on the engine from engine_data_dict, + then it iterates over the engine from engines_languages. + """ + for engine_name in engine_data_dict: + engine = engines[engine_name] + engine_data = engine_data_dict[engine_name] + + # items of type 'engine_data' do have regions & languages, the list + # of engine_codes should contain both. + + engine_codes = list(engine_data.regions.keys()) + engine_codes.extend(engine_data.languages.keys()) + yield engine_name, engine_codes + + for engine_name, engine_languages in engines_languages.items(): + engine = engines[engine_name] + language_aliases_values = getattr(engine, 'language_aliases', {}).values() + engine_codes: List[str] = [] + for lang_code in engine_languages: + if lang_code in language_aliases_values: + # pylint: disable=stop-iteration-return + # we are sure that next(...) won't raise a StopIteration exception + # because of the "if" statement just above + lang_code = next(lc for lc, alias in engine.language_aliases.items() if lang_code == alias) + # pylint: enable=stop-iteration-return + engine_codes.append(lang_code) + yield engine_name, engine_codes + + +class CountryInfo(TypedDict): + """Country name with a set of engine names. 
+    Used exclusively in JoinLanguageResult"""
+
+    country_name: str
+    """Name of the country"""
+
+    engine_names: Set[str]
+    """Engine names which use the language & country"""
+
+
+class JoinLanguageResult(TypedDict):
+    """Result of join_language_lists"""
+
+    name: Optional[str]
+    """Native name of the language"""
+
+    english_name: Optional[str]
+    """English name of the language"""
+
+    engine_names: Set
+    """Engine names which use this language"""
+
+    countries: Dict[str, CountryInfo]
+    """Possible country codes for this language"""
+
+
+def join_language_lists(
+    engine_data_dict: EngineLocalesDict, engines_languages: EngineLanguageDict
+) -> Dict[str, JoinLanguageResult]:
+    """Join all languages of the engines into one list. The returned language list
+    contains language codes (``zh``) and region codes (``zh-TW``). The codes can
+    be parsed by babel::
+
+        babel.Locale.parse(language_list[n])
+
+    """
+    language_list: Dict[str, JoinLanguageResult] = {}
+    name_from_babel = set()
+    name_from_wikipedia = set()
+    name_not_found = set()
+
+    for engine_name, engine_codes in iter_engine_codes(engine_data_dict, engines_languages):
+        for lang_code in engine_codes:
+
+            locale = get_locale(lang_code)
+
+            # ensure that lang_code uses standard language and country codes
+            if locale and locale.territory:
+                lang_code = "{lang}-{country}".format(lang=locale.language, country=locale.territory)
+            short_code = lang_code.split('-')[0]
+
+            # add language without country if not in list
+            if short_code not in language_list:
+                if locale:
+                    # get language's data from babel's Locale object
+                    language_name = locale.get_language_name().title()
+                    english_name = locale.english_name.split(' (')[0]
+                    name_from_babel.add(short_code)
+                elif short_code in engines_languages['wikipedia'] and isinstance(engines_languages['wikipedia'], dict):
+                    # get language's data from wikipedia if not known by babel
+                    language_name = engines_languages['wikipedia'][short_code]['name']
+                    english_name = engines_languages['wikipedia'][short_code].get('english_name')
+                    name_from_wikipedia.add(short_code)
+                else:
+                    language_name = None
+                    english_name = None
+                    name_not_found.add(short_code)
+
+                # add language to list
+                language_list[short_code] = {
+                    'name': language_name,
+                    'english_name': english_name,
+                    'engine_names': set(),
+                    'countries': {},
+                }
+
+            # add language with country if not in list
+            if lang_code != short_code and lang_code not in language_list[short_code]['countries']:
+                country_name = ''
+                if locale:
+                    # get country name from babel's Locale object
+                    try:
+                        country_name = locale.get_territory_name()
+                    except FileNotFoundError as exc:
+                        print("ERROR: %s --> %s" % (locale, exc))
+                        locale = None
+
+                language_list[short_code]['countries'][lang_code] = {
+                    'country_name': country_name,
+                    'engine_names': set(),
+                }
+
+            # count engine for both language_country combination and language alone
+            language_list[short_code]['engine_names'].add(engine_name)
+            if lang_code != short_code:
+                language_list[short_code]['countries'][lang_code]['engine_names'].add(engine_name)
+
+    def set_to_list(engine_name_set: Set) -> str:
+        return ', '.join(sorted(list(engine_name_set)))
+
+    print('')
+    print('%s name(s) found with Babel: %s\n' % (len(name_from_babel), set_to_list(name_from_babel)))
+    print('%s name(s) found with Wikipedia: %s\n' % (len(name_from_wikipedia), set_to_list(name_from_wikipedia)))
+    print('%s name(s) not found: %s\n' % (len(name_not_found), set_to_list(name_not_found)))
+
+    return language_list
+
+
+class LanguageCountryName(TypedDict):
+    """filter_language_list returns a dictionary:
+    * the keys are the language codes
+    * the value is described in this type
+    """
+
+    name: Optional[str]
+    english_name: Optional[str]
+    country_name: NotRequired[str]
+
+
+def filter_language_list(all_languages: Dict[str, JoinLanguageResult]) -> Dict[str, LanguageCountryName]:
+    """Filter language list so it only includes the most supported languages and
+    countries.
+ """ + min_engines_per_lang = 12 + min_engines_per_country = 7 + main_engines = [ + engine_name + for engine_name, engine in engines.items() + if 'general' in engine.categories + and hasattr(engine, 'supported_languages') + and engine.supported_languages + and not engine.disabled + ] + + # filter list to include only languages supported by most engines or all default general engines + filtered_languages = { + code: join_result + for code, join_result in all_languages.items() + if ( + len(join_result['engine_names']) >= min_engines_per_lang + or all(main_engine in join_result['engine_names'] for main_engine in main_engines) + ) + } + + def _new_language_country_name(lang: str, country_name: Optional[str]) -> LanguageCountryName: + new_dict: LanguageCountryName = { + 'name': all_languages[lang]['name'], + 'english_name': all_languages[lang]['english_name'], + } + if country_name: + new_dict['country_name'] = country_name + return new_dict + + # for each language get country codes supported by most engines or at least one country code + filtered_languages_with_countries: Dict[str, LanguageCountryName] = {} + for lang, lang_data in filtered_languages.items(): + countries = lang_data['countries'] + filtered_countries: Dict[str, LanguageCountryName] = {} + + # get language's country codes with enough supported engines + for lang_country, country_data in countries.items(): + if len(country_data['engine_names']) >= min_engines_per_country: + filtered_countries[lang_country] = _new_language_country_name(lang, country_data['country_name']) + + # add language without countries too if there's more than one country to choose from + if len(filtered_countries) > 1: + filtered_countries[lang] = _new_language_country_name(lang, None) + elif len(filtered_countries) == 1: + lang_country = next(iter(filtered_countries)) + + # if no country has enough engines try to get most likely country code from babel + if not filtered_countries: + lang_country = None + subtags = 
get_global('likely_subtags').get(lang) + if subtags: + country_code = subtags.split('_')[-1] + if len(country_code) == 2: + lang_country = "{lang}-{country}".format(lang=lang, country=country_code) + + if lang_country: + filtered_countries[lang_country] = _new_language_country_name(lang, None) + else: + filtered_countries[lang] = _new_language_country_name(lang, None) + + filtered_languages_with_countries.update(filtered_countries) + + return filtered_languages_with_countries + + +def write_engine_data(file_name, engine_data_dict: EngineLocalesDict): + raw = { + engine_name: { + 'regions': engine_data.regions, + 'languages': engine_data.languages, + } + for engine_name, engine_data in engine_data_dict.items() + } + with open(file_name, 'w', encoding='utf-8') as f: + json.dump(raw, f, indent=2, sort_keys=True) + + +def write_engines_languages(file_name, engines_languages: EngineLanguageDict): + # write json file + with open(file_name, 'w', encoding='utf-8') as f: + json.dump(engines_languages, f, indent=2, sort_keys=True) + + +class UnicodeEscape(str): + """Escape unicode string in :py:obj:`pprint.pformat`""" + + def __repr__(self): + return "'" + "".join([chr(c) for c in self.encode('unicode-escape')]) + "'" + + +# Write languages.py. 
+def write_languages_file(language_file, languages: Dict[str, LanguageCountryName]): + """Generates :origin:`searx/languages.py`.""" + + file_headers = ( + "# -*- coding: utf-8 -*-", + "# list of language codes", + "# this file is generated automatically by:", + "#", + "# ./manage pyenv.cmd searxng_extra/update/update_languages.py", + "language_codes = (\n", + ) + + language_codes = [] + + for code in sorted(languages): + + name = languages[code]['name'] + if name is None: + print("ERROR: languages['%s'] --> %s" % (code, languages[code])) + continue + + flag = get_unicode_flag(code) or '' + item = ( + code, + name.split(' (')[0], + get_territory_name(code) or '', + languages[code].get('english_name') or '', + UnicodeEscape(flag), + ) + + language_codes.append(item) + + language_codes = tuple(language_codes) + + with open(language_file, 'w', encoding='utf-8') as new_file: + file_content = "{file_headers} {language_codes},\n)\n".format( + # fmt: off + file_headers = '\n'.join(file_headers), + language_codes = pformat(language_codes, indent=4)[1:-1] + # fmt: on + ) + new_file.write(file_content) + + +if __name__ == "__main__": + load_engines(settings['engines']) + _engine_locales_dict, _engines_languages = fetch_engine_locales() + _all_languages = join_language_lists(_engine_locales_dict, _engines_languages) + _filtered_languages = filter_language_list(_all_languages) + write_engine_data(data_dir / 'engine_locales.json', _engine_locales_dict) + write_engines_languages(data_dir / 'engines_languages.json', _engines_languages) + write_languages_file(Path(searx_dir) / 'languages.py', _filtered_languages) diff --git a/searxng_extra/update/update_languages.py b/searxng_extra/update/update_languages.py deleted file mode 100755 index 87b13b276..000000000 --- a/searxng_extra/update/update_languages.py +++ /dev/null @@ -1,313 +0,0 @@ -#!/usr/bin/env python -# lint: pylint - -# SPDX-License-Identifier: AGPL-3.0-or-later -"""This script generates languages.py from intersecting 
each engine's supported -languages. - -Output files: :origin:`searx/data/engines_languages.json` and -:origin:`searx/languages.py` (:origin:`CI Update data ... -<.github/workflows/data-update.yml>`). - -""" - -# pylint: disable=invalid-name -from unicodedata import lookup -import json -from pathlib import Path -from pprint import pformat -from babel import Locale, UnknownLocaleError -from babel.languages import get_global -from babel.core import parse_locale - -from searx import settings, searx_dir -from searx.engines import load_engines, engines -from searx.network import set_timeout_for_thread - -# Output files. -engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json' -languages_file = Path(searx_dir) / 'languages.py' - - -# Fetches supported languages for each engine and writes json file with those. -def fetch_supported_languages(): - set_timeout_for_thread(10.0) - - engines_languages = {} - names = list(engines) - names.sort() - - for engine_name in names: - if hasattr(engines[engine_name], 'fetch_supported_languages'): - engines_languages[engine_name] = engines[engine_name].fetch_supported_languages() - print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name)) - if type(engines_languages[engine_name]) == list: # pylint: disable=unidiomatic-typecheck - engines_languages[engine_name] = sorted(engines_languages[engine_name]) - - print("fetched languages from %s engines" % len(engines_languages)) - - # write json file - with open(engines_languages_file, 'w', encoding='utf-8') as f: - json.dump(engines_languages, f, indent=2, sort_keys=True) - - return engines_languages - - -# Get babel Locale object from lang_code if possible. 
-def get_locale(lang_code): - try: - locale = Locale.parse(lang_code, sep='-') - return locale - except (UnknownLocaleError, ValueError): - return None - - -lang2emoji = { - 'ha': '\U0001F1F3\U0001F1EA', # Hausa / Niger - 'bs': '\U0001F1E7\U0001F1E6', # Bosnian / Bosnia & Herzegovina - 'jp': '\U0001F1EF\U0001F1F5', # Japanese - 'ua': '\U0001F1FA\U0001F1E6', # Ukrainian - 'he': '\U0001F1EE\U0001F1F7', # Hebrew -} - - -def get_unicode_flag(lang_code): - """Determine a unicode flag (emoji) that fits to the ``lang_code``""" - - emoji = lang2emoji.get(lang_code.lower()) - if emoji: - return emoji - - if len(lang_code) == 2: - return '\U0001F310' - - language = territory = script = variant = '' - try: - language, territory, script, variant = parse_locale(lang_code, '-') - except ValueError as exc: - print(exc) - - # https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2 - if not territory: - # https://www.unicode.org/emoji/charts/emoji-list.html#country-flag - emoji = lang2emoji.get(language) - if not emoji: - print( - "%s --> language: %s / territory: %s / script: %s / variant: %s" - % (lang_code, language, territory, script, variant) - ) - return emoji - - emoji = lang2emoji.get(territory.lower()) - if emoji: - return emoji - - try: - c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[0]) - c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[1]) - # print("%s --> territory: %s --> %s%s" %(lang_code, territory, c1, c2 )) - except KeyError as exc: - print("%s --> territory: %s --> %s" % (lang_code, territory, exc)) - return None - - return c1 + c2 - - -def get_territory_name(lang_code): - country_name = None - locale = get_locale(lang_code) - try: - if locale is not None: - country_name = locale.get_territory_name() - except FileNotFoundError as exc: - print("ERROR: %s --> %s" % (locale, exc)) - return country_name - - -# Join all language lists. 
def join_language_lists(engines_languages):
    """Merge the per-engine language lists into one dict.

    Returns a mapping ``short_code -> {name, english_name, counter, countries}``
    where ``counter`` is the set of engines supporting the language and
    ``countries`` maps full ``lang-COUNTRY`` codes to per-country engine sets.
    """
    language_list = {}
    for engine_name in engines_languages:
        for lang_code in engines_languages[engine_name]:

            # apply custom fixes if necessary: map an engine-specific alias
            # back to the canonical code it stands for
            if lang_code in getattr(engines[engine_name], 'language_aliases', {}).values():
                lang_code = next(
                    lc for lc, alias in engines[engine_name].language_aliases.items() if lang_code == alias
                )

            locale = get_locale(lang_code)

            # ensure that lang_code uses standard language and country codes
            if locale and locale.territory:
                lang_code = "{lang}-{country}".format(lang=locale.language, country=locale.territory)
            short_code = lang_code.split('-')[0]

            # add language without country if not in list
            if short_code not in language_list:
                if locale:
                    # get language's data from babel's Locale object
                    language_name = locale.get_language_name().title()
                    english_name = locale.english_name.split(' (')[0]
                elif short_code in engines_languages['wikipedia']:
                    # get language's data from wikipedia if not known by babel
                    language_name = engines_languages['wikipedia'][short_code]['name']
                    english_name = engines_languages['wikipedia'][short_code]['english_name']
                else:
                    language_name = None
                    english_name = None

                # add language to list
                language_list[short_code] = {
                    'name': language_name,
                    'english_name': english_name,
                    'counter': set(),
                    'countries': {},
                }

            # add language with country if not in list
            if lang_code != short_code and lang_code not in language_list[short_code]['countries']:
                country_name = ''
                if locale:
                    # get country name from babel's Locale object
                    try:
                        country_name = locale.get_territory_name()
                    except FileNotFoundError as exc:
                        # babel's locale data file may be missing for exotic
                        # territories; report and fall back to an empty name
                        print("ERROR: %s --> %s" % (locale, exc))
                        locale = None

                language_list[short_code]['countries'][lang_code] = {
                    'country_name': country_name,
                    'counter': set(),
                }

            # count engine for both language_country combination and language alone
            language_list[short_code]['counter'].add(engine_name)
            if lang_code != short_code:
                language_list[short_code]['countries'][lang_code]['counter'].add(engine_name)

    return language_list


def filter_language_list(all_languages):
    """Filter the merged language list so it only includes the most supported
    languages and countries.

    A language is kept when at least ``min_engines_per_lang`` engines support
    it, or when *all* enabled general-purpose engines do.  For each kept
    language, country variants with at least ``min_engines_per_country``
    supporting engines are kept; when none qualifies, babel's likely-subtags
    data is consulted for the most probable country code.
    """
    min_engines_per_lang = 12
    min_engines_per_country = 7
    # pylint: disable=consider-using-dict-items, consider-iterating-dictionary
    main_engines = [
        engine_name
        for engine_name in engines.keys()
        if 'general' in engines[engine_name].categories
        and engines[engine_name].supported_languages
        and not engines[engine_name].disabled
    ]

    # filter list to include only languages supported by most engines or all default general engines
    filtered_languages = {
        code: lang
        for code, lang in all_languages.items()
        if (
            len(lang['counter']) >= min_engines_per_lang
            or all(main_engine in lang['counter'] for main_engine in main_engines)
        )
    }

    def _copy_lang_data(lang, country_name=None):
        # shallow copy of the displayable fields only (drop the counters)
        new_dict = {}
        new_dict['name'] = all_languages[lang]['name']
        new_dict['english_name'] = all_languages[lang]['english_name']
        if country_name:
            new_dict['country_name'] = country_name
        return new_dict

    # for each language get country codes supported by most engines or at least one country code
    filtered_languages_with_countries = {}
    for lang, lang_data in filtered_languages.items():
        countries = lang_data['countries']
        filtered_countries = {}

        # get language's country codes with enough supported engines
        for lang_country, country_data in countries.items():
            if len(country_data['counter']) >= min_engines_per_country:
                filtered_countries[lang_country] = _copy_lang_data(lang, country_data['country_name'])

        # add language without countries too if there's more than one country to choose from
        if len(filtered_countries) > 1:
            filtered_countries[lang] = _copy_lang_data(lang, None)
        elif len(filtered_countries) == 1:
            lang_country = next(iter(filtered_countries))

        # if no country has enough engines try to get most likely country code from babel
        if not filtered_countries:
            lang_country = None
            subtags = get_global('likely_subtags').get(lang)
            if subtags:
                country_code = subtags.split('_')[-1]
                if len(country_code) == 2:
                    lang_country = "{lang}-{country}".format(lang=lang, country=country_code)

            if lang_country:
                filtered_countries[lang_country] = _copy_lang_data(lang, None)
            else:
                filtered_countries[lang] = _copy_lang_data(lang, None)

        filtered_languages_with_countries.update(filtered_countries)

    return filtered_languages_with_countries


class UnicodeEscape(str):
    """Escape unicode string in :py:obj:`pprint.pformat`"""

    def __repr__(self):
        # render non-ASCII characters as \xNN / \uNNNN escapes so the
        # generated languages.py stays plain ASCII
        return "'" + "".join([chr(c) for c in self.encode('unicode-escape')]) + "'"


def write_languages_file(languages):
    """Write the generated ``languages.py`` (path in ``languages_file``).

    Languages whose display name could not be resolved are reported and
    skipped rather than written out.
    """
    file_headers = (
        "# -*- coding: utf-8 -*-",
        "# list of language codes",
        "# this file is generated automatically by utils/fetch_languages.py",
        "language_codes = (\n",
    )

    language_codes = []

    for code in sorted(languages):

        name = languages[code]['name']
        if name is None:
            print("ERROR: languages['%s'] --> %s" % (code, languages[code]))
            continue

        flag = get_unicode_flag(code) or ''
        item = (
            code,
            # reuse the already-fetched name instead of a second dict lookup
            name.split(' (')[0],
            get_territory_name(code) or '',
            languages[code].get('english_name') or '',
            UnicodeEscape(flag),
        )

        language_codes.append(item)

    language_codes = tuple(language_codes)

    with open(languages_file, 'w', encoding='utf-8') as new_file:
        file_content = "{file_headers} {language_codes},\n)\n".format(
            # fmt: off
            file_headers = '\n'.join(file_headers),
            language_codes = pformat(language_codes, indent=4)[1:-1]
            # fmt: on
        )
        new_file.write(file_content)
        # NOTE: the explicit new_file.close() that used to follow was
        # redundant -- the `with` statement already closes the file


if __name__ == "__main__":
    load_engines(settings['engines'])
    _engines_languages = fetch_supported_languages()
    _all_languages = join_language_lists(_engines_languages)
    _filtered_languages = filter_language_list(_all_languages)
    write_languages_file(_filtered_languages)