Add engine locales (1/n)
This commit is contained in:
parent
1a5b096578
commit
52fe8111ea
2
.github/workflows/data-update.yml
vendored
2
.github/workflows/data-update.yml
vendored
@ -17,7 +17,7 @@ jobs:
|
||||
- update_currencies.py
|
||||
- update_external_bangs.py
|
||||
- update_firefox_version.py
|
||||
- update_languages.py
|
||||
- update_engine_locales.py
|
||||
- update_wikidata_units.py
|
||||
- update_engine_descriptions.py
|
||||
steps:
|
||||
|
@ -42,7 +42,7 @@ Explanation of the :ref:`general engine configuration` shown in the table
|
||||
- Timeout
|
||||
- Weight
|
||||
- Paging
|
||||
- Language
|
||||
- Language, Region
|
||||
- Safe search
|
||||
- Time range
|
||||
|
||||
|
3
manage
3
manage
@ -57,7 +57,7 @@ PYLINT_SEARXNG_DISABLE_OPTION="\
|
||||
I,C,R,\
|
||||
W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401,\
|
||||
E1136"
|
||||
PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="supported_languages,language_aliases,logger,categories"
|
||||
PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="engine_locales,supported_languages,language_aliases,logger,categories"
|
||||
PYLINT_OPTIONS="-m pylint -j 0 --rcfile .pylintrc"
|
||||
|
||||
help() {
|
||||
@ -698,6 +698,7 @@ test.pyright() {
|
||||
| grep -v '/engines/.*.py.* - warning: "logger" is not defined'\
|
||||
| grep -v '/plugins/.*.py.* - error: "logger" is not defined'\
|
||||
| grep -v '/engines/.*.py.* - warning: "supported_languages" is not defined' \
|
||||
| grep -v '/engines/.*.py.* - warning: "engine_locales" is not defined' \
|
||||
| grep -v '/engines/.*.py.* - warning: "language_aliases" is not defined' \
|
||||
| grep -v '/engines/.*.py.* - warning: "categories" is not defined'
|
||||
dump_return $?
|
||||
|
@ -11,9 +11,10 @@ from lxml import etree
|
||||
from httpx import HTTPError
|
||||
|
||||
from searx import settings
|
||||
from searx.data import ENGINES_LANGUAGES
|
||||
from searx.locales import get_engine_locale
|
||||
from searx.network import get as http_get
|
||||
from searx.exceptions import SearxEngineResponseException
|
||||
from searx.engines import engines
|
||||
|
||||
# a fetch_supported_languages() for XPath engines isn't available right now
|
||||
# _brave = ENGINES_LANGUAGES['brave'].keys()
|
||||
@ -110,9 +111,12 @@ def seznam(query, _lang):
|
||||
|
||||
def startpage(query, lang):
|
||||
# startpage autocompleter
|
||||
lui = ENGINES_LANGUAGES['startpage'].get(lang, 'english')
|
||||
engine_language = default_language = 'english_uk'
|
||||
if 'startpage' in engines:
|
||||
engine_language = get_engine_locale(lang, engines['startpage'].engine_data.languages, default=default_language)
|
||||
|
||||
url = 'https://startpage.com/suggestions?{query}'
|
||||
resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui})))
|
||||
resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': engine_language})))
|
||||
data = resp.json()
|
||||
return [e['text'] for e in data.get('suggestions', []) if 'text' in e]
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
__all__ = [
|
||||
'ENGINES_LANGUAGES',
|
||||
'ENGINES_LOCALES',
|
||||
'CURRENCIES',
|
||||
'USER_AGENTS',
|
||||
'EXTERNAL_URLS',
|
||||
@ -43,6 +44,7 @@ def ahmia_blacklist_loader():
|
||||
|
||||
|
||||
ENGINES_LANGUAGES = _load('engines_languages.json')
|
||||
ENGINES_LOCALES = _load('engine_locales.json')
|
||||
CURRENCIES = _load('currencies.json')
|
||||
USER_AGENTS = _load('useragents.json')
|
||||
EXTERNAL_URLS = _load('external_urls.json')
|
||||
|
299
searx/data/engine_locales.json
Normal file
299
searx/data/engine_locales.json
Normal file
@ -0,0 +1,299 @@
|
||||
{
|
||||
"qwant": {
|
||||
"languages": {},
|
||||
"regions": {
|
||||
"bg-BG": "bg_BG",
|
||||
"ca-ES": "ca_ES",
|
||||
"cs-CZ": "cs_CZ",
|
||||
"da-DK": "da_DK",
|
||||
"de-AT": "de_AT",
|
||||
"de-CH": "de_CH",
|
||||
"de-DE": "de_DE",
|
||||
"el-GR": "el_GR",
|
||||
"en-AU": "en_AU",
|
||||
"en-CA": "en_CA",
|
||||
"en-GB": "en_GB",
|
||||
"en-IE": "en_IE",
|
||||
"en-MY": "en_MY",
|
||||
"en-NZ": "en_NZ",
|
||||
"en-US": "en_US",
|
||||
"es-AR": "es_AR",
|
||||
"es-CL": "es_CL",
|
||||
"es-ES": "es_ES",
|
||||
"es-MX": "es_MX",
|
||||
"et-EE": "et_EE",
|
||||
"fi-FI": "fi_FI",
|
||||
"fr-BE": "fr_BE",
|
||||
"fr-CA": "fr_CA",
|
||||
"fr-CH": "fr_CH",
|
||||
"fr-FR": "fr_FR",
|
||||
"hu-HU": "hu_HU",
|
||||
"it-CH": "it_CH",
|
||||
"it-IT": "it_IT",
|
||||
"ko-KR": "ko_KR",
|
||||
"nb-NO": "nb_NO",
|
||||
"nl-BE": "nl_BE",
|
||||
"nl-NL": "nl_NL",
|
||||
"pl-PL": "pl_PL",
|
||||
"pt-PT": "pt_PT",
|
||||
"ro-RO": "ro_RO",
|
||||
"sv-SE": "sv_SE",
|
||||
"th-TH": "th_TH",
|
||||
"zh-CN": "zh_CN",
|
||||
"zh-HK": "zh_HK"
|
||||
}
|
||||
},
|
||||
"qwant images": {
|
||||
"languages": {},
|
||||
"regions": {
|
||||
"bg-BG": "bg_BG",
|
||||
"ca-ES": "ca_ES",
|
||||
"cs-CZ": "cs_CZ",
|
||||
"da-DK": "da_DK",
|
||||
"de-AT": "de_AT",
|
||||
"de-CH": "de_CH",
|
||||
"de-DE": "de_DE",
|
||||
"el-GR": "el_GR",
|
||||
"en-AU": "en_AU",
|
||||
"en-CA": "en_CA",
|
||||
"en-GB": "en_GB",
|
||||
"en-IE": "en_IE",
|
||||
"en-MY": "en_MY",
|
||||
"en-NZ": "en_NZ",
|
||||
"en-US": "en_US",
|
||||
"es-AR": "es_AR",
|
||||
"es-CL": "es_CL",
|
||||
"es-ES": "es_ES",
|
||||
"es-MX": "es_MX",
|
||||
"et-EE": "et_EE",
|
||||
"fi-FI": "fi_FI",
|
||||
"fr-BE": "fr_BE",
|
||||
"fr-CA": "fr_CA",
|
||||
"fr-CH": "fr_CH",
|
||||
"fr-FR": "fr_FR",
|
||||
"hu-HU": "hu_HU",
|
||||
"it-CH": "it_CH",
|
||||
"it-IT": "it_IT",
|
||||
"ko-KR": "ko_KR",
|
||||
"nb-NO": "nb_NO",
|
||||
"nl-BE": "nl_BE",
|
||||
"nl-NL": "nl_NL",
|
||||
"pl-PL": "pl_PL",
|
||||
"pt-PT": "pt_PT",
|
||||
"ro-RO": "ro_RO",
|
||||
"sv-SE": "sv_SE",
|
||||
"th-TH": "th_TH",
|
||||
"zh-CN": "zh_CN",
|
||||
"zh-HK": "zh_HK"
|
||||
}
|
||||
},
|
||||
"qwant news": {
|
||||
"languages": {},
|
||||
"regions": {
|
||||
"ca-ES": "ca_ES",
|
||||
"de-AT": "de_AT",
|
||||
"de-CH": "de_CH",
|
||||
"de-DE": "de_DE",
|
||||
"en-AU": "en_AU",
|
||||
"en-CA": "en_CA",
|
||||
"en-GB": "en_GB",
|
||||
"en-IE": "en_IE",
|
||||
"en-MY": "en_MY",
|
||||
"en-NZ": "en_NZ",
|
||||
"en-US": "en_US",
|
||||
"es-AR": "es_AR",
|
||||
"es-CL": "es_CL",
|
||||
"es-ES": "es_ES",
|
||||
"es-MX": "es_MX",
|
||||
"fr-BE": "fr_BE",
|
||||
"fr-CA": "fr_CA",
|
||||
"fr-CH": "fr_CH",
|
||||
"fr-FR": "fr_FR",
|
||||
"it-CH": "it_CH",
|
||||
"it-IT": "it_IT",
|
||||
"nl-BE": "nl_BE",
|
||||
"nl-NL": "nl_NL",
|
||||
"pt-PT": "pt_PT"
|
||||
}
|
||||
},
|
||||
"qwant videos": {
|
||||
"languages": {},
|
||||
"regions": {
|
||||
"bg-BG": "bg_BG",
|
||||
"ca-ES": "ca_ES",
|
||||
"cs-CZ": "cs_CZ",
|
||||
"da-DK": "da_DK",
|
||||
"de-AT": "de_AT",
|
||||
"de-CH": "de_CH",
|
||||
"de-DE": "de_DE",
|
||||
"el-GR": "el_GR",
|
||||
"en-AU": "en_AU",
|
||||
"en-CA": "en_CA",
|
||||
"en-GB": "en_GB",
|
||||
"en-IE": "en_IE",
|
||||
"en-MY": "en_MY",
|
||||
"en-NZ": "en_NZ",
|
||||
"en-US": "en_US",
|
||||
"es-AR": "es_AR",
|
||||
"es-CL": "es_CL",
|
||||
"es-ES": "es_ES",
|
||||
"es-MX": "es_MX",
|
||||
"et-EE": "et_EE",
|
||||
"fi-FI": "fi_FI",
|
||||
"fr-BE": "fr_BE",
|
||||
"fr-CA": "fr_CA",
|
||||
"fr-CH": "fr_CH",
|
||||
"fr-FR": "fr_FR",
|
||||
"hu-HU": "hu_HU",
|
||||
"it-CH": "it_CH",
|
||||
"it-IT": "it_IT",
|
||||
"ko-KR": "ko_KR",
|
||||
"nb-NO": "nb_NO",
|
||||
"nl-BE": "nl_BE",
|
||||
"nl-NL": "nl_NL",
|
||||
"pl-PL": "pl_PL",
|
||||
"pt-PT": "pt_PT",
|
||||
"ro-RO": "ro_RO",
|
||||
"sv-SE": "sv_SE",
|
||||
"th-TH": "th_TH",
|
||||
"zh-CN": "zh_CN",
|
||||
"zh-HK": "zh_HK"
|
||||
}
|
||||
},
|
||||
"startpage": {
|
||||
"languages": {
|
||||
"af": "afrikaans",
|
||||
"am": "amharic",
|
||||
"ar": "arabic",
|
||||
"az": "azerbaijani",
|
||||
"be": "belarusian",
|
||||
"bg": "bulgarian",
|
||||
"bn": "bengali",
|
||||
"bs": "bosnian",
|
||||
"ca": "catalan",
|
||||
"cs": "czech",
|
||||
"cy": "welsh",
|
||||
"da": "dansk",
|
||||
"de": "deutsch",
|
||||
"el": "greek",
|
||||
"en": "english_uk",
|
||||
"eo": "esperanto",
|
||||
"es": "espanol",
|
||||
"et": "estonian",
|
||||
"eu": "basque",
|
||||
"fa": "persian",
|
||||
"fi": "suomi",
|
||||
"fo": "faroese",
|
||||
"fr": "francais",
|
||||
"fy": "frisian",
|
||||
"ga": "irish",
|
||||
"gd": "gaelic",
|
||||
"gl": "galician",
|
||||
"gu": "gujarati",
|
||||
"he": "hebrew",
|
||||
"hi": "hindi",
|
||||
"hr": "croatian",
|
||||
"hu": "hungarian",
|
||||
"ia": "interlingua",
|
||||
"id": "indonesian",
|
||||
"is": "icelandic",
|
||||
"it": "italiano",
|
||||
"ja": "nihongo",
|
||||
"jv": "javanese",
|
||||
"ka": "georgian",
|
||||
"kn": "kannada",
|
||||
"ko": "hangul",
|
||||
"la": "latin",
|
||||
"lt": "lithuanian",
|
||||
"lv": "latvian",
|
||||
"mai": "bihari",
|
||||
"mk": "macedonian",
|
||||
"ml": "malayalam",
|
||||
"mr": "marathi",
|
||||
"ms": "malay",
|
||||
"mt": "maltese",
|
||||
"nb": "norsk",
|
||||
"ne": "nepali",
|
||||
"nl": "nederlands",
|
||||
"oc": "occitan",
|
||||
"pa": "punjabi",
|
||||
"pl": "polski",
|
||||
"pt": "portugues",
|
||||
"ro": "romanian",
|
||||
"ru": "russian",
|
||||
"si": "sinhalese",
|
||||
"sk": "slovak",
|
||||
"sl": "slovenian",
|
||||
"sq": "albanian",
|
||||
"sr": "serbian",
|
||||
"su": "sudanese",
|
||||
"sv": "svenska",
|
||||
"sw": "swahili",
|
||||
"ta": "tamil",
|
||||
"te": "telugu",
|
||||
"th": "thai",
|
||||
"ti": "tigrinya",
|
||||
"tl": "tagalog",
|
||||
"tr": "turkce",
|
||||
"uk": "ukrainian",
|
||||
"ur": "urdu",
|
||||
"uz": "uzbek",
|
||||
"vi": "vietnamese",
|
||||
"xh": "xhosa",
|
||||
"zh": "jiantizhongwen",
|
||||
"zh_Hant": "fantizhengwen",
|
||||
"zu": "zulu"
|
||||
},
|
||||
"regions": {
|
||||
"ar-EG": "ar_EG",
|
||||
"bg-BG": "bg_BG",
|
||||
"ca-ES": "ca_ES",
|
||||
"cs-CZ": "cs_CZ",
|
||||
"da-DK": "da_DK",
|
||||
"de-AT": "de_AT",
|
||||
"de-CH": "de_CH",
|
||||
"de-DE": "de_DE",
|
||||
"el-GR": "el_GR",
|
||||
"en-AU": "en_AU",
|
||||
"en-CA": "en_CA",
|
||||
"en-GB": "en-GB_GB",
|
||||
"en-IE": "en_IE",
|
||||
"en-MY": "en_MY",
|
||||
"en-NZ": "en_NZ",
|
||||
"en-US": "en_US",
|
||||
"en-ZA": "en_ZA",
|
||||
"es-AR": "es_AR",
|
||||
"es-CL": "es_CL",
|
||||
"es-ES": "es_ES",
|
||||
"es-US": "es_US",
|
||||
"es-UY": "es_UY",
|
||||
"fi-FI": "fi_FI",
|
||||
"fil-PH": "fil_PH",
|
||||
"fr-BE": "fr_BE",
|
||||
"fr-CA": "fr_CA",
|
||||
"fr-CH": "fr_CH",
|
||||
"fr-FR": "fr_FR",
|
||||
"hi-IN": "hi_IN",
|
||||
"it-CH": "it_CH",
|
||||
"it-IT": "it_IT",
|
||||
"ja-JP": "ja_JP",
|
||||
"ko-KR": "ko_KR",
|
||||
"ms-MY": "ms_MY",
|
||||
"nb-NO": "no_NO",
|
||||
"nl-BE": "nl_BE",
|
||||
"nl-NL": "nl_NL",
|
||||
"pl-PL": "pl_PL",
|
||||
"pt-BR": "pt-BR_BR",
|
||||
"pt-PT": "pt_PT",
|
||||
"ro-RO": "ro_RO",
|
||||
"ru-BY": "ru_BY",
|
||||
"ru-RU": "ru_RU",
|
||||
"sv-SE": "sv_SE",
|
||||
"tr-TR": "tr_TR",
|
||||
"uk-UA": "uk_UA",
|
||||
"zh-CN": "zh-CN_CN",
|
||||
"zh-HK": "zh-TW_HK",
|
||||
"zh-TW": "zh-TW_TW"
|
||||
}
|
||||
}
|
||||
}
|
@ -1396,406 +1396,6 @@
|
||||
"sv",
|
||||
"zh"
|
||||
],
|
||||
"qwant": {
|
||||
"bg-BG": "bg_BG",
|
||||
"ca-ES": "ca_ES",
|
||||
"cs-CZ": "cs_CZ",
|
||||
"da-DK": "da_DK",
|
||||
"de-AT": "de_AT",
|
||||
"de-CH": "de_CH",
|
||||
"de-DE": "de_DE",
|
||||
"el-GR": "el_GR",
|
||||
"en-AU": "en_AU",
|
||||
"en-CA": "en_CA",
|
||||
"en-GB": "en_GB",
|
||||
"en-IE": "en_IE",
|
||||
"en-MY": "en_MY",
|
||||
"en-NZ": "en_NZ",
|
||||
"en-US": "en_US",
|
||||
"es-AR": "es_AR",
|
||||
"es-CL": "es_CL",
|
||||
"es-ES": "es_ES",
|
||||
"es-MX": "es_MX",
|
||||
"et-EE": "et_EE",
|
||||
"fi-FI": "fi_FI",
|
||||
"fr-BE": "fr_BE",
|
||||
"fr-CA": "fr_CA",
|
||||
"fr-CH": "fr_CH",
|
||||
"fr-FR": "fr_FR",
|
||||
"hu-HU": "hu_HU",
|
||||
"it-CH": "it_CH",
|
||||
"it-IT": "it_IT",
|
||||
"ko-KR": "ko_KR",
|
||||
"nb-NO": "nb_NO",
|
||||
"nl-BE": "nl_BE",
|
||||
"nl-NL": "nl_NL",
|
||||
"pl-PL": "pl_PL",
|
||||
"pt-PT": "pt_PT",
|
||||
"ro-RO": "ro_RO",
|
||||
"sv-SE": "sv_SE",
|
||||
"th-TH": "th_TH",
|
||||
"zh-CN": "zh_CN",
|
||||
"zh-HK": "zh_HK"
|
||||
},
|
||||
"qwant images": {
|
||||
"bg-BG": "bg_BG",
|
||||
"ca-ES": "ca_ES",
|
||||
"cs-CZ": "cs_CZ",
|
||||
"da-DK": "da_DK",
|
||||
"de-AT": "de_AT",
|
||||
"de-CH": "de_CH",
|
||||
"de-DE": "de_DE",
|
||||
"el-GR": "el_GR",
|
||||
"en-AU": "en_AU",
|
||||
"en-CA": "en_CA",
|
||||
"en-GB": "en_GB",
|
||||
"en-IE": "en_IE",
|
||||
"en-MY": "en_MY",
|
||||
"en-NZ": "en_NZ",
|
||||
"en-US": "en_US",
|
||||
"es-AR": "es_AR",
|
||||
"es-CL": "es_CL",
|
||||
"es-ES": "es_ES",
|
||||
"es-MX": "es_MX",
|
||||
"et-EE": "et_EE",
|
||||
"fi-FI": "fi_FI",
|
||||
"fr-BE": "fr_BE",
|
||||
"fr-CA": "fr_CA",
|
||||
"fr-CH": "fr_CH",
|
||||
"fr-FR": "fr_FR",
|
||||
"hu-HU": "hu_HU",
|
||||
"it-CH": "it_CH",
|
||||
"it-IT": "it_IT",
|
||||
"ko-KR": "ko_KR",
|
||||
"nb-NO": "nb_NO",
|
||||
"nl-BE": "nl_BE",
|
||||
"nl-NL": "nl_NL",
|
||||
"pl-PL": "pl_PL",
|
||||
"pt-PT": "pt_PT",
|
||||
"ro-RO": "ro_RO",
|
||||
"sv-SE": "sv_SE",
|
||||
"th-TH": "th_TH",
|
||||
"zh-CN": "zh_CN",
|
||||
"zh-HK": "zh_HK"
|
||||
},
|
||||
"qwant news": {
|
||||
"ca-ES": "ca_ES",
|
||||
"de-AT": "de_AT",
|
||||
"de-CH": "de_CH",
|
||||
"de-DE": "de_DE",
|
||||
"en-AU": "en_AU",
|
||||
"en-CA": "en_CA",
|
||||
"en-GB": "en_GB",
|
||||
"en-IE": "en_IE",
|
||||
"en-MY": "en_MY",
|
||||
"en-NZ": "en_NZ",
|
||||
"en-US": "en_US",
|
||||
"es-AR": "es_AR",
|
||||
"es-CL": "es_CL",
|
||||
"es-ES": "es_ES",
|
||||
"es-MX": "es_MX",
|
||||
"fr-BE": "fr_BE",
|
||||
"fr-CA": "fr_CA",
|
||||
"fr-CH": "fr_CH",
|
||||
"fr-FR": "fr_FR",
|
||||
"it-CH": "it_CH",
|
||||
"it-IT": "it_IT",
|
||||
"nl-BE": "nl_BE",
|
||||
"nl-NL": "nl_NL",
|
||||
"pt-PT": "pt_PT"
|
||||
},
|
||||
"qwant videos": {
|
||||
"bg-BG": "bg_BG",
|
||||
"ca-ES": "ca_ES",
|
||||
"cs-CZ": "cs_CZ",
|
||||
"da-DK": "da_DK",
|
||||
"de-AT": "de_AT",
|
||||
"de-CH": "de_CH",
|
||||
"de-DE": "de_DE",
|
||||
"el-GR": "el_GR",
|
||||
"en-AU": "en_AU",
|
||||
"en-CA": "en_CA",
|
||||
"en-GB": "en_GB",
|
||||
"en-IE": "en_IE",
|
||||
"en-MY": "en_MY",
|
||||
"en-NZ": "en_NZ",
|
||||
"en-US": "en_US",
|
||||
"es-AR": "es_AR",
|
||||
"es-CL": "es_CL",
|
||||
"es-ES": "es_ES",
|
||||
"es-MX": "es_MX",
|
||||
"et-EE": "et_EE",
|
||||
"fi-FI": "fi_FI",
|
||||
"fr-BE": "fr_BE",
|
||||
"fr-CA": "fr_CA",
|
||||
"fr-CH": "fr_CH",
|
||||
"fr-FR": "fr_FR",
|
||||
"hu-HU": "hu_HU",
|
||||
"it-CH": "it_CH",
|
||||
"it-IT": "it_IT",
|
||||
"ko-KR": "ko_KR",
|
||||
"nb-NO": "nb_NO",
|
||||
"nl-BE": "nl_BE",
|
||||
"nl-NL": "nl_NL",
|
||||
"pl-PL": "pl_PL",
|
||||
"pt-PT": "pt_PT",
|
||||
"ro-RO": "ro_RO",
|
||||
"sv-SE": "sv_SE",
|
||||
"th-TH": "th_TH",
|
||||
"zh-CN": "zh_CN",
|
||||
"zh-HK": "zh_HK"
|
||||
},
|
||||
"startpage": {
|
||||
"af": {
|
||||
"alias": "afrikaans"
|
||||
},
|
||||
"am": {
|
||||
"alias": "amharic"
|
||||
},
|
||||
"ar": {
|
||||
"alias": "arabic"
|
||||
},
|
||||
"az": {
|
||||
"alias": "azerbaijani"
|
||||
},
|
||||
"be": {
|
||||
"alias": "belarusian"
|
||||
},
|
||||
"bg": {
|
||||
"alias": "bulgarian"
|
||||
},
|
||||
"bn": {
|
||||
"alias": "bengali"
|
||||
},
|
||||
"bs": {
|
||||
"alias": "bosnian"
|
||||
},
|
||||
"ca": {
|
||||
"alias": "catalan"
|
||||
},
|
||||
"cs": {
|
||||
"alias": "czech"
|
||||
},
|
||||
"cy": {
|
||||
"alias": "welsh"
|
||||
},
|
||||
"da": {
|
||||
"alias": "dansk"
|
||||
},
|
||||
"de": {
|
||||
"alias": "deutsch"
|
||||
},
|
||||
"el": {
|
||||
"alias": "greek"
|
||||
},
|
||||
"en": {
|
||||
"alias": "english"
|
||||
},
|
||||
"en-GB": {
|
||||
"alias": "english_uk"
|
||||
},
|
||||
"eo": {
|
||||
"alias": "esperanto"
|
||||
},
|
||||
"es": {
|
||||
"alias": "espanol"
|
||||
},
|
||||
"et": {
|
||||
"alias": "estonian"
|
||||
},
|
||||
"eu": {
|
||||
"alias": "basque"
|
||||
},
|
||||
"fa": {
|
||||
"alias": "persian"
|
||||
},
|
||||
"fi": {
|
||||
"alias": "suomi"
|
||||
},
|
||||
"fo": {
|
||||
"alias": "faroese"
|
||||
},
|
||||
"fr": {
|
||||
"alias": "francais"
|
||||
},
|
||||
"fy": {
|
||||
"alias": "frisian"
|
||||
},
|
||||
"ga": {
|
||||
"alias": "irish"
|
||||
},
|
||||
"gd": {
|
||||
"alias": "gaelic"
|
||||
},
|
||||
"gl": {
|
||||
"alias": "galician"
|
||||
},
|
||||
"gu": {
|
||||
"alias": "gujarati"
|
||||
},
|
||||
"he": {
|
||||
"alias": "hebrew"
|
||||
},
|
||||
"hi": {
|
||||
"alias": "hindi"
|
||||
},
|
||||
"hr": {
|
||||
"alias": "croatian"
|
||||
},
|
||||
"hu": {
|
||||
"alias": "hungarian"
|
||||
},
|
||||
"ia": {
|
||||
"alias": "interlingua"
|
||||
},
|
||||
"id": {
|
||||
"alias": "indonesian"
|
||||
},
|
||||
"is": {
|
||||
"alias": "icelandic"
|
||||
},
|
||||
"it": {
|
||||
"alias": "italiano"
|
||||
},
|
||||
"ja": {
|
||||
"alias": "nihongo"
|
||||
},
|
||||
"jv": {
|
||||
"alias": "javanese"
|
||||
},
|
||||
"ka": {
|
||||
"alias": "georgian"
|
||||
},
|
||||
"kn": {
|
||||
"alias": "kannada"
|
||||
},
|
||||
"ko": {
|
||||
"alias": "hangul"
|
||||
},
|
||||
"la": {
|
||||
"alias": "latin"
|
||||
},
|
||||
"lt": {
|
||||
"alias": "lithuanian"
|
||||
},
|
||||
"lv": {
|
||||
"alias": "latvian"
|
||||
},
|
||||
"mai": {
|
||||
"alias": "bihari"
|
||||
},
|
||||
"mk": {
|
||||
"alias": "macedonian"
|
||||
},
|
||||
"ml": {
|
||||
"alias": "malayalam"
|
||||
},
|
||||
"mr": {
|
||||
"alias": "marathi"
|
||||
},
|
||||
"ms": {
|
||||
"alias": "malay"
|
||||
},
|
||||
"mt": {
|
||||
"alias": "maltese"
|
||||
},
|
||||
"ne": {
|
||||
"alias": "nepali"
|
||||
},
|
||||
"nl": {
|
||||
"alias": "nederlands"
|
||||
},
|
||||
"no": {
|
||||
"alias": "norsk"
|
||||
},
|
||||
"oc": {
|
||||
"alias": "occitan"
|
||||
},
|
||||
"pa": {
|
||||
"alias": "punjabi"
|
||||
},
|
||||
"pl": {
|
||||
"alias": "polski"
|
||||
},
|
||||
"pt": {
|
||||
"alias": "portugues"
|
||||
},
|
||||
"ro": {
|
||||
"alias": "romanian"
|
||||
},
|
||||
"ru": {
|
||||
"alias": "russian"
|
||||
},
|
||||
"si": {
|
||||
"alias": "sinhalese"
|
||||
},
|
||||
"sk": {
|
||||
"alias": "slovak"
|
||||
},
|
||||
"sl": {
|
||||
"alias": "slovenian"
|
||||
},
|
||||
"sq": {
|
||||
"alias": "albanian"
|
||||
},
|
||||
"sr": {
|
||||
"alias": "serbian"
|
||||
},
|
||||
"su": {
|
||||
"alias": "sudanese"
|
||||
},
|
||||
"sv": {
|
||||
"alias": "svenska"
|
||||
},
|
||||
"sw": {
|
||||
"alias": "swahili"
|
||||
},
|
||||
"ta": {
|
||||
"alias": "tamil"
|
||||
},
|
||||
"te": {
|
||||
"alias": "telugu"
|
||||
},
|
||||
"th": {
|
||||
"alias": "thai"
|
||||
},
|
||||
"ti": {
|
||||
"alias": "tigrinya"
|
||||
},
|
||||
"tl": {
|
||||
"alias": "tagalog"
|
||||
},
|
||||
"tr": {
|
||||
"alias": "turkce"
|
||||
},
|
||||
"uk": {
|
||||
"alias": "ukrainian"
|
||||
},
|
||||
"ur": {
|
||||
"alias": "urdu"
|
||||
},
|
||||
"uz": {
|
||||
"alias": "uzbek"
|
||||
},
|
||||
"vi": {
|
||||
"alias": "vietnamese"
|
||||
},
|
||||
"xh": {
|
||||
"alias": "xhosa"
|
||||
},
|
||||
"zh": {
|
||||
"alias": "jiantizhongwen"
|
||||
},
|
||||
"zh-HK": {
|
||||
"alias": "fantizhengwen"
|
||||
},
|
||||
"zh-TW": {
|
||||
"alias": "fantizhengwen"
|
||||
},
|
||||
"zu": {
|
||||
"alias": "zulu"
|
||||
}
|
||||
},
|
||||
"wikidata": {
|
||||
"ab": {
|
||||
"english_name": "Abkhazian",
|
||||
|
@ -13,14 +13,14 @@ usage::
|
||||
|
||||
import sys
|
||||
import copy
|
||||
from typing import Dict, List, Optional
|
||||
import dataclasses
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
from os.path import realpath, dirname
|
||||
from babel.localedata import locale_identifiers
|
||||
from searx import logger, settings
|
||||
from searx.data import ENGINES_LANGUAGES
|
||||
from searx.network import get
|
||||
from searx.utils import load_module, match_language, gen_useragent
|
||||
from searx.data import ENGINES_LANGUAGES, ENGINES_LOCALES
|
||||
from searx.utils import load_module, match_language
|
||||
|
||||
|
||||
logger = logger.getChild('engines')
|
||||
@ -52,6 +52,27 @@ ENGINE_DEFAULT_ARGS = {
|
||||
OTHER_CATEGORY = 'other'
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class EngineLocales:
|
||||
"""The class is intended to be instanciated for each engine."""
|
||||
|
||||
regions: Dict[str, str] = dataclasses.field(default_factory=dict)
|
||||
"""
|
||||
.. code:: python
|
||||
{
|
||||
'fr-BE' : <engine's region name>,
|
||||
}
|
||||
"""
|
||||
|
||||
languages: Dict[str, str] = dataclasses.field(default_factory=dict)
|
||||
"""
|
||||
.. code:: python
|
||||
{
|
||||
'ca' : <engine's language name>,
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
class Engine: # pylint: disable=too-few-public-methods
|
||||
"""This class is currently never initialized and only used for type hinting."""
|
||||
|
||||
@ -59,15 +80,17 @@ class Engine: # pylint: disable=too-few-public-methods
|
||||
engine: str
|
||||
shortcut: str
|
||||
categories: List[str]
|
||||
supported_languages: List[str]
|
||||
about: dict
|
||||
inactive: bool
|
||||
disabled: bool
|
||||
language_support: bool
|
||||
paging: bool
|
||||
safesearch: bool
|
||||
time_range_support: bool
|
||||
timeout: float
|
||||
language_support: bool
|
||||
engine_locales: EngineLocales
|
||||
supported_languages: List[str]
|
||||
language_aliases: Dict[str, str]
|
||||
|
||||
|
||||
# Defaults for the namespace of an engine module, see :py:func:`load_engine`
|
||||
@ -85,15 +108,15 @@ engine_shortcuts = {}
|
||||
"""
|
||||
|
||||
|
||||
def load_engine(engine_data: dict) -> Optional[Engine]:
|
||||
"""Load engine from ``engine_data``.
|
||||
def load_engine(engine_setting: Dict[str, Any]) -> Optional[Engine]:
|
||||
"""Load engine from ``engine_setting``.
|
||||
|
||||
:param dict engine_data: Attributes from YAML ``settings:engines/<engine>``
|
||||
:param dict engine_setting: Attributes from YAML ``settings:engines/<engine>``
|
||||
:return: initialized namespace of the ``<engine>``.
|
||||
|
||||
1. create a namespace and load module of the ``<engine>``
|
||||
2. update namespace with the defaults from :py:obj:`ENGINE_DEFAULT_ARGS`
|
||||
3. update namespace with values from ``engine_data``
|
||||
3. update namespace with values from ``engine_setting``
|
||||
|
||||
If engine *is active*, return namespace of the engine, otherwise return
|
||||
``None``.
|
||||
@ -107,7 +130,7 @@ def load_engine(engine_data: dict) -> Optional[Engine]:
|
||||
|
||||
"""
|
||||
|
||||
engine_name = engine_data['name']
|
||||
engine_name = engine_setting['name']
|
||||
if '_' in engine_name:
|
||||
logger.error('Engine name contains underscore: "{}"'.format(engine_name))
|
||||
return None
|
||||
@ -115,10 +138,10 @@ def load_engine(engine_data: dict) -> Optional[Engine]:
|
||||
if engine_name.lower() != engine_name:
|
||||
logger.warn('Engine name is not lowercase: "{}", converting to lowercase'.format(engine_name))
|
||||
engine_name = engine_name.lower()
|
||||
engine_data['name'] = engine_name
|
||||
engine_setting['name'] = engine_name
|
||||
|
||||
# load_module
|
||||
engine_module = engine_data['engine']
|
||||
engine_module = engine_setting['engine']
|
||||
try:
|
||||
engine = load_module(engine_module + '.py', ENGINE_DIR)
|
||||
except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError, ImportError, RuntimeError):
|
||||
@ -128,9 +151,10 @@ def load_engine(engine_data: dict) -> Optional[Engine]:
|
||||
logger.exception('Cannot load engine "{}"'.format(engine_module))
|
||||
return None
|
||||
|
||||
update_engine_attributes(engine, engine_data)
|
||||
set_language_attributes(engine)
|
||||
update_engine_attributes(engine, engine_setting)
|
||||
update_attributes_for_tor(engine)
|
||||
if not set_engine_locales(engine):
|
||||
set_language_attributes(engine)
|
||||
|
||||
if not is_engine_active(engine):
|
||||
return None
|
||||
@ -165,15 +189,15 @@ def set_loggers(engine, engine_name):
|
||||
module.logger = logger.getChild(module_engine_name)
|
||||
|
||||
|
||||
def update_engine_attributes(engine: Engine, engine_data):
|
||||
# set engine attributes from engine_data
|
||||
for param_name, param_value in engine_data.items():
|
||||
def update_engine_attributes(engine: Engine, engine_setting: Dict[str, Any]):
|
||||
# set engine attributes from engine_setting
|
||||
for param_name, param_value in engine_setting.items():
|
||||
if param_name == 'categories':
|
||||
if isinstance(param_value, str):
|
||||
param_value = list(map(str.strip, param_value.split(',')))
|
||||
engine.categories = param_value
|
||||
elif hasattr(engine, 'about') and param_name == 'about':
|
||||
engine.about = {**engine.about, **engine_data['about']}
|
||||
engine.about = {**engine.about, **engine_setting['about']}
|
||||
else:
|
||||
setattr(engine, param_name, param_value)
|
||||
|
||||
@ -183,6 +207,28 @@ def update_engine_attributes(engine: Engine, engine_data):
|
||||
setattr(engine, arg_name, copy.deepcopy(arg_value))
|
||||
|
||||
|
||||
def set_engine_locales(engine: Engine):
|
||||
engine_locales_key = None
|
||||
|
||||
if engine.name in ENGINES_LOCALES:
|
||||
engine_locales_key = engine.name
|
||||
elif engine.engine in ENGINES_LOCALES:
|
||||
# The key of the dictionary engine_data_dict is the *engine name*
|
||||
# configured in settings.xml. When multiple engines are configured in
|
||||
# settings.yml to use the same origin engine (python module) these
|
||||
# additional engines can use the languages from the origin engine.
|
||||
# For this use the configured ``engine: ...`` from settings.yml
|
||||
engine_locales_key = engine.engine
|
||||
else:
|
||||
return False
|
||||
|
||||
print(engine.name, ENGINES_LOCALES[engine_locales_key])
|
||||
engine.engine_locales = EngineLocales(**ENGINES_LOCALES[engine_locales_key])
|
||||
# language_support
|
||||
engine.language_support = len(engine.engine_locales.regions) > 0 or len(engine.engine_locales.languages) > 0
|
||||
return True
|
||||
|
||||
|
||||
def set_language_attributes(engine: Engine):
|
||||
# assign supported languages from json file
|
||||
if engine.name in ENGINES_LANGUAGES:
|
||||
@ -225,17 +271,6 @@ def set_language_attributes(engine: Engine):
|
||||
# language_support
|
||||
engine.language_support = len(engine.supported_languages) > 0
|
||||
|
||||
# assign language fetching method if auxiliary method exists
|
||||
if hasattr(engine, '_fetch_supported_languages'):
|
||||
headers = {
|
||||
'User-Agent': gen_useragent(),
|
||||
'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language
|
||||
}
|
||||
engine.fetch_supported_languages = (
|
||||
# pylint: disable=protected-access
|
||||
lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers))
|
||||
)
|
||||
|
||||
|
||||
def update_attributes_for_tor(engine: Engine) -> bool:
|
||||
if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
|
||||
@ -294,8 +329,8 @@ def load_engines(engine_list):
|
||||
engine_shortcuts.clear()
|
||||
categories.clear()
|
||||
categories['general'] = []
|
||||
for engine_data in engine_list:
|
||||
engine = load_engine(engine_data)
|
||||
for engine_setting in engine_list:
|
||||
engine = load_engine(engine_setting)
|
||||
if engine:
|
||||
register_engine(engine)
|
||||
return engines
|
||||
|
@ -136,7 +136,7 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
|
||||
:param dict param: request parameters of the engine
|
||||
|
||||
:param list lang_list: list of supported languages of the engine
|
||||
:py:obj:`ENGINES_LANGUAGES[engine-name] <searx.data.ENGINES_LANGUAGES>`
|
||||
:py:obj:`ENGINES_DATAS[engine-name].languages <searx.data.ENGINES_DATAS>`
|
||||
|
||||
:param dict lang_list: custom aliases for non standard language codes
|
||||
(used when calling :py:func:`searx.utils.match_language`)
|
||||
|
@ -50,7 +50,6 @@ about = {
|
||||
# engine dependent config
|
||||
categories = ['science', 'scientific publications']
|
||||
paging = True
|
||||
language_support = True
|
||||
use_locale_domain = True
|
||||
time_range_support = True
|
||||
safesearch = False
|
||||
|
@ -56,7 +56,6 @@ about = {
|
||||
|
||||
categories = ['videos', 'web']
|
||||
paging = False
|
||||
language_support = True
|
||||
use_locale_domain = True
|
||||
time_range_support = True
|
||||
safesearch = True
|
||||
|
@ -49,7 +49,7 @@ about = {
|
||||
# engine dependent config
|
||||
categories = []
|
||||
paging = True
|
||||
supported_languages_url = about['website']
|
||||
engine_locales_url = about['website']
|
||||
qwant_categ = None # web|news|inages|videos
|
||||
|
||||
safesearch = True
|
||||
@ -95,7 +95,7 @@ def request(query, params):
|
||||
)
|
||||
|
||||
# add quant's locale
|
||||
q_locale = get_engine_locale(params['language'], supported_languages, default='en_US')
|
||||
q_locale = get_engine_locale(params['language'], engine_locales.regions, default='en_US')
|
||||
params['url'] += '&locale=' + q_locale
|
||||
|
||||
# add safesearch option
|
||||
@ -243,7 +243,7 @@ def response(resp):
|
||||
return results
|
||||
|
||||
|
||||
def _fetch_supported_languages(resp):
|
||||
def _fetch_engine_locales(resp, engine_locales):
|
||||
|
||||
text = resp.text
|
||||
text = text[text.find('INITIAL_PROPS') :]
|
||||
@ -263,8 +263,6 @@ def _fetch_supported_languages(resp):
|
||||
|
||||
q_valid_locales.append(_locale)
|
||||
|
||||
supported_languages = {}
|
||||
|
||||
for q_locale in q_valid_locales:
|
||||
try:
|
||||
locale = babel.Locale.parse(q_locale, sep='_')
|
||||
@ -272,7 +270,7 @@ def _fetch_supported_languages(resp):
|
||||
print("ERROR: can't determine babel locale of quant's locale %s" % q_locale)
|
||||
continue
|
||||
|
||||
# note: supported_languages (dict)
|
||||
# note: engine_data.regions (dict)
|
||||
#
|
||||
# dict's key is a string build up from a babel.Locale object / the
|
||||
# notation 'xx-XX' (and 'xx') conforms to SearXNG's locale (and
|
||||
@ -280,6 +278,6 @@ def _fetch_supported_languages(resp):
|
||||
# the engine.
|
||||
|
||||
searxng_locale = locale.language + '-' + locale.territory # --> params['language']
|
||||
supported_languages[searxng_locale] = q_locale
|
||||
engine_locales.regions[searxng_locale] = q_locale
|
||||
|
||||
return supported_languages
|
||||
return engine_locales
|
||||
|
@ -7,17 +7,18 @@
|
||||
import re
|
||||
from time import time
|
||||
|
||||
from urllib.parse import urlencode
|
||||
from unicodedata import normalize, combining
|
||||
from datetime import datetime, timedelta
|
||||
from collections import OrderedDict
|
||||
|
||||
from dateutil import parser
|
||||
from lxml import html
|
||||
from babel import Locale
|
||||
from babel.localedata import locale_identifiers
|
||||
|
||||
import babel
|
||||
|
||||
from searx.network import get
|
||||
from searx.utils import extract_text, eval_xpath, match_language
|
||||
from searx.locales import get_engine_locale
|
||||
from searx.utils import extract_text, eval_xpath
|
||||
from searx.exceptions import (
|
||||
SearxEngineResponseException,
|
||||
SearxEngineCaptchaException,
|
||||
@ -36,16 +37,22 @@ about = {
|
||||
|
||||
# engine dependent config
|
||||
categories = ['general', 'web']
|
||||
# there is a mechanism to block "bot" search
|
||||
# (probably the parameter qid), require
|
||||
# storing of qid's between mulitble search-calls
|
||||
|
||||
paging = True
|
||||
supported_languages_url = 'https://www.startpage.com/do/settings'
|
||||
number_of_results = 5
|
||||
send_accept_language_header = True
|
||||
|
||||
safesearch = True
|
||||
filter_mapping = {0: '0', 1: '1', 2: '1'}
|
||||
|
||||
time_range_support = True
|
||||
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
|
||||
|
||||
engine_locales_url = 'https://www.startpage.com/do/settings'
|
||||
|
||||
# search-url
|
||||
base_url = 'https://startpage.com/'
|
||||
search_url = base_url + 'sp/search?'
|
||||
base_url = 'https://www.startpage.com/'
|
||||
search_url = base_url + 'sp/search'
|
||||
|
||||
# specific xpath variables
|
||||
# ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
|
||||
@ -103,42 +110,83 @@ def get_sc_code(headers):
|
||||
return sc_code
|
||||
|
||||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
|
||||
# pylint: disable=line-too-long
|
||||
# The format string from Startpage's FFox add-on [1]::
|
||||
#
|
||||
# https://www.startpage.com/do/dsearch?query={searchTerms}&cat=web&pl=ext-ff&language=__MSG_extensionUrlLanguage__&extVersion=1.3.0
|
||||
#
|
||||
# [1] https://addons.mozilla.org/en-US/firefox/addon/startpage-private-search/
|
||||
# Startpage supports a region value: 'all'
|
||||
engine_region = 'all'
|
||||
engine_language = 'english_uk'
|
||||
if params['language'] != 'all':
|
||||
engine_region = get_engine_locale(params['language'], engine_locales.regions, default='all')
|
||||
engine_language = get_engine_locale(
|
||||
params['language'].split('-')[0], engine_locales.languages, default='english_uk'
|
||||
)
|
||||
logger.debug(
|
||||
'selected language %s --> engine_language: %s // engine_region: %s',
|
||||
params['language'],
|
||||
engine_language,
|
||||
engine_region,
|
||||
)
|
||||
|
||||
# The Accept header is also needed by the get_sc_code(..) call below.
|
||||
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
||||
|
||||
# build arguments
|
||||
args = {
|
||||
'query': query,
|
||||
'page': params['pageno'],
|
||||
'cat': 'web',
|
||||
# 'pl': 'ext-ff',
|
||||
# 'extVersion': '1.3.0',
|
||||
# 'abp': "-1",
|
||||
'sc': get_sc_code(params['headers']),
|
||||
't': 'device',
|
||||
'sc': get_sc_code(params['headers']), # hint: this func needs HTTP headers
|
||||
'with_date': time_range_dict.get(params['time_range'], ''),
|
||||
}
|
||||
|
||||
# set language if specified
|
||||
if params['language'] != 'all':
|
||||
lang_code = match_language(params['language'], supported_languages, fallback=None)
|
||||
if lang_code:
|
||||
language_name = supported_languages[lang_code]['alias']
|
||||
args['language'] = language_name
|
||||
args['lui'] = language_name
|
||||
if engine_language:
|
||||
args['language'] = engine_language
|
||||
args['lui'] = engine_language
|
||||
|
||||
if params['pageno'] == 1:
|
||||
args['abp'] = ['-1', '-1']
|
||||
|
||||
else:
|
||||
args['page'] = params['pageno']
|
||||
args['abp'] = '-1'
|
||||
|
||||
# build cookie
|
||||
lang_homepage = 'english'
|
||||
cookie = OrderedDict()
|
||||
cookie['date_time'] = 'world'
|
||||
cookie['disable_family_filter'] = filter_mapping[params['safesearch']]
|
||||
cookie['disable_open_in_new_window'] = '0'
|
||||
cookie['enable_post_method'] = '1' # hint: POST
|
||||
cookie['enable_proxy_safety_suggest'] = '1'
|
||||
cookie['enable_stay_control'] = '1'
|
||||
cookie['instant_answers'] = '1'
|
||||
cookie['lang_homepage'] = 's/device/%s/' % lang_homepage
|
||||
cookie['num_of_results'] = '10'
|
||||
cookie['suggestions'] = '1'
|
||||
cookie['wt_unit'] = 'celsius'
|
||||
|
||||
if engine_language:
|
||||
cookie['language'] = engine_language
|
||||
cookie['language_ui'] = engine_language
|
||||
|
||||
if engine_region:
|
||||
cookie['search_results_region'] = engine_region
|
||||
|
||||
params['cookies']['preferences'] = 'N1N'.join(["%sEEE%s" % x for x in cookie.items()])
|
||||
logger.debug('cookie preferences: %s', params['cookies']['preferences'])
|
||||
params['method'] = 'POST'
|
||||
|
||||
logger.debug("data: %s", args)
|
||||
params['data'] = args
|
||||
|
||||
params['url'] = search_url
|
||||
|
||||
params['url'] = search_url + urlencode(args)
|
||||
return params
|
||||
|
||||
|
||||
# get response from search-request
|
||||
def response(resp):
|
||||
results = []
|
||||
|
||||
dom = html.fromstring(resp.text)
|
||||
|
||||
# parse results
|
||||
@ -200,62 +248,142 @@ def response(resp):
|
||||
return results
|
||||
|
||||
|
||||
# get supported languages from their site
|
||||
def _fetch_supported_languages(resp):
|
||||
# startpage's language selector is a mess each option has a displayed name
|
||||
# and a value, either of which may represent the language name in the native
|
||||
# script, the language name in English, an English transliteration of the
|
||||
# native name, the English name of the writing script used by the language,
|
||||
# or occasionally something else entirely.
|
||||
def _fetch_engine_locales(resp, engine_locales):
|
||||
|
||||
# this cases are so special they need to be hardcoded, a couple of them are misspellings
|
||||
language_names = {
|
||||
'english_uk': 'en-GB',
|
||||
'fantizhengwen': ['zh-TW', 'zh-HK'],
|
||||
'hangul': 'ko',
|
||||
'malayam': 'ml',
|
||||
'norsk': 'nb',
|
||||
'sinhalese': 'si',
|
||||
'sudanese': 'su',
|
||||
}
|
||||
# startpage's language & region selectors are a mess.
|
||||
#
|
||||
# regions:
|
||||
# in the list of regions there are tags we need to map to common
|
||||
# region tags:
|
||||
# - pt-BR_BR --> pt_BR
|
||||
# - zh-CN_CN --> zh_Hans_CN
|
||||
# - zh-TW_TW --> zh_Hant_TW
|
||||
# - zh-TW_HK --> zh_Hant_HK
|
||||
# - en-GB_GB --> en_GB
|
||||
# and there is at least one tag with a three letter language tag (ISO 639-2)
|
||||
# - fil_PH --> fil_PH
|
||||
#
|
||||
# regions
|
||||
# -------
|
||||
#
|
||||
# The locale code 'no_NO' from startpage does not exists and is mapped to
|
||||
# nb-NO::
|
||||
#
|
||||
# babel.core.UnknownLocaleError: unknown locale 'no_NO'
|
||||
#
|
||||
# For reference see languages-subtag at iana [1], `no` is the
|
||||
# macrolanguage::
|
||||
#
|
||||
# type: language
|
||||
# Subtag: nb
|
||||
# Description: Norwegian Bokmål
|
||||
# Added: 2005-10-16
|
||||
# Suppress-Script: Latn
|
||||
# Macrolanguage: no
|
||||
#
|
||||
# W3C recommends subtag over macrolanguage [2]:
|
||||
#
|
||||
# Use macrolanguages with care. Some language subtags have a Scope field set to
|
||||
# macrolanguage, ie. this primary language subtag encompasses a number of more
|
||||
# specific primary language subtags in the registry.
|
||||
# ...
|
||||
# As we recommended for the collection subtags mentioned above, in most cases
|
||||
# you should try to use the more specific subtags ...
|
||||
#
|
||||
# [1] https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
|
||||
# [2] https://www.w3.org/International/questions/qa-choosing-language-tags#langsubtag
|
||||
#
|
||||
# languages
|
||||
# ---------
|
||||
#
|
||||
# The displayed name in startpage's settings page depend on the location
|
||||
# of the IP when the 'Accept-Language' HTTP header is unset (in tha
|
||||
# language update script we use "en-US,en;q=0.5" to get uniform names
|
||||
# independent from the IP).
|
||||
#
|
||||
# Each option has a displayed name and a value, either of which
|
||||
# may represent the language name in the native script, the language name
|
||||
# in English, an English transliteration of the native name, the English
|
||||
# name of the writing script used by the language, or occasionally
|
||||
# something else entirely.
|
||||
|
||||
# get the English name of every language known by babel
|
||||
language_names.update(
|
||||
{
|
||||
# fmt: off
|
||||
name.lower(): lang_code
|
||||
# pylint: disable=protected-access
|
||||
for lang_code, name in Locale('en')._data['languages'].items()
|
||||
# fmt: on
|
||||
}
|
||||
)
|
||||
dom = html.fromstring(resp.text)
|
||||
|
||||
# regions
|
||||
|
||||
sp_region_names = []
|
||||
for option in dom.xpath('//form[@name="settings"]//select[@name="search_results_region"]/option'):
|
||||
sp_region_names.append(option.get('value'))
|
||||
|
||||
for engine_region_tag in sp_region_names:
|
||||
if engine_region_tag == 'all':
|
||||
# 'all' does not fit to a babel locale
|
||||
continue
|
||||
|
||||
locale = None
|
||||
babel_region_tag = {'no_NO': 'nb_NO'}.get(engine_region_tag, engine_region_tag) # norway
|
||||
|
||||
if '-' in babel_region_tag:
|
||||
# pt-XY_BR --> l=pt, r=BR --> pt-BR
|
||||
l, r = babel_region_tag.split('-')
|
||||
r = r.split('_')[-1]
|
||||
locale = babel.Locale.parse(l + '_' + r, sep='_')
|
||||
else:
|
||||
try:
|
||||
locale = babel.Locale.parse(babel_region_tag, sep='_')
|
||||
except babel.core.UnknownLocaleError:
|
||||
print("ERROR: can't determine babel locale of startpage's locale %s" % engine_region_tag)
|
||||
continue
|
||||
|
||||
if locale is None:
|
||||
continue
|
||||
|
||||
region_tag = locale.language + '-' + locale.territory
|
||||
# print("SearXNG locale tag: %s --> Engine tag: %s" % (region_tag, engine_region_tag))
|
||||
engine_locales.regions[region_tag] = engine_region_tag
|
||||
|
||||
# languages
|
||||
|
||||
catalog_engine2code = {name.lower(): lang_code for lang_code, name in babel.Locale('en').languages.items()}
|
||||
|
||||
# get the native name of every language known by babel
|
||||
for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, locale_identifiers()):
|
||||
native_name = Locale(lang_code).get_language_name().lower()
|
||||
|
||||
for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, babel.localedata.locale_identifiers()):
|
||||
native_name = babel.Locale(lang_code).get_language_name().lower()
|
||||
# add native name exactly as it is
|
||||
language_names[native_name] = lang_code
|
||||
catalog_engine2code[native_name] = lang_code
|
||||
|
||||
# add "normalized" language name (i.e. français becomes francais and español becomes espanol)
|
||||
unaccented_name = ''.join(filter(lambda c: not combining(c), normalize('NFKD', native_name)))
|
||||
if len(unaccented_name) == len(unaccented_name.encode()):
|
||||
# add only if result is ascii (otherwise "normalization" didn't work)
|
||||
language_names[unaccented_name] = lang_code
|
||||
catalog_engine2code[unaccented_name] = lang_code
|
||||
|
||||
# values that can't be determined by babel's languages names
|
||||
|
||||
catalog_engine2code.update(
|
||||
{
|
||||
'english_uk': 'en',
|
||||
# traditional chinese used in ..
|
||||
'fantizhengwen': 'zh_Hant',
|
||||
# Korean alphabet
|
||||
'hangul': 'ko',
|
||||
# Malayalam is one of 22 scheduled languages of India.
|
||||
'malayam': 'ml',
|
||||
'norsk': 'nb',
|
||||
'sinhalese': 'si',
|
||||
}
|
||||
)
|
||||
|
||||
dom = html.fromstring(resp.text)
|
||||
sp_lang_names = []
|
||||
for option in dom.xpath('//form[@name="settings"]//select[@name="language"]/option'):
|
||||
sp_lang_names.append((option.get('value'), extract_text(option).lower()))
|
||||
engine_lang = option.get('value')
|
||||
name = extract_text(option).lower()
|
||||
|
||||
supported_languages = {}
|
||||
for sp_option_value, sp_option_text in sp_lang_names:
|
||||
lang_code = language_names.get(sp_option_value) or language_names.get(sp_option_text)
|
||||
if isinstance(lang_code, str):
|
||||
supported_languages[lang_code] = {'alias': sp_option_value}
|
||||
elif isinstance(lang_code, list):
|
||||
for _lc in lang_code:
|
||||
supported_languages[_lc] = {'alias': sp_option_value}
|
||||
else:
|
||||
print('Unknown language option in Startpage: {} ({})'.format(sp_option_value, sp_option_text))
|
||||
lang_code = catalog_engine2code.get(engine_lang)
|
||||
if lang_code is None:
|
||||
lang_code = catalog_engine2code[name]
|
||||
|
||||
return supported_languages
|
||||
# print("SearXNG language tag: %s --> Engine tag: %s" % (lang_code, engine_lang))
|
||||
engine_locales.languages[lang_code] = engine_lang
|
||||
|
||||
return engine_locales
|
||||
|
@ -32,7 +32,6 @@ about = {
|
||||
"results": 'HTML',
|
||||
}
|
||||
|
||||
language_support = False
|
||||
time_range_support = False
|
||||
safesearch = False
|
||||
paging = True
|
||||
|
@ -20,7 +20,6 @@ about = {
|
||||
# engine dependent config
|
||||
categories = ['videos', 'music']
|
||||
paging = True
|
||||
language_support = False
|
||||
time_range_support = True
|
||||
|
||||
# search-url
|
||||
|
@ -1,6 +1,8 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# list of language codes
|
||||
# this file is generated automatically by utils/fetch_languages.py
|
||||
# this file is generated automatically by:
|
||||
#
|
||||
# ./manage pyenv.cmd searxng_extra/update/update_languages.py
|
||||
language_codes = (
|
||||
('af-ZA', 'Afrikaans', 'Suid-Afrika', 'Afrikaans', '\U0001f1ff\U0001f1e6'),
|
||||
('ar-EG', 'العربية', 'مصر', 'Arabic', '\U0001f1ea\U0001f1ec'),
|
||||
|
@ -4,7 +4,7 @@
|
||||
"""Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.
|
||||
"""
|
||||
|
||||
from typing import Set
|
||||
from typing import Set, Optional, Dict
|
||||
import os
|
||||
import pathlib
|
||||
|
||||
@ -24,11 +24,8 @@ logger = logger.getChild('locales')
|
||||
_flask_babel_get_translations = flask_babel.get_translations
|
||||
|
||||
LOCALE_NAMES = {}
|
||||
"""Mapping of locales and their description. Locales e.g. 'fr' or 'pt-BR' (see
|
||||
:py:obj:`locales_initialize`).
|
||||
|
||||
:meta hide-value:
|
||||
"""
|
||||
"""Mapping of locales and their description. Locales e.g. ``fr`` or ``pt-BR``
|
||||
(see :py:obj:`locales_initialize`)."""
|
||||
|
||||
RTL_LOCALES: Set[str] = set()
|
||||
"""List of *Right-To-Left* locales e.g. 'he' or 'fa-IR' (see
|
||||
@ -157,13 +154,17 @@ def locales_initialize(directory=None):
|
||||
RTL_LOCALES.add(tag)
|
||||
|
||||
|
||||
def get_engine_locale(searxng_locale, engine_locales, default=None):
|
||||
def get_engine_locale(
|
||||
searxng_locale: str, engine_locales: Dict[str, str], default: Optional[str] = None
|
||||
) -> Optional[str]:
|
||||
"""Return engine's language (aka locale) string that best fits to argument
|
||||
``searxng_locale``.
|
||||
|
||||
Argument ``engine_locales`` is a python dict that maps *SearXNG locales* to
|
||||
corresponding *engine locales*::
|
||||
|
||||
.. code:: python
|
||||
|
||||
<engine>: {
|
||||
# SearXNG string : engine-string
|
||||
'ca-ES' : 'ca_ES',
|
||||
|
@ -307,7 +307,7 @@
|
||||
<th class="engine_checkbox">{{ _("Allow") }}</th>{{- "" -}}
|
||||
<th class="name">{{ _("Engine name") }}</th>{{- "" -}}
|
||||
<th class="shortcut">{{ _("Shortcut") }}</th>{{- "" -}}
|
||||
<th>{{ _("Supports selected language") }}</th>{{- "" -}}
|
||||
<th>{{ _("Language / Region") }}</th>{{- "" -}}
|
||||
<th>{{ _("SafeSearch") }}</th>{{- "" -}}
|
||||
<th>{{ _("Time range") }}</th>{{- "" -}}
|
||||
{%- if enable_metrics %}<th>{{ _("Response time") }}</th>{% endif -%}
|
||||
@ -333,7 +333,7 @@
|
||||
{{- engine_about(search_engine) -}}
|
||||
</th>{{- "" -}}
|
||||
<td class="shortcut">{{ shortcuts[search_engine.name] }}</td>{{- "" -}}
|
||||
<td>{{ checkbox(None, supports[search_engine.name]['supports_selected_language'], true) }}</td>{{- "" -}}
|
||||
<td>{{ checkbox(None, supports[search_engine.name]['language_support'], true) }}</td>{{- "" -}}
|
||||
<td>{{ checkbox(None, supports[search_engine.name]['safesearch'], true) }}</td>{{- "" -}}
|
||||
<td>{{ checkbox(None, supports[search_engine.name]['time_range_support'], true) }}</td>{{- "" -}}
|
||||
{%- if enable_metrics %}{{- engine_time(search_engine.name) -}}{% endif -%}
|
||||
|
@ -990,7 +990,6 @@ def preferences():
|
||||
'rate80': rate80,
|
||||
'rate95': rate95,
|
||||
'warn_timeout': e.timeout > settings['outgoing']['request_timeout'],
|
||||
'supports_selected_language': _is_selected_language_supported(e, request.preferences),
|
||||
'result_count': result_count,
|
||||
}
|
||||
# end of stats
|
||||
@ -1041,18 +1040,18 @@ def preferences():
|
||||
# supports
|
||||
supports = {}
|
||||
for _, e in filtered_engines.items():
|
||||
supports_selected_language = _is_selected_language_supported(e, request.preferences)
|
||||
language_support = e.language_support
|
||||
safesearch = e.safesearch
|
||||
time_range_support = e.time_range_support
|
||||
for checker_test_name in checker_results.get(e.name, {}).get('errors', {}):
|
||||
if supports_selected_language and checker_test_name.startswith('lang_'):
|
||||
supports_selected_language = '?'
|
||||
if language_support and checker_test_name.startswith('lang_'):
|
||||
language_support = '?'
|
||||
elif safesearch and checker_test_name == 'safesearch':
|
||||
safesearch = '?'
|
||||
elif time_range_support and checker_test_name == 'time_range':
|
||||
time_range_support = '?'
|
||||
supports[e.name] = {
|
||||
'supports_selected_language': supports_selected_language,
|
||||
'language_support': language_support,
|
||||
'safesearch': safesearch,
|
||||
'time_range_support': time_range_support,
|
||||
}
|
||||
@ -1088,16 +1087,6 @@ def preferences():
|
||||
)
|
||||
|
||||
|
||||
def _is_selected_language_supported(engine, preferences: Preferences): # pylint: disable=redefined-outer-name
|
||||
language = preferences.get_value('language')
|
||||
if language == 'all':
|
||||
return True
|
||||
x = match_language(
|
||||
language, getattr(engine, 'supported_languages', []), getattr(engine, 'language_aliases', {}), None
|
||||
)
|
||||
return bool(x)
|
||||
|
||||
|
||||
@app.route('/image_proxy', methods=['GET'])
|
||||
def image_proxy():
|
||||
# pylint: disable=too-many-return-statements, too-many-branches
|
||||
@ -1316,10 +1305,6 @@ def config():
|
||||
if not request.preferences.validate_token(engine):
|
||||
continue
|
||||
|
||||
supported_languages = engine.supported_languages
|
||||
if isinstance(engine.supported_languages, dict):
|
||||
supported_languages = list(engine.supported_languages.keys())
|
||||
|
||||
_engines.append(
|
||||
{
|
||||
'name': name,
|
||||
@ -1328,7 +1313,6 @@ def config():
|
||||
'enabled': not engine.disabled,
|
||||
'paging': engine.paging,
|
||||
'language_support': engine.language_support,
|
||||
'supported_languages': supported_languages,
|
||||
'safesearch': engine.safesearch,
|
||||
'time_range_support': engine.time_range_support,
|
||||
'timeout': engine.timeout,
|
||||
|
500
searxng_extra/update/update_engine_locales.py
Executable file
500
searxng_extra/update/update_engine_locales.py
Executable file
@ -0,0 +1,500 @@
|
||||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
# pyright: basic
|
||||
"""This script fetches engine data from engines `engine_data_url`` and updates:
|
||||
|
||||
- :py:obj:`write_languages_file` updates :origin:`searx/languages.py`
|
||||
- :py:obj:`fetch_engine_data` updates :origin:`searx/data/engines_datas.json`
|
||||
|
||||
This script is triggered by CI in job :origin:`updateData
|
||||
<.github/workflows/data-update.yml>`.
|
||||
"""
|
||||
|
||||
# pylint: disable=invalid-name
|
||||
import json
|
||||
from unicodedata import lookup
|
||||
from pprint import pformat
|
||||
from pathlib import Path
|
||||
from typing import Dict, Generator, List, Set, Tuple, Union, Optional
|
||||
from typing_extensions import TypedDict, NotRequired
|
||||
|
||||
from babel import Locale, UnknownLocaleError
|
||||
from babel.languages import get_global # type: ignore
|
||||
from babel.core import parse_locale
|
||||
|
||||
from searx import settings, searx_dir
|
||||
from searx import network
|
||||
from searx.data import data_dir
|
||||
from searx.engines import (
|
||||
load_engines,
|
||||
engines,
|
||||
EngineLocales,
|
||||
)
|
||||
from searx.utils import gen_useragent
|
||||
|
||||
|
||||
class EngineLanguageDescDict(TypedDict):
|
||||
"""In data/engines_languages.json, for google, wikipedia and wikidata engines:
|
||||
value of the dictionnaries"""
|
||||
|
||||
name: str
|
||||
english_name: NotRequired[str]
|
||||
|
||||
|
||||
EngineLanguageDesc = Union[List[str], Dict[str, EngineLanguageDescDict]]
|
||||
"""In data/engines_languages.json, type for a engine:
|
||||
|
||||
* either it is a list
|
||||
* or a dictionnary"""
|
||||
|
||||
EngineLanguageDict = Dict[str, EngineLanguageDesc]
|
||||
"""Type description for data/engines_languages.json"""
|
||||
|
||||
EngineLocalesDict = Dict[str, EngineLocales]
|
||||
"""Type description for data/engine_data.json"""
|
||||
|
||||
|
||||
def fetch_engine_locales() -> Tuple[EngineLocalesDict, EngineLanguageDict]:
|
||||
"""Fetch :class:`EngineData` for each engine and persist JSON in file.
|
||||
|
||||
The script checks all engines about a function::
|
||||
|
||||
def _fetch_engine_data(resp, engine_data):
|
||||
...
|
||||
|
||||
and a variable named ``engine_locales_url``. The HTTP GET response of
|
||||
``engine_locales_url`` is passed to the ``_fetch_engine_data`` function including a
|
||||
instance of :py:obj:`searx.engines.EngineData`.
|
||||
|
||||
.. hint::
|
||||
|
||||
This implementation is backward compatible and supports the (depricated)
|
||||
``_fetch_supported_languages`` interface.
|
||||
|
||||
On the long term the depricated implementations in the engines will be
|
||||
replaced by ``_fetch_engine_data``."""
|
||||
|
||||
network.set_timeout_for_thread(10.0)
|
||||
engine_locales_dict: EngineLocalesDict = {}
|
||||
engines_languages: EngineLanguageDict = {}
|
||||
names = list(engines)
|
||||
names.sort()
|
||||
|
||||
# The headers has been moved here from commit 9b6ffed06: Some engines (at
|
||||
# least bing and startpage) return a different result list of supported
|
||||
# languages depending on the IP location where the HTTP request comes from.
|
||||
# The IP based results (from bing) can be avoided by setting a
|
||||
# 'Accept-Language' in the HTTP request.
|
||||
|
||||
headers = {
|
||||
'User-Agent': gen_useragent(),
|
||||
'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language
|
||||
}
|
||||
|
||||
for engine_name in names:
|
||||
engine = engines[engine_name]
|
||||
|
||||
fetch_locales = getattr(engine, '_fetch_engine_locales', None)
|
||||
# deprecated: _fetch_supported_languages
|
||||
fetch_languages = getattr(engine, '_fetch_supported_languages', None)
|
||||
|
||||
if fetch_locales is not None:
|
||||
resp = network.get(engine.engine_locales_url, headers=headers) # type: ignore
|
||||
engine_data = EngineLocales()
|
||||
fetch_locales(resp, engine_data)
|
||||
engine_locales_dict[engine_name] = engine_data
|
||||
print(
|
||||
"%-20s: %3s language(s), %3s region(s)"
|
||||
% (engine_name, len(engine_data.languages), len(engine_data.regions))
|
||||
)
|
||||
elif fetch_languages is not None:
|
||||
print(engine_name)
|
||||
resp = network.get(engine.supported_languages_url, headers=headers) # type: ignore
|
||||
engines_languages[engine_name] = fetch_languages(resp)
|
||||
print(
|
||||
"%-20s: %3s languages using deprecated _fetch_supported_languages"
|
||||
% (engine_name, len(engines_languages[engine_name]))
|
||||
)
|
||||
if type(engines_languages[engine_name]) == list: # pylint: disable=unidiomatic-typecheck
|
||||
engines_languages[engine_name] = sorted(engines_languages[engine_name])
|
||||
|
||||
return engine_locales_dict, engines_languages
|
||||
|
||||
|
||||
# Get babel Locale object from lang_code if possible.
|
||||
def get_locale(lang_code: str) -> Optional[Locale]:
|
||||
try:
|
||||
locale = Locale.parse(lang_code, sep='-')
|
||||
return locale
|
||||
except (UnknownLocaleError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
lang2emoji = {
|
||||
'ha': '\U0001F1F3\U0001F1EA', # Hausa / Niger
|
||||
'bs': '\U0001F1E7\U0001F1E6', # Bosnian / Bosnia & Herzegovina
|
||||
'jp': '\U0001F1EF\U0001F1F5', # Japanese
|
||||
'ua': '\U0001F1FA\U0001F1E6', # Ukrainian
|
||||
'he': '\U0001F1EE\U0001F1F7', # Hebrew
|
||||
}
|
||||
|
||||
|
||||
def get_unicode_flag(lang_code: str) -> Optional[str]:
|
||||
"""Determine a unicode flag (emoji) that fits to the ``lang_code``"""
|
||||
|
||||
emoji = lang2emoji.get(lang_code.lower())
|
||||
if emoji:
|
||||
return emoji
|
||||
|
||||
if len(lang_code) == 2:
|
||||
return '\U0001F310'
|
||||
|
||||
language = territory = script = variant = ''
|
||||
try:
|
||||
language, territory, script, variant = parse_locale(lang_code, '-')
|
||||
except ValueError as exc:
|
||||
print(exc)
|
||||
|
||||
# https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
|
||||
if not territory:
|
||||
# https://www.unicode.org/emoji/charts/emoji-list.html#country-flag
|
||||
emoji = lang2emoji.get(language)
|
||||
if not emoji:
|
||||
print(
|
||||
"%s --> language: %s / territory: %s / script: %s / variant: %s"
|
||||
% (lang_code, language, territory, script, variant)
|
||||
)
|
||||
return emoji
|
||||
|
||||
emoji = lang2emoji.get(territory.lower())
|
||||
if emoji:
|
||||
return emoji
|
||||
|
||||
try:
|
||||
c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[0])
|
||||
c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[1])
|
||||
# print("%s --> territory: %s --> %s%s" %(lang_code, territory, c1, c2 ))
|
||||
except KeyError as exc:
|
||||
print("%s --> territory: %s --> %s" % (lang_code, territory, exc))
|
||||
return None
|
||||
|
||||
return c1 + c2
|
||||
|
||||
|
||||
def get_territory_name(lang_code: str) -> Optional[str]:
|
||||
country_name = None
|
||||
locale = get_locale(lang_code)
|
||||
try:
|
||||
if locale is not None:
|
||||
country_name = locale.get_territory_name()
|
||||
except FileNotFoundError as exc:
|
||||
print("ERROR: %s --> %s" % (locale, exc))
|
||||
return country_name
|
||||
|
||||
|
||||
def iter_engine_codes(
|
||||
engine_data_dict: EngineLocalesDict, engines_languages: EngineLanguageDict
|
||||
) -> Generator[Tuple[str, List[str]], None, None]:
|
||||
"""Iterator returning tuples:
|
||||
|
||||
- first element is the engine name
|
||||
- second element is a list of language code (the one from the engines)
|
||||
|
||||
The function iterates first on the engine from engine_data_dict,
|
||||
then it iterates over the engine from engines_languages.
|
||||
"""
|
||||
for engine_name in engine_data_dict:
|
||||
engine = engines[engine_name]
|
||||
engine_data = engine_data_dict[engine_name]
|
||||
|
||||
# items of type 'engine_data' do have regions & languages, the list
|
||||
# of engine_codes should contain both.
|
||||
|
||||
engine_codes = list(engine_data.regions.keys())
|
||||
engine_codes.extend(engine_data.languages.keys())
|
||||
yield engine_name, engine_codes
|
||||
|
||||
for engine_name, engine_languages in engines_languages.items():
|
||||
engine = engines[engine_name]
|
||||
language_aliases_values = getattr(engine, 'language_aliases', {}).values()
|
||||
engine_codes: List[str] = []
|
||||
for lang_code in engine_languages:
|
||||
if lang_code in language_aliases_values:
|
||||
# pylint: disable=stop-iteration-return
|
||||
# we are sure that next(...) won't raise a StopIteration exception
|
||||
# because of the "if" statement just above
|
||||
lang_code = next(lc for lc, alias in engine.language_aliases.items() if lang_code == alias)
|
||||
# pylint: enable=stop-iteration-return
|
||||
engine_codes.append(lang_code)
|
||||
yield engine_name, engine_codes
|
||||
|
||||
|
||||
class CountryInfo(TypedDict):
|
||||
"""Country name with a set of engine names.
|
||||
Use exclusivly in JoinLanguageResult"""
|
||||
|
||||
country_name: str
|
||||
"""Name of the country"""
|
||||
|
||||
engine_names: Set[str]
|
||||
"""Engine names which use the language & country"""
|
||||
|
||||
|
||||
class JoinLanguageResult(TypedDict):
|
||||
"""Result of join_language_lists"""
|
||||
|
||||
name: Optional[str]
|
||||
"""Native name of the language"""
|
||||
|
||||
english_name: Optional[str]
|
||||
"""English name of the language"""
|
||||
|
||||
engine_names: Set
|
||||
"""Engine names which use this language"""
|
||||
|
||||
countries: Dict[str, CountryInfo]
|
||||
"""Possible country codes for this language"""
|
||||
|
||||
|
||||
def join_language_lists(
|
||||
engine_data_dict: EngineLocalesDict, engines_languages: EngineLanguageDict
|
||||
) -> Dict[str, JoinLanguageResult]:
|
||||
"""Join all languages of the engines into one list. The returned language list
|
||||
contains language codes (``zh``) and region codes (``zh-TW``). The codes can
|
||||
be parsed by babel::
|
||||
|
||||
babel.Locale.parse(language_list[n])
|
||||
|
||||
"""
|
||||
language_list: Dict[str, JoinLanguageResult] = {}
|
||||
name_from_babel = set()
|
||||
name_from_wikipedia = set()
|
||||
name_not_found = set()
|
||||
|
||||
for engine_name, engine_codes in iter_engine_codes(engine_data_dict, engines_languages):
|
||||
for lang_code in engine_codes:
|
||||
|
||||
locale = get_locale(lang_code)
|
||||
|
||||
# ensure that lang_code uses standard language and country codes
|
||||
if locale and locale.territory:
|
||||
lang_code = "{lang}-{country}".format(lang=locale.language, country=locale.territory)
|
||||
short_code = lang_code.split('-')[0]
|
||||
|
||||
# add language without country if not in list
|
||||
if short_code not in language_list:
|
||||
if locale:
|
||||
# get language's data from babel's Locale object
|
||||
language_name = locale.get_language_name().title()
|
||||
english_name = locale.english_name.split(' (')[0]
|
||||
name_from_babel.add(short_code)
|
||||
elif short_code in engines_languages['wikipedia'] and isinstance(engines_languages['wikipedia'], dict):
|
||||
# get language's data from wikipedia if not known by babel
|
||||
language_name = engines_languages['wikipedia'][short_code]['name']
|
||||
english_name = engines_languages['wikipedia'][short_code].get('english_name')
|
||||
name_from_wikipedia.add(short_code)
|
||||
else:
|
||||
language_name = None
|
||||
english_name = None
|
||||
name_not_found.add(short_code)
|
||||
|
||||
# add language to list
|
||||
language_list[short_code] = {
|
||||
'name': language_name,
|
||||
'english_name': english_name,
|
||||
'engine_names': set(),
|
||||
'countries': {},
|
||||
}
|
||||
|
||||
# add language with country if not in list
|
||||
if lang_code != short_code and lang_code not in language_list[short_code]['countries']:
|
||||
country_name = ''
|
||||
if locale:
|
||||
# get country name from babel's Locale object
|
||||
try:
|
||||
country_name = locale.get_territory_name()
|
||||
except FileNotFoundError as exc:
|
||||
print("ERROR: %s --> %s" % (locale, exc))
|
||||
locale = None
|
||||
|
||||
language_list[short_code]['countries'][lang_code] = {
|
||||
'country_name': country_name,
|
||||
'engine_names': set(),
|
||||
}
|
||||
|
||||
# count engine for both language_country combination and language alone
|
||||
language_list[short_code]['engine_names'].add(engine_name)
|
||||
if lang_code != short_code:
|
||||
language_list[short_code]['countries'][lang_code]['engine_names'].add(engine_name)
|
||||
|
||||
def set_to_list(engine_name_set: Set) -> str:
|
||||
return ', '.join(sorted(list(engine_name_set)))
|
||||
|
||||
print('')
|
||||
print('%s name(s) found with Babel: %s\n' % (len(name_from_babel), set_to_list(name_from_babel)))
|
||||
print('%s name(s) found with Wikipedia: %s\n' % (len(name_from_wikipedia), set_to_list(name_from_wikipedia)))
|
||||
print('%s name(s) not found: %s\n' % (len(name_not_found), set_to_list(name_not_found)))
|
||||
|
||||
return language_list
|
||||
|
||||
|
||||
class LanguageCountryName(TypedDict):
|
||||
"""filter_language_list returns a dictionnary:
|
||||
* the key are the language code
|
||||
* the value is described in this type
|
||||
"""
|
||||
|
||||
name: Optional[str]
|
||||
english_name: Optional[str]
|
||||
country_name: NotRequired[str]
|
||||
|
||||
|
||||
def filter_language_list(all_languages: Dict[str, JoinLanguageResult]) -> Dict[str, LanguageCountryName]:
    """Filter language list so it only includes the most supported languages and
    countries.

    Args:
        all_languages: result of ``join_language_lists`` — one entry per
            language code with the supporting engines and country variants.

    Returns:
        Mapping from language (or ``language-COUNTRY``) code to its
        :py:obj:`LanguageCountryName` entry, restricted to codes supported
        by enough engines.
    """
    # thresholds: a language / country is kept only when at least this many
    # engines support it (or when all default general engines support it)
    min_engines_per_lang = 12
    min_engines_per_country = 7

    # default-enabled engines of the 'general' category that declare a
    # supported_languages list
    main_engines = [
        engine_name
        for engine_name, engine in engines.items()
        if 'general' in engine.categories
        and hasattr(engine, 'supported_languages')
        and engine.supported_languages
        and not engine.disabled
    ]

    # filter list to include only languages supported by most engines or all default general engines
    filtered_languages = {
        code: join_result
        for code, join_result in all_languages.items()
        if (
            len(join_result['engine_names']) >= min_engines_per_lang
            or all(main_engine in join_result['engine_names'] for main_engine in main_engines)
        )
    }

    def _new_language_country_name(lang: str, country_name: Optional[str]) -> LanguageCountryName:
        # build one result entry; 'country_name' is only set for
        # language-country codes
        new_dict: LanguageCountryName = {
            'name': all_languages[lang]['name'],
            'english_name': all_languages[lang]['english_name'],
        }
        if country_name:
            new_dict['country_name'] = country_name
        return new_dict

    # for each language get country codes supported by most engines or at least one country code
    filtered_languages_with_countries: Dict[str, LanguageCountryName] = {}
    for lang, lang_data in filtered_languages.items():
        countries = lang_data['countries']
        filtered_countries: Dict[str, LanguageCountryName] = {}

        # get language's country codes with enough supported engines
        for lang_country, country_data in countries.items():
            if len(country_data['engine_names']) >= min_engines_per_country:
                filtered_countries[lang_country] = _new_language_country_name(lang, country_data['country_name'])

        # add language without countries too if there's more than one country to choose from
        if len(filtered_countries) > 1:
            filtered_countries[lang] = _new_language_country_name(lang, None)
        # (a dead 'elif len(filtered_countries) == 1' branch that assigned an
        # unused local was removed here)

        # if no country has enough engines try to get most likely country code from babel
        if not filtered_countries:
            lang_country = None
            subtags = get_global('likely_subtags').get(lang)
            if subtags:
                country_code = subtags.split('_')[-1]
                if len(country_code) == 2:
                    lang_country = "{lang}-{country}".format(lang=lang, country=country_code)

            if lang_country:
                filtered_countries[lang_country] = _new_language_country_name(lang, None)
            else:
                filtered_countries[lang] = _new_language_country_name(lang, None)

        filtered_languages_with_countries.update(filtered_countries)

    return filtered_languages_with_countries
|
||||
|
||||
|
||||
def write_engine_data(file_name, engine_data_dict: EngineLocalesDict):
    """Serialize the per-engine locale data (regions and languages) into
    *file_name* as pretty-printed, key-sorted JSON."""
    serializable = {}
    for name, engine_data in engine_data_dict.items():
        serializable[name] = {
            'regions': engine_data.regions,
            'languages': engine_data.languages,
        }
    with open(file_name, 'w', encoding='utf-8') as output:
        json.dump(serializable, output, indent=2, sort_keys=True)
|
||||
|
||||
|
||||
def write_engines_languages(file_name, engines_languages: EngineLanguageDict):
    """Dump the fetched per-engine language data into *file_name* as
    pretty-printed, key-sorted JSON."""
    serialized = json.dumps(engines_languages, indent=2, sort_keys=True)
    with open(file_name, 'w', encoding='utf-8') as output:
        output.write(serialized)
|
||||
|
||||
|
||||
class UnicodeEscape(str):
    """Escape unicode string in :py:obj:`pprint.pformat`"""

    def __repr__(self):
        # render each character of the string in its backslash-escaped form,
        # wrapped in single quotes (latin-1 maps each byte back to chr(byte))
        escaped = self.encode('unicode-escape').decode('latin-1')
        return "'" + escaped + "'"
|
||||
|
||||
|
||||
# Write languages.py.
def write_languages_file(language_file, languages: Dict[str, LanguageCountryName]):
    """Generates :origin:`searx/languages.py`.

    Entries whose ``name`` could not be resolved are reported on stdout and
    skipped.
    """

    file_headers = (
        "# -*- coding: utf-8 -*-",
        "# list of language codes",
        "# this file is generated automatically by:",
        "#",
        # fixed: this script was renamed from update_languages.py
        "# ./manage pyenv.cmd searxng_extra/update/update_engine_locales.py",
        "language_codes = (\n",
    )

    language_codes = []

    for code in sorted(languages):

        name = languages[code]['name']
        if name is None:
            # no name could be determined, neither by babel nor by wikipedia
            print("ERROR: languages['%s'] --> %s" % (code, languages[code]))
            continue

        flag = get_unicode_flag(code) or ''
        item = (
            code,
            # strip a trailing " (...)" qualifier from the native name
            name.split(' (')[0],
            get_territory_name(code) or '',
            languages[code].get('english_name') or '',
            UnicodeEscape(flag),
        )

        language_codes.append(item)

    language_codes = tuple(language_codes)

    with open(language_file, 'w', encoding='utf-8') as new_file:
        file_content = "{file_headers} {language_codes},\n)\n".format(
            # fmt: off
            file_headers = '\n'.join(file_headers),
            language_codes = pformat(language_codes, indent=4)[1:-1]
            # fmt: on
        )
        new_file.write(file_content)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # instantiate all configured engines so their locale data can be queried
    load_engines(settings['engines'])
    # fetch per-engine locale / language data (see fetch_engine_locales above)
    _engine_locales_dict, _engines_languages = fetch_engine_locales()
    # merge per-engine lists into one language --> countries mapping
    _all_languages = join_language_lists(_engine_locales_dict, _engines_languages)
    # keep only languages / countries supported by enough engines
    _filtered_languages = filter_language_list(_all_languages)
    # persist the fetched data and regenerate searx/languages.py
    write_engine_data(data_dir / 'engine_locales.json', _engine_locales_dict)
    write_engines_languages(data_dir / 'engines_languages.json', _engines_languages)
    write_languages_file(Path(searx_dir) / 'languages.py', _filtered_languages)
|
@ -1,313 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# lint: pylint
|
||||
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""This script generates languages.py from intersecting each engine's supported
|
||||
languages.
|
||||
|
||||
Output files: :origin:`searx/data/engines_languages.json` and
|
||||
:origin:`searx/languages.py` (:origin:`CI Update data ...
|
||||
<.github/workflows/data-update.yml>`).
|
||||
|
||||
"""
|
||||
|
||||
# pylint: disable=invalid-name
|
||||
from unicodedata import lookup
|
||||
import json
|
||||
from pathlib import Path
|
||||
from pprint import pformat
|
||||
from babel import Locale, UnknownLocaleError
|
||||
from babel.languages import get_global
|
||||
from babel.core import parse_locale
|
||||
|
||||
from searx import settings, searx_dir
|
||||
from searx.engines import load_engines, engines
|
||||
from searx.network import set_timeout_for_thread
|
||||
|
||||
# Output files written by this script.
engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json'  # raw per-engine data (JSON)
languages_file = Path(searx_dir) / 'languages.py'  # generated module with the final language list
|
||||
|
||||
|
||||
# Fetches supported languages for each engine and writes json file with those.
def fetch_supported_languages():
    """Fetch the supported languages of every engine implementing
    ``fetch_supported_languages`` and dump them to
    :py:obj:`engines_languages_file` (JSON).

    Returns:
        Mapping engine name --> fetched language data.
    """
    # generous per-request timeout: some engines answer slowly
    set_timeout_for_thread(10.0)

    engines_languages = {}

    # iterate engines in a stable (alphabetical) order
    for engine_name in sorted(engines):
        engine = engines[engine_name]
        if not hasattr(engine, 'fetch_supported_languages'):
            continue
        engines_languages[engine_name] = engine.fetch_supported_languages()
        print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name))
        # engines return either a plain list of codes or a dict with extra
        # data; sort plain lists so the JSON output is deterministic
        if isinstance(engines_languages[engine_name], list):
            engines_languages[engine_name] = sorted(engines_languages[engine_name])

    print("fetched languages from %s engines" % len(engines_languages))

    # write json file
    with open(engines_languages_file, 'w', encoding='utf-8') as f:
        json.dump(engines_languages, f, indent=2, sort_keys=True)

    return engines_languages
|
||||
|
||||
|
||||
# Get babel Locale object from lang_code if possible.
def get_locale(lang_code):
    """Return a babel ``Locale`` parsed from *lang_code* (``-`` separated),
    or ``None`` when the code is unknown or malformed."""
    try:
        return Locale.parse(lang_code, sep='-')
    except (UnknownLocaleError, ValueError):
        return None
|
||||
|
||||
|
||||
# Hand-picked flag emojis for codes whose flag cannot be derived from the
# territory (each value is a pair of Unicode regional-indicator characters).
lang2emoji = {
    'ha': '\U0001F1F3\U0001F1EA', # Hausa / Niger
    'bs': '\U0001F1E7\U0001F1E6', # Bosnian / Bosnia & Herzegovina
    'jp': '\U0001F1EF\U0001F1F5', # Japanese
    'ua': '\U0001F1FA\U0001F1E6', # Ukrainian
    'he': '\U0001F1EE\U0001F1F7', # Hebrew
}
|
||||
|
||||
|
||||
def get_unicode_flag(lang_code):
    """Determine a unicode flag (emoji) that fits to the ``lang_code``.

    Lookup order: the :py:obj:`lang2emoji` override table, a generic globe
    for plain 2-letter codes, then the flag built from the territory part of
    the locale.  Returns ``None`` when no flag can be determined; diagnostics
    are printed to stdout along the way.
    """

    # explicit per-code override wins
    emoji = lang2emoji.get(lang_code.lower())
    if emoji:
        return emoji

    # plain language code without territory --> generic globe emoji
    if len(lang_code) == 2:
        return '\U0001F310'

    language = territory = script = variant = ''
    try:
        language, territory, script, variant = parse_locale(lang_code, '-')
    except ValueError as exc:
        # keep going with empty fields; handled by the branch below
        print(exc)

    # https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
    if not territory:
        # https://www.unicode.org/emoji/charts/emoji-list.html#country-flag
        emoji = lang2emoji.get(language)
        if not emoji:
            print(
                "%s --> language: %s / territory: %s / script: %s / variant: %s"
                % (lang_code, language, territory, script, variant)
            )
        # may be None when the language has no override either
        return emoji

    # territory-level override (e.g. disputed / non-standard codes)
    emoji = lang2emoji.get(territory.lower())
    if emoji:
        return emoji

    # build the flag from the two regional-indicator letters of the territory
    try:
        c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[0])
        c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[1])
        # print("%s --> territory: %s --> %s%s" %(lang_code, territory, c1, c2 ))
    except KeyError as exc:
        print("%s --> territory: %s --> %s" % (lang_code, territory, exc))
        return None

    return c1 + c2
|
||||
|
||||
|
||||
def get_territory_name(lang_code):
    """Return the territory (country) name for *lang_code*, or ``None`` when
    the locale is unknown or babel's locale data file is missing."""
    locale = get_locale(lang_code)
    if locale is None:
        return None
    try:
        return locale.get_territory_name()
    except FileNotFoundError as exc:
        print("ERROR: %s --> %s" % (locale, exc))
        return None
|
||||
|
||||
|
||||
# Join all language lists.
def join_language_lists(engines_languages):
    """Merge the per-engine language lists into a single dict.

    Keys are bare language codes; each value holds the language ``name``,
    ``english_name``, the set of supporting engines (``counter``) and a
    ``countries`` dict keyed by ``language-COUNTRY`` codes.
    """
    language_list = {}
    for engine_name in engines_languages:
        for lang_code in engines_languages[engine_name]:

            # apply custom fixes if necessary: map an engine-specific alias
            # back to the canonical code
            if lang_code in getattr(engines[engine_name], 'language_aliases', {}).values():
                lang_code = next(
                    lc for lc, alias in engines[engine_name].language_aliases.items() if lang_code == alias
                )

            locale = get_locale(lang_code)

            # ensure that lang_code uses standard language and country codes
            if locale and locale.territory:
                lang_code = "{lang}-{country}".format(lang=locale.language, country=locale.territory)
            short_code = lang_code.split('-')[0]

            # add language without country if not in list
            if short_code not in language_list:
                if locale:
                    # get language's data from babel's Locale object
                    language_name = locale.get_language_name().title()
                    english_name = locale.english_name.split(' (')[0]
                elif short_code in engines_languages['wikipedia']:
                    # get language's data from wikipedia if not known by babel
                    language_name = engines_languages['wikipedia'][short_code]['name']
                    english_name = engines_languages['wikipedia'][short_code]['english_name']
                else:
                    # unknown everywhere; kept so engines are still counted
                    language_name = None
                    english_name = None

                # add language to list
                language_list[short_code] = {
                    'name': language_name,
                    'english_name': english_name,
                    'counter': set(),
                    'countries': {},
                }

            # add language with country if not in list
            if lang_code != short_code and lang_code not in language_list[short_code]['countries']:
                country_name = ''
                if locale:
                    # get country name from babel's Locale object
                    try:
                        country_name = locale.get_territory_name()
                    except FileNotFoundError as exc:
                        print("ERROR: %s --> %s" % (locale, exc))
                        locale = None

                language_list[short_code]['countries'][lang_code] = {
                    'country_name': country_name,
                    'counter': set(),
                }

            # count engine for both language_country combination and language alone
            language_list[short_code]['counter'].add(engine_name)
            if lang_code != short_code:
                language_list[short_code]['countries'][lang_code]['counter'].add(engine_name)

    return language_list
|
||||
|
||||
|
||||
# Filter language list so it only includes the most supported languages and countries
def filter_language_list(all_languages):
    """Reduce the joined language dict to codes supported by enough engines.

    A language survives when at least ``min_engines_per_lang`` engines (or
    every enabled default 'general' engine) support it; a country variant
    survives with ``min_engines_per_country`` engines.
    """
    min_engines_per_lang = 12
    min_engines_per_country = 7
    # pylint: disable=consider-using-dict-items, consider-iterating-dictionary
    main_engines = [
        engine_name
        for engine_name in engines.keys()
        if 'general' in engines[engine_name].categories
        and engines[engine_name].supported_languages
        and not engines[engine_name].disabled
    ]

    # filter list to include only languages supported by most engines or all default general engines
    filtered_languages = {
        code: lang
        for code, lang in all_languages.items()
        if (
            len(lang['counter']) >= min_engines_per_lang
            or all(main_engine in lang['counter'] for main_engine in main_engines)
        )
    }

    def _copy_lang_data(lang, country_name=None):
        # build one output entry; 'country_name' only set for country variants
        new_dict = {}
        new_dict['name'] = all_languages[lang]['name']
        new_dict['english_name'] = all_languages[lang]['english_name']
        if country_name:
            new_dict['country_name'] = country_name
        return new_dict

    # for each language get country codes supported by most engines or at least one country code
    filtered_languages_with_countries = {}
    for lang, lang_data in filtered_languages.items():
        countries = lang_data['countries']
        filtered_countries = {}

        # get language's country codes with enough supported engines
        for lang_country, country_data in countries.items():
            if len(country_data['counter']) >= min_engines_per_country:
                filtered_countries[lang_country] = _copy_lang_data(lang, country_data['country_name'])

        # add language without countries too if there's more than one country to choose from
        if len(filtered_countries) > 1:
            filtered_countries[lang] = _copy_lang_data(lang, None)
        elif len(filtered_countries) == 1:
            # NOTE(review): this assignment is never read afterwards — looks
            # like dead code; confirm before removing
            lang_country = next(iter(filtered_countries))

        # if no country has enough engines try to get most likely country code from babel
        if not filtered_countries:
            lang_country = None
            subtags = get_global('likely_subtags').get(lang)
            if subtags:
                country_code = subtags.split('_')[-1]
                if len(country_code) == 2:
                    lang_country = "{lang}-{country}".format(lang=lang, country=country_code)

            if lang_country:
                filtered_countries[lang_country] = _copy_lang_data(lang, None)
            else:
                filtered_countries[lang] = _copy_lang_data(lang, None)

        filtered_languages_with_countries.update(filtered_countries)

    return filtered_languages_with_countries
|
||||
|
||||
|
||||
class UnicodeEscape(str):
    """Escape unicode string in :py:obj:`pprint.pformat`"""

    def __repr__(self):
        # each byte of the escaped form maps back to one ASCII character
        chars = [chr(byte) for byte in self.encode('unicode-escape')]
        return "'{}'".format("".join(chars))
|
||||
|
||||
|
||||
# Write languages.py.
def write_languages_file(languages):
    """Generate the ``languages.py`` module (:py:obj:`languages_file`) from
    the filtered language dict.

    Entries whose ``name`` could not be resolved are reported on stdout and
    skipped.
    """
    file_headers = (
        "# -*- coding: utf-8 -*-",
        "# list of language codes",
        "# this file is generated automatically by utils/fetch_languages.py",
        "language_codes = (\n",
    )

    language_codes = []

    for code in sorted(languages):

        name = languages[code]['name']
        if name is None:
            # no name could be determined, neither by babel nor by wikipedia
            print("ERROR: languages['%s'] --> %s" % (code, languages[code]))
            continue

        flag = get_unicode_flag(code) or ''
        item = (
            code,
            # reuse the validated local instead of re-reading the dict;
            # strip a trailing " (...)" qualifier from the native name
            name.split(' (')[0],
            get_territory_name(code) or '',
            languages[code].get('english_name') or '',
            UnicodeEscape(flag),
        )

        language_codes.append(item)

    language_codes = tuple(language_codes)

    # 'with' closes the file; the old explicit new_file.close() was redundant
    with open(languages_file, 'w', encoding='utf-8') as new_file:
        file_content = "{file_headers} {language_codes},\n)\n".format(
            # fmt: off
            file_headers = '\n'.join(file_headers),
            language_codes = pformat(language_codes, indent=4)[1:-1]
            # fmt: on
        )
        new_file.write(file_content)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # instantiate all configured engines so their languages can be queried
    load_engines(settings['engines'])
    # fetch each engine's supported languages and dump them to JSON
    _engines_languages = fetch_supported_languages()
    # merge per-engine lists into one language --> countries mapping
    _all_languages = join_language_lists(_engines_languages)
    # keep only languages / countries supported by enough engines
    _filtered_languages = filter_language_list(_all_languages)
    # regenerate searx/languages.py
    write_languages_file(_filtered_languages)
|
Loading…
x
Reference in New Issue
Block a user