Add engine locales (1/n)

2022-10-01 09:01:59 +00:00 · 2022-10-01 09:01:59 +00:00 · 52fe8111ea
commit 52fe8111ea
parent 1a5b096578
21 changed files with 1107 additions and 870 deletions
--- a/.github/workflows/data-update.yml
+++ b/.github/workflows/data-update.yml
@ -17,7 +17,7 @@ jobs:
          - update_currencies.py
          - update_external_bangs.py
          - update_firefox_version.py
-          - update_languages.py
+          - update_engine_locales.py
          - update_wikidata_units.py
          - update_engine_descriptions.py
    steps:
--- a/docs/admin/engines/configured_engines.rst
+++ b/docs/admin/engines/configured_engines.rst
@ -42,7 +42,7 @@ Explanation of the :ref:`general engine configuration` shown in the table
        - Timeout
        - Weight
        - Paging
-        - Language
+        - Language, Region
        - Safe search
        - Time range

--- a/3
+++ b/3
@ -57,7 +57,7 @@ PYLINT_SEARXNG_DISABLE_OPTION="\
 I,C,R,\
 W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401,\
 E1136"
-PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="supported_languages,language_aliases,logger,categories"
+PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="engine_locales,supported_languages,language_aliases,logger,categories"
 PYLINT_OPTIONS="-m pylint -j 0 --rcfile .pylintrc"

 help() {
@ -698,6 +698,7 @@ test.pyright() {
        | grep -v '/engines/.*.py.* - warning: "logger" is not defined'\
        | grep -v '/plugins/.*.py.* - error: "logger" is not defined'\
        | grep -v '/engines/.*.py.* - warning: "supported_languages" is not defined' \
+        | grep -v '/engines/.*.py.* - warning: "engine_locales" is not defined' \
        | grep -v '/engines/.*.py.* - warning: "language_aliases" is not defined' \
        | grep -v '/engines/.*.py.* - warning: "categories" is not defined'
    dump_return $?
--- a/searx/autocomplete.py
+++ b/searx/autocomplete.py
@ -11,9 +11,10 @@ from lxml import etree
 from httpx import HTTPError

 from searx import settings
-from searx.data import ENGINES_LANGUAGES
+from searx.locales import get_engine_locale
 from searx.network import get as http_get
 from searx.exceptions import SearxEngineResponseException
+from searx.engines import engines

 # a fetch_supported_languages() for XPath engines isn't available right now
 # _brave = ENGINES_LANGUAGES['brave'].keys()
@ -110,9 +111,12 @@ def seznam(query, _lang):

 def startpage(query, lang):
    # startpage autocompleter
-    lui = ENGINES_LANGUAGES['startpage'].get(lang, 'english')
+    engine_language = default_language = 'english_uk'
+    if 'startpage' in engines:
+        engine_language = get_engine_locale(lang, engines['startpage'].engine_locales.languages, default=default_language)
+
    url = 'https://startpage.com/suggestions?{query}'
-    resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui})))
+    resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': engine_language})))
    data = resp.json()
    return [e['text'] for e in data.get('suggestions', []) if 'text' in e]

--- a/searx/data/init.py
+++ b/searx/data/init.py
@ -8,6 +8,7 @@

 __all__ = [
    'ENGINES_LANGUAGES',
+    'ENGINES_LOCALES',
    'CURRENCIES',
    'USER_AGENTS',
    'EXTERNAL_URLS',
@ -43,6 +44,7 @@ def ahmia_blacklist_loader():


 ENGINES_LANGUAGES = _load('engines_languages.json')
+ENGINES_LOCALES = _load('engine_locales.json')
 CURRENCIES = _load('currencies.json')
 USER_AGENTS = _load('useragents.json')
 EXTERNAL_URLS = _load('external_urls.json')
--- a/searx/data/engine_locales.json
+++ b/searx/data/engine_locales.json
@ -0,0 +1,299 @@
+{
+  "qwant": {
+    "languages": {},
+    "regions": {
+      "bg-BG": "bg_BG",
+      "ca-ES": "ca_ES",
+      "cs-CZ": "cs_CZ",
+      "da-DK": "da_DK",
+      "de-AT": "de_AT",
+      "de-CH": "de_CH",
+      "de-DE": "de_DE",
+      "el-GR": "el_GR",
+      "en-AU": "en_AU",
+      "en-CA": "en_CA",
+      "en-GB": "en_GB",
+      "en-IE": "en_IE",
+      "en-MY": "en_MY",
+      "en-NZ": "en_NZ",
+      "en-US": "en_US",
+      "es-AR": "es_AR",
+      "es-CL": "es_CL",
+      "es-ES": "es_ES",
+      "es-MX": "es_MX",
+      "et-EE": "et_EE",
+      "fi-FI": "fi_FI",
+      "fr-BE": "fr_BE",
+      "fr-CA": "fr_CA",
+      "fr-CH": "fr_CH",
+      "fr-FR": "fr_FR",
+      "hu-HU": "hu_HU",
+      "it-CH": "it_CH",
+      "it-IT": "it_IT",
+      "ko-KR": "ko_KR",
+      "nb-NO": "nb_NO",
+      "nl-BE": "nl_BE",
+      "nl-NL": "nl_NL",
+      "pl-PL": "pl_PL",
+      "pt-PT": "pt_PT",
+      "ro-RO": "ro_RO",
+      "sv-SE": "sv_SE",
+      "th-TH": "th_TH",
+      "zh-CN": "zh_CN",
+      "zh-HK": "zh_HK"
+    }
+  },
+  "qwant images": {
+    "languages": {},
+    "regions": {
+      "bg-BG": "bg_BG",
+      "ca-ES": "ca_ES",
+      "cs-CZ": "cs_CZ",
+      "da-DK": "da_DK",
+      "de-AT": "de_AT",
+      "de-CH": "de_CH",
+      "de-DE": "de_DE",
+      "el-GR": "el_GR",
+      "en-AU": "en_AU",
+      "en-CA": "en_CA",
+      "en-GB": "en_GB",
+      "en-IE": "en_IE",
+      "en-MY": "en_MY",
+      "en-NZ": "en_NZ",
+      "en-US": "en_US",
+      "es-AR": "es_AR",
+      "es-CL": "es_CL",
+      "es-ES": "es_ES",
+      "es-MX": "es_MX",
+      "et-EE": "et_EE",
+      "fi-FI": "fi_FI",
+      "fr-BE": "fr_BE",
+      "fr-CA": "fr_CA",
+      "fr-CH": "fr_CH",
+      "fr-FR": "fr_FR",
+      "hu-HU": "hu_HU",
+      "it-CH": "it_CH",
+      "it-IT": "it_IT",
+      "ko-KR": "ko_KR",
+      "nb-NO": "nb_NO",
+      "nl-BE": "nl_BE",
+      "nl-NL": "nl_NL",
+      "pl-PL": "pl_PL",
+      "pt-PT": "pt_PT",
+      "ro-RO": "ro_RO",
+      "sv-SE": "sv_SE",
+      "th-TH": "th_TH",
+      "zh-CN": "zh_CN",
+      "zh-HK": "zh_HK"
+    }
+  },
+  "qwant news": {
+    "languages": {},
+    "regions": {
+      "ca-ES": "ca_ES",
+      "de-AT": "de_AT",
+      "de-CH": "de_CH",
+      "de-DE": "de_DE",
+      "en-AU": "en_AU",
+      "en-CA": "en_CA",
+      "en-GB": "en_GB",
+      "en-IE": "en_IE",
+      "en-MY": "en_MY",
+      "en-NZ": "en_NZ",
+      "en-US": "en_US",
+      "es-AR": "es_AR",
+      "es-CL": "es_CL",
+      "es-ES": "es_ES",
+      "es-MX": "es_MX",
+      "fr-BE": "fr_BE",
+      "fr-CA": "fr_CA",
+      "fr-CH": "fr_CH",
+      "fr-FR": "fr_FR",
+      "it-CH": "it_CH",
+      "it-IT": "it_IT",
+      "nl-BE": "nl_BE",
+      "nl-NL": "nl_NL",
+      "pt-PT": "pt_PT"
+    }
+  },
+  "qwant videos": {
+    "languages": {},
+    "regions": {
+      "bg-BG": "bg_BG",
+      "ca-ES": "ca_ES",
+      "cs-CZ": "cs_CZ",
+      "da-DK": "da_DK",
+      "de-AT": "de_AT",
+      "de-CH": "de_CH",
+      "de-DE": "de_DE",
+      "el-GR": "el_GR",
+      "en-AU": "en_AU",
+      "en-CA": "en_CA",
+      "en-GB": "en_GB",
+      "en-IE": "en_IE",
+      "en-MY": "en_MY",
+      "en-NZ": "en_NZ",
+      "en-US": "en_US",
+      "es-AR": "es_AR",
+      "es-CL": "es_CL",
+      "es-ES": "es_ES",
+      "es-MX": "es_MX",
+      "et-EE": "et_EE",
+      "fi-FI": "fi_FI",
+      "fr-BE": "fr_BE",
+      "fr-CA": "fr_CA",
+      "fr-CH": "fr_CH",
+      "fr-FR": "fr_FR",
+      "hu-HU": "hu_HU",
+      "it-CH": "it_CH",
+      "it-IT": "it_IT",
+      "ko-KR": "ko_KR",
+      "nb-NO": "nb_NO",
+      "nl-BE": "nl_BE",
+      "nl-NL": "nl_NL",
+      "pl-PL": "pl_PL",
+      "pt-PT": "pt_PT",
+      "ro-RO": "ro_RO",
+      "sv-SE": "sv_SE",
+      "th-TH": "th_TH",
+      "zh-CN": "zh_CN",
+      "zh-HK": "zh_HK"
+    }
+  },
+  "startpage": {
+    "languages": {
+      "af": "afrikaans",
+      "am": "amharic",
+      "ar": "arabic",
+      "az": "azerbaijani",
+      "be": "belarusian",
+      "bg": "bulgarian",
+      "bn": "bengali",
+      "bs": "bosnian",
+      "ca": "catalan",
+      "cs": "czech",
+      "cy": "welsh",
+      "da": "dansk",
+      "de": "deutsch",
+      "el": "greek",
+      "en": "english_uk",
+      "eo": "esperanto",
+      "es": "espanol",
+      "et": "estonian",
+      "eu": "basque",
+      "fa": "persian",
+      "fi": "suomi",
+      "fo": "faroese",
+      "fr": "francais",
+      "fy": "frisian",
+      "ga": "irish",
+      "gd": "gaelic",
+      "gl": "galician",
+      "gu": "gujarati",
+      "he": "hebrew",
+      "hi": "hindi",
+      "hr": "croatian",
+      "hu": "hungarian",
+      "ia": "interlingua",
+      "id": "indonesian",
+      "is": "icelandic",
+      "it": "italiano",
+      "ja": "nihongo",
+      "jv": "javanese",
+      "ka": "georgian",
+      "kn": "kannada",
+      "ko": "hangul",
+      "la": "latin",
+      "lt": "lithuanian",
+      "lv": "latvian",
+      "mai": "bihari",
+      "mk": "macedonian",
+      "ml": "malayalam",
+      "mr": "marathi",
+      "ms": "malay",
+      "mt": "maltese",
+      "nb": "norsk",
+      "ne": "nepali",
+      "nl": "nederlands",
+      "oc": "occitan",
+      "pa": "punjabi",
+      "pl": "polski",
+      "pt": "portugues",
+      "ro": "romanian",
+      "ru": "russian",
+      "si": "sinhalese",
+      "sk": "slovak",
+      "sl": "slovenian",
+      "sq": "albanian",
+      "sr": "serbian",
+      "su": "sudanese",
+      "sv": "svenska",
+      "sw": "swahili",
+      "ta": "tamil",
+      "te": "telugu",
+      "th": "thai",
+      "ti": "tigrinya",
+      "tl": "tagalog",
+      "tr": "turkce",
+      "uk": "ukrainian",
+      "ur": "urdu",
+      "uz": "uzbek",
+      "vi": "vietnamese",
+      "xh": "xhosa",
+      "zh": "jiantizhongwen",
+      "zh_Hant": "fantizhengwen",
+      "zu": "zulu"
+    },
+    "regions": {
+      "ar-EG": "ar_EG",
+      "bg-BG": "bg_BG",
+      "ca-ES": "ca_ES",
+      "cs-CZ": "cs_CZ",
+      "da-DK": "da_DK",
+      "de-AT": "de_AT",
+      "de-CH": "de_CH",
+      "de-DE": "de_DE",
+      "el-GR": "el_GR",
+      "en-AU": "en_AU",
+      "en-CA": "en_CA",
+      "en-GB": "en-GB_GB",
+      "en-IE": "en_IE",
+      "en-MY": "en_MY",
+      "en-NZ": "en_NZ",
+      "en-US": "en_US",
+      "en-ZA": "en_ZA",
+      "es-AR": "es_AR",
+      "es-CL": "es_CL",
+      "es-ES": "es_ES",
+      "es-US": "es_US",
+      "es-UY": "es_UY",
+      "fi-FI": "fi_FI",
+      "fil-PH": "fil_PH",
+      "fr-BE": "fr_BE",
+      "fr-CA": "fr_CA",
+      "fr-CH": "fr_CH",
+      "fr-FR": "fr_FR",
+      "hi-IN": "hi_IN",
+      "it-CH": "it_CH",
+      "it-IT": "it_IT",
+      "ja-JP": "ja_JP",
+      "ko-KR": "ko_KR",
+      "ms-MY": "ms_MY",
+      "nb-NO": "no_NO",
+      "nl-BE": "nl_BE",
+      "nl-NL": "nl_NL",
+      "pl-PL": "pl_PL",
+      "pt-BR": "pt-BR_BR",
+      "pt-PT": "pt_PT",
+      "ro-RO": "ro_RO",
+      "ru-BY": "ru_BY",
+      "ru-RU": "ru_RU",
+      "sv-SE": "sv_SE",
+      "tr-TR": "tr_TR",
+      "uk-UA": "uk_UA",
+      "zh-CN": "zh-CN_CN",
+      "zh-HK": "zh-TW_HK",
+      "zh-TW": "zh-TW_TW"
+    }
+  }
+}
--- a/searx/data/engines_languages.json
+++ b/searx/data/engines_languages.json
@ -1396,406 +1396,6 @@
    "sv",
    "zh"
  ],
-  "qwant": {
-    "bg-BG": "bg_BG",
-    "ca-ES": "ca_ES",
-    "cs-CZ": "cs_CZ",
-    "da-DK": "da_DK",
-    "de-AT": "de_AT",
-    "de-CH": "de_CH",
-    "de-DE": "de_DE",
-    "el-GR": "el_GR",
-    "en-AU": "en_AU",
-    "en-CA": "en_CA",
-    "en-GB": "en_GB",
-    "en-IE": "en_IE",
-    "en-MY": "en_MY",
-    "en-NZ": "en_NZ",
-    "en-US": "en_US",
-    "es-AR": "es_AR",
-    "es-CL": "es_CL",
-    "es-ES": "es_ES",
-    "es-MX": "es_MX",
-    "et-EE": "et_EE",
-    "fi-FI": "fi_FI",
-    "fr-BE": "fr_BE",
-    "fr-CA": "fr_CA",
-    "fr-CH": "fr_CH",
-    "fr-FR": "fr_FR",
-    "hu-HU": "hu_HU",
-    "it-CH": "it_CH",
-    "it-IT": "it_IT",
-    "ko-KR": "ko_KR",
-    "nb-NO": "nb_NO",
-    "nl-BE": "nl_BE",
-    "nl-NL": "nl_NL",
-    "pl-PL": "pl_PL",
-    "pt-PT": "pt_PT",
-    "ro-RO": "ro_RO",
-    "sv-SE": "sv_SE",
-    "th-TH": "th_TH",
-    "zh-CN": "zh_CN",
-    "zh-HK": "zh_HK"
-  },
-  "qwant images": {
-    "bg-BG": "bg_BG",
-    "ca-ES": "ca_ES",
-    "cs-CZ": "cs_CZ",
-    "da-DK": "da_DK",
-    "de-AT": "de_AT",
-    "de-CH": "de_CH",
-    "de-DE": "de_DE",
-    "el-GR": "el_GR",
-    "en-AU": "en_AU",
-    "en-CA": "en_CA",
-    "en-GB": "en_GB",
-    "en-IE": "en_IE",
-    "en-MY": "en_MY",
-    "en-NZ": "en_NZ",
-    "en-US": "en_US",
-    "es-AR": "es_AR",
-    "es-CL": "es_CL",
-    "es-ES": "es_ES",
-    "es-MX": "es_MX",
-    "et-EE": "et_EE",
-    "fi-FI": "fi_FI",
-    "fr-BE": "fr_BE",
-    "fr-CA": "fr_CA",
-    "fr-CH": "fr_CH",
-    "fr-FR": "fr_FR",
-    "hu-HU": "hu_HU",
-    "it-CH": "it_CH",
-    "it-IT": "it_IT",
-    "ko-KR": "ko_KR",
-    "nb-NO": "nb_NO",
-    "nl-BE": "nl_BE",
-    "nl-NL": "nl_NL",
-    "pl-PL": "pl_PL",
-    "pt-PT": "pt_PT",
-    "ro-RO": "ro_RO",
-    "sv-SE": "sv_SE",
-    "th-TH": "th_TH",
-    "zh-CN": "zh_CN",
-    "zh-HK": "zh_HK"
-  },
-  "qwant news": {
-    "ca-ES": "ca_ES",
-    "de-AT": "de_AT",
-    "de-CH": "de_CH",
-    "de-DE": "de_DE",
-    "en-AU": "en_AU",
-    "en-CA": "en_CA",
-    "en-GB": "en_GB",
-    "en-IE": "en_IE",
-    "en-MY": "en_MY",
-    "en-NZ": "en_NZ",
-    "en-US": "en_US",
-    "es-AR": "es_AR",
-    "es-CL": "es_CL",
-    "es-ES": "es_ES",
-    "es-MX": "es_MX",
-    "fr-BE": "fr_BE",
-    "fr-CA": "fr_CA",
-    "fr-CH": "fr_CH",
-    "fr-FR": "fr_FR",
-    "it-CH": "it_CH",
-    "it-IT": "it_IT",
-    "nl-BE": "nl_BE",
-    "nl-NL": "nl_NL",
-    "pt-PT": "pt_PT"
-  },
-  "qwant videos": {
-    "bg-BG": "bg_BG",
-    "ca-ES": "ca_ES",
-    "cs-CZ": "cs_CZ",
-    "da-DK": "da_DK",
-    "de-AT": "de_AT",
-    "de-CH": "de_CH",
-    "de-DE": "de_DE",
-    "el-GR": "el_GR",
-    "en-AU": "en_AU",
-    "en-CA": "en_CA",
-    "en-GB": "en_GB",
-    "en-IE": "en_IE",
-    "en-MY": "en_MY",
-    "en-NZ": "en_NZ",
-    "en-US": "en_US",
-    "es-AR": "es_AR",
-    "es-CL": "es_CL",
-    "es-ES": "es_ES",
-    "es-MX": "es_MX",
-    "et-EE": "et_EE",
-    "fi-FI": "fi_FI",
-    "fr-BE": "fr_BE",
-    "fr-CA": "fr_CA",
-    "fr-CH": "fr_CH",
-    "fr-FR": "fr_FR",
-    "hu-HU": "hu_HU",
-    "it-CH": "it_CH",
-    "it-IT": "it_IT",
-    "ko-KR": "ko_KR",
-    "nb-NO": "nb_NO",
-    "nl-BE": "nl_BE",
-    "nl-NL": "nl_NL",
-    "pl-PL": "pl_PL",
-    "pt-PT": "pt_PT",
-    "ro-RO": "ro_RO",
-    "sv-SE": "sv_SE",
-    "th-TH": "th_TH",
-    "zh-CN": "zh_CN",
-    "zh-HK": "zh_HK"
-  },
-  "startpage": {
-    "af": {
-      "alias": "afrikaans"
-    },
-    "am": {
-      "alias": "amharic"
-    },
-    "ar": {
-      "alias": "arabic"
-    },
-    "az": {
-      "alias": "azerbaijani"
-    },
-    "be": {
-      "alias": "belarusian"
-    },
-    "bg": {
-      "alias": "bulgarian"
-    },
-    "bn": {
-      "alias": "bengali"
-    },
-    "bs": {
-      "alias": "bosnian"
-    },
-    "ca": {
-      "alias": "catalan"
-    },
-    "cs": {
-      "alias": "czech"
-    },
-    "cy": {
-      "alias": "welsh"
-    },
-    "da": {
-      "alias": "dansk"
-    },
-    "de": {
-      "alias": "deutsch"
-    },
-    "el": {
-      "alias": "greek"
-    },
-    "en": {
-      "alias": "english"
-    },
-    "en-GB": {
-      "alias": "english_uk"
-    },
-    "eo": {
-      "alias": "esperanto"
-    },
-    "es": {
-      "alias": "espanol"
-    },
-    "et": {
-      "alias": "estonian"
-    },
-    "eu": {
-      "alias": "basque"
-    },
-    "fa": {
-      "alias": "persian"
-    },
-    "fi": {
-      "alias": "suomi"
-    },
-    "fo": {
-      "alias": "faroese"
-    },
-    "fr": {
-      "alias": "francais"
-    },
-    "fy": {
-      "alias": "frisian"
-    },
-    "ga": {
-      "alias": "irish"
-    },
-    "gd": {
-      "alias": "gaelic"
-    },
-    "gl": {
-      "alias": "galician"
-    },
-    "gu": {
-      "alias": "gujarati"
-    },
-    "he": {
-      "alias": "hebrew"
-    },
-    "hi": {
-      "alias": "hindi"
-    },
-    "hr": {
-      "alias": "croatian"
-    },
-    "hu": {
-      "alias": "hungarian"
-    },
-    "ia": {
-      "alias": "interlingua"
-    },
-    "id": {
-      "alias": "indonesian"
-    },
-    "is": {
-      "alias": "icelandic"
-    },
-    "it": {
-      "alias": "italiano"
-    },
-    "ja": {
-      "alias": "nihongo"
-    },
-    "jv": {
-      "alias": "javanese"
-    },
-    "ka": {
-      "alias": "georgian"
-    },
-    "kn": {
-      "alias": "kannada"
-    },
-    "ko": {
-      "alias": "hangul"
-    },
-    "la": {
-      "alias": "latin"
-    },
-    "lt": {
-      "alias": "lithuanian"
-    },
-    "lv": {
-      "alias": "latvian"
-    },
-    "mai": {
-      "alias": "bihari"
-    },
-    "mk": {
-      "alias": "macedonian"
-    },
-    "ml": {
-      "alias": "malayalam"
-    },
-    "mr": {
-      "alias": "marathi"
-    },
-    "ms": {
-      "alias": "malay"
-    },
-    "mt": {
-      "alias": "maltese"
-    },
-    "ne": {
-      "alias": "nepali"
-    },
-    "nl": {
-      "alias": "nederlands"
-    },
-    "no": {
-      "alias": "norsk"
-    },
-    "oc": {
-      "alias": "occitan"
-    },
-    "pa": {
-      "alias": "punjabi"
-    },
-    "pl": {
-      "alias": "polski"
-    },
-    "pt": {
-      "alias": "portugues"
-    },
-    "ro": {
-      "alias": "romanian"
-    },
-    "ru": {
-      "alias": "russian"
-    },
-    "si": {
-      "alias": "sinhalese"
-    },
-    "sk": {
-      "alias": "slovak"
-    },
-    "sl": {
-      "alias": "slovenian"
-    },
-    "sq": {
-      "alias": "albanian"
-    },
-    "sr": {
-      "alias": "serbian"
-    },
-    "su": {
-      "alias": "sudanese"
-    },
-    "sv": {
-      "alias": "svenska"
-    },
-    "sw": {
-      "alias": "swahili"
-    },
-    "ta": {
-      "alias": "tamil"
-    },
-    "te": {
-      "alias": "telugu"
-    },
-    "th": {
-      "alias": "thai"
-    },
-    "ti": {
-      "alias": "tigrinya"
-    },
-    "tl": {
-      "alias": "tagalog"
-    },
-    "tr": {
-      "alias": "turkce"
-    },
-    "uk": {
-      "alias": "ukrainian"
-    },
-    "ur": {
-      "alias": "urdu"
-    },
-    "uz": {
-      "alias": "uzbek"
-    },
-    "vi": {
-      "alias": "vietnamese"
-    },
-    "xh": {
-      "alias": "xhosa"
-    },
-    "zh": {
-      "alias": "jiantizhongwen"
-    },
-    "zh-HK": {
-      "alias": "fantizhengwen"
-    },
-    "zh-TW": {
-      "alias": "fantizhengwen"
-    },
-    "zu": {
-      "alias": "zulu"
-    }
-  },
  "wikidata": {
    "ab": {
      "english_name": "Abkhazian",
--- a/searx/engines/init.py
+++ b/searx/engines/init.py
@ -13,14 +13,14 @@ usage::

 import sys
 import copy
-from typing import Dict, List, Optional
+import dataclasses
+from typing import Dict, List, Optional, Any

 from os.path import realpath, dirname
 from babel.localedata import locale_identifiers
 from searx import logger, settings
-from searx.data import ENGINES_LANGUAGES
-from searx.network import get
-from searx.utils import load_module, match_language, gen_useragent
+from searx.data import ENGINES_LANGUAGES, ENGINES_LOCALES
+from searx.utils import load_module, match_language


 logger = logger.getChild('engines')
@ -52,6 +52,27 @@ ENGINE_DEFAULT_ARGS = {
 OTHER_CATEGORY = 'other'


+@dataclasses.dataclass
+class EngineLocales:
+    """The class is intended to be instantiated for each engine."""
+
+    regions: Dict[str, str] = dataclasses.field(default_factory=dict)
+    """
+    .. code:: python
+       {
+           'fr-BE' : <engine's region name>,
+       }
+    """
+
+    languages: Dict[str, str] = dataclasses.field(default_factory=dict)
+    """
+    .. code:: python
+       {
+           'ca' : <engine's language name>,
+       }
+    """
+
+
 class Engine:  # pylint: disable=too-few-public-methods
    """This class is currently never initialized and only used for type hinting."""

@ -59,15 +80,17 @@ class Engine:  # pylint: disable=too-few-public-methods
    engine: str
    shortcut: str
    categories: List[str]
-    supported_languages: List[str]
    about: dict
    inactive: bool
    disabled: bool
-    language_support: bool
    paging: bool
    safesearch: bool
    time_range_support: bool
    timeout: float
+    language_support: bool
+    engine_locales: EngineLocales
+    supported_languages: List[str]
+    language_aliases: Dict[str, str]


 # Defaults for the namespace of an engine module, see :py:func:`load_engine`
@ -85,15 +108,15 @@ engine_shortcuts = {}
 """


-def load_engine(engine_data: dict) -> Optional[Engine]:
-    """Load engine from ``engine_data``.
+def load_engine(engine_setting: Dict[str, Any]) -> Optional[Engine]:
+    """Load engine from ``engine_setting``.

-    :param dict engine_data:  Attributes from YAML ``settings:engines/<engine>``
+    :param dict engine_setting:  Attributes from YAML ``settings:engines/<engine>``
    :return: initialized namespace of the ``<engine>``.

    1. create a namespace and load module of the ``<engine>``
    2. update namespace with the defaults from :py:obj:`ENGINE_DEFAULT_ARGS`
-    3. update namespace with values from ``engine_data``
+    3. update namespace with values from ``engine_setting``

    If engine *is active*, return namespace of the engine, otherwise return
    ``None``.
@ -107,7 +130,7 @@ def load_engine(engine_data: dict) -> Optional[Engine]:

    """

-    engine_name = engine_data['name']
+    engine_name = engine_setting['name']
    if '_' in engine_name:
        logger.error('Engine name contains underscore: "{}"'.format(engine_name))
        return None
@ -115,10 +138,10 @@ def load_engine(engine_data: dict) -> Optional[Engine]:
    if engine_name.lower() != engine_name:
        logger.warn('Engine name is not lowercase: "{}", converting to lowercase'.format(engine_name))
        engine_name = engine_name.lower()
-        engine_data['name'] = engine_name
+        engine_setting['name'] = engine_name

    # load_module
-    engine_module = engine_data['engine']
+    engine_module = engine_setting['engine']
    try:
        engine = load_module(engine_module + '.py', ENGINE_DIR)
    except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError, ImportError, RuntimeError):
@ -128,9 +151,10 @@ def load_engine(engine_data: dict) -> Optional[Engine]:
        logger.exception('Cannot load engine "{}"'.format(engine_module))
        return None

-    update_engine_attributes(engine, engine_data)
-    set_language_attributes(engine)
+    update_engine_attributes(engine, engine_setting)
    update_attributes_for_tor(engine)
+    if not set_engine_locales(engine):
+        set_language_attributes(engine)

    if not is_engine_active(engine):
        return None
@ -165,15 +189,15 @@ def set_loggers(engine, engine_name):
            module.logger = logger.getChild(module_engine_name)


-def update_engine_attributes(engine: Engine, engine_data):
-    # set engine attributes from engine_data
-    for param_name, param_value in engine_data.items():
+def update_engine_attributes(engine: Engine, engine_setting: Dict[str, Any]):
+    # set engine attributes from engine_setting
+    for param_name, param_value in engine_setting.items():
        if param_name == 'categories':
            if isinstance(param_value, str):
                param_value = list(map(str.strip, param_value.split(',')))
            engine.categories = param_value
        elif hasattr(engine, 'about') and param_name == 'about':
-            engine.about = {**engine.about, **engine_data['about']}
+            engine.about = {**engine.about, **engine_setting['about']}
        else:
            setattr(engine, param_name, param_value)

@ -183,6 +207,28 @@ def update_engine_attributes(engine: Engine, engine_data):
            setattr(engine, arg_name, copy.deepcopy(arg_value))


+def set_engine_locales(engine: Engine) -> bool:
+    """Attach the :py:obj:`EngineLocales` stored in ``ENGINES_LOCALES`` to the
+    engine.  Returns ``False`` (engine untouched) if there is no entry."""
+    if engine.name in ENGINES_LOCALES:
+        engine_locales_key = engine.name
+    elif engine.engine in ENGINES_LOCALES:
+        # The keys of ENGINES_LOCALES are the *engine names* configured in
+        # settings.yml.  When multiple engines are configured in settings.yml
+        # to use the same origin engine (python module), these additional
+        # engines can use the locales from the origin engine.  For this use
+        # the configured ``engine: ...`` from settings.yml.
+        engine_locales_key = engine.engine
+    else:
+        return False
+
+    logger.debug('%s: engine locales loaded from key %r', engine.name, engine_locales_key)
+    engine.engine_locales = EngineLocales(**ENGINES_LOCALES[engine_locales_key])
+    # language_support: true as soon as at least one region or language is known
+    engine.language_support = len(engine.engine_locales.regions) > 0 or len(engine.engine_locales.languages) > 0
+    return True
+
+
 def set_language_attributes(engine: Engine):
    # assign supported languages from json file
    if engine.name in ENGINES_LANGUAGES:
@ -225,17 +271,6 @@ def set_language_attributes(engine: Engine):
    # language_support
    engine.language_support = len(engine.supported_languages) > 0

-    # assign language fetching method if auxiliary method exists
-    if hasattr(engine, '_fetch_supported_languages'):
-        headers = {
-            'User-Agent': gen_useragent(),
-            'Accept-Language': "en-US,en;q=0.5",  # bing needs to set the English language
-        }
-        engine.fetch_supported_languages = (
-            # pylint: disable=protected-access
-            lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers))
-        )
-

 def update_attributes_for_tor(engine: Engine) -> bool:
    if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
@ -294,8 +329,8 @@ def load_engines(engine_list):
    engine_shortcuts.clear()
    categories.clear()
    categories['general'] = []
-    for engine_data in engine_list:
-        engine = load_engine(engine_data)
+    for engine_setting in engine_list:
+        engine = load_engine(engine_setting)
        if engine:
            register_engine(engine)
    return engines
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@ -136,7 +136,7 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
    :param dict param: request parameters of the engine

    :param list lang_list: list of supported languages of the engine
-        :py:obj:`ENGINES_LANGUAGES[engine-name] <searx.data.ENGINES_LANGUAGES>`
+        :py:obj:`ENGINES_LOCALES[engine-name].languages <searx.data.ENGINES_LOCALES>`

    :param dict lang_list: custom aliases for non standard language codes
        (used when calling :py:func:`searx.utils.match_language`)
--- a/searx/engines/google_scholar.py
+++ b/searx/engines/google_scholar.py
@ -50,7 +50,6 @@ about = {
 # engine dependent config
 categories = ['science', 'scientific publications']
 paging = True
-language_support = True
 use_locale_domain = True
 time_range_support = True
 safesearch = False
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@ -56,7 +56,6 @@ about = {

 categories = ['videos', 'web']
 paging = False
-language_support = True
 use_locale_domain = True
 time_range_support = True
 safesearch = True
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@ -49,7 +49,7 @@ about = {
 # engine dependent config
 categories = []
 paging = True
-supported_languages_url = about['website']
+engine_locales_url = about['website']
 qwant_categ = None  # web|news|inages|videos

 safesearch = True
@ -95,7 +95,7 @@ def request(query, params):
    )

    # add quant's locale
-    q_locale = get_engine_locale(params['language'], supported_languages, default='en_US')
+    q_locale = get_engine_locale(params['language'], engine_locales.regions, default='en_US')
    params['url'] += '&locale=' + q_locale

    # add safesearch option
@ -243,7 +243,7 @@ def response(resp):
    return results


-def _fetch_supported_languages(resp):
+def _fetch_engine_locales(resp, engine_locales):

    text = resp.text
    text = text[text.find('INITIAL_PROPS') :]
@ -263,8 +263,6 @@ def _fetch_supported_languages(resp):

            q_valid_locales.append(_locale)

-    supported_languages = {}
-
    for q_locale in q_valid_locales:
        try:
            locale = babel.Locale.parse(q_locale, sep='_')
@ -272,7 +270,7 @@ def _fetch_supported_languages(resp):
            print("ERROR: can't determine babel locale of quant's locale %s" % q_locale)
            continue

-        # note: supported_languages (dict)
+        # note: engine_locales.regions (dict)
        #
        #   dict's key is a string build up from a babel.Locale object / the
        #   notation 'xx-XX' (and 'xx') conforms to SearXNG's locale (and
@ -280,6 +278,6 @@ def _fetch_supported_languages(resp):
        #   the engine.

        searxng_locale = locale.language + '-' + locale.territory  # --> params['language']
-        supported_languages[searxng_locale] = q_locale
+        engine_locales.regions[searxng_locale] = q_locale

-    return supported_languages
+    return engine_locales
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@ -7,17 +7,18 @@
 import re
 from time import time

-from urllib.parse import urlencode
 from unicodedata import normalize, combining
 from datetime import datetime, timedelta
+from collections import OrderedDict

 from dateutil import parser
 from lxml import html
-from babel import Locale
-from babel.localedata import locale_identifiers
+
+import babel

 from searx.network import get
-from searx.utils import extract_text, eval_xpath, match_language
+from searx.locales import get_engine_locale
+from searx.utils import extract_text, eval_xpath
 from searx.exceptions import (
    SearxEngineResponseException,
    SearxEngineCaptchaException,
@ -36,16 +37,22 @@ about = {

 # engine dependent config
 categories = ['general', 'web']
-# there is a mechanism to block "bot" search
-# (probably the parameter qid), require
-# storing of qid's between mulitble search-calls

 paging = True
-supported_languages_url = 'https://www.startpage.com/do/settings'
+number_of_results = 5
+send_accept_language_header = True
+
+safesearch = True
+filter_mapping = {0: '0', 1: '1', 2: '1'}
+
+time_range_support = True
+time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
+
+engine_locales_url = 'https://www.startpage.com/do/settings'

 # search-url
-base_url = 'https://startpage.com/'
-search_url = base_url + 'sp/search?'
+base_url = 'https://www.startpage.com/'
+search_url = base_url + 'sp/search'

 # specific xpath variables
 # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
@ -103,42 +110,83 @@ def get_sc_code(headers):
    return sc_code


-# do search-request
 def request(query, params):

-    # pylint: disable=line-too-long
-    # The format string from Startpage's FFox add-on [1]::
-    #
-    #     https://www.startpage.com/do/dsearch?query={searchTerms}&cat=web&pl=ext-ff&language=__MSG_extensionUrlLanguage__&extVersion=1.3.0
-    #
-    # [1] https://addons.mozilla.org/en-US/firefox/addon/startpage-private-search/
+    # Startpage supports a region value: 'all'
+    engine_region = 'all'
+    engine_language = 'english_uk'
+    if params['language'] != 'all':
+        engine_region = get_engine_locale(params['language'], engine_locales.regions, default='all')
+        engine_language = get_engine_locale(
+            params['language'].split('-')[0], engine_locales.languages, default='english_uk'
+        )
+    logger.debug(
+        'selected language %s --> engine_language: %s // engine_region: %s',
+        params['language'],
+        engine_language,
+        engine_region,
+    )

+    # The Accept header is also needed by the get_sc_code(..) call below.
+    params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
+
+    # build arguments
    args = {
        'query': query,
-        'page': params['pageno'],
        'cat': 'web',
-        # 'pl': 'ext-ff',
-        # 'extVersion': '1.3.0',
-        # 'abp': "-1",
-        'sc': get_sc_code(params['headers']),
+        't': 'device',
+        'sc': get_sc_code(params['headers']),  # hint: this func needs HTTP headers
+        'with_date': time_range_dict.get(params['time_range'], ''),
    }

-    # set language if specified
-    if params['language'] != 'all':
-        lang_code = match_language(params['language'], supported_languages, fallback=None)
-        if lang_code:
-            language_name = supported_languages[lang_code]['alias']
-            args['language'] = language_name
-            args['lui'] = language_name
+    if engine_language:
+        args['language'] = engine_language
+        args['lui'] = engine_language
+
+    if params['pageno'] == 1:
+        args['abp'] = ['-1', '-1']
+
+    else:
+        args['page'] = params['pageno']
+        args['abp'] = '-1'
+
+    # build cookie
+    lang_homepage = 'english'
+    cookie = OrderedDict()
+    cookie['date_time'] = 'world'
+    cookie['disable_family_filter'] = filter_mapping[params['safesearch']]
+    cookie['disable_open_in_new_window'] = '0'
+    cookie['enable_post_method'] = '1'  # hint: POST
+    cookie['enable_proxy_safety_suggest'] = '1'
+    cookie['enable_stay_control'] = '1'
+    cookie['instant_answers'] = '1'
+    cookie['lang_homepage'] = 's/device/%s/' % lang_homepage
+    cookie['num_of_results'] = '10'
+    cookie['suggestions'] = '1'
+    cookie['wt_unit'] = 'celsius'
+
+    if engine_language:
+        cookie['language'] = engine_language
+        cookie['language_ui'] = engine_language
+
+    if engine_region:
+        cookie['search_results_region'] = engine_region
+
+    params['cookies']['preferences'] = 'N1N'.join(["%sEEE%s" % x for x in cookie.items()])
+    logger.debug('cookie preferences: %s', params['cookies']['preferences'])
+    params['method'] = 'POST'
+
+    logger.debug("data: %s", args)
+    params['data'] = args
+
+    params['url'] = search_url

-    params['url'] = search_url + urlencode(args)
    return params


 # get response from search-request
 def response(resp):
    results = []
-
    dom = html.fromstring(resp.text)

    # parse results
@ -200,62 +248,142 @@ def response(resp):
    return results


-# get supported languages from their site
-def _fetch_supported_languages(resp):
-    # startpage's language selector is a mess each option has a displayed name
-    # and a value, either of which may represent the language name in the native
-    # script, the language name in English, an English transliteration of the
-    # native name, the English name of the writing script used by the language,
-    # or occasionally something else entirely.
+def _fetch_engine_locales(resp, engine_locales):

-    # this cases are so special they need to be hardcoded, a couple of them are misspellings
-    language_names = {
-        'english_uk': 'en-GB',
-        'fantizhengwen': ['zh-TW', 'zh-HK'],
-        'hangul': 'ko',
-        'malayam': 'ml',
-        'norsk': 'nb',
-        'sinhalese': 'si',
-        'sudanese': 'su',
-    }
+    # startpage's language & region selectors are a mess.
+    #
+    # regions:
+    #   in the list of regions there are tags we need to map to common
+    #   region tags:
+    #   - pt-BR_BR --> pt_BR
+    #   - zh-CN_CN --> zh_Hans_CN
+    #   - zh-TW_TW --> zh_Hant_TW
+    #   - zh-TW_HK --> zh_Hant_HK
+    #   - en-GB_GB --> en_GB
+    #   and there is at least one tag with a three letter language tag (ISO 639-2)
+    #   - fil_PH --> fil_PH
+    #
+    # regions
+    # -------
+    #
+    # The locale code 'no_NO' from startpage does not exist and is mapped to
+    # nb-NO::
+    #
+    #     babel.core.UnknownLocaleError: unknown locale 'no_NO'
+    #
+    # For reference see languages-subtag at iana [1], `no` is the
+    # macrolanguage::
+    #
+    #     type: language
+    #     Subtag: nb
+    #     Description: Norwegian Bokmål
+    #     Added: 2005-10-16
+    #     Suppress-Script: Latn
+    #     Macrolanguage: no
+    #
+    # W3C recommends subtag over macrolanguage [2]:
+    #
+    #   Use macrolanguages with care. Some language subtags have a Scope field set to
+    #   macrolanguage, ie. this primary language subtag encompasses a number of more
+    #   specific primary language subtags in the registry.
+    #   ...
+    #   As we recommended for the collection subtags mentioned above, in most cases
+    #   you should try to use the more specific subtags ...
+    #
+    # [1] https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
+    # [2] https://www.w3.org/International/questions/qa-choosing-language-tags#langsubtag
+    #
+    # languages
+    # ---------
+    #
+    #   The displayed name in startpage's settings page depends on the location
+    #   of the IP when the 'Accept-Language' HTTP header is unset (in the
+    #   language update script we use "en-US,en;q=0.5" to get uniform names
+    #   independent of the IP).
+    #
+    #   Each option has a displayed name and a value, either of which
+    #   may represent the language name in the native script, the language name
+    #   in English, an English transliteration of the native name, the English
+    #   name of the writing script used by the language, or occasionally
+    #   something else entirely.

-    # get the English name of every language known by babel
-    language_names.update(
-        {
-            # fmt: off
-            name.lower(): lang_code
-            # pylint: disable=protected-access
-            for lang_code, name in Locale('en')._data['languages'].items()
-            # fmt: on
-        }
-    )
+    dom = html.fromstring(resp.text)
+
+    # regions
+
+    sp_region_names = []
+    for option in dom.xpath('//form[@name="settings"]//select[@name="search_results_region"]/option'):
+        sp_region_names.append(option.get('value'))
+
+    for engine_region_tag in sp_region_names:
+        if engine_region_tag == 'all':
+            # 'all' does not fit to a babel locale
+            continue
+
+        locale = None
+        babel_region_tag = {'no_NO': 'nb_NO'}.get(engine_region_tag, engine_region_tag)  # norway
+
+        if '-' in babel_region_tag:
+            # pt-XY_BR --> l=pt, r=BR --> pt-BR
+            l, r = babel_region_tag.split('-')
+            r = r.split('_')[-1]
+            locale = babel.Locale.parse(l + '_' + r, sep='_')
+        else:
+            try:
+                locale = babel.Locale.parse(babel_region_tag, sep='_')
+            except babel.core.UnknownLocaleError:
+                print("ERROR: can't determine babel locale of startpage's locale %s" % engine_region_tag)
+                continue
+
+        if locale is None:
+            continue
+
+        region_tag = locale.language + '-' + locale.territory
+        # print("SearXNG locale tag: %s --> Engine tag: %s" % (region_tag, engine_region_tag))
+        engine_locales.regions[region_tag] = engine_region_tag
+
+    # languages
+
+    catalog_engine2code = {name.lower(): lang_code for lang_code, name in babel.Locale('en').languages.items()}

    # get the native name of every language known by babel
-    for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, locale_identifiers()):
-        native_name = Locale(lang_code).get_language_name().lower()
+
+    for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, babel.localedata.locale_identifiers()):
+        native_name = babel.Locale(lang_code).get_language_name().lower()
        # add native name exactly as it is
-        language_names[native_name] = lang_code
+        catalog_engine2code[native_name] = lang_code

        # add "normalized" language name (i.e. français becomes francais and español becomes espanol)
        unaccented_name = ''.join(filter(lambda c: not combining(c), normalize('NFKD', native_name)))
        if len(unaccented_name) == len(unaccented_name.encode()):
            # add only if result is ascii (otherwise "normalization" didn't work)
-            language_names[unaccented_name] = lang_code
+            catalog_engine2code[unaccented_name] = lang_code
+
+    # values that can't be determined by babel's languages names
+
+    catalog_engine2code.update(
+        {
+            'english_uk': 'en',
+            # traditional chinese used in ..
+            'fantizhengwen': 'zh_Hant',
+            # Korean alphabet
+            'hangul': 'ko',
+            # Malayalam is one of 22 scheduled languages of India.
+            'malayam': 'ml',
+            'norsk': 'nb',
+            'sinhalese': 'si',
+        }
+    )

-    dom = html.fromstring(resp.text)
-    sp_lang_names = []
    for option in dom.xpath('//form[@name="settings"]//select[@name="language"]/option'):
-        sp_lang_names.append((option.get('value'), extract_text(option).lower()))
+        engine_lang = option.get('value')
+        name = extract_text(option).lower()

-    supported_languages = {}
-    for sp_option_value, sp_option_text in sp_lang_names:
-        lang_code = language_names.get(sp_option_value) or language_names.get(sp_option_text)
-        if isinstance(lang_code, str):
-            supported_languages[lang_code] = {'alias': sp_option_value}
-        elif isinstance(lang_code, list):
-            for _lc in lang_code:
-                supported_languages[_lc] = {'alias': sp_option_value}
-        else:
-            print('Unknown language option in Startpage: {} ({})'.format(sp_option_value, sp_option_text))
+        lang_code = catalog_engine2code.get(engine_lang)
+        if lang_code is None:
+            lang_code = catalog_engine2code[name]

-    return supported_languages
+        # print("SearXNG language tag: %s --> Engine tag: %s" % (lang_code, engine_lang))
+        engine_locales.languages[lang_code] = engine_lang
+
+    return engine_locales
--- a/searx/engines/yahoo_news.py
+++ b/searx/engines/yahoo_news.py
@ -32,7 +32,6 @@ about = {
    "results": 'HTML',
 }

-language_support = False
 time_range_support = False
 safesearch = False
 paging = True
--- a/searx/engines/youtube_noapi.py
+++ b/searx/engines/youtube_noapi.py
@ -20,7 +20,6 @@ about = {
 # engine dependent config
 categories = ['videos', 'music']
 paging = True
-language_support = False
 time_range_support = True

 # search-url
--- a/searx/languages.py
+++ b/searx/languages.py
@ -1,6 +1,8 @@
 # -*- coding: utf-8 -*-
 # list of language codes
-# this file is generated automatically by utils/fetch_languages.py
+# this file is generated automatically by:
+#
+#   ./manage pyenv.cmd searxng_extra/update/update_languages.py
 language_codes = (
    ('af-ZA', 'Afrikaans', 'Suid-Afrika', 'Afrikaans', '\U0001f1ff\U0001f1e6'),
    ('ar-EG', 'العربية', 'مصر', 'Arabic', '\U0001f1ea\U0001f1ec'),
--- a/searx/locales.py
+++ b/searx/locales.py
@ -4,7 +4,7 @@
 """Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.
 """

-from typing import Set
+from typing import Set, Optional, Dict
 import os
 import pathlib

@ -24,11 +24,8 @@ logger = logger.getChild('locales')
 _flask_babel_get_translations = flask_babel.get_translations

 LOCALE_NAMES = {}
-"""Mapping of locales and their description.  Locales e.g. 'fr' or 'pt-BR' (see
-:py:obj:`locales_initialize`).
-
-:meta hide-value:
-"""
+"""Mapping of locales and their description.  Locales e.g. ``fr`` or ``pt-BR``
+(see :py:obj:`locales_initialize`)."""

 RTL_LOCALES: Set[str] = set()
 """List of *Right-To-Left* locales e.g. 'he' or 'fa-IR' (see
@ -157,13 +154,17 @@ def locales_initialize(directory=None):
                RTL_LOCALES.add(tag)


-def get_engine_locale(searxng_locale, engine_locales, default=None):
+def get_engine_locale(
+    searxng_locale: str, engine_locales: Dict[str, str], default: Optional[str] = None
+) -> Optional[str]:
    """Return engine's language (aka locale) string that best fits to argument
    ``searxng_locale``.

    Argument ``engine_locales`` is a python dict that maps *SearXNG locales* to
    corresponding *engine locales*::

+    .. code:: python
+
      <engine>: {
          # SearXNG string : engine-string
          'ca-ES'          : 'ca_ES',
--- a/searx/templates/simple/preferences.html
+++ b/searx/templates/simple/preferences.html
@ -307,7 +307,7 @@
        <th class="engine_checkbox">{{ _("Allow") }}</th>{{- "" -}}
        <th class="name">{{ _("Engine name") }}</th>{{- "" -}}
        <th class="shortcut">{{ _("Shortcut") }}</th>{{- "" -}}
-        <th>{{ _("Supports selected language") }}</th>{{- "" -}}
+        <th>{{ _("Language / Region") }}</th>{{- "" -}}
        <th>{{ _("SafeSearch") }}</th>{{- "" -}}
        <th>{{ _("Time range") }}</th>{{- "" -}}
        {%- if enable_metrics %}<th>{{ _("Response time") }}</th>{% endif -%}
@ -333,7 +333,7 @@
          {{- engine_about(search_engine) -}}
        </th>{{- "" -}}
        <td class="shortcut">{{ shortcuts[search_engine.name] }}</td>{{- "" -}}
-        <td>{{ checkbox(None, supports[search_engine.name]['supports_selected_language'], true) }}</td>{{- "" -}}
+        <td>{{ checkbox(None, supports[search_engine.name]['language_support'], true) }}</td>{{- "" -}}
        <td>{{ checkbox(None, supports[search_engine.name]['safesearch'], true) }}</td>{{- "" -}}
        <td>{{ checkbox(None, supports[search_engine.name]['time_range_support'], true) }}</td>{{- "" -}}
        {%- if enable_metrics %}{{- engine_time(search_engine.name) -}}{% endif -%}
--- a/searx/webapp.py
+++ b/searx/webapp.py
@ -990,7 +990,6 @@ def preferences():
            'rate80': rate80,
            'rate95': rate95,
            'warn_timeout': e.timeout > settings['outgoing']['request_timeout'],
-            'supports_selected_language': _is_selected_language_supported(e, request.preferences),
            'result_count': result_count,
        }
    # end of stats
@ -1041,18 +1040,18 @@ def preferences():
    # supports
    supports = {}
    for _, e in filtered_engines.items():
-        supports_selected_language = _is_selected_language_supported(e, request.preferences)
+        language_support = e.language_support
        safesearch = e.safesearch
        time_range_support = e.time_range_support
        for checker_test_name in checker_results.get(e.name, {}).get('errors', {}):
-            if supports_selected_language and checker_test_name.startswith('lang_'):
-                supports_selected_language = '?'
+            if language_support and checker_test_name.startswith('lang_'):
+                language_support = '?'
            elif safesearch and checker_test_name == 'safesearch':
                safesearch = '?'
            elif time_range_support and checker_test_name == 'time_range':
                time_range_support = '?'
        supports[e.name] = {
-            'supports_selected_language': supports_selected_language,
+            'language_support': language_support,
            'safesearch': safesearch,
            'time_range_support': time_range_support,
        }
@ -1088,16 +1087,6 @@ def preferences():
    )


-def _is_selected_language_supported(engine, preferences: Preferences):  # pylint: disable=redefined-outer-name
-    language = preferences.get_value('language')
-    if language == 'all':
-        return True
-    x = match_language(
-        language, getattr(engine, 'supported_languages', []), getattr(engine, 'language_aliases', {}), None
-    )
-    return bool(x)
-
-
@app.route('/image_proxy', methods=['GET'])
 def image_proxy():
    # pylint: disable=too-many-return-statements, too-many-branches
@ -1316,10 +1305,6 @@ def config():
        if not request.preferences.validate_token(engine):
            continue

-        supported_languages = engine.supported_languages
-        if isinstance(engine.supported_languages, dict):
-            supported_languages = list(engine.supported_languages.keys())
-
        _engines.append(
            {
                'name': name,
@ -1328,7 +1313,6 @@ def config():
                'enabled': not engine.disabled,
                'paging': engine.paging,
                'language_support': engine.language_support,
-                'supported_languages': supported_languages,
                'safesearch': engine.safesearch,
                'time_range_support': engine.time_range_support,
                'timeout': engine.timeout,
--- a/searxng_extra/update/update_engine_locales.py
+++ b/searxng_extra/update/update_engine_locales.py
@ -0,0 +1,500 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+# pyright: basic
+"""This script fetches engine data from the engines' ``engine_locales_url`` and updates:
+
+- :py:obj:`write_languages_file` updates :origin:`searx/languages.py`
+- :py:obj:`fetch_engine_data` updates :origin:`searx/data/engines_datas.json`
+
+This script is triggered by CI in job :origin:`updateData
+<.github/workflows/data-update.yml>`.
+"""
+
+# pylint: disable=invalid-name
+import json
+from unicodedata import lookup
+from pprint import pformat
+from pathlib import Path
+from typing import Dict, Generator, List, Set, Tuple, Union, Optional
+from typing_extensions import TypedDict, NotRequired
+
+from babel import Locale, UnknownLocaleError
+from babel.languages import get_global  # type: ignore
+from babel.core import parse_locale
+
+from searx import settings, searx_dir
+from searx import network
+from searx.data import data_dir
+from searx.engines import (
+    load_engines,
+    engines,
+    EngineLocales,
+)
+from searx.utils import gen_useragent
+
+
+class EngineLanguageDescDict(TypedDict):
+    """In data/engines_languages.json, for google, wikipedia and wikidata engines:
+    value of the dictionaries"""
+
+    name: str
+    english_name: NotRequired[str]
+
+
+EngineLanguageDesc = Union[List[str], Dict[str, EngineLanguageDescDict]]
+"""In data/engines_languages.json, type for an engine:
+
+* either it is a list
+* or a dictionary"""
+
+EngineLanguageDict = Dict[str, EngineLanguageDesc]
+"""Type description for data/engines_languages.json"""
+
+EngineLocalesDict = Dict[str, EngineLocales]
+"""Type description for data/engine_data.json"""
+
+
+def fetch_engine_locales() -> Tuple[EngineLocalesDict, EngineLanguageDict]:
+    """Fetch :class:`EngineLocales` for each engine and persist JSON in file.
+
+    The script checks all engines for a function::
+
+      def _fetch_engine_locales(resp, engine_locales):
+          ...
+
+    and a variable named ``engine_locales_url``.  The HTTP GET response of
+    ``engine_locales_url`` is passed to the ``_fetch_engine_locales`` function
+    including an instance of :py:obj:`searx.engines.EngineLocales`.
+
+    .. hint::
+
+      This implementation is backward compatible and supports the (deprecated)
+      ``_fetch_supported_languages`` interface.
+
+      In the long term the deprecated implementations in the engines will be
+      replaced by ``_fetch_engine_locales``."""
+
+    network.set_timeout_for_thread(10.0)
+    engine_locales_dict: EngineLocalesDict = {}
+    engines_languages: EngineLanguageDict = {}
+    names = list(engines)
+    names.sort()
+
+    # The headers have been moved here from commit 9b6ffed06: Some engines (at
+    # least bing and startpage) return a different result list of supported
+    # languages depending on the IP location where the HTTP request comes from.
+    # The IP based results (from bing) can be avoided by setting a
+    # 'Accept-Language' in the HTTP request.
+
+    headers = {
+        'User-Agent': gen_useragent(),
+        'Accept-Language': "en-US,en;q=0.5",  # bing needs to set the English language
+    }
+
+    for engine_name in names:
+        engine = engines[engine_name]
+
+        fetch_locales = getattr(engine, '_fetch_engine_locales', None)
+        # deprecated: _fetch_supported_languages
+        fetch_languages = getattr(engine, '_fetch_supported_languages', None)
+
+        if fetch_locales is not None:
+            resp = network.get(engine.engine_locales_url, headers=headers)  # type: ignore
+            engine_data = EngineLocales()
+            fetch_locales(resp, engine_data)
+            engine_locales_dict[engine_name] = engine_data
+            print(
+                "%-20s: %3s language(s), %3s region(s)"
+                % (engine_name, len(engine_data.languages), len(engine_data.regions))
+            )
+        elif fetch_languages is not None:
+            print(engine_name)
+            resp = network.get(engine.supported_languages_url, headers=headers)  # type: ignore
+            engines_languages[engine_name] = fetch_languages(resp)
+            print(
+                "%-20s: %3s languages using deprecated _fetch_supported_languages"
+                % (engine_name, len(engines_languages[engine_name]))
+            )
+            if type(engines_languages[engine_name]) == list:  # pylint: disable=unidiomatic-typecheck
+                engines_languages[engine_name] = sorted(engines_languages[engine_name])
+
+    return engine_locales_dict, engines_languages
+
+
+# Get babel Locale object from lang_code if possible.
+def get_locale(lang_code: str) -> Optional[Locale]:
+    try:
+        locale = Locale.parse(lang_code, sep='-')
+        return locale
+    except (UnknownLocaleError, ValueError):
+        return None
+
+
+lang2emoji = {
+    'ha': '\U0001F1F3\U0001F1EA',  # Hausa / Niger
+    'bs': '\U0001F1E7\U0001F1E6',  # Bosnian / Bosnia & Herzegovina
+    'jp': '\U0001F1EF\U0001F1F5',  # Japanese
+    'ua': '\U0001F1FA\U0001F1E6',  # Ukrainian
+    'he': '\U0001F1EE\U0001F1F7',  # Hebrew
+}
+
+
+def get_unicode_flag(lang_code: str) -> Optional[str]:
+    """Determine a unicode flag (emoji) that fits to the ``lang_code``"""
+
+    emoji = lang2emoji.get(lang_code.lower())
+    if emoji:
+        return emoji
+
+    if len(lang_code) == 2:
+        return '\U0001F310'
+
+    language = territory = script = variant = ''
+    try:
+        language, territory, script, variant = parse_locale(lang_code, '-')
+    except ValueError as exc:
+        print(exc)
+
+    # https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
+    if not territory:
+        # https://www.unicode.org/emoji/charts/emoji-list.html#country-flag
+        emoji = lang2emoji.get(language)
+        if not emoji:
+            print(
+                "%s --> language: %s / territory: %s / script: %s / variant: %s"
+                % (lang_code, language, territory, script, variant)
+            )
+        return emoji
+
+    emoji = lang2emoji.get(territory.lower())
+    if emoji:
+        return emoji
+
+    try:
+        c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[0])
+        c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[1])
+        # print("%s --> territory: %s --> %s%s" %(lang_code, territory, c1, c2 ))
+    except KeyError as exc:
+        print("%s --> territory: %s --> %s" % (lang_code, territory, exc))
+        return None
+
+    return c1 + c2
+
+
+def get_territory_name(lang_code: str) -> Optional[str]:
+    country_name = None
+    locale = get_locale(lang_code)
+    try:
+        if locale is not None:
+            country_name = locale.get_territory_name()
+    except FileNotFoundError as exc:
+        print("ERROR: %s --> %s" % (locale, exc))
+    return country_name
+
+
+def iter_engine_codes(
+    engine_data_dict: EngineLocalesDict, engines_languages: EngineLanguageDict
+) -> Generator[Tuple[str, List[str]], None, None]:
+    """Iterator returning tuples:
+
+    -  first element is the engine name
+    -  second element is a list of language codes (the ones from the engines)
+
+    The function iterates first over the engines from engine_data_dict,
+    then it iterates over the engines from engines_languages.
+    """
+    for engine_name in engine_data_dict:
+        engine = engines[engine_name]
+        engine_data = engine_data_dict[engine_name]
+
+        # items of type 'engine_data' do have regions & languages, the list
+        # of engine_codes should contain both.
+
+        engine_codes = list(engine_data.regions.keys())
+        engine_codes.extend(engine_data.languages.keys())
+        yield engine_name, engine_codes
+
+    for engine_name, engine_languages in engines_languages.items():
+        engine = engines[engine_name]
+        language_aliases_values = getattr(engine, 'language_aliases', {}).values()
+        engine_codes: List[str] = []
+        for lang_code in engine_languages:
+            if lang_code in language_aliases_values:
+                # pylint: disable=stop-iteration-return
+                # we are sure that next(...) won't raise a StopIteration exception
+                # because of the "if" statement just above
+                lang_code = next(lc for lc, alias in engine.language_aliases.items() if lang_code == alias)
+                # pylint: enable=stop-iteration-return
+            engine_codes.append(lang_code)
+        yield engine_name, engine_codes
+
+
+class CountryInfo(TypedDict):
+    """Country name with a set of engine names.
+    Used exclusively in JoinLanguageResult"""
+
+    country_name: str
+    """Name of the country"""
+
+    engine_names: Set[str]
+    """Engine names which use the language & country"""
+
+
+class JoinLanguageResult(TypedDict):
+    """Result of join_language_lists"""
+
+    name: Optional[str]
+    """Native name of the language"""
+
+    english_name: Optional[str]
+    """English name of the language"""
+
+    engine_names: Set
+    """Engine names which use this language"""
+
+    countries: Dict[str, CountryInfo]
+    """Possible country codes for this language"""
+
+
+def join_language_lists(
+    engine_data_dict: EngineLocalesDict, engines_languages: EngineLanguageDict
+) -> Dict[str, JoinLanguageResult]:
+    """Join all languages of the engines into one list.  The returned language list
+    contains language codes (``zh``) and region codes (``zh-TW``).  The codes can
+    be parsed by babel::
+
+      babel.Locale.parse(language_list[n])
+
+    """
+    language_list: Dict[str, JoinLanguageResult] = {}
+    name_from_babel = set()
+    name_from_wikipedia = set()
+    name_not_found = set()
+
+    for engine_name, engine_codes in iter_engine_codes(engine_data_dict, engines_languages):
+        for lang_code in engine_codes:
+
+            locale = get_locale(lang_code)
+
+            # ensure that lang_code uses standard language and country codes
+            if locale and locale.territory:
+                lang_code = "{lang}-{country}".format(lang=locale.language, country=locale.territory)
+            short_code = lang_code.split('-')[0]
+
+            # add language without country if not in list
+            if short_code not in language_list:
+                if locale:
+                    # get language's data from babel's Locale object
+                    language_name = locale.get_language_name().title()
+                    english_name = locale.english_name.split(' (')[0]
+                    name_from_babel.add(short_code)
+                elif short_code in engines_languages['wikipedia'] and isinstance(engines_languages['wikipedia'], dict):
+                    # get language's data from wikipedia if not known by babel
+                    language_name = engines_languages['wikipedia'][short_code]['name']
+                    english_name = engines_languages['wikipedia'][short_code].get('english_name')
+                    name_from_wikipedia.add(short_code)
+                else:
+                    language_name = None
+                    english_name = None
+                    name_not_found.add(short_code)
+
+                # add language to list
+                language_list[short_code] = {
+                    'name': language_name,
+                    'english_name': english_name,
+                    'engine_names': set(),
+                    'countries': {},
+                }
+
+            # add language with country if not in list
+            if lang_code != short_code and lang_code not in language_list[short_code]['countries']:
+                country_name = ''
+                if locale:
+                    # get country name from babel's Locale object
+                    try:
+                        country_name = locale.get_territory_name()
+                    except FileNotFoundError as exc:
+                        print("ERROR: %s --> %s" % (locale, exc))
+                        locale = None
+
+                language_list[short_code]['countries'][lang_code] = {
+                    'country_name': country_name,
+                    'engine_names': set(),
+                }
+
+            # count engine for both language_country combination and language alone
+            language_list[short_code]['engine_names'].add(engine_name)
+            if lang_code != short_code:
+                language_list[short_code]['countries'][lang_code]['engine_names'].add(engine_name)
+
+    def set_to_list(engine_name_set: Set) -> str:
+        return ', '.join(sorted(list(engine_name_set)))
+
+    print('')
+    print('%s name(s) found with Babel: %s\n' % (len(name_from_babel), set_to_list(name_from_babel)))
+    print('%s name(s) found with Wikipedia: %s\n' % (len(name_from_wikipedia), set_to_list(name_from_wikipedia)))
+    print('%s name(s) not found: %s\n' % (len(name_not_found), set_to_list(name_not_found)))
+
+    return language_list
+
+
+class LanguageCountryName(TypedDict):
+    """filter_language_list returns a dictionary:
+    * the keys are the language codes
+    * the value is described in this type
+    """
+
+    name: Optional[str]
+    english_name: Optional[str]
+    country_name: NotRequired[str]
+
+
+def filter_language_list(all_languages: Dict[str, JoinLanguageResult]) -> Dict[str, LanguageCountryName]:
+    """Filter language list so it only includes the most supported languages and
+    countries.
+
+    A language is kept when at least ``min_engines_per_lang`` engines support
+    it, or when it is supported by every enabled engine of the ``general``
+    category that has a non empty ``supported_languages``.  For each kept
+    language the result contains:
+
+    * every language-country code supported by at least
+      ``min_engines_per_country`` engines, plus the bare language code when more
+      than one such country code exists, or
+    * when no country code qualifies, one single fallback entry: the
+      ``<lang>-<country>`` code derived from babel's ``likely_subtags`` if that
+      yields a two character country code, otherwise the bare language code.
+
+    :param all_languages: per language code, the joined data of all engines
+        (``name``, ``english_name``, ``engine_names`` and ``countries``).
+    :return: dictionary of language / language-country codes and their names.
+    """
+    min_engines_per_lang = 12
+    min_engines_per_country = 7
+    main_engines = {
+        engine_name
+        for engine_name, engine in engines.items()
+        if 'general' in engine.categories
+        and getattr(engine, 'supported_languages', None)
+        and not engine.disabled
+    }
+
+    # loop-invariant lookup: fetch babel's table once instead of once per language
+    likely_subtags = get_global('likely_subtags')
+
+    def _new_language_country_name(lang: str, country_name: Optional[str]) -> LanguageCountryName:
+        new_dict: LanguageCountryName = {
+            'name': all_languages[lang]['name'],
+            'english_name': all_languages[lang]['english_name'],
+        }
+        if country_name:
+            new_dict['country_name'] = country_name
+        return new_dict
+
+    filtered_languages_with_countries: Dict[str, LanguageCountryName] = {}
+    for lang, lang_data in all_languages.items():
+
+        # keep only languages supported by most engines or all default general engines
+        engine_names = lang_data['engine_names']
+        if len(engine_names) < min_engines_per_lang and not main_engines.issubset(engine_names):
+            continue
+
+        # get language's country codes with enough supported engines
+        filtered_countries: Dict[str, LanguageCountryName] = {
+            lang_country: _new_language_country_name(lang, country_data['country_name'])
+            for lang_country, country_data in lang_data['countries'].items()
+            if len(country_data['engine_names']) >= min_engines_per_country
+        }
+
+        if len(filtered_countries) > 1:
+            # add language without countries too if there's more than one country to choose from
+            filtered_countries[lang] = _new_language_country_name(lang, None)
+        elif not filtered_countries:
+            # if no country has enough engines try to get most likely country code from babel,
+            # fall back to the bare language code
+            fallback_code = lang
+            subtags = likely_subtags.get(lang)
+            if subtags:
+                country_code = subtags.split('_')[-1]
+                if len(country_code) == 2:
+                    fallback_code = f"{lang}-{country_code}"
+            filtered_countries[fallback_code] = _new_language_country_name(lang, None)
+
+        filtered_languages_with_countries.update(filtered_countries)
+
+    return filtered_languages_with_countries
+
+
+def write_engine_data(file_name, engine_data_dict: EngineLocalesDict):
+    """Dump the ``regions`` and ``languages`` of each engine into the JSON file
+    ``file_name`` (indented by 2, keys sorted, UTF-8 encoded)."""
+    serializable = {}
+    for name, locales in engine_data_dict.items():
+        serializable[name] = {
+            'regions': locales.regions,
+            'languages': locales.languages,
+        }
+
+    with open(file_name, 'w', encoding='utf-8') as json_file:
+        json.dump(serializable, json_file, indent=2, sort_keys=True)
+
+
+def write_engines_languages(file_name, engines_languages: EngineLanguageDict):
+    """Dump ``engines_languages`` into the JSON file ``file_name`` (indented by
+    2, keys sorted, UTF-8 encoded)."""
+    with open(file_name, 'w', encoding='utf-8') as json_file:
+        json.dump(engines_languages, fp=json_file, indent=2, sort_keys=True)
+
+
+class UnicodeEscape(str):
+    """String whose ``repr()`` is the single-quoted, unicode-escaped text.
+
+    Used to escape unicode strings in :py:obj:`pprint.pformat`.
+    """
+
+    def __repr__(self):
+        # the ``unicode-escape`` codec emits bytes; latin-1 maps each byte to the
+        # code point of the same value, which is what ``chr()`` does byte by byte
+        escaped = self.encode('unicode-escape').decode('latin-1')
+        return "'" + escaped + "'"
+
+
+# Write languages.py.
+def write_languages_file(language_file, languages: Dict[str, LanguageCountryName]):
+    """Generates :origin:`searx/languages.py`.
+
+    Each entry of ``languages`` becomes one tuple ``(code, name, territory name,
+    english name, flag)`` of the generated ``language_codes`` tuple, entries are
+    sorted by code.  Entries whose ``name`` is ``None`` are reported on stdout
+    and skipped.
+
+    :param language_file: path of the python file to (over)write.
+    :param languages: dictionary of language codes and their names.
+    """
+
+    file_headers = (
+        "# -*- coding: utf-8 -*-",
+        "# list of language codes",
+        "# this file is generated automatically by:",
+        "#",
+        # has to name the script generating the file (update_languages.py has been removed)
+        "#   ./manage pyenv.cmd searxng_extra/update/update_engine_locales.py",
+        "language_codes = (\n",
+    )
+
+    language_codes = []
+
+    for code in sorted(languages):
+
+        name = languages[code]['name']
+        if name is None:
+            print("ERROR: languages['%s'] --> %s" % (code, languages[code]))
+            continue
+
+        flag = get_unicode_flag(code) or ''
+        language_codes.append(
+            (
+                code,
+                # drop a trailing " (...)" suffix from the language name
+                name.split(' (')[0],
+                get_territory_name(code) or '',
+                languages[code].get('english_name') or '',
+                # UnicodeEscape makes pformat() emit the flag as escape sequences
+                UnicodeEscape(flag),
+            )
+        )
+
+    header = '\n'.join(file_headers)
+    # strip the enclosing parentheses of the tuple's repr: the opening one is
+    # part of the header, the closing one is written below
+    items = pformat(tuple(language_codes), indent=4)[1:-1]
+
+    with open(language_file, 'w', encoding='utf-8') as new_file:
+        new_file.write(f"{header} {items},\n)\n")
+
+
+if __name__ == "__main__":
+    # load the engines configured in the settings
+    load_engines(settings['engines'])
+    # fetch_engine_locales() returns two results: the engine locales and the engines languages
+    _engine_locales_dict, _engines_languages = fetch_engine_locales()
+    # join both results into one language list, then keep only the most supported languages
+    _all_languages = join_language_lists(_engine_locales_dict, _engines_languages)
+    _filtered_languages = filter_language_list(_all_languages)
+    # write the output files: two JSON files in data_dir and searx/languages.py
+    write_engine_data(data_dir / 'engine_locales.json', _engine_locales_dict)
+    write_engines_languages(data_dir / 'engines_languages.json', _engines_languages)
+    write_languages_file(Path(searx_dir) / 'languages.py', _filtered_languages)
--- a/searxng_extra/update/update_languages.py
+++ b/searxng_extra/update/update_languages.py
@ -1,313 +0,0 @@
-#!/usr/bin/env python
-# lint: pylint
-
-# SPDX-License-Identifier: AGPL-3.0-or-later
-"""This script generates languages.py from intersecting each engine's supported
-languages.
-
-Output files: :origin:`searx/data/engines_languages.json` and
-:origin:`searx/languages.py` (:origin:`CI Update data ...
-<.github/workflows/data-update.yml>`).
-
-"""
-
-# pylint: disable=invalid-name
-from unicodedata import lookup
-import json
-from pathlib import Path
-from pprint import pformat
-from babel import Locale, UnknownLocaleError
-from babel.languages import get_global
-from babel.core import parse_locale
-
-from searx import settings, searx_dir
-from searx.engines import load_engines, engines
-from searx.network import set_timeout_for_thread
-
-# Output files.
-engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json'
-languages_file = Path(searx_dir) / 'languages.py'
-
-
-# Fetches supported languages for each engine and writes json file with those.
-def fetch_supported_languages():
-    set_timeout_for_thread(10.0)
-
-    engines_languages = {}
-    names = list(engines)
-    names.sort()
-
-    for engine_name in names:
-        if hasattr(engines[engine_name], 'fetch_supported_languages'):
-            engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
-            print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name))
-            if type(engines_languages[engine_name]) == list:  # pylint: disable=unidiomatic-typecheck
-                engines_languages[engine_name] = sorted(engines_languages[engine_name])
-
-    print("fetched languages from %s engines" % len(engines_languages))
-
-    # write json file
-    with open(engines_languages_file, 'w', encoding='utf-8') as f:
-        json.dump(engines_languages, f, indent=2, sort_keys=True)
-
-    return engines_languages
-
-
-# Get babel Locale object from lang_code if possible.
-def get_locale(lang_code):
-    try:
-        locale = Locale.parse(lang_code, sep='-')
-        return locale
-    except (UnknownLocaleError, ValueError):
-        return None
-
-
-lang2emoji = {
-    'ha': '\U0001F1F3\U0001F1EA',  # Hausa / Niger
-    'bs': '\U0001F1E7\U0001F1E6',  # Bosnian / Bosnia & Herzegovina
-    'jp': '\U0001F1EF\U0001F1F5',  # Japanese
-    'ua': '\U0001F1FA\U0001F1E6',  # Ukrainian
-    'he': '\U0001F1EE\U0001F1F7',  # Hebrew
-}
-
-
-def get_unicode_flag(lang_code):
-    """Determine a unicode flag (emoji) that fits to the ``lang_code``"""
-
-    emoji = lang2emoji.get(lang_code.lower())
-    if emoji:
-        return emoji
-
-    if len(lang_code) == 2:
-        return '\U0001F310'
-
-    language = territory = script = variant = ''
-    try:
-        language, territory, script, variant = parse_locale(lang_code, '-')
-    except ValueError as exc:
-        print(exc)
-
-    # https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
-    if not territory:
-        # https://www.unicode.org/emoji/charts/emoji-list.html#country-flag
-        emoji = lang2emoji.get(language)
-        if not emoji:
-            print(
-                "%s --> language: %s / territory: %s / script: %s / variant: %s"
-                % (lang_code, language, territory, script, variant)
-            )
-        return emoji
-
-    emoji = lang2emoji.get(territory.lower())
-    if emoji:
-        return emoji
-
-    try:
-        c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[0])
-        c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[1])
-        # print("%s --> territory: %s --> %s%s" %(lang_code, territory, c1, c2 ))
-    except KeyError as exc:
-        print("%s --> territory: %s --> %s" % (lang_code, territory, exc))
-        return None
-
-    return c1 + c2
-
-
-def get_territory_name(lang_code):
-    country_name = None
-    locale = get_locale(lang_code)
-    try:
-        if locale is not None:
-            country_name = locale.get_territory_name()
-    except FileNotFoundError as exc:
-        print("ERROR: %s --> %s" % (locale, exc))
-    return country_name
-
-
-# Join all language lists.
-def join_language_lists(engines_languages):
-    language_list = {}
-    for engine_name in engines_languages:
-        for lang_code in engines_languages[engine_name]:
-
-            # apply custom fixes if necessary
-            if lang_code in getattr(engines[engine_name], 'language_aliases', {}).values():
-                lang_code = next(
-                    lc for lc, alias in engines[engine_name].language_aliases.items() if lang_code == alias
-                )
-
-            locale = get_locale(lang_code)
-
-            # ensure that lang_code uses standard language and country codes
-            if locale and locale.territory:
-                lang_code = "{lang}-{country}".format(lang=locale.language, country=locale.territory)
-            short_code = lang_code.split('-')[0]
-
-            # add language without country if not in list
-            if short_code not in language_list:
-                if locale:
-                    # get language's data from babel's Locale object
-                    language_name = locale.get_language_name().title()
-                    english_name = locale.english_name.split(' (')[0]
-                elif short_code in engines_languages['wikipedia']:
-                    # get language's data from wikipedia if not known by babel
-                    language_name = engines_languages['wikipedia'][short_code]['name']
-                    english_name = engines_languages['wikipedia'][short_code]['english_name']
-                else:
-                    language_name = None
-                    english_name = None
-
-                # add language to list
-                language_list[short_code] = {
-                    'name': language_name,
-                    'english_name': english_name,
-                    'counter': set(),
-                    'countries': {},
-                }
-
-            # add language with country if not in list
-            if lang_code != short_code and lang_code not in language_list[short_code]['countries']:
-                country_name = ''
-                if locale:
-                    # get country name from babel's Locale object
-                    try:
-                        country_name = locale.get_territory_name()
-                    except FileNotFoundError as exc:
-                        print("ERROR: %s --> %s" % (locale, exc))
-                        locale = None
-
-                language_list[short_code]['countries'][lang_code] = {
-                    'country_name': country_name,
-                    'counter': set(),
-                }
-
-            # count engine for both language_country combination and language alone
-            language_list[short_code]['counter'].add(engine_name)
-            if lang_code != short_code:
-                language_list[short_code]['countries'][lang_code]['counter'].add(engine_name)
-
-    return language_list
-
-
-# Filter language list so it only includes the most supported languages and countries
-def filter_language_list(all_languages):
-    min_engines_per_lang = 12
-    min_engines_per_country = 7
-    # pylint: disable=consider-using-dict-items, consider-iterating-dictionary
-    main_engines = [
-        engine_name
-        for engine_name in engines.keys()
-        if 'general' in engines[engine_name].categories
-        and engines[engine_name].supported_languages
-        and not engines[engine_name].disabled
-    ]
-
-    # filter list to include only languages supported by most engines or all default general engines
-    filtered_languages = {
-        code: lang
-        for code, lang in all_languages.items()
-        if (
-            len(lang['counter']) >= min_engines_per_lang
-            or all(main_engine in lang['counter'] for main_engine in main_engines)
-        )
-    }
-
-    def _copy_lang_data(lang, country_name=None):
-        new_dict = {}
-        new_dict['name'] = all_languages[lang]['name']
-        new_dict['english_name'] = all_languages[lang]['english_name']
-        if country_name:
-            new_dict['country_name'] = country_name
-        return new_dict
-
-    # for each language get country codes supported by most engines or at least one country code
-    filtered_languages_with_countries = {}
-    for lang, lang_data in filtered_languages.items():
-        countries = lang_data['countries']
-        filtered_countries = {}
-
-        # get language's country codes with enough supported engines
-        for lang_country, country_data in countries.items():
-            if len(country_data['counter']) >= min_engines_per_country:
-                filtered_countries[lang_country] = _copy_lang_data(lang, country_data['country_name'])
-
-        # add language without countries too if there's more than one country to choose from
-        if len(filtered_countries) > 1:
-            filtered_countries[lang] = _copy_lang_data(lang, None)
-        elif len(filtered_countries) == 1:
-            lang_country = next(iter(filtered_countries))
-
-        # if no country has enough engines try to get most likely country code from babel
-        if not filtered_countries:
-            lang_country = None
-            subtags = get_global('likely_subtags').get(lang)
-            if subtags:
-                country_code = subtags.split('_')[-1]
-                if len(country_code) == 2:
-                    lang_country = "{lang}-{country}".format(lang=lang, country=country_code)
-
-            if lang_country:
-                filtered_countries[lang_country] = _copy_lang_data(lang, None)
-            else:
-                filtered_countries[lang] = _copy_lang_data(lang, None)
-
-        filtered_languages_with_countries.update(filtered_countries)
-
-    return filtered_languages_with_countries
-
-
-class UnicodeEscape(str):
-    """Escape unicode string in :py:obj:`pprint.pformat`"""
-
-    def __repr__(self):
-        return "'" + "".join([chr(c) for c in self.encode('unicode-escape')]) + "'"
-
-
-# Write languages.py.
-def write_languages_file(languages):
-    file_headers = (
-        "# -*- coding: utf-8 -*-",
-        "# list of language codes",
-        "# this file is generated automatically by utils/fetch_languages.py",
-        "language_codes = (\n",
-    )
-
-    language_codes = []
-
-    for code in sorted(languages):
-
-        name = languages[code]['name']
-        if name is None:
-            print("ERROR: languages['%s'] --> %s" % (code, languages[code]))
-            continue
-
-        flag = get_unicode_flag(code) or ''
-        item = (
-            code,
-            languages[code]['name'].split(' (')[0],
-            get_territory_name(code) or '',
-            languages[code].get('english_name') or '',
-            UnicodeEscape(flag),
-        )
-
-        language_codes.append(item)
-
-    language_codes = tuple(language_codes)
-
-    with open(languages_file, 'w', encoding='utf-8') as new_file:
-        file_content = "{file_headers} {language_codes},\n)\n".format(
-            # fmt: off
-            file_headers = '\n'.join(file_headers),
-            language_codes = pformat(language_codes, indent=4)[1:-1]
-            # fmt: on
-        )
-        new_file.write(file_content)
-        new_file.close()
-
-
-if __name__ == "__main__":
-    load_engines(settings['engines'])
-    _engines_languages = fetch_supported_languages()
-    _all_languages = join_language_lists(_engines_languages)
-    _filtered_languages = filter_language_list(_all_languages)
-    write_languages_file(_filtered_languages)