[fix] make data.traits - partial revert of commit 30a8204

The entries in the catalog of search languages are built up from the "Engine
Traits" [1], and which entries are included in the catalog is controlled by two
threshold values [2].

If possible, the values should ensure that no languages or regions disappear
from the catalog of search languages [3].

The threshold values should have been adjusted in commit 30a8204:

- ``min_eng_per_region = 18``
- ``min_eng_per_lang = 22``

Because the threshold values were not adjusted, many entries were missing from
the search language catalog.  This patch fixes the bug: the threshold values
have been adjusted and the catalog of search languages is complete again.

[1] https://docs.searxng.org/dev/engines/enginelib.html#module-searx.enginelib.traits
[2] https://github.com/searxng/searxng/blob/96a6e3dcb2/searxng_extra/update/update_engine_traits.py#L104-L105
[3] https://github.com/searxng/searxng/blob/master/searx/sxng_locales.py

Closes: https://github.com/searxng/searxng/issues/4519
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2025-03-21 10:34:15 +01:00 committed by Markus Heiser
parent 7e680d8e8e
commit 5986629c6b
4 changed files with 91 additions and 5 deletions

View File

@ -6680,6 +6680,7 @@
"custom": {}, "custom": {},
"data_type": "traits_v1", "data_type": "traits_v1",
"languages": { "languages": {
"ar": "ar",
"ca": "ca", "ca": "ca",
"cs": "cs", "cs": "cs",
"de": "de", "de": "de",
@ -6688,17 +6689,30 @@
"eo": "eo", "eo": "eo",
"es": "es", "es": "es",
"eu": "eu", "eu": "eu",
"fa": "fa",
"fi": "fi", "fi": "fi",
"fr": "fr", "fr": "fr",
"gd": "gd", "gd": "gd",
"gl": "gl",
"hr": "hr",
"hu": "hu",
"is": "is",
"it": "it", "it": "it",
"ja": "ja", "ja": "ja",
"kab": "kab",
"nl": "nl", "nl": "nl",
"no": "no",
"pl": "pl", "pl": "pl",
"pt": "pt", "pt": "pt",
"ru": "ru", "ru": "ru",
"sk": "sk",
"sq": "sq",
"sv": "sv", "sv": "sv",
"zh": "zh", "th": "th",
"tok": "tok",
"tr": "tr",
"uk": "uk",
"vi": "vi",
"zh_Hans": "zh", "zh_Hans": "zh",
"zh_Hant": "zh" "zh_Hant": "zh"
}, },
@ -6915,6 +6929,7 @@
"BQ", "BQ",
"BR", "BR",
"BS", "BS",
"BT",
"BW", "BW",
"BY", "BY",
"BZ", "BZ",
@ -6937,6 +6952,7 @@
"CY", "CY",
"CZ", "CZ",
"DE", "DE",
"DJ",
"DK", "DK",
"DM", "DM",
"DO", "DO",
@ -6950,8 +6966,10 @@
"FI", "FI",
"FJ", "FJ",
"FK", "FK",
"FM",
"FO", "FO",
"FR", "FR",
"GA",
"GB", "GB",
"GD", "GD",
"GE", "GE",
@ -6961,6 +6979,7 @@
"GI", "GI",
"GL", "GL",
"GM", "GM",
"GN",
"GP", "GP",
"GQ", "GQ",
"GR", "GR",
@ -6989,6 +7008,7 @@
"KE", "KE",
"KG", "KG",
"KH", "KH",
"KI",
"KM", "KM",
"KN", "KN",
"KP", "KP",
@ -7001,6 +7021,7 @@
"LC", "LC",
"LI", "LI",
"LK", "LK",
"LR",
"LS", "LS",
"LT", "LT",
"LU", "LU",
@ -7011,15 +7032,18 @@
"MD", "MD",
"ME", "ME",
"MG", "MG",
"MH",
"MK", "MK",
"ML", "ML",
"MM", "MM",
"MN", "MN",
"MO", "MO",
"MQ", "MQ",
"MR",
"MS", "MS",
"MT", "MT",
"MU", "MU",
"MV",
"MW", "MW",
"MX", "MX",
"MY", "MY",
@ -7032,6 +7056,7 @@
"NL", "NL",
"NO", "NO",
"NP", "NP",
"NR",
"NU", "NU",
"NZ", "NZ",
"OM", "OM",
@ -7055,6 +7080,7 @@
"RU", "RU",
"RW", "RW",
"SA", "SA",
"SB",
"SC", "SC",
"SD", "SD",
"SE", "SE",
@ -7082,8 +7108,10 @@
"TL", "TL",
"TM", "TM",
"TN", "TN",
"TO",
"TR", "TR",
"TT", "TT",
"TV",
"TW", "TW",
"TZ", "TZ",
"UA", "UA",
@ -7239,6 +7267,7 @@
"custom": {}, "custom": {},
"data_type": "traits_v1", "data_type": "traits_v1",
"languages": { "languages": {
"ar": "ar",
"ca": "ca", "ca": "ca",
"cs": "cs", "cs": "cs",
"de": "de", "de": "de",
@ -7247,17 +7276,30 @@
"eo": "eo", "eo": "eo",
"es": "es", "es": "es",
"eu": "eu", "eu": "eu",
"fa": "fa",
"fi": "fi", "fi": "fi",
"fr": "fr", "fr": "fr",
"gd": "gd", "gd": "gd",
"gl": "gl",
"hr": "hr",
"hu": "hu",
"is": "is",
"it": "it", "it": "it",
"ja": "ja", "ja": "ja",
"kab": "kab",
"nl": "nl", "nl": "nl",
"no": "no",
"pl": "pl", "pl": "pl",
"pt": "pt", "pt": "pt",
"ru": "ru", "ru": "ru",
"sk": "sk",
"sq": "sq",
"sv": "sv", "sv": "sv",
"zh": "zh", "th": "th",
"tok": "tok",
"tr": "tr",
"uk": "uk",
"vi": "vi",
"zh_Hans": "zh", "zh_Hans": "zh",
"zh_Hant": "zh" "zh_Hant": "zh"
}, },

View File

@ -10,6 +10,8 @@ used.
""" """
from __future__ import annotations from __future__ import annotations
import os
import json import json
import dataclasses import dataclasses
import types import types
@ -219,8 +221,20 @@ class EngineTraitsMap(Dict[str, EngineTraits]):
for engine_name in names: for engine_name in names:
engine = engines.engines[engine_name] engine = engines.engines[engine_name]
traits = None
# pylint: disable=broad-exception-caught
try:
traits = EngineTraits.fetch_traits(engine) traits = EngineTraits.fetch_traits(engine)
except Exception as exc:
log("FATAL: while fetch_traits %s: %s" % (engine_name, exc))
if os.environ.get('FORCE', '').lower() not in ['on', 'true', '1']:
raise
v = ENGINE_TRAITS.get(engine_name)
if v:
log("FORCE: re-use old values from fetch_traits - ENGINE_TRAITS[%s]" % engine_name)
traits = EngineTraits(**v)
if traits is not None: if traits is not None:
log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages))) log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages)))
log("%-20s: SearXNG regions --> %s" % (engine_name, len(traits.regions))) log("%-20s: SearXNG regions --> %s" % (engine_name, len(traits.regions)))

View File

@ -11,9 +11,14 @@
sxng_locales = ( sxng_locales = (
('af', 'Afrikaans', '', 'Afrikaans', '\U0001f310'), ('af', 'Afrikaans', '', 'Afrikaans', '\U0001f310'),
('ar', 'العربية', '', 'Arabic', '\U0001f310'), ('ar', 'العربية', '', 'Arabic', '\U0001f310'),
('ar-SA', 'العربية', 'المملكة العربية السعودية', 'Arabic', '\U0001f1f8\U0001f1e6'),
('be', 'Беларуская', '', 'Belarusian', '\U0001f310'),
('bg', 'Български', '', 'Bulgarian', '\U0001f310'), ('bg', 'Български', '', 'Bulgarian', '\U0001f310'),
('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'),
('ca', 'Català', '', 'Catalan', '\U0001f310'), ('ca', 'Català', '', 'Catalan', '\U0001f310'),
('cs', 'Čeština', '', 'Czech', '\U0001f310'), ('cs', 'Čeština', '', 'Czech', '\U0001f310'),
('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'),
('cy', 'Cymraeg', '', 'Welsh', '\U0001f310'),
('da', 'Dansk', '', 'Danish', '\U0001f310'), ('da', 'Dansk', '', 'Danish', '\U0001f310'),
('da-DK', 'Dansk', 'Danmark', 'Danish', '\U0001f1e9\U0001f1f0'), ('da-DK', 'Dansk', 'Danmark', 'Danish', '\U0001f1e9\U0001f1f0'),
('de', 'Deutsch', '', 'German', '\U0001f310'), ('de', 'Deutsch', '', 'German', '\U0001f310'),
@ -21,6 +26,7 @@ sxng_locales = (
('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'), ('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'),
('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'), ('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'),
('el', 'Ελληνικά', '', 'Greek', '\U0001f310'), ('el', 'Ελληνικά', '', 'Greek', '\U0001f310'),
('el-GR', 'Ελληνικά', 'Ελλάδα', 'Greek', '\U0001f1ec\U0001f1f7'),
('en', 'English', '', 'English', '\U0001f310'), ('en', 'English', '', 'English', '\U0001f310'),
('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'), ('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'),
('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'), ('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'),
@ -29,13 +35,21 @@ sxng_locales = (
('en-IN', 'English', 'India', 'English', '\U0001f1ee\U0001f1f3'), ('en-IN', 'English', 'India', 'English', '\U0001f1ee\U0001f1f3'),
('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'), ('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'),
('en-PH', 'English', 'Philippines', 'English', '\U0001f1f5\U0001f1ed'), ('en-PH', 'English', 'Philippines', 'English', '\U0001f1f5\U0001f1ed'),
('en-PK', 'English', 'Pakistan', 'English', '\U0001f1f5\U0001f1f0'),
('en-SG', 'English', 'Singapore', 'English', '\U0001f1f8\U0001f1ec'),
('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'), ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
('en-ZA', 'English', 'South Africa', 'English', '\U0001f1ff\U0001f1e6'), ('en-ZA', 'English', 'South Africa', 'English', '\U0001f1ff\U0001f1e6'),
('es', 'Español', '', 'Spanish', '\U0001f310'), ('es', 'Español', '', 'Spanish', '\U0001f310'),
('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'), ('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'),
('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'), ('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'),
('es-CO', 'Español', 'Colombia', 'Spanish', '\U0001f1e8\U0001f1f4'),
('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'), ('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'),
('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'), ('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'),
('es-PE', 'Español', 'Perú', 'Spanish', '\U0001f1f5\U0001f1ea'),
('et', 'Eesti', '', 'Estonian', '\U0001f310'),
('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'),
('eu', 'Euskara', '', 'Basque', '\U0001f310'),
('fa', 'فارسی', '', 'Persian', '\U0001f310'),
('fi', 'Suomi', '', 'Finnish', '\U0001f310'), ('fi', 'Suomi', '', 'Finnish', '\U0001f310'),
('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'), ('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'),
('fr', 'Français', '', 'French', '\U0001f310'), ('fr', 'Français', '', 'French', '\U0001f310'),
@ -43,20 +57,29 @@ sxng_locales = (
('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'), ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'), ('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'),
('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'), ('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'),
('ga', 'Gaeilge', '', 'Irish', '\U0001f310'),
('gd', 'Gàidhlig', '', 'Scottish Gaelic', '\U0001f310'),
('gl', 'Galego', '', 'Galician', '\U0001f310'), ('gl', 'Galego', '', 'Galician', '\U0001f310'),
('he', 'עברית', '', 'Hebrew', '\U0001f1ee\U0001f1f1'), ('he', 'עברית', '', 'Hebrew', '\U0001f1ee\U0001f1f1'),
('hi', 'हिन्दी', '', 'Hindi', '\U0001f310'),
('hr', 'Hrvatski', '', 'Croatian', '\U0001f310'), ('hr', 'Hrvatski', '', 'Croatian', '\U0001f310'),
('hu', 'Magyar', '', 'Hungarian', '\U0001f310'), ('hu', 'Magyar', '', 'Hungarian', '\U0001f310'),
('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'),
('id', 'Indonesia', '', 'Indonesian', '\U0001f310'), ('id', 'Indonesia', '', 'Indonesian', '\U0001f310'),
('id-ID', 'Indonesia', 'Indonesia', 'Indonesian', '\U0001f1ee\U0001f1e9'), ('id-ID', 'Indonesia', 'Indonesia', 'Indonesian', '\U0001f1ee\U0001f1e9'),
('is', 'Íslenska', '', 'Icelandic', '\U0001f310'),
('it', 'Italiano', '', 'Italian', '\U0001f310'), ('it', 'Italiano', '', 'Italian', '\U0001f310'),
('it-CH', 'Italiano', 'Svizzera', 'Italian', '\U0001f1e8\U0001f1ed'), ('it-CH', 'Italiano', 'Svizzera', 'Italian', '\U0001f1e8\U0001f1ed'),
('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'), ('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'),
('ja', '日本語', '', 'Japanese', '\U0001f310'), ('ja', '日本語', '', 'Japanese', '\U0001f310'),
('ja-JP', '日本語', '日本', 'Japanese', '\U0001f1ef\U0001f1f5'), ('ja-JP', '日本語', '日本', 'Japanese', '\U0001f1ef\U0001f1f5'),
('kn', 'ಕನ್ನಡ', '', 'Kannada', '\U0001f310'),
('ko', '한국어', '', 'Korean', '\U0001f310'), ('ko', '한국어', '', 'Korean', '\U0001f310'),
('ko-KR', '한국어', '대한민국', 'Korean', '\U0001f1f0\U0001f1f7'), ('ko-KR', '한국어', '대한민국', 'Korean', '\U0001f1f0\U0001f1f7'),
('lt', 'Lietuvių', '', 'Lithuanian', '\U0001f310'), ('lt', 'Lietuvių', '', 'Lithuanian', '\U0001f310'),
('lv', 'Latviešu', '', 'Latvian', '\U0001f310'),
('ml', 'മലയാളം', '', 'Malayalam', '\U0001f310'),
('mr', 'मराठी', '', 'Marathi', '\U0001f310'),
('nb', 'Norsk Bokmål', '', 'Norwegian Bokmål', '\U0001f310'), ('nb', 'Norsk Bokmål', '', 'Norwegian Bokmål', '\U0001f310'),
('nb-NO', 'Norsk Bokmål', 'Norge', 'Norwegian Bokmål', '\U0001f1f3\U0001f1f4'), ('nb-NO', 'Norsk Bokmål', 'Norge', 'Norwegian Bokmål', '\U0001f1f3\U0001f1f4'),
('nl', 'Nederlands', '', 'Dutch', '\U0001f310'), ('nl', 'Nederlands', '', 'Dutch', '\U0001f310'),
@ -68,17 +91,24 @@ sxng_locales = (
('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'), ('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'),
('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'), ('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'),
('ro', 'Română', '', 'Romanian', '\U0001f310'), ('ro', 'Română', '', 'Romanian', '\U0001f310'),
('ro-RO', 'Română', 'România', 'Romanian', '\U0001f1f7\U0001f1f4'),
('ru', 'Русский', '', 'Russian', '\U0001f310'), ('ru', 'Русский', '', 'Russian', '\U0001f310'),
('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'), ('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'),
('sk', 'Slovenčina', '', 'Slovak', '\U0001f310'), ('sk', 'Slovenčina', '', 'Slovak', '\U0001f310'),
('sl', 'Slovenščina', '', 'Slovenian', '\U0001f310'),
('sq', 'Shqip', '', 'Albanian', '\U0001f310'), ('sq', 'Shqip', '', 'Albanian', '\U0001f310'),
('sv', 'Svenska', '', 'Swedish', '\U0001f310'), ('sv', 'Svenska', '', 'Swedish', '\U0001f310'),
('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'), ('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'),
('ta', 'தமிழ்', '', 'Tamil', '\U0001f310'),
('te', 'తెలుగు', '', 'Telugu', '\U0001f310'),
('th', 'ไทย', '', 'Thai', '\U0001f310'), ('th', 'ไทย', '', 'Thai', '\U0001f310'),
('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'),
('tr', 'Türkçe', '', 'Turkish', '\U0001f310'), ('tr', 'Türkçe', '', 'Turkish', '\U0001f310'),
('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'), ('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'),
('uk', 'Українська', '', 'Ukrainian', '\U0001f310'), ('uk', 'Українська', '', 'Ukrainian', '\U0001f310'),
('ur', 'اردو', '', 'Urdu', '\U0001f310'),
('vi', 'Tiếng Việt', '', 'Vietnamese', '\U0001f310'), ('vi', 'Tiếng Việt', '', 'Vietnamese', '\U0001f310'),
('vi-VN', 'Tiếng Việt', 'Việt Nam', 'Vietnamese', '\U0001f1fb\U0001f1f3'),
('zh', '中文', '', 'Chinese', '\U0001f310'), ('zh', '中文', '', 'Chinese', '\U0001f310'),
('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'), ('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'),
('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'), ('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'),

View File

@ -101,8 +101,8 @@ def fetch_traits_map():
def filter_locales(traits_map: EngineTraitsMap): def filter_locales(traits_map: EngineTraitsMap):
"""Filter language & region tags by a threshold.""" """Filter language & region tags by a threshold."""
min_eng_per_region = 22 min_eng_per_region = 18
min_eng_per_lang = 24 min_eng_per_lang = 22
_ = {} _ = {}
for eng in traits_map.values(): for eng in traits_map.values():