From 5986629c6bfe0fd800f30913f59b2cc43c9e58b6 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 21 Mar 2025 10:34:15 +0100 Subject: [PATCH] [fix] make data.traits - partial revert of commit 30a8204 The entries in the catalog of search languages are built up from the "Engine Traits" [1] and which entries are included in the catalog is controlled by two threshold values [2]. If possible, the values should ensure that no languages or regions disappear from the catalog of search languages [3]. The threshold values should have been adjusted in commit 30a8204: - ``min_eng_per_region = 18`` - ``min_eng_per_lang = 22`` Because the threshold values were not adjusted, many entries were missing in the search language catalog. This bug has been fixed with this patch: the threshold values have been adjusted and the catalog of search languages has been completed again. [1] https://docs.searxng.org/dev/engines/enginelib.html#module-searx.enginelib.traits [2] https://github.com/searxng/searxng/blob/96a6e3dcb2283fa7ad9db4172a00582073a166d7/searxng_extra/update/update_engine_traits.py#L104-L105 [3] https://github.com/searxng/searxng/blob/master/searx/sxng_locales.py Closes: https://github.com/searxng/searxng/issues/4519 Signed-off-by: Markus Heiser --- searx/data/engine_traits.json | 46 +++++++++++++++++++- searx/enginelib/traits.py | 16 ++++++- searx/sxng_locales.py | 30 +++++++++++++ searxng_extra/update/update_engine_traits.py | 4 +- 4 files changed, 91 insertions(+), 5 deletions(-) diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json index 41f17ef10..df8f1f295 100644 --- a/searx/data/engine_traits.json +++ b/searx/data/engine_traits.json @@ -6680,6 +6680,7 @@ "custom": {}, "data_type": "traits_v1", "languages": { + "ar": "ar", "ca": "ca", "cs": "cs", "de": "de", @@ -6688,17 +6689,30 @@ "eo": "eo", "es": "es", "eu": "eu", + "fa": "fa", "fi": "fi", "fr": "fr", "gd": "gd", + "gl": "gl", + "hr": "hr", + "hu": "hu", + "is": "is", "it": "it", "ja": "ja", + 
"kab": "kab", "nl": "nl", + "no": "no", "pl": "pl", "pt": "pt", "ru": "ru", + "sk": "sk", + "sq": "sq", "sv": "sv", - "zh": "zh", + "th": "th", + "tok": "tok", + "tr": "tr", + "uk": "uk", + "vi": "vi", "zh_Hans": "zh", "zh_Hant": "zh" }, @@ -6915,6 +6929,7 @@ "BQ", "BR", "BS", + "BT", "BW", "BY", "BZ", @@ -6937,6 +6952,7 @@ "CY", "CZ", "DE", + "DJ", "DK", "DM", "DO", @@ -6950,8 +6966,10 @@ "FI", "FJ", "FK", + "FM", "FO", "FR", + "GA", "GB", "GD", "GE", @@ -6961,6 +6979,7 @@ "GI", "GL", "GM", + "GN", "GP", "GQ", "GR", @@ -6989,6 +7008,7 @@ "KE", "KG", "KH", + "KI", "KM", "KN", "KP", @@ -7001,6 +7021,7 @@ "LC", "LI", "LK", + "LR", "LS", "LT", "LU", @@ -7011,15 +7032,18 @@ "MD", "ME", "MG", + "MH", "MK", "ML", "MM", "MN", "MO", "MQ", + "MR", "MS", "MT", "MU", + "MV", "MW", "MX", "MY", @@ -7032,6 +7056,7 @@ "NL", "NO", "NP", + "NR", "NU", "NZ", "OM", @@ -7055,6 +7080,7 @@ "RU", "RW", "SA", + "SB", "SC", "SD", "SE", @@ -7082,8 +7108,10 @@ "TL", "TM", "TN", + "TO", "TR", "TT", + "TV", "TW", "TZ", "UA", @@ -7239,6 +7267,7 @@ "custom": {}, "data_type": "traits_v1", "languages": { + "ar": "ar", "ca": "ca", "cs": "cs", "de": "de", @@ -7247,17 +7276,30 @@ "eo": "eo", "es": "es", "eu": "eu", + "fa": "fa", "fi": "fi", "fr": "fr", "gd": "gd", + "gl": "gl", + "hr": "hr", + "hu": "hu", + "is": "is", "it": "it", "ja": "ja", + "kab": "kab", "nl": "nl", + "no": "no", "pl": "pl", "pt": "pt", "ru": "ru", + "sk": "sk", + "sq": "sq", "sv": "sv", - "zh": "zh", + "th": "th", + "tok": "tok", + "tr": "tr", + "uk": "uk", + "vi": "vi", "zh_Hans": "zh", "zh_Hant": "zh" }, diff --git a/searx/enginelib/traits.py b/searx/enginelib/traits.py index cab6557dd..8bb4e0554 100644 --- a/searx/enginelib/traits.py +++ b/searx/enginelib/traits.py @@ -10,6 +10,8 @@ used. 
""" from __future__ import annotations + +import os import json import dataclasses import types @@ -219,8 +221,20 @@ class EngineTraitsMap(Dict[str, EngineTraits]): for engine_name in names: engine = engines.engines[engine_name] + traits = None + + # pylint: disable=broad-exception-caught + try: + traits = EngineTraits.fetch_traits(engine) + except Exception as exc: + log("FATAL: while fetch_traits %s: %s" % (engine_name, exc)) + if os.environ.get('FORCE', '').lower() not in ['on', 'true', '1']: + raise + v = ENGINE_TRAITS.get(engine_name) + if v: + log("FORCE: re-use old values from fetch_traits - ENGINE_TRAITS[%s]" % engine_name) + traits = EngineTraits(**v) - traits = EngineTraits.fetch_traits(engine) if traits is not None: log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages))) log("%-20s: SearXNG regions --> %s" % (engine_name, len(traits.regions))) diff --git a/searx/sxng_locales.py b/searx/sxng_locales.py index e7e601c3e..82b1f00a0 100644 --- a/searx/sxng_locales.py +++ b/searx/sxng_locales.py @@ -11,9 +11,14 @@ sxng_locales = ( ('af', 'Afrikaans', '', 'Afrikaans', '\U0001f310'), ('ar', 'العربية', '', 'Arabic', '\U0001f310'), + ('ar-SA', 'العربية', 'المملكة العربية السعودية', 'Arabic', '\U0001f1f8\U0001f1e6'), + ('be', 'Беларуская', '', 'Belarusian', '\U0001f310'), ('bg', 'Български', '', 'Bulgarian', '\U0001f310'), + ('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'), ('ca', 'Català', '', 'Catalan', '\U0001f310'), ('cs', 'Čeština', '', 'Czech', '\U0001f310'), + ('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'), + ('cy', 'Cymraeg', '', 'Welsh', '\U0001f310'), ('da', 'Dansk', '', 'Danish', '\U0001f310'), ('da-DK', 'Dansk', 'Danmark', 'Danish', '\U0001f1e9\U0001f1f0'), ('de', 'Deutsch', '', 'German', '\U0001f310'), @@ -21,6 +26,7 @@ sxng_locales = ( ('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'), ('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'), ('el', 'Ελληνικά', 
'', 'Greek', '\U0001f310'), + ('el-GR', 'Ελληνικά', 'Ελλάδα', 'Greek', '\U0001f1ec\U0001f1f7'), ('en', 'English', '', 'English', '\U0001f310'), ('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'), ('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'), @@ -29,13 +35,21 @@ sxng_locales = ( ('en-IN', 'English', 'India', 'English', '\U0001f1ee\U0001f1f3'), ('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'), ('en-PH', 'English', 'Philippines', 'English', '\U0001f1f5\U0001f1ed'), + ('en-PK', 'English', 'Pakistan', 'English', '\U0001f1f5\U0001f1f0'), + ('en-SG', 'English', 'Singapore', 'English', '\U0001f1f8\U0001f1ec'), ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'), ('en-ZA', 'English', 'South Africa', 'English', '\U0001f1ff\U0001f1e6'), ('es', 'Español', '', 'Spanish', '\U0001f310'), ('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'), ('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'), + ('es-CO', 'Español', 'Colombia', 'Spanish', '\U0001f1e8\U0001f1f4'), ('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'), ('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'), + ('es-PE', 'Español', 'Perú', 'Spanish', '\U0001f1f5\U0001f1ea'), + ('et', 'Eesti', '', 'Estonian', '\U0001f310'), + ('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'), + ('eu', 'Euskara', '', 'Basque', '\U0001f310'), + ('fa', 'فارسی', '', 'Persian', '\U0001f310'), ('fi', 'Suomi', '', 'Finnish', '\U0001f310'), ('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'), ('fr', 'Français', '', 'French', '\U0001f310'), @@ -43,20 +57,29 @@ sxng_locales = ( ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'), ('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'), ('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'), + ('ga', 'Gaeilge', '', 'Irish', '\U0001f310'), + ('gd', 'Gàidhlig', '', 'Scottish Gaelic', '\U0001f310'), ('gl', 
'Galego', '', 'Galician', '\U0001f310'), ('he', 'עברית', '', 'Hebrew', '\U0001f1ee\U0001f1f1'), + ('hi', 'हिन्दी', '', 'Hindi', '\U0001f310'), ('hr', 'Hrvatski', '', 'Croatian', '\U0001f310'), ('hu', 'Magyar', '', 'Hungarian', '\U0001f310'), + ('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'), ('id', 'Indonesia', '', 'Indonesian', '\U0001f310'), ('id-ID', 'Indonesia', 'Indonesia', 'Indonesian', '\U0001f1ee\U0001f1e9'), + ('is', 'Íslenska', '', 'Icelandic', '\U0001f310'), ('it', 'Italiano', '', 'Italian', '\U0001f310'), ('it-CH', 'Italiano', 'Svizzera', 'Italian', '\U0001f1e8\U0001f1ed'), ('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'), ('ja', '日本語', '', 'Japanese', '\U0001f310'), ('ja-JP', '日本語', '日本', 'Japanese', '\U0001f1ef\U0001f1f5'), + ('kn', 'ಕನ್ನಡ', '', 'Kannada', '\U0001f310'), ('ko', '한국어', '', 'Korean', '\U0001f310'), ('ko-KR', '한국어', '대한민국', 'Korean', '\U0001f1f0\U0001f1f7'), ('lt', 'Lietuvių', '', 'Lithuanian', '\U0001f310'), + ('lv', 'Latviešu', '', 'Latvian', '\U0001f310'), + ('ml', 'മലയാളം', '', 'Malayalam', '\U0001f310'), + ('mr', 'मराठी', '', 'Marathi', '\U0001f310'), ('nb', 'Norsk Bokmål', '', 'Norwegian Bokmål', '\U0001f310'), ('nb-NO', 'Norsk Bokmål', 'Norge', 'Norwegian Bokmål', '\U0001f1f3\U0001f1f4'), ('nl', 'Nederlands', '', 'Dutch', '\U0001f310'), @@ -68,17 +91,24 @@ sxng_locales = ( ('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'), ('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'), ('ro', 'Română', '', 'Romanian', '\U0001f310'), + ('ro-RO', 'Română', 'România', 'Romanian', '\U0001f1f7\U0001f1f4'), ('ru', 'Русский', '', 'Russian', '\U0001f310'), ('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'), ('sk', 'Slovenčina', '', 'Slovak', '\U0001f310'), + ('sl', 'Slovenščina', '', 'Slovenian', '\U0001f310'), ('sq', 'Shqip', '', 'Albanian', '\U0001f310'), ('sv', 'Svenska', '', 'Swedish', '\U0001f310'), ('sv-SE', 'Svenska', 'Sverige', 'Swedish', 
'\U0001f1f8\U0001f1ea'), + ('ta', 'தமிழ்', '', 'Tamil', '\U0001f310'), + ('te', 'తెలుగు', '', 'Telugu', '\U0001f310'), ('th', 'ไทย', '', 'Thai', '\U0001f310'), + ('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'), ('tr', 'Türkçe', '', 'Turkish', '\U0001f310'), ('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'), ('uk', 'Українська', '', 'Ukrainian', '\U0001f310'), + ('ur', 'اردو', '', 'Urdu', '\U0001f310'), ('vi', 'Tiếng Việt', '', 'Vietnamese', '\U0001f310'), + ('vi-VN', 'Tiếng Việt', 'Việt Nam', 'Vietnamese', '\U0001f1fb\U0001f1f3'), ('zh', '中文', '', 'Chinese', '\U0001f310'), ('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'), ('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'), diff --git a/searxng_extra/update/update_engine_traits.py b/searxng_extra/update/update_engine_traits.py index 51b86c39f..a28d4d496 100755 --- a/searxng_extra/update/update_engine_traits.py +++ b/searxng_extra/update/update_engine_traits.py @@ -101,8 +101,8 @@ def fetch_traits_map(): def filter_locales(traits_map: EngineTraitsMap): """Filter language & region tags by a threshold.""" - min_eng_per_region = 22 - min_eng_per_lang = 24 + min_eng_per_region = 18 + min_eng_per_lang = 22 _ = {} for eng in traits_map.values():