From c20038e7c309ab43582feca2a00f89a9881996aa Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 29 Apr 2025 08:13:33 +0200 Subject: [PATCH] [fix] engine yahoo: replace fetch_traits by a list of languages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Yahoo engine's fetch_traits function has been encountering an error in CI jobs for several months [1], thus aborting the process for all other engines as well. The language selection dialog (which fetch_traits calls) requires an `EuConsent` cookie. Strangely, the cookie is not needed for searching, which is why the engine itself still works. Since Yahoo won't be conquering any new marketplaces in the foreseeable future, it should be sufficient to hard-code the list of currently available languages (`yahoo_languages`). [1] https://github.com/searxng/searxng/actions/runs/14720458830/job/41313149268 Signed-off-by: Markus Heiser --- searx/data/engine_traits.json | 40 ----------------- searx/engines/yahoo.py | 85 +++++++++++++++++------------------ 2 files changed, 40 insertions(+), 85 deletions(-) diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json index 66f8b19c1..76026886f 100644 --- a/searx/data/engine_traits.json +++ b/searx/data/engine_traits.json @@ -8576,46 +8576,6 @@ "zh-classical": "zh-classical" } }, - "yahoo": { - "all_locale": "any", - "custom": {}, - "data_type": "traits_v1", - "languages": { - "ar": "ar", - "bg": "bg", - "cs": "cs", - "da": "da", - "de": "de", - "el": "el", - "en": "en", - "es": "es", - "et": "et", - "fi": "fi", - "fr": "fr", - "he": "he", - "hr": "hr", - "hu": "hu", - "it": "it", - "ja": "ja", - "ko": "ko", - "lt": "lt", - "lv": "lv", - "nl": "nl", - "no": "no", - "pl": "pl", - "pt": "pt", - "ro": "ro", - "ru": "ru", - "sk": "sk", - "sl": "sl", - "sv": "sv", - "th": "th", - "tr": "tr", - "zh_Hans": "zh_chs", - "zh_Hant": "zh_cht" - }, - "regions": {} - }, "z-library": { "all_locale": "", "custom": { diff --git 
a/searx/engines/yahoo.py b/searx/engines/yahoo.py index 8dba443c7..a882c6947 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -63,21 +63,52 @@ lang2domain = { } """Map language to domain""" -locale_aliases = { - 'zh': 'zh_Hans', - 'zh-HK': 'zh_Hans', - 'zh-CN': 'zh_Hans', # dead since 2015 / routed to hk.search.yahoo.com - 'zh-TW': 'zh_Hant', +yahoo_languages = { + "all": "any", + "ar": "ar", + "bg": "bg", + "cs": "cs", + "da": "da", + "de": "de", + "el": "el", + "en": "en", + "es": "es", + "et": "et", + "fi": "fi", + "fr": "fr", + "he": "he", + "hr": "hr", + "hu": "hu", + "it": "it", + "ja": "ja", + "ko": "ko", + "lt": "lt", + "lv": "lv", + "nl": "nl", + "no": "no", + "pl": "pl", + "pt": "pt", + "ro": "ro", + "ru": "ru", + "sk": "sk", + "sl": "sl", + "sv": "sv", + "th": "th", + "tr": "tr", + "zh": "zh_chs", + "zh_Hans": "zh_chs", + 'zh-CN': "zh_chs", + "zh_Hant": "zh_cht", + "zh-HK": "zh_cht", + 'zh-TW': "zh_cht", } def request(query, params): """build request""" - lang = locale_aliases.get(params['language'], None) - if not lang: - lang = params['language'].split('-')[0] - lang = traits.get_language(lang, traits.all_locale) + lang = params["language"].split("-")[0] + lang = yahoo_languages.get(lang, "any") offset = (params['pageno'] - 1) * 7 + 1 age, btf = time_range_dict.get(params['time_range'], ('', '')) @@ -154,39 +185,3 @@ def response(resp): results.append({'suggestion': extract_text(suggestion)}) return results - - -def fetch_traits(engine_traits: EngineTraits): - """Fetch languages from yahoo""" - - # pylint: disable=import-outside-toplevel - import babel - from searx import network - from searx.locales import language_tag - - engine_traits.all_locale = 'any' - - resp = network.get('https://search.yahoo.com/preferences/languages') - if not resp.ok: - print("ERROR: response from yahoo is not OK.") - - dom = html.fromstring(resp.text) - offset = len('lang_') - - eng2sxng = {'zh_chs': 'zh_Hans', 'zh_cht': 'zh_Hant'} - - for val in 
eval_xpath_list(dom, '//div[contains(@class, "lang-item")]/input/@value'): - eng_tag = val[offset:] - - try: - sxng_tag = language_tag(babel.Locale.parse(eng2sxng.get(eng_tag, eng_tag))) - except babel.UnknownLocaleError: - print('ERROR: unknown language --> %s' % eng_tag) - continue - - conflict = engine_traits.languages.get(sxng_tag) - if conflict: - if conflict != eng_tag: - print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) - continue - engine_traits.languages[sxng_tag] = eng_tag