From ce5be98f3951db94c5465e1ae4e395ccf5bc7816 Mon Sep 17 00:00:00 2001 From: Martin Fischer Date: Fri, 28 Jan 2022 10:55:25 +0100 Subject: [PATCH] [refactor] introduce searx.utils.find_language_aliases --- searx/engines/__init__.py | 17 ++--------------- searx/utils.py | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index a9e7702dc..ef01209c5 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -16,19 +16,14 @@ import copy from typing import Dict, List, Optional from os.path import realpath, dirname -from babel.localedata import locale_identifiers from searx import logger, settings from searx.data import ENGINES_LANGUAGES from searx.network import get -from searx.utils import load_module, match_language, gen_useragent +from searx.utils import load_module, gen_useragent, find_language_aliases logger = logger.getChild('engines') ENGINE_DIR = dirname(realpath(__file__)) -BABEL_LANGS = [ - lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0] - for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers()) -] ENGINE_DEFAULT_ARGS = { "engine_type": "online", "inactive": False, @@ -206,15 +201,7 @@ def set_language_attributes(engine: ConfiguredEngine): engine.supported_languages = [engine.language] # find custom aliases for non standard language codes - for engine_lang in engine.supported_languages: - iso_lang = match_language(engine_lang, BABEL_LANGS, fallback=None) - if ( - iso_lang - and iso_lang != engine_lang - and not engine_lang.startswith(iso_lang) - and iso_lang not in engine.supported_languages - ): - engine.language_aliases[iso_lang] = engine_lang + engine.language_aliases.update(find_language_aliases(engine.supported_languages)) # language_support engine.language_support = len(engine.supported_languages) > 0 diff --git a/searx/utils.py b/searx/utils.py index d44bb73ea..e1992fd14 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -6,11 +6,13 @@ from numbers import Number from os.path import splitext, join from random import choice from html.parser import HTMLParser +from typing import Collection, Dict from urllib.parse import urljoin, urlparse from lxml import html from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult from babel.core import get_global +from babel.localedata import locale_identifiers from searx import settings @@ -438,6 +440,26 @@ def match_language(locale_code, lang_list=[], custom_aliases={}, fallback='en-US return language or fallback +_BABEL_LANGS = [ + lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0] + for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers()) +] + + +def find_language_aliases(supported_languages: Collection[str]) -> Dict[str, str]: + aliases = {} + for engine_lang in supported_languages: + iso_lang = match_language(engine_lang, _BABEL_LANGS, fallback=None) + if ( + iso_lang + and iso_lang != engine_lang + and not engine_lang.startswith(iso_lang) + and iso_lang not in supported_languages + ): + aliases[iso_lang] = engine_lang + return aliases + + def load_module(filename, module_dir): modname = splitext(filename)[0] filepath = join(module_dir, filename)