Merge pull request #676 from return42/fix-bing-lang
Fix issues on running update_languages.py
This commit is contained in:
		
						commit
						21d7c8b367
					
				| @ -193,7 +193,7 @@ def set_language_attributes(engine): | |||||||
|     if hasattr(engine, '_fetch_supported_languages'): |     if hasattr(engine, '_fetch_supported_languages'): | ||||||
|         headers = { |         headers = { | ||||||
|             'User-Agent': gen_useragent(), |             'User-Agent': gen_useragent(), | ||||||
|             'Accept-Language': 'ja-JP,ja;q=0.8,en-US;q=0.5,en;q=0.3',  # bing needs a non-English language |             'Accept-Language': "en-US,en;q=0.5",  # bing needs to set the English language | ||||||
|         } |         } | ||||||
|         engine.fetch_supported_languages = ( |         engine.fetch_supported_languages = ( | ||||||
|             # pylint: disable=protected-access |             # pylint: disable=protected-access | ||||||
|  | |||||||
| @ -6,7 +6,7 @@ | |||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| import re | import re | ||||||
| from urllib.parse import urlencode | from urllib.parse import urlencode, urlparse, parse_qs | ||||||
| from lxml import html | from lxml import html | ||||||
| from searx.utils import eval_xpath, extract_text, match_language | from searx.utils import eval_xpath, extract_text, match_language | ||||||
| 
 | 
 | ||||||
| @ -25,7 +25,7 @@ paging = True | |||||||
| time_range_support = False | time_range_support = False | ||||||
| safesearch = False | safesearch = False | ||||||
| supported_languages_url = 'https://www.bing.com/account/general' | supported_languages_url = 'https://www.bing.com/account/general' | ||||||
| language_aliases = {'zh-CN': 'zh-CHS', 'zh-TW': 'zh-CHT', 'zh-HK': 'zh-CHT'} | language_aliases = {} | ||||||
| 
 | 
 | ||||||
| # search-url | # search-url | ||||||
| base_url = 'https://www.bing.com/' | base_url = 'https://www.bing.com/' | ||||||
| @ -127,18 +127,27 @@ def response(resp): | |||||||
| 
 | 
 | ||||||
| # get supported languages from their site | # get supported languages from their site | ||||||
| def _fetch_supported_languages(resp): | def _fetch_supported_languages(resp): | ||||||
|  | 
 | ||||||
|     lang_tags = set() |     lang_tags = set() | ||||||
| 
 | 
 | ||||||
|     setmkt = re.compile('setmkt=([^&]*)') |  | ||||||
|     dom = html.fromstring(resp.text) |     dom = html.fromstring(resp.text) | ||||||
|     lang_links = eval_xpath(dom, "//li/a[contains(@href, 'setmkt')]") |     lang_links = eval_xpath(dom, '//div[@id="language-section"]//li') | ||||||
| 
 | 
 | ||||||
|     for a in lang_links: |     for _li in lang_links: | ||||||
|         href = eval_xpath(a, './@href')[0] | 
 | ||||||
|         match = setmkt.search(href) |         href = eval_xpath(_li, './/@href')[0] | ||||||
|         l_tag = match.groups()[0] |         (_scheme, _netloc, _path, _params, query, _fragment) = urlparse(href) | ||||||
|         _lang, _nation = l_tag.split('-', 1) |         query = parse_qs(query, keep_blank_values=True) | ||||||
|         l_tag = _lang.lower() + '-' + _nation.upper() | 
 | ||||||
|         lang_tags.add(l_tag) |         # fmt: off | ||||||
|  |         setlang = query.get('setlang', [None, ])[0] | ||||||
|  |         # example: 'mn-Cyrl-MN' --> '['mn', 'Cyrl-MN'] | ||||||
|  |         lang, nation = (setlang.split('-', maxsplit=1) + [None,])[:2]  # fmt: skip | ||||||
|  |         # fmt: on | ||||||
|  | 
 | ||||||
|  |         if not nation: | ||||||
|  |             nation = lang.upper() | ||||||
|  |         tag = lang + '-' + nation | ||||||
|  |         lang_tags.add(tag) | ||||||
| 
 | 
 | ||||||
|     return list(lang_tags) |     return list(lang_tags) | ||||||
|  | |||||||
| @ -35,6 +35,8 @@ def fetch_supported_languages(): | |||||||
|             if type(engines_languages[engine_name]) == list: |             if type(engines_languages[engine_name]) == list: | ||||||
|                 engines_languages[engine_name] = sorted(engines_languages[engine_name]) |                 engines_languages[engine_name] = sorted(engines_languages[engine_name]) | ||||||
| 
 | 
 | ||||||
|  |     print("fetched languages from %s engines" % len(engines_languages)) | ||||||
|  | 
 | ||||||
|     # write json file |     # write json file | ||||||
|     with open(engines_languages_file, 'w', encoding='utf-8') as f: |     with open(engines_languages_file, 'w', encoding='utf-8') as f: | ||||||
|         json.dump(engines_languages, f, indent=2, sort_keys=True) |         json.dump(engines_languages, f, indent=2, sort_keys=True) | ||||||
| @ -97,7 +99,11 @@ def join_language_lists(engines_languages): | |||||||
|                 country_name = '' |                 country_name = '' | ||||||
|                 if locale: |                 if locale: | ||||||
|                     # get country name from babel's Locale object |                     # get country name from babel's Locale object | ||||||
|                     country_name = locale.get_territory_name() |                     try: | ||||||
|  |                         country_name = locale.get_territory_name() | ||||||
|  |                     except FileNotFoundError as exc: | ||||||
|  |                         print("ERROR: %s --> %s" % (locale, exc)) | ||||||
|  |                         locale = None | ||||||
| 
 | 
 | ||||||
|                 language_list[short_code]['countries'][lang_code] = {'country_name': country_name, 'counter': set()} |                 language_list[short_code]['countries'][lang_code] = {'country_name': country_name, 'counter': set()} | ||||||
| 
 | 
 | ||||||
| @ -186,17 +192,24 @@ def write_languages_file(languages): | |||||||
|         "language_codes =", |         "language_codes =", | ||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
|     language_codes = tuple( |     language_codes = [] | ||||||
|         [ | 
 | ||||||
|             ( |     for code in sorted(languages): | ||||||
|                 code, | 
 | ||||||
|                 languages[code]['name'].split(' (')[0], |         name = languages[code]['name'] | ||||||
|                 languages[code].get('country_name') or '', |         if name is None: | ||||||
|                 languages[code].get('english_name') or '', |             print("ERROR: languages['%s'] --> %s" % (code, languages[code])) | ||||||
|             ) |             continue | ||||||
|             for code in sorted(languages) |         item = ( | ||||||
|         ] |             code, | ||||||
|     ) |             languages[code]['name'].split(' (')[0], | ||||||
|  |             languages[code].get('country_name') or '', | ||||||
|  |             languages[code].get('english_name') or '', | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         language_codes.append(item) | ||||||
|  | 
 | ||||||
|  |     language_codes = tuple(language_codes) | ||||||
| 
 | 
 | ||||||
|     with open(languages_file, 'w') as new_file: |     with open(languages_file, 'w') as new_file: | ||||||
|         file_content = "{file_headers} \\\n{language_codes}".format( |         file_content = "{file_headers} \\\n{language_codes}".format( | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Markus Heiser
						Markus Heiser