Merge pull request #794 from a01200356/languages
Changes in search language list (continuation of #748)
This commit is contained in:
		
						commit
						d08108be62
					
				
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @ -95,8 +95,13 @@ def _fetch_supported_languages(resp): | |||||||
|     dom = fromstring(resp.text) |     dom = fromstring(resp.text) | ||||||
|     links = dom.xpath('//span[@id="menu2"]/a') |     links = dom.xpath('//span[@id="menu2"]/a') | ||||||
|     for link in links: |     for link in links: | ||||||
|         code = link.xpath('./@href')[0][-2:] |         href = link.xpath('./@href')[0].split('lang%3A') | ||||||
|         if code != 'xx' and code not in supported_languages: |         if len(href) == 2: | ||||||
|  |             code = href[1].split('_') | ||||||
|  |             if len(code) == 2: | ||||||
|  |                 code = code[0] + '-' + code[1].upper() | ||||||
|  |             else: | ||||||
|  |                 code = code[0] | ||||||
|             supported_languages.append(code) |             supported_languages.append(code) | ||||||
| 
 | 
 | ||||||
|     return supported_languages |     return supported_languages | ||||||
|  | |||||||
| @ -132,7 +132,7 @@ def _fetch_supported_languages(resp): | |||||||
|             english_name = td[1].xpath('./a')[0].text |             english_name = td[1].xpath('./a')[0].text | ||||||
|             articles = int(td[4].xpath('./a/b')[0].text.replace(',', '')) |             articles = int(td[4].xpath('./a/b')[0].text.replace(',', '')) | ||||||
|             # exclude languages with too few articles |             # exclude languages with too few articles | ||||||
|             if articles >= 100000: |             if articles >= 100: | ||||||
|                 supported_languages[code] = {"name": name, "english_name": english_name, "articles": articles} |                 supported_languages[code] = {"name": name, "english_name": english_name, "articles": articles} | ||||||
| 
 | 
 | ||||||
|     return supported_languages |     return supported_languages | ||||||
|  | |||||||
| @ -3,28 +3,18 @@ | |||||||
| # this file is generated automatically by utils/update_search_languages.py | # this file is generated automatically by utils/update_search_languages.py | ||||||
| 
 | 
 | ||||||
| language_codes = ( | language_codes = ( | ||||||
|     (u"af", u"Afrikaans", u"", u""), |     (u"ar-SA", u"العربية", u"", u"Arabic"), | ||||||
|     (u"am", u"አማርኛ", u"", u"Amharic"), |     (u"bg-BG", u"Български", u"", u"Bulgarian"), | ||||||
|     (u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"), |  | ||||||
|     (u"az", u"Azərbaycanca", u"", u"Azerbaijani"), |  | ||||||
|     (u"be", u"Беларуская", u"", u"Belarusian"), |  | ||||||
|     (u"bg-BG", u"Български", u"България", u"Bulgarian"), |  | ||||||
|     (u"bn", u"বাংলা", u"", u"Bengali"), |  | ||||||
|     (u"br", u"Brezhoneg", u"", u"Breton"), |  | ||||||
|     (u"bs", u"Bosnian", u"", u"Bosnian"), |  | ||||||
|     (u"ca", u"Català", u"", u"Catalan"), |     (u"ca", u"Català", u"", u"Catalan"), | ||||||
|     (u"ca-CT", u"Català", u"", u"Catalan"), |     (u"ca-CT", u"Català", u"", u"Catalan"), | ||||||
|     (u"ca-ES", u"Català", u"Espanya", u"Catalan"), |     (u"ca-ES", u"Català", u"Espanya", u"Catalan"), | ||||||
|     (u"ce", u"Нохчийн", u"", u"Chechen"), |     (u"cs-CZ", u"Čeština", u"", u"Czech"), | ||||||
|     (u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"), |     (u"da-DK", u"Dansk", u"", u"Danish"), | ||||||
|     (u"cs-CZ", u"Čeština", u"Česko", u"Czech"), |  | ||||||
|     (u"cy", u"Cymraeg", u"", u"Welsh"), |  | ||||||
|     (u"da-DK", u"Dansk", u"Danmark", u"Danish"), |  | ||||||
|     (u"de", u"Deutsch", u"", u"German"), |     (u"de", u"Deutsch", u"", u"German"), | ||||||
|     (u"de-AT", u"Deutsch", u"Österreich", u"German"), |     (u"de-AT", u"Deutsch", u"Österreich", u"German"), | ||||||
|     (u"de-CH", u"Deutsch", u"Schweiz", u"German"), |     (u"de-CH", u"Deutsch", u"Schweiz", u"German"), | ||||||
|     (u"de-DE", u"Deutsch", u"Deutschland", u"German"), |     (u"de-DE", u"Deutsch", u"Deutschland", u"German"), | ||||||
|     (u"el-GR", u"Ελληνικά", u"Ελλάδα", u"Greek"), |     (u"el-GR", u"Ελληνικά", u"", u"Greek"), | ||||||
|     (u"en", u"English", u"", u"English"), |     (u"en", u"English", u"", u"English"), | ||||||
|     (u"en-AU", u"English", u"Australia", u"English"), |     (u"en-AU", u"English", u"Australia", u"English"), | ||||||
|     (u"en-CA", u"English", u"Canada", u"English"), |     (u"en-CA", u"English", u"Canada", u"English"), | ||||||
| @ -38,7 +28,6 @@ language_codes = ( | |||||||
|     (u"en-SG", u"English", u"Singapore", u"English"), |     (u"en-SG", u"English", u"Singapore", u"English"), | ||||||
|     (u"en-US", u"English", u"United States", u"English"), |     (u"en-US", u"English", u"United States", u"English"), | ||||||
|     (u"en-ZA", u"English", u"South Africa", u"English"), |     (u"en-ZA", u"English", u"South Africa", u"English"), | ||||||
|     (u"eo", u"Esperanto", u"", u"Esperanto"), |  | ||||||
|     (u"es", u"Español", u"", u"Spanish"), |     (u"es", u"Español", u"", u"Spanish"), | ||||||
|     (u"es-AR", u"Español", u"Argentina", u"Spanish"), |     (u"es-AR", u"Español", u"Argentina", u"Spanish"), | ||||||
|     (u"es-CL", u"Español", u"Chile", u"Spanish"), |     (u"es-CL", u"Español", u"Chile", u"Spanish"), | ||||||
| @ -47,85 +36,43 @@ language_codes = ( | |||||||
|     (u"es-MX", u"Español", u"México", u"Spanish"), |     (u"es-MX", u"Español", u"México", u"Spanish"), | ||||||
|     (u"es-PE", u"Español", u"Perú", u"Spanish"), |     (u"es-PE", u"Español", u"Perú", u"Spanish"), | ||||||
|     (u"es-US", u"Español", u"Estados Unidos", u"Spanish"), |     (u"es-US", u"Español", u"Estados Unidos", u"Spanish"), | ||||||
|     (u"et-EE", u"Eesti", u"Eesti", u"Estonian"), |     (u"et-EE", u"Eesti", u"", u"Estonian"), | ||||||
|     (u"eu", u"Euskara", u"", u"Basque"), |     (u"fi-FI", u"Suomi", u"", u"Finnish"), | ||||||
|     (u"fa", u"فارسی", u"", u"Persian"), |  | ||||||
|     (u"fi-FI", u"Suomi", u"Suomi", u"Finnish"), |  | ||||||
|     (u"fr", u"Français", u"", u"French"), |     (u"fr", u"Français", u"", u"French"), | ||||||
|     (u"fr-BE", u"Français", u"Belgique", u"French"), |     (u"fr-BE", u"Français", u"Belgique", u"French"), | ||||||
|     (u"fr-CA", u"Français", u"Canada", u"French"), |     (u"fr-CA", u"Français", u"Canada", u"French"), | ||||||
|     (u"fr-CH", u"Français", u"Suisse", u"French"), |     (u"fr-CH", u"Français", u"Suisse", u"French"), | ||||||
|     (u"fr-FR", u"Français", u"France", u"French"), |     (u"fr-FR", u"Français", u"France", u"French"), | ||||||
|     (u"ga", u"Gaeilge", u"", u"Irish"), |     (u"he-IL", u"עברית", u"", u"Hebrew"), | ||||||
|     (u"gl", u"Galego", u"", u"Galician"), |     (u"hr-HR", u"Hrvatski", u"", u"Croatian"), | ||||||
|     (u"gu", u"ગુજરાતી", u"", u"Gujarati"), |     (u"hu-HU", u"Magyar", u"", u"Hungarian"), | ||||||
|     (u"he-IL", u"עברית", u"ישראל", u"Hebrew"), |     (u"id-ID", u"Bahasa Indonesia", u"", u"Indonesian"), | ||||||
|     (u"hi", u"हिन्दी", u"", u"Hindi"), |  | ||||||
|     (u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"), |  | ||||||
|     (u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"), |  | ||||||
|     (u"hy", u"Հայերեն", u"", u"Armenian"), |  | ||||||
|     (u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"), |  | ||||||
|     (u"is", u"Íslenska", u"", u""), |  | ||||||
|     (u"it", u"Italiano", u"", u"Italian"), |     (u"it", u"Italiano", u"", u"Italian"), | ||||||
|     (u"it-CH", u"Italiano", u"Svizzera", u"Italian"), |     (u"it-CH", u"Italiano", u"Svizzera", u"Italian"), | ||||||
|     (u"it-IT", u"Italiano", u"Italia", u"Italian"), |     (u"it-IT", u"Italiano", u"Italia", u"Italian"), | ||||||
|     (u"iw", u"עברית", u"", u""), |     (u"ja-JP", u"日本語", u"", u"Japanese"), | ||||||
|     (u"ja-JP", u"日本語", u"日本", u"Japanese"), |     (u"ko-KR", u"한국어", u"", u"Korean"), | ||||||
|     (u"ka", u"ქართული", u"", u"Georgian"), |     (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"), | ||||||
|     (u"kk", u"Қазақша", u"", u"Kazakh"), |     (u"lv-LV", u"Latviešu", u"", u"Latvian"), | ||||||
|     (u"kn", u"ಕನ್ನಡ", u"", u"Kannada"), |  | ||||||
|     (u"ko-KR", u"한국어", u"대한민국", u"Korean"), |  | ||||||
|     (u"la", u"Latina", u"", u"Latin"), |  | ||||||
|     (u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"), |  | ||||||
|     (u"lv-LV", u"Latviešu", u"Latvijas Republika", u""), |  | ||||||
|     (u"mi", u"Reo Māori", u"", u"Maori"), |  | ||||||
|     (u"min", u"Minangkabau", u"", u"Minangkabau"), |  | ||||||
|     (u"mk", u"Македонски", u"", u"Macedonian"), |  | ||||||
|     (u"mn", u"Монгол", u"", u"Mongolian"), |  | ||||||
|     (u"mr", u"मराठी", u"", u"Marathi"), |  | ||||||
|     (u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"), |  | ||||||
|     (u"mt", u"Malti", u"", u"Maltese"), |  | ||||||
|     (u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"), |  | ||||||
|     (u"nl", u"Nederlands", u"", u"Dutch"), |     (u"nl", u"Nederlands", u"", u"Dutch"), | ||||||
|     (u"nl-BE", u"Nederlands", u"België", u"Dutch"), |     (u"nl-BE", u"Nederlands", u"België", u"Dutch"), | ||||||
|     (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"), |     (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"), | ||||||
|     (u"nn", u"Nynorsk", u"", u"Norwegian"), |     (u"no-NO", u"Norsk", u"", u"Norwegian"), | ||||||
|     (u"no-NO", u"Norsk", u"Norge", u"Norwegian"), |     (u"pl-PL", u"Polski", u"", u"Polish"), | ||||||
|     (u"oc", u"Occitan", u"", u"Occitan"), |  | ||||||
|     (u"or", u"Oriya", u"", u"Oriya"), |  | ||||||
|     (u"pa", u"ਪੰਜਾਬੀ", u"", u"Panjabi"), |  | ||||||
|     (u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"), |  | ||||||
|     (u"ps", u"Pushto", u"", u"Pushto"), |  | ||||||
|     (u"pt", u"Português", u"", u"Portuguese"), |     (u"pt", u"Português", u"", u"Portuguese"), | ||||||
|     (u"pt-BR", u"Português", u"Brasil", u"Portuguese"), |     (u"pt-BR", u"Português", u"Brasil", u"Portuguese"), | ||||||
|     (u"pt-PT", u"Português", u"Portugal", u"Portuguese"), |     (u"pt-PT", u"Português", u"Portugal", u"Portuguese"), | ||||||
|     (u"ro-RO", u"Română", u"România", u"Romanian"), |     (u"ro-RO", u"Română", u"", u"Romanian"), | ||||||
|     (u"ru-RU", u"Русский", u"Россия", u"Russian"), |     (u"ru-RU", u"Русский", u"", u"Russian"), | ||||||
|     (u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"), |     (u"sk-SK", u"Slovenčina", u"", u"Slovak"), | ||||||
|     (u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"), |  | ||||||
|     (u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"), |  | ||||||
|     (u"sl", u"Slovenščina", u"", u"Slovenian"), |     (u"sl", u"Slovenščina", u"", u"Slovenian"), | ||||||
|     (u"sr", u"Српски / Srpski", u"", u"Serbian"), |     (u"sv-SE", u"Svenska", u"", u"Swedish"), | ||||||
|     (u"sv-SE", u"Svenska", u"Sverige", u"Swedish"), |     (u"th-TH", u"ไทย", u"", u"Thai"), | ||||||
|     (u"sw", u"Kiswahili", u"", u""), |     (u"tr-TR", u"Türkçe", u"", u"Turkish"), | ||||||
|     (u"ta", u"தமிழ்", u"", u"Tamil"), |     (u"uk-UA", u"Українська", u"", u"Ukrainian"), | ||||||
|     (u"th-TH", u"ไทย", u"ไทย", u"Thai"), |     (u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"), | ||||||
|     (u"ti", u"ትግርኛ", u"", u"Tigrinya"), |  | ||||||
|     (u"tl-PH", u"Filipino", u"Pilipinas", u""), |  | ||||||
|     (u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"), |  | ||||||
|     (u"tt", u"Татарча", u"", u"Tatar"), |  | ||||||
|     (u"uk-UA", u"Українська", u"Україна", u"Ukrainian"), |  | ||||||
|     (u"ur", u"اردو", u"", u"Urdu"), |  | ||||||
|     (u"uz", u"O‘zbek", u"", u"Uzbek"), |  | ||||||
|     (u"ve", u"Venda", u"", u"Venda"), |  | ||||||
|     (u"vi-VN", u"Tiếng Việt", u"Công Hòa Xã Hội Chủ Nghĩa Việt Nam", u"Vietnamese"), |  | ||||||
|     (u"vo", u"Volapük", u"", u"Volapük"), |  | ||||||
|     (u"wa", u"Walon", u"", u"Walloon"), |  | ||||||
|     (u"war", u"Winaray", u"", u"Waray-Waray"), |  | ||||||
|     (u"xh", u"Xhosa", u"", u"Xhosa"), |  | ||||||
|     (u"zh", u"中文", u"", u"Chinese"), |     (u"zh", u"中文", u"", u"Chinese"), | ||||||
|     (u"zh-CN", u"中文", u"中国", u"Chinese"), |     (u"zh-CN", u"中文", u"中国", u"Chinese"), | ||||||
|     (u"zh-HK", u"中文", u"香港", u"Chinese"), |     (u"zh-HK", u"中文", u"香港", u"Chinese"), | ||||||
|     (u"zh-TW", u"中文", u"台湾", u"Chinese"), |     (u"zh-TW", u"中文", u"台湾", u"Chinese") | ||||||
|     (u"zu", u"Isi-Zulu", u"", u"Zulu") |  | ||||||
| ) | ) | ||||||
|  | |||||||
| @ -24,6 +24,8 @@ from searx.engines import ( | |||||||
| import string | import string | ||||||
| import re | import re | ||||||
| 
 | 
 | ||||||
|  | VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(\-[A-Z]{2})?$') | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| class RawTextQuery(object): | class RawTextQuery(object): | ||||||
|     """parse raw text query (the value from the html input)""" |     """parse raw text query (the value from the html input)""" | ||||||
| @ -68,6 +70,11 @@ class RawTextQuery(object): | |||||||
|             if query_part[0] == ':': |             if query_part[0] == ':': | ||||||
|                 lang = query_part[1:].lower() |                 lang = query_part[1:].lower() | ||||||
| 
 | 
 | ||||||
|  |                 # user may set a valid, yet not selectable language | ||||||
|  |                 if VALID_LANGUAGE_CODE.match(lang): | ||||||
|  |                     self.languages.append(lang) | ||||||
|  |                     parse_next = True | ||||||
|  | 
 | ||||||
|                 # check if any language-code is equal with |                 # check if any language-code is equal with | ||||||
|                 # declared language-codes |                 # declared language-codes | ||||||
|                 for lc in language_codes: |                 for lc in language_codes: | ||||||
|  | |||||||
| @ -148,7 +148,7 @@ | |||||||
| 				    <th>{{ _("Allow") }}</th> | 				    <th>{{ _("Allow") }}</th> | ||||||
| 				    <th>{{ _("Engine name") }}</th> | 				    <th>{{ _("Engine name") }}</th> | ||||||
| 				    <th>{{ _("Shortcut") }}</th> | 				    <th>{{ _("Shortcut") }}</th> | ||||||
| 				    <th>{{ _("Language support") }}</th> | 				    <th>{{ _("Supports selected language") }}</th> | ||||||
| 				    <th>{{ _("SafeSearch") }}</th> | 				    <th>{{ _("SafeSearch") }}</th> | ||||||
| 				    <th>{{ _("Time range") }}</th> | 				    <th>{{ _("Time range") }}</th> | ||||||
| 				    <th>{{ _("Avg. time") }}</th> | 				    <th>{{ _("Avg. time") }}</th> | ||||||
| @ -157,7 +157,7 @@ | |||||||
| 				    <th>{{ _("Max time") }}</th> | 				    <th>{{ _("Max time") }}</th> | ||||||
| 				    <th>{{ _("Avg. time") }}</th> | 				    <th>{{ _("Avg. time") }}</th> | ||||||
| 				    <th>{{ _("SafeSearch") }}</th> | 				    <th>{{ _("SafeSearch") }}</th> | ||||||
| 				    <th>{{ _("Language support") }}</th> | 				    <th>{{ _("Supports selected language") }}</th> | ||||||
| 				    <th>{{ _("Shortcut") }}</th> | 				    <th>{{ _("Shortcut") }}</th> | ||||||
| 				    <th>{{ _("Engine name") }}</th> | 				    <th>{{ _("Engine name") }}</th> | ||||||
| 				    <th>{{ _("Allow") }}</th> | 				    <th>{{ _("Allow") }}</th> | ||||||
| @ -172,7 +172,7 @@ | |||||||
|                                     </td> |                                     </td> | ||||||
|                                     <th>{{ search_engine.name }}</th> |                                     <th>{{ search_engine.name }}</th> | ||||||
| 				    <td>{{ shortcuts[search_engine.name] }}</td> | 				    <td>{{ shortcuts[search_engine.name] }}</td> | ||||||
| 				    <td><input type="checkbox" {{ "checked" if search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td> | 				    <td><input type="checkbox" {{ "checked" if current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td> | ||||||
| 				    <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td> | 				    <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td> | ||||||
| 				    <td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td> | 				    <td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td> | ||||||
| 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td> | 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td> | ||||||
|  | |||||||
| @ -102,10 +102,10 @@ Change search language | |||||||
|     Page Should Contain  preferences |     Page Should Contain  preferences | ||||||
|     Go To  http://localhost:11111/preferences |     Go To  http://localhost:11111/preferences | ||||||
|     List Selection Should Be  language  Default language |     List Selection Should Be  language  Default language | ||||||
|     Select From List  language  Türkçe (Türkiye) - tr-TR |     Select From List  language  Türkçe - tr-TR | ||||||
|     Submit Preferences |     Submit Preferences | ||||||
|     Go To  http://localhost:11111/preferences |     Go To  http://localhost:11111/preferences | ||||||
|     List Selection Should Be  language  Türkçe (Türkiye) - tr-TR |     List Selection Should Be  language  Türkçe - tr-TR | ||||||
| 
 | 
 | ||||||
| Change autocomplete | Change autocomplete | ||||||
|     Page Should Contain  about |     Page Should Contain  about | ||||||
|  | |||||||
| @ -103,7 +103,9 @@ class TestGigablastEngine(SearxTestCase): | |||||||
|                 <span id="menu2"> |                 <span id="menu2"> | ||||||
|                     <a href="/search?&rxikd=1&qlang=xx"></a> |                     <a href="/search?&rxikd=1&qlang=xx"></a> | ||||||
|                     <a href="/search?&rxikd=1&qlang=en"></a> |                     <a href="/search?&rxikd=1&qlang=en"></a> | ||||||
|                     <a href="/search?&rxikd=1&qlang=fr"></a> |                     <a href="/search?&rxikd=1&prepend=gblang%3Aen"></a> | ||||||
|  |                     <a href="/search?&rxikd=1&qlang=zh_"></a> | ||||||
|  |                     <a href="/search?&rxikd=1&prepend=gblang%3Azh_tw"></a> | ||||||
|                 </span> |                 </span> | ||||||
|             </body> |             </body> | ||||||
|         </html> |         </html> | ||||||
| @ -113,4 +115,4 @@ class TestGigablastEngine(SearxTestCase): | |||||||
|         self.assertEqual(type(languages), list) |         self.assertEqual(type(languages), list) | ||||||
|         self.assertEqual(len(languages), 2) |         self.assertEqual(len(languages), 2) | ||||||
|         self.assertIn('en', languages) |         self.assertIn('en', languages) | ||||||
|         self.assertIn('fr', languages) |         self.assertIn('zh-TW', languages) | ||||||
|  | |||||||
| @ -25,7 +25,6 @@ engines_languages_file = 'engines_languages.json' | |||||||
| languages_file = 'languages.py' | languages_file = 'languages.py' | ||||||
| 
 | 
 | ||||||
| engines_languages = {} | engines_languages = {} | ||||||
| languages = {} |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # To filter out invalid codes and dialects. | # To filter out invalid codes and dialects. | ||||||
| @ -93,16 +92,15 @@ def fetch_supported_languages(): | |||||||
| # Join all language lists. | # Join all language lists. | ||||||
| # Iterate all languages supported by each engine. | # Iterate all languages supported by each engine. | ||||||
| def join_language_lists(): | def join_language_lists(): | ||||||
|  |     global languages | ||||||
|     # include wikipedia first for more accurate language names |     # include wikipedia first for more accurate language names | ||||||
|     languages.update({code: lang for code, lang |     languages = {code: lang for code, lang | ||||||
|                  in engines_languages['wikipedia'].iteritems() |                  in engines_languages['wikipedia'].iteritems() | ||||||
|                       if valid_code(code)}) |                  if valid_code(code)} | ||||||
| 
 | 
 | ||||||
|     for engine_name in engines_languages: |     for engine_name in engines_languages: | ||||||
|         for locale in engines_languages[engine_name]: |         for locale in engines_languages[engine_name]: | ||||||
|             if not valid_code(locale): |             if valid_code(locale): | ||||||
|                 continue |  | ||||||
| 
 |  | ||||||
|                 # if language is not on list or if it has no name yet |                 # if language is not on list or if it has no name yet | ||||||
|                 if locale not in languages or not languages[locale].get('name'): |                 if locale not in languages or not languages[locale].get('name'): | ||||||
|                     if isinstance(engines_languages[engine_name], dict): |                     if isinstance(engines_languages[engine_name], dict): | ||||||
| @ -110,6 +108,21 @@ def join_language_lists(): | |||||||
|                     else: |                     else: | ||||||
|                         languages[locale] = {} |                         languages[locale] = {} | ||||||
| 
 | 
 | ||||||
|  |             # add to counter of engines that support given language | ||||||
|  |             lang = locale.split('-')[0] | ||||||
|  |             if lang in languages: | ||||||
|  |                 if 'counter' not in languages[lang]: | ||||||
|  |                     languages[lang]['counter'] = [engine_name] | ||||||
|  |                 elif engine_name not in languages[lang]['counter']: | ||||||
|  |                     languages[lang]['counter'].append(engine_name) | ||||||
|  | 
 | ||||||
|  |     # filter list to include only languages supported by most engines | ||||||
|  |     min_supported_engines = int(0.75 * len(engines_languages)) | ||||||
|  |     languages = {code: lang for code, lang | ||||||
|  |                  in languages.iteritems() | ||||||
|  |                  if len(lang.get('counter', [])) >= min_supported_engines or | ||||||
|  |                  len(languages.get(code.split('-')[0], {}).get('counter', [])) >= min_supported_engines} | ||||||
|  | 
 | ||||||
|     # get locales that have no name or country yet |     # get locales that have no name or country yet | ||||||
|     for locale in languages.keys(): |     for locale in languages.keys(): | ||||||
|         # try to get language names |         # try to get language names | ||||||
| @ -134,6 +147,7 @@ def join_language_lists(): | |||||||
| # Remove countryless language if language is featured in only one country. | # Remove countryless language if language is featured in only one country. | ||||||
| def filter_single_country_languages(): | def filter_single_country_languages(): | ||||||
|     prev_lang = None |     prev_lang = None | ||||||
|  |     prev_code = None | ||||||
|     for code in sorted(languages): |     for code in sorted(languages): | ||||||
|         lang = code.split('-')[0] |         lang = code.split('-')[0] | ||||||
|         if lang == prev_lang: |         if lang == prev_lang: | ||||||
| @ -141,8 +155,10 @@ def filter_single_country_languages(): | |||||||
|         else: |         else: | ||||||
|             if prev_lang is not None and countries == 1: |             if prev_lang is not None and countries == 1: | ||||||
|                 del languages[prev_lang] |                 del languages[prev_lang] | ||||||
|  |                 languages[prev_code]['country'] = '' | ||||||
|             countries = 0 |             countries = 0 | ||||||
|             prev_lang = lang |             prev_lang = lang | ||||||
|  |         prev_code = code | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # Write languages.py. | # Write languages.py. | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Adam Tauber
						Adam Tauber