Merge pull request #748 from a01200356/languages
[mod] Allow users to search in most engine supported languages
This commit is contained in:
		
						commit
						9743bde25e
					
				| @ -43,7 +43,7 @@ generally made searx better: | |||||||
| - Kang-min Liu | - Kang-min Liu | ||||||
| - Kirill Isakov | - Kirill Isakov | ||||||
| - Guilhem Bonnefille | - Guilhem Bonnefille | ||||||
| - Marc Abonce Seguin | - Marc Abonce Seguin @a01200356 | ||||||
| - @jibe-b | - @jibe-b | ||||||
| - Christian Pietsch @pietsch | - Christian Pietsch @pietsch | ||||||
| - @Maxqia | - @Maxqia | ||||||
| @ -55,7 +55,6 @@ generally made searx better: | |||||||
| - Ammar Najjar @ammarnajjar | - Ammar Najjar @ammarnajjar | ||||||
| - @stepshal | - @stepshal | ||||||
| - François Revol @mmuman | - François Revol @mmuman | ||||||
| - marc @a01200356 |  | ||||||
| - Harry Wood @harry-wood | - Harry Wood @harry-wood | ||||||
| - Thomas Renard @threnard | - Thomas Renard @threnard | ||||||
| - Pydo `<https://github.com/pydo>`_ | - Pydo `<https://github.com/pydo>`_ | ||||||
|  | |||||||
| @ -81,17 +81,17 @@ def searx_bang(full_query): | |||||||
|             engine_query = full_query.getSearchQuery()[1:] |             engine_query = full_query.getSearchQuery()[1:] | ||||||
| 
 | 
 | ||||||
|             for lc in language_codes: |             for lc in language_codes: | ||||||
|                 lang_id, lang_name, country = map(str.lower, lc) |                 lang_id, lang_name, country, english_name = map(str.lower, lc) | ||||||
| 
 | 
 | ||||||
|                 # check if query starts with language-id |                 # check if query starts with language-id | ||||||
|                 if lang_id.startswith(engine_query): |                 if lang_id.startswith(engine_query): | ||||||
|                     if len(engine_query) <= 2: |                     if len(engine_query) <= 2: | ||||||
|                         results.append(':{lang_id}'.format(lang_id=lang_id.split('_')[0])) |                         results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0])) | ||||||
|                     else: |                     else: | ||||||
|                         results.append(':{lang_id}'.format(lang_id=lang_id)) |                         results.append(':{lang_id}'.format(lang_id=lang_id)) | ||||||
| 
 | 
 | ||||||
|                 # check if query starts with language name |                 # check if query starts with language name | ||||||
|                 if lang_name.startswith(engine_query): |                 if lang_name.startswith(engine_query) or english_name.startswith(engine_query): | ||||||
|                     results.append(':{lang_name}'.format(lang_name=lang_name)) |                     results.append(':{lang_name}'.format(lang_name=lang_name)) | ||||||
| 
 | 
 | ||||||
|                 # check if query starts with country |                 # check if query starts with country | ||||||
|  | |||||||
							
								
								
									
										1
									
								
								searx/data/engines_languages.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								searx/data/engines_languages.json
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @ -20,6 +20,8 @@ from os.path import realpath, dirname | |||||||
| import sys | import sys | ||||||
| from flask_babel import gettext | from flask_babel import gettext | ||||||
| from operator import itemgetter | from operator import itemgetter | ||||||
|  | from json import loads | ||||||
|  | from requests import get | ||||||
| from searx import settings | from searx import settings | ||||||
| from searx import logger | from searx import logger | ||||||
| from searx.utils import load_module | from searx.utils import load_module | ||||||
| @ -33,10 +35,13 @@ engines = {} | |||||||
| 
 | 
 | ||||||
| categories = {'general': []} | categories = {'general': []} | ||||||
| 
 | 
 | ||||||
|  | languages = loads(open(engine_dir + '/../data/engines_languages.json').read()) | ||||||
|  | 
 | ||||||
| engine_shortcuts = {} | engine_shortcuts = {} | ||||||
| engine_default_args = {'paging': False, | engine_default_args = {'paging': False, | ||||||
|                        'categories': ['general'], |                        'categories': ['general'], | ||||||
|                        'language_support': True, |                        'language_support': True, | ||||||
|  |                        'supported_languages': [], | ||||||
|                        'safesearch': False, |                        'safesearch': False, | ||||||
|                        'timeout': settings['outgoing']['request_timeout'], |                        'timeout': settings['outgoing']['request_timeout'], | ||||||
|                        'shortcut': '-', |                        'shortcut': '-', | ||||||
| @ -85,6 +90,15 @@ def load_engine(engine_data): | |||||||
|                          .format(engine.name, engine_attr)) |                          .format(engine.name, engine_attr)) | ||||||
|             sys.exit(1) |             sys.exit(1) | ||||||
| 
 | 
 | ||||||
|  |     # assign supported languages from json file | ||||||
|  |     if engine_data['name'] in languages: | ||||||
|  |         setattr(engine, 'supported_languages', languages[engine_data['name']]) | ||||||
|  | 
 | ||||||
|  |     # assign language fetching method if auxiliary method exists | ||||||
|  |     if hasattr(engine, '_fetch_supported_languages'): | ||||||
|  |         setattr(engine, 'fetch_supported_languages', | ||||||
|  |                 lambda: engine._fetch_supported_languages(get(engine.supported_languages_url))) | ||||||
|  | 
 | ||||||
|     engine.stats = { |     engine.stats = { | ||||||
|         'result_count': 0, |         'result_count': 0, | ||||||
|         'search_count': 0, |         'search_count': 0, | ||||||
|  | |||||||
| @ -29,8 +29,8 @@ xpath_link = './/div[@class="mw-search-result-heading"]/a' | |||||||
| 
 | 
 | ||||||
| # cut 'en' from 'en_US', 'de' from 'de_CH', and so on | # cut 'en' from 'en_US', 'de' from 'de_CH', and so on | ||||||
| def locale_to_lang_code(locale): | def locale_to_lang_code(locale): | ||||||
|     if locale.find('_') >= 0: |     if locale.find('-') >= 0: | ||||||
|         locale = locale.split('_')[0] |         locale = locale.split('-')[0] | ||||||
|     return locale |     return locale | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -95,6 +95,7 @@ main_langs = { | |||||||
|     'uk': 'Українська', |     'uk': 'Українська', | ||||||
|     'zh': '简体中文' |     'zh': '简体中文' | ||||||
| } | } | ||||||
|  | supported_languages = dict(lang_urls, **main_langs) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # do search-request | # do search-request | ||||||
|  | |||||||
| @ -21,6 +21,7 @@ from searx.engines.xpath import extract_text | |||||||
| categories = ['general'] | categories = ['general'] | ||||||
| paging = True | paging = True | ||||||
| language_support = True | language_support = True | ||||||
|  | supported_languages_url = 'https://www.bing.com/account/general' | ||||||
| 
 | 
 | ||||||
| # search-url | # search-url | ||||||
| base_url = 'https://www.bing.com/' | base_url = 'https://www.bing.com/' | ||||||
| @ -32,7 +33,7 @@ def request(query, params): | |||||||
|     offset = (params['pageno'] - 1) * 10 + 1 |     offset = (params['pageno'] - 1) * 10 + 1 | ||||||
| 
 | 
 | ||||||
|     if params['language'] != 'all': |     if params['language'] != 'all': | ||||||
|         query = u'language:{} {}'.format(params['language'].split('_')[0].upper(), |         query = u'language:{} {}'.format(params['language'].split('-')[0].upper(), | ||||||
|                                          query.decode('utf-8')).encode('utf-8') |                                          query.decode('utf-8')).encode('utf-8') | ||||||
| 
 | 
 | ||||||
|     search_path = search_string.format( |     search_path = search_string.format( | ||||||
| @ -81,3 +82,15 @@ def response(resp): | |||||||
| 
 | 
 | ||||||
|     # return results |     # return results | ||||||
|     return results |     return results | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # get supported languages from their site | ||||||
|  | def _fetch_supported_languages(resp): | ||||||
|  |     supported_languages = [] | ||||||
|  |     dom = html.fromstring(resp.text) | ||||||
|  |     options = dom.xpath('//div[@id="limit-languages"]//input') | ||||||
|  |     for option in options: | ||||||
|  |         code = option.xpath('./@id')[0].replace('_', '-') | ||||||
|  |         supported_languages.append(code) | ||||||
|  | 
 | ||||||
|  |     return supported_languages | ||||||
|  | |||||||
| @ -19,6 +19,7 @@ from urllib import urlencode | |||||||
| from lxml import html | from lxml import html | ||||||
| from json import loads | from json import loads | ||||||
| import re | import re | ||||||
|  | from searx.engines.bing import _fetch_supported_languages, supported_languages_url | ||||||
| 
 | 
 | ||||||
| # engine dependent config | # engine dependent config | ||||||
| categories = ['images'] | categories = ['images'] | ||||||
| @ -53,7 +54,7 @@ def request(query, params): | |||||||
|     if params['language'] == 'all': |     if params['language'] == 'all': | ||||||
|         language = 'en-US' |         language = 'en-US' | ||||||
|     else: |     else: | ||||||
|         language = params['language'].replace('_', '-') |         language = params['language'] | ||||||
| 
 | 
 | ||||||
|     search_path = search_string.format( |     search_path = search_string.format( | ||||||
|         query=urlencode({'q': query}), |         query=urlencode({'q': query}), | ||||||
|  | |||||||
| @ -17,6 +17,7 @@ from datetime import datetime | |||||||
| from dateutil import parser | from dateutil import parser | ||||||
| from lxml import etree | from lxml import etree | ||||||
| from searx.utils import list_get | from searx.utils import list_get | ||||||
|  | from searx.engines.bing import _fetch_supported_languages, supported_languages_url | ||||||
| 
 | 
 | ||||||
| # engine dependent config | # engine dependent config | ||||||
| categories = ['news'] | categories = ['news'] | ||||||
| @ -74,7 +75,7 @@ def request(query, params): | |||||||
|     if params['language'] == 'all': |     if params['language'] == 'all': | ||||||
|         language = 'en-US' |         language = 'en-US' | ||||||
|     else: |     else: | ||||||
|         language = params['language'].replace('_', '-') |         language = params['language'] | ||||||
| 
 | 
 | ||||||
|     params['url'] = _get_url(query, language, offset, params['time_range']) |     params['url'] = _get_url(query, language, offset, params['time_range']) | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -15,6 +15,7 @@ | |||||||
| from urllib import urlencode | from urllib import urlencode | ||||||
| from json import loads | from json import loads | ||||||
| from datetime import datetime | from datetime import datetime | ||||||
|  | from requests import get | ||||||
| 
 | 
 | ||||||
| # engine dependent config | # engine dependent config | ||||||
| categories = ['videos'] | categories = ['videos'] | ||||||
| @ -27,6 +28,8 @@ search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,descr | |||||||
| embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\ | embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\ | ||||||
|     'data-src="//www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>' |     'data-src="//www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>' | ||||||
| 
 | 
 | ||||||
|  | supported_languages_url = 'https://api.dailymotion.com/languages' | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| # do search-request | # do search-request | ||||||
| def request(query, params): | def request(query, params): | ||||||
| @ -74,3 +77,22 @@ def response(resp): | |||||||
| 
 | 
 | ||||||
|     # return results |     # return results | ||||||
|     return results |     return results | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # get supported languages from their site | ||||||
|  | def _fetch_supported_languages(resp): | ||||||
|  |     supported_languages = {} | ||||||
|  | 
 | ||||||
|  |     response_json = loads(resp.text) | ||||||
|  | 
 | ||||||
|  |     for language in response_json['list']: | ||||||
|  |         supported_languages[language['code']] = {} | ||||||
|  | 
 | ||||||
|  |         name = language['native_name'] | ||||||
|  |         if name: | ||||||
|  |             supported_languages[language['code']]['name'] = name | ||||||
|  |         english_name = language['name'] | ||||||
|  |         if english_name: | ||||||
|  |             supported_languages[language['code']]['english_name'] = english_name | ||||||
|  | 
 | ||||||
|  |     return supported_languages | ||||||
|  | |||||||
| @ -15,13 +15,15 @@ | |||||||
| 
 | 
 | ||||||
| from urllib import urlencode | from urllib import urlencode | ||||||
| from lxml.html import fromstring | from lxml.html import fromstring | ||||||
|  | from requests import get | ||||||
|  | from json import loads | ||||||
| from searx.engines.xpath import extract_text | from searx.engines.xpath import extract_text | ||||||
| from searx.languages import language_codes |  | ||||||
| 
 | 
 | ||||||
| # engine dependent config | # engine dependent config | ||||||
| categories = ['general'] | categories = ['general'] | ||||||
| paging = True | paging = True | ||||||
| language_support = True | language_support = True | ||||||
|  | supported_languages_url = 'https://duckduckgo.com/d2030.js' | ||||||
| time_range_support = True | time_range_support = True | ||||||
| 
 | 
 | ||||||
| # search-url | # search-url | ||||||
| @ -46,19 +48,31 @@ def request(query, params): | |||||||
| 
 | 
 | ||||||
|     offset = (params['pageno'] - 1) * 30 |     offset = (params['pageno'] - 1) * 30 | ||||||
| 
 | 
 | ||||||
|  |     # custom fixes for languages | ||||||
|     if params['language'] == 'all': |     if params['language'] == 'all': | ||||||
|         locale = None |         locale = None | ||||||
|  |     elif params['language'][:2] == 'ja': | ||||||
|  |         locale = 'jp-jp' | ||||||
|  |     elif params['language'][:2] == 'sl': | ||||||
|  |         locale = 'sl-sl' | ||||||
|  |     elif params['language'] == 'zh-TW': | ||||||
|  |         locale = 'tw-tzh' | ||||||
|  |     elif params['language'] == 'zh-HK': | ||||||
|  |         locale = 'hk-tzh' | ||||||
|  |     elif params['language'][-2:] == 'SA': | ||||||
|  |         locale = 'xa-' + params['language'].split('-')[0] | ||||||
|  |     elif params['language'][-2:] == 'GB': | ||||||
|  |         locale = 'uk-' + params['language'].split('-')[0] | ||||||
|     else: |     else: | ||||||
|         locale = params['language'].split('_') |         locale = params['language'].split('-') | ||||||
|         if len(locale) == 2: |         if len(locale) == 2: | ||||||
|             # country code goes first |             # country code goes first | ||||||
|             locale = locale[1].lower() + '-' + locale[0].lower() |             locale = locale[1].lower() + '-' + locale[0].lower() | ||||||
|         else: |         else: | ||||||
|             # tries to get a country code from language |             # tries to get a country code from language | ||||||
|             locale = locale[0].lower() |             locale = locale[0].lower() | ||||||
|             lang_codes = [x[0] for x in language_codes] |             for lc in supported_languages: | ||||||
|             for lc in lang_codes: |                 lc = lc.split('-') | ||||||
|                 lc = lc.split('_') |  | ||||||
|                 if locale == lc[0]: |                 if locale == lc[0]: | ||||||
|                     locale = lc[1].lower() + '-' + lc[0].lower() |                     locale = lc[1].lower() + '-' + lc[0].lower() | ||||||
|                     break |                     break | ||||||
| @ -102,3 +116,17 @@ def response(resp): | |||||||
| 
 | 
 | ||||||
|     # return results |     # return results | ||||||
|     return results |     return results | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # get supported languages from their site | ||||||
|  | def _fetch_supported_languages(resp): | ||||||
|  | 
 | ||||||
|  |     # response is a js file with regions as an embedded object | ||||||
|  |     response_page = resp.text | ||||||
|  |     response_page = response_page[response_page.find('regions:{') + 8:] | ||||||
|  |     response_page = response_page[:response_page.find('}') + 1] | ||||||
|  | 
 | ||||||
|  |     regions_json = loads(response_page) | ||||||
|  |     supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys()) | ||||||
|  | 
 | ||||||
|  |     return supported_languages | ||||||
|  | |||||||
| @ -4,6 +4,7 @@ from re import compile, sub | |||||||
| from lxml import html | from lxml import html | ||||||
| from searx.utils import html_to_text | from searx.utils import html_to_text | ||||||
| from searx.engines.xpath import extract_text | from searx.engines.xpath import extract_text | ||||||
|  | from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url | ||||||
| 
 | 
 | ||||||
| url = 'https://api.duckduckgo.com/'\ | url = 'https://api.duckduckgo.com/'\ | ||||||
|     + '?{query}&format=json&pretty=0&no_redirect=1&d=1' |     + '?{query}&format=json&pretty=0&no_redirect=1&d=1' | ||||||
| @ -23,7 +24,7 @@ def result_to_text(url, text, htmlResult): | |||||||
| 
 | 
 | ||||||
| def request(query, params): | def request(query, params): | ||||||
|     params['url'] = url.format(query=urlencode({'q': query})) |     params['url'] = url.format(query=urlencode({'q': query})) | ||||||
|     params['headers']['Accept-Language'] = params['language'] |     params['headers']['Accept-Language'] = params['language'].split('-')[0] | ||||||
|     return params |     return params | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -14,6 +14,7 @@ from json import loads | |||||||
| from random import randint | from random import randint | ||||||
| from time import time | from time import time | ||||||
| from urllib import urlencode | from urllib import urlencode | ||||||
|  | from lxml.html import fromstring | ||||||
| 
 | 
 | ||||||
| # engine dependent config | # engine dependent config | ||||||
| categories = ['general'] | categories = ['general'] | ||||||
| @ -40,6 +41,8 @@ url_xpath = './/url' | |||||||
| title_xpath = './/title' | title_xpath = './/title' | ||||||
| content_xpath = './/sum' | content_xpath = './/sum' | ||||||
| 
 | 
 | ||||||
|  | supported_languages_url = 'https://gigablast.com/search?&rxikd=1' | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| # do search-request | # do search-request | ||||||
| def request(query, params): | def request(query, params): | ||||||
| @ -48,7 +51,9 @@ def request(query, params): | |||||||
|     if params['language'] == 'all': |     if params['language'] == 'all': | ||||||
|         language = 'xx' |         language = 'xx' | ||||||
|     else: |     else: | ||||||
|         language = params['language'][0:2] |         language = params['language'].replace('-', '_').lower() | ||||||
|  |         if language.split('-')[0] != 'zh': | ||||||
|  |             language = language.split('-')[0] | ||||||
| 
 | 
 | ||||||
|     if params['safesearch'] >= 1: |     if params['safesearch'] >= 1: | ||||||
|         safesearch = 1 |         safesearch = 1 | ||||||
| @ -82,3 +87,16 @@ def response(resp): | |||||||
| 
 | 
 | ||||||
|     # return results |     # return results | ||||||
|     return results |     return results | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # get supported languages from their site | ||||||
|  | def _fetch_supported_languages(resp): | ||||||
|  |     supported_languages = [] | ||||||
|  |     dom = fromstring(resp.text) | ||||||
|  |     links = dom.xpath('//span[@id="menu2"]/a') | ||||||
|  |     for link in links: | ||||||
|  |         code = link.xpath('./@href')[0][-2:] | ||||||
|  |         if code != 'xx' and code not in supported_languages: | ||||||
|  |             supported_languages.append(code) | ||||||
|  | 
 | ||||||
|  |     return supported_languages | ||||||
|  | |||||||
| @ -103,6 +103,7 @@ map_hostname_start = 'maps.google.' | |||||||
| maps_path = '/maps' | maps_path = '/maps' | ||||||
| redirect_path = '/url' | redirect_path = '/url' | ||||||
| images_path = '/images' | images_path = '/images' | ||||||
|  | supported_languages_url = 'https://www.google.com/preferences?#languages' | ||||||
| 
 | 
 | ||||||
| # specific xpath variables | # specific xpath variables | ||||||
| results_xpath = '//div[@class="g"]' | results_xpath = '//div[@class="g"]' | ||||||
| @ -167,8 +168,12 @@ def request(query, params): | |||||||
|         language = 'en' |         language = 'en' | ||||||
|         country = 'US' |         country = 'US' | ||||||
|         url_lang = '' |         url_lang = '' | ||||||
|  |     elif params['language'][:2] == 'jv': | ||||||
|  |         language = 'jw' | ||||||
|  |         country = 'ID' | ||||||
|  |         url_lang = 'lang_jw' | ||||||
|     else: |     else: | ||||||
|         language_array = params['language'].lower().split('_') |         language_array = params['language'].lower().split('-') | ||||||
|         if len(language_array) == 2: |         if len(language_array) == 2: | ||||||
|             country = language_array[1] |             country = language_array[1] | ||||||
|         else: |         else: | ||||||
| @ -355,3 +360,16 @@ def attributes_to_html(attributes): | |||||||
|         retval = retval + '<tr><th>' + a.get('label') + '</th><td>' + value + '</td></tr>' |         retval = retval + '<tr><th>' + a.get('label') + '</th><td>' + value + '</td></tr>' | ||||||
|     retval = retval + '</table>' |     retval = retval + '</table>' | ||||||
|     return retval |     return retval | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # get supported languages from their site | ||||||
|  | def _fetch_supported_languages(resp): | ||||||
|  |     supported_languages = {} | ||||||
|  |     dom = html.fromstring(resp.text) | ||||||
|  |     options = dom.xpath('//table//td/font/label/span') | ||||||
|  |     for option in options: | ||||||
|  |         code = option.xpath('./@id')[0][1:] | ||||||
|  |         name = option.text.title() | ||||||
|  |         supported_languages[code] = {"name": name} | ||||||
|  | 
 | ||||||
|  |     return supported_languages | ||||||
|  | |||||||
| @ -12,6 +12,8 @@ | |||||||
| 
 | 
 | ||||||
| from lxml import html | from lxml import html | ||||||
| from urllib import urlencode | from urllib import urlencode | ||||||
|  | from json import loads | ||||||
|  | from searx.engines.google import _fetch_supported_languages, supported_languages_url | ||||||
| 
 | 
 | ||||||
| # search-url | # search-url | ||||||
| categories = ['news'] | categories = ['news'] | ||||||
| @ -50,7 +52,7 @@ def request(query, params): | |||||||
|                                       search_options=urlencode(search_options)) |                                       search_options=urlencode(search_options)) | ||||||
| 
 | 
 | ||||||
|     if params['language'] != 'all': |     if params['language'] != 'all': | ||||||
|         language_array = params['language'].lower().split('_') |         language_array = params['language'].lower().split('-') | ||||||
|         params['url'] += '&lr=lang_' + language_array[0] |         params['url'] += '&lr=lang_' + language_array[0] | ||||||
| 
 | 
 | ||||||
|     return params |     return params | ||||||
|  | |||||||
| @ -46,7 +46,7 @@ def request(query, params): | |||||||
|     if params['language'] == 'all': |     if params['language'] == 'all': | ||||||
|         language = 'en' |         language = 'en' | ||||||
|     else: |     else: | ||||||
|         language = params['language'].split('_')[0] |         language = params['language'].split('-')[0] | ||||||
| 
 | 
 | ||||||
|     # format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)] |     # format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)] | ||||||
|     if any(x[1] == 'language' for x in format_strings): |     if any(x[1] == 'language' for x in format_strings): | ||||||
|  | |||||||
| @ -26,7 +26,7 @@ search_string = 'api/?{query}&limit={limit}' | |||||||
| result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}' | result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}' | ||||||
| 
 | 
 | ||||||
| # list of supported languages | # list of supported languages | ||||||
| allowed_languages = ['de', 'en', 'fr', 'it'] | supported_languages = ['de', 'en', 'fr', 'it'] | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # do search-request | # do search-request | ||||||
| @ -37,7 +37,7 @@ def request(query, params): | |||||||
| 
 | 
 | ||||||
|     if params['language'] != 'all': |     if params['language'] != 'all': | ||||||
|         language = params['language'].split('_')[0] |         language = params['language'].split('_')[0] | ||||||
|         if language in allowed_languages: |         if language in supported_languages: | ||||||
|             params['url'] = params['url'] + "&lang=" + language |             params['url'] = params['url'] + "&lang=" + language | ||||||
| 
 | 
 | ||||||
|     # using searx User-Agent |     # using searx User-Agent | ||||||
|  | |||||||
| @ -46,7 +46,7 @@ def request(query, params): | |||||||
| 
 | 
 | ||||||
|     # add language tag if specified |     # add language tag if specified | ||||||
|     if params['language'] != 'all': |     if params['language'] != 'all': | ||||||
|         params['url'] += '&locale=' + params['language'].lower() |         params['url'] += '&locale=' + params['language'].replace('-', '_').lower() | ||||||
| 
 | 
 | ||||||
|     return params |     return params | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -47,7 +47,7 @@ def request(query, params): | |||||||
| 
 | 
 | ||||||
|     # set language if specified |     # set language if specified | ||||||
|     if params['language'] != 'all': |     if params['language'] != 'all': | ||||||
|         params['data']['with_language'] = ('lang_' + params['language'].split('_')[0]) |         params['data']['with_language'] = ('lang_' + params['language'].split('-')[0]) | ||||||
| 
 | 
 | ||||||
|     return params |     return params | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -22,7 +22,7 @@ language = "" | |||||||
| 
 | 
 | ||||||
| # search-url | # search-url | ||||||
| url = 'http://www.subtitleseeker.com/' | url = 'http://www.subtitleseeker.com/' | ||||||
| search_url = url + 'search/TITLES/{query}&p={pageno}' | search_url = url + 'search/TITLES/{query}?p={pageno}' | ||||||
| 
 | 
 | ||||||
| # specific xpath variables | # specific xpath variables | ||||||
| results_xpath = '//div[@class="boxRows"]' | results_xpath = '//div[@class="boxRows"]' | ||||||
| @ -43,10 +43,16 @@ def response(resp): | |||||||
| 
 | 
 | ||||||
|     search_lang = "" |     search_lang = "" | ||||||
| 
 | 
 | ||||||
|     if resp.search_params['language'] != 'all': |     # dirty fix for languages named differently in their site | ||||||
|         search_lang = [lc[1] |     if resp.search_params['language'][:2] == 'fa': | ||||||
|  |         search_lang = 'Farsi' | ||||||
|  |     elif resp.search_params['language'] == 'pt-BR': | ||||||
|  |         search_lang = 'Brazilian' | ||||||
|  |     elif resp.search_params['language'] != 'all': | ||||||
|  |         search_lang = [lc[3] | ||||||
|                        for lc in language_codes |                        for lc in language_codes | ||||||
|                        if lc[0][:2] == resp.search_params['language'].split('_')[0]][0] |                        if lc[0].split('-')[0] == resp.search_params['language'].split('-')[0]] | ||||||
|  |         search_lang = search_lang[0].split(' (')[0] | ||||||
| 
 | 
 | ||||||
|     # parse results |     # parse results | ||||||
|     for result in dom.xpath(results_xpath): |     for result in dom.xpath(results_xpath): | ||||||
|  | |||||||
| @ -13,6 +13,7 @@ | |||||||
| from json import loads | from json import loads | ||||||
| from urllib import urlencode, unquote | from urllib import urlencode, unquote | ||||||
| import re | import re | ||||||
|  | from lxml.html import fromstring | ||||||
| 
 | 
 | ||||||
| # engine dependent config | # engine dependent config | ||||||
| categories = ['general', 'images'] | categories = ['general', 'images'] | ||||||
| @ -23,6 +24,8 @@ language_support = True | |||||||
| base_url = 'https://swisscows.ch/' | base_url = 'https://swisscows.ch/' | ||||||
| search_string = '?{query}&page={page}' | search_string = '?{query}&page={page}' | ||||||
| 
 | 
 | ||||||
|  | supported_languages_url = base_url | ||||||
|  | 
 | ||||||
| # regex | # regex | ||||||
| regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment') | regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment') | ||||||
| regex_json_remove_start = re.compile(r'^initialData:\s*') | regex_json_remove_start = re.compile(r'^initialData:\s*') | ||||||
| @ -35,9 +38,11 @@ def request(query, params): | |||||||
|     if params['language'] == 'all': |     if params['language'] == 'all': | ||||||
|         ui_language = 'browser' |         ui_language = 'browser' | ||||||
|         region = 'browser' |         region = 'browser' | ||||||
|  |     elif params['language'].split('-')[0] == 'no': | ||||||
|  |         region = 'nb-NO' | ||||||
|     else: |     else: | ||||||
|         region = params['language'].replace('_', '-') |         region = params['language'] | ||||||
|         ui_language = params['language'].split('_')[0] |         ui_language = params['language'].split('-')[0] | ||||||
| 
 | 
 | ||||||
|     search_path = search_string.format( |     search_path = search_string.format( | ||||||
|         query=urlencode({'query': query, |         query=urlencode({'query': query, | ||||||
| @ -106,3 +111,15 @@ def response(resp): | |||||||
| 
 | 
 | ||||||
|     # return results |     # return results | ||||||
|     return results |     return results | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # get supported languages from their site | ||||||
|  | def _fetch_supported_languages(resp): | ||||||
|  |     supported_languages = [] | ||||||
|  |     dom = fromstring(resp.text) | ||||||
|  |     options = dom.xpath('//div[@id="regions-popup"]//ul/li/a') | ||||||
|  |     for option in options: | ||||||
|  |         code = option.xpath('./@data-val')[0] | ||||||
|  |         supported_languages.append(code) | ||||||
|  | 
 | ||||||
|  |     return supported_languages | ||||||
|  | |||||||
| @ -40,7 +40,7 @@ def request(query, params): | |||||||
| 
 | 
 | ||||||
|     # set language if specified |     # set language if specified | ||||||
|     if params['language'] != 'all': |     if params['language'] != 'all': | ||||||
|         params['cookies']['lang'] = params['language'].split('_')[0] |         params['cookies']['lang'] = params['language'].split('-')[0] | ||||||
|     else: |     else: | ||||||
|         params['cookies']['lang'] = 'en' |         params['cookies']['lang'] = 'en' | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -14,6 +14,8 @@ | |||||||
| from searx import logger | from searx import logger | ||||||
| from searx.poolrequests import get | from searx.poolrequests import get | ||||||
| from searx.engines.xpath import extract_text | from searx.engines.xpath import extract_text | ||||||
|  | from searx.utils import format_date_by_locale | ||||||
|  | from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url | ||||||
| 
 | 
 | ||||||
| from json import loads | from json import loads | ||||||
| from lxml.html import fromstring | from lxml.html import fromstring | ||||||
| @ -55,7 +57,7 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]' | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def request(query, params): | def request(query, params): | ||||||
|     language = params['language'].split('_')[0] |     language = params['language'].split('-')[0] | ||||||
|     if language == 'all': |     if language == 'all': | ||||||
|         language = 'en' |         language = 'en' | ||||||
| 
 | 
 | ||||||
| @ -70,7 +72,7 @@ def response(resp): | |||||||
|     html = fromstring(resp.content) |     html = fromstring(resp.content) | ||||||
|     wikidata_ids = html.xpath(wikidata_ids_xpath) |     wikidata_ids = html.xpath(wikidata_ids_xpath) | ||||||
| 
 | 
 | ||||||
|     language = resp.search_params['language'].split('_')[0] |     language = resp.search_params['language'].split('-')[0] | ||||||
|     if language == 'all': |     if language == 'all': | ||||||
|         language = 'en' |         language = 'en' | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -12,6 +12,8 @@ | |||||||
| 
 | 
 | ||||||
| from json import loads | from json import loads | ||||||
| from urllib import urlencode, quote | from urllib import urlencode, quote | ||||||
|  | from lxml.html import fromstring | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| # search-url | # search-url | ||||||
| base_url = 'https://{language}.wikipedia.org/' | base_url = 'https://{language}.wikipedia.org/' | ||||||
| @ -24,14 +26,16 @@ search_postfix = 'w/api.php?'\ | |||||||
|     '&explaintext'\ |     '&explaintext'\ | ||||||
|     '&pithumbsize=300'\ |     '&pithumbsize=300'\ | ||||||
|     '&redirects' |     '&redirects' | ||||||
|  | supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # set language in base_url | # set language in base_url | ||||||
| def url_lang(lang): | def url_lang(lang): | ||||||
|     if lang == 'all': |     lang = lang.split('-')[0] | ||||||
|  |     if lang == 'all' or lang not in supported_languages: | ||||||
|         language = 'en' |         language = 'en' | ||||||
|     else: |     else: | ||||||
|         language = lang.split('_')[0] |         language = lang | ||||||
| 
 | 
 | ||||||
|     return base_url.format(language=language) |     return base_url.format(language=language) | ||||||
| 
 | 
 | ||||||
| @ -111,3 +115,24 @@ def response(resp): | |||||||
|                     'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]}) |                     'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]}) | ||||||
| 
 | 
 | ||||||
|     return results |     return results | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # get supported languages from their site | ||||||
|  | def _fetch_supported_languages(resp): | ||||||
|  |     supported_languages = {} | ||||||
|  |     dom = fromstring(resp.text) | ||||||
|  |     tables = dom.xpath('//table[contains(@class,"sortable")]') | ||||||
|  |     for table in tables: | ||||||
|  |         # exclude header row | ||||||
|  |         trs = table.xpath('.//tr')[1:] | ||||||
|  |         for tr in trs: | ||||||
|  |             td = tr.xpath('./td') | ||||||
|  |             code = td[3].xpath('./a')[0].text | ||||||
|  |             name = td[2].xpath('./a')[0].text | ||||||
|  |             english_name = td[1].xpath('./a')[0].text | ||||||
|  |             articles = int(td[4].xpath('./a/b')[0].text.replace(',', '')) | ||||||
|  |             # exclude languages with too few articles | ||||||
|  |             if articles >= 100000: | ||||||
|  |                 supported_languages[code] = {"name": name, "english_name": english_name, "articles": articles} | ||||||
|  | 
 | ||||||
|  |     return supported_languages | ||||||
|  | |||||||
| @ -53,7 +53,7 @@ def request(query, params): | |||||||
| 
 | 
 | ||||||
|     # add language tag if specified |     # add language tag if specified | ||||||
|     if params['language'] != 'all': |     if params['language'] != 'all': | ||||||
|         params['url'] += '&lr=lang_' + params['language'].split('_')[0] |         params['url'] += '&lr=lang_' + params['language'].split('-')[0] | ||||||
| 
 | 
 | ||||||
|     return params |     return params | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -27,6 +27,8 @@ base_url = 'https://search.yahoo.com/' | |||||||
| search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}' | search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}' | ||||||
| search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time' | search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time' | ||||||
| 
 | 
 | ||||||
|  | supported_languages_url = 'https://search.yahoo.com/web/advanced' | ||||||
|  | 
 | ||||||
| # specific xpath variables | # specific xpath variables | ||||||
| results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]" | results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]" | ||||||
| url_xpath = './/h3/a/@href' | url_xpath = './/h3/a/@href' | ||||||
| @ -72,7 +74,13 @@ def _get_url(query, offset, language, time_range): | |||||||
| def _get_language(params): | def _get_language(params): | ||||||
|     if params['language'] == 'all': |     if params['language'] == 'all': | ||||||
|         return 'en' |         return 'en' | ||||||
|     return params['language'].split('_')[0] |     elif params['language'][:2] == 'zh': | ||||||
|  |         if params['language'] == 'zh' or params['language'] == 'zh-CH': | ||||||
|  |             return 'szh' | ||||||
|  |         else: | ||||||
|  |             return 'tzh' | ||||||
|  |     else: | ||||||
|  |         return params['language'].split('-')[0] | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # do search-request | # do search-request | ||||||
| @ -132,3 +140,15 @@ def response(resp): | |||||||
| 
 | 
 | ||||||
|     # return results |     # return results | ||||||
|     return results |     return results | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # get supported languages from their site | ||||||
|  | def _fetch_supported_languages(resp): | ||||||
|  |     supported_languages = [] | ||||||
|  |     dom = html.fromstring(resp.text) | ||||||
|  |     options = dom.xpath('//div[@id="yschlang"]/span/label/input') | ||||||
|  |     for option in options: | ||||||
|  |         code = option.xpath('./@value')[0][5:].replace('_', '-') | ||||||
|  |         supported_languages.append(code) | ||||||
|  | 
 | ||||||
|  |     return supported_languages | ||||||
|  | |||||||
| @ -12,7 +12,7 @@ | |||||||
| from urllib import urlencode | from urllib import urlencode | ||||||
| from lxml import html | from lxml import html | ||||||
| from searx.engines.xpath import extract_text, extract_url | from searx.engines.xpath import extract_text, extract_url | ||||||
| from searx.engines.yahoo import parse_url | from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url | ||||||
| from datetime import datetime, timedelta | from datetime import datetime, timedelta | ||||||
| import re | import re | ||||||
| from dateutil import parser | from dateutil import parser | ||||||
|  | |||||||
| @ -22,7 +22,9 @@ language_support = True  # TODO | |||||||
| 
 | 
 | ||||||
| default_tld = 'com' | default_tld = 'com' | ||||||
| language_map = {'ru': 'ru', | language_map = {'ru': 'ru', | ||||||
|                 'ua': 'uk', |                 'ua': 'ua', | ||||||
|  |                 'be': 'by', | ||||||
|  |                 'kk': 'kz', | ||||||
|                 'tr': 'com.tr'} |                 'tr': 'com.tr'} | ||||||
| 
 | 
 | ||||||
| # search-url | # search-url | ||||||
| @ -36,7 +38,7 @@ content_xpath = './/div[@class="text-container typo typo_text_m typo_line_m orga | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def request(query, params): | def request(query, params): | ||||||
|     lang = params['language'].split('_')[0] |     lang = params['language'].split('-')[0] | ||||||
|     host = base_url.format(tld=language_map.get(lang) or default_tld) |     host = base_url.format(tld=language_map.get(lang) or default_tld) | ||||||
|     params['url'] = host + search_url.format(page=params['pageno'] - 1, |     params['url'] = host + search_url.format(page=params['pageno'] - 1, | ||||||
|                                              query=urlencode({'text': query})) |                                              query=urlencode({'text': query})) | ||||||
|  | |||||||
| @ -36,7 +36,7 @@ def request(query, params): | |||||||
| 
 | 
 | ||||||
|     # add language tag if specified |     # add language tag if specified | ||||||
|     if params['language'] != 'all': |     if params['language'] != 'all': | ||||||
|         params['url'] += '&relevanceLanguage=' + params['language'].split('_')[0] |         params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0] | ||||||
| 
 | 
 | ||||||
|     return params |     return params | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -1,78 +1,131 @@ | |||||||
| ''' | # -*- coding: utf-8 -*- | ||||||
| searx is free software: you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU Affero General Public License as published by |  | ||||||
| the Free Software Foundation, either version 3 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
| 
 |  | ||||||
| searx is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU Affero General Public License for more details. |  | ||||||
| 
 |  | ||||||
| You should have received a copy of the GNU Affero General Public License |  | ||||||
| along with searx. If not, see < http://www.gnu.org/licenses/ >. |  | ||||||
| 
 |  | ||||||
| (C) 2013- by Adam Tauber, <asciimoo@gmail.com> |  | ||||||
| ''' |  | ||||||
| 
 |  | ||||||
| # list of language codes | # list of language codes | ||||||
|  | # this file is generated automatically by utils/update_search_languages.py | ||||||
|  | 
 | ||||||
| language_codes = ( | language_codes = ( | ||||||
|     ("ar_XA", "Arabic", "Arabia"), |     (u"af", u"Afrikaans", u"", u""), | ||||||
|     ("bg_BG", "Bulgarian", "Bulgaria"), |     (u"am", u"አማርኛ", u"", u"Amharic"), | ||||||
|     ("cs_CZ", "Czech", "Czech Republic"), |     (u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"), | ||||||
|     ("da_DK", "Danish", "Denmark"), |     (u"az", u"Azərbaycanca", u"", u"Azerbaijani"), | ||||||
|     ("de_AT", "German", "Austria"), |     (u"be", u"Беларуская", u"", u"Belarusian"), | ||||||
|     ("de_CH", "German", "Switzerland"), |     (u"bg-BG", u"Български", u"България", u"Bulgarian"), | ||||||
|     ("de_DE", "German", "Germany"), |     (u"bn", u"বাংলা", u"", u"Bengali"), | ||||||
|     ("el_GR", "Greek", "Greece"), |     (u"br", u"Brezhoneg", u"", u"Breton"), | ||||||
|     ("en_AU", "English", "Australia"), |     (u"bs", u"Bosnian", u"", u"Bosnian"), | ||||||
|     ("en_CA", "English", "Canada"), |     (u"ca", u"Català", u"", u"Catalan"), | ||||||
|     ("en_GB", "English", "United Kingdom"), |     (u"ca-CT", u"Català", u"", u"Catalan"), | ||||||
|     ("en_ID", "English", "Indonesia"), |     (u"ca-ES", u"Català", u"Espanya", u"Catalan"), | ||||||
|     ("en_IE", "English", "Ireland"), |     (u"ce", u"Нохчийн", u"", u"Chechen"), | ||||||
|     ("en_IN", "English", "India"), |     (u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"), | ||||||
|     ("en_MY", "English", "Malaysia"), |     (u"cs-CZ", u"Čeština", u"Česko", u"Czech"), | ||||||
|     ("en_NZ", "English", "New Zealand"), |     (u"cy", u"Cymraeg", u"", u"Welsh"), | ||||||
|     ("en_PH", "English", "Philippines"), |     (u"da-DK", u"Dansk", u"Danmark", u"Danish"), | ||||||
|     ("en_SG", "English", "Singapore"), |     (u"de", u"Deutsch", u"", u"German"), | ||||||
|     ("en_US", "English", "United States"), |     (u"de-AT", u"Deutsch", u"Österreich", u"German"), | ||||||
|     ("en_XA", "English", "Arabia"), |     (u"de-CH", u"Deutsch", u"Schweiz", u"German"), | ||||||
|     ("en_ZA", "English", "South Africa"), |     (u"de-DE", u"Deutsch", u"Deutschland", u"German"), | ||||||
|     ("es_AR", "Spanish", "Argentina"), |     (u"el-GR", u"Ελληνικά", u"Ελλάδα", u"Greek"), | ||||||
|     ("es_CL", "Spanish", "Chile"), |     (u"en", u"English", u"", u"English"), | ||||||
|     ("es_ES", "Spanish", "Spain"), |     (u"en-AU", u"English", u"Australia", u"English"), | ||||||
|     ("es_MX", "Spanish", "Mexico"), |     (u"en-CA", u"English", u"Canada", u"English"), | ||||||
|     ("es_US", "Spanish", "United States"), |     (u"en-GB", u"English", u"United Kingdom", u"English"), | ||||||
|     ("es_XL", "Spanish", "Latin America"), |     (u"en-ID", u"English", u"Indonesia", u"English"), | ||||||
|     ("et_EE", "Estonian", "Estonia"), |     (u"en-IE", u"English", u"Ireland", u"English"), | ||||||
|     ("fi_FI", "Finnish", "Finland"), |     (u"en-IN", u"English", u"India", u"English"), | ||||||
|     ("fr_BE", "French", "Belgium"), |     (u"en-MY", u"English", u"Malaysia", u"English"), | ||||||
|     ("fr_CA", "French", "Canada"), |     (u"en-NZ", u"English", u"New Zealand", u"English"), | ||||||
|     ("fr_CH", "French", "Switzerland"), |     (u"en-PH", u"English", u"Philippines", u"English"), | ||||||
|     ("fr_FR", "French", "France"), |     (u"en-SG", u"English", u"Singapore", u"English"), | ||||||
|     ("he_IL", "Hebrew", "Israel"), |     (u"en-US", u"English", u"United States", u"English"), | ||||||
|     ("hr_HR", "Croatian", "Croatia"), |     (u"en-ZA", u"English", u"South Africa", u"English"), | ||||||
|     ("hu_HU", "Hungarian", "Hungary"), |     (u"eo", u"Esperanto", u"", u"Esperanto"), | ||||||
|     ("it_IT", "Italian", "Italy"), |     (u"es", u"Español", u"", u"Spanish"), | ||||||
|     ("ja_JP", "Japanese", "Japan"), |     (u"es-AR", u"Español", u"Argentina", u"Spanish"), | ||||||
|     ("ko_KR", "Korean", "Korea"), |     (u"es-CL", u"Español", u"Chile", u"Spanish"), | ||||||
|     ("lt_LT", "Lithuanian", "Lithuania"), |     (u"es-CO", u"Español", u"Colombia", u"Spanish"), | ||||||
|     ("lv_LV", "Latvian", "Latvia"), |     (u"es-ES", u"Español", u"España", u"Spanish"), | ||||||
|     ("nb_NO", "Norwegian", "Norway"), |     (u"es-MX", u"Español", u"México", u"Spanish"), | ||||||
|     ("nl_BE", "Dutch", "Belgium"), |     (u"es-PE", u"Español", u"Perú", u"Spanish"), | ||||||
|     ("nl_NL", "Dutch", "Netherlands"), |     (u"es-US", u"Español", u"Estados Unidos", u"Spanish"), | ||||||
|     ("oc_OC", "Occitan", "Occitan"), |     (u"et-EE", u"Eesti", u"Eesti", u"Estonian"), | ||||||
|     ("pl_PL", "Polish", "Poland"), |     (u"eu", u"Euskara", u"", u"Basque"), | ||||||
|     ("pt_BR", "Portuguese", "Brazil"), |     (u"fa", u"فارسی", u"", u"Persian"), | ||||||
|     ("pt_PT", "Portuguese", "Portugal"), |     (u"fi-FI", u"Suomi", u"Suomi", u"Finnish"), | ||||||
|     ("ro_RO", "Romanian", "Romania"), |     (u"fr", u"Français", u"", u"French"), | ||||||
|     ("ru_RU", "Russian", "Russia"), |     (u"fr-BE", u"Français", u"Belgique", u"French"), | ||||||
|     ("sk_SK", "Slovak", "Slovak Republic"), |     (u"fr-CA", u"Français", u"Canada", u"French"), | ||||||
|     ("sl_SL", "Slovenian", "Slovenia"), |     (u"fr-CH", u"Français", u"Suisse", u"French"), | ||||||
|     ("sv_SE", "Swedish", "Sweden"), |     (u"fr-FR", u"Français", u"France", u"French"), | ||||||
|     ("th_TH", "Thai", "Thailand"), |     (u"ga", u"Gaeilge", u"", u"Irish"), | ||||||
|     ("tr_TR", "Turkish", "Turkey"), |     (u"gl", u"Galego", u"", u"Galician"), | ||||||
|     ("uk_UA", "Ukrainian", "Ukraine"), |     (u"gu", u"ગુજરાતી", u"", u"Gujarati"), | ||||||
|     ("zh_CN", "Chinese", "China"), |     (u"he-IL", u"עברית", u"ישראל", u"Hebrew"), | ||||||
|     ("zh_HK", "Chinese", "Hong Kong SAR"), |     (u"hi", u"हिन्दी", u"", u"Hindi"), | ||||||
|     ("zh_TW", "Chinese", "Taiwan")) |     (u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"), | ||||||
|  |     (u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"), | ||||||
|  |     (u"hy", u"Հայերեն", u"", u"Armenian"), | ||||||
|  |     (u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"), | ||||||
|  |     (u"is", u"Íslenska", u"", u""), | ||||||
|  |     (u"it", u"Italiano", u"", u"Italian"), | ||||||
|  |     (u"it-CH", u"Italiano", u"Svizzera", u"Italian"), | ||||||
|  |     (u"it-IT", u"Italiano", u"Italia", u"Italian"), | ||||||
|  |     (u"iw", u"עברית", u"", u""), | ||||||
|  |     (u"ja-JP", u"日本語", u"日本", u"Japanese"), | ||||||
|  |     (u"ka", u"ქართული", u"", u"Georgian"), | ||||||
|  |     (u"kk", u"Қазақша", u"", u"Kazakh"), | ||||||
|  |     (u"kn", u"ಕನ್ನಡ", u"", u"Kannada"), | ||||||
|  |     (u"ko-KR", u"한국어", u"대한민국", u"Korean"), | ||||||
|  |     (u"la", u"Latina", u"", u"Latin"), | ||||||
|  |     (u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"), | ||||||
|  |     (u"lv-LV", u"Latviešu", u"Latvijas Republika", u""), | ||||||
|  |     (u"mi", u"Reo Māori", u"", u"Maori"), | ||||||
|  |     (u"min", u"Minangkabau", u"", u"Minangkabau"), | ||||||
|  |     (u"mk", u"Македонски", u"", u"Macedonian"), | ||||||
|  |     (u"mn", u"Монгол", u"", u"Mongolian"), | ||||||
|  |     (u"mr", u"मराठी", u"", u"Marathi"), | ||||||
|  |     (u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"), | ||||||
|  |     (u"mt", u"Malti", u"", u"Maltese"), | ||||||
|  |     (u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"), | ||||||
|  |     (u"nl", u"Nederlands", u"", u"Dutch"), | ||||||
|  |     (u"nl-BE", u"Nederlands", u"België", u"Dutch"), | ||||||
|  |     (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"), | ||||||
|  |     (u"nn", u"Nynorsk", u"", u"Norwegian"), | ||||||
|  |     (u"no-NO", u"Norsk", u"Norge", u"Norwegian"), | ||||||
|  |     (u"oc", u"Occitan", u"", u"Occitan"), | ||||||
|  |     (u"or", u"Oriya", u"", u"Oriya"), | ||||||
|  |     (u"pa", u"ਪੰਜਾਬੀ", u"", u"Panjabi"), | ||||||
|  |     (u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"), | ||||||
|  |     (u"ps", u"Pushto", u"", u"Pushto"), | ||||||
|  |     (u"pt", u"Português", u"", u"Portuguese"), | ||||||
|  |     (u"pt-BR", u"Português", u"Brasil", u"Portuguese"), | ||||||
|  |     (u"pt-PT", u"Português", u"Portugal", u"Portuguese"), | ||||||
|  |     (u"ro-RO", u"Română", u"România", u"Romanian"), | ||||||
|  |     (u"ru-RU", u"Русский", u"Россия", u"Russian"), | ||||||
|  |     (u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"), | ||||||
|  |     (u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"), | ||||||
|  |     (u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"), | ||||||
|  |     (u"sl", u"Slovenščina", u"", u"Slovenian"), | ||||||
|  |     (u"sr", u"Српски / Srpski", u"", u"Serbian"), | ||||||
|  |     (u"sv-SE", u"Svenska", u"Sverige", u"Swedish"), | ||||||
|  |     (u"sw", u"Kiswahili", u"", u""), | ||||||
|  |     (u"ta", u"தமிழ்", u"", u"Tamil"), | ||||||
|  |     (u"th-TH", u"ไทย", u"ไทย", u"Thai"), | ||||||
|  |     (u"ti", u"ትግርኛ", u"", u"Tigrinya"), | ||||||
|  |     (u"tl-PH", u"Filipino", u"Pilipinas", u""), | ||||||
|  |     (u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"), | ||||||
|  |     (u"tt", u"Татарча", u"", u"Tatar"), | ||||||
|  |     (u"uk-UA", u"Українська", u"Україна", u"Ukrainian"), | ||||||
|  |     (u"ur", u"اردو", u"", u"Urdu"), | ||||||
|  |     (u"uz", u"O‘zbek", u"", u"Uzbek"), | ||||||
|  |     (u"ve", u"Venda", u"", u"Venda"), | ||||||
|  |     (u"vi-VN", u"Tiếng Việt", u"Công Hòa Xã Hội Chủ Nghĩa Việt Nam", u"Vietnamese"), | ||||||
|  |     (u"vo", u"Volapük", u"", u"Volapük"), | ||||||
|  |     (u"wa", u"Walon", u"", u"Walloon"), | ||||||
|  |     (u"war", u"Winaray", u"", u"Waray-Waray"), | ||||||
|  |     (u"xh", u"Xhosa", u"", u"Xhosa"), | ||||||
|  |     (u"zh", u"中文", u"", u"Chinese"), | ||||||
|  |     (u"zh-CN", u"中文", u"中国", u"Chinese"), | ||||||
|  |     (u"zh-HK", u"中文", u"香港", u"Chinese"), | ||||||
|  |     (u"zh-TW", u"中文", u"台湾", u"Chinese"), | ||||||
|  |     (u"zu", u"Isi-Zulu", u"", u"Zulu") | ||||||
|  | ) | ||||||
|  | |||||||
| @ -95,6 +95,25 @@ class MultipleChoiceSetting(EnumStringSetting): | |||||||
|         resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE) |         resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | class SearchLanguageSetting(EnumStringSetting): | ||||||
|  |     """Available choices may change, so user's value may not be in choices anymore""" | ||||||
|  | 
 | ||||||
|  |     def parse(self, data): | ||||||
|  |         if data not in self.choices and data != self.value: | ||||||
|  |             # hack to give some backwards compatibility with old language cookies | ||||||
|  |             data = str(data).replace('_', '-') | ||||||
|  |             lang = data.split('-')[0] | ||||||
|  |             if data in self.choices: | ||||||
|  |                 pass | ||||||
|  |             elif lang in self.choices: | ||||||
|  |                 data = lang | ||||||
|  |             elif data == 'ar-XA': | ||||||
|  |                 data = 'ar-SA' | ||||||
|  |             else: | ||||||
|  |                 data = self.value | ||||||
|  |         self.value = data | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| class MapSetting(Setting): | class MapSetting(Setting): | ||||||
|     """Setting of a value that has to be translated in order to be storable""" |     """Setting of a value that has to be translated in order to be storable""" | ||||||
| 
 | 
 | ||||||
| @ -216,7 +235,7 @@ class Preferences(object): | |||||||
|         super(Preferences, self).__init__() |         super(Preferences, self).__init__() | ||||||
| 
 | 
 | ||||||
|         self.key_value_settings = {'categories': MultipleChoiceSetting(['general'], choices=categories), |         self.key_value_settings = {'categories': MultipleChoiceSetting(['general'], choices=categories), | ||||||
|                                    'language': EnumStringSetting(settings['search']['language'], |                                    'language': SearchLanguageSetting(settings['search']['language'], | ||||||
|                                                                      choices=LANGUAGE_CODES), |                                                                      choices=LANGUAGE_CODES), | ||||||
|                                    'locale': EnumStringSetting(settings['ui']['default_locale'], |                                    'locale': EnumStringSetting(settings['ui']['default_locale'], | ||||||
|                                                                choices=settings['locales'].keys() + ['']), |                                                                choices=settings['locales'].keys() + ['']), | ||||||
|  | |||||||
| @ -71,21 +71,24 @@ class RawTextQuery(object): | |||||||
|                 # check if any language-code is equal with |                 # check if any language-code is equal with | ||||||
|                 # declared language-codes |                 # declared language-codes | ||||||
|                 for lc in language_codes: |                 for lc in language_codes: | ||||||
|                     lang_id, lang_name, country = map(str.lower, lc) |                     lang_id, lang_name, country, english_name = map(unicode.lower, lc) | ||||||
| 
 | 
 | ||||||
|                     # if correct language-code is found |                     # if correct language-code is found | ||||||
|                     # set it as new search-language |                     # set it as new search-language | ||||||
|                     if lang == lang_id\ |                     if lang == lang_id\ | ||||||
|                        or lang_id.startswith(lang)\ |                        or lang_id.startswith(lang)\ | ||||||
|                        or lang == lang_name\ |                        or lang == lang_name\ | ||||||
|  |                        or lang == english_name\ | ||||||
|                        or lang.replace('_', ' ') == country: |                        or lang.replace('_', ' ') == country: | ||||||
|                         parse_next = True |                         parse_next = True | ||||||
|                         self.languages.append(lang) |                         self.languages.append(lang_id) | ||||||
|  |                         # to ensure best match (first match is not necessarily the best one) | ||||||
|  |                         if lang == lang_id: | ||||||
|                             break |                             break | ||||||
| 
 | 
 | ||||||
|             # this forces an engine or category |             # this forces an engine or category | ||||||
|             if query_part[0] == '!' or query_part[0] == '?': |             if query_part[0] == '!' or query_part[0] == '?': | ||||||
|                 prefix = query_part[1:].replace('_', ' ') |                 prefix = query_part[1:].replace('-', ' ') | ||||||
| 
 | 
 | ||||||
|                 # check if prefix is equal with engine shortcut |                 # check if prefix is equal with engine shortcut | ||||||
|                 if prefix in engine_shortcuts: |                 if prefix in engine_shortcuts: | ||||||
|  | |||||||
| @ -211,10 +211,14 @@ def get_search_query_from_webapp(preferences, form): | |||||||
|     # set query |     # set query | ||||||
|     query = raw_text_query.getSearchQuery() |     query = raw_text_query.getSearchQuery() | ||||||
| 
 | 
 | ||||||
|     # get last selected language in query, if possible |     # set specific language if set on request, query or preferences | ||||||
|     # TODO support search with multiple languages |     # TODO support search with multiple languages | ||||||
|     if len(raw_text_query.languages): |     if len(raw_text_query.languages): | ||||||
|         query_lang = raw_text_query.languages[-1] |         query_lang = raw_text_query.languages[-1] | ||||||
|  |     elif 'language' in form: | ||||||
|  |         query_lang = form.get('language') | ||||||
|  |     else: | ||||||
|  |         query_lang = preferences.get_value('language') | ||||||
| 
 | 
 | ||||||
|     query_time_range = form.get('time_range') |     query_time_range = form.get('time_range') | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -15,5 +15,10 @@ $(document).ready(function() { | |||||||
|                 $('#search_form').submit(); |                 $('#search_form').submit(); | ||||||
|             } |             } | ||||||
|         }); |         }); | ||||||
|  |         $('#language').change(function(e) { | ||||||
|  |             if($('#q').val()) { | ||||||
|  |                 $('#search_form').submit(); | ||||||
|  |             } | ||||||
|  |         }); | ||||||
|     } |     } | ||||||
| }); | }); | ||||||
|  | |||||||
| @ -13,9 +13,9 @@ | |||||||
|         <legend>{{ _('Search language') }}</legend> |         <legend>{{ _('Search language') }}</legend> | ||||||
|         <p> |         <p> | ||||||
|             <select name='language'> |             <select name='language'> | ||||||
|                 <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option> |                 <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option> | ||||||
|                 {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %} |                 {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %} | ||||||
|                 <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option> |                 <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option> | ||||||
|                 {% endfor %} |                 {% endfor %} | ||||||
|             </select> |             </select> | ||||||
|         </p> |         </p> | ||||||
|  | |||||||
| @ -14,9 +14,9 @@ | |||||||
|         <legend>{{ _('Search language') }}</legend> |         <legend>{{ _('Search language') }}</legend> | ||||||
|         <p> |         <p> | ||||||
|         <select name='language'> |         <select name='language'> | ||||||
|             <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option> |             <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option> | ||||||
|             {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %} |             {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %} | ||||||
|             <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option> |             <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option> | ||||||
|             {% endfor %} |             {% endfor %} | ||||||
|         </select> |         </select> | ||||||
|         </p> |         </p> | ||||||
|  | |||||||
| @ -6,4 +6,5 @@ | |||||||
| <div id="advanced-search-container"> | <div id="advanced-search-container"> | ||||||
|     {% include 'oscar/categories.html' %} |     {% include 'oscar/categories.html' %} | ||||||
|     {% include 'oscar/time-range.html' %} |     {% include 'oscar/time-range.html' %} | ||||||
|  |     {% include 'oscar/languages.html' %} | ||||||
| </div> | </div> | ||||||
|  | |||||||
							
								
								
									
										12
									
								
								searx/templates/oscar/languages.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								searx/templates/oscar/languages.html
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,12 @@ | |||||||
|  | {% if preferences %} | ||||||
|  | <select class="form-control" name='language'> | ||||||
|  | {% else %} | ||||||
|  | <select class="time_range" id='language' name='language'> | ||||||
|  | {% endif %} | ||||||
|  | 	<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option> | ||||||
|  | 		{% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %} | ||||||
|  | 		<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}> | ||||||
|  | 			{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }} | ||||||
|  | 		</option> | ||||||
|  | 		{% endfor %} | ||||||
|  | </select> | ||||||
| @ -40,12 +40,7 @@ | |||||||
|                     {% set language_label = _('Search language') %} |                     {% set language_label = _('Search language') %} | ||||||
|                     {% set language_info = _('What language do you prefer for search?') %} |                     {% set language_info = _('What language do you prefer for search?') %} | ||||||
|                     {{ preferences_item_header(language_info, language_label, rtl) }} |                     {{ preferences_item_header(language_info, language_label, rtl) }} | ||||||
|                         <select class="form-control" name='language'> | 						{% include 'oscar/languages.html' %} | ||||||
|                             <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option> |  | ||||||
|                             {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %} |  | ||||||
|                             <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option> |  | ||||||
|                             {% endfor %} |  | ||||||
|                         </select> |  | ||||||
|                     {{ preferences_item_footer(language_info, language_label, rtl) }} |                     {{ preferences_item_footer(language_info, language_label, rtl) }} | ||||||
| 
 | 
 | ||||||
|                     {% set locale_label = _('Interface language') %} |                     {% set locale_label = _('Interface language') %} | ||||||
| @ -153,6 +148,7 @@ | |||||||
| 				    <th>{{ _("Allow") }}</th> | 				    <th>{{ _("Allow") }}</th> | ||||||
| 				    <th>{{ _("Engine name") }}</th> | 				    <th>{{ _("Engine name") }}</th> | ||||||
| 				    <th>{{ _("Shortcut") }}</th> | 				    <th>{{ _("Shortcut") }}</th> | ||||||
|  | 				    <th>{{ _("Language support") }}</th> | ||||||
| 				    <th>{{ _("SafeSearch") }}</th> | 				    <th>{{ _("SafeSearch") }}</th> | ||||||
| 				    <th>{{ _("Time range") }}</th> | 				    <th>{{ _("Time range") }}</th> | ||||||
| 				    <th>{{ _("Avg. time") }}</th> | 				    <th>{{ _("Avg. time") }}</th> | ||||||
| @ -161,6 +157,7 @@ | |||||||
| 				    <th>{{ _("Max time") }}</th> | 				    <th>{{ _("Max time") }}</th> | ||||||
| 				    <th>{{ _("Avg. time") }}</th> | 				    <th>{{ _("Avg. time") }}</th> | ||||||
| 				    <th>{{ _("SafeSearch") }}</th> | 				    <th>{{ _("SafeSearch") }}</th> | ||||||
|  | 				    <th>{{ _("Language support") }}</th> | ||||||
| 				    <th>{{ _("Shortcut") }}</th> | 				    <th>{{ _("Shortcut") }}</th> | ||||||
| 				    <th>{{ _("Engine name") }}</th> | 				    <th>{{ _("Engine name") }}</th> | ||||||
| 				    <th>{{ _("Allow") }}</th> | 				    <th>{{ _("Allow") }}</th> | ||||||
| @ -175,6 +172,7 @@ | |||||||
|                                     </td> |                                     </td> | ||||||
|                                     <th>{{ search_engine.name }}</th> |                                     <th>{{ search_engine.name }}</th> | ||||||
| 				    <td>{{ shortcuts[search_engine.name] }}</td> | 				    <td>{{ shortcuts[search_engine.name] }}</td> | ||||||
|  | 				    <td><input type="checkbox" {{ "checked" if current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td> | ||||||
| 				    <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td> | 				    <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td> | ||||||
| 				    <td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td> | 				    <td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td> | ||||||
| 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td> | 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td> | ||||||
| @ -183,6 +181,7 @@ | |||||||
| 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td> | 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td> | ||||||
| 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td> | 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td> | ||||||
| 				    <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td> | 				    <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td> | ||||||
|  | 				    <td><input type="checkbox" {{ "checked" if current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td> | ||||||
| 				    <td>{{ shortcuts[search_engine.name] }}</td> | 				    <td>{{ shortcuts[search_engine.name] }}</td> | ||||||
|                                     <th>{{ search_engine.name }}</th> |                                     <th>{{ search_engine.name }}</th> | ||||||
|                                     <td class="onoff-checkbox"> |                                     <td class="onoff-checkbox"> | ||||||
|  | |||||||
| @ -9,9 +9,9 @@ | |||||||
|         <legend>{{ _('Search language') }}</legend> |         <legend>{{ _('Search language') }}</legend> | ||||||
|         <p> |         <p> | ||||||
|         <select name='language'> |         <select name='language'> | ||||||
|             <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option> |             <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option> | ||||||
|             {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %} |             {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %} | ||||||
|             <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option> |             <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option> | ||||||
|             {% endfor %} |             {% endfor %} | ||||||
|         </select> |         </select> | ||||||
|         </p> |         </p> | ||||||
|  | |||||||
| @ -330,6 +330,10 @@ def render(template_name, override_theme=None, **kwargs): | |||||||
| 
 | 
 | ||||||
|     kwargs['safesearch'] = str(request.preferences.get_value('safesearch')) |     kwargs['safesearch'] = str(request.preferences.get_value('safesearch')) | ||||||
| 
 | 
 | ||||||
|  |     kwargs['language_codes'] = language_codes | ||||||
|  |     if 'current_language' not in kwargs: | ||||||
|  |         kwargs['current_language'] = request.preferences.get_value('language') | ||||||
|  | 
 | ||||||
|     # override url_for function in templates |     # override url_for function in templates | ||||||
|     kwargs['url_for'] = url_for_theme |     kwargs['url_for'] = url_for_theme | ||||||
| 
 | 
 | ||||||
| @ -510,6 +514,7 @@ def index(): | |||||||
|         answers=result_container.answers, |         answers=result_container.answers, | ||||||
|         infoboxes=result_container.infoboxes, |         infoboxes=result_container.infoboxes, | ||||||
|         paging=result_container.paging, |         paging=result_container.paging, | ||||||
|  |         current_language=search_query.lang, | ||||||
|         base_url=get_base_url(), |         base_url=get_base_url(), | ||||||
|         theme=get_current_theme_name(), |         theme=get_current_theme_name(), | ||||||
|         favicons=global_favicons[themes.index(get_current_theme_name())] |         favicons=global_favicons[themes.index(get_current_theme_name())] | ||||||
| @ -552,7 +557,7 @@ def autocompleter(): | |||||||
|         if not language or language == 'all': |         if not language or language == 'all': | ||||||
|             language = 'en' |             language = 'en' | ||||||
|         else: |         else: | ||||||
|             language = language.split('_')[0] |             language = language.split('-')[0] | ||||||
|         # run autocompletion |         # run autocompletion | ||||||
|         raw_results.extend(completer(raw_text_query.getSearchQuery(), language)) |         raw_results.extend(completer(raw_text_query.getSearchQuery(), language)) | ||||||
| 
 | 
 | ||||||
| @ -615,9 +620,7 @@ def preferences(): | |||||||
|     return render('preferences.html', |     return render('preferences.html', | ||||||
|                   locales=settings['locales'], |                   locales=settings['locales'], | ||||||
|                   current_locale=get_locale(), |                   current_locale=get_locale(), | ||||||
|                   current_language=lang, |  | ||||||
|                   image_proxy=image_proxy, |                   image_proxy=image_proxy, | ||||||
|                   language_codes=language_codes, |  | ||||||
|                   engines_by_category=categories, |                   engines_by_category=categories, | ||||||
|                   stats=stats, |                   stats=stats, | ||||||
|                   answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers], |                   answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers], | ||||||
| @ -627,7 +630,8 @@ def preferences(): | |||||||
|                   themes=themes, |                   themes=themes, | ||||||
|                   plugins=plugins, |                   plugins=plugins, | ||||||
|                   allowed_plugins=allowed_plugins, |                   allowed_plugins=allowed_plugins, | ||||||
|                   theme=get_current_theme_name()) |                   theme=get_current_theme_name(), | ||||||
|  |                   preferences=True) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @app.route('/image_proxy', methods=['GET']) | @app.route('/image_proxy', methods=['GET']) | ||||||
|  | |||||||
| @ -101,11 +101,11 @@ Change search language | |||||||
|     Page Should Contain  about |     Page Should Contain  about | ||||||
|     Page Should Contain  preferences |     Page Should Contain  preferences | ||||||
|     Go To  http://localhost:11111/preferences |     Go To  http://localhost:11111/preferences | ||||||
|     List Selection Should Be  language  Automatic |     List Selection Should Be  language  Default language | ||||||
|     Select From List  language  Turkish (Turkey) - tr_TR |     Select From List  language  Türkçe (Türkiye) - tr-TR | ||||||
|     Submit Preferences |     Submit Preferences | ||||||
|     Go To  http://localhost:11111/preferences |     Go To  http://localhost:11111/preferences | ||||||
|     List Selection Should Be  language  Turkish (Turkey) - tr_TR |     List Selection Should Be  language  Türkçe (Türkiye) - tr-TR | ||||||
| 
 | 
 | ||||||
| Change autocomplete | Change autocomplete | ||||||
|     Page Should Contain  about |     Page Should Contain  about | ||||||
|  | |||||||
| @ -86,3 +86,35 @@ class TestBingEngine(SearxTestCase): | |||||||
|         self.assertEqual(results[0]['title'], 'This should be the title') |         self.assertEqual(results[0]['title'], 'This should be the title') | ||||||
|         self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/') |         self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/') | ||||||
|         self.assertEqual(results[0]['content'], 'This should be the content.') |         self.assertEqual(results[0]['content'], 'This should be the content.') | ||||||
|  | 
 | ||||||
|  |     def test_fetch_supported_languages(self): | ||||||
|  |         html = """<html></html>""" | ||||||
|  |         response = mock.Mock(text=html) | ||||||
|  |         results = bing._fetch_supported_languages(response) | ||||||
|  |         self.assertEqual(type(results), list) | ||||||
|  |         self.assertEqual(len(results), 0) | ||||||
|  | 
 | ||||||
|  |         html = """ | ||||||
|  |         <html> | ||||||
|  |             <body> | ||||||
|  |                 <form> | ||||||
|  |                     <div id="limit-languages"> | ||||||
|  |                         <div> | ||||||
|  |                             <div><input id="es" value="es"></input></div> | ||||||
|  |                         </div> | ||||||
|  |                         <div> | ||||||
|  |                             <div><input id="pt_BR" value="pt_BR"></input></div> | ||||||
|  |                             <div><input id="pt_PT" value="pt_PT"></input></div> | ||||||
|  |                         </div> | ||||||
|  |                     </div> | ||||||
|  |                 </form> | ||||||
|  |             </body> | ||||||
|  |         </html> | ||||||
|  |         """ | ||||||
|  |         response = mock.Mock(text=html) | ||||||
|  |         languages = bing._fetch_supported_languages(response) | ||||||
|  |         self.assertEqual(type(languages), list) | ||||||
|  |         self.assertEqual(len(languages), 3) | ||||||
|  |         self.assertIn('es', languages) | ||||||
|  |         self.assertIn('pt-BR', languages) | ||||||
|  |         self.assertIn('pt-PT', languages) | ||||||
|  | |||||||
| @ -1,3 +1,4 @@ | |||||||
|  | # -*- coding: utf-8 -*- | ||||||
| from collections import defaultdict | from collections import defaultdict | ||||||
| import mock | import mock | ||||||
| from searx.engines import dailymotion | from searx.engines import dailymotion | ||||||
| @ -72,3 +73,39 @@ class TestDailymotionEngine(SearxTestCase): | |||||||
|         results = dailymotion.response(response) |         results = dailymotion.response(response) | ||||||
|         self.assertEqual(type(results), list) |         self.assertEqual(type(results), list) | ||||||
|         self.assertEqual(len(results), 0) |         self.assertEqual(len(results), 0) | ||||||
|  | 
 | ||||||
|  |     def test_fetch_supported_languages(self): | ||||||
|  |         json = r""" | ||||||
|  |         {"list":[{"code":"af","name":"Afrikaans","native_name":"Afrikaans", | ||||||
|  |                   "localized_name":"Afrikaans","display_name":"Afrikaans"}, | ||||||
|  |                  {"code":"ar","name":"Arabic","native_name":"\u0627\u0644\u0639\u0631\u0628\u064a\u0629", | ||||||
|  |                   "localized_name":"Arabic","display_name":"Arabic"}, | ||||||
|  |                  {"code":"la","name":"Latin","native_name":null, | ||||||
|  |                   "localized_name":"Latin","display_name":"Latin"} | ||||||
|  |         ]} | ||||||
|  |         """ | ||||||
|  |         response = mock.Mock(text=json) | ||||||
|  |         languages = dailymotion._fetch_supported_languages(response) | ||||||
|  |         self.assertEqual(type(languages), dict) | ||||||
|  |         self.assertEqual(len(languages), 3) | ||||||
|  |         self.assertIn('af', languages) | ||||||
|  |         self.assertIn('ar', languages) | ||||||
|  |         self.assertIn('la', languages) | ||||||
|  | 
 | ||||||
|  |         self.assertEqual(type(languages['af']), dict) | ||||||
|  |         self.assertEqual(type(languages['ar']), dict) | ||||||
|  |         self.assertEqual(type(languages['la']), dict) | ||||||
|  | 
 | ||||||
|  |         self.assertIn('name', languages['af']) | ||||||
|  |         self.assertIn('name', languages['ar']) | ||||||
|  |         self.assertNotIn('name', languages['la']) | ||||||
|  | 
 | ||||||
|  |         self.assertIn('english_name', languages['af']) | ||||||
|  |         self.assertIn('english_name', languages['ar']) | ||||||
|  |         self.assertIn('english_name', languages['la']) | ||||||
|  | 
 | ||||||
|  |         self.assertEqual(languages['af']['name'], 'Afrikaans') | ||||||
|  |         self.assertEqual(languages['af']['english_name'], 'Afrikaans') | ||||||
|  |         self.assertEqual(languages['ar']['name'], u'العربية') | ||||||
|  |         self.assertEqual(languages['ar']['english_name'], 'Arabic') | ||||||
|  |         self.assertEqual(languages['la']['english_name'], 'Latin') | ||||||
|  | |||||||
| @ -11,7 +11,7 @@ class TestDuckduckgoEngine(SearxTestCase): | |||||||
|         query = 'test_query' |         query = 'test_query' | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|         dicto['pageno'] = 1 |         dicto['pageno'] = 1 | ||||||
|         dicto['language'] = 'de_CH' |         dicto['language'] = 'de-CH' | ||||||
|         dicto['time_range'] = '' |         dicto['time_range'] = '' | ||||||
|         params = duckduckgo.request(query, dicto) |         params = duckduckgo.request(query, dicto) | ||||||
|         self.assertIn('url', params) |         self.assertIn('url', params) | ||||||
| @ -19,6 +19,17 @@ class TestDuckduckgoEngine(SearxTestCase): | |||||||
|         self.assertIn('duckduckgo.com', params['url']) |         self.assertIn('duckduckgo.com', params['url']) | ||||||
|         self.assertIn('ch-de', params['url']) |         self.assertIn('ch-de', params['url']) | ||||||
| 
 | 
 | ||||||
|  |         # when ddg uses non standard code | ||||||
|  |         dicto['language'] = 'en-GB' | ||||||
|  |         params = duckduckgo.request(query, dicto) | ||||||
|  |         self.assertIn('uk-en', params['url']) | ||||||
|  | 
 | ||||||
|  |         # no country given | ||||||
|  |         duckduckgo.supported_languages = ['de-CH', 'en-US'] | ||||||
|  |         dicto['language'] = 'de' | ||||||
|  |         params = duckduckgo.request(query, dicto) | ||||||
|  |         self.assertIn('ch-de', params['url']) | ||||||
|  | 
 | ||||||
|     def test_no_url_in_request_year_time_range(self): |     def test_no_url_in_request_year_time_range(self): | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|         query = 'test_query' |         query = 'test_query' | ||||||
| @ -73,3 +84,17 @@ class TestDuckduckgoEngine(SearxTestCase): | |||||||
|         self.assertEqual(results[0]['title'], 'This is the title') |         self.assertEqual(results[0]['title'], 'This is the title') | ||||||
|         self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű') |         self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű') | ||||||
|         self.assertEqual(results[0]['content'], 'This should be the content.') |         self.assertEqual(results[0]['content'], 'This should be the content.') | ||||||
|  | 
 | ||||||
|  |     def test_fetch_supported_languages(self): | ||||||
|  |         js = """some code...regions:{ | ||||||
|  |         "wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)" | ||||||
|  |         }some more code...""" | ||||||
|  |         response = mock.Mock(text=js) | ||||||
|  |         languages = duckduckgo._fetch_supported_languages(response) | ||||||
|  |         self.assertEqual(type(languages), list) | ||||||
|  |         self.assertEqual(len(languages), 5) | ||||||
|  |         self.assertIn('wt-WT', languages) | ||||||
|  |         self.assertIn('es-AR', languages) | ||||||
|  |         self.assertIn('en-AU', languages) | ||||||
|  |         self.assertIn('de-AT', languages) | ||||||
|  |         self.assertIn('fr-BE', languages) | ||||||
|  | |||||||
| @ -21,10 +21,14 @@ class TestDDGDefinitionsEngine(SearxTestCase): | |||||||
|         query = 'test_query' |         query = 'test_query' | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|         dicto['pageno'] = 1 |         dicto['pageno'] = 1 | ||||||
|  |         dicto['language'] = 'es' | ||||||
|         params = duckduckgo_definitions.request(query, dicto) |         params = duckduckgo_definitions.request(query, dicto) | ||||||
|         self.assertIn('url', params) |         self.assertIn('url', params) | ||||||
|         self.assertIn(query, params['url']) |         self.assertIn(query, params['url']) | ||||||
|         self.assertIn('duckduckgo.com', params['url']) |         self.assertIn('duckduckgo.com', params['url']) | ||||||
|  |         self.assertIn('headers', params) | ||||||
|  |         self.assertIn('Accept-Language', params['headers']) | ||||||
|  |         self.assertIn('es', params['headers']['Accept-Language']) | ||||||
| 
 | 
 | ||||||
|     def test_response(self): |     def test_response(self): | ||||||
|         self.assertRaises(AttributeError, duckduckgo_definitions.response, None) |         self.assertRaises(AttributeError, duckduckgo_definitions.response, None) | ||||||
|  | |||||||
| @ -15,6 +15,12 @@ class TestGigablastEngine(SearxTestCase): | |||||||
|         self.assertTrue('url' in params) |         self.assertTrue('url' in params) | ||||||
|         self.assertTrue(query in params['url']) |         self.assertTrue(query in params['url']) | ||||||
|         self.assertTrue('gigablast.com' in params['url']) |         self.assertTrue('gigablast.com' in params['url']) | ||||||
|  |         self.assertTrue('xx' in params['url']) | ||||||
|  | 
 | ||||||
|  |         dicto['language'] = 'en-US' | ||||||
|  |         params = gigablast.request(query, dicto) | ||||||
|  |         self.assertTrue('en' in params['url']) | ||||||
|  |         self.assertFalse('en-US' in params['url']) | ||||||
| 
 | 
 | ||||||
|     def test_response(self): |     def test_response(self): | ||||||
|         self.assertRaises(AttributeError, gigablast.response, None) |         self.assertRaises(AttributeError, gigablast.response, None) | ||||||
| @ -83,3 +89,28 @@ class TestGigablastEngine(SearxTestCase): | |||||||
|         self.assertEqual(results[0]['title'], 'South by Southwest 2016') |         self.assertEqual(results[0]['title'], 'South by Southwest 2016') | ||||||
|         self.assertEqual(results[0]['url'], 'www.sxsw.com') |         self.assertEqual(results[0]['url'], 'www.sxsw.com') | ||||||
|         self.assertEqual(results[0]['content'], 'This should be the content.') |         self.assertEqual(results[0]['content'], 'This should be the content.') | ||||||
|  | 
 | ||||||
|  |     def test_fetch_supported_languages(self): | ||||||
|  |         html = """<html></html>""" | ||||||
|  |         response = mock.Mock(text=html) | ||||||
|  |         results = gigablast._fetch_supported_languages(response) | ||||||
|  |         self.assertEqual(type(results), list) | ||||||
|  |         self.assertEqual(len(results), 0) | ||||||
|  | 
 | ||||||
|  |         html = """ | ||||||
|  |         <html> | ||||||
|  |             <body> | ||||||
|  |                 <span id="menu2"> | ||||||
|  |                     <a href="/search?&rxikd=1&qlang=xx"></a> | ||||||
|  |                     <a href="/search?&rxikd=1&qlang=en"></a> | ||||||
|  |                     <a href="/search?&rxikd=1&qlang=fr"></a> | ||||||
|  |                 </span> | ||||||
|  |             </body> | ||||||
|  |         </html> | ||||||
|  |         """ | ||||||
|  |         response = mock.Mock(text=html) | ||||||
|  |         languages = gigablast._fetch_supported_languages(response) | ||||||
|  |         self.assertEqual(type(languages), list) | ||||||
|  |         self.assertEqual(len(languages), 2) | ||||||
|  |         self.assertIn('en', languages) | ||||||
|  |         self.assertIn('fr', languages) | ||||||
|  | |||||||
| @ -18,7 +18,7 @@ class TestGoogleEngine(SearxTestCase): | |||||||
|         query = 'test_query' |         query = 'test_query' | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|         dicto['pageno'] = 1 |         dicto['pageno'] = 1 | ||||||
|         dicto['language'] = 'fr_FR' |         dicto['language'] = 'fr-FR' | ||||||
|         dicto['time_range'] = '' |         dicto['time_range'] = '' | ||||||
|         params = google.request(query, dicto) |         params = google.request(query, dicto) | ||||||
|         self.assertIn('url', params) |         self.assertIn('url', params) | ||||||
| @ -177,3 +177,60 @@ class TestGoogleEngine(SearxTestCase): | |||||||
|         self.assertEqual(results[0]['title'], '') |         self.assertEqual(results[0]['title'], '') | ||||||
|         self.assertEqual(results[0]['content'], '') |         self.assertEqual(results[0]['content'], '') | ||||||
|         self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg') |         self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg') | ||||||
|  | 
 | ||||||
|  |     def test_fetch_supported_languages(self): | ||||||
|  |         html = """<html></html>""" | ||||||
|  |         response = mock.Mock(text=html) | ||||||
|  |         languages = google._fetch_supported_languages(response) | ||||||
|  |         self.assertEqual(type(languages), dict) | ||||||
|  |         self.assertEqual(len(languages), 0) | ||||||
|  | 
 | ||||||
|  |         html = u""" | ||||||
|  |         <html> | ||||||
|  |             <body> | ||||||
|  |                 <table> | ||||||
|  |                     <tbody> | ||||||
|  |                         <tr> | ||||||
|  |                             <td> | ||||||
|  |                                 <font> | ||||||
|  |                                     <label> | ||||||
|  |                                         <span id="ten">English</span> | ||||||
|  |                                     </label> | ||||||
|  |                                 </font> | ||||||
|  |                             </td> | ||||||
|  |                             <td> | ||||||
|  |                                 <font> | ||||||
|  |                                     <label> | ||||||
|  |                                         <span id="tzh-CN">中文 (简体)</span> | ||||||
|  |                                     </label> | ||||||
|  |                                     <label> | ||||||
|  |                                         <span id="tzh-TW">中文 (繁體)</span> | ||||||
|  |                                     </label> | ||||||
|  |                                 </font> | ||||||
|  |                             </td> | ||||||
|  |                         </tr> | ||||||
|  |                     </tbody> | ||||||
|  |                 </table> | ||||||
|  |             </body> | ||||||
|  |         </html> | ||||||
|  |         """ | ||||||
|  |         response = mock.Mock(text=html) | ||||||
|  |         languages = google._fetch_supported_languages(response) | ||||||
|  |         self.assertEqual(type(languages), dict) | ||||||
|  |         self.assertEqual(len(languages), 3) | ||||||
|  | 
 | ||||||
|  |         self.assertIn('en', languages) | ||||||
|  |         self.assertIn('zh-CN', languages) | ||||||
|  |         self.assertIn('zh-TW', languages) | ||||||
|  | 
 | ||||||
|  |         self.assertEquals(type(languages['en']), dict) | ||||||
|  |         self.assertEquals(type(languages['zh-CN']), dict) | ||||||
|  |         self.assertEquals(type(languages['zh-TW']), dict) | ||||||
|  | 
 | ||||||
|  |         self.assertIn('name', languages['en']) | ||||||
|  |         self.assertIn('name', languages['zh-CN']) | ||||||
|  |         self.assertIn('name', languages['zh-TW']) | ||||||
|  | 
 | ||||||
|  |         self.assertEquals(languages['en']['name'], 'English') | ||||||
|  |         self.assertEquals(languages['zh-CN']['name'], u'中文 (简体)') | ||||||
|  |         self.assertEquals(languages['zh-TW']['name'], u'中文 (繁體)') | ||||||
|  | |||||||
| @ -10,7 +10,7 @@ class TestQwantEngine(SearxTestCase): | |||||||
|         query = 'test_query' |         query = 'test_query' | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|         dicto['pageno'] = 0 |         dicto['pageno'] = 0 | ||||||
|         dicto['language'] = 'fr_FR' |         dicto['language'] = 'fr-FR' | ||||||
|         qwant.categories = [''] |         qwant.categories = [''] | ||||||
|         params = qwant.request(query, dicto) |         params = qwant.request(query, dicto) | ||||||
|         self.assertIn('url', params) |         self.assertIn('url', params) | ||||||
|  | |||||||
| @ -10,6 +10,7 @@ class TestSubtitleseekerEngine(SearxTestCase): | |||||||
|         query = 'test_query' |         query = 'test_query' | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|         dicto['pageno'] = 1 |         dicto['pageno'] = 1 | ||||||
|  |         dicto['language'] = 'fr-FR' | ||||||
|         params = subtitleseeker.request(query, dicto) |         params = subtitleseeker.request(query, dicto) | ||||||
|         self.assertTrue('url' in params) |         self.assertTrue('url' in params) | ||||||
|         self.assertTrue(query in params['url']) |         self.assertTrue(query in params['url']) | ||||||
| @ -17,7 +18,7 @@ class TestSubtitleseekerEngine(SearxTestCase): | |||||||
| 
 | 
 | ||||||
|     def test_response(self): |     def test_response(self): | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|         dicto['language'] = 'fr_FR' |         dicto['language'] = 'fr-FR' | ||||||
|         response = mock.Mock(search_params=dicto) |         response = mock.Mock(search_params=dicto) | ||||||
| 
 | 
 | ||||||
|         self.assertRaises(AttributeError, subtitleseeker.response, None) |         self.assertRaises(AttributeError, subtitleseeker.response, None) | ||||||
| @ -68,6 +69,10 @@ class TestSubtitleseekerEngine(SearxTestCase): | |||||||
|         self.assertIn('1039 Subs', results[0]['content']) |         self.assertIn('1039 Subs', results[0]['content']) | ||||||
|         self.assertIn('Alternative Title', results[0]['content']) |         self.assertIn('Alternative Title', results[0]['content']) | ||||||
| 
 | 
 | ||||||
|  |         dicto['language'] = 'pt-BR' | ||||||
|  |         results = subtitleseeker.response(response) | ||||||
|  |         self.assertEqual(results[0]['url'], 'http://this.is.the.url/Brazilian/') | ||||||
|  | 
 | ||||||
|         html = """ |         html = """ | ||||||
|         <div class="boxRows"> |         <div class="boxRows"> | ||||||
|             <div class="boxRowsInner" style="width:600px;"> |             <div class="boxRowsInner" style="width:600px;"> | ||||||
|  | |||||||
| @ -10,7 +10,7 @@ class TestSwisscowsEngine(SearxTestCase): | |||||||
|         query = 'test_query' |         query = 'test_query' | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|         dicto['pageno'] = 1 |         dicto['pageno'] = 1 | ||||||
|         dicto['language'] = 'de_DE' |         dicto['language'] = 'de-DE' | ||||||
|         params = swisscows.request(query, dicto) |         params = swisscows.request(query, dicto) | ||||||
|         self.assertTrue('url' in params) |         self.assertTrue('url' in params) | ||||||
|         self.assertTrue(query in params['url']) |         self.assertTrue(query in params['url']) | ||||||
| @ -126,3 +126,30 @@ class TestSwisscowsEngine(SearxTestCase): | |||||||
|         self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg') |         self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg') | ||||||
|         self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png') |         self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png') | ||||||
|         self.assertEqual(results[2]['template'], 'images.html') |         self.assertEqual(results[2]['template'], 'images.html') | ||||||
|  | 
 | ||||||
def test_fetch_supported_languages(self):
    """Exercise swisscows._fetch_supported_languages on two pages.

    An empty document must give an empty list; a document with a
    ``regions-popup`` list of three ``data-val`` links must give three
    entries, including ``de-CH`` and ``fr-CH``.
    """
    # page without any region list
    html = """<html></html>"""
    response = mock.Mock(text=html)
    languages = swisscows._fetch_supported_languages(response)
    self.assertIsInstance(languages, list)
    self.assertEqual(len(languages), 0)

    # page with a region list
    html = """
        <html>
            <div id="regions-popup">
                <div>
                    <ul>
                        <li><a data-val="browser"></a></li>
                        <li><a data-val="de-CH"></a></li>
                        <li><a data-val="fr-CH"></a></li>
                    </ul>
                </div>
            </div>
        </html>
        """
    response = mock.Mock(text=html)
    languages = swisscows._fetch_supported_languages(response)
    self.assertIsInstance(languages, list)
    self.assertEqual(len(languages), 3)
    for code in ('de-CH', 'fr-CH'):
        self.assertIn(code, languages)
|  | |||||||
| @ -8,9 +8,11 @@ from searx.testing import SearxTestCase | |||||||
| class TestWikipediaEngine(SearxTestCase): | class TestWikipediaEngine(SearxTestCase): | ||||||
| 
 | 
 | ||||||
|     def test_request(self): |     def test_request(self): | ||||||
|  |         wikipedia.supported_languages = ['fr', 'en'] | ||||||
|  | 
 | ||||||
|         query = 'test_query' |         query = 'test_query' | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|         dicto['language'] = 'fr_FR' |         dicto['language'] = 'fr-FR' | ||||||
|         params = wikipedia.request(query, dicto) |         params = wikipedia.request(query, dicto) | ||||||
|         self.assertIn('url', params) |         self.assertIn('url', params) | ||||||
|         self.assertIn(query, params['url']) |         self.assertIn(query, params['url']) | ||||||
| @ -27,6 +29,10 @@ class TestWikipediaEngine(SearxTestCase): | |||||||
|         params = wikipedia.request(query, dicto) |         params = wikipedia.request(query, dicto) | ||||||
|         self.assertIn('en', params['url']) |         self.assertIn('en', params['url']) | ||||||
| 
 | 
 | ||||||
|  |         dicto['language'] = 'xx' | ||||||
|  |         params = wikipedia.request(query, dicto) | ||||||
|  |         self.assertIn('en', params['url']) | ||||||
|  | 
 | ||||||
|     def test_response(self): |     def test_response(self): | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|         dicto['language'] = 'fr' |         dicto['language'] = 'fr' | ||||||
| @ -158,3 +164,96 @@ class TestWikipediaEngine(SearxTestCase): | |||||||
|         self.assertEqual(len(results), 2) |         self.assertEqual(len(results), 2) | ||||||
|         self.assertEqual(results[1]['infobox'], u'披頭四樂隊') |         self.assertEqual(results[1]['infobox'], u'披頭四樂隊') | ||||||
|         self.assertIn(u'披头士乐队...', results[1]['content']) |         self.assertIn(u'披头士乐队...', results[1]['content']) | ||||||
|  | 
 | ||||||
def test_fetch_supported_languages(self):
    """Exercise wikipedia._fetch_supported_languages on two pages.

    An empty document must give an empty dict. A document with two
    sortable tables (three language rows in total) must give one dict
    entry per wiki code, each carrying ``name``, ``english_name`` and
    ``articles``.
    """
    # page without any language table
    html = u"""<html></html>"""
    response = mock.Mock(text=html)
    languages = wikipedia._fetch_supported_languages(response)
    self.assertIsInstance(languages, dict)
    self.assertEqual(len(languages), 0)

    # page with two language tables
    html = u"""
        <html>
            <body>
                <div>
                    <div>
                        <h3>Table header</h3>
                        <table class="sortable jquery-tablesorter">
                            <thead>
                                <tr>
                                    <th>N</th>
                                    <th>Language</th>
                                    <th>Language (local)</th>
                                    <th>Wiki</th>
                                    <th>Articles</th>
                                </tr>
                            </thead>
                            <tbody>
                                <tr>
                                    <td>2</td>
                                    <td><a>Swedish</a></td>
                                    <td><a>Svenska</a></td>
                                    <td><a>sv</a></td>
                                    <td><a><b>3000000</b></a></td>
                                </tr>
                                <tr>
                                    <td>3</td>
                                    <td><a>Cebuano</a></td>
                                    <td><a>Sinugboanong Binisaya</a></td>
                                    <td><a>ceb</a></td>
                                    <td><a><b>3000000</b></a></td>
                                </tr>
                            </tbody>
                        </table>
                        <h3>Table header</h3>
                        <table class="sortable jquery-tablesorter">
                            <thead>
                                <tr>
                                    <th>N</th>
                                    <th>Language</th>
                                    <th>Language (local)</th>
                                    <th>Wiki</th>
                                    <th>Articles</th>
                                </tr>
                            </thead>
                            <tbody>
                                <tr>
                                    <td>2</td>
                                    <td><a>Norwegian (Bokmål)</a></td>
                                    <td><a>Norsk (Bokmål)</a></td>
                                    <td><a>no</a></td>
                                    <td><a><b>100000</b></a></td>
                                </tr>
                            </tbody>
                        </table>
                    </div>
                </div>
            </body>
        </html>
        """
    response = mock.Mock(text=html)
    languages = wikipedia._fetch_supported_languages(response)
    self.assertIsInstance(languages, dict)
    self.assertEqual(len(languages), 3)

    # expected entry per wiki code, taken from the table rows above
    expected = {
        'sv': {'name': 'Svenska',
               'english_name': 'Swedish',
               'articles': 3000000},
        'ceb': {'name': 'Sinugboanong Binisaya',
                'english_name': 'Cebuano',
                'articles': 3000000},
        'no': {'name': u'Norsk (Bokmål)',
               'english_name': u'Norwegian (Bokmål)',
               'articles': 100000},
    }
    for code in ('sv', 'ceb', 'no'):
        self.assertIn(code, languages)
        self.assertIsInstance(languages[code], dict)
        for field in ('name', 'english_name', 'articles'):
            self.assertIn(field, languages[code])
            self.assertEqual(languages[code][field], expected[code][field])
|  | |||||||
| @ -147,3 +147,33 @@ class TestYahooEngine(SearxTestCase): | |||||||
|         results = yahoo.response(response) |         results = yahoo.response(response) | ||||||
|         self.assertEqual(type(results), list) |         self.assertEqual(type(results), list) | ||||||
|         self.assertEqual(len(results), 0) |         self.assertEqual(len(results), 0) | ||||||
|  | 
 | ||||||
def test_fetch_supported_languages(self):
    """Exercise yahoo._fetch_supported_languages on two pages.

    An empty document must give an empty list; a document whose
    ``yschlang`` block holds three ``lang_*`` inputs must give ``ar``,
    ``zh-chs`` and ``zh-cht``.
    """
    # page without any language inputs
    html = """<html></html>"""
    response = mock.Mock(text=html)
    results = yahoo._fetch_supported_languages(response)
    self.assertIsInstance(results, list)
    self.assertEqual(len(results), 0)

    # page with three language inputs
    html = """
        <html>
            <div>
                <div id="yschlang">
                    <span>
                        <label><input value="lang_ar"></input></label>
                    </span>
                    <span>
                        <label><input value="lang_zh_chs"></input></label>
                        <label><input value="lang_zh_cht"></input></label>
                    </span>
                </div>
            </div>
        </html>
        """
    response = mock.Mock(text=html)
    languages = yahoo._fetch_supported_languages(response)
    self.assertIsInstance(languages, list)
    self.assertEqual(len(languages), 3)
    for code in ('ar', 'zh-chs', 'zh-cht'):
        self.assertIn(code, languages)
|  | |||||||
| @ -1,4 +1,4 @@ | |||||||
| from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException, | from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException, SearchLanguageSetting, | ||||||
|                                MultipleChoiceSetting, PluginsSetting, ValidationException) |                                MultipleChoiceSetting, PluginsSetting, ValidationException) | ||||||
| from searx.testing import SearxTestCase | from searx.testing import SearxTestCase | ||||||
| 
 | 
 | ||||||
| @ -88,6 +88,27 @@ class TestSettings(SearxTestCase): | |||||||
|         setting.parse('2') |         setting.parse('2') | ||||||
|         self.assertEquals(setting.get_value(), ['2']) |         self.assertEquals(setting.get_value(), ['2']) | ||||||
| 
 | 
 | ||||||
|  |     # search language settings | ||||||
def test_lang_setting_valid_choice(self):
    """Parsing a code that is listed in ``choices`` stores that code."""
    setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
    setting.parse('de')
    # assertEqual instead of the deprecated assertEquals alias
    self.assertEqual(setting.get_value(), 'de')
|  | 
 | ||||||
def test_lang_setting_invalid_choice(self):
    """Parsing a code unrelated to any choice leaves the value at 'all'."""
    setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
    setting.parse('xx')
    # assertEqual instead of the deprecated assertEquals alias
    self.assertEqual(setting.get_value(), 'all')
|  | 
 | ||||||
def test_lang_setting_old_cookie_choice(self):
    """An old-style 'es_XA' value with an unlisted country resolves to 'es'."""
    setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
    setting.parse('es_XA')
    # assertEqual instead of the deprecated assertEquals alias
    self.assertEqual(setting.get_value(), 'es')
|  | 
 | ||||||
def test_lang_setting_old_cookie_format(self):
    """An old-style underscore value 'es_ES' resolves to the choice 'es-ES'."""
    setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
    setting.parse('es_ES')
    # assertEqual instead of the deprecated assertEquals alias
    self.assertEqual(setting.get_value(), 'es-ES')
|  | 
 | ||||||
|     # plugins settings |     # plugins settings | ||||||
|     def test_plugins_setting_all_default_enabled(self): |     def test_plugins_setting_all_default_enabled(self): | ||||||
|         plugin1 = PluginStub('plugin1', True) |         plugin1 = PluginStub('plugin1', True) | ||||||
|  | |||||||
							
								
								
									
										171
									
								
								utils/fetch_languages.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										171
									
								
								utils/fetch_languages.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,171 @@ | |||||||
|  | # -*- coding: utf-8 -*- | ||||||
|  | 
 | ||||||
|  | # This script generates languages.py from intersecting each engine's supported languages. | ||||||
|  | # | ||||||
|  | # The country names are obtained from http://api.geonames.org which requires registering as a user. | ||||||
|  | # | ||||||
|  | # Output files (engines_languages.json and languages.py) | ||||||
|  | # are written in current directory to avoid overwriting in case something goes wrong. | ||||||
|  | 
 | ||||||
|  | from requests import get | ||||||
|  | from urllib import urlencode | ||||||
|  | from lxml.html import fromstring | ||||||
|  | from json import loads, dumps | ||||||
|  | import io | ||||||
|  | from sys import path | ||||||
|  | path.append('../searx')  # noqa | ||||||
|  | from searx.engines import engines | ||||||
|  | 
 | ||||||
|  | # Geonames API for country names. | ||||||
|  | geonames_user = ''  # ADD USER NAME HERE | ||||||
|  | country_names_url = 'http://api.geonames.org/countryInfoJSON?{parameters}' | ||||||
|  | 
 | ||||||
|  | # Output files. | ||||||
|  | engines_languages_file = 'engines_languages.json' | ||||||
|  | languages_file = 'languages.py' | ||||||
|  | 
 | ||||||
|  | engines_languages = {} | ||||||
|  | languages = {} | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # To filter out invalid codes and dialects. | ||||||
def valid_code(lang_code):
    """Tell whether ``lang_code`` should be kept in the language list.

    A code is rejected when it starts with ``xx``, is one of the known
    bad codes, ends with one of the known bad country suffixes, or is
    reported as a dialect by ``is_dialect``.

    Returns True for codes to keep and False for codes to drop.
    """
    # sl-SL is technically not invalid, but still a mistake
    rejected_codes = ('sl-SL', 'wt-WT', 'jw')
    rejected_countries = ('UK', 'XA', 'XL')

    if lang_code[:2] == 'xx':
        return False
    if lang_code in rejected_codes:
        return False
    if lang_code[-2:] in rejected_countries:
        return False
    if is_dialect(lang_code):
        return False

    return True
|  | 
 | ||||||
|  | 
 | ||||||
|  | # Language codes with any additional tags other than language and country. | ||||||
def is_dialect(lang_code):
    """Tell whether ``lang_code`` has tags beyond language and country.

    The code is split on ``-``. It counts as a dialect when it has more
    than two parts, when the first part is longer than three characters,
    or when a second part exists and is longer than two characters.
    """
    parts = lang_code.split('-')

    # more than "language-country"
    if len(parts) > 2:
        return True

    # language part too long to be a plain language code
    if len(parts[0]) > 3:
        return True

    # second part too long to be a country code
    return len(parts) == 2 and len(parts[1]) > 2
|  | 
 | ||||||
|  | 
 | ||||||
|  | # Get country name in specified language. | ||||||
def get_country_name(locale):
    """Return the country name for ``locale`` as reported by Geonames.

    ``locale`` is expected in ``language-COUNTRY`` form. The language
    part is sent as ``lang`` and the country part as ``country`` to the
    URL built from ``country_names_url``.

    Returns an empty string when ``locale`` does not have exactly two
    parts, when ``geonames_user`` is not set, or when the response does
    not contain exactly one entry under ``geonames`` (a message is
    printed in that last case).
    """
    # Check the argument shape first: it needs no configuration or network.
    locale_parts = locale.split('-')
    if len(locale_parts) != 2:
        return ''

    # Compare by truthiness; ``is ''`` tests identity, which only works by
    # accident of string interning.
    if not geonames_user:
        return ''

    language, country = locale_parts
    query = urlencode({'lang': language,
                       'country': country,
                       'username': geonames_user})
    response = get(country_names_url.format(parameters=query))
    payload = loads(response.text)
    content = payload.get('geonames', None)
    if content is None or len(content) != 1:
        # parenthesised form prints the same text on Python 2 and 3
        print("No country name found for " + language + "-" + country)
        return ''

    return content[0].get('countryName', '')
|  | 
 | ||||||
|  | 
 | ||||||
|  | # Fetches supported languages for each engine and writes a JSON file with those. | ||||||
def fetch_supported_languages():
    """Collect each engine's supported languages and dump them as JSON.

    Every engine in ``engines`` that has a ``fetch_supported_languages``
    attribute is called, and its result is stored in ``engines_languages``
    under the engine name. An engine whose call raises is skipped after
    printing the exception. The collected mapping is then written to
    ``engines_languages_file``.
    """
    for engine_name in engines:
        engine = engines[engine_name]
        if not hasattr(engine, 'fetch_supported_languages'):
            continue

        try:
            fetched = engine.fetch_supported_languages()
        except Exception as e:
            # best effort: report and move on to the next engine
            print(e)
        else:
            engines_languages[engine_name] = fetched

    # write json file
    with io.open(engines_languages_file, "w", encoding="utf-8") as f:
        serialized = dumps(engines_languages, ensure_ascii=False, encoding="utf-8")
        f.write(unicode(serialized))
|  | 
 | ||||||
|  | 
 | ||||||
|  | # Join all language lists. | ||||||
|  | # Iterate all languages supported by each engine. | ||||||
def join_language_lists():
    """Merge every engine's language list into the global ``languages``.

    Works in three passes:

    1. Seed ``languages`` with the valid codes from the wikipedia engine,
       which carries the language names.
    2. Add every valid locale of every engine that is missing or still
       unnamed. Engines that report a dict contribute their entry;
       engines that report a plain list contribute an empty dict.
    3. Fill ``name`` and ``english_name`` from the base-language entry and
       ``country`` from ``get_country_name``. Locales that still have no
       name are removed.
    """
    # include wikipedia first for more accurate language names
    for code, lang in engines_languages['wikipedia'].items():
        if valid_code(code):
            languages[code] = lang

    for engine_name in engines_languages:
        engine_languages = engines_languages[engine_name]
        has_details = isinstance(engine_languages, dict)

        for locale in engine_languages:
            if not valid_code(locale):
                continue

            # keep an entry that is already on the list and has a name
            if locale in languages and languages[locale].get('name'):
                continue

            if has_details:
                languages[locale] = engine_languages[locale]
            else:
                languages[locale] = {}

    # get locales that have no name or country yet
    # (iterate over a snapshot because entries may be deleted)
    for locale in list(languages.keys()):
        entry = languages[locale]
        base_entry = languages.get(locale.split('-')[0], {})

        # try to get language names
        if not entry.get('name'):
            name = base_entry.get('name', None)
            if not name:
                # filter out locales with no name
                del languages[locale]
                continue
            entry['name'] = name

        # try to get language name in english
        if not entry.get('english_name'):
            entry['english_name'] = base_entry.get('english_name', '')

        # try to get country name
        if locale.find('-') > 0 and not entry.get('country'):
            entry['country'] = get_country_name(locale) or ''
|  | 
 | ||||||
|  | 
 | ||||||
|  | # Remove countryless language if language is featured in only one country. | ||||||
def filter_single_country_languages():
    """Drop the countryless entry of languages found in only one country.

    For each base language, counts the codes in ``languages`` that carry
    a country part (``xx-YY``). When exactly one such code exists, the
    bare ``xx`` entry is removed from ``languages`` if present.

    Unlike a single sorted sweep that only checks a group when the next
    group starts, this also handles the alphabetically last language. It
    also does not fail when a language has regional codes but no bare
    entry.
    """
    # number of "language-country" codes per base language
    regional_counts = {}
    for code in languages:
        if '-' not in code:
            continue
        lang = code.split('-')[0]
        regional_counts[lang] = regional_counts.get(lang, 0) + 1

    for lang, count in regional_counts.items():
        if count == 1:
            # the bare entry may legitimately be absent
            languages.pop(lang, None)
|  | 
 | ||||||
|  | 
 | ||||||
|  | # Write languages.py. | ||||||
def write_languages_file():
    """Write ``languages`` to ``languages_file`` as a Python module.

    The module defines ``language_codes``, a tuple with one
    ``(code, name, country, english_name)`` entry per code in sorted
    order. Any `` (...)`` suffix is cut from ``name`` and
    ``english_name``.

    The content is built completely before the file is opened, so a
    failure while formatting does not leave a truncated file behind, and
    the handle is closed even if writing fails. With no languages at all
    the result is an empty, still syntactically valid tuple.
    """
    header = '# -*- coding: utf-8 -*-\n'\
             + '# list of language codes\n'\
             + '# this file is generated automatically by utils/fetch_languages.py\n'\
             + '\nlanguage_codes = ('

    entries = []
    for code in sorted(languages):
        lang = languages[code]
        entries.append('\n    (u"' + code + '"'
                       + ', u"' + lang['name'].split(' (')[0] + '"'
                       + ', u"' + lang.get('country', '') + '"'
                       + ', u"' + lang.get('english_name', '').split(' (')[0] + '")')

    # joining puts a comma between entries and none after the last one
    file_content = header + ','.join(entries) + '\n)\n'

    with open(languages_file, 'w') as new_file:
        new_file.write(file_content.encode('utf8'))
|  | 
 | ||||||
|  | 
 | ||||||
if __name__ == "__main__":
    # Run the steps in order: fetch per-engine lists, merge them, drop
    # redundant countryless entries, then write the generated module.
    for step in (fetch_supported_languages,
                 join_language_lists,
                 filter_single_country_languages,
                 write_languages_file):
        step()
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Adam Tauber
						Adam Tauber