[pylint] Bing (Web) engine
Fix remarks from pylint and improve code style. In preparation for a bug-fix of the Bing (Web) engine, I add this engine to the pylint list. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									4da1e0026c
								
							
						
					
					
						commit
						b2177e5916
					
				| @ -1,6 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Bing (Web) | ||||
| # lint: pylint | ||||
| """Bing (Web) | ||||
| """ | ||||
| 
 | ||||
| import re | ||||
| @ -8,7 +8,6 @@ from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from searx.utils import eval_xpath, extract_text, match_language | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.bing.com', | ||||
|     "wikidata_id": 'Q182496', | ||||
| @ -21,6 +20,8 @@ about = { | ||||
| # engine dependent config | ||||
| categories = ['general'] | ||||
| paging = True | ||||
| time_range_support = False | ||||
| safesearch = False | ||||
| supported_languages_url = 'https://www.bing.com/account/general' | ||||
| language_aliases = {'zh-CN': 'zh-CHS', 'zh-TW': 'zh-CHT', 'zh-HK': 'zh-CHT'} | ||||
| 
 | ||||
| @ -28,71 +29,80 @@ language_aliases = {'zh-CN': 'zh-CHS', 'zh-TW': 'zh-CHT', 'zh-HK': 'zh-CHT'} | ||||
| base_url = 'https://www.bing.com/' | ||||
| search_string = 'search?{query}&first={offset}' | ||||
| 
 | ||||
| 
 | ||||
| def _get_offset_from_pageno(pageno): | ||||
|     return (pageno - 1) * 10 + 1 | ||||
| 
 | ||||
| 
 | ||||
| # do search-request | ||||
| def request(query, params): | ||||
|     offset = _get_offset_from_pageno(params.get('pageno', 0)) | ||||
| 
 | ||||
|     offset = _get_offset_from_pageno(params.get('pageno', 0)) | ||||
|     if params['language'] == 'all': | ||||
|         lang = 'EN' | ||||
|     else: | ||||
|         lang = match_language(params['language'], supported_languages, language_aliases) | ||||
|         lang = match_language( | ||||
|             params['language'], supported_languages, language_aliases | ||||
|         ) | ||||
| 
 | ||||
|     query = 'language:{} {}'.format(lang.split('-')[0].upper(), query) | ||||
|     query = 'language:{} {}'.format( | ||||
|         lang.split('-')[0].upper(), query | ||||
|     ) | ||||
| 
 | ||||
|     search_path = search_string.format( | ||||
|         query=urlencode({'q': query}), | ||||
|         offset=offset) | ||||
| 
 | ||||
|         query = urlencode({'q': query}), | ||||
|         offset = offset) | ||||
|     params['url'] = base_url + search_path | ||||
| 
 | ||||
|     return params | ||||
| 
 | ||||
| 
 | ||||
| # get response from search-request | ||||
| def response(resp): | ||||
| 
 | ||||
|     results = [] | ||||
|     result_len = 0 | ||||
| 
 | ||||
|     dom = html.fromstring(resp.text) | ||||
|     # parse results | ||||
| 
 | ||||
|     for result in eval_xpath(dom, '//div[@class="sa_cc"]'): | ||||
| 
 | ||||
|         link = eval_xpath(result, './/h3/a')[0] | ||||
|         url = link.attrib.get('href') | ||||
|         title = extract_text(link) | ||||
|         content = extract_text(eval_xpath(result, './/p')) | ||||
| 
 | ||||
|         # append result | ||||
|         results.append({'url': url, | ||||
|         results.append({ | ||||
|             'url': url, | ||||
|             'title': title, | ||||
|                         'content': content}) | ||||
|             'content': content | ||||
|         }) | ||||
| 
 | ||||
|     # parse results again if nothing is found yet | ||||
|     for result in eval_xpath(dom, '//li[@class="b_algo"]'): | ||||
| 
 | ||||
|         link = eval_xpath(result, './/h2/a')[0] | ||||
|         url = link.attrib.get('href') | ||||
|         title = extract_text(link) | ||||
|         content = extract_text(eval_xpath(result, './/p')) | ||||
| 
 | ||||
|         # append result | ||||
|         results.append({'url': url, | ||||
|         results.append({ | ||||
|             'url': url, | ||||
|             'title': title, | ||||
|                         'content': content}) | ||||
|             'content': content | ||||
|         }) | ||||
| 
 | ||||
|     try: | ||||
|         result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()')) | ||||
|         if "-" in result_len_container: | ||||
| 
 | ||||
|             # Remove the part "from-to" for paginated request ... | ||||
|             result_len_container = result_len_container[result_len_container.find("-") * 2 + 2:] | ||||
| 
 | ||||
|         result_len_container = re.sub('[^0-9]', '', result_len_container) | ||||
| 
 | ||||
|         if len(result_len_container) > 0: | ||||
|             result_len = int(result_len_container) | ||||
|     except Exception as e: | ||||
| 
 | ||||
|     except Exception as e:  # pylint: disable=broad-except | ||||
|         logger.debug('result error :\n%s', e) | ||||
| 
 | ||||
|     if result_len and _get_offset_from_pageno(resp.search_params.get("pageno", 0)) > result_len: | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Markus Heiser
						Markus Heiser