[enh] yandex engine added
This commit is contained in:
		
							parent
							
								
									357fc47811
								
							
						
					
					
						commit
						fafc564874
					
				
							
								
								
									
										55
									
								
								searx/engines/yandex.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								searx/engines/yandex.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,55 @@ | |||||||
|  | """ | ||||||
|  |  Yandex (Web) | ||||||
|  | 
 | ||||||
|  |  @website     https://yandex.ru/ | ||||||
|  |  @provide-api ? | ||||||
|  |  @using-api   no | ||||||
|  |  @results     HTML (using search portal) | ||||||
|  |  @stable      no (HTML can change) | ||||||
|  |  @parse       url, title, content | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | from urllib import urlencode | ||||||
|  | from lxml import html | ||||||
|  | from searx.search import logger | ||||||
|  | 
 | ||||||
|  | logger = logger.getChild('yandex engine') | ||||||
|  | 
 | ||||||
|  | # engine dependent config | ||||||
|  | categories = ['general'] | ||||||
|  | paging = True | ||||||
|  | language_support = True  # TODO | ||||||
|  | 
 | ||||||
|  | # search-url | ||||||
|  | base_url = 'https://yandex.ru/' | ||||||
|  | search_url = 'search/?{query}&p={page}' | ||||||
|  | 
 | ||||||
|  | results_xpath = '//div[@class="serp-item serp-item_plain_yes clearfix i-bem"]' | ||||||
|  | url_xpath = './/h2/a/@href' | ||||||
|  | title_xpath = './/h2/a//text()' | ||||||
|  | content_xpath = './/div[@class="serp-item__text"]//text()' | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
def request(query, params):
    """Store the search URL for ``query`` in ``params['url']``.

    The ``p`` URL argument is ``params['pageno'] - 1`` (presumably because
    the remote side counts pages from zero -- confirm), and the query text
    is URL-encoded under the ``text`` key.  The same ``params`` dict is
    returned after being updated.
    """
    page_index = params['pageno'] - 1
    encoded_query = urlencode({'text': query})
    relative_url = search_url.format(page=page_index, query=encoded_query)
    params['url'] = base_url + relative_url
    return params
|  | 
 | ||||||
|  | 
 | ||||||
|  | # get response from search-request | ||||||
def response(resp):
    """Parse a search result page into a list of result dicts.

    ``resp.text`` is parsed as HTML and every element matched by
    ``results_xpath`` is turned into a dict with the keys ``url``,
    ``title`` and ``content``.  An item that cannot be parsed (for
    example when ``url_xpath`` matches nothing, so indexing ``[0]``
    fails) is logged and skipped instead of aborting the whole page.

    Returns the list of successfully parsed results, in document order.
    """
    dom = html.fromstring(resp.text)
    results = []

    for result in dom.xpath(results_xpath):
        try:
            res = {'url': result.xpath(url_xpath)[0],
                   'title': ''.join(result.xpath(title_xpath)),
                   'content': ''.join(result.xpath(content_xpath))}
        except Exception:
            # Best-effort parsing: one malformed item must not discard the
            # rest.  ``Exception`` (not a bare ``except``) is caught so that
            # KeyboardInterrupt / SystemExit still propagate.
            logger.exception('yandex parse crash')
            continue

        results.append(res)

    return results
| @ -274,6 +274,11 @@ engines: | |||||||
|     engine : yahoo |     engine : yahoo | ||||||
|     shortcut : yh |     shortcut : yh | ||||||
| 
 | 
 | ||||||
|  |   - name : yandex | ||||||
|  |     engine : yandex | ||||||
|  |     shortcut : ya | ||||||
|  |     disabled : True | ||||||
|  | 
 | ||||||
|   - name : yahoo news |   - name : yahoo news | ||||||
|     engine : yahoo_news |     engine : yahoo_news | ||||||
|     shortcut : yhn |     shortcut : yhn | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Adam Tauber
						Adam Tauber