commit
						0cb55ddfde
					
				
							
								
								
									
										115
									
								
								searx/engines/findx.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								searx/engines/findx.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,115 @@ | |||||||
|  | """ | ||||||
|  | FindX (General, Images, Videos) | ||||||
|  | 
 | ||||||
|  | @website     https://www.findx.com | ||||||
|  | @provide-api no | ||||||
|  | @using-api   no | ||||||
|  | @results     HTML | ||||||
|  | @stable      no | ||||||
|  | @parse       url, title, content, embedded, img_src, thumbnail_src | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | from dateutil import parser | ||||||
|  | from json import loads | ||||||
|  | import re | ||||||
|  | 
 | ||||||
|  | from lxml import html | ||||||
|  | 
 | ||||||
|  | from searx import logger | ||||||
|  | from searx.engines.xpath import extract_text | ||||||
|  | from searx.engines.youtube_noapi import base_youtube_url, embedded_url | ||||||
|  | from searx.url_utils import urlencode | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
# Result pages beyond the first can be requested (see the ``page`` argument
# built in ``request``).
paging = True

# Maps the category name found in ``params['category']`` to the path segment
# used in the FindX search URL.
type_map = {
    'none': 'web',
    'general': 'web',
    'images': 'images',
    'videos': 'videos',
}

# URL template: ``{category}`` is a value of ``type_map`` and ``{q}`` is the
# already url-encoded query string.
search_url = 'https://www.findx.com/{category}?{q}'

# XPath of the <script> element whose text is parsed as JSON in ``response``.
results_xpath = '//script[@id="initial-state"]'
|  | 
 | ||||||
|  | 
 | ||||||
def request(query, params):
    """Store the FindX search URL for *query* in ``params['url']``.

    The URL path segment is looked up in ``type_map`` using
    ``params['category']``; the search terms and ``params['pageno']`` are
    url-encoded as the ``q`` and ``page`` arguments.

    Returns the same ``params`` dict that was passed in.
    """
    # Resolve the category first so an unknown category fails before any
    # encoding work is done.
    findx_category = type_map[params['category']]
    query_string = urlencode({
        'q': query,
        'page': params['pageno']
    })
    params['url'] = search_url.format(category=findx_category, q=query_string)
    return params
|  | 
 | ||||||
|  | 
 | ||||||
def response(resp):
    """Convert a FindX result page into a list of result dicts.

    The page text is parsed as HTML, the element(s) matched by
    ``results_xpath`` are reduced to text and decoded as JSON.  The ``web``,
    ``images`` and ``video`` sections of that JSON are then inspected in this
    order, and the first one with a non-empty ``results`` list is converted
    and returned.  An empty list is returned when all three are empty.
    """
    state_nodes = html.fromstring(resp.text).xpath(results_xpath)
    state = loads(extract_text(state_nodes))

    # (section key in the decoded JSON, converter for its entries);
    # the order defines which section wins when several are populated.
    section_converters = (
        ('web', _general_results),
        ('images', _images_results),
        ('video', _videos_results),
    )
    for section, convert in section_converters:
        entries = state[section]['results']
        if len(entries) > 0:
            return convert(entries)

    return []
|  | 
 | ||||||
|  | 
 | ||||||
|  | def _general_results(general_results): | ||||||
|  |     results = [] | ||||||
|  |     for result in general_results: | ||||||
|  |         results.append({ | ||||||
|  |             'url': result['url'], | ||||||
|  |             'title': result['title'], | ||||||
|  |             'content': result['sum'], | ||||||
|  |         }) | ||||||
|  |     return results | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
def _images_results(image_results):
    """Build image result dicts (``images.html`` template) from FindX entries.

    ``sourceURL``, ``title`` and ``source`` are copied to ``url``, ``title``
    and ``content``.  The thumbnail and full-size image addresses are taken
    from ``assets.thumb.url`` and ``assets.file.url`` after being passed
    through ``_extract_url``.
    """
    return [
        {
            'url': entry['sourceURL'],
            'title': entry['title'],
            'content': entry['source'],
            'thumbnail_src': _extract_url(entry['assets']['thumb']['url']),
            'img_src': _extract_url(entry['assets']['file']['url']),
            'template': 'images.html',
        }
        for entry in image_results
    ]
|  | 
 | ||||||
|  | 
 | ||||||
def _videos_results(video_results):
    """Build video result dicts (``videos.html`` template) from FindX entries.

    Only entries whose ``kind`` starts with ``youtube`` are converted; any
    other kind is logged as a warning and skipped.  Descriptions longer than
    300 characters are cut to 300 characters followed by ``...``.

    Raises ``KeyError`` when an entry lacks one of the keys read below, and
    whatever ``parser.parse`` raises for an unparsable ``publishedAt`` value.
    """
    results = []
    for result in video_results:
        kind = result['kind']
        if not kind.startswith('youtube'):
            # ``Logger.warn`` is a deprecated alias (removed in Python 3.13);
            # ``warning`` with lazy %-formatting emits the same message.
            logger.warning('Unknown video kind in findx: %s', kind)
            continue

        snippet = result['snippet']

        description = snippet['description']
        if len(description) > 300:
            description = description[:300] + '...'

        results.append({
            'url': base_youtube_url + result['id'],
            'title': snippet['title'],
            'content': description,
            'thumbnail': _extract_url(snippet['thumbnails']['default']['url']),
            'publishedDate': parser.parse(snippet['publishedAt']),
            'embedded': embedded_url.format(videoid=result['id']),
            'template': 'videos.html',
        })
    return results
|  | 
 | ||||||
|  | 
 | ||||||
|  | def _extract_url(url): | ||||||
|  |     matching = re.search('(/https?://[^)]+)', url) | ||||||
|  |     if matching: | ||||||
|  |         return matching.group(0)[1:] | ||||||
|  |     return '' | ||||||
| @ -107,14 +107,21 @@ class RawTextQuery(object): | |||||||
|                 # check if prefix is equal with engine shortcut |                 # check if prefix is equal with engine shortcut | ||||||
|                 if prefix in engine_shortcuts: |                 if prefix in engine_shortcuts: | ||||||
|                     parse_next = True |                     parse_next = True | ||||||
|                     self.engines.append({'category': 'none', |                     engine_name = engine_shortcuts[prefix] | ||||||
|                                          'name': engine_shortcuts[prefix]}) |                     if engine_name in engines: | ||||||
|  |                         for engine_category in engines[engine_name].categories: | ||||||
|  |                             self.engines.append({'category': engine_category, | ||||||
|  |                                                  'name': engine_name, | ||||||
|  |                                                  'from_bang': True}) | ||||||
| 
 | 
 | ||||||
|                 # check if prefix is equal with engine name |                 # check if prefix is equal with engine name | ||||||
|                 elif prefix in engines: |                 elif prefix in engines: | ||||||
|                     parse_next = True |                     parse_next = True | ||||||
|                     self.engines.append({'category': 'none', |                     if prefix in engines: | ||||||
|                                          'name': prefix}) |                         for engine_category in engines[engine_name].categories: | ||||||
|  |                             self.engines.append({'category': engine_category, | ||||||
|  |                                                  'name': engine_name, | ||||||
|  |                                                  'from_bang': True}) | ||||||
| 
 | 
 | ||||||
|                 # check if prefix is equal with categorie name |                 # check if prefix is equal with categorie name | ||||||
|                 elif prefix in categories: |                 elif prefix in categories: | ||||||
|  | |||||||
| @ -258,8 +258,13 @@ def get_search_query_from_webapp(preferences, form): | |||||||
|     # if engines are calculated from query, |     # if engines are calculated from query, | ||||||
|     # set categories by using that informations |     # set categories by using that informations | ||||||
|     if query_engines and raw_text_query.specific: |     if query_engines and raw_text_query.specific: | ||||||
|         query_categories = list(set(engine['category'] |         additional_categories = set() | ||||||
|                                     for engine in query_engines)) |         for engine in query_engines: | ||||||
|  |             if 'from_bang' in engine and engine['from_bang']: | ||||||
|  |                 additional_categories.add('none') | ||||||
|  |             else: | ||||||
|  |                 additional_categories.add(engine['category']) | ||||||
|  |         query_categories = list(additional_categories) | ||||||
| 
 | 
 | ||||||
|     # otherwise, using defined categories to |     # otherwise, using defined categories to | ||||||
|     # calculate which engines should be used |     # calculate which engines should be used | ||||||
|  | |||||||
| @ -218,6 +218,24 @@ engines: | |||||||
|     shortcut : fd |     shortcut : fd | ||||||
|     disabled : True |     disabled : True | ||||||
| 
 | 
 | ||||||
|  |   - name : findx | ||||||
|  |     engine : findx | ||||||
|  |     shortcut : fx | ||||||
|  |     categories : general | ||||||
|  |     disabled : True | ||||||
|  | 
 | ||||||
|  |   - name : findx images | ||||||
|  |     engine : findx | ||||||
|  |     shortcut : fxi | ||||||
|  |     categories : images | ||||||
|  |     disabled : True | ||||||
|  | 
 | ||||||
|  |   - name : findx videos | ||||||
|  |     engine : findx | ||||||
|  |     shortcut : fxv | ||||||
|  |     categories : videos | ||||||
|  |     disabled : True | ||||||
|  | 
 | ||||||
|   - name : flickr |   - name : flickr | ||||||
|     categories : images |     categories : images | ||||||
|     shortcut : fl |     shortcut : fl | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Adam Tauber
						Adam Tauber