Merge pull request #348 from pointhi/swisscows
implement swisscows engine
This commit is contained in:
		
						commit
						9c7578bab6
					
				
							
								
								
									
										108
									
								
								searx/engines/swisscows.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										108
									
								
								searx/engines/swisscows.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,108 @@ | ||||
| """ | ||||
|  Swisscows (Web, Images) | ||||
| 
 | ||||
|  @website     https://swisscows.ch | ||||
|  @provide-api no | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     HTML (using search portal) | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, content | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from urllib import urlencode, unquote | ||||
| import re | ||||
| 
 | ||||
# engine dependent config
categories = ['general', 'images']
paging = True
language_support = True

# search-url
base_url = 'https://swisscows.ch/'
search_string = '?{query}&page={page}'

# regex
# All patterns are raw strings so that the regex escapes (\s, \., \?) reach
# the regex engine untouched instead of relying on Python passing unknown
# string escapes through.
#
# regex_json locates the JSON object assigned to "initialData" inside the
# page's inline script.  The object spans several lines, hence DOTALL; this
# matches exactly what the former `(.|\n)*` group matched, without running an
# alternation for every single character of the page.
regex_json = re.compile(r'initialData: {"Request":.*},\s*environment', re.DOTALL)
# the two patterns below trim the text around the JSON object in a regex_json match
regex_json_remove_start = re.compile(r'^initialData:\s*')
regex_json_remove_end = re.compile(r',\s*environment$')
# prefix put in front of the (percent-encoded) image link
regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=')
| 
 | ||||
| 
 | ||||
| # do search-request | ||||
def request(query, params):
    """Build the Swisscows search URL and store it in ``params['url']``.

    :param query: search terms, passed through ``urlencode``
    :param params: request parameters; ``'language'`` and ``'pageno'`` are
        read, ``'category'`` is read when present, ``'url'`` is written
    :returns: the same ``params`` mapping, with ``'url'`` set
    """
    language = params['language']

    if language == 'all':
        # no specific language requested: 'browser' is sent for both values
        ui_language = 'browser'
        region = 'browser'
    else:
        # e.g. 'de_DE' -> region 'de-DE', ui language 'de'
        region = language.replace('_', '-')
        ui_language = language.split('_')[0]

    # a sequence of pairs (instead of a dict) keeps the parameter order, and
    # therefore the generated url, the same on every call
    query_string = urlencode([('query', query),
                              ('uiLanguage', ui_language),
                              ('region', region)])

    search_path = search_string.format(query=query_string,
                                       page=params['pageno'])

    # image search query is something like 'image?{query}&page={page}'
    # (.get: a missing category simply means "not an image search")
    if params.get('category') == 'images':
        search_path = 'image' + search_path

    params['url'] = base_url + search_path

    return params
| 
 | ||||
| 
 | ||||
| # get response from search-request | ||||
def _strip_highlight(text):
    """Return ``text`` without the U+E000 / U+E001 private-use markers.

    NOTE(review): the markers presumably wrap highlighted query terms --
    confirm against a live response.
    """
    return text.replace(u'\uE000', '').replace(u'\uE001', '')


def _image_result(item):
    """Turn one image entry of the embedded JSON into an image result dict."""
    # drop the 'i.swisscows.ch/?link=' prefix and percent-decode the remainder
    img_url = unquote(regex_img_url_remove_start.sub('', item['Url']))

    return {'url': item['SourceUrl'],
            'title': _strip_highlight(item['Title']),
            'content': '',
            'img_src': img_url,
            'template': 'images.html'}


def response(resp):
    """Extract results from the JSON embedded in a Swisscows result page.

    :param resp: response object; only ``resp.content`` is read
    :returns: list of result dicts.  General results carry ``url``, ``title``
        and ``content``; image results additionally carry ``img_src`` and
        ``template``.  The list is empty when the page holds no
        ``initialData`` block.
    :raises ValueError: (from ``loads``) if the extracted text is not valid JSON
    """
    results = []

    json_match = regex_json.search(resp.content)

    # check if results are returned
    if not json_match:
        return results

    # cut the match down to the bare JSON object
    json_raw = regex_json_remove_end.sub('', regex_json_remove_start.sub('', json_match.group()))
    data = loads(json_raw)

    # parse results ("Results" may be absent or empty)
    for item in (data.get('Results') or {}).get('items', []):
        # entries with an image content type are image results
        if item.get('ContentType', '').startswith('image'):
            results.append(_image_result(item))
        else:
            results.append({'url': _strip_highlight(item['Url']),
                            'title': _strip_highlight(item['Title']),
                            'content': _strip_highlight(item['Description'])})

    # parse images listed separately from the main results
    for item in data.get('Images') or []:
        results.append(_image_result(item))

    # return results
    return results
| @ -213,6 +213,10 @@ engines: | ||||
|     timeout : 6.0 | ||||
|     disabled : True | ||||
| 
 | ||||
|   - name : swisscows | ||||
|     engine : swisscows | ||||
|     shortcut : sw | ||||
| 
 | ||||
|   - name : twitter | ||||
|     engine : twitter | ||||
|     shortcut : tw | ||||
|  | ||||
							
								
								
									
										124
									
								
								searx/tests/engines/test_swisscows.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										124
									
								
								searx/tests/engines/test_swisscows.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,124 @@ | ||||
| from collections import defaultdict | ||||
| import mock | ||||
| from searx.engines import swisscows | ||||
| from searx.testing import SearxTestCase | ||||
| 
 | ||||
| 
 | ||||
class TestSwisscowsEngine(SearxTestCase):
    """Unit tests for the swisscows engine's request() and response()."""

    def test_request(self):
        """request() builds the url from query, page, language and category."""
        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['pageno'] = 1
        dicto['language'] = 'de_DE'
        params = swisscows.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('swisscows.ch', params['url'])
        self.assertIn('uiLanguage=de', params['url'])
        self.assertIn('region=de-DE', params['url'])
        self.assertIn('page=1', params['url'])

        # no specific language: both values fall back to 'browser'
        dicto['language'] = 'all'
        params = swisscows.request(query, dicto)
        self.assertIn('uiLanguage=browser', params['url'])
        self.assertIn('region=browser', params['url'])

        # the image category uses the 'image' path
        dicto['category'] = 'images'
        params = swisscows.request(query, dicto)
        self.assertIn('swisscows.ch/image?', params['url'])

    def test_response(self):
        """response() extracts general and image results from the page."""
        # objects without a 'content' attribute are rejected
        self.assertRaises(AttributeError, swisscows.response, None)
        self.assertRaises(AttributeError, swisscows.response, [])
        self.assertRaises(AttributeError, swisscows.response, '')
        self.assertRaises(AttributeError, swisscows.response, '[]')

        # a page without embedded data yields no results
        response = mock.Mock(content='<html></html>')
        self.assertEqual(swisscows.response(response), [])

        # page with one general result, one image among the results and one
        # entry in the separate "Images" list
        html = u"""
        <script>
            App.Dispatcher.dispatch("initialize", {
                html5history: true,
                initialData: {"Request":
                    {"Page":1,
                    "ItemsCount":1,
                    "Query":"This should ",
                    "NormalizedQuery":"This should ",
                    "Region":"de-AT",
                    "UILanguage":"de"},
                    "Results":{"items":[
                            {"Title":"\uE000This should\uE001 be the title",
                            "Description":"\uE000This should\uE001 be the content.",
                            "Url":"http://this.should.be.the.link/",
                            "DisplayUrl":"www.\uE000this.should.be.the\uE001.link",
                            "Id":"782ef287-e439-451c-b380-6ebc14ba033d"},
                            {"Title":"Datei:This should1.svg",
                            "Url":"https://i.swisscows.ch/?link=http%3a%2f%2fts2.mm.This/should1.png",
                            "SourceUrl":"http://de.wikipedia.org/wiki/Datei:This should1.svg",
                            "DisplayUrl":"de.wikipedia.org/wiki/Datei:This should1.svg",
                            "Width":950,
                            "Height":534,
                            "FileSize":92100,
                            "ContentType":"image/jpeg",
                            "Thumbnail":{
                                "Url":"https://i.swisscows.ch/?link=http%3a%2f%2fts2.mm.This/should1.png",
                                "ContentType":"image/jpeg",
                                "Width":300,
                                "Height":168,
                                "FileSize":9134},
                                "Id":"6a97a542-8f65-425f-b7f6-1178c3aba7be"
                            }
                        ],"TotalCount":55300,
                        "Query":"This should "
                    },
                    "Images":[{"Title":"Datei:This should.svg",
                        "Url":"https://i.swisscows.ch/?link=http%3a%2f%2fts2.mm.This/should.png",
                        "SourceUrl":"http://de.wikipedia.org/wiki/Datei:This should.svg",
                        "DisplayUrl":"de.wikipedia.org/wiki/Datei:This should.svg",
                        "Width":1280,
                        "Height":677,
                        "FileSize":50053,
                        "ContentType":"image/png",
                        "Thumbnail":{"Url":"https://i.swisscows.ch/?link=http%3a%2f%2fts2.mm.This/should.png",
                            "ContentType":"image/png",
                            "Width":300,
                            "Height":158,
                            "FileSize":8023},
                        "Id":"ae230fd8-a06a-47d6-99d5-e74766d8143a"}]},
                environment: "production"
            }).then(function (options) {
                $('#Search_Form').on('submit', function (e) {
                    if (!Modernizr.history) return;
                    e.preventDefault();

                    var $form = $(this),
                        $query = $('#Query'),
                        query = $.trim($query.val()),
                        path = App.Router.makePath($form.attr('action'), null, $form.serializeObject())

                    if (query.length) {
                        options.html5history ?
                            ReactRouter.HistoryLocation.push(path) :
                            ReactRouter.RefreshLocation.push(path);
                    }
                    else $('#Query').trigger('blur');
                });

            });
        </script>
        """
        response = mock.Mock(content=html)
        results = swisscows.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 3)

        # general result: highlight markers are stripped
        self.assertEqual(results[0]['title'], 'This should be the title')
        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
        self.assertEqual(results[0]['content'], 'This should be the content.')

        # image found among the regular results
        self.assertEqual(results[1]['title'], 'Datei:This should1.svg')
        self.assertEqual(results[1]['url'], 'http://de.wikipedia.org/wiki/Datei:This should1.svg')
        self.assertEqual(results[1]['img_src'], 'http://ts2.mm.This/should1.png')
        self.assertEqual(results[1]['template'], 'images.html')

        # image from the separate "Images" list
        self.assertEqual(results[2]['title'], 'Datei:This should.svg')
        self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
        self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png')
        self.assertEqual(results[2]['template'], 'images.html')
| @ -32,6 +32,7 @@ from searx.tests.engines.test_spotify import *  # noqa | ||||
| from searx.tests.engines.test_stackoverflow import *  # noqa | ||||
| from searx.tests.engines.test_startpage import *  # noqa | ||||
| from searx.tests.engines.test_subtitleseeker import *  # noqa | ||||
| from searx.tests.engines.test_swisscows import *  # noqa | ||||
| from searx.tests.engines.test_twitter import *  # noqa | ||||
| from searx.tests.engines.test_vimeo import *  # noqa | ||||
| from searx.tests.engines.test_www1x import *  # noqa | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Adam Tauber
						Adam Tauber