Bing News unit test
I have no idea why coverage reports 97% with 2 missed branches. If anyone has an idea...
This commit is contained in:
		
							parent
							
								
									dad0434f34
								
							
						
					
					
						commit
						efde2c21c8
					
				| @ -15,6 +15,7 @@ from lxml import html | |||||||
| from datetime import datetime, timedelta | from datetime import datetime, timedelta | ||||||
| from dateutil import parser | from dateutil import parser | ||||||
| import re | import re | ||||||
|  | from searx.engines.xpath import extract_text | ||||||
| 
 | 
 | ||||||
| # engine dependent config | # engine dependent config | ||||||
| categories = ['news'] | categories = ['news'] | ||||||
| @ -42,6 +43,7 @@ def request(query, params): | |||||||
|     params['cookies']['_FP'] = "ui=en-US" |     params['cookies']['_FP'] = "ui=en-US" | ||||||
| 
 | 
 | ||||||
|     params['url'] = base_url + search_path |     params['url'] = base_url + search_path | ||||||
|  | 
 | ||||||
|     return params |     return params | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -55,44 +57,37 @@ def response(resp): | |||||||
|     for result in dom.xpath('//div[@class="sn_r"]'): |     for result in dom.xpath('//div[@class="sn_r"]'): | ||||||
|         link = result.xpath('.//div[@class="newstitle"]/a')[0] |         link = result.xpath('.//div[@class="newstitle"]/a')[0] | ||||||
|         url = link.attrib.get('href') |         url = link.attrib.get('href') | ||||||
|         title = ' '.join(link.xpath('.//text()')) |         title = extract_text(link) | ||||||
|         contentXPath = result.xpath('.//div[@class="sn_txt"]/div' |         contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]') | ||||||
|                                     '//span[@class="sn_snip"]//text()') |  | ||||||
|         if contentXPath is not None: |         if contentXPath is not None: | ||||||
|             content = escape(' '.join(contentXPath)) |             content = escape(extract_text(contentXPath)) | ||||||
| 
 | 
 | ||||||
|         # parse publishedDate |         # parse publishedDate | ||||||
|         publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div' |         publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div' | ||||||
|                                           '//span[contains(@class,"sn_ST")]' |                                           '//span[contains(@class,"sn_ST")]' | ||||||
|                                           '//span[contains(@class,"sn_tm")]' |                                           '//span[contains(@class,"sn_tm")]') | ||||||
|                                           '//text()') | 
 | ||||||
|         if publishedDateXPath is not None: |         if publishedDateXPath is not None: | ||||||
|             publishedDate = escape(' '.join(publishedDateXPath)) |             publishedDate = escape(extract_text(publishedDateXPath)) | ||||||
| 
 | 
 | ||||||
|         if re.match("^[0-9]+ minute(s|) ago$", publishedDate): |         if re.match("^[0-9]+ minute(s|) ago$", publishedDate): | ||||||
|             timeNumbers = re.findall(r'\d+', publishedDate) |             timeNumbers = re.findall(r'\d+', publishedDate) | ||||||
|             publishedDate = datetime.now()\ |             publishedDate = datetime.now() - timedelta(minutes=int(timeNumbers[0])) | ||||||
|                 - timedelta(minutes=int(timeNumbers[0])) |  | ||||||
|         elif re.match("^[0-9]+ hour(s|) ago$", publishedDate): |         elif re.match("^[0-9]+ hour(s|) ago$", publishedDate): | ||||||
|             timeNumbers = re.findall(r'\d+', publishedDate) |             timeNumbers = re.findall(r'\d+', publishedDate) | ||||||
|             publishedDate = datetime.now()\ |             publishedDate = datetime.now() - timedelta(hours=int(timeNumbers[0])) | ||||||
|                 - timedelta(hours=int(timeNumbers[0])) |         elif re.match("^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$", publishedDate): | ||||||
|         elif re.match("^[0-9]+ hour(s|)," |  | ||||||
|                       " [0-9]+ minute(s|) ago$", publishedDate): |  | ||||||
|             timeNumbers = re.findall(r'\d+', publishedDate) |             timeNumbers = re.findall(r'\d+', publishedDate) | ||||||
|             publishedDate = datetime.now()\ |             publishedDate = datetime.now()\ | ||||||
|                 - timedelta(hours=int(timeNumbers[0]))\ |                 - timedelta(hours=int(timeNumbers[0]))\ | ||||||
|                 - timedelta(minutes=int(timeNumbers[1])) |                 - timedelta(minutes=int(timeNumbers[1])) | ||||||
|         elif re.match("^[0-9]+ day(s|) ago$", publishedDate): |         elif re.match("^[0-9]+ day(s|) ago$", publishedDate): | ||||||
|             timeNumbers = re.findall(r'\d+', publishedDate) |             timeNumbers = re.findall(r'\d+', publishedDate) | ||||||
|             publishedDate = datetime.now()\ |             publishedDate = datetime.now() - timedelta(days=int(timeNumbers[0])) | ||||||
|                 - timedelta(days=int(timeNumbers[0])) |  | ||||||
|         else: |         else: | ||||||
|             try: |             try: | ||||||
|                 # FIXME use params['language'] to parse either mm/dd or dd/mm |  | ||||||
|                 publishedDate = parser.parse(publishedDate, dayfirst=False) |                 publishedDate = parser.parse(publishedDate, dayfirst=False) | ||||||
|             except TypeError: |             except TypeError: | ||||||
|                 # FIXME |  | ||||||
|                 publishedDate = datetime.now() |                 publishedDate = datetime.now() | ||||||
| 
 | 
 | ||||||
|         # append result |         # append result | ||||||
|  | |||||||
							
								
								
									
										236
									
								
								searx/tests/engines/test_bing_news.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										236
									
								
								searx/tests/engines/test_bing_news.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,236 @@ | |||||||
|  | from collections import defaultdict | ||||||
|  | import mock | ||||||
|  | from searx.engines import bing_news | ||||||
|  | from searx.testing import SearxTestCase | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class TestBingNewsEngine(SearxTestCase): | ||||||
|  | 
 | ||||||
|  |     def test_request(self): | ||||||
|  |         query = 'test_query' | ||||||
|  |         dicto = defaultdict(dict) | ||||||
|  |         dicto['pageno'] = 1 | ||||||
|  |         dicto['language'] = 'fr_FR' | ||||||
|  |         params = bing_news.request(query, dicto) | ||||||
|  |         self.assertIn('url', params) | ||||||
|  |         self.assertIn(query, params['url']) | ||||||
|  |         self.assertIn('bing.com', params['url']) | ||||||
|  |         self.assertIn('fr', params['url']) | ||||||
|  |         self.assertIn('_FP', params['cookies']) | ||||||
|  |         self.assertIn('en', params['cookies']['_FP']) | ||||||
|  | 
 | ||||||
|  |         dicto['language'] = 'all' | ||||||
|  |         params = bing_news.request(query, dicto) | ||||||
|  |         self.assertIn('en', params['url']) | ||||||
|  |         self.assertIn('_FP', params['cookies']) | ||||||
|  |         self.assertIn('en', params['cookies']['_FP']) | ||||||
|  | 
 | ||||||
|  |     def test_response(self): | ||||||
|  |         self.assertRaises(AttributeError, bing_news.response, None) | ||||||
|  |         self.assertRaises(AttributeError, bing_news.response, []) | ||||||
|  |         self.assertRaises(AttributeError, bing_news.response, '') | ||||||
|  |         self.assertRaises(AttributeError, bing_news.response, '[]') | ||||||
|  | 
 | ||||||
|  |         response = mock.Mock(content='<html></html>') | ||||||
|  |         self.assertEqual(bing_news.response(response), []) | ||||||
|  | 
 | ||||||
|  |         response = mock.Mock(content='<html></html>') | ||||||
|  |         self.assertEqual(bing_news.response(response), []) | ||||||
|  | 
 | ||||||
|  |         html = """ | ||||||
|  |         <div class="sn_r"> | ||||||
|  |             <div class="newstitle"> | ||||||
|  |                 <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1"> | ||||||
|  |                     Title | ||||||
|  |                 </a> | ||||||
|  |             </div> | ||||||
|  |             <div class="sn_img"> | ||||||
|  |                 <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1"> | ||||||
|  |                     <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" /> | ||||||
|  |                 </a> | ||||||
|  |             </div> | ||||||
|  |             <div class="sn_txt"> | ||||||
|  |                 <div class="sn_oi"> | ||||||
|  |                     <span class="sn_snip">Article Content</span> | ||||||
|  |                     <span class="sn_ST"> | ||||||
|  |                         <cite class="sn_src">metronews.fr</cite> | ||||||
|  |                          ·  | ||||||
|  |                         <span class="sn_tm">44 minutes ago</span> | ||||||
|  |                     </span> | ||||||
|  |                 </div> | ||||||
|  |             </div> | ||||||
|  |         </div> | ||||||
|  |         """ | ||||||
|  |         response = mock.Mock(content=html) | ||||||
|  |         results = bing_news.response(response) | ||||||
|  |         self.assertEqual(type(results), list) | ||||||
|  |         self.assertEqual(len(results), 1) | ||||||
|  |         self.assertEqual(results[0]['title'], 'Title') | ||||||
|  |         self.assertEqual(results[0]['url'], 'http://url.of.article/') | ||||||
|  |         self.assertEqual(results[0]['content'], 'Article Content') | ||||||
|  | 
 | ||||||
|  |         html = """ | ||||||
|  |         <div class="sn_r"> | ||||||
|  |             <div class="newstitle"> | ||||||
|  |                 <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1"> | ||||||
|  |                     Title | ||||||
|  |                 </a> | ||||||
|  |             </div> | ||||||
|  |             <div class="sn_img"> | ||||||
|  |                 <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1"> | ||||||
|  |                     <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" /> | ||||||
|  |                 </a> | ||||||
|  |             </div> | ||||||
|  |             <div class="sn_txt"> | ||||||
|  |                 <div class="sn_oi"> | ||||||
|  |                     <span class="sn_snip">Article Content</span> | ||||||
|  |                     <span class="sn_ST"> | ||||||
|  |                         <cite class="sn_src">metronews.fr</cite> | ||||||
|  |                          ·  | ||||||
|  |                         <span class="sn_tm">44 minutes ago</span> | ||||||
|  |                     </span> | ||||||
|  |                 </div> | ||||||
|  |             </div> | ||||||
|  |         </div> | ||||||
|  |         <div class="sn_r"> | ||||||
|  |             <div class="newstitle"> | ||||||
|  |                 <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1"> | ||||||
|  |                     Title | ||||||
|  |                 </a> | ||||||
|  |             </div> | ||||||
|  |             <div class="sn_img"> | ||||||
|  |                 <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1"> | ||||||
|  |                     <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" /> | ||||||
|  |                 </a> | ||||||
|  |             </div> | ||||||
|  |             <div class="sn_txt"> | ||||||
|  |                 <div class="sn_oi"> | ||||||
|  |                     <span class="sn_snip">Article Content</span> | ||||||
|  |                     <span class="sn_ST"> | ||||||
|  |                         <cite class="sn_src">metronews.fr</cite> | ||||||
|  |                          ·  | ||||||
|  |                         <span class="sn_tm">3 hours, 44 minutes ago</span> | ||||||
|  |                     </span> | ||||||
|  |                 </div> | ||||||
|  |             </div> | ||||||
|  |         </div> | ||||||
|  |         <div class="sn_r"> | ||||||
|  |             <div class="newstitle"> | ||||||
|  |                 <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1"> | ||||||
|  |                     Title | ||||||
|  |                 </a> | ||||||
|  |             </div> | ||||||
|  |             <div class="sn_img"> | ||||||
|  |                 <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1"> | ||||||
|  |                     <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" /> | ||||||
|  |                 </a> | ||||||
|  |             </div> | ||||||
|  |             <div class="sn_txt"> | ||||||
|  |                 <div class="sn_oi"> | ||||||
|  |                     <span class="sn_snip">Article Content</span> | ||||||
|  |                     <span class="sn_ST"> | ||||||
|  |                         <cite class="sn_src">metronews.fr</cite> | ||||||
|  |                          ·  | ||||||
|  |                         <span class="sn_tm">44 hours ago</span> | ||||||
|  |                     </span> | ||||||
|  |                 </div> | ||||||
|  |             </div> | ||||||
|  |         </div> | ||||||
|  |         <div class="sn_r"> | ||||||
|  |             <div class="newstitle"> | ||||||
|  |                 <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1"> | ||||||
|  |                     Title | ||||||
|  |                 </a> | ||||||
|  |             </div> | ||||||
|  |             <div class="sn_img"> | ||||||
|  |                 <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1"> | ||||||
|  |                     <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" /> | ||||||
|  |                 </a> | ||||||
|  |             </div> | ||||||
|  |             <div class="sn_txt"> | ||||||
|  |                 <div class="sn_oi"> | ||||||
|  |                     <span class="sn_snip">Article Content</span> | ||||||
|  |                     <span class="sn_ST"> | ||||||
|  |                         <cite class="sn_src">metronews.fr</cite> | ||||||
|  |                          ·  | ||||||
|  |                         <span class="sn_tm">2 days ago</span> | ||||||
|  |                     </span> | ||||||
|  |                 </div> | ||||||
|  |             </div> | ||||||
|  |         </div> | ||||||
|  |         <div class="sn_r"> | ||||||
|  |             <div class="newstitle"> | ||||||
|  |                 <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1"> | ||||||
|  |                     Title | ||||||
|  |                 </a> | ||||||
|  |             </div> | ||||||
|  |             <div class="sn_img"> | ||||||
|  |                 <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1"> | ||||||
|  |                     <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" /> | ||||||
|  |                 </a> | ||||||
|  |             </div> | ||||||
|  |             <div class="sn_txt"> | ||||||
|  |                 <div class="sn_oi"> | ||||||
|  |                     <span class="sn_snip">Article Content</span> | ||||||
|  |                     <span class="sn_ST"> | ||||||
|  |                         <cite class="sn_src">metronews.fr</cite> | ||||||
|  |                          ·  | ||||||
|  |                         <span class="sn_tm">27/01/2015</span> | ||||||
|  |                     </span> | ||||||
|  |                 </div> | ||||||
|  |             </div> | ||||||
|  |         </div> | ||||||
|  |         <div class="sn_r"> | ||||||
|  |             <div class="newstitle"> | ||||||
|  |                 <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1"> | ||||||
|  |                     Title | ||||||
|  |                 </a> | ||||||
|  |             </div> | ||||||
|  |             <div class="sn_img"> | ||||||
|  |                 <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1"> | ||||||
|  |                     <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" /> | ||||||
|  |                 </a> | ||||||
|  |             </div> | ||||||
|  |             <div class="sn_txt"> | ||||||
|  |                 <div class="sn_oi"> | ||||||
|  |                     <span class="sn_snip">Article Content</span> | ||||||
|  |                     <span class="sn_ST"> | ||||||
|  |                         <cite class="sn_src">metronews.fr</cite> | ||||||
|  |                          ·  | ||||||
|  |                         <span class="sn_tm">Il y a 3 heures</span> | ||||||
|  |                     </span> | ||||||
|  |                 </div> | ||||||
|  |             </div> | ||||||
|  |         </div> | ||||||
|  |         """ | ||||||
|  |         response = mock.Mock(content=html) | ||||||
|  |         results = bing_news.response(response) | ||||||
|  |         self.assertEqual(type(results), list) | ||||||
|  |         self.assertEqual(len(results), 6) | ||||||
|  | 
 | ||||||
|  |         html = """ | ||||||
|  |         <div class="newstitle"> | ||||||
|  |             <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1"> | ||||||
|  |                 Title | ||||||
|  |             </a> | ||||||
|  |         </div> | ||||||
|  |         <div class="sn_img"> | ||||||
|  |             <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1"> | ||||||
|  |                 <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" /> | ||||||
|  |             </a> | ||||||
|  |         </div> | ||||||
|  |         <div class="sn_txt"> | ||||||
|  |             <div class="sn_oi"> | ||||||
|  |                 <span class="sn_snip">Article Content</span> | ||||||
|  |                 <span class="sn_ST"> | ||||||
|  |                     <cite class="sn_src">metronews.fr</cite> | ||||||
|  |                      ·  | ||||||
|  |                     <span class="sn_tm">44 minutes ago</span> | ||||||
|  |                 </span> | ||||||
|  |             </div> | ||||||
|  |         </div> | ||||||
|  |         """ | ||||||
|  |         response = mock.Mock(content=html) | ||||||
|  |         results = bing_news.response(response) | ||||||
|  |         self.assertEqual(type(results), list) | ||||||
|  |         self.assertEqual(len(results), 0) | ||||||
| @ -1,5 +1,6 @@ | |||||||
| from searx.tests.engines.test_bing import *  # noqa | from searx.tests.engines.test_bing import *  # noqa | ||||||
| from searx.tests.engines.test_bing_images import *  # noqa | from searx.tests.engines.test_bing_images import *  # noqa | ||||||
|  | from searx.tests.engines.test_bing_news import *  # noqa | ||||||
| from searx.tests.engines.test_dailymotion import *  # noqa | from searx.tests.engines.test_dailymotion import *  # noqa | ||||||
| from searx.tests.engines.test_deezer import *  # noqa | from searx.tests.engines.test_deezer import *  # noqa | ||||||
| from searx.tests.engines.test_deviantart import *  # noqa | from searx.tests.engines.test_deviantart import *  # noqa | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Cqoicebordel
						Cqoicebordel