[fix] bing images
This commit is contained in:
		
							parent
							
								
									c0bb89fd46
								
							
						
					
					
						commit
						9ee8e552da
					
				| @ -31,7 +31,6 @@ time_range_support = True | |||||||
| base_url = 'https://www.bing.com/' | base_url = 'https://www.bing.com/' | ||||||
| search_string = 'images/search?{query}&count=10&first={offset}' | search_string = 'images/search?{query}&count=10&first={offset}' | ||||||
| time_range_string = '&qft=+filterui:age-lt{interval}' | time_range_string = '&qft=+filterui:age-lt{interval}' | ||||||
| thumb_url = "https://www.bing.com/th?id={ihk}" |  | ||||||
| time_range_dict = {'day': '1440', | time_range_dict = {'day': '1440', | ||||||
|                    'week': '10080', |                    'week': '10080', | ||||||
|                    'month': '43200', |                    'month': '43200', | ||||||
| @ -78,30 +77,32 @@ def response(resp): | |||||||
|     dom = html.fromstring(resp.text) |     dom = html.fromstring(resp.text) | ||||||
| 
 | 
 | ||||||
|     # parse results |     # parse results | ||||||
|     for result in dom.xpath('//div[@class="dg_u"]/div'): |     for result in dom.xpath('//div[@id="mmComponent_images_1"]/ul/li/div/div[@class="imgpt"]'): | ||||||
|         link = result.xpath('./a')[0] |         link = result.xpath('./a')[0] | ||||||
| 
 | 
 | ||||||
|  |         # TODO find actual title | ||||||
|  |         title = link.xpath('.//img/@alt')[0] | ||||||
|  | 
 | ||||||
|         # parse json-data (it is required to add a space, to make it parsable) |         # parse json-data (it is required to add a space, to make it parsable) | ||||||
|         json_data = loads(_quote_keys_regex.sub(r'\1"\2": \3', link.attrib.get('m'))) |         json_data = loads(_quote_keys_regex.sub(r'\1"\2": \3', link.attrib.get('m'))) | ||||||
| 
 | 
 | ||||||
|         title = link.attrib.get('t1') |         url = json_data.get('purl') | ||||||
|         ihk = link.attrib.get('ihk') |         img_src = json_data.get('murl') | ||||||
| 
 | 
 | ||||||
|         # url = 'http://' + link.attrib.get('t3') |         thumb_json_data = loads(_quote_keys_regex.sub(r'\1"\2": \3', link.attrib.get('mad'))) | ||||||
|         url = json_data.get('surl') |         thumbnail = thumb_json_data.get('turl') | ||||||
|         img_src = json_data.get('imgurl') |  | ||||||
| 
 | 
 | ||||||
|         # append result |         # append result | ||||||
|         results.append({'template': 'images.html', |         results.append({'template': 'images.html', | ||||||
|                         'url': url, |                         'url': url, | ||||||
|                         'title': title, |                         'title': title, | ||||||
|                         'content': '', |                         'content': '', | ||||||
|                         'thumbnail_src': thumb_url.format(ihk=ihk), |                         'thumbnail_src': thumbnail, | ||||||
|                         'img_src': img_src}) |                         'img_src': img_src}) | ||||||
| 
 | 
 | ||||||
|         # TODO stop parsing if 10 images are found |         # TODO stop parsing if 10 images are found | ||||||
|         if len(results) >= 10: |         # if len(results) >= 10: | ||||||
|             break |         #     break | ||||||
| 
 | 
 | ||||||
|     # return results |     # return results | ||||||
|     return results |     return results | ||||||
|  | |||||||
| @ -39,85 +39,46 @@ class TestBingImagesEngine(SearxTestCase): | |||||||
|         self.assertEqual(bing_images.response(response), []) |         self.assertEqual(bing_images.response(response), []) | ||||||
| 
 | 
 | ||||||
|         html = """ |         html = """ | ||||||
|         <div class="dg_u" style="width:178px;height:144px;left:17px;top:0px"><div> |         <div id="mmComponent_images_1"> | ||||||
|             <a href="/images/search?q=south&view=detailv2&&id=7E92863981CCFB89FBDD55205C742DFDA3290CF6&selectedIndex=9&ccid=vzvIfv5u&simid=608055786735667000&thid=OIP.Mbf3bc87efe6e0e476be8cc34bf6cd80eH0" ihk="OIP.Mbf3bc87efe6e0e476be8cc34bf6cd80eH0" t1="South Carolina" t2="747 x 589 · 29 kB · gif" t3="www.digital-topo-maps.com/county-map/south-carolina.shtml" hh="236" hw="300" m='{ns:"images",k:"5117",mid:"7E92863981CCFB89FBDD55205C742DFDA3290CF6",md5:"bf3bc87efe6e0e476be8cc34bf6cd80e",surl:"http://www.digital-topo-maps.com/county-map/south-carolina.shtml",imgurl:"http://www.digital-topo-maps.com/county-map/south-carolina-county-map.gif",tid:"OIP.Mbf3bc87efe6e0e476be8cc34bf6cd80eH0",ow:"480",docid:"608055786735667000",oh:"378",tft:"45"}' mid="7E92863981CCFB89FBDD55205C742DFDA3290CF6" h="ID=images,5117.1"> |             <ul> | ||||||
|                 <img class="img_hid" src2="https://tse4.mm.bing.net/th?id=OIP.Mbf3bc87efe6e0e476be8cc34bf6cd80eH0&w=210&h=154&c=7&rs=1&qlt=90&o=4&pid=1.1" style="width:210px;height:154px;" width="210" height="154"> |                 <li> | ||||||
|             </a> |                     <div> | ||||||
| 
 |                         <div class="imgpt"> | ||||||
|         </div></div> |                             <a m='{"purl":"page_url","murl":"img_url"}' mad='{"turl":"thumb_url"}'> | ||||||
|         """  # noqa |                                 <img src="" alt="alt text" /> | ||||||
|         html = html.replace('\r\n', '').replace('\n', '').replace('\r', '') |                             </a> | ||||||
|         response = mock.Mock(text=html) |                         </div> | ||||||
|         results = bing_images.response(response) |                         <div></div> | ||||||
|         self.assertEqual(type(results), list) |                     </div> | ||||||
|         self.assertEqual(len(results), 1) |                     <div> | ||||||
|         self.assertEqual(results[0]['title'], 'South Carolina') |                         <div class="imgpt"> | ||||||
|         self.assertEqual(results[0]['url'], |                             <a m='{"purl":"page_url2","murl":"img_url2"}' mad='{"turl":"thumb_url2"}'> | ||||||
|                          'http://www.digital-topo-maps.com/county-map/south-carolina.shtml') |                                 <img src="" alt="alt text 2" /> | ||||||
|         self.assertEqual(results[0]['content'], '') |                             </a> | ||||||
|         self.assertEqual(results[0]['thumbnail_src'], |                         </div> | ||||||
|                          'https://www.bing.com/th?id=OIP.Mbf3bc87efe6e0e476be8cc34bf6cd80eH0') |                     </div> | ||||||
|         self.assertEqual(results[0]['img_src'], |                 </li> | ||||||
|                          'http://www.digital-topo-maps.com/county-map/south-carolina-county-map.gif') |             </ul> | ||||||
| 
 |             <ul> | ||||||
|         html = """ |                 <li> | ||||||
|         <a href="#" ihk="HN.608003696942779811" |                     <div> | ||||||
|             m="{ns:"images",k:"5045", |                         <div class="imgpt"> | ||||||
|             mid:"59EB92C317974F34517A1CCAEBEF76A578E08DEE", |                             <a m='{"purl":"page_url3","murl":"img_url3"}' mad='{"turl":"thumb_url3"}'> | ||||||
|             surl:"http://www.page.url/", |                                 <img src="" alt="alt text 3" /> | ||||||
|             imgurl:"http://test.url/Test%20Query.jpg",oh:"238", |                             </a> | ||||||
|             tft:"0",oi:"http://www.image.url/Images/Test%20Query.jpg"}" |                         </div> | ||||||
|             mid="59EB92C317974F34517A1CCAEBEF76A578E08DEE" onclick="return false;" |                     </div> | ||||||
|             t1="Test Query" t2="650 x 517 · 31 kB · jpeg" t3="www.short.url" h="ID=images,5045.1"> |                 </li> | ||||||
|             <img src="https://tse4.mm.bing.net/th?id=HN.608003696942779811&o=4&pid=1.7" |             </ul> | ||||||
|             style="height:144px;" width="178" height="144"/> |         </div> | ||||||
|         </a> |  | ||||||
|         """ |  | ||||||
|         response = mock.Mock(text=html) |  | ||||||
|         results = bing_images.response(response) |  | ||||||
|         self.assertEqual(type(results), list) |  | ||||||
|         self.assertEqual(len(results), 0) |  | ||||||
| 
 |  | ||||||
|         html = """ |  | ||||||
|         <div class="dg_u" style="width:178px;height:144px;left:17px;top:0px"><div> |  | ||||||
|             <a href="#" ihk="HN.608003696942779811" |  | ||||||
|                 m="{ns:"images",k:"5045", |  | ||||||
| mid:"659EB92C317974F34517A1CCAEBEF76A578E08DEE", |  | ||||||
| surl:"http://www.page.url/",imgurl:"http://test.url/Test%20Query.jpg", |  | ||||||
| oh:"238",tft:"0",oi:"http://www.image.url/Images/Test%20Query.jpg"}" |  | ||||||
|                 mid="59EB92C317974F34517A1CCAEBEF76A578E08DEE" onclick="return false;" |  | ||||||
|                 t1="Test Query" t2="650 x 517 · 31 kB · jpeg" t3="www.short.url" h="ID=images,5045.1"> |  | ||||||
|                 <img src="https://tse4.mm.bing.net/th?id=HN.608003696942779811&o=4&pid=1.7" |  | ||||||
|                 style="height:144px;" width="178" height="144"/> |  | ||||||
|             </a> |  | ||||||
|         </div></div> |  | ||||||
|         <div class="dg_u" style="width:178px;height:144px;left:17px;top:0px"><div> |  | ||||||
|             <a href="#" ihk="HN.608003696942779811" |  | ||||||
|                 m="{ns:"images",k:"5045", |  | ||||||
| mid:"659EB92C317974F34517A1CCAEBEF76A578E08DEE", |  | ||||||
| surl:"http://www.page.url/",imgurl:"http://test.url/Test%20Query.jpg", |  | ||||||
| oh:"238",tft:"0",oi:"http://www.image.url/Images/Test%20Query.jpg"}" |  | ||||||
|                 mid="59EB92C317974F34517A1CCAEBEF76A578E08DEE" onclick="return false;" |  | ||||||
|                 t1="Test Query" t2="650 x 517 · 31 kB · jpeg" t3="www.short.url" h="ID=images,5045.1"> |  | ||||||
|                 <img src="https://tse4.mm.bing.net/th?id=HN.608003696942779811&o=4&pid=1.7" |  | ||||||
|                 style="height:144px;" width="178" height="144"/> |  | ||||||
|             </a> |  | ||||||
|         </div></div> |  | ||||||
|         <div class="dg_u" style="width:178px;height:144px;left:17px;top:0px"><div> |  | ||||||
|             <a href="#" ihk="HN.608003696942779811" |  | ||||||
|                 m="{ns:"images",k:"5045", |  | ||||||
| mid:"659EB92C317974F34517A1CCAEBEF76A578E08DEE", |  | ||||||
| surl:"http://www.page.url/",imgurl:"http://test.url/Test%20Query.jpg", |  | ||||||
| oh:"238",tft:"0",oi:"http://www.image.url/Images/Test%20Query.jpg"}" |  | ||||||
|                 mid="59EB92C317974F34517A1CCAEBEF76A578E08DEE" onclick="return false;" |  | ||||||
|                 t1="Test Query" t2="650 x 517 · 31 kB · jpeg" t3="www.short.url" h="ID=images,5045.1"> |  | ||||||
|                 <img src="https://tse4.mm.bing.net/th?id=HN.608003696942779811&o=4&pid=1.7" |  | ||||||
|                 style="height:144px;" width="178" height="144"/> |  | ||||||
|             </a> |  | ||||||
|         </div></div> |  | ||||||
|         """ |         """ | ||||||
|         html = html.replace('\r\n', '').replace('\n', '').replace('\r', '') |         html = html.replace('\r\n', '').replace('\n', '').replace('\r', '') | ||||||
|         response = mock.Mock(text=html) |         response = mock.Mock(text=html) | ||||||
|         results = bing_images.response(response) |         results = bing_images.response(response) | ||||||
|         self.assertEqual(type(results), list) |         self.assertEqual(type(results), list) | ||||||
|         self.assertEqual(len(results), 3) |         self.assertEqual(len(results), 3) | ||||||
|  |         self.assertEqual(results[0]['title'], 'alt text') | ||||||
|  |         self.assertEqual(results[0]['url'], 'page_url') | ||||||
|  |         self.assertEqual(results[0]['content'], '') | ||||||
|  |         self.assertEqual(results[0]['thumbnail_src'], 'thumb_url') | ||||||
|  |         self.assertEqual(results[0]['img_src'], 'img_url') | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 marc
						marc