[mod] add/modify image fetching for bing_news, qwant and twitter engines
This commit is contained in:
		
							parent
							
								
									4cffd78650
								
							
						
					
					
						commit
						f5128c7cb9
					
				| @ -112,12 +112,11 @@ def response(resp): | |||||||
| 
 | 
 | ||||||
|         # append result |         # append result | ||||||
|         if thumbnail is not None: |         if thumbnail is not None: | ||||||
|             results.append({'template': 'videos.html', |             results.append({'url': url, | ||||||
|                             'url': url, |  | ||||||
|                             'title': title, |                             'title': title, | ||||||
|                             'publishedDate': publishedDate, |                             'publishedDate': publishedDate, | ||||||
|                             'content': content, |                             'content': content, | ||||||
|                             'thumbnail': thumbnail}) |                             'img_src': thumbnail}) | ||||||
|         else: |         else: | ||||||
|             results.append({'url': url, |             results.append({'url': url, | ||||||
|                             'title': title, |                             'title': title, | ||||||
|  | |||||||
| @ -96,14 +96,27 @@ def response(resp): | |||||||
|                             'thumbnail_src': thumbnail_src, |                             'thumbnail_src': thumbnail_src, | ||||||
|                             'img_src': img_src}) |                             'img_src': img_src}) | ||||||
| 
 | 
 | ||||||
|         elif (category_to_keyword.get(categories[0], '') == 'news' or |         elif category_to_keyword.get(categories[0], '') == 'social': | ||||||
|               category_to_keyword.get(categories[0], '') == 'social'): |  | ||||||
|             published_date = datetime.fromtimestamp(result['date'], None) |             published_date = datetime.fromtimestamp(result['date'], None) | ||||||
| 
 |             img_src = result.get('img', None) | ||||||
|             results.append({'url': res_url, |             results.append({'url': res_url, | ||||||
|                             'title': title, |                             'title': title, | ||||||
|                             'publishedDate': published_date, |                             'publishedDate': published_date, | ||||||
|                             'content': content}) |                             'content': content, | ||||||
|  |                             'img_src': img_src}) | ||||||
|  | 
 | ||||||
|  |         elif category_to_keyword.get(categories[0], '') == 'news': | ||||||
|  |             published_date = datetime.fromtimestamp(result['date'], None) | ||||||
|  |             media = result.get('media', []) | ||||||
|  |             if len(media) > 0: | ||||||
|  |                 img_src = media[0].get('pict', {}).get('url', None) | ||||||
|  |             else: | ||||||
|  |                 img_src = None | ||||||
|  |             results.append({'url': res_url, | ||||||
|  |                             'title': title, | ||||||
|  |                             'publishedDate': published_date, | ||||||
|  |                             'content': content, | ||||||
|  |                             'img_src': img_src}) | ||||||
| 
 | 
 | ||||||
|     return results |     return results | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -27,6 +27,7 @@ search_url = base_url + 'search?' | |||||||
| 
 | 
 | ||||||
| # specific xpath variables | # specific xpath variables | ||||||
| results_xpath = '//li[@data-item-type="tweet"]' | results_xpath = '//li[@data-item-type="tweet"]' | ||||||
|  | avatar_xpath = './/img[contains(@class, "avatar")]/@src' | ||||||
| link_xpath = './/small[@class="time"]//a' | link_xpath = './/small[@class="time"]//a' | ||||||
| title_xpath = './/span[contains(@class, "username")]' | title_xpath = './/span[contains(@class, "username")]' | ||||||
| content_xpath = './/p[contains(@class, "tweet-text")]' | content_xpath = './/p[contains(@class, "tweet-text")]' | ||||||
| @ -57,6 +58,8 @@ def response(resp): | |||||||
|         try: |         try: | ||||||
|             link = tweet.xpath(link_xpath)[0] |             link = tweet.xpath(link_xpath)[0] | ||||||
|             content = extract_text(tweet.xpath(content_xpath)[0]) |             content = extract_text(tweet.xpath(content_xpath)[0]) | ||||||
|  |             img_src = tweet.xpath(avatar_xpath)[0] | ||||||
|  |             img_src = img_src.replace('_bigger', '_normal') | ||||||
|         except Exception: |         except Exception: | ||||||
|             continue |             continue | ||||||
| 
 | 
 | ||||||
| @ -71,12 +74,14 @@ def response(resp): | |||||||
|             results.append({'url': url, |             results.append({'url': url, | ||||||
|                             'title': title, |                             'title': title, | ||||||
|                             'content': content, |                             'content': content, | ||||||
|  |                             'img_src': img_src, | ||||||
|                             'publishedDate': publishedDate}) |                             'publishedDate': publishedDate}) | ||||||
|         else: |         else: | ||||||
|             # append result |             # append result | ||||||
|             results.append({'url': url, |             results.append({'url': url, | ||||||
|                             'title': title, |                             'title': title, | ||||||
|                             'content': content}) |                             'content': content, | ||||||
|  |                             'img_src': img_src}) | ||||||
| 
 | 
 | ||||||
|     # return results |     # return results | ||||||
|     return results |     return results | ||||||
|  | |||||||
| @ -81,11 +81,11 @@ class TestBingNewsEngine(SearxTestCase): | |||||||
|         self.assertEqual(results[0]['title'], 'Title') |         self.assertEqual(results[0]['title'], 'Title') | ||||||
|         self.assertEqual(results[0]['url'], 'http://url.of.article/') |         self.assertEqual(results[0]['url'], 'http://url.of.article/') | ||||||
|         self.assertEqual(results[0]['content'], 'Article Content') |         self.assertEqual(results[0]['content'], 'Article Content') | ||||||
|         self.assertEqual(results[0]['thumbnail'], 'https://www.bing.com/th?id=ON.13371337133713371337133713371337') |         self.assertEqual(results[0]['img_src'], 'https://www.bing.com/th?id=ON.13371337133713371337133713371337') | ||||||
|         self.assertEqual(results[1]['title'], 'Another Title') |         self.assertEqual(results[1]['title'], 'Another Title') | ||||||
|         self.assertEqual(results[1]['url'], 'http://another.url.of.article/') |         self.assertEqual(results[1]['url'], 'http://another.url.of.article/') | ||||||
|         self.assertEqual(results[1]['content'], 'Another Article Content') |         self.assertEqual(results[1]['content'], 'Another Article Content') | ||||||
|         self.assertNotIn('thumbnail', results[1]) |         self.assertNotIn('img_src', results[1]) | ||||||
| 
 | 
 | ||||||
|         html = """<?xml version="1.0" encoding="utf-8" ?> |         html = """<?xml version="1.0" encoding="utf-8" ?> | ||||||
| <rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS"> | <rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS"> | ||||||
| @ -120,7 +120,7 @@ class TestBingNewsEngine(SearxTestCase): | |||||||
|         self.assertEqual(results[0]['title'], 'Title') |         self.assertEqual(results[0]['title'], 'Title') | ||||||
|         self.assertEqual(results[0]['url'], 'http://another.url.of.article/') |         self.assertEqual(results[0]['url'], 'http://another.url.of.article/') | ||||||
|         self.assertEqual(results[0]['content'], 'Article Content') |         self.assertEqual(results[0]['content'], 'Article Content') | ||||||
|         self.assertEqual(results[0]['thumbnail'], 'http://another.bing.com/image') |         self.assertEqual(results[0]['img_src'], 'http://another.bing.com/image') | ||||||
| 
 | 
 | ||||||
|         html = """<?xml version="1.0" encoding="utf-8" ?> |         html = """<?xml version="1.0" encoding="utf-8" ?> | ||||||
| <rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS"> | <rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS"> | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Alexandre Flament
						Alexandre Flament