Digg + Twitter corrections
Add Digg engine (with thumbnails); add publication date parsing for the Twitter engine
This commit is contained in:
		
							parent
							
								
									011c43b485
								
							
						
					
					
						commit
						e7e2981536
					
				
							
								
								
									
										66
									
								
								searx/engines/digg.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								searx/engines/digg.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,66 @@ | ||||
| ## Digg (News, Social media) | ||||
| # | ||||
| # @website     https://digg.com/ | ||||
| # @provide-api no | ||||
| # | ||||
| # @using-api   no | ||||
| # @results     HTML (using search portal) | ||||
| # @stable      no (HTML can change) | ||||
| # @parse       url, title, content, publishedDate, thumbnail | ||||
| 
 | ||||
| from urllib import quote_plus | ||||
| from json import loads | ||||
| from lxml import html | ||||
| from cgi import escape | ||||
| from dateutil import parser | ||||
| 
 | ||||
# engine dependent config
categories = ['news', 'social media']
paging = True

# search-url
base_url = 'https://digg.com/'
search_url = base_url + 'api/search/{query}.json?position={position}&format=html'

# specific xpath variables
results_xpath = '//article'
link_xpath = './/small[@class="time"]//a'
title_xpath = './/h2//a//text()'
content_xpath = './/p//text()'
pubdate_xpath = './/time'


# do search-request
def request(query, params):
    """Fill in ``params['url']`` with the digg search URL for this page.

    Digg's search endpoint pages by absolute result position,
    ten results per page.
    """
    page_offset = (params['pageno'] - 1) * 10
    params['url'] = search_url.format(query=quote_plus(query),
                                      position=page_offset)
    return params
| 
 | ||||
| 
 | ||||
# get response from search-request
def response(resp):
    """Parse a digg search response into a list of searx result dicts.

    The endpoint returns JSON whose ``html`` field holds the rendered
    result markup; each result is an ``<article>`` element.  Returns
    dicts with url, title, content, template, thumbnail and (when the
    article carries a ``<time>`` element) publishedDate.
    """
    results = []

    search_result = loads(resp.text)

    # the result list is shipped as an HTML fragment inside the JSON
    dom = html.fromstring(search_result['html'])

    # parse results
    for result in dom.xpath(results_xpath):
        url = result.attrib.get('data-contenturl')
        # skip articles without a target URL — nothing useful to show
        if not url:
            continue

        # not every article carries a thumbnail; guard the lookup
        # instead of raising IndexError on [0]
        imgs = result.xpath('.//img')
        thumbnail = imgs[0].attrib.get('src') if imgs else ''

        title = ''.join(result.xpath(title_xpath))
        content = escape(''.join(result.xpath(content_xpath)))

        res = {'url': url,
               'title': title,
               'content': content,
               'template': 'videos.html',
               'thumbnail': thumbnail}

        # only attach a publication date when a <time> element with a
        # datetime attribute is present (same pattern as the twitter
        # engine: omit the key rather than passing None)
        pubdate = result.xpath(pubdate_xpath)
        if pubdate:
            datetime_attr = pubdate[0].attrib.get('datetime')
            if datetime_attr:
                res['publishedDate'] = parser.parse(datetime_attr)

        # append result
        results.append(res)

    # return results
    return results
| @ -1,6 +1,6 @@ | ||||
| ## Twitter (Social media) | ||||
| # | ||||
| # @website     https://www.bing.com/news | ||||
| # @website     https://twitter.com/ | ||||
| # @provide-api yes (https://dev.twitter.com/docs/using-search) | ||||
| # | ||||
| # @using-api   no | ||||
| @ -14,6 +14,7 @@ from urlparse import urljoin | ||||
| from urllib import urlencode | ||||
| from lxml import html | ||||
| from cgi import escape | ||||
| from datetime import datetime | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['social media'] | ||||
| @ -28,6 +29,7 @@ results_xpath = '//li[@data-item-type="tweet"]' | ||||
| link_xpath = './/small[@class="time"]//a' | ||||
| title_xpath = './/span[@class="username js-action-profile-name"]//text()' | ||||
| content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()' | ||||
| timestamp_xpath = './/span[contains(@class,"_timestamp")]' | ||||
| 
 | ||||
| 
 | ||||
| # do search-request | ||||
| @ -53,11 +55,19 @@ def response(resp): | ||||
|         url = urljoin(base_url, link.attrib.get('href')) | ||||
|         title = ''.join(tweet.xpath(title_xpath)) | ||||
|         content = escape(''.join(tweet.xpath(content_xpath))) | ||||
| 
 | ||||
|         # append result | ||||
|         results.append({'url': url, | ||||
|                         'title': title, | ||||
|                         'content': content}) | ||||
|         pubdate = tweet.xpath(timestamp_xpath) | ||||
|         if len(pubdate) > 0: | ||||
|             publishedDate = datetime.fromtimestamp(float(pubdate[0].attrib.get('data-time')), None) | ||||
|             # append result | ||||
|             results.append({'url': url, | ||||
|                             'title': title, | ||||
|                             'content': content, | ||||
|                             'publishedDate': publishedDate}) | ||||
|         else: | ||||
|             # append result | ||||
|             results.append({'url': url, | ||||
|                             'title': title, | ||||
|                             'content': content}) | ||||
| 
 | ||||
|     # return results | ||||
|     return results | ||||
|  | ||||
| @ -45,6 +45,10 @@ engines: | ||||
|     engine : duckduckgo_definitions | ||||
|     shortcut : ddd | ||||
|      | ||||
|   - name : digg | ||||
|     engine : digg | ||||
|     shortcut : dg | ||||
| 
 | ||||
|   - name : wikidata | ||||
|     engine : wikidata | ||||
|     shortcut : wd | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Cqoicebordel
						Cqoicebordel