Digg + Twitter corrections
Digg engines, with thumbnails. Add pubdate for twitter.
This commit is contained in:
		
							parent
							
								
									011c43b485
								
							
						
					
					
						commit
						e7e2981536
					
				
							
								
								
									
										66
									
								
								searx/engines/digg.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								searx/engines/digg.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,66 @@ | |||||||
|  | ## Digg (News, Social media) | ||||||
|  | # | ||||||
|  | # @website     https://digg.com/ | ||||||
|  | # @provide-api no | ||||||
|  | # | ||||||
|  | # @using-api   no | ||||||
|  | # @results     HTML (using search portal) | ||||||
|  | # @stable      no (HTML can change) | ||||||
|  | # @parse       url, title, content, publishedDate, thumbnail | ||||||
|  | 
 | ||||||
|  | from urllib import quote_plus | ||||||
|  | from json import loads | ||||||
|  | from lxml import html | ||||||
|  | from cgi import escape | ||||||
|  | from dateutil import parser | ||||||
|  | 
 | ||||||
|  | # engine dependent config | ||||||
|  | categories = ['news', 'social media'] | ||||||
|  | paging = True | ||||||
|  | 
 | ||||||
|  | # search-url | ||||||
|  | base_url = 'https://digg.com/' | ||||||
|  | search_url = base_url+'api/search/{query}.json?position={position}&format=html' | ||||||
|  | 
 | ||||||
|  | # specific xpath variables | ||||||
|  | results_xpath = '//article' | ||||||
|  | link_xpath = './/small[@class="time"]//a' | ||||||
|  | title_xpath = './/h2//a//text()' | ||||||
|  | content_xpath = './/p//text()' | ||||||
|  | pubdate_xpath = './/time' | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # do search-request | ||||||
def request(query, params):
    """Store the Digg search URL for ``query`` in ``params['url']``.

    ``params['pageno']`` is read as a 1-based page number and converted to
    the ``position`` offset of the search URL (10 per page).  The same
    ``params`` dict is returned after being updated.
    """
    # page 1 -> position 0, page 2 -> position 10, ...
    position = 10 * (params['pageno'] - 1)
    encoded_query = quote_plus(query)

    params['url'] = search_url.format(query=encoded_query,
                                      position=position)
    return params
|  | 
 | ||||||
|  | 
 | ||||||
|  | # get response from search-request | ||||||
def response(resp):
    """Parse a Digg search response into a list of result dicts.

    ``resp.text`` is decoded as JSON; its ``html`` member is parsed with
    lxml and every element matched by ``results_xpath`` yields one result.

    Returns a list of dicts that always contain ``url``, ``title``,
    ``content`` and ``template``.  ``thumbnail`` is added only when the
    article contains an ``<img>``, and ``publishedDate`` only when it
    contains a ``<time>`` element with a ``datetime`` attribute.
    """
    results = []

    search_result = loads(resp.text)

    # lxml refuses to parse an empty document, so bail out early when the
    # payload carries no markup at all
    if not search_result.get('html'):
        return results

    dom = html.fromstring(search_result['html'])

    # parse results
    for result in dom.xpath(results_xpath):
        item = {'url': result.attrib.get('data-contenturl'),
                'title': ''.join(result.xpath(title_xpath)),
                'content': escape(''.join(result.xpath(content_xpath))),
                'template': 'videos.html'}

        # an article without an image must not abort the whole parse
        images = result.xpath('.//img')
        if images:
            item['thumbnail'] = images[0].attrib.get('src')

        # same for the publication date: only parse it when it is there
        times = result.xpath(pubdate_xpath)
        if times:
            raw_date = times[0].attrib.get('datetime')
            if raw_date:
                item['publishedDate'] = parser.parse(raw_date)

        # append result
        results.append(item)

    # return results
    return results
| @ -1,6 +1,6 @@ | |||||||
| ## Twitter (Social media) | ## Twitter (Social media) | ||||||
| # | # | ||||||
| # @website     https://www.bing.com/news | # @website     https://twitter.com/ | ||||||
| # @provide-api yes (https://dev.twitter.com/docs/using-search) | # @provide-api yes (https://dev.twitter.com/docs/using-search) | ||||||
| # | # | ||||||
| # @using-api   no | # @using-api   no | ||||||
| @ -14,6 +14,7 @@ from urlparse import urljoin | |||||||
| from urllib import urlencode | from urllib import urlencode | ||||||
| from lxml import html | from lxml import html | ||||||
| from cgi import escape | from cgi import escape | ||||||
|  | from datetime import datetime | ||||||
| 
 | 
 | ||||||
| # engine dependent config | # engine dependent config | ||||||
| categories = ['social media'] | categories = ['social media'] | ||||||
| @ -28,6 +29,7 @@ results_xpath = '//li[@data-item-type="tweet"]' | |||||||
| link_xpath = './/small[@class="time"]//a' | link_xpath = './/small[@class="time"]//a' | ||||||
| title_xpath = './/span[@class="username js-action-profile-name"]//text()' | title_xpath = './/span[@class="username js-action-profile-name"]//text()' | ||||||
| content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()' | content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()' | ||||||
|  | timestamp_xpath = './/span[contains(@class,"_timestamp")]' | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # do search-request | # do search-request | ||||||
| @ -53,11 +55,19 @@ def response(resp): | |||||||
|         url = urljoin(base_url, link.attrib.get('href')) |         url = urljoin(base_url, link.attrib.get('href')) | ||||||
|         title = ''.join(tweet.xpath(title_xpath)) |         title = ''.join(tweet.xpath(title_xpath)) | ||||||
|         content = escape(''.join(tweet.xpath(content_xpath))) |         content = escape(''.join(tweet.xpath(content_xpath))) | ||||||
| 
 |         pubdate = tweet.xpath(timestamp_xpath) | ||||||
|         # append result |         if len(pubdate) > 0: | ||||||
|         results.append({'url': url, |             publishedDate = datetime.fromtimestamp(float(pubdate[0].attrib.get('data-time')), None) | ||||||
|                         'title': title, |             # append result | ||||||
|                         'content': content}) |             results.append({'url': url, | ||||||
|  |                             'title': title, | ||||||
|  |                             'content': content, | ||||||
|  |                             'publishedDate': publishedDate}) | ||||||
|  |         else: | ||||||
|  |             # append result | ||||||
|  |             results.append({'url': url, | ||||||
|  |                             'title': title, | ||||||
|  |                             'content': content}) | ||||||
| 
 | 
 | ||||||
|     # return results |     # return results | ||||||
|     return results |     return results | ||||||
|  | |||||||
| @ -45,6 +45,10 @@ engines: | |||||||
|     engine : duckduckgo_definitions |     engine : duckduckgo_definitions | ||||||
|     shortcut : ddd |     shortcut : ddd | ||||||
|      |      | ||||||
|  |   - name : digg | ||||||
|  |     engine : digg | ||||||
|  |     shortcut : dg | ||||||
|  | 
 | ||||||
|   - name : wikidata |   - name : wikidata | ||||||
|     engine : wikidata |     engine : wikidata | ||||||
|     shortcut : wd |     shortcut : wd | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Cqoicebordel
						Cqoicebordel