commit
						57149661e4
					
				| @ -31,8 +31,6 @@ if xpath_results is a string element, then it's already done | |||||||
| def extract_text(xpath_results): | def extract_text(xpath_results): | ||||||
|     if type(xpath_results) == list: |     if type(xpath_results) == list: | ||||||
|         # it's list of result : concat everything using recursive call |         # it's list of result : concat everything using recursive call | ||||||
|         if not xpath_results: |  | ||||||
|             raise Exception('Empty url resultset') |  | ||||||
|         result = '' |         result = '' | ||||||
|         for e in xpath_results: |         for e in xpath_results: | ||||||
|             result = result + extract_text(e) |             result = result + extract_text(e) | ||||||
| @ -48,6 +46,8 @@ def extract_text(xpath_results): | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def extract_url(xpath_results, search_url): | def extract_url(xpath_results, search_url): | ||||||
|  |     if xpath_results == []: | ||||||
|  |         raise Exception('Empty url resultset') | ||||||
|     url = extract_text(xpath_results) |     url = extract_text(xpath_results) | ||||||
| 
 | 
 | ||||||
|     if url.startswith('//'): |     if url.startswith('//'): | ||||||
| @ -103,8 +103,8 @@ def response(resp): | |||||||
|     if results_xpath: |     if results_xpath: | ||||||
|         for result in dom.xpath(results_xpath): |         for result in dom.xpath(results_xpath): | ||||||
|             url = extract_url(result.xpath(url_xpath), search_url) |             url = extract_url(result.xpath(url_xpath), search_url) | ||||||
|             title = extract_text(result.xpath(title_xpath)[0]) |             title = extract_text(result.xpath(title_xpath)) | ||||||
|             content = extract_text(result.xpath(content_xpath)[0]) |             content = extract_text(result.xpath(content_xpath)) | ||||||
|             results.append({'url': url, 'title': title, 'content': content}) |             results.append({'url': url, 'title': title, 'content': content}) | ||||||
|     else: |     else: | ||||||
|         for url, title, content in zip( |         for url, title, content in zip( | ||||||
|  | |||||||
| @ -462,6 +462,17 @@ engines: | |||||||
| #        - ... | #        - ... | ||||||
| #    disabled : True | #    disabled : True | ||||||
| 
 | 
 | ||||||
|  |   - name : semantic scholar | ||||||
|  |     engine : xpath | ||||||
|  |     paging : True | ||||||
|  |     search_url : https://www.semanticscholar.org/search?q={query}&sort=relevance&page={pageno}&ae=false | ||||||
|  |     results_xpath : //article | ||||||
|  |     url_xpath : .//div[@class="search-result-title"]/a/@href | ||||||
|  |     title_xpath : .//div[@class="search-result-title"]/a | ||||||
|  |     content_xpath : .//div[@class="search-result-abstract"] | ||||||
|  |     shortcut : se | ||||||
|  |     categories : science | ||||||
|  | 
 | ||||||
|   - name : spotify |   - name : spotify | ||||||
|     engine : spotify |     engine : spotify | ||||||
|     shortcut : stf |     shortcut : stf | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Adam Tauber
						Adam Tauber