commit
						a77d8c8227
					
				| @ -8,11 +8,10 @@ | |||||||
|  @parse       url, title, content |  @parse       url, title, content | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| from lxml import html, etree |  | ||||||
| import re | import re | ||||||
| from urllib.parse import quote, urljoin | from urllib.parse import quote, urljoin | ||||||
| from searx.utils import extract_text, eval_xpath | from lxml import html | ||||||
| from searx import logger | from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex | ||||||
| 
 | 
 | ||||||
| categories = ['general'] | categories = ['general'] | ||||||
| paging = True | paging = True | ||||||
| @ -40,6 +39,9 @@ def request(query, params): | |||||||
|         params['url'] = search_url_fmt.format(query=quote(query)) |         params['url'] = search_url_fmt.format(query=quote(query)) | ||||||
|     else: |     else: | ||||||
|         params['url'] = search_url.format(offset=offset, query=quote(query)) |         params['url'] = search_url.format(offset=offset, query=quote(query)) | ||||||
|  |     # after the last page of results, spelling corrections are returned after a HTTP redirect | ||||||
|  |     # whatever the page number is | ||||||
|  |     params['soft_max_redirects'] = 1 | ||||||
|     return params |     return params | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -51,19 +53,15 @@ def response(resp): | |||||||
| 
 | 
 | ||||||
|     dom = html.fromstring(resp.text) |     dom = html.fromstring(resp.text) | ||||||
| 
 | 
 | ||||||
|     try: |     number_of_results_element =\ | ||||||
|         number_of_results_string =\ |         eval_xpath_getindex(dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()', | ||||||
|             re.sub('[^0-9]', '', |                             0, default=None) | ||||||
|                    eval_xpath(dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()')[0]) |     if number_of_results_element is not None: | ||||||
| 
 |         number_of_results_string = re.sub('[^0-9]', '', number_of_results_element) | ||||||
|         results.append({'number_of_results': int(number_of_results_string)}) |         results.append({'number_of_results': int(number_of_results_string)}) | ||||||
| 
 | 
 | ||||||
|     except: |     for result in eval_xpath_list(dom, '//section[not(contains(@class, "essay"))]'): | ||||||
|         logger.debug("Couldn't read number of results.") |         url = eval_xpath_getindex(result, './/h2/a', 0).get('href') | ||||||
| 
 |  | ||||||
|     for result in eval_xpath(dom, '//section[not(contains(@class, "essay"))]'): |  | ||||||
|         try: |  | ||||||
|             url = eval_xpath(result, './/h2/a')[0].get('href') |  | ||||||
|         url = urljoin(base_url, url) |         url = urljoin(base_url, url) | ||||||
|         title = eval_xpath(result, 'string(.//h2/a)').strip() |         title = eval_xpath(result, 'string(.//h2/a)').strip() | ||||||
|         content = extract_text(eval_xpath(result, './/p')) |         content = extract_text(eval_xpath(result, './/p')) | ||||||
| @ -71,8 +69,5 @@ def response(resp): | |||||||
|         results.append({'url': url, |         results.append({'url': url, | ||||||
|                         'title': title, |                         'title': title, | ||||||
|                         'content': content}) |                         'content': content}) | ||||||
|         except: |  | ||||||
|             logger.debug('result parse error in:\n%s', etree.tostring(result, pretty_print=True)) |  | ||||||
|             continue |  | ||||||
| 
 | 
 | ||||||
|     return results |     return results | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Alexandre Flament
						Alexandre Flament