Merge pull request #800 from dalf/searchpy5
search.py: fixes and some minor modifications
This commit is contained in:
		
						commit
						3d8c9bab96
					
				| @ -133,6 +133,7 @@ class ResultContainer(object): | |||||||
| 
 | 
 | ||||||
|     def extend(self, engine_name, results): |     def extend(self, engine_name, results): | ||||||
|         for result in list(results): |         for result in list(results): | ||||||
|  |             result['engine'] = engine_name | ||||||
|             if 'suggestion' in result: |             if 'suggestion' in result: | ||||||
|                 self.suggestions.add(result['suggestion']) |                 self.suggestions.add(result['suggestion']) | ||||||
|                 results.remove(result) |                 results.remove(result) | ||||||
|  | |||||||
| @ -37,7 +37,7 @@ logger = logger.getChild('search') | |||||||
| number_of_searches = 0 | number_of_searches = 0 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def send_http_request(engine, request_params, timeout_limit): | def send_http_request(engine, request_params, start_time, timeout_limit): | ||||||
|     # for page_load_time stats |     # for page_load_time stats | ||||||
|     time_before_request = time() |     time_before_request = time() | ||||||
| 
 | 
 | ||||||
| @ -62,9 +62,10 @@ def send_http_request(engine, request_params, timeout_limit): | |||||||
| 
 | 
 | ||||||
|     # is there a timeout (no parsing in this case) |     # is there a timeout (no parsing in this case) | ||||||
|     timeout_overhead = 0.2  # seconds |     timeout_overhead = 0.2  # seconds | ||||||
|     search_duration = time() - request_params['started'] |     time_after_request = time() | ||||||
|  |     search_duration = time_after_request - start_time | ||||||
|     if search_duration > timeout_limit + timeout_overhead: |     if search_duration > timeout_limit + timeout_overhead: | ||||||
|         raise Timeout(response=response) |         raise requests.exceptions.Timeout(response=response) | ||||||
| 
 | 
 | ||||||
|     with threading.RLock(): |     with threading.RLock(): | ||||||
|         # no error : reset the suspend variables |         # no error : reset the suspend variables | ||||||
| @ -72,14 +73,14 @@ def send_http_request(engine, request_params, timeout_limit): | |||||||
|         engine.suspend_end_time = 0 |         engine.suspend_end_time = 0 | ||||||
|         # update stats with current page-load-time |         # update stats with current page-load-time | ||||||
|         # only the HTTP request |         # only the HTTP request | ||||||
|         engine.stats['page_load_time'] += time() - time_before_request |         engine.stats['page_load_time'] += time_after_request - time_before_request | ||||||
|         engine.stats['page_load_count'] += 1 |         engine.stats['page_load_count'] += 1 | ||||||
| 
 | 
 | ||||||
|     # everything is ok : return the response |     # everything is ok : return the response | ||||||
|     return response |     return response | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def search_one_request(engine, query, request_params, timeout_limit): | def search_one_request(engine, query, request_params, start_time, timeout_limit): | ||||||
|     # update request parameters dependent on |     # update request parameters dependent on | ||||||
|     # search-engine (contained in engines folder) |     # search-engine (contained in engines folder) | ||||||
|     engine.request(query, request_params) |     engine.request(query, request_params) | ||||||
| @ -92,24 +93,21 @@ def search_one_request(engine, query, request_params, timeout_limit): | |||||||
|         return [] |         return [] | ||||||
| 
 | 
 | ||||||
|     # send request |     # send request | ||||||
|     response = send_http_request(engine, request_params, timeout_limit) |     response = send_http_request(engine, request_params, start_time, timeout_limit) | ||||||
| 
 | 
 | ||||||
|     # parse the response |     # parse the response | ||||||
|     response.search_params = request_params |     response.search_params = request_params | ||||||
|     return engine.response(response) |     return engine.response(response) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def search_one_request_safe(engine_name, query, request_params, result_container, timeout_limit): | def search_one_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit): | ||||||
|     start_time = time() |  | ||||||
|     engine = engines[engine_name] |     engine = engines[engine_name] | ||||||
| 
 | 
 | ||||||
|     try: |     try: | ||||||
|         # send requests and parse the results |         # send requests and parse the results | ||||||
|         search_results = search_one_request(engine, query, request_params, timeout_limit) |         search_results = search_one_request(engine, query, request_params, start_time, timeout_limit) | ||||||
| 
 | 
 | ||||||
|         # add results |         # add results | ||||||
|         for result in search_results: |  | ||||||
|             result['engine'] = engine_name |  | ||||||
|         result_container.extend(engine_name, search_results) |         result_container.extend(engine_name, search_results) | ||||||
| 
 | 
 | ||||||
|         # update engine time when there is no exception |         # update engine time when there is no exception | ||||||
| @ -131,7 +129,7 @@ def search_one_request_safe(engine_name, query, request_params, result_container | |||||||
|                          "(search duration : {1} s, timeout: {2} s) : {3}" |                          "(search duration : {1} s, timeout: {2} s) : {3}" | ||||||
|                          .format(engine_name, search_duration, timeout_limit, e.__class__.__name__)) |                          .format(engine_name, search_duration, timeout_limit, e.__class__.__name__)) | ||||||
|             requests_exception = True |             requests_exception = True | ||||||
|         if (issubclass(e.__class__, requests.exceptions.RequestException)): |         elif (issubclass(e.__class__, requests.exceptions.RequestException)): | ||||||
|             # other requests exception |             # other requests exception | ||||||
|             logger.exception("engine {0} : requests exception" |             logger.exception("engine {0} : requests exception" | ||||||
|                              "(search duration : {1} s, timeout: {2} s) : {3}" |                              "(search duration : {1} s, timeout: {2} s) : {3}" | ||||||
| @ -151,14 +149,13 @@ def search_one_request_safe(engine_name, query, request_params, result_container | |||||||
|         return False |         return False | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def search_multiple_requests(requests, result_container, timeout_limit): | def search_multiple_requests(requests, result_container, start_time, timeout_limit): | ||||||
|     start_time = time() |  | ||||||
|     search_id = uuid4().__str__() |     search_id = uuid4().__str__() | ||||||
| 
 | 
 | ||||||
|     for engine_name, query, request_params in requests: |     for engine_name, query, request_params in requests: | ||||||
|         th = threading.Thread( |         th = threading.Thread( | ||||||
|             target=search_one_request_safe, |             target=search_one_request_safe, | ||||||
|             args=(engine_name, query, request_params, result_container, timeout_limit), |             args=(engine_name, query, request_params, result_container, start_time, timeout_limit), | ||||||
|             name=search_id, |             name=search_id, | ||||||
|         ) |         ) | ||||||
|         th._engine_name = engine_name |         th._engine_name = engine_name | ||||||
| @ -368,7 +365,6 @@ class Search(object): | |||||||
|             request_params = default_request_params() |             request_params = default_request_params() | ||||||
|             request_params['headers']['User-Agent'] = user_agent |             request_params['headers']['User-Agent'] = user_agent | ||||||
|             request_params['category'] = selected_engine['category'] |             request_params['category'] = selected_engine['category'] | ||||||
|             request_params['started'] = start_time |  | ||||||
|             request_params['pageno'] = search_query.pageno |             request_params['pageno'] = search_query.pageno | ||||||
| 
 | 
 | ||||||
|             if hasattr(engine, 'language') and engine.language: |             if hasattr(engine, 'language') and engine.language: | ||||||
| @ -388,7 +384,7 @@ class Search(object): | |||||||
| 
 | 
 | ||||||
|         if requests: |         if requests: | ||||||
|             # send all search-request |             # send all search-request | ||||||
|             search_multiple_requests(requests, self.result_container, timeout_limit - (time() - start_time)) |             search_multiple_requests(requests, self.result_container, start_time, timeout_limit) | ||||||
|             start_new_thread(gc.collect, tuple()) |             start_new_thread(gc.collect, tuple()) | ||||||
| 
 | 
 | ||||||
|         # return results, suggestions, answers and infoboxes |         # return results, suggestions, answers and infoboxes | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Adam Tauber
						Adam Tauber