Merge pull request #2319 from dalf/multiple-proxies
[enh] Add multiple outgoing proxies
This commit is contained in:
		
						commit
						aec6a2656c
					
				| @ -36,14 +36,22 @@ Global Settings | ||||
|        image_proxy : False           # proxying image results through searx | ||||
|        default_locale : ""           # default interface locale | ||||
| 
 | ||||
|    # uncomment below section if you want to use a proxy | ||||
|    outgoing: # communication with search engines | ||||
|        request_timeout : 2.0 # default timeout in seconds, can be overridden by engine | ||||
|        # max_request_timeout: 10.0 # the maximum timeout in seconds | ||||
|        useragent_suffix : "" # suffix of searx_useragent, could contain information like an email address of the administrator | ||||
|        pool_connections : 100 # Number of different hosts | ||||
|        pool_maxsize : 10 # Number of simultaneous requests by host | ||||
| 
 | ||||
|    #outgoing_proxies : | ||||
|    #    http : http://127.0.0.1:8080 | ||||
|    #    https: http://127.0.0.1:8080 | ||||
| 
 | ||||
|    # uncomment below section only if you have more than one network interface | ||||
|    # which can be the source of outgoing search requests | ||||
|        #proxies: | ||||
|        #    http: | ||||
|        #        - http://proxy1:8080 | ||||
|        #        - http://proxy2:8080 | ||||
|        #    https: | ||||
|        #        - http://proxy1:8080 | ||||
|        #        - http://proxy2:8080 | ||||
|        #        - socks5://user:password@proxy3:1080 | ||||
|        #        - socks5h://user:password@proxy4:1080 | ||||
| 
 | ||||
|        #source_ips: | ||||
|        #    - 1.1.1.1 | ||||
| @ -105,15 +113,16 @@ Global Settings | ||||
|   code, like ``fr``, ``en``, ``de``. | ||||
| 
 | ||||
| .. _requests proxies: http://requests.readthedocs.io/en/latest/user/advanced/#proxies | ||||
| .. _PR SOCKS support: https://github.com/kennethreitz/requests/pull/478 | ||||
| .. _PySocks: https://pypi.org/project/PySocks/ | ||||
| 
 | ||||
| ``outgoing_proxies`` : | ||||
|   Define a proxy you wish to use, see `requests proxies`_.  SOCKS proxies are | ||||
|   not supported / see `PR SOCKS support`. | ||||
| ``proxies`` : | ||||
|   Define one or more proxies you wish to use, see `requests proxies`_. | ||||
|   If there is more than one proxy for one protocol (http, https), | ||||
|   requests to the engines are distributed in a round-robin fashion. | ||||
| 
 | ||||
| ``source_ips`` : | ||||
|   If you use multiple network interfaces, define from which IP the requests must | ||||
|   be made. | ||||
|   be made. This parameter is ignored when ``proxies`` is set. | ||||
| 
 | ||||
| ``locales`` : | ||||
|   Locale codes and their names.  Available translations of the searx interface. | ||||
| @ -139,6 +148,15 @@ Engine settings | ||||
|      api_key : 'apikey' | ||||
|      disabled : True | ||||
|      language : en_US | ||||
|      #proxies: | ||||
|      #    http: | ||||
|      #        - http://proxy1:8080 | ||||
|      #        - http://proxy2:8080 | ||||
|      #    https: | ||||
|      #        - http://proxy1:8080 | ||||
|      #        - http://proxy2:8080 | ||||
|      #        - socks5://user:password@proxy3:1080 | ||||
|      #        - socks5h://user:password@proxy4:1080 | ||||
| 
 | ||||
| ``name`` : | ||||
|   Name that will be used across searx to define this engine.  In settings, on | ||||
|  | ||||
| @ -25,7 +25,7 @@ from operator import itemgetter | ||||
| from searx import settings | ||||
| from searx import logger | ||||
| from searx.data import ENGINES_LANGUAGES | ||||
| from searx.poolrequests import get | ||||
| from searx.poolrequests import get, get_proxy_cycles | ||||
| from searx.utils import load_module, match_language, get_engine_from_settings | ||||
| 
 | ||||
| 
 | ||||
| @ -79,16 +79,18 @@ def load_engine(engine_data): | ||||
|         logger.exception('Cannot load engine "{}"'.format(engine_module)) | ||||
|         return None | ||||
| 
 | ||||
|     for param_name in engine_data: | ||||
|     for param_name, param_value in engine_data.items(): | ||||
|         if param_name == 'engine': | ||||
|             continue | ||||
|         if param_name == 'categories': | ||||
|             if engine_data['categories'] == 'none': | ||||
|             pass | ||||
|         elif param_name == 'categories': | ||||
|             if param_value == 'none': | ||||
|                 engine.categories = [] | ||||
|             else: | ||||
|                 engine.categories = list(map(str.strip, engine_data['categories'].split(','))) | ||||
|             continue | ||||
|         setattr(engine, param_name, engine_data[param_name]) | ||||
|                 engine.categories = list(map(str.strip, param_value.split(','))) | ||||
|         elif param_name == 'proxies': | ||||
|             engine.proxies = get_proxy_cycles(param_value) | ||||
|         else: | ||||
|             setattr(engine, param_name, param_value) | ||||
| 
 | ||||
|     for arg_name, arg_value in engine_default_args.items(): | ||||
|         if not hasattr(engine, arg_name): | ||||
|  | ||||
| @ -111,6 +111,32 @@ def get_time_for_thread(): | ||||
|     return threadLocal.total_time | ||||
| 
 | ||||
| 
 | ||||
| def get_proxy_cycles(proxy_settings): | ||||
|     if not proxy_settings: | ||||
|         return None | ||||
|     # Backwards compatibility for single proxy in settings.yml | ||||
|     for protocol, proxy in proxy_settings.items(): | ||||
|         if isinstance(proxy, str): | ||||
|             proxy_settings[protocol] = [proxy] | ||||
| 
 | ||||
|     for protocol in proxy_settings: | ||||
|         proxy_settings[protocol] = cycle(proxy_settings[protocol]) | ||||
|     return proxy_settings | ||||
| 
 | ||||
| 
 | ||||
| GLOBAL_PROXY_CYCLES = get_proxy_cycles(settings['outgoing'].get('proxies')) | ||||
| 
 | ||||
| 
 | ||||
| def get_proxies(proxy_cycles): | ||||
|     if proxy_cycles: | ||||
|         return {protocol: next(proxy_cycle) for protocol, proxy_cycle in proxy_cycles.items()} | ||||
|     return None | ||||
| 
 | ||||
| 
 | ||||
| def get_global_proxies(): | ||||
|     return get_proxies(GLOBAL_PROXY_CYCLES) | ||||
| 
 | ||||
| 
 | ||||
| def request(method, url, **kwargs): | ||||
|     """same as requests/requests/api.py request(...)""" | ||||
|     time_before_request = time() | ||||
| @ -119,8 +145,8 @@ def request(method, url, **kwargs): | ||||
|     session = SessionSinglePool() | ||||
| 
 | ||||
|     # proxies | ||||
|     if kwargs.get('proxies') is None: | ||||
|         kwargs['proxies'] = settings['outgoing'].get('proxies') | ||||
|     if not kwargs.get('proxies'): | ||||
|         kwargs['proxies'] = get_global_proxies() | ||||
| 
 | ||||
|     # timeout | ||||
|     if 'timeout' in kwargs: | ||||
|  | ||||
| @ -119,7 +119,7 @@ def send_http_request(engine, request_params): | ||||
| 
 | ||||
|     # setting engine based proxies | ||||
|     if hasattr(engine, 'proxies'): | ||||
|         request_args['proxies'] = engine.proxies | ||||
|         request_args['proxies'] = requests_lib.get_proxies(engine.proxies) | ||||
| 
 | ||||
|     # specific type of request (GET or POST) | ||||
|     if request_params['method'] == 'GET': | ||||
|  | ||||
| @ -63,13 +63,15 @@ outgoing: # communication with search engines | ||||
|     pool_connections : 100 # Number of different hosts | ||||
|     pool_maxsize : 10 # Number of simultaneous requests by host | ||||
| # uncomment below section if you want to use a proxy | ||||
| # see http://docs.python-requests.org/en/latest/user/advanced/#proxies | ||||
| # SOCKS proxies are also supported: see http://requests.readthedocs.io/en/master/user/advanced/#socks | ||||
| #    proxies : | ||||
| #        http : socks5h://127.0.0.1:9050 | ||||
| #        https: socks5h://127.0.0.1:9050 | ||||
| #    using_tor_proxy : True | ||||
| #    extra_proxy_timeout : 10.0 # Extra seconds to add in order to account for the time taken by the proxy | ||||
| # see https://2.python-requests.org/en/latest/user/advanced/#proxies | ||||
| # SOCKS proxies are also supported: see https://2.python-requests.org/en/latest/user/advanced/#socks | ||||
| #    proxies: | ||||
| #        http: | ||||
| #            - http://proxy1:8080 | ||||
| #            - http://proxy2:8080 | ||||
| #        https: | ||||
| #            - http://proxy1:8080 | ||||
| #            - http://proxy2:8080 | ||||
| # uncomment below section only if you have more than one network interface | ||||
| # which can be the source of outgoing search requests | ||||
| #    source_ips: | ||||
|  | ||||
| @ -78,6 +78,7 @@ from searx.plugins import plugins | ||||
| from searx.plugins.oa_doi_rewrite import get_doi_resolver | ||||
| from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES | ||||
| from searx.answerers import answerers | ||||
| from searx.poolrequests import get_global_proxies | ||||
| 
 | ||||
| 
 | ||||
| # serve pages with HTTP/1.1 | ||||
| @ -149,8 +150,6 @@ _category_names = (gettext('files'), | ||||
|                    gettext('onions'), | ||||
|                    gettext('science')) | ||||
| 
 | ||||
| outgoing_proxies = settings['outgoing'].get('proxies') or None | ||||
| 
 | ||||
| _flask_babel_get_translations = flask_babel.get_translations | ||||
| 
 | ||||
| 
 | ||||
| @ -905,7 +904,7 @@ def image_proxy(): | ||||
|                         stream=True, | ||||
|                         timeout=settings['outgoing']['request_timeout'], | ||||
|                         headers=headers, | ||||
|                         proxies=outgoing_proxies) | ||||
|                         proxies=get_global_proxies()) | ||||
| 
 | ||||
|     if resp.status_code == 304: | ||||
|         return '', resp.status_code | ||||
|  | ||||
							
								
								
									
										89
									
								
								tests/unit/test_poolrequests.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										89
									
								
								tests/unit/test_poolrequests.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,89 @@ | ||||
| from unittest.mock import patch | ||||
| from requests.models import Response | ||||
| 
 | ||||
| from searx.testing import SearxTestCase | ||||
| 
 | ||||
| import searx.poolrequests | ||||
| from searx.poolrequests import get_proxy_cycles, get_proxies | ||||
| 
 | ||||
| 
 | ||||
| CONFIG = {'http': ['http://localhost:9090', 'http://localhost:9092'], | ||||
|           'https': ['http://localhost:9091', 'http://localhost:9093']} | ||||
| 
 | ||||
| 
 | ||||
| class TestProxy(SearxTestCase): | ||||
| 
 | ||||
|     def test_noconfig(self): | ||||
|         cycles = get_proxy_cycles(None) | ||||
|         self.assertIsNone(cycles) | ||||
| 
 | ||||
|         cycles = get_proxy_cycles(False) | ||||
|         self.assertIsNone(cycles) | ||||
| 
 | ||||
|     def test_oldconfig(self): | ||||
|         config = { | ||||
|             'http': 'http://localhost:9090', | ||||
|             'https': 'http://localhost:9091', | ||||
|         } | ||||
|         cycles = get_proxy_cycles(config) | ||||
|         self.assertEqual(next(cycles['http']), 'http://localhost:9090') | ||||
|         self.assertEqual(next(cycles['http']), 'http://localhost:9090') | ||||
|         self.assertEqual(next(cycles['https']), 'http://localhost:9091') | ||||
|         self.assertEqual(next(cycles['https']), 'http://localhost:9091') | ||||
| 
 | ||||
|     def test_one_proxy(self): | ||||
|         config = { | ||||
|             'http': ['http://localhost:9090'], | ||||
|             'https': ['http://localhost:9091'], | ||||
|         } | ||||
|         cycles = get_proxy_cycles(config) | ||||
|         self.assertEqual(next(cycles['http']), 'http://localhost:9090') | ||||
|         self.assertEqual(next(cycles['http']), 'http://localhost:9090') | ||||
|         self.assertEqual(next(cycles['https']), 'http://localhost:9091') | ||||
|         self.assertEqual(next(cycles['https']), 'http://localhost:9091') | ||||
| 
 | ||||
|     def test_multiple_proxies(self): | ||||
|         cycles = get_proxy_cycles(CONFIG) | ||||
|         self.assertEqual(next(cycles['http']), 'http://localhost:9090') | ||||
|         self.assertEqual(next(cycles['http']), 'http://localhost:9092') | ||||
|         self.assertEqual(next(cycles['http']), 'http://localhost:9090') | ||||
|         self.assertEqual(next(cycles['https']), 'http://localhost:9091') | ||||
|         self.assertEqual(next(cycles['https']), 'http://localhost:9093') | ||||
|         self.assertEqual(next(cycles['https']), 'http://localhost:9091') | ||||
| 
 | ||||
|     def test_getproxies_none(self): | ||||
|         self.assertIsNone(get_proxies(None)) | ||||
| 
 | ||||
|     def test_getproxies_config(self): | ||||
|         cycles = get_proxy_cycles(CONFIG) | ||||
|         self.assertEqual(get_proxies(cycles), { | ||||
|             'http': 'http://localhost:9090', | ||||
|             'https': 'http://localhost:9091' | ||||
|         }) | ||||
|         self.assertEqual(get_proxies(cycles), { | ||||
|             'http': 'http://localhost:9092', | ||||
|             'https': 'http://localhost:9093' | ||||
|         }) | ||||
| 
 | ||||
|     @patch('searx.poolrequests.get_global_proxies') | ||||
|     def test_request(self, mock_get_global_proxies): | ||||
|         method = 'GET' | ||||
|         url = 'http://localhost' | ||||
|         custom_proxies = { | ||||
|             'https': 'http://localhost:1080' | ||||
|         } | ||||
|         global_proxies = { | ||||
|             'http': 'http://localhost:9092', | ||||
|             'https': 'http://localhost:9093' | ||||
|         } | ||||
|         mock_get_global_proxies.return_value = global_proxies | ||||
| 
 | ||||
|         # check the global proxies usage | ||||
|         with patch.object(searx.poolrequests.SessionSinglePool, 'request', return_value=Response()) as mock_method: | ||||
|             searx.poolrequests.request(method, url) | ||||
|         mock_method.assert_called_once_with(method=method, url=url, proxies=global_proxies) | ||||
| 
 | ||||
|         # check if the proxies parameter overrides the global proxies | ||||
|         with patch.object(searx.poolrequests.SessionSinglePool, 'request', return_value=Response()) as mock_method: | ||||
|             searx.poolrequests.request(method, url, proxies=custom_proxies) | ||||
|         mock_method.assert_called_once_with(method=method, url=url, proxies=custom_proxies) | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Alexandre Flament
						Alexandre Flament