Merge pull request #2319 from dalf/multiple-proxies
[enh] Add multiple outgoing proxies
This commit is contained in:
		
						commit
						aec6a2656c
					
				| @ -36,14 +36,22 @@ Global Settings | |||||||
|        image_proxy : False           # proxying image results through searx |        image_proxy : False           # proxying image results through searx | ||||||
|        default_locale : ""           # default interface locale |        default_locale : ""           # default interface locale | ||||||
| 
 | 
 | ||||||
|    # uncomment below section if you want to use a proxy |    outgoing: # communication with search engines | ||||||
|  |        request_timeout : 2.0 # default timeout in seconds, can be overridden by engine | ||||||
|  |        # max_request_timeout: 10.0 # the maximum timeout in seconds | ||||||
|  |        useragent_suffix : "" # suffix of searx_useragent, could contain information like an email address to the administrator | ||||||
|  |        pool_connections : 100 # Number of different hosts | ||||||
|  |        pool_maxsize : 10 # Number of simultaneous requests by host | ||||||
| 
 | 
 | ||||||
|    #outgoing_proxies : |        #proxies: | ||||||
|    #    http : http://127.0.0.1:8080 |        #    http: | ||||||
|    #    https: http://127.0.0.1:8080 |        #        - http://proxy1:8080 | ||||||
| 
 |        #        - http://proxy2:8080 | ||||||
|    # uncomment below section only if you have more than one network interface |        #    https: | ||||||
|    # which can be the source of outgoing search requests |        #        - http://proxy1:8080 | ||||||
|  |        #        - http://proxy2:8080 | ||||||
|  |        #        - socks5://user:password@proxy3:1080 | ||||||
|  |        #        - socks5h://user:password@proxy4:1080 | ||||||
| 
 | 
 | ||||||
|        #source_ips: |        #source_ips: | ||||||
|        #    - 1.1.1.1 |        #    - 1.1.1.1 | ||||||
| @ -105,15 +113,16 @@ Global Settings | |||||||
|   code, like ``fr``, ``en``, ``de``. |   code, like ``fr``, ``en``, ``de``. | ||||||
| 
 | 
 | ||||||
| .. _requests proxies: http://requests.readthedocs.io/en/latest/user/advanced/#proxies | .. _requests proxies: http://requests.readthedocs.io/en/latest/user/advanced/#proxies | ||||||
| .. _PR SOCKS support: https://github.com/kennethreitz/requests/pull/478 | .. _PySocks: https://pypi.org/project/PySocks/ | ||||||
| 
 | 
 | ||||||
| ``outgoing_proxies`` : | ``proxies`` : | ||||||
|   Define a proxy you wish to use, see `requests proxies`_.  SOCKS proxies are |   Define one or more proxies you wish to use, see `requests proxies`_. | ||||||
|   not supported / see `PR SOCKS support`. |   If there is more than one proxy for one protocol (http, https), | ||||||
|  |   requests to the engines are distributed in a round-robin fashion. | ||||||
| 
 | 
 | ||||||
| ``source_ips`` : | ``source_ips`` : | ||||||
|   If you use multiple network interfaces, define from which IP the requests must |   If you use multiple network interfaces, define from which IP the requests must | ||||||
|   be made. |   be made. This parameter is ignored when ``proxies`` is set. | ||||||
| 
 | 
 | ||||||
| ``locales`` : | ``locales`` : | ||||||
|   Locales codes and their names.  Available translations of searx interface. |   Locales codes and their names.  Available translations of searx interface. | ||||||
| @ -139,6 +148,15 @@ Engine settings | |||||||
|      api_key : 'apikey' |      api_key : 'apikey' | ||||||
|      disabled : True |      disabled : True | ||||||
|      language : en_US |      language : en_US | ||||||
|  |      #proxies: | ||||||
|  |      #    http: | ||||||
|  |      #        - http://proxy1:8080 | ||||||
|  |      #        - http://proxy2:8080 | ||||||
|  |      #    https: | ||||||
|  |      #        - http://proxy1:8080 | ||||||
|  |      #        - http://proxy2:8080 | ||||||
|  |      #        - socks5://user:password@proxy3:1080 | ||||||
|  |      #        - socks5h://user:password@proxy4:1080 | ||||||
| 
 | 
 | ||||||
| ``name`` : | ``name`` : | ||||||
|   Name that will be used across searx to define this engine.  In settings, on |   Name that will be used across searx to define this engine.  In settings, on | ||||||
|  | |||||||
| @ -25,7 +25,7 @@ from operator import itemgetter | |||||||
| from searx import settings | from searx import settings | ||||||
| from searx import logger | from searx import logger | ||||||
| from searx.data import ENGINES_LANGUAGES | from searx.data import ENGINES_LANGUAGES | ||||||
| from searx.poolrequests import get | from searx.poolrequests import get, get_proxy_cycles | ||||||
| from searx.utils import load_module, match_language, get_engine_from_settings | from searx.utils import load_module, match_language, get_engine_from_settings | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -79,16 +79,18 @@ def load_engine(engine_data): | |||||||
|         logger.exception('Cannot load engine "{}"'.format(engine_module)) |         logger.exception('Cannot load engine "{}"'.format(engine_module)) | ||||||
|         return None |         return None | ||||||
| 
 | 
 | ||||||
|     for param_name in engine_data: |     for param_name, param_value in engine_data.items(): | ||||||
|         if param_name == 'engine': |         if param_name == 'engine': | ||||||
|             continue |             pass | ||||||
|         if param_name == 'categories': |         elif param_name == 'categories': | ||||||
|             if engine_data['categories'] == 'none': |             if param_value == 'none': | ||||||
|                 engine.categories = [] |                 engine.categories = [] | ||||||
|             else: |             else: | ||||||
|                 engine.categories = list(map(str.strip, engine_data['categories'].split(','))) |                 engine.categories = list(map(str.strip, param_value.split(','))) | ||||||
|             continue |         elif param_name == 'proxies': | ||||||
|         setattr(engine, param_name, engine_data[param_name]) |             engine.proxies = get_proxy_cycles(param_value) | ||||||
|  |         else: | ||||||
|  |             setattr(engine, param_name, param_value) | ||||||
| 
 | 
 | ||||||
|     for arg_name, arg_value in engine_default_args.items(): |     for arg_name, arg_value in engine_default_args.items(): | ||||||
|         if not hasattr(engine, arg_name): |         if not hasattr(engine, arg_name): | ||||||
|  | |||||||
| @ -111,6 +111,32 @@ def get_time_for_thread(): | |||||||
|     return threadLocal.total_time |     return threadLocal.total_time | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def get_proxy_cycles(proxy_settings): | ||||||
|  |     if not proxy_settings: | ||||||
|  |         return None | ||||||
|  |     # Backwards compatibility for single proxy in settings.yml | ||||||
|  |     for protocol, proxy in proxy_settings.items(): | ||||||
|  |         if isinstance(proxy, str): | ||||||
|  |             proxy_settings[protocol] = [proxy] | ||||||
|  | 
 | ||||||
|  |     for protocol in proxy_settings: | ||||||
|  |         proxy_settings[protocol] = cycle(proxy_settings[protocol]) | ||||||
|  |     return proxy_settings | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | GLOBAL_PROXY_CYCLES = get_proxy_cycles(settings['outgoing'].get('proxies')) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def get_proxies(proxy_cycles): | ||||||
|  |     if proxy_cycles: | ||||||
|  |         return {protocol: next(proxy_cycle) for protocol, proxy_cycle in proxy_cycles.items()} | ||||||
|  |     return None | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def get_global_proxies(): | ||||||
|  |     return get_proxies(GLOBAL_PROXY_CYCLES) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def request(method, url, **kwargs): | def request(method, url, **kwargs): | ||||||
|     """same as requests/requests/api.py request(...)""" |     """same as requests/requests/api.py request(...)""" | ||||||
|     time_before_request = time() |     time_before_request = time() | ||||||
| @ -119,8 +145,8 @@ def request(method, url, **kwargs): | |||||||
|     session = SessionSinglePool() |     session = SessionSinglePool() | ||||||
| 
 | 
 | ||||||
|     # proxies |     # proxies | ||||||
|     if kwargs.get('proxies') is None: |     if not kwargs.get('proxies'): | ||||||
|         kwargs['proxies'] = settings['outgoing'].get('proxies') |         kwargs['proxies'] = get_global_proxies() | ||||||
| 
 | 
 | ||||||
|     # timeout |     # timeout | ||||||
|     if 'timeout' in kwargs: |     if 'timeout' in kwargs: | ||||||
|  | |||||||
| @ -119,7 +119,7 @@ def send_http_request(engine, request_params): | |||||||
| 
 | 
 | ||||||
|     # setting engine based proxies |     # setting engine based proxies | ||||||
|     if hasattr(engine, 'proxies'): |     if hasattr(engine, 'proxies'): | ||||||
|         request_args['proxies'] = engine.proxies |         request_args['proxies'] = requests_lib.get_proxies(engine.proxies) | ||||||
| 
 | 
 | ||||||
|     # specific type of request (GET or POST) |     # specific type of request (GET or POST) | ||||||
|     if request_params['method'] == 'GET': |     if request_params['method'] == 'GET': | ||||||
|  | |||||||
| @ -63,13 +63,15 @@ outgoing: # communication with search engines | |||||||
|     pool_connections : 100 # Number of different hosts |     pool_connections : 100 # Number of different hosts | ||||||
|     pool_maxsize : 10 # Number of simultaneous requests by host |     pool_maxsize : 10 # Number of simultaneous requests by host | ||||||
| # uncomment below section if you want to use a proxy | # uncomment below section if you want to use a proxy | ||||||
| # see http://docs.python-requests.org/en/latest/user/advanced/#proxies | # see https://2.python-requests.org/en/latest/user/advanced/#proxies | ||||||
| # SOCKS proxies are also supported: see http://requests.readthedocs.io/en/master/user/advanced/#socks | # SOCKS proxies are also supported: see https://2.python-requests.org/en/latest/user/advanced/#socks | ||||||
| #    proxies: | #    proxies: | ||||||
| #        http : socks5h://127.0.0.1:9050 | #        http: | ||||||
| #        https: socks5h://127.0.0.1:9050 | #            - http://proxy1:8080 | ||||||
| #    using_tor_proxy : True | #            - http://proxy2:8080 | ||||||
| #    extra_proxy_timeout : 10.0 # Extra seconds to add in order to account for the time taken by the proxy | #        https: | ||||||
|  | #            - http://proxy1:8080 | ||||||
|  | #            - http://proxy2:8080 | ||||||
| # uncomment below section only if you have more than one network interface | # uncomment below section only if you have more than one network interface | ||||||
| # which can be the source of outgoing search requests | # which can be the source of outgoing search requests | ||||||
| #    source_ips: | #    source_ips: | ||||||
|  | |||||||
| @ -78,6 +78,7 @@ from searx.plugins import plugins | |||||||
| from searx.plugins.oa_doi_rewrite import get_doi_resolver | from searx.plugins.oa_doi_rewrite import get_doi_resolver | ||||||
| from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES | from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES | ||||||
| from searx.answerers import answerers | from searx.answerers import answerers | ||||||
|  | from searx.poolrequests import get_global_proxies | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # serve pages with HTTP/1.1 | # serve pages with HTTP/1.1 | ||||||
| @ -149,8 +150,6 @@ _category_names = (gettext('files'), | |||||||
|                    gettext('onions'), |                    gettext('onions'), | ||||||
|                    gettext('science')) |                    gettext('science')) | ||||||
| 
 | 
 | ||||||
| outgoing_proxies = settings['outgoing'].get('proxies') or None |  | ||||||
| 
 |  | ||||||
| _flask_babel_get_translations = flask_babel.get_translations | _flask_babel_get_translations = flask_babel.get_translations | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -905,7 +904,7 @@ def image_proxy(): | |||||||
|                         stream=True, |                         stream=True, | ||||||
|                         timeout=settings['outgoing']['request_timeout'], |                         timeout=settings['outgoing']['request_timeout'], | ||||||
|                         headers=headers, |                         headers=headers, | ||||||
|                         proxies=outgoing_proxies) |                         proxies=get_global_proxies()) | ||||||
| 
 | 
 | ||||||
|     if resp.status_code == 304: |     if resp.status_code == 304: | ||||||
|         return '', resp.status_code |         return '', resp.status_code | ||||||
|  | |||||||
							
								
								
									
										89
									
								
								tests/unit/test_poolrequests.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										89
									
								
								tests/unit/test_poolrequests.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,89 @@ | |||||||
|  | from unittest.mock import patch | ||||||
|  | from requests.models import Response | ||||||
|  | 
 | ||||||
|  | from searx.testing import SearxTestCase | ||||||
|  | 
 | ||||||
|  | import searx.poolrequests | ||||||
|  | from searx.poolrequests import get_proxy_cycles, get_proxies | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | CONFIG = {'http': ['http://localhost:9090', 'http://localhost:9092'], | ||||||
|  |           'https': ['http://localhost:9091', 'http://localhost:9093']} | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class TestProxy(SearxTestCase): | ||||||
|  | 
 | ||||||
|  |     def test_noconfig(self): | ||||||
|  |         cycles = get_proxy_cycles(None) | ||||||
|  |         self.assertIsNone(cycles) | ||||||
|  | 
 | ||||||
|  |         cycles = get_proxy_cycles(False) | ||||||
|  |         self.assertIsNone(cycles) | ||||||
|  | 
 | ||||||
|  |     def test_oldconfig(self): | ||||||
|  |         config = { | ||||||
|  |             'http': 'http://localhost:9090', | ||||||
|  |             'https': 'http://localhost:9091', | ||||||
|  |         } | ||||||
|  |         cycles = get_proxy_cycles(config) | ||||||
|  |         self.assertEqual(next(cycles['http']), 'http://localhost:9090') | ||||||
|  |         self.assertEqual(next(cycles['http']), 'http://localhost:9090') | ||||||
|  |         self.assertEqual(next(cycles['https']), 'http://localhost:9091') | ||||||
|  |         self.assertEqual(next(cycles['https']), 'http://localhost:9091') | ||||||
|  | 
 | ||||||
|  |     def test_one_proxy(self): | ||||||
|  |         config = { | ||||||
|  |             'http': ['http://localhost:9090'], | ||||||
|  |             'https': ['http://localhost:9091'], | ||||||
|  |         } | ||||||
|  |         cycles = get_proxy_cycles(config) | ||||||
|  |         self.assertEqual(next(cycles['http']), 'http://localhost:9090') | ||||||
|  |         self.assertEqual(next(cycles['http']), 'http://localhost:9090') | ||||||
|  |         self.assertEqual(next(cycles['https']), 'http://localhost:9091') | ||||||
|  |         self.assertEqual(next(cycles['https']), 'http://localhost:9091') | ||||||
|  | 
 | ||||||
|  |     def test_multiple_proxies(self): | ||||||
|  |         cycles = get_proxy_cycles(CONFIG) | ||||||
|  |         self.assertEqual(next(cycles['http']), 'http://localhost:9090') | ||||||
|  |         self.assertEqual(next(cycles['http']), 'http://localhost:9092') | ||||||
|  |         self.assertEqual(next(cycles['http']), 'http://localhost:9090') | ||||||
|  |         self.assertEqual(next(cycles['https']), 'http://localhost:9091') | ||||||
|  |         self.assertEqual(next(cycles['https']), 'http://localhost:9093') | ||||||
|  |         self.assertEqual(next(cycles['https']), 'http://localhost:9091') | ||||||
|  | 
 | ||||||
|  |     def test_getproxies_none(self): | ||||||
|  |         self.assertIsNone(get_proxies(None)) | ||||||
|  | 
 | ||||||
|  |     def test_getproxies_config(self): | ||||||
|  |         cycles = get_proxy_cycles(CONFIG) | ||||||
|  |         self.assertEqual(get_proxies(cycles), { | ||||||
|  |             'http': 'http://localhost:9090', | ||||||
|  |             'https': 'http://localhost:9091' | ||||||
|  |         }) | ||||||
|  |         self.assertEqual(get_proxies(cycles), { | ||||||
|  |             'http': 'http://localhost:9092', | ||||||
|  |             'https': 'http://localhost:9093' | ||||||
|  |         }) | ||||||
|  | 
 | ||||||
|  |     @patch('searx.poolrequests.get_global_proxies') | ||||||
|  |     def test_request(self, mock_get_global_proxies): | ||||||
|  |         method = 'GET' | ||||||
|  |         url = 'http://localhost' | ||||||
|  |         custom_proxies = { | ||||||
|  |             'https': 'http://localhost:1080' | ||||||
|  |         } | ||||||
|  |         global_proxies = { | ||||||
|  |             'http': 'http://localhost:9092', | ||||||
|  |             'https': 'http://localhost:9093' | ||||||
|  |         } | ||||||
|  |         mock_get_global_proxies.return_value = global_proxies | ||||||
|  | 
 | ||||||
|  |         # check the global proxies usage | ||||||
|  |         with patch.object(searx.poolrequests.SessionSinglePool, 'request', return_value=Response()) as mock_method: | ||||||
|  |             searx.poolrequests.request(method, url) | ||||||
|  |         mock_method.assert_called_once_with(method=method, url=url, proxies=global_proxies) | ||||||
|  | 
 | ||||||
|  |         # check if the proxies parameter overrides the global proxies | ||||||
|  |         with patch.object(searx.poolrequests.SessionSinglePool, 'request', return_value=Response()) as mock_method: | ||||||
|  |             searx.poolrequests.request(method, url, proxies=custom_proxies) | ||||||
|  |         mock_method.assert_called_once_with(method=method, url=url, proxies=custom_proxies) | ||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Alexandre Flament
						Alexandre Flament