From 2b69780f0d73c6ae1642230a446f7365e9ce18aa Mon Sep 17 00:00:00 2001
From: Markus Heiser
Date: Wed, 21 Sep 2022 15:17:02 +0200
Subject: [PATCH] [DRAFT] simplify searx.metrics implementation

- Moved the code of the wrapper functions to methods of class
  HistogramStorage and class CounterStorage.

- Renamed the global histogram and counter objects to HISTOGRAM_STORAGE
  and COUNTER_STORAGE.

- The imports of names from the metrics module in the application code
  have been reduced to:

      from searx import metrics

  By this convention the wrapper functions can be replaced by the globals
  from:

      metrics.HISTOGRAM_STORAGE
      metrics.COUNTER_STORAGE

- Commented out the context manager searx.metrics.histogram_observe_time;
  we do not have a usage of this context manager.

Signed-off-by: Markus Heiser
---
 searx/metrics/__init__.py           | 110 +++++++++++-----------------
 searx/metrics/models.py             |  13 +++-
 searx/results.py                    |   9 +--
 searx/search/__init__.py            |   6 +-
 searx/search/checker/impl.py        |   4 +-
 searx/search/processors/abstract.py |  14 ++--
 searx/search/processors/online.py   |   5 +-
 searx/webapp.py                     |  25 +++----
 8 files changed, 82 insertions(+), 104 deletions(-)

diff --git a/searx/metrics/__init__.py b/searx/metrics/__init__.py
index d078a7fcc..152ec3d5c 100644
--- a/searx/metrics/__init__.py
+++ b/searx/metrics/__init__.py
@@ -17,68 +17,42 @@
 __all__ = [
     "initialize",
     "get_engines_metrics",
     "get_engine_errors",
-    "histogram",
-    "histogram_observe",
-    "histogram_observe_time",
-    "counter",
-    "counter_inc",
-    "counter_add",
     "count_error",
     "count_exception",
 ]
 
-histogram_storage: Optional[HistogramStorage] = None
-counter_storage: Optional[CounterStorage] = None
+HISTOGRAM_STORAGE: Optional[HistogramStorage] = None
+COUNTER_STORAGE: Optional[CounterStorage] = None
 
 
-@contextlib.contextmanager
-def histogram_observe_time(*args):
-    h = histogram_storage.get(*args)
-    before = default_timer()
-    yield before
-    duration = default_timer() - before
-    if h:
-        h.observe(duration)
-    else:
-        raise ValueError("histogram " + repr((*args,)) + " doesn't not exist")
-
-
-def histogram_observe(duration, *args):
-    histogram_storage.get(*args).observe(duration)
-
-
-def histogram(*args, raise_on_not_found=True) -> Histogram:
-    h = histogram_storage.get(*args)
-    if raise_on_not_found and h is None:
-        raise ValueError("histogram " + repr((*args,)) + " doesn't not exist")
-    return h
-
-
-def counter_inc(*args):
-    counter_storage.add(1, *args)
-
-
-def counter_add(value, *args):
-    counter_storage.add(value, *args)
-
-
-def counter(*args) -> int:
-    return counter_storage.get(*args)
+# We do not have a usage of this context manager
+#
+# @contextlib.contextmanager
+# def histogram_observe_time(*args):
+#     h = histogram_storage.get(*args)
+#     before = default_timer()
+#     yield before
+#     duration = default_timer() - before
+#     if h:
+#         h.observe(duration)
+#     else:
+#         raise ValueError("histogram " + repr((*args,)) + " doesn't not exist")
 
 
 def initialize(engine_names=None, enabled=True):
     """
     Initialize metrics
     """
-    global counter_storage, histogram_storage  # pylint: disable=global-statement
+
+    global COUNTER_STORAGE, HISTOGRAM_STORAGE  # pylint: disable=global-statement
 
     if enabled:
-        counter_storage = CounterStorage()
-        histogram_storage = HistogramStorage()
+        COUNTER_STORAGE = CounterStorage()
+        HISTOGRAM_STORAGE = HistogramStorage()
     else:
-        counter_storage = VoidCounterStorage()
-        histogram_storage = HistogramStorage(histogram_class=VoidHistogram)
+        COUNTER_STORAGE = VoidCounterStorage()
+        HISTOGRAM_STORAGE = HistogramStorage(histogram_class=VoidHistogram)
 
     # max_timeout = max of all the engine.timeout
     max_timeout = 2
@@ -93,19 +67,19 @@ def initialize(engine_names=None, enabled=True):
     # engines
     for engine_name in engine_names or engines:
         # search count
-        counter_storage.configure('engine', engine_name, 'search', 'count', 'sent')
-        counter_storage.configure('engine', engine_name, 'search', 'count', 'successful')
+        COUNTER_STORAGE.configure('engine', engine_name, 'search', 'count', 'sent')
+        COUNTER_STORAGE.configure('engine', engine_name, 'search', 'count', 'successful')
         # global counter of errors
-        counter_storage.configure('engine', engine_name, 'search', 'count', 'error')
+        COUNTER_STORAGE.configure('engine', engine_name, 'search', 'count', 'error')
         # score of the engine
-        counter_storage.configure('engine', engine_name, 'score')
+        COUNTER_STORAGE.configure('engine', engine_name, 'score')
        # result count per requests
-        histogram_storage.configure(1, 100, 'engine', engine_name, 'result', 'count')
+        HISTOGRAM_STORAGE.configure(1, 100, 'engine', engine_name, 'result', 'count')
         # time doing HTTP requests
-        histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'http')
+        HISTOGRAM_STORAGE.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'http')
         # total time
         # .time.request and ...response times may overlap .time.http time.
-        histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'total')
+        HISTOGRAM_STORAGE.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'total')
 
 
 class EngineError(TypedDict):
@@ -131,7 +105,7 @@ def get_engine_errors(engline_name_list) -> Dict[str, List[EngineError]]:
             continue
 
         error_stats = errors_per_engines[engine_name]
-        sent_search_count = max(counter('engine', engine_name, 'search', 'count', 'sent'), 1)
+        sent_search_count = max(COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'sent'), 1)
         sorted_context_count_list = sorted(error_stats.items(), key=lambda context_count: context_count[1])
         r = []
         for context, count in sorted_context_count_list:
@@ -170,7 +144,7 @@ def get_reliabilities(engline_name_list, checker_results) -> Dict[str, EngineRel
         checker_result = checker_results.get(engine_name, {})
         checker_success = checker_result.get('success', True)
         errors = engine_errors.get(engine_name) or []
-        if counter('engine', engine_name, 'search', 'count', 'sent') == 0:
+        if COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'sent') == 0:
             # no request
             reliablity = None
         elif checker_success and not errors:
@@ -223,23 +197,23 @@ class EngineStatResult(TypedDict):
 
 
 def get_engines_metrics(engine_name_list) -> EngineStatResult:
-    assert counter_storage is not None
-    assert histogram_storage is not None
+    assert COUNTER_STORAGE is not None
+    assert HISTOGRAM_STORAGE is not None
 
     list_time = []
     max_time_total = max_result_count = None
 
     for engine_name in engine_name_list:
-        sent_count = counter('engine', engine_name, 'search', 'count', 'sent')
+        sent_count = COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'sent')
         if sent_count == 0:
             continue
 
-        result_count = histogram('engine', engine_name, 'result', 'count').percentile(50)
-        result_count_sum = histogram('engine', engine_name, 'result', 'count').sum
-        successful_count = counter('engine', engine_name, 'search', 'count', 'successful')
+        result_count = HISTOGRAM_STORAGE.get('engine', engine_name, 'result', 'count').percentile(50)
+        result_count_sum = HISTOGRAM_STORAGE.get('engine', engine_name, 'result', 'count').sum
+        successful_count = COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'successful')
 
-        time_total = histogram('engine', engine_name, 'time', 'total').percentile(50)
+        time_total = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'total').percentile(50)
         max_time_total = max(time_total or 0, max_time_total or 0)
         max_result_count = max(result_count or 0, max_result_count or 0)
 
@@ -260,18 +234,18 @@ def get_engines_metrics(engine_name_list) -> EngineStatResult:
         }
 
         if successful_count and result_count_sum:
-            score = counter('engine', engine_name, 'score')
+            score = COUNTER_STORAGE.get('engine', engine_name, 'score')
             stats['score'] = score
             stats['score_per_result'] = score / float(result_count_sum)
 
-        time_http = histogram('engine', engine_name, 'time', 'http').percentile(50)
+        time_http = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'http').percentile(50)
         time_http_p80 = time_http_p95 = 0
 
         if time_http is not None:
-            time_http_p80 = histogram('engine', engine_name, 'time', 'http').percentile(80)
-            time_http_p95 = histogram('engine', engine_name, 'time', 'http').percentile(95)
+            time_http_p80 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'http').percentile(80)
+            time_http_p95 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'http').percentile(95)
 
             stats['http'] = round(time_http, 1)
             stats['http_p80'] = round(time_http_p80, 1)
@@ -279,8 +253,8 @@
 
         if time_total is not None:
-            time_total_p80 = histogram('engine', engine_name, 'time', 'total').percentile(80)
-            time_total_p95 = histogram('engine', engine_name, 'time', 'total').percentile(95)
+            time_total_p80 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'total').percentile(80)
+            time_total_p95 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'total').percentile(95)
 
             stats['total'] = round(time_total, 1)
             stats['total_p80'] = round(time_total_p80, 1)
 
diff --git a/searx/metrics/models.py b/searx/metrics/models.py
index 532ae9863..499f66632 100644
--- a/searx/metrics/models.py
+++ b/searx/metrics/models.py
@@ -111,8 +111,14 @@ class HistogramStorage:
         self.measures[args] = measure
         return measure
 
-    def get(self, *args) -> Optional[Histogram]:
-        return self.measures.get(args, None)
+    def get(self, *args, raise_on_not_found=True) -> Optional[Histogram]:
+        h = self.measures.get(args, None)
+        if raise_on_not_found and h is None:
+            raise ValueError("histogram " + repr((*args,)) + " doesn't not exist")
+        return h
+
+    def observe(self, duration, *args):
+        self.get(*args).observe(duration)
 
     def dump(self):
         logger.debug("Histograms:")
@@ -140,6 +146,9 @@ class CounterStorage:
     def get(self, *args) -> int:
         return self.counters[args]
 
+    def inc(self, *args):
+        self.add(1, *args)
+
     def add(self, value, *args):
         with self.lock:
             self.counters[args] += value
diff --git a/searx/results.py b/searx/results.py
index ab242b838..da8228065 100644
--- a/searx/results.py
+++ b/searx/results.py
@@ -7,8 +7,7 @@
 from urllib.parse import urlparse, unquote
 from searx import logger
 from searx.engines import engines
-from searx.metrics import histogram_observe, counter_add, count_error
-
+from searx import metrics
 
 CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
@@ -226,10 +225,10 @@ class ResultContainer:
         if len(error_msgs) > 0:
             for msg in error_msgs:
-                count_error(engine_name, 'some results are invalids: ' + msg, secondary=True)
+                metrics.count_error(engine_name, 'some results are invalids: ' + msg, secondary=True)
 
         if engine_name in engines:
-            histogram_observe(standard_result_count, 'engine', engine_name, 'result', 'count')
+            metrics.HISTOGRAM_STORAGE.observe(standard_result_count, 'engine', engine_name, 'result', 'count')
 
         if not self.paging and standard_result_count > 0 and engine_name in engines and engines[engine_name].paging:
             self.paging = True
 
@@ -354,7 +353,7 @@ class ResultContainer:
             score = result_score(result)
             result['score'] = score
             for result_engine in result['engines']:
-                counter_add(score, 'engine', result_engine, 'score')
+                metrics.COUNTER_STORAGE.add(score, 'engine', result_engine, 'score')
 
         results = sorted(self._merged_results, key=itemgetter('score'), reverse=True)
 
diff --git a/searx/search/__init__.py b/searx/search/__init__.py
index 9d337916c..846ad4341 100644
--- a/searx/search/__init__.py
+++ b/searx/search/__init__.py
@@ -17,7 +17,7 @@
 from searx.plugins import plugins
 from searx.search.models import EngineRef, SearchQuery
 from searx.engines import load_engines
 from searx.network import initialize as initialize_network, check_network_configuration
-from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time
+from searx import metrics
 from searx.search.processors import PROCESSORS, initialize as initialize_processors
 from searx.search.checker import initialize as initialize_checker
@@ -31,7 +31,7 @@ def initialize(settings_engines=None, enable_checker=False, check_network=False,
     initialize_network(settings_engines, settings['outgoing'])
     if check_network:
         check_network_configuration()
-    initialize_metrics([engine['name'] for engine in settings_engines], enable_metrics)
+    metrics.initialize([engine['name'] for engine in settings_engines], enable_metrics)
     initialize_processors(settings_engines)
     if enable_checker:
         initialize_checker()
@@ -98,7 +98,7 @@ class Search:
             if request_params is None:
                 continue
 
-            counter_inc('engine', engineref.name, 'search', 'count', 'sent')
+            metrics.COUNTER_STORAGE.inc('engine', engineref.name, 'search', 'count', 'sent')
 
             # append request to list
             requests.append((engineref.name, self.search_query.query, request_params))
diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py
index bc5cdf968..f11465f89 100644
--- a/searx/search/checker/impl.py
+++ b/searx/search/checker/impl.py
@@ -19,7 +19,7 @@
 from searx.utils import gen_useragent
 from searx.results import ResultContainer
 from searx.search.models import SearchQuery, EngineRef
 from searx.search.processors import EngineProcessor
-from searx.metrics import counter_inc
+from searx import metrics
 
 logger = logger.getChild('searx.search.checker')
@@ -414,7 +414,7 @@ class Checker:
         engineref_category = search_query.engineref_list[0].category
         params = self.processor.get_params(search_query, engineref_category)
         if params is not None:
-            counter_inc('engine', search_query.engineref_list[0].name, 'search', 'count', 'sent')
+            metrics.COUNTER_STORAGE.inc('engine', search_query.engineref_list[0].name, 'search', 'count', 'sent')
             self.processor.search(search_query.query, params, result_container, default_timer(), 5)
         return result_container
 
diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py
index d74616db0..b4905b218 100644
--- a/searx/search/processors/abstract.py
+++ b/searx/search/processors/abstract.py
@@ -13,7 +13,7 @@
 from typing import Dict, Union
 from searx import settings, logger
 from searx.engines import engines
 from searx.network import get_time_for_thread, get_network
-from searx.metrics import histogram_observe, counter_inc, count_exception, count_error
+from searx import metrics
 from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException
 from searx.utils import get_engine_from_settings
@@ -95,11 +95,11 @@ class EngineProcessor(ABC):
             error_message = exception_or_message
         result_container.add_unresponsive_engine(self.engine_name, error_message)
         # metrics
-        counter_inc('engine', self.engine_name, 'search', 'count', 'error')
+        metrics.COUNTER_STORAGE.inc('engine', self.engine_name, 'search', 'count', 'error')
         if isinstance(exception_or_message, BaseException):
-            count_exception(self.engine_name, exception_or_message)
+            metrics.count_exception(self.engine_name, exception_or_message)
         else:
-            count_error(self.engine_name, exception_or_message)
+            metrics.count_error(self.engine_name, exception_or_message)
         # suspend the engine ?
         if suspend:
             suspended_time = None
@@ -114,10 +114,10 @@
         page_load_time = get_time_for_thread()
         result_container.add_timing(self.engine_name, engine_time, page_load_time)
         # metrics
-        counter_inc('engine', self.engine_name, 'search', 'count', 'successful')
-        histogram_observe(engine_time, 'engine', self.engine_name, 'time', 'total')
+        metrics.COUNTER_STORAGE.inc('engine', self.engine_name, 'search', 'count', 'successful')
+        metrics.HISTOGRAM_STORAGE.observe(engine_time, 'engine', self.engine_name, 'time', 'total')
         if page_load_time is not None:
-            histogram_observe(page_load_time, 'engine', self.engine_name, 'time', 'http')
+            metrics.HISTOGRAM_STORAGE.observe(page_load_time, 'engine', self.engine_name, 'time', 'http')
 
     def extend_container(self, result_container, start_time, search_results):
         if getattr(threading.current_thread(), '_timeout', False):
diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py
index 17e9b6a96..927127a76 100644
--- a/searx/search/processors/online.py
+++ b/searx/search/processors/online.py
@@ -16,7 +16,8 @@
 from searx.exceptions import (
     SearxEngineCaptchaException,
     SearxEngineTooManyRequestsException,
 )
-from searx.metrics.error_recorder import count_error
+
+from searx import metrics
 
 from .abstract import EngineProcessor
@@ -113,7 +114,7 @@ class OnlineProcessor(EngineProcessor):
         status_code = str(response.status_code or '')
         reason = response.reason_phrase or ''
         hostname = response.url.host
-        count_error(
+        metrics.count_error(
             self.engine_name,
             '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
             (status_code, reason, hostname),
diff --git a/searx/webapp.py b/searx/webapp.py
index 929910ca5..16a4c1fe3 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -102,13 +102,8 @@
 from searx.answerers import (
     answerers,
     ask,
 )
-from searx.metrics import (
-    get_engines_metrics,
-    get_engine_errors,
-    get_reliabilities,
-    histogram,
-    counter,
-)
+from searx import metrics
+
 from searx.flaskfix import patch_application
 from searx.locales import (
@@ -983,15 +978,15 @@ def preferences():
     stats = {}  # pylint: disable=redefined-outer-name
     max_rate95 = 0
     for _, e in filtered_engines.items():
-        h = histogram('engine', e.name, 'time', 'total')
+        h = metrics.HISTOGRAM_STORAGE.get('engine', e.name, 'time', 'total')
         median = round(h.percentile(50), 1) if h.count > 0 else None  # type: ignore
         rate80 = round(h.percentile(80), 1) if h.count > 0 else None  # type: ignore
         rate95 = round(h.percentile(95), 1) if h.count > 0 else None  # type: ignore
 
         max_rate95 = max(max_rate95, rate95 or 0)
 
-        result_count_sum = histogram('engine', e.name, 'result', 'count').sum
-        successful_count = counter('engine', e.name, 'search', 'count', 'successful')
+        result_count_sum = metrics.HISTOGRAM_STORAGE.get('engine', e.name, 'result', 'count').sum
+        successful_count = metrics.COUNTER_STORAGE.get('engine', e.name, 'search', 'count', 'successful')
 
         result_count = int(result_count_sum / float(successful_count)) if successful_count else 0
 
         stats[e.name] = {
@@ -1006,7 +1001,7 @@ def preferences():
     # reliabilities
     reliabilities = {}
-    engine_errors = get_engine_errors(filtered_engines)
+    engine_errors = metrics.get_engine_errors(filtered_engines)
    checker_results = checker_get_result()
     checker_results = (
         checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {}
     )
@@ -1015,7 +1010,7 @@
         checker_result = checker_results.get(e.name, {})
         checker_success = checker_result.get('success', True)
         errors = engine_errors.get(e.name) or []
-        if counter('engine', e.name, 'search', 'count', 'sent') == 0:
+        if metrics.COUNTER_STORAGE.get('engine', e.name, 'search', 'count', 'sent') == 0:
             # no request
             reliablity = None
         elif checker_success and not errors:
@@ -1222,8 +1217,8 @@ def stats():
         checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {}
     )
 
-    engine_metrics = get_engines_metrics(filtered_engines)
-    engine_reliabilities = get_reliabilities(filtered_engines, checker_results)
+    engine_metrics = metrics.get_engines_metrics(filtered_engines)
+    engine_reliabilities = metrics.get_reliabilities(filtered_engines, checker_results)
 
     if sort_order not in STATS_SORT_PARAMETERS:
         sort_order = 'name'
@@ -1258,7 +1253,7 @@ def stats():
 
 
 @app.route('/stats/errors', methods=['GET'])
 def stats_errors():
     filtered_engines = dict(filter(lambda kv: request.preferences.validate_token(kv[1]), engines.items()))
-    result = get_engine_errors(filtered_engines)
+    result = metrics.get_engine_errors(filtered_engines)
     return jsonify(result)
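
Usage sketch (not part of the patch): a minimal illustration of the convention the
commit message describes, assuming a SearXNG checkout on the PYTHONPATH and that
metrics are enabled; the engine name 'example' and the observed values are
illustrative only.

    from searx import metrics

    # create the storages and configure the per-engine counters and histograms
    metrics.initialize(engine_names=['example'], enabled=True)

    # counters: what the former counter_inc() / counter_add() / counter() wrappers did
    metrics.COUNTER_STORAGE.inc('engine', 'example', 'search', 'count', 'sent')
    metrics.COUNTER_STORAGE.add(2.5, 'engine', 'example', 'score')
    sent = metrics.COUNTER_STORAGE.get('engine', 'example', 'search', 'count', 'sent')

    # histograms: what the former histogram_observe() / histogram() wrappers did
    metrics.HISTOGRAM_STORAGE.observe(0.42, 'engine', 'example', 'time', 'total')
    median = metrics.HISTOGRAM_STORAGE.get('engine', 'example', 'time', 'total').percentile(50)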