[enh] use longest title and test get_ordered_results()
commit 6948689d2a
parent 94aafc83a6
@@ -12,7 +12,6 @@ from searx import logger
 from searx.engines import engines
 from searx.metrics import histogram_observe, counter_add, count_error
 
-
 CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
 
@@ -133,7 +132,7 @@ def result_score(result, priority):
     weight = 1.0
 
     for result_engine in result['engines']:
-        if hasattr(engines[result_engine], 'weight'):
+        if hasattr(engines.get(result_engine), 'weight'):
             weight *= float(engines[result_engine].weight)
 
     weight *= len(result['positions'])
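A note on the engines.get(result_engine) change above: dict.get() returns None for engine names that are not loaded, and hasattr(None, 'weight') is False, so result_score() now skips unknown engines instead of raising KeyError. That matters for the unit tests further down, which register only three stub engines while fake_result() defaults to engine='wikipedia'. A minimal standalone sketch of the behaviour, using a hypothetical stand-in for the real searx.engines.engines registry:

# Sketch only: "engines" is a hypothetical stand-in for searx.engines.engines
# (a mapping of engine name -> engine module).
class FakeEngine:
    weight = 2.0

engines = {'wikipedia': FakeEngine()}

weight = 1.0
for result_engine in ('wikipedia', 'not-loaded'):
    # .get() yields None for unknown names; hasattr(None, 'weight') is False,
    # so the unknown engine is skipped instead of raising KeyError.
    if hasattr(engines.get(result_engine), 'weight'):
        weight *= float(engines[result_engine].weight)

assert weight == 2.0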
@@ -332,10 +331,14 @@ class ResultContainer:
         return None
 
     def __merge_duplicated_http_result(self, duplicated, result, position):
-        # using content with more text
+        # use content with more text
         if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')):
             duplicated['content'] = result['content']
 
+        # use title with more text
+        if result_content_len(result.get('title', '')) > len(duplicated.get('title', '')):
+            duplicated['title'] = result['title']
+
         # merge all result's parameters not found in duplicate
         for key in result.keys():
             if not duplicated.get(key):
@@ -347,7 +350,7 @@ class ResultContainer:
         # add engine to list of result-engines
         duplicated['engines'].add(result['engine'])
 
-        # using https if possible
+        # use https if possible
         if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https':
             duplicated['url'] = result['parsed_url'].geturl()
             duplicated['parsed_url'] = result['parsed_url']
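For illustration only, a standalone sketch of the new longest-title rule from __merge_duplicated_http_result(), simplified to plain len() on both sides (the patch itself runs the incoming title through result_content_len()):

def merge_longest_title(duplicated: dict, result: dict) -> None:
    # keep whichever title has more text (simplified mirror of the hunk above)
    if len(result.get('title', '')) > len(duplicated.get('title', '')):
        duplicated['title'] = result['title']

duplicated = {'url': 'https://example.org/', 'title': 'short title'}
result = {'url': 'https://example.org/', 'title': 'this long long title'}
merge_longest_title(duplicated, result)
assert duplicated['title'] == 'this long long title'

The remaining hunks change the ResultContainer unit tests: a shared setUp()/tearDown() fixture registers three stub engines through load_engines(), and a new test verifies that the longest title survives the merge.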
@@ -2,9 +2,26 @@
 # pylint: disable=missing-module-docstring
 
 from searx.results import ResultContainer
+from searx.engines import load_engines
 from tests import SearxTestCase
 
 
+def make_test_engine_dict(**kwargs) -> dict:
+    test_engine = {
+        # fmt: off
+        'name': None,
+        'engine': None,
+        'categories': 'general',
+        'shortcut': 'dummy',
+        'timeout': 3.0,
+        'tokens': [],
+        # fmt: on
+    }
+
+    test_engine.update(**kwargs)
+    return test_engine
+
+
 def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', engine='wikipedia', **kwargs):
     result = {
         # fmt: off
@@ -19,23 +36,41 @@ def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', eng
 
 
 class ResultContainerTestCase(SearxTestCase):  # pylint: disable=missing-class-docstring
+    def setUp(self) -> None:
+        stract_engine = make_test_engine_dict(name="stract", engine="stract", shortcut="stra")
+        duckduckgo_engine = make_test_engine_dict(name="duckduckgo", engine="duckduckgo", shortcut="ddg")
+        mojeek_engine = make_test_engine_dict(name="mojeek", engine="mojeek", shortcut="mjk")
+
+        load_engines([stract_engine, duckduckgo_engine, mojeek_engine])
+
+        self.container = ResultContainer()
+
+    def tearDown(self):
+        load_engines([])
+
     def test_empty(self):
-        c = ResultContainer()
-        self.assertEqual(c.get_ordered_results(), [])
+        self.assertEqual(self.container.get_ordered_results(), [])
 
     def test_one_result(self):
-        c = ResultContainer()
-        c.extend('wikipedia', [fake_result()])
-        self.assertEqual(c.results_length(), 1)
+        self.container.extend('wikipedia', [fake_result()])
+
+        self.assertEqual(self.container.results_length(), 1)
 
     def test_one_suggestion(self):
-        c = ResultContainer()
-        c.extend('wikipedia', [fake_result(suggestion=True)])
-        self.assertEqual(len(c.suggestions), 1)
-        self.assertEqual(c.results_length(), 0)
+        self.container.extend('wikipedia', [fake_result(suggestion=True)])
+
+        self.assertEqual(len(self.container.suggestions), 1)
+        self.assertEqual(self.container.results_length(), 0)
 
     def test_result_merge(self):
-        c = ResultContainer()
-        c.extend('wikipedia', [fake_result()])
-        c.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')])
-        self.assertEqual(c.results_length(), 2)
+        self.container.extend('wikipedia', [fake_result()])
+        self.container.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')])
+
+        self.assertEqual(self.container.results_length(), 2)
+
+    def test_result_merge_by_title(self):
+        self.container.extend('stract', [fake_result(engine='stract', title='short title')])
+        self.container.extend('duckduckgo', [fake_result(engine='duckduckgo', title='normal title')])
+        self.container.extend('mojeek', [fake_result(engine='mojeek', title='this long long title')])
+
+        self.assertEqual(self.container.get_ordered_results()[0].get('title', ''), 'this long long title')
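A hedged usage sketch of what test_result_merge_by_title() exercises, without the unittest scaffolding. It assumes the three stub engines from setUp() are already registered via load_engines() and reuses the fake_result() helper defined in the test module above; all three results keep fake_result()'s default URL, so the container treats them as duplicates of a single result:

from searx.results import ResultContainer

container = ResultContainer()
for engine, title in (
    ('stract', 'short title'),
    ('duckduckgo', 'normal title'),
    ('mojeek', 'this long long title'),
):
    container.extend(engine, [fake_result(engine=engine, title=title)])

merged = container.get_ordered_results()
assert len(merged) == 1                               # same URL, one merged result
assert merged[0]['title'] == 'this long long title'   # longest title wins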