Merge 8baa7c8e714391d18a45507fb3841e57d4a9841c into 1a8884fa26bedf983516308e69b9863888cd397e

This commit is contained in:
Finn Steffens 2025-05-13 21:38:46 +02:00 committed by GitHub
commit 0ad1f2348a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -38,6 +38,7 @@ import babel.languages
from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
from searx.locales import language_tag, region_tag from searx.locales import language_tag, region_tag
from searx.enginelib.traits import EngineTraits from searx.enginelib.traits import EngineTraits
from searx.exceptions import SearxEngineAPIException
if TYPE_CHECKING: if TYPE_CHECKING:
import logging import logging
@ -161,28 +162,34 @@ def response(resp):
results.append({'url': url, 'title': title, 'content': content}) results.append({'url': url, 'title': title, 'content': content})
# get number_of_results # get number_of_results
try: if results:
result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()')) result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()'))
if "-" in result_len_container: if "-" in result_len_container:
start_str, result_len_container = re.split(r'-\d+', result_len_container)
# Remove the part "from-to" for paginated request ... start = int(start_str)
result_len_container = result_len_container[result_len_container.find("-") * 2 + 2 :] else:
start = 1
result_len_container = re.sub('[^0-9]', '', result_len_container) result_len_container = re.sub('[^0-9]', '', result_len_container)
if len(result_len_container) > 0: if len(result_len_container) > 0:
result_len = int(result_len_container) result_len = int(result_len_container)
except Exception as e: # pylint: disable=broad-except expected_start = _page_offset(resp.search_params.get("pageno", 1))
logger.debug('result error :\n%s', e)
if result_len and _page_offset(resp.search_params.get("pageno", 0)) > result_len: if expected_start != start:
if expected_start > result_len:
# Avoid reading more results than available. # Avoid reading more results than available.
# For example, if there are 100 results from some search and we try to get results from 120 to 130, # For example, if there are 100 results from some search and we try to get results from 120 to 130,
# Bing will send back the results from 0 to 10 and no error. # Bing will send back the results from 0 to 10 and no error.
# If we compare results count with the first parameter of the request we can avoid these "invalid" results. # If we compare results count with the first parameter of the request we can avoid these "invalid"
# results.
return [] return []
# Sometimes Bing will send back the first result page instead of the requested page as a rate limiting
# measure.
msg = f"Expected results to start at {expected_start}, but got results starting at {start}"
raise SearxEngineAPIException(msg)
results.append({'number_of_results': result_len}) results.append({'number_of_results': result_len})
return results return results