[feat] bing: raise error upon receiving wrong results page
This commit is contained in:
parent
1a16281490
commit
8baa7c8e71
@@ -38,6 +38,7 @@ import babel.languages
|
|||||||
from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
|
from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
|
||||||
from searx.locales import language_tag, region_tag
|
from searx.locales import language_tag, region_tag
|
||||||
from searx.enginelib.traits import EngineTraits
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
from searx.exceptions import SearxEngineAPIException
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
import logging
|
import logging
|
||||||
@@ -161,27 +162,33 @@ def response(resp):
|
|||||||
results.append({'url': url, 'title': title, 'content': content})
|
results.append({'url': url, 'title': title, 'content': content})
|
||||||
|
|
||||||
# get number_of_results
|
# get number_of_results
|
||||||
try:
|
if results:
|
||||||
result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()'))
|
result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()'))
|
||||||
if "-" in result_len_container:
|
if "-" in result_len_container:
|
||||||
|
start_str, result_len_container = re.split(r'-\d+', result_len_container)
|
||||||
# Remove the part "from-to" for paginated request ...
|
start = int(start_str)
|
||||||
result_len_container = result_len_container[result_len_container.find("-") * 2 + 2 :]
|
else:
|
||||||
|
start = 1
|
||||||
|
|
||||||
result_len_container = re.sub('[^0-9]', '', result_len_container)
|
result_len_container = re.sub('[^0-9]', '', result_len_container)
|
||||||
|
|
||||||
if len(result_len_container) > 0:
|
if len(result_len_container) > 0:
|
||||||
result_len = int(result_len_container)
|
result_len = int(result_len_container)
|
||||||
|
|
||||||
except Exception as e: # pylint: disable=broad-except
|
expected_start = _page_offset(resp.search_params.get("pageno", 1))
|
||||||
logger.debug('result error :\n%s', e)
|
|
||||||
|
|
||||||
if result_len and _page_offset(resp.search_params.get("pageno", 0)) > result_len:
|
if expected_start != start:
|
||||||
# Avoid reading more results than available.
|
if expected_start > result_len:
|
||||||
# For example, if there is 100 results from some search and we try to get results from 120 to 130,
|
# Avoid reading more results than available.
|
||||||
# Bing will send back the results from 0 to 10 and no error.
|
# For example, if there is 100 results from some search and we try to get results from 120 to 130,
|
||||||
# If we compare results count with the first parameter of the request we can avoid this "invalid" results.
|
# Bing will send back the results from 0 to 10 and no error.
|
||||||
return []
|
# If we compare results count with the first parameter of the request we can avoid this "invalid"
|
||||||
|
# results.
|
||||||
|
return []
|
||||||
|
|
||||||
|
# Sometimes Bing will send back the first result page instead of the requested page as a rate limiting
|
||||||
|
# measure.
|
||||||
|
msg = f"Expected results to start at {expected_start}, but got results starting at {start}"
|
||||||
|
raise SearxEngineAPIException(msg)
|
||||||
|
|
||||||
results.append({'number_of_results': result_len})
|
results.append({'number_of_results': result_len})
|
||||||
return results
|
return results
|
||||||
|
Loading…
x
Reference in New Issue
Block a user