[fix] hardening against arguments of type None, where str or dict is expected
On a long-running server, the tracebacks below can be found (albeit rarely); they indicate problems with NoneType where a string or another data type is expected.

result.img_src::

    File "/usr/local/searxng/searxng-src/searx/templates/simple/result_templates/images.html", line 13, in top-level template code
        <img src="" data-src="{{ image_proxify(result.img_src) }}" alt="{{ result.title|striptags }}">{{- "" -}}
    File "/usr/local/searxng/searxng-src/searx/webapp.py", line 284, in image_proxify
        if url.startswith('//'):
           ^^^^^^^^^^^^^^
    AttributeError: 'NoneType' object has no attribute 'startswith'

result.content::

    File "/usr/local/searxng/searxng-src/searx/result_types/_base.py", line 105, in _normalize_text_fields
        result.content = WHITESPACE_REGEX.sub(" ", result.content).strip()
                         ~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^
    TypeError: expected string or bytes-like object, got 'NoneType'

html_to_text, when html_str is a NoneType::

    File "/usr/local/searxng/searxng-src/searx/engines/wikipedia.py", line 190, in response
        title = utils.html_to_text(api_result.get('titles', {}).get('display') or api_result.get('title'))
    File "/usr/local/searxng/searxng-src/searx/utils.py", line 158, in html_to_text
        html_str = html_str.replace('\n', ' ').replace('\r', ' ')
                   ^^^^^^^^^^^^^^^^
    AttributeError: 'NoneType' object has no attribute 'replace'

presearch engine, when json_resp is a NoneType::

    File "/usr/local/searxng/searxng-src/searx/engines/presearch.py", line 221, in response
        results = parse_search_query(json_resp.get('results'))
    File "/usr/local/searxng/searxng-src/searx/engines/presearch.py", line 161, in parse_search_query
        for item in json_results.get('specialSections', {}).get('topStoriesCompact', {}).get('data', []):
                    ^^^^^^^^^^^^^^^^
    AttributeError: 'NoneType' object has no attribute 'get'

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
08885d0614
commit
e6308b8167
@ -184,6 +184,8 @@ def _fix_title(title, url):
|
|||||||
|
|
||||||
def parse_search_query(json_results):
|
def parse_search_query(json_results):
|
||||||
results = []
|
results = []
|
||||||
|
if not json_results:
|
||||||
|
return results
|
||||||
|
|
||||||
for item in json_results.get('specialSections', {}).get('topStoriesCompact', {}).get('data', []):
|
for item in json_results.get('specialSections', {}).get('topStoriesCompact', {}).get('data', []):
|
||||||
result = {
|
result = {
|
||||||
@ -245,7 +247,7 @@ def response(resp):
|
|||||||
json_resp = resp.json()
|
json_resp = resp.json()
|
||||||
|
|
||||||
if search_type == 'search':
|
if search_type == 'search':
|
||||||
results = parse_search_query(json_resp.get('results'))
|
results = parse_search_query(json_resp.get('results', {}))
|
||||||
|
|
||||||
elif search_type == 'images':
|
elif search_type == 'images':
|
||||||
for item in json_resp.get('images', []):
|
for item in json_resp.get('images', []):
|
||||||
|
@ -103,7 +103,9 @@ def _normalize_text_fields(result: MainResult | LegacyResult):
|
|||||||
result.content = str(result)
|
result.content = str(result)
|
||||||
|
|
||||||
# normalize title and content
|
# normalize title and content
|
||||||
|
if result.title:
|
||||||
result.title = WHITESPACE_REGEX.sub(" ", result.title).strip()
|
result.title = WHITESPACE_REGEX.sub(" ", result.title).strip()
|
||||||
|
if result.content:
|
||||||
result.content = WHITESPACE_REGEX.sub(" ", result.content).strip()
|
result.content = WHITESPACE_REGEX.sub(" ", result.content).strip()
|
||||||
if result.content == result.title:
|
if result.content == result.title:
|
||||||
# avoid duplicate content between the content and title fields
|
# avoid duplicate content between the content and title fields
|
||||||
|
@ -154,6 +154,8 @@ def html_to_text(html_str: str) -> str:
|
|||||||
>>> html_to_text(r'regexp: (?<![a-zA-Z]')
|
>>> html_to_text(r'regexp: (?<![a-zA-Z]')
|
||||||
'regexp: (?<![a-zA-Z]'
|
'regexp: (?<![a-zA-Z]'
|
||||||
"""
|
"""
|
||||||
|
if not html_str:
|
||||||
|
return ""
|
||||||
html_str = html_str.replace('\n', ' ').replace('\r', ' ')
|
html_str = html_str.replace('\n', ' ').replace('\r', ' ')
|
||||||
html_str = ' '.join(html_str.split())
|
html_str = ' '.join(html_str.split())
|
||||||
s = _HTMLTextExtractor()
|
s = _HTMLTextExtractor()
|
||||||
|
@ -265,6 +265,9 @@ def custom_url_for(endpoint: str, **values):
|
|||||||
|
|
||||||
|
|
||||||
def morty_proxify(url: str):
|
def morty_proxify(url: str):
|
||||||
|
if not url:
|
||||||
|
return url
|
||||||
|
|
||||||
if url.startswith('//'):
|
if url.startswith('//'):
|
||||||
url = 'https:' + url
|
url = 'https:' + url
|
||||||
|
|
||||||
@ -280,6 +283,8 @@ def morty_proxify(url: str):
|
|||||||
|
|
||||||
|
|
||||||
def image_proxify(url: str):
|
def image_proxify(url: str):
|
||||||
|
if not url:
|
||||||
|
return url
|
||||||
|
|
||||||
if url.startswith('//'):
|
if url.startswith('//'):
|
||||||
url = 'https:' + url
|
url = 'https:' + url
|
||||||
|
Loading…
x
Reference in New Issue
Block a user