[fix] baidu engine: properly decoding HTML escape codes

This commit is contained in:
Aadniz 2025-03-26 19:56:58 +01:00 committed by Markus Heiser
parent 4dfc47584d
commit 02f5002a5f

View File

@ -9,6 +9,7 @@
from urllib.parse import urlencode from urllib.parse import urlencode
from datetime import datetime from datetime import datetime
from html import unescape
import time import time
import json import json
@ -119,11 +120,15 @@ def parse_general(data):
except (ValueError, TypeError): except (ValueError, TypeError):
published_date = None published_date = None
# title and content sometimes contain HTML entities such as &amp; &#39; &quot; etc...
title = unescape(entry["title"])
content = unescape(entry.get("abs", ""))
results.append( results.append(
{ {
"title": entry["title"], "title": title,
"url": entry["url"], "url": entry["url"],
"content": entry.get("abs", ""), "content": content,
"publishedDate": published_date, "publishedDate": published_date,
} }
) )