[fix] baidu engine: properly decode HTML escape codes

This commit is contained in:
Aadniz 2025-03-26 19:56:58 +01:00
parent 4dfc47584d
commit 5ccad2735b

View File

@@ -9,6 +9,7 @@
from urllib.parse import urlencode
from datetime import datetime
from html import unescape
import time
import json
@@ -119,11 +120,15 @@ def parse_general(data):
except (ValueError, TypeError):
published_date = None
# title and content sometimes contain HTML escape codes such as &amp; &#39; &quot; etc...
title = unescape(entry["title"])
content = unescape(entry.get("abs", ""))
results.append(
{
"title": entry["title"],
"title": title,
"url": entry["url"],
"content": entry.get("abs", ""),
"content": content,
"publishedDate": published_date,
}
)