[fix] baidu engine: properly decoding HTML escape codes
This commit is contained in:
parent
4dfc47584d
commit
02f5002a5f
@ -9,6 +9,7 @@
|
||||
|
||||
from urllib.parse import urlencode
|
||||
from datetime import datetime
|
||||
from html import unescape
|
||||
import time
|
||||
import json
|
||||
|
||||
@ -119,11 +120,15 @@ def parse_general(data):
|
||||
except (ValueError, TypeError):
|
||||
published_date = None
|
||||
|
||||
# title and content sometimes contain HTML escape codes such as &amp; &#x27; &quot; etc.
|
||||
title = unescape(entry["title"])
|
||||
content = unescape(entry.get("abs", ""))
|
||||
|
||||
results.append(
|
||||
{
|
||||
"title": entry["title"],
|
||||
"title": title,
|
||||
"url": entry["url"],
|
||||
"content": entry.get("abs", ""),
|
||||
"content": content,
|
||||
"publishedDate": published_date,
|
||||
}
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user