[fix] baidu engine: properly decode HTML escape codes
This commit is contained in:
parent
4dfc47584d
commit
02f5002a5f
@ -9,6 +9,7 @@
|
|||||||
|
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from html import unescape
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
|
|
||||||
@ -119,11 +120,15 @@ def parse_general(data):
|
|||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
published_date = None
|
published_date = None
|
||||||
|
|
||||||
|
# title and content sometimes contain HTML-escaped characters such as &amp; &#39; &quot; etc.
|
||||||
|
title = unescape(entry["title"])
|
||||||
|
content = unescape(entry.get("abs", ""))
|
||||||
|
|
||||||
results.append(
|
results.append(
|
||||||
{
|
{
|
||||||
"title": entry["title"],
|
"title": title,
|
||||||
"url": entry["url"],
|
"url": entry["url"],
|
||||||
"content": entry.get("abs", ""),
|
"content": content,
|
||||||
"publishedDate": published_date,
|
"publishedDate": published_date,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user