[fix] fix invalid escape error in Baidu Images & default config typo

This commit is contained in:
Zhijie He 2025-03-15 18:44:46 +08:00 committed by Markus Heiser
parent 4ce7f1accc
commit 38caa49540
2 changed files with 27 additions and 16 deletions

View File

@ -11,6 +11,7 @@ from urllib.parse import urlencode
from datetime import datetime
import time
import json
import re
from searx.exceptions import SearxEngineAPIException
from searx.utils import html_to_text
@ -92,11 +93,12 @@ def request(query, params):
def response(resp):
    """Decode a Baidu JSON response and hand it to the category parser.

    Args:
        resp: HTTP response object; only ``resp.text`` is read.

    Returns:
        Whatever the parser selected by the module-level ``baidu_category``
        (``parse_general``, ``parse_images`` or ``parse_it``) returns.

    Raises:
        SearxEngineAPIException: if the (sanitized) body cannot be decoded
            as JSON.
    """
    text = resp.text
    if baidu_category == 'images':
        # Baidu's image endpoint escapes / and ' as \/ and \' ; the latter is
        # not a legal JSON escape, so both are undone *before* decoding.
        text = text.replace(r"\/", "/").replace(r"\'", "'")

    try:
        # strict=False tolerates raw control characters inside JSON strings.
        data = json.loads(text, strict=False)
    except Exception as e:
        # Kept broad on purpose: every decoding failure is reported to the
        # caller as an engine API error, with the original cause chained.
        raise SearxEngineAPIException(f"Invalid response: {e}") from e

    parsers = {'general': parse_general, 'images': parse_images, 'it': parse_it}
    return parsers[baidu_category](data)
@ -133,19 +135,28 @@ def parse_images(data):
# Body of parse_images(data): builds one result dict (template "images.html")
# per non-empty entry of data["data"] and collects them in `results`.
# NOTE(review): this span is a flattened diff hunk -- indentation is lost and
# removed/added lines appear to be interleaved; confirm against the real file.
results = []
if "data" in data:
for item in data["data"]:
if not item:
# the last item in the JSON list is empty, the JSON string ends with "}, {}]"
continue
# The [{}] default only covers a missing "replaceUrl" key; a present but
# empty list would make the [0] lookup raise IndexError.
replace_url = item.get("replaceUrl", [{}])[0]
# NOTE(review): from_url / img_src feed only the first "url" / "img_src"
# entries of the dict below, which are overridden by later duplicates.
from_url = replace_url.get("FromURL", "").replace("\\/", "/")
img_src = replace_url.get("ObjURL", "").replace("\\/", "/")
width = item.get("width")
height = item.get("height")
img_date = item.get("bdImgnewsDate")
publishedDate = None
if img_date:
# strptime raises ValueError if the value does not match "%Y-%m-%d %H:%M".
publishedDate = datetime.strptime(img_date, "%Y-%m-%d %H:%M")
results.append(
{
"template": "images.html",
# NOTE(review): "url", "thumbnail_src", "img_src", "title" and "source"
# each occur twice in this literal; in a Python dict display the last
# occurrence wins, so the versions without "" defaults are the effective
# ones -- presumably the first set is the removed side of the diff.
"url": from_url,
"thumbnail_src": item.get("thumbURL", ""),
"img_src": img_src,
"content": html_to_text(item.get("fromPageTitleEnc", "")),
"title": html_to_text(item.get("fromPageTitle", "")),
"source": item.get("fromURLHost", ""),
"url": replace_url.get("FromURL"),
"thumbnail_src": item.get("thumbURL"),
"img_src": replace_url.get("ObjURL"),
"title": html_to_text(item.get("fromPageTitle")),
"source": item.get("fromURLHost"),
# width/height are interpolated as-is; a missing value renders as "None".
"resolution": f"{width} x {height}",
"img_format": item.get("type"),
"filesize": item.get("filesize"),
# None when the entry carries no "bdImgnewsDate".
"publishedDate": publishedDate,
}
)
return results

View File

@ -502,21 +502,21 @@ engines:
categories: music
- name: baidu
baidu_categories: general
baidu_category: general
categories: [general]
engine: baidu
shortcut: bd
disabled: true
- name: baidu images
baidu_categories: images
baidu_category: images
categories: [images]
engine: baidu
shortcut: bdi
disabled: true
- name: baidu kaifa
baidu_categories: it
baidu_category: it
categories: [it]
engine: baidu
shortcut: bdk