[fix] fix invalid escape error in Baidu Images & default config typo

This commit is contained in:
Zhijie He 2025-03-15 18:44:46 +08:00 committed by Markus Heiser
parent 4ce7f1accc
commit 38caa49540
2 changed files with 27 additions and 16 deletions

View File

@ -11,6 +11,7 @@ from urllib.parse import urlencode
from datetime import datetime from datetime import datetime
import time import time
import json import json
import re
from searx.exceptions import SearxEngineAPIException from searx.exceptions import SearxEngineAPIException
from searx.utils import html_to_text from searx.utils import html_to_text
@ -92,11 +93,12 @@ def request(query, params):
def response(resp): def response(resp):
try:
data = json.loads(resp.text, strict=False)
except Exception as e:
raise SearxEngineAPIException(f"Invalid response: {e}") from e
text = resp.text
if baidu_category == 'images':
# baidu's JSON encoder escapes the / and ' characters as \/ and \' (\' is not a valid JSON escape sequence)
text = text.replace(r"\/", "/").replace(r"\'", "'")
data = json.loads(text, strict=False)
parsers = {'general': parse_general, 'images': parse_images, 'it': parse_it} parsers = {'general': parse_general, 'images': parse_images, 'it': parse_it}
return parsers[baidu_category](data) return parsers[baidu_category](data)
@ -133,19 +135,28 @@ def parse_images(data):
results = [] results = []
if "data" in data: if "data" in data:
for item in data["data"]: for item in data["data"]:
if not item:
# the last item in the JSON list is empty: the JSON string ends with "}, {}]"
continue
replace_url = item.get("replaceUrl", [{}])[0] replace_url = item.get("replaceUrl", [{}])[0]
from_url = replace_url.get("FromURL", "").replace("\\/", "/") width = item.get("width")
img_src = replace_url.get("ObjURL", "").replace("\\/", "/") height = item.get("height")
img_date = item.get("bdImgnewsDate")
publishedDate = None
if img_date:
publishedDate = datetime.strptime(img_date, "%Y-%m-%d %H:%M")
results.append( results.append(
{ {
"template": "images.html", "template": "images.html",
"url": from_url, "url": replace_url.get("FromURL"),
"thumbnail_src": item.get("thumbURL", ""), "thumbnail_src": item.get("thumbURL"),
"img_src": img_src, "img_src": replace_url.get("ObjURL"),
"content": html_to_text(item.get("fromPageTitleEnc", "")), "title": html_to_text(item.get("fromPageTitle")),
"title": html_to_text(item.get("fromPageTitle", "")), "source": item.get("fromURLHost"),
"source": item.get("fromURLHost", ""), "resolution": f"{width} x {height}",
"img_format": item.get("type"),
"filesize": item.get("filesize"),
"publishedDate": publishedDate,
} }
) )
return results return results

View File

@ -502,21 +502,21 @@ engines:
categories: music categories: music
- name: baidu - name: baidu
baidu_categories: general baidu_category: general
categories: [general] categories: [general]
engine: baidu engine: baidu
shortcut: bd shortcut: bd
disabled: true disabled: true
- name: baidu images - name: baidu images
baidu_categories: images baidu_category: images
categories: [images] categories: [images]
engine: baidu engine: baidu
shortcut: bdi shortcut: bdi
disabled: true disabled: true
- name: baidu kaifa - name: baidu kaifa
baidu_categories: it baidu_category: it
categories: [it] categories: [it]
engine: baidu engine: baidu
shortcut: bdk shortcut: bdk