[feat]: engines add images & kaifa from baidu.com
This commit is contained in:
		
							parent
							
								
									11b5ae186b
								
							
						
					
					
						commit
						4ce7f1accc
					
				@ -9,8 +9,11 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
from urllib.parse import urlencode
 | 
					from urllib.parse import urlencode
 | 
				
			||||||
from datetime import datetime
 | 
					from datetime import datetime
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import json
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from searx.exceptions import SearxEngineAPIException
 | 
					from searx.exceptions import SearxEngineAPIException
 | 
				
			||||||
 | 
					from searx.utils import html_to_text
 | 
				
			||||||
 | 
					
 | 
				
			||||||
about = {
 | 
					about = {
 | 
				
			||||||
    "website": "https://www.baidu.com",
 | 
					    "website": "https://www.baidu.com",
 | 
				
			||||||
@ -23,33 +26,85 @@ about = {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
paging = True
 | 
					paging = True
 | 
				
			||||||
categories = ["general"]
 | 
					categories = []
 | 
				
			||||||
base_url = "https://www.baidu.com/s"
 | 
					 | 
				
			||||||
results_per_page = 10
 | 
					results_per_page = 10
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					baidu_category = 'general'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					time_range_support = True
 | 
				
			||||||
 | 
					time_range_dict = {"day": 86400, "week": 604800, "month": 2592000, "year": 31536000}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def init(_):
					    """Validate the engine configuration at start-up.

					    Checks the module-level ``baidu_category`` against the categories this
					    engine implements. The single positional argument is ignored.

					    Raises:
					        SearxEngineAPIException: if ``baidu_category`` is not one of
					            ``general``, ``images`` or ``it``.
					    """
					    supported = ('general', 'images', 'it')
					    if baidu_category in supported:
					        return
					    raise SearxEngineAPIException(f"Unsupported category: {baidu_category}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def request(query, params):
 | 
					def request(query, params):
 | 
				
			||||||
    keyword = query.strip()
 | 
					    page_num = params["pageno"]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    query_params = {
 | 
					    category_config = {
 | 
				
			||||||
        "wd": keyword,
 | 
					        'general': {
 | 
				
			||||||
 | 
					            'endpoint': 'https://www.baidu.com/s',
 | 
				
			||||||
 | 
					            'params': {
 | 
				
			||||||
 | 
					                "wd": query,
 | 
				
			||||||
                "rn": results_per_page,
 | 
					                "rn": results_per_page,
 | 
				
			||||||
        "pn": (params["pageno"] - 1) * results_per_page,
 | 
					                "pn": (page_num - 1) * results_per_page,
 | 
				
			||||||
                "tn": "json",
 | 
					                "tn": "json",
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					        'images': {
 | 
				
			||||||
 | 
					            'endpoint': 'https://image.baidu.com/search/acjson',
 | 
				
			||||||
 | 
					            'params': {
 | 
				
			||||||
 | 
					                "word": query,
 | 
				
			||||||
 | 
					                "rn": results_per_page,
 | 
				
			||||||
 | 
					                "pn": (page_num - 1) * results_per_page,
 | 
				
			||||||
 | 
					                "tn": "resultjson_com",
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					        'it': {
 | 
				
			||||||
 | 
					            'endpoint': 'https://kaifa.baidu.com/rest/v1/search',
 | 
				
			||||||
 | 
					            'params': {
 | 
				
			||||||
 | 
					                "wd": query,
 | 
				
			||||||
 | 
					                "pageSize": results_per_page,
 | 
				
			||||||
 | 
					                "pageNum": page_num,
 | 
				
			||||||
 | 
					                "paramList": f"page_num={page_num},page_size={results_per_page}",
 | 
				
			||||||
 | 
					                "position": 0,
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    params["url"] = f"{base_url}?{urlencode(query_params)}"
 | 
					    query_params = category_config[baidu_category]['params']
 | 
				
			||||||
 | 
					    query_url = category_config[baidu_category]['endpoint']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if params.get("time_range") in time_range_dict:
 | 
				
			||||||
 | 
					        now = int(time.time())
 | 
				
			||||||
 | 
					        past = now - time_range_dict[params["time_range"]]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if baidu_category == 'general':
 | 
				
			||||||
 | 
					            query_params["gpc"] = f"stf={past},{now}|stftype=1"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if baidu_category == 'it':
 | 
				
			||||||
 | 
					            query_params["paramList"] += f",timestamp_range={past}-{now}"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    params["url"] = f"{query_url}?{urlencode(query_params)}"
 | 
				
			||||||
    return params
 | 
					    return params
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def response(resp):
 | 
					def response(resp):
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        data = resp.json()
 | 
					        data = json.loads(resp.text, strict=False)
 | 
				
			||||||
    except Exception as e:
 | 
					    except Exception as e:
 | 
				
			||||||
        raise SearxEngineAPIException(f"Invalid response: {e}") from e
 | 
					        raise SearxEngineAPIException(f"Invalid response: {e}") from e
 | 
				
			||||||
    results = []
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if "feed" not in data or "entry" not in data["feed"]:
 | 
					    parsers = {'general': parse_general, 'images': parse_images, 'it': parse_it}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return parsers[baidu_category](data)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def parse_general(data):
 | 
				
			||||||
 | 
					    results = []
 | 
				
			||||||
 | 
					    if not data.get("feed", {}).get("entry"):
 | 
				
			||||||
        raise SearxEngineAPIException("Invalid response")
 | 
					        raise SearxEngineAPIException("Invalid response")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for entry in data["feed"]["entry"]:
 | 
					    for entry in data["feed"]["entry"]:
 | 
				
			||||||
@ -69,8 +124,44 @@ def response(resp):
 | 
				
			|||||||
                "url": entry["url"],
 | 
					                "url": entry["url"],
 | 
				
			||||||
                "content": entry.get("abs", ""),
 | 
					                "content": entry.get("abs", ""),
 | 
				
			||||||
                "publishedDate": published_date,
 | 
					                "publishedDate": published_date,
 | 
				
			||||||
                # "source": entry.get('source')
 | 
					 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					    return results
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def parse_images(data):
					    """Convert a decoded Baidu image-search payload into image results.

					    Args:
					        data: decoded JSON mapping; the image entries are read from its
					            ``"data"`` key. A missing or empty ``"data"`` yields no results.

					    Returns:
					        A list of result dicts using the ``images.html`` template, one per
					        non-empty entry, with ``url``, ``thumbnail_src``, ``img_src``,
					        ``content``, ``title`` and ``source`` keys.
					    """
					    results = []
					    for item in data.get("data") or []:
					        # Empty placeholder objects carry no image; emitting them would add
					        # results with an empty URL. NOTE(review): presumably this is the
					        # trailing ``{}`` that ends Baidu's list -- confirm against a live
					        # response.
					        if not item:
					            continue

					        # ``replaceUrl`` may be missing *or* an empty list; indexing ``[0]``
					        # directly on an empty list would raise IndexError and lose the page.
					        replace_urls = item.get("replaceUrl") or [{}]
					        replace_url = replace_urls[0]

					        # The URLs arrive with escaped slashes (``\/``); restore plain ``/``.
					        from_url = replace_url.get("FromURL", "").replace("\\/", "/")
					        img_src = replace_url.get("ObjURL", "").replace("\\/", "/")

					        results.append(
					            {
					                "template": "images.html",
					                "url": from_url,
					                "thumbnail_src": item.get("thumbURL", ""),
					                "img_src": img_src,
					                "content": html_to_text(item.get("fromPageTitleEnc", "")),
					                "title": html_to_text(item.get("fromPageTitle", "")),
					                "source": item.get("fromURLHost", ""),
					            }
					        )
					    return results
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def parse_it(data):
 | 
				
			||||||
 | 
					    results = []
 | 
				
			||||||
 | 
					    if not data.get("data", {}).get("documents", {}).get("data"):
 | 
				
			||||||
 | 
					        raise SearxEngineAPIException("Invalid response")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for entry in data["data"]["documents"]["data"]:
 | 
				
			||||||
 | 
					        results.append(
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                'title': entry["techDocDigest"]["title"],
 | 
				
			||||||
 | 
					                'url': entry["techDocDigest"]["url"],
 | 
				
			||||||
 | 
					                'content': entry["techDocDigest"]["summary"],
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
    return results
 | 
					    return results
 | 
				
			||||||
 | 
				
			|||||||
@ -502,8 +502,24 @@ engines:
 | 
				
			|||||||
    categories: music
 | 
					    categories: music
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  - name: baidu
 | 
					  - name: baidu
 | 
				
			||||||
 | 
					    baidu_category: general
 | 
				
			||||||
 | 
					    categories: [general]
 | 
				
			||||||
    engine: baidu
 | 
					    engine: baidu
 | 
				
			||||||
    shortcut: baidu
 | 
					    shortcut: bd
 | 
				
			||||||
 | 
					    disabled: true
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  - name: baidu images
 | 
				
			||||||
 | 
					    baidu_category: images
 | 
				
			||||||
 | 
					    categories: [images]
 | 
				
			||||||
 | 
					    engine: baidu
 | 
				
			||||||
 | 
					    shortcut: bdi
 | 
				
			||||||
 | 
					    disabled: true
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  - name: baidu kaifa
 | 
				
			||||||
 | 
					    baidu_category: it
 | 
				
			||||||
 | 
					    categories: [it]
 | 
				
			||||||
 | 
					    engine: baidu
 | 
				
			||||||
 | 
					    shortcut: bdk
 | 
				
			||||||
    disabled: true
 | 
					    disabled: true
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  - name: wikipedia
 | 
					  - name: wikipedia
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user