[feat] engines: add Hugging Face engine

This commit is contained in:
Zhijie He 2025-03-29 12:06:41 +08:00 committed by Markus Heiser
parent d1c584b961
commit f94802f2d2
3 changed files with 140 additions and 0 deletions

View File

@ -0,0 +1,8 @@
.. _huggingface engine:

============
Hugging Face
============

.. automodule:: searx.engines.huggingface
   :members:

View File

@ -0,0 +1,116 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""`Hugging Face`_ search engine for SearXNG.

.. _Hugging Face: https://huggingface.co

Configuration
=============

The engine has the following additional settings:

- :py:obj:`huggingface_endpoint`

Configurations for endpoints:

.. code:: yaml

  - name: huggingface
    engine: huggingface
    shortcut: hf

  - name: huggingface datasets
    huggingface_endpoint: datasets
    engine: huggingface
    shortcut: hfd

  - name: huggingface spaces
    huggingface_endpoint: spaces
    engine: huggingface
    shortcut: hfs

Implementations
===============
"""
from urllib.parse import urlencode
from datetime import datetime
from searx.exceptions import SearxEngineAPIException
from searx.utils import html_to_text
from searx.result_types import EngineResults, MainResult
# Engine metadata: where the service lives, where its API is documented,
# and which kind of payload the engine parses.
about = dict(
    website="https://huggingface.co/",
    wikidata_id="Q108943604",
    official_api_documentation="https://huggingface.co/docs/hub/en/api",
    use_official_api=True,
    require_api_key=False,
    results="JSON",
)

# Search categories this engine is listed under.
categories = ["it", "repos"]

# Root of the Hugging Face site; API and result URLs are built from it.
base_url = "https://huggingface.co"

huggingface_endpoint = "models"
"""Hugging Face supports datasets, models, spaces as search endpoint.

- ``datasets``: search for datasets
- ``models``: search for models
- ``spaces``: search for spaces
"""
def init(_):
    """Validate the configured :py:obj:`huggingface_endpoint`.

    The single positional argument is accepted but not used.

    :raises SearxEngineAPIException: if the endpoint is not one of
        ``datasets``, ``models`` or ``spaces``.
    """
    supported_endpoints = ('datasets', 'models', 'spaces')
    if huggingface_endpoint in supported_endpoints:
        return
    raise SearxEngineAPIException(f"Unsupported Hugging Face endpoint: {huggingface_endpoint}")
def request(query, params):
    """Store the search URL for *query* in ``params["url"]``.

    The URL targets ``/api/<huggingface_endpoint>`` below :py:obj:`base_url`
    and carries the URL-encoded ``direction`` and ``search`` arguments.

    :param query: the search terms.
    :param params: request parameters; mutated in place.
    :returns: the same ``params`` object that was passed in.
    """
    encoded_args = urlencode({"direction": -1, "search": query})
    api_url = f"{base_url}/api/{huggingface_endpoint}"
    params["url"] = f"{api_url}?{encoded_args}"
    return params
def response(resp) -> EngineResults:
    """Convert the JSON payload of *resp* into search results.

    The decoded payload is iterated and every entry becomes one
    :py:obj:`MainResult`:

    - ``id`` is used as the title and to build the result URL,
    - ``createdAt`` (if present and parsable) becomes ``publishedDate``,
    - ``likes``, ``downloads``, ``tags`` and ``description`` (when present and
      truthy) are joined with ``" | "`` to form the content.

    :param resp: response object providing a ``json()`` method.
    :returns: the collected results.
    """
    results = EngineResults()

    # Model pages are addressed directly below the site root, all other
    # endpoints are addressed below a path segment named after the endpoint.
    # The endpoint does not change while iterating, so compute this once.
    if huggingface_endpoint == 'models':
        url_prefix = base_url
    else:
        url_prefix = f"{base_url}/{huggingface_endpoint}"

    for entry in resp.json():
        url = f"{url_prefix}/{entry['id']}"

        # The publication date is best effort: an entry without a (valid)
        # timestamp must not abort the whole result list.  Reading the field
        # with .get() turns a missing key into ``None`` instead of raising a
        # KeyError that the handler below would not catch.
        published_date = None
        created_at = entry.get("createdAt")
        if created_at is not None:
            try:
                published_date = datetime.strptime(created_at, "%Y-%m-%dT%H:%M:%S.%fZ")
            except (ValueError, TypeError):
                pass

        contents = []
        if entry.get("likes"):
            contents.append(f"Likes: {entry['likes']}")
        if entry.get("downloads"):
            contents.append(f"Downloads: {entry['downloads']:,}")
        if entry.get("tags"):
            contents.append(f"Tags: {', '.join(entry['tags'])}")
        if entry.get("description"):
            contents.append(f"Description: {entry['description']}")

        item = MainResult(
            title=entry["id"],
            content=html_to_text(" | ".join(contents)),
            url=url,
            publishedDate=published_date,
        )
        results.add(item)

    return results

View File

@ -1134,6 +1134,22 @@ engines:
- name: il post
engine: il_post
shortcut: pst
- name: huggingface
engine: huggingface
shortcut: hf
disabled: true
- name: huggingface datasets
huggingface_endpoint: datasets
engine: huggingface
shortcut: hfd
disabled: true
- name: huggingface spaces
huggingface_endpoint: spaces
engine: huggingface
shortcut: hfs
disabled: true
- name: imdb