From f94802f2d2268e5a458bef5985e0f4f53ab9882a Mon Sep 17 00:00:00 2001
From: Zhijie He
Date: Sat, 29 Mar 2025 12:06:41 +0800
Subject: [PATCH] [feat] engines: add Hugging Face engine

---
 docs/dev/engines/online/huggingface.rst |   8 ++
 searx/engines/huggingface.py            | 127 ++++++++++++++++++++++++
 searx/settings.yml                      |  16 ++++
 3 files changed, 151 insertions(+)
 create mode 100644 docs/dev/engines/online/huggingface.rst
 create mode 100644 searx/engines/huggingface.py

diff --git a/docs/dev/engines/online/huggingface.rst b/docs/dev/engines/online/huggingface.rst
new file mode 100644
index 000000000..06727e719
--- /dev/null
+++ b/docs/dev/engines/online/huggingface.rst
@@ -0,0 +1,8 @@
+.. _huggingface engine:
+
+============
+Hugging Face
+============
+
+.. automodule:: searx.engines.huggingface
+  :members:
diff --git a/searx/engines/huggingface.py b/searx/engines/huggingface.py
new file mode 100644
index 000000000..b49bb3f21
--- /dev/null
+++ b/searx/engines/huggingface.py
@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""`Hugging Face`_ search engine for SearXNG.
+
+.. _Hugging Face: https://huggingface.co
+
+Configuration
+=============
+
+The engine has the following additional settings:
+
+- :py:obj:`huggingface_endpoint`
+
+Configurations for endpoints:
+
+.. code:: yaml
+
+  - name: huggingface
+    engine: huggingface
+    shortcut: hf
+
+  - name: huggingface datasets
+    huggingface_endpoint: datasets
+    engine: huggingface
+    shortcut: hfd
+
+  - name: huggingface spaces
+    huggingface_endpoint: spaces
+    engine: huggingface
+    shortcut: hfs
+
+Implementations
+===============
+
+"""
+
+from urllib.parse import urlencode
+from datetime import datetime
+
+from searx.exceptions import SearxEngineAPIException
+from searx.utils import html_to_text
+from searx.result_types import EngineResults, MainResult
+
+about = {
+    "website": "https://huggingface.co/",
+    "wikidata_id": "Q108943604",
+    "official_api_documentation": "https://huggingface.co/docs/hub/en/api",
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": "JSON",
+}
+
+categories = ['it', 'repos']
+
+base_url = "https://huggingface.co"
+
+huggingface_endpoint = 'models'
+"""Hugging Face supports datasets, models, spaces as search endpoint.
+
+- ``datasets``: search for datasets
+- ``models``: search for models
+- ``spaces``: search for spaces
+"""
+
+
+def init(_):
+    """Validate the configured :py:obj:`huggingface_endpoint`.
+
+    Raises :py:obj:`SearxEngineAPIException` when the endpoint is not one of
+    ``datasets``, ``models`` or ``spaces``.
+    """
+    if huggingface_endpoint not in ('datasets', 'models', 'spaces'):
+        raise SearxEngineAPIException(f"Unsupported Hugging Face endpoint: {huggingface_endpoint}")
+
+
+def request(query, params):
+    """Store the search URL for ``query`` in ``params["url"]`` and return ``params``."""
+    query_params = {
+        "direction": -1,
+        "search": query,
+    }
+
+    params["url"] = f"{base_url}/api/{huggingface_endpoint}?{urlencode(query_params)}"
+
+    return params
+
+
+def response(resp) -> EngineResults:
+    """Build one :py:obj:`MainResult` per entry of the JSON returned by the API."""
+    results = EngineResults()
+
+    data = resp.json()
+
+    for entry in data:
+        # URLs of models carry no endpoint prefix; datasets and spaces do.
+        if huggingface_endpoint != 'models':
+            url = f"{base_url}/{huggingface_endpoint}/{entry['id']}"
+        else:
+            url = f"{base_url}/{entry['id']}"
+
+        # Read "createdAt" with .get(): a missing key yields None, which
+        # strptime() rejects with the TypeError handled below, so the entry is
+        # kept without a date instead of a KeyError aborting the whole response.
+        published_date = None
+        try:
+            published_date = datetime.strptime(entry.get("createdAt"), "%Y-%m-%dT%H:%M:%S.%fZ")
+        except (ValueError, TypeError):
+            pass
+
+        contents = []
+        if entry.get("likes"):
+            contents.append(f"Likes: {entry['likes']}")
+        if entry.get("downloads"):
+            contents.append(f"Downloads: {entry['downloads']:,}")
+        if entry.get("tags"):
+            contents.append(f"Tags: {', '.join(entry['tags'])}")
+        if entry.get("description"):
+            contents.append(f"Description: {entry['description']}")
+
+        item = MainResult(
+            title=entry["id"],
+            content=html_to_text(" | ".join(contents)),
+            url=url,
+            publishedDate=published_date,
+        )
+        results.add(item)
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 5a87da3cd..c9435e4de 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1134,6 +1134,22 @@ engines:
   - name: il post
     engine: il_post
     shortcut: pst
+
+  - name: huggingface
+    engine: huggingface
+    shortcut: hf
+    disabled: true
+
+  - name: huggingface datasets
+    huggingface_endpoint: datasets
+    engine: huggingface
+    shortcut: hfd
+    disabled: true
+
+  - name: huggingface spaces
+    huggingface_endpoint: spaces
+    engine: huggingface
+    shortcut: hfs
     disabled: true
 
   - name: imdb