[feat] engines: add Hugging Face engine
This commit is contained in:
parent
d1c584b961
commit
f94802f2d2
8
docs/dev/engines/online/huggingface.rst
Normal file
8
docs/dev/engines/online/huggingface.rst
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
.. _huggingface engine:
|
||||||
|
|
||||||
|
============
|
||||||
|
Hugging Face
|
||||||
|
============
|
||||||
|
|
||||||
|
.. automodule:: searx.engines.huggingface
|
||||||
|
:members:
|
116
searx/engines/huggingface.py
Normal file
116
searx/engines/huggingface.py
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""`Hugging Face`_ search engine for SearXNG.
|
||||||
|
|
||||||
|
.. _Hugging Face: https://huggingface.co
|
||||||
|
|
||||||
|
Configuration
|
||||||
|
=============
|
||||||
|
|
||||||
|
The engine has the following additional settings:
|
||||||
|
|
||||||
|
- :py:obj:`huggingface_endpoint`
|
||||||
|
|
||||||
|
Configurations for endpoints:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
- name: huggingface
|
||||||
|
engine: huggingface
|
||||||
|
shortcut: hf
|
||||||
|
|
||||||
|
- name: huggingface datasets
|
||||||
|
huggingface_endpoint: datasets
|
||||||
|
engine: huggingface
|
||||||
|
shortcut: hfd
|
||||||
|
|
||||||
|
- name: huggingface spaces
|
||||||
|
huggingface_endpoint: spaces
|
||||||
|
engine: huggingface
|
||||||
|
shortcut: hfs
|
||||||
|
|
||||||
|
Implementations
|
||||||
|
===============
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from searx.exceptions import SearxEngineAPIException
|
||||||
|
from searx.utils import html_to_text
|
||||||
|
from searx.result_types import EngineResults, MainResult
|
||||||
|
|
||||||
|
about = {
|
||||||
|
"website": "https://huggingface.co/",
|
||||||
|
"wikidata_id": "Q108943604",
|
||||||
|
"official_api_documentation": "https://huggingface.co/docs/hub/en/api",
|
||||||
|
"use_official_api": True,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": "JSON",
|
||||||
|
}
|
||||||
|
|
||||||
|
categories = ['it', 'repos']
|
||||||
|
|
||||||
|
base_url = "https://huggingface.co"
|
||||||
|
|
||||||
|
huggingface_endpoint = 'models'
|
||||||
|
"""Hugging Face supports datasets, models, spaces as search endpoint.
|
||||||
|
|
||||||
|
- ``datasets``: search for datasets
|
||||||
|
- ``models``: search for models
|
||||||
|
- ``spaces``: search for spaces
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def init(_):
|
||||||
|
if huggingface_endpoint not in ('datasets', 'models', 'spaces'):
|
||||||
|
raise SearxEngineAPIException(f"Unsupported Hugging Face endpoint: {huggingface_endpoint}")
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
query_params = {
|
||||||
|
"direction": -1,
|
||||||
|
"search": query,
|
||||||
|
}
|
||||||
|
|
||||||
|
params["url"] = f"{base_url}/api/{huggingface_endpoint}?{urlencode(query_params)}"
|
||||||
|
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp) -> EngineResults:
|
||||||
|
results = EngineResults()
|
||||||
|
|
||||||
|
data = resp.json()
|
||||||
|
|
||||||
|
for entry in data:
|
||||||
|
if huggingface_endpoint != 'models':
|
||||||
|
url = f"{base_url}/{huggingface_endpoint}/{entry['id']}"
|
||||||
|
else:
|
||||||
|
url = f"{base_url}/{entry['id']}"
|
||||||
|
|
||||||
|
published_date = None
|
||||||
|
try:
|
||||||
|
published_date = datetime.strptime(entry["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
contents = []
|
||||||
|
if entry.get("likes"):
|
||||||
|
contents.append(f"Likes: {entry['likes']}")
|
||||||
|
if entry.get("downloads"):
|
||||||
|
contents.append(f"Downloads: {entry['downloads']:,}")
|
||||||
|
if entry.get("tags"):
|
||||||
|
contents.append(f"Tags: {', '.join(entry['tags'])}")
|
||||||
|
if entry.get("description"):
|
||||||
|
contents.append(f"Description: {entry['description']}")
|
||||||
|
|
||||||
|
item = MainResult(
|
||||||
|
title=entry["id"],
|
||||||
|
content=html_to_text(" | ".join(contents)),
|
||||||
|
url=url,
|
||||||
|
publishedDate=published_date,
|
||||||
|
)
|
||||||
|
results.add(item)
|
||||||
|
|
||||||
|
return results
|
@ -1134,6 +1134,22 @@ engines:
|
|||||||
- name: il post
|
- name: il post
|
||||||
engine: il_post
|
engine: il_post
|
||||||
shortcut: pst
|
shortcut: pst
|
||||||
|
|
||||||
|
- name: huggingface
|
||||||
|
engine: huggingface
|
||||||
|
shortcut: hf
|
||||||
|
disabled: true
|
||||||
|
|
||||||
|
- name: huggingface datasets
|
||||||
|
huggingface_endpoint: datasets
|
||||||
|
engine: huggingface
|
||||||
|
shortcut: hfd
|
||||||
|
disabled: true
|
||||||
|
|
||||||
|
- name: huggingface spaces
|
||||||
|
huggingface_endpoint: spaces
|
||||||
|
engine: huggingface
|
||||||
|
shortcut: hfs
|
||||||
disabled: true
|
disabled: true
|
||||||
|
|
||||||
- name: imdb
|
- name: imdb
|
||||||
|
Loading…
x
Reference in New Issue
Block a user