diff --git a/docs/admin/settings/settings_engines.rst b/docs/admin/settings/settings_engines.rst index 944972143..c614d8b59 100644 --- a/docs/admin/settings/settings_engines.rst +++ b/docs/admin/settings/settings_engines.rst @@ -148,6 +148,8 @@ engine is shown. Most of the options have a default value or even are optional. ``display_error_messages`` : default ``true`` When an engine returns an error, the message is displayed on the user interface. +.. _engine network: + ``network`` : optional Use the network configuration from another engine. In addition, there are two default networks: @@ -257,4 +259,3 @@ Example configuration in settings.yml for a German and English speaker: When searching, the default google engine will return German results and "google english" will return English results. - diff --git a/docs/dev/engines/online/chinaso.rst b/docs/dev/engines/online/chinaso.rst new file mode 100644 index 000000000..31fba3790 --- /dev/null +++ b/docs/dev/engines/online/chinaso.rst @@ -0,0 +1,8 @@ +.. _chinaso engine: + +======= +ChinaSo +======= + +.. automodule:: searx.engines.chinaso + :members: diff --git a/searx/engines/chinaso.py b/searx/engines/chinaso.py index 23c2a2a82..ebf7682fc 100644 --- a/searx/engines/chinaso.py +++ b/searx/engines/chinaso.py @@ -1,5 +1,60 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -"""ChinaSo: A search engine from ChinaSo.""" +"""ChinaSo_, a search engine for the Chinese language area. + +.. attention:: + + ChinaSo engine does not return real URLs, the links from these search + engines violate the privacy of the users!! + + We try to find a solution for this problem, please follow `issue #4694`_. + + As long as the problem has not been resolved, these engines are + not active in a standard setup (``inactive: true``). + +.. _ChinaSo: https://www.chinaso.com/ +.. 
_issue #4694: https://github.com/searxng/searxng/issues/4694 + +Configuration +============= + +The engine has the following additional settings: + +- :py:obj:`chinaso_category` (:py:obj:`ChinasoCategoryType`) +- :py:obj:`chinaso_news_source` (:py:obj:`ChinasoNewsSourceType`) + +In the example below, all three ChinaSo engines are using the :ref:`network +<engine network>` from the ``chinaso news`` engine. + +.. code:: yaml + + - name: chinaso news + engine: chinaso + shortcut: chinaso + categories: [news] + chinaso_category: news + chinaso_news_source: all + + - name: chinaso images + engine: chinaso + network: chinaso news + shortcut: chinasoi + categories: [images] + chinaso_category: images + + - name: chinaso videos + engine: chinaso + network: chinaso news + shortcut: chinasov + categories: [videos] + chinaso_category: videos + + +Implementations +=============== + +""" + +import typing from urllib.parse import urlencode from datetime import datetime @@ -20,13 +75,31 @@ paging = True time_range_support = True results_per_page = 10 categories = [] -chinaso_category = 'news' + +ChinasoCategoryType = typing.Literal['news', 'videos', 'images'] """ChinaSo supports news, videos, images search. - ``news``: search for news - ``videos``: search for videos - ``images``: search for images + +In the category ``news`` you can additionally filter by option +:py:obj:`chinaso_news_source`. 
""" +chinaso_category = 'news' +"""Configure ChinaSo category (:py:obj:`ChinasoCategoryType`).""" + +ChinasoNewsSourceType = typing.Literal['CENTRAL', 'LOCAL', 'BUSINESS', 'EPAPER', 'all'] +"""Filtering ChinaSo-News results by source: + +- ``CENTRAL``: central publication +- ``LOCAL``: local publication +- ``BUSINESS``: business publication +- ``EPAPER``: E-Paper +- ``all``: all sources +""" +chinaso_news_source: ChinasoNewsSourceType = 'all' +"""Configure ChinaSo-News type (:py:obj:`ChinasoNewsSourceType`).""" time_range_dict = {'day': '24h', 'week': '1w', 'month': '1m', 'year': '1y'} @@ -35,7 +108,9 @@ base_url = "https://www.chinaso.com" def init(_): if chinaso_category not in ('news', 'videos', 'images'): - raise SearxEngineAPIException(f"Unsupported category: {chinaso_category}") + raise ValueError(f"Unsupported category: {chinaso_category}") + if chinaso_category == 'news' and chinaso_news_source not in typing.get_args(ChinasoNewsSourceType): + raise ValueError(f"Unsupported news source: {chinaso_news_source}") def request(query, params): @@ -56,6 +131,11 @@ def request(query, params): 'params': {'start_index': (params["pageno"] - 1) * results_per_page, 'rn': results_per_page}, }, } + if chinaso_news_source != 'all': + if chinaso_news_source == 'EPAPER': + category_config['news']['params']["type"] = 'EPAPER' + else: + category_config['news']['params']["cate"] = chinaso_news_source query_params.update(category_config[chinaso_category]['params']) diff --git a/searx/settings.yml b/searx/settings.yml index 07d0bdd2d..d756e9b1c 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -619,23 +619,37 @@ engines: # to show premium or plus results too: # skip_premium: false + # WARNING: links from chinaso.com voilate users privacy + # Before activate these engines its mandatory to read + # - https://github.com/searxng/searxng/issues/4694 + # - https://docs.searxng.org/dev/engines/online/chinaso.html + - name: chinaso news - chinaso_category: news engine: chinaso 
shortcut: chinaso + categories: [news] + chinaso_category: news + chinaso_news_source: all disabled: true + inactive: true - name: chinaso images - chinaso_category: images engine: chinaso + network: chinaso news shortcut: chinasoi + categories: [images] + chinaso_category: images disabled: true + inactive: true - name: chinaso videos - chinaso_category: videos engine: chinaso + network: chinaso news shortcut: chinasov + categories: [videos] + chinaso_category: videos disabled: true + inactive: true - name: cloudflareai engine: cloudflareai