
The EngineCache class replaces all the previously separate cache solutions used by the engines: - demo_offline.py - duckduckgo.py - radio_browser.py - soundcloud.py - startpage.py - wolframalpha_api.py - wolframalpha_noapi.py Search terms to test most of the modified engines:: !ddg !rb !sc !sp !wa test !ddg !rb !sc !sp !wa foo For introspection of the DB, jump into the developer environment and run the command to show the cache state:: $ ./manage pyenv.cmd bash --norc --noprofile (py3) python -m searx.enginelib cache state cache tables and key/values =========================== [demo_offline ] 2025-04-22 11:32:50 count --> (int) 4 [startpage ] 2025-04-22 12:32:30 SC_CODE --> (str) fSOBnhEMlDfE20 [duckduckgo ] 2025-04-22 12:32:31 4dff493e.... --> (str) 4-128634958369380006627592672385352473325 [duckduckgo ] 2025-04-22 12:40:06 3e2583e2.... --> (str) 4-263126175288871260472289814259666848451 [radio_browser ] 2025-04-23 11:33:08 servers --> (list) ['https://de2.api.radio-browser.info', ...] [soundcloud ] 2025-04-29 11:40:06 guest_client_id --> (str) EjkRJG0BLNEZquRiPZYdNtJdyGtTuHdp [wolframalpha ] 2025-04-22 12:40:06 code --> (str) 5aa79f86205ad26188e0e26e28fb7ae7 number of tables: 6 number of key/value pairs: 7 In the "cache tables and key/values" section, the table name (engine name) is in the first position, the calculated expiry date is in the second, and the key and value are shown in the third and fourth positions. About duckduckgo: The *vqd code* of ddg depends on the query term, and therefore the key is a hash value of the query term (so that the raw query term is not stored).
In the "properties of ENGINES_CACHE" section all properties of the SQLiteAppl / ExpireCache and their last modification date are shown:: properties of ENGINES_CACHE =========================== [last modified: 2025-04-22 11:32:27] DB_SCHEMA : 1 [last modified: 2025-04-22 11:32:27] LAST_MAINTENANCE : [last modified: 2025-04-22 11:32:27] crypt_hash : ca612e3566fdfd7cf7efe2b1c9349f461158d07cb78a3750e5c5be686aa8ebdc [last modified: 2025-04-22 11:32:30] CACHE-TABLE--demo_offline: demo_offline [last modified: 2025-04-22 11:32:30] CACHE-TABLE--startpage: startpage [last modified: 2025-04-22 11:32:31] CACHE-TABLE--duckduckgo: duckduckgo [last modified: 2025-04-22 11:33:08] CACHE-TABLE--radio_browser: radio_browser [last modified: 2025-04-22 11:40:06] CACHE-TABLE--soundcloud: soundcloud [last modified: 2025-04-22 11:40:06] CACHE-TABLE--wolframalpha: wolframalpha These properties provide information about the state of the ExpireCache and control the behavior. For example, the maintenance intervals are controlled by the last modification date of the LAST_MAINTENANCE property and the hash value of the password can be used to detect whether the password has been changed (in this case the DB entries can no longer be decrypted and the entire cache must be discarded). Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
142 lines
4.2 KiB
Python
142 lines
4.2 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
"""Wolfram|Alpha (Science)
|
|
|
|
"""
|
|
|
|
from urllib.parse import urlencode
|
|
|
|
import lxml.etree
|
|
|
|
# engine metadata
about = {
    "website": 'https://www.wolframalpha.com',
    "wikidata_id": 'Q207006',
    "official_api_documentation": 'https://products.wolframalpha.com/api/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'XML',
}

# URL templates: ``search_url`` is the API endpoint, ``site_url`` the
# human-facing page for the same query
search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
site_url = 'https://www.wolframalpha.com/input/?{query}'
api_key = ''  # defined in settings.yml

# XPath expressions evaluated against the XML answer
failure_xpath = '/queryresult[attribute::success="false"]'
input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext'
pods_xpath = '//pod'
subpods_xpath = './subpod'
pod_primary_xpath = './@primary'
pod_id_xpath = './@id'
pod_title_xpath = './@title'
plaintext_xpath = './plaintext'
image_xpath = './img'
img_src_xpath = './@src'
img_alt_xpath = './@alt'

# Pods to display as an image in the infobox: these pods do return a
# plaintext, but they look better and are more useful as images.
image_pods = {'VisualRepresentation', 'Illustration'}
|
|
|
|
|
|
def request(query, params):
    """Prepare the outgoing search request for ``query``.

    Stores the API URL (built from ``search_url`` with the module-level
    ``api_key``) in ``params['url']`` and the matching page on the web site
    (built from ``site_url``) in ``params['headers']['Referer']``.

    :param query: the search term; it is URL-encoded before being inserted.
    :param params: request parameters; must already contain a ``headers``
        mapping.  It is modified in place.
    :returns: the same ``params`` object.
    """
    api_query = urlencode({'input': query})
    params['url'] = search_url.format(query=api_query, api_key=api_key)

    site_query = urlencode({'i': query})
    params['headers']['Referer'] = site_url.format(query=site_query)

    return params
|
|
|
|
|
|
# Translation table for private use area (PUA) characters.  It is built once
# at import time instead of on every call.
_PUA_TRANSLATION = str.maketrans(
    {
        '\uf522': '\u2192',  # right arrow
        '\uf7b1': '\u2115',  # set of natural numbers
        '\uf7b4': '\u211a',  # set of rational numbers
        '\uf7b5': '\u211d',  # set of real numbers
        '\uf7bd': '\u2124',  # set of integer numbers
        '\uf74c': 'd',  # differential
        '\uf74d': '\u212f',  # euler's number
        '\uf74e': 'i',  # imaginary number
        '\uf7d9': '=',  # equals sign
    }
)


def replace_pua_chars(text):
    """Replace private use area characters to make ``text`` legible.

    Every character listed in ``_PUA_TRANSLATION`` is substituted by its
    standard counterpart; all other characters are kept unchanged.  None of
    the replacement values is itself a mapped character, so the single
    ``str.translate`` pass yields the same result as replacing the characters
    one after another.

    :param text: the string to clean up.
    :returns: a new string with the mapped characters replaced.
    """
    return text.translate(_PUA_TRANSLATION)
|
|
|
|
|
|
def response(resp):
    """Turn the XML answer of the API into an infobox and a link result.

    Only ``resp.content`` (the XML document) and
    ``resp.request.headers['Referer']`` (the site URL stored by ``request``)
    are read from ``resp``.

    :param resp: the HTTP response of the search request.
    :returns: an empty list if the answer is flagged as failed or contains
        nothing displayable; otherwise a list of two dicts -- the infobox
        (``infobox``, ``attributes``, ``urls``) followed by a plain result
        (``url``, ``title``, ``content``).
    :raises IndexError: if a pod lacks its id/title attribute or an image
        lacks its src/alt attribute.
    """
    search_results = lxml.etree.XML(resp.content)

    # return empty array if there are no results
    if search_results.xpath(failure_xpath):
        return []

    # Preferred title is the plaintext of the input pod.  The pod may be
    # missing, or its plaintext element may be empty (``.text`` is None); both
    # cases fall back to the empty string.
    input_nodes = search_results.xpath(input_xpath)
    infobox_title = (input_nodes[0].text or "") if input_nodes else ""
    infobox_title = replace_pua_chars(infobox_title)

    result_chunks = []
    result_content = ""
    for pod in search_results.xpath(pods_xpath):
        pod_id = pod.xpath(pod_id_xpath)[0]
        pod_title = pod.xpath(pod_title_xpath)[0]
        pod_is_result = pod.xpath(pod_primary_xpath)

        # Appends either a text or an image, depending on which one is more suitable
        for subpod in pod.xpath(subpods_xpath):
            # a subpod without a <plaintext> child is treated like one with
            # empty text, so that its image (if any) can still be used
            plaintext_nodes = subpod.xpath(plaintext_xpath)
            content = plaintext_nodes[0].text if plaintext_nodes else None
            image = subpod.xpath(image_xpath)

            if content and pod_id not in image_pods:
                # clean up first, so that the link snippet and the fallback
                # title get the same legible text as the infobox attribute
                content = replace_pua_chars(content)

                # the snippet is the first non-input plaintext and is
                # overwritten by any pod carrying a ``primary`` attribute
                if (pod_is_result or not result_content) and pod_id != "Input":
                    result_content = "%s: %s" % (pod_title, content)

                # if no input pod was found, title is first plaintext pod
                if not infobox_title:
                    infobox_title = content

                result_chunks.append({'label': pod_title, 'value': content})

            elif image:
                img = image[0]
                result_chunks.append(
                    {
                        'label': pod_title,
                        'image': {'src': img.xpath(img_src_xpath)[0], 'alt': img.xpath(img_alt_xpath)[0]},
                    }
                )

    if not result_chunks:
        return []

    site_link = resp.request.headers['Referer']
    title = "Wolfram Alpha (%s)" % infobox_title

    return [
        # infobox
        {
            'infobox': infobox_title,
            'attributes': result_chunks,
            'urls': [{'title': 'Wolfram|Alpha', 'url': site_link}],
        },
        # link to site
        {'url': site_link, 'title': title, 'content': result_content},
    ]
|