From eaa96a0b6478baff605f3c7acfbc3072deb05db7 Mon Sep 17 00:00:00 2001 From: Jordan Date: Sun, 16 Mar 2025 13:08:45 -0700 Subject: [PATCH] initial implementation --- client/simple/package.json | 4 +- client/simple/src/js/main/quick-answer.js | 280 ++++++++++++++++++ client/simple/src/js/searxng.js | 1 + client/simple/src/less/definitions.less | 16 + client/simple/src/less/quick-answer.less | 83 ++++++ client/simple/src/less/style.less | 3 + client/simple/vite.config.js | 14 + docs/admin/settings/settings_ui.rst | 3 + searx/plugins/quick_answer.py | 113 +++++++ searx/preferences.py | 52 ++++ searx/settings.yml | 2 + searx/templates/simple/base.html | 3 + searx/templates/simple/preferences.html | 3 +- .../simple/preferences/quick_answer.html | 39 +++ searx/webapp.py | 102 +++++++ 15 files changed, 716 insertions(+), 2 deletions(-) create mode 100644 client/simple/src/js/main/quick-answer.js create mode 100644 client/simple/src/less/quick-answer.less create mode 100644 searx/plugins/quick_answer.py create mode 100644 searx/templates/simple/preferences/quick_answer.html diff --git a/client/simple/package.json b/client/simple/package.json index 1f1e8318c..966bbb726 100644 --- a/client/simple/package.json +++ b/client/simple/package.json @@ -35,6 +35,8 @@ "webpack-cli": "^6.0.1" }, "dependencies": { - "autocomplete-js": "^2.7.1" + "autocomplete-js": "^2.7.1", + "katex": "^0.16.19", + "marked": "^15.0.6" } } diff --git a/client/simple/src/js/main/quick-answer.js b/client/simple/src/js/main/quick-answer.js new file mode 100644 index 000000000..d158a31b6 --- /dev/null +++ b/client/simple/src/js/main/quick-answer.js @@ -0,0 +1,280 @@ +import { marked } from "../../../node_modules/marked/lib/marked.esm.js"; +import renderMathInElement from "../../../node_modules/katex/dist/contrib/auto-render.js"; + +document.addEventListener("DOMContentLoaded", () => { + if (typeof window.referenceMap === "undefined") { + console.error("referenceMap is not defined"); + return; + } + + 
marked.setOptions({ + gfm: true, + breaks: true, + highlight: function (code, language) { + if (language) { + return ( + '
' +
+          code.replace(
+            /[&<>'"]/g,
+            (c) =>
+              ({
+                "&": "&amp;",
+                "<": "&lt;",
+                ">": "&gt;",
+                "'": "&#39;",
+                '"': "&quot;",
+              })[c],
+          ) +
+          "
" + ); + } + return code; + }, + }); + + const renderer = new marked.Renderer(); + renderer.link = (token) => { + const href = token.href; + const title = token.title || ""; + const text = token.text; + return `${text}`; + }; + marked.use({ renderer }); + + // Custom math handling; we roll our own because LLMs are inconsistent(!) + const mathExtension = { + name: "math", + level: "block", + start(src) { + return src.match(/\$\$/)?.index; + }, + tokenizer(src) { + const rule = /^\$\$([\s\S]+?)\$\$/; + const match = rule.exec(src); + if (match) { + return { + type: "math", + raw: match[0], + text: match[1].trim(), + }; + } + }, + renderer(token) { + return `$$${token.text}$$`; + }, + }; + + const inlineMathExtension = { + name: "inlineMath", + level: "inline", + start(src) { + return src.match(/\$/)?.index; + }, + tokenizer(src) { + const rule = /^\$([^$\n]+?)\$/; + const match = rule.exec(src); + if (match) { + return { + type: "inlineMath", + raw: match[0], + text: match[1].trim(), + }; + } + }, + renderer(token) { + return `$${token.text}$`; + }, + }; + + marked.use({ extensions: [mathExtension, inlineMathExtension] }); + + // Answer + const qa = document.querySelector(".infobox p"); + if (!qa) { + console.error("Quick answer container not found"); + return; + } + qa.id = "quick-answer"; + qa.className = "markdown-content"; + + // References + const refContainer = document.createElement("div"); + refContainer.className = "references"; + const refHeading = document.createElement("h4"); + refHeading.textContent = "References"; + refContainer.appendChild(refHeading); + const refList = document.createElement("ol"); + refContainer.appendChild(refList); + qa.after(refContainer); + + let accumulatedText = ""; + let lastProcessedLength = 0; + let references = {}; + let referenceCounter = 1; + let referenceMap = window.referenceMap; + + function escapeHtml(unsafe) { + return unsafe.replace(/[&<>"']/g, function (m) { + switch (m) { + case "&": + return "&"; + case "<": + 
return "<"; + case ">": + return ">"; + case '"': + return """; + case "'": + return "'"; + default: + return m; + } + }); + } + + function replaceCitations(text) { + // First pass: replace citations with temporary markers to be replaced by actual spaces in the second pass + // LLMs do not consistently follow prompted formatting, and inline citations *need* to be space-delimited + let processedText = text.replace( + /【(\d+)】/g, + (match, citationIndex, offset) => { + const isFollowedByCitation = text + .slice(offset + match.length) + .match(/^【\d+】/); + const source = referenceMap[citationIndex]; + + if (source) { + const [url, title] = source; + let refNumber = references[citationIndex]; + const escapedTitle = escapeHtml(title); + + if (!refNumber) { + const refItem = document.createElement("li"); + const refLink = document.createElement("a"); + refLink.href = url; + refLink.textContent = title; + refLink.rel = "noreferrer"; + refItem.appendChild(refLink); + refList.appendChild(refItem); + references[citationIndex] = referenceCounter; + refNumber = referenceCounter; + referenceCounter += 1; + } + + // Add look-ahead marker |||CITATION_SPACE||| if followed by another citation + return `${refNumber}${ + isFollowedByCitation ? 
"|||CITATION_SPACE|||" : "" + }`; + } + return match; + }, + ); + + // Second pass: replace temporary markers with spaces + return processedText.replace(/\|\|\|CITATION_SPACE\|\|\|/g, " "); + } + + fetch("/quick_answer", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + system: window.systemPrompt, + user: window.userPrompt, + token: window.userToken, + model: window.userModel, + providers: window.userProviders, + }), + }) + .then((response) => { + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + + function processMarkdownChunk(text) { + accumulatedText += text; + + const markdownElements = { + codeBlock: { start: "```", end: "```" }, + bold: { start: "**", end: "**" }, + italic: { start: "_", end: "_" }, + link: { start: "[", end: ")" }, + mathDisplay: { start: "$$", end: "$$" }, + mathInline: { start: "$", end: "$" }, + }; + + let processUpTo = accumulatedText.length; + + // Find last complete element + for (const element of Object.values(markdownElements)) { + const lastStart = accumulatedText.lastIndexOf(element.start); + if (lastStart > lastProcessedLength) { + const nextEnd = accumulatedText.indexOf( + element.end, + lastStart + element.start.length, + ); + if (nextEnd === -1) { + processUpTo = Math.min(processUpTo, lastStart); + } + } + } + + // Process complete portion + if (processUpTo > lastProcessedLength) { + const processedText = replaceCitations( + accumulatedText.substring(0, processUpTo), + ); + qa.innerHTML = marked.parse(processedText); + + renderMathInElement(qa, { + delimiters: [ + { left: "$$", right: "$$", display: true }, + { left: "$", right: "$", display: false }, + ], + throwOnError: false, + }); + + lastProcessedLength = processUpTo; + } + } + + function readStream() { + reader + .read() + .then(({ done, value }) => { + if (done) { + // Process any remaining text + if (accumulatedText.length > lastProcessedLength) { + const processedText = 
replaceCitations(accumulatedText); + qa.innerHTML = marked.parse(processedText); + renderMathInElement(qa, { + delimiters: [ + { left: "$$", right: "$$", display: true }, + { left: "$", right: "$", display: false }, + ], + throwOnError: false, + }); + } + return; + } + + const text = decoder.decode(value, { stream: true }); + processMarkdownChunk(text); + + // Scroll to bottom of the div to show new content + qa.scrollTop = qa.scrollHeight; + + // Continue reading + readStream(); + }) + .catch((error) => console.error("Error:", error)); + } + readStream(); + }) + .catch((error) => { + console.error("Error:", error); + qa.innerHTML = marked.parse(`**Error**: ${error.message}`); + }); +}); diff --git a/client/simple/src/js/searxng.js b/client/simple/src/js/searxng.js index c9a6eea43..55cf0fef9 100644 --- a/client/simple/src/js/searxng.js +++ b/client/simple/src/js/searxng.js @@ -5,3 +5,4 @@ import "./main/mapresult.js"; import "./main/preferences.js"; import "./main/results.js"; import "./main/search.js"; +import "./main/quick-answer.js"; diff --git a/client/simple/src/less/definitions.less b/client/simple/src/less/definitions.less index 395a02cde..a8627c309 100644 --- a/client/simple/src/less/definitions.less +++ b/client/simple/src/less/definitions.less @@ -130,6 +130,14 @@ // Favicons Colors --color-favicon-background-color: #ddd; --color-favicon-border-color: #ccc; + + /// Quick Answer Colors + --color-quick-answer-code-background: rgb(27 31 35 / 5%); + --color-quick-answer-pre-background: #f6f8fa; + --color-quick-answer-blockquote-border: #dfe2e5; + --color-quick-answer-blockquote-font: #6a737d; + --color-quick-answer-table-border: #dfe2e5; + --color-quick-answer-table-tr-background: #f6f8fa; } .dark-themes() { @@ -249,6 +257,14 @@ // Favicons Colors --color-favicon-background-color: #ddd; --color-favicon-border-color: #ccc; + + /// Quick Answer Colors + --color-quick-answer-code-background: #4d5a6f; + --color-quick-answer-pre-background: #4d5a6f; + 
--color-quick-answer-blockquote-border: #555; + --color-quick-answer-blockquote-font: #bbb; + --color-quick-answer-table-border: #555; + --color-quick-answer-table-tr-background: #4d5a6f; } .black-themes() { diff --git a/client/simple/src/less/quick-answer.less b/client/simple/src/less/quick-answer.less new file mode 100644 index 000000000..c58c3b797 --- /dev/null +++ b/client/simple/src/less/quick-answer.less @@ -0,0 +1,83 @@ +#sidebar .infobox .markdown-content code { + background-color: var(--color-quick-answer-code-background); + border-radius: 3px; + font-family: + SFMono-Regular, Consolas, "Liberation Mono", Menlo, Courier, monospace; + font-size: 85%; + margin: 0; + padding: 0.2em 0.4em; +} + +#sidebar .infobox .markdown-content pre { + background-color: var(--color-quick-answer-pre-background); + border-radius: 3px; + font-size: 85%; + overflow: auto; + padding: 16px; +} + +#sidebar .infobox .markdown-content pre > code { + background-color: transparent; + border: 0; + font-size: 100%; + margin: 0; + padding: 0; + white-space: pre; + word-break: normal; +} + +#sidebar .infobox .markdown-content blockquote { + border-left: 0.25em solid var(--color-quick-answer-blockquote-border); + color: var(--color-quick-answer-blockquote-font); + margin: 0; + padding: 0 1em; +} + +#sidebar .infobox .markdown-content a.inline-reference { + color: var(--color-result-link-font); + line-height: 1.4; + position: relative; + top: -0.2em; + vertical-align: top; + font-size: smaller; +} + +#sidebar .infobox .markdown-content table { + border-collapse: collapse; + margin: 1em 0; + width: 100%; +} + +#sidebar .infobox .markdown-content table th, +#sidebar .infobox .markdown-content table td { + border: 1px solid var(--color-quick-answer-table-border); + padding: 6px 13px; +} + +#sidebar .infobox .markdown-content table tr:nth-child(2n) { + background-color: var(--color-quick-answer-table-tr-background); +} + +#sidebar .infobox .markdown-content h1, +#sidebar .infobox 
.markdown-content h2, +#sidebar .infobox .markdown-content h3, +#sidebar .infobox .markdown-content h4 { + font-size: 1em; +} + +#sidebar .infobox .references a { + text-decoration: underline; +} + +#sidebar .infobox .references ol { + list-style-type: decimal; + list-style-position: inside; + margin-left: 0; + padding-left: 0; +} + +#sidebar .infobox .references ol li { + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} diff --git a/client/simple/src/less/style.less b/client/simple/src/less/style.less index 0454f121e..aaccd1b7e 100644 --- a/client/simple/src/less/style.less +++ b/client/simple/src/less/style.less @@ -32,6 +32,9 @@ // to center the results @import "style-center.less"; +// quick-answer plugin +@import "quick-answer.less"; + // sxng-icon-set .sxng-icon-set { display: inline-block; diff --git a/client/simple/vite.config.js b/client/simple/vite.config.js index f8d3e6757..cb5dc7012 100644 --- a/client/simple/vite.config.js +++ b/client/simple/vite.config.js @@ -22,6 +22,8 @@ const PATH = { brand: "src/brand", static: resolve(ROOT, "client/simple/static"), leaflet: resolve(ROOT, "client/simple/node_modules/leaflet/dist"), + katex: resolve(ROOT, "client/simple/node_modules/katex"), + marked: resolve(ROOT, "client/simple/node_modules/marked"), templates: resolve(ROOT, "searx/templates/simple"), }; @@ -133,6 +135,18 @@ export default defineConfig({ { src: PATH.leaflet + "/leaflet.{js,js.map}", dest: PATH.dist + "/js" }, { src: PATH.leaflet + "/images/*.png", dest: PATH.dist + "/css/images/" }, { src: PATH.leaflet + "/*.{css,css.map}", dest: PATH.dist + "/css" }, + ] + }), + + // Quick Answer (KaTeX + Marked) + + viteStaticCopy({ + targets: [ + { src: PATH.katex + "/dist/katex.js", dest: PATH.dist + "/js" }, + { src: PATH.katex + "/contrib/auto-render/auto-render.js", dest: PATH.dist + "/js" }, + { src: PATH.katex + "/dist/katex.css", dest: PATH.dist + "/css" }, + { src: PATH.katex + "/dist/fonts/*.{ttf,woff,woff2}", dest: PATH.dist + 
"/css/fonts/" }, + { src: PATH.marked + "/lib/marked.esm.js", dest: PATH.dist + "/js" }, { src: PATH.static + "/**/*", dest: PATH.dist }, ] }), diff --git a/docs/admin/settings/settings_ui.rst b/docs/admin/settings/settings_ui.rst index 0e0235594..9c4ec575a 100644 --- a/docs/admin/settings/settings_ui.rst +++ b/docs/admin/settings/settings_ui.rst @@ -72,3 +72,6 @@ ``url_formatting``: Formatting type to use for result URLs: ``pretty``, ``full`` or ``host``. + +``quick_answer_api``: + Quick Answer OpenAI-compatible API endpoint to query for search-supported LLM responses. diff --git a/searx/plugins/quick_answer.py b/searx/plugins/quick_answer.py new file mode 100644 index 000000000..44bcc52bc --- /dev/null +++ b/searx/plugins/quick_answer.py @@ -0,0 +1,113 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# pylint: disable=missing-module-docstring, missing-class-docstring +import json +from datetime import datetime + +from flask_babel import gettext + +from searx.plugins import Plugin, PluginInfo + + +class SXNGPlugin(Plugin): + id = "quick_answer" + default_on = False + + def __init__(self): + super().__init__() + + self.info = PluginInfo( + id=self.id, + name=gettext("Quick Answer"), + description=gettext("Use search results to obtain cited answers from LLMs by appending '?' to queries"), + examples=["Linear congruential generator?"], + preference_section="general/quick_answer", + ) + + def get_sys_prompt(self): + now = datetime.now() + return f""" + The current date is {now:%Y-%m-%d} + + You ALWAYS follow these guidelines when writing your response: + - Use markdown formatting to enhance clarity and readability of your response. + - If you need to include mathematical expressions, use LaTeX to format them properly. Only use LaTeX when necessary for math. + - Delimit inline mathematical expressions with '$', for example: $y = mx + b$. + - Delimit block mathematical expressions with '$$', for example: $$F = ma$$. 
+ - If you need to write code or program commands, format them as markdown code blocks. + - For all other output, use plain text formatting unless the user specifically requests otherwise. + - DO NOT include headers which only describe or rephrase the query before beginning your response. + - DO NOT include URLs or links in your response. + - ALWAYS enclose currency and price values in '**', for example: **$5.99**, to ensure they are formatted correctly. + + The relevant available information is contained within the tags. When a user asks a question, perform the following tasks: + 0. Examine the available information and assess whether you can answer the question based on it, even if the answer is not explicitly stated. For example, if the question asks about a specific feature of a product and the available information discusses the product's features without mentioning the specific feature, you can infer that the product likely does not have that feature. + 1. Use the available information to inform your answer. + 2. When answering questions, provide inline citation references by putting their citation index delimited by 【 and 】 at end of sentence, example: This is a claim【1】." + 3. If you need to cite multiple pieces of information inline, use separate 【 and 】 for each citation, example: "This is a claim【1】【2】." + 4. Use citations most relevant to the query to augment your answer with informative supportive resources; do not create unhelpful, extended chains of citations. + 5. DO NOT list URLs/links of the citation source or an aggregate list of citations at the end of the response. They would be automatically added by the system based on citation indices. + 6. DO NOT provide inline citations inside or around code blocks, as they break formatting of output, only provide them to augment plaintext. + 7. DO NOT use markdown to format your citations, always provide them in plaintext. 
+ + A few guidelines for you when answering questions: + - Highlight relevant entities/phrases with **, for example: "**Neil Armstrong** is known as the first person to land on the moon." (Do not apply this guideline to citations or in code blocks.) + - DO NOT talk about how you based your answer on the information provided to you as it may confuse the user. + - Don't copy-paste the information from the available information directly. Paraphrase the information in your own words. + - Even if the information is in another format, your output MUST follow the guidelines. for example: output O₁ instead of O1, output R⁷ instead of R7, etc. + - Be concise and informative in your answers. + """ + + def format_sources(self, sources): + ret = "\n" + for pos, source in enumerate(sources): + ret += "\n" + ret += f'\n' + ret += f"\n{source.get('url', '')}\n\n" + ret += f"\n{source.get('title', '')}\n\n" + ret += f"\n{source.get('content', '')}\n\n" + ret += "\n" + + return ret + "" + + def post_search(self, request, search): + query = search.search_query + if query.pageno > 1 or not query.query.endswith("?"): + return + + token = request.preferences.get_value("quick_answer_token") + if not token: + return + + model = request.preferences.get_value("quick_answer_model") + providers = request.preferences.get_value("quick_answer_providers") + if providers: + providers = [provider.strip() for provider in providers.split(",")] + + sources = search.result_container.get_ordered_results() + formatted_sources = self.format_sources(sources) + user = formatted_sources + f"\n\nUser query: {query.query}" + system = self.get_sys_prompt() + + reference_map = {str(i): (source.get("url"), source.get("title")) for i, source in enumerate(sources)} + + search.result_container.infoboxes.append( + { + "infobox": "Quick Answer", + "id": "quick_answer", + "content": f""" + + """, + } + ) + + name = gettext("Quick Answer") + description = gettext("Use search results to obtain cited answers from LLMs by 
appending '?' to queries") + default_on = False + preference_section = "general" diff --git a/searx/preferences.py b/searx/preferences.py index 9f810ec72..810f95f89 100644 --- a/searx/preferences.py +++ b/searx/preferences.py @@ -76,6 +76,26 @@ class Setting: class StringSetting(Setting): """Setting of plain string values""" + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.value = "" + + def get_value(self): + return self.value + + def parse(self, data: str): + self.value = data + + def parse_form(self, data: str): + if self.locked: + return + + self.value = data + + def save(self, name: str, resp: flask.Response): + """Save cookie ``name`` in the HTTP response object""" + resp.set_cookie(name, self.value, max_age=COOKIE_MAX_AGE) + class EnumStringSetting(Setting): """Setting of a value which can only come from the given choices""" @@ -132,6 +152,35 @@ class MultipleChoiceSetting(Setting): resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE) +class ListSetting(Setting): + """Setting of values of type ``list`` (ordered comma separated string)""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.values = [] + + def get_value(self): + return ",".join(self.values) + + def parse(self, data: str): + """Parse and validate ``data`` and store the result at ``self.value``""" + if data == "": + self.values = [] + return + + self.values = data.split(",") + + def parse_form(self, data: str): + if self.locked: + return + + self.values = data.split(",") + + def save(self, name: str, resp: flask.Response): + """Save cookie ``name`` in the HTTP response object""" + resp.set_cookie(name, ",".join(self.values), max_age=COOKIE_MAX_AGE) + + class SetSetting(Setting): """Setting of values of type ``set`` (comma separated string)""" @@ -479,6 +528,9 @@ class Preferences: settings['ui']['url_formatting'], choices=['pretty', 'full', 'host'] ), + "quick_answer_token": StringSetting("quick_answer_token"), + 
"quick_answer_model": StringSetting("quick_answer_model"), + "quick_answer_providers": ListSetting("quick_answer_providers"), # fmt: on } diff --git a/searx/settings.yml b/searx/settings.yml index ec6f4c1c5..a44e65410 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -2666,3 +2666,5 @@ doi_resolvers: sci-hub.ru: 'https://sci-hub.ru/' default_doi_resolver: 'oadoi.org' + +quick_answer_api: "https://openrouter.ai/api/v1/chat/completions" diff --git a/searx/templates/simple/base.html b/searx/templates/simple/base.html index 2eebde676..4ea894b52 100644 --- a/searx/templates/simple/base.html +++ b/searx/templates/simple/base.html @@ -20,6 +20,9 @@ {% if get_setting('server.limiter') or get_setting('server.public_instance') %} {% endif %} + {% if 'Quick Answer' in get_setting('enabled_plugins') %} + + {% endif %} diff --git a/searx/templates/simple/preferences.html b/searx/templates/simple/preferences.html index e86e926cc..e27400dbd 100644 --- a/searx/templates/simple/preferences.html +++ b/searx/templates/simple/preferences.html @@ -180,8 +180,9 @@ {% if 'safesearch' not in locked_preferences %} {%- include 'simple/preferences/safesearch.html' -%} {%- endif -%} - {%- include 'simple/preferences/tokens.html' -%} {{- plugin_preferences('general') -}} + {%- include 'simple/preferences/tokens.html' -%} + {%- include 'simple/preferences/quick_answer.html' -%} {%- if 'doi_resolver' not in locked_preferences %} diff --git a/searx/templates/simple/preferences/quick_answer.html b/searx/templates/simple/preferences/quick_answer.html new file mode 100644 index 000000000..c8037aa51 --- /dev/null +++ b/searx/templates/simple/preferences/quick_answer.html @@ -0,0 +1,39 @@ +
{{- _('Quick Answer') -}}
+ +{{- plugin_preferences('general/quick_answer') -}} + +
{{- '' -}} + {{- _('Quick Answer token') -}}{{- '' -}} +
{{- '' -}} + {{- '' -}} +
{{- '' -}} +
+ {{- _('OpenRouter access token used to authenticate Quick Answer API requests') -}} +
{{- '' -}} +
{{- '' -}} + +
{{- '' -}} + {{- _('Quick Answer model') -}}{{- '' -}} +
{{- '' -}} + {{- '' -}} +
{{- '' -}} +
+ {{- _('OpenRouter LLM used to provide Quick Answers (e.g. meta-llama/llama-3.3-70b-instruct)') -}} +
{{- '' -}} +
{{- '' -}} + +
{{- '' -}} + {{- _('Quick Answer providers') -}}{{- '' -}} +
{{- '' -}} + {{- '' -}} +
{{- '' -}} +
+ {{- _('List of OpenRouter providers used to supply Quick Answers (e.g. Fireworks,DeepInfra)') -}} +
{{- '' -}} +
{{- '' -}} diff --git a/searx/webapp.py b/searx/webapp.py index 7104853e8..c86c53b3b 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -13,6 +13,7 @@ import os import sys import base64 +from datetime import timedelta, datetime from timeit import default_timer from html import escape from io import StringIO @@ -1327,6 +1328,107 @@ def config(): ) +# User-scoped cache for quick answer responses +quick_answer_cache = {} +quick_answer_cache_max_keys = 1000 +quick_answer_cache_expiry = timedelta(minutes=60) + + +@app.route("/quick_answer", methods=["POST"]) +def quick_answer(): + """Endpoint to handle LLM requests.""" + data = sxng_request.get_json() + if not data: + return "Invalid JSON data", 400 + + user = data.get("user") + system = data.get("system") + token = data.get("token") + if not all([user, system, token]): + return "Missing required fields", 400 + + # These can be unproblematically empty; account defaults are OK + model = data.get("model") + providers = data.get("providers") + + now = datetime.now() + expired_keys = [ + key for key, value in quick_answer_cache.items() if now - value["timestamp"] >= quick_answer_cache_expiry + ] + for key in expired_keys: + del quick_answer_cache[key] + + if len(quick_answer_cache) >= quick_answer_cache_max_keys: + sorted_keys = sorted(quick_answer_cache.keys(), key=lambda k: quick_answer_cache[k]["timestamp"]) + for key in sorted_keys[: len(quick_answer_cache) - quick_answer_cache_max_keys + 1]: + del quick_answer_cache[key] + + # Prevent re-generation of LLM responses when navigating to/from results pages + query_hash = hashlib.sha256((token + model + user + system).encode("utf-8")).hexdigest() + cached_response = quick_answer_cache.get(query_hash) + if cached_response and datetime.now() - cached_response["timestamp"] < quick_answer_cache_expiry: + return Response(cached_response["content"], mimetype="text/html") + + def stream_response(): + try: + with httpx.stream( + method="POST", + url=settings["quick_answer_api"], 
+ headers={ + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + }, + json={ + "model": model, + "provider": {"order": providers}, + "stream": True, + "messages": [ + {"role": "system", "content": system}, + { + "role": "user", + "content": user, + }, + ], + }, + ) as resp: + resp.raise_for_status() + + content_buffer = [] + + for line in resp.iter_lines(): + try: + if line.startswith("data: "): + json_str = line[6:] # Remove 'data: ' prefix + if json_str.strip() == "[DONE]": + break + + json_data = json.loads(json_str) + if "choices" in json_data: + content = json_data["choices"][0].get("delta", {}).get("content", "") + if content: + content_buffer.append(content) + yield content + + except json.JSONDecodeError: + continue + except Exception as e: # pylint: disable=broad-except + yield f"Error processing chunk: {str(e)}" + break + + if not any( + error in "".join(content_buffer) for error in ["API request failed", "Error processing chunk"] + ): + quick_answer_cache[query_hash] = { + "content": "".join(content_buffer), + "timestamp": datetime.now(), + } + + except Exception as e: # pylint: disable=broad-except + yield f"API request failed: {str(e)}" + + return Response(stream_response(), mimetype="text/html") + + @app.errorhandler(404) def page_not_found(_e): return render('404.html'), 404