initial implementation

2025-03-16 13:08:45 -07:00 · 2025-03-16 13:08:45 -07:00 · eaa96a0b64
commit eaa96a0b64
parent a1d5add718
15 changed files with 716 additions and 2 deletions
--- a/client/simple/package.json
+++ b/client/simple/package.json
@ -35,6 +35,8 @@
    "webpack-cli": "^6.0.1"
  },
  "dependencies": {
-    "autocomplete-js": "^2.7.1"
+    "autocomplete-js": "^2.7.1",
+    "katex": "^0.16.19",
+    "marked": "^15.0.6"
  }
 }
--- a/client/simple/src/js/main/quick-answer.js
+++ b/client/simple/src/js/main/quick-answer.js
@ -0,0 +1,280 @@
+import { marked } from "../../../node_modules/marked/lib/marked.esm.js";
+import renderMathInElement from "../../../node_modules/katex/dist/contrib/auto-render.js";
+
+document.addEventListener("DOMContentLoaded", () => {
+  if (typeof window.referenceMap === "undefined") {
+    console.error("referenceMap is not defined");
+    return;
+  }
+
+  marked.setOptions({
+    gfm: true,
+    breaks: true,
+    highlight: function (code, language) {
+      if (language) {
+        return (
+          '<pre><code class="language-' +
+          language +
+          '">' +
+          code.replace(
+            /[&<>'"]/g,
+            (c) =>
+              ({
+                "&": "&amp;",
+                "<": "&lt;",
+                ">": "&gt;",
+                "'": "&#39;",
+                '"': "&quot;",
+              })[c],
+          ) +
+          "</code></pre>"
+        );
+      }
+      return code;
+    },
+  });
+
+  const renderer = new marked.Renderer();
+  renderer.link = (token) => {
+    const href = token.href;
+    const title = token.title || "";
+    const text = token.text;
+    return `<a href="${href}" title="${title}" rel="noreferrer">${text}</a>`;
+  };
+  marked.use({ renderer });
+
+  // Custom math handling; we roll our own because LLMs are inconsistent(!)
+  const mathExtension = {
+    name: "math",
+    level: "block",
+    start(src) {
+      return src.match(/\$\$/)?.index;
+    },
+    tokenizer(src) {
+      const rule = /^\$\$([\s\S]+?)\$\$/;
+      const match = rule.exec(src);
+      if (match) {
+        return {
+          type: "math",
+          raw: match[0],
+          text: match[1].trim(),
+        };
+      }
+    },
+    renderer(token) {
+      return `$$${token.text}$$`;
+    },
+  };
+
+  const inlineMathExtension = {
+    name: "inlineMath",
+    level: "inline",
+    start(src) {
+      return src.match(/\$/)?.index;
+    },
+    tokenizer(src) {
+      const rule = /^\$([^$\n]+?)\$/;
+      const match = rule.exec(src);
+      if (match) {
+        return {
+          type: "inlineMath",
+          raw: match[0],
+          text: match[1].trim(),
+        };
+      }
+    },
+    renderer(token) {
+      return `$${token.text}$`;
+    },
+  };
+
+  marked.use({ extensions: [mathExtension, inlineMathExtension] });
+
+  // Answer
+  const qa = document.querySelector(".infobox p");
+  if (!qa) {
+    console.error("Quick answer container not found");
+    return;
+  }
+  qa.id = "quick-answer";
+  qa.className = "markdown-content";
+
+  // References
+  const refContainer = document.createElement("div");
+  refContainer.className = "references";
+  const refHeading = document.createElement("h4");
+  refHeading.textContent = "References";
+  refContainer.appendChild(refHeading);
+  const refList = document.createElement("ol");
+  refContainer.appendChild(refList);
+  qa.after(refContainer);
+
+  let accumulatedText = "";
+  let lastProcessedLength = 0;
+  let references = {};
+  let referenceCounter = 1;
+  let referenceMap = window.referenceMap;
+
+  function escapeHtml(unsafe) {
+    return unsafe.replace(/[&<>"']/g, function (m) {
+      switch (m) {
+      case "&":
+        return "&amp;";
+      case "<":
+        return "&lt;";
+      case ">":
+        return "&gt;";
+      case '"':
+        return "&quot;";
+      case "'":
+        return "&#039;";
+      default:
+        return m;
+      }
+    });
+  }
+
+  function replaceCitations(text) {
+    // First pass: replace citations with temporary markers to be replaced by actual spaces in the second pass
+    // LLMs do not consistently follow prompted formatting, and inline citations *need* to be space-delimited
+    let processedText = text.replace(
+      /【(\d+)】/g,
+      (match, citationIndex, offset) => {
+        const isFollowedByCitation = text
+          .slice(offset + match.length)
+          .match(/^【\d+】/);
+        const source = referenceMap[citationIndex];
+
+        if (source) {
+          const [url, title] = source;
+          let refNumber = references[citationIndex];
+          const escapedTitle = escapeHtml(title);
+
+          if (!refNumber) {
+            const refItem = document.createElement("li");
+            const refLink = document.createElement("a");
+            refLink.href = url;
+            refLink.textContent = title;
+            refLink.rel = "noreferrer";
+            refItem.appendChild(refLink);
+            refList.appendChild(refItem);
+            references[citationIndex] = referenceCounter;
+            refNumber = referenceCounter;
+            referenceCounter += 1;
+          }
+
+          // Add look-ahead marker |||CITATION_SPACE||| if followed by another citation
+          return `<a href="${url}" class="inline-reference" title="${escapedTitle}">${refNumber}</a>${
+            isFollowedByCitation ? "|||CITATION_SPACE|||" : ""
+          }`;
+        }
+        return match;
+      },
+    );
+
+    // Second pass: replace temporary markers with spaces
+    return processedText.replace(/\|\|\|CITATION_SPACE\|\|\|/g, " ");
+  }
+
+  fetch("/quick_answer", {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({
+      system: window.systemPrompt,
+      user: window.userPrompt,
+      token: window.userToken,
+      model: window.userModel,
+      providers: window.userProviders,
+    }),
+  })
+    .then((response) => {
+      const reader = response.body.getReader();
+      const decoder = new TextDecoder();
+
+      function processMarkdownChunk(text) {
+        accumulatedText += text;
+
+        const markdownElements = {
+          codeBlock: { start: "```", end: "```" },
+          bold: { start: "**", end: "**" },
+          italic: { start: "_", end: "_" },
+          link: { start: "[", end: ")" },
+          mathDisplay: { start: "$$", end: "$$" },
+          mathInline: { start: "$", end: "$" },
+        };
+
+        let processUpTo = accumulatedText.length;
+
+        // Find last complete element
+        for (const element of Object.values(markdownElements)) {
+          const lastStart = accumulatedText.lastIndexOf(element.start);
+          if (lastStart > lastProcessedLength) {
+            const nextEnd = accumulatedText.indexOf(
+              element.end,
+              lastStart + element.start.length,
+            );
+            if (nextEnd === -1) {
+              processUpTo = Math.min(processUpTo, lastStart);
+            }
+          }
+        }
+
+        // Process complete portion
+        if (processUpTo > lastProcessedLength) {
+          const processedText = replaceCitations(
+            accumulatedText.substring(0, processUpTo),
+          );
+          qa.innerHTML = marked.parse(processedText);
+
+          renderMathInElement(qa, {
+            delimiters: [
+              { left: "$$", right: "$$", display: true },
+              { left: "$", right: "$", display: false },
+            ],
+            throwOnError: false,
+          });
+
+          lastProcessedLength = processUpTo;
+        }
+      }
+
+      function readStream() {
+        reader
+          .read()
+          .then(({ done, value }) => {
+            if (done) {
+              // Process any remaining text
+              if (accumulatedText.length > lastProcessedLength) {
+                const processedText = replaceCitations(accumulatedText);
+                qa.innerHTML = marked.parse(processedText);
+                renderMathInElement(qa, {
+                  delimiters: [
+                    { left: "$$", right: "$$", display: true },
+                    { left: "$", right: "$", display: false },
+                  ],
+                  throwOnError: false,
+                });
+              }
+              return;
+            }
+
+            const text = decoder.decode(value, { stream: true });
+            processMarkdownChunk(text);
+
+            // Scroll to bottom of the div to show new content
+            qa.scrollTop = qa.scrollHeight;
+
+            // Continue reading
+            readStream();
+          })
+          .catch((error) => console.error("Error:", error));
+      }
+      readStream();
+    })
+    .catch((error) => {
+      console.error("Error:", error);
+      qa.innerHTML = marked.parse(`**Error**: ${error.message}`);
+    });
+});
--- a/client/simple/src/js/searxng.js
+++ b/client/simple/src/js/searxng.js
@ -5,3 +5,4 @@ import "./main/mapresult.js";
 import "./main/preferences.js";
 import "./main/results.js";
 import "./main/search.js";
+import "./main/quick-answer.js";
--- a/client/simple/src/less/definitions.less
+++ b/client/simple/src/less/definitions.less
@ -130,6 +130,14 @@
  // Favicons Colors
  --color-favicon-background-color: #ddd;
  --color-favicon-border-color: #ccc;
+
+  // Quick Answer Colors
+  --color-quick-answer-code-background: rgb(27 31 35 / 5%);
+  --color-quick-answer-pre-background: #f6f8fa;
+  --color-quick-answer-blockquote-border: #dfe2e5;
+  --color-quick-answer-blockquote-font: #6a737d;
+  --color-quick-answer-table-border: #dfe2e5;
+  --color-quick-answer-table-tr-background: #f6f8fa;
 }

 .dark-themes() {
@ -249,6 +257,14 @@
  // Favicons Colors
  --color-favicon-background-color: #ddd;
  --color-favicon-border-color: #ccc;
+
+  // Quick Answer Colors
+  --color-quick-answer-code-background: #4d5a6f;
+  --color-quick-answer-pre-background: #4d5a6f;
+  --color-quick-answer-blockquote-border: #555;
+  --color-quick-answer-blockquote-font: #bbb;
+  --color-quick-answer-table-border: #555;
+  --color-quick-answer-table-tr-background: #4d5a6f;
 }

 .black-themes() {
--- a/client/simple/src/less/quick-answer.less
+++ b/client/simple/src/less/quick-answer.less
@ -0,0 +1,83 @@
+// Quick Answer: Markdown rendered into the infobox paragraph
+#sidebar .infobox .markdown-content {
+  code {
+    background-color: var(--color-quick-answer-code-background);
+    border-radius: 3px;
+    font-family:
+      SFMono-Regular, Consolas, "Liberation Mono", Menlo, Courier, monospace;
+    font-size: 85%;
+    margin: 0;
+    padding: 0.2em 0.4em;
+  }
+
+  pre {
+    background-color: var(--color-quick-answer-pre-background);
+    border-radius: 3px;
+    font-size: 85%;
+    overflow: auto;
+    padding: 16px;
+  }
+
+  // code inside a block resets the inline-code chrome
+  pre > code {
+    background-color: transparent;
+    border: 0;
+    font-size: 100%;
+    margin: 0;
+    padding: 0;
+    white-space: pre;
+    word-break: normal;
+  }
+
+  blockquote {
+    border-left: 0.25em solid var(--color-quick-answer-blockquote-border);
+    color: var(--color-quick-answer-blockquote-font);
+    margin: 0;
+    padding: 0 1em;
+  }
+
+  // superscript-like citation number
+  a.inline-reference {
+    color: var(--color-result-link-font);
+    line-height: 1.4;
+    position: relative;
+    top: -0.2em;
+    vertical-align: top;
+    font-size: smaller;
+  }
+
+  table {
+    border-collapse: collapse;
+    margin: 1em 0;
+    width: 100%;
+  }
+
+  table th,
+  table td {
+    border: 1px solid var(--color-quick-answer-table-border);
+    padding: 6px 13px;
+  }
+
+  table tr:nth-child(2n) {
+    background-color: var(--color-quick-answer-table-tr-background);
+  }
+
+  h1,
+  h2,
+  h3,
+  h4 {
+    font-size: 1em;
+  }
+}
+
+// Quick Answer: reference list placed after the answer
+#sidebar .infobox .references {
+  a {
+    text-decoration: underline;
+  }
+
+  ol {
+    list-style-type: decimal;
+    list-style-position: inside;
+    margin-left: 0;
+    padding-left: 0;
+  }
+
+  // keep each reference on one line, truncated with an ellipsis
+  ol li {
+    overflow: hidden;
+    text-overflow: ellipsis;
+    white-space: nowrap;
+  }
+}
--- a/client/simple/src/less/style.less
+++ b/client/simple/src/less/style.less
@ -32,6 +32,9 @@
 // to center the results
@import "style-center.less";

+// quick-answer plugin
+@import "quick-answer.less";
+
 // sxng-icon-set
 .sxng-icon-set {
  display: inline-block;
--- a/client/simple/vite.config.js
+++ b/client/simple/vite.config.js
@ -22,6 +22,8 @@ const PATH = {
  brand: "src/brand",
  static: resolve(ROOT, "client/simple/static"),
  leaflet: resolve(ROOT, "client/simple/node_modules/leaflet/dist"),
+  katex: resolve(ROOT, "client/simple/node_modules/katex"),
+  marked: resolve(ROOT, "client/simple/node_modules/marked"),
  templates: resolve(ROOT, "searx/templates/simple"),
 };

@ -133,6 +135,18 @@ export default defineConfig({
        { src: PATH.leaflet + "/leaflet.{js,js.map}", dest: PATH.dist + "/js" },
        { src: PATH.leaflet + "/images/*.png", dest: PATH.dist + "/css/images/" },
        { src: PATH.leaflet + "/*.{css,css.map}", dest: PATH.dist + "/css" },
+      ]
+    }),
+
+    // Quick Answer (KaTeX + Marked)
+
+    viteStaticCopy({
+      targets: [
+        { src: PATH.katex + "/dist/katex.js", dest: PATH.dist + "/js" },
+        { src: PATH.katex + "/contrib/auto-render/auto-render.js", dest: PATH.dist + "/js" },
+        { src: PATH.katex + "/dist/katex.css", dest: PATH.dist + "/css" },
+        { src: PATH.katex + "/dist/fonts/*.{ttf,woff,woff2}", dest: PATH.dist + "/css/fonts/" },
+        { src: PATH.marked + "/lib/marked.esm.js", dest: PATH.dist + "/js" },
        { src: PATH.static + "/**/*", dest: PATH.dist },
      ]
    }),
--- a/docs/admin/settings/settings_ui.rst
+++ b/docs/admin/settings/settings_ui.rst
@ -72,3 +72,6 @@

 ``url_formatting``:
  Formatting type to use for result URLs: ``pretty``, ``full`` or ``host``.
+
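+``quick_answer_api``:
+  URL of the OpenAI-compatible chat-completions endpoint that the Quick Answer
+  plugin queries to generate LLM responses grounded in the search results.
+  Note that this key is set at the top level of ``settings.yml``, not under ``ui:``.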
--- a/searx/plugins/quick_answer.py
+++ b/searx/plugins/quick_answer.py
@ -0,0 +1,113 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# pylint: disable=missing-module-docstring, missing-class-docstring
+import json
+from datetime import datetime
+
+from flask_babel import gettext
+
+from searx.plugins import Plugin, PluginInfo
+
+
+class SXNGPlugin(Plugin):
+    id = "quick_answer"
+    default_on = False
+
+    def __init__(self):
+        super().__init__()
+
+        self.info = PluginInfo(
+            id=self.id,
+            name=gettext("Quick Answer"),
+            description=gettext("Use search results to obtain cited answers from LLMs by appending '?' to queries"),
+            examples=["Linear congruential generator?"],
+            preference_section="general/quick_answer",
+        )
+
+    def get_sys_prompt(self):
+        now = datetime.now()
+        return f"""
+        The current date is {now:%Y-%m-%d}
+
+        You ALWAYS follow these guidelines when writing your response:
+        - Use markdown formatting to enhance clarity and readability of your response.
+        - If you need to include mathematical expressions, use LaTeX to format them properly. Only use LaTeX when necessary for math.
+        - Delimit inline mathematical expressions with '$', for example: $y = mx + b$.
+        - Delimit block mathematical expressions with '$$', for example: $$F = ma$$.
+        - If you need to write code or program commands, format them as markdown code blocks.
+        - For all other output, use plain text formatting unless the user specifically requests otherwise.
+        - DO NOT include headers which only describe or rephrase the query before beginning your response.
+        - DO NOT include URLs or links in your response.
+        - ALWAYS enclose currency and price values in '**', for example: **$5.99**, to ensure they are formatted correctly.
+
+        The relevant available information is contained within the <information></information> tags. When a user asks a question, perform the following tasks:
+        0. Examine the available information and assess whether you can answer the question based on it, even if the answer is not explicitly stated. For example, if the question asks about a specific feature of a product and the available information discusses the product's features without mentioning the specific feature, you can infer that the product likely does not have that feature.
+        1. Use the available information to inform your answer.
+        2. When answering questions, provide inline citation references by putting their citation index delimited by 【 and 】 at end of sentence, example: This is a claim【1】."
+        3. If you need to cite multiple pieces of information inline, use separate 【 and 】 for each citation, example: "This is a claim【1】【2】."
+        4. Use citations most relevant to the query to augment your answer with informative supportive resources; do not create unhelpful, extended chains of citations.
+        5. DO NOT list URLs/links of the citation source or an aggregate list of citations at the end of the response. They would be automatically added by the system based on citation indices.
+        6. DO NOT provide inline citations inside or around code blocks, as they break formatting of output, only provide them to augment plaintext.
+        7. DO NOT use markdown to format your citations, always provide them in plaintext.
+
+        A few guidelines for you when answering questions:
+        - Highlight relevant entities/phrases with **, for example: "**Neil Armstrong** is known as the first person to land on the moon." (Do not apply this guideline to citations or in code blocks.)
+        - DO NOT talk about how you based your answer on the information provided to you as it may confuse the user.
+        - Don't copy-paste the information from the available information directly. Paraphrase the information in your own words.
+        - Even if the information is in another format, your output MUST follow the guidelines. for example: output O₁ instead of O<sub>1</sub>, output R⁷ instead of R<sup>7</sup>, etc.
+        - Be concise and informative in your answers.
+        """
+
+    def format_sources(self, sources):
+        ret = "<available_information>\n"
+        for pos, source in enumerate(sources):
+            ret += "<datum>\n"
+            ret += f'<citation index="{pos}">\n'
+            ret += f"<source>\n{source.get('url', '')}\n</source>\n"
+            ret += f"<title>\n{source.get('title', '')}\n</title>\n"
+            ret += f"<content>\n{source.get('content', '')}\n</content>\n"
+            ret += "</datum>\n"
+
+        return ret + "</available_information>"
+
+    def post_search(self, request, search):
+        query = search.search_query
+        if query.pageno > 1 or not query.query.endswith("?"):
+            return
+
+        token = request.preferences.get_value("quick_answer_token")
+        if not token:
+            return
+
+        model = request.preferences.get_value("quick_answer_model")
+        providers = request.preferences.get_value("quick_answer_providers")
+        if providers:
+            providers = [provider.strip() for provider in providers.split(",")]
+
+        sources = search.result_container.get_ordered_results()
+        formatted_sources = self.format_sources(sources)
+        user = formatted_sources + f"\n\nUser query: {query.query}"
+        system = self.get_sys_prompt()
+
+        reference_map = {str(i): (source.get("url"), source.get("title")) for i, source in enumerate(sources)}
+
+        search.result_container.infoboxes.append(
+            {
+                "infobox": "Quick Answer",
+                "id": "quick_answer",
+                "content": f"""
+                <script>
+                    window.systemPrompt = {json.dumps(system)};
+                    window.userPrompt = {json.dumps(user)};
+                    window.userToken = {json.dumps(token)};
+                    window.userModel = {json.dumps(model)};
+                    window.userProviders = {json.dumps(providers)};
+                    window.referenceMap = {json.dumps(reference_map)};
+                </script>
+            """,
+            }
+        )
+
+    name = gettext("Quick Answer")
+    description = gettext("Use search results to obtain cited answers from LLMs by appending '?' to queries")
+    default_on = False
+    preference_section = "general"
--- a/searx/preferences.py
+++ b/searx/preferences.py
@ -76,6 +76,26 @@ class Setting:
 class StringSetting(Setting):
    """Setting of plain string values"""

+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.value = ""
+
+    def get_value(self):
+        return self.value
+
+    def parse(self, data: str):
+        self.value = data
+
+    def parse_form(self, data: str):
+        if self.locked:
+            return
+
+        self.value = data
+
+    def save(self, name: str, resp: flask.Response):
+        """Save cookie ``name`` in the HTTP response object"""
+        resp.set_cookie(name, self.value, max_age=COOKIE_MAX_AGE)
+

 class EnumStringSetting(Setting):
    """Setting of a value which can only come from the given choices"""
@ -132,6 +152,35 @@ class MultipleChoiceSetting(Setting):
        resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE)


+class ListSetting(Setting):
+    """Setting of values of type ``list`` (ordered comma separated string)"""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.values = []
+
+    def get_value(self):
+        return ",".join(self.values)
+
+    def parse(self, data: str):
+        """Parse and validate ``data`` and store the result at ``self.value``"""
+        if data == "":
+            self.values = []
+            return
+
+        self.values = data.split(",")
+
+    def parse_form(self, data: str):
+        if self.locked:
+            return
+
+        self.values = data.split(",")
+
+    def save(self, name: str, resp: flask.Response):
+        """Save cookie ``name`` in the HTTP response object"""
+        resp.set_cookie(name, ",".join(self.values), max_age=COOKIE_MAX_AGE)
+
+
 class SetSetting(Setting):
    """Setting of values of type ``set`` (comma separated string)"""

@ -479,6 +528,9 @@ class Preferences:
                settings['ui']['url_formatting'],
                choices=['pretty', 'full', 'host']
            ),
+            "quick_answer_token": StringSetting("quick_answer_token"),
+            "quick_answer_model": StringSetting("quick_answer_model"),
+            "quick_answer_providers": ListSetting("quick_answer_providers"),
            # fmt: on
        }

--- a/searx/settings.yml
+++ b/searx/settings.yml
@ -2666,3 +2666,5 @@ doi_resolvers:
  sci-hub.ru: 'https://sci-hub.ru/'

 default_doi_resolver: 'oadoi.org'
+
+quick_answer_api: "https://openrouter.ai/api/v1/chat/completions"
--- a/searx/templates/simple/base.html
+++ b/searx/templates/simple/base.html
@ -20,6 +20,9 @@
  {% if get_setting('server.limiter') or get_setting('server.public_instance') %}
  <link rel="stylesheet" href="{{ url_for('client_token', token=link_token) }}" type="text/css">
  {% endif %}
+  {# KaTeX stylesheet for math rendered by the Quick Answer plugin; `or []` keeps the test valid when enabled_plugins is unset. #}
+  {% if 'Quick Answer' in (get_setting('enabled_plugins') or []) %}
+  <link rel="stylesheet" href="{{ url_for('static', filename='css/katex.css') }}" type="text/css">
+  {% endif %}
  <!--[if gte IE 9]>-->
  <script src="{{ url_for('static', filename='js/searxng.head.min.js') }}" client_settings="{{ client_settings }}"></script>
  <!--<![endif]-->
--- a/searx/templates/simple/preferences.html
+++ b/searx/templates/simple/preferences.html
@ -180,8 +180,9 @@
    {% if 'safesearch' not in locked_preferences %}
      {%- include 'simple/preferences/safesearch.html' -%}
    {%- endif -%}
-    {%- include 'simple/preferences/tokens.html' -%}
    {{- plugin_preferences('general') -}}
+    {%- include 'simple/preferences/tokens.html' -%}
+    {%- include 'simple/preferences/quick_answer.html' -%}


    {%- if 'doi_resolver' not in locked_preferences %}
--- a/searx/templates/simple/preferences/quick_answer.html
+++ b/searx/templates/simple/preferences/quick_answer.html
@ -0,0 +1,39 @@
+{#- Quick Answer preferences: plugin toggles registered under 'general/quick_answer',
+    followed by three free-text inputs (token, model, providers) pre-filled from the
+    stored preference values. -#}
+<div class="pref-group">{{- _('Quick Answer') -}}</div>
+
+{{- plugin_preferences('general/quick_answer') -}}
+
+<fieldset>{{- '' -}}
+  <legend id="quick_answer_token">{{- _('Quick Answer token') -}}</legend>{{- '' -}}
+  <div class="value">{{- '' -}}
+    <input name="quick_answer_token" aria-labelledby="quick_answer_token" type="text"
+           autocomplete="off" spellcheck="false" autocorrect="off"
+           value='{{ preferences.get_value("quick_answer_token") }}'>{{- '' -}}
+  </div>{{- '' -}}
+  <div class="description">
+    {{- _('OpenRouter access token used to authenticate Quick Answer API requests') -}}
+  </div>{{- '' -}}
+</fieldset>{{- '' -}}
+
+<fieldset>{{- '' -}}
+  <legend id="quick_answer_model">{{- _('Quick Answer model') -}}</legend>{{- '' -}}
+  <div class="value">{{- '' -}}
+    <input name="quick_answer_model" aria-labelledby="quick_answer_model" type="text"
+           autocomplete="off" spellcheck="false" autocorrect="off"
+           value='{{ preferences.get_value("quick_answer_model") }}'>{{- '' -}}
+  </div>{{- '' -}}
+  <div class="description">
+    {{- _('OpenRouter LLM used to provide Quick Answers (e.g. meta-llama/llama-3.3-70b-instruct)') -}}
+  </div>{{- '' -}}
+</fieldset>{{- '' -}}
+
+<fieldset>{{- '' -}}
+  <legend id="quick_answer_providers">{{- _('Quick Answer providers') -}}</legend>{{- '' -}}
+  <div class="value">{{- '' -}}
+    <input name="quick_answer_providers" aria-labelledby="quick_answer_providers" type="text"
+           autocomplete="off" spellcheck="false" autocorrect="off"
+           value='{{ preferences.get_value("quick_answer_providers") }}'>{{- '' -}}
+  </div>{{- '' -}}
+  <div class="description">
+    {{- _('List of OpenRouter providers used to supply Quick Answers (e.g. Fireworks,DeepInfra)') -}}
+  </div>{{- '' -}}
+</fieldset>{{- '' -}}
--- a/searx/webapp.py
+++ b/searx/webapp.py
@ -13,6 +13,7 @@ import os
 import sys
 import base64

+from datetime import timedelta, datetime
 from timeit import default_timer
 from html import escape
 from io import StringIO
@ -1327,6 +1328,107 @@ def config():
    )


+# User-scoped cache for quick answer responses
+# Module-level dict; quick_answer() keys it by a SHA-256 hex digest computed from
+# the request's token, model and prompts, and stores
+# {"content": <answer text>, "timestamp": <datetime of insertion>} per entry.
+quick_answer_cache = {}
+# Entry limit: quick_answer() evicts the oldest entries once this size is reached.
+quick_answer_cache_max_keys = 1000
+# Entries at least this old are purged at the start of each quick_answer() call.
+quick_answer_cache_expiry = timedelta(minutes=60)
+
+
+@app.route("/quick_answer", methods=["POST"])
+def quick_answer():
+    """Endpoint to handle LLM requests."""
+    data = sxng_request.get_json()
+    if not data:
+        return "Invalid JSON data", 400
+
+    user = data.get("user")
+    system = data.get("system")
+    token = data.get("token")
+    if not all([user, system, token]):
+        return "Missing required fields", 400
+
+    # These can be unproblematically empty; account defaults are OK
+    model = data.get("model")
+    providers = data.get("providers")
+
+    now = datetime.now()
+    expired_keys = [
+        key for key, value in quick_answer_cache.items() if now - value["timestamp"] >= quick_answer_cache_expiry
+    ]
+    for key in expired_keys:
+        del quick_answer_cache[key]
+
+    if len(quick_answer_cache) >= quick_answer_cache_max_keys:
+        sorted_keys = sorted(quick_answer_cache.keys(), key=lambda k: quick_answer_cache[k]["timestamp"])
+        for key in sorted_keys[: len(quick_answer_cache) - quick_answer_cache_max_keys + 1]:
+            del quick_answer_cache[key]
+
+    # Prevent re-generation of LLM responses when navigating to/from results pages
+    query_hash = hashlib.sha256((token + model + user + system).encode("utf-8")).hexdigest()
+    cached_response = quick_answer_cache.get(query_hash)
+    if cached_response and datetime.now() - cached_response["timestamp"] < quick_answer_cache_expiry:
+        return Response(cached_response["content"], mimetype="text/html")
+
+    def stream_response():
+        try:
+            with httpx.stream(
+                method="POST",
+                url=settings["quick_answer_api"],
+                headers={
+                    "Authorization": f"Bearer {token}",
+                    "Content-Type": "application/json",
+                },
+                json={
+                    "model": model,
+                    "provider": {"order": providers},
+                    "stream": True,
+                    "messages": [
+                        {"role": "system", "content": system},
+                        {
+                            "role": "user",
+                            "content": user,
+                        },
+                    ],
+                },
+            ) as resp:
+                resp.raise_for_status()
+
+                content_buffer = []
+
+                for line in resp.iter_lines():
+                    try:
+                        if line.startswith("data: "):
+                            json_str = line[6:]  # Remove 'data: ' prefix
+                            if json_str.strip() == "[DONE]":
+                                break
+
+                            json_data = json.loads(json_str)
+                            if "choices" in json_data:
+                                content = json_data["choices"][0].get("delta", {}).get("content", "")
+                                if content:
+                                    content_buffer.append(content)
+                                    yield content
+
+                    except json.JSONDecodeError:
+                        continue
+                    except Exception as e:  # pylint: disable=broad-except
+                        yield f"Error processing chunk: {str(e)}"
+                        break
+
+                if not any(
+                    error in "".join(content_buffer) for error in ["API request failed", "Error processing chunk"]
+                ):
+                    quick_answer_cache[query_hash] = {
+                        "content": "".join(content_buffer),
+                        "timestamp": datetime.now(),
+                    }
+
+        except Exception as e:  # pylint: disable=broad-except
+            yield f"API request failed: {str(e)}"
+
+    return Response(stream_response(), mimetype="text/html")
+
+
@app.errorhandler(404)
 def page_not_found(_e):
    return render('404.html'), 404