diff --git a/apps/client/src/translations/en/translation.json b/apps/client/src/translations/en/translation.json index e5dcef0d1be..a71d9b8d487 100644 --- a/apps/client/src/translations/en/translation.json +++ b/apps/client/src/translations/en/translation.json @@ -2453,6 +2453,16 @@ "api_key": "API Key", "api_key_placeholder": "Enter your API key", "cancel": "Cancel", + "search_provider_title": "Web Search Providers", + "search_provider_description": "Configure third-party web search engines (e.g. Exa, Tavily, SearXNG). When any provider is configured, the AI agent uses it for web searches with every model. When empty, the AI agent falls back to each LLM provider's built-in web search (Anthropic, OpenAI, Google).", + "add_search_provider": "Add Search Provider", + "add_search_provider_title": "Add Search Provider", + "configured_search_providers": "Configured Search Providers", + "no_search_providers_configured": "No search providers configured. Falling back to built-in web search.", + "delete_search_provider": "Delete", + "delete_search_provider_confirmation": "Are you sure you want to delete the search provider \"{{name}}\"?", + "search_provider_type": "Search provider", + "search_provider_base_url": "Base URL", "mcp_title": "MCP (Model Context Protocol)", "mcp_enabled": "MCP server", "mcp_enabled_description": "Expose a Model Context Protocol (MCP) endpoint so that AI coding assistants (e.g. Claude Code, GitHub Copilot) can read and modify your notes. 
The endpoint is only accessible from localhost.", diff --git a/apps/client/src/widgets/type_widgets/options/llm.tsx b/apps/client/src/widgets/type_widgets/options/llm.tsx index ce709ef7427..732e18ffe15 100644 --- a/apps/client/src/widgets/type_widgets/options/llm.tsx +++ b/apps/client/src/widgets/type_widgets/options/llm.tsx @@ -9,6 +9,7 @@ import { useTriliumOption, useTriliumOptionBool } from "../../react/hooks"; import OptionsRow, { OptionsRowWithToggle } from "./components/OptionsRow"; import OptionsSection from "./components/OptionsSection"; import AddProviderModal, { type LlmProviderConfig, PROVIDER_TYPES } from "./llm/AddProviderModal"; +import AddSearchProviderModal, { type SearchProviderConfig, SEARCH_PROVIDER_TYPES } from "./llm/AddSearchProviderModal"; export default function LlmSettings() { if (!isExperimentalFeatureEnabled("llm")) { @@ -22,6 +23,7 @@ export default function LlmSettings() { return ( <> + > ); @@ -80,6 +82,102 @@ function ProviderSettings() { ); } +function SearchProviderSettings() { + const [providersJson, setProvidersJson] = useTriliumOption("searchProviders"); + const providers = useMemo(() => { + try { + return providersJson ? 
JSON.parse(providersJson) : []; + } catch { + return []; + } + }, [providersJson]); + const setProviders = useCallback((newProviders: SearchProviderConfig[]) => { + setProvidersJson(JSON.stringify(newProviders)); + }, [setProvidersJson]); + const [showAddModal, setShowAddModal] = useState(false); + + const handleAddProvider = useCallback((newProvider: SearchProviderConfig) => { + setProviders([...providers, newProvider]); + }, [providers, setProviders]); + + const handleDeleteProvider = useCallback(async (providerId: string, providerName: string) => { + if (!(await dialog.confirm(t("llm.delete_search_provider_confirmation", { name: providerName })))) { + return; + } + setProviders(providers.filter(p => p.id !== providerId)); + }, [providers, setProviders]); + + return ( + + {t("llm.search_provider_description")} + + setShowAddModal(true)} + /> + + + + {t("llm.configured_search_providers")} + + + setShowAddModal(false)} + onSave={handleAddProvider} + /> + + ); +} + +interface SearchProviderListProps { + providers: SearchProviderConfig[]; + onDelete: (providerId: string, providerName: string) => Promise; +} + +function SearchProviderList({ providers, onDelete }: SearchProviderListProps) { + if (!providers.length) { + return {t("llm.no_search_providers_configured")}; + } + + return ( + + + + + {t("llm.provider_name")} + {t("llm.provider_type")} + {t("llm.actions")} + + + + {providers.map((provider) => { + const providerType = SEARCH_PROVIDER_TYPES.find(p => p.id === provider.provider); + return ( + + {provider.name} + {providerType?.name || provider.provider} + + onDelete(provider.id, provider.name)} + /> + + + ); + })} + + + + ); +} + function getMcpEndpointUrl() { const port = window.location.port || (window.location.protocol === "https:" ? 
"443" : "80"); return `${window.location.protocol}//localhost:${port}/mcp`; diff --git a/apps/client/src/widgets/type_widgets/options/llm/AddSearchProviderModal.tsx b/apps/client/src/widgets/type_widgets/options/llm/AddSearchProviderModal.tsx new file mode 100644 index 00000000000..9d292fb384f --- /dev/null +++ b/apps/client/src/widgets/type_widgets/options/llm/AddSearchProviderModal.tsx @@ -0,0 +1,155 @@ +import { createPortal } from "preact/compat"; +import { useRef, useState } from "preact/hooks"; + +import { t } from "../../../../services/i18n"; +import FormGroup from "../../../react/FormGroup"; +import FormSelect from "../../../react/FormSelect"; +import FormTextBox from "../../../react/FormTextBox"; +import Modal from "../../../react/Modal"; + +export interface SearchProviderConfig { + id: string; + name: string; + provider: string; + apiKey?: string; + baseUrl?: string; +} + +export interface SearchProviderType { + id: string; + name: string; + /** Whether this provider requires an API key. */ + requiresApiKey: boolean; + /** Whether this provider requires a base URL (e.g. self-hosted). */ + requiresBaseUrl: boolean; + apiKeyPlaceholder?: string; + baseUrlPlaceholder?: string; +} + +export const SEARCH_PROVIDER_TYPES: SearchProviderType[] = [ + { + id: "exa", + name: "Exa", + requiresApiKey: true, + requiresBaseUrl: false, + apiKeyPlaceholder: "..." + }, + { + id: "tavily", + name: "Tavily", + requiresApiKey: true, + requiresBaseUrl: false, + apiKeyPlaceholder: "tvly-..." 
+ }, + { + id: "searxng", + name: "SearXNG", + requiresApiKey: false, + requiresBaseUrl: true, + baseUrlPlaceholder: "http://localhost:8888" + } +]; + +interface AddSearchProviderModalProps { + show: boolean; + onHidden: () => void; + onSave: (provider: SearchProviderConfig) => void; +} + +export default function AddSearchProviderModal({ show, onHidden, onSave }: AddSearchProviderModalProps) { + const [selectedProvider, setSelectedProvider] = useState(SEARCH_PROVIDER_TYPES[0].id); + const [apiKey, setApiKey] = useState(""); + const [baseUrl, setBaseUrl] = useState(""); + const formRef = useRef(null); + + const providerType = SEARCH_PROVIDER_TYPES.find(p => p.id === selectedProvider) ?? SEARCH_PROVIDER_TYPES[0]; + const canSubmit = + (!providerType.requiresApiKey || apiKey.trim().length > 0) && + (!providerType.requiresBaseUrl || baseUrl.trim().length > 0); + + function handleSubmit() { + if (!canSubmit) { + return; + } + + const newProvider: SearchProviderConfig = { + id: `${selectedProvider}_${Date.now()}`, + name: providerType.name, + provider: selectedProvider, + ...(providerType.requiresApiKey && { apiKey: apiKey.trim() }), + ...(providerType.requiresBaseUrl && { baseUrl: baseUrl.trim() }) + }; + + onSave(newProvider); + resetForm(); + onHidden(); + } + + function resetForm() { + setSelectedProvider(SEARCH_PROVIDER_TYPES[0].id); + setApiKey(""); + setBaseUrl(""); + } + + function handleCancel() { + resetForm(); + onHidden(); + } + + return createPortal( + + + {t("llm.cancel")} + + + {t("llm.add_search_provider")} + + > + } + > + + + + + {providerType.requiresApiKey && ( + + + + )} + + {providerType.requiresBaseUrl && ( + + + + )} + , + document.body + ); +} diff --git a/apps/server/src/routes/api/options.ts b/apps/server/src/routes/api/options.ts index 3ab6ca0a7e9..67999e18282 100644 --- a/apps/server/src/routes/api/options.ts +++ b/apps/server/src/routes/api/options.ts @@ -116,6 +116,7 @@ const ALLOWED_OPTIONS = new Set([ // LLM options "llmProviders", 
"mcpEnabled", + "searchProviders", // OCR options "ocrAutoProcessImages", "ocrMinConfidence" diff --git a/apps/server/src/services/llm/providers/base_provider.ts b/apps/server/src/services/llm/providers/base_provider.ts index 5000b4fcbfd..0c15a28d88b 100644 --- a/apps/server/src/services/llm/providers/base_provider.ts +++ b/apps/server/src/services/llm/providers/base_provider.ts @@ -9,6 +9,8 @@ import { generateText, type ModelMessage, stepCountIs, streamText, type ToolSet import yaml from "js-yaml"; import becca from "../../../becca/becca.js"; +import { getFirstSearchProvider } from "../../search_providers/index.js"; +import { addConfiguredSearchTool } from "../../search_providers/tool.js"; import { getSkillsSummary } from "../skills/index.js"; import { getNoteMeta,SYSTEM_PROMPT_LIMITS } from "../tools/helpers.js"; import { allToolRegistries } from "../tools/index.js"; @@ -157,7 +159,14 @@ export abstract class BaseProvider implements LlmProvider { const tools: ToolSet = {}; if (config.enableWebSearch) { - this.addWebSearchTool(tools); + // Prefer a user-configured pluggable search provider (Exa/Tavily/SearXNG/…); + // otherwise fall back to each LLM provider's built-in web search. 
+            const configuredSearch = getFirstSearchProvider();
+            if (configuredSearch) {
+                addConfiguredSearchTool(tools, configuredSearch);
+            } else {
+                this.addWebSearchTool(tools);
+            }
         }
 
         if (config.enableNoteTools) {
diff --git a/apps/server/src/services/options_init.ts b/apps/server/src/services/options_init.ts
index 1437a836ffb..9a60514c148 100644
--- a/apps/server/src/services/options_init.ts
+++ b/apps/server/src/services/options_init.ts
@@ -257,6 +257,7 @@ const defaultOptions: DefaultOption[] = [
     // AI / LLM
     { name: "llmProviders", value: "[]", isSynced: true },
     { name: "mcpEnabled", value: "false", isSynced: false },
+    { name: "searchProviders", value: "[]", isSynced: true },
 
     // OCR options
     { name: "ocrAutoProcessImages", value: "false", isSynced: true },
diff --git a/apps/server/src/services/search_providers/base_search_provider.ts b/apps/server/src/services/search_providers/base_search_provider.ts
new file mode 100644
index 00000000000..7a66a70da60
--- /dev/null
+++ b/apps/server/src/services/search_providers/base_search_provider.ts
@@ -0,0 +1,64 @@
+/**
+ * Shared interface and types for pluggable web search providers used by the LLM agent.
+ *
+ * Each search provider implementation wraps a third-party search API and returns a
+ * unified {@link SearchResult} array so the LLM tool layer can remain provider-agnostic.
+ */
+
+/** Normalised search result returned by all providers. */
+export interface SearchResult {
+    title: string;
+    url: string;
+    /** Short extract of the page (provider-chosen: highlights, summary or truncated body). */
+    snippet: string;
+    publishedDate?: string;
+    author?: string;
+}
+
+/** Optional search parameters understood by all providers. Providers ignore unsupported fields. */
+export interface SearchOptions {
+    numResults?: number;
+    includeDomains?: string[];
+    excludeDomains?: string[];
+    /** ISO-8601 date, e.g. "2025-01-01T00:00:00.000Z" */
+    startPublishedDate?: string;
+    /** ISO-8601 date */
+    endPublishedDate?: string;
+    /** Provider-specific category hint (Exa: company, research paper, news, ...). */
+    category?: string;
+}
+
+/** Implemented by every concrete search provider. */
+export interface SearchProvider {
+    /** Human-readable provider name shown to the LLM and in logs (e.g. "Exa", "Tavily"). */
+    name: string;
+    /** Run a web search and resolve with the normalised results. */
+    search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
+}
+
+/**
+ * User-supplied configuration for one search-provider instance, stored as JSON in the
+ * {@code searchProviders} option. Shape mirrors {@code LlmProviderSetup} so the same UI
+ * patterns (multiple named instances, optional API key and base URL) can be reused.
+ */
+export interface SearchProviderSetup {
+    id: string;
+    name: string;
+    /** Provider type id, e.g. "exa", "tavily", "searxng". */
+    provider: string;
+    /** API key, required by providers like Exa and Tavily. */
+    apiKey?: string;
+    /** Custom endpoint, required by providers like SearXNG. */
+    baseUrl?: string;
+}
+
+/** Number of results requested when the caller leaves {@code numResults} unset. */
+export const DEFAULT_MAX_RESULTS = 5;
+/** Default per-request timeout, in milliseconds. */
+export const DEFAULT_TIMEOUT_MS = 15_000;
+
+/** Common base class for concrete providers; subclasses supply {@code name} and {@code search}. */
+export abstract class BaseSearchProvider implements SearchProvider {
+    abstract name: string;
+    abstract search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
+}
diff --git a/apps/server/src/services/search_providers/exa.spec.ts b/apps/server/src/services/search_providers/exa.spec.ts
new file mode 100644
index 00000000000..b75cf49327d
--- /dev/null
+++ b/apps/server/src/services/search_providers/exa.spec.ts
@@ -0,0 +1,173 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+import { ExaSearchProvider } from "./exa.js";
+
+function mockFetchOnce(body: unknown, init: { ok?: boolean; status?: number; statusText?: string } = {}) {
+    const fetchMock = vi.fn(async () => ({
+        ok: init.ok ?? true,
+        status: init.status ?? 200,
+        statusText: init.statusText ??
"OK", + json: async () => body, + text: async () => (typeof body === "string" ? body : JSON.stringify(body)) + } as unknown as Response)); + vi.stubGlobal("fetch", fetchMock); + return fetchMock; +} + +describe("ExaSearchProvider", () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + afterEach(() => { + vi.unstubAllGlobals(); + }); + + it("throws when constructed without an API key", () => { + expect(() => new ExaSearchProvider("")).toThrow(/API key is required/); + }); + + it("sends the x-exa-integration header and requests highlights/summary/text", async () => { + const fetchMock = mockFetchOnce({ results: [] }); + const provider = new ExaSearchProvider("test-key"); + + await provider.search("foo bar"); + + expect(fetchMock).toHaveBeenCalledTimes(1); + const [url, init] = fetchMock.mock.calls[0] as [string, RequestInit]; + expect(url).toBe("https://api.exa.ai/search"); + const headers = init.headers as Record; + expect(headers["x-exa-integration"]).toBe("trilium"); + expect(headers["x-api-key"]).toBe("test-key"); + + const body = JSON.parse(init.body as string); + expect(body.query).toBe("foo bar"); + expect(body.type).toBe("auto"); + expect(body.contents).toMatchObject({ + highlights: expect.any(Object), + summary: true, + text: expect.any(Object) + }); + }); + + it("passes numResults, domain and date filters through unchanged", async () => { + const fetchMock = mockFetchOnce({ results: [] }); + const provider = new ExaSearchProvider("test-key"); + + await provider.search("ai news", { + numResults: 7, + includeDomains: ["wired.com"], + excludeDomains: ["example.com"], + startPublishedDate: "2025-01-01T00:00:00.000Z", + endPublishedDate: "2025-06-01T00:00:00.000Z", + category: "news" + }); + + const [, init] = fetchMock.mock.calls[0] as [string, RequestInit]; + const body = JSON.parse(init.body as string); + expect(body.numResults).toBe(7); + expect(body.includeDomains).toEqual(["wired.com"]); + expect(body.excludeDomains).toEqual(["example.com"]); + 
expect(body.startPublishedDate).toBe("2025-01-01T00:00:00.000Z"); + expect(body.endPublishedDate).toBe("2025-06-01T00:00:00.000Z"); + expect(body.category).toBe("news"); + }); + + it("uses highlights when present", async () => { + mockFetchOnce({ + results: [ + { + title: "Example", + url: "https://example.com", + highlights: ["first important sentence", "second context sentence"], + summary: "should be ignored", + text: "full body text that should also be ignored" + } + ] + }); + const provider = new ExaSearchProvider("test-key"); + + const results = await provider.search("q"); + expect(results).toHaveLength(1); + expect(results[0].snippet).toBe("first important sentence … second context sentence"); + }); + + it("falls back to summary when highlights are missing", async () => { + mockFetchOnce({ + results: [ + { + title: "Example", + url: "https://example.com", + summary: "summary sentence describing the page", + text: "full body text" + } + ] + }); + const provider = new ExaSearchProvider("test-key"); + + const [result] = await provider.search("q"); + expect(result.snippet).toBe("summary sentence describing the page"); + }); + + it("falls back to text when both highlights and summary are missing and truncates to 500 chars", async () => { + const longText = "x".repeat(900); + mockFetchOnce({ + results: [ + { + title: "Example", + url: "https://example.com", + text: longText + } + ] + }); + const provider = new ExaSearchProvider("test-key"); + + const [result] = await provider.search("q"); + expect(result.snippet).toHaveLength(501); + expect(result.snippet.endsWith("…")).toBe(true); + }); + + it("returns an empty snippet when no content fields are present", async () => { + mockFetchOnce({ + results: [{ title: "No content", url: "https://example.com" }] + }); + const provider = new ExaSearchProvider("test-key"); + + const [result] = await provider.search("q"); + expect(result.snippet).toBe(""); + expect(result.title).toBe("No content"); + 
expect(result.url).toBe("https://example.com");
+    });
+
+    it("propagates publishedDate and author when present", async () => {
+        mockFetchOnce({
+            results: [
+                {
+                    title: "Example",
+                    url: "https://example.com",
+                    summary: "summary",
+                    publishedDate: "2025-03-15",
+                    author: "Jane Doe"
+                }
+            ]
+        });
+        const provider = new ExaSearchProvider("test-key");
+
+        const [result] = await provider.search("q");
+        expect(result.publishedDate).toBe("2025-03-15");
+        expect(result.author).toBe("Jane Doe");
+    });
+
+    it("throws a descriptive error on non-2xx responses", async () => {
+        mockFetchOnce("unauthorized", { ok: false, status: 401 });
+        const provider = new ExaSearchProvider("test-key");
+
+        await expect(provider.search("q")).rejects.toThrow(/Exa search failed: 401/);
+    });
+
+    it("handles a response with no results field", async () => {
+        mockFetchOnce({});
+        const provider = new ExaSearchProvider("test-key");
+
+        await expect(provider.search("q")).resolves.toEqual([]);
+    });
+});
diff --git a/apps/server/src/services/search_providers/exa.ts b/apps/server/src/services/search_providers/exa.ts
new file mode 100644
index 00000000000..3ed3e3b050e
--- /dev/null
+++ b/apps/server/src/services/search_providers/exa.ts
@@ -0,0 +1,115 @@
+import {
+    BaseSearchProvider,
+    DEFAULT_MAX_RESULTS,
+    DEFAULT_TIMEOUT_MS,
+    type SearchOptions,
+    type SearchResult
+} from "./base_search_provider.js";
+
+interface ExaResult {
+    title: string;
+    url: string;
+    publishedDate?: string | null;
+    author?: string | null;
+    text?: string;
+    highlights?: string[];
+    summary?: string;
+}
+
+interface ExaSearchResponse {
+    results?: ExaResult[];
+}
+
+/** Upper bound on the raw-text extract; used both in the request and when truncating. */
+const SNIPPET_MAX_CHARS = 500;
+
+/**
+ * Exa search provider (https://exa.ai). Uses POST /search with the unified `contents`
+ * field to fetch highlights, a summary and a short text extract in a single request.
+ */
+export class ExaSearchProvider extends BaseSearchProvider {
+    name = "Exa";
+
+    /**
+     * @param apiKey sent in the x-api-key header of every request.
+     * @param timeoutMs abort the HTTP request after this many milliseconds.
+     * @throws Error when the API key is empty.
+     */
+    constructor(private readonly apiKey: string, private readonly timeoutMs: number = DEFAULT_TIMEOUT_MS) {
+        super();
+        if (!apiKey) {
+            throw new Error("API key is required for Exa search provider");
+        }
+    }
+
+    /**
+     * Run a search and map each hit to a {@link SearchResult}.
+     *
+     * @throws Error on a non-2xx response (status and body are included in the message).
+     */
+    async search(query: string, options: SearchOptions = {}): Promise<SearchResult[]> {
+        const body: Record<string, unknown> = {
+            query,
+            type: "auto",
+            numResults: options.numResults ?? DEFAULT_MAX_RESULTS,
+            contents: {
+                highlights: { numSentences: 3, highlightsPerUrl: 3 },
+                summary: true,
+                text: { maxCharacters: SNIPPET_MAX_CHARS }
+            }
+        };
+
+        // Optional filters are only added to the request body when the caller set them.
+        if (options.category) body.category = options.category;
+        if (options.includeDomains?.length) body.includeDomains = options.includeDomains;
+        if (options.excludeDomains?.length) body.excludeDomains = options.excludeDomains;
+        if (options.startPublishedDate) body.startPublishedDate = options.startPublishedDate;
+        if (options.endPublishedDate) body.endPublishedDate = options.endPublishedDate;
+
+        const response = await fetch("https://api.exa.ai/search", {
+            method: "POST",
+            headers: {
+                "Content-Type": "application/json",
+                "x-api-key": this.apiKey,
+                "x-exa-integration": "trilium"
+            },
+            body: JSON.stringify(body),
+            signal: AbortSignal.timeout(this.timeoutMs)
+        });
+
+        if (!response.ok) {
+            const errorBody = await response.text().catch(() => "");
+            throw new Error(`Exa search failed: ${response.status} ${errorBody}`.trim());
+        }
+
+        const data = (await response.json()) as ExaSearchResponse;
+        return (data.results ?? []).map(toSearchResult);
+    }
+}
+
+/**
+ * Exa returns any combination of highlights/summary/text on a result. Pick the most
+ * useful snippet in priority order so the LLM sees meaningful context even when some
+ * fields are absent (e.g. highlights missing for a short page).
+ */
+function toSearchResult(r: ExaResult): SearchResult {
+    // Blank highlights carry no information; drop them so the summary/text fallbacks apply.
+    const highlights = (r.highlights ?? []).filter(h => typeof h === "string" && h.trim().length > 0);
+
+    let snippet = "";
+    if (highlights.length > 0) {
+        snippet = highlights.join(" … ");
+    } else if (r.summary) {
+        snippet = r.summary;
+    } else if (r.text) {
+        snippet = r.text.length > SNIPPET_MAX_CHARS ? `${r.text.slice(0, SNIPPET_MAX_CHARS)}…` : r.text;
+    }
+
+    return {
+        title: r.title,
+        url: r.url,
+        snippet,
+        publishedDate: r.publishedDate ?? undefined,
+        author: r.author ?? undefined
+    };
+}
diff --git a/apps/server/src/services/search_providers/index.spec.ts b/apps/server/src/services/search_providers/index.spec.ts
new file mode 100644
index 00000000000..cedeecc5e1b
--- /dev/null
+++ b/apps/server/src/services/search_providers/index.spec.ts
@@ -0,0 +1,108 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+vi.mock("../options.js", () => ({
+    default: {
+        getOptionOrNull: vi.fn()
+    }
+}));
+
+vi.mock("../log.js", () => ({
+    default: {
+        error: vi.fn(),
+        info: vi.fn()
+    }
+}));
+
+import optionService from "../options.js";
+import {
+    clearSearchProviderCache,
+    getConfiguredSearchProviders,
+    getFirstSearchProvider,
+    getSearchProvider,
+    hasConfiguredSearchProviders
+} from "./index.js";
+
+const getOption = vi.mocked(optionService.getOptionOrNull);
+
+describe("search_providers registry", () => {
+    beforeEach(() => {
+        clearSearchProviderCache();
+        getOption.mockReset();
+    });
+    afterEach(() => {
+        getOption.mockReset();
+    });
+
+    it("returns [] and reports no providers when option is empty", () => {
+        getOption.mockReturnValue(null);
+        expect(getConfiguredSearchProviders()).toEqual([]);
+        expect(hasConfiguredSearchProviders()).toBe(false);
+        expect(getFirstSearchProvider()).toBeNull();
+    });
+
+    it("returns [] gracefully on malformed JSON", () => {
+        getOption.mockReturnValue("not json{");
+        expect(getConfiguredSearchProviders()).toEqual([]);
+        expect(getFirstSearchProvider()).toBeNull();
+    });
+
+    it("parses a configured Exa provider and instantiates it", () => {
+
getOption.mockReturnValue(JSON.stringify([ + { id: "exa_1", name: "Exa", provider: "exa", apiKey: "k" } + ])); + + const first = getFirstSearchProvider(); + expect(first).not.toBeNull(); + expect(first?.name).toBe("Exa"); + }); + + it("returns the requested provider when looked up by id", () => { + getOption.mockReturnValue(JSON.stringify([ + { id: "tav_1", name: "Tavily", provider: "tavily", apiKey: "tk" }, + { id: "exa_1", name: "Exa", provider: "exa", apiKey: "ek" } + ])); + + const byId = getSearchProvider("exa_1"); + expect(byId?.name).toBe("Exa"); + + const first = getSearchProvider(); + expect(first?.name).toBe("Tavily"); + }); + + it("returns null when the configured provider type is unknown", () => { + getOption.mockReturnValue(JSON.stringify([ + { id: "mystery_1", name: "Mystery", provider: "mystery", apiKey: "k" } + ])); + + expect(getFirstSearchProvider()).toBeNull(); + }); + + it("caches instantiated providers across calls", () => { + getOption.mockReturnValue(JSON.stringify([ + { id: "exa_1", name: "Exa", provider: "exa", apiKey: "k" } + ])); + + const a = getFirstSearchProvider(); + const b = getFirstSearchProvider(); + expect(a).toBe(b); + }); + + it("clearSearchProviderCache forces re-instantiation", () => { + getOption.mockReturnValue(JSON.stringify([ + { id: "exa_1", name: "Exa", provider: "exa", apiKey: "k" } + ])); + + const a = getFirstSearchProvider(); + clearSearchProviderCache(); + const b = getFirstSearchProvider(); + expect(a).not.toBe(b); + }); + + it("returns null rather than throwing when a provider fails to instantiate (missing key)", () => { + getOption.mockReturnValue(JSON.stringify([ + { id: "exa_bad", name: "Bad Exa", provider: "exa", apiKey: "" } + ])); + + expect(getFirstSearchProvider()).toBeNull(); + }); +}); diff --git a/apps/server/src/services/search_providers/index.ts b/apps/server/src/services/search_providers/index.ts new file mode 100644 index 00000000000..39469d1978b --- /dev/null +++ 
b/apps/server/src/services/search_providers/index.ts
@@ -0,0 +1,157 @@
+/**
+ * Registry for pluggable web-search providers used by the LLM agent.
+ *
+ * Mirrors the shape of {@code services/llm/index.ts}: user-configured provider
+ * instances are persisted as JSON in the {@code searchProviders} option; this module
+ * instantiates them lazily and caches them by id. The LLM {@code base_provider.ts}
+ * consults {@link getFirstSearchProvider} to decide whether to expose a pluggable
+ * search tool or fall back to each LLM provider's native web-search implementation.
+ */
+
+import log from "../log.js";
+import optionService from "../options.js";
+import type { SearchProvider, SearchProviderSetup } from "./base_search_provider.js";
+import { ExaSearchProvider } from "./exa.js";
+import { SearxngSearchProvider } from "./searxng.js";
+import { TavilySearchProvider } from "./tavily.js";
+
+/** Factory functions for creating search-provider instances, keyed by provider type id. */
+const providerFactories: Record<string, (setup: SearchProviderSetup) => SearchProvider> = {
+    exa: (s) => new ExaSearchProvider(s.apiKey ?? ""),
+    tavily: (s) => new TavilySearchProvider(s.apiKey ?? ""),
+    searxng: (s) => new SearxngSearchProvider(s.baseUrl ?? "")
+};
+
+/** A cached provider instance plus the serialised setup it was built from. */
+interface CachedProvider {
+    fingerprint: string;
+    provider: SearchProvider;
+}
+
+/**
+ * Cache of instantiated providers by their config id. A Map (rather than a plain
+ * object) so that ids such as "constructor" never resolve to Object.prototype members.
+ */
+const cachedProviders = new Map<string, CachedProvider>();
+
+/** Runtime check that a parsed JSON value has the string fields every setup must carry. */
+function isSearchProviderSetup(value: unknown): value is SearchProviderSetup {
+    if (typeof value !== "object" || value === null) {
+        return false;
+    }
+    const candidate = value as Partial<SearchProviderSetup>;
+    return typeof candidate.id === "string"
+        && typeof candidate.name === "string"
+        && typeof candidate.provider === "string";
+}
+
+/**
+ * Read and validate the {@code searchProviders} option.
+ *
+ * @returns the configured setups; an empty array when the option is unset, is not valid
+ *          JSON, or is not a JSON array. Malformed entries are dropped and logged.
+ */
+export function getConfiguredSearchProviders(): SearchProviderSetup[] {
+    try {
+        const providersJson = optionService.getOptionOrNull("searchProviders");
+        if (!providersJson) {
+            return [];
+        }
+
+        // JSON.parse can legally yield null, an object or a scalar; callers rely on an array.
+        const parsed: unknown = JSON.parse(providersJson);
+        if (!Array.isArray(parsed)) {
+            log.error("Ignoring searchProviders option: expected a JSON array");
+            return [];
+        }
+
+        const setups = parsed.filter(isSearchProviderSetup);
+        if (setups.length !== parsed.length) {
+            log.error(`Ignoring ${parsed.length - setups.length} malformed searchProviders entries`);
+        }
+        return setups;
+    } catch (e) {
+        log.error(`Failed to parse searchProviders option: ${e}`);
+        return [];
+    }
+}
+
+/** Whether at least one valid search provider is configured. */
+export function hasConfiguredSearchProviders(): boolean {
+    return getConfiguredSearchProviders().length > 0;
+}
+
+/**
+ * Build (or fetch from cache) the provider described by {@code config}.
+ * Returns null, after logging, when the type is unknown or the constructor throws.
+ */
+function instantiateProvider(config: SearchProviderSetup): SearchProvider | null {
+    // Re-use the cached instance only while the stored setup is unchanged, so an edited
+    // API key or base URL takes effect without an explicit cache clear.
+    const fingerprint = JSON.stringify(config);
+    const cached = cachedProviders.get(config.id);
+    if (cached?.fingerprint === fingerprint) {
+        return cached.provider;
+    }
+
+    // Own-property lookup: a type such as "toString" must not resolve to an inherited method.
+    const factory = Object.prototype.hasOwnProperty.call(providerFactories, config.provider)
+        ? providerFactories[config.provider]
+        : undefined;
+    if (!factory) {
+        log.error(`Unknown search provider type: ${config.provider}. Available: ${Object.keys(providerFactories).join(", ")}`);
+        return null;
+    }
+
+    try {
+        const provider = factory(config);
+        cachedProviders.set(config.id, { fingerprint, provider });
+        return provider;
+    } catch (e) {
+        log.error(`Failed to instantiate ${config.provider} search provider: ${e}`);
+        return null;
+    }
+}
+
+/**
+ * Return an instantiated provider by id, or, when no id is given, the first configured
+ * provider that can actually be instantiated.
+ *
+ * @param providerId id of a configured provider; omit to use the default one.
+ * @returns the provider, or null when nothing is configured, the id is not found, or no
+ *          usable provider could be created (unknown type / constructor failure).
+ */
+export function getSearchProvider(providerId?: string): SearchProvider | null {
+    const configs = getConfiguredSearchProviders();
+
+    if (providerId) {
+        const config = configs.find(c => c.id === providerId);
+        return config ? instantiateProvider(config) : null;
+    }
+
+    // Skip broken entries so one bad config does not hide a working provider behind it.
+    for (const config of configs) {
+        const provider = instantiateProvider(config);
+        if (provider) {
+            return provider;
+        }
+    }
+    return null;
+}
+
+/** Convenience: first usable configured provider (the one the LLM agent will use by default). */
+export function getFirstSearchProvider(): SearchProvider | null {
+    return getSearchProvider();
+}
+
+/** Clear the provider cache. Call this when search-provider configurations change. */
+export function clearSearchProviderCache(): void {
+    cachedProviders.clear();
+}
+
+export type {
+    SearchOptions,
+    SearchProvider,
+    SearchProviderSetup,
+    SearchResult
+} from "./base_search_provider.js";
diff --git a/apps/server/src/services/search_providers/searxng.spec.ts b/apps/server/src/services/search_providers/searxng.spec.ts
new file mode 100644
index 00000000000..65a6c2d6f01
--- /dev/null
+++ b/apps/server/src/services/search_providers/searxng.spec.ts
@@ -0,0 +1,70 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+import { SearxngSearchProvider } from "./searxng.js";
+
+function mockFetchOnce(body: unknown, init: { ok?: boolean; status?: number } = {}) {
+    const fetchMock = vi.fn(async () => ({
+        ok: init.ok ?? true,
+        status: init.status ?? 200,
+        json: async () => body,
+        text: async () => (typeof body === "string" ?
body : JSON.stringify(body)) + } as unknown as Response)); + vi.stubGlobal("fetch", fetchMock); + return fetchMock; +} + +describe("SearxngSearchProvider", () => { + beforeEach(() => vi.restoreAllMocks()); + afterEach(() => vi.unstubAllGlobals()); + + it("throws when constructed without a base URL", () => { + expect(() => new SearxngSearchProvider("")).toThrow(/Base URL is required/); + }); + + it("strips trailing slashes from the base URL", async () => { + const fetchMock = mockFetchOnce({ results: [] }); + const provider = new SearxngSearchProvider("http://localhost:8888/"); + + await provider.search("hello"); + + const [url] = fetchMock.mock.calls[0] as [string]; + expect(url.startsWith("http://localhost:8888/search?")).toBe(true); + expect(url).toContain("q=hello"); + expect(url).toContain("format=json"); + }); + + it("maps SearXNG content to snippet and caps to numResults", async () => { + mockFetchOnce({ + results: [ + { title: "one", url: "https://a.com", content: "snippet one" }, + { title: "two", url: "https://b.com", content: "snippet two" }, + { title: "three", url: "https://c.com", content: "snippet three" } + ] + }); + const provider = new SearxngSearchProvider("http://localhost:8888"); + + const results = await provider.search("q", { numResults: 2 }); + expect(results).toHaveLength(2); + expect(results[0]).toEqual({ + title: "one", + url: "https://a.com", + snippet: "snippet one", + publishedDate: undefined + }); + }); + + it("defaults missing content to an empty snippet rather than crashing", async () => { + mockFetchOnce({ results: [{ title: "x", url: "https://x.com" }] }); + const provider = new SearxngSearchProvider("http://localhost:8888"); + + const [r] = await provider.search("q"); + expect(r.snippet).toBe(""); + }); + + it("throws on non-2xx responses", async () => { + mockFetchOnce("server error", { ok: false, status: 500 }); + const provider = new SearxngSearchProvider("http://localhost:8888"); + + await 
expect(provider.search("q")).rejects.toThrow(/SearXNG search failed: 500/);
+    });
+});
diff --git a/apps/server/src/services/search_providers/searxng.ts b/apps/server/src/services/search_providers/searxng.ts
new file mode 100644
index 00000000000..f709216222c
--- /dev/null
+++ b/apps/server/src/services/search_providers/searxng.ts
@@ -0,0 +1,78 @@
+import {
+    BaseSearchProvider,
+    DEFAULT_MAX_RESULTS,
+    DEFAULT_TIMEOUT_MS,
+    type SearchOptions,
+    type SearchResult
+} from "./base_search_provider.js";
+
+interface SearxngResult {
+    title: string;
+    url: string;
+    content?: string;
+    publishedDate?: string;
+}
+
+interface SearxngResponse {
+    results?: SearxngResult[];
+}
+
+/**
+ * SearXNG search provider. Self-hosted metasearch; no API key, just a base URL.
+ * See https://docs.searxng.org/.
+ */
+export class SearxngSearchProvider extends BaseSearchProvider {
+    name = "SearXNG";
+    private readonly baseUrl: string;
+
+    /**
+     * @param baseUrl root URL of the SearXNG instance; trailing slashes are removed.
+     * @param timeoutMs abort the HTTP request after this many milliseconds.
+     * @throws Error when the base URL is empty or only whitespace.
+     */
+    constructor(baseUrl: string, private readonly timeoutMs: number = DEFAULT_TIMEOUT_MS) {
+        super();
+        const trimmedUrl = baseUrl.trim();
+        if (!trimmedUrl) {
+            throw new Error("Base URL is required for SearXNG search provider");
+        }
+        this.baseUrl = trimmedUrl.replace(/\/+$/, "");
+    }
+
+    /**
+     * Query the instance's JSON search endpoint and normalise the results.
+     * Only {@code numResults} is honoured; the other options are ignored.
+     *
+     * @throws Error on a non-2xx response (status and body are included in the message).
+     */
+    async search(query: string, options: SearchOptions = {}): Promise<SearchResult[]> {
+        const params = new URLSearchParams({
+            q: query,
+            format: "json",
+            categories: "general",
+            language: "auto"
+        });
+
+        const response = await fetch(`${this.baseUrl}/search?${params.toString()}`, {
+            headers: { Accept: "application/json" },
+            signal: AbortSignal.timeout(this.timeoutMs)
+        });
+
+        if (!response.ok) {
+            const errorBody = await response.text().catch(() => "");
+            throw new Error(`SearXNG search failed: ${response.status} ${errorBody}`.trim());
+        }
+
+        const data = (await response.json()) as SearxngResponse;
+        // Clamp at zero: a negative end index would make slice() count back from the end
+        // and return "all but the last N" results instead of none.
+        const limit = Math.max(0, options.numResults ?? DEFAULT_MAX_RESULTS);
+
+        return (data.results ?? []).slice(0, limit).map(r => ({
+            title: r.title,
+            url: r.url,
+            snippet: r.content ?? "",
+            publishedDate: r.publishedDate
+        }));
+    }
+}
diff --git a/apps/server/src/services/search_providers/tavily.spec.ts b/apps/server/src/services/search_providers/tavily.spec.ts
new file mode 100644
index 00000000000..c6f932bbbe5
--- /dev/null
+++ b/apps/server/src/services/search_providers/tavily.spec.ts
@@ -0,0 +1,77 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+import { TavilySearchProvider } from "./tavily.js";
+
+function mockFetchOnce(body: unknown, init: { ok?: boolean; status?: number } = {}) {
+    const fetchMock = vi.fn(async () => ({
+        ok: init.ok ?? true,
+        status: init.status ?? 200,
+        json: async () => body,
+        text: async () => (typeof body === "string" ? body : JSON.stringify(body))
+    } as unknown as Response));
+    vi.stubGlobal("fetch", fetchMock);
+    return fetchMock;
+}
+
+describe("TavilySearchProvider", () => {
+    beforeEach(() => vi.restoreAllMocks());
+    afterEach(() => vi.unstubAllGlobals());
+
+    it("throws when constructed without an API key", () => {
+        expect(() => new TavilySearchProvider("")).toThrow(/API key is required/);
+    });
+
+    it("posts the query with api_key and honours domain filters", async () => {
+        const fetchMock = mockFetchOnce({ results: [] });
+        const provider = new TavilySearchProvider("tvly-test");
+
+        await provider.search("example", {
+            numResults: 3,
+            includeDomains: ["good.com"],
+            excludeDomains: ["bad.com"]
+        });
+
+        const [url, init] = fetchMock.mock.calls[0] as [string, RequestInit];
+        expect(url).toBe("https://api.tavily.com/search");
+        const body = JSON.parse(init.body as string);
+        expect(body.api_key).toBe("tvly-test");
+        expect(body.query).toBe("example");
+        expect(body.max_results).toBe(3);
+        expect(body.include_domains).toEqual(["good.com"]);
+        expect(body.exclude_domains).toEqual(["bad.com"]);
+    });
+
+    it("maps content to snippet and forwards published_date", async () => {
+        mockFetchOnce({
+            results: [
+                {
+                    title: "t",
+                    url: "https://a.com",
+                    content: "the snippet",
published_date: "2025-02-01" + } + ] + }); + const provider = new TavilySearchProvider("tvly-test"); + + const [r] = await provider.search("q"); + expect(r.title).toBe("t"); + expect(r.url).toBe("https://a.com"); + expect(r.snippet).toBe("the snippet"); + expect(r.publishedDate).toBe("2025-02-01"); + }); + + it("throws on non-2xx responses", async () => { + mockFetchOnce("rate limited", { ok: false, status: 429 }); + const provider = new TavilySearchProvider("tvly-test"); + + await expect(provider.search("q")).rejects.toThrow(/Tavily search failed: 429/); + }); + + it("returns [] when the response omits results", async () => { + mockFetchOnce({}); + const provider = new TavilySearchProvider("tvly-test"); + + await expect(provider.search("q")).resolves.toEqual([]); + }); +}); diff --git a/apps/server/src/services/search_providers/tavily.ts b/apps/server/src/services/search_providers/tavily.ts new file mode 100644 index 00000000000..bea3e3a2c65 --- /dev/null +++ b/apps/server/src/services/search_providers/tavily.ts @@ -0,0 +1,62 @@ +import { + BaseSearchProvider, + DEFAULT_MAX_RESULTS, + DEFAULT_TIMEOUT_MS, + type SearchOptions, + type SearchResult +} from "./base_search_provider.js"; + +interface TavilyResult { + title: string; + url: string; + content: string; + published_date?: string; +} + +interface TavilySearchResponse { + results?: TavilyResult[]; +} + +/** + * Tavily search provider (https://tavily.com). Free tier: 1000 queries/month. + */ +export class TavilySearchProvider extends BaseSearchProvider { + name = "Tavily"; + + constructor(private readonly apiKey: string, private readonly timeoutMs: number = DEFAULT_TIMEOUT_MS) { + super(); + if (!apiKey) { + throw new Error("API key is required for Tavily search provider"); + } + } + + async search(query: string, options: SearchOptions = {}): Promise { + const body: Record = { + api_key: this.apiKey, + query, + max_results: options.numResults ?? 
DEFAULT_MAX_RESULTS + }; + if (options.includeDomains?.length) body.include_domains = options.includeDomains; + if (options.excludeDomains?.length) body.exclude_domains = options.excludeDomains; + + const response = await fetch("https://api.tavily.com/search", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + signal: AbortSignal.timeout(this.timeoutMs) + }); + + if (!response.ok) { + const errorBody = await response.text().catch(() => ""); + throw new Error(`Tavily search failed: ${response.status} ${errorBody}`.trim()); + } + + const data = (await response.json()) as TavilySearchResponse; + return (data.results ?? []).map(r => ({ + title: r.title, + url: r.url, + snippet: r.content, + publishedDate: r.published_date + })); + } +} diff --git a/apps/server/src/services/search_providers/tool.ts b/apps/server/src/services/search_providers/tool.ts new file mode 100644 index 00000000000..9c90b764503 --- /dev/null +++ b/apps/server/src/services/search_providers/tool.ts @@ -0,0 +1,46 @@ +/** + * AI-SDK tool wrapper that exposes a configured {@link SearchProvider} as the + * {@code web_search} tool consumed by the LLM chat layer. + */ + +import { tool, type ToolSet } from "ai"; +import { z } from "zod"; + +import log from "../log.js"; +import type { SearchProvider } from "./base_search_provider.js"; + +const MAX_REQUESTED_RESULTS = 20; + +export function addConfiguredSearchTool(tools: ToolSet, provider: SearchProvider): void { + tools.web_search = tool({ + description: `Search the web for current information using ${provider.name}. 
Use this when the user asks about recent events, real-time data, or anything requiring up-to-date web information.`, + inputSchema: z.object({ + query: z.string().describe("The search query"), + numResults: z + .number() + .int() + .min(1) + .max(MAX_REQUESTED_RESULTS) + .optional() + .describe("Maximum number of results to return") + }), + execute: async ({ query, numResults }) => { + try { + const results = await provider.search(query, { numResults }); + return { + results: results.map(r => ({ + title: r.title, + url: r.url, + snippet: r.snippet, + ...(r.publishedDate && { publishedDate: r.publishedDate }), + ...(r.author && { author: r.author }) + })) + }; + } catch (e) { + const message = e instanceof Error ? e.message : String(e); + log.error(`${provider.name} search error: ${message}`); + return { error: `Search failed: ${message}` }; + } + } + }); +} diff --git a/packages/commons/src/lib/options_interface.ts b/packages/commons/src/lib/options_interface.ts index 5ecc585e614..13611de6d9a 100644 --- a/packages/commons/src/lib/options_interface.ts +++ b/packages/commons/src/lib/options_interface.ts @@ -194,6 +194,12 @@ export interface OptionDefinitions extends KeyboardShortcutsOptions
{t("llm.search_provider_description")}