diff --git a/CHANGELOG.md b/CHANGELOG.md index bd94d0181..0238e7f12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [EE] Added prompt caching for Ask Sourcebot. For Anthropic models, the static prompt prefix (tool definitions, system prompt, and conversation history) is marked with a cache breakpoint so it is billed at the provider's discounted cache-read rate on subsequent agent steps and follow-up turns. Toggle with `SOURCEBOT_CHAT_PROMPT_CACHING_ENABLED` (default `true`). [#1278](https://github.com/sourcebot-dev/sourcebot/pull/1278) - [EE] Added a cached-token breakdown to the Ask Sourcebot message details, showing what share of the input tokens were served from the model provider's prompt cache. [#1278](https://github.com/sourcebot-dev/sourcebot/pull/1278) +### Changed +- Anthropic thinking mode (adaptive vs. extended) is now resolved from the model's capabilities via the Anthropic Models API instead of a hardcoded model list. [#1294](https://github.com/sourcebot-dev/sourcebot/pull/1294) + ### Fixed - Upgraded `protobufjs` to `^7.6.2`. [#1281](https://github.com/sourcebot-dev/sourcebot/pull/1281) - Upgraded `picomatch` to `^4.0.4`. 
[#1283](https://github.com/sourcebot-dev/sourcebot/pull/1283) diff --git a/packages/web/package.json b/packages/web/package.json index cd06162c7..aaf466095 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -26,6 +26,7 @@ "@ai-sdk/openai-compatible": "^2.0.41", "@ai-sdk/react": "^3.0.169", "@ai-sdk/xai": "^3.0.83", + "@anthropic-ai/sdk": "^0.104.0", "@auth/prisma-adapter": "^2.11.1", "@aws-sdk/credential-providers": "^3.1036.0", "@bprogress/next": "^3.2.12", diff --git a/packages/web/src/features/chat/llm.server.ts b/packages/web/src/features/chat/llm.server.ts index 38f9fb5db..2803f9971 100644 --- a/packages/web/src/features/chat/llm.server.ts +++ b/packages/web/src/features/chat/llm.server.ts @@ -1,6 +1,8 @@ import 'server-only'; import { createPostHogClient, tryGetPostHogDistinctId } from "@/lib/posthog"; +import { logger } from "./logger"; +import Anthropic from "@anthropic-ai/sdk"; import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock'; import { AnthropicProviderOptions, createAnthropic } from '@ai-sdk/anthropic'; import { createAzure } from '@ai-sdk/azure'; @@ -20,6 +22,7 @@ import { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type'; import { Token } from "@sourcebot/schemas/v3/shared.type"; import { env, getTokenFromConfig } from '@sourcebot/shared'; import { extractReasoningMiddleware, JSONValue, wrapLanguageModel } from "ai"; +import * as Sentry from "@sentry/nextjs"; // @note: This module resolves a configured language model into an AI SDK // provider object. It is intentionally FSL (open source) provider plumbing — @@ -68,34 +71,36 @@ export const getAISDKLanguageModelAndOptions = async (config: LanguageModel): Pr }; } case 'anthropic': { + const apiKey = config.token + ? await getTokenFromConfig(config.token) + : env.ANTHROPIC_API_KEY; + const authToken = config.authToken + ? await getTokenFromConfig(config.authToken) + : env.ANTHROPIC_AUTH_TOKEN; + const headers = config.headers + ? 
await extractLanguageModelKeyValuePairs(config.headers) + : undefined; + const anthropic = createAnthropic({ baseURL: config.baseUrl, - apiKey: config.token - ? await getTokenFromConfig(config.token) - : env.ANTHROPIC_API_KEY, - authToken: config.authToken - ? await getTokenFromConfig(config.authToken) - : env.ANTHROPIC_AUTH_TOKEN, - headers: config.headers - ? await extractLanguageModelKeyValuePairs(config.headers) - : undefined, + apiKey, + authToken, + headers, }); - const isAdaptiveThinkingSupported = - modelId.startsWith('claude-opus-4-7') || - modelId.startsWith('claude-opus-4-8'); + const thinking = await tryResolveAnthropicThinkingConfig({ + modelId, + baseUrl: config.baseUrl, + apiKey, + authToken, + headers, + }); return { model: anthropic(modelId), providerOptions: { anthropic: { - thinking: isAdaptiveThinkingSupported ? { - type: "adaptive", - display: "summarized" - } : { - type: "enabled", - budgetTokens: env.ANTHROPIC_THINKING_BUDGET_TOKENS, - } + ...(thinking ? { thinking } : {}), } satisfies AnthropicProviderOptions, }, }; @@ -344,3 +349,111 @@ const extractLanguageModelKeyValuePairs = async ( return resolvedPairs; }; + +type AnthropicThinkingConfig = NonNullable; +const anthropicThinkingConfigCache = new Map(); + +/** + * Resolves the `thinking` provider option we pass to the + * AI SDK for Anthropic models. Queries the Models API to + * determine the model's capabilities. Returns undefined + * if thinking is unsupported or we are unable to resolve. Successful lookups are cached in an + * in-memory cache. + * + * @see https://docs.anthropic.com/en/api/models + */ +const tryResolveAnthropicThinkingConfig = async ({ + modelId, + baseUrl, + apiKey, + authToken, + headers, +}: { + modelId: string, + baseUrl?: string, + apiKey?: string, + authToken?: string, + headers?: Record, +}): Promise => { + const cacheKey = `${baseUrl ?? 
'default'}::${modelId}`; + if (anthropicThinkingConfigCache.has(cacheKey)) { + return anthropicThinkingConfigCache.get(cacheKey); + } + + const { + thinkingConfig, + shouldCache + } = await (async (): Promise<{ thinkingConfig: AnthropicThinkingConfig | undefined, shouldCache: boolean }> => { + try { + // `@ai-sdk/anthropic` expects `baseURL` to include the `/v1` path segment, + // whereas the SDK client appends `/v1` itself — so strip a trailing `/v1` + // from the same configured value before handing it to the client. + const baseURL = baseUrl + ? (baseUrl.replace(/\/+$/, '').replace(/\/v1$/, '') || undefined) + : undefined; + + const client = new Anthropic({ + apiKey, + authToken, + baseURL, + defaultHeaders: headers, + maxRetries: 1, + }); + + const { capabilities } = await client.models.retrieve(modelId, undefined, { + timeout: 10_000, + }); + + if (!capabilities) { + throw new Error('the models API did not return a capabilities object.'); + } + + const thinking = capabilities.thinking; + if (thinking.supported === false) { + return { + thinkingConfig: undefined, + shouldCache: true + }; + } + + if (thinking.types.adaptive.supported) { + return { + thinkingConfig: { + type: "adaptive", + display: "summarized", + } satisfies AnthropicThinkingConfig, + shouldCache: true, + }; + } + + if (thinking.types.enabled.supported) { + return { + thinkingConfig: { + type: "enabled", + budgetTokens: env.ANTHROPIC_THINKING_BUDGET_TOKENS, + } satisfies AnthropicThinkingConfig, + shouldCache: true, + }; + } + + return { + thinkingConfig: undefined, + shouldCache: true + }; + } catch (error) { + Sentry.captureException(error); + logger.warn(`Failed to fetch Anthropic model capabilities for '${modelId}'. Omitting the thinking option. 
${error}`); + return { + thinkingConfig: undefined, + shouldCache: false + }; + } + })(); + + + if (shouldCache) { + anthropicThinkingConfigCache.set(cacheKey, thinkingConfig); + } + + return thinkingConfig; +}; diff --git a/yarn.lock b/yarn.lock index 08e794ba1..f72c0d012 100644 --- a/yarn.lock +++ b/yarn.lock @@ -251,6 +251,23 @@ __metadata: languageName: node linkType: hard +"@anthropic-ai/sdk@npm:^0.104.0": + version: 0.104.0 + resolution: "@anthropic-ai/sdk@npm:0.104.0" + dependencies: + json-schema-to-ts: "npm:^3.1.1" + standardwebhooks: "npm:^1.0.0" + peerDependencies: + zod: ^3.25.0 || ^4.0.0 + peerDependenciesMeta: + zod: + optional: true + bin: + anthropic-ai-sdk: bin/cli + checksum: 10c0/d73197e0fccea3a8a957a895d4fc637bd95ea2ef5a812dce5e48056b06cb044f837bafdbb0c6a4b154e149b352807b5ef43df05f51ffe6f27832a440e1b07569 + languageName: node + linkType: hard + "@anthropic-ai/sdk@npm:^0.78.0": version: 0.78.0 resolution: "@anthropic-ai/sdk@npm:0.78.0" @@ -9381,6 +9398,7 @@ __metadata: "@ai-sdk/openai-compatible": "npm:^2.0.41" "@ai-sdk/react": "npm:^3.0.169" "@ai-sdk/xai": "npm:^3.0.83" + "@anthropic-ai/sdk": "npm:^0.104.0" "@asteasolutions/zod-to-openapi": "npm:7.3.4" "@auth/prisma-adapter": "npm:^2.11.1" "@aws-sdk/credential-providers": "npm:^3.1036.0" @@ -9604,6 +9622,13 @@ __metadata: languageName: node linkType: hard +"@stablelib/base64@npm:^1.0.0": + version: 1.0.1 + resolution: "@stablelib/base64@npm:1.0.1" + checksum: 10c0/6330720f021819d19cecfe274111b79a256caa81df478d6b0ae7effc8842b96915b6aeed85926ff05b4d48ec1fc78ad043d928b730ee4e6cc6e8cba6aa097bed + languageName: node + linkType: hard + "@standard-schema/spec@npm:1.1.0, @standard-schema/spec@npm:^1.1.0": version: 1.1.0 resolution: "@standard-schema/spec@npm:1.1.0" @@ -14389,6 +14414,13 @@ __metadata: languageName: node linkType: hard +"fast-sha256@npm:^1.3.0": + version: 1.3.0 + resolution: "fast-sha256@npm:1.3.0" + checksum: 
10c0/87f9e4baa7639576cf60a2b6235c9f436e1a1c52323abbd8a705b5bea8355500acf176f2aed0c14f2ecd6d6007e26151461bab2f27b8953bcca8d9d6b76a86e4 + languageName: node + linkType: hard + "fast-string-truncated-width@npm:^3.0.2": version: 3.0.3 resolution: "fast-string-truncated-width@npm:3.0.3" @@ -21492,6 +21524,16 @@ __metadata: languageName: node linkType: hard +"standardwebhooks@npm:^1.0.0": + version: 1.0.0 + resolution: "standardwebhooks@npm:1.0.0" + dependencies: + "@stablelib/base64": "npm:^1.0.0" + fast-sha256: "npm:^1.3.0" + checksum: 10c0/aee097d0f3c05172c19b80df1ed9596a2ce92f8956957650d0bbe47c2ca6d36515796b51d523333cb4a48c889b2ab130d789e7879e14975c4381bc7a61274327 + languageName: node + linkType: hard + "statuses@npm:2.0.1, statuses@npm:^2.0.1": version: 2.0.1 resolution: "statuses@npm:2.0.1"