Skip to content

Commit f0649ef

Browse files
feat(byok): add direct BYOK support for Ollama Cloud (#2819) ☁️
* feat(byok): add direct BYOK support for Ollama Cloud Adds Ollama Cloud as a new direct BYOK provider using its OpenAI-compatible endpoint at https://ollama.com/v1. Model list and metadata are curated from https://models.dev/api.json with descriptions sourced from the Kilo gateway model catalog. * refactor(byok): sync Ollama Cloud models dynamically from models.dev Reworks the Ollama Cloud BYOK provider to pull its model catalog dynamically via sync-direct-byok (mirroring zai-coding) instead of hardcoding 37 models. Keeps a single recommended model (gpt-oss:120b) inline and adds the display name to direct-byok-meta. * chore(byok): use kimi-k2.6 as recommended Ollama Cloud model * refactor(byok): fetch models.dev catalog once per sync * refactor(byok): bareModelId strips vendor prefix and variant suffix * chore(byok): drop ollama-cloud comment slop * fix(byok): align ollama-cloud test model with recommended model The dynamic sync leaves Redis empty until the first cron run, so only the hardcoded recommended model is guaranteed to be present in the model list. Use it as the test model to avoid a broken test-key flow pre-sync. * feat(ollama-cloud): add none/low/medium/high reasoning variants * fix(byok): drop description from ollama-cloud recommended model, simplify sync * refactor(byok): share models.dev fetch, strip :cloud suffix, fix ollama-cloud types - Cache the models.dev catalog within a single syncDirectByokModels run so providers sourced from it share one HTTP fetch. - Rename stripVendorPrefix to modelIdToDisplayName and strip trailing :cloud tags (e.g. kimi-k2.6:cloud -> kimi-k2.6) in addition to vendor prefixes. - Drop variants: null from the ollama-cloud recommended model; the schema on main is now .optional(), so the explicit null was rejected by tsgo. * refactor(byok): strip any suffix after ':' in model display names Broaden modelIdToDisplayName so tags other than ':cloud' (e.g. ':latest') are stripped from the user-visible name as well. 
* feat(byok): map reasoning.effort to reasoning_effort for Ollama Cloud Mirror the chutes-byok transformRequest so reasoning variants (none/low/ medium/high) flow through to Ollama Cloud's OpenAI-compatible endpoint as the reasoning_effort field. --------- Co-authored-by: kiloconnect[bot] <240665456+kiloconnect[bot]@users.noreply.github.com> Co-authored-by: Christiaan Arnoldus <christiaan@kilocode.ai>
1 parent 4206937 commit f0649ef

6 files changed

Lines changed: 40 additions & 0 deletions

File tree

apps/web/src/lib/ai-gateway/providers/direct-byok/direct-byok-definitions.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@ import byteplusCoding from './byteplus-coding';
33
import chutesByok from './chutes-byok';
44
import kimiCoding from './kimi-coding';
55
import neuralwatt from './neurowatt';
6+
import ollamaCloud from './ollama-cloud';
67
import zaiCoding from './zai-coding';
78

89
// Registry of all direct BYOK providers exposed by the gateway.
// Order here is the order providers are listed to consumers.
export default [
	byteplusCoding,
	chutesByok,
	kimiCoding,
	neuralwatt,
	ollamaCloud,
	zaiCoding,
	// `satisfies` keeps each entry's literal type while checking the contract.
] satisfies ReadonlyArray<DirectByokProvider>;

apps/web/src/lib/ai-gateway/providers/direct-byok/direct-byok-meta.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ export const DIRECT_BYOK_PROVIDERS_META = {
66
'chutes-byok': 'Chutes BYOK',
77
'kimi-coding': 'Kimi Code',
88
neuralwatt: 'Neuralwatt',
9+
'ollama-cloud': 'Ollama Cloud',
910
'zai-coding': 'Z.ai Coding Plan',
1011
} as const satisfies Record<Exclude<DirectUserByokInferenceProviderId, 'codestral'>, string>;
1112

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import { cachedEnhancedDirectByokModelList } from '@/lib/ai-gateway/providers/direct-byok/model-list';
2+
import type { DirectByokProvider } from '@/lib/ai-gateway/providers/direct-byok/types';
3+
import { REASONING_VARIANTS_NONE_LOW_MEDIUM_HIGH } from '@/lib/ai-gateway/providers/model-settings';
4+
5+
export default {
6+
id: 'ollama-cloud',
7+
base_url: 'https://ollama.com/v1',
8+
ai_sdk_provider: 'openai-compatible',
9+
transformRequest(context) {
10+
const { request } = context;
11+
if (request.kind !== 'chat_completions') {
12+
return;
13+
}
14+
request.body.reasoning_effort ??= request.body.reasoning?.effort ?? undefined;
15+
},
16+
models: cachedEnhancedDirectByokModelList({
17+
providerId: 'ollama-cloud',
18+
recommendedModels: [
19+
{
20+
id: 'kimi-k2.6:cloud',
21+
name: 'kimi-k2.6',
22+
flags: ['vision'],
23+
context_length: 262144,
24+
max_completion_tokens: 262144,
25+
},
26+
],
27+
variants: REASONING_VARIANTS_NONE_LOW_MEDIUM_HIGH,
28+
}),
29+
} satisfies DirectByokProvider;

apps/web/src/lib/ai-gateway/providers/direct-byok/sync-direct-byok.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ const FETCHERS: ReadonlyArray<ProviderFetcher> = [
135135
url: 'https://llm.chutes.ai/v1/models',
136136
}),
137137
modelsDevFetcher('zai-coding', 'zai-coding-plan'),
138+
modelsDevFetcher('ollama-cloud', 'ollama-cloud'),
138139
];
139140

140141
function modelIdToDisplayName(id: string) {

apps/web/src/lib/ai-gateway/providers/model-settings.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ export const REASONING_VARIANTS_MINIMAL_LOW_MEDIUM_HIGH = {
2929
...REASONING_VARIANTS_LOW_MEDIUM_HIGH,
3030
} as const;
3131

32+
// Reasoning-effort variant set for providers that accept an explicit 'none'
// level in addition to low/medium/high (e.g. Ollama Cloud).
export const REASONING_VARIANTS_NONE_LOW_MEDIUM_HIGH = {
	// 'none' turns reasoning off while still carrying an effort value of 'none'.
	none: { reasoning: { enabled: false, effort: 'none' } },
	...REASONING_VARIANTS_LOW_MEDIUM_HIGH,
} as const;
36+
3237
export function getModelVariants(model: string): OpenCodeSettings['variants'] {
3338
if (isOpusModel(model) && model.includes('4.7')) {
3439
return {

apps/web/src/lib/ai-gateway/providers/openrouter/inference-provider-id.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ export const DirectUserByokInferenceProviderIdSchema = z.enum([
9898
'codestral',
9999
'kimi-coding',
100100
'neuralwatt',
101+
'ollama-cloud',
101102
'zai-coding',
102103
]);
103104

@@ -130,6 +131,7 @@ export const UserByokTestModels = {
130131
[DirectUserByokInferenceProviderIdSchema.enum.codestral]: 'mistral/codestral',
131132
[DirectUserByokInferenceProviderIdSchema.enum['kimi-coding']]: 'kimi-for-coding',
132133
[DirectUserByokInferenceProviderIdSchema.enum.neuralwatt]: 'Qwen/Qwen3.5-35B-A3B',
134+
[DirectUserByokInferenceProviderIdSchema.enum['ollama-cloud']]: 'kimi-k2.6:cloud',
133135
[DirectUserByokInferenceProviderIdSchema.enum['zai-coding']]: 'glm-4.7',
134136
} satisfies Record<UserByokProviderId, string>;
135137

0 commit comments

Comments (0)