Skip to content

Commit f0649ef

Browse files
feat(byok): add direct BYOK support for Ollama Cloud (#2819) ☁️
* feat(byok): add direct BYOK support for Ollama Cloud Adds Ollama Cloud as a new direct BYOK provider using its OpenAI-compatible endpoint at https://ollama.com/v1. Model list and metadata are curated from https://models.dev/api.json with descriptions sourced from the Kilo gateway model catalog. * refactor(byok): sync Ollama Cloud models dynamically from models.dev Reworks the Ollama Cloud BYOK provider to pull its model catalog dynamically via sync-direct-byok (mirroring zai-coding) instead of hardcoding 37 models. Keeps a single recommended model (gpt-oss:120b) inline and adds the display name to direct-byok-meta. * chore(byok): use kimi-k2.6 as recommended Ollama Cloud model * refactor(byok): fetch models.dev catalog once per sync * refactor(byok): bareModelId strips vendor prefix and variant suffix * chore(byok): drop ollama-cloud comment slop * fix(byok): align ollama-cloud test model with recommended model The dynamic sync leaves Redis empty until the first cron run, so only the hardcoded recommended model is guaranteed to be present in the model list. Use it as the test model to avoid a broken test-key flow pre-sync. * feat(ollama-cloud): add none/low/medium/high reasoning variants * fix(byok): drop description from ollama-cloud recommended model, simplify sync * refactor(byok): share models.dev fetch, strip :cloud suffix, fix ollama-cloud types - Cache the models.dev catalog within a single syncDirectByokModels run so providers sourced from it share one HTTP fetch. - Rename stripVendorPrefix to modelIdToDisplayName and strip trailing :cloud tags (e.g. kimi-k2.6:cloud -> kimi-k2.6) in addition to vendor prefixes. - Drop variants: null from the ollama-cloud recommended model; the schema on main is now .optional(), so the explicit null was rejected by tsgo. * refactor(byok): strip any suffix after ':' in model display names Broaden modelIdToDisplayName so tags other than ':cloud' (e.g. ':latest') are stripped from the user-visible name as well. 
* feat(byok): map reasoning.effort to reasoning_effort for Ollama Cloud Mirror the chutes-byok transformRequest so reasoning variants (none/low/ medium/high) flow through to Ollama Cloud's OpenAI-compatible endpoint as the reasoning_effort field. --------- Co-authored-by: kiloconnect[bot] <240665456+kiloconnect[bot]@users.noreply.github.com> Co-authored-by: Christiaan Arnoldus <christiaan@kilocode.ai>
1 parent 4206937 commit f0649ef

6 files changed

Lines changed: 40 additions & 0 deletions

File tree

apps/web/src/lib/ai-gateway/providers/direct-byok/direct-byok-definitions.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@ import byteplusCoding from './byteplus-coding';
33
import chutesByok from './chutes-byok';
44
import kimiCoding from './kimi-coding';
55
import neuralwatt from './neurowatt';
6+
import ollamaCloud from './ollama-cloud';
67
import zaiCoding from './zai-coding';
78

89
// Registry of all direct BYOK providers exposed by the gateway.
// Order here is the order providers are listed to consumers.
export default [
	byteplusCoding,
	chutesByok,
	kimiCoding,
	neuralwatt,
	ollamaCloud,
	zaiCoding,
	// `satisfies` keeps each entry's literal type while checking the contract.
] satisfies ReadonlyArray<DirectByokProvider>;

apps/web/src/lib/ai-gateway/providers/direct-byok/direct-byok-meta.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ export const DIRECT_BYOK_PROVIDERS_META = {
66
'chutes-byok': 'Chutes BYOK',
77
'kimi-coding': 'Kimi Code',
88
neuralwatt: 'Neuralwatt',
9+
'ollama-cloud': 'Ollama Cloud',
910
'zai-coding': 'Z.ai Coding Plan',
1011
} as const satisfies Record<Exclude<DirectUserByokInferenceProviderId, 'codestral'>, string>;
1112

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import { cachedEnhancedDirectByokModelList } from '@/lib/ai-gateway/providers/direct-byok/model-list';
2+
import type { DirectByokProvider } from '@/lib/ai-gateway/providers/direct-byok/types';
3+
import { REASONING_VARIANTS_NONE_LOW_MEDIUM_HIGH } from '@/lib/ai-gateway/providers/model-settings';
4+
5+
export default {
6+
id: 'ollama-cloud',
7+
base_url: 'https://ollama.com/v1',
8+
ai_sdk_provider: 'openai-compatible',
9+
transformRequest(context) {
10+
const { request } = context;
11+
if (request.kind !== 'chat_completions') {
12+
return;
13+
}
14+
request.body.reasoning_effort ??= request.body.reasoning?.effort ?? undefined;
15+
},
16+
models: cachedEnhancedDirectByokModelList({
17+
providerId: 'ollama-cloud',
18+
recommendedModels: [
19+
{
20+
id: 'kimi-k2.6:cloud',
21+
name: 'kimi-k2.6',
22+
flags: ['vision'],
23+
context_length: 262144,
24+
max_completion_tokens: 262144,
25+
},
26+
],
27+
variants: REASONING_VARIANTS_NONE_LOW_MEDIUM_HIGH,
28+
}),
29+
} satisfies DirectByokProvider;

apps/web/src/lib/ai-gateway/providers/direct-byok/sync-direct-byok.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ const FETCHERS: ReadonlyArray<ProviderFetcher> = [
135135
url: 'https://llm.chutes.ai/v1/models',
136136
}),
137137
modelsDevFetcher('zai-coding', 'zai-coding-plan'),
138+
modelsDevFetcher('ollama-cloud', 'ollama-cloud'),
138139
];
139140

140141
function modelIdToDisplayName(id: string) {

apps/web/src/lib/ai-gateway/providers/model-settings.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ export const REASONING_VARIANTS_MINIMAL_LOW_MEDIUM_HIGH = {
2929
...REASONING_VARIANTS_LOW_MEDIUM_HIGH,
3030
} as const;
3131

32+
// Reasoning-effort variant set for providers that accept an explicit 'none'
// level in addition to low/medium/high (e.g. Ollama Cloud).
export const REASONING_VARIANTS_NONE_LOW_MEDIUM_HIGH = {
	// 'none' turns reasoning off while still carrying an effort value of 'none'.
	none: { reasoning: { enabled: false, effort: 'none' } },
	...REASONING_VARIANTS_LOW_MEDIUM_HIGH,
} as const;
36+
3237
export function getModelVariants(model: string): OpenCodeSettings['variants'] {
3338
if (isOpusModel(model) && model.includes('4.7')) {
3439
return {

apps/web/src/lib/ai-gateway/providers/openrouter/inference-provider-id.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ export const DirectUserByokInferenceProviderIdSchema = z.enum([
9898
'codestral',
9999
'kimi-coding',
100100
'neuralwatt',
101+
'ollama-cloud',
101102
'zai-coding',
102103
]);
103104

@@ -130,6 +131,7 @@ export const UserByokTestModels = {
130131
[DirectUserByokInferenceProviderIdSchema.enum.codestral]: 'mistral/codestral',
131132
[DirectUserByokInferenceProviderIdSchema.enum['kimi-coding']]: 'kimi-for-coding',
132133
[DirectUserByokInferenceProviderIdSchema.enum.neuralwatt]: 'Qwen/Qwen3.5-35B-A3B',
134+
[DirectUserByokInferenceProviderIdSchema.enum['ollama-cloud']]: 'kimi-k2.6:cloud',
133135
[DirectUserByokInferenceProviderIdSchema.enum['zai-coding']]: 'glm-4.7',
134136
} satisfies Record<UserByokProviderId, string>;
135137

0 commit comments

Comments (0)