From 57940ad4167e957a27e529ff6b0a387b6c1e7eb0 Mon Sep 17 00:00:00 2001
From: smakosh
Date: Sat, 13 Jun 2026 23:02:24 +0100
Subject: [PATCH 1/2] feat: add LLM Gateway kimi-k2.7-code, nemotron-3-ultra-550b, grok-build-0-1

Newest text models from the LLM Gateway catalog, using the base_model
structure to inherit from the canonical model registry with
gateway-specific cost overrides.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 providers/llmgateway/models/grok-build-0-1.toml  | 12 ++++++++++++
 providers/llmgateway/models/kimi-k2.7-code.toml  |  9 +++++++++
 .../llmgateway/models/nemotron-3-ultra-550b.toml |  6 ++++++
 3 files changed, 27 insertions(+)
 create mode 100644 providers/llmgateway/models/grok-build-0-1.toml
 create mode 100644 providers/llmgateway/models/kimi-k2.7-code.toml
 create mode 100644 providers/llmgateway/models/nemotron-3-ultra-550b.toml

diff --git a/providers/llmgateway/models/grok-build-0-1.toml b/providers/llmgateway/models/grok-build-0-1.toml
new file mode 100644
index 0000000000..ca6cb815bc
--- /dev/null
+++ b/providers/llmgateway/models/grok-build-0-1.toml
@@ -0,0 +1,12 @@
+base_model = "xai/grok-build-0.1"
+
+[cost]
+input = 1
+output = 2
+cache_read = 0.2
+
+[[cost.tiers]]
+tier = { type = "context", size = 200_000 }
+input = 2
+output = 4
+cache_read = 0.4
diff --git a/providers/llmgateway/models/kimi-k2.7-code.toml b/providers/llmgateway/models/kimi-k2.7-code.toml
new file mode 100644
index 0000000000..1a35459391
--- /dev/null
+++ b/providers/llmgateway/models/kimi-k2.7-code.toml
@@ -0,0 +1,9 @@
+base_model = "moonshotai/kimi-k2.7-code"
+
+[interleaved]
+field = "reasoning_content"
+
+[cost]
+input = 0.95
+output = 4
+cache_read = 0.19
diff --git a/providers/llmgateway/models/nemotron-3-ultra-550b.toml b/providers/llmgateway/models/nemotron-3-ultra-550b.toml
new file mode 100644
index 0000000000..4ad3359cdc
--- /dev/null
+++ b/providers/llmgateway/models/nemotron-3-ultra-550b.toml
@@ -0,0 +1,6 @@
+base_model = "nvidia/nemotron-3-ultra-550b-a55b"
+
+[cost]
+input = 0.5
+output = 2.5
+cache_read = 0.15

From fb2b96a4b78b601cdf9b4c76e6b47af6eacece0c Mon Sep 17 00:00:00 2001
From: smakosh
Date: Sun, 14 Jun 2026 18:15:02 +0200
Subject: [PATCH 2/2] feat: add reasoning_options to new LLM Gateway models

Addresses review feedback: kimi-k2.7-code and grok-build-0-1 use the
effort (low/medium/high) option matching the kimi/grok gateway models;
nemotron-3-ultra-550b uses a reasoning toggle per its nvidia source.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 providers/llmgateway/models/grok-build-0-1.toml        | 1 +
 providers/llmgateway/models/kimi-k2.7-code.toml        | 1 +
 providers/llmgateway/models/nemotron-3-ultra-550b.toml | 1 +
 3 files changed, 3 insertions(+)

diff --git a/providers/llmgateway/models/grok-build-0-1.toml b/providers/llmgateway/models/grok-build-0-1.toml
index ca6cb815bc..3b75a24412 100644
--- a/providers/llmgateway/models/grok-build-0-1.toml
+++ b/providers/llmgateway/models/grok-build-0-1.toml
@@ -1,4 +1,5 @@
 base_model = "xai/grok-build-0.1"
+reasoning_options = [{ type = "effort", values = ["low", "medium", "high"] }]
 
 [cost]
 input = 1
diff --git a/providers/llmgateway/models/kimi-k2.7-code.toml b/providers/llmgateway/models/kimi-k2.7-code.toml
index 1a35459391..1967bf56e2 100644
--- a/providers/llmgateway/models/kimi-k2.7-code.toml
+++ b/providers/llmgateway/models/kimi-k2.7-code.toml
@@ -1,4 +1,5 @@
 base_model = "moonshotai/kimi-k2.7-code"
+reasoning_options = [{ type = "effort", values = ["low", "medium", "high"] }]
 
 [interleaved]
 field = "reasoning_content"
diff --git a/providers/llmgateway/models/nemotron-3-ultra-550b.toml b/providers/llmgateway/models/nemotron-3-ultra-550b.toml
index 4ad3359cdc..4773351399 100644
--- a/providers/llmgateway/models/nemotron-3-ultra-550b.toml
+++ b/providers/llmgateway/models/nemotron-3-ultra-550b.toml
@@ -1,4 +1,5 @@
 base_model = "nvidia/nemotron-3-ultra-550b-a55b"
+reasoning_options = [{ type = "toggle" }]
 
 [cost]
 input = 0.5