diff --git a/providers/llmgateway/models/grok-build-0-1.toml b/providers/llmgateway/models/grok-build-0-1.toml new file mode 100644 index 0000000000..3b75a24412 --- /dev/null +++ b/providers/llmgateway/models/grok-build-0-1.toml @@ -0,0 +1,13 @@ +base_model = "xai/grok-build-0.1" +reasoning_options = [{ type = "effort", values = ["low", "medium", "high"] }] + +[cost] +input = 1 +output = 2 +cache_read = 0.2 + +[[cost.tiers]] +tier = { type = "context", size = 200_000 } +input = 2 +output = 4 +cache_read = 0.4 diff --git a/providers/llmgateway/models/kimi-k2.7-code.toml b/providers/llmgateway/models/kimi-k2.7-code.toml new file mode 100644 index 0000000000..1967bf56e2 --- /dev/null +++ b/providers/llmgateway/models/kimi-k2.7-code.toml @@ -0,0 +1,10 @@ +base_model = "moonshotai/kimi-k2.7-code" +reasoning_options = [{ type = "effort", values = ["low", "medium", "high"] }] + +[interleaved] +field = "reasoning_content" + +[cost] +input = 0.95 +output = 4 +cache_read = 0.19 diff --git a/providers/llmgateway/models/nemotron-3-ultra-550b.toml b/providers/llmgateway/models/nemotron-3-ultra-550b.toml new file mode 100644 index 0000000000..4773351399 --- /dev/null +++ b/providers/llmgateway/models/nemotron-3-ultra-550b.toml @@ -0,0 +1,7 @@ +base_model = "nvidia/nemotron-3-ultra-550b-a55b" +reasoning_options = [{ type = "toggle" }] + +[cost] +input = 0.5 +output = 2.5 +cache_read = 0.15