Skip to content

Commit 1ab4ec6

Browse files
author
zhenjun.chen
committed
feat: add OrcaRouter as an LLM provider
OrcaRouter (https://www.orcarouter.ai) is an OpenAI-compatible API gateway that aggregates ~120 chat models from OpenAI, Anthropic, Google, DeepSeek, xAI, Qwen, Kimi, MiniMax, Z-AI and others behind a single sk-orca- key. It also exposes an orcarouter/auto virtual model with configurable adaptive routing (cheapest / balanced / quality / contextual bandit / difficulty-gated). Provider class extends OpenAI and injects attribution headers (HTTP-Referer, X-Title, User-Agent, X-Continue-Provider) and reuses the existing Anthropic cache_control pass-through for Claude models. Also registers orcarouter in the AI SDK provider map for use via `provider: ai-sdk`. Follows the same patterns as CometAPI (#7809), ClawRouter (#11751), and Tensorix (#11184). Disclosure: I am an engineer on the OrcaRouter team.
1 parent cb27309 commit 1ab4ec6

13 files changed

Lines changed: 414 additions & 0 deletions

File tree

core/control-plane/schema.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ const modelDescriptionSchema = z.object({
2020
"nebius",
2121
"siliconflow",
2222
"tensorix",
23+
"orcarouter",
2324
"scaleway",
2425
"watsonx",
2526
]),

core/llm/autodetect.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ const PROVIDER_HANDLES_TEMPLATING: string[] = [
6565
"nebius",
6666
"relace",
6767
"openrouter",
68+
"orcarouter",
6869
"clawrouter",
6970
"deepseek",
7071
"xAI",
@@ -124,6 +125,7 @@ const PROVIDER_SUPPORTS_IMAGES: string[] = [
124125
"sagemaker",
125126
"continue-proxy",
126127
"openrouter",
128+
"orcarouter",
127129
"clawrouter",
128130
"venice",
129131
"sambanova",

core/llm/llms/OrcaRouter.ts

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
import { ChatCompletionCreateParams } from "openai/resources/index";
2+
3+
import { ORCAROUTER_HEADERS } from "@continuedev/openai-adapters";
4+
5+
import { LLMOptions } from "../../index.js";
6+
import { osModelsEditPrompt } from "../templates/edit.js";
7+
8+
import OpenAI from "./OpenAI.js";
9+
10+
/**
 * LLM provider for OrcaRouter, an OpenAI-compatible API gateway.
 *
 * Reuses the OpenAI provider implementation and adds two things:
 *  - OrcaRouter attribution headers on every request (caller-supplied
 *    headers win on conflict), and
 *  - Anthropic-style `cache_control` markers on chat messages when a Claude
 *    model is selected and prompt caching is enabled.
 */
class OrcaRouter extends OpenAI {
  static providerName = "orcarouter";
  protected supportsReasoningField = true;
  protected supportsReasoningDetailsField = true;
  static defaultOptions: Partial<LLMOptions> = {
    apiBase: "https://api.orcarouter.ai/v1/",
    model: "orcarouter/auto",
    promptTemplates: {
      edit: osModelsEditPrompt,
    },
    useLegacyCompletionsEndpoint: false,
  };

  /**
   * @param options Standard LLM options. Any `requestOptions.headers` given
   *   here are merged over `ORCAROUTER_HEADERS`, so user headers take
   *   precedence over the defaults.
   */
  constructor(options: LLMOptions) {
    super({
      ...options,
      requestOptions: {
        ...options.requestOptions,
        headers: {
          ...ORCAROUTER_HEADERS,
          ...options.requestOptions?.headers,
        },
      },
    });
  }

  /**
   * Returns true when the model id names a Claude model (case-insensitive
   * substring match on "claude"); false for a missing or empty id.
   */
  private isAnthropicModel(model?: string): boolean {
    if (!model) return false;
    return model.toLowerCase().includes("claude");
  }

  /**
   * Returns `content` with an ephemeral `cache_control` marker attached.
   *
   * - Non-empty string: wrapped into a single text part carrying the marker.
   * - Array of parts: the marker is added to the final part, but only when
   *   that part is a non-empty text part; other parts are returned untouched.
   * - Anything else (null/undefined, empty string): returned unchanged.
   *
   * @param content Message content, either a string or an array of parts.
   * @param addCaching When false, `content` is returned unchanged.
   */
  private addCacheControlToContent(content: any, addCaching: boolean): any {
    if (!addCaching) return content;

    if (typeof content === "string") {
      // Anthropic does not allow empty text blocks to be cached, so an empty
      // string is left as-is rather than wrapped into an empty marked block.
      if (content === "") return content;
      return [
        {
          type: "text",
          text: content,
          cache_control: { type: "ephemeral" },
        },
      ];
    }

    if (Array.isArray(content)) {
      const lastIdx = content.length - 1;
      return content.map((part, idx) => {
        if (idx === lastIdx && part?.type === "text" && part.text !== "") {
          return {
            ...part,
            cache_control: { type: "ephemeral" },
          };
        }
        return part;
      });
    }

    return content;
  }

  /**
   * Applies the base class body modifications, then, for Claude models with
   * caching enabled, marks the system message(s) and/or the last two
   * non-empty user messages with `cache_control`.
   *
   * Caching is driven by `cacheBehavior.cacheSystemMessage`,
   * `cacheBehavior.cacheConversation`, or `completionOptions.promptCaching`
   * (which enables both).
   *
   * @param body Chat completion request body.
   * @returns The body, with `messages` replaced when markers were added.
   */
  protected modifyChatBody(
    body: ChatCompletionCreateParams,
  ): ChatCompletionCreateParams {
    body = super.modifyChatBody(body);

    if (!this.isAnthropicModel(body.model)) {
      return body;
    }

    const promptCaching = !!this.completionOptions.promptCaching;
    const shouldCacheConversation =
      !!this.cacheBehavior?.cacheConversation || promptCaching;
    const shouldCacheSystemMessage =
      !!this.cacheBehavior?.cacheSystemMessage || promptCaching;

    if (!shouldCacheConversation && !shouldCacheSystemMessage) {
      return body;
    }

    // Positions (in body.messages) of the last two user messages that
    // actually carry content; these are the conversation cache breakpoints.
    const userIndicesWithContent: number[] = [];
    if (shouldCacheConversation) {
      body.messages.forEach((msg: any, idx: number) => {
        if (msg.role === "user" && !!msg.content) {
          userIndicesWithContent.push(idx);
        }
      });
    }
    const cacheableUserIndices = new Set<number>(
      userIndicesWithContent.slice(-2),
    );

    body.messages = body.messages.map((message: any, idx: number) => {
      const markSystem = message.role === "system" && shouldCacheSystemMessage;
      const markUser =
        message.role === "user" && cacheableUserIndices.has(idx);

      if (markSystem || markUser) {
        return {
          ...message,
          content: this.addCacheControlToContent(message.content, true),
        };
      }

      return message;
    });

    return body;
  }
}
137+
138+
export default OrcaRouter;

core/llm/llms/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ import Nvidia from "./Nvidia";
5050
import Ollama from "./Ollama";
5151
import OpenAI from "./OpenAI";
5252
import OpenRouter from "./OpenRouter";
53+
import OrcaRouter from "./OrcaRouter";
5354
import ClawRouter from "./ClawRouter";
5455
import OVHcloud from "./OVHcloud";
5556
import { Relace } from "./Relace";
@@ -112,6 +113,7 @@ export const LLMClasses = [
112113
Azure,
113114
WatsonX,
114115
OpenRouter,
116+
OrcaRouter,
115117
ClawRouter,
116118
Nvidia,
117119
Vllm,

core/llm/toolSupport.ts

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,57 @@ export const PROVIDER_TOOL_SUPPORT: Record<string, (model: string) => boolean> =
399399

400400
return false;
401401
},
402+
orcarouter: (model) => {
  // OrcaRouter model ids are vendor-prefixed (openai/gpt-5,
  // anthropic/claude-opus-4.7, deepseek/deepseek-v4-pro, ...), so tool
  // support is inferred from the model id itself.
  const id = model.toLowerCase();

  // Virtual routers such as orcarouter/auto: assume tool support. The
  // routing pool is expected to contain only tool-capable upstreams when
  // used in agent mode (see docs caveat).
  if (id.startsWith("orcarouter/")) {
    return true;
  }

  // Image-generation models (e.g. google/gemini-2.5-flash-image) can show
  // up in chat routing pools but reject tool calls, so rule them out before
  // the family checks below.
  const imageModelMarkers = ["-image", "imagen", "dall-e"];
  if (imageModelMarkers.some((marker) => id.includes(marker))) {
    return false;
  }

  // Model families matched by plain substring.
  const toolCapableFamilies = [
    "claude",
    "sonnet",
    "opus",
    "haiku",
    "gemini",
    "command-r",
    "mistral",
    "mixtral",
    "llama-3.1",
    "llama-3.2",
    "llama-3.3",
    "llama-4",
    "qwen3",
    "qwen-2.5",
    "deepseek",
    "kimi",
    "glm-4",
    "minimax",
  ];
  if (toolCapableFamilies.some((family) => id.includes(family))) {
    return true;
  }

  // Families where only certain generations qualify: GPT-4 through GPT-9,
  // the o1..o9 series, and Grok 3 through 9.
  const toolCapableVersions = [/gpt-[4-9]/, /\bo[1-9]\b/, /grok-[3-9]/];
  return toolCapableVersions.some((pattern) => pattern.test(id));
},
402453
clawrouter: (model) => {
403454
// ClawRouter routes to various providers, so we check common tool-supporting patterns
404455
const lower = model.toLowerCase();

extensions/vscode/config_schema.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@
216216
"msty",
217217
"watsonx",
218218
"openrouter",
219+
"orcarouter",
219220
"clawrouter",
220221
"sambanova",
221222
"nvidia",
@@ -269,6 +270,7 @@
269270
"### Msty\nMsty is the simplest way to get started with online or local LLMs on all desktop platforms - Windows, Mac, and Linux. No fussing around, one-click and you are up and running. To get started, follow these steps:\n1. Download from [Msty.app](https://msty.app/), open the application, and click 'Setup Local AI'.\n2. Go to the Local AI Module page and download a model of your choice.\n3. Once the model has finished downloading, you can start asking questions through Continue.\n> [Reference](https://continue.dev/docs/reference/Model%20Providers/Msty)",
270271
"### IBM watsonx\nwatsonx, developed by IBM, offers a variety of pre-trained AI foundation models that can be used for natural language processing (NLP), computer vision, and speech recognition tasks.",
271272
"### OpenRouter\nOpenRouter offers a single API to access almost any language model. To get started, obtain an API key from [their console](https://openrouter.ai/settings/keys).",
273+
"### OrcaRouter\nOrcaRouter is an OpenAI-compatible API gateway that aggregates ~120 chat models from OpenAI, Anthropic, Google, DeepSeek, xAI, Qwen, Kimi, MiniMax, Z-AI, and others behind a single `sk-orca-` key. It also exposes an `orcarouter/auto` virtual model with configurable adaptive routing (cheapest / balanced / quality / contextual bandit / difficulty-gated).\nTo get started, sign up at [orcarouter.ai](https://www.orcarouter.ai) and obtain an API key from your [console](https://www.orcarouter.ai/console).\n> [Reference](https://docs.orcarouter.ai)",
272274
"### ClawRouter\nClawRouter is an open-source LLM router that automatically selects the cheapest capable model for each request based on prompt complexity, providing 78-96% cost savings. To get started, run `npx clawrouter` to start the router at localhost:1337. A wallet is auto-generated on first run - fund it with USDC (Solana/Base) to access premium models, or use `blockrun/free` tier without payment.\n> [Reference](https://github.com/BlockRunAI/ClawRouter)",
273275
"### SambaNova\n SambaNova provides fast inference of open-source language models with zero data retention. To get started, obtain an API key in [SambaNova Cloud](https://cloud.sambanova.ai/apis?utm_source=continue&utm_medium=external&utm_campaign=cloud_signup ).",
274276
"### NVIDIA NIMs\nNVIDIA offers a single API to access almost any language model. To find out more, visit the [LLM APIs Documentation](https://docs.api.nvidia.com/nim/reference/llm-apis).\nFor information specific to getting a key, please check out the [docs here](https://docs.nvidia.com/nim/large-language-models/latest/getting-started.html#option-1-from-api-catalog)",

gui/public/logos/orcarouter.png

862 KB
Loading

gui/src/pages/AddNewModel/configs/models.ts

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2802,6 +2802,107 @@ export const models: { [key: string]: ModelPackage } = {
28022802
isOpenSource: true,
28032803
},
28042804

2805+
// OrcaRouter Models
2806+
orcarouterAuto: {
2807+
title: "OrcaRouter Auto",
2808+
description:
2809+
"Adaptive routing across upstream models with configurable strategy (cheapest / balanced / quality / contextual bandit / difficulty-gated). Routing pools and weights are tunable from the OrcaRouter console.",
2810+
params: {
2811+
title: "OrcaRouter Auto",
2812+
model: "orcarouter/auto",
2813+
contextLength: 128_000,
2814+
},
2815+
icon: "orcarouter.png",
2816+
providerOptions: ["orcarouter"],
2817+
isOpenSource: false,
2818+
},
2819+
orcarouterGpt55: {
2820+
title: "OpenAI: GPT-5.5",
2821+
description: "OpenAI GPT-5.5 routed through OrcaRouter.",
2822+
params: {
2823+
title: "OpenAI: GPT-5.5",
2824+
model: "openai/gpt-5.5",
2825+
contextLength: 400_000,
2826+
},
2827+
icon: "orcarouter.png",
2828+
providerOptions: ["orcarouter"],
2829+
isOpenSource: false,
2830+
},
2831+
orcarouterClaudeOpus47: {
2832+
title: "Anthropic: Claude Opus 4.7",
2833+
description:
2834+
"Anthropic Claude Opus 4.7 routed through OrcaRouter. Reasoning model — set `requestOptions.extraBodyProperties.thinking` to control thinking budget.",
2835+
params: {
2836+
title: "Anthropic: Claude Opus 4.7",
2837+
model: "anthropic/claude-opus-4.7",
2838+
contextLength: 200_000,
2839+
},
2840+
icon: "orcarouter.png",
2841+
providerOptions: ["orcarouter"],
2842+
isOpenSource: false,
2843+
},
2844+
// Gemini 3 Flash Preview served through OrcaRouter.
orcarouterGemini3Flash: {
  title: "Google: Gemini 3 Flash Preview",
  description:
    "Google Gemini 3 Flash Preview routed through OrcaRouter. Reasoning model — chat may appear blank for several seconds while the model reasons. Set `reasoning_effort: 'minimal'` in `requestOptions.extraBodyProperties` for fast responses.",
  params: {
    // Same text as the entry title (and consistent with the "-preview"
    // model id), matching how the other OrcaRouter entries are declared.
    title: "Google: Gemini 3 Flash Preview",
    model: "google/gemini-3-flash-preview",
    contextLength: 1_000_000,
  },
  icon: "orcarouter.png",
  providerOptions: ["orcarouter"],
  isOpenSource: false,
},
2857+
orcarouterDeepseekV4Pro: {
2858+
title: "DeepSeek: DeepSeek V4 Pro",
2859+
description: "DeepSeek V4 Pro routed through OrcaRouter.",
2860+
params: {
2861+
title: "DeepSeek: DeepSeek V4 Pro",
2862+
model: "deepseek/deepseek-v4-pro",
2863+
contextLength: 128_000,
2864+
},
2865+
icon: "orcarouter.png",
2866+
providerOptions: ["orcarouter"],
2867+
isOpenSource: true,
2868+
},
2869+
orcarouterGrok43: {
2870+
title: "xAI: Grok 4.3",
2871+
description: "xAI Grok 4.3 routed through OrcaRouter.",
2872+
params: {
2873+
title: "xAI: Grok 4.3",
2874+
model: "grok/grok-4.3",
2875+
contextLength: 256_000,
2876+
},
2877+
icon: "orcarouter.png",
2878+
providerOptions: ["orcarouter"],
2879+
isOpenSource: false,
2880+
},
2881+
orcarouterQwen36Flash: {
2882+
title: "Alibaba: Qwen 3.6 Flash",
2883+
description: "Alibaba Qwen 3.6 Flash routed through OrcaRouter.",
2884+
params: {
2885+
title: "Alibaba: Qwen 3.6 Flash",
2886+
model: "qwen/qwen3.6-flash",
2887+
contextLength: 128_000,
2888+
},
2889+
icon: "orcarouter.png",
2890+
providerOptions: ["orcarouter"],
2891+
isOpenSource: true,
2892+
},
2893+
orcarouterMinimaxM27: {
2894+
title: "MiniMax: MiniMax M2.7",
2895+
description: "MiniMax M2.7 routed through OrcaRouter.",
2896+
params: {
2897+
title: "MiniMax: MiniMax M2.7",
2898+
model: "minimax/minimax-m2.7",
2899+
contextLength: 200_000,
2900+
},
2901+
icon: "orcarouter.png",
2902+
providerOptions: ["orcarouter"],
2903+
isOpenSource: false,
2904+
},
2905+
28052906
AUTODETECT: {
28062907
title: "Autodetect",
28072908
description:

0 commit comments

Comments
 (0)