continuedev
diff --git a/‎core/control-plane/schema.ts‎
Lines changed: 1 addition & 0 deletions b/‎core/control-plane/schema.ts‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎core/llm/autodetect.ts‎
Lines changed: 2 additions & 0 deletions b/‎core/llm/autodetect.ts‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎core/llm/llms/OrcaRouter.ts‎
Lines changed: 138 additions & 0 deletions b/‎core/llm/llms/OrcaRouter.ts‎
Lines changed: 138 additions & 0 deletions
diff --git a/‎core/llm/llms/index.ts‎
Lines changed: 2 additions & 0 deletions b/‎core/llm/llms/index.ts‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎core/llm/toolSupport.ts‎
Lines changed: 51 additions & 0 deletions b/‎core/llm/toolSupport.ts‎
Lines changed: 51 additions & 0 deletions
diff --git a/‎extensions/vscode/config_schema.json‎
Lines changed: 2 additions & 0 deletions b/‎extensions/vscode/config_schema.json‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎gui/public/logos/orcarouter.png‎
862 KB b/‎gui/public/logos/orcarouter.png‎
862 KB
diff --git a/‎gui/src/pages/AddNewModel/configs/models.ts‎
Lines changed: 101 additions & 0 deletions b/‎gui/src/pages/AddNewModel/configs/models.ts‎
Lines changed: 101 additions & 0 deletions
@@ -20,6 +20,7 @@ const modelDescriptionSchema = z.object({
     "nebius",
     "siliconflow",
     "tensorix",
+    "orcarouter",
     "scaleway",
     "watsonx",
   ]),
 
@@ -65,6 +65,7 @@ const PROVIDER_HANDLES_TEMPLATING: string[] = [
   "nebius",
   "relace",
   "openrouter",
+  "orcarouter",
   "clawrouter",
   "deepseek",
   "xAI",
@@ -124,6 +125,7 @@ const PROVIDER_SUPPORTS_IMAGES: string[] = [
   "sagemaker",
   "continue-proxy",
   "openrouter",
+  "orcarouter",
   "clawrouter",
   "venice",
   "sambanova",
 
@@ -0,0 +1,138 @@
+import { ChatCompletionCreateParams } from "openai/resources/index";
+
+import { ORCAROUTER_HEADERS } from "@continuedev/openai-adapters";
+
+import { LLMOptions } from "../../index.js";
+import { osModelsEditPrompt } from "../templates/edit.js";
+
+import OpenAI from "./OpenAI.js";
+
+/**
+ * LLM provider for OrcaRouter, built on top of the OpenAI-compatible provider.
+ *
+ * On top of the base OpenAI behavior this class:
+ *  - injects ORCAROUTER_HEADERS into every request (caller-supplied headers
+ *    with the same name take precedence), and
+ *  - adds `cache_control: { type: "ephemeral" }` markers to the chat body for
+ *    models whose name contains "claude" when prompt caching is enabled.
+ */
+class OrcaRouter extends OpenAI {
+  static providerName = "orcarouter";
+  protected supportsReasoningField = true;
+  protected supportsReasoningDetailsField = true;
+  static defaultOptions: Partial<LLMOptions> = {
+    apiBase: "https://api.orcarouter.ai/v1/",
+    model: "orcarouter/auto",
+    promptTemplates: {
+      edit: osModelsEditPrompt,
+    },
+    useLegacyCompletionsEndpoint: false,
+  };
+
+  /**
+   * @param options Standard LLM options. `requestOptions.headers` is merged
+   *   after ORCAROUTER_HEADERS, so user-provided headers override the defaults.
+   */
+  constructor(options: LLMOptions) {
+    super({
+      ...options,
+      requestOptions: {
+        ...options.requestOptions,
+        headers: {
+          ...ORCAROUTER_HEADERS,
+          ...options.requestOptions?.headers,
+        },
+      },
+    });
+  }
+
+  /**
+   * Returns true when the model name contains "claude" (case-insensitive).
+   * A missing or empty model name is treated as non-Anthropic.
+   */
+  private isAnthropicModel(model?: string): boolean {
+    if (!model) return false;
+    return model.toLowerCase().includes("claude");
+  }
+
+  /**
+   * Returns `content` with an ephemeral `cache_control` marker attached.
+   *
+   * - Non-empty string: wrapped into a single text part carrying the marker.
+   * - Array: the marker is added only when the LAST part is a text part;
+   *   otherwise the array is returned with its parts unchanged.
+   * - Anything else (including an empty string): returned unchanged.
+   *
+   * @param content Message content (string, array of parts, or other).
+   * @param addCaching When false the content is returned untouched.
+   */
+  private addCacheControlToContent(content: any, addCaching: boolean): any {
+    if (!addCaching) return content;
+
+    if (typeof content === "string") {
+      // Anthropic's prompt-caching docs state that empty text blocks cannot be
+      // cached, so never emit an empty text part carrying cache_control.
+      if (content.length === 0) return content;
+
+      return [
+        {
+          type: "text",
+          text: content,
+          cache_control: { type: "ephemeral" },
+        },
+      ];
+    }
+
+    if (Array.isArray(content)) {
+      const lastIdx = content.length - 1;
+      return content.map((part, idx) =>
+        idx === lastIdx && part.type === "text"
+          ? { ...part, cache_control: { type: "ephemeral" } }
+          : part,
+      );
+    }
+
+    return content;
+  }
+
+  /**
+   * Applies the base-class body modifications, then — for "claude" models with
+   * caching enabled via `cacheBehavior` or `completionOptions.promptCaching` —
+   * marks system messages and/or the last two non-empty user messages as
+   * cacheable.
+   *
+   * @param body Chat completion request body.
+   * @returns The (possibly modified) request body.
+   */
+  protected modifyChatBody(
+    body: ChatCompletionCreateParams,
+  ): ChatCompletionCreateParams {
+    body = super.modifyChatBody(body);
+
+    if (
+      !this.isAnthropicModel(body.model) ||
+      (!this.cacheBehavior && !this.completionOptions.promptCaching)
+    ) {
+      return body;
+    }
+
+    const shouldCacheConversation =
+      this.cacheBehavior?.cacheConversation ||
+      this.completionOptions.promptCaching;
+    const shouldCacheSystemMessage =
+      this.cacheBehavior?.cacheSystemMessage ||
+      this.completionOptions.promptCaching;
+
+    if (!shouldCacheConversation && !shouldCacheSystemMessage) {
+      return body;
+    }
+
+    // Original indices of the last two user messages that have content.
+    // Collected in one pass into a Set so the lookup below is O(1) per message.
+    const cacheableUserIndices = new Set<number>(
+      body.messages
+        .map((msg: any, index: number) =>
+          msg.role === "user" && !!msg.content ? index : -1,
+        )
+        .filter((index: number) => index !== -1)
+        .slice(-2),
+    );
+
+    body.messages = body.messages.map((message: any, idx: number) => {
+      const cacheThisMessage =
+        (message.role === "system" && shouldCacheSystemMessage) ||
+        (shouldCacheConversation && cacheableUserIndices.has(idx));
+
+      if (!cacheThisMessage) {
+        return message;
+      }
+
+      return {
+        ...message,
+        content: this.addCacheControlToContent(message.content, true),
+      };
+    });
+
+    return body;
+  }
+}
+
+export default OrcaRouter;
@@ -50,6 +50,7 @@ import Nvidia from "./Nvidia";
 import Ollama from "./Ollama";
 import OpenAI from "./OpenAI";
 import OpenRouter from "./OpenRouter";
+import OrcaRouter from "./OrcaRouter";
 import ClawRouter from "./ClawRouter";
 import OVHcloud from "./OVHcloud";
 import { Relace } from "./Relace";
@@ -112,6 +113,7 @@ export const LLMClasses = [
   Azure,
   WatsonX,
   OpenRouter,
+  OrcaRouter,
   ClawRouter,
   Nvidia,
   Vllm,
 
@@ -399,6 +399,57 @@ export const PROVIDER_TOOL_SUPPORT: Record<string, (model: string) => boolean> =
 
       return false;
     },
+    orcarouter: (model) => {
+      // OrcaRouter forwards requests to many upstream vendors; model ids carry
+      // a vendor prefix, e.g. openai/gpt-5, anthropic/claude-opus-4.7,
+      // deepseek/deepseek-v4-pro. Matching is done on the lower-cased id.
+      const normalized = model.toLowerCase();
+
+      // Router-owned virtual models (orcarouter/auto and other named routers)
+      // are assumed to support tools: the routing pool is expected to contain
+      // only tool-capable upstreams when used in agent mode (see docs caveat).
+      if (normalized.startsWith("orcarouter/")) {
+        return true;
+      }
+
+      // Image-generation models sometimes show up in chat routing pools
+      // (e.g. google/gemini-2.5-flash-image); they reject tool calls, so they
+      // are excluded before the generic family checks below.
+      const imageModelMarkers = ["-image", "imagen", "dall-e"];
+      if (imageModelMarkers.some((marker) => normalized.includes(marker))) {
+        return false;
+      }
+
+      // Substrings identifying model families that support tool calling.
+      const toolCapableFamilies = [
+        "claude",
+        "sonnet",
+        "opus",
+        "haiku",
+        "gemini",
+        "command-r",
+        "mistral",
+        "mixtral",
+        "llama-3.1",
+        "llama-3.2",
+        "llama-3.3",
+        "llama-4",
+        "qwen3",
+        "qwen-2.5",
+        "deepseek",
+        "kimi",
+        "glm-4",
+        "minimax",
+      ];
+      if (toolCapableFamilies.some((family) => normalized.includes(family))) {
+        return true;
+      }
+
+      // Families where only certain versions qualify: gpt-4 through gpt-9,
+      // the o1..o9 series, and grok-3 through grok-9.
+      const toolCapableVersions = [/gpt-[4-9]/, /\bo[1-9]\b/, /grok-[3-9]/];
+      return toolCapableVersions.some((versionRe) =>
+        versionRe.test(normalized),
+      );
+    },
     clawrouter: (model) => {
       // ClawRouter routes to various providers, so we check common tool-supporting patterns
       const lower = model.toLowerCase();
 
@@ -216,6 +216,7 @@
             "msty",
             "watsonx",
             "openrouter",
+            "orcarouter",
             "clawrouter",
             "sambanova",
             "nvidia",
@@ -269,6 +270,7 @@
             "### Msty\nMsty is the simplest way to get started with online or local LLMs on all desktop platforms - Windows, Mac, and Linux. No fussing around, one-click and you are up and running. To get started, follow these steps:\n1. Download from [Msty.app](https://msty.app/), open the application, and click 'Setup Local AI'.\n2. Go to the Local AI Module page and download a model of your choice.\n3. Once the model has finished downloading, you can start asking questions through Continue.\n> [Reference](https://continue.dev/docs/reference/Model%20Providers/Msty)",
             "### IBM watsonx\nwatsonx, developed by IBM, offers a variety of pre-trained AI foundation models that can be used for natural language processing (NLP), computer vision, and speech recognition tasks.",
             "### OpenRouter\nOpenRouter offers a single API to access almost any language model. To get started, obtain an API key from [their console](https://openrouter.ai/settings/keys).",
+            "### OrcaRouter\nOrcaRouter is an OpenAI-compatible API gateway that aggregates ~120 chat models from OpenAI, Anthropic, Google, DeepSeek, xAI, Qwen, Kimi, MiniMax, Z-AI, and others behind a single `sk-orca-` key. It also exposes an `orcarouter/auto` virtual model with configurable adaptive routing (cheapest / balanced / quality / contextual bandit / difficulty-gated).\nTo get started, sign up at [orcarouter.ai](https://www.orcarouter.ai) and obtain an API key from your [console](https://www.orcarouter.ai/console).\n> [Reference](https://docs.orcarouter.ai)",
             "### ClawRouter\nClawRouter is an open-source LLM router that automatically selects the cheapest capable model for each request based on prompt complexity, providing 78-96% cost savings. To get started, run `npx clawrouter` to start the router at localhost:1337. A wallet is auto-generated on first run - fund it with USDC (Solana/Base) to access premium models, or use `blockrun/free` tier without payment.\n> [Reference](https://github.com/BlockRunAI/ClawRouter)",
             "### SambaNova\n SambaNova provides fast inference of open-source language models with zero data retention. To get started, obtain an API key in [SambaNova Cloud](https://cloud.sambanova.ai/apis?utm_source=continue&utm_medium=external&utm_campaign=cloud_signup ).",
             "### NVIDIA NIMs\nNVIDIA offers a single API to access almost any language model. To find out more, visit the [LLM APIs Documentation](https://docs.api.nvidia.com/nim/reference/llm-apis).\nFor information specific to getting a key, please check out the [docs here](https://docs.nvidia.com/nim/large-language-models/latest/getting-started.html#option-1-from-api-catalog)",
 
@@ -2802,6 +2802,107 @@ export const models: { [key: string]: ModelPackage } = {
     isOpenSource: true,
   },
 
+  // OrcaRouter Models
+  orcarouterAuto: {
+    title: "OrcaRouter Auto",
+    description:
+      "Adaptive routing across upstream models with configurable strategy (cheapest / balanced / quality / contextual bandit / difficulty-gated). Routing pools and weights are tunable from the OrcaRouter console.",
+    params: {
+      title: "OrcaRouter Auto",
+      model: "orcarouter/auto",
+      contextLength: 128_000,
+    },
+    icon: "orcarouter.png",
+    providerOptions: ["orcarouter"],
+    isOpenSource: false,
+  },
+  orcarouterGpt55: {
+    title: "OpenAI: GPT-5.5",
+    description: "OpenAI GPT-5.5 routed through OrcaRouter.",
+    params: {
+      title: "OpenAI: GPT-5.5",
+      model: "openai/gpt-5.5",
+      contextLength: 400_000,
+    },
+    icon: "orcarouter.png",
+    providerOptions: ["orcarouter"],
+    isOpenSource: false,
+  },
+  orcarouterClaudeOpus47: {
+    title: "Anthropic: Claude Opus 4.7",
+    description:
+      "Anthropic Claude Opus 4.7 routed through OrcaRouter. Reasoning model — set `requestOptions.extraBodyProperties.thinking` to control thinking budget.",
+    params: {
+      title: "Anthropic: Claude Opus 4.7",
+      model: "anthropic/claude-opus-4.7",
+      contextLength: 200_000,
+    },
+    icon: "orcarouter.png",
+    providerOptions: ["orcarouter"],
+    isOpenSource: false,
+  },
+  // Gemini 3 Flash Preview via OrcaRouter. `params.title` is kept identical to
+  // the package `title`, matching the other OrcaRouter entries and the
+  // "-preview" model id.
+  orcarouterGemini3Flash: {
+    title: "Google: Gemini 3 Flash Preview",
+    description:
+      "Google Gemini 3 Flash Preview routed through OrcaRouter. Reasoning model — chat may appear blank for several seconds while the model reasons. Set `reasoning_effort: 'minimal'` in `requestOptions.extraBodyProperties` for fast responses.",
+    params: {
+      title: "Google: Gemini 3 Flash Preview",
+      model: "google/gemini-3-flash-preview",
+      contextLength: 1_000_000,
+    },
+    icon: "orcarouter.png",
+    providerOptions: ["orcarouter"],
+    isOpenSource: false,
+  },
+  orcarouterDeepseekV4Pro: {
+    title: "DeepSeek: DeepSeek V4 Pro",
+    description: "DeepSeek V4 Pro routed through OrcaRouter.",
+    params: {
+      title: "DeepSeek: DeepSeek V4 Pro",
+      model: "deepseek/deepseek-v4-pro",
+      contextLength: 128_000,
+    },
+    icon: "orcarouter.png",
+    providerOptions: ["orcarouter"],
+    isOpenSource: true,
+  },
+  orcarouterGrok43: {
+    title: "xAI: Grok 4.3",
+    description: "xAI Grok 4.3 routed through OrcaRouter.",
+    params: {
+      title: "xAI: Grok 4.3",
+      model: "grok/grok-4.3",
+      contextLength: 256_000,
+    },
+    icon: "orcarouter.png",
+    providerOptions: ["orcarouter"],
+    isOpenSource: false,
+  },
+  orcarouterQwen36Flash: {
+    title: "Alibaba: Qwen 3.6 Flash",
+    description: "Alibaba Qwen 3.6 Flash routed through OrcaRouter.",
+    params: {
+      title: "Alibaba: Qwen 3.6 Flash",
+      model: "qwen/qwen3.6-flash",
+      contextLength: 128_000,
+    },
+    icon: "orcarouter.png",
+    providerOptions: ["orcarouter"],
+    isOpenSource: true,
+  },
+  orcarouterMinimaxM27: {
+    title: "MiniMax: MiniMax M2.7",
+    description: "MiniMax M2.7 routed through OrcaRouter.",
+    params: {
+      title: "MiniMax: MiniMax M2.7",
+      model: "minimax/minimax-m2.7",
+      contextLength: 200_000,
+    },
+    icon: "orcarouter.png",
+    providerOptions: ["orcarouter"],
+    isOpenSource: false,
+  },
+
   AUTODETECT: {
     title: "Autodetect",
     description: