Merge pull request #4 from Core-Mate/harvey/configure-vlm-env

dxcsmam · web-flow · commit bf2098d71c7f · 2026-05-01T09:07:48.000-07:00
Configure graph agents with VLM env
diff --git a/client/start.sh b/client/start.sh
@@ -57,7 +57,7 @@ info "Install completed"
 # --------------------------------------------------
 # 5. Launch app
 # --------------------------------------------------
-PACKAGE="com.haomai.promotor"
+PACKAGE="com.coremate.opengui"
 adb shell am start -n "$PACKAGE/.login.SplashActivity" >/dev/null 2>&1
 info "App launched"
 
diff --git a/server/apps/backend/.env.example b/server/apps/backend/.env.example
@@ -1,10 +1,5 @@
-# OpenGUI Server environment configuration
-#
-# Quick start:
-#   1. Copy this file to .env
-#      cp apps/backend/.env.example apps/backend/.env
-#   2. Fill in the required model settings below.
-#   3. Never commit the real .env file or real API keys.
+# OpenGUI Server Environment Configuration
+# Copy this file to .env and fill in your values
 
 # App
 NODE_ENV=development
@@ -19,42 +14,10 @@ REDIS_PORT=6379
 REDIS_DB=0
 REDIS_PASSWORD=
 
-# ============================================================
-# Required model configuration
-# ============================================================
-#
-# OpenGUI uses these variables at runtime:
-# - CLAUDE_* is the default text/action model config for:
-#   plan-supervisor, summarizer, executor-a11y, and action-summarizer.
-# - VLM_* is the vision model config for executor-vlm.
-#
-# CLAUDE_BASE_URL and VLM_BASE_URL are optional when you use the provider's
-# default endpoint. Set them when using an OpenAI-compatible gateway.
-#
-# OpenAI-compatible example:
-#   CLAUDE_BASE_URL=https://your-openai-compatible-endpoint/v1
-#   CLAUDE_MODEL=your-text-model
-#   CLAUDE_SMALL_MODEL=your-small-or-cheaper-text-model
-#   VLM_BASE_URL=https://your-openai-compatible-endpoint/v1
-#   VLM_MODEL=your-vision-model
-#
-# Do not put real API keys in this example file.
-
-# Text/action agents
-CLAUDE_API_KEY=
-CLAUDE_BASE_URL=
-CLAUDE_MODEL=
-CLAUDE_SMALL_MODEL=
-
-# Vision agent
+# AI model config (used by all graph agents)
 VLM_API_KEY=
-VLM_BASE_URL=
-VLM_MODEL=
-
-# Optional: Creator Agent (Claude Agent SDK)
-# Only required if you use creator-agent SDK features.
-ANTHROPIC_API_KEY=
-# ANTHROPIC_BASE_URL=
+VLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
+VLM_MODEL=qwen3.6-plus
 
 # LangSmith tracing (optional, for debugging agent execution)
 # LANGSMITH_TRACING=true
diff --git a/server/apps/backend/src/modules/graph-agent/config/agent-config.provider.ts b/server/apps/backend/src/modules/graph-agent/config/agent-config.provider.ts
@@ -45,50 +45,39 @@ export class AgentConfigProvider {
 
 	/**
 	 *
-	 * - executor-vlm: VLM_API_KEY, VLM_BASE_URL, VLM_MODEL
-	 * - action-summarizer: CLAUDE_API_KEY, CLAUDE_BASE_URL, CLAUDE_SMALL_MODEL
-	 * - Other Agent: CLAUDE_API_KEY, CLAUDE_BASE_URL, CLAUDE_MODEL
+	 * All graph agents use the same OpenAI-compatible model config:
+	 * - VLM_API_KEY
+	 * - VLM_BASE_URL
+	 * - VLM_MODEL
 	 *
 	 */
 	async getModelConfig(agentName: AgentName, region = "CN"): Promise<ModelConfig> {
 		const config = await this.getConfig(agentName, region);
 
-		const isVLM = agentName === AgentName.EXECUTOR_VLM;
-		const isSmall = agentName === AgentName.ACTION_SUMMARIZER;
+		const apiKey = this.configService.get<string>("VLM_API_KEY");
+		const baseURL = this.configService.get<string>("VLM_BASE_URL");
+		const model = this.configService.get<string>("VLM_MODEL");
 
-		const apiKey = isVLM
-			? this.configService.get<string>("VLM_API_KEY")
-			: this.configService.get<string>("CLAUDE_API_KEY");
-		const baseURL = isVLM
-			? this.configService.get<string>("VLM_BASE_URL")
-			: this.configService.get<string>("CLAUDE_BASE_URL");
-		const envModel = isVLM
-			? this.configService.get<string>("VLM_MODEL")
-			: isSmall
-				? this.configService.get<string>("CLAUDE_SMALL_MODEL")
-				: this.configService.get<string>("CLAUDE_MODEL");
-
-		const model = envModel;
 
 		if (!apiKey) {
 			throw new Error(
 				`API key not configured for agent: ${agentName}. ` +
-					`Set ${isVLM ? "VLM_API_KEY" : "CLAUDE_API_KEY"} in .env`,
+					"Set VLM_API_KEY in .env",
 			);
 		}
 
 		if (!model) {
 			throw new Error(
 				`Model not configured for agent: ${agentName}. ` +
-					`Set ${isVLM ? "VLM_MODEL" : "CLAUDE_MODEL"} in .env`,
+					"Set VLM_MODEL in .env",
 			);
 		}
 
 		return {
 			model,
 			apiKey,
 			baseURL: baseURL || undefined,
-			fallbackModel: config.fallbackModel ?? undefined,
+			fallbackModel: model,
 			temperature: config.temperature ?? undefined,
 			maxTokens: config.maxTokens ?? undefined,
 			topP: config.topP ?? undefined,
diff --git a/server/apps/backend/src/modules/graph-agent/config/chat-model.factory.ts b/server/apps/backend/src/modules/graph-agent/config/chat-model.factory.ts
@@ -0,0 +1,33 @@
+import { ChatOpenAI } from "@langchain/openai";
+import type { ModelConfig } from "./types";
+
+/**
+ * Builds the ChatOpenAI instance used by OpenGUI graph agents from `config`, which
+ * AgentConfigProvider fills from VLM_API_KEY, VLM_BASE_URL, and VLM_MODEL.
+ *
+ * `options.model`, `temperature`, `maxTokens`, and `topP` take precedence over `config`.
+ */
+export function createConfiguredChatModel(
+	config: ModelConfig,
+	options: {
+		model?: string;
+		temperature?: number;
+		maxTokens?: number;
+		maxRetries?: number;
+		timeout?: number;
+		topP?: number;
+	} = {},
+) {
+	return new ChatOpenAI({
+		model: options.model ?? config.model,
+		apiKey: config.apiKey,
+		temperature: options.temperature ?? config.temperature,
+		maxTokens: options.maxTokens ?? config.maxTokens,
+		maxRetries: options.maxRetries, // no config fallback; undefined when the caller omits it
+		timeout: options.timeout, // no config fallback; undefined when the caller omits it
+		topP: options.topP ?? config.topP,
+		...(config.baseURL && { // only pass `configuration` when a base URL is set
+			configuration: { baseURL: config.baseURL },
+		}),
+	});
+}
diff --git a/server/apps/backend/src/modules/graph-agent/graph/nodes/plan-supervisor.node.spec.ts b/server/apps/backend/src/modules/graph-agent/graph/nodes/plan-supervisor.node.spec.ts
@@ -1,22 +1,23 @@
 /**
  * Plan Supervisor 结构化输出集成测试
  *
- * 验证 ChatAnthropic + createAgent + providerStrategy 能否拿到 structuredResponse
+ * 验证 ChatOpenAI + createAgent + providerStrategy 能否拿到 structuredResponse
  * 使用真实模型调用，mock 数据尽量简化
  *
  * 运行：pnpm test -- plan-supervisor.node.spec
  */
 
-import { ChatAnthropic } from "@langchain/anthropic";
 import { HumanMessage } from "@langchain/core/messages";
 import { tool } from "@langchain/core/tools";
+import { ChatOpenAI } from "@langchain/openai";
 import { createAgent, providerStrategy } from "langchain";
 import { z } from "zod";
 
 // ====== API 配置（与 plan-supervisor.node.ts 保持一致）======
-const API_KEY = process.env.CLAUDE_API_KEY ?? "test-api-key-placeholder";
-const BASE_URL = process.env.CLAUDE_BASE_URL ?? "https://ai-gateway.vercel.sh";
-const MODEL = process.env.CLAUDE_MODEL ?? "anthropic/claude-sonnet-4.6";
+const API_KEY = process.env.VLM_API_KEY ?? "test-api-key-placeholder";
+const BASE_URL =
+	process.env.VLM_BASE_URL ?? "https://dashscope.aliyuncs.com/compatible-mode/v1";
+const MODEL = process.env.VLM_MODEL ?? "qwen3.6-plus";
 // =============================================================
 
 const SupervisorOutputSchema = z.object({
@@ -72,18 +73,17 @@ const mockLoadSkill = tool(
 	},
 );
 
-describe("PlanSupervisor - ChatAnthropic 结构化输出集成测试", () => {
-	let model: ChatAnthropic;
+describe("PlanSupervisor - ChatOpenAI 结构化输出集成测试", () => {
+	let model: ChatOpenAI;
 
 	beforeAll(() => {
-		model = new ChatAnthropic({
+		model = new ChatOpenAI({
 			model: MODEL,
 			apiKey: API_KEY,
-			clientOptions: {
+			maxRetries: 2,
+			timeout: 120000,
+			configuration: {
 				baseURL: BASE_URL,
-				maxRetries: 2,
-				timeout: 120000,
-				authToken: null,
 			},
 		});
 	});
diff --git a/server/apps/backend/src/modules/graph-agent/graph/utils/execution-connection.ts b/server/apps/backend/src/modules/graph-agent/graph/utils/execution-connection.ts
@@ -0,0 +1,33 @@
+const CONNECTION_LOST_PATTERNS = [ // lowercase substrings tested with includes() against lowercased error text
+	"no connection for execution",
+	"socket disconnected",
+	"client disconnected",
+	"connection lost",
+	"ack timeout", // already covered by the bare "timeout" entry below
+	"operation has timed out",
+	"timeout", // NOTE(review): matches any message containing "timeout" — confirm unrelated timeouts should count
+];
+
+function getErrorText(error: unknown): string { // flattens an unknown thrown value to lowercase text
+	if (error instanceof Error) {
+		return `${error.name} ${error.message}`.toLowerCase(); // "<name> <message>", lowercased
+	}
+	if (typeof error === "string") {
+		return error.toLowerCase();
+	}
+	return ""; // any other value (plain objects included) yields empty text
+}
+
+export function isExecutionConnectionLost(error: unknown): boolean { // true when the error's text contains a connection-lost pattern
+	return isExecutionConnectionLostMessage(getErrorText(error));
+}
+
+export function isExecutionConnectionLostMessage(message: unknown): boolean { // message-level check; takes unknown and narrows internally
+	if (typeof message !== "string") return false; // non-string input is never a match
+	const lower = message.toLowerCase(); // patterns are lowercase, so the comparison is case-insensitive
+	return CONNECTION_LOST_PATTERNS.some((pattern) => lower.includes(pattern)); // substring match, not whole-message equality
+}
+
+export function buildExecutionConnectionLostMessage(executionId: number): string { // formats the connection-lost message for the given execution id
+	return `Execution ${executionId} lost the client connection. Reconnect the device and retry.`;
+}
diff --git a/server/start.sh b/server/start.sh
@@ -83,11 +83,9 @@ ENV_EXAMPLE=apps/backend/.env.example
 if [ ! -f "$ENV_FILE" ]; then
   if [ -f "$ENV_EXAMPLE" ]; then
     cp "$ENV_EXAMPLE" "$ENV_FILE"
-    warn ".env was created from .env.example. Please edit it and add the required model configuration:"
+    warn ".env was created from .env.example. Please edit it and add your API keys:"
     warn "  File: $ENV_FILE"
-    warn "  Required: CLAUDE_API_KEY, CLAUDE_MODEL, CLAUDE_SMALL_MODEL, VLM_API_KEY, VLM_MODEL"
-    warn "  Optional: CLAUDE_BASE_URL, VLM_BASE_URL for OpenAI-compatible gateways"
-    warn "  Optional: ANTHROPIC_API_KEY only for Creator Agent SDK features"
+    warn "  VLM_API_KEY, VLM_BASE_URL, VLM_MODEL"
     warn "Run this script again after editing the file."
     exit 0
   else
@@ -97,30 +95,16 @@ fi
 
 set -a; source "$ENV_FILE" 2>/dev/null || true; set +a
 
-REQUIRED_MODEL_VARS=(
-  CLAUDE_API_KEY
-  CLAUDE_MODEL
-  CLAUDE_SMALL_MODEL
-  VLM_API_KEY
-  VLM_MODEL
-)
-
-MISSING_MODEL_VARS=()
-for var_name in "${REQUIRED_MODEL_VARS[@]}"; do
-  if [ -z "${!var_name}" ]; then
-    MISSING_MODEL_VARS+=("$var_name")
-  fi
-done
+if [ -z "$VLM_API_KEY" ]; then
+  warn "VLM_API_KEY is not set. Please edit .env."
+fi
 
-if [ "${#MISSING_MODEL_VARS[@]}" -gt 0 ]; then
-  warn "Missing required model configuration in $ENV_FILE:"
-  for var_name in "${MISSING_MODEL_VARS[@]}"; do
-    warn "  $var_name"
-  done
-  warn "Fill these values and run ./start.sh again."
-  warn "If text and vision use the same provider, fill both CLAUDE_* and VLM_* explicitly."
-  warn "ANTHROPIC_API_KEY is optional unless you use Creator Agent SDK features."
-  exit 1
+if [ -z "$VLM_MODEL" ]; then
+  warn "VLM_MODEL is not set. Please edit .env."
+fi
+
+if [ -z "$VLM_BASE_URL" ]; then
+  warn "VLM_BASE_URL is not set. Please edit .env."
 fi
 
 info ".env loaded"