diff --git a/.dockerignore b/.dockerignore index 45c1def32..385a6449f 100644 --- a/.dockerignore +++ b/.dockerignore @@ -37,8 +37,6 @@ build/ *.tgz # Backend -backend/assets/* -!backend/assets/test.wav backend/flower_db.sqlite uploads/ test/ @@ -60,4 +58,4 @@ assets/ .Spotlight-V100 .Trashes ehthumbs.db -Thumbs.db \ No newline at end of file +Thumbs.db diff --git a/.github/workflows/build-offline-package.yml b/.github/workflows/build-offline-package.yml new file mode 100644 index 000000000..6619cf764 --- /dev/null +++ b/.github/workflows/build-offline-package.yml @@ -0,0 +1,105 @@ +name: Build Offline Deployment Package + +on: + workflow_dispatch: + inputs: + include_source: + description: 'Include source code in the package' + required: false + default: true + type: boolean + +jobs: + build-offline-package: + runs-on: ubuntu-latest + strategy: + matrix: + platform: [amd64, arm64] + + steps: + - name: Free disk space + uses: jlumbroso/free-disk-space@main + with: + tool-cache: false + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: false + swap-storage: true + + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set version and platform variables + id: set-vars + run: | + PLATFORM="${{ matrix.platform }}" + REF_TYPE="${{ github.ref_type }}" + REF_NAME="${{ github.ref_name }}" + + if [ "$REF_TYPE" = "tag" ]; then + VERSION="$REF_NAME" + elif [ "$REF_TYPE" = "branch" ]; then + if [ "$REF_NAME" = "main" ]; then + VERSION="latest" + else + VERSION="${REF_NAME//\//-}" + fi + else + VERSION="latest" + fi + + echo "version=$VERSION" >> $GITHUB_OUTPUT + echo "platform=$PLATFORM" >> $GITHUB_OUTPUT + echo "package-name=nexent-offline-${PLATFORM}-${VERSION}" >> $GITHUB_OUTPUT + + - name: Build offline package + run: | + chmod +x scripts/offline/build_offline_package.sh + + ./scripts/offline/build_offline_package.sh \ + --version "${{ steps.set-vars.outputs.version }}" \ 
+ --platform "${{ matrix.platform }}" \ + --output-dir ./offline-output \ + --include-source "${{ inputs.include_source }}" + + + + - name: Create ZIP package + run: | + PACKAGE_NAME="${{ steps.set-vars.outputs.package-name }}" + + cd offline-output + zip -r "../${PACKAGE_NAME}.zip" . + cd .. + + echo "Package created: ${PACKAGE_NAME}.zip" + + ls -lh "${PACKAGE_NAME}.zip" + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: ${{ steps.set-vars.outputs.package-name }} + path: ${{ steps.set-vars.outputs.package-name }}.zip + retention-days: 30 + + - name: Summary + run: | + echo "" + echo "========================================" + echo "Offline Package Build Summary" + echo "========================================" + echo "Version: ${{ steps.set-vars.outputs.version }}" + echo "Platform: ${{ matrix.platform }}" + echo "Package: ${{ steps.set-vars.outputs.package-name }}.zip" + echo "Ref Type: ${{ github.ref_type }}" + echo "Ref Name: ${{ github.ref_name }}" + echo "========================================" + echo "" + echo "Package contents:" + unzip -l "${{ steps.set-vars.outputs.package-name }}.zip" | head -50 \ No newline at end of file diff --git a/.github/workflows/docker-deploy.yml b/.github/workflows/docker-deploy.yml index 9d04c8913..a77c2491f 100644 --- a/.github/workflows/docker-deploy.yml +++ b/.github/workflows/docker-deploy.yml @@ -38,7 +38,10 @@ jobs: - name: Check if model is cached locally id: check-model run: | - if [ -f ~/model-assets/clip-vit-base-patch32/config.json ] && [ -d ~/model-assets/nltk_data ]; then + if [ -f ~/model-assets/clip-vit-base-patch32/config.json ] && \ + [ -d ~/model-assets/nltk_data ] && \ + [ -d ~/model-assets/table-transformer-structure-recognition ] && \ + [ -d ~/model-assets/yolox ]; then echo "cache-hit=true" >> "$GITHUB_OUTPUT" cp -r ~/model-assets ./ else @@ -105,4 +108,4 @@ jobs: ./deploy.sh --mode 3 --is-mainland N --enable-terminal N --version 2 --root-dir "$HOME/nexent-production-data" 
else ./deploy.sh --mode 1 --is-mainland N --enable-terminal N --version 2 --root-dir "$HOME/nexent-development-data" - fi \ No newline at end of file + fi diff --git a/.gitignore b/.gitignore index 20de73e8a..ec5b3a3f9 100644 --- a/.gitignore +++ b/.gitignore @@ -19,9 +19,16 @@ docker/uploads docker/openssh-server docker/volumes/db/data docker/.env +docker/monitoring/monitoring.env docker/.run docker/deploy.options -k8s/helm/.deploy.options +k8s/helm/deploy.options +scripts/deployment/local-config.yaml +scripts/deployment/generated/ +docker/.env.generated +docker/docker-compose.generated.yml +k8s/helm/nexent/generated-values.yaml +k8s/helm/nexent/generated-secrets-values.yaml frontend_standalone/ .pnpm-store/ @@ -43,11 +50,15 @@ model-assets/ openspec/ logs/ +.agents/ .devspace/ devspace.yaml k8s/helm/**/*.tgz k8s/helm/nexent/Chart.lock MAC_DEVELOPMENT_GUIDE.md -# Mac本地开发数据持久化(无需提交) data/ +sdk/benchmark/.env +/docker/.env.bak + +.venv \ No newline at end of file diff --git a/README.md b/README.md index 51eb0927b..7983e6c6c 100644 --- a/README.md +++ b/README.md @@ -47,10 +47,13 @@ Quick and straightforward for most users. Prerequisites: Docker 24+ and Docker C ```bash git clone https://github.com/ModelEngine-Group/nexent.git cd nexent/docker -cp .env.example .env bash deploy.sh ``` +The Docker and Kubernetes deploy scripts share the same deployment configuration model. Interactive runs show Bash TUI menus for component selection, port policy, and image source. `infrastructure` is required; `application` is selected by default but can be disabled. Use `b`/Backspace to return to the previous TUI step and `q` to quit. Non-interactive runs can pass the same choices with `--components`, `--port-policy development|production`, and `--image-source general|mainland|local-latest`. Successful deployments save non-sensitive choices to each deploy directory's `deploy.options` for reuse on the next run. + +Docker uninstall is handled by `bash uninstall.sh`. 
It can preserve or delete data volumes: run it interactively, pass `--delete-volumes true|false`, or use `bash uninstall.sh delete-all` to remove containers and persistent data. + For detailed deployment instructions, see [Docker Installation](https://modelengine-group.github.io/nexent/en/quick-start/installation.html). ### Kubernetes Deployment (For Enterprise Production) @@ -60,9 +63,11 @@ Ideal for enterprise scenarios requiring high availability and elastic scaling. ```bash git clone https://github.com/ModelEngine-Group/nexent.git cd nexent/k8s/helm -./deploy-helm.sh apply +./deploy.sh ``` +Kubernetes uninstall is handled by `bash uninstall.sh`. It removes the Helm release first, then can optionally delete the namespace and local hostPath data. Use `--delete-namespace true|false`, `--delete-local-data true|false`, or `bash uninstall.sh delete-all`; pass `--keep-local-data` with `delete-all` to preserve local volume contents. + For detailed deployment instructions, see [Kubernetes Installation](https://modelengine-group.github.io/nexent/en/quick-start/kubernetes-installation.html). 
# ✨ Core Features diff --git a/backend/agents/create_agent_info.py b/backend/agents/create_agent_info.py index 5a11b550b..be7f73142 100644 --- a/backend/agents/create_agent_info.py +++ b/backend/agents/create_agent_info.py @@ -1,4 +1,4 @@ -import threading +import threading import logging from typing import List, Optional from urllib.parse import urljoin @@ -21,7 +21,7 @@ from database.a2a_agent_db import PROTOCOL_JSONRPC from services.memory_config_service import build_memory_context -from services.image_service import get_vlm_model +from services.image_service import get_video_understanding_model, get_vlm_model from database.agent_db import search_agent_info_by_agent_id, query_sub_agents_id_list from database.agent_version_db import query_current_version_no from database.tool_db import search_tools_for_sub_agent @@ -31,13 +31,37 @@ from utils.model_name_utils import add_repo_to_name from utils.prompt_template_utils import get_agent_prompt_template from utils.config_utils import tenant_config_manager, get_model_name_from_config -from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE, DATA_PROCESS_SERVICE +from utils.context_utils import build_context_components +from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE, DATA_PROCESS_SERVICE, MINIO_DEFAULT_BUCKET from consts.exceptions import ValidationError logger = logging.getLogger("create_agent_info") logger.setLevel(logging.DEBUG) +def _build_internal_s3_url(file: dict) -> str: + """Build a valid S3 URL for internal tools from uploaded file metadata.""" + if not isinstance(file, dict): + return "" + + object_name = str(file.get("object_name") or "").strip().lstrip("/") + if object_name: + bucket = MINIO_DEFAULT_BUCKET or "nexent" + return f"s3://{bucket}/{object_name}" + + url = str(file.get("url") or "").strip() + if not url or url.startswith("blob:") or url.startswith("s3:/blob:"): + return "" + + if url.startswith("s3://"): + return url + + if url.startswith("s3:/"): + 
return "s3://" + url.replace("s3:/", "", 1).lstrip("/") + + return "s3:/" + url + + def _get_skills_for_template( agent_id: int, tenant_id: str, @@ -247,7 +271,9 @@ async def create_model_config_list(tenant_id): ), url=record["base_url"], ssl_verify=record.get("ssl_verify", True), - model_factory=record.get("model_factory"))) + model_factory=record.get("model_factory"), + timeout_seconds=record.get("timeout_seconds"), + concurrency_limit=record.get("concurrency_limit"))) # fit for old version, main_model and sub_model use default model main_model_config = tenant_config_manager.get_model_config( key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id) @@ -258,7 +284,9 @@ async def create_model_config_list(tenant_id): "model_name") else "", url=main_model_config.get("base_url", ""), ssl_verify=main_model_config.get("ssl_verify", True), - model_factory=main_model_config.get("model_factory"))) + model_factory=main_model_config.get("model_factory"), + timeout_seconds=main_model_config.get("timeout_seconds"), + concurrency_limit=main_model_config.get("concurrency_limit"))) model_list.append( ModelConfig(cite_name="sub_model", api_key=main_model_config.get("api_key", ""), @@ -266,7 +294,9 @@ async def create_model_config_list(tenant_id): "model_name") else "", url=main_model_config.get("base_url", ""), ssl_verify=main_model_config.get("ssl_verify", True), - model_factory=main_model_config.get("model_factory"))) + model_factory=main_model_config.get("model_factory"), + timeout_seconds=main_model_config.get("timeout_seconds"), + concurrency_limit=main_model_config.get("concurrency_limit"))) return model_list @@ -383,6 +413,9 @@ async def create_agent_config( # Get skills list for prompt template skills = _get_skills_for_template(agent_id, tenant_id, version_no) + time_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + is_manager = len(managed_agents) > 0 or len(external_a2a_agents) > 0 + render_kwargs = { "duty": duty_prompt, "constraint": constraint_prompt, @@ -395,11 
+428,30 @@ async def create_agent_config( "APP_DESCRIPTION": app_description, "memory_list": memory_list, "knowledge_base_summary": knowledge_base_summary, - "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "time": time_str, "user_id": user_id, } system_prompt = Template(prompt_template["system_prompt"], undefined=StrictUndefined).render(render_kwargs) + context_components = build_context_components( + duty=duty_prompt, + constraint=constraint_prompt, + few_shots=few_shots_prompt, + app_name=app_name, + app_description=app_description, + time_str=time_str, + user_id=user_id, + language=language, + is_manager=is_manager, + tools=render_kwargs["tools"], + skills=skills, + managed_agents=render_kwargs["managed_agents"], + external_a2a_agents=render_kwargs["external_a2a_agents"], + memory_list=memory_list, + memory_search_query=last_user_query, + knowledge_base_summary=knowledge_base_summary, + ) + model_id_to_use = override_model_id if override_model_id else agent_info.get("model_id") model_max_tokens = 10000 if model_id_to_use is not None: @@ -425,12 +477,13 @@ async def create_agent_config( agent_id=agent_id ), tools=tool_list + _get_skill_script_tools(agent_id, tenant_id, version_no), - max_steps=agent_info.get("max_steps", 10), + max_steps=agent_info.get("max_steps", 15), model_name=model_name, provide_run_summary=agent_info.get("provide_run_summary", False), managed_agents=managed_agents, external_a2a_agents=external_a2a_agents, - context_manager_config=cm_config + context_manager_config=cm_config, + context_components=context_components, ) return agent_config @@ -469,6 +522,7 @@ async def create_tool_config_list(agent_id, tenant_id, user_id, version_no: int rerank = param_dict.get("rerank", False) rerank_model_name = param_dict.get("rerank_model_name", "") rerank_model = None + is_multimodal = bool(tool_config.params.pop("multimodal", False)) if rerank and rerank_model_name: rerank_model = get_rerank_model( tenant_id=tenant_id, 
model_name=rerank_model_name @@ -526,10 +580,17 @@ async def create_tool_config_list(agent_id, tenant_id, user_id, version_no: int } elif tool_config.class_name == "AnalyzeImageTool": tool_config.metadata = { + # get_vlm_model reads the first multimodal slot, now shown as image understanding. "vlm_model": get_vlm_model(tenant_id=tenant_id), "storage_client": minio_client, "validate_url_access": lambda urls: validate_urls_access(urls, user_id) } + elif tool_config.class_name in ["AnalyzeAudioTool", "AnalyzeVideoTool"]: + tool_config.metadata = { + "vlm_model": get_video_understanding_model(tenant_id=tenant_id), + "storage_client": minio_client, + "validate_url_access": lambda urls: validate_urls_access(urls, user_id) + } tool_config_list.append(tool_config) @@ -630,10 +691,12 @@ async def join_minio_file_description_to_query( # Collect files from current message first (higher priority) if minio_files and isinstance(minio_files, list): for file in minio_files: - if isinstance(file, dict) and file.get("url") and file.get("name"): - url = file["url"] - if url not in seen_urls: - seen_urls.add(url) + if isinstance(file, dict) and file.get("name") and (file.get("url") or file.get("object_name")): + s3_url = _build_internal_s3_url(file) + if not s3_url: + continue + if s3_url not in seen_urls: + seen_urls.add(s3_url) all_files.append(file) # Collect files from historical messages (lower priority, already-deduped) @@ -641,10 +704,12 @@ async def join_minio_file_description_to_query( for msg in history: if isinstance(msg, dict) and msg.get("minio_files"): for file in msg["minio_files"]: - if isinstance(file, dict) and file.get("url") and file.get("name"): - url = file["url"] - if url not in seen_urls: - seen_urls.add(url) + if isinstance(file, dict) and file.get("name") and (file.get("url") or file.get("object_name")): + s3_url = _build_internal_s3_url(file) + if not s3_url: + continue + if s3_url not in seen_urls: + seen_urls.add(s3_url) all_files.append(file) # Enforce 
file count limit (keep most recent files by truncating from the end) @@ -660,7 +725,7 @@ async def join_minio_file_description_to_query( fixed_overhead = len(prefix) + len(suffix) for i, file in enumerate(all_files): - s3_url = f"s3:/{file['url']}" + s3_url = _build_internal_s3_url(file) presigned_url = file.get("presigned_url", "") # Build description with both URLs @@ -712,8 +777,10 @@ def _format_minio_files_for_content(minio_files: Optional[List[dict]], max_files if i >= max_files: file_lines.append(f" - ... (and {len(minio_files) - max_files} more files)") break - if isinstance(file, dict) and file.get("url") and file.get("name"): - s3_url = f"s3:/{file['url']}" + if isinstance(file, dict) and file.get("name") and (file.get("url") or file.get("object_name")): + s3_url = _build_internal_s3_url(file) + if not s3_url: + continue presigned_url = file.get("presigned_url", "") if presigned_url: file_lines.append( @@ -835,7 +902,7 @@ async def create_agent_run_info( # Filter MCP servers and tools, and build mcp_host with authorization used_mcp_urls = filter_mcp_servers_and_tools(agent_config, remote_mcp_dict) - # Build mcp_host list with authorization tokens + # Build mcp_host list with authorization tokens and custom headers mcp_host = [] for url in used_mcp_urls: # Find the MCP record for this URL @@ -850,10 +917,15 @@ async def create_agent_run_info( "url": url, "transport": "sse" if url.endswith("/sse") else "streamable-http" } - # Add authorization if present + headers = {} auth_token = mcp_record.get("authorization_token") if auth_token: - mcp_config["authorization"] = auth_token + headers["Authorization"] = auth_token + custom_headers = mcp_record.get("custom_headers") + if custom_headers and isinstance(custom_headers, dict): + headers.update(custom_headers) + if headers: + mcp_config["headers"] = headers mcp_host.append(mcp_config) else: # Fallback to string format if record not found diff --git a/backend/apps/agent_app.py b/backend/apps/agent_app.py index 
86716d80a..e280ff422 100644 --- a/backend/apps/agent_app.py +++ b/backend/apps/agent_app.py @@ -1,12 +1,17 @@ +import json import logging from http import HTTPStatus from typing import Optional from fastapi import APIRouter, Body, Header, HTTPException, Request, Query from fastapi.encoders import jsonable_encoder -from starlette.responses import JSONResponse +from starlette.responses import JSONResponse, Response +from consts.const import ASSET_OWNER_TENANT_ID from consts.model import AgentRequest, AgentInfoRequest, AgentIDRequest, ConversationResponse, AgentImportRequest, AgentNameBatchCheckRequest, AgentNameBatchRegenerateRequest, VersionPublishRequest, VersionListResponse, VersionDetailResponse, VersionRollbackRequest, VersionStatusRequest, CurrentVersionResponse, VersionCompareRequest, VersionUpdateRequest +from consts.exceptions import SkillDuplicateError +from services.asset_owner_visibility import apply_agent_detail_prompt_visibility + from services.agent_service import ( get_agent_info_impl, get_creating_sub_agent_info_impl, @@ -22,6 +27,8 @@ get_agent_call_relationship_impl, clear_agent_new_mark_impl, get_agent_by_name_impl, + export_agent_with_skills_impl, + import_agent_with_skills_impl, ) from services.agent_version_service import ( publish_version_impl, @@ -38,9 +45,6 @@ ) from utils.auth_utils import get_current_user_info, get_current_user_id -# Import monitoring utilities -from utils.monitoring import monitoring_manager - agent_runtime_router = APIRouter(prefix="/agent") agent_config_router = APIRouter(prefix="/agent") logger = logging.getLogger("agent_app") @@ -48,7 +52,6 @@ # Define API route @agent_runtime_router.post("/run") -@monitoring_manager.monitor_endpoint("agent.run", exclude_params=["authorization"]) async def agent_run_api(agent_request: AgentRequest, http_request: Request, authorization: str = Header(None)): """ Agent execution API endpoint @@ -61,8 +64,11 @@ async def agent_run_api(agent_request: AgentRequest, http_request: Request, 
auth ) except Exception as e: logger.error(f"Agent run error: {str(e)}") + # Only expose actual error in debug mode for better diagnosis + # Keep generic message in normal mode for user experience + error_detail = str(e) if agent_request.is_debug else "Agent run error." raise HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Agent run error.") + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=error_detail) @agent_runtime_router.get("/stop/{conversation_id}") @@ -85,12 +91,14 @@ async def search_agent_info_api( """ Search agent info by agent_id and version_no version_no defaults to 0 (current/draft version) + Returns permission field indicating whether the user can edit this agent. """ try: - _, auth_tenant_id = get_current_user_id(authorization) + user_id, auth_tenant_id = get_current_user_id(authorization) # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id effective_tenant_id = tenant_id or auth_tenant_id - return await get_agent_info_impl(agent_id, effective_tenant_id, version_no) + agent_info = await get_agent_info_impl(agent_id, effective_tenant_id, version_no, user_id) + return apply_agent_detail_prompt_visibility(auth_tenant_id, agent_info) except Exception as e: logger.error(f"Agent search info error: {str(e)}") raise HTTPException( @@ -157,7 +165,8 @@ async def delete_agent_api( Delete an agent """ try: - user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request) + user_id, auth_tenant_id, _ = get_current_user_info( + authorization, http_request) # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id effective_tenant_id = tenant_id or auth_tenant_id await delete_agent_impl(request.agent_id, effective_tenant_id, user_id) @@ -171,11 +180,24 @@ async def delete_agent_api( @agent_config_router.post("/export") async def export_agent_api(request: AgentIDRequest, authorization: Optional[str] = Header(None)): """ - export an agent + export an agent. 
+ + Returns a ZIP file if the agent has skill instances, otherwise returns plain JSON. + The response Content-Type and body differ based on the agent's skill configuration. """ try: - agent_info_str = await export_agent_impl(request.agent_id, authorization) - return ConversationResponse(code=0, message="success", data=agent_info_str) + result = await export_agent_with_skills_impl(request.agent_id, authorization) + if isinstance(result, dict) and result.get("_zip"): + return Response( + content=result["data"], + media_type="application/zip", + headers={ + "Content-Disposition": f"attachment; filename=\"{result.get('filename', 'agent_export.zip')}\"" + } + ) + if isinstance(result, str): + result = json.loads(result) + return ConversationResponse(code=0, message="success", data=result) except Exception as e: logger.error(f"Agent export error: {str(e)}") raise HTTPException( @@ -185,15 +207,32 @@ async def export_agent_api(request: AgentIDRequest, authorization: Optional[str] @agent_config_router.post("/import") async def import_agent_api(request: AgentImportRequest, authorization: Optional[str] = Header(None)): """ - import an agent + import an agent. + + Accepts both plain JSON (agent without skills) and JSON with embedded skill ZIPs + (agent with skills). The skills field, if present, should contain base64-encoded + ZIP packages for each skill. 
""" try: - await import_agent_impl( - request.agent_info, - authorization, - force_import=request.force_import - ) + if request.skills: + await import_agent_with_skills_impl( + request.agent_info, + request.skills, + authorization, + force_import=request.force_import + ) + else: + await import_agent_impl( + request.agent_info, + authorization, + force_import=request.force_import + ) return {} + except SkillDuplicateError as exc: + raise HTTPException(status_code=409, detail={ + "type": "skill_duplicate", + "duplicate_skills": exc.duplicate_names + }) except Exception as e: logger.error(f"Agent import error: {str(e)}") raise HTTPException( @@ -256,10 +295,18 @@ async def list_all_agent_info_api( list all agent info """ try: - user_id, auth_tenant_id, _ = get_current_user_info(authorization, request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id - effective_tenant_id = tenant_id or auth_tenant_id - return await list_all_agent_info_impl(tenant_id=effective_tenant_id, user_id=user_id) + user_id, tenant_id, _ = get_current_user_info( + authorization, request) + + agent_list = await list_all_agent_info_impl( + tenant_id=tenant_id, user_id=user_id + ) + if tenant_id != ASSET_OWNER_TENANT_ID: + asset_agent_list = await list_all_agent_info_impl( + tenant_id=ASSET_OWNER_TENANT_ID, user_id=user_id + ) + return agent_list + asset_agent_list + return agent_list except Exception as e: logger.error(f"Agent list error: {str(e)}") raise HTTPException( @@ -308,7 +355,8 @@ async def publish_version_api( raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: logger.error(f"Publish version error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Publish version error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Publish version error.") @agent_config_router.post("/{agent_id}/versions/compare") @@ -333,7 +381,8 @@ async def compare_versions_api( 
raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: logger.error(f"Compare versions error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Compare versions error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Compare versions error.") @agent_config_router.get("/{agent_id}/versions", response_model=VersionListResponse) @@ -344,14 +393,14 @@ async def get_version_list_api( authorization: Optional[str] = Header(None), request: Request = None ): - """ + """versions = session.query(AgentVersion) Get version list for an agent """ try: - user_id, auth_tenant_id, _ = get_current_user_info(authorization, request) + _, auth_tenant_id, _ = get_current_user_info( + authorization, request) # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id effective_tenant_id = tenant_id or auth_tenant_id - logger.info(f"Get version list for agent_id: {agent_id}, tenant_id: {effective_tenant_id}") result = get_version_list_impl( agent_id=agent_id, tenant_id=effective_tenant_id, @@ -360,7 +409,8 @@ async def get_version_list_api( return JSONResponse(status_code=HTTPStatus.OK, content=jsonable_encoder(result)) except Exception as e: logger.error(f"Get version list error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get version list error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get version list error.") @agent_config_router.get("/{agent_id}/versions/{version_no}", response_model=VersionDetailResponse) @@ -384,7 +434,9 @@ async def get_version_api( raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) except Exception as e: logger.error(f"Get version detail error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get version detail error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get 
version detail error.") + @agent_config_router.get("/{agent_id}/versions/{version_no}/detail", response_model=VersionDetailResponse) async def get_version_detail_api( @@ -407,7 +459,8 @@ async def get_version_detail_api( raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) except Exception as e: logger.error(f"Get version detail error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get version detail error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get version detail error.") @agent_config_router.post("/{agent_id}/versions/{version_no}/rollback") @@ -434,7 +487,8 @@ async def rollback_version_api( raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: logger.error(f"Rollback version error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Rollback version error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Rollback version error.") @agent_config_router.patch("/{agent_id}/versions/{version_no}/status") @@ -461,7 +515,8 @@ async def update_version_status_api( raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: logger.error(f"Update version status error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Update version status error.") + raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Update version status error.") @agent_config_router.put("/{agent_id}/versions/{version_no}") @@ -489,7 +544,8 @@ async def update_version_api( raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: logger.error(f"Update version error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Update version error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Update version 
error.") @agent_config_router.delete("/{agent_id}/versions/{version_no}") @@ -514,7 +570,8 @@ async def delete_version_api( raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: logger.error(f"Delete version error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Delete version error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Delete version error.") @agent_config_router.get("/{agent_id}/current_version", response_model=CurrentVersionResponse) @@ -536,7 +593,8 @@ async def get_current_version_api( raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) except Exception as e: logger.error(f"Get current version error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get current version error.") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get current version error.") @agent_config_router.get("/published_list") @@ -549,10 +607,17 @@ async def list_published_agents_api( """ try: user_id, tenant_id, _ = get_current_user_info(authorization, request) - return await list_published_agents_impl(tenant_id=tenant_id, user_id=user_id) + agent_list = await list_published_agents_impl( + tenant_id=tenant_id, user_id=user_id + ) + if tenant_id != ASSET_OWNER_TENANT_ID: + asset_agent_list = await list_published_agents_impl( + tenant_id=ASSET_OWNER_TENANT_ID, user_id=user_id + ) + return agent_list + asset_agent_list + return agent_list except Exception as e: logger.error(f"Published agents list error: {str(e)}") raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Published agents list error." 
) - diff --git a/backend/apps/config_app.py b/backend/apps/config_app.py index 0cfc962ea..8cb383df7 100644 --- a/backend/apps/config_app.py +++ b/backend/apps/config_app.py @@ -16,6 +16,8 @@ from apps.model_managment_app import router as model_manager_router from apps.oauth_app import router as oauth_router from apps.prompt_app import router as prompt_router +from apps.prompt_template_app import router as prompt_template_router +from apps.mcp_management_app import router as mcp_management_router from apps.remote_mcp_app import router as remote_mcp_router from apps.skill_app import router as skill_router from apps.tenant_config_app import router as tenant_config_router @@ -29,7 +31,9 @@ from apps.a2a_client_app import router as a2a_client_router from apps.monitoring_app import router as monitoring_router from apps.a2a_server_app import router as a2a_server_router +from apps.haotian_app import router as haotian_router from consts.const import IS_SPEED_MODE +from services.prompt_template_service import sync_system_default_prompt_template # Create logger instance logger = logging.getLogger("base_app") @@ -37,6 +41,16 @@ # Create FastAPI app with common configurations app = create_app(title="Nexent Config API", description="Configuration APIs") + +@app.on_event("startup") +async def sync_default_prompt_template_on_startup(): + """Sync the YAML-backed system default prompt template into the database on startup.""" + try: + sync_system_default_prompt_template() + logger.info("System default prompt template synced successfully.") + except Exception as exc: + logger.error(f"Failed to sync system default prompt template: {str(exc)}") + app.include_router(model_manager_router) app.include_router(config_sync_router) app.include_router(agent_router) @@ -62,8 +76,10 @@ app.include_router(summary_router) app.include_router(prompt_router) +app.include_router(prompt_template_router) app.include_router(skill_router) app.include_router(tenant_config_router) 
+app.include_router(mcp_management_router) app.include_router(remote_mcp_router) app.include_router(tenant_router) app.include_router(group_router) @@ -71,3 +87,4 @@ app.include_router(invitation_router) app.include_router(a2a_client_router) app.include_router(a2a_server_router) +app.include_router(haotian_router) diff --git a/backend/apps/data_process_app.py b/backend/apps/data_process_app.py index 9138d5ef1..693eb987e 100644 --- a/backend/apps/data_process_app.py +++ b/backend/apps/data_process_app.py @@ -204,9 +204,14 @@ async def get_index_tasks(index_name: str): Returns tasks that are being processed or waiting to be processed """ + import time + start = time.time() try: - return await service.get_index_tasks(index_name) + result = await service.get_index_tasks(index_name) + logger.info(f"[get_index_tasks] index={index_name}, tasks={len(result)}, duration={time.time()-start:.3f}s") + return result except Exception as e: + logger.error(f"[get_index_tasks] error: {e}") raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=str(e)) diff --git a/backend/apps/file_management_app.py b/backend/apps/file_management_app.py index 578277b6d..427bde6f3 100644 --- a/backend/apps/file_management_app.py +++ b/backend/apps/file_management_app.py @@ -14,7 +14,8 @@ from consts.model import ProcessParams from services.file_management_service import upload_to_minio, upload_files_impl, \ get_file_url_impl, get_file_stream_impl, delete_file_impl, list_files_impl, \ - resolve_preview_file, get_preview_stream, check_file_access, check_file_access_batch + resolve_preview_file, get_preview_stream, check_file_access, check_file_access_batch, \ + resolve_minio_upload_folder from utils.auth_utils import get_current_user_id from utils.file_management_utils import trigger_data_process @@ -101,7 +102,9 @@ async def upload_files( detail="No files in the request") user_id, tenant_id = get_current_user_id(authorization) - errors, uploaded_file_paths, uploaded_filenames = 
await upload_files_impl(destination, file, folder, index_name, user_id) + errors, uploaded_file_paths, uploaded_filenames = await upload_files_impl( + destination, file, folder, index_name, user_id, uploader_tenant_id=tenant_id + ) if uploaded_file_paths: return JSONResponse( @@ -126,12 +129,13 @@ async def upload_files( @file_management_config_router.post("/process") async def process_files( - files: List[dict] = Body( - ..., description="List of file details to process, including path_or_url and filename"), - chunking_strategy: Optional[str] = Body("basic"), - index_name: str = Body(...), - destination: str = Body(...), - authorization: Optional[str] = Header(None) + files: Annotated[List[dict], Body( + ..., description="List of file details to process, including path_or_url and filename")], + index_name: Annotated[str, Body(...)], + destination: Annotated[str, Body(...)], + chunking_strategy: Annotated[Optional[str], Body(...)] = "basic", + model_id: Annotated[Optional[int], Body(...)] = None, + authorization: Annotated[Optional[str], Header()] = None ): """ Trigger data processing for a list of uploaded files. 
@@ -144,7 +148,8 @@ async def process_files( chunking_strategy=chunking_strategy, source_type=destination, index_name=index_name, - authorization=authorization + authorization=authorization, + model_id=model_id ) process_result = await trigger_data_process(files, process_params) @@ -199,7 +204,7 @@ async def get_storage_file( try: user_id, tenant_id = get_current_user_id(authorization) - if not check_file_access(object_name, user_id): + if not check_file_access(object_name, user_id, tenant_id): logger.warning(f"[get_storage_file] Access denied: object_name={object_name}, user_id={user_id}") raise HTTPException( status_code=HTTPStatus.FORBIDDEN, @@ -282,15 +287,8 @@ async def storage_upload_files( try: user_id, tenant_id = get_current_user_id(authorization) - if folder == "knowledge_base": - actual_folder = "knowledge_base" - else: - if user_id: - actual_folder = f"attachments/{user_id}" - else: - actual_folder = folder or "attachments" - - results = await upload_to_minio(files=files, folder=actual_folder, user_id=user_id) + actual_folder = resolve_minio_upload_folder(folder, user_id, tenant_id) + results = await upload_to_minio(files=files, folder=actual_folder) return { "message": f"Processed {len(results)} files", @@ -344,7 +342,7 @@ async def get_storage_files( if user_id: filtered_files = [ f for f in files - if f.get("key") and check_file_access(f.get("key"), user_id) + if f.get("key") and check_file_access(f.get("key"), user_id, tenant_id) ] else: filtered_files = [ @@ -592,7 +590,7 @@ async def remove_storage_file( try: user_id, tenant_id = get_current_user_id(authorization) - if not check_file_access(object_name, user_id): + if not check_file_access(object_name, user_id, tenant_id): logger.warning(f"[remove_storage_file] Access denied: object_name={object_name}, user_id={user_id}") raise HTTPException( status_code=HTTPStatus.FORBIDDEN, @@ -643,7 +641,7 @@ async def get_storage_file_batch_urls( results = [] for object_name in object_names: - if not 
check_file_access(object_name, user_id): + if not check_file_access(object_name, user_id, tenant_id): results.append({ "object_name": object_name, "success": False, @@ -693,6 +691,7 @@ async def preview_file( Access control: - knowledge_base/*: All authenticated users can access - attachments/{user_id}/*: Only the owner (user_id) can access + - attachments/asset_owner/{user_id}/*: ASSET_OWNER virtual tenant and owner only - **object_name**: File object name in storage - **filename**: Original filename for Content-Disposition header (optional) @@ -703,7 +702,7 @@ async def preview_file( try: user_id, tenant_id = get_current_user_id(authorization) - if not check_file_access(object_name, user_id): + if not check_file_access(object_name, user_id, tenant_id): logger.warning(f"[preview_file] Access denied: object_name={object_name}, user_id={user_id}") raise HTTPException( status_code=HTTPStatus.FORBIDDEN, diff --git a/backend/apps/invitation_app.py b/backend/apps/invitation_app.py index 2aa3edc9e..55bbac998 100644 --- a/backend/apps/invitation_app.py +++ b/backend/apps/invitation_app.py @@ -69,6 +69,12 @@ async def list_invitations_endpoint( status_code=HTTPStatus.UNAUTHORIZED, detail=str(exc) ) + except ValidationError as exc: + logger.warning(f"Invitation list rejected by feature flag: {str(exc)}") + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail=str(exc) + ) except Exception as exc: logger.error(f"Unexpected error retrieving invitation list: {str(exc)}") raise HTTPException( @@ -131,6 +137,12 @@ async def create_invitation_endpoint( status_code=HTTPStatus.BAD_REQUEST, detail=str(exc) ) + except ValidationError as exc: + logger.warning(f"Invitation creation rejected by feature flag: {str(exc)}") + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail=str(exc) + ) except DuplicateError as exc: logger.warning(f"Duplicate invitation code: {str(exc)}") raise HTTPException( diff --git a/backend/apps/mcp_management_app.py 
b/backend/apps/mcp_management_app.py new file mode 100644 index 000000000..cfb0c292a --- /dev/null +++ b/backend/apps/mcp_management_app.py @@ -0,0 +1,302 @@ +import logging +from typing import Optional + +from fastapi import APIRouter, Depends, Header, HTTPException, Query, Request +from fastapi.responses import JSONResponse +from http import HTTPStatus + +from consts.exceptions import ( + MCPConnectionError, + McpNotFoundError, + McpValidationError, + UnauthorizedError, +) +from consts.model import ( + RegistryListQuery, + CommunityListRequest, + CommunityPublishRequest, + CommunityUpdateRequest, +) +from services.mcp_management_service import ( + list_community_mcp_services, + list_community_mcp_tag_stats, + list_my_community_mcp_services, + list_registry_mcp_services, + publish_community_mcp_service, + update_community_mcp_service, + delete_community_mcp_service, +) +from utils.auth_utils import get_current_user_info + +router = APIRouter(prefix="/mcp-tools") +logger = logging.getLogger("mcp_management_app") + + +# --------------------------------------------------------------------------- +# Registry Endpoints (MCP Registry - external service) +# --------------------------------------------------------------------------- + +@router.get("/registry/list") +async def list_registry_mcp_services_api( + query: RegistryListQuery = Depends(), + authorization: Optional[str] = Header(None), + http_request: Request = None, +): + """ + List MCP services from the official MCP Registry. 
+ """ + try: + get_current_user_info(authorization, http_request) + + data = await list_registry_mcp_services( + search=query.search, + include_deleted=query.include_deleted, + updated_since=query.updated_since, + version=query.version, + cursor=query.cursor, + limit=query.limit, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content=data, + ) + except UnauthorizedError as exc: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, + detail=str(exc), + ) + except HTTPException: + raise + except Exception as exc: + logger.error(f"Failed to list MCP registry services: {exc}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to list MCP registry services" + ) + + +# --------------------------------------------------------------------------- +# Community Endpoints +# --------------------------------------------------------------------------- + +@router.get("/community/list") +async def list_community_mcp_services_api( + query: CommunityListRequest = Depends(), + authorization: Optional[str] = Header(None), + http_request: Request = None, +): + """ + List public community MCP services. 
+ """ + try: + get_current_user_info(authorization, http_request) + data = await list_community_mcp_services( + search=query.search, + tag=query.tag, + transport_type=query.transport_type, + cursor=query.cursor, + limit=query.limit, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success", "data": data}, + ) + except UnauthorizedError as exc: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, + detail=str(exc), + ) + except HTTPException: + raise + except Exception as exc: + logger.error(f"Failed to list MCP community services: {exc}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to list MCP community services" + ) + + +@router.get("/community/tags/stats") +async def list_community_mcp_tag_stats_api( + authorization: Optional[str] = Header(None), + http_request: Request = None, +): + """ + Get community MCP tag statistics. + """ + try: + get_current_user_info(authorization, http_request) + stats = list_community_mcp_tag_stats() + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success", "data": stats}, + ) + except UnauthorizedError as exc: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, + detail=str(exc), + ) + except HTTPException: + raise + except Exception as exc: + logger.error(f"Failed to list community MCP tag stats: {exc}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to list community MCP tag stats" + ) + + +@router.post("/community/publish") +async def publish_community_mcp_service_api( + payload: CommunityPublishRequest, + authorization: Optional[str] = Header(None), + http_request: Request = None, +): + """ + Publish a local MCP service to the community. 
+ """ + try: + user_id, tenant_id, _ = get_current_user_info(authorization, http_request) + community_id = await publish_community_mcp_service( + tenant_id=tenant_id, + user_id=user_id, + mcp_id=payload.mcp_id, + name=payload.name, + description=payload.description, + version=payload.version, + tags=payload.tags, + mcp_server=payload.mcp_server, + config_json=payload.config_json, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success", "data": {"community_id": community_id}}, + ) + except McpNotFoundError as exc: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc)) + except McpValidationError as exc: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) + except UnauthorizedError as exc: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, + detail=str(exc), + ) + except HTTPException: + raise + except Exception as exc: + logger.error(f"Failed to publish MCP community service: {exc}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to publish MCP community service" + ) + + +@router.put("/community/update") +async def update_community_mcp_service_api( + payload: CommunityUpdateRequest, + authorization: Optional[str] = Header(None), + http_request: Request = None, +): + """ + Update a community MCP service. 
+ """ + try: + user_id, tenant_id, _ = get_current_user_info(authorization, http_request) + await update_community_mcp_service( + tenant_id=tenant_id, + user_id=user_id, + community_id=payload.community_id, + name=payload.name, + description=payload.description, + tags=payload.tags, + version=payload.version, + registry_json=payload.registry_json, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success"}, + ) + except McpNotFoundError as exc: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc)) + except McpValidationError as exc: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) + except UnauthorizedError as exc: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, + detail=str(exc), + ) + except HTTPException: + raise + except Exception as exc: + logger.error(f"Failed to update MCP community service: {exc}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to update MCP community service" + ) + + +@router.delete("/community/delete") +async def delete_community_mcp_service_api( + community_id: int = Query(gt=0), + authorization: Optional[str] = Header(None), + http_request: Request = None, +): + """ + Delete a community MCP service. 
+ """ + try: + user_id, tenant_id, _ = get_current_user_info(authorization, http_request) + await delete_community_mcp_service( + tenant_id=tenant_id, + user_id=user_id, + community_id=community_id, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success"}, + ) + except McpNotFoundError as exc: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc)) + except UnauthorizedError as exc: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, + detail=str(exc), + ) + except HTTPException: + raise + except Exception as exc: + logger.error(f"Failed to delete MCP community service: {exc}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to delete MCP community service" + ) + + +@router.get("/community/mine") +async def list_my_community_mcp_services_api( + authorization: Optional[str] = Header(None), + http_request: Request = None, +): + """ + List MCP services published by the current user to the community. 
+ """ + try: + _, tenant_id, _ = get_current_user_info(authorization, http_request) + data = await list_my_community_mcp_services(tenant_id=tenant_id) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success", "data": data}, + ) + except UnauthorizedError as exc: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, + detail=str(exc), + ) + except HTTPException: + raise + except Exception as exc: + logger.error(f"Failed to list my MCP community services: {exc}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to list my MCP community services" + ) diff --git a/backend/apps/model_managment_app.py b/backend/apps/model_managment_app.py index 278b729e8..53dfebb02 100644 --- a/backend/apps/model_managment_app.py +++ b/backend/apps/model_managment_app.py @@ -33,7 +33,7 @@ from fastapi.responses import JSONResponse from fastapi.encoders import jsonable_encoder from http import HTTPStatus -from typing import List, Optional +from typing import Annotated, List, Optional from services.model_health_service import ( check_model_connectivity, verify_model_config_connectivity, @@ -264,6 +264,7 @@ async def get_model_list(authorization: Optional[str] = Header(None)): Returns each model enriched with repo-qualified `model_name` and a normalized `connect_status` value. """ + try: user_id, tenant_id = get_current_user_id(authorization) logger.debug( @@ -297,7 +298,8 @@ async def get_llm_model_list(authorization: Optional[str] = Header(None)): @router.post("/healthcheck") async def check_model_health( - display_name: str = Query(..., description="Display name to check"), + display_name: Annotated[str, Query(..., description="Display name to check")], + model_type: Annotated[str, Query(..., description="...")], authorization: Optional[str] = Header(None) ): """Check and update model connectivity, returning the latest status. 
@@ -308,7 +310,7 @@ async def check_model_health( """ try: _, tenant_id = get_current_user_id(authorization) - result = await check_model_connectivity(display_name, tenant_id) + result = await check_model_connectivity(display_name, tenant_id, model_type) return JSONResponse(status_code=HTTPStatus.OK, content={ "message": "Successfully checked model connectivity", "data": result diff --git a/backend/apps/monitoring_app.py b/backend/apps/monitoring_app.py index 310365293..f89f4312f 100644 --- a/backend/apps/monitoring_app.py +++ b/backend/apps/monitoring_app.py @@ -7,11 +7,16 @@ import logging from http import HTTPStatus -from typing import Annotated, Optional +from typing import Annotated, Any from fastapi import APIRouter, Header, HTTPException, Query from sqlalchemy import text +from consts.const import ( + ENABLE_TELEMETRY, + MONITORING_DASHBOARD_URL, + MONITORING_PROVIDER, +) from consts.model import ConversationResponse from database.client import get_monitoring_db_session from utils.auth_utils import get_current_user_id @@ -21,6 +26,25 @@ router = APIRouter(prefix="/monitoring") +def _normalize_monitoring_provider(value: str | None) -> str: + return str(value or "otlp").strip().lower() + + +def get_monitoring_status() -> dict[str, Any]: + """Return telemetry state and the monitoring UI entrypoint for frontend use.""" + telemetry_enabled = ENABLE_TELEMETRY + provider = _normalize_monitoring_provider(MONITORING_PROVIDER) + dashboard_url = MONITORING_DASHBOARD_URL.strip() or None + + return { + "telemetry_enabled": telemetry_enabled, + "provider": provider, + "dashboard_url": dashboard_url, + "dashboard_port": None, + "dashboard_path": None, + } + + def _compute_time_range_filter(time_range: str) -> str: """Convert time_range parameter to SQL timestamp condition.""" hours = {"24h": 24, "7d": 168, "30d": 720}.get(time_range, 24) @@ -28,12 +52,12 @@ def _compute_time_range_filter(time_range: str) -> str: def _query_model_metrics_from_db( - time_range: str, 
tenant_id: Optional[str] = None -) -> list[dict]: + time_range: str, tenant_id: str | None = None +) -> list[dict[str, Any]]: time_filter = _compute_time_range_filter(time_range) tenant_filter = "" - params = {} + params: dict[str, str] = {} if tenant_id: tenant_filter = "AND m.tenant_id = :tenant_id" params["tenant_id"] = tenant_id @@ -96,7 +120,7 @@ async def list_models_endpoint( page: Annotated[int, Query(ge=1, description="Page number")] = 1, page_size: Annotated[int, Query( ge=1, le=100, description="Items per page")] = 20, - authorization: Annotated[Optional[str], Header()] = None, + authorization: Annotated[str | None, Header()] = None, ): """List all models with aggregated monitoring metrics from database.""" try: @@ -113,3 +137,13 @@ async def list_models_endpoint( logger.error(f"Failed to list monitoring models: {str(e)}") raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=str(e)) + + +@router.get("/status", response_model=ConversationResponse) +async def get_monitoring_status_endpoint(): + """Return whether monitoring UI should be shown in the frontend.""" + return ConversationResponse( + code=0, + message="success", + data=get_monitoring_status(), + ) diff --git a/backend/apps/northbound_base_app.py b/backend/apps/northbound_base_app.py index db303e00f..66d937b52 100644 --- a/backend/apps/northbound_base_app.py +++ b/backend/apps/northbound_base_app.py @@ -16,6 +16,7 @@ from apps.app_factory import create_app from .northbound_app import router as northbound_router +from .northbound_knowledge_app import router as northbound_knowledge_router class A2AServerSettings(BaseModel): @@ -49,6 +50,7 @@ class A2AServerSettings(BaseModel): ) northbound_app.include_router(northbound_router) +northbound_app.include_router(northbound_knowledge_router) # ============================================================================= diff --git a/backend/apps/northbound_knowledge_app.py b/backend/apps/northbound_knowledge_app.py new file mode 
100644 index 000000000..775d6c567 --- /dev/null +++ b/backend/apps/northbound_knowledge_app.py @@ -0,0 +1,488 @@ +import base64 +import logging +from http import HTTPStatus +from typing import Optional, Dict, Any, List, Annotated + +from fastapi import APIRouter, Body, File, Form, Path, Path as PathParam, Query, Request, HTTPException, UploadFile +from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse + +from consts.const import ASSET_OWNER_TENANT_ID, VectorDatabaseType +from consts.exceptions import ( + LimitExceededError, + UnauthorizedError, +) +from consts.model import ProcessParams +from services.file_management_service import ( + upload_files_impl, + get_file_url_impl, + get_file_stream_impl, + check_file_access, +) +from services.northbound_service import NorthboundContext +from services.redis_service import get_redis_service +from services.vectordatabase_service import ElasticSearchService, get_vector_db_core +from utils.auth_utils import generate_session_jwt +from utils.file_management_utils import trigger_data_process + +from .file_management_app import build_content_disposition_header +from .northbound_app import _get_northbound_context + + +logger = logging.getLogger("northbound_knowledge_app") + +router = APIRouter(prefix="/nb/v1/knowledge", tags=["northbound"]) + +__all__ = ["router"] + +RATE_LIMIT_EXCEEDED_DETAIL = "Too Many Requests: rate limit exceeded" + + +async def _require_asset_owner_context(request: Request) -> NorthboundContext: + """Resolve northbound context and ensure the caller belongs to the asset-owner tenant.""" + ctx = await _get_northbound_context(request) + if ctx.tenant_id != ASSET_OWNER_TENANT_ID: + raise HTTPException( + status_code=HTTPStatus.FORBIDDEN, + detail="This endpoint is restricted to asset administrators.", + ) + return ctx + + +@router.get("/indices") +async def get_list_indices( + request: Request, + pattern: Annotated[str, Query(description="Pattern to match index names")] = "*", +): + 
"""List knowledge bases visible to the asset-owner tenant. + + Restricted to asset administrators (same auth as create_new_index). + """ + try: + ctx = await _require_asset_owner_context(request) + vdb_core = get_vector_db_core(db_type=VectorDatabaseType.ELASTICSEARCH) + return ElasticSearchService.list_indices( + pattern, True, ctx.tenant_id, ctx.user_id, vdb_core + ) + except LimitExceededError as e: + logger.exception("Rate limit exceeded while listing knowledge bases") + raise HTTPException( + status_code=HTTPStatus.TOO_MANY_REQUESTS, + detail=RATE_LIMIT_EXCEEDED_DETAIL) + except UnauthorizedError as e: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except HTTPException: + raise + except Exception: + logger.exception("Error listing knowledge bases") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Error listing knowledge bases") + + +@router.post("/indices/{index_name}") +async def create_new_index( + request: Request, + index_name: Annotated[str, Path(..., description="Name of the index to create")], + embedding_dim: Annotated[ + Optional[int], + Query(description="Dimension of the embedding vectors"), + ] = None, + body: Annotated[ + Optional[Dict[str, Any]], + Body( + description=( + "Request body with optional fields (ingroup_permission, group_ids, embedding_model_name)" + ), + ), + ] = None, +): + """Create a new vector index and store it in the knowledge table. + + Restricted to the asset-owner tenant: only callers whose access key resolves + to the asset-owner tenant are allowed to create knowledge bases through the + northbound API. 
+ """ + try: + ctx = await _require_asset_owner_context(request) + vdb_core = get_vector_db_core(db_type=VectorDatabaseType.ELASTICSEARCH) + + ingroup_permission = None + group_ids = None + embedding_model_name = None + if body: + ingroup_permission = body.get("ingroup_permission") + group_ids = body.get("group_ids") + embedding_model_name = body.get("embedding_model_name") + + return ElasticSearchService.create_knowledge_base( + knowledge_name=index_name, + embedding_dim=embedding_dim, + vdb_core=vdb_core, + user_id=ctx.user_id, + tenant_id=ctx.tenant_id, + ingroup_permission=ingroup_permission, + group_ids=group_ids, + embedding_model_name=embedding_model_name, + ) + except LimitExceededError as e: + logger.exception("Rate limit exceeded while creating index") + raise HTTPException( + status_code=HTTPStatus.TOO_MANY_REQUESTS, + detail=RATE_LIMIT_EXCEEDED_DETAIL) + except UnauthorizedError as e: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except HTTPException: + raise + except Exception: + logger.exception("Error creating index") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Error creating index") + + +@router.delete("/indices/{index_name}") +async def delete_index( + request: Request, + index_name: Annotated[str, Path(..., description="Name of the index to delete")], +): + """Delete a knowledge base and all related data. + + Restricted to asset administrators (same auth as create_new_index). 
+ """ + logger.debug("Received northbound request to delete knowledge base") + try: + ctx = await _require_asset_owner_context(request) + vdb_core = get_vector_db_core(db_type=VectorDatabaseType.ELASTICSEARCH) + return await ElasticSearchService.full_delete_knowledge_base( + index_name, vdb_core, ctx.user_id + ) + except LimitExceededError as e: + logger.exception("Rate limit exceeded while deleting index") + raise HTTPException( + status_code=HTTPStatus.TOO_MANY_REQUESTS, + detail=RATE_LIMIT_EXCEEDED_DETAIL) + except UnauthorizedError as e: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except HTTPException: + raise + except Exception: + logger.exception("Error deleting index") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Error deleting index") + + +@router.get("/indices/{index_name}/files") +async def get_index_files( + request: Request, + index_name: Annotated[str, Path(..., description="Name of the index")], +): + """Get all files from an index, including those that are not yet stored in ES. + + Restricted to asset administrators (same auth as get_list_indices). 
+ """ + try: + ctx = await _require_asset_owner_context(request) + vdb_core = get_vector_db_core(db_type=VectorDatabaseType.ELASTICSEARCH) + logger.debug( + "Listing files for index %s, tenant_id=%s, user_id=%s", + index_name, + ctx.tenant_id, + ctx.user_id, + ) + result = await ElasticSearchService.list_files( + index_name, include_chunks=False, vdb_core=vdb_core + ) + return { + "status": "success", + "files": result.get("files", []), + } + except LimitExceededError as e: + logger.exception("Rate limit exceeded while listing files") + raise HTTPException( + status_code=HTTPStatus.TOO_MANY_REQUESTS, + detail=RATE_LIMIT_EXCEEDED_DETAIL) + except UnauthorizedError as e: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except HTTPException: + raise + except Exception: + logger.exception("Error getting files for index") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Error getting index files") + + +@router.delete("/indices/{index_name}/documents") +async def delete_documents( + request: Request, + index_name: Annotated[str, Path(..., description="Name of the index")], + path_or_url: Annotated[str, Query(..., description="Path or URL of documents to delete")], +): + """Delete documents by path or URL and clean up related Redis records. + + Restricted to asset administrators (same auth as get_list_indices). + """ + try: + ctx = await _require_asset_owner_context(request) + vdb_core = get_vector_db_core(db_type=VectorDatabaseType.ELASTICSEARCH) + logger.debug("Deleting documents for index %s", index_name) + result = ElasticSearchService.delete_documents( + index_name, path_or_url, vdb_core) + + try: + redis_service = get_redis_service() + redis_cleanup_result = redis_service.delete_document_records( + index_name, path_or_url) + + result["redis_cleanup"] = redis_cleanup_result + + original_message = result.get( + "message", "Documents deleted successfully") + result["message"] = ( + f"{original_message}. 
" + f"Cleaned up {redis_cleanup_result['total_deleted']} Redis records " + f"({redis_cleanup_result['celery_tasks_deleted']} tasks, " + f"{redis_cleanup_result['cache_keys_deleted']} cache keys)." + ) + + if redis_cleanup_result.get("errors"): + result["redis_warnings"] = redis_cleanup_result["errors"] + + except Exception as redis_error: + logger.warning( + "Redis cleanup failed for index %s: %s", + index_name, + redis_error, + ) + result["redis_cleanup_error"] = str(redis_error) + original_message = result.get( + "message", "Documents deleted successfully") + result["message"] = ( + f"{original_message}, but Redis cleanup encountered an error: " + f"{str(redis_error)}" + ) + + return result + except LimitExceededError as e: + logger.exception("Rate limit exceeded while deleting documents") + raise HTTPException( + status_code=HTTPStatus.TOO_MANY_REQUESTS, + detail=RATE_LIMIT_EXCEEDED_DETAIL) + except UnauthorizedError as e: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except HTTPException: + raise + except Exception: + logger.exception("Error deleting documents for index") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Error deleting documents") + + +@router.post("/file/upload") +async def upload_files( + request: Request, + file: Annotated[List[UploadFile], File(..., alias="file")], + index_name: str = Form(..., description="Knowledge base index"), +): + """Upload files to MinIO and trigger knowledge base data processing. + + Uses chunking_strategy=basic. Restricted to asset administrators + (same auth as create_new_index). 
+ """ + try: + ctx = await _require_asset_owner_context(request) + destination = "minio" + if not file: + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail="No files in the request", + ) + + errors, uploaded_file_paths, uploaded_filenames = await upload_files_impl( + destination, file, None, index_name, ctx.user_id, uploader_tenant_id=ctx.tenant_id + ) + + if uploaded_file_paths: + files = [ + {"path_or_url": path, "filename": name} + for path, name in zip(uploaded_file_paths, uploaded_filenames) + ] + # Internal data-process / ES indexing expects JWT, not northbound API key + internal_jwt = generate_session_jwt(ctx.user_id) + process_params = ProcessParams( + chunking_strategy="basic", + source_type="minio", + index_name=index_name, + authorization=internal_jwt, + ) + process_result = await trigger_data_process(files, process_params) + + if process_result is None or ( + isinstance(process_result, dict) + and process_result.get("status") == "error" + ): + error_message = "Data process service failed" + if isinstance(process_result, dict) and "message" in process_result: + error_message = process_result["message"] + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail=error_message, + ) + + return JSONResponse( + status_code=HTTPStatus.CREATED, + content={ + "message": ( + "Files uploaded and processing triggered successfully" + ), + "uploaded_filenames": uploaded_filenames, + "uploaded_file_paths": uploaded_file_paths, + "errors": errors, + "process_tasks": process_result, + }, + ) + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail="No valid files uploaded", + ) + except LimitExceededError as e: + logger.exception("Rate limit exceeded while uploading files") + raise HTTPException( + status_code=HTTPStatus.TOO_MANY_REQUESTS, + detail=RATE_LIMIT_EXCEEDED_DETAIL) + except UnauthorizedError as e: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except HTTPException: + raise + 
except Exception: + logger.exception("File upload error") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="File upload error.") + + +@router.get("/file/download/{object_name:path}") +async def get_storage_file( + request: Request, + object_name: str = PathParam(..., description="File object name"), + download: str = Query( + "ignore", + description=( + "How to get the file: " + "'ignore' (default, return file info), " + "'stream' (return file stream), " + "'redirect' (redirect to download URL), " + "'base64' (return base64-encoded content for images)." + ), + ), + expires: int = Query(86400, description="URL validity period (seconds)"), + filename: Optional[str] = Query( + None, description="Original filename for download (optional)"), +): + """Get file information, download link, or file stream. + + Restricted to asset administrators (same auth as create_new_index). + """ + try: + ctx = await _require_asset_owner_context(request) + + if not check_file_access(object_name, ctx.user_id, ctx.tenant_id): + logger.warning( + "[get_storage_file] Access denied: user_id=%s", + ctx.user_id, + ) + raise HTTPException( + status_code=HTTPStatus.FORBIDDEN, + detail="You don't have permission to access this file", + ) + + logger.info( + "[get_storage_file] download=%s", + download, + ) + if download == "redirect": + result = await get_file_url_impl( + object_name=object_name, expires=expires) + return RedirectResponse(url=result["url"]) + if download == "stream": + file_stream, content_type = await get_file_stream_impl( + object_name=object_name) + logger.info( + "Streaming file: object_name=%s, content_type=%s", + object_name, + content_type, + ) + + download_filename = filename + if not download_filename: + download_filename = ( + object_name.split("/")[-1] + if "/" in object_name + else object_name + ) + + content_disposition = build_content_disposition_header( + download_filename) + + return StreamingResponse( + file_stream, + 
media_type=content_type, + headers={ + "Content-Disposition": content_disposition, + "Cache-Control": "public, max-age=3600", + "ETag": f'"{object_name}"', + }, + ) + if download == "base64": + file_stream, content_type = await get_file_stream_impl( + object_name=object_name) + try: + data = file_stream.read() + except Exception as exc: + logger.error( + "Failed to read file stream for base64: %s", str(exc)) + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to read file content for base64 encoding", + ) + + base64_content = base64.b64encode(data).decode("utf-8") + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "success": True, + "base64": base64_content, + "content_type": content_type, + "object_name": object_name, + }, + ) + return await get_file_url_impl( + object_name=object_name, expires=expires) + except LimitExceededError as e: + logger.error( + "%s: %s", + RATE_LIMIT_EXCEEDED_DETAIL, + str(e), + exc_info=e, + ) + raise HTTPException( + status_code=HTTPStatus.TOO_MANY_REQUESTS, + detail=RATE_LIMIT_EXCEEDED_DETAIL) + except UnauthorizedError as e: + raise HTTPException( + status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except HTTPException: + raise + except Exception: + logger.exception("Failed to get file") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to get file.") diff --git a/backend/apps/oauth_app.py b/backend/apps/oauth_app.py index bda69f935..f05102d0c 100644 --- a/backend/apps/oauth_app.py +++ b/backend/apps/oauth_app.py @@ -1,27 +1,36 @@ import logging -from fastapi import APIRouter, Header, HTTPException +from fastapi import APIRouter, Header, HTTPException, Request from fastapi.responses import JSONResponse, RedirectResponse from http import HTTPStatus from typing import Optional +from pydantic import ValidationError as PydanticValidationError + +from consts.model import OAuthCompleteRequest from consts.exceptions import OAuthLinkError, 
OAuthProviderError, UnauthorizedError from consts.oauth_providers import get_all_provider_definitions from database.oauth_account_db import get_oauth_account_by_provider from services.oauth_service import ( + complete_pending_oauth_account, create_or_update_oauth_account, ensure_user_tenant_exists, exchange_code_for_provider_token, + find_supabase_user_id_by_email, + generate_pending_oauth_token, get_authorize_url, get_enabled_providers, + get_pending_oauth_info, get_provider_user_info, list_linked_accounts, - unlink_account, parse_state, + parse_state, + unlink_account, ) from utils.auth_utils import ( calculate_expires_at, generate_session_jwt, - get_current_user_id, get_supabase_admin_client, + get_current_user_id, + get_supabase_admin_client, ) logger = logging.getLogger(__name__) @@ -142,44 +151,37 @@ async def callback( if existing_binding: supabase_user_id = existing_binding["user_id"] else: - # No binding found, search/create user by email in Supabase - admin_client = get_supabase_admin_client() - if not admin_client: - raise RuntimeError("Supabase admin client not available") - supabase_user_id = None - page = 1 - while True: - users_resp = admin_client.auth.admin.list_users( - page=page, per_page=100 + if email: + admin_client = get_supabase_admin_client() + if not admin_client: + raise RuntimeError("Supabase admin client not available") + supabase_user_id = find_supabase_user_id_by_email( + admin_client, + email, ) - users = users_resp if len(users_resp) > 0 else [] - if not users: - break - for u in users: - if u.email and u.email.lower() == email.lower(): - supabase_user_id = u.id - break - if supabase_user_id: - break - if len(users) < 100: - break - page += 1 if not supabase_user_id: - if not email: - email = f"{provider}_{provider_user_id}@oauth.nexent" - create_resp = admin_client.auth.admin.create_user( - { - "email": email, - "email_confirm": True, - "user_metadata": { - "full_name": username, + pending_token = generate_pending_oauth_token( + 
provider=provider, + provider_user_id=provider_user_id, + provider_email=email, + provider_username=username, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "message": "OAuth account information required", + "data": { + "requires_account_completion": True, + "pending_token": pending_token, "provider": provider, + "provider_username": username, + "provider_email": email, + "email_required": not bool(email), }, - } + }, ) - supabase_user_id = create_resp.user.id ensure_user_tenant_exists(user_id=supabase_user_id, email=email) @@ -214,6 +216,18 @@ async def callback( }, ) + except OAuthLinkError as e: + logger.warning(f"OAuth callback link failed for provider={provider}: {e}") + return JSONResponse( + status_code=HTTPStatus.BAD_REQUEST, + content={ + "message": "OAuth account link failed", + "data": { + "oauth_error": "oauth_account_already_bound", + "oauth_error_description": "OAuth account is already bound to another user", + }, + }, + ) except Exception as e: logger.error(f"OAuth callback failed for provider={provider}: {e}") return JSONResponse( @@ -228,6 +242,67 @@ async def callback( ) +@router.get("/pending") +async def get_pending( + pending_token: Optional[str] = Header(None, alias="X-OAuth-Pending-Token"), +): + try: + pending = get_pending_oauth_info(pending_token or "") + return JSONResponse( + status_code=HTTPStatus.OK, + content={"message": "success", "data": pending}, + ) + except OAuthLinkError as e: + raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail=str(e)) + except OAuthProviderError as e: + raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=str(e)) + except Exception as e: + logger.error(f"Failed to get pending OAuth info: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to get pending OAuth info", + ) + + +@router.post("/complete") +async def complete( + request: Request, + pending_token: Optional[str] = Header(None, 
alias="X-OAuth-Pending-Token"), +): + try: + request_data = OAuthCompleteRequest(**(await request.json())) + result = await complete_pending_oauth_account( + pending_token=pending_token or "", + email=str(request_data.email) if request_data.email else None, + password=request_data.password, + invite_code=request_data.invite_code, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"message": "OAuth account completed", "data": result}, + ) + except OAuthLinkError as e: + status_code = ( + HTTPStatus.CONFLICT + if "Email already exists" in str(e) + else HTTPStatus.BAD_REQUEST + ) + raise HTTPException(status_code=status_code, detail=str(e)) + except PydanticValidationError as e: + raise HTTPException( + status_code=HTTPStatus.UNPROCESSABLE_ENTITY, + detail=e.errors(), + ) + except OAuthProviderError as e: + raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=str(e)) + except Exception as e: + logger.error(f"Failed to complete OAuth account: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to complete OAuth account", + ) + + @router.get("/accounts") async def get_accounts(authorization: Optional[str] = Header(None)): if not authorization: @@ -257,20 +332,7 @@ async def delete_account(provider: str, authorization: Optional[str] = Header(No try: user_id, _ = get_current_user_id(authorization) - - has_password_auth = False - - admin_client = get_supabase_admin_client() - if admin_client: - try: - user_resp = admin_client.auth.admin.get_user_by_id(user_id) - user_metadata = getattr(user_resp.user, "user_metadata", {}) or {} - signup_provider = user_metadata.get("provider", "email") - has_password_auth = signup_provider == "email" - except Exception as e: - logger.warning(f"Failed to check user identities for {user_id}: {e}") - - unlink_account(user_id, provider, has_password_auth=has_password_auth) + unlink_account(user_id, provider) return JSONResponse( status_code=HTTPStatus.OK, content={ 
diff --git a/backend/apps/prompt_app.py b/backend/apps/prompt_app.py index a9bd8d3a6..987729e69 100644 --- a/backend/apps/prompt_app.py +++ b/backend/apps/prompt_app.py @@ -1,11 +1,14 @@ import logging from http import HTTPStatus from typing import Optional -from fastapi import APIRouter, Header, HTTPException, Request -from fastapi.responses import StreamingResponse +from fastapi import APIRouter, Header, Request +from fastapi.responses import JSONResponse, StreamingResponse -from consts.model import GeneratePromptRequest -from services.prompt_service import gen_system_prompt_streamable +from consts.model import GeneratePromptRequest, OptimizePromptSectionRequest +from services.prompt_service import ( + gen_system_prompt_streamable, + optimize_prompt_section_impl, +) from utils.auth_utils import get_current_user_info router = APIRouter(prefix="/prompt") @@ -25,14 +28,50 @@ async def generate_and_save_system_prompt_api( agent_id=prompt_request.agent_id, model_id=prompt_request.model_id, task_description=prompt_request.task_description, + prompt_template_id=prompt_request.prompt_template_id, user_id=user_id, tenant_id=tenant_id, language=language, tool_ids=prompt_request.tool_ids, sub_agent_ids=prompt_request.sub_agent_ids, - knowledge_base_display_names=prompt_request.knowledge_base_display_names + knowledge_base_display_names=prompt_request.knowledge_base_display_names, + has_selected_resources=prompt_request.has_selected_resources, ), media_type="text/event-stream") except Exception as e: logger.exception(f"Error occurred while generating system prompt: {e}") - raise HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Error occurred while generating system prompt.") + raise + + +@router.post("/optimize") +async def optimize_prompt_section_api( + optimize_request: OptimizePromptSectionRequest, + http_request: Request, + authorization: Optional[str] = Header(None) +): + try: + _, tenant_id, language = get_current_user_info( + authorization, 
http_request) + optimized_section = optimize_prompt_section_impl( + agent_id=optimize_request.agent_id, + model_id=optimize_request.model_id, + task_description=optimize_request.task_description, + tenant_id=tenant_id, + language=language, + section_type=optimize_request.section_type, + section_title=optimize_request.section_title, + current_content=optimize_request.current_content, + feedback=optimize_request.feedback, + tool_ids=optimize_request.tool_ids, + sub_agent_ids=optimize_request.sub_agent_ids, + knowledge_base_display_names=optimize_request.knowledge_base_display_names, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "message": "Prompt section optimized successfully", + "data": optimized_section, + } + ) + except Exception as exc: + logger.exception(f"Error occurred while optimizing prompt section: {exc}") + raise diff --git a/backend/apps/prompt_template_app.py b/backend/apps/prompt_template_app.py new file mode 100644 index 000000000..0f12bd614 --- /dev/null +++ b/backend/apps/prompt_template_app.py @@ -0,0 +1,143 @@ +import logging +from http import HTTPStatus +from typing import Optional + +from fastapi import APIRouter, Header, HTTPException +from starlette.responses import JSONResponse + +from consts.exceptions import DuplicateError, NotFoundException, ValidationError +from consts.model import PromptTemplateRequest +from services.prompt_template_service import ( + create_prompt_template_impl, + delete_prompt_template_impl, + get_prompt_template_detail_impl, + list_prompt_templates_impl, + update_prompt_template_impl, +) +from utils.auth_utils import get_current_user_id + +router = APIRouter(prefix="/prompt_templates") +logger = logging.getLogger("prompt_template_app") + + +@router.get("") +async def list_prompt_templates_api( + authorization: Optional[str] = Header(None), +): + """List prompt templates for the current user.""" + try: + user_id, tenant_id = get_current_user_id(authorization) + result = 
list_prompt_templates_impl(tenant_id=tenant_id, user_id=user_id) + return JSONResponse(status_code=HTTPStatus.OK, content=result) + except Exception as exc: + logger.error(f"Prompt template list error: {str(exc)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Prompt template list error.", + ) + + +@router.get("/{template_id}") +async def get_prompt_template_api( + template_id: int, + authorization: Optional[str] = Header(None), +): + """Get prompt template detail.""" + try: + user_id, tenant_id = get_current_user_id(authorization) + result = get_prompt_template_detail_impl( + template_id=template_id, + tenant_id=tenant_id, + user_id=user_id, + ) + return JSONResponse(status_code=HTTPStatus.OK, content=result) + except NotFoundException as exc: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc)) + except Exception as exc: + logger.error(f"Prompt template detail error: {str(exc)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Prompt template detail error.", + ) + + +@router.post("") +async def create_prompt_template_api( + request: PromptTemplateRequest, + authorization: Optional[str] = Header(None), +): + """Create a prompt template.""" + try: + user_id, tenant_id = get_current_user_id(authorization) + result = create_prompt_template_impl( + request=request, + tenant_id=tenant_id, + user_id=user_id, + ) + return JSONResponse(status_code=HTTPStatus.OK, content=result) + except DuplicateError as exc: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) + except ValidationError as exc: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) + except Exception as exc: + logger.error(f"Prompt template create error: {str(exc)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Prompt template create error.", + ) + + +@router.put("/{template_id}") +async def update_prompt_template_api( + template_id: int, + 
request: PromptTemplateRequest, + authorization: Optional[str] = Header(None), +): + """Update a prompt template.""" + try: + user_id, tenant_id = get_current_user_id(authorization) + result = update_prompt_template_impl( + template_id=template_id, + request=request, + tenant_id=tenant_id, + user_id=user_id, + ) + return JSONResponse(status_code=HTTPStatus.OK, content=result) + except NotFoundException as exc: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc)) + except DuplicateError as exc: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) + except ValidationError as exc: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) + except Exception as exc: + logger.error(f"Prompt template update error: {str(exc)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Prompt template update error.", + ) + + +@router.delete("/{template_id}") +async def delete_prompt_template_api( + template_id: int, + authorization: Optional[str] = Header(None), +): + """Delete a prompt template.""" + try: + user_id, tenant_id = get_current_user_id(authorization) + result = delete_prompt_template_impl( + template_id=template_id, + tenant_id=tenant_id, + user_id=user_id, + ) + return JSONResponse(status_code=HTTPStatus.OK, content=result) + except NotFoundException as exc: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc)) + except ValidationError as exc: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) + except Exception as exc: + logger.error(f"Prompt template delete error: {str(exc)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Prompt template delete error.", + ) diff --git a/backend/apps/remote_mcp_app.py b/backend/apps/remote_mcp_app.py index 0dd6127fd..3993e24ce 100644 --- a/backend/apps/remote_mcp_app.py +++ b/backend/apps/remote_mcp_app.py @@ -6,12 +6,27 @@ from fastapi.responses import JSONResponse, 
StreamingResponse from http import HTTPStatus -from consts.const import NEXENT_MCP_DOCKER_IMAGE, ENABLE_UPLOAD_IMAGE -from consts.exceptions import MCPConnectionError, MCPNameIllegal, MCPContainerError -from consts.model import MCPConfigRequest, MCPUpdateRequest +from consts.const import ENABLE_UPLOAD_IMAGE +from consts.exceptions import ( + MCPConnectionError, + MCPNameIllegal, + MCPContainerError, + McpNotFoundError, + McpValidationError, + McpNameConflictError, + McpPortConflictError, +) +from consts.model import ( + MCPConfigRequest, + AddMcpServiceRequest, + AddContainerMcpServiceRequest, + UpdateMcpServiceRequest, + EnableMcpServiceRequest, + DisableMcpServiceRequest, + HealthcheckMcpServiceRequest, + ListMcpServicesQuery, +) from services.remote_mcp_service import ( - add_remote_mcp_server_list, - delete_remote_mcp_server_list, get_remote_mcp_server_list, check_mcp_health_and_update_db, delete_mcp_by_container_id, @@ -19,8 +34,16 @@ update_remote_mcp_server_list, attach_mcp_container_permissions, get_mcp_record_by_id, + list_mcp_service_tools_by_id, + add_mcp_service, + add_container_mcp_service, + update_mcp_service, + update_mcp_service_enabled, + delete_mcp_service, + check_mcp_service_health, + check_container_port_conflict, + suggest_container_port, ) -from database.remote_mcp_db import check_mcp_name_exists from services.tool_configuration_service import get_tool_from_remote_mcp_server from services.mcp_container_service import MCPContainerManager from utils.auth_utils import get_current_user_info @@ -29,454 +52,388 @@ logger = logging.getLogger("remote_mcp_app") -@router.post("/tools") -async def get_tools_from_remote_mcp( - service_name: str, - mcp_url: str, +# --------------------------------------------------------------------------- +# Tools Endpoint +# --------------------------------------------------------------------------- + +@router.get("/tools") +async def get_tools_from_mcp( + mcp_id: int = Query(..., description="MCP service ID"), 
authorization: Optional[str] = Header(None), http_request: Request = None ): - """ Used to list tool information from the remote MCP server """ + """ + Get tools from MCP server by MCP ID. + """ try: - _, tenant_id, _ = get_current_user_info( - authorization, http_request) - tools_info = await get_tool_from_remote_mcp_server( - mcp_server_name=service_name, - remote_mcp_server=mcp_url, - tenant_id=tenant_id + _, tenant_id, _ = get_current_user_info(authorization, http_request) + + tools_info = await list_mcp_service_tools_by_id( + tenant_id=tenant_id, + mcp_id=mcp_id, ) + return JSONResponse( status_code=HTTPStatus.OK, content={ - "tools": [tool.__dict__ for tool in tools_info], "status": "success"} + "tools": [t.model_dump() if hasattr(t, 'model_dump') else t for t in tools_info], + "status": "success" + } ) + except McpNotFoundError as e: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) except MCPConnectionError as e: - logger.error(f"Failed to get tools from remote MCP server: {e}") - raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, - detail="MCP connection failed") + logger.error(f"Failed to get tools from MCP server: {e}") + raise HTTPException( + status_code=HTTPStatus.SERVICE_UNAVAILABLE, + detail="MCP connection failed" + ) except Exception as e: - logger.error(f"get tools from remote MCP server failed, error: {e}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Failed to get tools from remote MCP server.") + logger.error(f"get tools from MCP server failed, error: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to get tools from MCP server." 
+ ) +# --------------------------------------------------------------------------- +# Add Endpoints +# --------------------------------------------------------------------------- + @router.post("/add") -async def add_remote_proxies( - mcp_url: str, - service_name: str, - authorization_token: Optional[str] = Query( - None, description="Authorization token for MCP server authentication (e.g., Bearer token)"), - tenant_id: Optional[str] = Query( - None, description="Tenant ID for filtering (uses auth if not provided)"), +async def add_mcp_service_endpoint( + payload: AddMcpServiceRequest, authorization: Optional[str] = Header(None), http_request: Request = None ): - """ Used to add a remote MCP server """ + """ + Add an MCP service. + Supports both remote MCP (URL-based) and local MCP (record-based). + """ try: - user_id, auth_tenant_id, _ = get_current_user_info( - authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id - effective_tenant_id = tenant_id or auth_tenant_id - await add_remote_mcp_server_list(tenant_id=effective_tenant_id, - user_id=user_id, - remote_mcp_server=mcp_url, - remote_mcp_server_name=service_name, - container_id=None, - authorization_token=authorization_token) + user_id, tenant_id, _ = get_current_user_info(authorization, http_request) + + await add_mcp_service( + tenant_id=tenant_id, + user_id=user_id, + name=payload.name, + description=payload.description, + source=payload.source.value if hasattr(payload.source, 'value') else payload.source, + server_url=payload.server_url, + tags=payload.tags, + authorization_token=payload.authorization_token, + custom_headers=payload.custom_headers, + container_config=payload.container_config, + registry_json=payload.registry_json, + enabled=payload.enabled if payload.enabled is not None else False, + ) + return JSONResponse( status_code=HTTPStatus.OK, - content={"message": "Successfully added remote MCP proxy", - "status": "success"} + content={"message": 
"Successfully added MCP service", "status": "success"} ) except MCPNameIllegal as e: - logger.error(f"Failed to add remote MCP proxy: {e}") - raise HTTPException(status_code=HTTPStatus.CONFLICT, - detail="MCP name already exists") + logger.error(f"Failed to add MCP service: {e}") + raise HTTPException(status_code=HTTPStatus.CONFLICT, detail="MCP name already exists") except MCPConnectionError as e: - logger.error(f"Failed to add remote MCP proxy: {e}") - raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, - detail="MCP connection failed") + logger.error(f"Failed to add MCP service: {e}") + raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, detail="MCP connection failed") + except McpValidationError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: - logger.error(f"Failed to add remote MCP proxy: {e}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Failed to add remote MCP proxy") + logger.error(f"Failed to add MCP service: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to add MCP service" + ) -@router.delete("") -async def delete_remote_proxies( - service_name: str, - mcp_url: str, - tenant_id: Optional[str] = Query( - None, description="Tenant ID for filtering (uses auth if not provided)"), +@router.post("/add-from-config") +async def add_container_mcp_service_endpoint( + payload: AddContainerMcpServiceRequest, authorization: Optional[str] = Header(None), http_request: Request = None ): - """ Used to delete a remote MCP server """ + """ + Add a container-based MCP service with full configuration. + Endpoint path is kept as /add-from-config for backward compatibility. 
+ """ try: - user_id, auth_tenant_id, _ = get_current_user_info( - authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id - effective_tenant_id = tenant_id or auth_tenant_id - await delete_remote_mcp_server_list(tenant_id=effective_tenant_id, - user_id=user_id, - remote_mcp_server=mcp_url, - remote_mcp_server_name=service_name) + user_id, tenant_id, _ = get_current_user_info(authorization, http_request) + + container_info = await add_container_mcp_service( + tenant_id=tenant_id, + user_id=user_id, + name=payload.name, + description=payload.description, + source=payload.source.value if hasattr(payload.source, 'value') else payload.source, + tags=payload.tags, + authorization_token=payload.authorization_token, + registry_json=payload.registry_json, + port=payload.port, + mcp_config=payload.mcp_config, + ) + return JSONResponse( status_code=HTTPStatus.OK, - content={"message": "Successfully deleted remote MCP proxy", - "status": "success"} + content={ + "status": "success", + "data": { + "service_name": container_info.get("service_name"), + "mcp_url": container_info.get("mcp_url"), + "container_id": container_info.get("container_id"), + "container_name": container_info.get("container_name"), + "host_port": container_info.get("host_port"), + }, + }, + ) + + except McpNameConflictError as e: + raise HTTPException(status_code=HTTPStatus.CONFLICT, detail=str(e)) + except McpPortConflictError as e: + raise HTTPException(status_code=HTTPStatus.CONFLICT, detail=str(e)) + except McpValidationError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except MCPContainerError as e: + logger.error(f"Failed to start MCP container service: {e}") + raise HTTPException( + status_code=HTTPStatus.SERVICE_UNAVAILABLE, + detail="Docker service unavailable" + ) + except MCPConnectionError as e: + logger.error(f"MCP connection failed when adding container service: {e}") + raise HTTPException( + 
status_code=HTTPStatus.SERVICE_UNAVAILABLE, + detail="MCP connection failed" ) except Exception as e: - logger.error(f"Failed to delete remote MCP proxy: {e}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Failed to delete remote MCP proxy") + logger.error(f"Failed to add container MCP service: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to add container MCP service" + ) + +# --------------------------------------------------------------------------- +# Update Endpoint +# --------------------------------------------------------------------------- @router.put("/update") -async def update_remote_proxy( - update_data: MCPUpdateRequest, +async def update_mcp_service_endpoint( + payload: UpdateMcpServiceRequest, tenant_id: Optional[str] = Query( None, description="Tenant ID for filtering (uses auth if not provided)"), authorization: Optional[str] = Header(None), http_request: Request = None ): - """ Used to update an existing remote MCP server """ + """Update an existing MCP service by ID.""" try: - user_id, auth_tenant_id, _ = get_current_user_info( - authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id + user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request) effective_tenant_id = tenant_id or auth_tenant_id - await update_remote_mcp_server_list( - update_data=update_data, + + update_mcp_service( tenant_id=effective_tenant_id, - user_id=user_id + user_id=user_id, + mcp_id=payload.mcp_id, + new_name=payload.name, + description=payload.description, + server_url=payload.server_url, + authorization_token=payload.authorization_token, + custom_headers=payload.custom_headers, + tags=payload.tags, ) + return JSONResponse( status_code=HTTPStatus.OK, - content={"message": "Successfully updated remote MCP proxy", - "status": "success"} + content={"message": "Successfully updated MCP service", "status": "success"} ) - 
except MCPNameIllegal as e: - logger.error(f"Failed to update remote MCP proxy: {e}") - raise HTTPException(status_code=HTTPStatus.CONFLICT, - detail=str(e)) - except MCPConnectionError as e: - logger.error(f"Failed to update remote MCP proxy: {e}") - raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, - detail=str(e)) + + except McpNotFoundError as e: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) + except McpValidationError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: - logger.error(f"Failed to update remote MCP proxy: {e}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Failed to update remote MCP proxy") + logger.error(f"Failed to update MCP service: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to update MCP service" + ) -@router.get("/list") -async def get_remote_proxies( +# --------------------------------------------------------------------------- +# Delete Endpoints +# --------------------------------------------------------------------------- + +@router.delete("/{mcp_id}") +async def delete_mcp_by_id( + mcp_id: int, tenant_id: Optional[str] = Query( None, description="Tenant ID for filtering (uses auth if not provided)"), authorization: Optional[str] = Header(None), http_request: Request = None ): - """ Used to get the list of remote MCP servers """ + """Delete MCP service by ID.""" try: - user_id, auth_tenant_id, _ = get_current_user_info( - authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id + user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request) effective_tenant_id = tenant_id or auth_tenant_id - remote_mcp_server_list = await get_remote_mcp_server_list( + + await delete_mcp_service( tenant_id=effective_tenant_id, user_id=user_id, - is_need_auth=False + mcp_id=mcp_id ) + return JSONResponse( 
status_code=HTTPStatus.OK, - content={"remote_mcp_server_list": remote_mcp_server_list, - "enable_upload_image": ENABLE_UPLOAD_IMAGE, - "status": "success"} + content={"message": "Successfully deleted MCP service", "status": "success"} ) + except McpNotFoundError as e: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) except Exception as e: - logger.error(f"Failed to get remote MCP proxy: {e}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Failed to get remote MCP proxy") + logger.error(f"Failed to delete MCP service: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to delete MCP service" + ) -@router.get("/record/{mcp_id}") -async def get_mcp_record( - mcp_id: int, +@router.delete("/container/{container_id}") +async def stop_mcp_container( + container_id: str, tenant_id: Optional[str] = Query( None, description="Tenant ID for filtering (uses auth if not provided)"), authorization: Optional[str] = Header(None), http_request: Request = None ): - """ Get single MCP record by ID """ + """Stop and remove MCP container.""" try: - user_id, auth_tenant_id, _ = get_current_user_info( - authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id + user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request) effective_tenant_id = tenant_id or auth_tenant_id - mcp_record = await get_mcp_record_by_id( - mcp_id=mcp_id, - tenant_id=effective_tenant_id - ) - - if not mcp_record: + try: + container_manager = MCPContainerManager() + except MCPContainerError as e: + logger.error(f"Failed to initialize container manager: {e}") raise HTTPException( - status_code=HTTPStatus.NOT_FOUND, - detail="MCP record not found" + status_code=HTTPStatus.SERVICE_UNAVAILABLE, + detail="Docker service unavailable" ) - return JSONResponse( - status_code=HTTPStatus.OK, - content={ - "mcp_name": mcp_record.get("mcp_name"), - "mcp_server": 
mcp_record.get("mcp_server"), - "authorization_token": mcp_record.get("authorization_token"), - "status": "success" - } - ) + success = await container_manager.stop_mcp_container(container_id) + + if success: + await delete_mcp_by_container_id( + tenant_id=effective_tenant_id, + user_id=user_id, + container_id=container_id, + ) + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "message": "Container and MCP service stopped successfully", + "status": "success", + }, + ) + else: + return JSONResponse( + status_code=HTTPStatus.NOT_FOUND, + content={"message": "Container not found", "status": "error"}, + ) except HTTPException: raise except Exception as e: - logger.error(f"Failed to get MCP record: {e}") + logger.error(f"Failed to stop container: {e}") raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Failed to get MCP record" + detail=f"Failed to stop container: {str(e)}" ) -@router.get("/healthcheck") -async def check_mcp_health( - mcp_url: str, - service_name: str, - tenant_id: Optional[str] = Query( - None, description="Tenant ID for filtering (uses auth if not provided)"), - authorization: Optional[str] = Header(None), - http_request: Request = None -): - """ Used to check the health of the MCP server, the front end can call it, - and automatically update the database status """ - try: - user_id, auth_tenant_id, _ = get_current_user_info( - authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id - effective_tenant_id = tenant_id or auth_tenant_id - await check_mcp_health_and_update_db(mcp_url, service_name, effective_tenant_id, user_id) - return JSONResponse( - status_code=HTTPStatus.OK, - content={"status": "success"} - ) - except MCPConnectionError as e: - logger.error(f"MCP connection failed: {e}") - raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, - detail="MCP connection failed") - except Exception as e: - logger.error(f"Failed to check the health of 
the MCP server: {e}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Failed to check the health of the MCP server") +# --------------------------------------------------------------------------- +# List Endpoints +# --------------------------------------------------------------------------- - -@router.post("/add-from-config") -async def add_mcp_from_config( - mcp_config: MCPConfigRequest, +@router.get("/list") +async def get_mcp_list( tenant_id: Optional[str] = Query( None, description="Tenant ID for filtering (uses auth if not provided)"), authorization: Optional[str] = Header(None), http_request: Request = None ): """ - Add MCP server by starting a container with command+args config. - Similar to Cursor's MCP server configuration format. - - Example request: - { - "mcpServers": { - "12306-mcp": { - "command": "npx", - "args": ["-y", "12306-mcp"], - "env": {"NODE_ENV": "production"} - } - } - } + Get list of MCP services. + Returns remote MCP list with full details including container_id, description, + enabled, source, update_time, tags, container_port, registry_json, config_json, + container_status, and authorization_token. """ try: - user_id, auth_tenant_id, _ = get_current_user_info( - authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id + user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request) effective_tenant_id = tenant_id or auth_tenant_id - # Initialize container manager - try: - container_manager = MCPContainerManager() - except MCPContainerError as e: - logger.error(f"Failed to initialize container manager: {e}") - raise HTTPException( - status_code=HTTPStatus.SERVICE_UNAVAILABLE, - detail="Docker service unavailable. Please ensure Docker socket is mounted." 
- ) - - results = [] - errors = [] - - for service_name, config in mcp_config.mcpServers.items(): - try: - command = config.command - args = config.args or [] - env_vars = config.env or {} - port = config.port - - if not command: - errors.append(f"{service_name}: command is required") - continue - - if port is None: - errors.append(f"{service_name}: port is required") - continue - - # Check if MCP service name already exists before starting container - if check_mcp_name_exists(mcp_name=service_name, tenant_id=effective_tenant_id): - errors.append(f"{service_name}: MCP name already exists") - continue - - # Build full command to run inside nexent/nexent-mcp image - full_command = [ - "python", - "-m", - "mcp_proxy", - "--host", - "0.0.0.0", - "--port", - str(port), - "--transport", - "streamablehttp", - "--", - command, - *args, - ] - - # Start container - container_info = await container_manager.start_mcp_container( - service_name=service_name, - tenant_id=effective_tenant_id, - user_id=user_id, - env_vars=env_vars, - host_port=port, - image=config.image or NEXENT_MCP_DOCKER_IMAGE, - full_command=full_command, - ) - - # Register to remote MCP server list - await add_remote_mcp_server_list( - tenant_id=effective_tenant_id, - user_id=user_id, - remote_mcp_server=container_info["mcp_url"], - remote_mcp_server_name=service_name, - container_id=container_info["container_id"], - ) - - results.append({ - "service_name": service_name, - "status": "success", - "mcp_url": container_info["mcp_url"], - "container_id": container_info["container_id"], - "container_name": container_info.get("container_name"), - "host_port": container_info.get("host_port") - }) - - except MCPContainerError as e: - logger.error( - f"Failed to start MCP container {service_name}: {e}") - error_str = str(e) - # Check if error is related to image not found - if "not found" in error_str.lower() or "404" in error_str: - errors.append( - f"{service_name}: Image not found - MCP service startup image is 
missing") - else: - errors.append(f"{service_name}: {error_str}") - except Exception as e: - logger.error( - f"Unexpected error adding MCP {service_name}: {e}") - errors.append(f"{service_name}: {str(e)}") - - if errors and not results: - raise HTTPException( - status_code=HTTPStatus.BAD_REQUEST, - detail=f"All MCP servers failed: {errors}" - ) + remote_mcp_list = await get_remote_mcp_server_list( + tenant_id=effective_tenant_id, + user_id=user_id, + is_need_auth=True + ) return JSONResponse( status_code=HTTPStatus.OK, content={ - "message": "MCP servers processed", - "results": results, - "errors": errors if errors else None, + "remote_mcp_server_list": remote_mcp_list, + "enable_upload_image": ENABLE_UPLOAD_IMAGE, "status": "success" } ) - - except HTTPException: - raise except Exception as e: - logger.error(f"Failed to add MCP from config: {e}") + logger.error(f"Failed to get MCP list: {e}") raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail=f"Failed to add MCP servers: {str(e)}" + detail="Failed to get MCP list" ) -@router.delete("/container/{container_id}") -async def stop_mcp_container( - container_id: str, +@router.get("/record/{mcp_id}") +async def get_mcp_record( + mcp_id: int, tenant_id: Optional[str] = Query( None, description="Tenant ID for filtering (uses auth if not provided)"), authorization: Optional[str] = Header(None), http_request: Request = None ): - """ Stop and remove MCP container """ + """Get single MCP record by ID.""" try: - user_id, auth_tenant_id, _ = get_current_user_info( - authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id + user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request) effective_tenant_id = tenant_id or auth_tenant_id - try: - container_manager = MCPContainerManager() - except MCPContainerError as e: - logger.error(f"Failed to initialize container manager: {e}") - raise HTTPException( - 
status_code=HTTPStatus.SERVICE_UNAVAILABLE, - detail="Docker service unavailable" - ) - - success = await container_manager.stop_mcp_container(container_id) + mcp_record = await get_mcp_record_by_id( + mcp_id=mcp_id, + tenant_id=effective_tenant_id + ) - if success: - # Soft delete the corresponding MCP record (if any) by container ID - await delete_mcp_by_container_id( - tenant_id=effective_tenant_id, - user_id=user_id, - container_id=container_id, - ) - return JSONResponse( - status_code=HTTPStatus.OK, - content={ - "message": "Container and MCP service stopped successfully", - "status": "success", - }, - ) - else: - return JSONResponse( + if not mcp_record: + raise HTTPException( status_code=HTTPStatus.NOT_FOUND, - content={"message": "Container not found", "status": "error"}, + detail="MCP record not found" ) + + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "mcp_name": mcp_record.get("mcp_name"), + "mcp_server": mcp_record.get("mcp_server"), + "authorization_token": mcp_record.get("authorization_token"), + "custom_headers": mcp_record.get("custom_headers"), + "status": "success" + } + ) except HTTPException: raise except Exception as e: - logger.error(f"Failed to stop container: {e}") + logger.error(f"Failed to get MCP record: {e}") raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail=f"Failed to stop container: {str(e)}" + detail="Failed to get MCP record" ) @@ -487,11 +444,10 @@ async def list_mcp_containers( authorization: Optional[str] = Header(None), http_request: Request = None ): - """ List all MCP containers for the current tenant """ + """List all MCP containers for the current tenant.""" try: user_id, auth_tenant_id, _ = get_current_user_info( authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id effective_tenant_id = tenant_id or auth_tenant_id try: @@ -539,11 +495,10 @@ async def get_container_logs( authorization: Optional[str] = Header(None), 
http_request: Request = None ): - """ Get logs from MCP container via SSE stream """ + """Get logs from MCP container via SSE stream.""" try: user_id, auth_tenant_id, _ = get_current_user_info( authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id effective_tenant_id = tenant_id or auth_tenant_id try: @@ -556,12 +511,11 @@ async def get_container_logs( ) async def generate_log_stream(): - """Generate SSE stream of container logs""" + """Generate SSE stream of container logs.""" try: async for log_line in container_manager.stream_container_logs( container_id, tail=tail, follow=follow ): - # Format as SSE: data: {json}\n\n payload = json.dumps( {"logs": log_line, "status": "success"}, ensure_ascii=False @@ -597,7 +551,185 @@ async def generate_log_stream(): ) -# Conditionally add upload-image route based on ENABLE_UPLOAD_IMAGE setting +@router.get("/healthcheck") +async def check_mcp_health( + mcp_id: int = Query(..., description="MCP service ID"), + authorization: Optional[str] = Header(None), + http_request: Request = None +): + """Check MCP service health by ID.""" + try: + user_id, tenant_id, _ = get_current_user_info(authorization, http_request) + + health_status = await check_mcp_service_health( + tenant_id=tenant_id, + user_id=user_id, + mcp_id=mcp_id, + ) + + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success", "data": {"health_status": health_status}} + ) + except McpNotFoundError as e: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) + except McpValidationError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except MCPConnectionError as e: + logger.error(f"MCP connection failed: {e}") + raise HTTPException( + status_code=HTTPStatus.SERVICE_UNAVAILABLE, + detail=str(e) or "MCP connection failed" + ) + except Exception as e: + logger.error(f"Failed to check MCP health: {e}") + raise HTTPException( + 
status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to check MCP health" + ) + + +# --------------------------------------------------------------------------- +# Port Management Endpoints +# --------------------------------------------------------------------------- + +@router.get("/port/check") +async def check_mcp_port( + port: int = Query(..., ge=1, le=65535), + authorization: Optional[str] = Header(None), + http_request: Request = None +): + """Check if a port is available for MCP container.""" + try: + get_current_user_info(authorization, http_request) + available = check_container_port_conflict(port=port) + no_cache_headers = { + "Cache-Control": "no-cache, no-store, must-revalidate", + "Pragma": "no-cache", + "Expires": "0", + } + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success", "data": {"available": available}}, + headers=no_cache_headers + ) + except McpValidationError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except Exception as e: + logger.error(f"Failed to check MCP port: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to check MCP port" + ) + + +@router.get("/port/suggest") +async def suggest_mcp_port( + authorization: Optional[str] = Header(None), + http_request: Request = None +): + """Suggest an available port for MCP container.""" + try: + get_current_user_info(authorization, http_request) + port = suggest_container_port() + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success", "data": {"port": port}} + ) + except McpPortConflictError as e: + raise HTTPException(status_code=HTTPStatus.CONFLICT, detail=str(e)) + except Exception as e: + logger.error(f"Failed to suggest MCP port: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to suggest MCP port" + ) + + +# --------------------------------------------------------------------------- +# Enable/Disable 
Endpoints +# --------------------------------------------------------------------------- + +@router.post("/enable") +async def enable_mcp_service( + payload: EnableMcpServiceRequest, + authorization: Optional[str] = Header(None), + http_request: Request = None +): + """Enable an MCP service by ID.""" + try: + user_id, tenant_id, _ = get_current_user_info(authorization, http_request) + + await update_mcp_service_enabled( + tenant_id=tenant_id, + user_id=user_id, + mcp_id=payload.mcp_id, + enabled=True, + ) + + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success"} + ) + except McpNotFoundError as e: + raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) + except McpNameConflictError as e: + raise HTTPException(status_code=HTTPStatus.CONFLICT, detail=str(e)) + except McpPortConflictError as e: + raise HTTPException(status_code=HTTPStatus.CONFLICT, detail=str(e)) + except McpValidationError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except MCPConnectionError as e: + logger.error(f"MCP connection failed while enabling service: {e}") + raise HTTPException( + status_code=HTTPStatus.SERVICE_UNAVAILABLE, + detail="MCP connection failed" + ) + except Exception as e: + logger.error(f"Failed to enable MCP service: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to update MCP service status" + ) + + +@router.post("/disable") +async def disable_mcp_service( + payload: DisableMcpServiceRequest, + authorization: Optional[str] = Header(None), + http_request: Request = None +): + """Disable an MCP service by ID.""" + try: + user_id, tenant_id, _ = get_current_user_info(authorization, http_request) + + await update_mcp_service_enabled( + tenant_id=tenant_id, + user_id=user_id, + mcp_id=payload.mcp_id, + enabled=False, + ) + + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success"} + ) + except McpNotFoundError as e: + raise 
HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e)) + except McpValidationError as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except Exception as e: + logger.error(f"Failed to disable MCP service: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to update MCP service status" + ) + + +# --------------------------------------------------------------------------- +# Image Upload Endpoint +# --------------------------------------------------------------------------- + if ENABLE_UPLOAD_IMAGE: @router.post("/upload-image") async def upload_mcp_image( @@ -621,13 +753,10 @@ async def upload_mcp_image( try: user_id, auth_tenant_id, _ = get_current_user_info( authorization, http_request) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id effective_tenant_id = tenant_id or auth_tenant_id - # Read file content content = await file.read() - # Call service layer to handle the business logic result = await upload_and_start_mcp_image( tenant_id=effective_tenant_id, user_id=user_id, diff --git a/backend/apps/skill_app.py b/backend/apps/skill_app.py index 510a0e481..a2a3b38cf 100644 --- a/backend/apps/skill_app.py +++ b/backend/apps/skill_app.py @@ -1,10 +1,12 @@ """Skill management HTTP endpoints.""" +from nexent.core.agents.agent_model import ModelConfig import logging from typing import Any, Dict, List, Optional from fastapi import APIRouter, HTTPException, Query, UploadFile, File, Form, Header from starlette.responses import JSONResponse, StreamingResponse +from http import HTTPStatus from pydantic import BaseModel, Field from consts.const import APP_VERSION, STREAMABLE_CONTENT_TYPES @@ -13,10 +15,14 @@ SkillService, skill_creation_task_manager, stream_skill_creation, + update_skill_list, + get_official_skills_with_status, ) from consts.model import SkillInstanceInfoRequest, SkillCreateRequest, SkillCreateInteractiveRequest, SkillUpdateRequest, SkillResponse from 
utils.auth_utils import get_current_user_id, get_current_user_info -from nexent.core.agents.agent_model import ModelConfig +from services.asset_owner_visibility import can_view_skill + +ASSET_OWNER_SKILL_VIEW_DENIED = {"content": "您无权限查看"} logger = logging.getLogger(__name__) @@ -24,13 +30,27 @@ skill_creator_router = APIRouter(prefix="/skills", tags=["nl2skill"]) +def _asset_owner_skill_view_denied_response(skill: Optional[Dict[str, Any]], tenant_id: str): + """Return a denial JSONResponse when the caller cannot view an ASSET_OWNER-scoped skill.""" + if skill and not can_view_skill(tenant_id, skill.get("tenant_id")): + return JSONResponse(content=ASSET_OWNER_SKILL_VIEW_DENIED) + return None + + # List routes first (no path parameters) @router.get("") -async def list_skills() -> JSONResponse: - """List all available skills.""" +async def list_skills( + tenant_id: Optional[str] = Query( + None, description="Tenant ID for super admin to query specific tenant's skills"), + authorization: Optional[str] = Header(None) +) -> JSONResponse: + """List all available skills for the current tenant (or a specific tenant for super admin).""" try: - service = SkillService() - skills = service.list_skills() + _, current_tenant_id = get_current_user_id(authorization) + # Super admin can query a specific tenant's skills; otherwise use current user's tenant + effective_tenant_id = tenant_id if tenant_id else current_tenant_id + service = SkillService(tenant_id=effective_tenant_id) + skills = service.list_skills(tenant_id=effective_tenant_id) return JSONResponse(content={"skills": skills}) except SkillException as e: raise HTTPException(status_code=500, detail=str(e)) @@ -39,6 +59,68 @@ async def list_skills() -> JSONResponse: raise HTTPException(status_code=500, detail="Internal server error") +@router.get("/official") +async def list_official_skills( + tenant_id: Optional[str] = Query( + None, description="Tenant ID for super admin to query specific tenant's skills"), + 
authorization: Optional[str] = Header(None) +) -> JSONResponse: + """List all official skills with installation status for the current tenant (or a specific tenant for super admin). + + Returns skills that have source='official', each with a status field: + - installable: skill exists globally but not yet installed for this tenant + - installed: skill already exists for this tenant + """ + try: + _, current_tenant_id = get_current_user_id(authorization) + effective_tenant_id = tenant_id if tenant_id else current_tenant_id + skills = get_official_skills_with_status(tenant_id=effective_tenant_id) + return JSONResponse(content={"skills": skills}) + except Exception as e: + logger.error(f"Error listing official skills: {e}") + raise HTTPException(status_code=500, detail="Internal server error") + + +class InstallSkillsRequest(BaseModel): + skill_names: List[str] = Field(..., + description="List of skill names to install") + locale: Optional[str] = Field( + default="en", description="Frontend locale (zh or en)") + + +@router.post("/install") +async def install_skills( + request: InstallSkillsRequest, + tenant_id: Optional[str] = Query( + None, description="Tenant ID for super admin to install skills for a specific tenant"), + authorization: Optional[str] = Header(None) +) -> JSONResponse: + """Install official skills for the current tenant (or a specific tenant for super admin). + + Uses ZIP-based installation for each skill name provided. + Skills that already exist are skipped. 
+ """ + try: + user_id, current_tenant_id = get_current_user_id(authorization) + from services.skill_service import install_skills_from_zip_for_tenant + + effective_tenant_id = tenant_id if tenant_id else current_tenant_id + installed_names = install_skills_from_zip_for_tenant( + skill_names=request.skill_names, + tenant_id=effective_tenant_id, + user_id=user_id, + locale=request.locale + ) + return JSONResponse(content={ + "message": "Skills installed successfully", + "installed": installed_names, + "total": len(installed_names) + }) + except Exception as e: + logger.error(f"Error installing skills: {e}") + raise HTTPException(status_code=500, detail="Internal server error") + + # POST routes @router.post("") async def create_skill( @@ -48,12 +130,13 @@ async def create_skill( """Create a new skill (JSON format).""" try: user_id, tenant_id = get_current_user_id(authorization) - service = SkillService() + service = SkillService(tenant_id=tenant_id) # Convert tool_names to tool_ids if provided tool_ids = request.tool_ids or [] if request.tool_names: - raise NotImplementedError("Tool names are not supported for skill creation") + raise NotImplementedError( + "Tool names are not supported for skill creation") skill_data = { "name": request.name, @@ -62,10 +145,12 @@ async def create_skill( "tool_ids": tool_ids, "tags": request.tags, "source": request.source, - "params": request.params, + "config_schemas": request.config_schemas, + "config_values": request.config_values, "files": request.files if request.files else [], } - skill = service.create_skill(skill_data, user_id=user_id) + skill = service.create_skill( + skill_data, tenant_id=tenant_id, user_id=user_id) return JSONResponse(content=skill, status_code=201) except UnauthorizedError as e: raise HTTPException(status_code=401, detail=str(e)) @@ -82,7 +167,8 @@ async def create_skill( @router.post("/upload") async def create_skill_from_file( file: UploadFile = File(..., description="SKILL.md file or ZIP archive"), - 
skill_name: Optional[str] = Form(None, description="Optional skill name override"), + skill_name: Optional[str] = Form( + None, description="Optional skill name override"), source: Optional[str] = Form("自定义", description="Skill source"), authorization: Optional[str] = Header(None) ) -> JSONResponse: @@ -92,9 +178,9 @@ async def create_skill_from_file( - Single SKILL.md file: Extracts metadata and saves directly - ZIP archive: Contains SKILL.md plus scripts/assets folders """ - try: + try: user_id, tenant_id = get_current_user_id(authorization) - service = SkillService() + service = SkillService(tenant_id=tenant_id) content = await file.read() file_type = "auto" @@ -123,22 +209,39 @@ async def create_skill_from_file( raise HTTPException(status_code=409, detail=str(e)) raise HTTPException(status_code=400, detail=str(e)) except Exception as e: - logger.error(f"Unexpected error: {type(e).__name__}: {e}", exc_info=True) + logger.error( + f"Unexpected error: {type(e).__name__}: {e}", exc_info=True) raise HTTPException(status_code=500, detail="Internal server error") # Routes with path parameters @router.get("/{skill_name}/files") -async def get_skill_file_tree(skill_name: str) -> JSONResponse: +async def get_skill_file_tree( + skill_name: str, + authorization: Optional[str] = Header(None) +) -> JSONResponse: """Get file tree structure of a skill.""" try: - service = SkillService() + _, tenant_id = get_current_user_id(authorization) + service = SkillService(tenant_id=tenant_id) + skill = service.get_skill(skill_name) + if not skill: + raise HTTPException( + status_code=404, detail=f"Skill not found: {skill_name}") + + denied = _asset_owner_skill_view_denied_response(skill, tenant_id) + if denied: + return denied + tree = service.get_skill_file_tree(skill_name) if not tree: - raise HTTPException(status_code=404, detail=f"Skill not found: {skill_name}") + raise HTTPException( + status_code=404, detail=f"Skill not found: {skill_name}") return JSONResponse(content=tree) 
except HTTPException: raise + except UnauthorizedError as e: + raise HTTPException(status_code=401, detail=str(e)) except SkillException as e: raise HTTPException(status_code=500, detail=str(e)) except Exception as e: @@ -149,7 +252,8 @@ async def get_skill_file_tree(skill_name: str) -> JSONResponse: @router.get("/{skill_name}/files/{file_path:path}") async def get_skill_file_content( skill_name: str, - file_path: str + file_path: str, + authorization: Optional[str] = Header(None) ) -> JSONResponse: """Get content of a specific file within a skill. @@ -158,13 +262,26 @@ async def get_skill_file_content( file_path: Relative path to the file within the skill directory """ try: - service = SkillService() + _, tenant_id = get_current_user_id(authorization) + service = SkillService(tenant_id=tenant_id) + skill = service.get_skill(skill_name) + if not skill: + raise HTTPException( + status_code=404, detail=f"Skill not found: {skill_name}") + + denied = _asset_owner_skill_view_denied_response(skill, tenant_id) + if denied: + return denied + content = service.get_skill_file_content(skill_name, file_path) if content is None: - raise HTTPException(status_code=404, detail=f"File not found: {file_path}") + raise HTTPException( + status_code=404, detail=f"File not found: {file_path}") return JSONResponse(content={"content": content}) except HTTPException: raise + except UnauthorizedError as e: + raise HTTPException(status_code=401, detail=str(e)) except SkillException as e: raise HTTPException(status_code=500, detail=str(e)) except Exception as e: @@ -184,7 +301,7 @@ async def update_skill_from_file( """ try: user_id, tenant_id = get_current_user_id(authorization) - service = SkillService() + service = SkillService(tenant_id=tenant_id) content = await file.read() @@ -227,7 +344,7 @@ async def get_skill_instance( try: _, tenant_id = get_current_user_id(authorization) - service = SkillService() + service = SkillService(tenant_id=tenant_id) instance = service.get_skill_instance( 
agent_id=agent_id, skill_id=skill_id, @@ -241,13 +358,22 @@ async def get_skill_instance( detail=f"Skill instance not found for agent {agent_id} and skill {skill_id}" ) - # Enrich with skill info from ag_skill_info_t (skill_name, skill_description, skill_content, params) - skill = service.get_skill_by_id(skill_id) + # Enrich with skill info from ag_skill_info_t (skill_name, skill_description, skill_content, config_schemas, config_values) + # The instance's per-agent overrides are mapped to config_values for the frontend. + skill = service.get_skill_by_id(skill_id, tenant_id) if skill: instance["skill_name"] = skill.get("name") instance["skill_description"] = skill.get("description", "") instance["skill_content"] = skill.get("content", "") - instance["skill_params"] = skill.get("params") or {} + # Template defaults from YAML-enriched skill + instance["config_schemas"] = skill.get("config_schemas") or [] + instance["config_values"] = skill.get("config_values") or {} + # Per-agent overrides from SkillInstance.config_values override the template defaults + instance_params = instance.get("config_values") or {} + if instance_params: + merged = dict(instance.get("config_values") or {}) + merged.update(instance_params) + instance["config_values"] = merged return JSONResponse(content=instance) except UnauthorizedError as e: @@ -273,10 +399,11 @@ async def update_skill_instance( user_id, tenant_id = get_current_user_id(authorization) # Validate skill exists - service = SkillService() - skill = service.get_skill_by_id(request.skill_id) + service = SkillService(tenant_id=tenant_id) + skill = service.get_skill_by_id(request.skill_id, tenant_id) if not skill: - raise HTTPException(status_code=404, detail=f"Skill with ID {request.skill_id} not found") + raise HTTPException( + status_code=404, detail=f"Skill with ID {request.skill_id} not found") # Create or update skill instance instance = service.create_or_update_skill_instance( @@ -286,6 +413,18 @@ async def 
update_skill_instance( version_no=request.version_no ) + # Enrich with template info so the frontend gets config_schemas and config_values + instance["skill_name"] = skill.get("name") + instance["skill_description"] = skill.get("description", "") + instance["skill_content"] = skill.get("content", "") + instance["config_schemas"] = skill.get("config_schemas") or [] + instance["config_values"] = skill.get("config_values") or {} + instance_params = instance.get("config_values") or {} + if instance_params: + merged = dict(instance.get("config_values") or {}) + merged.update(instance_params) + instance["config_values"] = merged + return JSONResponse(content={"message": "Skill instance updated", "instance": instance}) except UnauthorizedError as e: raise HTTPException(status_code=401, detail=str(e)) @@ -300,7 +439,8 @@ async def update_skill_instance( @router.get("/instance/list") async def list_skill_instances( - agent_id: int = Query(..., description="Agent ID to query skill instances"), + agent_id: int = Query(..., + description="Agent ID to query skill instances"), version_no: int = Query(0, description="Version number (0 for draft)"), authorization: Optional[str] = Header(None) ) -> JSONResponse: @@ -308,7 +448,7 @@ async def list_skill_instances( try: _, tenant_id = get_current_user_id(authorization) - service = SkillService() + service = SkillService(tenant_id=tenant_id) instances = service.list_skill_instances( agent_id=agent_id, @@ -316,14 +456,21 @@ async def list_skill_instances( version_no=version_no ) - # Enrich with skill info from ag_skill_info_t (skill_name, skill_description, skill_content, params) + # Enrich with skill info from ag_skill_info_t (skill_name, skill_description, skill_content, config_values) + # Also include config_schemas and config_values from the template (via YAML enrichment). + # The instance's per-agent overrides (config_values) are used as-is for the frontend. 
for instance in instances: - skill = service.get_skill_by_id(instance.get("skill_id")) + skill = service.get_skill_by_id( + instance.get("skill_id"), tenant_id) if skill: instance["skill_name"] = skill.get("name") instance["skill_description"] = skill.get("description", "") instance["skill_content"] = skill.get("content", "") - instance["skill_params"] = skill.get("params") or {} + # Template defaults from YAML-enriched skill + instance["config_schemas"] = skill.get("config_schemas") or [] + # Per-agent config_values from SkillInstance override template defaults + instance["config_values"] = instance.get( + "config_values") or skill.get("config_values") or {} return JSONResponse(content={"instances": instances}) except UnauthorizedError as e: @@ -333,14 +480,32 @@ async def list_skill_instances( raise HTTPException(status_code=500, detail="Internal server error") +@router.get("/scan_skill") +async def scan_and_update_skill(authorization: Optional[str] = Header(None)): + """Scan local skill directories and update skill list in database.""" + try: + user_id, tenant_id = get_current_user_id(authorization) + await update_skill_list(tenant_id=tenant_id, user_id=user_id) + return JSONResponse( + status_code=HTTPStatus.OK, + content={"message": "Successfully update skill", "status": "success"} + ) + except Exception as e: + logger.error(f"Failed to update skill: {e}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Failed to update skill") + + @router.get("/{skill_name}") -async def get_skill(skill_name: str) -> JSONResponse: +async def get_skill(skill_name: str, authorization: Optional[str] = Header(None)) -> JSONResponse: """Get a specific skill by name.""" try: - service = SkillService() - skill = service.get_skill(skill_name) + _, tenant_id = get_current_user_id(authorization) + service = SkillService(tenant_id=tenant_id) + skill = service.get_skill(skill_name, tenant_id=tenant_id) if not skill: - raise HTTPException(status_code=404, 
detail=f"Skill not found: {skill_name}") + raise HTTPException( + status_code=404, detail=f"Skill not found: {skill_name}") return JSONResponse(content=skill) except HTTPException: raise @@ -363,7 +528,7 @@ async def update_skill( """ try: user_id, tenant_id = get_current_user_id(authorization) - service = SkillService() + service = SkillService(tenant_id=tenant_id) update_data = {} if request.description is not None: update_data["description"] = request.description @@ -373,15 +538,22 @@ async def update_skill( update_data["tags"] = request.tags if request.source is not None: update_data["source"] = request.source - if request.params is not None: - update_data["params"] = request.params + if request.config_schemas is not None: + update_data["config_schemas"] = request.config_schemas + if request.config_values is not None: + update_data["config_values"] = request.config_values if request.files is not None: update_data["files"] = [f.model_dump() for f in request.files] if not update_data: raise HTTPException(status_code=400, detail="No fields to update") - skill = service.update_skill(skill_name, update_data, user_id=user_id) + skill = service.update_skill( + skill_name, + update_data, + tenant_id=tenant_id, + user_id=user_id, + ) return JSONResponse(content=skill) except UnauthorizedError as e: raise HTTPException(status_code=401, detail=str(e)) @@ -403,9 +575,9 @@ async def delete_skill( ) -> JSONResponse: """Delete a skill.""" try: - user_id, _ = get_current_user_id(authorization) - service = SkillService() - service.delete_skill(skill_name, user_id=user_id) + user_id, tenant_id = get_current_user_id(authorization) + service = SkillService(tenant_id=tenant_id) + service.delete_skill(skill_name, tenant_id=tenant_id, user_id=user_id) return JSONResponse(content={"message": f"Skill {skill_name} deleted successfully"}) except UnauthorizedError as e: raise HTTPException(status_code=401, detail=str(e)) diff --git a/backend/apps/tenant_app.py b/backend/apps/tenant_app.py 
index e0d612902..291cd22fa 100644 --- a/backend/apps/tenant_app.py +++ b/backend/apps/tenant_app.py @@ -49,7 +49,10 @@ async def create_tenant_endpoint( # Create tenant tenant_info = create_tenant( tenant_name=request.tenant_name, - created_by=user_id + created_by=user_id, + skill_ids=request.skill_ids, + skill_names=request.skill_names, + locale=request.locale, ) logger.info(f"Created tenant {tenant_info['tenant_id']} by user {user_id}") diff --git a/backend/apps/user_management_app.py b/backend/apps/user_management_app.py index d50cdc1f0..edbcdf27d 100644 --- a/backend/apps/user_management_app.py +++ b/backend/apps/user_management_app.py @@ -8,18 +8,28 @@ from supabase_auth.errors import AuthApiError, AuthWeakPasswordError -from consts.model import UserSignInRequest, UserSignUpRequest -from consts.exceptions import NoInviteCodeException, IncorrectInviteCodeException, UserRegistrationException +from consts.const import ASSET_OWNER_SIGNUP_USE_OAUTH_DETAIL +from consts.model import UserSignInRequest, UserSignUpRequest, UpdatePasswordRequest +from consts.exceptions import ( + NoInviteCodeException, + IncorrectInviteCodeException, + UserRegistrationException, + AppException, + UnauthorizedError, + ValidationError, +) +from consts.error_code import ErrorCode from services.user_management_service import get_authorized_client, validate_token, \ check_auth_service_health, signup_user_with_invitation, signin_user, refresh_user_token, \ - get_session_by_authorization, get_user_info, create_token, list_tokens_by_user, delete_token + get_session_by_authorization, get_user_info, create_token, list_tokens_by_user, delete_token, \ + update_password from services.user_service import delete_user_and_cleanup -from consts.exceptions import UnauthorizedError from utils.auth_utils import get_current_user_id load_dotenv() logging.getLogger("httpx").setLevel(logging.WARNING) +logger = logging.getLogger("user_management_app") router = APIRouter(prefix="/user", tags=["user"]) @@ -33,10 
+43,12 @@ async def service_health(): content={"message": "Auth service is available"}) except ConnectionError as e: logging.error(f"Auth service health check failed: {str(e)}") - raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, detail="Auth service is unavailable") + raise HTTPException( + status_code=HTTPStatus.SERVICE_UNAVAILABLE, detail="Auth service is unavailable") except Exception as e: logging.error(f"Auth service health check failed: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Auth service is unavailable") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Auth service is unavailable") @router.post("/signup") @@ -49,7 +61,7 @@ async def signup(request: UserSignUpRequest): auto_login=request.auto_login) success_message = "🎉 User account registered successfully! Please start experiencing the AI assistant service." return JSONResponse(status_code=HTTPStatus.OK, - content={"message":success_message, "data":user_data}) + content={"message": success_message, "data": user_data}) except NoInviteCodeException as e: logging.error(f"User registration failed by invite code: {str(e)}") raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, @@ -58,18 +70,28 @@ async def signup(request: UserSignUpRequest): logging.error(f"User registration failed by invite code: {str(e)}") raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="INVITE_CODE_INVALID") + except ValidationError as e: + detail = str(e) + if detail == ASSET_OWNER_SIGNUP_USE_OAUTH_DETAIL: + logging.warning( + "User registration rejected: asset owner invite requires OAuth") + else: + logging.warning( + f"User registration rejected by validation: {detail}") + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=detail) except UserRegistrationException as e: - logging.error(f"User registration failed by registration service: {str(e)}") + logging.error( + f"User registration failed by 
registration service: {str(e)}") raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="REGISTRATION_SERVICE_ERROR") - except AuthApiError as e: - logging.error(f"User registration failed by email already exists: {str(e)}") - raise HTTPException(status_code=HTTPStatus.CONFLICT, - detail="EMAIL_ALREADY_EXISTS") except AuthWeakPasswordError as e: logging.error(f"User registration failed by weak password: {str(e)}") - raise HTTPException(status_code=HTTPStatus.NOT_ACCEPTABLE, + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail="WEAK_PASSWORD") + except AuthApiError as e: + logging.error(f"User registration failed by auth error: {str(e)}") + raise HTTPException(status_code=HTTPStatus.CONFLICT, + detail="EMAIL_ALREADY_EXISTS") except Exception as e: logging.error(f"User registration failed, unknown error: {str(e)}") raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, @@ -81,13 +103,16 @@ async def signin(request: UserSignInRequest): """User login""" try: signin_content = await signin_user(email=request.email, - password=request.password) + password=request.password) return JSONResponse(status_code=HTTPStatus.OK, content=signin_content) except AuthApiError as e: logging.error(f"User login failed: {str(e)}") raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="Email or password error") + except ValidationError as e: + logging.warning(f"User login rejected by feature flag: {str(e)}") + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) except Exception as e: logging.error(f"User login failed, unknown error: {str(e)}") raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, @@ -108,7 +133,7 @@ async def user_refresh_token(request: Request): raise ValueError("No refresh token provided") session_info = await refresh_user_token(authorization, refresh_token) return JSONResponse(status_code=HTTPStatus.OK, - content={"message":"Token refresh successful", "data":{"session": session_info}}) + 
content={"message": "Token refresh successful", "data": {"session": session_info}}) except ValueError as e: logging.error(f"Refresh token failed: {str(e)}") raise HTTPException(status_code=HTTPStatus.UNPROCESSABLE_ENTITY, @@ -134,7 +159,7 @@ async def logout(request: Request): logging.warning( f"Sign out encountered an error but will be ignored: {str(signout_err)}") return JSONResponse(status_code=HTTPStatus.OK, - content={"message":"Logout successful"}) + content={"message": "Logout successful"}) except Exception as e: logging.error(f"User logout failed: {str(e)}") @@ -154,8 +179,8 @@ async def get_session(request: Request): try: data = await get_session_by_authorization(authorization) return JSONResponse(status_code=HTTPStatus.OK, - content={"message": "Session is valid", - "data": data}) + content={"message": "Session is valid", + "data": data}) except UnauthorizedError as e: logging.error(f"Get user session unauthorized: {str(e)}") raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, @@ -276,6 +301,7 @@ async def revoke_user_account(request: Request): raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="User revoke failed") + @router.post("/tokens") async def create_token_endpoint( authorization: Optional[str] = Header(None) @@ -379,3 +405,49 @@ async def delete_token_endpoint( logging.error(f"Failed to delete token: {str(e)}", exc_info=e) raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Internal Server Error") + + +@router.put("/password") +async def update_password_endpoint( + request: UpdatePasswordRequest, + authorization: Optional[str] = Header(None) +): + """Update current user's password. + + This endpoint requires the user to provide their current password for verification + before setting a new password. 
+ """ + try: + if not authorization: + raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, + detail="Unauthorized: No authorization token provided") + + user_id, _ = get_current_user_id(authorization) + if not user_id: + raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, + detail="Unauthorized: missing user_id in JWT token") + + await update_password( + user_id=str(user_id), + old_password=request.old_password, + new_password=request.new_password + ) + + logger.info(f"Password updated successfully for user {user_id}") + + return JSONResponse( + status_code=HTTPStatus.OK, + content={"message": "Password updated successfully"} + ) + + except UnauthorizedError as e: + logger.warning(f"Password update unauthorized for user: {str(e)}") + raise AppException(ErrorCode.PROFILE_INVALID_CREDENTIALS, str(e)) + except AppException as e: + logger.warning( + f"Password update business error: {e.error_code} - {str(e)}") + raise e # Let app_exception_handler format the response + except Exception as e: + logging.error(f"Failed to update password: {str(e)}", exc_info=e) + raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Internal Server Error") diff --git a/backend/apps/vectordatabase_app.py b/backend/apps/vectordatabase_app.py index 6f4232afd..118537766 100644 --- a/backend/apps/vectordatabase_app.py +++ b/backend/apps/vectordatabase_app.py @@ -7,6 +7,7 @@ from fastapi.responses import JSONResponse import re +from consts.const import ASSET_OWNER_TENANT_ID, PERMISSION_READ from consts.model import ChunkCreateRequest, ChunkUpdateRequest, HybridSearchRequest, IndexingResponse from consts.scheduler import VALID_SUMMARY_FREQUENCIES, SUMMARY_FREQUENCY_OPTIONS_FOR_API from nexent.vector_database.base import VectorDatabaseCore @@ -17,6 +18,7 @@ check_knowledge_base_exist_impl, KnowledgeBaseNeedsModelConfigError, ) +from services.file_management_service import check_file_access from services.redis_service import get_redis_service from utils.auth_utils 
import get_current_user_id from utils.file_management_utils import get_all_files_status @@ -27,6 +29,8 @@ service = ElasticSearchService() logger = logging.getLogger("vectordatabase_app") +INTERNAL_INDEX_NAME_DESC = "Internal index_name from knowledge_record_t" + @router.get("/summary_frequency_options") async def get_summary_frequency_options(): @@ -42,6 +46,7 @@ async def get_summary_frequency_options(): } ) + @router.post("/check_exist") async def check_knowledge_base_exist( request: Dict[str, str] = Body( @@ -82,11 +87,13 @@ def create_new_index( # Extract optional fields from request body ingroup_permission = None group_ids = None - embedding_model_name = None + embedding_model_name: Optional[str] = None + is_multimodal: Optional[bool] = None if request: ingroup_permission = request.get("ingroup_permission") group_ids = request.get("group_ids") - embedding_model_name = request.get("embedding_model_name") + embedding_model_name = request.get("embeddingModel") + is_multimodal = request.get("is_multimodal") # Treat path parameter as user-facing knowledge base name for new creations return ElasticSearchService.create_knowledge_base( @@ -98,6 +105,7 @@ def create_new_index( ingroup_permission=ingroup_permission, group_ids=group_ids, embedding_model_name=embedding_model_name, + is_multimodal=is_multimodal, ) except Exception as e: raise HTTPException( @@ -206,7 +214,8 @@ async def update_summary_frequency_endpoint( if success: return JSONResponse( status_code=HTTPStatus.OK, - content={"message": "Summary frequency updated successfully", "status": "success"} + content={ + "message": "Summary frequency updated successfully", "status": "success"} ) else: raise HTTPException( @@ -243,7 +252,8 @@ def get_embedding_model_status( # Get the knowledge base record by index_name knowledge_record = get_knowledge_record({ "index_name": index_name, - "tenant_id": tenant_id + "tenant_id": tenant_id, + "include_asset_owner_assets": True, }) if not knowledge_record: @@ -301,7 +311,8 
@@ def get_embedding_model_status( except HTTPException: raise except Exception as e: - logger.error(f"Error getting embedding model status for '{index_name}': {e}", exc_info=True) + logger.error( + f"Error getting embedding model status for '{index_name}': {e}", exc_info=True) raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error checking embedding model status: {str(e)}" @@ -310,7 +321,8 @@ def get_embedding_model_status( @router.put("/{index_name}/embedding-model") def update_embedding_model( - index_name: str = Path(..., description="Internal index name of the knowledge base to update"), + index_name: str = Path( + ..., description="Internal index name of the knowledge base to update"), request: Dict[str, Any] = Body(..., description="Update payload with model_id"), authorization: Optional[str] = Header(None) @@ -350,13 +362,45 @@ def update_embedding_model( except HTTPException: raise except Exception as exc: - logger.error(f"Error updating embedding model for '{index_name}': {exc}", exc_info=True) + logger.error( + f"Error updating embedding model for '{index_name}': {exc}", exc_info=True) raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error updating embedding model: {str(exc)}" ) +def _apply_read_only_to_asset_indices_info(asset_result: Dict[str, Any]) -> Dict[str, Any]: + """Force READ_ONLY permission on asset-owner indices_info before merge.""" + indices_info = asset_result.get("indices_info") + if not indices_info: + return asset_result + normalized = dict(asset_result) + normalized["indices_info"] = [ + {**info, "permission": PERMISSION_READ} for info in indices_info + ] + return normalized + + +def _merge_list_indices_results( + primary: Dict[str, Any], + asset_owner: Dict[str, Any], +) -> Dict[str, Any]: + """Merge tenant and ASSET_OWNER list_indices responses (concat, no dedup).""" + merged_indices = primary.get("indices", []) + \ + asset_owner.get("indices", []) + merged: Dict[str, Any] = { + 
"indices": merged_indices, + "count": len(merged_indices), + } + if "indices_info" in primary or "indices_info" in asset_owner: + merged["indices_info"] = ( + primary.get("indices_info", []) + + asset_owner.get("indices_info", []) + ) + return merged + + @router.get("") def get_list_indices( pattern: str = Query("*", description="Pattern to match index names"), @@ -370,9 +414,21 @@ def get_list_indices( """List all user indices with optional stats""" try: user_id, auth_tenant_id = get_current_user_id(authorization) - # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id - effective_tenant_id = tenant_id or auth_tenant_id - return ElasticSearchService.list_indices(pattern, include_stats, effective_tenant_id, user_id, vdb_core) + if tenant_id is None: + result = ElasticSearchService.list_indices( + pattern, include_stats, auth_tenant_id, user_id, vdb_core + ) + if auth_tenant_id != ASSET_OWNER_TENANT_ID: + asset_result = ElasticSearchService.list_indices( + pattern, include_stats, ASSET_OWNER_TENANT_ID, user_id, vdb_core + ) + asset_result = _apply_read_only_to_asset_indices_info( + asset_result) + return _merge_list_indices_results(result, asset_result) + return result + return ElasticSearchService.list_indices( + pattern, include_stats, tenant_id, user_id, vdb_core + ) except Exception as e: raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error get index: {str(e)}") @@ -402,10 +458,12 @@ def create_index_documents( knowledge_record = get_knowledge_record({'index_name': index_name}) saved_embedding_model_id = None if knowledge_record: - saved_embedding_model_id = knowledge_record.get('embedding_model_id') + saved_embedding_model_id = knowledge_record.get( + 'embedding_model_id') # Use the saved model from knowledge base by model_id - embedding_model, _ = get_embedding_model_by_id(tenant_id, saved_embedding_model_id) if saved_embedding_model_id else (None, None) + embedding_model, _ = get_embedding_model_by_id( + 
tenant_id, saved_embedding_model_id) if saved_embedding_model_id else (None, None) return ElasticSearchService.index_documents( embedding_model=embedding_model, @@ -565,13 +623,14 @@ def health_check(vdb_core: VectorDatabaseCore = Depends(get_vector_db_core)): # Try to list indices as a health check return ElasticSearchService.health_check(vdb_core) except Exception as e: - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"{str(e)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"{str(e)}") @router.post("/{index_name}/chunks") def get_index_chunks( index_name: str = Path(..., - description="Name of the index (or knowledge_name) to get chunks from"), + description=INTERNAL_INDEX_NAME_DESC), page: int = Query( None, description="Page number (1-based) for pagination"), page_size: int = Query( @@ -583,12 +642,18 @@ def get_index_chunks( ): """Get chunks from the specified index, with optional pagination support""" try: - _, tenant_id = get_current_user_id(authorization) - actual_index_name = get_index_name_by_knowledge_name( - index_name, tenant_id) + user_id, tenant_id = get_current_user_id(authorization) + + if path_or_url is not None and not check_file_access( + path_or_url, user_id, tenant_id + ): + raise HTTPException( + status_code=HTTPStatus.FORBIDDEN, + detail="You don't have permission to access this file", + ) result = ElasticSearchService.get_index_chunks( - index_name=actual_index_name, + index_name=index_name, page=page, page_size=page_size, path_or_url=path_or_url, @@ -602,8 +667,6 @@ def get_index_chunks( ) except Exception as e: error_msg = str(e) - logger.error( - f"Error getting chunks for index '{index_name}': {error_msg}") raise HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error getting chunks: {error_msg}") @@ -611,7 +674,7 @@ def get_index_chunks( @router.post("/{index_name}/chunk") def create_chunk( index_name: str = Path(..., - description="Name of the index (or 
knowledge_name)"), + description=INTERNAL_INDEX_NAME_DESC), payload: ChunkCreateRequest = Body(..., description="Chunk data"), vdb_core: VectorDatabaseCore = Depends(get_vector_db_core), authorization: Optional[str] = Header(None), @@ -619,10 +682,8 @@ def create_chunk( """Create a manual chunk.""" try: user_id, tenant_id = get_current_user_id(authorization) - actual_index_name = get_index_name_by_knowledge_name( - index_name, tenant_id) result = ElasticSearchService.create_chunk( - index_name=actual_index_name, + index_name=index_name, chunk_request=payload, vdb_core=vdb_core, user_id=user_id, @@ -646,7 +707,7 @@ def create_chunk( @router.put("/{index_name}/chunk/{chunk_id}") def update_chunk( index_name: str = Path(..., - description="Name of the index (or knowledge_name)"), + description=INTERNAL_INDEX_NAME_DESC), chunk_id: str = Path(..., description="Chunk identifier"), payload: ChunkUpdateRequest = Body(..., description="Chunk update payload"), @@ -656,14 +717,13 @@ def update_chunk( """Update an existing chunk.""" try: user_id, tenant_id = get_current_user_id(authorization) - actual_index_name = get_index_name_by_knowledge_name( - index_name, tenant_id) result = ElasticSearchService.update_chunk( - index_name=actual_index_name, + index_name=index_name, chunk_id=chunk_id, chunk_request=payload, vdb_core=vdb_core, user_id=user_id, + tenant_id=tenant_id, ) return JSONResponse(status_code=HTTPStatus.OK, content=result) except ValueError as e: @@ -687,18 +747,16 @@ def update_chunk( @router.delete("/{index_name}/chunk/{chunk_id}") def delete_chunk( index_name: str = Path(..., - description="Name of the index (or knowledge_name)"), + description=INTERNAL_INDEX_NAME_DESC), chunk_id: str = Path(..., description="Chunk identifier"), vdb_core: VectorDatabaseCore = Depends(get_vector_db_core), authorization: Optional[str] = Header(None), ): """Delete a chunk.""" try: - _, tenant_id = get_current_user_id(authorization) - actual_index_name = 
get_index_name_by_knowledge_name( - index_name, tenant_id) + get_current_user_id(authorization) result = ElasticSearchService.delete_chunk( - index_name=actual_index_name, + index_name=index_name, chunk_id=chunk_id, vdb_core=vdb_core, ) @@ -730,8 +788,17 @@ async def hybrid_search( """Run a hybrid (accurate + semantic) search across indices.""" try: _, tenant_id = get_current_user_id(authorization) + resolved_index_names: List[str] = [] + for requested_name in payload.index_names: + try: + resolved_name = get_index_name_by_knowledge_name( + requested_name, tenant_id + ) + except Exception: + resolved_name = requested_name + resolved_index_names.append(resolved_name) result = ElasticSearchService.search_hybrid( - index_names=payload.index_names, + index_names=resolved_index_names, query=payload.query, tenant_id=tenant_id, top_k=payload.top_k, @@ -751,7 +818,8 @@ async def hybrid_search( } ) except ValueError as exc: - raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) except Exception as exc: logger.error(f"Hybrid search failed: {exc}", exc_info=True) raise HTTPException( diff --git a/backend/apps/voice_app.py b/backend/apps/voice_app.py index 7451a95c4..cc1b37e87 100644 --- a/backend/apps/voice_app.py +++ b/backend/apps/voice_app.py @@ -1,3 +1,4 @@ +import asyncio import logging from http import HTTPStatus @@ -7,6 +8,8 @@ from consts.exceptions import ( VoiceServiceException, STTConnectionException, + TTSConnectionException, + VoiceConfigException, ) from consts.model import VoiceConnectivityRequest, VoiceConnectivityResponse from services.voice_service import get_voice_service @@ -56,12 +59,97 @@ async def stt_websocket(websocket: WebSocket): logger.info("STT WebSocket connection closed") +@voice_runtime_router.websocket("/tts/ws") +async def tts_websocket(websocket: WebSocket): + """WebSocket endpoint for streaming TTS""" + logger.info("TTS WebSocket connection 
attempt...") + await websocket.accept() + logger.info("TTS WebSocket connection accepted") + + try: + # Receive config and text from client + msg = await websocket.receive() + client_config = {} + text = None + + if msg["type"] == "websocket.receive": + if "text" in msg: + import json + client_config = json.loads(msg["text"]) + text = client_config.get("text") + elif "bytes" in msg: + try: + import json + client_config = json.loads(msg["bytes"].decode('utf-8')) + text = client_config.get("text") + except Exception as e: + logger.warning(f"Failed to parse bytes as JSON: {e}") + + if not text: + if websocket.client_state.name == "CONNECTED": + await websocket.send_json({"error": "No text provided"}) + return + + # Extract config from client + tenant_id = client_config.get("tenant_id") + model_factory = client_config.get("model_factory") + model_name = client_config.get("model_name") + api_key = client_config.get("api_key") + model_appid = client_config.get("model_appid") + access_token = client_config.get("access_token") + base_url = client_config.get("base_url") + + logger.info(f"TTS request - model_name: {model_name}, model_factory: {model_factory}, " + f"has_api_key: {bool(api_key)}") + + # Build tts_config dict for voice service + tts_config = { + "model_factory": model_factory, + "api_key": api_key, + "model_appid": model_appid, + "access_token": access_token, + "base_url": base_url, + "model_name": model_name, + } + + # Stream TTS audio to WebSocket + voice_service = get_voice_service() + await voice_service.stream_tts_to_websocket( + websocket, + text, + tenant_id=tenant_id, + model_name=model_name, + tts_config=tts_config + ) + + except TTSConnectionException as e: + logger.error(f"TTS WebSocket error: {str(e)}") + await websocket.send_json({"error": str(e)}) + except Exception as e: + logger.error(f"TTS WebSocket error: {str(e)}") + await websocket.send_json({"error": str(e)}) + finally: + logger.info("TTS WebSocket connection closed") + # Ensure connection 
is properly closed + if websocket.client_state.name == "CONNECTED": + await websocket.close() + + @voice_config_router.post("/connectivity") async def check_voice_connectivity(request: VoiceConnectivityRequest): - """Check voice service connectivity.""" + """ + Check voice service connectivity + + Args: + request: VoiceConnectivityRequest containing model_type + + Returns: + VoiceConnectivityResponse with connectivity status + """ try: voice_service = get_voice_service() connected = await voice_service.check_voice_connectivity(request.model_type) + return JSONResponse( status_code=HTTPStatus.OK, content=VoiceConnectivityResponse( @@ -72,10 +160,25 @@ async def check_voice_connectivity(request: VoiceConnectivityRequest): ) except VoiceServiceException as e: logger.error(f"Voice service error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) - except STTConnectionException as e: + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail=str(e) + ) + except (STTConnectionException, TTSConnectionException) as e: logger.error(f"Voice connectivity error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, detail=str(e)) + raise HTTPException( + status_code=HTTPStatus.SERVICE_UNAVAILABLE, + detail=str(e) + ) + except VoiceConfigException as e: + logger.error(f"Voice configuration error: {str(e)}") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail=str(e) + ) except Exception as e: logger.error(f"Unexpected voice service error: {str(e)}") - raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Voice service error") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Voice service error" + ) diff --git a/backend/consts/agent_unavailable_reasons.py b/backend/consts/agent_unavailable_reasons.py new file mode 100644 index 000000000..4e710ee7d --- /dev/null +++ b/backend/consts/agent_unavailable_reasons.py @@ -0,0 +1,43 @@ +""" 
+Agent Unavailable Reason Constants + +Centralized definition of all possible reasons why an agent may be unavailable. +These values are returned to the frontend via the 'unavailable_reasons' field. +""" + + +class AgentUnavailableReason: + """Reason codes for agent unavailability.""" + + # Identity conflicts + DUPLICATE_NAME = "duplicate_name" + DUPLICATE_DISPLAY_NAME = "duplicate_display_name" + + # Model issues + MODEL_NOT_CONFIGURED = "model_not_configured" + MODEL_UNAVAILABLE = "model_unavailable" + + # Tool issues + TOOL_UNAVAILABLE = "tool_unavailable" + ALL_TOOLS_DISABLED = "all_tools_disabled" + + # Agent issues + AGENT_NOT_FOUND = "agent_not_found" + + @classmethod + def all_reasons(cls) -> list[str]: + """Return all defined unavailable reason codes.""" + return [ + cls.DUPLICATE_NAME, + cls.DUPLICATE_DISPLAY_NAME, + cls.MODEL_NOT_CONFIGURED, + cls.MODEL_UNAVAILABLE, + cls.TOOL_UNAVAILABLE, + cls.ALL_TOOLS_DISABLED, + cls.AGENT_NOT_FOUND, + ] + + @classmethod + def is_valid_reason(cls, reason: str) -> bool: + """Check if a reason string is a valid reason code.""" + return reason in cls.all_reasons() diff --git a/backend/consts/const.py b/backend/consts/const.py index 77e86a185..b8045f8e8 100644 --- a/backend/consts/const.py +++ b/backend/consts/const.py @@ -31,6 +31,10 @@ class VectorDatabaseType(str, Enum): # Data Processing Service Configuration DATA_PROCESS_SERVICE = os.getenv("DATA_PROCESS_SERVICE") CLIP_MODEL_PATH = os.getenv("CLIP_MODEL_PATH") +TABLE_TRANSFORMER_MODEL_PATH = os.getenv("TABLE_TRANSFORMER_MODEL_PATH") +UNSTRUCTURED_DEFAULT_MODEL_INITIALIZE_PARAMS_JSON_PATH = os.getenv( + "UNSTRUCTURED_DEFAULT_MODEL_INITIALIZE_PARAMS_JSON_PATH" +) # Upload Configuration @@ -43,10 +47,12 @@ class VectorDatabaseType(str, Enum): MAX_TIMEOUT = int(os.getenv("DP_SPLIT_WAIT_TIMEOUT_MAX_S", "1800")) - # Container-internal skills storage path CONTAINER_SKILLS_PATH = os.getenv("SKILLS_PATH") +# Container-internal official skills ZIP directory 
+OFFICIAL_SKILLS_ZIP_PATH = "/mnt/nexent/official-skills-zip" + # Preview Configuration FILE_PREVIEW_SIZE_LIMIT = 100 * 1024 * 1024 # 100MB @@ -74,7 +80,8 @@ class VectorDatabaseType(str, Enum): SERVICE_ROLE_KEY = os.getenv('SERVICE_ROLE_KEY', SUPABASE_KEY) # JWT secret for verifying Supabase-signed access tokens. # GoTrue uses GOTRUE_JWT_SECRET (= JWT_SECRET in docker setup) to sign tokens. -SUPABASE_JWT_SECRET = os.getenv('SUPABASE_JWT_SECRET') or os.getenv('JWT_SECRET', '') +SUPABASE_JWT_SECRET = os.getenv( + 'SUPABASE_JWT_SECRET') or os.getenv('JWT_SECRET', '') # OAuth Configuration @@ -105,15 +112,37 @@ class VectorDatabaseType(str, Enum): DEFAULT_USER_ID = "user_id" DEFAULT_TENANT_ID = "tenant_id" +# Invitation code type for asset administrator registration +ASSET_OWNER_INVITE_CODE_TYPE = "ASSET_OWNER_INVITE" + +# User role identifier for asset administrators +ASSET_OWNER_ROLE = "ASSET_OWNER" + +# Tenant ID for asset administrators (virtual tenant, not a real tenant) +ASSET_OWNER_TENANT_ID = "asset_owner_tenant_id" + +# MinIO prefix for ASSET_OWNER-scoped attachment uploads (attachments/asset_owner/{user_id}/...) +ASSET_OWNER_ATTACHMENTS_PREFIX = "attachments/asset_owner" + +# When false, block ASSET_OWNER invites, registrations, and sign-in. +ENABLE_ASSET_OWNER_ROLE = os.getenv( + "ENABLE_ASSET_OWNER_ROLE", "false").lower() == "true" + +# HTTP detail key: asset owner must register via OAuth, not email/password signup. +ASSET_OWNER_SIGNUP_USE_OAUTH_DETAIL = "ASSET_OWNER_USE_OAUTH" + # Roles that can edit all resources within a tenant (permission = EDIT). # Keep this centralized to avoid drifting role logic across modules. -CAN_EDIT_ALL_USER_ROLES = {"SU", "ADMIN", "SPEED"} +CAN_EDIT_ALL_USER_ROLES = {"SU", "ADMIN", "SPEED", "ASSET_OWNER"} # Permission constants used by list endpoints (e.g., /agent/list, /mcp/list). 
PERMISSION_READ = "READ_ONLY" PERMISSION_EDIT = "EDIT" PERMISSION_PRIVATE = "PRIVATE" +# Response flag when system prompts are withheld from non-ASSET_OWNER callers. +AGENT_PROMPTS_HIDDEN_FLAG = "prompts_hidden" + # Deployment Version Configuration DEPLOYMENT_VERSION = os.getenv("DEPLOYMENT_VERSION", "speed") @@ -129,6 +158,7 @@ class VectorDatabaseType(str, Enum): MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY") MINIO_REGION = os.getenv("MINIO_REGION") MINIO_DEFAULT_BUCKET = os.getenv("MINIO_DEFAULT_BUCKET") +S3_URL_PREFIX = "s3://" # Postgres Configuration @@ -194,8 +224,10 @@ class VectorDatabaseType(str, Enum): # Will be dynamically set based on PID if not provided WORKER_NAME = os.getenv("WORKER_NAME") WORKER_CONCURRENCY = int(os.getenv("WORKER_CONCURRENCY", "4")) -RAY_WARM_ACTOR_POOL_SIZE_PART = int(os.getenv("RAY_WARM_ACTOR_POOL_SIZE_PART", "2")) -RAY_WARM_ACTOR_POOL_SIZE_PROCESS = int(os.getenv("RAY_WARM_ACTOR_POOL_SIZE_PROCESS", "1")) +RAY_WARM_ACTOR_POOL_SIZE_PART = int( + os.getenv("RAY_WARM_ACTOR_POOL_SIZE_PART", "2")) +RAY_WARM_ACTOR_POOL_SIZE_PROCESS = int( + os.getenv("RAY_WARM_ACTOR_POOL_SIZE_PROCESS", "1")) # Global Ray actor pool (shared by process_q/process_part_q workers) RAY_GLOBAL_ACTOR_POOL_SIZE = int(os.getenv("RAY_GLOBAL_ACTOR_POOL_SIZE", "3")) RAY_ACTOR_WARM_TIMEOUT_S = float(os.getenv("RAY_ACTOR_WARM_TIMEOUT_S", "60")) @@ -205,9 +237,6 @@ class VectorDatabaseType(str, Enum): "RAY_GLOBAL_ACTOR_POOL_NAMESPACE", "nexent-data-process") - - - # Voice Service Configuration APPID = os.getenv("APPID", "") TOKEN = os.getenv("TOKEN", "") @@ -305,6 +334,8 @@ class VectorDatabaseType(str, Enum): "multiEmbedding": "MULTI_EMBEDDING_ID", "rerank": "RERANK_ID", "vlm": "VLM_ID", + "vlm2": "VLM2_ID", + "vlm3": "VLM3_ID", "stt": "STT_ID", "tts": "TTS_ID" } @@ -336,19 +367,66 @@ class VectorDatabaseType(str, Enum): THINK_END_PATTERN = "" -# Telemetry and Monitoring Configuration -ENABLE_TELEMETRY = os.getenv("ENABLE_TELEMETRY", "false").lower() == "true" 
-SERVICE_NAME = os.getenv("SERVICE_NAME", "nexent-backend") -JAEGER_ENDPOINT = os.getenv( - "JAEGER_ENDPOINT", "http://localhost:14268/api/traces") -PROMETHEUS_PORT = int(os.getenv("PROMETHEUS_PORT", "8000")) -TELEMETRY_SAMPLE_RATE = float(os.getenv("TELEMETRY_SAMPLE_RATE", "1.0")) - -# Performance monitoring thresholds -LLM_SLOW_REQUEST_THRESHOLD_SECONDS = float( - os.getenv("LLM_SLOW_REQUEST_THRESHOLD_SECONDS", "5.0")) -LLM_SLOW_TOKEN_RATE_THRESHOLD = float( - os.getenv("LLM_SLOW_TOKEN_RATE_THRESHOLD", "10.0")) # tokens per second +# Telemetry and Monitoring Configuration (OTLP Protocol) +MONITORING_PROVIDER = os.getenv("MONITORING_PROVIDER", "") +ENABLE_TELEMETRY_RAW = os.getenv("ENABLE_TELEMETRY") +ENABLE_TELEMETRY = (ENABLE_TELEMETRY_RAW or "false").lower() == "true" +OTEL_SERVICE_NAME_RAW = os.getenv("OTEL_SERVICE_NAME") +OTEL_SERVICE_NAME = OTEL_SERVICE_NAME_RAW or "nexent-backend" +OTEL_EXPORTER_OTLP_ENDPOINT_RAW = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT") +OTEL_EXPORTER_OTLP_ENDPOINT = OTEL_EXPORTER_OTLP_ENDPOINT_RAW or "http://localhost:4318" +OTEL_EXPORTER_OTLP_TRACES_ENDPOINT = os.getenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", "") +OTEL_EXPORTER_OTLP_METRICS_ENDPOINT = os.getenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", "") +OTEL_EXPORTER_OTLP_PROTOCOL_RAW = os.getenv("OTEL_EXPORTER_OTLP_PROTOCOL") +OTEL_EXPORTER_OTLP_PROTOCOL = OTEL_EXPORTER_OTLP_PROTOCOL_RAW or "http" +OTEL_EXPORTER_OTLP_HEADERS_RAW = os.getenv("OTEL_EXPORTER_OTLP_HEADERS") +OTEL_EXPORTER_OTLP_HEADERS = OTEL_EXPORTER_OTLP_HEADERS_RAW or "" +OTEL_EXPORTER_OTLP_AUTHORIZATION = os.getenv("OTEL_EXPORTER_OTLP_AUTHORIZATION", "") +OTEL_EXPORTER_OTLP_X_API_KEY = os.getenv("OTEL_EXPORTER_OTLP_X_API_KEY", "") +OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION = os.getenv( + "OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION", "") +LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY", "") +LANGSMITH_PROJECT = os.getenv("LANGSMITH_PROJECT", "") +OTEL_EXPORTER_OTLP_METRICS_ENABLED_RAW = 
os.getenv("OTEL_EXPORTER_OTLP_METRICS_ENABLED") +OTEL_EXPORTER_OTLP_METRICS_ENABLED = ( + OTEL_EXPORTER_OTLP_METRICS_ENABLED_RAW or "true").lower() == "true" +MONITORING_INSTRUMENT_REQUESTS_RAW = os.getenv("MONITORING_INSTRUMENT_REQUESTS") +MONITORING_INSTRUMENT_REQUESTS = ( + MONITORING_INSTRUMENT_REQUESTS_RAW or "false").lower() == "true" +MONITORING_FASTAPI_INCLUDED_URLS = os.getenv("MONITORING_FASTAPI_INCLUDED_URLS", "") +MONITORING_FASTAPI_EXCLUDED_URLS = os.getenv("MONITORING_FASTAPI_EXCLUDED_URLS", "") +MONITORING_FASTAPI_EXCLUDE_SPANS = os.getenv("MONITORING_FASTAPI_EXCLUDE_SPANS", "receive,send") +MONITORING_PROJECT_NAME = os.getenv("MONITORING_PROJECT_NAME", "") +MONITORING_DASHBOARD_URL = os.getenv("MONITORING_DASHBOARD_URL", "") +MONITORING_TRACE_CONTENT_MODE = os.getenv("MONITORING_TRACE_CONTENT_MODE", "summary") +MONITORING_TRACE_MAX_CHARS = os.getenv("MONITORING_TRACE_MAX_CHARS", "4000") +MONITORING_TRACE_MAX_ITEMS = os.getenv("MONITORING_TRACE_MAX_ITEMS", "20") +TELEMETRY_SAMPLE_RATE_RAW = os.getenv("TELEMETRY_SAMPLE_RATE") +TELEMETRY_SAMPLE_RATE = float(TELEMETRY_SAMPLE_RATE_RAW or "1.0") + +# Parse OTLP headers into dict format +def _parse_otlp_headers(headers_str: str) -> dict: + """Parse OTLP headers string into dict. 
Format: 'key1=value1,key2=value2'""" + if not headers_str: + return {} + headers = {} + for pair in headers_str.split(","): + if "=" in pair: + key, value = pair.split("=", 1) + headers[key.strip()] = value.strip() + return headers + +OTLP_HEADERS = _parse_otlp_headers(OTEL_EXPORTER_OTLP_HEADERS) +if OTEL_EXPORTER_OTLP_AUTHORIZATION: + OTLP_HEADERS["Authorization"] = OTEL_EXPORTER_OTLP_AUTHORIZATION +if OTEL_EXPORTER_OTLP_X_API_KEY: + OTLP_HEADERS["x-api-key"] = OTEL_EXPORTER_OTLP_X_API_KEY +elif LANGSMITH_API_KEY: + OTLP_HEADERS["x-api-key"] = LANGSMITH_API_KEY +if LANGSMITH_PROJECT: + OTLP_HEADERS["Langsmith-Project"] = LANGSMITH_PROJECT +if OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION: + OTLP_HEADERS["x-langfuse-ingestion-version"] = OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION DEFAULT_ZH_TITLE = "新对话" @@ -360,11 +438,13 @@ class VectorDatabaseType(str, Enum): # Container Platform Configuration -IS_DEPLOYED_BY_KUBERNETES = os.getenv("IS_DEPLOYED_BY_KUBERNETES", "false").lower() == "true" +IS_DEPLOYED_BY_KUBERNETES = os.getenv( + "IS_DEPLOYED_BY_KUBERNETES", "false").lower() == "true" KUBERNETES_NAMESPACE = os.getenv("KUBERNETES_NAMESPACE", "nexent") # Northbound API public base URL (used for A2A agent cards and external file proxy links) -NORTHBOUND_EXTERNAL_URL = os.getenv("NORTHBOUND_EXTERNAL_URL", "http://localhost:5013/api").rstrip("/") +NORTHBOUND_EXTERNAL_URL = os.getenv( + "NORTHBOUND_EXTERNAL_URL", "http://localhost:5013/api").rstrip("/") # APP Version diff --git a/backend/consts/error_code.py b/backend/consts/error_code.py index 4b4792e47..fc94680fb 100644 --- a/backend/consts/error_code.py +++ b/backend/consts/error_code.py @@ -141,6 +141,9 @@ class ErrorCode(Enum): PROFILE_UPDATE_FAILED = "110102" # Profile update failed PROFILE_USER_ALREADY_EXISTS = "110103" # User already exists PROFILE_INVALID_CREDENTIALS = "110104" # Invalid credentials + # 02 - Password + PROFILE_PASSWORD_WEAK = "110201" # Password does not meet strength requirements + 
PROFILE_PASSWORD_SAME_AS_OLD = "110202" # New password cannot be the same as old password # ==================== 16 OAuth / 第三方登录 ==================== # 01 - Provider @@ -261,4 +264,8 @@ class ErrorCode(Enum): ErrorCode.OAUTH_UNLINK_LAST_METHOD: 400, ErrorCode.OAUTH_ACCOUNT_NOT_FOUND: 404, ErrorCode.OAUTH_ACCOUNT_ALREADY_LINKED: 409, + # Profile - Password (module 11) + ErrorCode.PROFILE_INVALID_CREDENTIALS: 400, + ErrorCode.PROFILE_PASSWORD_WEAK: 400, + ErrorCode.PROFILE_PASSWORD_SAME_AS_OLD: 400, } diff --git a/backend/consts/error_message.py b/backend/consts/error_message.py index 27ac33d00..59d290a52 100644 --- a/backend/consts/error_message.py +++ b/backend/consts/error_message.py @@ -104,6 +104,9 @@ class ErrorMessage: ErrorCode.PROFILE_UPDATE_FAILED: "Profile update failed.", ErrorCode.PROFILE_USER_ALREADY_EXISTS: "User already exists.", ErrorCode.PROFILE_INVALID_CREDENTIALS: "Invalid username or password.", + # Profile - Password + ErrorCode.PROFILE_PASSWORD_WEAK: "Password does not meet security requirements. 
Please use a stronger password.", + ErrorCode.PROFILE_PASSWORD_SAME_AS_OLD: "New password cannot be the same as the old password.", # ==================== 12 TenantResource / 租户资源 ==================== ErrorCode.TENANT_NOT_FOUND: "Tenant not found.", diff --git a/backend/consts/exceptions.py b/backend/consts/exceptions.py index a32f0282e..e5e4c7a89 100644 --- a/backend/consts/exceptions.py +++ b/backend/consts/exceptions.py @@ -22,6 +22,7 @@ from .error_code import ErrorCode, ERROR_CODE_HTTP_STATUS from .error_message import ErrorMessage +from typing import List # ==================== New Framework: AppException with ErrorCode ==================== @@ -118,6 +119,26 @@ class MCPNameIllegal(Exception): pass +class McpNotFoundError(Exception): + """Raised when MCP resource is not found.""" + pass + + +class McpValidationError(Exception): + """Raised when MCP payload or runtime data is invalid.""" + pass + + +class McpNameConflictError(Exception): + """Raised when MCP name conflicts with an existing enabled service.""" + pass + + +class McpPortConflictError(Exception): + """Raised when an MCP container port conflicts with an existing service or runtime port.""" + pass + + class NoInviteCodeException(Exception): """Raised when invite code is not found.""" @@ -184,12 +205,24 @@ class VoiceServiceException(Exception): pass +class VoiceConfigException(Exception): + """Raised when voice configuration is invalid or missing.""" + + pass + + class STTConnectionException(Exception): """Raised when STT service connection fails.""" pass +class TTSConnectionException(Exception): + """Raised when TTS service connection fails.""" + + pass + + class ToolExecutionException(Exception): """Raised when mcp tool execution failed.""" @@ -214,9 +247,14 @@ class DataMateConnectionError(Exception): pass +class SkillDuplicateError(Exception): + """Raised when importing an agent with skills that have duplicate names in target tenant.""" + def __init__(self, duplicate_names: List[str]): + 
self.duplicate_names = duplicate_names + + class SkillException(Exception): """Raised when skill operations fail.""" - pass diff --git a/backend/consts/model.py b/backend/consts/model.py index bcaffcae7..6969999fe 100644 --- a/backend/consts/model.py +++ b/backend/consts/model.py @@ -1,9 +1,11 @@ from enum import Enum from typing import Optional, Any, List, Dict -from pydantic import BaseModel, Field, EmailStr +from pydantic import BaseModel, Field, EmailStr, ConfigDict, field_validator from nexent.core.agents.agent_model import ToolConfig +from consts.prompt_template import PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP + class ModelConnectStatusEnum(Enum): """Enum class for model connection status""" @@ -29,7 +31,7 @@ def get_value(cls, status: Optional[str]) -> str: class UserSignUpRequest(BaseModel): """User registration request model""" email: EmailStr - password: str = Field(..., min_length=6) + password: str = Field(..., min_length=8) invite_code: Optional[str] = None auto_login: Optional[bool] = True # Whether to return session after signup @@ -40,6 +42,19 @@ class UserSignInRequest(BaseModel): password: str +class OAuthCompleteRequest(BaseModel): + """Complete a pending OAuth signup.""" + email: Optional[EmailStr] = None + password: str = Field(..., min_length=6) + invite_code: str = Field(..., min_length=1) + + +class UpdatePasswordRequest(BaseModel): + """Password update request model for changing user password""" + old_password: str = Field(..., min_length=1, description="Current password for verification") + new_password: str = Field(..., min_length=8, description="New password to set (min 8 characters)") + + class UserUpdateRequest(BaseModel): """User update request model""" username: Optional[str] = Field(None, min_length=1, max_length=50) @@ -121,6 +136,8 @@ class ModelRequest(BaseModel): # STT specific fields model_appid: Optional[str] = None access_token: Optional[str] = None + timeout_seconds: Optional[int] = None + concurrency_limit: Optional[int] = 
None class ProviderModelRequest(BaseModel): @@ -160,13 +177,34 @@ class STTModelConfig(BaseModel): accessToken: Optional[str] = None +def _empty_model_config() -> SingleModelConfig: + return SingleModelConfig( + modelName="", + displayName="", + apiConfig=ModelApiConfig(apiKey="", modelUrl="") + ) + + +class TTSModelConfig(BaseModel): + """TTS model specific configuration with factory, appid, and access token fields""" + modelName: str + displayName: str + apiConfig: Optional[ModelApiConfig] = None + modelFactory: Optional[str] = None + modelAppid: Optional[str] = None + accessToken: Optional[str] = None + + class ModelConfig(BaseModel): llm: SingleModelConfig embedding: SingleModelConfig multiEmbedding: SingleModelConfig rerank: SingleModelConfig vlm: SingleModelConfig + vlm2: SingleModelConfig = Field(default_factory=_empty_model_config) + vlm3: SingleModelConfig = Field(default_factory=_empty_model_config) stt: STTModelConfig + tts: TTSModelConfig class AppConfig(BaseModel): @@ -300,6 +338,7 @@ class ProcessParams(BaseModel): source_type: str index_name: str authorization: Optional[str] = None + model_id: Optional[int] = None class OpinionRequest(BaseModel): @@ -312,6 +351,69 @@ class GeneratePromptRequest(BaseModel): task_description: str agent_id: int model_id: int + prompt_template_id: Optional[int] = None + tool_ids: Optional[List[int]] = Field( + None, description="Optional: tool IDs from frontend (takes precedence over database query)") + sub_agent_ids: Optional[List[int]] = Field( + None, description="Optional: sub-agent IDs from frontend (takes precedence over database query)") + knowledge_base_display_names: Optional[List[str]] = Field( + None, description="Optional: knowledge base display names from frontend (takes precedence over database query)") + has_selected_resources: bool = Field( + True, description="Whether tools or sub-agents are selected; when False, skips generating constraint and few_shots sections") + + +class 
PromptTemplateContentRequest(BaseModel): + model_config = ConfigDict(populate_by_name=True) + + duty_system_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["duty_system_prompt"] + ) + constraint_system_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["constraint_system_prompt"] + ) + few_shots_system_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["few_shots_system_prompt"] + ) + agent_variable_name_system_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_variable_name_system_prompt"] + ) + agent_display_name_system_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_display_name_system_prompt"] + ) + agent_description_system_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_description_system_prompt"] + ) + user_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["user_prompt"] + ) + agent_name_regenerate_system_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_name_regenerate_system_prompt"] + ) + agent_name_regenerate_user_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_name_regenerate_user_prompt"] + ) + agent_display_name_regenerate_system_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_display_name_regenerate_system_prompt"] + ) + agent_display_name_regenerate_user_prompt: str = Field( + alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_display_name_regenerate_user_prompt"] + ) + + +class PromptTemplateRequest(BaseModel): + template_name: str + description: Optional[str] = None + template_type: str = "agent_generate" + template_content_zh: PromptTemplateContentRequest + template_content_en: Optional[PromptTemplateContentRequest] = None +class OptimizePromptSectionRequest(BaseModel): + task_description: str + agent_id: int + model_id: int + section_type: str + section_title: str + current_content: str + feedback: 
str tool_ids: Optional[List[int]] = Field( None, description="Optional: tool IDs from frontend (takes precedence over database query)") sub_agent_ids: Optional[List[int]] = Field( @@ -335,7 +437,7 @@ class AgentInfoRequest(BaseModel): author: Optional[str] = None model_name: Optional[str] = None model_id: Optional[int] = None - max_steps: Optional[int] = None + max_steps: Optional[int] = Field(default=None, ge=1, le=30) provide_run_summary: Optional[bool] = None duty_prompt: Optional[str] = None constraint_prompt: Optional[str] = None @@ -343,6 +445,8 @@ class AgentInfoRequest(BaseModel): enabled: Optional[bool] = None business_logic_model_name: Optional[str] = None business_logic_model_id: Optional[int] = None + prompt_template_id: Optional[int] = None + prompt_template_name: Optional[str] = None enabled_tool_ids: Optional[List[int]] = None enabled_skill_ids: Optional[List[int]] = None related_agent_ids: Optional[List[int]] = None @@ -375,6 +479,7 @@ class SkillInstanceInfoRequest(BaseModel): agent_id: int enabled: bool = True version_no: int = 0 + config_values: Optional[Dict[str, Any]] = None class ToolInstanceSearchRequest(BaseModel): @@ -432,6 +537,9 @@ class ExportAndImportAgentInfo(BaseModel): model_name: Optional[str] = None business_logic_model_id: Optional[int] = None business_logic_model_name: Optional[str] = None + skill_names: Optional[List[str]] = None + prompt_template_id: Optional[int] = None + prompt_template_name: Optional[str] = None class Config: arbitrary_types_allowed = True @@ -448,9 +556,16 @@ class ExportAndImportDataFormat(BaseModel): mcp_info: List[MCPInfo] +class SkillZipEntry(BaseModel): + """A skill bundled inside an agent export ZIP.""" + skill_name: str + skill_zip_base64: str + + class AgentImportRequest(BaseModel): agent_info: ExportAndImportDataFormat force_import: bool = False + skills: Optional[List[SkillZipEntry]] = None class AgentNameBatchRegenerateItem(BaseModel): @@ -505,7 +620,7 @@ def default(cls) -> 
"MemoryAgentShareMode": class VoiceConnectivityRequest(BaseModel): """Request model for voice service connectivity check""" model_type: str = Field(..., - description="Type of model to check ('stt')") + description="Type of model to check ('stt' or 'tts')") class VoiceConnectivityResponse(BaseModel): @@ -565,6 +680,8 @@ class MCPUpdateRequest(BaseModel): new_mcp_url: str = Field(..., description="New MCP server URL") new_authorization_token: Optional[str] = Field( None, description="New authorization token for MCP server authentication (e.g., Bearer token)") + custom_headers: Optional[Dict[str, Any]] = Field( + None, description="Custom HTTP headers as JSON object") # Tenant Management Data Models @@ -573,6 +690,22 @@ class TenantCreateRequest(BaseModel): """Request model for creating a tenant""" tenant_name: str = Field(..., min_length=1, description="Tenant display name") + skill_ids: Optional[List[int]] = Field( + default=None, + description="Skill IDs to install for the new tenant (legacy, use skill_names instead)" + ) + skill_names: Optional[List[str]] = Field( + default=None, + description="Skill names to install for the new tenant. " + "Each name is used to derive a .zip filename from " + "OFFICIAL_SKILLS_ZIP_PATH and installed via upload." + ) + locale: Optional[str] = Field( + default=None, + description="Frontend locale when creating the tenant (e.g. 'zh' or 'en'). " + "Determines the source label for auto-installed skills: " + "'zh' → '官方', other locales → 'official'." 
+ ) class TenantUpdateRequest(BaseModel): @@ -756,6 +889,8 @@ class ManageTenantModelCreateRequest(BaseModel): # STT specific fields model_appid: Optional[str] = Field(None, description="Application ID for STT models (e.g., Volcano Engine)") access_token: Optional[str] = Field(None, description="Access token for STT models (e.g., Volcano Engine)") + timeout_seconds: Optional[int] = Field(None, description="Request timeout in seconds") + concurrency_limit: Optional[int] = Field(None, description="Maximum concurrent requests for this model") class ManageTenantModelUpdateRequest(BaseModel): @@ -776,6 +911,8 @@ class ManageTenantModelUpdateRequest(BaseModel): # STT specific fields model_appid: Optional[str] = Field(None, description="Application ID for STT models") access_token: Optional[str] = Field(None, description="Access token for STT models") + timeout_seconds: Optional[int] = Field(None, description="Request timeout in seconds") + concurrency_limit: Optional[int] = Field(None, description="Maximum concurrent requests for this model") class ManageTenantModelDeleteRequest(BaseModel): @@ -907,7 +1044,8 @@ class SkillCreateRequest(BaseModel): tool_names: Optional[List[str]] = [] tags: Optional[List[str]] = [] source: Optional[str] = "custom" - params: Optional[Dict[str, Any]] = None + config_schemas: Optional[Dict[str, Any]] = None + config_values: Optional[Dict[str, Any]] = None files: Optional[List[Dict[str, str]]] = Field( default_factory=list, description="Additional skill files beyond SKILL.md. " @@ -930,7 +1068,8 @@ class SkillUpdateRequest(BaseModel): tool_names: Optional[List[str]] = None tags: Optional[List[str]] = None source: Optional[str] = None - params: Optional[Dict[str, Any]] = None + config_schemas: Optional[Dict[str, Any]] = None + config_values: Optional[Dict[str, Any]] = None files: Optional[List[SkillFileData]] = Field( default_factory=list, description="Updated skill files. Each entry has file_path and content. 
" @@ -947,7 +1086,8 @@ class SkillResponse(BaseModel): tool_ids: List[int] tags: List[str] source: str - params: Optional[Dict[str, Any]] = None + config_schemas: Optional[Dict[str, Any]] = None + config_values: Optional[Dict[str, Any]] = None created_by: Optional[str] = None create_time: Optional[str] = None updated_by: Optional[str] = None @@ -960,3 +1100,192 @@ class SkillCreateInteractiveRequest(BaseModel): existing_skill: Optional[Dict[str, Any]] = None complexity: Optional[str] = "simple" language: Optional[str] = "zh" + + +# --------------------------------------------------------------------------- +# MCP Management Data Models +# --------------------------------------------------------------------------- + +class MCPSourceType(str, Enum): + """MCP source type enumeration""" + LOCAL = "local" + MCP_REGISTRY = "mcp_registry" + COMMUNITY = "community" + + +class AddMcpServiceRequest(BaseModel): + """Request model for adding an MCP service""" + name: str = Field(..., min_length=1, description="MCP service name") + server_url: str = Field(..., min_length=1, description="MCP server URL") + description: Optional[str] = Field(None, description="MCP service description") + source: MCPSourceType = Field(default=MCPSourceType.LOCAL, description="MCP source type") + tags: List[str] = Field(default_factory=list, description="MCP tags") + authorization_token: Optional[str] = Field(None, description="Authorization token for MCP server") + custom_headers: Optional[Dict[str, Any]] = Field(None, description="Custom HTTP headers as JSON object") + container_config: Optional[Dict[str, Any]] = Field(None, description="Container configuration") + registry_json: Optional[Dict[str, Any]] = Field(None, description="Registry metadata JSON") + enabled: Optional[bool] = Field(default=False, description="Whether the MCP is enabled after creation") + + @field_validator("name", "server_url", "description", "authorization_token", mode="before") + @classmethod + def _strip_text(cls, 
value: Any): + if isinstance(value, str): + return value.strip() + return value + + +class AddContainerMcpServiceRequest(BaseModel): + """Request model for adding a container-based MCP service""" + name: str = Field(..., min_length=1, description="MCP service name") + description: Optional[str] = Field(None, description="MCP service description") + source: MCPSourceType = Field(default=MCPSourceType.LOCAL, description="MCP source type") + tags: List[str] = Field(default_factory=list, description="MCP tags") + authorization_token: Optional[str] = Field(None, description="Authorization token for MCP server") + registry_json: Optional[Dict[str, Any]] = Field(None, description="Registry metadata JSON") + port: int = Field(..., ge=1, le=65535, description="Host port for the container") + mcp_config: MCPConfigRequest = Field(..., description="MCP server configuration") + + @field_validator("name", "description", "authorization_token", mode="before") + @classmethod + def _strip_text(cls, value: Any): + if isinstance(value, str): + return value.strip() + return value + + +class UpdateMcpServiceRequest(BaseModel): + """Request model for updating an MCP service""" + mcp_id: int = Field(..., gt=0, description="MCP record ID") + name: str = Field(..., min_length=1, description="New MCP service name") + description: Optional[str] = Field(None, description="MCP service description") + server_url: str = Field(..., min_length=1, description="New MCP server URL") + tags: List[str] = Field(default_factory=list, description="MCP tags") + authorization_token: Optional[str] = Field(None, description="Authorization token for MCP server") + custom_headers: Optional[Dict[str, Any]] = Field(None, description="Custom HTTP headers as JSON object") + + @field_validator("name", "server_url", "description", "authorization_token", mode="before") + @classmethod + def _strip_text(cls, value: Any): + if isinstance(value, str): + return value.strip() + return value + + +class 
EnableMcpServiceRequest(BaseModel): + """Request model for enabling an MCP service""" + mcp_id: int = Field(..., gt=0, description="MCP record ID to enable") + + +class DisableMcpServiceRequest(BaseModel): + """Request model for disabling an MCP service""" + mcp_id: int = Field(..., gt=0, description="MCP record ID to disable") + + +class HealthcheckMcpServiceRequest(BaseModel): + """Request model for checking MCP service health""" + mcp_id: int = Field(..., gt=0, description="MCP record ID to health check") + + +class ListMcpToolsRequest(BaseModel): + """Request model for listing MCP service tools""" + mcp_id: int = Field(..., gt=0, description="MCP record ID") + + +class PortConflictCheckRequest(BaseModel): + """Request model for checking port availability""" + port: int = Field(..., ge=1, le=65535, description="Port number to check") + + +class ListMcpServicesQuery(BaseModel): + """Query parameters for listing MCP services""" + tag: Optional[str] = Field(None, description="Filter by tag") + + @field_validator("tag", mode="before") + @classmethod + def _strip_tag(cls, value: Any): + if isinstance(value, str): + stripped = value.strip() + return stripped or None + return value + + +class RegistryListQuery(BaseModel): + """Query parameters for listing MCP registry services""" + search: Optional[str] = Field(None, description="Search keyword") + include_deleted: bool = Field(default=False, description="Include deleted records") + updated_since: Optional[str] = Field(None, description="Filter by update time") + version: Optional[str] = Field(None, description="Filter by version") + cursor: Optional[str] = Field(None, description="Pagination cursor") + limit: int = Field(default=30, ge=1, le=100, description="Items per page") + + @field_validator("search", "updated_since", "version", "cursor", mode="before") + @classmethod + def _strip_text(cls, value: Any): + if isinstance(value, str): + stripped = value.strip() + return stripped or None + return value + + +class 
CommunityListRequest(BaseModel): + """Request model for listing community MCP services""" + search: Optional[str] = Field(None, description="Search keyword") + tag: Optional[str] = Field(None, description="Filter by tag") + transport_type: Optional[str] = Field(None,description="Filter by transport: url or container") + cursor: Optional[str] = Field(None, description="Pagination cursor") + limit: int = Field(default=30, ge=1, le=100, description="Items per page") + + @field_validator("search", "tag", "cursor", "transport_type", mode="before") + @classmethod + def _strip_text(cls, value: Any): + if isinstance(value, str): + stripped = value.strip() + return stripped or None + return value + + +class CommunityPublishRequest(BaseModel): + """Publish a local MCP to the community; optional fields override the snapshot.""" + + mcp_id: int = Field(..., gt=0, description="MCP record ID to publish") + name: Optional[str] = Field(None, description="Community display name override") + description: Optional[str] = Field(None, description="Description override") + version: Optional[str] = Field(None, description="Version override") + tags: Optional[List[str]] = Field(None, description="Tags override") + mcp_server: Optional[str] = Field(None, max_length=500, description="Remote MCP server URL override (URL / HTTP / SSE transports)") + config_json: Optional[Dict[str, Any]] = Field(None, description="Container MCP configuration JSON override") + + @field_validator("name", "description", "version", "mcp_server", mode="before") + @classmethod + def _strip_publish_optional_text(cls, value: Any): + if isinstance(value, str): + stripped = value.strip() + return stripped or None + return value + + +class CommunityUpdateRequest(BaseModel): + """Request model for updating community MCP service""" + community_id: int = Field(..., gt=0, description="Community record ID") + name: Optional[str] = Field(default=None, min_length=1, description="New MCP service name") + description: 
Optional[str] = Field(None, description="MCP service description") + tags: List[str] = Field(default_factory=list, description="MCP tags") + version: Optional[str] = Field(None, description="MCP version") + registry_json: Optional[Dict[str, Any]] = Field(None, description="Registry metadata JSON") + config_json: Optional[Dict[str, Any]] = Field( + None, + description="Container MCP configuration JSON (omit to leave unchanged)", + ) + + @field_validator("name", "description", "version", mode="before") + @classmethod + def _strip_text(cls, value: Any): + if isinstance(value, str): + stripped = value.strip() + return stripped or None + return value + + +class DeleteMcpServiceRequest(BaseModel): + """Request model for deleting an MCP service""" + mcp_id: int = Field(..., gt=0, description="MCP record ID to delete") diff --git a/backend/consts/oauth_providers.py b/backend/consts/oauth_providers.py index 2dd01f0d6..7429855b6 100644 --- a/backend/consts/oauth_providers.py +++ b/backend/consts/oauth_providers.py @@ -47,6 +47,32 @@ client_secret_env="GDE_OAUTH_CLIENT_SECRET", ) +LINK_APP_PROVIDER = OAuthProviderDefinition( + name="link_app", + display_name="Link App", + icon="link_app", + authorize_url=f"{os.getenv('LINK_APP_URL')}/CNS/oauth2/authorize", + authorize_params={"response_type": "code", "scope": "read write"}, + token_url=f"{os.getenv('LINK_APP_URL')}/CNS/oauth2/token", + token_params_map={ + "client_id": "client_id", + "client_secret": "client_secret", + "code": "code", + "grant_type": "grant_type", + "redirect_uri": "redirect_uri", + }, + token_error_key="error", + token_error_message_key="error_description", + userinfo_url=f"{os.getenv('LINK_APP_URL')}/CNS/getUserInfo", + userinfo_field_map={ + "id": "data.id", + "email": "data.email", + "username": "data.username", + }, + client_id_env="LINK_APP_OAUTH_CLIENT_ID", + client_secret_env="LINK_APP_OAUTH_CLIENT_SECRET", +) + WECHAT_PROVIDER = OAuthProviderDefinition( name="wechat", display_name="WeChat", @@ -89,6 
+115,7 @@ "github": GITHUB_PROVIDER, "wechat": WECHAT_PROVIDER, "gde": GDE_PROVIDER, + "link_app": LINK_APP_PROVIDER, } diff --git a/backend/consts/prompt_template.py b/backend/consts/prompt_template.py new file mode 100644 index 000000000..febcaeca5 --- /dev/null +++ b/backend/consts/prompt_template.py @@ -0,0 +1,15 @@ +PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP = { + "duty_system_prompt": "DUTY_SYSTEM_PROMPT", + "constraint_system_prompt": "CONSTRAINT_SYSTEM_PROMPT", + "few_shots_system_prompt": "FEW_SHOTS_SYSTEM_PROMPT", + "agent_variable_name_system_prompt": "AGENT_VARIABLE_NAME_SYSTEM_PROMPT", + "agent_display_name_system_prompt": "AGENT_DISPLAY_NAME_SYSTEM_PROMPT", + "agent_description_system_prompt": "AGENT_DESCRIPTION_SYSTEM_PROMPT", + "user_prompt": "USER_PROMPT", + "agent_name_regenerate_system_prompt": "AGENT_NAME_REGENERATE_SYSTEM_PROMPT", + "agent_name_regenerate_user_prompt": "AGENT_NAME_REGENERATE_USER_PROMPT", + "agent_display_name_regenerate_system_prompt": "AGENT_DISPLAY_NAME_REGENERATE_SYSTEM_PROMPT", + "agent_display_name_regenerate_user_prompt": "AGENT_DISPLAY_NAME_REGENERATE_USER_PROMPT", +} + +PROMPT_GENERATE_TEMPLATE_FIELDS = tuple(PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP.keys()) diff --git a/backend/data_process/ray_actors.py b/backend/data_process/ray_actors.py index 0dea828ce..c3879c007 100644 --- a/backend/data_process/ray_actors.py +++ b/backend/data_process/ray_actors.py @@ -1,3 +1,4 @@ +from io import BytesIO import logging import json import time @@ -5,8 +6,15 @@ import ray -from consts.const import RAY_ACTOR_NUM_CPUS, REDIS_BACKEND_URL, DEFAULT_EXPECTED_CHUNK_SIZE, DEFAULT_MAXIMUM_CHUNK_SIZE -from database.attachment_db import get_file_stream +from consts.const import ( + RAY_ACTOR_NUM_CPUS, + REDIS_BACKEND_URL, + DEFAULT_EXPECTED_CHUNK_SIZE, + DEFAULT_MAXIMUM_CHUNK_SIZE, + TABLE_TRANSFORMER_MODEL_PATH, + UNSTRUCTURED_DEFAULT_MODEL_INITIALIZE_PARAMS_JSON_PATH, +) +from database.attachment_db import build_s3_url, get_file_stream, 
upload_fileobj from database.model_management_db import get_model_by_model_id from nexent.data_process import DataProcessCore @@ -43,35 +51,16 @@ def _prepare_process_params( Normalize task/model-related processing params. """ process_params = dict(params) + self._apply_model_paths(process_params) if task_id: process_params["task_id"] = task_id - if not (model_id and tenant_id): - return process_params - - try: - model_record = get_model_by_model_id( - model_id=model_id, tenant_id=tenant_id) - if not model_record: - logger.warning( - f"[RayActor] Embedding model with ID {model_id} not found for tenant '{tenant_id}', using default chunk sizes") - return process_params - - expected_chunk_size = model_record.get( - "expected_chunk_size", DEFAULT_EXPECTED_CHUNK_SIZE) - maximum_chunk_size = model_record.get( - "maximum_chunk_size", DEFAULT_MAXIMUM_CHUNK_SIZE) - model_name = model_record.get("display_name") - - process_params["max_characters"] = maximum_chunk_size - process_params["new_after_n_chars"] = expected_chunk_size - - logger.info( - f"[RayActor] Using chunk sizes from embedding model '{model_name}' (ID: {model_id}): " - f"max_characters={maximum_chunk_size}, new_after_n_chars={expected_chunk_size}") - except Exception as e: - logger.warning( - f"[RayActor] Failed to retrieve chunk sizes from embedding model ID {model_id}: {e}. 
Using default chunk sizes") + # Reuse shared model param logic so we also keep extra fields + self._apply_model_chunk_sizes( + model_id=model_id, + tenant_id=tenant_id, + params=process_params, + ) return process_params def _run_file_process( @@ -82,24 +71,19 @@ def _run_file_process( process_params: Dict[str, Any], log_subject: str, ) -> List[Dict[str, Any]]: - chunks = self._processor.file_process( + result = self._processor.file_process( file_data=file_data, filename=filename, chunking_strategy=chunking_strategy, **process_params ) - - if chunks is None: - logger.warning( - f"[RayActor] file_process returned None for {log_subject}='{filename}'") - return [] - if not isinstance(chunks, list): - logger.error( - f"[RayActor] file_process returned non-list type {type(chunks)} for {log_subject}='{filename}'") - return [] - if len(chunks) == 0: - logger.warning( - f"[RayActor] file_process returned empty list for {log_subject}='{filename}'") + + chunks, images_info = self._normalize_processor_result(result) + if images_info: + self._append_image_chunks( + source=filename, chunks=chunks, images_info=images_info) + chunks = self._validate_chunks(chunks, filename) + if not chunks: return [] logger.info( @@ -161,8 +145,129 @@ def process_file( chunking_strategy=chunking_strategy, process_params=process_params, log_subject="source", - ) + ) + + def _apply_model_paths(self, params: Dict[str, Any]) -> None: + params["table_transformer_model_path"] = TABLE_TRANSFORMER_MODEL_PATH + params[ + "unstructured_default_model_initialize_params_json_path" + ] = UNSTRUCTURED_DEFAULT_MODEL_INITIALIZE_PARAMS_JSON_PATH + + def _apply_model_chunk_sizes( + self, + model_id: Optional[int], + tenant_id: Optional[str], + params: Dict[str, Any], + ) -> None: + if not (model_id and tenant_id): + return + + try: + model_record = get_model_by_model_id( + model_id=model_id, tenant_id=tenant_id) + if not model_record: + logger.warning( + f"[RayActor] Embedding model with ID {model_id} not found for 
tenant '{tenant_id}', using default chunk sizes") + return + + expected_chunk_size = model_record.get( + 'expected_chunk_size', DEFAULT_EXPECTED_CHUNK_SIZE) + maximum_chunk_size = model_record.get( + 'maximum_chunk_size', DEFAULT_MAXIMUM_CHUNK_SIZE) + model_name = model_record.get('display_name') + model_type = model_record.get('model_type') + + params['max_characters'] = maximum_chunk_size + params['new_after_n_chars'] = expected_chunk_size + if model_type: + params['model_type'] = model_type + + logger.info( + f"[RayActor] Using chunk sizes from embedding model '{model_name}' (ID: {model_id}): " + f"max_characters={maximum_chunk_size}, new_after_n_chars={expected_chunk_size}") + except Exception as e: + logger.warning( + f"[RayActor] Failed to retrieve chunk sizes from embedding model ID {model_id}: {e}. Using default chunk sizes") + + def _read_file_bytes(self, source: str) -> bytes: + try: + file_stream = get_file_stream(source) + if file_stream is None: + raise FileNotFoundError( + f"Unable to fetch file from URL: {source}") + return file_stream.read() + except Exception as e: + logger.error(f"Failed to fetch file from {source}: {e}") + raise + def _normalize_processor_result( + self, result: Any + ) -> tuple[List[Dict[str, Any]], List[Dict[str, Any]]]: + if isinstance(result, tuple) and len(result) == 2: + chunks, images_info = result + return chunks or [], images_info or [] + return result or [], [] + + def _append_image_chunks( + self, + source: str, + chunks: List[Dict[str, Any]], + images_info: List[Dict[str, Any]], + ) -> None: + folder = "images_in_attachments" + for index, image_data in enumerate(images_info): + if not isinstance(image_data, dict): + logger.warning( + f"[RayActor] Skipping image entry at index {index}: unexpected type {type(image_data)}" + ) + continue + if "image_bytes" not in image_data: + logger.warning( + f"[RayActor] Skipping image entry at index {index}: missing image_bytes" + ) + continue + + img_obj = 
BytesIO(image_data["image_bytes"]) + result = upload_fileobj( + file_obj=img_obj, + file_name=f"{index}.{image_data['image_format']}", + prefix=folder) + image_url = build_s3_url(result.get("object_name", "")) + + image_data["source_file"] = source + image_data["image_url"] = image_url + + chunks.append({ + "content": json.dumps({ + "source_file": source, + "position": image_data["position"], + "image_url": image_url, + }), + "filename": source, + "metadata": { + "chunk_index": len(chunks) + index, + "process_source": "UniversalImageExtractor", + "image_url": image_url, + } + }) + + def _validate_chunks( + self, chunks: Any, source: str + ) -> List[Dict[str, Any]]: + if chunks is None: + logger.warning( + f"[RayActor] file_process returned None for source='{source}'") + return [] + if not isinstance(chunks, list): + logger.error( + f"[RayActor] file_process returned non-list type {type(chunks)} for source='{source}'") + return [] + if len(chunks) == 0: + logger.warning( + f"[RayActor] file_process returned empty list for source='{source}'") + return [] + return chunks + def process_bytes( self, file_bytes: bytes, diff --git a/backend/database/agent_db.py b/backend/database/agent_db.py index 7d14d7b8e..82696ffab 100644 --- a/backend/database/agent_db.py +++ b/backend/database/agent_db.py @@ -1,9 +1,10 @@ import logging from typing import List -from sqlalchemy import update +from sqlalchemy import or_, update from database.client import get_db_session, as_dict, filter_property from database.db_models import AgentInfo, ToolInstance, AgentRelation +from consts.const import ASSET_OWNER_TENANT_ID from utils.str_utils import convert_list_to_string logger = logging.getLogger("agent_db") @@ -22,9 +23,12 @@ def search_agent_info_by_agent_id(agent_id: int, tenant_id: str, version_no: int with get_db_session() as session: agent = session.query(AgentInfo).filter( AgentInfo.agent_id == agent_id, - AgentInfo.tenant_id == tenant_id, AgentInfo.version_no == version_no, - 
AgentInfo.delete_flag != 'Y' + or_( + AgentInfo.tenant_id == tenant_id, + AgentInfo.tenant_id == ASSET_OWNER_TENANT_ID, + ), + AgentInfo.delete_flag != 'Y', ).first() if not agent: @@ -158,7 +162,7 @@ def create_agent(agent_info, tenant_id: str, user_id: str): :return: Created agent object """ info_with_metadata = dict(agent_info) - info_with_metadata.setdefault("max_steps", 5) + info_with_metadata.setdefault("max_steps", 15) info_with_metadata.update({ "tenant_id": tenant_id, "version_no": 0, # Default to draft version @@ -192,6 +196,8 @@ def create_agent(agent_info, tenant_id: str, user_id: str): "business_description": new_agent.business_description, "business_logic_model_id": new_agent.business_logic_model_id, "business_logic_model_name": new_agent.business_logic_model_name, + "prompt_template_id": new_agent.prompt_template_id, + "prompt_template_name": new_agent.prompt_template_name, "group_ids": new_agent.group_ids, "is_new": new_agent.is_new, "enable_context_manager": new_agent.enable_context_manager, diff --git a/backend/database/agent_version_db.py b/backend/database/agent_version_db.py index aea8c06dc..c895cb249 100644 --- a/backend/database/agent_version_db.py +++ b/backend/database/agent_version_db.py @@ -1,9 +1,10 @@ import logging from typing import List, Optional, Tuple -from sqlalchemy import select, insert, update, delete, func +from sqlalchemy import or_, select, insert, update, delete, func from database.client import get_db_session, as_dict from database.db_models import AgentInfo, ToolInstance, AgentRelation, AgentVersion, SkillInstance +from consts.const import ASSET_OWNER_TENANT_ID logger = logging.getLogger("agent_version_db") @@ -28,7 +29,6 @@ def search_version_by_version_no( with get_db_session() as session: version = session.query(AgentVersion).filter( AgentVersion.agent_id == agent_id, - AgentVersion.tenant_id == tenant_id, AgentVersion.version_no == version_no, AgentVersion.delete_flag == 'N', ).first() @@ -77,7 +77,10 @@ def 
query_current_version_no( with get_db_session() as session: agent = session.query(AgentInfo).filter( AgentInfo.agent_id == agent_id, - AgentInfo.tenant_id == tenant_id, + or_( + AgentInfo.tenant_id == tenant_id, + AgentInfo.tenant_id == ASSET_OWNER_TENANT_ID, + ), AgentInfo.version_no == 0, AgentInfo.delete_flag == 'N', ).first() @@ -96,11 +99,17 @@ def query_agent_snapshot( # Query agent info snapshot agent = session.query(AgentInfo).filter( AgentInfo.agent_id == agent_id, - AgentInfo.tenant_id == tenant_id, + or_( + AgentInfo.tenant_id == tenant_id, + AgentInfo.tenant_id == ASSET_OWNER_TENANT_ID, + ), AgentInfo.version_no == version_no, AgentInfo.delete_flag == 'N', ).first() + if agent is not None: + tenant_id = agent.tenant_id + # Query tool instances snapshot tools = session.query(ToolInstance).filter( ToolInstance.agent_id == agent_id, diff --git a/backend/database/attachment_db.py b/backend/database/attachment_db.py index 187381cd2..06b84e5ac 100644 --- a/backend/database/attachment_db.py +++ b/backend/database/attachment_db.py @@ -2,13 +2,66 @@ import os import uuid from datetime import datetime -from typing import Any, BinaryIO, Dict, List, Optional +from typing import Any, BinaryIO, Dict, List, Optional, Tuple from .client import minio_client +from consts.const import S3_URL_PREFIX from consts.const import NORTHBOUND_EXTERNAL_URL from urllib.parse import quote +def _normalize_object_and_bucket(object_name: str, bucket: Optional[str] = None) -> Tuple[str, Optional[str]]: + """ + Normalize object_name + bucket from supported URL styles. 
+ + Supports: + - s3://bucket/key + - /bucket/key + - key (uses provided bucket or default bucket) + """ + if not object_name: + return object_name, bucket + + if object_name.startswith(S3_URL_PREFIX): + s3_path = object_name[len(S3_URL_PREFIX) :] + parts = s3_path.split("/", 1) + parsed_bucket = parts[0] if parts[0] else None + parsed_key = parts[1] if len(parts) > 1 else "" + return parsed_key, parsed_bucket or bucket + + if object_name.startswith("/"): + path = object_name.lstrip("/") + parts = path.split("/", 1) + parsed_bucket = parts[0] if parts[0] else None + parsed_key = parts[1] if len(parts) > 1 else "" + return parsed_key, parsed_bucket or bucket + + return object_name, bucket + + +def build_s3_url(object_name: str, bucket: Optional[str] = None) -> str: + """ + Build an s3://bucket/key style URL from an object name (or passthrough if already s3://). + """ + if not object_name: + return "" + + if object_name.startswith(S3_URL_PREFIX): + return object_name + + if object_name.startswith("/"): + path = object_name.lstrip("/") + parts = path.split("/", 1) + if len(parts) == 2: + return f"{S3_URL_PREFIX}{parts[0]}/{parts[1]}" + return f"{S3_URL_PREFIX}{parts[0]}/" + + resolved_bucket = bucket or minio_client.default_bucket + if resolved_bucket: + return f"{S3_URL_PREFIX}{resolved_bucket}/{object_name}" + return f"{S3_URL_PREFIX}{object_name}" + + def _build_mcp_presigned_url(presigned_url: str) -> str: """ Build northbound API proxy URL for MCP tools. 
@@ -217,6 +270,7 @@ def get_file_size_from_minio(object_name: str, bucket: Optional[str] = None) -> """ Get file size by object name """ + object_name, bucket = _normalize_object_and_bucket(object_name, bucket) # Ensure minio_client is initialized before accessing storage_config minio_client._ensure_initialized() bucket = bucket or minio_client.storage_config.default_bucket @@ -235,6 +289,7 @@ def file_exists(object_name: str, bucket: Optional[str] = None) -> bool: bool: True if file exists, False otherwise """ try: + object_name, bucket = _normalize_object_and_bucket(object_name, bucket) return minio_client.file_exists(object_name, bucket) except Exception: return False @@ -252,6 +307,8 @@ def copy_file(source_object: str, dest_object: str, bucket: Optional[str] = None Returns: Dict[str, Any]: Result containing success flag and error message (if any) """ + source_object, bucket = _normalize_object_and_bucket(source_object, bucket) + dest_object, bucket = _normalize_object_and_bucket(dest_object, bucket) success, result = minio_client.copy_file(source_object, dest_object, bucket) if success: return {"success": True, "object_name": result} @@ -296,6 +353,7 @@ def delete_file(object_name: str, bucket: Optional[str] = None) -> Dict[str, Any Returns: Dict[str, Any]: Delete result, containing success flag and error message (if any) """ + object_name, bucket = _normalize_object_and_bucket(object_name, bucket) if not bucket: minio_client._ensure_initialized() bucket = minio_client.storage_config.default_bucket @@ -320,6 +378,7 @@ def get_file_stream(object_name: str, bucket: Optional[str] = None) -> Optional[ Returns: Optional[BinaryIO]: Standard BinaryIO stream object, or None if failed """ + object_name, bucket = _normalize_object_and_bucket(object_name, bucket) success, result = minio_client.get_file_stream(object_name, bucket) if not success: return None diff --git a/backend/database/client.py b/backend/database/client.py index 05f8940b9..e095c5636 100644 --- 
a/backend/database/client.py +++ b/backend/database/client.py @@ -89,6 +89,9 @@ def __init__(self): if MinioClient._initialized: return MinioClient._initialized = True + # Explicitly initialize attributes so external callers never hit missing-attribute errors. + self._storage_client = None + self.storage_config = None def _ensure_initialized(self): """Lazily initialize the storage client on first use.""" @@ -108,6 +111,23 @@ def _ensure_initialized(self): return True return False + @property + def default_bucket(self) -> Optional[str]: + """ + Resolve default bucket safely for callers that need bucket info. + Falls back to configured constant when lazy init has not run yet. + """ + try: + self._ensure_initialized() + except Exception: + # Keep this accessor resilient; operational methods can still raise + # detailed storage errors when invoked. + pass + + if getattr(self, "storage_config", None) is not None: + return self.storage_config.default_bucket + return MINIO_DEFAULT_BUCKET + def upload_file( self, file_path: str, diff --git a/backend/database/community_mcp_db.py b/backend/database/community_mcp_db.py new file mode 100644 index 000000000..92b78a4ed --- /dev/null +++ b/backend/database/community_mcp_db.py @@ -0,0 +1,181 @@ +import logging +from typing import Any, Dict, List + +from sqlalchemy import func, or_ + +from database.client import as_dict, filter_property, get_db_session +from database.db_models import McpCommunityRecord + +logger = logging.getLogger("community_mcp_db") + + +def get_mcp_community_records( + *, + search: str | None = None, + tag: str | None = None, + transport_type: str | None = None, + cursor: str | None = None, + limit: int = 30, +) -> Dict[str, Any]: + with get_db_session() as session: + query = session.query(McpCommunityRecord).filter( + McpCommunityRecord.delete_flag != "Y" + ) + + if transport_type: + query = query.filter(McpCommunityRecord.transport_type == transport_type) + + if tag: + query = 
query.filter(McpCommunityRecord.tags.any(tag)) + + if search: + keyword = f"%{search}%" + query = query.filter( + or_( + McpCommunityRecord.mcp_name.ilike(keyword), + McpCommunityRecord.description.ilike(keyword), + func.array_to_string(McpCommunityRecord.tags, ",").ilike(keyword), + ) + ) + + cursor_id: int | None = None + if cursor: + try: + cursor_id = int(cursor) + except ValueError: + cursor_id = None + + if cursor_id is not None: + query = query.filter(McpCommunityRecord.community_id < cursor_id) + + rows: List[McpCommunityRecord] = ( + query.order_by(McpCommunityRecord.community_id.desc()) + .limit(limit + 1) + .all() + ) + + has_next = len(rows) > limit + page_rows = rows[:limit] + + next_cursor = None + if has_next and page_rows: + next_cursor = str(page_rows[-1].community_id) + + return { + "count": len(page_rows), + "nextCursor": next_cursor, + "items": [as_dict(row) for row in page_rows], + } + + +def get_mcp_community_tag_stats() -> List[Dict[str, Any]]: + with get_db_session() as session: + rows = ( + session.query( + func.unnest(McpCommunityRecord.tags).label("tag"), + func.count(McpCommunityRecord.community_id).label("count"), + ) + .filter( + McpCommunityRecord.delete_flag != "Y", + ) + .group_by("tag") + .order_by(func.count(McpCommunityRecord.community_id).desc(), "tag") + .all() + ) + return [{"tag": str(row.tag), "count": int(row.count)} for row in rows if row.tag] + + +def create_mcp_community_record(mcp_data: Dict[str, Any], tenant_id: str, user_id: str) -> int: + with get_db_session() as session: + mcp_data.update({ + "tenant_id": tenant_id, + "user_id": user_id, + "created_by": user_id, + "updated_by": user_id, + "delete_flag": "N", + "source": "community", + }) + new_record = McpCommunityRecord(**filter_property(mcp_data, McpCommunityRecord)) + session.add(new_record) + session.flush() + return int(new_record.community_id) + + +def get_mcp_community_record_by_id_and_tenant(community_id: int, tenant_id: str) -> Dict[str, Any] | None: + with 
get_db_session() as session: + record = session.query(McpCommunityRecord).filter( + McpCommunityRecord.community_id == community_id, + McpCommunityRecord.tenant_id == tenant_id, + McpCommunityRecord.delete_flag != "Y", + ).first() + return as_dict(record) if record else None + + +def update_mcp_community_record_by_id( + *, + community_id: int, + tenant_id: str, + user_id: str, + name: str | None = None, + description: str | None = None, + tags: List[str] | None = None, + version: str | None = None, + registry_json: Dict[str, Any] | None = None, + config_json: Dict[str, Any] | None = None, +) -> None: + update_fields: Dict[str, Any] = {"updated_by": user_id} + + if name is not None: + update_fields["mcp_name"] = name + if description is not None: + update_fields["description"] = description + if tags is not None: + update_fields["tags"] = tags + if version is not None: + update_fields["version"] = version + if registry_json is not None: + update_fields["registry_json"] = registry_json + if config_json is not None: + update_fields["config_json"] = config_json + + with get_db_session() as session: + session.query(McpCommunityRecord).filter( + McpCommunityRecord.community_id == community_id, + McpCommunityRecord.tenant_id == tenant_id, + McpCommunityRecord.delete_flag != "Y", + ).update(update_fields) + + +def delete_mcp_community_record_by_id(*, community_id: int, tenant_id: str, user_id: str) -> None: + with get_db_session() as session: + session.query(McpCommunityRecord).filter( + McpCommunityRecord.community_id == community_id, + McpCommunityRecord.tenant_id == tenant_id, + McpCommunityRecord.delete_flag != "Y", + ).update({"delete_flag": "Y", "updated_by": user_id}) + + +def list_mcp_community_records_by_tenant(tenant_id: str) -> List[Dict[str, Any]]: + with get_db_session() as session: + rows = session.query(McpCommunityRecord).filter( + McpCommunityRecord.tenant_id == tenant_id, + McpCommunityRecord.delete_flag != "Y", + 
).order_by(McpCommunityRecord.community_id.desc()).all() + return [as_dict(row) for row in rows] + +def get_mcp_community_tag_stats_by_tenant(tenant_id: str) -> List[Dict[str, Any]]: + with get_db_session() as session: + rows = ( + session.query( + func.unnest(McpCommunityRecord.tags).label("tag"), + func.count(McpCommunityRecord.community_id).label("count"), + ) + .filter( + McpCommunityRecord.tenant_id == tenant_id, + McpCommunityRecord.delete_flag != "Y", + ) + .group_by("tag") + .order_by(func.count(McpCommunityRecord.community_id).desc(), "tag") + .all() + ) + return [{"tag": str(row.tag), "count": int(row.count)} for row in rows if row.tag] diff --git a/backend/database/db_models.py b/backend/database/db_models.py index baa8e903e..b779266c9 100644 --- a/backend/database/db_models.py +++ b/backend/database/db_models.py @@ -1,5 +1,5 @@ -from sqlalchemy import BigInteger, Boolean, Column, Integer, JSON, Numeric, Sequence, String, Text, TIMESTAMP, UniqueConstraint, Index, Float -from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy import BigInteger, Boolean, Column, Integer, JSON, Numeric, Sequence, String, Text, TIMESTAMP, UniqueConstraint, Index, Float, text +from sqlalchemy.dialects.postgresql import ARRAY, JSONB from sqlalchemy.orm import DeclarativeBase from sqlalchemy.sql import func @@ -182,6 +182,10 @@ class ModelRecord(TableBase): String(100), doc="Application ID for model authentication (used by some STT/TTS providers like Volcano Engine)") access_token = Column( String(100), doc="Access token for model authentication (used by some STT/TTS providers like Volcano Engine)") + timeout_seconds = Column( + Integer, doc="Request timeout in seconds for this model. Default is 120 seconds.") + concurrency_limit = Column( + Integer, doc="Maximum concurrent requests for this model. 
Default is null (unlimited).") class ModelMonitoringRecord(SimpleTableBase): @@ -313,6 +317,8 @@ class AgentInfo(TableBase): Text, doc="Manually entered by the user to describe the entire business process") business_logic_model_name = Column(String(100), doc="Model name used for business logic prompt generation") business_logic_model_id = Column(Integer, doc="Model ID used for business logic prompt generation, foreign key reference to model_record_t.model_id") + prompt_template_id = Column(Integer, doc="Prompt template ID used for business logic prompt generation") + prompt_template_name = Column(String(100), doc="Prompt template name used for business logic prompt generation") group_ids = Column(String, doc="Agent group IDs list") is_new = Column(Boolean, default=False, doc="Whether this agent is marked as new for the user") current_version_no = Column(Integer, nullable=True, doc="Current published version number. NULL means no version published yet") @@ -320,6 +326,41 @@ class AgentInfo(TableBase): enable_context_manager = Column(Boolean, default=False, doc="Whether to enable context management (compression) for this agent") +class PromptTemplate(TableBase): + """ + Prompt template table for user-defined prompt generation templates. 
+ """ + __tablename__ = "ag_prompt_template_t" + __table_args__ = ( + Index( + "uq_prompt_template_user_name_active", + "tenant_id", + "user_id", + "template_name", + unique=True, + postgresql_where=text("delete_flag = 'N'"), + ), + Index( + "idx_ag_prompt_template_t_user", + "tenant_id", + "user_id", + "template_type", + postgresql_where=text("delete_flag = 'N'"), + ), + {"schema": SCHEMA}, + ) + + template_id = Column(Integer, Sequence( + "ag_prompt_template_t_template_id_seq", schema=SCHEMA), primary_key=True, nullable=False, autoincrement=True, doc="Prompt template ID") + template_name = Column(String(100), nullable=False, doc="Prompt template name") + description = Column(String(500), doc="Prompt template description") + template_type = Column(String(50), nullable=False, default="agent_generate", doc="Prompt template type") + tenant_id = Column(String(100), nullable=False, doc="Tenant ID") + user_id = Column(String(100), nullable=False, doc="User ID") + template_content_zh = Column(JSONB, nullable=False, doc="Chinese prompt template content") + template_content_en = Column(JSONB, doc="English prompt template content") + + class ToolInstance(TableBase): """ Information table for tenant tool configuration. 
@@ -426,12 +467,52 @@ class McpRecord(TableBase): String(200), doc="Docker container ID for MCP service, None for non-containerized MCP", ) + container_port = Column( + Integer, + doc="Host port bound for containerized MCP service", + ) authorization_token = Column( String(500), doc="Authorization token for MCP server authentication (e.g., Bearer token)", default=None, ) + custom_headers = Column( + JSON, + doc="Custom HTTP headers as JSON object for MCP server requests", + default=None, + ) + source = Column(String(30), doc="Source type: local/mcp_registry/community") + registry_json = Column(JSONB, doc="Full MCP registry server.json snapshot") + config_json = Column(JSON, doc="MCP config data") + enabled = Column(Boolean, default=True, doc="Enabled") + tags = Column(ARRAY(Text), doc="Tags") + description = Column(Text, doc="Description") + + +class McpCommunityRecord(TableBase): + """Community MCP market records table.""" + + __tablename__ = "mcp_community_record_t" + __table_args__ = {"schema": SCHEMA} + community_id = Column( + Integer, + Sequence("mcp_community_record_t_community_id_seq", schema=SCHEMA), + primary_key=True, + nullable=False, + doc="Community record ID, unique primary key", + ) + tenant_id = Column(String(100), doc="Publisher tenant ID") + user_id = Column(String(100), doc="Publisher user ID") + mcp_name = Column(String(100), doc="MCP name") + mcp_server = Column(String(500), doc="MCP server URL") + source = Column(String(30), doc="Source type, fixed to community") + version = Column(String(50), doc="MCP version") + registry_json = Column(JSONB, doc="Full MCP metadata JSON") + transport_type = Column(String(30), doc="Transport type: http/sse/container") + config_json = Column(JSON, doc="Public-shareable MCP configuration JSON") + tags = Column(ARRAY(Text), doc="Tags") + description = Column(Text, doc="Description") class UserTenant(TableBase): """ @@ -628,7 +709,7 @@ class UserOAuthAccount(TableBase): ) user_id = Column(String(100), 
nullable=False, doc="Supabase user UUID") provider = Column( - String(30), nullable=False, doc="OAuth provider name: github, wechat" + String(30), nullable=False, doc="OAuth provider name: github, wechat, gde, link_app" ) provider_user_id = Column( String(200), nullable=False, doc="User ID from the OAuth provider" @@ -648,10 +729,12 @@ class SkillInfo(TableBase): skill_id = Column(Integer, Sequence("ag_skill_info_t_skill_id_seq", schema=SCHEMA), primary_key=True, nullable=False, autoincrement=True, doc="Skill ID") skill_name = Column(String(100), nullable=False, unique=True, doc="Unique skill name") + tenant_id = Column(String(100), nullable=True, doc="Tenant ID for multi-tenancy. NULL for pre-existing skills.") skill_description = Column(String(1000), doc="Skill description") skill_tags = Column(JSON, doc="Skill tags as JSON array") skill_content = Column(Text, doc="Skill content in markdown format") - params = Column(JSON, doc="Skill configuration parameters as JSON object") + config_schemas = Column(JSON, doc="Parameter metadata from config/schema.yaml") + config_values = Column(JSON, doc="Runtime parameter values from config/config.yaml") source = Column(String(30), nullable=False, default="official", doc="Skill source: official, custom, etc.") @@ -691,6 +774,8 @@ class SkillInstance(TableBase): tenant_id = Column(String(100), doc="Tenant ID") enabled = Column(Boolean, default=True, doc="Whether this skill is enabled for the agent") version_no = Column(Integer, default=0, primary_key=True, nullable=False, doc="Version number. 
0 = draft/editing state, >=1 = published snapshot") + config_values = Column(JSON, doc="Per-agent runtime parameter values (mirrors ag_tool_instance_t.params)") + config_schemas = Column(JSON, doc="Per-agent parameter schema overrides from config/schema.yaml") class OuterApiService(TableBase): diff --git a/backend/database/invitation_db.py b/backend/database/invitation_db.py index f7e27d005..32523cd06 100644 --- a/backend/database/invitation_db.py +++ b/backend/database/invitation_db.py @@ -300,8 +300,8 @@ def query_invitations_with_pagination( TenantInvitationCode.delete_flag == "N" ) - # Apply tenant filter if provided - if tenant_id: + # Apply tenant filter when tenant_id is specified (including ASSET_OWNER virtual tenant) + if tenant_id is not None: query = query.filter(TenantInvitationCode.tenant_id == tenant_id) # Apply sorting diff --git a/backend/database/knowledge_db.py b/backend/database/knowledge_db.py index 8674bb4fb..9a8b1c8c1 100644 --- a/backend/database/knowledge_db.py +++ b/backend/database/knowledge_db.py @@ -183,7 +183,7 @@ def update_knowledge_record(query: Dict[str, Any]) -> bool: # Update group IDs if query.get("group_ids") is not None: record.group_ids = query["group_ids"] - + # Update timestamp and user if query.get("user_id"): record.updated_by = query["user_id"] @@ -259,7 +259,7 @@ def get_knowledge_record(query: Optional[Dict[str, Any]] = None) -> Dict[str, An if 'tenant_id' in query and query['tenant_id'] is not None: db_query = db_query.filter( KnowledgeRecord.tenant_id == query['tenant_id']) - + result = db_query.first() if result: @@ -404,14 +404,25 @@ def get_index_name_by_knowledge_name(knowledge_name: str, tenant_id: str) -> str """ try: with get_db_session() as session: + # First try resolving by user-facing knowledge_name. 
result = session.query(KnowledgeRecord).filter( KnowledgeRecord.knowledge_name == knowledge_name, KnowledgeRecord.tenant_id == tenant_id, KnowledgeRecord.delete_flag != 'Y' ).first() - if result: return result.index_name + + # Backward/forward compatibility: if caller already passes internal index_name, + # accept it directly by resolving on index_name as well. + index_result = session.query(KnowledgeRecord).filter( + KnowledgeRecord.index_name == knowledge_name, + KnowledgeRecord.tenant_id == tenant_id, + KnowledgeRecord.delete_flag != 'Y' + ).first() + if index_result: + return index_result.index_name + raise ValueError( f"Knowledge base '{knowledge_name}' not found for the current tenant" ) diff --git a/backend/database/model_management_db.py b/backend/database/model_management_db.py index cb1c6c69f..1a1a98c8b 100644 --- a/backend/database/model_management_db.py +++ b/backend/database/model_management_db.py @@ -1,3 +1,4 @@ +import logging from typing import Any, Dict, List, Optional from sqlalchemy import and_, desc, func, insert, select, update @@ -7,6 +8,8 @@ from .db_models import ModelRecord from .utils import add_creation_tracking, add_update_tracking +logger = logging.getLogger("database.model_management_db") + def create_model_record(model_data: Dict[str, Any], user_id: str, tenant_id: str) -> bool: """ @@ -170,7 +173,7 @@ def get_model_records(filters: Optional[Dict[str, Any]], tenant_id: str) -> List return result_list -def get_model_by_display_name(display_name: str, tenant_id: str) -> Optional[Dict[str, Any]]: +def get_model_by_display_name(display_name: str, tenant_id: str, model_type: str = None) -> Optional[Dict[str, Any]]: """ Get a model record by display name @@ -179,6 +182,11 @@ def get_model_by_display_name(display_name: str, tenant_id: str) -> Optional[Dic tenant_id: """ filters = {'display_name': display_name} + + if model_type in ["multiEmbedding", "multi_embedding"]: + filters['model_type'] = "multi_embedding" + elif model_type == 
"embedding": + filters['model_type'] = "embedding" records = get_model_records(filters, tenant_id) if not records: @@ -203,7 +211,7 @@ def get_models_by_display_name(display_name: str, tenant_id: str) -> List[Dict[s return get_model_records(filters, tenant_id) -def get_model_id_by_display_name(display_name: str, tenant_id: str) -> Optional[int]: +def get_model_id_by_display_name(display_name: str, tenant_id: str, model_type: str = None) -> Optional[int]: """ Get a model ID by display name @@ -214,7 +222,7 @@ def get_model_id_by_display_name(display_name: str, tenant_id: str) -> Optional[ Returns: Optional[int]: Model ID """ - model = get_model_by_display_name(display_name, tenant_id) + model = get_model_by_display_name(display_name, tenant_id, model_type) return model["model_id"] if model else None diff --git a/backend/database/prompt_template_db.py b/backend/database/prompt_template_db.py new file mode 100644 index 000000000..fbc286cf9 --- /dev/null +++ b/backend/database/prompt_template_db.py @@ -0,0 +1,165 @@ +import logging +from typing import Optional + +from sqlalchemy import select, update + +from database.client import as_dict, filter_property, get_db_session +from database.db_models import PromptTemplate + +logger = logging.getLogger("prompt_template_db") + + +def create_prompt_template(template_data: dict) -> dict: + """Create a prompt template.""" + with get_db_session() as session: + prompt_template = PromptTemplate( + **filter_property(template_data, PromptTemplate) + ) + prompt_template.delete_flag = "N" + session.add(prompt_template) + session.flush() + return as_dict(prompt_template) + + +def upsert_prompt_template_by_id(template_id: int, template_data: dict, user_id: str) -> dict: + """Create or update a prompt template with a fixed template ID.""" + with get_db_session() as session: + prompt_template = session.query(PromptTemplate).filter( + PromptTemplate.template_id == template_id, + ).first() + + filtered_data = filter_property(template_data, 
PromptTemplate) + if prompt_template: + for key, value in filtered_data.items(): + setattr(prompt_template, key, value) + prompt_template.updated_by = user_id + else: + prompt_template = PromptTemplate(**filtered_data) + prompt_template.template_id = template_id + prompt_template.delete_flag = filtered_data.get("delete_flag", "N") + session.add(prompt_template) + + session.flush() + return as_dict(prompt_template) + + +def update_prompt_template(template_id: int, template_data: dict, user_id: str) -> dict: + """Update a prompt template.""" + with get_db_session() as session: + prompt_template = session.query(PromptTemplate).filter( + PromptTemplate.template_id == template_id, + PromptTemplate.delete_flag == "N", + ).first() + + if not prompt_template: + raise ValueError("prompt template not found") + + for key, value in filter_property(template_data, PromptTemplate).items(): + if value is None: + continue + setattr(prompt_template, key, value) + + prompt_template.updated_by = user_id + session.flush() + return as_dict(prompt_template) + + +def delete_prompt_template(template_id: int, user_id: str) -> int: + """Soft-delete a prompt template.""" + with get_db_session() as session: + result = session.execute( + update(PromptTemplate) + .where( + PromptTemplate.template_id == template_id, + PromptTemplate.delete_flag == "N", + ) + .values(delete_flag="Y", updated_by=user_id) + ) + return result.rowcount + + +def query_prompt_templates_by_user( + tenant_id: str, + user_id: str, + template_type: str = "agent_generate", +) -> list[dict]: + """Query prompt templates by tenant and user.""" + with get_db_session() as session: + templates = session.query(PromptTemplate).filter( + PromptTemplate.tenant_id == tenant_id, + PromptTemplate.user_id == user_id, + PromptTemplate.template_type == template_type, + PromptTemplate.delete_flag == "N", + ).order_by(PromptTemplate.update_time.desc(), PromptTemplate.template_id.desc()).all() + return [as_dict(template) for template in 
templates] + + +def get_prompt_template_by_id( + template_id: int, + tenant_id: str, + user_id: str, + template_type: str = "agent_generate", +) -> Optional[dict]: + """Get a prompt template by ID.""" + with get_db_session() as session: + template = session.query(PromptTemplate).filter( + PromptTemplate.template_id == template_id, + PromptTemplate.tenant_id == tenant_id, + PromptTemplate.user_id == user_id, + PromptTemplate.template_type == template_type, + PromptTemplate.delete_flag == "N", + ).first() + return as_dict(template) if template else None + + +def get_prompt_template_by_name( + template_name: str, + tenant_id: str, + user_id: str, + template_type: str = "agent_generate", +) -> Optional[dict]: + """Get a prompt template by name.""" + with get_db_session() as session: + template = session.query(PromptTemplate).filter( + PromptTemplate.template_name == template_name, + PromptTemplate.tenant_id == tenant_id, + PromptTemplate.user_id == user_id, + PromptTemplate.template_type == template_type, + PromptTemplate.delete_flag == "N", + ).first() + return as_dict(template) if template else None + + +def get_prompt_template_by_template_id( + template_id: int, + template_type: str = "agent_generate", + include_deleted: bool = False, +) -> Optional[dict]: + """Get a prompt template by template ID regardless of owner.""" + with get_db_session() as session: + query = session.query(PromptTemplate).filter( + PromptTemplate.template_id == template_id, + PromptTemplate.template_type == template_type, + ) + if not include_deleted: + query = query.filter(PromptTemplate.delete_flag == "N") + template = query.first() + return as_dict(template) if template else None + + +def query_prompt_template_names( + tenant_id: str, + user_id: str, + template_type: str = "agent_generate", +) -> set[str]: + """Query all active prompt template names for the current user.""" + with get_db_session() as session: + rows = session.execute( + select(PromptTemplate.template_name).where( + 
PromptTemplate.tenant_id == tenant_id, + PromptTemplate.user_id == user_id, + PromptTemplate.template_type == template_type, + PromptTemplate.delete_flag == "N", + ) + ).all() + return {row[0] for row in rows if row and row[0]} diff --git a/backend/database/remote_mcp_db.py b/backend/database/remote_mcp_db.py index d535f9fba..b08769437 100644 --- a/backend/database/remote_mcp_db.py +++ b/backend/database/remote_mcp_db.py @@ -15,16 +15,31 @@ def create_mcp_record(mcp_data: Dict[str, Any], tenant_id: str, user_id: str): :param tenant_id: Tenant ID :param user_id: User ID :return: Created MCP record + + Note: Only fields defined in the McpRecord model are inserted. + Fields like 'transport_type' and 'version' are not part of McpRecord + and will be ignored. """ + # Filter to only include fields that exist in the model + # McpRecord fields: mcp_id, tenant_id, user_id, mcp_name, mcp_server, status, + # container_id, container_port, authorization_token, source, registry_json, + # config_json, enabled, tags, description, create_time, update_time, created_by, updated_by, delete_flag + allowed_fields = { + 'mcp_name', 'mcp_server', 'status', 'container_id', 'container_port', + 'authorization_token', 'custom_headers', 'source', 'registry_json', 'config_json', + 'enabled', 'tags', 'description' + } + + filtered_data = {k: v for k, v in mcp_data.items() if k in allowed_fields and v is not None} + filtered_data.update({ + "tenant_id": tenant_id, + "user_id": user_id, + "created_by": user_id, + "updated_by": user_id, + "delete_flag": "N" + }) with get_db_session() as session: - mcp_data.update({ - "tenant_id": tenant_id, - "user_id": user_id, - "created_by": user_id, - "updated_by": user_id, - "delete_flag": "N" - }) - new_mcp = McpRecord(**filter_property(mcp_data, McpRecord)) + new_mcp = McpRecord(**filtered_data) session.add(new_mcp) @@ -80,7 +95,7 @@ def update_mcp_status_by_name_and_url(mcp_name: str, mcp_server: str, tenant_id: ).update({"status": status, "updated_by": 
user_id}) -def get_mcp_records_by_tenant(tenant_id: str) -> List[Dict[str, Any]]: +def get_mcp_records_by_tenant(tenant_id: str, tag: str | None = None) -> List[Dict[str, Any]]: """ Get all MCP records for a tenant @@ -88,14 +103,139 @@ def get_mcp_records_by_tenant(tenant_id: str) -> List[Dict[str, Any]]: :return: List of MCP records """ with get_db_session() as session: - mcp_records = session.query(McpRecord).filter( + query = session.query(McpRecord).filter( McpRecord.tenant_id == tenant_id, McpRecord.delete_flag != 'Y' - ).order_by(McpRecord.create_time.desc()).all() + ) + + if tag: + query = query.filter(McpRecord.tags.any(tag)) + + mcp_records = query.order_by(McpRecord.create_time.desc()).all() return [as_dict(record) for record in mcp_records] +def get_mcp_records_by_container_port(container_port: int) -> List[Dict[str, Any]]: + """ + Get non-deleted MCP records that already use the given container port (the enabled flag is not checked). + + The lookup is global. + """ + with get_db_session() as session: + query = session.query(McpRecord).filter( + McpRecord.container_port == container_port, + McpRecord.delete_flag != 'Y' + ) + + records = query.order_by(McpRecord.create_time.desc()).all() + return [as_dict(record) for record in records] + + +def update_mcp_record_manage_fields_by_id( + *, + mcp_id: int, + tenant_id: str, + user_id: str, + name: str, + server_url: str, + description: str | None, + tags: List[str] | None, + source: str | None, + authorization_token: str | None, + custom_headers: Dict[str, Any] | None, + config_json: Dict[str, Any] | None, +) -> None: + with get_db_session() as session: + session.query(McpRecord).filter( + McpRecord.mcp_id == mcp_id, + McpRecord.tenant_id == tenant_id, + McpRecord.delete_flag != 'Y' + ).update( + { + "mcp_name": name, + "mcp_server": server_url, + "description": description, + "tags": tags or [], + "source": source, + "authorization_token": authorization_token, + "custom_headers": custom_headers, + "config_json": config_json, + "updated_by":
user_id, + } + ) + + +def update_mcp_record_enabled_by_id( + *, + mcp_id: int, + tenant_id: str, + user_id: str, + enabled: bool, +) -> None: + with get_db_session() as session: + session.query(McpRecord).filter( + McpRecord.mcp_id == mcp_id, + McpRecord.tenant_id == tenant_id, + McpRecord.delete_flag != 'Y' + ).update({"enabled": enabled, "updated_by": user_id}) + + +def update_mcp_record_status_by_id( + *, + mcp_id: int, + tenant_id: str, + user_id: str, + status: bool, +) -> None: + with get_db_session() as session: + session.query(McpRecord).filter( + McpRecord.mcp_id == mcp_id, + McpRecord.tenant_id == tenant_id, + McpRecord.delete_flag != 'Y' + ).update({"status": status, "updated_by": user_id}) + + +def update_mcp_record_container_fields_by_id( + *, + mcp_id: int, + tenant_id: str, + user_id: str, + container_id: str | None, + container_port: int | None, + mcp_server: str, + status: bool | None, +) -> None: + with get_db_session() as session: + session.query(McpRecord).filter( + McpRecord.mcp_id == mcp_id, + McpRecord.tenant_id == tenant_id, + McpRecord.delete_flag != 'Y' + ).update( + { + "container_id": container_id, + "container_port": container_port, + "mcp_server": mcp_server, + "status": status, + "updated_by": user_id, + } + ) + + +def delete_mcp_record_by_id( + *, + mcp_id: int, + tenant_id: str, + user_id: str, +) -> None: + with get_db_session() as session: + session.query(McpRecord).filter( + McpRecord.mcp_id == mcp_id, + McpRecord.tenant_id == tenant_id, + McpRecord.delete_flag != 'Y' + ).update({"delete_flag": "Y", "updated_by": user_id}) + + def get_mcp_server_by_name_and_tenant(mcp_name: str, tenant_id: str) -> str: """ Get MCP server address by name and tenant ID @@ -134,6 +274,26 @@ def get_mcp_authorization_token_by_name_and_url(mcp_name: str, mcp_server: str, return mcp_record.authorization_token if mcp_record else None +def get_mcp_custom_headers_by_name_and_url(mcp_name: str, mcp_server: str, tenant_id: str) -> Dict[str, Any] | None: + 
""" + Get MCP custom headers by name, URL and tenant ID + + :param mcp_name: MCP name + :param mcp_server: MCP server URL + :param tenant_id: Tenant ID + :return: Custom headers dict, None if not found + """ + with get_db_session() as session: + mcp_record = session.query(McpRecord).filter( + McpRecord.mcp_name == mcp_name, + McpRecord.mcp_server == mcp_server, + McpRecord.tenant_id == tenant_id, + McpRecord.delete_flag != 'Y' + ).first() + + return mcp_record.custom_headers if mcp_record else None + + def update_mcp_record_by_name_and_url( update_data, tenant_id: str, @@ -161,6 +321,10 @@ def update_mcp_record_by_name_and_url( if hasattr(update_data, 'new_authorization_token'): update_fields["authorization_token"] = update_data.new_authorization_token + # Update custom_headers if provided + if hasattr(update_data, 'custom_headers'): + update_fields["custom_headers"] = update_data.custom_headers + with get_db_session() as session: session.query(McpRecord).filter( McpRecord.mcp_name == update_data.current_service_name, @@ -187,6 +351,26 @@ def check_mcp_name_exists(mcp_name: str, tenant_id: str) -> bool: return mcp_record is not None +def check_enabled_mcp_name_exists(mcp_name: str, tenant_id: str) -> bool: + """ + Check if enabled MCP name already exists for a tenant. + + Only enabled records participate in conflict checks for runtime container startup. 
+ + :param mcp_name: MCP name + :param tenant_id: Tenant ID + :return: True if enabled name exists, False otherwise + """ + with get_db_session() as session: + mcp_record = session.query(McpRecord).filter( + McpRecord.mcp_name == mcp_name, + McpRecord.tenant_id == tenant_id, + McpRecord.delete_flag != 'Y', + McpRecord.enabled.is_(True), + ).first() + return mcp_record is not None + + def get_mcp_record_by_id_and_tenant(mcp_id: int, tenant_id: str) -> Dict[str, Any] | None: """ Get MCP record by ID and tenant ID diff --git a/backend/database/skill_db.py b/backend/database/skill_db.py index 2a718800b..6a3f69069 100644 --- a/backend/database/skill_db.py +++ b/backend/database/skill_db.py @@ -18,8 +18,7 @@ def _params_value_for_db(raw: Any) -> Any: """Strip UI/YAML comment metadata, then JSON round-trip for the DB JSON column.""" if raw is None: return None - stripped = strip_params_comments_for_db(raw) - return json.loads(json.dumps(stripped, default=str)) + return json.loads(json.dumps(strip_params_comments_for_db(raw), default=str)) def create_or_update_skill_by_skill_info(skill_info, tenant_id: str, user_id: str, version_no: int = 0): @@ -155,6 +154,31 @@ def delete_skill_instances_by_skill_id(skill_id: int, user_id: str): }) +def delete_skill_instances_by_tenant(tenant_id: str, user_id: str) -> int: + """Soft delete all skill instances for a tenant. + + This is called when a tenant is deleted to clean up all skill instances. 
+ + Args: + tenant_id: Tenant ID to delete skill instances for + user_id: User ID for the updated_by field + + Returns: + Number of skill instances soft-deleted + """ + with get_db_session() as session: + count = session.query(SkillInstance).filter( + SkillInstance.tenant_id == tenant_id, + SkillInstance.delete_flag != 'Y' + ).update({ + SkillInstance.delete_flag: 'Y', + 'updated_by': user_id + }) + session.commit() + return count + + + # ============== SkillInfo Repository Functions ============== @@ -171,10 +195,12 @@ def _to_dict(skill: SkillInfo) -> Dict[str, Any]: return { "skill_id": skill.skill_id, "name": skill.skill_name, + "tenant_id": skill.tenant_id, "description": skill.skill_description, "tags": skill.skill_tags or [], "content": skill.skill_content or "", - "params": skill.params if skill.params is not None else {}, + "config_schemas": skill.config_schemas, + "config_values": skill.config_values, "source": skill.source, "created_by": skill.created_by, "create_time": skill.create_time.isoformat() if skill.create_time else None, @@ -183,10 +209,15 @@ def _to_dict(skill: SkillInfo) -> Dict[str, Any]: } -def list_skills() -> List[Dict[str, Any]]: - """List all skills from database.""" +def list_skills(tenant_id: str) -> List[Dict[str, Any]]: + """List all skills for a tenant from database. + + Args: + tenant_id: Tenant ID for filtering skills + """ with get_db_session() as session: skills = session.query(SkillInfo).filter( + SkillInfo.tenant_id == tenant_id, SkillInfo.delete_flag != 'Y' ).all() results = [] @@ -197,11 +228,37 @@ def list_skills() -> List[Dict[str, Any]]: return results -def get_skill_by_name(skill_name: str) -> Optional[Dict[str, Any]]: - """Get skill by name.""" +def get_skill_by_name(skill_name: str, tenant_id: str) -> Optional[Dict[str, Any]]: + """Get skill by name within a tenant. 
+ + Args: + skill_name: Skill name + tenant_id: Tenant ID for filtering + """ with get_db_session() as session: skill = session.query(SkillInfo).filter( SkillInfo.skill_name == skill_name, + SkillInfo.tenant_id == tenant_id, + SkillInfo.delete_flag != 'Y' + ).first() + if skill: + result = _to_dict(skill) + result["tool_ids"] = _get_tool_ids(session, skill.skill_id) + return result + return None + + +def get_skill_by_id(skill_id: int, tenant_id: str) -> Optional[Dict[str, Any]]: + """Get skill by ID within a tenant. + + Args: + skill_id: Skill ID + tenant_id: Tenant ID for filtering + """ + with get_db_session() as session: + skill = session.query(SkillInfo).filter( + SkillInfo.skill_id == skill_id, + SkillInfo.tenant_id == tenant_id, SkillInfo.delete_flag != 'Y' ).first() if skill: @@ -211,8 +268,15 @@ def get_skill_by_name(skill_name: str) -> Optional[Dict[str, Any]]: return None -def get_skill_by_id(skill_id: int) -> Optional[Dict[str, Any]]: - """Get skill by ID.""" +def get_skill_by_id_global(skill_id: int) -> Optional[Dict[str, Any]]: + """Get skill by ID without tenant filter (global lookup for template skills). + + Args: + skill_id: Skill ID + + Returns: + Skill dict or None if not found. + """ with get_db_session() as session: skill = session.query(SkillInfo).filter( SkillInfo.skill_id == skill_id, @@ -225,15 +289,37 @@ def get_skill_by_id(skill_id: int) -> Optional[Dict[str, Any]]: return None -def create_skill(skill_data: Dict[str, Any]) -> Dict[str, Any]: - """Create a new skill.""" +def list_global_official_skills() -> List[Dict[str, Any]]: + """List all global official skills (tenant_id IS NULL) for installation. + + Returns: + List of skill dicts with skill_id, name, description, source.
+ """ + with get_db_session() as session: + skills = session.query(SkillInfo).filter( + SkillInfo.tenant_id.is_(None), + SkillInfo.delete_flag != 'Y', + SkillInfo.source == 'official' + ).all() + return [_to_dict(s) for s in skills] + + +def create_skill(skill_data: Dict[str, Any], tenant_id: str) -> Dict[str, Any]: + """Create a new skill for a tenant. + + Args: + skill_data: Skill data dict + tenant_id: Tenant ID for the skill + """ with get_db_session() as session: skill = SkillInfo( skill_name=skill_data["name"], + tenant_id=tenant_id, skill_description=skill_data.get("description", ""), skill_tags=skill_data.get("tags", []), skill_content=skill_data.get("content", ""), - params=_params_value_for_db(skill_data.get("params")), + config_schemas=_params_value_for_db(skill_data.get("config_schemas")), + config_values=_params_value_for_db(skill_data.get("config_values")), source=skill_data.get("source", "custom"), created_by=skill_data.get("created_by"), create_time=datetime.now(), @@ -265,13 +356,15 @@ def create_skill(skill_data: Dict[str, Any]) -> Dict[str, Any]: def update_skill( skill_name: str, skill_data: Dict[str, Any], + tenant_id: str, updated_by: Optional[str] = None, ) -> Dict[str, Any]: - """Update an existing skill. + """Update an existing skill for a tenant. Args: - skill_name: Skill name (unique key). + skill_name: Skill name (unique key within tenant). skill_data: Business fields to update (description, content, tags, source, params, tool_ids). + tenant_id: Tenant ID for filtering. updated_by: Actor user id from server-side auth; never taken from the HTTP request body.
Notes: @@ -282,6 +375,7 @@ def update_skill( with get_db_session() as session: skill = session.query(SkillInfo).filter( SkillInfo.skill_name == skill_name, + SkillInfo.tenant_id == tenant_id, SkillInfo.delete_flag != "Y", ).first() @@ -302,8 +396,10 @@ def update_skill( row_values["skill_tags"] = skill_data["tags"] if "source" in skill_data: row_values["source"] = skill_data["source"] - if "params" in skill_data: - row_values["params"] = _params_value_for_db(skill_data["params"]) + if "config_schemas" in skill_data: + row_values["config_schemas"] = _params_value_for_db(skill_data["config_schemas"]) + if "config_values" in skill_data: + row_values["config_values"] = _params_value_for_db(skill_data["config_values"]) session.execute( sa_update(SkillInfo) @@ -331,6 +427,7 @@ def update_skill( refreshed = session.query(SkillInfo).filter( SkillInfo.skill_id == skill_id, + SkillInfo.tenant_id == tenant_id, SkillInfo.delete_flag != "Y", ).first() if not refreshed: @@ -344,11 +441,12 @@ def update_skill( return result -def delete_skill(skill_name: str, updated_by: Optional[str] = None) -> bool: - """Soft delete a skill (mark as deleted). +def delete_skill(skill_name: str, tenant_id: str, updated_by: Optional[str] = None) -> bool: + """Soft delete a skill for a tenant (mark as deleted). Args: skill_name: Name of the skill to delete + tenant_id: Tenant ID for filtering updated_by: User ID of the user performing the delete Returns: @@ -357,6 +455,7 @@ def delete_skill(skill_name: str, updated_by: Optional[str] = None) -> bool: with get_db_session() as session: skill = session.query(SkillInfo).filter( SkillInfo.skill_name == skill_name, + SkillInfo.tenant_id == tenant_id, SkillInfo.delete_flag != 'Y' ).first() @@ -412,11 +511,12 @@ def get_tool_ids_by_names(tool_names: List[str], tenant_id: str) -> List[int]: return [t.tool_id for t in tools] -def get_tool_names_by_skill_name(skill_name: str) -> List[str]: - """Get tool names for a skill by skill name. 
+def get_tool_names_by_skill_name(skill_name: str, tenant_id: str) -> List[str]: + """Get tool names for a skill by skill name within a tenant. Args: skill_name: Name of the skill + tenant_id: Tenant ID for filtering Returns: List of tool names @@ -424,6 +524,7 @@ def get_tool_names_by_skill_name(skill_name: str) -> List[str]: with get_db_session() as session: skill = session.query(SkillInfo).filter( SkillInfo.skill_name == skill_name, + SkillInfo.tenant_id == tenant_id, SkillInfo.delete_flag != 'Y' ).first() if not skill: @@ -432,11 +533,12 @@ def get_tool_names_by_skill_name(skill_name: str) -> List[str]: return get_tool_names_by_ids(session, tool_ids) -def get_skill_with_tool_names(skill_name: str) -> Optional[Dict[str, Any]]: - """Get skill with tool names included.""" +def get_skill_with_tool_names(skill_name: str, tenant_id: str) -> Optional[Dict[str, Any]]: + """Get skill with tool names included for a tenant.""" with get_db_session() as session: skill = session.query(SkillInfo).filter( SkillInfo.skill_name == skill_name, + SkillInfo.tenant_id == tenant_id, SkillInfo.delete_flag != 'Y' ).first() if skill: @@ -446,3 +548,74 @@ def get_skill_with_tool_names(skill_name: str) -> Optional[Dict[str, Any]]: result["allowed_tools"] = get_tool_names_by_ids(session, tool_ids) return result return None + + +# ============== Skill Initialization Functions ============== + + +def check_skill_list_initialized(tenant_id: str) -> bool: + """Check if skill list has been initialized for the tenant. 
+ + Args: + tenant_id: Tenant ID to check + + Returns: + True if skills have been initialized, False otherwise + """ + with get_db_session() as session: + count = session.query(SkillInfo).filter( + SkillInfo.tenant_id == tenant_id, + SkillInfo.delete_flag != 'Y', + SkillInfo.source != 'custom' + ).count() + return count > 0 + + +def upsert_scanned_skills(skills: List[Dict[str, Any]], user_id: str, tenant_id: str): + """Scan local skill directories and upsert skill metadata to ag_skill_info_t. + + Mirrors update_tool_table_from_scan_tool_list() in tool_db.py. + All fields are unconditionally overwritten on every scan (same as tools). + + Args: + skills: List of skill dicts with name, description, tags, content, config_schemas, config_values, source + user_id: User ID for tracking who initiated the scan + tenant_id: Tenant ID for the skills + """ + with get_db_session() as session: + existing_skills = session.query(SkillInfo).filter( + SkillInfo.tenant_id == tenant_id, + SkillInfo.delete_flag != 'Y' + ).all() + existing_dict = {s.skill_name: s for s in existing_skills} + + for skill_data in skills: + skill_name = skill_data.get("name") + if not skill_name: + continue + + if skill_name in existing_dict: + existing = existing_dict[skill_name] + # Unconditionally overwrite all fields on every scan (same as tools) + existing.skill_description = skill_data.get("description", "") + existing.skill_tags = skill_data.get("tags", []) + existing.skill_content = skill_data.get("content", "") + existing.config_schemas = _params_value_for_db(skill_data.get("config_schemas")) + existing.config_values = _params_value_for_db(skill_data.get("config_values")) + existing.updated_by = user_id + else: + new_skill = SkillInfo( + skill_name=skill_name, + tenant_id=tenant_id, + skill_description=skill_data.get("description", ""), + skill_tags=skill_data.get("tags", []), + skill_content=skill_data.get("content", ""), + config_schemas=_params_value_for_db(skill_data.get("config_schemas")), +
config_values=_params_value_for_db(skill_data.get("config_values")), + source=skill_data.get("source", "official"), + created_by=user_id, + updated_by=user_id, + create_time=datetime.now(), + update_time=datetime.now(), + ) + session.add(new_skill) diff --git a/backend/prompts/managed_system_prompt_template_en.yaml b/backend/prompts/managed_system_prompt_template_en.yaml index 67da8305c..5c2893c39 100644 --- a/backend/prompts/managed_system_prompt_template_en.yaml +++ b/backend/prompts/managed_system_prompt_template_en.yaml @@ -42,10 +42,11 @@ system_prompt: |- {{ duty }} Please note that you should follow these principles: - Legal Compliance: Strictly adhere to all laws and regulations in your service area; - Political Neutrality: Do not discuss any country's political system, leadership evaluations, or sensitive historical events; - Security Protection: Do not respond to requests involving weapon manufacturing, dangerous behavior, privacy theft, etc.; - Ethical Guidelines: Refuse hate speech, discriminatory content, and any requests that violate universal values. + Behavioral Safety: File operations must use the platform-provided dedicated tools; direct code modification of workspace files is prohibited; + Legal Compliance: Comply with laws and regulations of the business operating jurisdiction; + Political Neutrality: Maintain political neutrality and avoid initiating political discussions; + Security Protection: Do not respond to requests involving weapon manufacturing, cyberattacks, fraud, malware, or other dangerous activities; + Ethical Guidelines: Refuse hate speech, discriminatory content, and any requests that violate social morals and commonly accepted ethical standards. ### Execution Process To solve tasks, you must plan forward through a series of steps in a loop of 'Think:' and 'Code:' sequences. **IMPORTANT: You must NOT output 'Observe Results:' before code execution. 
Observation results can ONLY be generated after code execution.** @@ -116,7 +117,7 @@ system_prompt: |- → Use **presigned_url** (already includes proxy prefix, format: `http://.../api/nb/v1/file/fetch?presigned_url=...`) Directly use the **presigned_url** field provided in the user's uploaded file info. No need to construct or append anything. 2. **Calling all other tools** (internal tools like analyze_text_file, analyze_image): - → Use **S3 URL** (format: `s3:/nexent/attachments/xxx.pdf`) + → Use **S3 URL** (format: `s3://nexent/attachments/xxx.pdf`) Reason: Internal tools run inside Nexent and can directly access MinIO storage {%- else %} diff --git a/backend/prompts/managed_system_prompt_template_zh.yaml b/backend/prompts/managed_system_prompt_template_zh.yaml index 231eee325..291e336fb 100644 --- a/backend/prompts/managed_system_prompt_template_zh.yaml +++ b/backend/prompts/managed_system_prompt_template_zh.yaml @@ -46,6 +46,7 @@ system_prompt: |- {{ duty }} 请注意,你应该遵守以下原则: + 行为安全:严禁直接执行代码进行文件的增删改操作,只能使用提供的文件操作类工具; 法律合规:严格遵守服务地区的所有法律法规; 政治中立:不讨论任何国家的政治体制、领导人评价或敏感历史事件; 安全防护:不响应涉及武器制造、危险行为、隐私窃取等内容的请求; diff --git a/backend/prompts/manager_system_prompt_template_en.yaml b/backend/prompts/manager_system_prompt_template_en.yaml index a4ffae074..8ce58db29 100644 --- a/backend/prompts/manager_system_prompt_template_en.yaml +++ b/backend/prompts/manager_system_prompt_template_en.yaml @@ -42,10 +42,11 @@ system_prompt: |- {{ duty }} Please note that you should follow these principles: - Legal Compliance: Strictly adhere to all laws and regulations in your service area; - Political Neutrality: Do not discuss any country's political system, leadership evaluations, or sensitive historical events; - Security Protection: Do not respond to requests involving weapon manufacturing, dangerous behavior, privacy theft, etc.; - Ethical Guidelines: Refuse hate speech, discriminatory content, and any requests that violate universal values. 
+ Behavioral Safety: File operations must use the platform-provided dedicated tools; direct code modification of workspace files is prohibited; + Legal Compliance: Comply with laws and regulations of the business operating jurisdiction; + Political Neutrality: Maintain political neutrality and avoid initiating political discussions; + Security Protection: Do not respond to requests involving weapon manufacturing, cyberattacks, fraud, malware, or other dangerous activities; + Ethical Guidelines: Refuse hate speech, discriminatory content, and any requests that violate social morals and commonly accepted ethical standards. ### Execution Process To solve tasks, you must plan forward through a series of steps in a loop of 'Think:' and 'Code:' sequences. **IMPORTANT: You must NOT output 'Observe Results:' before code execution. Observation results can ONLY be generated after code execution.** @@ -118,7 +119,7 @@ system_prompt: |- → Use **Download URL** (format: `https://minio.example.com/...?token=xxx`) Reason: MCP tools run on external services and cannot access internal S3 storage 2. 
**Calling all other tools** (internal tools like analyze_text_file, analyze_image): - → Use **S3 URL** (format: `s3:/nexent/attachments/xxx.pdf`) + → Use **S3 URL** (format: `s3://nexent/attachments/xxx.pdf`) Reason: Internal tools run inside Nexent and can directly access MinIO storage {%- else %} - No tools are currently available diff --git a/backend/prompts/manager_system_prompt_template_zh.yaml b/backend/prompts/manager_system_prompt_template_zh.yaml index 6743316e3..fc4eb7c0c 100644 --- a/backend/prompts/manager_system_prompt_template_zh.yaml +++ b/backend/prompts/manager_system_prompt_template_zh.yaml @@ -42,10 +42,11 @@ system_prompt: |- {{ duty }} 请注意,你应该遵守以下原则: - 法律合规:严格遵守服务地区的所有法律法规; - 政治中立:不讨论任何国家的政治体制、领导人评价或敏感历史事件; - 安全防护:不响应涉及武器制造、危险行为、隐私窃取等内容的请求; - 伦理准则:拒绝仇恨言论、歧视性内容及任何违反普世价值观的请求。 + 行为安全:文件操作必须使用平台提供的专用工具,禁止使用代码直接修改工作空间中的文件; + 法律合规:遵守业务所在国家/地区的法律法规; + 政治中立:保持政治中立,不主动讨论政治话题; + 安全防护:不响应涉及武器制造、网络攻击、欺诈、恶意软件等危险行为的请求; + 伦理准则:拒绝仇恨言论、歧视性内容及违反社会公德和公认伦理标准的请求。 {%- if skills and skills|length > 0 %} ### 可用技能 diff --git a/backend/prompts/utils/prompt_optimize_en.yaml b/backend/prompts/utils/prompt_optimize_en.yaml new file mode 100644 index 000000000..a487107b7 --- /dev/null +++ b/backend/prompts/utils/prompt_optimize_en.yaml @@ -0,0 +1,51 @@ +OPTIMIZE_SYSTEM_PROMPT: |- + ### You Are a Prompt Optimization Expert + You optimize one specific section of an agent prompt based on the user's feedback while preserving the section's original intent and format conventions. + + ### Your Goal + Improve only the target section content according to the evaluation feedback. + + ### Requirements + 1. Output only the optimized section content. + 2. Preserve the target section's language unless the user feedback explicitly requests otherwise. + 3. Keep the optimized content aligned with the business task, available tools, and available assistants. + 4. Do not add explanations, summaries, markdown fences, titles, or comparison text. + 5. 
For `duty`, keep the content concise and role-oriented. + 6. For `constraint`, keep the content as explicit usage requirements. + 7. For `few_shots`, keep the content as concrete examples consistent with the current prompt style. + +OPTIMIZE_USER_PROMPT: |- + ### Section Type + {{ section_type }} + + ### Section Title + {{ section_title }} + + ### Business Task Description + {{ task_description }} + + ### Current Section Content + {{ current_content }} + + ### User Evaluation Feedback + {{ feedback }} + + ### Available Tools + {% if tool_description %} + {{ tool_description }} + {% else %} + No available tools. + {% endif %} + + ### Available Assistants + {% if assistant_description %} + {{ assistant_description }} + {% else %} + No available assistants. + {% endif %} + + {% if knowledge_base_names %} + ### Knowledge Base Configuration Note + When optimizing few-shot examples that use `knowledge_base_search`, you must use these actual configured knowledge base names: + {{ knowledge_base_names | default('') }} + {% endif %} diff --git a/backend/prompts/utils/prompt_optimize_zh.yaml b/backend/prompts/utils/prompt_optimize_zh.yaml new file mode 100644 index 000000000..a769ea5eb --- /dev/null +++ b/backend/prompts/utils/prompt_optimize_zh.yaml @@ -0,0 +1,51 @@ +OPTIMIZE_SYSTEM_PROMPT: |- + ### 你是一名提示词优化专家 + 你需要根据用户给出的评价,对智能体提示词中的某一个指定部分进行优化,同时保持该部分原本的目标和格式风格。 + + ### 你的任务 + 只优化目标部分的内容,并让结果更贴合用户评价。 + + ### 要求 + 1. 只输出优化后的该部分内容。 + 2. 保持原内容的语言风格,除非用户明确要求切换语言。 + 3. 优化结果要与业务任务、可用工具和可用助手保持一致。 + 4. 不要输出解释、总结、标题、对比说明或 Markdown 代码块。 + 5. 当 `section_type` 为 `duty` 时,内容应保持简洁,突出智能体角色与职责。 + 6. 当 `section_type` 为 `constraint` 时,内容应保持为清晰明确的使用要求。 + 7. 
当 `section_type` 为 `few_shots` 时,内容应保持为具体示例,并与当前提示词风格一致。 + +OPTIMIZE_USER_PROMPT: |- + ### 部分类型 + {{ section_type }} + + ### 部分标题 + {{ section_title }} + + ### 业务任务描述 + {{ task_description }} + + ### 当前内容 + {{ current_content }} + + ### 用户评价反馈 + {{ feedback }} + + ### 可用工具 + {% if tool_description %} + {{ tool_description }} + {% else %} + 当前没有可用工具。 + {% endif %} + + ### 可用助手 + {% if assistant_description %} + {{ assistant_description }} + {% else %} + 当前没有可用助手。 + {% endif %} + + {% if knowledge_base_names %} + ### 知识库配置说明 + 如果优化后的 few-shot 示例中需要使用 `knowledge_base_search`,必须使用以下已配置的真实知识库名称: + {{ knowledge_base_names | default('') }} + {% endif %} diff --git a/backend/pyproject.toml b/backend/pyproject.toml index c8e6c5370..dff0e8693 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -3,8 +3,11 @@ name = "backend" version = "0.1.0" requires-python = "==3.10.*" dependencies = [ + "aiofiles>=0.8.0", "uvicorn>=0.34.0", "fastapi>=0.115.12", + "python-multipart>=0.0.9", + "email-validator>=2.0.0", "aiohttp>=3.8.0", "authlib>=1.3.0", "cryptography>=42.0.0", @@ -15,6 +18,7 @@ dependencies = [ "supabase>=2.18.1", "websocket-client>=1.8.0", "pyyaml>=6.0.2", + "jsonref>=1.1.0", "ruamel-yaml==0.19.1", "redis>=5.0.0", "fastmcp==2.12.0", diff --git a/backend/services/a2a_agent_adapter.py b/backend/services/a2a_agent_adapter.py index b6fddc500..36f10657e 100644 --- a/backend/services/a2a_agent_adapter.py +++ b/backend/services/a2a_agent_adapter.py @@ -227,7 +227,7 @@ def build_a2a_task_response( text_content = str(message) task["status"]["message"] = { "role": message.get("role", "agent"), - "parts": [{"type": "text", "text": text_content, "mediaType": _MEDIA_TYPE_TEXT}] + "parts": [{"text": text_content, "mediaType": _MEDIA_TYPE_TEXT}] } # Handle artifacts @@ -266,9 +266,9 @@ def build_a2a_message_response( if parts: message_parts = parts elif text: - message_parts = [{"type": "text", "text": text, "mediaType": _MEDIA_TYPE_TEXT}] + message_parts = [{"text": 
text, "mediaType": _MEDIA_TYPE_TEXT}] else: - message_parts = [{"type": "text", "text": "", "mediaType": _MEDIA_TYPE_TEXT}] + message_parts = [{"text": "", "mediaType": _MEDIA_TYPE_TEXT}] message_obj = { "messageId": message_id, @@ -294,8 +294,8 @@ def _content_to_artifact_parts( return parts if isinstance(content, dict): if content.get("type") == "text": - return [{"type": "text", "text": content.get("text", "")}] - return [{"type": "text", "text": str(content)}] + return [{"text": content.get("text", ""), "mediaType": _MEDIA_TYPE_TEXT}] + return [{"text": str(content), "mediaType": _MEDIA_TYPE_TEXT}] def _map_task_state(self, state: str) -> str: """Map shorthand state to TASK_STATE constant.""" @@ -343,7 +343,7 @@ def _message_to_parts_format(self, message: Any) -> Dict[str, Any]: text = str(message) return { "role": role, - "parts": [{"type": "text", "text": text}] + "parts": [{"text": text}] } def _build_artifact_update_event( diff --git a/backend/services/a2a_server_service.py b/backend/services/a2a_server_service.py index 2cccbe40d..4d9c5e607 100644 --- a/backend/services/a2a_server_service.py +++ b/backend/services/a2a_server_service.py @@ -647,7 +647,7 @@ async def handle_message_send( return self.adapter.build_a2a_task_response( task_id=task_id, status="TASK_STATE_COMPLETED", - parts=[{"type": "text", "text": accumulated_text, "mediaType": "text/plain"}] if accumulated_text else None, + parts=[{"text": accumulated_text, "mediaType": "text/plain"}] if accumulated_text else None, context_id=context_id, timestamp=datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") ) @@ -879,7 +879,7 @@ def get_task( message = result.get("message", "") if message: task_obj["artifacts"] = [{ - "parts": [{"type": "text", "text": str(message)}], + "parts": [{"text": str(message)}], "lastChunk": True }] diff --git a/backend/services/agent_service.py b/backend/services/agent_service.py index 02fa7d8c6..5a340b1d6 100644 --- a/backend/services/agent_service.py +++ 
b/backend/services/agent_service.py @@ -1,10 +1,13 @@ import asyncio +import base64 +import io import json import logging import os import uuid +import zipfile from collections import deque -from typing import Callable, Optional, Dict +from typing import Callable, Optional, Dict, List from fastapi import Header, Request from fastapi.responses import JSONResponse, StreamingResponse @@ -16,9 +19,11 @@ from agents.create_agent_info import create_agent_run_info, create_tool_config_list from agents.preprocess_manager import preprocess_manager from services.agent_version_service import publish_version_impl +from utils.prompt_template_utils import normalize_prompt_generate_template_content from consts.const import MEMORY_SEARCH_START_MSG, MEMORY_SEARCH_DONE_MSG, MEMORY_SEARCH_FAIL_MSG, TOOL_TYPE_MAPPING, \ LANGUAGE, MESSAGE_ROLE, MODEL_CONFIG_MAPPING, CAN_EDIT_ALL_USER_ROLES, PERMISSION_EDIT, PERMISSION_READ, PERMISSION_PRIVATE -from consts.exceptions import MemoryPreparationException +from consts.exceptions import MemoryPreparationException, SkillDuplicateError +from consts.agent_unavailable_reasons import AgentUnavailableReason from consts.model import ( AgentInfoRequest, AgentRequest, @@ -28,9 +33,11 @@ ExportAndImportDataFormat, MCPInfo, SkillInstanceInfoRequest, + SkillZipEntry, ToolInstanceInfoRequest, ToolSourceEnum, ModelConnectStatusEnum ) +from services.asset_owner_visibility import resolve_agent_list_permission from database.agent_db import ( create_agent, delete_agent_by_id, @@ -60,10 +67,16 @@ search_tools_for_sub_agent ) from database import skill_db +from services.skill_service import SkillService from database.agent_version_db import query_version_list from database.group_db import query_group_ids_by_user from database.user_tenant_db import get_user_tenant_by_user_id -from database.a2a_agent_db import get_server_agent_ids +from database.a2a_agent_db import get_server_agent_ids, query_external_sub_agents +from services.prompt_template_service import ( + 
SYSTEM_PROMPT_TEMPLATE_ID, + SYSTEM_PROMPT_TEMPLATE_NAME, + get_prompt_template_summary, +) from utils.str_utils import convert_list_to_string, convert_string_to_list from services.conversation_management_service import save_conversation_assistant, save_conversation_user from services.memory_config_service import build_memory_context @@ -74,13 +87,14 @@ from utils.prompt_template_utils import get_prompt_generate_prompt_template from utils.llm_utils import call_llm_for_system_prompt -# Monitoring utilities: expose monitoring context for downstream observers -from nexent.monitor import set_monitoring_context +# Monitoring utilities: bind Agent metadata once at the request boundary. +from nexent.monitor import AgentRunMetadata, agent_monitoring_context # Import monitoring utilities from utils.monitoring import monitoring_manager logger = logging.getLogger(__name__) +SAFE_AGENT_STREAM_ERROR_MESSAGE = "Agent execution failed. Please try again later." # ------------------------------------------------------------- @@ -88,6 +102,15 @@ # ------------------------------------------------------------- +def _safe_agent_stream_error_chunk() -> str: + """Return a sanitized SSE error chunk without internal exception details.""" + error_payload = json.dumps( + {"type": "error", "content": SAFE_AGENT_STREAM_ERROR_MESSAGE}, + ensure_ascii=False, + ) + return f"data: {error_payload}\n\n" + + def _resolve_user_tenant_language( authorization: str, http_request: Request | None = None, @@ -312,12 +335,25 @@ def _regenerate_agent_value_with_llm( user_prompt_key: str, default_system_prompt: str, default_user_prompt_builder: Callable[[dict], str], - fallback_fn: Callable[[str], str] + fallback_fn: Callable[[str], str], + prompt_template_id: Optional[int] = None, + user_id: Optional[str] = None, ) -> str: """ Shared helper to regenerate agent-related values with an LLM. 
""" - prompt_template = get_prompt_generate_prompt_template(language) + if user_id is not None: + from services.prompt_template_service import resolve_prompt_generate_template + prompt_template = resolve_prompt_generate_template( + tenant_id=tenant_id, + user_id=user_id, + language=language, + prompt_template_id=prompt_template_id, + ) + else: + prompt_template = normalize_prompt_generate_template_content( + get_prompt_generate_prompt_template(language) + ) system_prompt = _render_prompt_template( prompt_template.get(system_prompt_key, ""), original_value=original_value @@ -349,7 +385,8 @@ def _regenerate_agent_value_with_llm( callback=None, tenant_id=tenant_id ) - candidate = (regenerated_value or "").strip().splitlines()[0].strip() + candidate = (regenerated_value or "").strip().splitlines()[ + 0].strip() if candidate in value_set: raise ValueError(f"Generated duplicate value '{candidate}'") return candidate @@ -374,7 +411,9 @@ def _regenerate_agent_name_with_llm( tenant_id: str, language: str = LANGUAGE["ZH"], agents_cache: list[dict] | None = None, - exclude_agent_id: int | None = None + exclude_agent_id: int | None = None, + prompt_template_id: Optional[int] = None, + user_id: Optional[str] = None, ) -> str: return _regenerate_agent_value_with_llm( original_value=original_name, @@ -383,8 +422,8 @@ def _regenerate_agent_name_with_llm( model_id=model_id, tenant_id=tenant_id, language=language, - system_prompt_key="AGENT_NAME_REGENERATE_SYSTEM_PROMPT", - user_prompt_key="AGENT_NAME_REGENERATE_USER_PROMPT", + system_prompt_key="agent_name_regenerate_system_prompt", + user_prompt_key="agent_name_regenerate_user_prompt", default_system_prompt=( "You refine agent variable names so that they stay close to the " "original meaning and remain unique within the tenant." 
@@ -402,11 +441,12 @@ def _regenerate_agent_name_with_llm( tenant_id=tenant_id, agents_cache=agents_cache, exclude_agent_id=exclude_agent_id - ) + ), + prompt_template_id=prompt_template_id, + user_id=user_id, ) - def _regenerate_agent_display_name_with_llm( original_display_name: str, existing_display_names: list[str], @@ -415,7 +455,9 @@ def _regenerate_agent_display_name_with_llm( tenant_id: str, language: str = LANGUAGE["ZH"], agents_cache: list[dict] | None = None, - exclude_agent_id: int | None = None + exclude_agent_id: int | None = None, + prompt_template_id: Optional[int] = None, + user_id: Optional[str] = None, ) -> str: return _regenerate_agent_value_with_llm( original_value=original_display_name, @@ -424,8 +466,8 @@ def _regenerate_agent_display_name_with_llm( model_id=model_id, tenant_id=tenant_id, language=language, - system_prompt_key="AGENT_DISPLAY_NAME_REGENERATE_SYSTEM_PROMPT", - user_prompt_key="AGENT_DISPLAY_NAME_REGENERATE_USER_PROMPT", + system_prompt_key="agent_display_name_regenerate_system_prompt", + user_prompt_key="agent_display_name_regenerate_user_prompt", default_system_prompt=( "You refine agent display names so they remain unique, concise, " "and aligned with the agent's capability." 
@@ -442,11 +484,12 @@ def _regenerate_agent_display_name_with_llm( tenant_id=tenant_id, agents_cache=agents_cache, exclude_agent_id=exclude_agent_id - ) + ), + prompt_template_id=prompt_template_id, + user_id=user_id, ) - async def check_agent_name_conflict_batch_impl( request: AgentNameBatchCheckRequest, authorization: str @@ -504,17 +547,21 @@ async def regenerate_agent_name_batch_impl( _, tenant_id, _ = get_current_user_info(authorization) agents_cache = query_all_agent_info_by_tenant_id(tenant_id) - existing_names = [agent.get("name") for agent in agents_cache if agent.get("name")] - existing_display_names = [agent.get("display_name") for agent in agents_cache if agent.get("display_name")] + existing_names = [agent.get("name") + for agent in agents_cache if agent.get("name")] + existing_display_names = [agent.get( + "display_name") for agent in agents_cache if agent.get("display_name")] # Always use tenant quick-config LLM model quick_config_model = tenant_config_manager.get_model_config( key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id ) - resolved_model_id = quick_config_model.get("model_id") if quick_config_model else None + resolved_model_id = quick_config_model.get( + "model_id") if quick_config_model else None if not resolved_model_id: - raise ValueError("No available model for regeneration. Please configure an LLM model first.") + raise ValueError( + "No available model for regeneration. 
Please configure an LLM model first.") results: list[dict] = [] # Use local mutable caches to avoid regenerated duplicates in the same batch @@ -544,7 +591,8 @@ async def regenerate_agent_name_batch_impl( exclude_agent_id=exclude_agent_id ) except Exception as e: - logger.error(f"Failed to regenerate agent name with LLM: {str(e)}, using fallback") + logger.error( + f"Failed to regenerate agent name with LLM: {str(e)}, using fallback") agent_name = _generate_unique_agent_name_with_suffix( agent_name, tenant_id=tenant_id, @@ -569,7 +617,8 @@ async def regenerate_agent_name_batch_impl( exclude_agent_id=exclude_agent_id ) except Exception as e: - logger.error(f"Failed to regenerate agent display_name with LLM: {str(e)}, using fallback") + logger.error( + f"Failed to regenerate agent display_name with LLM: {str(e)}, using fallback") agent_display_name = _generate_unique_display_name_with_suffix( agent_display_name, tenant_id=tenant_id, @@ -619,11 +668,8 @@ async def _stream_agent_chunks( pass yield f"data: {chunk}\n\n" except Exception as run_exc: - logger.error(f"Agent run error: {str(run_exc)}") - # Emit an error chunk and terminate the stream immediately - error_payload = json.dumps( - {"type": "error", "content": str(run_exc)}, ensure_ascii=False) - yield f"data: {error_payload}\n\n" + logger.error("Agent run error: %r", run_exc, exc_info=True) + yield _safe_agent_stream_error_chunk() finally: # Persist assistant messages for non-debug runs if not agent_request.is_debug: @@ -685,7 +731,8 @@ async def _add_memory_background(): # Create and store the background task to avoid warnings background_task = asyncio.create_task(_add_memory_background()) # Add done callback to handle any exceptions that might occur - background_task.add_done_callback(lambda t: t.exception() if t.exception() else None) + background_task.add_done_callback( + lambda t: t.exception() if t.exception() else None) except Exception as schedule_err: logger.error( f"Failed to schedule background memory 
addition: {schedule_err}") @@ -713,13 +760,36 @@ async def get_creating_sub_agent_id_service(tenant_id: str, user_id: str = None) return create_agent(agent_info={"enabled": False}, tenant_id=tenant_id, user_id=user_id)["agent_id"] -async def get_agent_info_impl(agent_id: int, tenant_id: str, version_no: int = 0): +async def get_agent_info_impl(agent_id: int, tenant_id: str, version_no: int = 0, user_id: Optional[str] = None): try: - agent_info = search_agent_info_by_agent_id(agent_id, tenant_id, version_no) + agent_info = search_agent_info_by_agent_id( + agent_id, tenant_id, version_no) + # Keep the request-scoped tenant_id unless the record explicitly provides one. + record_tenant_id = agent_info.get("tenant_id") + if record_tenant_id: + tenant_id = record_tenant_id except Exception as e: logger.error(f"Failed to get agent info: {str(e)}") raise ValueError(f"Failed to get agent info: {str(e)}") + # Calculate permission if user_id is provided + if user_id is not None: + try: + user_tenant_record = get_user_tenant_by_user_id(user_id) or {} + user_role = str(user_tenant_record.get("user_role") or "").upper() + can_edit_all = user_role in CAN_EDIT_ALL_USER_ROLES + + # Permission logic (same as agent list): + # - If creator or can_edit_all: PERMISSION_EDIT + # - Otherwise: use ingroup_permission, default to PERMISSION_READ if None + if can_edit_all or str(agent_info.get("created_by")) == str(user_id): + agent_info["permission"] = PERMISSION_EDIT + else: + ingroup_permission = agent_info.get("ingroup_permission") + agent_info["permission"] = ingroup_permission if ingroup_permission is not None else PERMISSION_READ + except Exception as e: + logger.warning(f"Failed to calculate agent permission: {str(e)}") + try: tool_info = search_tools_for_sub_agent( agent_id=agent_id, tenant_id=tenant_id) @@ -736,21 +806,52 @@ async def get_agent_info_impl(agent_id: int, tenant_id: str, version_no: int = 0 logger.error(f"Failed to get sub agent id list: {str(e)}") 
agent_info["sub_agent_id_list"] = [] + try: + skill_service = SkillService() + instances = skill_service.list_skill_instances( + agent_id=agent_id, + tenant_id=tenant_id, + version_no=version_no + ) + agent_info["skills"] = instances + except Exception as e: + logger.exception(f"Failed to get agent skills: {str(e)}") + agent_info["skills"] = [] + + try: + external_agents = query_external_sub_agents( + local_agent_id=agent_id, tenant_id=tenant_id, version_no=version_no) + agent_info["external_sub_agent_id_list"] = [ + ea["external_agent_id"] for ea in external_agents + ] + except Exception as e: + logger.error(f"Failed to get external sub agents: {str(e)}") + agent_info["external_sub_agent_id_list"] = [] + if agent_info["model_id"] is not None: model_info = get_model_by_model_id(agent_info["model_id"]) - agent_info["model_name"] = model_info.get("display_name", None) if model_info is not None else None + agent_info["model_name"] = model_info.get( + "display_name", None) if model_info is not None else None else: agent_info["model_name"] = None # Get business logic model display name from model_id if agent_info.get("business_logic_model_id") is not None: - business_logic_model_info = get_model_by_model_id(agent_info["business_logic_model_id"]) - agent_info["business_logic_model_name"] = business_logic_model_info.get("display_name", None) if business_logic_model_info is not None else None + business_logic_model_info = get_model_by_model_id( + agent_info["business_logic_model_id"]) + agent_info["business_logic_model_name"] = business_logic_model_info.get( + "display_name", None) if business_logic_model_info is not None else None elif "business_logic_model_name" not in agent_info: agent_info["business_logic_model_name"] = None + if not agent_info.get("prompt_template_id"): + agent_info["prompt_template_id"] = SYSTEM_PROMPT_TEMPLATE_ID + if not agent_info.get("prompt_template_name"): + agent_info["prompt_template_name"] = SYSTEM_PROMPT_TEMPLATE_NAME + if 
agent_info.get("group_ids") is not None: - agent_info["group_ids"] = convert_string_to_list(agent_info.get("group_ids")) + agent_info["group_ids"] = convert_string_to_list( + agent_info.get("group_ids")) # Check agent availability is_available, unavailable_reasons = check_agent_availability( @@ -805,6 +906,11 @@ async def get_creating_sub_agent_info_impl(authorization: str = Header(None)): async def update_agent_info_impl(request: AgentInfoRequest, authorization: str = Header(None)): user_id, tenant_id, _ = get_current_user_info(authorization) + prompt_template_id, prompt_template_name = get_prompt_template_summary( + template_id=request.prompt_template_id, + tenant_id=tenant_id, + user_id=user_id, + ) # If agent_id is None, create a new agent; otherwise, update existing agent_id: Optional[int] = request.agent_id @@ -822,6 +928,8 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str = "model_name": request.model_name, "business_logic_model_id": request.business_logic_model_id, "business_logic_model_name": request.business_logic_model_name, + "prompt_template_id": prompt_template_id, + "prompt_template_name": prompt_template_name, "max_steps": request.max_steps, "provide_run_summary": request.provide_run_summary, "duty_prompt": request.duty_prompt, @@ -834,6 +942,8 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str = agent_id = created["agent_id"] else: # Update agent + request.prompt_template_id = prompt_template_id + request.prompt_template_name = prompt_template_name update_agent(agent_id, request, user_id) except Exception as e: logger.error(f"Failed to update agent info: {str(e)}") @@ -901,9 +1011,11 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str = skill_info=SkillInstanceInfoRequest( skill_id=inst_skill_id, agent_id=agent_id, - skill_description=instance.get("skill_description"), + skill_description=instance.get( + "skill_description"), 
skill_content=instance.get("skill_content"), - enabled=False + enabled=False, + config_values=instance.get("config_values"), ), tenant_id=tenant_id, user_id=user_id @@ -917,7 +1029,8 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str = if inst.get("skill_id") == skill_id), None ) - skill_description = (existing_instance or {}).get("skill_description") + skill_description = (existing_instance or {}).get( + "skill_description") skill_content = (existing_instance or {}).get("skill_content") skill_db.create_or_update_skill_by_skill_info( skill_info=SkillInstanceInfoRequest( @@ -926,6 +1039,8 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str = skill_description=skill_description, skill_content=skill_content, enabled=True, + config_values=(existing_instance or {} + ).get("config_values"), ), tenant_id=tenant_id, user_id=user_id @@ -945,7 +1060,8 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str = while len(search_list): left_ele = search_list.popleft() if left_ele == agent_id: - raise ValueError("Circular dependency detected: Agent cannot be related to itself or create circular calls") + raise ValueError( + "Circular dependency detected: Agent cannot be related to itself or create circular calls") if left_ele in agent_id_set: continue else: @@ -980,7 +1096,8 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str = current_external_ids = { rel["external_agent_id"] for rel in current_relations } - new_external_ids = set(related_external_agent_ids) if related_external_agent_ids else set() + new_external_ids = set( + related_external_agent_ids) if related_external_agent_ids else set() # Find IDs to delete (in current but not in new) ids_to_delete = current_external_ids - new_external_ids @@ -1137,7 +1254,7 @@ async def export_agent_impl(agent_id: int, authorization: str = Header(None)) -> export_data = ExportAndImportDataFormat( agent_id=agent_id, 
agent_info=export_agent_dict, mcp_info=mcp_info_list) - return export_data.model_dump() + return json.dumps(export_data.model_dump()) async def export_agent_by_agent_id(agent_id: int, tenant_id: str, user_id: str) -> ExportAndImportAgentInfo: @@ -1150,9 +1267,26 @@ async def export_agent_by_agent_id(agent_id: int, tenant_id: str, user_id: str) main_agent_id=agent_id, tenant_id=tenant_id) tool_list = await create_tool_config_list(agent_id=agent_id, tenant_id=tenant_id, user_id=user_id) + # Collect skill names from skill instances + skill_names: List[str] = [] + try: + skill_instances = skill_db.query_skill_instances_by_agent_id( + agent_id=agent_id, tenant_id=tenant_id, version_no=0 + ) + for inst in skill_instances: + skill_id = inst.get("skill_id") + skill = skill_db.get_skill_by_id(skill_id, tenant_id) + if skill: + name = skill.get("name") + if name: + skill_names.append(name) + except Exception as e: + logger.warning( + f"Failed to collect skill instances for agent {agent_id}: {e}") + # Check if any tool is KnowledgeBaseSearchTool and set its metadata to empty dict for tool in tool_list: - if tool.class_name in ["KnowledgeBaseSearchTool", "AnalyzeTextFileTool", "AnalyzeImageTool", "DataMateSearchTool"]: + if tool.class_name in ["KnowledgeBaseSearchTool", "AnalyzeTextFileTool", "AnalyzeImageTool", "AnalyzeAudioTool", "AnalyzeVideoTool", "DataMateSearchTool"]: tool.metadata = {} # Get model_id and model display name from agent_info @@ -1160,14 +1294,17 @@ async def export_agent_by_agent_id(agent_id: int, tenant_id: str, user_id: str) model_display_name = None if model_id is not None: model_info = get_model_by_model_id(model_id) - model_display_name = model_info.get("display_name") if model_info is not None else None + model_display_name = model_info.get( + "display_name") if model_info is not None else None # Get business_logic_model_id and business logic model display name business_logic_model_id = agent_info.get("business_logic_model_id") 
business_logic_model_display_name = None if business_logic_model_id is not None: - business_logic_model_info = get_model_by_model_id(business_logic_model_id) - business_logic_model_display_name = business_logic_model_info.get("display_name") if business_logic_model_info is not None else None + business_logic_model_info = get_model_by_model_id( + business_logic_model_id) + business_logic_model_display_name = business_logic_model_info.get( + "display_name") if business_logic_model_info is not None else None agent_info = ExportAndImportAgentInfo(agent_id=agent_id, name=agent_info["name"], @@ -1189,14 +1326,19 @@ async def export_agent_by_agent_id(agent_id: int, tenant_id: str, user_id: str) model_id=model_id, model_name=model_display_name, business_logic_model_id=business_logic_model_id, - business_logic_model_name=business_logic_model_display_name) + business_logic_model_name=business_logic_model_display_name, + skill_names=skill_names, + prompt_template_id=agent_info.get( + "prompt_template_id"), + prompt_template_name=agent_info.get("prompt_template_name")) return agent_info async def import_agent_impl( agent_info: ExportAndImportDataFormat, authorization: str = Header(None), - force_import: bool = False + force_import: bool = False, + skill_name_to_id: Optional[Dict[str, int]] = None ): """ Import agent using DFS. @@ -1284,9 +1426,9 @@ async def import_agent_by_agent_id( enabled=True, params=tool.params)) # check the validity of the agent parameters - if import_agent_info.max_steps <= 0 or import_agent_info.max_steps > 20: + if import_agent_info.max_steps <= 0 or import_agent_info.max_steps > 30: raise ValueError( - f"Invalid max steps: {import_agent_info.max_steps}. max steps must be greater than 0 and less than 20.") + f"Invalid max steps: {import_agent_info.max_steps}. max steps must be greater than 0 and less than 30.") if not import_agent_info.name.isidentifier(): raise ValueError( f"Invalid agent name: {import_agent_info.name}. 
agent name must be a valid python variable name.") @@ -1322,6 +1464,8 @@ async def import_agent_by_agent_id( "model_name": import_agent_info.model_name, "business_logic_model_id": business_logic_model_id, "business_logic_model_name": import_agent_info.business_logic_model_name, + "prompt_template_id": import_agent_info.prompt_template_id or SYSTEM_PROMPT_TEMPLATE_ID, + "prompt_template_name": import_agent_info.prompt_template_name or SYSTEM_PROMPT_TEMPLATE_NAME, "max_steps": import_agent_info.max_steps, "provide_run_summary": import_agent_info.provide_run_summary, "duty_prompt": import_agent_info.duty_prompt, @@ -1347,7 +1491,8 @@ async def import_agent_by_agent_id( release_note="Initial version from Agent Market" ) except Exception as e: - logger.warning(f"Failed to auto-publish version v1 for agent {new_agent_id}: {str(e)}") + logger.warning( + f"Failed to auto-publish version v1 for agent {new_agent_id}: {str(e)}") return new_agent_id @@ -1376,12 +1521,11 @@ async def clear_agent_new_mark_impl(agent_id: int, tenant_id: str, user_id: str) user_id (str): User ID (for audit purposes) """ rowcount = clear_agent_new_mark(agent_id, tenant_id, user_id) - logger.info(f"clear_agent_new_mark_impl called for agent_id={agent_id}, tenant_id={tenant_id}, user_id={user_id}, affected_rows={rowcount}") + logger.info( + f"clear_agent_new_mark_impl called for agent_id={agent_id}, tenant_id={tenant_id}, user_id={user_id}, affected_rows={rowcount}") return rowcount - - async def list_all_agent_info_impl(tenant_id: str, user_id: str) -> list[dict]: """ list all agent info @@ -1427,7 +1571,8 @@ async def list_all_agent_info_impl(tenant_id: str, user_id: str) -> list[dict]: # Apply visibility filter for DEV/USER based on group overlap if not can_edit_all: - agent_group_ids = set(convert_string_to_list(agent.get("group_ids"))) + agent_group_ids = set( + convert_string_to_list(agent.get("group_ids"))) ingroup_permission = agent.get("ingroup_permission") is_creator = 
str(agent.get("created_by")) == str(user_id) # Hide agent if: no group overlap OR (ingroup_permission is PRIVATE AND user is not creator) @@ -1455,23 +1600,24 @@ async def list_all_agent_info_impl(tenant_id: str, user_id: str) -> list[dict]: simple_agent_list: list[dict] = [] for entry in enriched_agents: agent = entry["raw_agent"] - unavailable_reasons = list(dict.fromkeys(entry["unavailable_reasons"])) + unavailable_reasons = list( + dict.fromkeys(entry["unavailable_reasons"])) model_id = agent.get("model_id") model_info = None if model_id is not None: if model_id not in model_cache: - model_cache[model_id] = get_model_by_model_id(model_id, tenant_id) + model_cache[model_id] = get_model_by_model_id( + model_id, tenant_id) model_info = model_cache.get(model_id) - # Permission logic: - # - If creator or can_edit_all: PERMISSION_EDIT - # - Otherwise: use ingroup_permission, default to PERMISSION_READ if None - if can_edit_all or str(agent.get("created_by")) == str(user_id): - permission = PERMISSION_EDIT - else: - ingroup_permission = agent.get("ingroup_permission") - permission = ingroup_permission if ingroup_permission is not None else PERMISSION_READ + # Permission logic (ASSET_OWNER-scoped + non-ASSET_OWNER role => READ_ONLY first): + permission = resolve_agent_list_permission( + user_role=user_role, + agent=agent, + user_id=user_id, + can_edit_all=can_edit_all, + ) simple_agent_list.append({ "agent_id": agent["agent_id"], @@ -1533,8 +1679,9 @@ def _mark_duplicates(groups: dict[str, list[dict]], reason_key: str) -> None: for duplicate_entry in sorted_entries[1:]: duplicate_entry["unavailable_reasons"].append(reason_key) - _mark_duplicates(name_groups, "duplicate_name") - _mark_duplicates(display_name_groups, "duplicate_display_name") + _mark_duplicates(name_groups, AgentUnavailableReason.DUPLICATE_NAME) + _mark_duplicates(display_name_groups, + AgentUnavailableReason.DUPLICATE_DISPLAY_NAME) def _collect_model_availability_reasons(agent: dict, tenant_id: str, 
model_cache: Dict[int, Optional[dict]]) -> list[str]: @@ -1546,7 +1693,7 @@ def _collect_model_availability_reasons(agent: dict, tenant_id: str, model_cache model_id=agent.get("model_id"), tenant_id=tenant_id, model_cache=model_cache, - reason_key="model_unavailable" + reason_key=AgentUnavailableReason.MODEL_UNAVAILABLE )) return reasons @@ -1604,15 +1751,17 @@ def check_agent_availability( agent_info = search_agent_info_by_agent_id(agent_id, tenant_id) if not agent_info: - return False, ["agent_not_found"] + return False, [AgentUnavailableReason.AGENT_NOT_FOUND] # Check tool availability - tool_info = search_tools_for_sub_agent(agent_id=agent_id, tenant_id=tenant_id) - tool_id_list = [tool["tool_id"] for tool in tool_info if tool.get("tool_id") is not None] + tool_info = search_tools_for_sub_agent( + agent_id=agent_id, tenant_id=tenant_id) + tool_id_list = [tool["tool_id"] + for tool in tool_info if tool.get("tool_id") is not None] if tool_id_list: tool_statuses = check_tool_is_available(tool_id_list) if not all(tool_statuses): - unavailable_reasons.append("tool_unavailable") + unavailable_reasons.append(AgentUnavailableReason.TOOL_UNAVAILABLE) # Check model availability model_reasons = _collect_model_availability_reasons( @@ -1689,7 +1838,8 @@ async def prepare_agent_run( ) # Mount conversation-level reusable ContextManager if enabled - cm_config = getattr(agent_run_info.agent_config, 'context_manager_config', None) + cm_config = getattr(agent_run_info.agent_config, + 'context_manager_config', None) if cm_config and cm_config.enabled: cm = agent_run_manager.get_or_create_context_manager( conversation_id=str(agent_request.conversation_id), @@ -1718,9 +1868,6 @@ def save_messages(agent_request, target: str, user_id: str, tenant_id: str, mess # Helper function for run_agent_stream, used to generate stream response with memory preprocess tokens -@monitoring_manager.monitor_endpoint( - "agent_service.generate_stream_with_memory", exclude_params=["authorization"] -) 
async def generate_stream_with_memory( agent_request: AgentRequest, user_id: str, @@ -1805,18 +1952,19 @@ def _memory_token(message_text: str) -> str: yield data_chunk except Exception as run_exc: logger.error( - f"Agent run error after memory failure: {str(run_exc)}") - # Emit an error chunk and terminate the stream immediately - error_payload = json.dumps( - {"type": "error", "content": str(run_exc)}, ensure_ascii=False) - yield f"data: {error_payload}\n\n" + "Agent run error after memory failure: %r", + run_exc, + exc_info=True, + ) + yield _safe_agent_stream_error_chunk() return - except Exception as e: - logger.error(f"Generate stream with memory error: {str(e)}") - # Emit an error chunk and terminate the stream immediately - error_payload = json.dumps( - {"type": "error", "content": str(e)}, ensure_ascii=False) - yield f"data: {error_payload}\n\n" + except Exception as stream_exc: + logger.error( + "Generate stream with memory error: %r", + stream_exc, + exc_info=True, + ) + yield _safe_agent_stream_error_chunk() return finally: # Always unregister preprocess task @@ -1824,7 +1972,6 @@ def _memory_token(message_text: str) -> str: # Helper function for run_agent_stream, used when user memory is disabled (no memory tokens) -@monitoring_manager.monitor_endpoint("agent_service.generate_stream_no_memory", exclude_params=["authorization"]) async def generate_stream_no_memory( agent_request: AgentRequest, user_id: str, @@ -1834,7 +1981,6 @@ async def generate_stream_no_memory( """Stream agent responses without any memory preprocessing tokens or fallback logic.""" # Prepare run info respecting memory disabled (honor provided user_id/tenant_id) - monitoring_manager.add_span_event("generate_stream_no_memory.started") agent_run_info, memory_context = await prepare_agent_run( agent_request=agent_request, user_id=user_id, @@ -1842,10 +1988,7 @@ async def generate_stream_no_memory( language=language, allow_memory_search=False, ) - 
monitoring_manager.add_span_event("generate_stream_no_memory.completed") - monitoring_manager.add_span_event( - "generate_stream_no_memory.streaming.started") async for data_chunk in _stream_agent_chunks( agent_request=agent_request, user_id=user_id, @@ -1854,11 +1997,8 @@ async def generate_stream_no_memory( memory_ctx=memory_context, ): yield data_chunk - monitoring_manager.add_span_event( - "generate_stream_no_memory.streaming.completed") -@monitoring_manager.monitor_endpoint("agent_service.run_agent_stream", exclude_params=["authorization"]) async def run_agent_stream( agent_request: AgentRequest, http_request: Request, @@ -1871,27 +2011,6 @@ async def run_agent_stream( Start an agent run and stream responses. If user_id or tenant_id is provided, authorization will be overridden. (Useful in northbound apis) """ - import time - - # Add initial span attributes for tracking - monitoring_manager.set_span_attributes( - agent_id=agent_request.agent_id, - conversation_id=agent_request.conversation_id, - is_debug=agent_request.is_debug, - skip_user_save=skip_user_save, - has_override_user_id=user_id is not None, - has_override_tenant_id=tenant_id is not None, - query_length=len(agent_request.query) if agent_request.query else 0, - history_count=len( - agent_request.history) if agent_request.history else 0, - minio_files_count=len( - agent_request.minio_files) if agent_request.minio_files else 0 - ) - - # Step 1: Resolve user tenant language - resolve_start_time = time.time() - monitoring_manager.add_span_event("user_resolution.started") - resolved_user_id, resolved_tenant_id, language = _resolve_user_tenant_language( authorization=authorization, http_request=http_request, @@ -1899,32 +2018,7 @@ async def run_agent_stream( tenant_id=tenant_id, ) - resolve_duration = time.time() - resolve_start_time - monitoring_manager.add_span_event("user_resolution.completed", { - "duration": resolve_duration, - "user_id": resolved_user_id, - "tenant_id": resolved_tenant_id, - 
"language": language - }) - monitoring_manager.set_span_attributes( - resolved_user_id=resolved_user_id, - resolved_tenant_id=resolved_tenant_id, - language=language, - user_resolution_duration=resolve_duration - ) - # Expose resolved identity to downstream monitoring (LLM-level record writing) - set_monitoring_context( - tenant_id=resolved_tenant_id, - user_id=resolved_user_id, - agent_id=agent_request.agent_id, - conversation_id=agent_request.conversation_id, - ) - - # Step 2: Save user message (if needed) if not agent_request.is_debug and not skip_user_save: - save_start_time = time.time() - monitoring_manager.add_span_event("user_message_save.started") - save_messages( agent_request, target=MESSAGE_ROLE["USER"], @@ -1932,56 +2026,39 @@ async def run_agent_stream( tenant_id=resolved_tenant_id, ) - save_duration = time.time() - save_start_time - monitoring_manager.add_span_event("user_message_save.completed", { - "duration": save_duration - }) - monitoring_manager.set_span_attributes( - user_message_saved=True, - user_message_save_duration=save_duration - ) - else: - monitoring_manager.add_span_event("user_message_save.skipped", { - "reason": "debug_mode" if agent_request.is_debug else "skip_user_save_flag" - }) - monitoring_manager.set_span_attributes(user_message_saved=False) - - # Step 3: Build memory context (skip for debug mode) - memory_start_time = time.time() - monitoring_manager.add_span_event("memory_context_build.started") - memory_ctx_preview = build_memory_context( resolved_user_id, resolved_tenant_id, agent_request.agent_id, skip_query=agent_request.is_debug ) - - memory_duration = time.time() - memory_start_time memory_enabled = memory_ctx_preview.user_config.memory_switch - monitoring_manager.add_span_event("memory_context_build.completed", { - "duration": memory_duration, - "memory_enabled": memory_enabled, - "agent_share_option": getattr(memory_ctx_preview.user_config, "agent_share_option", "unknown"), - "debug_mode": agent_request.is_debug - }) 
- monitoring_manager.set_span_attributes( + + agent_metadata = monitoring_manager.bind_agent_context(AgentRunMetadata( + agent_id=agent_request.agent_id, + conversation_id=agent_request.conversation_id, + user_id=resolved_user_id, + tenant_id=resolved_tenant_id, + query=agent_request.query, + is_debug=agent_request.is_debug, + language=language, memory_enabled=memory_enabled, - memory_context_build_duration=memory_duration, - agent_share_option=getattr( - memory_ctx_preview.user_config, "agent_share_option", "unknown") - ) + history_count=len( + agent_request.history) if agent_request.history else 0, + minio_files_count=len( + agent_request.minio_files) if agent_request.minio_files else 0, + extra_metadata={ + "agent_share_option": getattr( + memory_ctx_preview.user_config, + "agent_share_option", + "unknown", + ), + "skip_user_save": skip_user_save, + "has_override_user_id": user_id is not None, + "has_override_tenant_id": tenant_id is not None, + }, + )) - # Step 4: Choose streaming strategy - strategy_start_time = time.time() use_memory_stream = memory_enabled and not agent_request.is_debug - monitoring_manager.add_span_event("streaming_strategy.selected", { - "strategy": "with_memory" if use_memory_stream else "no_memory", - "memory_enabled": memory_enabled, - "is_debug": agent_request.is_debug - }) - if use_memory_stream: - monitoring_manager.add_span_event( - "stream_generator.memory_stream.creating") stream_gen = generate_stream_with_memory( agent_request, user_id=resolved_user_id, @@ -1989,8 +2066,6 @@ async def run_agent_stream( language=language, ) else: - monitoring_manager.add_span_event( - "stream_generator.no_memory_stream.creating") stream_gen = generate_stream_no_memory( agent_request, user_id=resolved_user_id, @@ -1998,43 +2073,25 @@ async def run_agent_stream( language=language, ) - strategy_duration = time.time() - strategy_start_time - monitoring_manager.add_span_event("streaming_strategy.completed", { - "duration": strategy_duration, - 
"selected_strategy": "with_memory" if use_memory_stream else "no_memory" - }) - monitoring_manager.set_span_attributes( - streaming_strategy=( - "with_memory" if use_memory_stream else "no_memory"), - strategy_selection_duration=strategy_duration - ) - - # Step 5: Create streaming response - response_start_time = time.time() - monitoring_manager.add_span_event("streaming_response.creating") + async def stream_with_agent_context(): + try: + with agent_monitoring_context(agent_metadata): + async for data_chunk in stream_gen: + yield data_chunk + except Exception as stream_exc: + logger.error( + "Agent stream response error: %r", + stream_exc, + exc_info=True, + ) + yield _safe_agent_stream_error_chunk() - response = StreamingResponse( - stream_gen, + return StreamingResponse( + stream_with_agent_context(), media_type="text/event-stream", headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}, ) - response_duration = time.time() - response_start_time - monitoring_manager.add_span_event("streaming_response.created", { - "duration": response_duration, - "media_type": "text/event-stream" - }) - monitoring_manager.set_span_attributes( - response_creation_duration=response_duration, - total_preparation_duration=(time.time() - resolve_start_time) - ) - - monitoring_manager.add_span_event("run_agent_stream.preparation_completed", { - "total_preparation_time": time.time() - resolve_start_time - }) - - return response - def stop_agent_tasks(conversation_id: int, user_id: str): """ @@ -2221,3 +2278,131 @@ def get_sub_agents_recursive(parent_agent_id: int, depth: int = 0, max_depth: in logger.exception( f"Failed to get agent call relationship for agent {agent_id}: {str(e)}") raise ValueError(f"Failed to get agent call relationship: {str(e)}") + + +async def export_agent_with_skills_impl(agent_id: int, authorization: str) -> dict: + """Export an agent, returning a ZIP if it has skill instances, otherwise plain JSON. 
+ + The response is either: + - A dict with {"_zip": True, "data": bytes, "filename": str} when the agent has skills + - A plain dict (JSON string) when the agent has no skills + """ + from services.skill_service import SkillService + + user_id, tenant_id, _ = get_current_user_info(authorization) + + skill_instances = skill_db.query_skill_instances_by_agent_id( + agent_id=agent_id, tenant_id=tenant_id, version_no=0 + ) + + if not skill_instances: + return await export_agent_impl(agent_id, authorization) + + skill_names = [] + for inst in skill_instances: + skill_id = inst.get("skill_id") + skill = skill_db.get_skill_by_id(skill_id, tenant_id) + if skill: + skill_names.append(skill.get("name")) + + if not skill_names: + return await export_agent_impl(agent_id, authorization) + + agent_json_str = await export_agent_impl(agent_id, authorization) + + skill_service = SkillService(tenant_id=tenant_id) + skill_zip_entries = skill_service.export_skills_by_names( + skill_names, tenant_id) + + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf: + zf.writestr("agent.json", agent_json_str) + for entry in skill_zip_entries: + skill_zip_bytes = base64.b64decode(entry["skill_zip_base64"]) + zf.writestr(f"skills/{entry['skill_name']}.zip", skill_zip_bytes) + + zip_buffer.seek(0) + zip_data = zip_buffer.read() + + agent_info = search_agent_info_by_agent_id( + agent_id=agent_id, tenant_id=tenant_id) + agent_name = agent_info.get( + "name", "anonymous") if agent_info else "anonymous" + + filename = f"{agent_name}.zip" + + return { + "_zip": True, + "data": zip_data, + "filename": filename + } + + +async def import_agent_with_skills_impl( + agent_info: "ExportAndImportDataFormat", + skills: List[SkillZipEntry], + authorization: str, + force_import: bool = False +): + """Import an agent with skills bundled from a ZIP export. + + For each skill in the bundle: + 1. Check if a skill with the same name already exists in the target tenant. + 2. 
If duplicates exist, raise SkillDuplicateError (do not create anything). + 3. If no duplicates, create the skill from ZIP bytes via SkillService. + 4. Create a SkillInstance linking the new skill_id to the new agent_id. + + Then proceeds with the standard agent import flow using the mapped skill IDs. + """ + from services.skill_service import SkillService + + user_id, tenant_id, _ = get_current_user_info(authorization) + + skill_name_to_zip_base64 = { + entry.skill_name: entry.skill_zip_base64 for entry in skills} + + existing_skills = skill_db.list_skills(tenant_id) + existing_skill_names = {s.get("name") for s in existing_skills} + + import_skill_names = set(skill_name_to_zip_base64.keys()) + duplicate_names = list(import_skill_names & existing_skill_names) + + if duplicate_names: + raise SkillDuplicateError(duplicate_names) + + skill_name_to_id: Dict[str, int] = {} + skill_service = SkillService(tenant_id=tenant_id) + + for skill_name, zip_base64 in skill_name_to_zip_base64.items(): + zip_bytes = base64.b64decode(zip_base64) + result = skill_service.create_skill_from_zip_bytes( + zip_bytes=zip_bytes, + skill_name=skill_name, + source="导入", + user_id=user_id, + tenant_id=tenant_id, + skip_duplicate_check=True + ) + skill_name_to_id[skill_name] = result.get("skill_id") + + agent_id_mapping = await import_agent_impl( + agent_info, authorization, force_import, + skill_name_to_id=skill_name_to_id + ) + + main_agent_id = agent_id_mapping.get(agent_info.agent_id) + if main_agent_id: + for skill_name, new_skill_id in skill_name_to_id.items(): + skill_db.create_or_update_skill_by_skill_info( + skill_info=SkillInstanceInfoRequest( + skill_id=new_skill_id, + agent_id=main_agent_id, + enabled=True, + version_no=0 + ), + tenant_id=tenant_id, + user_id=user_id, + version_no=0 + ) + + return agent_id_mapping diff --git a/backend/services/agent_version_service.py b/backend/services/agent_version_service.py index 69163dbc6..1f1c382d8 100644 --- 
a/backend/services/agent_version_service.py +++ b/backend/services/agent_version_service.py @@ -33,6 +33,7 @@ ) from database.model_management_db import get_model_by_model_id from utils.str_utils import convert_string_to_list +from consts.agent_unavailable_reasons import AgentUnavailableReason logger = logging.getLogger("agent_version_service") @@ -337,21 +338,18 @@ def _check_version_snapshot_availability( # Check if agent info exists if not agent_info: - return False, ["agent_not_found"] + return False, [AgentUnavailableReason.AGENT_NOT_FOUND] # Check model availability model_id = agent_info.get('model_id') if model_id is None or model_id == 0: - unavailable_reasons.append("model_not_configured") + unavailable_reasons.append(AgentUnavailableReason.MODEL_NOT_CONFIGURED) - # Check tools availability - if not tool_instances: - unavailable_reasons.append("no_tools") - else: - # Check if at least one tool is enabled + # Check tools availability (only when tools are configured) + if tool_instances: has_enabled_tool = any(t.get('enabled', True) for t in tool_instances) if not has_enabled_tool: - unavailable_reasons.append("all_tools_disabled") + unavailable_reasons.append(AgentUnavailableReason.ALL_TOOLS_DISABLED) return len(unavailable_reasons) == 0, unavailable_reasons @@ -387,6 +385,11 @@ def rollback_version_impl( if not target_agent: raise ValueError(f"Agent snapshot for version {target_version_no} not found") + # Ensure the draft still exists before attempting an in-place restore. 
+ draft_agent, _, _ = query_agent_draft(agent_id, tenant_id) + if not draft_agent: + raise ValueError("Agent draft not found") + # Get skill snapshots for target version from database import skill_db as skill_db_module target_skills = skill_db_module.query_skill_instances_by_agent_id( @@ -817,7 +820,7 @@ async def list_published_agents_impl( # Apply visibility filter for DEV/USER based on group overlap if not can_edit_all: agent_group_ids = set(convert_string_to_list(agent.get("group_ids"))) - is_creator = str(agent.get("created_by)) == str(user_id)")) + is_creator = str(agent.get("created_by")) == str(user_id) if not is_creator and len(user_group_ids.intersection(agent_group_ids)) == 0: continue diff --git a/backend/services/asset_owner_visibility.py b/backend/services/asset_owner_visibility.py new file mode 100644 index 000000000..24cb697b2 --- /dev/null +++ b/backend/services/asset_owner_visibility.py @@ -0,0 +1,104 @@ +"""ASSET_OWNER tenant visibility filters, feature flags, and response post-processing.""" + +from typing import Any, Dict, List, Optional + +from consts.const import ( + AGENT_PROMPTS_HIDDEN_FLAG, + ASSET_OWNER_ROLE, + ASSET_OWNER_TENANT_ID, + ENABLE_ASSET_OWNER_ROLE, + PERMISSION_EDIT, + PERMISSION_READ, +) +from consts.exceptions import ValidationError + + +_PROMPT_FIELDS = ("duty_prompt", "constraint_prompt", "few_shots_prompt") + + +ASSET_OWNER_RESOURCES_ROUTE = "/asset-owner-resources" + + +def is_asset_owner_enabled() -> bool: + """Return whether the ASSET_OWNER feature flag is enabled.""" + return ENABLE_ASSET_OWNER_ROLE + + +def require_asset_owner_enabled() -> None: + """Raise ValidationError when the ASSET_OWNER feature is disabled.""" + if not ENABLE_ASSET_OWNER_ROLE: + raise ValidationError("ASSET_OWNER feature is not enabled") + + +def filter_accessible_routes_for_asset_owner_feature( + accessible_routes: List[str], +) -> List[str]: + """Remove asset-owner nav route when the ASSET_OWNER feature flag is disabled.""" + if 
ENABLE_ASSET_OWNER_ROLE: + return accessible_routes + return [r for r in accessible_routes if r != ASSET_OWNER_RESOURCES_ROUTE] + + +def can_view_skill(caller_tenant_id: Optional[str], skill_tenant_id: Optional[str]) -> bool: + """ + Return True when the caller may view a skill and its files. + + ASSET_OWNER-scoped skills (tenant_id asset_owner_tenant_id or legacy "") are + visible only to callers in the ASSET_OWNER virtual tenant. + """ + + if skill_tenant_id == ASSET_OWNER_TENANT_ID: + return caller_tenant_id == ASSET_OWNER_TENANT_ID + return True + + +def resolve_agent_list_permission( + user_role: str, + agent: Dict[str, Any], + user_id: str, + can_edit_all: bool, +) -> str: + """ + Resolve list-item permission for an agent. + + Highest priority: ASSET_OWNER-scoped agents are READ_ONLY for callers whose + user_role is not ASSET_OWNER (overrides can_edit_all, creator, ingroup_permission). + """ + role = (user_role or "").upper() + if agent.get("tenant_id") == ASSET_OWNER_TENANT_ID and role != ASSET_OWNER_ROLE: + return PERMISSION_READ + if can_edit_all or str(agent.get("created_by")) == str(user_id): + return PERMISSION_EDIT + ingroup_permission = agent.get("ingroup_permission") + return ingroup_permission if ingroup_permission is not None else PERMISSION_READ + + +def apply_agent_detail_prompt_visibility( + caller_tenant_id: Optional[str], + agent_info: Dict[str, Any], +) -> Dict[str, Any]: + """ + Mask system prompt fields when a non-ASSET_OWNER caller views an ASSET_OWNER-scoped agent. + + Sets duty_prompt, constraint_prompt, and few_shots_prompt to None and adds + prompts_hidden=True so clients can render a permission-denied state. 
+ """ + result = dict(agent_info) + if caller_tenant_id == ASSET_OWNER_TENANT_ID: + return result + if result.get("tenant_id") != ASSET_OWNER_TENANT_ID: + return result + for field in _PROMPT_FIELDS: + result[field] = None + result[AGENT_PROMPTS_HIDDEN_FLAG] = True + return result + + +def postprocess_knowledge_visibility( + items: List[Dict[str, Any]], + caller_role: Optional[str], + caller_tenant_id: Optional[str], +) -> List[Dict[str, Any]]: + """Return knowledge records after visibility post-processing (no-op for now).""" + _ = (caller_role, caller_tenant_id) + return items diff --git a/backend/services/config_sync_service.py b/backend/services/config_sync_service.py index 0ed29bfc5..7feea9452 100644 --- a/backend/services/config_sync_service.py +++ b/backend/services/config_sync_service.py @@ -20,7 +20,7 @@ MODEL_ENGINE_ENABLED, TENANT_NAME ) -from database.model_management_db import get_model_id_by_display_name +from database.model_management_db import get_model_id_by_display_name, get_model_records from utils.config_utils import ( get_env_key, get_model_name_from_config, @@ -31,6 +31,20 @@ logger = logging.getLogger("config_sync_service") +def get_model_id_for_config(model_type: str, display_name: str, tenant_id: str) -> Optional[int]: + if not display_name: + return None + + records = get_model_records( + {"display_name": display_name, "model_type": model_type}, + tenant_id + ) + if records: + return records[0].get("model_id") + + return get_model_id_by_display_name(display_name, tenant_id) + + def handle_model_config(tenant_id: str, user_id: str, config_key: str, model_id: Optional[int], tenant_config_dict: dict) -> None: """ Handle model configuration updates, deletions, and settings operations @@ -98,8 +112,8 @@ async def save_config_impl(config, tenant_id, user_id): model_display_name = model_config.get("displayName") config_key = get_env_key(model_type) + "_ID" - model_id = get_model_id_by_display_name( - model_display_name, tenant_id) + model_id = 
get_model_id_for_config( + model_type, model_display_name, tenant_id) handle_model_config(tenant_id, user_id, config_key, model_id, tenant_config_dict) @@ -182,6 +196,7 @@ def build_models_config(tenant_id: str) -> dict: def build_model_config(model_config: dict) -> dict: if not model_config: return { + "id": None, "name": "", "displayName": "", "apiConfig": { @@ -191,6 +206,7 @@ def build_model_config(model_config: dict) -> dict: } config = { + "id": model_config.get("model_id"), "name": get_model_name_from_config(model_config) if model_config else "", "displayName": model_config.get("display_name", ""), "apiConfig": { @@ -202,9 +218,9 @@ def build_model_config(model_config: dict) -> dict: if "embedding" in model_config.get("model_type", ""): config["dimension"] = model_config.get("max_tokens", 0) - # Add STT model specific fields + # Add voice model specific fields (STT and TTS) model_type = model_config.get("model_type", "") - if model_type == "stt": + if model_type == "stt" or model_type == "tts": config["modelFactory"] = model_config.get("model_factory", "") config["modelAppid"] = model_config.get("model_appid", "") config["accessToken"] = model_config.get("access_token", "") diff --git a/backend/services/conversation_management_service.py b/backend/services/conversation_management_service.py index d5d4a85a4..302ec63a8 100644 --- a/backend/services/conversation_management_service.py +++ b/backend/services/conversation_management_service.py @@ -248,6 +248,8 @@ def call_llm_for_title(question: str, tenant_id: str, language: str = LANGUAGE[" display_name = model_config.get("display_name", "") if model_config else "" set_monitoring_operation("title_generation", display_name=display_name or None) + timeout_seconds = model_config.get("timeout_seconds") if model_config else None + # Create OpenAIModel instance llm = OpenAIModel( model_id=get_model_name_from_config(model_config) if model_config.get("model_name") else "", @@ -256,7 +258,9 @@ def 
call_llm_for_title(question: str, tenant_id: str, language: str = LANGUAGE[" temperature=0.7, top_p=0.95, model_factory=model_config.get("model_factory", None), - ssl_verify=model_config.get("ssl_verify", True) + ssl_verify=model_config.get("ssl_verify", True), + timeout_seconds=timeout_seconds, + stream=False, ) # Build messages - use new template variable 'question' instead of 'content' diff --git a/backend/services/data_process_service.py b/backend/services/data_process_service.py index a024089a3..ae3d35dcd 100644 --- a/backend/services/data_process_service.py +++ b/backend/services/data_process_service.py @@ -54,7 +54,7 @@ def __init__(self): self._inspector = None self._inspector_last_time = 0 - self._inspector_ttl = 60 # Inspector cache time in seconds + self._inspector_ttl = 300 # 5 minutes - inspector is expensive to create (ping all workers) self._inspector_lock = None self._inspector_lock = threading.Lock() @@ -105,7 +105,7 @@ async def stop(self): logger.info("Data processing service stopped") def _get_celery_inspector(self): - """Get Celery inspector""" + """Get Celery inspector (cached for performance)""" with self._inspector_lock: now = time.time() if self._inspector and now - self._inspector_last_time < self._inspector_ttl: @@ -117,9 +117,9 @@ def _get_celery_inspector(self): f"Celery broker URL is not configured properly, reconfiguring to {celery_app.conf.broker_url}") try: inspector = celery_app.control.inspect() - inspector.ping() self._inspector = inspector self._inspector_last_time = now + self._inspector_init_time = now return inspector except Exception as e: self._inspector = None @@ -142,11 +142,9 @@ async def get_all_tasks(self, filter: bool = True) -> List[Dict[str, Any]]: all_tasks = [] try: start_time = time.time() - logger.debug( - "Getting inspector to check for active and reserved tasks (concurrent)") + inspector_start = time.time() inspector = self._get_celery_inspector() - logger.debug( - f"⏰ Inspector initialization took 
{time.time() - start_time}s") + inspector_duration = time.time() - inspector_start # Collect task IDs from different sources and keep runtime metadata task_ids = set() @@ -171,18 +169,37 @@ def _normalize_runtime_meta(task: Dict[str, Any]) -> Dict[str, Any]: 'original_filename': kwargs.get('original_filename', ''), } + celery_start = time.time() + + # Use short timeout for inspector since workers can respond in ~0.1s + # Default 1s timeout is unnecessary and causes delay + short_timeout = 0.2 + def get_active(): - return inspector.active() + t = time.time() + # Create fresh inspector with short timeout for each call + short_inspector = celery_app.control.inspect(timeout=short_timeout) + result = short_inspector.active() + elapsed = time.time() - t + logger.info(f"[get_all_tasks] inspector.active() took {elapsed:.3f}s") + return result if result else {} def get_reserved(): - return inspector.reserved() + t = time.time() + short_inspector = celery_app.control.inspect(timeout=short_timeout) + result = short_inspector.reserved() + elapsed = time.time() - t + logger.info(f"[get_all_tasks] inspector.reserved() took {elapsed:.3f}s") + return result if result else {} + with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: future_active = executor.submit(get_active) future_reserved = executor.submit(get_reserved) - active_tasks_dict = future_active.result() - reserved_tasks_dict = future_reserved.result() - logger.debug( - f"⏰ Get active and reserved tasks (concurrent) took {time.time() - start_time}s") + active_tasks_dict = future_active.result(timeout=short_timeout + 0.5) + reserved_tasks_dict = future_reserved.result(timeout=short_timeout + 0.5) + celery_duration = time.time() - celery_start + if celery_duration > 0.5: + logger.warning(f"[get_all_tasks] Inspector took {celery_duration:.3f}s (expected <0.5s)") if active_tasks_dict: for worker, tasks in active_tasks_dict.items(): for task in tasks: @@ -199,23 +216,17 @@ def get_reserved(): # Keep active 
metadata if already present runtime_task_meta.setdefault(task_id, _normalize_runtime_meta(task)) - # Currently, we don't have scheduled tasks, so skip getting scheduled tasks here - start_time = time.time() - logger.debug("Getting task IDs from Redis backend") - # Also get task IDs from Redis backend (covers completed/failed tasks within expiry) + # Get task IDs from Redis backend (covers completed/failed tasks within expiry) try: redis_task_ids = get_all_task_ids_from_redis(self.redis_client) - logger.debug( - f"⏰ Get Redis task IDs took {time.time() - start_time}s") for task_id in redis_task_ids: - # Add to the set, duplicates will be handled task_ids.add(task_id) except Exception as redis_error: logger.warning( f"Failed to query Redis for stored task IDs: {str(redis_error)}") - logger.debug( - f"Total unique task IDs collected (inspector + Redis): {len(task_ids)}") + task_id_list = list(task_ids) + # Batch fetch all task info tasks = [get_task_info(task_id) for task_id in task_id_list] all_task_infos = await asyncio.gather(*tasks, return_exceptions=True) for idx, task_info in enumerate(all_task_infos): @@ -243,7 +254,6 @@ def get_reserved(): if not task_info.get('index_name'): continue all_tasks.append(task_info) - logger.debug(f"Retrieved {len(all_tasks)} tasks.") except Exception as e: logger.error(f"Error retrieving all tasks: {str(e)}") all_tasks = [] @@ -296,6 +306,17 @@ async def load_image(self, image_url: str) -> Optional[Image.Image]: async def _load_image(self, session: aiohttp.ClientSession, path: str) -> Optional[Image.Image]: """Internal method to load an image from various sources""" try: + if path.startswith('s3://'): + # Fetch from MinIO using s3://bucket/key + file_stream = get_file_stream(object_name=path) + if file_stream is None: + raise FileNotFoundError( + f"Unable to fetch file from URL: {path}") + file_data = file_stream.read() + image_based64_str = base64.b64encode( + file_data).decode('utf-8') + path = 
f"data:image/jpeg;base64,{image_based64_str}" + # Check if input is base64 encoded if path.startswith('data:image'): # Extract the base64 data after the comma @@ -504,6 +525,8 @@ async def create_batch_tasks_impl(self, authorization: Optional[str], request: B chunking_strategy = source_config.get('chunking_strategy') index_name = source_config.get('index_name') original_filename = source_config.get('original_filename') + embedding_model_id = source_config.get('embedding_model_id') + tenant_id = source_config.get('tenant_id') # Validate required fields if not source: @@ -522,7 +545,9 @@ async def create_batch_tasks_impl(self, authorization: Optional[str], request: B source_type=source_type, chunking_strategy=chunking_strategy, index_name=index_name, - original_filename=original_filename + original_filename=original_filename, + embedding_model_id=embedding_model_id, + tenant_id=tenant_id ).set(queue='process_q'), forward.s( index_name=index_name, @@ -600,7 +625,7 @@ async def process_uploaded_text_file(self, file_content: bytes, filename: str, c } async def convert_office_to_pdf_impl(self, object_name: str, pdf_object_name: str) -> None: - """Full conversion pipeline: download → convert → upload → validate → cleanup. + """Full conversion pipeline: download -> convert -> upload -> validate -> cleanup. All five steps run inside data-process so that LibreOffice only needs to be installed in this container. 
diff --git a/backend/services/datamate_service.py b/backend/services/datamate_service.py index 776e0eb1d..41858440b 100644 --- a/backend/services/datamate_service.py +++ b/backend/services/datamate_service.py @@ -51,7 +51,7 @@ async def _create_datamate_knowledge_records(knowledge_base_ids: List[str], "tenant_id": tenant_id, "user_id": user_id, # Use datamate as embedding model name - "embedding_model_name": embedding_model_names[i] + "embedding_model_name": embedding_model_names[i], } # Run synchronous database operation in executor to avoid blocking diff --git a/backend/services/file_management_service.py b/backend/services/file_management_service.py index b5cd048bf..b2850403d 100644 --- a/backend/services/file_management_service.py +++ b/backend/services/file_management_service.py @@ -10,6 +10,8 @@ from fastapi import UploadFile from consts.const import ( + ASSET_OWNER_ATTACHMENTS_PREFIX, + ASSET_OWNER_TENANT_ID, DATA_PROCESS_SERVICE, FILE_PREVIEW_SIZE_LIMIT, MAX_CONCURRENT_UPLOADS, @@ -51,13 +53,54 @@ logger = logging.getLogger("file_management_service") -def check_file_access(object_name: str, user_id: Optional[str]) -> bool: +def resolve_minio_upload_folder( + folder: Optional[str], + user_id: Optional[str] = None, + uploader_tenant_id: Optional[str] = None, +) -> str: + """Map caller context to the MinIO object prefix used for uploads. + + Resolution order (first match wins): + 1. Asset-owner tenant → ``attachments/asset_owner/{user_id}`` + 2. ``folder == "knowledge_base"`` → shared ``knowledge_base`` prefix + 3. Otherwise → per-user ``attachments/{user_id}`` when ``user_id`` is set + 4. Legacy fallback → ``folder`` if provided, else ``attachments`` + + Access control for reads is enforced separately; this function only + chooses the storage prefix. + + Args: + folder: Requested folder hint (e.g. ``"knowledge_base"`` or a legacy path). + user_id: Uploader user ID; required for user-scoped attachment paths. 
+ uploader_tenant_id: Uploader tenant ID; asset-owner tenants use a dedicated prefix. + + Returns: + Resolved MinIO folder prefix (no leading or trailing slash). + """ + if uploader_tenant_id == ASSET_OWNER_TENANT_ID: + return f"{ASSET_OWNER_ATTACHMENTS_PREFIX}/{user_id}" + + if folder == "knowledge_base": + return "knowledge_base" + + if user_id: + return f"attachments/{user_id}" + + return folder or "attachments" + + +def check_file_access( + object_name: str, + user_id: Optional[str], + caller_tenant_id: Optional[str] = None, +) -> bool: """ Check if user has permission to access the file. Access rules: - knowledge_base/*: All authenticated users can access - attachments/{user_id}/*: Only the owner (user_id) can access + - images_in_attachments/*: All authenticated users can access - preview/*: Accessible if the original file is accessible Args: @@ -70,10 +113,18 @@ def check_file_access(object_name: str, user_id: Optional[str]) -> bool: if not user_id: return False + if object_name.startswith(ASSET_OWNER_ATTACHMENTS_PREFIX): + return caller_tenant_id == ASSET_OWNER_TENANT_ID + if object_name.startswith("knowledge_base/"): # Knowledge base files: all authenticated users can access return True + if object_name.startswith("images_in_attachments/"): + # Extracted image files used by knowledge-base image chunks. + # Keep them readable for authenticated users to avoid broken image citations. + return True + # Check if file is in user's attachments folder # Pattern: attachments/{user_id}/* if object_name.startswith(f"attachments/{user_id}/"): @@ -89,21 +140,33 @@ def check_file_access(object_name: str, user_id: Optional[str]) -> bool: return False -def check_file_access_batch(object_names: List[str], user_id: Optional[str]) -> Dict[str, bool]: +def check_file_access_batch( + object_names: List[str], + user_id: Optional[str], + caller_tenant_id: Optional[str] = None, +) -> Dict[str, bool]: """ Batch check file access permissions. 
Args: object_names: List of file object names user_id: Current user ID + caller_tenant_id: Caller's tenant ID for ASSET_OWNER path checks Returns: Dict mapping object_name to access permission (True/False) """ - return {obj_name: check_file_access(obj_name, user_id) for obj_name in object_names} + return { + obj_name: check_file_access(obj_name, user_id, caller_tenant_id) + for obj_name in object_names + } -def validate_s3_url_access(object_name: str, user_id: Optional[str]) -> None: +def validate_s3_url_access( + object_name: str, + user_id: Optional[str], + caller_tenant_id: Optional[str] = None, +) -> None: """ Validate if user has permission to access the S3 URL. @@ -117,12 +180,18 @@ def validate_s3_url_access(object_name: str, user_id: Optional[str]) -> None: if not user_id: raise PermissionError("User authentication required to access files") - if not check_file_access(object_name, user_id): - logger.warning(f"[validate_s3_url_access] Access denied: object_name={object_name}, user_id={user_id}") - raise PermissionError(f"Access denied: You don't have permission to access this file ({object_name})") + if not check_file_access(object_name, user_id, caller_tenant_id): + logger.warning( + f"[validate_s3_url_access] Access denied: object_name={object_name}, user_id={user_id}") + raise PermissionError( + f"Access denied: You don't have permission to access this file ({object_name})") -def validate_urls_access(urls: List[str], user_id: Optional[str]) -> None: +def validate_urls_access( + urls: List[str], + user_id: Optional[str], + caller_tenant_id: Optional[str] = None, +) -> None: """ Validate if user has permission to access the given URLs. 
@@ -147,19 +216,27 @@ def validate_urls_access(urls: List[str], user_id: Optional[str]) -> None: if url.startswith("s3://"): try: _, object_name = parse_s3_url(url) - validate_s3_url_access(object_name, user_id) + validate_s3_url_access(object_name, user_id, caller_tenant_id) except ValueError as e: - logger.warning(f"[validate_urls_access] Failed to parse S3 URL: {url}, error: {e}") + logger.warning( + f"[validate_urls_access] Failed to parse S3 URL: {url}, error: {e}") raise PermissionError(f"Invalid S3 URL format: {url}") elif url.startswith("/") and not url.startswith("//"): # Handle /bucket/key format (absolute path style) parts = url.strip("/").split("/", 1) if len(parts) == 2: bucket, object_name = parts - validate_s3_url_access(object_name, user_id) + validate_s3_url_access(object_name, user_id, caller_tenant_id) -async def upload_files_impl(destination: str, file: List[UploadFile], folder: str = None, index_name: Optional[str] = None, user_id: Optional[str] = None) -> tuple: +async def upload_files_impl( + destination: str, + file: List[UploadFile], + folder: str = None, + index_name: Optional[str] = None, + user_id: Optional[str] = None, + uploader_tenant_id: Optional[str] = None, +) -> tuple: """ Upload files to local storage or MinIO based on destination. 
@@ -169,6 +246,7 @@ async def upload_files_impl(destination: str, file: List[UploadFile], folder: st folder: Folder name for MinIO uploads index_name: Knowledge base index for conflict resolution user_id: User ID for attachment path isolation + uploader_tenant_id: Uploader tenant ID (ASSET_OWNER uses dedicated prefix) Returns: tuple: (errors, uploaded_file_paths, uploaded_filenames) @@ -195,19 +273,8 @@ async def upload_files_impl(destination: str, file: List[UploadFile], folder: st errors.append(f"Failed to save file: {f.filename}") elif destination == "minio": - # Determine actual folder path based on file type - # knowledge_base: accessible by all authenticated users - # other folders (attachments): user-isolated path (attachments/{user_id}/...) - if folder == "knowledge_base": - actual_folder = "knowledge_base" - else: - # User isolation for personal attachments - if user_id: - actual_folder = f"attachments/{user_id}" - else: - # Fallback to old behavior if no user_id provided - actual_folder = folder or "attachments" - + actual_folder = resolve_minio_upload_folder( + folder, user_id, uploader_tenant_id) minio_results = await upload_to_minio(files=file, folder=actual_folder) for result in minio_results: if result.get("success"): @@ -261,18 +328,26 @@ def make_unique_names(original_names: List[str], taken_lower: set) -> List[str]: return errors, uploaded_file_paths, uploaded_filenames -async def upload_to_minio(files: List[UploadFile], folder: str, user_id: Optional[str] = None) -> List[dict]: +async def upload_to_minio( + files: List[UploadFile], + folder: str, + user_id: Optional[str] = None, + uploader_tenant_id: Optional[str] = None, +) -> List[dict]: """ Helper function to upload files to MinIO and return results. 
Args: files: List of files to upload - folder: Storage folder path (will be prefixed with user_id if user_id is provided for attachments) - user_id: User ID for attachment path isolation + folder: Storage folder path or resolved MinIO prefix + user_id: User ID for attachment path isolation when folder is generic + uploader_tenant_id: Uploader tenant ID for ASSET_OWNER attachment prefix Returns: List of upload results """ + actual_folder = resolve_minio_upload_folder( + folder, user_id, uploader_tenant_id) results = [] for f in files: try: @@ -282,17 +357,6 @@ async def upload_to_minio(files: List[UploadFile], folder: str, user_id: Optiona # Convert file content to BytesIO object file_obj = BytesIO(file_content) - # Determine actual folder path - # knowledge_base: no user isolation - # other folders: append user_id to path for isolation - if folder == "knowledge_base": - actual_folder = "knowledge_base" - else: - if user_id: - actual_folder = f"attachments/{user_id}" - else: - actual_folder = folder or "attachments" - # Upload file result = upload_fileobj( file_obj=file_obj, @@ -352,6 +416,8 @@ def get_llm_model(tenant_id: str): # Get the tenant config main_model_config = tenant_config_manager.get_model_config( key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id) + timeout_seconds = main_model_config.get( + "timeout_seconds") if main_model_config else None long_text_to_text_model = OpenAILongContextModel( observer=MessageObserver(), model_id=get_model_name_from_config(main_model_config), @@ -359,6 +425,7 @@ def get_llm_model(tenant_id: str): api_key=main_model_config.get("api_key"), max_context_tokens=main_model_config.get("max_tokens"), ssl_verify=main_model_config.get("ssl_verify", True), + timeout_seconds=timeout_seconds, ) return long_text_to_text_model @@ -390,7 +457,8 @@ async def resolve_preview_file(object_name: str) -> Tuple[str, str, int]: # Office documents - convert to PDF with caching elif content_type in OFFICE_MIME_TYPES: - name_without_ext = 
object_name.rsplit('.', 1)[0] if '.' in object_name else object_name + name_without_ext = object_name.rsplit( + '.', 1)[0] if '.' in object_name else object_name hash_suffix = hashlib.md5(object_name.encode()).hexdigest()[:8] pdf_object_name = f"preview/converted/{name_without_ext}_{hash_suffix}.pdf" temp_pdf_object_name = f"preview/converting/{name_without_ext}_{hash_suffix}.pdf.tmp" @@ -404,7 +472,8 @@ async def resolve_preview_file(object_name: str) -> Tuple[str, str, int]: # Unsupported file type else: - raise UnsupportedFileTypeException(f"Unsupported file type for preview: {content_type}") + raise UnsupportedFileTypeException( + f"Unsupported file type for preview: {content_type}") def get_preview_stream(actual_object_name: str, start: Optional[int] = None, end: Optional[int] = None): @@ -428,7 +497,8 @@ def get_preview_stream(actual_object_name: str, start: Optional[int] = None, end stream = get_file_range(actual_object_name, start, end) if stream is None: - raise NotFoundException("File not found or failed to read from storage") + raise NotFoundException( + "File not found or failed to read from storage") return stream @@ -442,7 +512,8 @@ def _is_pdf_cache_valid(pdf_object_name: str) -> bool: # Verify the cached file is readable by fetching a small range stream = get_file_range(pdf_object_name, 0, 0) if stream is None: - logger.warning(f"Corrupted cache detected (cannot read), deleting: {pdf_object_name}") + logger.warning( + f"Corrupted cache detected (cannot read), deleting: {pdf_object_name}") delete_file(pdf_object_name) return False @@ -451,7 +522,8 @@ def _is_pdf_cache_valid(pdf_object_name: str) -> bool: try: close_fn() except Exception as e: - logger.warning(f"Failed to close cache probe stream for {pdf_object_name}: {str(e)}") + logger.warning( + f"Failed to close cache probe stream for {pdf_object_name}: {str(e)}") return True @@ -504,7 +576,8 @@ async def _convert_office_to_cached_pdf( ) # Atomic move from temp to final location, then clean up 
temp - copy_result = copy_file(source_object=temp_pdf_object_name, dest_object=pdf_object_name) + copy_result = copy_file( + source_object=temp_pdf_object_name, dest_object=pdf_object_name) if not copy_result.get('success'): logger.error( "Failed to finalize converted PDF cache: object=%s, temp=%s, dest=%s, error=%s", @@ -513,7 +586,8 @@ async def _convert_office_to_cached_pdf( pdf_object_name, copy_result.get('error', 'Unknown error'), ) - raise RuntimeError("Failed to finalize converted PDF cache") + raise RuntimeError( + "Failed to finalize converted PDF cache") delete_file(temp_pdf_object_name) except Exception as e: @@ -522,7 +596,8 @@ async def _convert_office_to_cached_pdf( logger.error(f"Office conversion failed: {str(e)}") if isinstance(e, OfficeConversionException): raise - raise OfficeConversionException("Office file conversion failed") from e + raise OfficeConversionException( + "Office file conversion failed") from e finally: # Clean up the file lock (prevents memory leak for many unique files) async with _conversion_locks_guard: diff --git a/backend/services/haotian_service.py b/backend/services/haotian_service.py index a49079ec7..4d86823b5 100644 --- a/backend/services/haotian_service.py +++ b/backend/services/haotian_service.py @@ -11,6 +11,8 @@ logger = logging.getLogger("haotian_service") +_DEFAULT_KNOWLEDGE_BASE_ID = "a8d68fbf-bd6e-5461-a9d1-cf1bb3522e38" + def _normalize_list_payload(raw: Dict[str, Any]) -> Dict[str, Any]: """ @@ -24,7 +26,7 @@ def _normalize_list_payload(raw: Dict[str, Any]) -> Dict[str, Any]: ] } - This function also filters out knowledge sets with name == "Public". + When dify_dataset_id is empty or "null", it is replaced with the default ID.
""" knowledge_sets = raw.get("knowledge_sets", []) if not isinstance(knowledge_sets, list): @@ -35,7 +37,7 @@ def _normalize_list_payload(raw: Dict[str, Any]) -> Dict[str, Any]: if not isinstance(ks, dict): continue set_name = str(ks.get("name", "") or "").strip() - if not set_name or set_name == "Public": + if not set_name: continue bases = ks.get("knowledge_bases", []) @@ -48,15 +50,18 @@ def _normalize_list_payload(raw: Dict[str, Any]) -> Dict[str, Any]: continue dataset_id = str(kb.get("dify_dataset_id", "") or "").strip() kb_name = str(kb.get("name", "") or "").strip() - if not dataset_id or not kb_name: + if not kb_name: continue + if dataset_id == "null" or not dataset_id: + dataset_id = _DEFAULT_KNOWLEDGE_BASE_ID normalized_bases.append( {"dify_dataset_id": dataset_id, "name": kb_name} ) - normalized_sets.append( - {"name": set_name, "knowledge_bases": normalized_bases} - ) + if normalized_bases: + normalized_sets.append( + {"name": set_name, "knowledge_bases": normalized_bases} + ) return {"knowledge_sets": normalized_sets} @@ -77,7 +82,7 @@ async def fetch_haotian_knowledge_sets_impl( ) headers = {"Authorization": external_authorization} - async with httpx.AsyncClient(timeout=timeout_s, follow_redirects=True) as client: + async with httpx.AsyncClient(timeout=timeout_s, follow_redirects=True, trust_env=False) as client: resp = await client.get(list_url, headers=headers) if resp.status_code >= 400: raise RuntimeError( diff --git a/backend/services/image_service.py b/backend/services/image_service.py index 8decbd541..8a924e9cc 100644 --- a/backend/services/image_service.py +++ b/backend/services/image_service.py @@ -31,7 +31,11 @@ async def proxy_image_impl(decoded_url: str): def get_vlm_model(tenant_id: str): - # Get the tenant config + """Return the configured image understanding model for AnalyzeImageTool. 
+ + The first multimodal model slot is still stored under MODEL_CONFIG_MAPPING["vlm"] + for compatibility, but it is the user-facing image understanding configuration. + """ vlm_model_config = tenant_config_manager.get_model_config( key=MODEL_CONFIG_MAPPING["vlm"], tenant_id=tenant_id) if not vlm_model_config: @@ -48,3 +52,27 @@ def get_vlm_model(tenant_id: str): max_tokens=512, ssl_verify=vlm_model_config.get("ssl_verify", True), ) + + +def get_image_understanding_model(tenant_id: str): + return get_vlm_model(tenant_id=tenant_id) + + +def get_video_understanding_model(tenant_id: str): + """Return the configured video understanding model for multimodal tools.""" + vlm_model_config = tenant_config_manager.get_model_config( + key=MODEL_CONFIG_MAPPING["vlm3"], tenant_id=tenant_id) + if not vlm_model_config: + return None + return OpenAIVLModel( + observer=MessageObserver(), + model_id=get_model_name_from_config( + vlm_model_config) if vlm_model_config else "", + api_base=vlm_model_config.get("base_url", ""), + api_key=vlm_model_config.get("api_key", ""), + temperature=0.7, + top_p=0.7, + frequency_penalty=0.5, + max_tokens=512, + ssl_verify=vlm_model_config.get("ssl_verify", True), + ) diff --git a/backend/services/invitation_service.py b/backend/services/invitation_service.py index 58a45d369..4011c67cc 100644 --- a/backend/services/invitation_service.py +++ b/backend/services/invitation_service.py @@ -19,8 +19,15 @@ ) from database.user_tenant_db import get_user_tenant_by_user_id from database.group_db import query_group_ids_by_user +from database.role_permission_db import check_role_permission +from consts.const import ( + ASSET_OWNER_TENANT_ID, + ASSET_OWNER_INVITE_CODE_TYPE, + ENABLE_ASSET_OWNER_ROLE, +) from consts.exceptions import NotFoundException, UnauthorizedError, DuplicateError from services.group_service import get_tenant_default_group_id +from services.asset_owner_visibility import require_asset_owner_enabled from utils.str_utils import 
convert_string_to_list logger = logging.getLogger(__name__) @@ -41,7 +48,7 @@ def create_invitation_code( Args: tenant_id (str): Tenant ID - code_type (str): Invitation code type (ADMIN_INVITE, DEV_INVITE, USER_INVITE) + code_type (str): Invitation code type (ADMIN_INVITE, DEV_INVITE, USER_INVITE, ASSET_OWNER_INVITE) invitation_code (Optional[str]): Invitation code, auto-generated if None group_ids (Optional[List[int]]): Associated group IDs capacity (int): Invitation code capacity @@ -58,9 +65,21 @@ def create_invitation_code( ValueError: When code_type is invalid """ # Validate code_type - valid_code_types = ["ADMIN_INVITE", "DEV_INVITE", "USER_INVITE"] + valid_code_types = [ + "ADMIN_INVITE", + "DEV_INVITE", + "USER_INVITE", + ASSET_OWNER_INVITE_CODE_TYPE, + ] + if ENABLE_ASSET_OWNER_ROLE: + valid_code_types.append(ASSET_OWNER_INVITE_CODE_TYPE) if code_type not in valid_code_types: - raise ValueError(f"Invalid code_type: {code_type}. Must be one of {valid_code_types}") + raise ValueError( + f"Invalid code_type: {code_type}. 
Must be one of {valid_code_types}") + + if code_type == ASSET_OWNER_INVITE_CODE_TYPE and not ENABLE_ASSET_OWNER_ROLE: + raise UnauthorizedError( + "ASSET_OWNER feature is not enabled") # Get user information user_info = get_user_tenant_by_user_id(user_id) @@ -70,10 +89,16 @@ def create_invitation_code( user_role = user_info.get("user_role", "USER") # Check permission based on code_type - if code_type == "ADMIN_INVITE" and user_role not in ["SU"]: - raise UnauthorizedError(f"User role {user_role} not authorized to create ADMIN_INVITE codes") + if code_type in ["ADMIN_INVITE", ASSET_OWNER_INVITE_CODE_TYPE] and user_role not in ["SU"]: + raise UnauthorizedError( + f"User role {user_role} not authorized to create ADMIN_INVITE codes") elif code_type in ["DEV_INVITE", "USER_INVITE"] and user_role not in ["SU", "ADMIN"]: - raise UnauthorizedError(f"User role {user_role} not authorized to create {code_type} codes") + raise UnauthorizedError( + f"User role {user_role} not authorized to create {code_type} codes") + + if code_type == ASSET_OWNER_INVITE_CODE_TYPE: + tenant_id = ASSET_OWNER_TENANT_ID + group_ids = [] # Set default group_ids based on code_type if not provided if group_ids is None: @@ -95,7 +120,8 @@ def create_invitation_code( # Check if invitation code already exists if query_invitation_by_code(invitation_code): - raise DuplicateError(f"Invitation code '{invitation_code}' already exists") + raise DuplicateError( + f"Invitation code '{invitation_code}' already exists") # Create invitation (status will be set automatically) invitation_id = add_invitation( @@ -112,11 +138,13 @@ def create_invitation_code( # Automatically update status based on expiry date and capacity update_invitation_code_status(invitation_id) - logger.info(f"Created invitation code {invitation_code} (type: {code_type}) for tenant {tenant_id} by user {user_id}") + logger.info( + f"Created invitation code {invitation_code} (type: {code_type}) for tenant {tenant_id} by user {user_id}") # Get the 
final invitation info with correct status invitation_info = query_invitation_by_id(invitation_id) - normalized_info = _normalize_invitation_data(invitation_info) if invitation_info else None + normalized_info = _normalize_invitation_data( + invitation_info) if invitation_info else None return { "invitation_id": invitation_id, @@ -154,8 +182,18 @@ def update_invitation_code( raise UnauthorizedError(f"User {user_id} not found") user_role = user_info.get("user_role", "USER") - if user_role not in ["SU", "ADMIN"]: - raise UnauthorizedError(f"User role {user_role} not authorized to update invitation codes") + + invitation_info = query_invitation_by_id(invitation_id) + if not invitation_info: + raise NotFoundException(f"Invitation {invitation_id} not found") + + code_type = invitation_info.get("code_type") + if code_type == ASSET_OWNER_INVITE_CODE_TYPE and user_role not in ["SU"]: + raise UnauthorizedError( + f"User role {user_role} not authorized to update invitation codes") + elif user_role not in ["SU", "ADMIN"]: + raise UnauthorizedError( + f"User role {user_role} not authorized to update invitation codes") # Update invitation code success = modify_invitation( @@ -165,7 +203,8 @@ def update_invitation_code( ) if success: - logger.info(f"Updated invitation code {invitation_id} by user {user_id}") + logger.info( + f"Updated invitation code {invitation_id} by user {user_id}") # Automatically update status after successful update update_invitation_code_status(invitation_id) @@ -193,15 +232,19 @@ def delete_invitation_code(invitation_id: int, user_id: str) -> bool: raise UnauthorizedError(f"User {user_id} not found") user_role = user_info.get("user_role", "USER") - if user_role not in ["SU", "ADMIN"]: - raise UnauthorizedError( - f"User role {user_role} not authorized to delete invitation codes") - # Check if invitation exists invitation_info = query_invitation_by_id(invitation_id) if not invitation_info: raise NotFoundException(f"Invitation {invitation_id} not found") + 
code_type = invitation_info.get("code_type") + if code_type == ASSET_OWNER_INVITE_CODE_TYPE and user_role not in ["SU"]: + raise UnauthorizedError( + f"User role {user_role} not authorized to delete invitation codes") + elif user_role not in ["SU", "ADMIN"]: + raise UnauthorizedError( + f"User role {user_role} not authorized to delete invitation codes") + # Delete invitation code success = remove_invitation( invitation_id=invitation_id, updated_by=user_id) @@ -306,7 +349,8 @@ def _calculate_current_status(invitation_data: Dict[str, Any]) -> Dict[str, Any] if current_time.date() > expiry_datetime.date(): new_status = "EXPIRE" except (ValueError, AttributeError, TypeError): - logger.warning(f"Invalid expiry_date format for invitation {invitation_id}: {expiry_date}") + logger.warning( + f"Invalid expiry_date format for invitation {invitation_id}: {expiry_date}") # Check capacity if usage_count >= capacity: @@ -346,7 +390,7 @@ def use_invitation_code( ) -> Dict[str, Any]: """ Use an invitation code by creating a usage record. 
- + Args: invitation_code (str): Invitation code to use user_id (str): User ID using the code @@ -359,7 +403,8 @@ def use_invitation_code( """ # Check if invitation is available if not check_invitation_available(invitation_code): - raise NotFoundException(f"Invitation code {invitation_code} is not available") + raise NotFoundException( + f"Invitation code {invitation_code} is not available") # Get invitation code details invitation_info = query_invitation_by_code(invitation_code) @@ -426,7 +471,8 @@ def update_invitation_code_status(invitation_id: int) -> bool: if current_time.date() > expiry_datetime.date(): new_status = "EXPIRE" except (ValueError, AttributeError, TypeError): - logger.warning(f"Invalid expiry_date format for invitation {invitation_id}: {expiry_date}") + logger.warning( + f"Invalid expiry_date format for invitation {invitation_id}: {expiry_date}") # Check capacity if not expired if new_status == "IN_USE" and usage_count >= capacity: @@ -439,7 +485,8 @@ def update_invitation_code_status(invitation_id: int) -> bool: updates={"status": new_status}, updated_by="system" ) - logger.info(f"Updated invitation code {invitation_id} status to {new_status}") + logger.info( + f"Updated invitation code {invitation_id} status to {new_status}") return True return False @@ -468,7 +515,8 @@ def _generate_unique_invitation_code(length: int = 6) -> str: attempts += 1 - raise RuntimeError(f"Failed to generate unique invitation code after {max_attempts} attempts") + raise RuntimeError( + f"Failed to generate unique invitation code after {max_attempts} attempts") def get_invitations_list( @@ -506,9 +554,13 @@ def get_invitations_list( # Permission logic: # - If tenant_id is provided: ADMIN or SU can view that tenant's invitations # - If tenant_id is not provided: Only SU can view all invitations - if tenant_id: - # If tenant_id is specified, user must be ADMIN/SU - if user_role not in ["SU", "ADMIN"]: + if tenant_id is not None: + # ASSET_OWNER_TENANT_ID virtual 
tenant_id is used for asset-owner invites (SU only) + if tenant_id == ASSET_OWNER_TENANT_ID: + if user_role not in ["SU"]: + raise UnauthorizedError( + f"User role {user_role} not authorized to view asset owner invitations") + elif user_role not in ["SU", "ADMIN"]: raise UnauthorizedError( f"User role {user_role} not authorized to view invitation lists") else: @@ -531,6 +583,7 @@ def get_invitations_list( # Normalize each invitation item in the list if result and "items" in result: - result["items"] = [_normalize_invitation_data(item) for item in result["items"]] + result["items"] = [_normalize_invitation_data( + item) for item in result["items"]] return result diff --git a/backend/services/mcp_management_service.py b/backend/services/mcp_management_service.py new file mode 100644 index 000000000..a62de250a --- /dev/null +++ b/backend/services/mcp_management_service.py @@ -0,0 +1,334 @@ +import logging +from datetime import datetime +from typing import Any, Dict, List +from urllib.parse import urlencode + +import aiohttp + +from consts.exceptions import ( + MCPConnectionError, + McpNotFoundError, + McpValidationError, +) +from database.community_mcp_db import ( + create_mcp_community_record, + delete_mcp_community_record_by_id, + get_mcp_community_record_by_id_and_tenant, + get_mcp_community_records, + get_mcp_community_tag_stats, + list_mcp_community_records_by_tenant, + update_mcp_community_record_by_id, +) +from database.remote_mcp_db import get_mcp_record_by_id_and_tenant + +logger = logging.getLogger("mcp_management_service") + +MCP_REGISTRY_BASE_URL = "https://registry.modelcontextprotocol.io/v0.1/servers" + + +# --------------------------------------------------------------------------- +# Community MCP Service Functions +# --------------------------------------------------------------------------- + +async def list_community_mcp_services( + *, + search: str | None = None, + tag: str | None = None, + transport_type: str | None = None, + cursor: str | None = 
None, + limit: int = 30, +) -> Dict[str, Any]: + """List public community MCP services. + + Args: + search: Search keyword + tag: Filter by tag + transport_type: Filter by transport (url or container) + cursor: Pagination cursor + limit: Items per page + + Returns: + Dictionary with count, nextCursor, and items + """ + db_result = get_mcp_community_records( + search=search, + tag=tag, + transport_type=transport_type, + cursor=cursor, + limit=limit, + ) + + raw_items = db_result.get("items", []) + items = [] + for item in raw_items: + items.append({ + "communityId": item.get("community_id"), + "name": item.get("mcp_name"), + "version": item.get("version"), + "description": item.get("description"), + "status": "active", + "createdAt": item.get("create_time"), + "updatedAt": item.get("update_time"), + "source": "community", + "transportType": item.get("transport_type"), + "serverUrl": item.get("mcp_server"), + "configJson": item.get("config_json") if isinstance(item.get("config_json"), dict) else None, + "registryJson": item.get("registry_json") if isinstance(item.get("registry_json"), dict) else None, + "tags": item.get("tags") or [], + }) + return { + "count": len(items), + "nextCursor": db_result.get("nextCursor"), + "items": items, + } + + +def list_community_mcp_tag_stats() -> List[Dict[str, Any]]: + """Get community MCP tag statistics. + + Args: + None (this function takes no parameters) + + Returns: + List of tag statistics + """ + return get_mcp_community_tag_stats() + + +async def publish_community_mcp_service( + *, + tenant_id: str, + user_id: str, + mcp_id: int, + name: str | None = None, + description: str | None = None, + version: str | None = None, + tags: List[str] | None = None, + mcp_server: str | None = None, + config_json: Dict[str, Any] | None = None, +) -> int: + """Publish a local MCP service to the community.
+ + Optional ``name`` / ``description`` / ``version`` / ``tags`` / ``mcp_server`` / + ``config_json`` override the values copied from the local MCP row when creating + the community record. Omit an optional field (``None``) to keep the local MCP + value for that field. + + Args: + tenant_id: Tenant ID + user_id: User ID + mcp_id: MCP record ID to publish + name: Optional community display name override + description: Optional description override + version: Optional version override + tags: Optional tags override + mcp_server: Optional remote MCP URL override + config_json: Optional container config override + + Returns: + Community record ID + + Raises: + McpNotFoundError: If MCP record is not found + """ + source_record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id) + if not source_record: + raise McpNotFoundError("MCP record not found") + + source_registry_json = source_record.get("registry_json") if isinstance(source_record.get("registry_json"), dict) else None + source_config_json = source_record.get("config_json") if isinstance(source_record.get("config_json"), dict) else None + + final_name = name if name is not None else source_record.get("mcp_name") + final_description = description if description is not None else source_record.get("description") + final_version = version if version is not None else source_record.get("version") + final_tags = tags if tags is not None else source_record.get("tags") + final_mcp_server = ( + mcp_server if mcp_server is not None else source_record.get("mcp_server") + ) + final_config_json = ( + config_json if isinstance(config_json, dict) else source_config_json + ) + + # Remote MCP table may omit transport_type; community list still needs it for filters. 
+ community_transport_type = "container" if final_config_json is not None else "url" + + community_id = create_mcp_community_record( + mcp_data={ + "mcp_name": final_name, + "mcp_server": final_mcp_server, + "version": final_version, + "registry_json": source_registry_json, + "transport_type": source_record.get("transport_type") or community_transport_type, + "config_json": final_config_json, + "tags": final_tags, + "description": final_description, + }, + tenant_id=tenant_id, + user_id=user_id, + ) + return community_id + + +async def update_community_mcp_service( + *, + tenant_id: str, + user_id: str, + community_id: int, + name: str | None, + description: str | None, + tags: List[str] | None, + version: str | None, + registry_json: Dict[str, Any] | None, +) -> None: + """Update a community MCP service. + + Args: + tenant_id: Tenant ID + user_id: User ID + community_id: Community record ID + name: New MCP service name + description: MCP service description + tags: MCP tags + version: MCP version + registry_json: Registry metadata JSON + + Raises: + McpNotFoundError: If community MCP record is not found + """ + current = get_mcp_community_record_by_id_and_tenant(community_id=community_id, tenant_id=tenant_id) + if not current: + raise McpNotFoundError("Community MCP record not found") + + existing_config_json = current.get("config_json") if isinstance(current.get("config_json"), dict) else None + next_registry_json = registry_json if isinstance(registry_json, dict) else current.get("registry_json") + next_config_json = existing_config_json if isinstance(existing_config_json, dict) else None + + update_mcp_community_record_by_id( + community_id=community_id, + tenant_id=tenant_id, + user_id=user_id, + name=name, + description=description, + tags=tags, + version=version, + registry_json=next_registry_json, + config_json=next_config_json, + ) + + +async def delete_community_mcp_service( + *, + tenant_id: str, + user_id: str, + community_id: int, +) -> None: + 
"""Delete a community MCP service. + + Args: + tenant_id: Tenant ID + user_id: User ID + community_id: Community record ID + + Raises: + McpNotFoundError: If community MCP record is not found + """ + current = get_mcp_community_record_by_id_and_tenant(community_id=community_id, tenant_id=tenant_id) + if not current: + raise McpNotFoundError("Community MCP record not found") + delete_mcp_community_record_by_id( + community_id=community_id, + tenant_id=tenant_id, + user_id=user_id, + ) + + +async def list_my_community_mcp_services( + *, + tenant_id: str, +) -> Dict[str, Any]: + """List MCP services published to the community by the given tenant. + + Args: + tenant_id: Tenant ID + + Returns: + Dictionary with count and items + """ + rows = list_mcp_community_records_by_tenant(tenant_id=tenant_id) + items = [] + for row in rows: + items.append({ + "communityId": row.get("community_id"), + "name": row.get("mcp_name"), + "version": row.get("version"), + "description": row.get("description"), + "status": "active", + "createdAt": row.get("create_time"), + "updatedAt": row.get("update_time"), + "source": "community", + "transportType": row.get("transport_type"), + "serverUrl": row.get("mcp_server"), + "configJson": row.get("config_json") if isinstance(row.get("config_json"), dict) else None, + "registryJson": row.get("registry_json") if isinstance(row.get("registry_json"), dict) else None, + "tags": row.get("tags") or [], + }) + return { + "count": len(items), + "items": items, + } + + +# --------------------------------------------------------------------------- +# Registry Functions +# --------------------------------------------------------------------------- + +async def list_registry_mcp_services( + *, + search: str | None = None, + include_deleted: bool = False, + updated_since: str | None = None, + version: str | None = None, + cursor: str | None = None, + limit: int = 30, +) -> Dict[str, Any]: + """List MCP services from the official MCP Registry.
+ + Args: + search: Search keyword + include_deleted: Include deleted records + updated_since: Filter by update time + version: Filter by version + cursor: Pagination cursor + limit: Items per page + + Returns: + Dictionary with servers and metadata + """ + params: Dict[str, Any] = {"limit": limit} + if search: + params["search"] = search + if include_deleted: + params["include_deleted"] = "true" + if updated_since: + params["updated_since"] = updated_since + if version: + params["version"] = version + if cursor: + params["cursor"] = cursor + + request_url = f"{MCP_REGISTRY_BASE_URL}?{urlencode(params)}" + timeout = aiohttp.ClientTimeout(total=20) + + async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session: + async with session.get(request_url) as response: + if response.status >= 400: + raise RuntimeError(f"Registry request failed with status {response.status}") + payload = await response.json(content_type=None) + + raw_servers = payload.get("servers") if isinstance(payload, dict) else [] + metadata = payload.get("metadata") if isinstance(payload, dict) and isinstance(payload.get("metadata"), dict) else {} + + return { + "servers": raw_servers if isinstance(raw_servers, list) else [], + "metadata": metadata, + } diff --git a/backend/services/model_health_service.py b/backend/services/model_health_service.py index a20b2a6ca..109f92ad0 100644 --- a/backend/services/model_health_service.py +++ b/backend/services/model_health_service.py @@ -15,6 +15,11 @@ logger = logging.getLogger("model_health_service") +DASHSCOPE_MODEL_FACTORY = "dashscope" +TOKENPONY_MODEL_FACTORY = "tokenpony" +PROVIDER_CATALOG_HEALTHCHECK_FACTORIES = {DASHSCOPE_MODEL_FACTORY, TOKENPONY_MODEL_FACTORY} +PROVIDER_CATALOG_HEALTHCHECK_TYPES = {"vlm", "vlm2", "vlm3"} + def _mask_secret(value: Optional[str]) -> str: """Mask a secret value, showing only first and last 4 characters.""" @@ -29,6 +34,7 @@ async def _embedding_dimension_check( model_base_url: str, model_api_key: str, 
ssl_verify: bool = True, + timeout_seconds: Optional[float] = None, ): # Test connectivity based on different model types if model_type == "embedding": @@ -38,6 +44,7 @@ async def _embedding_dimension_check( api_key=model_api_key, embedding_dim=0, ssl_verify=ssl_verify, + timeout_seconds=timeout_seconds, ).dimension_check() if len(embedding) > 0: return len(embedding[0]) @@ -51,6 +58,7 @@ async def _embedding_dimension_check( api_key=model_api_key, embedding_dim=0, ssl_verify=ssl_verify, + timeout_seconds=timeout_seconds, ).dimension_check() if len(embedding) > 0: return len(embedding[0]) @@ -61,6 +69,31 @@ async def _embedding_dimension_check( raise ValueError(f"Unsupported model type: {model_type}") +async def _provider_catalog_connectivity_check( + model_name: str, + model_type: str, + model_api_key: str, + model_factory: Optional[str], +) -> bool: + """Validate provider-managed multimodal models through their model catalog.""" + provider = (model_factory or "").lower() + if provider not in PROVIDER_CATALOG_HEALTHCHECK_FACTORIES: + return False + + from services.model_provider_service import get_provider_models + + model_list = await get_provider_models({ + "provider": provider, + "model_type": model_type, + "api_key": model_api_key, + }) + if not model_list or any(model.get("_error") for model in model_list): + return False + + expected_model_id = model_name.lower() + return any(str(model.get("id", "")).lower() == expected_model_id for model in model_list) + + async def _perform_connectivity_check( model_name: str, model_type: str, @@ -71,6 +104,7 @@ async def _perform_connectivity_check( model_appid: Optional[str] = None, access_token: Optional[str] = None, display_name: Optional[str] = None, + timeout_seconds: Optional[float] = None, ) -> bool: """ Perform specific model connectivity check @@ -80,6 +114,8 @@ async def _perform_connectivity_check( model_base_url: Model base URL model_api_key: API key ssl_verify: Whether to verify SSL certificates (default: 
True) + display_name: Optional display name for monitoring + timeout_seconds: Optional request timeout in seconds Returns: bool: Connectivity check result """ @@ -91,21 +127,23 @@ async def _perform_connectivity_check( # Test connectivity based on different model types if model_type == "embedding": - connectivity = len(await OpenAICompatibleEmbedding( + embedding = OpenAICompatibleEmbedding( model_name=model_name, base_url=model_base_url, api_key=model_api_key, embedding_dim=0, - ssl_verify=ssl_verify - ).dimension_check()) > 0 + ssl_verify=ssl_verify, + ) + connectivity = len(await embedding.dimension_check(timeout=timeout_seconds if timeout_seconds else 5.0)) > 0 elif model_type == "multi_embedding": - connectivity = len(await JinaEmbedding( + embedding = JinaEmbedding( model_name=model_name, base_url=model_base_url, api_key=model_api_key, embedding_dim=0, - ssl_verify=ssl_verify - ).dimension_check()) > 0 + ssl_verify=ssl_verify, + ) + connectivity = len(await embedding.dimension_check(timeout=timeout_seconds if timeout_seconds else 5.0)) > 0 elif model_type == "llm": observer = MessageObserver() set_monitoring_operation("connectivity_check", @@ -115,7 +153,8 @@ async def _perform_connectivity_check( model_id=model_name, api_base=model_base_url, api_key=model_api_key, - ssl_verify=ssl_verify + ssl_verify=ssl_verify, + timeout_seconds=timeout_seconds, ).check_connectivity() elif model_type == "rerank": rerank_model = OpenAICompatibleRerank( @@ -125,7 +164,19 @@ async def _perform_connectivity_check( ssl_verify=ssl_verify, ) connectivity = await rerank_model.connectivity_check() - elif model_type == "vlm": + elif model_type in ("vlm", "vlm2", "vlm3"): + if ( + model_type in PROVIDER_CATALOG_HEALTHCHECK_TYPES + and (model_factory or "").lower() in PROVIDER_CATALOG_HEALTHCHECK_FACTORIES + ): + connectivity = await _provider_catalog_connectivity_check( + model_name=model_name, + model_type=model_type, + model_api_key=model_api_key, + model_factory=model_factory, + ) 
+ return connectivity + observer = MessageObserver() set_monitoring_operation("connectivity_check", display_name=display_name) @@ -139,7 +190,6 @@ async def _perform_connectivity_check( elif model_type == 'stt': voice_service = get_voice_service() - # Determine STT provider based on model_factory use_volc = model_factory and model_factory.lower() in ["volcengine", "volcano", "volcengine", "火山引擎"] @@ -164,16 +214,43 @@ async def _perform_connectivity_check( "model": model_name } ) + elif model_type == 'tts': + voice_service = get_voice_service() + + # Determine TTS provider based on model_factory + use_volc = model_factory and model_factory.lower() in ["volcengine", "volcano", "volcengine", "火山引擎"] + + if use_volc: + # Use Volcano TTS with appid and access_token + connectivity = await voice_service.check_voice_connectivity( + model_type="tts", + stt_config={ + "model_factory": model_factory, + "model_appid": model_appid, + "access_token": access_token, + "base_url": model_base_url + } + ) + else: + # Use Ali TTS (default) with api_key and model name + connectivity = await voice_service.check_voice_connectivity( + model_type="tts", + stt_config={ + "api_key": model_api_key, + "base_url": model_base_url, + "model": model_name + } + ) else: raise ValueError(f"Unsupported model type: {model_type}") return connectivity -async def check_model_connectivity(display_name: str, tenant_id: str) -> dict: +async def check_model_connectivity(display_name: str, tenant_id: str, model_type: str = None) -> dict: try: # Query the database using display_name and tenant context from app layer - model = get_model_by_display_name(display_name, tenant_id=tenant_id) + model = get_model_by_display_name(display_name, tenant_id=tenant_id, model_type=model_type) if not model: raise LookupError( f"Model configuration not found for {display_name}") @@ -192,14 +269,22 @@ async def check_model_connectivity(display_name: str, tenant_id: str) -> dict: model_factory = model.get("model_factory") 
model_appid = model.get("model_appid") access_token = model.get("access_token") + timeout_seconds = model.get("timeout_seconds") try: set_monitoring_context(tenant_id=tenant_id) + ssl_verify_fallback = False connectivity = await _perform_connectivity_check( model_name, model_type, model_base_url, model_api_key, ssl_verify, - model_factory, model_appid, access_token,display_name=display_name, + model_factory, model_appid, access_token, display_name, timeout_seconds, ) + if not connectivity and ssl_verify: + ssl_verify_fallback = True + connectivity = await _perform_connectivity_check( + model_name, model_type, model_base_url, model_api_key, False, + model_factory, model_appid, access_token, display_name, timeout_seconds, + ) except Exception as e: update_data = { "connect_status": ModelConnectStatusEnum.UNAVAILABLE.value} @@ -215,6 +300,8 @@ async def check_model_connectivity(display_name: str, tenant_id: str) -> dict: f"UNCONNECTED: {model_name}") connect_status = ModelConnectStatusEnum.AVAILABLE.value if connectivity else ModelConnectStatusEnum.UNAVAILABLE.value update_data = {"connect_status": connect_status} + if ssl_verify_fallback: + update_data["ssl_verify"] = False update_model_record(model["model_id"], update_data) return { "connectivity": connectivity, @@ -245,16 +332,18 @@ async def verify_model_config_connectivity(model_config: dict): model_factory = model_config.get("model_factory") model_appid = model_config.get("model_appid") access_token = model_config.get("access_token") + # Get timeout from model config if present + timeout_seconds = model_config.get("timeout_seconds") try: connectivity = await _perform_connectivity_check( model_name, model_type, model_base_url, model_api_key, ssl_verify, - model_factory, model_appid, access_token + model_factory, model_appid, access_token, None, timeout_seconds, ) if not connectivity and ssl_verify: connectivity = await _perform_connectivity_check( model_name, model_type, model_base_url, model_api_key, False, - 
model_factory, model_appid, access_token + model_factory, model_appid, access_token, None, timeout_seconds, ) if not connectivity: error_msg = f"Failed to connect to model '{model_name}' at {model_base_url}. Please verify the URL, API key, and network connection." @@ -296,9 +385,17 @@ async def embedding_dimension_check(model_config: dict): try: ssl_verify = model_config.get("ssl_verify", True) + timeout_seconds = model_config.get("timeout_seconds") dimension = await _embedding_dimension_check( - model_name, model_type, model_base_url, model_api_key, ssl_verify + model_name, model_type, model_base_url, model_api_key, ssl_verify, + timeout_seconds=timeout_seconds ) + # Fallback to ssl_verify=False if initial check fails + if dimension == 0 and ssl_verify: + dimension = await _embedding_dimension_check( + model_name, model_type, model_base_url, model_api_key, False, + timeout_seconds=timeout_seconds + ) return dimension except ValueError as e: logger.error(f"Error checking embedding dimension: {str(e)}") diff --git a/backend/services/model_management_service.py b/backend/services/model_management_service.py index d012803be..9f032728a 100644 --- a/backend/services/model_management_service.py +++ b/backend/services/model_management_service.py @@ -8,15 +8,15 @@ from database.model_management_db import ( create_model_record, delete_model_record, - get_model_by_display_name, + get_model_by_name_factory, get_models_by_display_name, get_model_records, get_models_by_tenant_factory_type, - update_model_record, + update_model_record ) from services.model_provider_service import ( prepare_model_dict, - merge_existing_model_tokens, + merge_existing_model_attributes, get_provider_models, ) from services.model_health_service import embedding_dimension_check @@ -31,6 +31,23 @@ logger = logging.getLogger("model_management_service") +INDEPENDENT_MULTIMODAL_MODEL_TYPES = {"vlm", "vlm2", "vlm3"} + + +def _has_display_name_conflict(existing_models: List[Dict[str, Any]], model_type: 
Optional[str]) -> bool: + """Allow the three multimodal slots to share display names across slots.""" + if not existing_models: + return False + + if model_type in INDEPENDENT_MULTIMODAL_MODEL_TYPES: + return any( + existing.get("model_type") == model_type + or existing.get("model_type") not in INDEPENDENT_MULTIMODAL_MODEL_TYPES + for existing in existing_models + ) + + return True + async def create_model_for_tenant(user_id: str, tenant_id: str, model_data: Dict[str, Any]): """Create a single model record for the given tenant. @@ -45,9 +62,19 @@ async def create_model_for_tenant(user_id: str, tenant_id: str, model_data: Dict model_base_url.replace(LOCALHOST_NAME, DOCKER_INTERNAL_HOST) .replace(LOCALHOST_IP, DOCKER_INTERNAL_HOST) ) - model_data['ssl_verify'] = True + # Auto-set ssl_verify based on api_key: + # - Empty api_key (local/LAN services) -> ssl_verify=False + # - "open/router" URL -> ssl_verify=False + # - Otherwise -> ssl_verify=True + model_api_key = model_data.get("api_key", "") + if not model_api_key or "open/router" in model_base_url: + model_data["ssl_verify"] = False + else: + model_data["ssl_verify"] = True + + # Set model_factory to modelengine when using open/router URL if "open/router" in model_base_url: - model_data['ssl_verify'] = False + model_data["model_factory"] = "modelengine" # Split model_name into repo and name model_repo, model_name = split_repo_name( model_data["model_name"]) if model_data.get("model_name") else ("", "") @@ -66,9 +93,9 @@ async def create_model_for_tenant(user_id: str, tenant_id: str, model_data: Dict # Check display name conflict scoped by tenant if model_data.get("display_name"): - existing_model_by_display = get_model_by_display_name( + existing_models_by_display = get_models_by_display_name( model_data["display_name"], tenant_id) - if existing_model_by_display: + if _has_display_name_conflict(existing_models_by_display, model_data.get("model_type")): logging.error( f"Name {model_data['display_name']} is already 
in use, please choose another display name") raise ValueError( @@ -114,8 +141,8 @@ async def create_provider_models_for_tenant(tenant_id: str, provider_request: Di # Get provider model list model_list = await get_provider_models(provider_request) - # Merge existing model's max_tokens attribute - model_list = merge_existing_model_tokens( + # Merge existing model's attributes (max_tokens, api_key, timeout_seconds, concurrency_limit) + model_list = merge_existing_model_attributes( model_list, tenant_id, provider_request["provider"], provider_request["model_type"]) # Sort model list by ID @@ -153,6 +180,13 @@ async def batch_create_models_for_tenant(user_id: str, tenant_id: str, batch_pay tenant_id, provider, model_type) model_list_ids = {model.get("id") for model in model_list} if model_list else set() + existing_model_map = { + add_repo_to_name( + model_repo=model["model_repo"], + model_name=model["model_name"], + ): model + for model in existing_model_list + } # Delete existing models not present for model in existing_model_list: @@ -162,21 +196,20 @@ async def batch_create_models_for_tenant(user_id: str, tenant_id: str, batch_pay # Create or update new models for model in model_list: + model["model_type"] = model_type _, model_name = split_repo_name( model["id"]) if model.get("id") else ("", "") model_repo, model_name_only = split_repo_name( model.get("id", "")) if model.get("id") else ("", "") model_display_name = add_repo_to_name(model_repo, model_name_only) if model_name: - existing_model_by_display = get_model_by_display_name( - model_display_name, tenant_id) - if existing_model_by_display: + existing_model = existing_model_map.get(model_display_name) + if existing_model: # Check if max_tokens has changed - existing_max_tokens = existing_model_by_display.get( - "max_tokens") + existing_max_tokens = existing_model.get("max_tokens") new_max_tokens = model.get("max_tokens") if new_max_tokens is not None and existing_max_tokens != new_max_tokens: - 
update_model_record(existing_model_by_display["model_id"], { + update_model_record(existing_model["model_id"], { "max_tokens": new_max_tokens}, user_id) continue @@ -251,6 +284,15 @@ async def update_single_model_for_tenant( m.get("model_type") == "multi_embedding" for m in existing_models ) + # Auto-set ssl_verify based on api_key if provided: + # - Empty api_key -> ssl_verify=False + # - Otherwise -> ssl_verify=True + if "api_key" in model_data: + if not model_data["api_key"]: + model_data["ssl_verify"] = False + else: + model_data["ssl_verify"] = True + if has_multi_embedding: # Update both embedding and multi_embedding records for model in existing_models: @@ -276,12 +318,36 @@ async def update_single_model_for_tenant( async def batch_update_models_for_tenant(user_id: str, tenant_id: str, model_list: List[Dict[str, Any]]): - """Batch update models for a tenant.""" + """Batch update models for a tenant by model_id or model_name.""" try: for model in model_list: - update_model_record(model["model_id"], model, user_id, tenant_id) + # Build update data excluding id fields + update_data = {k: v for k, v in model.items() if k not in ["model_id", "model_name"]} + + model_id_or_name = model.get("model_id") or model.get("model_name") + + # Check if model_id is a numeric string (primary key) + if model_id_or_name and model_id_or_name.isdigit(): + update_model_record(int(model_id_or_name), update_data, user_id, tenant_id) + else: + # Parse "model_repo/model_name" format from frontend's model_id field + if "/" in model_id_or_name: + model_repo, model_name = model_id_or_name.split("/", 1) + else: + model_repo = None + model_name = model_id_or_name + + logging.info(f"[DEBUG] Updating model by name: model_name={model_name}, model_repo={model_repo}, tenant_id={tenant_id}") + + # Query to get model_id first, then update by primary key + model_record = get_model_by_name_factory(model_name, model_repo, tenant_id) + if not model_record: + logging.warning(f"Model not found: 
model_name={model_name}, model_repo={model_repo}, tenant_id={tenant_id}") + continue + + update_model_record(model_record["model_id"], update_data, user_id, tenant_id) - logging.debug("Batch update models successfully") + logging.info("[DEBUG] Batch update models successfully") except Exception as e: logging.error(f"Failed to batch update models: {str(e)}") raise Exception(f"Failed to batch update models: {str(e)}") diff --git a/backend/services/model_provider_service.py b/backend/services/model_provider_service.py index dbff17082..9b9f26bd4 100644 --- a/backend/services/model_provider_service.py +++ b/backend/services/model_provider_service.py @@ -100,11 +100,13 @@ async def prepare_model_dict(provider: str, model: dict, model_url: str, model_a # Build the canonical representation using the existing Pydantic schema for # consistency of validation and default handling. # For embedding/multi_embedding models, max_tokens will be set via connectivity check later, - # so use 0 as placeholder if not provided + # so use 0 as placeholder if not provided. + # Set default timeout_seconds to 120 for LLM models (embedding models don't need it). 
model_type = model["model_type"] is_embedding_type = model_type in ["embedding", "multi_embedding"] max_tokens_value = model.get( "max_tokens", 0) if not is_embedding_type else 0 + timeout_seconds_value = 120 if not is_embedding_type else None model_obj = ModelRequest( model_factory=provider, @@ -115,7 +117,8 @@ async def prepare_model_dict(provider: str, model: dict, model_url: str, model_a display_name=model_display_name, expected_chunk_size=expected_chunk_size, maximum_chunk_size=maximum_chunk_size, - chunk_batch=chunk_batch + chunk_batch=chunk_batch, + timeout_seconds=timeout_seconds_value ) model_dict = model_obj.model_dump() @@ -155,19 +158,29 @@ async def prepare_model_dict(provider: str, model: dict, model_url: str, model_a return model_dict -def merge_existing_model_tokens(model_list: List[dict], tenant_id: str, provider: str, model_type: str) -> List[dict]: +def merge_existing_model_attributes( + model_list: List[dict], + tenant_id: str, + provider: str, + model_type: str, + fields: List[str] = None +) -> List[dict]: """ - Merge existing model's max_tokens attribute into the model list. + Merge existing model's attributes into the model list. 
Args: model_list: List of models tenant_id: Tenant ID provider: Provider model_type: Model type + fields: List of fields to merge (defaults to max_tokens, api_key, timeout_seconds, concurrency_limit) Returns: List[dict]: Merged model list """ + if fields is None: + fields = ["max_tokens", "api_key", "timeout_seconds", "concurrency_limit"] + if model_type == "embedding" or model_type == "multi_embedding": return model_list @@ -184,15 +197,35 @@ def merge_existing_model_tokens(model_list: List[dict], tenant_id: str, provider "/" + existing_model["model_name"] existing_model_map[model_full_name] = existing_model - # Iterate through the model list, if the model exists in the existing model list, add max_tokens attribute + # Iterate through the model list, merge specified fields from existing models for model in model_list: if model.get("id") in existing_model_map: - model["max_tokens"] = existing_model_map[model.get( - "id")].get("max_tokens") + existing_model = existing_model_map[model.get("id")] + for field in fields: + if existing_model.get(field) is not None: + model[field] = existing_model.get(field) return model_list +def merge_existing_model_tokens(model_list: List[dict], tenant_id: str, provider: str, model_type: str) -> List[dict]: + """ + Merge existing model's max_tokens attribute into the model list. + + DEPRECATED: Use merge_existing_model_attributes instead. 
+ + Args: + model_list: List of models + tenant_id: Tenant ID + provider: Provider + model_type: Model type + + Returns: + List[dict]: Merged model list + """ + return merge_existing_model_attributes(model_list, tenant_id, provider, model_type, ["max_tokens"]) + + # Re-export provider classes for backward compatibility __all__ = [ "AbstractModelProvider", @@ -200,6 +233,7 @@ def merge_existing_model_tokens(model_list: List[dict], tenant_id: str, provider "ModelEngineProvider", "prepare_model_dict", "merge_existing_model_tokens", + "merge_existing_model_attributes", "get_provider_models", "get_model_engine_raw_url", ] diff --git a/backend/services/oauth_service.py b/backend/services/oauth_service.py index 0083ad9ec..fe2aa0c42 100644 --- a/backend/services/oauth_service.py +++ b/backend/services/oauth_service.py @@ -3,24 +3,32 @@ import os import secrets import ssl +import time import urllib.request from typing import Any, Dict, List, Optional from urllib.parse import urlencode, quote +import jwt +from pydantic import EmailStr, TypeAdapter, ValidationError as PydanticValidationError + from consts.const import ( + ASSET_OWNER_INVITE_CODE_TYPE, + ASSET_OWNER_ROLE, + ASSET_OWNER_TENANT_ID, DEFAULT_TENANT_ID, OAUTH_CALLBACK_BASE_URL, OAUTH_SSL_VERIFY, OAUTH_CA_BUNDLE, + SUPABASE_JWT_SECRET, ) from consts.exceptions import OAuthLinkError, OAuthProviderError +from services.asset_owner_visibility import require_asset_owner_enabled from consts.oauth_providers import ( get_all_provider_definitions, get_provider_definition, is_provider_enabled, ) from database.oauth_account_db import ( - count_oauth_accounts_by_user_id, delete_oauth_account, get_oauth_account_by_provider, get_soft_deleted_oauth_account, @@ -33,6 +41,10 @@ logger = logging.getLogger(__name__) +OAUTH_PENDING_EXPIRE_SECONDS = 10 * 60 +OAUTH_PENDING_PURPOSE = "oauth_account_completion" +_EMAIL_ADAPTER = TypeAdapter(EmailStr) + def _build_ssl_context() -> ssl.SSLContext: if OAUTH_CA_BUNDLE and 
os.path.isfile(OAUTH_CA_BUNDLE): @@ -246,12 +258,240 @@ def get_provider_user_info( except Exception: logger.warning(f"Failed to fetch {provider} user emails") - if result.get("email", "") == "": - result["email"] = f"{result['username']}@nexent.com" - return result +def generate_pending_oauth_token( + provider: str, + provider_user_id: str, + provider_email: Optional[str] = None, + provider_username: Optional[str] = None, + expires_in: int = OAUTH_PENDING_EXPIRE_SECONDS, +) -> str: + if not SUPABASE_JWT_SECRET: + raise OAuthProviderError("JWT verification is not configured") + + now = int(time.time()) + payload = { + "purpose": OAUTH_PENDING_PURPOSE, + "provider": provider, + "provider_user_id": provider_user_id, + "provider_email": provider_email or "", + "provider_username": provider_username or "", + "iat": now, + "exp": now + expires_in, + } + return jwt.encode(payload, SUPABASE_JWT_SECRET, algorithm="HS256") + + +def parse_pending_oauth_token(pending_token: str) -> Dict[str, str]: + if not pending_token: + raise OAuthLinkError("OAuth account completion session is missing") + if not SUPABASE_JWT_SECRET: + raise OAuthProviderError("JWT verification is not configured") + + try: + payload = jwt.decode( + pending_token, + SUPABASE_JWT_SECRET, + algorithms=["HS256"], + options={"verify_exp": True, "verify_aud": False}, + ) + except jwt.ExpiredSignatureError as exc: + raise OAuthLinkError("OAuth account completion session has expired") from exc + except jwt.InvalidTokenError as exc: + raise OAuthLinkError("OAuth account completion session is invalid") from exc + + if payload.get("purpose") != OAUTH_PENDING_PURPOSE: + raise OAuthLinkError("OAuth account completion session is invalid") + if not payload.get("provider") or not payload.get("provider_user_id"): + raise OAuthLinkError("OAuth account completion session is incomplete") + + return { + "provider": str(payload.get("provider", "")), + "provider_user_id": str(payload.get("provider_user_id", "")), + 
"provider_email": str(payload.get("provider_email", "")), + "provider_username": str(payload.get("provider_username", "")), + } + + +def get_pending_oauth_info(pending_token: str) -> Dict[str, Any]: + payload = parse_pending_oauth_token(pending_token) + provider_email = payload.get("provider_email") or "" + return { + "provider": payload["provider"], + "provider_username": payload.get("provider_username") or "", + "provider_email": provider_email, + "email_required": not bool(provider_email), + } + + +def _validate_email(email: Optional[str]) -> str: + if not email: + raise OAuthLinkError("Email is required") + try: + return str(_EMAIL_ADAPTER.validate_python(email)).lower() + except PydanticValidationError as exc: + raise OAuthLinkError("Invalid email address") from exc + + +def find_supabase_user_id_by_email( + admin_client: Any, email: Optional[str] +) -> Optional[str]: + if not email: + return None + + page = 1 + while True: + users_resp = admin_client.auth.admin.list_users(page=page, per_page=100) + users = getattr(users_resp, "users", users_resp) + if users is None: + users = [] + if not users: + return None + for user in users: + user_email = getattr(user, "email", "") + if user_email and user_email.lower() == email.lower(): + return user.id + if len(users) < 100: + return None + page += 1 + + +def _role_from_invitation_type(code_type: str) -> str: + if code_type == "ADMIN_INVITE": + return "ADMIN" + if code_type == "DEV_INVITE": + return "DEV" + if code_type == ASSET_OWNER_INVITE_CODE_TYPE: + require_asset_owner_enabled() + return ASSET_OWNER_ROLE + return "USER" + + +async def complete_pending_oauth_account( + pending_token: str, + password: str, + invite_code: str, + email: Optional[str] = None, +) -> Dict[str, Any]: + from services.group_service import add_user_to_groups + from services.invitation_service import ( + check_invitation_available, + get_invitation_by_code, + use_invitation_code, + ) + from services.tool_configuration_service import 
init_tool_list_for_tenant + from services.user_management_service import generate_tts_stt_4_admin + from utils.auth_utils import calculate_expires_at, generate_session_jwt + + pending = parse_pending_oauth_token(pending_token) + provider = pending["provider"] + provider_user_id = pending["provider_user_id"] + provider_email = pending.get("provider_email") or "" + provider_username = pending.get("provider_username") or "" + + if len(password or "") < 6: + raise OAuthLinkError("Password must be at least 6 characters") + + final_email = _validate_email(provider_email or email) + normalized_invite_code = invite_code.upper() + + if get_oauth_account_by_provider(provider, provider_user_id): + raise OAuthLinkError(f"This {provider} account is already bound to another user") + + if not check_invitation_available(normalized_invite_code): + raise OAuthLinkError("Invitation code is invalid or unavailable") + + invitation_info = get_invitation_by_code(normalized_invite_code) + if not invitation_info: + raise OAuthLinkError("Invitation code is invalid or unavailable") + + admin_client = None + try: + from utils.auth_utils import get_supabase_admin_client + + admin_client = get_supabase_admin_client() + except Exception: + admin_client = None + if not admin_client: + raise RuntimeError("Supabase admin client not available") + + existing_user_id = find_supabase_user_id_by_email(admin_client, final_email) + if existing_user_id: + raise OAuthLinkError( + "Email already exists. Please log in with email and password, " + "then link this OAuth account in settings." 
+ ) + + create_resp = admin_client.auth.admin.create_user( + { + "email": final_email, + "password": password, + "email_confirm": True, + "user_metadata": { + "full_name": provider_username, + "provider": provider, + }, + } + ) + supabase_user_id = create_resp.user.id + + tenant_id = invitation_info["tenant_id"] + if invitation_info.get("code_type") == ASSET_OWNER_INVITE_CODE_TYPE: + tenant_id = ASSET_OWNER_TENANT_ID + user_role = _role_from_invitation_type(invitation_info.get("code_type", "USER_INVITE")) + is_asset_owner_registration = user_role == ASSET_OWNER_ROLE + + insert_user_tenant( + user_id=supabase_user_id, + tenant_id=tenant_id, + user_role=user_role, + user_email=final_email, + ) + + invitation_result = use_invitation_code(normalized_invite_code, supabase_user_id) + group_ids = invitation_result.get("group_ids", []) + if isinstance(group_ids, str): + from utils.str_utils import convert_string_to_list + + group_ids = convert_string_to_list(group_ids) + if group_ids and not is_asset_owner_registration: + add_user_to_groups(supabase_user_id, group_ids, supabase_user_id) + + if user_role == "ADMIN": + await generate_tts_stt_4_admin(tenant_id, supabase_user_id) + if not is_asset_owner_registration: + await init_tool_list_for_tenant(tenant_id, supabase_user_id) + + create_or_update_oauth_account( + user_id=supabase_user_id, + provider=provider, + provider_user_id=provider_user_id, + email=final_email, + username=provider_username, + tenant_id=tenant_id, + ) + + expiry_seconds = 3600 + jwt_token = generate_session_jwt(supabase_user_id, expires_in=expiry_seconds) + expires_at = calculate_expires_at(jwt_token) + + return { + "user": { + "id": str(supabase_user_id), + "email": final_email, + "role": user_role, + }, + "session": { + "access_token": jwt_token, + "refresh_token": "", + "expires_at": expires_at, + "expires_in_seconds": expiry_seconds, + }, + } + + def create_or_update_oauth_account( user_id: str, provider: str, @@ -330,13 +570,7 @@ def 
list_linked_accounts(user_id: str) -> List[Dict[str, Any]]: return result -def unlink_account( - user_id: str, provider: str, has_password_auth: bool = False -) -> bool: - oauth_count = count_oauth_accounts_by_user_id(user_id) - if oauth_count <= 1 and not has_password_auth: - raise OAuthLinkError("Cannot unlink the last authentication method") - +def unlink_account(user_id: str, provider: str) -> bool: success = delete_oauth_account(user_id, provider) if not success: raise OAuthLinkError(f"No linked {provider} account found") diff --git a/backend/services/prompt_service.py b/backend/services/prompt_service.py index aa4d420d5..3148c3f0f 100644 --- a/backend/services/prompt_service.py +++ b/backend/services/prompt_service.py @@ -12,6 +12,7 @@ from consts.exceptions import AppException from database.agent_db import search_agent_info_by_agent_id, query_all_agent_info_by_tenant_id, \ query_sub_agents_id_list +from database.model_management_db import get_model_by_model_id from database.knowledge_db import get_knowledge_name_map_by_index_names from database.tool_db import query_tools_by_ids, query_tool_instances_by_id from services.agent_service import ( @@ -23,14 +24,31 @@ _generate_unique_agent_name_with_suffix, _generate_unique_display_name_with_suffix ) +from services.prompt_template_service import resolve_prompt_generate_template from utils.llm_utils import call_llm_for_system_prompt -from utils.prompt_template_utils import get_prompt_generate_prompt_template +from utils.prompt_template_utils import ( + get_prompt_generate_prompt_template, + get_prompt_optimize_prompt_template, +) # Configure logging logger = logging.getLogger("prompt_service") - -def gen_system_prompt_streamable(agent_id: int, model_id: int, task_description: str, user_id: str, tenant_id: str, language: str, tool_ids: Optional[List[int]] = None, sub_agent_ids: Optional[List[int]] = None, knowledge_base_display_names: Optional[List[str]] = None): +PROMPT_SECTION_TYPE_TITLES = { + LANGUAGE["ZH"]: { + 
"duty": "智能体角色", + "constraint": "使用要求", + "few_shots": "示例", + }, + LANGUAGE["EN"]: { + "duty": "Agent Role", + "constraint": "Usage Requirements", + "few_shots": "Few Shots", + }, +} + + +def gen_system_prompt_streamable(agent_id: int, model_id: int, task_description: str, user_id: str, tenant_id: str, language: str, prompt_template_id: Optional[int] = None, tool_ids: Optional[List[int]] = None, sub_agent_ids: Optional[List[int]] = None, knowledge_base_display_names: Optional[List[str]] = None, has_selected_resources: bool = True): try: for system_prompt in generate_and_save_system_prompt_impl( agent_id=agent_id, @@ -39,9 +57,11 @@ def gen_system_prompt_streamable(agent_id: int, model_id: int, task_description: user_id=user_id, tenant_id=tenant_id, language=language, + prompt_template_id=prompt_template_id, tool_ids=tool_ids, sub_agent_ids=sub_agent_ids, - knowledge_base_display_names=knowledge_base_display_names + knowledge_base_display_names=knowledge_base_display_names, + has_selected_resources=has_selected_resources, ): # SSE format, each message ends with \n\n yield f"data: {json.dumps({'success': True, 'data': system_prompt}, ensure_ascii=False)}\n\n" @@ -64,9 +84,11 @@ def generate_and_save_system_prompt_impl(agent_id: int, user_id: str, tenant_id: str, language: str, + prompt_template_id: Optional[int] = None, tool_ids: Optional[List[int]] = None, sub_agent_ids: Optional[List[int]] = None, - knowledge_base_display_names: Optional[List[str]] = None): + knowledge_base_display_names: Optional[List[str]] = None, + has_selected_resources: bool = True): # Get description of tool and agent from frontend-provided IDs # Frontend always provides tool_ids and sub_agent_ids (could be empty arrays) @@ -128,8 +150,18 @@ def generate_and_save_system_prompt_impl(agent_id: int, ] # Collect results and yield non-name fields immediately, but hold name fields for duplicate checking - for result_data in generate_system_prompt(sub_agent_info_list, task_description, 
tool_info_list, tenant_id, - model_id, language, knowledge_base_display_names): + for result_data in generate_system_prompt( + sub_agent_info_list, + task_description, + tool_info_list, + tenant_id, + user_id, + model_id, + language, + prompt_template_id, + knowledge_base_display_names, + has_selected_resources + ): result_type = result_data["type"] final_results[result_type] = result_data["content"] @@ -158,7 +190,9 @@ def generate_and_save_system_prompt_impl(agent_id: int, tenant_id=tenant_id, language=language, agents_cache=all_agents, - exclude_agent_id=agent_id + exclude_agent_id=agent_id, + prompt_template_id=prompt_template_id, + user_id=user_id, ) logger.info(f"Regenerated agent name: '{agent_name}'") final_results["agent_var_name"] = agent_name @@ -199,7 +233,9 @@ def generate_and_save_system_prompt_impl(agent_id: int, tenant_id=tenant_id, language=language, agents_cache=all_agents, - exclude_agent_id=agent_id + exclude_agent_id=agent_id, + prompt_template_id=prompt_template_id, + user_id=user_id, ) logger.info(f"Regenerated agent display_name: '{agent_display_name}'") final_results["agent_display_name"] = agent_display_name @@ -237,10 +273,96 @@ def generate_and_save_system_prompt_impl(agent_id: int, if not has_content: raise Exception("Failed to generate prompt content.") +def optimize_prompt_section_impl( + agent_id: int, + model_id: int, + task_description: str, + tenant_id: str, + language: str, + section_type: str, + section_title: str, + current_content: str, + feedback: str, + tool_ids: Optional[List[int]] = None, + sub_agent_ids: Optional[List[int]] = None, + knowledge_base_display_names: Optional[List[str]] = None, +) -> dict: + normalized_section_type = (section_type or "").strip() + if normalized_section_type not in {"duty", "constraint", "few_shots"}: + raise AppException( + ErrorCode.COMMON_PARAMETER_INVALID, + "Unsupported prompt section type." 
+ ) + + if not (current_content or "").strip(): + raise AppException( + ErrorCode.COMMON_MISSING_REQUIRED_FIELD, + "Current section content is required." + ) + + if not (feedback or "").strip(): + raise AppException( + ErrorCode.COMMON_MISSING_REQUIRED_FIELD, + "Optimization feedback is required." + ) + + tool_info_list = _resolve_prompt_generation_tools( + agent_id=agent_id, + tenant_id=tenant_id, + tool_ids=tool_ids, + ) + knowledge_base_display_names = _resolve_knowledge_base_display_names( + agent_id=agent_id, + tenant_id=tenant_id, + tool_info_list=tool_info_list, + knowledge_base_display_names=knowledge_base_display_names, + ) + sub_agent_info_list = _resolve_prompt_generation_sub_agents( + agent_id=agent_id, + tenant_id=tenant_id, + sub_agent_ids=sub_agent_ids, + ) + + prompt_template = get_prompt_optimize_prompt_template(language) + prompt_context = join_info_for_optimize_prompt_section( + prompt_for_optimize=prompt_template, + section_type=normalized_section_type, + section_title=section_title or _default_prompt_section_title(normalized_section_type, language), + task_description=task_description, + current_content=current_content, + feedback=feedback, + tool_info_list=tool_info_list, + sub_agent_info_list=sub_agent_info_list, + language=language, + knowledge_base_display_names=knowledge_base_display_names, + ) + + optimized_content = call_llm_for_system_prompt( + model_id=model_id, + user_prompt=prompt_context, + system_prompt=prompt_template["OPTIMIZE_SYSTEM_PROMPT"], + tenant_id=tenant_id, + ).strip() + + if not optimized_content: + raise AppException(ErrorCode.MODEL_PROMPT_GENERATION_FAILED) + + return { + "section_type": normalized_section_type, + "section_title": section_title or _default_prompt_section_title(normalized_section_type, language), + "original_content": current_content, + "optimized_content": optimized_content, + } + -def generate_system_prompt(sub_agent_info_list, task_description, tool_info_list, tenant_id: str, model_id: int, 
language: str = LANGUAGE["ZH"], knowledge_base_display_names: Optional[List[str]] = None): +def generate_system_prompt(sub_agent_info_list, task_description, tool_info_list, tenant_id: str, user_id: str, model_id: int, language: str = LANGUAGE["ZH"], prompt_template_id: Optional[int] = None, knowledge_base_display_names: Optional[List[str]] = None, has_selected_resources: bool = True): """Main function for generating system prompts""" - prompt_for_generate = get_prompt_generate_prompt_template(language) + prompt_for_generate = resolve_prompt_generate_template( + tenant_id=tenant_id, + user_id=user_id, + language=language, + prompt_template_id=prompt_template_id, + ) # Prepare content for generating system prompts content = join_info_for_generate_system_prompt( @@ -249,7 +371,8 @@ def generate_system_prompt(sub_agent_info_list, task_description, tool_info_list task_description=task_description, tool_info_list=tool_info_list, language=language, - knowledge_base_display_names=knowledge_base_display_names + knowledge_base_display_names=knowledge_base_display_names, + has_selected_resources=has_selected_resources, ) # Initialize state @@ -259,19 +382,106 @@ def generate_system_prompt(sub_agent_info_list, task_description, tool_info_list stop_flags = {"duty": False, "constraint": False, "few_shots": False, "agent_var_name": False, "agent_display_name": False, "agent_description": False} - # Start all generation threads + # Get model concurrency limit to control the number of concurrent LLM calls + # If None or >= 6, no limit (all 6 calls run concurrently) + # If < 6, use semaphore to limit concurrent calls + model_config = get_model_by_model_id(model_id, tenant_id) + concurrency_limit = model_config.get("concurrency_limit") if model_config else None + + # Start all generation threads with concurrency control threads, error_holder = _start_generation_threads( - content, prompt_for_generate, produce_queue, latest, stop_flags, tenant_id, model_id) + content, 
prompt_for_generate, produce_queue, latest, stop_flags, tenant_id, model_id, + has_selected_resources, + concurrency_limit=concurrency_limit + ) # Stream results yield from _stream_results(produce_queue, latest, stop_flags, threads, error_holder) -def _start_generation_threads(content, prompt_for_generate, produce_queue, latest, stop_flags, tenant_id, model_id): - """Start all prompt generation threads""" +def _resolve_prompt_generation_tools( + agent_id: int, + tenant_id: str, + tool_ids: Optional[List[int]] = None, +) -> List[dict]: + if tool_ids and len(tool_ids) > 0: + logger.debug(f"Using frontend-provided tool IDs: {tool_ids}") + return query_tools_by_ids(tool_ids) + + logger.debug("No tools selected (empty tool_ids list)") + return get_enabled_tool_description_for_generate_prompt( + tenant_id=tenant_id, agent_id=agent_id + ) + + +def _resolve_knowledge_base_display_names( + agent_id: int, + tenant_id: str, + tool_info_list: List[dict], + knowledge_base_display_names: Optional[List[str]] = None, +) -> Optional[List[str]]: + if knowledge_base_display_names: + logger.debug( + f"Using frontend-provided knowledge base display names: {knowledge_base_display_names}" + ) + return knowledge_base_display_names + + resolved_names = get_knowledge_base_display_names( + tool_info_list=tool_info_list, + agent_id=agent_id, + tenant_id=tenant_id + ) + logger.debug(f"Using database query for knowledge base display names: {resolved_names}") + return resolved_names + + +def _resolve_prompt_generation_sub_agents( + agent_id: int, + tenant_id: str, + sub_agent_ids: Optional[List[int]] = None, +) -> List[dict]: + if sub_agent_ids and len(sub_agent_ids) > 0: + sub_agent_info_list = [] + for sub_agent_id in sub_agent_ids: + try: + sub_agent_info = search_agent_info_by_agent_id( + agent_id=sub_agent_id, tenant_id=tenant_id) + sub_agent_info_list.append(sub_agent_info) + except Exception as exc: + logger.warning( + f"Failed to get sub-agent info for agent_id {sub_agent_id}: 
{str(exc)}" + ) + logger.debug(f"Using frontend-provided sub-agent IDs: {sub_agent_ids}") + return sub_agent_info_list + + logger.debug("No sub-agents selected (empty sub_agent_ids list)") + return get_enabled_sub_agent_description_for_generate_prompt( + tenant_id=tenant_id, agent_id=agent_id + ) + +def _start_generation_threads(content, prompt_for_generate, produce_queue, latest, stop_flags, tenant_id, model_id, + has_selected_resources = True, concurrency_limit: Optional[int] = None): + """Start all prompt generation threads with optional concurrency control.""" # Shared error tracking across threads error_holder = {"error": None} + # Total number of generation tasks + total_tasks = 6 + + # Determine effective concurrency limit + # None means unlimited, 0 or negative means unlimited + if concurrency_limit is None or concurrency_limit <= 0 or concurrency_limit >= total_tasks: + effective_limit = None + else: + effective_limit = concurrency_limit + + # Use semaphore if concurrency is limited + semaphore = threading.Semaphore(effective_limit) if effective_limit else None + if semaphore: + logger.info(f"Using concurrency limit of {effective_limit} for prompt generation (total tasks: {total_tasks})") + else: + logger.info("Using unlimited concurrency for prompt generation") + def make_callback(tag): def callback_fn(current_text): latest[tag] = current_text @@ -280,8 +490,16 @@ def callback_fn(current_text): def run_and_flag(tag, sys_prompt): try: - call_llm_for_system_prompt( - model_id, content, sys_prompt, make_callback(tag), tenant_id) + # Acquire semaphore before starting (if limited) + if semaphore: + semaphore.acquire() + try: + call_llm_for_system_prompt( + model_id, content, sys_prompt, make_callback(tag), tenant_id) + finally: + # Always release semaphore after completion + if semaphore: + semaphore.release() except Exception as e: logger.error(f"Error in {tag} generation: {e}") error_holder["error"] = e @@ -291,18 +509,31 @@ def run_and_flag(tag, 
sys_prompt): threads = [] logger.info("Generating system prompt") + # Base sections always generated prompt_configs = [ - ("duty", prompt_for_generate["DUTY_SYSTEM_PROMPT"]), - ("constraint", prompt_for_generate["CONSTRAINT_SYSTEM_PROMPT"]), - ("few_shots", prompt_for_generate["FEW_SHOTS_SYSTEM_PROMPT"]), + ("duty", prompt_for_generate["duty_system_prompt"]), ("agent_var_name", - prompt_for_generate["AGENT_VARIABLE_NAME_SYSTEM_PROMPT"]), + prompt_for_generate["agent_variable_name_system_prompt"]), ("agent_display_name", - prompt_for_generate["AGENT_DISPLAY_NAME_SYSTEM_PROMPT"]), + prompt_for_generate["agent_display_name_system_prompt"]), ("agent_description", - prompt_for_generate["AGENT_DESCRIPTION_SYSTEM_PROMPT"]) + prompt_for_generate["agent_description_system_prompt"]) ] + # Constraint and few_shots sections are only generated when tools or sub-agents are selected + if has_selected_resources: + prompt_configs.extend([ + ("constraint", prompt_for_generate["constraint_system_prompt"]), + ("few_shots", prompt_for_generate["few_shots_system_prompt"]), + ]) + else: + logger.info("Skipping constraint and few_shots generation: no tools or sub-agents selected") + # Mark these sections as already complete with empty content + stop_flags["constraint"] = True + stop_flags["few_shots"] = True + latest["constraint"] = "" + latest["few_shots"] = "" + for tag, sys_prompt in prompt_configs: thread = threading.Thread(target=run_and_flag, args=(tag, sys_prompt)) thread.start() @@ -368,7 +599,7 @@ def _stream_results(produce_queue, latest, stop_flags, threads, error_holder): last_results[tag] = latest[tag] -def join_info_for_generate_system_prompt(prompt_for_generate, sub_agent_info_list, task_description, tool_info_list, language: str = LANGUAGE["ZH"], knowledge_base_display_names: Optional[List[str]] = None): +def join_info_for_generate_system_prompt(prompt_for_generate, sub_agent_info_list, task_description, tool_info_list, language: str = LANGUAGE["ZH"], 
knowledge_base_display_names: Optional[List[str]] = None, has_selected_resources: bool = True): input_label = "Inputs" if language == 'en' else "接受输入" output_label = "Output type" if language == 'en' else "返回输出类型" @@ -385,7 +616,10 @@ def join_info_for_generate_system_prompt(prompt_for_generate, sub_agent_info_lis "assistant_description": assistant_description, # Always include knowledge_base_names to avoid StrictUndefined errors in template. # An empty string is falsy, so the {% if knowledge_base_names %} block will be skipped. - "knowledge_base_names": "" + "knowledge_base_names": "", + # Flag indicating whether tools or sub-agents are selected; + # templates use this to suppress boilerplate in constraint/few_shots sections + "has_selected_resources": has_selected_resources, } # Always add knowledge_base_names to context (empty string when not available). @@ -398,10 +632,63 @@ def join_info_for_generate_system_prompt(prompt_for_generate, sub_agent_info_lis template_context["knowledge_base_names"] = kb_names_str # Generate content using template - content = Template(prompt_for_generate["USER_PROMPT"], undefined=StrictUndefined).render(template_context) + content = Template(prompt_for_generate["user_prompt"], undefined=StrictUndefined).render(template_context) return content +def join_info_for_optimize_prompt_section( + prompt_for_optimize, + section_type: str, + section_title: str, + task_description: str, + current_content: str, + feedback: str, + tool_info_list, + sub_agent_info_list, + language: str = LANGUAGE["ZH"], + knowledge_base_display_names: Optional[List[str]] = None, +): + input_label = "Inputs" if language == LANGUAGE["EN"] else "接受输入" + output_label = "Output type" if language == LANGUAGE["EN"] else "返回输出类型" + + tool_description = "\n".join( + [f"- {tool['name']}: {tool['description']} \n {input_label}: {tool['inputs']}\n {output_label}: {tool['output_type']}" + for tool in tool_info_list] + ) + assistant_description = "\n".join( + [f"- 
{sub_agent_info['name']}: {sub_agent_info['description']}" for sub_agent_info in sub_agent_info_list] + ) + + if knowledge_base_display_names: + kb_names_str = ", ".join(f'"{name}"' for name in knowledge_base_display_names) + else: + kb_names_str = "" + + template_context = { + "section_type": section_type, + "section_title": section_title, + "task_description": task_description, + "current_content": current_content, + "feedback": feedback, + "tool_description": tool_description, + "assistant_description": assistant_description, + "knowledge_base_names": kb_names_str, + } + + return Template( + prompt_for_optimize["OPTIMIZE_USER_PROMPT"], + undefined=StrictUndefined + ).render(template_context) + + +def _default_prompt_section_title(section_type: str, language: str) -> str: + localized_titles = PROMPT_SECTION_TYPE_TITLES.get( + language, + PROMPT_SECTION_TYPE_TITLES[LANGUAGE["ZH"]] + ) + return localized_titles.get(section_type, section_type) + + def get_enabled_tool_description_for_generate_prompt(agent_id: int, tenant_id: str): # Get tool information logger.info("Fetching tool instances") diff --git a/backend/services/prompt_template_service.py b/backend/services/prompt_template_service.py new file mode 100644 index 000000000..14224a099 --- /dev/null +++ b/backend/services/prompt_template_service.py @@ -0,0 +1,322 @@ +import logging +from typing import Optional + +from consts.const import DEFAULT_TENANT_ID, DEFAULT_USER_ID +from consts.const import LANGUAGE +from consts.exceptions import DuplicateError, NotFoundException, ValidationError +from consts.model import PromptTemplateRequest +from database.prompt_template_db import ( + create_prompt_template, + delete_prompt_template, + get_prompt_template_by_id, + get_prompt_template_by_name, + get_prompt_template_by_template_id, + query_prompt_templates_by_user, + upsert_prompt_template_by_id, + update_prompt_template, +) +from utils.prompt_template_utils import ( + get_prompt_generate_prompt_template, + 
merge_prompt_generate_templates, + normalize_prompt_generate_template_content, +) + +logger = logging.getLogger("prompt_template_service") + +SYSTEM_PROMPT_TEMPLATE_ID = 0 +SYSTEM_PROMPT_TEMPLATE_NAME = "system_default" +PROMPT_TEMPLATE_TYPE_AGENT_GENERATE = "agent_generate" +SYSTEM_PROMPT_TEMPLATE_DESCRIPTION = "System default prompt template" +SYSTEM_PROMPT_TEMPLATE_TENANT_ID = DEFAULT_TENANT_ID +SYSTEM_PROMPT_TEMPLATE_USER_ID = DEFAULT_USER_ID + + +def _normalize_prompt_template_entity(template: Optional[dict]) -> Optional[dict]: + """Normalize prompt template entity content keys to lowercase.""" + if not template: + return template + + normalized_template = dict(template) + normalized_template["template_content_zh"] = normalize_prompt_generate_template_content( + normalized_template.get("template_content_zh") + ) + template_content_en = normalize_prompt_generate_template_content( + normalized_template.get("template_content_en") + ) + normalized_template["template_content_en"] = template_content_en or None + return normalized_template + + +def build_system_default_prompt_template_payload() -> dict: + """Build the canonical system default prompt template payload from YAML files.""" + system_template_zh = normalize_prompt_generate_template_content( + get_prompt_generate_prompt_template(LANGUAGE["ZH"]) + ) + system_template_en = normalize_prompt_generate_template_content( + get_prompt_generate_prompt_template(LANGUAGE["EN"]) + ) + return { + "template_id": SYSTEM_PROMPT_TEMPLATE_ID, + "template_name": SYSTEM_PROMPT_TEMPLATE_NAME, + "description": SYSTEM_PROMPT_TEMPLATE_DESCRIPTION, + "template_type": PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + "tenant_id": SYSTEM_PROMPT_TEMPLATE_TENANT_ID, + "user_id": SYSTEM_PROMPT_TEMPLATE_USER_ID, + "template_content_zh": system_template_zh, + "template_content_en": system_template_en, + "created_by": SYSTEM_PROMPT_TEMPLATE_USER_ID, + "updated_by": SYSTEM_PROMPT_TEMPLATE_USER_ID, + "delete_flag": "N", + } + + +def 
sync_system_default_prompt_template() -> dict: + """Sync the YAML-backed system default prompt template into the database.""" + payload = build_system_default_prompt_template_payload() + prompt_template = upsert_prompt_template_by_id( + template_id=SYSTEM_PROMPT_TEMPLATE_ID, + template_data=payload, + user_id=SYSTEM_PROMPT_TEMPLATE_USER_ID, + ) + prompt_template["is_system_default"] = True + return _normalize_prompt_template_entity(prompt_template) + + +def get_system_default_prompt_template() -> dict: + """Return the system default prompt generation template from the database.""" + prompt_template = get_prompt_template_by_template_id( + template_id=SYSTEM_PROMPT_TEMPLATE_ID, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + if not prompt_template: + prompt_template = sync_system_default_prompt_template() + else: + prompt_template["is_system_default"] = True + return _normalize_prompt_template_entity({ + **prompt_template, + "is_system_default": True, + }) + + +def _normalize_template_request(request: PromptTemplateRequest) -> dict: + """Normalize prompt template request payload.""" + template_name = (request.template_name or "").strip() + if not template_name: + raise ValidationError("template_name is required") + + if request.template_type != PROMPT_TEMPLATE_TYPE_AGENT_GENERATE: + raise ValidationError("Unsupported template type") + + zh_content = normalize_prompt_generate_template_content( + request.template_content_zh.model_dump() + ) + if len(zh_content) == 0: + raise ValidationError("template_content_zh is required") + + en_content = None + if request.template_content_en is not None: + en_content = normalize_prompt_generate_template_content( + request.template_content_en.model_dump() + ) + if len(en_content) == 0: + en_content = None + + return { + "template_name": template_name, + "description": (request.description or "").strip() or None, + "template_type": request.template_type, + "template_content_zh": zh_content, + "template_content_en": 
en_content, + } + + +def list_prompt_templates_impl(tenant_id: str, user_id: str) -> list[dict]: + """List all prompt templates for the current user.""" + system_default_template = sync_system_default_prompt_template() + templates = query_prompt_templates_by_user( + tenant_id=tenant_id, + user_id=user_id, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + return [system_default_template, *[ + _normalize_prompt_template_entity({ + **template, + "is_system_default": False, + }) + for template in templates + if template.get("template_id") != SYSTEM_PROMPT_TEMPLATE_ID + ]] + + +def get_prompt_template_detail_impl(template_id: int, tenant_id: str, user_id: str) -> dict: + """Get prompt template detail.""" + if template_id == SYSTEM_PROMPT_TEMPLATE_ID: + return get_system_default_prompt_template() + + template = get_prompt_template_by_id( + template_id=template_id, + tenant_id=tenant_id, + user_id=user_id, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + if not template: + raise NotFoundException("Prompt template not found") + + template["is_system_default"] = False + return _normalize_prompt_template_entity(template) + + +def create_prompt_template_impl( + request: PromptTemplateRequest, + tenant_id: str, + user_id: str, +) -> dict: + """Create a prompt template.""" + normalized_request = _normalize_template_request(request) + existing_template = get_prompt_template_by_name( + template_name=normalized_request["template_name"], + tenant_id=tenant_id, + user_id=user_id, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + if existing_template: + raise DuplicateError("Prompt template name already exists") + + created_template = create_prompt_template({ + **normalized_request, + "tenant_id": tenant_id, + "user_id": user_id, + "created_by": user_id, + "updated_by": user_id, + }) + created_template["is_system_default"] = False + return _normalize_prompt_template_entity(created_template) + + +def update_prompt_template_impl( + template_id: int, + 
request: PromptTemplateRequest, + tenant_id: str, + user_id: str, +) -> dict: + """Update a prompt template.""" + if template_id == SYSTEM_PROMPT_TEMPLATE_ID: + raise ValidationError("System default prompt template cannot be updated") + + existing_template = get_prompt_template_by_id( + template_id=template_id, + tenant_id=tenant_id, + user_id=user_id, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + if not existing_template: + raise NotFoundException("Prompt template not found") + + normalized_request = _normalize_template_request(request) + duplicate_template = get_prompt_template_by_name( + template_name=normalized_request["template_name"], + tenant_id=tenant_id, + user_id=user_id, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + if duplicate_template and duplicate_template["template_id"] != template_id: + raise DuplicateError("Prompt template name already exists") + + updated_template = update_prompt_template( + template_id=template_id, + template_data=normalized_request, + user_id=user_id, + ) + updated_template["is_system_default"] = False + return _normalize_prompt_template_entity(updated_template) + + +def delete_prompt_template_impl(template_id: int, tenant_id: str, user_id: str) -> dict: + """Delete a prompt template.""" + if template_id == SYSTEM_PROMPT_TEMPLATE_ID: + raise ValidationError("System default prompt template cannot be deleted") + + existing_template = get_prompt_template_by_id( + template_id=template_id, + tenant_id=tenant_id, + user_id=user_id, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + if not existing_template: + raise NotFoundException("Prompt template not found") + + deleted_count = delete_prompt_template(template_id=template_id, user_id=user_id) + return { + "template_id": template_id, + "deleted": deleted_count > 0, + } + + +def resolve_prompt_generate_template( + tenant_id: str, + user_id: str, + language: str, + prompt_template_id: Optional[int] = None, +) -> dict: + """Resolve prompt generation 
template for the current user and language.""" + system_default_template = sync_system_default_prompt_template() + system_template = ( + system_default_template.get("template_content_en") + if language == LANGUAGE["EN"] + else system_default_template.get("template_content_zh") + ) + fallback_system_template = system_default_template.get("template_content_zh") + + if not prompt_template_id or prompt_template_id == SYSTEM_PROMPT_TEMPLATE_ID: + return merge_prompt_generate_templates(system_template, fallback_system_template) + + prompt_template = get_prompt_template_by_id( + template_id=prompt_template_id, + tenant_id=tenant_id, + user_id=user_id, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + if not prompt_template: + logger.warning( + "Prompt template %s not found for tenant %s user %s, falling back to system default", + prompt_template_id, + tenant_id, + user_id, + ) + return merge_prompt_generate_templates(system_template, fallback_system_template) + + custom_language_template = ( + prompt_template.get("template_content_en") + if language == LANGUAGE["EN"] + else prompt_template.get("template_content_zh") + ) + return merge_prompt_generate_templates( + custom_language_template, + prompt_template.get("template_content_zh"), + system_template, + fallback_system_template, + ) + + +def get_prompt_template_summary( + template_id: Optional[int], + tenant_id: str, + user_id: str, +) -> tuple[Optional[int], Optional[str]]: + """Resolve prompt template identity for saving on agent.""" + if template_id is None: + return None, None + + if template_id == SYSTEM_PROMPT_TEMPLATE_ID: + return SYSTEM_PROMPT_TEMPLATE_ID, SYSTEM_PROMPT_TEMPLATE_NAME + + prompt_template = get_prompt_template_by_id( + template_id=template_id, + tenant_id=tenant_id, + user_id=user_id, + template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE, + ) + if not prompt_template: + raise NotFoundException("Prompt template not found") + + return prompt_template["template_id"], 
prompt_template["template_name"] diff --git a/backend/services/providers/dashscope_provider.py b/backend/services/providers/dashscope_provider.py index 69096fb15..497dcfe99 100644 --- a/backend/services/providers/dashscope_provider.py +++ b/backend/services/providers/dashscope_provider.py @@ -6,6 +6,75 @@ from services.providers.base import AbstractModelProvider, _classify_provider_error +DASHSCOPE_IMAGE_GENERATION_KEYWORDS = ( + "image", + "wanx", + "aitryon", + "tryon", + "flux", + "stable-diffusion", + "sdxl", +) +DASHSCOPE_IMAGE_UNDERSTANDING_KEYWORDS = ( + "qwen-vl", + "qwen2-vl", + "qwen2.5-vl", + "qwen3-vl", + "qwen3.5-vl", + "qwen3.6-vl", + "-vl", + "vl-", + "vision", + "visual", + "ocr", + "qwen3.6", + "qwen-3.6", +) +DASHSCOPE_VIDEO_UNDERSTANDING_KEYWORDS = ("omni", "video-understanding", "video-ocr") + + +def _modality_set(value) -> set: + if not value: + return set() + if isinstance(value, str): + return {value.lower()} + return {str(item).lower() for item in value} + + +def _has_keyword(text: str, keywords: tuple) -> bool: + return any(keyword in text for keyword in keywords) + + +def _is_dashscope_explicit_image_understanding_model(model_id: str) -> bool: + return _has_keyword(model_id, DASHSCOPE_IMAGE_UNDERSTANDING_KEYWORDS) + + +def _is_dashscope_image_generation_model(model_id: str, desc: str, req_mods: set, res_mods: set) -> bool: + if _is_dashscope_explicit_image_understanding_model(model_id): + return False + return "image" in res_mods or _has_keyword(model_id, DASHSCOPE_IMAGE_GENERATION_KEYWORDS) + + +def _is_dashscope_video_understanding_model(model_id: str, desc: str, req_mods: set, res_mods: set) -> bool: + searchable_text = f"{model_id} {desc.lower()}" + if "video" in req_mods and "text" in res_mods: + return True + return _has_keyword(searchable_text, DASHSCOPE_VIDEO_UNDERSTANDING_KEYWORDS) + + +def _is_dashscope_image_understanding_model(model_id: str, desc: str, req_mods: set, res_mods: set) -> bool: + searchable_text = f"{model_id} 
{desc.lower()}" + if _is_dashscope_image_generation_model(model_id, desc, req_mods, res_mods): + return False + if _is_dashscope_video_understanding_model(model_id, desc, req_mods, res_mods): + return False + if ("image" in req_mods or "video" in req_mods) and "text" in res_mods: + return True + return _is_dashscope_explicit_image_understanding_model(model_id) or _has_keyword( + searchable_text, DASHSCOPE_IMAGE_UNDERSTANDING_KEYWORDS + ) + + class DashScopeModelProvider(AbstractModelProvider): """Concrete implementation for DashScope (Aliyun) provider.""" @@ -57,6 +126,8 @@ async def get_models(self, provider_config: Dict) -> List[Dict]: categorized_models = { "chat": [], # Maps to "llm" "vlm": [], # Maps to "vlm" + "vlm2": [], # Maps to image generation models + "vlm3": [], # Maps to video understanding models "embedding": [], # Maps to "embedding" / "multi_embedding" "rerank": [], # Maps to "rerank" "tts": [], # Maps to "tts" @@ -71,6 +142,8 @@ async def get_models(self, provider_config: Dict) -> List[Dict]: metadata = model_obj.get('inference_metadata') or {} req_mod = metadata.get('request_modality', []) res_mod = metadata.get('response_modality', []) + req_mods = _modality_set(req_mod) + res_mods = _modality_set(res_mod) model_obj.setdefault("object", model_obj.get("object", "model")) model_obj.setdefault("owned_by", model_obj.get("owned_by", "dashscope")) cleaned_model = { @@ -107,8 +180,17 @@ async def get_models(self, provider_config: Dict) -> List[Dict]: continue # 5. 
VLM - vision_mods = {'Image', 'Video'} - if (set(req_mod) & vision_mods) or (set(res_mod) & vision_mods) or '视觉' in desc: + if _is_dashscope_video_understanding_model(m_id, desc, req_mods, res_mods): + cleaned_model.update({"model_tag": "chat", "model_type": "vlm3"}) + categorized_models['vlm3'].append(cleaned_model) + continue + + if _is_dashscope_image_generation_model(m_id, desc, req_mods, res_mods): + cleaned_model.update({"model_tag": "chat", "model_type": "vlm2"}) + categorized_models['vlm2'].append(cleaned_model) + continue + + if _is_dashscope_image_understanding_model(m_id, desc, req_mods, res_mods): cleaned_model.update({"model_tag": "chat", "model_type": "vlm"}) categorized_models['vlm'].append(cleaned_model) continue @@ -124,7 +206,10 @@ async def get_models(self, provider_config: Dict) -> List[Dict]: elif target_model_type in ("embedding", "multi_embedding"): return categorized_models["embedding"] elif target_model_type in categorized_models: - return categorized_models[target_model_type] + return [ + {**model, "model_type": target_model_type} + for model in categorized_models[target_model_type] + ] else: return [] except (httpx.HTTPStatusError, httpx.ConnectTimeout, httpx.ConnectError, Exception) as e: diff --git a/backend/services/providers/silicon_provider.py b/backend/services/providers/silicon_provider.py index ea41cc95d..130f2346e 100644 --- a/backend/services/providers/silicon_provider.py +++ b/backend/services/providers/silicon_provider.py @@ -1,4 +1,5 @@ import httpx +import re from typing import Dict, List from consts.const import DEFAULT_LLM_MAX_TOKENS @@ -6,6 +7,62 @@ from services.providers.base import AbstractModelProvider, _classify_provider_error +SILICON_VLM_MODEL_KEYWORDS = ( + "-vl", + "_vl", + "/vl", + ".vl", + "vl-", + "vision", + "visual", + "internvl", + "deepseek-vl", + "deepseekvl", + "glm-4v", + "minicpm-v", + "llava", + "kimi-vl", + "kimi-k2.5", + "kimi-k2.6", + "qvq", + "omni", + "qwen3.5", + "qwen3.6", +) + 
+SILICON_VLM_METADATA_KEYWORDS = ("image", "video", "vision", "visual") + + +def _contains_silicon_vlm_metadata(value) -> bool: + if isinstance(value, str): + lower_value = value.lower() + return any(keyword in lower_value for keyword in SILICON_VLM_METADATA_KEYWORDS) + if isinstance(value, list): + return any(_contains_silicon_vlm_metadata(item) for item in value) + if isinstance(value, dict): + return any(_contains_silicon_vlm_metadata(item) for item in value.values()) + return False + + +def _is_silicon_vlm_model(model: Dict) -> bool: + if _contains_silicon_vlm_metadata(model): + return True + + model_id = str(model.get("id", "")).lower() + model_name = str(model.get("name", "")).lower() + searchable_text = f"{model_id} {model_name}" + if any(keyword in searchable_text for keyword in SILICON_VLM_MODEL_KEYWORDS): + return True + + return bool(re.search(r"glm-\d+(?:\.\d+)?v", searchable_text)) + + +def _is_silicon_omni_model(model: Dict) -> bool: + model_id = str(model.get("id", "")).lower() + model_name = str(model.get("name", "")).lower() + return "omni" in f"{model_id} {model_name}" + + class SiliconModelProvider(AbstractModelProvider): """Concrete implementation for SiliconFlow provider.""" @@ -25,12 +82,14 @@ async def get_models(self, provider_config: Dict) -> List[Dict]: headers = {"Authorization": f"Bearer {model_api_key}"} + provider_model_type = "vlm" if model_type in ("vlm2", "vlm3") else model_type + # Choose endpoint by model type - if model_type in ("llm", "vlm"): + if provider_model_type in ("llm", "vlm"): silicon_url = f"{SILICON_GET_URL}?sub_type=chat" - elif model_type in ("embedding", "multi_embedding"): + elif provider_model_type in ("embedding", "multi_embedding"): silicon_url = f"{SILICON_GET_URL}?sub_type=embedding" - elif model_type == "rerank": + elif provider_model_type == "rerank": silicon_url = f"{SILICON_GET_URL}?sub_type=reranker" else: silicon_url = SILICON_GET_URL @@ -40,17 +99,22 @@ async def get_models(self, provider_config: Dict) 
-> List[Dict]: response.raise_for_status() model_list: List[Dict] = response.json()["data"] + if model_type == "vlm3": + model_list = [item for item in model_list if _is_silicon_omni_model(item)] + elif provider_model_type == "vlm": + model_list = [item for item in model_list if _is_silicon_vlm_model(item)] + # Annotate models with canonical fields expected downstream - if model_type in ("llm", "vlm"): + if provider_model_type in ("llm", "vlm"): for item in model_list: item["model_tag"] = "chat" item["model_type"] = model_type item["max_tokens"] = DEFAULT_LLM_MAX_TOKENS - elif model_type in ("embedding", "multi_embedding"): + elif provider_model_type in ("embedding", "multi_embedding"): for item in model_list: item["model_tag"] = "embedding" item["model_type"] = model_type - elif model_type == "rerank": + elif provider_model_type == "rerank": for item in model_list: item["model_tag"] = "rerank" item["model_type"] = model_type diff --git a/backend/services/providers/tokenpony_provider.py b/backend/services/providers/tokenpony_provider.py index ab4446c1b..be2bb9c71 100644 --- a/backend/services/providers/tokenpony_provider.py +++ b/backend/services/providers/tokenpony_provider.py @@ -9,6 +9,64 @@ from services.providers.base import AbstractModelProvider, _classify_provider_error +TOKENPONY_IMAGE_UNDERSTANDING_KEYWORDS = ( + "qwen-vl", + "qwen2-vl", + "qwen2.5-vl", + "qwen3-vl", + "qwen3.5-vl", + "qwen3.6-vl", + "-vl", + "vl-", + "vision", + "visual", + "ocr", + "gpt-4o", + "qwen3.6", + "qwen-3.6", +) +TOKENPONY_IMAGE_GENERATION_KEYWORDS = ( + "image", + "dall", + "flux", + "stable-diffusion", + "sdxl", + "midjourney", + "wanx", + "kolors", + "seedream", + "ideogram", + "recraft", +) +TOKENPONY_VIDEO_UNDERSTANDING_KEYWORDS = ("omni", "video") + + +def _has_keyword(text: str, keywords: tuple) -> bool: + return any(keyword in text for keyword in keywords) + + +def _is_tokenpony_explicit_image_understanding_model(model_id: str) -> bool: + return _has_keyword(model_id, 
TOKENPONY_IMAGE_UNDERSTANDING_KEYWORDS) + + +def _is_tokenpony_image_generation_model(model_id: str) -> bool: + if _is_tokenpony_explicit_image_understanding_model(model_id): + return False + return _has_keyword(model_id, TOKENPONY_IMAGE_GENERATION_KEYWORDS) + + +def _is_tokenpony_video_understanding_model(model_id: str) -> bool: + return _has_keyword(model_id, TOKENPONY_VIDEO_UNDERSTANDING_KEYWORDS) + + +def _is_tokenpony_image_understanding_model(model_id: str) -> bool: + if _is_tokenpony_image_generation_model(model_id): + return False + if _is_tokenpony_video_understanding_model(model_id): + return False + return _is_tokenpony_explicit_image_understanding_model(model_id) + + class TokenPonyModelProvider(AbstractModelProvider): """Concrete implementation for TokenPony provider.""" @@ -46,6 +104,8 @@ async def get_models(self, provider_config: Dict) -> List[Dict]: categorized_models = { "chat": [], # Maps to "llm" "vlm": [], # Maps to "vlm" + "vlm2": [], # Maps to image generation models + "vlm3": [], # Maps to video understanding models "embedding": [], # Maps to "embedding" / "multi_embedding" "rerank": [], # Maps to "rerank" "tts": [], # Maps to "tts" @@ -86,9 +146,14 @@ async def get_models(self, provider_config: Dict) -> List[Dict]: cleaned_model.update({"model_tag": "tts", "model_type": "tts"}) categorized_models['tts'].append(cleaned_model) - # 5. VLM (Vision Language Model / Image & Video Generation) - - elif any(keyword in m_id for keyword in ['-vl', 'vl-', 'ocr', 'vision']): + # 5. 
Multimodal models + elif _is_tokenpony_video_understanding_model(m_id): + cleaned_model.update({"model_tag": "chat", "model_type": "vlm3"}) + categorized_models['vlm3'].append(cleaned_model) + elif _is_tokenpony_image_generation_model(m_id): + cleaned_model.update({"model_tag": "chat", "model_type": "vlm2"}) + categorized_models['vlm2'].append(cleaned_model) + elif _is_tokenpony_image_understanding_model(m_id): cleaned_model.update({"model_tag": "chat", "model_type": "vlm"}) categorized_models['vlm'].append(cleaned_model) @@ -104,7 +169,10 @@ async def get_models(self, provider_config: Dict) -> List[Dict]: elif target_model_type in ("embedding", "multi_embedding"): return categorized_models["embedding"] elif target_model_type in categorized_models: - return categorized_models[target_model_type] + return [ + {**model, "model_type": target_model_type} + for model in categorized_models[target_model_type] + ] else: return [] diff --git a/backend/services/redis_service.py b/backend/services/redis_service.py index dae617f60..1ffcf921c 100644 --- a/backend/services/redis_service.py +++ b/backend/services/redis_service.py @@ -1,7 +1,7 @@ import json import logging import re -from typing import Dict, Any, Optional, Tuple, Set +from typing import Dict, Any, Optional, Tuple, Set, List import redis @@ -24,8 +24,8 @@ def client(self) -> redis.Redis: if not REDIS_URL: raise ValueError("REDIS_URL environment variable is not set") self._client = redis.from_url( - REDIS_URL, - socket_timeout=5, + REDIS_URL, + socket_timeout=5, socket_connect_timeout=5, decode_responses=True ) @@ -654,13 +654,13 @@ def save_error_info(self, task_id: str, error_reason: str, ttl_days: int = 30) - if not error_reason: logger.error(f"Cannot save error info for task {task_id}: error_reason is empty") return False - + ttl_seconds = ttl_days * 24 * 60 * 60 reason_key = f"error:reason:{task_id}" # Save error reason result = self.client.setex(reason_key, ttl_seconds, error_reason) - + if result: 
logger.info(f"Successfully saved error info to Redis for task {task_id}, key: {reason_key}") # Verify the save by reading it back @@ -695,13 +695,13 @@ def save_progress_info(self, task_id: str, processed_chunks: int, total_chunks: if not task_id: logger.error("Cannot save progress info: task_id is empty") return False - + progress_key = f"progress:{task_id}" progress_data = { 'processed_chunks': processed_chunks, 'total_chunks': total_chunks } - + ttl_seconds = ttl_hours * 3600 progress_json = json.dumps(progress_data) self.client.setex( @@ -874,6 +874,79 @@ def get_error_info(self, task_id: str) -> Optional[str]: f"Failed to get error info for task {task_id}: {str(e)}") return None + def batch_get_progress_info(self, task_ids: List[str]) -> Dict[str, Optional[Dict[str, int]]]: + """ + Batch get progress information for multiple tasks in a single Redis call. + + Args: + task_ids: List of Celery task IDs + + Returns: + Dict mapping task_id to progress info dict, or None if not found + """ + if not task_ids: + return {} + + try: + # Build list of keys + progress_keys = [f"progress:{tid}" for tid in task_ids] + # Use pipeline for batch operation + pipe = self.client.pipeline() + for key in progress_keys: + pipe.get(key) + results = pipe.execute() + + # Build result dict + result = {} + for i, task_id in enumerate(task_ids): + progress_data = results[i] + if progress_data: + try: + if isinstance(progress_data, bytes): + progress_data = progress_data.decode('utf-8') + result[task_id] = json.loads(progress_data) + except (json.JSONDecodeError, TypeError): + result[task_id] = None + else: + result[task_id] = None + return result + except Exception as e: + logger.warning(f"Failed to batch get progress info: {str(e)}") + return {tid: None for tid in task_ids} + + def batch_get_error_info(self, task_ids: List[str]) -> Dict[str, Optional[str]]: + """ + Batch get error information for multiple tasks in a single Redis call. 
+ + Args: + task_ids: List of Celery task IDs + + Returns: + Dict mapping task_id to error reason string, or None if not found + """ + if not task_ids: + return {} + + try: + # Build list of keys + error_keys = [f"error:reason:{tid}" for tid in task_ids] + # Use pipeline for batch operation + pipe = self.client.pipeline() + for key in error_keys: + pipe.get(key) + results = pipe.execute() + + # Build result dict + result = {} + for i, task_id in enumerate(task_ids): + reason = results[i] + # With decode_responses=True, reason is already a string + result[task_id] = reason if reason else None + return result + except Exception as e: + logger.warning(f"Failed to batch get error info: {str(e)}") + return {tid: None for tid in task_ids} + # Global Redis service instance _redis_service = None diff --git a/backend/services/remote_mcp_service.py b/backend/services/remote_mcp_service.py index ab0f0b04f..72904cf8a 100644 --- a/backend/services/remote_mcp_service.py +++ b/backend/services/remote_mcp_service.py @@ -1,63 +1,168 @@ import logging import os import tempfile +import asyncio +import socket +import random from fastmcp import Client from fastmcp.client.transports import StreamableHttpTransport, SSETransport -from consts.const import CAN_EDIT_ALL_USER_ROLES, PERMISSION_EDIT, PERMISSION_READ -from consts.exceptions import MCPConnectionError, MCPNameIllegal +from consts.const import CAN_EDIT_ALL_USER_ROLES, PERMISSION_EDIT, PERMISSION_READ, NEXENT_MCP_DOCKER_IMAGE +from consts.exceptions import ( + MCPConnectionError, + MCPNameIllegal, + MCPContainerError, + McpNotFoundError, + McpValidationError, + McpNameConflictError, + McpPortConflictError, +) +from consts.model import MCPConfigRequest from database.remote_mcp_db import ( create_mcp_record, - delete_mcp_record_by_name_and_url, delete_mcp_record_by_container_id, get_mcp_records_by_tenant, check_mcp_name_exists, + check_enabled_mcp_name_exists, update_mcp_status_by_name_and_url, update_mcp_record_by_name_and_url, + 
update_mcp_record_manage_fields_by_id, + update_mcp_record_enabled_by_id, + update_mcp_record_container_fields_by_id, + update_mcp_record_status_by_id, + delete_mcp_record_by_id, get_mcp_authorization_token_by_name_and_url, get_mcp_record_by_id_and_tenant, + get_mcp_custom_headers_by_name_and_url, ) from database.user_tenant_db import get_user_tenant_by_user_id from services.mcp_container_service import MCPContainerManager +from services.tool_configuration_service import get_tool_from_remote_mcp_server logger = logging.getLogger("remote_mcp_service") -async def mcp_server_health(remote_mcp_server: str, authorization_token: str | None = None) -> bool: +# --------------------------------------------------------------------------- +# Health Check +# --------------------------------------------------------------------------- + +async def mcp_server_health(remote_mcp_server: str, authorization_token: str | None = None, custom_headers: dict | None = None) -> bool: + """Check if an MCP server is healthy and reachable.""" try: - # Select transport based on URL ending url_stripped = remote_mcp_server.strip() - headers = {"Authorization": authorization_token} if authorization_token else {} + headers = {} + if authorization_token: + headers["Authorization"] = authorization_token + if custom_headers: + headers.update(custom_headers) if url_stripped.endswith("/sse"): - transport = SSETransport( - url=url_stripped, - headers=headers - ) + transport = SSETransport(url=url_stripped, headers=headers) elif url_stripped.endswith("/mcp"): - transport = StreamableHttpTransport( - url=url_stripped, - headers=headers - ) + transport = StreamableHttpTransport(url=url_stripped, headers=headers) else: - # Default to StreamableHttpTransport for unrecognized formats - transport = StreamableHttpTransport( - url=url_stripped, - headers=headers - ) + transport = StreamableHttpTransport(url=url_stripped, headers=headers) client = Client(transport=transport) async with client: connected = 
client.is_connected() return connected except BaseException as e: - logger.error( - f"Remote MCP server health check failed: {e}", exc_info=True) - # Prevent library-level exits (e.g., SystemExit) from crashing the service - raise MCPConnectionError("MCP connection failed") + logger.error(f"Remote MCP server health check failed: {e}", exc_info=True) + error_message = str(e).strip() or repr(e) + if isinstance(e, (asyncio.TimeoutError, TimeoutError)) or "timeout" in error_message.lower(): + raise MCPConnectionError("MCP_HEALTH_TIMEOUT") + raise MCPConnectionError(error_message) + + +# --------------------------------------------------------------------------- +# Helper Functions +# --------------------------------------------------------------------------- + +def _is_container_record(record: dict | None) -> bool: + """Check if the MCP record is container-based. + + A record is considered container-based if it has: + - container_id (Docker container ID) + - config_json (container configuration) + """ + if not record: + return False + return record.get("container_id") is not None or record.get("config_json") is not None + + +# --------------------------------------------------------------------------- +# Port Management Functions +# --------------------------------------------------------------------------- + +def check_container_port_conflict_records(port: int) -> bool: + """Check if there are enabled MCP records that already use the given container port.""" + from database.remote_mcp_db import get_mcp_records_by_container_port + return not get_mcp_records_by_container_port(container_port=port) +def check_runtime_host_port_available(port: int) -> bool: + """Return True when the host port is not occupied by a listener.""" + probe_targets = [(socket.AF_INET, "127.0.0.1")] + if socket.has_ipv6: + probe_targets.append((socket.AF_INET6, "::1")) + + try: + host_infos = socket.getaddrinfo("host.docker.internal", port, socket.AF_UNSPEC, socket.SOCK_STREAM) + for family, _, _, 
_, sockaddr in host_infos: + probe_targets.append((family, sockaddr[0])) + except OSError: + pass + + for family, host in probe_targets: + try: + with socket.socket(family, socket.SOCK_STREAM) as probe_socket: + probe_socket.settimeout(0.2) + connect_result = probe_socket.connect_ex((host, port) if family == socket.AF_INET else (host, port, 0, 0)) + if connect_result == 0: + logger.info(f"Host port {port} is already in use on {host}") + return False + except OSError: + continue + + try: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as bind_probe: + if hasattr(socket, "SO_EXCLUSIVEADDRUSE"): + bind_probe.setsockopt(socket.SOL_SOCKET, socket.SO_EXCLUSIVEADDRUSE, 1) + else: + bind_probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 0) + bind_probe.bind(("0.0.0.0", port)) + bind_probe.listen(1) + return True + except OSError as exc: + logger.info(f"Host port {port} is already in use: {exc}") + return False + + +def check_container_port_conflict(*, port: int) -> bool: + """Check if a port is available for MCP container.""" + no_conflict_records = check_container_port_conflict_records(port=port) + runtime_available = check_runtime_host_port_available(port) + return no_conflict_records and runtime_available + + +def suggest_container_port() -> int: + """Suggest an available port for MCP container.""" + min_port = 2000 + max_port = 50000 + count = 0 + while count < 1000: + port = random.randint(min_port, max_port) + if check_container_port_conflict(port=port): + return port + count += 1 + raise McpPortConflictError("No available port found") + +# --------------------------------------------------------------------------- +# Add Functions +# --------------------------------------------------------------------------- + async def add_remote_mcp_server_list( tenant_id: str, user_id: str, @@ -65,48 +170,233 @@ async def add_remote_mcp_server_list( remote_mcp_server_name: str, container_id: str | None = None, authorization_token: str | None = None, + 
custom_headers: dict | None = None, + source: str | None = "local", + container_port: int | None = None, ): + """Add a remote MCP server to the list. - # check if MCP name already exists + Args: + tenant_id: Tenant ID + user_id: User ID + remote_mcp_server: MCP server URL + remote_mcp_server_name: MCP service name + container_id: Docker container ID (optional) + authorization_token: Authorization token (optional) + custom_headers: Custom HTTP headers (optional) + + Raises: + MCPNameIllegal: If MCP name already exists + MCPConnectionError: If MCP server is not reachable + """ if check_mcp_name_exists(mcp_name=remote_mcp_server_name, tenant_id=tenant_id): - logger.error( - f"MCP name already exists, tenant_id: {tenant_id}, remote_mcp_server_name: {remote_mcp_server_name}") + logger.error(f"MCP name already exists: {remote_mcp_server_name}") raise MCPNameIllegal("MCP name already exists") - # check if the address is available - if not await mcp_server_health(remote_mcp_server=remote_mcp_server, authorization_token=authorization_token): + if not await mcp_server_health(remote_mcp_server=remote_mcp_server, authorization_token=authorization_token, custom_headers=custom_headers): raise MCPConnectionError("MCP connection failed") - # update the PG database record insert_mcp_data = { "mcp_name": remote_mcp_server_name, "mcp_server": remote_mcp_server, "status": True, "container_id": container_id, "authorization_token": authorization_token, + "custom_headers": custom_headers, + "source": source, + "container_port": container_port, } - create_mcp_record(mcp_data=insert_mcp_data, - tenant_id=tenant_id, user_id=user_id) + create_mcp_record(mcp_data=insert_mcp_data, tenant_id=tenant_id, user_id=user_id) -async def delete_remote_mcp_server_list(tenant_id: str, - user_id: str, - remote_mcp_server: str, - remote_mcp_server_name: str): - # delete the record in the PG database - delete_mcp_record_by_name_and_url(mcp_name=remote_mcp_server_name, - mcp_server=remote_mcp_server, - 
tenant_id=tenant_id, - user_id=user_id) +async def add_mcp_service( + *, + tenant_id: str, + user_id: str, + name: str, + description: str | None, + source: str, + server_url: str, + tags: list | None, + authorization_token: str | None, + custom_headers: dict | None, + container_config: dict | None, + registry_json: dict | None, + enabled: bool = False, + container_id: str | None = None, + container_port: int | None = None, +) -> None: + """Add an MCP service record. + Args: + tenant_id: Tenant ID + user_id: User ID + name: MCP service name + description: MCP service description + source: Source type (local/mcp_registry/community) + server_url: MCP server URL + tags: MCP tags + authorization_token: Authorization token for MCP server + custom_headers: Custom HTTP headers + container_config: Container configuration + registry_json: Registry metadata JSON + enabled: Whether the MCP is enabled + container_id: Docker container ID + container_port: Container port + """ + status: bool | None = None + normalized_container_id = container_id if isinstance(container_id, str) and container_id else None + is_container = container_id is not None or container_config is not None + config_json = container_config if is_container and isinstance(container_config, dict) else None -async def update_remote_mcp_server_list( - update_data, + if enabled: + if check_mcp_name_exists(mcp_name=name, tenant_id=tenant_id): + logger.error(f"MCP name already exists: {name}") + raise MCPNameIllegal("MCP name already exists") + + if not await mcp_server_health(remote_mcp_server=server_url, authorization_token=authorization_token, custom_headers=custom_headers): + raise MCPConnectionError("MCP connection failed") + + status = True + + create_mcp_record( + mcp_data={ + "mcp_name": name, + "mcp_server": server_url, + "status": status, + "container_id": normalized_container_id, + "container_port": container_port, + "authorization_token": authorization_token, + "custom_headers": custom_headers, + 
"source": source, + "registry_json": registry_json, + "enabled": enabled, + "tags": tags, + "description": description, + "config_json": config_json, + }, + tenant_id=tenant_id, + user_id=user_id, + ) + + +async def add_container_mcp_service( + *, tenant_id: str, user_id: str, -): + name: str, + description: str | None, + source: str, + tags: list | None, + authorization_token: str | None, + registry_json: dict | None, + port: int, + mcp_config: MCPConfigRequest, +) -> dict: + """Add a container-based MCP service. + + Args: + tenant_id: Tenant ID + user_id: User ID + name: MCP service name + description: MCP service description + source: Source type + tags: MCP tags + authorization_token: Authorization token + registry_json: Registry metadata JSON + port: Host port for the container + mcp_config: MCP server configuration + + Returns: + Container information dictionary """ - Update an existing remote MCP server record. + service_name = name + if check_mcp_name_exists(mcp_name=service_name, tenant_id=tenant_id): + raise McpNameConflictError("Enabled MCP name already exists") + + if not check_container_port_conflict(port=port): + raise McpPortConflictError(f"Port {port} is already in use") + + servers = mcp_config.mcpServers + if len(servers) != 1: + raise McpValidationError("Exactly one mcpServers entry is required") + + _, config = next(iter(servers.items())) + command = config.command + if not command: + raise McpValidationError("command is required") + if command.strip().lower() == "docker": + raise McpValidationError("Docker command is not supported") + + env_vars = dict(config.env or {}) + auth_token = authorization_token + if auth_token: + env_vars["authorization_token"] = auth_token + + full_command = [ + "python", + "-m", + "mcp_proxy", + "--host", + "0.0.0.0", + "--port", + str(port), + "--transport", + "streamablehttp", + "--", + command, + *(config.args or []), + ] + + container_manager = MCPContainerManager() + try: + container_info = await 
container_manager.start_mcp_container( + service_name=service_name, + tenant_id=tenant_id, + user_id=user_id, + env_vars=env_vars, + host_port=port, + image=NEXENT_MCP_DOCKER_IMAGE, + full_command=full_command, + ) + logger.info(f"Started MCP container with info: {container_info}") + + container_config = mcp_config.model_dump(exclude_none=True) + + await add_mcp_service( + tenant_id=tenant_id, + user_id=user_id, + name=service_name, + description=description, + source=source, + server_url=container_info.get("mcp_url"), + tags=tags, + authorization_token=auth_token, + container_config=container_config, + registry_json=registry_json, + enabled=True, + container_id=container_info.get("container_id"), + container_port=container_info.get("host_port"), + ) + except Exception as exc: + logger.warning(f"Failed to start container MCP service: {exc}") + raise + + return { + "service_name": service_name, + "mcp_url": container_info.get("mcp_url"), + "container_id": container_info.get("container_id"), + "container_name": container_info.get("container_name"), + "host_port": container_info.get("host_port"), + } + + +# --------------------------------------------------------------------------- +# Update Functions +# --------------------------------------------------------------------------- + +async def update_remote_mcp_server_list(update_data, tenant_id: str, user_id: str) -> None: + """Update an existing remote MCP server record. 
Args: update_data: MCPUpdateRequest containing current and new values @@ -114,40 +404,31 @@ async def update_remote_mcp_server_list( user_id: User ID Raises: - MCPNameIllegal: If the new MCP name already exists (and is different from current) + MCPNameIllegal: If the new MCP name already exists MCPConnectionError: If the new MCP server URL is not accessible """ - # Check if the current record exists by verifying the name exists for this tenant if not check_mcp_name_exists(mcp_name=update_data.current_service_name, tenant_id=tenant_id): - logger.error( - f"MCP name does not exist, tenant_id: {tenant_id}, current_mcp_server_name: {update_data.current_service_name}") raise MCPNameIllegal("MCP name does not exist") - # If the new name is different from the current name, check if it already exists if update_data.new_service_name != update_data.current_service_name: if check_mcp_name_exists(mcp_name=update_data.new_service_name, tenant_id=tenant_id): - logger.error( - f"New MCP name already exists, tenant_id: {tenant_id}, new_mcp_server_name: {update_data.new_service_name}") raise MCPNameIllegal("New MCP name already exists") - # User authorization token authorization_token = update_data.new_authorization_token + custom_headers = getattr(update_data, 'custom_headers', None) - # Check if the new server URL is accessible try: status = await mcp_server_health( remote_mcp_server=update_data.new_mcp_url, - authorization_token=authorization_token + authorization_token=authorization_token, + custom_headers=custom_headers, ) except BaseException: status = False if not status: - logger.error( - f"New MCP server health check failed: {update_data.new_mcp_url}") raise MCPConnectionError("New MCP server connection failed") - # Update the database record update_mcp_record_by_name_and_url( update_data=update_data, tenant_id=tenant_id, @@ -156,7 +437,309 @@ async def update_remote_mcp_server_list( ) -async def get_remote_mcp_server_list(tenant_id: str, user_id: str | None = None, 
is_need_auth: bool = True) -> list[dict]: +def update_mcp_service( + *, + tenant_id: str, + user_id: str, + mcp_id: int, + new_name: str, + description: str | None, + server_url: str, + authorization_token: str | None, + custom_headers: dict | None, + tags: list | None, +) -> None: + """Update an MCP service record by ID. + + Args: + tenant_id: Tenant ID + user_id: User ID + mcp_id: MCP record ID + new_name: New MCP service name + description: MCP service description + server_url: New MCP server URL + authorization_token: Authorization token + custom_headers: Custom HTTP headers + tags: MCP tags + + Raises: + McpNotFoundError: If MCP record is not found + """ + current_record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id) + if not current_record: + raise McpNotFoundError("MCP record not found") + + is_container = _is_container_record(current_record) + config_json = None + if is_container: + config_json = current_record.get("config_json") if isinstance(current_record.get("config_json"), dict) else None + + update_mcp_record_manage_fields_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + name=new_name, + description=description, + server_url=server_url, + source=(current_record.get("source") or "local"), + authorization_token=authorization_token, + custom_headers=custom_headers, + config_json=config_json, + tags=tags, + ) + + +async def update_mcp_service_enabled( + *, + tenant_id: str, + user_id: str, + mcp_id: int, + enabled: bool, +) -> None: + """Enable or disable an MCP service. 
+ + Args: + tenant_id: Tenant ID + user_id: User ID + mcp_id: MCP record ID + enabled: True to enable, False to disable + + Raises: + McpNotFoundError: If MCP record is not found + McpNameConflictError: If an enabled service with the same name exists + McpPortConflictError: If the container port is not available + MCPConnectionError: If MCP connection fails + """ + current_record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id) + if not current_record: + raise McpNotFoundError("MCP record not found") + + if enabled: + current_name = current_record.get("mcp_name") + if current_name: + records = get_mcp_records_by_tenant(tenant_id=tenant_id) + for record in records: + if int(record.get("mcp_id") or 0) == mcp_id: + continue + record_name = record.get("mcp_name") + is_enabled = bool(record.get("enabled")) + if is_enabled and record_name == current_name: + raise McpNameConflictError("An enabled service already uses this name") + + authorization_token = current_record.get("authorization_token") + custom_headers = current_record.get("custom_headers") if isinstance(current_record.get("custom_headers"), dict) else None + + if _is_container_record(current_record): + if enabled: + port = current_record.get("container_port") + if port is None: + raise McpValidationError("Container port is missing, cannot rebuild container") + if not check_runtime_host_port_available(port): + raise McpPortConflictError(f"Port {port} is already in use") + + config_json = current_record.get("config_json") + if not isinstance(config_json, dict): + raise McpValidationError("Container configuration is missing, cannot rebuild container") + + try: + mcp_config = MCPConfigRequest(**config_json) + except Exception as exc: + raise McpValidationError(f"Invalid container configuration: {exc}") + + servers = mcp_config.mcpServers + if not servers or len(servers) != 1: + raise McpValidationError("Exactly one mcpServers entry is required") + _, config = next(iter(servers.items())) + 
command = config.command + if not command: + raise McpValidationError("command is required") + + env_vars = dict(config.env or {}) + if authorization_token: + env_vars["authorization_token"] = authorization_token + + full_command = [ + "python", + "-m", + "mcp_proxy", + "--host", + "0.0.0.0", + "--port", + str(port), + "--transport", + "streamablehttp", + "--", + command, + *(config.args or []), + ] + + container_manager = MCPContainerManager() + container_info = await container_manager.start_mcp_container( + service_name=current_record.get("mcp_name"), + tenant_id=tenant_id, + user_id=user_id, + env_vars=env_vars, + host_port=port, + image=NEXENT_MCP_DOCKER_IMAGE, + full_command=full_command, + ) + + next_server_url = container_info.get("mcp_url") + next_container_id = container_info.get("container_id") + next_container_port = container_info.get("host_port") or port + + health_ok = False + MCP_CONTAINER_HEALTH_CHECK_ATTEMPTS = 10 + MCP_CONTAINER_HEALTH_CHECK_DELAY_SECONDS = 0.5 + for attempt in range(MCP_CONTAINER_HEALTH_CHECK_ATTEMPTS): + try: + health_ok = await mcp_server_health( + remote_mcp_server=next_server_url, + authorization_token=authorization_token, + custom_headers=custom_headers, + ) + except MCPConnectionError: + health_ok = False + if health_ok: + break + if attempt < MCP_CONTAINER_HEALTH_CHECK_ATTEMPTS - 1: + await asyncio.sleep(MCP_CONTAINER_HEALTH_CHECK_DELAY_SECONDS) + + if not health_ok: + if next_container_id: + try: + await MCPContainerManager().stop_mcp_container(next_container_id) + except Exception as exc: + logger.warning(f"Failed to stop unhealthy container {next_container_id}: {exc}") + update_mcp_record_container_fields_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + container_id=None, + container_port=port, + mcp_server=next_server_url, + status=False, + ) + raise MCPConnectionError("MCP connection failed") + + update_mcp_record_container_fields_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + 
container_id=next_container_id, + container_port=next_container_port, + mcp_server=next_server_url, + status=True, + ) + else: + current_container_id = current_record.get("container_id") + if current_container_id and current_record.get("config_json"): + try: + manager = MCPContainerManager() + await manager.stop_mcp_container(current_container_id) + except Exception as exc: + logger.warning(f"Failed to stop container {current_container_id}: {exc}") + update_mcp_record_container_fields_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + container_id=None, + container_port=current_record.get("container_port"), + mcp_server=current_record.get("mcp_server"), + status=None, + ) + elif enabled: + server_url = current_record.get("mcp_server") + health_ok = await mcp_server_health( + remote_mcp_server=server_url, + authorization_token=authorization_token, + custom_headers=custom_headers, + ) + update_mcp_record_status_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + status=bool(health_ok), + ) + if not health_ok: + raise MCPConnectionError("MCP connection failed") + + update_mcp_record_enabled_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + enabled=enabled, + ) + + +# --------------------------------------------------------------------------- +# Delete Functions +# --------------------------------------------------------------------------- + +async def delete_mcp_service( + *, + tenant_id: str, + user_id: str, + mcp_id: int, +) -> None: + """Delete an MCP service by ID. 
+ + Args: + tenant_id: Tenant ID + user_id: User ID + mcp_id: MCP record ID + + Raises: + McpNotFoundError: If MCP record is not found + """ + current_record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id) + if not current_record: + raise McpNotFoundError("MCP record not found") + container_id = current_record.get("container_id") + if container_id: + try: + manager = MCPContainerManager() + await manager.stop_mcp_container(container_id=container_id) + except Exception as exc: + logger.warning(f"Failed to stop container: {exc}, but continue to delete MCP record") + + delete_mcp_record_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + ) + + +async def delete_mcp_by_container_id(tenant_id: str, user_id: str, container_id: str) -> None: + """Soft delete MCP record associated with a specific container ID.""" + delete_mcp_record_by_container_id( + container_id=container_id, + tenant_id=tenant_id, + user_id=user_id, + ) + + +# --------------------------------------------------------------------------- +# List Functions +# --------------------------------------------------------------------------- + +async def get_remote_mcp_server_list( + tenant_id: str, + user_id: str | None = None, + is_need_auth: bool = True, +) -> list[dict]: + """Get list of remote MCP servers with full details. 
+ + Args: + tenant_id: Tenant ID + user_id: User ID for permission checking + is_need_auth: Whether to include authorization tokens + + Returns: + List of MCP server records with all fields including container_id, description, + enabled, source, update_time, tags, container_port, registry_json, config_json, + container_status, and authorization_token + """ mcp_records = get_mcp_records_by_tenant(tenant_id=tenant_id) mcp_records_list = [] can_edit_all = False @@ -165,23 +748,60 @@ async def get_remote_mcp_server_list(tenant_id: str, user_id: str | None = None, user_role = str(user_tenant_record.get("user_role") or "").upper() can_edit_all = user_role in CAN_EDIT_ALL_USER_ROLES + container_status_map = {} + try: + manager = MCPContainerManager() + for container in manager.list_mcp_containers(tenant_id=tenant_id): + container_id = container.get("container_id") + status = container.get("status") + if not container_id: + continue + if status == "running": + container_status_map[container_id] = "running" + elif status: + container_status_map[container_id] = "stopped" + except Exception as exc: + logger.warning(f"Failed to load container runtime status: {exc}") + for record in mcp_records: created_by = record.get("created_by") or record.get("user_id") if user_id is None: permission = PERMISSION_READ else: - permission = PERMISSION_EDIT if can_edit_all or str( - created_by) == str(user_id) else PERMISSION_READ + permission = PERMISSION_EDIT if can_edit_all or str(created_by) == str(user_id) else PERMISSION_READ + + config_json = record.get("config_json") + container_id = record.get("container_id") + + is_container = container_id is not None or config_json is not None + + container_status = None + if is_container: + if container_id: + container_status = container_status_map.get(container_id, "stopped") + else: + container_status = "stopped" record_dict = { "remote_mcp_server_name": record["mcp_name"], "remote_mcp_server": record["mcp_server"], - "status": record["status"], 
+ "status": record.get("status"), "permission": permission, "mcp_id": record.get("mcp_id"), + "container_id": container_id, + "description": record.get("description"), + "enabled": record.get("enabled"), + "source": record.get("source"), + "update_time": record.get("update_time"), + "tags": record.get("tags") or [], + "container_port": record.get("container_port"), + "registry_json": record.get("registry_json"), + "config_json": record.get("config_json"), + "container_status": container_status, } if is_need_auth: record_dict["authorization_token"] = record.get("authorization_token") + record_dict["custom_headers"] = record.get("custom_headers") mcp_records_list.append(record_dict) return mcp_records_list @@ -192,13 +812,15 @@ def attach_mcp_container_permissions( tenant_id: str, user_id: str | None = None, ) -> list[dict]: - """ - Attach permission (EDIT/READ) to each MCP container entry. + """Attach permission (EDIT/READ) to each MCP container entry. - Rules: - - If user's role is in CAN_EDIT_ALL_USER_ROLES => EDIT for all containers - - Otherwise => EDIT only if the container is associated with an MCP record created by this user - - If association cannot be determined => default to READ + Args: + containers: List of container records + tenant_id: Tenant ID + user_id: User ID for permission checking + + Returns: + List of containers with permission field added """ if not containers: return [] @@ -208,19 +830,17 @@ def attach_mcp_container_permissions( user_role = str(user_tenant_record.get("user_role") or "").upper() can_edit_all = user_role in CAN_EDIT_ALL_USER_ROLES - created_by_by_container_id: dict[str, str] = {} + created_by_by_container_id = {} try: for record in get_mcp_records_by_tenant(tenant_id=tenant_id) or []: cid = record.get("container_id") if not cid: continue - created_by_by_container_id[str(cid)] = str( - record.get("created_by") or record.get("user_id") or "" - ) + created_by_by_container_id[str(cid)] = str(record.get("created_by") or 
record.get("user_id") or "") except Exception as e: logger.warning(f"Failed to load MCP records for permission mapping: {e}") - enriched: list[dict] = [] + enriched = [] for container in containers: container_id = str(container.get("container_id") or "") created_by = created_by_by_container_id.get(container_id, "") @@ -228,77 +848,195 @@ def attach_mcp_container_permissions( if user_id is None: permission = PERMISSION_READ else: - permission = PERMISSION_EDIT if can_edit_all or ( - created_by and str(created_by) == str(user_id) - ) else PERMISSION_READ + permission = PERMISSION_EDIT if can_edit_all or (created_by and str(created_by) == str(user_id)) else PERMISSION_READ enriched.append({**container, "permission": permission}) return enriched -async def check_mcp_health_and_update_db(mcp_url, service_name, tenant_id, user_id): - # Get authorization token from database +async def get_mcp_record_by_id(mcp_id: int, tenant_id: str) -> dict | None: + """Get MCP record by ID. + + Args: + mcp_id: MCP record ID + tenant_id: Tenant ID + + Returns: + Dictionary containing mcp_name, mcp_server, authorization_token, and custom_headers, or None if not found + """ + mcp_record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id) + if not mcp_record: + return None + + return { + "mcp_name": mcp_record.get("mcp_name"), + "mcp_server": mcp_record.get("mcp_server"), + "authorization_token": mcp_record.get("authorization_token"), + "custom_headers": mcp_record.get("custom_headers"), + } + + +# --------------------------------------------------------------------------- +# Health Check Functions +# --------------------------------------------------------------------------- + +async def check_mcp_health_and_update_db(mcp_url, service_name, tenant_id, user_id) -> None: + """Check MCP health and update database status. 
+ + Args: + mcp_url: MCP server URL + service_name: MCP service name + tenant_id: Tenant ID + user_id: User ID + + Raises: + MCPConnectionError: If MCP connection fails + """ authorization_token = get_mcp_authorization_token_by_name_and_url( mcp_name=service_name, mcp_server=mcp_url, tenant_id=tenant_id ) + custom_headers = get_mcp_custom_headers_by_name_and_url( + mcp_name=service_name, + mcp_server=mcp_url, + tenant_id=tenant_id + ) - # check the health of the MCP server try: status = await mcp_server_health( remote_mcp_server=mcp_url, - authorization_token=authorization_token + authorization_token=authorization_token, + custom_headers=custom_headers, ) except BaseException: status = False - # update the status of the MCP server in the database + update_mcp_status_by_name_and_url( mcp_name=service_name, mcp_server=mcp_url, tenant_id=tenant_id, user_id=user_id, - status=status) + status=status + ) if not status: raise MCPConnectionError("MCP connection failed") -async def delete_mcp_by_container_id(tenant_id: str, user_id: str, container_id: str): - """ - Soft delete MCP record associated with a specific container ID. +async def check_mcp_service_health( + *, + tenant_id: str, + user_id: str, + mcp_id: int, +) -> str: + """Check MCP service health by ID. - This is used when stopping a containerized MCP so that the MCP record and - its container are removed together. 
+ Args: + tenant_id: Tenant ID + user_id: User ID + mcp_id: MCP record ID + + Returns: + "healthy" if MCP is reachable + + Raises: + McpNotFoundError: If MCP record is not found + McpValidationError: If MCP server URL is empty + MCPConnectionError: If MCP connection fails """ - delete_mcp_record_by_container_id( - container_id=container_id, + record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id) + if not record: + raise McpNotFoundError("MCP record not found") + + server_url = record.get("mcp_server") + if not server_url: + raise McpValidationError("MCP server URL is empty") + + authorization_token = record.get("authorization_token") + custom_headers = record.get("custom_headers") + + try: + status = await mcp_server_health( + remote_mcp_server=server_url, + authorization_token=authorization_token, + custom_headers=custom_headers, + ) + except MCPConnectionError: + update_mcp_record_status_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + status=False, + ) + raise + except Exception as exc: + logger.error(f"MCP health check failed: {exc}") + update_mcp_record_status_by_id( + mcp_id=mcp_id, + tenant_id=tenant_id, + user_id=user_id, + status=False, + ) + raise MCPConnectionError(str(exc) or "MCP connection failed") + + update_mcp_record_status_by_id( + mcp_id=mcp_id, tenant_id=tenant_id, user_id=user_id, + status=status, ) + if not status: + raise MCPConnectionError("MCP connection failed") + + return "healthy" -async def get_mcp_record_by_id(mcp_id: int, tenant_id: str) -> dict | None: - """ - Get MCP record by ID + +# --------------------------------------------------------------------------- +# Tool Functions +# --------------------------------------------------------------------------- + +async def list_mcp_service_tools_by_id(*, tenant_id: str, mcp_id: int) -> list[dict]: + """Get tools from an MCP service by ID. 
Args: - mcp_id: MCP record ID tenant_id: Tenant ID + mcp_id: MCP record ID Returns: - Dictionary containing mcp_name, mcp_server, and authorization_token, or None if not found + List of tool dictionaries + + Raises: + McpNotFoundError: If MCP record is not found + McpValidationError: If MCP record is missing connection fields + MCPConnectionError: If MCP connection fails """ - mcp_record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id) - if not mcp_record: - return None + record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id) + if not record: + raise McpNotFoundError("MCP record not found") - return { - "mcp_name": mcp_record.get("mcp_name"), - "mcp_server": mcp_record.get("mcp_server"), - "authorization_token": mcp_record.get("authorization_token"), - } + service_name = record.get("mcp_name") + server_url = record.get("mcp_server") + if not service_name or not server_url: + raise McpValidationError("MCP record is missing runtime connection fields") + authorization_token = record.get("authorization_token") + custom_headers = record.get("custom_headers") + + tools_info = await get_tool_from_remote_mcp_server( + mcp_server_name=service_name, + remote_mcp_server=server_url, + tenant_id=tenant_id, + authorization_token=authorization_token, + custom_headers=custom_headers, + ) + return [tool.__dict__ for tool in tools_info] + + +# --------------------------------------------------------------------------- +# Image Upload Functions +# --------------------------------------------------------------------------- async def upload_and_start_mcp_image( tenant_id: str, @@ -308,69 +1046,56 @@ async def upload_and_start_mcp_image( port: int, service_name: str | None = None, env_vars: str | None = None, -): - """ - Upload MCP Docker image and start container. +) -> dict: + """Upload MCP Docker image and start container. 
Args: - tenant_id: Tenant ID for isolation - user_id: User ID for isolation + tenant_id: Tenant ID + user_id: User ID file_content: Raw file content bytes filename: Original filename port: Host port to expose the MCP server on - service_name: Optional name for the MCP service (auto-generated if not provided) + service_name: Optional name for the MCP service env_vars: Optional environment variables as JSON string Returns: - Dictionary with service details including mcp_url, container_id, etc. + Dictionary with service details Raises: MCPContainerError: If container operations fail MCPNameIllegal: If service name already exists ValueError: If file validation fails """ - # Validate file type if not filename.lower().endswith('.tar'): raise ValueError("Only .tar files are allowed") - # Validate file size (limit to 1GB) file_size = len(file_content) - if file_size > 1024 * 1024 * 1024: # 1GB limit + if file_size > 1024 * 1024 * 1024: raise ValueError("File size exceeds 1GB limit") - # Parse environment variables parsed_env_vars = None if env_vars: + import json try: - import json parsed_env_vars = json.loads(env_vars) if not isinstance(parsed_env_vars, dict): raise ValueError("Environment variables must be a JSON object") except (json.JSONDecodeError, ValueError) as e: raise ValueError(f"Invalid environment variables format: {str(e)}") - # Generate service name if not provided final_service_name = service_name if not final_service_name: - # Remove .tar extension from filename final_service_name = os.path.splitext(filename)[0] - # Check if MCP service name already exists if check_mcp_name_exists(mcp_name=final_service_name, tenant_id=tenant_id): raise MCPNameIllegal("MCP service name already exists") - # Save file to temporary location (delete=False, manual cleanup) with tempfile.NamedTemporaryFile(delete=False, suffix='.tar') as temp_file: temp_file.write(file_content) temp_file_path = temp_file.name try: - # Initialize container manager container_manager = 
MCPContainerManager() - - # Start container from uploaded image - # Note: uploaded image should be a complete MCP server implementation - # that can be started directly without additional commands (uses image's CMD/ENTRYPOINT) container_info = await container_manager.start_mcp_container_from_tar( tar_file_path=temp_file_path, service_name=final_service_name, @@ -378,22 +1103,18 @@ async def upload_and_start_mcp_image( user_id=user_id, env_vars=parsed_env_vars, host_port=port, - full_command=None, # Uploaded image should contain the MCP server + full_command=None, ) finally: - # Manual cleanup of temporary file try: os.unlink(temp_file_path) except Exception as e: - logger.warning( - f"Failed to clean up temporary file {temp_file_path}: {e}") + logger.warning(f"Failed to clean up temporary file {temp_file_path}: {e}") - # Extract authorization_token from env_vars for database registration authorization_token = None if parsed_env_vars: authorization_token = parsed_env_vars.get("authorization_token") - # Register to remote MCP server list await add_remote_mcp_server_list( tenant_id=tenant_id, user_id=user_id, @@ -401,6 +1122,7 @@ async def upload_and_start_mcp_image( remote_mcp_server_name=final_service_name, container_id=container_info["container_id"], authorization_token=authorization_token, + container_port=port ) return { diff --git a/backend/services/skill_service.py b/backend/services/skill_service.py index 1cccd31d6..f5b7d1c7c 100644 --- a/backend/services/skill_service.py +++ b/backend/services/skill_service.py @@ -1,11 +1,17 @@ """Skill management service.""" +import aiofiles +import argparse +import ast import asyncio -import uuid +import inspect import io import json import logging import os +import uuid +import zipfile +import re import threading from typing import Any, Dict, List, Optional, Tuple, Union @@ -15,7 +21,7 @@ from nexent.skills.skill_loader import SkillLoader from nexent.core.utils.observer import MessageObserver from 
nexent.core.agents.agent_model import ModelConfig -from consts.const import CONTAINER_SKILLS_PATH, ROOT_DIR +from consts.const import CONTAINER_SKILLS_PATH, OFFICIAL_SKILLS_ZIP_PATH, ROOT_DIR from consts.exceptions import SkillException from database import skill_db from agents.skill_creation_agent import create_skill_from_request @@ -253,6 +259,51 @@ def _commented_tree_to_plain(node: Any) -> Any: return node +def _ruamel_tree_to_plain(node: Any) -> Any: + """Convert ruamel CommentedMap/Seq to plain dict/list with NO comment merging. + + Used for parsing config.yaml into config_values where the value must be clean + (e.g. ``/mnt/nexent`` not ``/mnt/nexent # Initial workspace path``). + """ + from ruamel.yaml.comments import CommentedMap, CommentedSeq + + if isinstance(node, CommentedMap): + return {k: _ruamel_tree_to_plain(v) for k, v in node.items()} + if isinstance(node, CommentedSeq): + return [_ruamel_tree_to_plain(v) for v in node] + return node + + +def _parse_yaml_ruamel_plain(text: str) -> Dict[str, Any]: + """Parse YAML with ruamel round-trip and return plain dict (no comment merging). + + Used for ``config.yaml`` → ``config_values`` where scalar values must be clean. 
+ """ + from ruamel.yaml import YAML + from ruamel.yaml.comments import CommentedMap + + y = YAML(typ="rt") + try: + root = y.load(text) + except Exception as exc: + raise SkillException(f"Invalid YAML in config/config.yaml: {exc}") from exc + if root is None: + return {} + if isinstance(root, CommentedMap): + plain = _ruamel_tree_to_plain(root) + elif isinstance(root, dict): + plain = root + else: + raise SkillException( + "config/config.yaml must contain a JSON or YAML object (mapping), not a list or scalar" + ) + if not isinstance(plain, dict): + raise SkillException( + "config/config.yaml must contain a JSON or YAML object (mapping), not a list or scalar" + ) + return _params_dict_to_storable(plain) + + def _parse_yaml_with_ruamel_merge_eol_comments(text: str) -> Dict[str, Any]: """Parse YAML with ruamel; merge ``#`` into scalar values only (``value # tip`` for the UI). @@ -286,6 +337,189 @@ def _parse_yaml_with_ruamel_merge_eol_comments(text: str) -> Dict[str, Any]: return _params_dict_to_storable(plain) +def _get_skill_inputs_from_code(scripts_dir: str) -> List[Dict[str, Any]]: + """Extract argparse parameters from skill scripts using AST analysis. + + Walks every ``scripts/*.py`` file (skipping ``_*.py``) and uses AST to find + all ``parser.add_argument(...)`` calls anywhere in the file, including inside + function bodies and ``if __name__ == "__main__":`` blocks. + + Mirrors ``get_local_tools()`` in tool_configuration_service.py. + + Args: + scripts_dir: Absolute path to the skill's ``scripts/`` directory. + + Returns: + List of input parameter dicts with name, type, required, description, default. 
+ """ + inputs: List[Dict[str, Any]] = [] + seen_names: set = set() + + if not os.path.isdir(scripts_dir): + return inputs + + for filename in os.listdir(scripts_dir): + if not filename.endswith(".py") or filename.startswith("_"): + continue + + script_path = os.path.join(scripts_dir, filename) + try: + source = open(script_path, "r", encoding="utf-8").read() + except (OSError, IOError): + continue + + try: + tree = ast.parse(source, filename=filename) + except SyntaxError: + continue + + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + if not _is_add_argument_call(node): + continue + + parsed = _extract_arg_from_add_argument(node) + if not parsed: + continue + + param_name = parsed["name"] + if param_name in ("help", "h") or param_name in seen_names: + continue + seen_names.add(param_name) + + inputs.append({ + "name": param_name, + "type": parsed["type"], + "required": parsed["required"], + "description_en": parsed.get("description_en", ""), + }) + + return inputs + + +def _is_add_argument_call(node: ast.Call) -> bool: + """Return True if node is a call to ``.add_argument(...)``.""" + if not isinstance(node.func, ast.Attribute): + return False + if node.func.attr != "add_argument": + return False + if isinstance(node.func.value, ast.Name) and node.func.value.id == "parser": + return True + if isinstance(node.func.value, ast.Attribute): + return True + return False + + +def _extract_arg_from_add_argument(node: ast.Call) -> Optional[Dict[str, Any]]: + """Extract parameter metadata from an ``add_argument`` Call AST node.""" + args = node.args + kwargs = {kw.arg: kw.value for kw in node.keywords} + + # Positional arg 0 = name or first positional arg (--name / name) + name_node = args[0] if args else kwargs.get("name") + if name_node is None: + return None + param_name = _ast_literal_eval(name_node) + if not param_name or not isinstance(param_name, str): + return None + + # --name style + if param_name.startswith("--"): + param_name = 
param_name[2:] + elif param_name.startswith("-"): + param_name = param_name[1:] + + # Determine type + param_type = "string" + type_node = kwargs.get("type") + if type_node is not None: + type_name = _get_type_name(type_node) + if type_name in ("int", "integer"): + param_type = "number" + elif type_name in ("float",): + param_type = "number" + elif type_name in ("bool",): + param_type = "boolean" + + # Description + help_node = kwargs.get("help") + description = "" + if help_node is not None: + val = _ast_literal_eval(help_node) + if isinstance(val, str): + description = val + + # Required / default + required = False + default: Any = None + + if kwargs.get("required") is not None: + req_val = _ast_literal_eval(kwargs["required"]) + if req_val is True: + required = True + + default_node = kwargs.get("default") + if default_node is not None: + default = _ast_literal_eval(default_node) + if default is None or (isinstance(default, str) and default == ""): + required = False + elif not required: + required = False + + return { + "name": param_name, + "type": param_type, + "required": required, + "description_en": description, + } + + +def _get_type_name(node: ast.AST) -> str: + """Get the type name string from a type-related AST node.""" + if isinstance(node, ast.Name): + return node.id + if isinstance(node, ast.Attribute): + return node.attr + if isinstance(node, ast.Call) and isinstance(node.func, ast.Name): + return node.func.id + if isinstance(node, ast.Call) and isinstance(node.func, ast.Attribute): + return node.func.attr + return "" + + +def _ast_literal_eval(node: ast.AST) -> Any: + """Safely evaluate a literal AST node (Name, Constant, Str, Num, etc.) 
to a Python value.""" + if isinstance(node, (ast.Constant, ast.Num)): + return getattr(node, "value", None) + if isinstance(node, ast.Str): # Python < 3.8 compat + return node.s + if isinstance(node, ast.Name): + name = node.id + if name == "None": + return None + if name == "True": + return True + if name == "False": + return False + return name + if isinstance(node, (ast.List, ast.Tuple)): + elts = [_ast_literal_eval(e) for e in node.elts] + return list(elts) if isinstance(node, ast.List) else tuple(elts) + if isinstance(node, ast.Dict): + return {_ast_literal_eval(k): _ast_literal_eval(v) for k, v in node.keys} + if isinstance(node, ast.UnaryOp) and isinstance(node.op, (ast.UAdd, ast.USub)): + val = _ast_literal_eval(node.operand) + if isinstance(val, (int, float)): + return -val if isinstance(node.op, ast.USub) else val + if isinstance(node, ast.BinOp): + left = _ast_literal_eval(node.left) + right = _ast_literal_eval(node.right) + if isinstance(left, str) and isinstance(right, str) and isinstance(node.op, ast.Add): + return left + right + return None + + def _parse_yaml_fallback_pyyaml(text: str) -> Dict[str, Any]: """Parse YAML with PyYAML (comments are dropped).""" try: @@ -312,7 +546,7 @@ def _parse_skill_params_from_config_bytes(raw: bytes) -> Dict[str, Any]: data = json.loads(text) except json.JSONDecodeError: try: - return _parse_yaml_with_ruamel_merge_eol_comments(text) + return _parse_yaml_ruamel_plain(text) except ImportError: logger.warning("ruamel.yaml not installed; YAML comments will be dropped on parse") return _parse_yaml_fallback_pyyaml(text) @@ -332,6 +566,66 @@ def _parse_skill_params_from_config_bytes(raw: bytes) -> Dict[str, Any]: return _params_dict_to_storable(data) +def _parse_skill_schema_from_yaml_bytes(raw: bytes) -> List[Dict[str, Any]]: + """Parse config/schema.yaml bytes into List[SkillParam]. 
+ + Expected YAML structure: + param_name: + type: string | number | boolean | array | object + required: true | false + description_en: "English description" + description_zh: "Chinese description" + depends_on: other_param_name + + Returns a list of param dicts with name, type, required, description_en, + description_zh, depends_on — matching frontend SkillParam interface. + """ + text = raw.decode("utf-8-sig").strip() + if not text: + logger.warning("[schema] Empty raw bytes for schema.yaml") + return [] + data: Any = None + parse_method = "unknown" + try: + data = json.loads(text) + parse_method = "json" + except json.JSONDecodeError: + try: + data = _parse_yaml_with_ruamel_merge_eol_comments(text) + parse_method = "ruamel" + except ImportError: + data = _parse_yaml_fallback_pyyaml(text) + parse_method = "pyyaml" + except SkillException: + raise + except Exception: + try: + data = _parse_yaml_fallback_pyyaml(text) + parse_method = "pyyaml" + except Exception as exc: + logger.warning("[schema] All YAML parsers failed: %s", exc) + return [] + + if not isinstance(data, dict): + logger.warning("[schema] Parsed data is not a dict (type=%s, parse_method=%s)", type(data).__name__, parse_method) + return [] + + result: List[Dict[str, Any]] = [] + for param_name, meta in data.items(): + if not isinstance(meta, dict): + logger.debug("[schema] Skipping param '%s': meta is not a dict (%s)", param_name, type(meta).__name__) + continue + result.append({ + "name": param_name, + "type": meta.get("type", "string"), + "required": bool(meta.get("required", False)), + "description_en": meta.get("description_en", meta.get("description", "")), + "description_zh": meta.get("description_zh", ""), + "depends_on": meta.get("depends_on"), + }) + return result + + def _read_params_from_zip_config_yaml( zip_bytes: bytes, preferred_skill_root: Optional[str] = None, @@ -353,11 +647,127 @@ def _read_params_from_zip_config_yaml( return params +def _find_zip_member_schema_yaml( + file_list: 
List[str], + preferred_skill_root: Optional[str] = None, +) -> Optional[str]: + """Return the ZIP entry path for .../config/schema.yaml (any depth; case-insensitive).""" + for entry in file_list: + norm = _normalize_zip_entry_path(entry) + # Match .../config/schema.yaml at any depth + parts = norm.split("/") + if len(parts) >= 2 and parts[-2] == "config" and parts[-1] == "schema.yaml": + logger.debug("[schema] Found schema.yaml via config/ prefix match: %s", entry) + return entry + # Fallback: if preferred_root is given, also check /config/schema.yaml + if preferred_skill_root and norm == f"{preferred_skill_root}/config/schema.yaml": + logger.debug("[schema] Found schema.yaml via preferred_root match: %s", entry) + return entry + logger.debug("[schema] No schema.yaml found in ZIP entries (preferred_root=%s, entry_count=%d)", preferred_skill_root, len(file_list)) + return None + + +def _read_schema_yaml_from_zip( + zip_bytes: bytes, + preferred_skill_root: Optional[str] = None, +) -> Optional[List[Dict[str, Any]]]: + """If the archive contains config/schema.yaml, parse it into List[SkillParam]; else None.""" + import zipfile + + zip_stream = io.BytesIO(zip_bytes) + with zipfile.ZipFile(zip_stream, "r") as zf: + member = _find_zip_member_schema_yaml( + zf.namelist(), + preferred_skill_root=preferred_skill_root, + ) + if not member: + return None + raw = zf.read(member) + parsed = _parse_skill_schema_from_yaml_bytes(raw) + if not parsed: + logger.debug("[schema] Parsed result is empty from ZIP member %s", member) + return parsed + + +def _get_skill_inputs_from_zip( + zip_bytes: bytes, + preferred_skill_root: Optional[str] = None, +) -> List[Dict[str, Any]]: + """Extract argparse parameters from scripts/*.py inside a ZIP archive. + + Mirrors ``_get_skill_inputs_from_code`` but reads from ZIP bytes instead of filesystem. + + Args: + zip_bytes: ZIP archive content. + preferred_skill_root: Preferred folder name inside ZIP containing scripts/. 
+ + Returns: + List of input parameter dicts with name, type, required, description, default. + """ + zip_stream = io.BytesIO(zip_bytes) + inputs: List[Dict[str, Any]] = [] + seen_names: set = set() + + try: + with zipfile.ZipFile(zip_stream, "r") as zf: + file_list = zf.namelist() + scripts_root = preferred_skill_root or "" + + for member in file_list: + normalized = member.replace("\\", "/").strip() + if not normalized.endswith(".py") or "/_" in normalized or normalized.endswith("/_"): + continue + if not normalized.startswith(scripts_root + "/scripts/"): + if scripts_root: + continue + parts = normalized.split("/") + if len(parts) < 2 or parts[-2] != "scripts": + continue + + try: + source = zf.read(member).decode("utf-8") + except (OSError, UnicodeDecodeError): + continue + + try: + tree = ast.parse(source, filename=member) + except SyntaxError: + continue + + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + if not _is_add_argument_call(node): + continue + parsed = _extract_arg_from_add_argument(node) + if not parsed: + continue + param_name = parsed["name"] + if param_name in ("help", "h") or param_name in seen_names: + continue + seen_names.add(param_name) + inputs.append({ + "name": param_name, + "type": parsed["type"], + "required": parsed["required"], + "description_en": parsed.get("description_en", ""), + }) + except zipfile.BadZipFile: + return inputs + + return inputs + + def _local_skill_config_yaml_path(skill_name: str, local_skills_dir: str) -> str: """Absolute path to //config/config.yaml.""" return os.path.join(local_skills_dir, skill_name, "config", "config.yaml") +def _local_skill_schema_yaml_path(skill_name: str, local_skills_dir: str) -> str: + """Absolute path to //config/schema.yaml.""" + return os.path.join(local_skills_dir, skill_name, "config", "schema.yaml") + + def _write_skill_params_to_local_config_yaml( skill_name: str, params: Dict[str, Any], @@ -387,24 +797,28 @@ def 
_remove_local_skill_config_yaml(skill_name: str, local_skills_dir: str) -> N logger.info("Removed %s (params cleared in DB)", path) -def get_skill_manager() -> SkillManager: - """Get or create the global SkillManager instance.""" - global _skill_manager - if _skill_manager is None: - _skill_manager = SkillManager(CONTAINER_SKILLS_PATH) - return _skill_manager +def get_skill_manager(tenant_id: Optional[str] = None) -> SkillManager: + """Create a SkillManager instance with optional tenant-based directory isolation. + + Args: + tenant_id: Tenant ID for directory isolation. When provided, skills + are stored under CONTAINER_SKILLS_PATH / tenant_id / + """ + return SkillManager(base_skills_dir=CONTAINER_SKILLS_PATH, tenant_id=tenant_id) class SkillService: """Skill management service for backend operations.""" - def __init__(self, skill_manager: Optional[SkillManager] = None): + def __init__(self, skill_manager: Optional[SkillManager] = None, tenant_id: Optional[str] = None): """Initialize SkillService. Args: - skill_manager: Optional SkillManager instance, uses global if not provided + skill_manager: Optional SkillManager instance, uses tenant-aware global if not provided + tenant_id: Tenant ID for skill isolation. Required when no skill_manager is provided. """ - self.skill_manager = skill_manager or get_skill_manager() + self.tenant_id = tenant_id + self.skill_manager = skill_manager or get_skill_manager(tenant_id) def _resolve_local_skills_dir_for_overlay(self) -> Optional[str]: """Directory where skill folders live: ``SKILLS_PATH``, else ``ROOT_DIR/skills`` if present.""" @@ -417,12 +831,15 @@ def _resolve_local_skills_dir_for_overlay(self) -> Optional[str]: return candidate return None - def _overlay_params_from_local_config_yaml(self, skill: Dict[str, Any]) -> Dict[str, Any]: - """Prefer ``//config/config.yaml`` for ``params`` in API responses. 
+ def _enrich_configs_from_yaml(self, skill: Dict[str, Any]) -> Dict[str, Any]: + """Read local config files and overlay onto skill. - The database stores comment-free JSON (no legacy ``_comment`` keys, no `` # `` suffixes). - On-disk YAML may use ``#`` lines; when the file exists, parse with ruamel (inline tips - on scalars only) and use for ``params``; otherwise use DB. + config/config.yaml → config_values (runtime defaults dict) + config/schema.yaml → config_schemas (parameter metadata list) + + If a file does not exist, the corresponding DB key is removed so the + response never contains stale data (e.g. {"configs": null} instead of + the old DB value). """ out = dict(skill) local_dir = self._resolve_local_skills_dir_for_overlay() @@ -431,70 +848,89 @@ def _overlay_params_from_local_config_yaml(self, skill: Dict[str, Any]) -> Dict[ name = out.get("name") if not name: return out - path = _local_skill_config_yaml_path(name, local_dir) - if not os.path.isfile(path): - return out - try: - with open(path, "rb") as f: - raw = f.read() - out["params"] = _parse_skill_params_from_config_bytes(raw) - logger.info("Using local config.yaml params (scalar inline comment tooltips) for skill %s", name) - except Exception as exc: - logger.warning( - "Could not use local config.yaml for skill %s params (using DB): %s", - name, - exc, - ) + config_path = _local_skill_config_yaml_path(name, local_dir) + if os.path.isfile(config_path): + try: + with open(config_path, "rb") as f: + raw = f.read() + out["config_values"] = _parse_skill_params_from_config_bytes(raw) + except Exception as exc: + logger.warning("Could not parse local config.yaml for skill %s: %s", name, exc) + else: + out.pop("config_values", None) + # schema.yaml takes precedence over DB config_schemas + schema_path = _local_skill_schema_yaml_path(name, local_dir) + if os.path.isfile(schema_path): + try: + with open(schema_path, "rb") as f: + raw = f.read() + parsed = _parse_skill_schema_from_yaml_bytes(raw) + 
out["config_schemas"] = parsed + except Exception as exc: + logger.warning("Could not parse local schema.yaml for skill %s: %s", name, exc) + else: + out.pop("config_schemas", None) return out def list_skills(self, tenant_id: Optional[str] = None) -> List[Dict[str, Any]]: - """List all skills for tenant. + """List all skills for a tenant. Args: - tenant_id: Tenant ID (reserved for future multi-tenant support) + tenant_id: Tenant ID for filtering skills. Uses instance tenant_id if not provided. Returns: List of skill info dicts """ + effective_tenant_id = tenant_id or self.tenant_id + if not effective_tenant_id: + raise SkillException("tenant_id is required") try: - skills = skill_db.list_skills() - return [self._overlay_params_from_local_config_yaml(s) for s in skills] + skills = skill_db.list_skills(effective_tenant_id) + enriched = [self._enrich_configs_from_yaml(s) for s in skills] + return enriched except Exception as e: logger.error(f"Error listing skills: {e}") raise SkillException(f"Failed to list skills: {str(e)}") from e def get_skill(self, skill_name: str, tenant_id: Optional[str] = None) -> Optional[Dict[str, Any]]: - """Get a specific skill. + """Get a specific skill within a tenant. Args: skill_name: Name of the skill - tenant_id: Tenant ID (reserved for future multi-tenant support) + tenant_id: Tenant ID for filtering. Uses instance tenant_id if not provided. 
Returns: Skill dict or None if not found """ + effective_tenant_id = tenant_id or self.tenant_id + if not effective_tenant_id: + raise SkillException("tenant_id is required") try: - skill = skill_db.get_skill_by_name(skill_name) + skill = skill_db.get_skill_by_name(skill_name, effective_tenant_id) if skill: - return self._overlay_params_from_local_config_yaml(skill) + return self._enrich_configs_from_yaml(skill) return None except Exception as e: logger.error(f"Error getting skill {skill_name}: {e}") raise SkillException(f"Failed to get skill: {str(e)}") from e - def get_skill_by_id(self, skill_id: int) -> Optional[Dict[str, Any]]: - """Get a specific skill by ID. + def get_skill_by_id(self, skill_id: int, tenant_id: Optional[str] = None) -> Optional[Dict[str, Any]]: + """Get a specific skill by ID within a tenant. Args: skill_id: ID of the skill + tenant_id: Tenant ID for filtering. Uses instance tenant_id if not provided. Returns: Skill dict or None if not found """ + effective_tenant_id = tenant_id or self.tenant_id + if not effective_tenant_id: + raise SkillException("tenant_id is required") try: - skill = skill_db.get_skill_by_id(skill_id) + skill = skill_db.get_skill_by_id(skill_id, effective_tenant_id) if skill: - return self._overlay_params_from_local_config_yaml(skill) + return self._enrich_configs_from_yaml(skill) return None except Exception as e: logger.error(f"Error getting skill by ID {skill_id}: {e}") @@ -506,11 +942,11 @@ def create_skill( tenant_id: Optional[str] = None, user_id: Optional[str] = None ) -> Dict[str, Any]: - """Create a new skill. + """Create a new skill for a tenant. Args: skill_data: Skill data including name, description, content, etc. - tenant_id: Tenant ID (reserved for future multi-tenant support) + tenant_id: Tenant ID for skill isolation. Uses instance tenant_id if not provided. 
user_id: User ID of the creator Returns: @@ -519,12 +955,16 @@ def create_skill( Raises: SkillException: If skill already exists locally or in database (409) """ + effective_tenant_id = tenant_id or self.tenant_id + if not effective_tenant_id: + raise SkillException("tenant_id is required") + skill_name = skill_data.get("name") if not skill_name: raise SkillException("Skill name is required") # Check if skill already exists in database - existing = skill_db.get_skill_by_name(skill_name) + existing = skill_db.get_skill_by_name(skill_name, effective_tenant_id) if existing: raise SkillException(f"Skill '{skill_name}' already exists") @@ -540,17 +980,17 @@ def create_skill( try: # Create database record first - result = skill_db.create_skill(skill_data) + result = skill_db.create_skill(skill_data, effective_tenant_id) # Create local skill file (SKILL.md) self.skill_manager.save_skill(skill_data) - # Mirror DB params to config/config.yaml when present (same layout as ZIP uploads). - if self.skill_manager.local_skills_dir and skill_data.get("params") is not None: + # Mirror DB config_schemas to config/config.yaml when present (same layout as ZIP uploads). 
+ if self.skill_manager.base_skills_dir and skill_data.get("config_schemas") is not None: try: _write_skill_params_to_local_config_yaml( skill_name, - _params_dict_to_storable(skill_data["params"]), + _params_dict_to_storable(skill_data["config_schemas"]), self.skill_manager.local_skills_dir, ) except Exception as exc: @@ -561,7 +1001,7 @@ def create_skill( ) logger.info(f"Created skill '{skill_name}' with local files") - return self._overlay_params_from_local_config_yaml(result) + return self._enrich_configs_from_yaml(result) except SkillException: raise except Exception as e: @@ -588,12 +1028,13 @@ def create_skill_from_file( skill_name: Optional skill name (extracted from ZIP if not provided) file_type: File type hint - "md", "zip", or "auto" (detect) source: Source identifier for the skill (e.g., "自定义", "官方", "导入") - tenant_id: Tenant ID (reserved for future multi-tenant support) + tenant_id: Tenant ID for skill isolation. Uses instance tenant_id if not provided. user_id: User ID of the creator Returns: Created skill dict """ + effective_tenant_id = tenant_id or self.tenant_id content_bytes: bytes if isinstance(file_content, str): content_bytes = file_content.encode("utf-8") @@ -609,9 +1050,9 @@ def create_skill_from_file( file_type = "md" if file_type == "zip": - return self._create_skill_from_zip(content_bytes, skill_name, source, user_id, tenant_id) + return self._create_skill_from_zip(content_bytes, skill_name, source, user_id, effective_tenant_id) else: - return self._create_skill_from_md(content_bytes, skill_name, source, user_id, tenant_id) + return self._create_skill_from_md(content_bytes, skill_name, source, user_id, effective_tenant_id) def _create_skill_from_md( self, @@ -634,7 +1075,7 @@ def _create_skill_from_md( raise SkillException("Skill name is required") # Check if skill already exists in database - existing = skill_db.get_skill_by_name(name) + existing = skill_db.get_skill_by_name(name, tenant_id) if existing: raise SkillException(f"Skill 
'{name}' already exists") @@ -653,18 +1094,20 @@ def _create_skill_from_md( "tool_ids": tool_ids, "allowed-tools": allowed_tools, # Preserve for local file sync } + # Note: scripts/ reflection is only possible for ZIP uploads (scripts exist in ZIP bytes). + # For MD-only uploads there are no scripts to reflect at create time. # Set created_by and updated_by if user_id is provided if user_id: skill_dict["created_by"] = user_id skill_dict["updated_by"] = user_id - result = skill_db.create_skill(skill_dict) + result = skill_db.create_skill(skill_dict, tenant_id) # Write SKILL.md to local storage self.skill_manager.save_skill(skill_dict) - return self._overlay_params_from_local_config_yaml(result) + return self._enrich_configs_from_yaml(result) def _create_skill_from_zip( self, @@ -727,7 +1170,7 @@ def _create_skill_from_zip( raise SkillException("Skill name is required") # Check if skill already exists in database - existing = skill_db.get_skill_by_name(name) + existing = skill_db.get_skill_by_name(name, tenant_id) if existing: raise SkillException(f"Skill '{name}' already exists") @@ -763,26 +1206,40 @@ def _create_skill_from_zip( } preferred_root = detected_skill_name or name + + # Priority: schema.yaml (list metadata) > scripts AST (list) > config.yaml (dict defaults) + schema_from_zip = _read_schema_yaml_from_zip(zip_bytes, preferred_root) + inputs_from_scripts = _get_skill_inputs_from_zip( + zip_bytes, + preferred_skill_root=preferred_root, + ) params_from_zip = _read_params_from_zip_config_yaml( zip_bytes, preferred_skill_root=preferred_root, ) + + if schema_from_zip: + skill_dict["config_schemas"] = schema_from_zip + elif inputs_from_scripts: + skill_dict["config_schemas"] = inputs_from_scripts + + # config.yaml always goes into config_values (runtime defaults dict) if params_from_zip is not None: - skill_dict["params"] = params_from_zip + skill_dict["config_values"] = params_from_zip # Set created_by and updated_by if user_id is provided if user_id: 
skill_dict["created_by"] = user_id skill_dict["updated_by"] = user_id - result = skill_db.create_skill(skill_dict) + result = skill_db.create_skill(skill_dict, tenant_id) # Save SKILL.md to local storage self.skill_manager.save_skill(skill_dict) self._upload_zip_files(zip_bytes, name, detected_skill_name) - return self._overlay_params_from_local_config_yaml(result) + return self._enrich_configs_from_yaml(result) def _delete_local_skill_files(self, skill_name: str) -> None: """Delete all files within a skill's local directory, preserving the directory itself. @@ -833,20 +1290,34 @@ def _upload_zip_files( zip_stream = io.BytesIO(zip_bytes) - # Determine if folder renaming is needed + try: + with zipfile.ZipFile(zip_stream, "r") as zf: + file_list = zf.namelist() + except zipfile.BadZipFile: + raise SkillException("Invalid ZIP archive") + + # Determine if this ZIP has a subdirectory structure or root-level structure. + # Root-level: SKILL.md is at root (e.g., "SKILL.md", "script/analyze.py") -> no stripping + # Subdirectory: SKILL.md is inside a folder (e.g., "my-skill/SKILL.md") -> strip folder prefix needs_rename = ( original_folder_name is not None and original_folder_name != skill_name ) + has_root_skill_md = any( + not fp.endswith("/") + and fp.replace("\\", "/").split("/")[0].lower() == "skill.md" + for fp in file_list + ) + logger.info( - "Starting ZIP extraction for skill '%s': needs_rename=%s, original_folder='%s'", - skill_name, needs_rename, original_folder_name + "Starting ZIP extraction for skill '%s': needs_rename=%s, original_folder='%s', has_root_skill_md=%s", + skill_name, needs_rename, original_folder_name, has_root_skill_md ) + zip_stream.seek(0) try: with zipfile.ZipFile(zip_stream, "r") as zf: - file_list = zf.namelist() logger.info("ZIP contains %d entries for skill '%s'", len(file_list), skill_name) extracted_count = 0 @@ -858,10 +1329,12 @@ def _upload_zip_files( parts = normalized_path.split("/") # Calculate target relative path + # Only strip 
the first component when the ZIP has a subdirectory structure + # (SKILL.md is inside a folder, not at root level) if needs_rename and len(parts) >= 2 and parts[0] == original_folder_name: - # Replace original folder name with skill_name relative_path = parts[0].replace(original_folder_name, skill_name) + "/" + "/".join(parts[1:]) - elif len(parts) >= 2: + elif len(parts) >= 2 and not has_root_skill_md: + # Strip first component (ZIP has subdirectory structure without root SKILL.md) relative_path = "/".join(parts[1:]) else: relative_path = normalized_path @@ -908,7 +1381,10 @@ def update_skill_from_file( Returns: Updated skill dict """ - existing = skill_db.get_skill_by_name(skill_name) + effective_tenant_id = tenant_id or self.tenant_id + if not effective_tenant_id: + raise SkillException("tenant_id is required") + existing = skill_db.get_skill_by_name(skill_name, effective_tenant_id) if not existing: raise SkillException(f"Skill not found: {skill_name}") @@ -927,9 +1403,9 @@ def update_skill_from_file( file_type = "md" if file_type == "zip": - return self._update_skill_from_zip(content_bytes, skill_name, user_id, tenant_id) + return self._update_skill_from_zip(content_bytes, skill_name, user_id, effective_tenant_id) else: - return self._update_skill_from_md(content_bytes, skill_name, user_id, tenant_id) + return self._update_skill_from_md(content_bytes, skill_name, user_id, effective_tenant_id) def _update_skill_from_md( self, @@ -960,7 +1436,7 @@ def _update_skill_from_md( } result = skill_db.update_skill( - skill_name, skill_dict, updated_by=user_id or None + skill_name, skill_dict, tenant_id, updated_by=user_id or None ) # Clean up existing local files before writing new ones @@ -971,7 +1447,7 @@ def _update_skill_from_md( skill_dict["allowed-tools"] = allowed_tools self.skill_manager.save_skill(skill_dict) - return self._overlay_params_from_local_config_yaml(result) + return self._enrich_configs_from_yaml(result) def _update_skill_from_zip( self, @@ -981,7 
+1457,7 @@ def _update_skill_from_zip( tenant_id: Optional[str] = None, ) -> Dict[str, Any]: """Update skill from ZIP archive.""" - existing = skill_db.get_skill_by_name(skill_name) + existing = skill_db.get_skill_by_name(skill_name, tenant_id) if not existing: raise SkillException(f"Skill not found: {skill_name}") @@ -1037,10 +1513,10 @@ def _update_skill_from_zip( logger.warning(f"Could not parse SKILL.md from ZIP: {e}") if params_from_zip is not None: - skill_dict["params"] = params_from_zip + skill_dict["config_values"] = params_from_zip result = skill_db.update_skill( - skill_name, skill_dict, updated_by=user_id or None + skill_name, skill_dict, tenant_id, updated_by=user_id or None ) # Clean up existing local files before writing new ones @@ -1054,7 +1530,7 @@ def _update_skill_from_zip( # Update other files in local storage self._upload_zip_files(zip_bytes, skill_name, original_folder_name) - return self._overlay_params_from_local_config_yaml(result) + return self._enrich_configs_from_yaml(result) def update_skill( self, @@ -1063,55 +1539,59 @@ def update_skill( tenant_id: Optional[str] = None, user_id: Optional[str] = None ) -> Dict[str, Any]: - """Update an existing skill. + """Update an existing skill for a tenant. Args: skill_name: Name of the skill to update skill_data: Business fields from the application layer (no audit fields). - tenant_id: Tenant ID (reserved for future multi-tenant support) + tenant_id: Tenant ID for skill isolation. Uses instance tenant_id if not provided. user_id: Updater id from server-side auth (JWT / session); sets DB updated_by. 
Returns: Updated skill dict """ + effective_tenant_id = tenant_id or self.tenant_id + if not effective_tenant_id: + raise SkillException("tenant_id is required") try: - existing = skill_db.get_skill_by_name(skill_name) + existing = skill_db.get_skill_by_name(skill_name, effective_tenant_id) if not existing: raise SkillException(f"Skill not found: {skill_name}") result = skill_db.update_skill( - skill_name, skill_data, updated_by=user_id or None + skill_name, skill_data, effective_tenant_id, updated_by=user_id or None ) - # Keep config/config.yaml in sync when params are updated (matches ZIP import path). - if CONTAINER_SKILLS_PATH and "params" in skill_data: + # Keep config/config.yaml in sync when config_values are updated (matches ZIP import path). + local_dir = self.skill_manager.local_skills_dir or CONTAINER_SKILLS_PATH + if local_dir and "config_values" in skill_data: try: - raw_params = skill_data["params"] - if raw_params is None: - _remove_local_skill_config_yaml(skill_name, CONTAINER_SKILLS_PATH) + raw_config_values = skill_data["config_values"] + if raw_config_values is None: + _remove_local_skill_config_yaml(skill_name, local_dir) else: _write_skill_params_to_local_config_yaml( skill_name, - _params_dict_to_storable(raw_params), - CONTAINER_SKILLS_PATH, + _params_dict_to_storable(raw_config_values), + local_dir, ) except Exception as exc: logger.warning( - "Local config/config.yaml sync failed after params update for %s: %s", + "Local config/config.yaml sync failed after config_values update for %s: %s", skill_name, exc, ) # Optional: sync SKILL.md on disk when SKILLS_PATH is configured (DB is source of truth). 
- if not CONTAINER_SKILLS_PATH: + if not local_dir: logger.warning( "SKILLS_PATH is not set; skipped local SKILL.md sync after DB update for %s", skill_name, ) - return self._overlay_params_from_local_config_yaml(result) + return self._enrich_configs_from_yaml(result) try: - allowed_tools = skill_db.get_tool_names_by_skill_name(skill_name) + allowed_tools = skill_db.get_tool_names_by_skill_name(skill_name, effective_tenant_id) local_skill_dict = { "name": skill_name, "description": skill_data.get("description", existing.get("description", "")), @@ -1128,7 +1608,7 @@ def update_skill( exc, ) - return self._overlay_params_from_local_config_yaml(result) + return self._enrich_configs_from_yaml(result) except SkillException: raise except Exception as e: @@ -1138,18 +1618,22 @@ def update_skill( def delete_skill( self, skill_name: str, + tenant_id: Optional[str] = None, user_id: Optional[str] = None ) -> bool: - """Delete a skill. + """Delete a skill for a tenant. Args: skill_name: Name of the skill to delete - tenant_id: Tenant ID (reserved for future multi-tenant support) + tenant_id: Tenant ID for skill isolation. Uses instance tenant_id if not provided. 
user_id: User ID of the user performing the delete Returns: True if deleted successfully """ + effective_tenant_id = tenant_id or self.tenant_id + if not effective_tenant_id: + raise SkillException("tenant_id is required") try: # Delete local skill files from filesystem skill_dir = os.path.join(self.skill_manager.local_skills_dir, skill_name) @@ -1159,7 +1643,7 @@ def delete_skill( logger.info(f"Deleted skill directory: {skill_dir}") # Delete from database (soft delete with updated_by) - return skill_db.delete_skill(skill_name, updated_by=user_id) + return skill_db.delete_skill(skill_name, effective_tenant_id, updated_by=user_id) except Exception as e: logger.error(f"Error deleting skill {skill_name}: {e}") raise SkillException(f"Failed to delete skill: {str(e)}") from e @@ -1191,7 +1675,7 @@ def get_enabled_skills_for_agent( result = [] for skill_instance in enabled_skills: skill_id = skill_instance.get("skill_id") - skill = skill_db.get_skill_by_id(skill_id) + skill = skill_db.get_skill_by_id(skill_id, tenant_id) if skill: # Get skill info from ag_skill_info_t (repository returns keys: name, description, content) merged = { @@ -1271,7 +1755,7 @@ def build_skills_summary( for skill_instance in agent_skills: skill_id = skill_instance.get("skill_id") - skill = skill_db.get_skill_by_id(skill_id) + skill = skill_db.get_skill_by_id(skill_id, tenant_id) if skill: if available_skills is not None and skill.get("name") not in available_skills: continue @@ -1281,8 +1765,12 @@ def build_skills_summary( "description": skill.get("description", ""), }) else: - # Fallback: use all skills - all_skills = skill_db.list_skills() + # Fallback: use all skills from the current tenant + effective_tenant_id = tenant_id or self.tenant_id + if effective_tenant_id: + all_skills = skill_db.list_skills(effective_tenant_id) + else: + all_skills = [] skills_to_include = all_skills if available_skills is not None: available_set = set(available_skills) @@ -1318,13 +1806,16 @@ def 
get_skill_content(self, skill_name: str, tenant_id: Optional[str] = None) -> Args: skill_name: Name of the skill to load - tenant_id: Tenant ID (reserved for future multi-tenant support) + tenant_id: Tenant ID for filtering. Uses instance tenant_id if not provided. Returns: Skill content in markdown format """ + effective_tenant_id = tenant_id or self.tenant_id + if not effective_tenant_id: + return "" try: - skill = skill_db.get_skill_by_name(skill_name) + skill = skill_db.get_skill_by_name(skill_name, effective_tenant_id) return skill.get("content", "") if skill else "" except Exception as e: logger.error(f"Error getting skill content {skill_name}: {e}") @@ -1458,6 +1949,189 @@ def get_skill_instance( version_no=version_no ) + def create_skill_from_zip_bytes( + self, + zip_bytes: bytes, + skill_name: Optional[str] = None, + source: str = "导入", + user_id: Optional[str] = None, + tenant_id: Optional[str] = None, + skip_duplicate_check: bool = False + ) -> Dict[str, Any]: + """Create a skill from ZIP bytes, optionally skipping the duplicate name check. + + This is the shared implementation used by both the upload endpoint and the + agent import flow. When skip_duplicate_check is True, the existence check + is bypassed (used during agent import where we pre-validate duplicates). 
+ + Args: + zip_bytes: Raw ZIP file bytes + skill_name: Optional skill name override + source: Source label for the skill + user_id: Creator user ID + tenant_id: Tenant ID + skip_duplicate_check: If True, skip the "skill already exists" check + + Returns: + Created skill dict + """ + import zipfile + + zip_stream = io.BytesIO(zip_bytes) + + try: + with zipfile.ZipFile(zip_stream, "r") as zf: + file_list = zf.namelist() + except zipfile.BadZipFile: + raise SkillException("Invalid ZIP archive") + + zip_stream.seek(0) + + skill_md_path: Optional[str] = None + detected_skill_name: Optional[str] = None + + for file_path in file_list: + if file_path.endswith("/"): + continue + normalized_path = file_path.replace("\\", "/") + parts = normalized_path.split("/") + if len(parts) == 1 and parts[0].lower() == "skill.md": + skill_md_path = file_path + break + + if not skill_md_path: + for file_path in file_list: + if file_path.endswith("/"): + continue + normalized_path = file_path.replace("\\", "/") + parts = normalized_path.split("/") + if len(parts) >= 2 and parts[-1].lower() == "skill.md": + skill_md_path = file_path + detected_skill_name = parts[0] + break + + if not skill_md_path: + raise SkillException("SKILL.md not found in ZIP archive") + + name = skill_name or detected_skill_name + if not name: + raise SkillException("Skill name is required") + + if not skip_duplicate_check: + existing = skill_db.get_skill_by_name(name, tenant_id) + if existing: + raise SkillException(f"Skill '{name}' already exists") + + with zipfile.ZipFile(zip_stream, "r") as zf: + skill_content = zf.read(skill_md_path).decode("utf-8") + + try: + skill_data = SkillLoader.parse(skill_content) + except ValueError as e: + raise SkillException(f"Invalid SKILL.md in ZIP: {e}") + + if not name: + name = skill_data.get("name") + + if not name: + raise SkillException("Skill name is required") + + allowed_tools = skill_data.get("allowed_tools", []) + tool_ids = [] + if allowed_tools: + tool_ids = 
skill_db.get_tool_ids_by_names(allowed_tools, tenant_id) + + skill_dict = { + "name": name, + "description": skill_data.get("description", ""), + "content": skill_data.get("content", ""), + "tags": skill_data.get("tags", []), + "source": source, + "tool_ids": tool_ids, + "allowed-tools": allowed_tools, + } + + preferred_root = detected_skill_name or name + + schema_from_zip = _read_schema_yaml_from_zip(zip_bytes, preferred_root) + inputs_from_scripts = _get_skill_inputs_from_zip( + zip_bytes, + preferred_skill_root=preferred_root, + ) + params_from_zip = _read_params_from_zip_config_yaml( + zip_bytes, + preferred_skill_root=preferred_root, + ) + + if schema_from_zip: + skill_dict["config_schemas"] = schema_from_zip + elif inputs_from_scripts: + skill_dict["config_schemas"] = inputs_from_scripts + + if params_from_zip is not None: + skill_dict["config_values"] = params_from_zip + + if user_id: + skill_dict["created_by"] = user_id + skill_dict["updated_by"] = user_id + + result = skill_db.create_skill(skill_dict, tenant_id) + + self.skill_manager.save_skill(skill_dict) + + self._upload_zip_files(zip_bytes, name, detected_skill_name) + + return self._enrich_configs_from_yaml(result) + + def export_skills_by_names( + self, + skill_names: List[str], + tenant_id: Optional[str] = None + ) -> List[Dict[str, str]]: + """Export skills as ZIP files by name. + + Packages the entire skill directory (SKILL.md, scripts/, assets/, config/) + into a ZIP for each skill name. 
+ + Args: + skill_names: List of skill names to export + tenant_id: Tenant ID for skill lookup + + Returns: + List of dicts with skill_name and skill_zip_base64 + """ + import base64 + + effective_tenant_id = tenant_id or self.tenant_id + results: List[Dict[str, str]] = [] + + for skill_name in skill_names: + skill_dir = os.path.join( + self.skill_manager.local_skills_dir or CONTAINER_SKILLS_PATH, + skill_name + ) + if not os.path.isdir(skill_dir): + logger.warning(f"Skill directory not found for export: {skill_name}") + continue + + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf: + for root, dirs, files in os.walk(skill_dir): + for file in files: + file_path = os.path.join(root, file) + rel_path = os.path.relpath(file_path, skill_dir) + arcname = os.path.join(skill_name, rel_path) + zf.write(file_path, arcname) + + zip_buffer.seek(0) + zip_base64 = base64.b64encode(zip_buffer.read()).decode("utf-8") + results.append({ + "skill_name": skill_name, + "skill_zip_base64": zip_base64 + }) + + return results + def classify_streaming_content( content: str, @@ -1740,3 +2414,345 @@ def run_task(): skill_creation_task_manager.unregister_task(task_id) return task_id, generate + + +# ============== Skill List Initialization ============== + + +async def init_skill_list_for_tenant(tenant_id: str, user_id: str): + """Initialize skill list for a new tenant by scanning local skill directories. + + Mirrors init_tool_list_for_tenant() in tool_configuration_service.py. 
+ + Args: + tenant_id: Tenant ID for the new tenant + user_id: User ID for tracking who initiated the scan + + Returns: + Dictionary containing initialization result + """ + from database import skill_db as skill_db_module + + if skill_db_module.check_skill_list_initialized(tenant_id): + logger.info(f"Skill list already initialized for tenant {tenant_id}, skipping") + return {"status": "already_initialized", "message": "Skill list already exists"} + + logger.info(f"Initializing skill list for new tenant: {tenant_id}") + await update_skill_list(tenant_id=tenant_id, user_id=user_id) + return {"status": "success", "message": "Skill list initialized successfully"} + + +async def update_skill_list(tenant_id: str, user_id: str): + """Scan local skill directories and update ag_skill_info_t. + + Mirrors update_tool_list() in tool_configuration_service.py. + + Args: + tenant_id: Tenant ID for the tenant + user_id: User ID for tracking who initiated the scan + """ + from database import skill_db as skill_db_module + from nexent.skills import SkillManager + + skill_manager = SkillManager(base_skills_dir=CONTAINER_SKILLS_PATH, tenant_id=tenant_id) + # Use the resolved tenant-scoped local path for schema/config file reading + local_base = skill_manager.local_skills_dir or CONTAINER_SKILLS_PATH + scanned_skills = skill_manager.list_skills() + + skills_to_upsert = [] + for skill_info in scanned_skills: + skill_name = skill_info.get("name") + if not skill_name: + continue + + skill_data = { + "name": skill_name, + "description": skill_info.get("description", ""), + "tags": skill_info.get("tags", []), + "source": "official", + } + + try: + full_skill = skill_manager.load_skill(skill_name) + if full_skill: + skill_data["content"] = full_skill.get("content", "") + + # Try schema.yaml first; fall back to AST-parsed scripts + schema_path = _local_skill_schema_yaml_path(skill_name, local_base) + if os.path.isfile(schema_path): + async with aiofiles.open(schema_path, "rb") as f: + raw = 
await f.read() + parsed = _parse_skill_schema_from_yaml_bytes(raw) + skill_data["config_schemas"] = parsed + logger.debug("Loaded config_schemas from schema.yaml for skill %s", skill_name) + else: + scripts_dir = os.path.join(local_base, skill_name, "scripts") + inputs = _get_skill_inputs_from_code(scripts_dir) + if inputs: + skill_data["config_schemas"] = inputs + except Exception as e: + logger.warning(f"Could not load full skill content for {skill_name}: {e}") + skill_data["content"] = "" + + skills_to_upsert.append(skill_data) + + if skills_to_upsert: + skill_db_module.upsert_scanned_skills(skills_to_upsert, user_id, tenant_id) + logger.info(f"Upserted {len(skills_to_upsert)} skills for tenant {tenant_id}") + else: + logger.info(f"No skills found to upsert for tenant {tenant_id}") + + +def install_skills_for_tenant( + skill_ids: List[int], + tenant_id: str, + user_id: Optional[str] = None +) -> List[int]: + """Install specified official skills into a new tenant by copying their records. + + For each skill_id provided, finds the global template skill (official skill with + NULL tenant_id) and creates a copy in ag_skill_info_t for the target tenant. + Skills that cannot be found as global templates are skipped with a warning. + + Args: + skill_ids: List of skill IDs to install for the tenant. + tenant_id: Target tenant ID to install skills into. + user_id: User ID for created_by/updated_by audit fields. + + Returns: + List of skill IDs that were successfully installed. 
+ """ + from database import skill_db as skill_db_module + + if not skill_ids: + return [] + + installed_ids: List[int] = [] + for skill_id in skill_ids: + try: + template = skill_db_module.get_skill_by_id_global(skill_id) + if not template: + logger.warning( + f"Skill template with ID {skill_id} not found for installation " + f"into tenant {tenant_id}" + ) + continue + + skill_name = template.get("name", "") + if not skill_name: + logger.warning( + f"Skill template {skill_id} has no name, skipping installation " + f"for tenant {tenant_id}" + ) + continue + + existing = skill_db_module.get_skill_by_name(skill_name, tenant_id) + if existing: + logger.info( + f"Skill '{skill_name}' already exists for tenant {tenant_id}, skipping" + ) + installed_ids.append(existing.get("skill_id")) + continue + + skill_data = { + "name": skill_name, + "description": template.get("description", ""), + "tags": template.get("tags", []), + "content": template.get("content", ""), + "config_schemas": template.get("config_schemas"), + "config_values": template.get("config_values"), + "source": template.get("source", "official"), + "created_by": user_id, + "updated_by": user_id, + } + result = skill_db_module.create_skill(skill_data, tenant_id) + new_skill_id = result.get("skill_id") + if new_skill_id: + installed_ids.append(new_skill_id) + logger.info( + f"Installed skill '{skill_name}' (ID {new_skill_id}) for tenant {tenant_id}" + ) + else: + logger.warning( + f"create_skill returned no skill_id for '{skill_name}', " + f"tenant {tenant_id}" + ) + except Exception as e: + logger.error( + f"Failed to install skill ID {skill_id} into tenant {tenant_id}: {e}" + ) + + return installed_ids + + +def install_skills_from_zip_for_tenant( + skill_names: List[str], + tenant_id: str, + user_id: Optional[str] = None, + locale: Optional[str] = None +) -> List[str]: + """Install official skills into a new tenant by reading ZIP files from OFFICIAL_SKILLS_ZIP_PATH. 
+ + For each skill_name provided, derives the ZIP filename as .zip, + reads the file from OFFICIAL_SKILLS_ZIP_PATH, and creates the skill via + create_skill_from_file (which handles ZIP extraction, SKILL.md parsing, + and database record creation). + + Skills that cannot be found as ZIP files are skipped with a warning. + Skills that already exist for the tenant are skipped (not reinstalled). + + Args: + skill_names: List of skill names to install (e.g. ["search-knowledge-base"]). + tenant_id: Target tenant ID to install skills into. + user_id: User ID for created_by/updated_by audit fields. + locale: Frontend locale (e.g. "zh" or "en"). Determines the source label: + "zh" → "官方", other locales → "official". + + Returns: + List of skill names that were successfully installed. + """ + if not skill_names: + return [] + + zip_dir = OFFICIAL_SKILLS_ZIP_PATH + if not os.path.isdir(zip_dir): + logger.warning(f"Official skills zip directory not found: {zip_dir}") + return [] + + # Derive source label from locale: zh → "官方", otherwise "official" + source = "官方" if locale == "zh" else "official" + + installed: List[str] = [] + service = SkillService(tenant_id=tenant_id) + + for skill_name in skill_names: + zip_filename = f"{skill_name}.zip" + zip_path = os.path.join(zip_dir, zip_filename) + + if not os.path.isfile(zip_path): + logger.warning( + f"ZIP file not found for skill '{skill_name}': {zip_path}" + ) + continue + + try: + existing = skill_db.get_skill_by_name(skill_name, tenant_id) + if existing: + logger.info( + f"Skill '{skill_name}' already exists for tenant {tenant_id}, skipping" + ) + installed.append(skill_name) + continue + + with open(zip_path, "rb") as f: + zip_content = f.read() + + result = service.create_skill_from_file( + file_content=zip_content, + skill_name=skill_name, + file_type="zip", + source=source, + tenant_id=tenant_id, + user_id=user_id, + ) + installed_name = result.get("name", skill_name) + installed.append(installed_name) + logger.info( + 
f"Installed skill '{installed_name}' for tenant {tenant_id} " + f"from ZIP {zip_filename}" + ) + except Exception as e: + logger.error( + f"Failed to install skill '{skill_name}' from ZIP for tenant {tenant_id}: {e}" + ) + + return installed + + +def get_official_skills_with_status( + tenant_id: Optional[str] = None +) -> List[Dict[str, Any]]: + """Return all official skills with their installation status for a tenant. + + Scans the official-skills-zip directory for available official skills + (filename without .zip = skill name). For each skill, checks whether + it is already installed for the target tenant and whether local resource + files exist. + + Args: + tenant_id: Tenant ID to check installation status for. + + Returns: + List of dicts with skill_id, name, description, source, and status + ("installable" | "installed" | "resource_missing"). + """ + from database import skill_db as skill_db_module + + result: List[Dict[str, Any]] = [] + + zip_dir = OFFICIAL_SKILLS_ZIP_PATH + if not os.path.isdir(zip_dir): + logger.warning(f"Official skills zip directory not found: {zip_dir}") + return result + + try: + zip_files = [f for f in os.listdir(zip_dir) if f.lower().endswith(".zip")] + except OSError as e: + logger.warning(f"Failed to list official skills zip directory: {e}") + return result + + for zip_file in sorted(zip_files): + skill_name = zip_file[:-4] + if not skill_name: + continue + + skill_id: Optional[int] = None + is_installed = False + has_resources = True + + if tenant_id: + existing = skill_db_module.get_skill_by_name(skill_name, tenant_id) + if existing: + skill_id = existing.get("skill_id") + is_installed = True + skill_manager = SkillManager( + base_skills_dir=CONTAINER_SKILLS_PATH, + tenant_id=tenant_id + ) + skill_dir = os.path.join( + skill_manager.local_skills_dir or CONTAINER_SKILLS_PATH or "", + skill_name + ) + has_resources = os.path.isdir(skill_dir) + + if skill_id is None: + global_skill = skill_db_module.get_skill_by_name(skill_name, 
None) + if global_skill: + skill_id = global_skill.get("skill_id") + + if is_installed and not has_resources: + status = "resource_missing" + elif is_installed: + status = "installed" + else: + status = "installable" + + description = "" + if skill_id: + db_skill = skill_db_module.get_skill_by_id(skill_id, tenant_id) if tenant_id else None + if db_skill: + description = db_skill.get("description", "") + if not description: + db_global = skill_db_module.get_skill_by_name(skill_name, None) + if db_global: + description = db_global.get("description", "") + + result.append({ + "skill_id": skill_id if skill_id is not None else 0, + "name": skill_name, + "description": description, + "source": "official", + "status": status, + }) + + return result diff --git a/backend/services/tenant_service.py b/backend/services/tenant_service.py index bb761d2b4..6ed96a849 100644 --- a/backend/services/tenant_service.py +++ b/backend/services/tenant_service.py @@ -3,9 +3,12 @@ """ import asyncio import logging +import os +import shutil import uuid from typing import Any, Dict, List, Optional +from database import skill_db from database.tenant_config_db import ( get_single_config_info, insert_config, @@ -23,8 +26,9 @@ from database.remote_mcp_db import get_mcp_records_by_tenant, delete_mcp_record_by_name_and_url from database.invitation_db import query_invitations_by_tenant, remove_invitation from database.tool_db import delete_tools_by_agent_id -from consts.const import TENANT_NAME, TENANT_ID, DEFAULT_GROUP_ID +from consts.const import ASSET_OWNER_TENANT_ID, TENANT_NAME, TENANT_ID, DEFAULT_GROUP_ID, CONTAINER_SKILLS_PATH from consts.exceptions import NotFoundException, ValidationError, UserRegistrationException +from services.skill_service import install_skills_from_zip_for_tenant logger = logging.getLogger(__name__) @@ -47,7 +51,8 @@ def get_tenant_info(tenant_id: str) -> Dict[str, Any]: # Get tenant name name_config = get_single_config_info(tenant_id, TENANT_NAME) if not name_config: 
- logger.warning(f"The name of tenant {tenant_id} not found, creating default config.") + logger.warning( + f"The name of tenant {tenant_id} not found, creating default config.") # Auto-create TENANT_NAME config with default name _ensure_tenant_name_config(tenant_id) # Re-fetch after creation @@ -92,7 +97,8 @@ def _ensure_tenant_name_config(tenant_id: str) -> bool: if success: logger.info(f"Auto-created TENANT_NAME config for tenant {tenant_id}") else: - logger.error(f"Failed to auto-create TENANT_NAME config for tenant {tenant_id}") + logger.error( + f"Failed to auto-create TENANT_NAME config for tenant {tenant_id}") return success @@ -133,8 +139,11 @@ def get_tenants_paginated(page: int = 1, page_size: int = 20) -> Dict[str, Any]: Returns: Dict[str, Any]: Dictionary containing paginated tenant data and pagination info """ - # Get all tenant IDs first - all_tenant_ids = get_all_tenant_ids() + # Exclude virtual ASSET_OWNER tenant from admin tenant listings + all_tenant_ids = [ + tid for tid in get_all_tenant_ids() + if tid != ASSET_OWNER_TENANT_ID + ] total = len(all_tenant_ids) # Calculate pagination @@ -151,7 +160,8 @@ def get_tenants_paginated(page: int = 1, page_size: int = 20) -> Dict[str, Any]: tenant_info = get_tenant_info(tenant_id) tenants.append(tenant_info) except NotFoundException: - logging.warning(f"Tenant info of {tenant_id} not found. Returning basic tenant structure.") + logging.warning( + f"Tenant info of {tenant_id} not found. 
Returning basic tenant structure.") tenant_info = { "tenant_id": tenant_id, "tenant_name": "", @@ -168,7 +178,13 @@ def get_tenants_paginated(page: int = 1, page_size: int = 20) -> Dict[str, Any]: } -def create_tenant(tenant_name: str, created_by: Optional[str] = None) -> Dict[str, Any]: +def create_tenant( + tenant_name: str, + created_by: Optional[str] = None, + skill_ids: Optional[List[int]] = None, + skill_names: Optional[List[str]] = None, + locale: Optional[str] = None +) -> Dict[str, Any]: """ Create a new tenant with default group @@ -191,11 +207,13 @@ def create_tenant(tenant_name: str, created_by: Optional[str] = None) -> Dict[st # Check if tenant name already exists if check_tenant_name_exists(tenant_name.strip()): - raise ValidationError(f"Tenant with name '{tenant_name.strip()}' already exists") + raise ValidationError( + f"Tenant with name '{tenant_name.strip()}' already exists") try: # Create default group first - default_group_id = _create_default_group_for_tenant(tenant_id, created_by) + default_group_id = _create_default_group_for_tenant( + tenant_id, created_by) # Create tenant ID configuration tenant_id_data = { @@ -231,15 +249,48 @@ def create_tenant(tenant_name: str, created_by: Optional[str] = None) -> Dict[st } group_success = insert_config(group_config_data) if not group_success: - raise ValidationError("Failed to create tenant default group configuration") + raise ValidationError( + "Failed to create tenant default group configuration") + + # Install requested skills for the new tenant + # Prefer skill_names (ZIP-based installation) over skill_ids (legacy record-copy) + installed_skill_names: List[str] = [] + if skill_names: + try: + installed_skill_names = install_skills_from_zip_for_tenant( + skill_names=skill_names, + tenant_id=tenant_id, + user_id=created_by, + locale=locale + ) + except Exception as e: + logger.warning( + f"Failed to install skills from ZIP for tenant {tenant_id}: {e}") + elif skill_ids: + try: + from 
services.skill_service import install_skills_for_tenant as install_by_ids + installed_by_ids = install_by_ids( + skill_ids=skill_ids, + tenant_id=tenant_id, + user_id=created_by + ) + logger.info( + f"Legacy install_skills_for_tenant installed IDs: {installed_by_ids} " + f"for tenant {tenant_id}" + ) + except Exception as e: + logger.warning( + f"Failed to install skills by IDs for tenant {tenant_id}: {e}") tenant_info = { "tenant_id": tenant_id, "tenant_name": tenant_name.strip(), - "default_group_id": str(default_group_id) + "default_group_id": str(default_group_id), + "installed_skill_names": installed_skill_names, } - logger.info(f"Created tenant {tenant_id} with name '{tenant_name}' and default group {default_group_id}") + logger.info( + f"Created tenant {tenant_id} with name '{tenant_name}' and default group {default_group_id}") return tenant_info except Exception as e: @@ -270,13 +321,15 @@ def update_tenant_info(tenant_id: str, tenant_name: str, updated_by: Optional[st # Check if tenant name already exists (exclude current tenant) if check_tenant_name_exists(tenant_name.strip(), exclude_tenant_id=tenant_id): - raise ValidationError(f"Tenant with name '{tenant_name.strip()}' already exists") + raise ValidationError( + f"Tenant with name '{tenant_name.strip()}' already exists") # Check if tenant name config exists name_config = get_single_config_info(tenant_id, TENANT_NAME) if not name_config: # Tenant config doesn't exist, create it with the provided name - logger.info(f"TENANT_NAME config not found for {tenant_id}, creating new config.") + logger.info( + f"TENANT_NAME config not found for {tenant_id}, creating new config.") tenant_name_data = { "tenant_id": tenant_id, "config_key": TENANT_NAME, @@ -302,6 +355,57 @@ def update_tenant_info(tenant_id: str, tenant_name: str, updated_by: Optional[st return updated_tenant +async def _delete_skills_for_tenant(tenant_id: str, actor: str) -> None: + """ + Delete all skills, skill instances, and local skill files for 
a tenant. + + This performs cascade cleanup of: + - All skill instances (ag_skill_instance_t) for the tenant + - All skills (ag_skill_info_t) for the tenant + - All local skill directories and files under CONTAINER_SKILLS_PATH/{tenant_id}/ + + Args: + tenant_id: Tenant ID to delete skills for + actor: User ID performing the deletion (for audit trail) + """ + logger.info(f"Deleting skills and local files for tenant {tenant_id}") + + # 1. Soft-delete all skill instances for the tenant (regardless of skill source) + try: + deleted_count = skill_db.delete_skill_instances_by_tenant( + tenant_id, actor) + logger.info( + f"Soft-deleted {deleted_count} skill instances for tenant {tenant_id}") + except Exception as e: + logger.warning( + f"Failed to soft-delete skill instances for tenant {tenant_id}: {str(e)}") + + # 2. Soft-delete all skills for the tenant + skills = skill_db.list_skills(tenant_id) + for skill in skills: + try: + skill_name = skill.get("name") + if skill_name: + skill_db.delete_skill(skill_name, tenant_id, actor) + logger.info( + f"Soft-deleted skill '{skill_name}' for tenant {tenant_id}") + except Exception as e: + logger.warning( + f"Failed to soft-delete skill {skill.get('name')}: {str(e)}") + + # 3. 
Delete the tenant's local skill directory and all its contents + if CONTAINER_SKILLS_PATH: + tenant_skill_root = os.path.join(CONTAINER_SKILLS_PATH, tenant_id) + if os.path.exists(tenant_skill_root): + try: + shutil.rmtree(tenant_skill_root) + logger.info( + f"Deleted tenant skill root directory: {tenant_skill_root}") + except Exception as e: + logger.warning( + f"Failed to delete tenant skill root directory {tenant_skill_root}: {str(e)}") + + async def delete_tenant(tenant_id: str, deleted_by: Optional[str] = None) -> bool: """ Delete tenant and all associated resources @@ -312,6 +416,7 @@ async def delete_tenant(tenant_id: str, deleted_by: Optional[str] = None) -> boo - All models in the tenant - All knowledge bases in the tenant - All agents in the tenant (including tool instances) + - All skills, skill instances, and local skill files for the tenant - All MCP configurations in the tenant - All invitation codes in the tenant - All tenant configurations @@ -332,12 +437,14 @@ async def delete_tenant(tenant_id: str, deleted_by: Optional[str] = None) -> boo if not name_config: raise NotFoundException(f"Tenant {tenant_id} does not exist") - logger.info(f"Starting cascade deletion for tenant {tenant_id} by {deleted_by}") + logger.info( + f"Starting cascade deletion for tenant {tenant_id} by {deleted_by}") try: # 1. 
Deactivate all users in the tenant (full cleanup including Supabase deletion) logger.info(f"Deactivating users for tenant {tenant_id}") - users_result = get_users_by_tenant_id(tenant_id, page=1, page_size=10000) + users_result = get_users_by_tenant_id( + tenant_id, page=1, page_size=10000) users = users_result.get("users", []) if users: @@ -346,9 +453,11 @@ async def delete_single_user(user: Dict[str, Any]) -> None: if user_id: try: await delete_user_and_cleanup(user_id, tenant_id) - logger.info(f"Deactivated user {user_id} for tenant {tenant_id}") + logger.info( + f"Deactivated user {user_id} for tenant {tenant_id}") except Exception as e: - logger.warning(f"Failed to deactivate user {user_id}: {str(e)}") + logger.warning( + f"Failed to deactivate user {user_id}: {str(e)}") # Concurrently delete all users await asyncio.gather(*[delete_single_user(user) for user in users]) @@ -360,16 +469,19 @@ async def delete_single_user(user: Dict[str, Any]) -> None: try: remove_group(group["group_id"], deleted_by) except Exception as e: - logger.warning(f"Failed to delete group {group.get('group_id')}: {str(e)}") + logger.warning( + f"Failed to delete group {group.get('group_id')}: {str(e)}") # 3. Delete all models in the tenant logger.info(f"Deleting models for tenant {tenant_id}") models = get_model_records({"tenant_id": tenant_id}, tenant_id) for model in models: try: - delete_model_record(model["model_id"], deleted_by or "system", tenant_id) + delete_model_record( + model["model_id"], deleted_by or "system", tenant_id) except Exception as e: - logger.warning(f"Failed to delete model {model.get('model_id')}: {str(e)}") + logger.warning( + f"Failed to delete model {model.get('model_id')}: {str(e)}") # 4. 
Delete all knowledge bases in the tenant logger.info(f"Deleting knowledge bases for tenant {tenant_id}") @@ -381,7 +493,8 @@ async def delete_single_user(user: Dict[str, Any]) -> None: "user_id": deleted_by or "system" }) except Exception as e: - logger.warning(f"Failed to delete knowledge base {kb.get('knowledge_id')}: {str(e)}") + logger.warning( + f"Failed to delete knowledge base {kb.get('knowledge_id')}: {str(e)}") # 5. Delete all agents in the tenant (including related data) logger.info(f"Deleting agents for tenant {tenant_id}") @@ -390,24 +503,34 @@ async def delete_single_user(user: Dict[str, Any]) -> None: try: agent_id = agent.get("agent_id") # Delete tool instances first - delete_tools_by_agent_id(agent_id, tenant_id, deleted_by or "system", version_no=0) + delete_tools_by_agent_id( + agent_id, tenant_id, deleted_by or "system", version_no=0) # Delete agent relationships - delete_agent_relationship(agent_id, tenant_id, deleted_by or "system", version_no=0) + delete_agent_relationship( + agent_id, tenant_id, deleted_by or "system", version_no=0) # Delete the agent delete_agent_by_id(agent_id, tenant_id, deleted_by or "system") except Exception as e: - logger.warning(f"Failed to delete agent {agent.get('agent_id')}: {str(e)}") + logger.warning( + f"Failed to delete agent {agent.get('agent_id')}: {str(e)}") # Also delete published agents (version_no >= 1) - agents_published = query_all_agent_info_by_tenant_id(tenant_id, version_no=1) + agents_published = query_all_agent_info_by_tenant_id( + tenant_id, version_no=1) for agent in agents_published: try: agent_id = agent.get("agent_id") - delete_tools_by_agent_id(agent_id, tenant_id, deleted_by or "system", version_no=1) - delete_agent_relationship(agent_id, tenant_id, deleted_by or "system", version_no=1) + delete_tools_by_agent_id( + agent_id, tenant_id, deleted_by or "system", version_no=1) + delete_agent_relationship( + agent_id, tenant_id, deleted_by or "system", version_no=1) delete_agent_by_id(agent_id, 
tenant_id, deleted_by or "system") except Exception as e: - logger.warning(f"Failed to delete published agent {agent.get('agent_id')}: {str(e)}") + logger.warning( + f"Failed to delete published agent {agent.get('agent_id')}: {str(e)}") + + # 5b. Delete all skills, skill instances, and local skill files for the tenant + _delete_skills_for_tenant(tenant_id, deleted_by or "system") # 6. Delete all MCP configurations in the tenant logger.info(f"Deleting MCP records for tenant {tenant_id}") @@ -421,7 +544,8 @@ async def delete_single_user(user: Dict[str, Any]) -> None: deleted_by or "system" ) except Exception as e: - logger.warning(f"Failed to delete MCP {mcp.get('mcp_id')}: {str(e)}") + logger.warning( + f"Failed to delete MCP {mcp.get('mcp_id')}: {str(e)}") # 7. Delete all invitation codes in the tenant logger.info(f"Deleting invitations for tenant {tenant_id}") @@ -430,7 +554,8 @@ async def delete_single_user(user: Dict[str, Any]) -> None: try: remove_invitation(invitation["invitation_id"], deleted_by) except Exception as e: - logger.warning(f"Failed to delete invitation {invitation.get('invitation_id')}: {str(e)}") + logger.warning( + f"Failed to delete invitation {invitation.get('invitation_id')}: {str(e)}") # 8. 
Delete all tenant configurations (must be done last) logger.info(f"Deleting tenant configurations for tenant {tenant_id}") @@ -440,9 +565,11 @@ async def delete_single_user(user: Dict[str, Any]) -> None: try: delete_config_by_tenant_config_id(config["tenant_config_id"]) except Exception as e: - logger.warning(f"Failed to delete config {config.get('tenant_config_id')}: {str(e)}") + logger.warning( + f"Failed to delete config {config.get('tenant_config_id')}: {str(e)}") - logger.info(f"Successfully deleted tenant {tenant_id} and all associated resources") + logger.info( + f"Successfully deleted tenant {tenant_id} and all associated resources") return True except Exception as e: @@ -476,5 +603,6 @@ def _create_default_group_for_tenant(tenant_id: str, created_by: Optional[str] = return group_id except Exception as e: - logger.error(f"Failed to create default group for tenant {tenant_id}: {str(e)}") + logger.error( + f"Failed to create default group for tenant {tenant_id}: {str(e)}") raise ValidationError(f"Failed to create default group: {str(e)}") diff --git a/backend/services/tool_configuration_service.py b/backend/services/tool_configuration_service.py index 5e5229ff6..6d9978f40 100644 --- a/backend/services/tool_configuration_service.py +++ b/backend/services/tool_configuration_service.py @@ -24,6 +24,7 @@ get_mcp_authorization_token_by_name_and_url, get_mcp_records_by_tenant, get_mcp_server_by_name_and_tenant, + get_mcp_custom_headers_by_name_and_url, ) from database.tool_db import ( check_tool_list_initialized, @@ -38,7 +39,7 @@ from services.file_management_service import get_llm_model, validate_urls_access from services.vectordatabase_service import get_embedding_model_by_index_name, get_rerank_model from database.client import minio_client -from services.image_service import get_vlm_model +from services.image_service import get_video_understanding_model, get_vlm_model from nexent.monitor import set_monitoring_context, set_monitoring_operation from 
services.vectordatabase_service import get_vector_db_core from utils.langchain_utils import discover_langchain_modules @@ -47,19 +48,24 @@ logger = logging.getLogger("tool_configuration_service") -def _create_mcp_transport(url: str, authorization_token: Optional[str] = None): +def _create_mcp_transport(url: str, authorization_token: Optional[str] = None, custom_headers: Optional[Dict[str, Any]] = None): """ Create appropriate MCP transport based on URL ending. Args: url: MCP server URL authorization_token: Optional authorization token + custom_headers: Optional custom HTTP headers Returns: Transport instance (SSETransport or StreamableHttpTransport) """ url_stripped = url.strip() - headers = {"Authorization": authorization_token} if authorization_token else {} + headers = {} + if authorization_token: + headers["Authorization"] = authorization_token + if custom_headers: + headers.update(custom_headers) if url_stripped.endswith("/sse"): return SSETransport(url=url_stripped, headers=headers) @@ -130,11 +136,15 @@ def get_local_tools() -> List[ToolInfo]: if hasattr(param.default, 'exclude') and param.default.exclude: continue + # Check if default is a Pydantic FieldInfo (has .default attribute) + is_pydantic_field = hasattr(param.default, 'default') + # Get description in both languages - param_description = param.default.description if hasattr(param.default, 'description') else "" + param_description = param.default.description if is_pydantic_field else "" # First try to get from param.default.description_zh (FieldInfo) - param_description_zh = param.default.description_zh if hasattr(param.default, 'description_zh') else None + # Note: Pydantic Field doesn't have description_zh attribute, so use getattr with default + param_description_zh = getattr(param.default, 'description_zh', None) if is_pydantic_field else None # Fallback to init_param_descriptions if not found if param_description_zh is None and param_name in init_param_descriptions: @@ -146,11 +156,21 @@ def 
get_local_tools() -> List[ToolInfo]: "description": param_description, "description_zh": param_description_zh } - if param.default.default is PydanticUndefined: - param_info["optional"] = False + + # Handle both Pydantic FieldInfo and simple defaults + if is_pydantic_field: + if param.default.default is PydanticUndefined: + param_info["optional"] = False + else: + param_info["default"] = param.default.default + param_info["optional"] = True else: - param_info["default"] = param.default.default - param_info["optional"] = True + # Simple default value (not a FieldInfo) + if param.default == inspect.Parameter.empty: + param_info["optional"] = False + else: + param_info["default"] = param.default + param_info["optional"] = True init_params_list.append(param_info) @@ -262,13 +282,15 @@ async def get_all_mcp_tools(tenant_id: str) -> List[ToolInfo]: mcp_info = get_mcp_records_by_tenant(tenant_id=tenant_id) tools_info = [] for record in mcp_info: - # only update connected server - if record["status"]: + # Only scan MCP services that are explicitly enabled and currently healthy. 
+ if bool(record.get("enabled")) and bool(record.get("status")): try: tools_info.extend(await get_tool_from_remote_mcp_server( mcp_server_name=record["mcp_name"], remote_mcp_server=record["mcp_server"], - tenant_id=tenant_id + tenant_id=tenant_id, + authorization_token=record.get("authorization_token"), + custom_headers=record.get("custom_headers"), )) except Exception as e: logger.error(f"mcp connection error: {str(e)}") @@ -340,7 +362,8 @@ async def get_tool_from_remote_mcp_server( mcp_server_name: str, remote_mcp_server: str, tenant_id: Optional[str] = None, - authorization_token: Optional[str] = None + authorization_token: Optional[str] = None, + custom_headers: Optional[Dict[str, Any]] = None ): """ Get the tool information from the remote MCP server, avoid blocking the event loop @@ -350,6 +373,7 @@ async def get_tool_from_remote_mcp_server( remote_mcp_server: URL of the MCP server tenant_id: Optional tenant ID for database lookup of authorization_token authorization_token: Optional authorization token for authentication (if not provided and tenant_id is given, will be fetched from database) + custom_headers: Optional custom HTTP headers """ # Get authorization token from database if not provided if authorization_token is None and tenant_id: @@ -359,10 +383,18 @@ async def get_tool_from_remote_mcp_server( tenant_id=tenant_id ) + # Get custom headers from database if not provided + if custom_headers is None and tenant_id: + custom_headers = get_mcp_custom_headers_by_name_and_url( + mcp_name=mcp_server_name, + mcp_server=remote_mcp_server, + tenant_id=tenant_id + ) + tools_info = [] try: - transport = _create_mcp_transport(remote_mcp_server, authorization_token) + transport = _create_mcp_transport(remote_mcp_server, authorization_token, custom_headers) client = Client(transport=transport, timeout=10) async with client: # List available operations @@ -482,7 +514,8 @@ async def list_all_tools(tenant_id: str): param["description_zh"] = 
sdk_param.get("description_zh") break - # Merge inputs description_zh from SDK + # Use SDK full input schema for local tools to keep runtime inputs + # aligned with current tool code (instead of stale DB snapshots). inputs_str = tool.get("inputs", "{}") try: inputs = json.loads(inputs_str) if isinstance(inputs_str, str) else inputs_str @@ -515,7 +548,6 @@ async def list_all_tools(tenant_id: str): "category": tool.get("category") } formatted_tools.append(formatted_tool) - return formatted_tools @@ -535,7 +567,8 @@ async def _call_mcp_tool( mcp_url: str, tool_name: str, inputs: Optional[Dict[str, Any]], - authorization_token: Optional[str] = None + authorization_token: Optional[str] = None, + custom_headers: Optional[Dict[str, Any]] = None ) -> Dict[str, Any]: """ Common method to call MCP tool with connection handling. @@ -545,6 +578,7 @@ async def _call_mcp_tool( tool_name: Name of the tool to call inputs: Parameters to pass to the tool authorization_token: Optional authorization token for authentication + custom_headers: Optional custom HTTP headers Returns: Dict containing tool execution result @@ -552,7 +586,7 @@ async def _call_mcp_tool( Raises: MCPConnectionError: If MCP connection fails """ - transport = _create_mcp_transport(mcp_url, authorization_token) + transport = _create_mcp_transport(mcp_url, authorization_token, custom_headers) client = Client(transport=transport) async with client: # Check if connected @@ -616,16 +650,22 @@ async def _validate_mcp_tool_remote( if not actual_mcp_url: raise NotFoundException(f"MCP server not found for name: {usage}") - # Get authorization token from database + # Get authorization token and custom headers from database authorization_token = None + custom_headers = None if tenant_id: authorization_token = get_mcp_authorization_token_by_name_and_url( mcp_name=usage, mcp_server=actual_mcp_url, tenant_id=tenant_id ) + custom_headers = get_mcp_custom_headers_by_name_and_url( + mcp_name=usage, + mcp_server=actual_mcp_url, + 
tenant_id=tenant_id + ) - return await _call_mcp_tool(actual_mcp_url, tool_name, inputs, authorization_token) + return await _call_mcp_tool(actual_mcp_url, tool_name, inputs, authorization_token, custom_headers) def _get_tool_class_by_name(tool_name: str) -> Optional[type]: @@ -681,6 +721,8 @@ def _validate_local_tool( if not tool_class: raise NotFoundException(f"Tool class not found for {tool_name}") + runtime_inputs = dict(inputs or {}) + # Parse instantiation parameters first instantiation_params = params or {} # Get signature and extract default values for all parameters @@ -704,6 +746,7 @@ def _validate_local_tool( if tool_name == "knowledge_base_search": index_names = instantiation_params.get("index_names", []) + is_multimodal = instantiation_params.pop("multimodal", False) # Must have embedding model for knowledge base search if not index_names or not tenant_id: @@ -765,6 +808,7 @@ def _validate_local_tool( if not tenant_id or not user_id: raise ToolExecutionException( f"Tenant ID and User ID are required for {tool_name} validation") + # get_vlm_model reads the first multimodal slot, now shown as image understanding. 
image_to_text_model = get_vlm_model(tenant_id=tenant_id) vlm_display_name = getattr( image_to_text_model, 'display_name', None) @@ -778,6 +822,23 @@ def _validate_local_tool( 'validate_url_access': lambda urls: validate_urls_access(urls, user_id) } tool_instance = tool_class(**params) + elif tool_name in ["analyze_audio", "analyze_video"]: + if not tenant_id or not user_id: + raise ToolExecutionException( + f"Tenant ID and User ID are required for {tool_name} validation") + video_understanding_model = get_video_understanding_model(tenant_id=tenant_id) + model_display_name = getattr( + video_understanding_model, 'display_name', None) + set_monitoring_context(tenant_id=tenant_id) + set_monitoring_operation( + "tool_validation", display_name=model_display_name) + params = { + **instantiation_params, + 'vlm_model': video_understanding_model, + 'storage_client': minio_client, + 'validate_url_access': lambda urls: validate_urls_access(urls, user_id) + } + tool_instance = tool_class(**params) elif tool_name == "analyze_text_file": if not tenant_id or not user_id: raise ToolExecutionException( @@ -799,6 +860,17 @@ def _validate_local_tool( else: tool_instance = tool_class(**instantiation_params) + # # Only pass declared runtime inputs to forward() to avoid unexpected kwargs. 
+ # declared_inputs = getattr(tool_class, "inputs", {}) or {} + # allowed_input_names = ( + # set(declared_inputs.keys()) if isinstance(declared_inputs, dict) else set() + # ) + # filtered_runtime_inputs = ( + # {k: v for k, v in runtime_inputs.items() if k in allowed_input_names} + # if allowed_input_names + # else runtime_inputs + # ) + result = tool_instance.forward(**(inputs or {})) return result except Exception as e: diff --git a/backend/services/user_management_service.py b/backend/services/user_management_service.py index b994f35b1..a983b25d3 100644 --- a/backend/services/user_management_service.py +++ b/backend/services/user_management_service.py @@ -15,11 +15,35 @@ from utils.auth_utils import ( get_supabase_client, + get_supabase_admin_client, calculate_expires_at, get_jwt_expiry_seconds, + resolve_tenant_id_from_user_tenant_record, ) -from consts.const import INVITE_CODE, SUPABASE_URL, SUPABASE_KEY, DEFAULT_TENANT_ID -from consts.exceptions import NoInviteCodeException, IncorrectInviteCodeException, UserRegistrationException, UnauthorizedError +from consts.const import ( + INVITE_CODE, + SUPABASE_URL, + SUPABASE_KEY, + DEFAULT_TENANT_ID, + ASSET_OWNER_TENANT_ID, + ASSET_OWNER_INVITE_CODE_TYPE, + ASSET_OWNER_ROLE, + ASSET_OWNER_SIGNUP_USE_OAUTH_DETAIL, +) + +from services.asset_owner_visibility import ( + filter_accessible_routes_for_asset_owner_feature, + require_asset_owner_enabled, +) +from consts.exceptions import ( + NoInviteCodeException, + IncorrectInviteCodeException, + UserRegistrationException, + UnauthorizedError, + ValidationError, +) +from consts.error_code import ErrorCode +from consts.exceptions import AppException from database.model_management_db import create_model_record from database.user_tenant_db import insert_user_tenant, get_user_tenant_by_user_id @@ -29,7 +53,7 @@ from services.invitation_service import use_invitation_code, check_invitation_available, get_invitation_by_code from services.group_service import add_user_to_groups 
from services.tool_configuration_service import init_tool_list_for_tenant - +from services.skill_service import init_skill_list_for_tenant logging.getLogger("user_management_service").setLevel(logging.DEBUG) @@ -133,6 +157,12 @@ async def signup_user_with_invitation(email: EmailStr, auto_login: Optional[bool] = True): """User registration with invitation code support""" client = get_supabase_client() + + # Validate password strength before registration + if not validate_password_strength(password): + raise AppException(ErrorCode.PROFILE_PASSWORD_WEAK, + "Password must be at least 8 characters with uppercase, lowercase, and digit.") + logging.info( f"Receive registration request: email={email}, invite_code={'provided' if invite_code else 'not provided'}, auto_login={auto_login}") @@ -163,12 +193,17 @@ async def signup_user_with_invitation(email: EmailStr, user_role = "ADMIN" elif code_type == "DEV_INVITE": user_role = "DEV" + elif code_type == ASSET_OWNER_INVITE_CODE_TYPE: + require_asset_owner_enabled() + raise ValidationError(ASSET_OWNER_SIGNUP_USE_OAUTH_DETAIL) logging.info( f"Invitation code {invite_code} validated successfully, will assign role: {user_role}") except IncorrectInviteCodeException: raise + except ValidationError: + raise except Exception as e: logging.error( f"Invitation code {invite_code} validation failed: {str(e)}") @@ -187,14 +222,20 @@ async def signup_user_with_invitation(email: EmailStr, # Determine tenant_id based on invitation code if invitation_info: tenant_id = invitation_info["tenant_id"] + if invitation_info.get("code_type") == ASSET_OWNER_INVITE_CODE_TYPE: + tenant_id = ASSET_OWNER_TENANT_ID else: tenant_id = DEFAULT_TENANT_ID + is_asset_owner_registration = user_role == ASSET_OWNER_ROLE + # Create user tenant relationship - logging.debug(f"Creating user tenant relationship: user_id={user_id}, tenant_id={tenant_id}, user_role={user_role}") + logging.debug( + f"Creating user tenant relationship: user_id={user_id}, 
tenant_id={tenant_id}, user_role={user_role}") insert_user_tenant( user_id=user_id, tenant_id=tenant_id, user_role=user_role, user_email=email) - logging.debug(f"User tenant relationship created successfully for user {user_id}") + logging.debug( + f"User tenant relationship created successfully for user {user_id}") # Use invitation code now that we have the real user_id if invitation_info: @@ -205,7 +246,7 @@ async def signup_user_with_invitation(email: EmailStr, # Add user to groups specified in invitation code group_ids = invitation_result.get("group_ids", []) - if group_ids: + if group_ids and not is_asset_owner_registration: try: # Convert group_ids from string to list if needed if isinstance(group_ids, str): @@ -213,7 +254,8 @@ async def signup_user_with_invitation(email: EmailStr, group_ids = convert_string_to_list(group_ids) if group_ids: - group_results = add_user_to_groups(user_id, group_ids, user_id) + group_results = add_user_to_groups( + user_id, group_ids, user_id) successful_adds = [ r for r in group_results if not r.get("error")] logging.info( @@ -235,7 +277,9 @@ async def signup_user_with_invitation(email: EmailStr, await generate_tts_stt_4_admin(tenant_id, user_id) # Initialize tool list for the new tenant (only once per tenant) - await init_tool_list_for_tenant(tenant_id, user_id) + if not is_asset_owner_registration: + await init_tool_list_for_tenant(tenant_id, user_id) + await init_skill_list_for_tenant(tenant_id, user_id) return await parse_supabase_response(False, response, user_role, auto_login) else: @@ -330,14 +374,24 @@ async def signin_user(email: EmailStr, "password": password }) + user_tenant = get_user_tenant_by_user_id(response.user.id) + if user_tenant and user_tenant.get("user_role") == ASSET_OWNER_ROLE: + try: + require_asset_owner_enabled() + except ValidationError: + client.auth.sign_out() + raise + # Get actual expiration time from access_token expiry_seconds = get_jwt_expiry_seconds(response.session.access_token) expires_at = 
calculate_expires_at(response.session.access_token) - # Get role information from user metadata - user_role = "user" # Default role - if 'role' in response.user.user_metadata: # Adapt to historical user data - user_role = response.user.user_metadata['role'] + # Prefer user_tenant_t role; fall back to Supabase metadata for legacy users + user_role = "user" + if user_tenant and user_tenant.get("user_role"): + user_role = user_tenant["user_role"] + elif "role" in response.user.user_metadata: + user_role = response.user.user_metadata["role"] logging.info( f"User {email} logged in successfully, session validity is {expiry_seconds} seconds, role: {user_role}") @@ -374,7 +428,8 @@ async def refresh_user_token(authorization, refresh_token: str): async def get_session_by_authorization(authorization): # Extract clean token from authorization header - clean_token = authorization.replace("Bearer ", "") if authorization.startswith("Bearer ") else authorization + clean_token = authorization.replace( + "Bearer ", "") if authorization.startswith("Bearer ") else authorization # Use the unified token validation function is_valid, user = validate_token(clean_token) @@ -411,9 +466,27 @@ async def get_user_info(user_id: str) -> Optional[Dict[str, Any]]: # Get user tenant relationship user_tenant = get_user_tenant_by_user_id(user_id) if not user_tenant: + # User exists in Supabase but not in local database - this is an inconsistent state. + # Delete the orphaned Supabase account and return None to trigger 401. 
+ logging.warning( + f"User {user_id} not found in local database, cleaning up orphaned Supabase account" + ) + try: + admin_client = get_supabase_admin_client() + if admin_client and hasattr(admin_client.auth, "admin"): + admin_client.auth.admin.delete_user(user_id) + logging.info(f"Deleted orphaned Supabase user {user_id}") + else: + logging.warning( + f"Could not get Supabase admin client to delete user {user_id}" + ) + except Exception as delete_err: + logging.error( + f"Failed to delete orphaned Supabase user {user_id}: {str(delete_err)}" + ) return None - tenant_id = user_tenant["tenant_id"] + tenant_id = resolve_tenant_id_from_user_tenant_record(user_tenant) user_role = user_tenant["user_role"] user_email = user_tenant["user_email"] @@ -437,7 +510,7 @@ async def get_user_info(user_id: str) -> Optional[Dict[str, Any]]: "user_email": user_email, "user_role": user_role, "permissions": permissions_data["permissions"], - "accessibleRoutes": permissions_data["accessibleRoutes"] + "accessibleRoutes": permissions_data["accessibleRoutes"], } } @@ -469,16 +542,20 @@ def format_role_permissions(permissions: List[Dict[str, Any]]) -> Dict[str, List permission_subtype = perm.get("permission_subtype", "") if permission_category == "RESOURCE" and permission_type and permission_subtype: - # Format as "permission_type:permission_subtype" + # Format as "permission_type:permission_subtype" formatted_permissions.append( f"{permission_type}:{permission_subtype}") elif permission_type == "LEFT_NAV_MENU" and permission_subtype: # Add permission_subtype to accessible routes for LEFT_NAV_MENU type accessible_routes.append(permission_subtype) + accessible_routes = filter_accessible_routes_for_asset_owner_feature( + accessible_routes + ) + return { "permissions": formatted_permissions, - "accessibleRoutes": accessible_routes + "accessibleRoutes": accessible_routes, } @@ -522,3 +599,85 @@ def delete_token(token_id: int, user_id: str) -> bool: True if the token was deleted, False if not 
found or not owned by user. """ return delete_token_record(token_id, user_id) + + +# ----------------------------- +# Password Management +# ----------------------------- + +def validate_password_strength(password: str) -> bool: + """Validate password meets minimum security requirements. + + Args: + password: The password to validate. + + Returns: + True if password meets requirements, False otherwise. + """ + if len(password) < 8: + return False + has_upper = any(c.isupper() for c in password) + has_lower = any(c.islower() for c in password) + has_digit = any(c.isdigit() for c in password) + return has_upper and has_lower and has_digit + + +async def update_password(user_id: str, old_password: str, new_password: str) -> bool: + """Update user password with old password verification. + + This method first re-authenticates the user with their old password, + then updates to the new password. + + Args: + user_id: The user ID to update password for. + old_password: The current password for verification. + new_password: The new password to set. + + Returns: + True if password was updated successfully. + + Raises: + UnauthorizedError: If old password is incorrect. + AppException (PROFILE_PASSWORD_WEAK): If new password does not meet requirements. + AppException (PROFILE_PASSWORD_SAME_AS_OLD): If new password is the same as old password. 
+ """ + if not validate_password_strength(new_password): + raise AppException(ErrorCode.PROFILE_PASSWORD_WEAK) + + if old_password == new_password: + raise AppException(ErrorCode.PROFILE_PASSWORD_SAME_AS_OLD) + + admin_client = get_supabase_admin_client() + + try: + user_tenant = get_user_tenant_by_user_id(user_id) + if not user_tenant or not user_tenant.get("user_email"): + raise UnauthorizedError("Unable to retrieve user email") + + user_email = user_tenant["user_email"] + + # Re-authenticate with old password to verify identity using admin client + try: + admin_client.auth.sign_in_with_password({ + "email": user_email, + "password": old_password + }) + except Exception as auth_err: + logging.warning( + f"Password verification failed for user {user_id}: {str(auth_err)}") + raise UnauthorizedError("Invalid old password") + + # Update to new password using admin client + admin_client.auth.update_user({"password": new_password}) + + logging.info(f"Password updated successfully for user {user_id}") + return True + + except UnauthorizedError: + raise + except AppException: + raise + except Exception as exc: + logging.error( + f"Failed to update password for user {user_id}: {str(exc)}") + raise diff --git a/backend/services/vectordatabase_service.py b/backend/services/vectordatabase_service.py index 8ad9b54e2..89f037fd4 100644 --- a/backend/services/vectordatabase_service.py +++ b/backend/services/vectordatabase_service.py @@ -26,9 +26,9 @@ from nexent.vector_database.elasticsearch_core import ElasticSearchCore from nexent.vector_database.datamate_core import DataMateCore -from consts.const import DATAMATE_URL, ES_API_KEY, ES_HOST, LANGUAGE, VectorDatabaseType, IS_SPEED_MODE, PERMISSION_EDIT, PERMISSION_READ +from consts.const import DATAMATE_URL, ES_API_KEY, ES_HOST, LANGUAGE, VectorDatabaseType, IS_SPEED_MODE, PERMISSION_EDIT, PERMISSION_READ, ASSET_OWNER_TENANT_ID from consts.model import ChunkCreateRequest, ChunkUpdateRequest -from database.attachment_db import 
delete_file +from database.attachment_db import delete_file, get_file_stream from database.knowledge_db import ( create_knowledge_record, delete_knowledge_record, @@ -46,6 +46,7 @@ from database.model_management_db import get_model_by_display_name, get_model_by_model_id, get_model_records from services.redis_service import get_redis_service from services.group_service import get_tenant_default_group_id +from services.asset_owner_visibility import postprocess_knowledge_visibility from utils.config_utils import tenant_config_manager, get_model_name_from_config from utils.file_management_utils import get_all_files_status, get_file_size from utils.str_utils import convert_string_to_list @@ -101,6 +102,28 @@ def _get_embedding_model_display_name(model_id: Optional[int], tenant_id: str) - return "" +def _is_multimodal_by_model_id(model_id: Optional[int], tenant_id: str) -> bool: + """ + Determine whether an embedding model is multimodal based on model_id. + + Args: + model_id: The embedding model ID. + tenant_id: Tenant ID for model lookup. + + Returns: + True when the model type is `multi_embedding`, otherwise False. 
+ """ + if model_id is None: + return False + try: + model = get_model_by_model_id(model_id, tenant_id) + if model: + return model.get("model_type") == "multi_embedding" + except Exception as e: + logger.warning(f"Failed to determine multimodal flag for model_id {model_id}: {e}") + return False + + class KnowledgeBaseNeedsModelConfigError(Exception): """Exception raised when a knowledge base needs an embedding model to be configured.""" def __init__(self, index_name: str, message: str = None): @@ -134,7 +157,8 @@ def get_embedding_model_by_index_name(tenant_id: str, index_name: str) -> tuple[ try: knowledge_record = get_knowledge_record({ "index_name": index_name, - "tenant_id": tenant_id + "tenant_id": tenant_id, + "include_asset_owner_assets": True, }) if not knowledge_record: @@ -283,8 +307,42 @@ def check_knowledge_base_exist_impl(knowledge_name: str, vdb_core: VectorDatabas # Case B: Name is available in this tenant return {"status": "available"} - -def get_embedding_model(tenant_id: str, model_name: Optional[str] = None) -> tuple[Optional[Any], Optional[int]]: +def _normalize_model_type(raw_model_type: Optional[str]) -> Optional[str]: + if raw_model_type in ["multiEmbedding", "multi_embedding"]: + return "multi_embedding" + if raw_model_type == "embedding": + return "embedding" + return None + +def _build_model_config(model: dict) -> dict: + return { + "model_repo": model.get("model_repo", ""), + "model_name": model["model_name"], + "api_key": model.get("api_key", ""), + "base_url": model.get("base_url", ""), + "model_type": model.get("model_type", "embedding"), + "max_tokens": model.get("max_tokens", 1024), + "ssl_verify": model.get("ssl_verify", True), + } + +def _create_embedding_model(model: dict) -> Any: + model_config = _build_model_config(model) + common_kwargs = { + "api_key": model_config.get("api_key", ""), + "base_url": model_config.get("base_url", ""), + "model_name": get_model_name_from_config(model_config) or "", + "embedding_dim": 
model_config.get("max_tokens", 1024), + "ssl_verify": model_config.get("ssl_verify", True), + } + if model.get("model_type", "embedding") == "multi_embedding": + return JinaEmbedding(**common_kwargs) + return OpenAICompatibleEmbedding(**common_kwargs) + +def get_embedding_model( + tenant_id: str, + model_name: Optional[str] = None, + model_type: Optional[str] = None +) -> tuple[Optional[Any], Optional[int]]: """ Get the embedding model for the tenant, optionally using a specific model name. @@ -296,40 +354,19 @@ def get_embedding_model(tenant_id: str, model_name: Optional[str] = None) -> tup Returns: Tuple of (embedding model instance or None, model_id or None) """ - # If model_name is provided, find the model by display_name if model_name: try: - model = get_model_by_display_name(model_name, tenant_id) - if model and model.get("model_type") in ["embedding", "multi_embedding"]: - model_config = { - "model_repo": model.get("model_repo", ""), - "model_name": model["model_name"], - "api_key": model.get("api_key", ""), - "base_url": model.get("base_url", ""), - "model_type": model.get("model_type", "embedding"), - "max_tokens": model.get("max_tokens", 1024), - "ssl_verify": model.get("ssl_verify", True), - } - model_type = model.get("model_type", "embedding") - if model_type == "multi_embedding": - embedding_model = JinaEmbedding( - api_key=model_config.get("api_key", ""), - base_url=model_config.get("base_url", ""), - model_name=get_model_name_from_config(model_config) or "", - embedding_dim=model_config.get("max_tokens", 1024), - ssl_verify=model_config.get("ssl_verify", True), - ) - else: - embedding_model = OpenAICompatibleEmbedding( - api_key=model_config.get("api_key", ""), - base_url=model_config.get("base_url", ""), - model_name=get_model_name_from_config(model_config) or "", - embedding_dim=model_config.get("max_tokens", 1024), - ssl_verify=model_config.get("ssl_verify", True), - ) - return embedding_model, model.get("model_id") + normalized_model_type = 
_normalize_model_type(model_type) + if normalized_model_type: + model = get_model_by_display_name(model_name, tenant_id, normalized_model_type) else: + model = get_model_by_display_name(model_name, tenant_id) + + if not model or model.get("model_type") not in ["embedding", "multi_embedding"]: logger.warning(f"Model '{model_name}' not found or is not an embedding model") + return None, None + + return _create_embedding_model(model), model.get("model_id") except Exception as e: logger.warning(f"Failed to get embedding model by name {model_name}: {e}") @@ -595,6 +632,7 @@ def create_knowledge_base( ingroup_permission: Optional[str] = None, group_ids: Optional[List[int]] = None, embedding_model_name: Optional[str] = None, + is_multimodal: Optional[bool] = None, ): """ Create a new knowledge base with a user-facing name and an internal Elasticsearch index name. @@ -620,7 +658,17 @@ def create_knowledge_base( """ try: # Get embedding model - use user-selected model if provided, otherwise use tenant default - embedding_model, model_id = get_embedding_model(tenant_id, embedding_model_name) + selected_model_type = None + if is_multimodal is True: + selected_model_type = "multi_embedding" + elif is_multimodal is False and embedding_model_name: + selected_model_type = "embedding" + + embedding_model, model_id = get_embedding_model( + tenant_id, + embedding_model_name, + selected_model_type + ) # Determine the embedding model name to save: use user-provided name if available, # otherwise use the model's display name @@ -855,7 +903,9 @@ def list_indices( Permission logic: - SU: All knowledgebases visible, all editable - ADMIN: Knowledgebases from same tenant visible, all editable - - USER/DEV: Knowledgebases where user belongs to intersecting groups, permission determined by: + - DEV on ASSET_OWNER-scoped records: all visible, read-only (READ_ONLY) + - SU/ADMIN/SPEED cross-tenant view of ASSET_OWNER records: read-only + - USER/DEV (non-ASSET_OWNER records): group intersection 
required; permission by: * If user is creator: editable * If ingroup_permission=EDIT: editable * If ingroup_permission=READ_ONLY: read-only @@ -887,7 +937,9 @@ def list_indices( es_indices_list = vdb_core.get_user_indices(pattern) # Get all knowledgebase records from database (for cleanup and permission checking) - all_db_records = get_knowledge_info_by_tenant_id(target_tenant_id) + all_db_records = get_knowledge_info_by_tenant_id( + target_tenant_id + ) # Filter visible knowledgebases based on user role and permissions visible_knowledgebases = [] @@ -903,6 +955,8 @@ def list_indices( # Check permission based on user role permission = None + record_tenant_id = str(record.get("tenant_id") or "") + is_asset_owner_record = record_tenant_id == ASSET_OWNER_TENANT_ID # Fallback logic: if user_id equals user_tenant_id, treat as legacy admin user # even if user_role is None or empty @@ -914,7 +968,12 @@ def list_indices( effective_user_role = "SPEED" logger.info("User under SPEED version is treated as admin") - if effective_user_role in ["SU", "ADMIN", "SPEED"]: + if is_asset_owner_record: + if effective_user_role in ["ASSET_OWNER"]: + permission = PERMISSION_EDIT + elif effective_user_role in ["SU", "ADMIN", "SPEED", "DEV"]: + permission = PERMISSION_READ + elif effective_user_role in ["SU", "ADMIN", "SPEED", "ASSET_OWNER"]: # SU, ADMIN and SPEED roles can see all knowledgebases permission = PERMISSION_EDIT elif effective_user_role in ["USER", "DEV"]: @@ -980,6 +1039,11 @@ def list_indices( model_name_is_none_list.append(index_name) # Build response + visible_knowledgebases = postprocess_knowledge_visibility( + visible_knowledgebases, + caller_role=user_role, + caller_tenant_id=target_tenant_id, + ) indices = [record["index_name"] for record in visible_knowledgebases] response = { @@ -1002,6 +1066,7 @@ def list_indices( model_id = record.get("embedding_model_id") tenant_id = record.get("tenant_id") or target_tenant_id embedding_model_display_name = 
_get_embedding_model_display_name(model_id, tenant_id) + is_multimodal = _is_multimodal_by_model_id(model_id, tenant_id) stats_info.append({ # Internal index name (used as ID) @@ -1013,6 +1078,7 @@ def list_indices( # knowledge source and ingroup permission from DB record "knowledge_sources": record["knowledge_sources"], "ingroup_permission": record["ingroup_permission"], + "is_multimodal": is_multimodal, "tenant_id": record.get("tenant_id"), # Embedding model info: display_name from model_id "embedding_model_name": embedding_model_display_name or record.get("embedding_model_name", ""), @@ -1122,12 +1188,27 @@ def index_documents( "author": author, "date": date, "content": text, - "process_source": "Unstructured", + "process_source": metadata.get("process_source", "Unstructured"), "file_size": file_size, "create_time": create_time, "languages": metadata.get("languages", []), "embedding_model_name": embedding_model_name } + + image_url = metadata.get("image_url", "") + if len(image_url) > 0: + # Fetch image bytes from MinIO (supports s3://bucket/key or /bucket/key) + try: + file_stream = get_file_stream( + object_name=image_url) + if file_stream is None: + raise FileNotFoundError( + f"Unable to fetch file from URL: {image_url}") + document["image_bytes"] = file_stream.read() + except Exception as e: + logger.error( + f"Failed to fetch file from {image_url}: {e}") + raise documents.append(document) @@ -1148,8 +1229,9 @@ def index_documents( 'tenant_id') if knowledge_record else None if tenant_id: + model_type = "EMBEDDING_ID" if embedding_model.model_type == "text" else "MULTI_EMBEDDING_ID" model_config = tenant_config_manager.get_model_config( - key="EMBEDDING_ID", tenant_id=tenant_id) + key=model_type, tenant_id=tenant_id) embedding_batch_size = model_config.get("chunk_batch", 10) if embedding_batch_size is None: embedding_batch_size = 10 @@ -1237,35 +1319,33 @@ async def list_files( """ try: files_map: Dict[str, Dict[str, Any]] = {} - # Get existing files from ES 
+ total_start_time = time.time() + + logger.info(f"[list_files] index={index_name}, include_chunks={include_chunks}") + + # Step 1: Get existing files from ES (includes chunk_count via aggregation) + step1_start = time.time() existing_files = vdb_core.get_documents_detail(index_name) + step1_duration = time.time() - step1_start + logger.info(f"[list_files:step1] ES get_documents_detail: {len(existing_files)} files in {step1_duration:.3f}s") - # Get unique celery files list and the status of each file + # Step 2: Get celery task statuses from external service + step2_start = time.time() celery_task_files = await get_all_files_status(index_name) + step2_duration = time.time() - step2_start + logger.info(f"[list_files:step2] Celery task status: {len(celery_task_files)} tasks in {step2_duration:.3f}s") - # For files already stored in ES, add to files list + # Step 3: Build files_map from ES data + step3_start = time.time() for file_info in existing_files: utc_create_time_str = file_info.get('create_time', '') - # Try to parse the create_time string, fallback to current timestamp if format is invalid try: utc_create_timestamp = datetime.strptime(utc_create_time_str, '%Y-%m-%dT%H:%M:%S').replace( tzinfo=timezone.utc).timestamp() except (ValueError, TypeError): utc_create_timestamp = time.time() - # Always re-query chunk count to ensure accuracy (aggregation may be stale) path_or_url = file_info.get('path_or_url') - chunk_count = file_info.get('chunk_count', 0) - try: - count_result = vdb_core.client.count( - index=index_name, - body={"query": {"term": {"path_or_url": path_or_url}}} - ) - chunk_count = count_result.get("count", chunk_count) - except Exception as count_err: - logger.warning( - f"Failed to get chunk count for {path_or_url}: {count_err}, using aggregation value {chunk_count}") - file_data = { 'path_or_url': path_or_url, 'file': file_info.get('filename', ''), @@ -1273,65 +1353,40 @@ async def list_files( 'create_time': int(utc_create_timestamp * 1000), 
'status': "COMPLETED", 'latest_task_id': '', - 'chunk_count': chunk_count, + 'chunk_count': file_info.get('chunk_count', 0), 'error_reason': None, 'has_error_info': False } files_map[path_or_url] = file_data + step3_duration = time.time() - step3_start + logger.info(f"[list_files:step3] Build files_map from ES: {len(existing_files)} files in {step3_duration:.3f}s") - # For files not yet stored in ES (files currently being processed) + # Step 4: Merge celery task data (Redis progress already fetched in get_all_files_status) + step4_start = time.time() + celery_file_count = 0 for path_or_url, status_info in celery_task_files.items(): - status_dict = status_info if isinstance( - status_info, dict) else {} + celery_file_count += 1 + status_dict = status_info if isinstance(status_info, dict) else {} - # Get source_type and original_filename, with defaults - source_type = status_dict.get('source_type') if status_dict.get( - 'source_type') else 'minio' + source_type = status_dict.get('source_type') if status_dict.get('source_type') else 'minio' original_filename = status_dict.get('original_filename') + filename = original_filename or (os.path.basename(path_or_url) if path_or_url else '') - # Determine the filename - filename = original_filename or ( - os.path.basename(path_or_url) if path_or_url else '') - - # Safely get file size; default to 0 on any error file_size = 0 if path_or_url in files_map: file_size = files_map[path_or_url].get('file_size', 0) else: try: - file_size = get_file_size( - source_type or 'minio', path_or_url) + file_size = get_file_size(source_type or 'minio', path_or_url) except Exception as size_err: - logger.error( - f"Failed to get file size for '{path_or_url}': {size_err}") + logger.error(f"Failed to get file size for '{path_or_url}': {size_err}") file_size = 0 - # Get progress from status_dict first, then try Redis for real-time updates + # Get progress from celery_task_files (already includes Redis batch data) processed_chunks = 
status_dict.get('processed_chunks') total_chunks = status_dict.get('total_chunks') task_id = status_dict.get('latest_task_id', '') - # Always try to get latest progress from Redis if task_id exists - # Redis has the most up-to-date progress during vectorization - if task_id: - try: - redis_service = get_redis_service() - progress_info = redis_service.get_progress_info( - task_id) - if progress_info: - redis_processed = progress_info.get( - 'processed_chunks') - redis_total = progress_info.get('total_chunks') - if redis_processed is not None: - processed_chunks = redis_processed - if redis_total is not None: - total_chunks = redis_total - logger.debug( - f"Retrieved progress from Redis for task {task_id}: {processed_chunks}/{total_chunks}") - except Exception as e: - logger.debug( - f"Failed to get progress from Redis for task {task_id}: {str(e)}") - if path_or_url in files_map: file_data = files_map[path_or_url] else: @@ -1346,13 +1401,12 @@ async def list_files( } files_map[path_or_url] = file_data - file_data['status'] = status_dict.get('state', file_data.get( - 'status', 'UNKNOWN')) + file_data['status'] = status_dict.get('state', file_data.get('status', 'UNKNOWN')) file_data['latest_task_id'] = task_id file_data['processed_chunk_num'] = processed_chunks file_data['total_chunk_num'] = total_chunks - # Get error reason for failed documents + # Get error reason for failed documents (fetch from Redis batch if needed) if task_id and status_dict.get('state') in ['PROCESS_FAILED', 'FORWARD_FAILED']: try: redis_service = get_redis_service() @@ -1360,17 +1414,20 @@ async def list_files( if error_reason: file_data['error_reason'] = error_reason file_data['has_error_info'] = True - except Exception as e: - logger.debug( - f"Failed to get error info for task {task_id}: {str(e)}") + except Exception: + pass # Error info is optional, don't fail the request + step4_duration = time.time() - step4_start + logger.info(f"[list_files:step4] Merge celery tasks: {celery_file_count} 
tasks in {step4_duration:.3f}s") files = list(files_map.values()) + logger.info(f"[list_files:step4] Total files built: {len(files)}") # Unified chunks processing for all files if include_chunks: - # Prepare msearch body for all completed files + step5_start = time.time() completed_files_map = { f['path_or_url']: f for f in files if f['status'] == "COMPLETED"} + completed_count = len(completed_files_map) msearch_body = [] for path_or_url in completed_files_map.keys(): @@ -1381,7 +1438,6 @@ async def list_files( "_source": ["id", "title", "content", "create_time"] }) - # Initialize chunks for all files for file_data in files: file_data['chunks'] = [] file_data['chunk_count'] = file_data.get('chunk_count', 0) @@ -1413,46 +1469,25 @@ async def list_files( }) file_data['chunks'] = chunks - # Get accurate chunk count using count query instead of len(chunks) - # because msearch may have size limits - try: - count_result = vdb_core.client.count( - index=index_name, - body={ - "query": {"term": {"path_or_url": file_path}}} - ) - file_data['chunk_count'] = count_result.get( - "count", len(chunks)) - except Exception as count_err: - logger.warning( - f"Failed to get chunk count for {file_path}: {count_err}, using len(chunks)") - file_data['chunk_count'] = len(chunks) + # chunk_count from aggregation is already accurate + # no need for additional count queries except Exception as e: logger.error( f"Error during msearch for chunks: {str(e)}") + step5_duration = time.time() - step5_start + logger.info(f"[list_files:step5] ES msearch chunks: {completed_count} files in {step5_duration:.3f}s") else: - # When include_chunks=False, ensure chunk_count is accurate for completed files + # When include_chunks=False, chunk_count is already accurate from ES aggregation + # No need for additional count queries - doc_count from terms aggregation is accurate for file_data in files: file_data['chunks'] = [] - if file_data.get('status') == "COMPLETED": - # Always re-query chunk count for 
completed files to ensure accuracy - try: - count_result = vdb_core.client.count( - index=index_name, - body={ - "query": {"term": {"path_or_url": file_data.get('path_or_url')}}} - ) - file_data['chunk_count'] = count_result.get( - "count", 0) - except Exception as count_err: - logger.warning( - f"Failed to get chunk count for {file_data.get('path_or_url')}: {count_err}") - file_data['chunk_count'] = file_data.get( - 'chunk_count', 0) - else: - file_data['chunk_count'] = file_data.get( - 'chunk_count', 0) + # chunk_count is already set from ES aggregation (doc_count) + file_data['chunk_count'] = file_data.get('chunk_count', 0) + + total_duration = time.time() - total_start_time + logger.info(f"[list_files:complete] index={index_name}, total_files={len(files)}, " + f"total_duration={total_duration:.3f}s") return {"files": files} @@ -1867,6 +1902,7 @@ def update_chunk( chunk_request: ChunkUpdateRequest, vdb_core: VectorDatabaseCore = Depends(get_vector_db_core), user_id: Optional[str] = None, + tenant_id: Optional[str] = None, ): """ Update a chunk document. 
diff --git a/backend/services/voice_service.py b/backend/services/voice_service.py index 80d6264db..5a08e1f8b 100644 --- a/backend/services/voice_service.py +++ b/backend/services/voice_service.py @@ -1,14 +1,19 @@ +import asyncio import logging from typing import Any, Dict, Optional from nexent.core.models.stt_model import BaseSTTModel +from nexent.core.models.tts_model import BaseTTSModel from nexent.core.models.volc_stt_model import VolcSTTConfig, VolcSTTModel from nexent.core.models.ali_stt_model import AliSTTConfig, AliSTTModel +from nexent.core.models.volc_tts_model import VolcTTSConfig, VolcTTSModel +from nexent.core.models.ali_tts_model import AliTTSConfig, AliTTSModel -from consts.const import TEST_PCM_PATH +from consts.const import TEST_VOICE_PATH, TEST_PCM_PATH from consts.exceptions import ( VoiceServiceException, STTConnectionException, + TTSConnectionException, ) from database.model_management_db import get_model_records from utils.config_utils import tenant_config_manager @@ -17,7 +22,7 @@ class VoiceService: - """Voice service that handles STT operations""" + """Voice service that handles STT and TTS operations""" def _get_stt_model_from_config( self, @@ -44,9 +49,11 @@ def _get_stt_model_from_config( Returns: STT model instance based on configuration """ + # Default to Ali Cloud if model_factory is not specified or is dashscope use_volc = model_factory and model_factory.lower() in ["volc", "volcano", "volcengine", "火山引擎"] if use_volc: + # Use Volcano Engine STT volc_config = VolcSTTConfig( appid=model_appid or "", access_token=access_token or "", @@ -56,6 +63,7 @@ def _get_stt_model_from_config( ) return VolcSTTModel(volc_config, TEST_PCM_PATH) else: + # Use Ali Cloud STT (default) ali_config = AliSTTConfig( api_key=api_key or "", model=model_name or "qwen3-asr-flash-realtime", @@ -84,6 +92,7 @@ def _get_stt_model_from_tenant_config( STT model instance based on tenant's configuration """ try: + # Get STT model configuration from tenant config 
stt_config = tenant_config_manager.get_model_config(tenant_id, "stt") if stt_config: @@ -104,6 +113,7 @@ def _get_stt_model_from_tenant_config( language=language ) + # Try to get from model records in database model_records = get_model_records({"model_type": "stt"}, tenant_id) if model_records: record = model_records[0] @@ -131,6 +141,114 @@ def _get_stt_model_from_tenant_config( logger.error(f"Error getting STT model config for tenant {tenant_id}: {str(e)}") return self._get_stt_model_from_config(language=language) + def _get_tts_model_from_config( + self, + model_factory: Optional[str] = None, + api_key: Optional[str] = None, + model_appid: Optional[str] = None, + access_token: Optional[str] = None, + speed_ratio: float = 1.0, + base_url: Optional[str] = None, + model: Optional[str] = None + ) -> BaseTTSModel: + """ + Get the appropriate TTS model based on model factory configuration. + + Args: + model_factory: Model factory/vendor name + api_key: API key (for Ali TTS) + model_appid: Application ID (for Volcano TTS) + access_token: Access token (for Volcano TTS) + speed_ratio: Speech speed ratio + base_url: Custom WebSocket URL (optional) + model: Model name (for Ali TTS) + + Returns: + TTS model instance based on configuration + """ + use_volc = model_factory and model_factory.lower() in ["volc", "volcano", "volcengine", "火山引擎"] + + if use_volc: + volc_config = VolcTTSConfig( + appid=model_appid or "", + token=access_token or "", + speed_ratio=speed_ratio, + ws_url=base_url or None, + ) + return VolcTTSModel(volc_config) + else: + ali_config = AliTTSConfig( + api_key=api_key or "", + model=model or "qwen3-tts-flash", + voice="Cherry", + speech_rate=speed_ratio, + ws_url=base_url if base_url else None + ) + return AliTTSModel(ali_config) + + def _get_tts_model_from_tenant_config( + self, + tenant_id: str + ) -> BaseTTSModel: + """ + Get TTS model based on tenant's model configuration. 
+ + Args: + tenant_id: Tenant ID + + Returns: + TTS model instance based on tenant's configuration + """ + try: + tts_config = tenant_config_manager.get_model_config(tenant_id, "tts") + + if tts_config: + model_factory = tts_config.get("model_factory", "") + api_key = tts_config.get("api_key", "") + model_appid = tts_config.get("model_appid", "") + access_token_val = tts_config.get("access_token", "") + speed_ratio = float(tts_config.get("speed_ratio", 1.0)) + base_url = tts_config.get("base_url", "") + model = tts_config.get("model") or tts_config.get("model_name", "") + + return self._get_tts_model_from_config( + model_factory=model_factory, + api_key=api_key, + model_appid=model_appid, + access_token=access_token_val, + speed_ratio=speed_ratio, + base_url=base_url if base_url else None, + model=model if model else None + ) + + model_records = get_model_records({"model_type": "tts"}, tenant_id) + if model_records: + record = model_records[0] + model_factory = record.get("model_factory", "") + api_key = record.get("api_key", "") + model_appid = record.get("model_appid", "") + access_token_val = record.get("access_token", "") + speed_ratio = float(record.get("speed_ratio", 1.0)) + base_url = record.get("base_url", "") + model = record.get("model_name", "") + + return self._get_tts_model_from_config( + model_factory=model_factory, + api_key=api_key, + model_appid=model_appid, + access_token=access_token_val, + speed_ratio=speed_ratio, + base_url=base_url if base_url else None, + model=model if model else None + ) + + logger.warning(f"No TTS model configuration found for tenant {tenant_id}, using default config") + return self._get_tts_model_from_config() + + except Exception as e: + logger.error(f"Error getting TTS model config for tenant {tenant_id}: {str(e)}") + return self._get_tts_model_from_config() + async def start_stt_streaming_session( self, websocket, @@ -169,6 +287,7 @@ async def start_stt_streaming_session( else: logger.warning("No stt_config provided, 
will use tenant model config if available") + # Get STT model based on configuration if model_factory or api_key or model_appid: stt_model = self._get_stt_model_from_config( model_factory=model_factory, @@ -193,6 +312,153 @@ async def start_stt_streaming_session( logger.error(f"STT streaming session failed: {str(e)}") raise STTConnectionException(f"STT streaming failed: {str(e)}") from e + async def generate_tts_speech( + self, + text: str, + stream: bool = True, + tts_config: Optional[Dict[str, Any]] = None, + tenant_id: Optional[str] = None, + model_name_override: Optional[str] = None + ) -> Any: + """ + Generate TTS speech from text + + Args: + text: Text to convert to speech + stream: Whether to stream the audio or return complete audio + tts_config: TTS configuration dict from client (preferred) + tenant_id: Tenant ID for model lookup + model_name_override: Model name override + + Returns: + Audio data (streaming or complete) + + Raises: + TTSConnectionException: If TTS generation fails + """ + if not text: + raise VoiceServiceException("No text provided for TTS generation") + + try: + logger.info(f"Generating TTS speech for text: {text[:50]}...") + + model_factory = None + api_key = None + model_appid = None + access_token = None + speed_ratio = 1.0 + base_url = None + model_name = None + + if tts_config: + model_factory = tts_config.get("model_factory") + api_key = tts_config.get("api_key") or tts_config.get("apiKey") + model_appid = tts_config.get("model_appid") or tts_config.get("appid") + access_token = tts_config.get("access_token") + speed_ratio = float(tts_config.get("speed_ratio", 1.0)) + base_url = tts_config.get("base_url") or tts_config.get("baseUrl") + model_name = tts_config.get("model") or tts_config.get("model_name") + + # If model_name is provided directly, use it + effective_model = model_name_override or model_name + logger.info(f"TTS config - api_key: {bool(api_key)}, model_name_override: {model_name_override}, " + f"model_name from config: 
{model_name}, effective_model: {effective_model}") + + + # Determine model factory and create appropriate TTS model + use_volc = model_factory and model_factory.lower() in ["volc", "volcano", "volcengine", "火山引擎"] + + if use_volc: + # Use Volcano TTS + tts_model = self._get_tts_model_from_config( + model_factory=model_factory, + api_key=api_key, + model_appid=model_appid, + access_token=access_token, + speed_ratio=speed_ratio, + base_url=base_url, + model=effective_model + ) + logger.info(f"TTS model created: Volcano TTS (factory={model_factory})") + elif api_key: + # Use Ali TTS with provided api_key + tts_model = self._get_tts_model_from_config( + model_factory=model_factory, + api_key=api_key, + model_appid=model_appid, + access_token=access_token, + speed_ratio=speed_ratio, + base_url=base_url, + model=effective_model + ) + logger.info(f"TTS model created: Ali TTS (api_key provided)") + elif tenant_id: + tts_model = self._get_tts_model_from_tenant_config(tenant_id) + logger.info(f"TTS model created from tenant config for tenant_id={tenant_id}") + else: + logger.warning("No api_key, model_name, or tenant_id provided, using default TTS model") + tts_model = self._get_tts_model_from_config() + + speech_result = await tts_model.generate_speech(text, stream=stream) + return speech_result + except Exception as e: + logger.error(f"TTS generation failed: {str(e)}") + raise TTSConnectionException(f"TTS generation failed: {str(e)}") from e + + async def stream_tts_to_websocket( + self, + websocket, + text: str, + tenant_id: Optional[str] = None, + model_name: Optional[str] = None, + tts_config: Optional[Dict[str, Any]] = None, + ) -> None: + """ + Stream TTS audio to WebSocket with proper error handling and fallback + + Args: + websocket: WebSocket connection to stream to + text: Text to convert to speech + tenant_id: Optional tenant ID for model selection + model_name: Optional model name override + tts_config: Optional TTS configuration dict with model_factory, 
api_key, model_appid, access_token, base_url + + Raises: + TTSConnectionException: If TTS service connection fails + VoiceServiceException: If TTS streaming fails + """ + speech_result = await self.generate_tts_speech( + text, + stream=True, + tenant_id=tenant_id, + model_name_override=model_name, + tts_config=tts_config + ) + + # Check if it's an async iterator or a regular iterable + if hasattr(speech_result, '__aiter__'): + # It's an async iterator, use async for + async for chunk in speech_result: + if websocket.client_state.name == "CONNECTED": + await websocket.send_bytes(chunk) + else: + break + elif hasattr(speech_result, '__iter__'): + # It's a regular iterator, use normal for + for chunk in speech_result: + if websocket.client_state.name == "CONNECTED": + await websocket.send_bytes(chunk) + else: + break + else: + # It's a single chunk, send it directly + if websocket.client_state.name == "CONNECTED": + await websocket.send_bytes(speech_result) + + # Send end marker after successful TTS generation + if websocket.client_state.name == "CONNECTED": + await websocket.send_json({"status": "completed"}) + async def check_stt_connectivity( self, model_factory: Optional[str] = None, @@ -222,6 +488,7 @@ async def check_stt_connectivity( STTConnectionException: If connectivity check fails """ try: + # Get STT model based on factory stt_model = self._get_stt_model_from_config( model_factory=model_factory, model_name=model, @@ -232,6 +499,7 @@ async def check_stt_connectivity( language=language ) + connected = await stt_model.check_connectivity() if not connected: @@ -244,6 +512,57 @@ async def check_stt_connectivity( logger.error(f"STT connectivity check failed: {str(e)}") raise STTConnectionException(f"STT connectivity check failed: {str(e)}") from e + async def check_tts_connectivity( + self, + model_factory: Optional[str] = None, + api_key: Optional[str] = None, + model_appid: Optional[str] = None, + access_token: Optional[str] = None, + speed_ratio: float = 1.0, 
+ base_url: Optional[str] = None, + model: Optional[str] = None + ) -> bool: + """ + Check TTS service connectivity. + + Args: + model_factory: Model factory/vendor name (e.g., "volc", "dashscope") + api_key: API key for Ali TTS + model_appid: Application ID for Volcano TTS + access_token: Access token for Volcano TTS + speed_ratio: Speech speed ratio + base_url: Custom WebSocket URL (optional) + model: Model name (e.g., "qwen3-tts-flash") + + Returns: + bool: True if TTS service is connected, False otherwise + + Raises: + TTSConnectionException: If connectivity check fails + """ + try: + tts_model = self._get_tts_model_from_config( + model_factory=model_factory, + api_key=api_key, + model_appid=model_appid, + access_token=access_token, + speed_ratio=speed_ratio, + base_url=base_url, + model=model + ) + + connected = await tts_model.check_connectivity() + if not connected: + msg = "TTS service connectivity check returned False" + logger.warning(msg) + raise TTSConnectionException(msg) + return connected + except TTSConnectionException: + raise + except Exception as e: + logger.error(f"TTS connectivity check failed: {str(e)}") + raise TTSConnectionException(f"TTS connectivity check failed: {str(e)}") from e + async def check_voice_connectivity( self, model_type: str, @@ -253,39 +572,61 @@ async def check_voice_connectivity( Check voice service connectivity based on model type. 
Args: - model_type: Type of model to check ('stt' only) + model_type: Type of model to check ('stt' or 'tts') stt_config: Optional STT configuration dict Returns: - bool: True if the service is connected, False otherwise + bool: True if the specified service is connected, False otherwise Raises: VoiceServiceException: If model_type is invalid STTConnectionException: If STT connectivity check fails + TTSConnectionException: If TTS connectivity check fails """ - if model_type != "stt": - logger.error(f"Unsupported model type: {model_type}") - raise VoiceServiceException(f"Unsupported model type: {model_type}") - try: - model_factory = stt_config.get("model_factory") if stt_config else None - api_key = stt_config.get("api_key") if stt_config else None - model_appid = stt_config.get("model_appid") if stt_config else None - access_token = stt_config.get("access_token") if stt_config else None - language = stt_config.get("language", "zh") if stt_config else "zh" - model = stt_config.get("model", "qwen3-asr-flash-realtime") if stt_config else "qwen3-asr-flash-realtime" - base_url = stt_config.get("base_url") if stt_config else None - - return await self.check_stt_connectivity( - model_factory=model_factory, - api_key=api_key, - model_appid=model_appid, - access_token=access_token, - language=language, - model=model, - base_url=base_url - ) - except STTConnectionException: + if model_type == 'stt': + model_factory = stt_config.get("model_factory") if stt_config else None + api_key = stt_config.get("api_key") if stt_config else None + model_appid = stt_config.get("model_appid") if stt_config else None + access_token = stt_config.get("access_token") if stt_config else None + language = stt_config.get("language", "zh") if stt_config else "zh" + model = stt_config.get("model", "qwen3-asr-flash-realtime") if stt_config else "qwen3-asr-flash-realtime" + base_url = stt_config.get("base_url") if stt_config else None + + return await self.check_stt_connectivity( + 
model_factory=model_factory, + api_key=api_key, + model_appid=model_appid, + access_token=access_token, + language=language, + model=model, + base_url=base_url + ) + elif model_type == 'tts': + model_factory = stt_config.get("model_factory") if stt_config else None + api_key = stt_config.get("api_key") if stt_config else None + model_appid = stt_config.get("model_appid") if stt_config else None + access_token = stt_config.get("access_token") if stt_config else None + speed_ratio = float(stt_config.get("speed_ratio", 1.0)) if stt_config else 1.0 + base_url = stt_config.get("base_url") if stt_config else None + model = stt_config.get("model", "qwen3-tts-flash") if stt_config else "qwen3-tts-flash" + + connected = await self.check_tts_connectivity( + model_factory=model_factory, + api_key=api_key, + model_appid=model_appid, + access_token=access_token, + speed_ratio=speed_ratio, + base_url=base_url, + model=model + ) + if not connected: + raise TTSConnectionException("TTS service connectivity check returned False") + return connected + else: + logger.error(f"Unknown model type: {model_type}") + raise VoiceServiceException(f"Unknown model type: {model_type}") + except (STTConnectionException, TTSConnectionException): raise except Exception as e: logger.error(f"Voice service connectivity check failed: {str(e)}") @@ -297,7 +638,12 @@ async def check_voice_connectivity( def get_voice_service() -> VoiceService: - """Get the global voice service instance.""" + """ + Get the global voice service instance + + Returns: + VoiceService: The global voice service instance + """ global _voice_service_instance if _voice_service_instance is None: _voice_service_instance = VoiceService() diff --git a/backend/utils/auth_utils.py b/backend/utils/auth_utils.py index 543d49693..04e81e6e3 100644 --- a/backend/utils/auth_utils.py +++ b/backend/utils/auth_utils.py @@ -3,13 +3,15 @@ import hmac import hashlib from datetime import datetime, timedelta -from typing import Dict, Optional, Tuple 
+from typing import Any, Dict, Optional, Tuple import jwt from fastapi import Request from supabase import create_client from consts.const import ( + ASSET_OWNER_ROLE, + ASSET_OWNER_TENANT_ID, DEFAULT_TENANT_ID, DEFAULT_USER_ID, IS_SPEED_MODE, @@ -99,7 +101,8 @@ def verify_aksk_signature( if access_key != expected_access_key: return False - expected_sig = calculate_hmac_signature(secret_key, access_key, timestamp, body) + expected_sig = calculate_hmac_signature( + secret_key, access_key, timestamp, body) return hmac.compare_digest(expected_sig, signature) @@ -228,6 +231,24 @@ def get_user_and_tenant_by_access_key(access_key: str) -> Dict[str, str]: } +def resolve_tenant_id_from_user_tenant_record(user_tenant: Dict[str, Any]) -> str: + """ + Resolve the effective tenant_id from a user_tenant_t record. + + ASSET_OWNER users may have an empty legacy tenant_id; map them to the + virtual ASSET_OWNER tenant. Fall back to DEFAULT_TENANT_ID when unset. + """ + tenant_id = user_tenant.get("tenant_id") + if tenant_id: + return tenant_id + + user_role = (user_tenant.get("user_role") or "").upper() + if user_role == ASSET_OWNER_ROLE: + return ASSET_OWNER_TENANT_ID + + return DEFAULT_TENANT_ID + + def get_supabase_client(): """Get Supabase client instance with regular key (user-context operations).""" try: @@ -263,7 +284,8 @@ def get_jwt_expiry_seconds(token: str) -> int: return 10 * 365 * 24 * 60 * 60 # Ensure token is pure JWT, remove possible Bearer prefix jwt_token = ( - token.replace("Bearer ", "") if token.startswith("Bearer ") else token + token.replace("Bearer ", "") if token.startswith( + "Bearer ") else token ) # If debug expiration time is set, return directly for quick debugging @@ -372,7 +394,8 @@ def get_current_user_id(authorization: Optional[str] = None) -> tuple[str, str]: """ # In speed mode, allow unauthenticated access with default user for demo/dev if IS_SPEED_MODE: - logging.debug("Speed mode detected - returning default user ID and tenant ID") + 
logging.debug( + "Speed mode detected - returning default user ID and tenant ID") return DEFAULT_USER_ID, DEFAULT_TENANT_ID # In normal mode, missing auth header means unauthorized - return 401, not default user diff --git a/backend/utils/context_utils.py b/backend/utils/context_utils.py new file mode 100644 index 000000000..740bf66df --- /dev/null +++ b/backend/utils/context_utils.py @@ -0,0 +1,1332 @@ +"""Context component building utilities for system prompt assembly. + +Provides build_context_components() to convert agent configuration data +into ContextComponent instances for use with ContextManager. + +This module implements the piecewise component architecture where each +semantic section of the system prompt is emitted by a dedicated function, +allowing ContextManager to assemble them in the correct order. +""" + +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict, List, Optional + +if TYPE_CHECKING: + from nexent.core.agents.agent_model import ( + ContextComponent, + ToolsComponent, + SkillsComponent, + MemoryComponent, + KnowledgeBaseComponent, + ManagedAgentsComponent, + ExternalAgentsComponent, + SystemPromptComponent, + ToolConfig, + AgentConfig, + ExternalA2AAgentConfig, + ) + + +# ============================================================================= +# SECTION 1: Long-text format functions (expanded from Jinja2 templates) +# Each function accepts language and is_manager params for variant-specific text +# ============================================================================= + + +def _format_memory_context( + memory_list: List[Any], + language: str = "zh", +) -> str: + """Format memory search results with full usage guidelines. + + Jinja2 templates have ~30 lines of "记忆使用准则" text that must be + included here for semantic equivalence. 
+ """ + if not memory_list: + return "" + + # Group memories by level in correct order: tenant, user_agent, user, agent + level_order = ["tenant", "user_agent", "user", "agent"] + memory_by_level: Dict[str, List[Any]] = {} + for mem in memory_list: + if isinstance(mem, dict): + level = mem.get("memory_level", "user") + if level not in memory_by_level: + memory_by_level[level] = [] + memory_by_level[level].append(mem) + + lines = [] + + if language == "zh": + lines.append("### 上下文记忆") + lines.append("基于之前的交互记录,以下是按作用域和重要程度排序的最相关记忆:") + lines.append("") + + for level in level_order: + if level in memory_by_level: + level_title = { + "tenant": "Tenant", + "user_agent": "User_agent", + "user": "User", + "agent": "Agent", + }.get(level, level.title()) + lines.append(f"**{level_title} 层级记忆:**") + for item in memory_by_level[level]: + content = item.get("memory", "") or item.get("content", "") + score = item.get("score", 0.0) + lines.append(f"- {content} `({score:.2f})`") + lines.append("") + + lines.append("**记忆使用准则:**") + lines.append("1. **冲突处理优先级**:当记忆信息存在矛盾时,严格按以下顺序处理:") + lines.append("- **最优先**:在上述列表中位置靠前的记忆具有优先权") + lines.append("- **次优先**:当前对话内容与记忆直接冲突时,以当前对话为准") + lines.append("- **次优先**:相关度分数越高,表示记忆越可信") + lines.append("") + lines.append("2. **记忆整合最佳实践**:") + lines.append(" - 自然地将相关记忆融入回答中,避免显式使用\"根据记忆\"、\"根据上下文\"或\"根据交互记忆\"等语言") + lines.append(" - 利用记忆信息调整回答的语调、方式和技术深度以适应用户") + lines.append(" - 让记忆指导您对用户偏好和上下文的理解") + lines.append("") + lines.append("3. 
**级别特定说明**:") + lines.append(" - **tenant(租户级)**:组织层面的约束和政策(不可违背)") + lines.append(" - **user_agent(用户-代理级)**:特定用户在代理中的交互模式和既定工作流程") + lines.append(" - **user(用户级)**:用户的个人偏好、技能水平和历史上下文") + lines.append(" - **agent(代理级)**:您的既定行为模式和能力特征,通常对所有用户共享(重要性最低)") + else: + lines.append("### Contextual Memory") + lines.append("Based on previous interactions, here are the most relevant memories organized by scope and importance:") + lines.append("") + + for level in level_order: + if level in memory_by_level: + lines.append(f"**{level.title()} Level Memory:**") + for item in memory_by_level[level]: + content = item.get("memory", "") or item.get("content", "") + score = item.get("score", 0.0) + lines.append(f"- {content} `({score:.2f})`") + lines.append("") + + lines.append("**Memory Usage Guidelines:**") + lines.append("1. **Conflict Resolution Priority**: When memories contradict each other, follow this strict order:") + lines.append(" - **Primary**: Information appearing EARLIER in the above numbered list takes precedence") + lines.append(" - **Secondary**: Current conversation context overrides historical memory when directly contradicted") + lines.append(" - **Tertiary**: Higher relevance scores indicate more trustworthy information") + lines.append("") + lines.append("2. **Memory Integration Best Practices**:") + lines.append(" - Seamlessly weave relevant memories into your responses without explicitly saying \"I remember\", \"based on memory\" or \"based on context\"") + lines.append(" - Use memories to inform your tone, approach, and technical level appropriate for this user") + lines.append(" - Let memories guide your assumptions about user preferences and context") + lines.append("") + lines.append("3. 
**Level-Specific Considerations**:") + lines.append(" - **tenant**: Organizational constraints and policies (non-negotiable)") + lines.append(" - **user_agent**: Specific interaction dynamics and established workflow patterns") + lines.append(" - **user**: Individual preferences, skills, and historical context") + lines.append(" - **agent**: Your established behavioral patterns and capabilities, usually shared by all users (least important)") + + return "\n".join(lines) + + +def _format_skills_description( + skills: List[Dict[str, str]], + language: str = "zh", +) -> str: + """Format skill descriptions with full 6-step usage process. + + Jinja2 templates have ~50 lines of "技能使用流程" text that must be + included here for semantic equivalence. + """ + if not skills: + return "" + + lines = [] + + # Build the block + skills_block_lines = [""] + for skill in skills: + name = skill.get("name", "") + desc = skill.get("description", "") + skills_block_lines.append(" ") + skills_block_lines.append(f" {name}") + skills_block_lines.append(f" {desc}") + skills_block_lines.append(" ") + skills_block_lines.append("") + skills_block = "\n".join(skills_block_lines) + + if language == "zh": + lines.append("### 可用技能") + lines.append("") + lines.append("你拥有以下技能(Skills)。技能是预定义的专业能力模块,包含详细执行指南和可选的附加脚本。") + lines.append("") + lines.append(skills_block) + lines.append("") + lines.append("**技能使用流程**:") + lines.append("1. 收到用户请求后,首先审视 `` 中每个技能的 description,判断是否有匹配的技能。") + lines.append("2. 
**加载技能**:根据不同场景选择读取方式:") + lines.append(" - **首次加载**:调用 `read_skill_md(\"skill_name\")` 读取技能的完整执行指南(默认读取 SKILL.md)") + lines.append(" - **精确读取**:如只需特定文件(如示例、参考文档),可指定 additional_files:") + lines.append(" ") + lines.append(" skill_content = read_skill_md(\"skill_name\", [\"examples.md\", \"reference/api_doc\"])") + lines.append(" print(skill_content)") + lines.append(" ") + lines.append(" 注意:当 additional_files 非空时,默认不再自动读取 SKILL.md,如需同时读取请显式指定。") + lines.append("") + lines.append(" - **加载技能配置**:如果技能需要读取配置变量,可先调用 `read_skill_config(\"skill_name\")` 读取配置字符串,通过 `json.loads` 方法转化为配置字典,再从中获取所需值:") + lines.append(" ") + lines.append(" import json") + lines.append(" config = json.loads(read_skill_config(\"skill_name\"))") + lines.append(" # 返回示例: {\"key_a\": {\"key2\": \"value2\"}, \"others\": {...}}") + lines.append(" value = config[\"key1\"][\"key2\"]") + lines.append(" print(value)") + lines.append(" ") + lines.append("") + lines.append("3. **遵循技能指南**:技能内容注入后,严格按其中的步骤执行。不要跳过技能指南中的步骤,也不要用自行编写的代码替代技能定义的流程。") + lines.append("") + lines.append("4. **执行技能脚本**:如果技能指南中引用了附加脚本(形如 ``),使用以下格式调用:") + lines.append(" 代码:") + lines.append(" ") + lines.append(" result = run_skill_script(\"skill_name\", \"script_path\")") + lines.append(" print(result)") + lines.append(" ") + lines.append(" 对于需要附加参数的脚本,需要参照脚本调用说明,将参数直接以字符串形式传递。") + lines.append(" 例如对于希望附加的参数:--param1 value1 --flag,则使用以下格式调用run_skill_script:") + lines.append(" ") + lines.append(" result = run_skill_script(\"skill_name\", \"script_path\", \"--param1 value1 --flag\")") + lines.append(" print(result)") + lines.append(" ") + lines.append(" 注意:只执行技能指南中明确声明的脚本路径,绝不自行构造脚本路径。") + lines.append("") + lines.append("5. **整合输出**:根据技能指南要求的输出格式,结合脚本执行结果生成最终回答。") + lines.append("") + lines.append("6. 
**引用场景处理**:当技能内容中出现引用标记或需要引用其他文件时,需要识别并再次调用 read_skill_md:") + lines.append(" - **引用模板识别**:注意技能内容中形如 `` 或自然语言式的引用声明(如\"详见 examples.md\"、\"请参考 reference/api_doc\")") + lines.append(" - **自动补全**:发现引用后,尝试读取被引用的文件获取更多信息") + lines.append(" - **示例**:") + lines.append(" ") + lines.append(" # 技能内容提示\"请参考 examples.md 获取详细示例\"") + lines.append(" additional_info = read_skill_md(\"skill_name\", [\"examples.md\"])") + lines.append(" print(additional_info)") + lines.append(" ") + else: + lines.append("### Available Skills") + lines.append("") + lines.append("You have the following Skills. Skills are predefined professional capability modules with detailed execution guides and optional additional scripts.") + lines.append("") + lines.append(skills_block) + lines.append("") + lines.append("**Skill Usage Process**:") + lines.append("1. After receiving a user request, first examine the description of each skill in `` to determine if there is a matching skill.") + lines.append("2. **Load Skill**: Choose the appropriate reading method based on the scenario:") + lines.append(" - **First-time load**: Call `read_skill_md(\"skill_name\")` to read the complete execution guide (defaults to reading SKILL.md)") + lines.append(" - **Precise read**: If you only need specific files (like examples, reference docs), specify additional_files:") + lines.append(" ") + lines.append(" skill_content = read_skill_md(\"skill_name\", [\"examples.md\", \"reference/api_doc\"])") + lines.append(" print(skill_content)") + lines.append(" ") + lines.append(" Note: When additional_files is non-empty, SKILL.md is no longer auto-read. 
If you need both, explicitly specify it.") + lines.append("") + lines.append(" - **Load skill config**: If the skill needs configuration variables, call `read_skill_config(\"skill_name\")` to read the config string, convert to dict via `json.loads`, then access values:") + lines.append(" ") + lines.append(" import json") + lines.append(" config = json.loads(read_skill_config(\"skill_name\"))") + lines.append(" # Example: {\"key_a\": {\"key2\": \"value2\"}, \"others\": {...}}") + lines.append(" value = config[\"key1\"][\"key2\"]") + lines.append(" print(value)") + lines.append(" ") + lines.append("") + lines.append("3. **Follow Skill Guide**: After skill content is injected, strictly follow its steps. Do not skip steps or replace with your own code.") + lines.append("") + lines.append("4. **Execute Skill Script**: If the skill guide references additional scripts (like ``), call:") + lines.append(" ") + lines.append(" result = run_skill_script(\"skill_name\", \"script_path\")") + lines.append(" print(result)") + lines.append(" ") + lines.append(" For scripts needing extra params, pass them as a command-line string per the script's calling instructions.") + lines.append(" Example for --param1 value1 --flag:") + lines.append(" ") + lines.append(" result = run_skill_script(\"skill_name\", \"script_path\", \"--param1 value1 --flag\")") + lines.append(" print(result)") + lines.append(" ") + lines.append(" Note: Only execute script paths explicitly declared in the skill guide. Never construct paths yourself.") + lines.append("") + lines.append("5. **Integrate Output**: Generate the final answer based on the skill guide's output format and script execution results.") + lines.append("") + lines.append("6. 
**Handle References**: When the skill content has reference markers or needs to reference other files, identify and call read_skill_md again:") + lines.append(" - **Reference template recognition**: Look for patterns like `` or natural-language references (\"see examples.md\", \"refer to reference/api_doc\")") + lines.append(" - **Auto-complete**: After discovering a reference, try reading the referenced file for more info") + lines.append(" - **Example**:") + lines.append(" ") + lines.append(" # Skill content says \"see examples.md for detailed examples\"") + lines.append(" additional_info = read_skill_md(\"skill_name\", [\"examples.md\"])") + lines.append(" print(additional_info)") + lines.append(" ") + + return "\n".join(lines) + + +def _format_tools_description( + tools: Dict[str, Any], + knowledge_base_summary: Optional[str] = None, + language: str = "zh", + is_manager: bool = True, +) -> str: + """Format tool descriptions with file URL usage guide. + + Jinja2 templates have ~10 lines of "文件链接使用指南" text that must be + included here for semantic equivalence. + + Note: Managed agents use different presigned_url guidance than manager agents. 
+ """ + if not tools: + no_tools_msg = "- 当前没有可用的工具" if language == "zh" else "- No tools are currently available" + return no_tools_msg + + lines = [] + + if language == "zh": + lines.append("- 你只能使用以下工具,不得使用任何其他工具:") + else: + lines.append("- You can only use the following tools and may not use any other tools:") + + for name, tool in tools.items(): + if hasattr(tool, 'description'): + desc = tool.description + inputs = tool.inputs + output_type = tool.output_type + source = getattr(tool, 'source', 'local') + else: + desc = tool.get('description', '') + inputs = tool.get('inputs', '') + output_type = tool.get('output_type', '') + source = tool.get('source', 'local') + + # MCP tools have [MCP] prefix + if source == 'mcp': + if language == "zh": + lines.append(f"- [MCP] {name}: {desc}") + lines.append(f" 接受输入: {inputs}") + lines.append(f" 返回输出类型: {output_type}") + else: + lines.append(f"- [MCP] {name}: {desc}") + lines.append(f" Accepts input: {inputs}") + lines.append(f" Returns output type: {output_type}") + else: + if language == "zh": + lines.append(f"- {name}: {desc}") + lines.append(f" 接受输入: {inputs}") + lines.append(f" 返回输出类型: {output_type}") + else: + lines.append(f"- {name}: {desc}") + lines.append(f" Accepts input: {inputs}") + lines.append(f" Returns output type: {output_type}") + + # Knowledge base summary + if knowledge_base_summary: + if language == "zh": + lines.append("- knowledge_base_search工具只能使用以下知识库索引,请根据用户问题选择最相关的一个或多个知识库索引:") + lines.append(f" {knowledge_base_summary}") + else: + lines.append("- knowledge_base_search tool can only use the following knowledge base indexes, please select the most relevant one or more knowledge base indexes based on the user's question:") + lines.append(f" {knowledge_base_summary}") + + # File URL usage guide + lines.append("") + if language == "zh": + lines.append("### 文件链接使用指南") + lines.append("当处理用户上传的文件时,请根据工具类型选择正确的 URL:") + lines.append("1. 
**调用标记为 [MCP] 的工具**(外部工具,运行在 Nexent 之外):") + if is_manager: + lines.append(" → 使用 **Download URL**(格式:`https://minio.example.com/...?token=xxx`)") + lines.append(" 原因:MCP 工具运行在外部服务,无法访问内部 S3 存储") + else: + lines.append(" → 使用 **presigned_url**(已包含代理前缀,格式:`http://.../api/nb/v1/file/fetch?presigned_url=...`)") + lines.append(" 直接使用用户上传文件信息中提供的 **presigned_url** 字段,无需拼接。") + lines.append("2. **调用其他所有工具**(内部工具,如 analyze_text_file、analyze_image 等):") + lines.append(" → 使用 **S3 URL**(格式:`s3:/nexent/attachments/xxx.pdf`)") + lines.append(" 原因:内部工具运行在 Nexent 内部,可以直接访问 MinIO 存储") + else: + lines.append("### File URL Usage Guide") + lines.append("When processing user-uploaded files, choose the correct URL based on tool type:") + lines.append("1. **Calling tools marked with [MCP]** (external tools that run outside Nexent):") + if is_manager: + lines.append(" → Use **Download URL** (format: `https://minio.example.com/...?token=xxx`)") + lines.append(" Reason: MCP tools run on external services and cannot access internal S3 storage") + else: + lines.append(" → Use **presigned_url** (already includes proxy prefix, format: `http://.../api/nb/v1/file/fetch?presigned_url=...`)") + lines.append(" Directly use the **presigned_url** field provided in the user's uploaded file info. No need to construct or append anything.") + lines.append("2. **Calling all other tools** (internal tools like analyze_text_file, analyze_image):") + lines.append(" → Use **S3 URL** (format: `s3:/nexent/attachments/xxx.pdf`)") + lines.append(" Reason: Internal tools run inside Nexent and can directly access MinIO storage") + + return "\n".join(lines) + + +def _format_managed_agents_description( + managed_agents: Dict[str, Any], + language: str = "zh", +) -> str: + """Format managed sub-agent descriptions with calling specifications. + + Jinja2 templates have ~15 lines of "内部助手调用规范" text that must be + included here for semantic equivalence. 
+ """ + if not managed_agents: + return "" + + lines = [] + + if language == "zh": + lines.append("你可以使用以下内部助手(通过函数调用方式协作):") + for name, agent in managed_agents.items(): + desc = agent.description if hasattr(agent, 'description') else agent.get('description', '') + lines.append(f" - {name}: {desc}") + lines.append("") + lines.append("内部助手调用规范:") + lines.append(" 1. 调用方式:") + lines.append(" - 接受输入:{\"task\": {\"type\": \"string\", \"description\": \"任务描述\"}}") + lines.append(" - 返回输出类型:{\"type\": \"string\", \"description\": \"执行结果\"}") + lines.append(" 2. 使用策略:") + lines.append(" - 任务分解:单次调用中不要让助手一次做过多的事情,任务拆分是你的工作,你需要将复杂任务分解为可管理的子任务") + lines.append(" - 专业匹配:根据助手的专长分配任务") + lines.append(" - 信息整合:整合不同助手的输出生成连贯解决方案") + lines.append(" - 效率优化:避免重复工作") + lines.append(" 3. 协作要求:") + lines.append(" - 评估助手返回的结果") + lines.append(" - 必要时提供额外指导或重新分配任务") + lines.append(" - 在助手结果基础上进行工作,避免重复工作") + lines.append(" - 注意保留子助手回答中的特殊符号,如索引溯源信息等") + else: + lines.append("You can use the following internal agents (via function calls):") + for name, agent in managed_agents.items(): + desc = agent.description if hasattr(agent, 'description') else agent.get('description', '') + lines.append(f" - {name}: {desc}") + lines.append("") + lines.append("Internal agent calling specifications:") + lines.append(" 1. Calling method:") + lines.append(" - Accepts input: {\"task\": {\"type\": \"string\", \"description\": \"task description\"}}") + lines.append(" - Returns output type: {\"type\": \"string\", \"description\": \"execution result\"}") + lines.append(" 2. 
Usage strategy:") + lines.append(" - Task decomposition: Don't let agents do too many things in a single call, task breakdown is your job, you need to decompose complex tasks into manageable subtasks") + lines.append(" - Professional matching: Assign tasks based on agent expertise") + lines.append(" - Information integration: Integrate outputs from different agents to generate coherent solutions") + lines.append(" - Efficiency optimization: Avoid duplicate work") + lines.append(" 3. Collaboration requirements:") + lines.append(" - Evaluate agent returned results") + lines.append(" - Provide additional guidance or reassign tasks when necessary") + lines.append(" - Work based on agent results, avoid duplicate work") + lines.append(" - Pay attention to preserving special symbols in sub-agent answers, such as index traceability information") + + return "\n".join(lines) + + +def _format_external_agents_description( + external_a2a_agents: Dict[str, Any], + language: str = "zh", +) -> str: + """Format external A2A agent descriptions with calling specifications. + + Jinja2 templates have ~5 lines of "外部助手调用规范" text that must be + included here for semantic equivalence. + """ + if not external_a2a_agents: + return "" + + lines = [] + + if language == "zh": + lines.append("你还可以使用以下外部助手(通过 A2A 协议远程调用):") + for agent_id, agent in external_a2a_agents.items(): + name = agent.name if hasattr(agent, 'name') else agent.get('name', '') + desc = agent.description if hasattr(agent, 'description') else agent.get('description', '') + lines.append(f" - {name}: {desc}") + lines.append("") + lines.append("外部助手调用规范:") + lines.append(" 1. 调用格式:`agent_name(task=\"自然语言任务描述\")`,注意:只需要 task 参数,不需要其他参数") + lines.append(" 2. 例如:`tool_assistant(task=\"北京天气怎么样\")`") + lines.append(" 3. 
任务描述使用自然语言,让外部助手自动识别和处理") + else: + lines.append("You can also use the following external agents (called via A2A protocol remotely):") + for agent_id, agent in external_a2a_agents.items(): + name = agent.name if hasattr(agent, 'name') else agent.get('name', '') + desc = agent.description if hasattr(agent, 'description') else agent.get('description', '') + lines.append(f" - {name}: {desc}") + lines.append("") + lines.append("External agent calling specifications:") + lines.append(" 1. Call format: `agent_name(task=\"natural language task description\")`, NOTE: only task parameter is needed, no other parameters") + lines.append(" 2. Example: `tool_assistant(task=\"What's the weather in Beijing?\")`") + lines.append(" 3. Use natural language for task description, let the external agent handle the rest") + + return "\n".join(lines) + + +def _format_skills_usage_requirements( + skills: List[Dict[str, str]], + language: str = "zh", +) -> str: + """Format skills usage requirements section. + + This is the "技能使用要求" section that appears after the skills reference + in the Available Resources section. + """ + if not skills: + no_skills_msg = "- 当前没有可用的技能" if language == "zh" else "- No skills are currently available" + return no_skills_msg + + lines = [] + + if language == "zh": + lines.append("- 你拥有上述 `` 中列出的技能。技能中引用的脚本通过 `run_skill_script()` 函数调用,该函数由平台提供,不需要导入。") + lines.append("") + lines.append("### 技能使用要求") + lines.append("1. **技能优先**:如果用户请求匹配了某个技能的 description,必须先调用 `read_skill_md()` 加载技能指南,再按指南执行。不得跳过技能自行编写代码解决。") + lines.append("2. **忠实执行**:读取技能内容后,严格按技能指南中的步骤操作。不要自行修改流程、跳过步骤或用通用代码替代技能定义的流程。") + lines.append("3. **脚本调用规范**:只使用 `run_skill_script` 工具执行技能指南中明确要求的脚本。传入的 `skill_name` 和 `script_path` 必须与技能指南中的声明完全一致,不要自行拼接或猜测路径。如果需要附加参数,将参数以命令行字符串形式传递给`run_skill_script`。") + lines.append("4. **失败回退**:如果 `read_skill_md` 返回错误或 `run_skill_script` 执行失败,向用户说明情况,并尝试用通用推理模式提供替代方案。") + lines.append("5. 
**技能组合**:如果一个任务需要多个技能配合,按逻辑依赖顺序依次加载和执行,前一个技能的输出可作为后一个技能的输入。") + else: + lines.append("- You have the skills listed in `` above. Scripts referenced in skills are called via the `run_skill_script()` function, which is provided by the platform and does not need to be imported.") + lines.append("") + lines.append("### Skill Usage Requirements") + lines.append("1. **Skill Priority**: If a user request matches a skill's description, you must first call `read_skill_md()` to load the skill guide, then execute per the guide. Do not skip skills and write your own code.") + lines.append("2. **Faithful Execution**: After reading skill content, strictly follow the skill guide's steps. Do not modify the flow, skip steps, or replace with generic code.") + lines.append("3. **Script Calling Specification**: Only use `run_skill_script` to execute scripts explicitly required in the skill guide. The `skill_name` and `script_path` must match the skill guide's declaration exactly. Do not construct or guess paths. For extra params, pass them as a command-line string to `run_skill_script`.") + lines.append("4. **Failure Fallback**: If `read_skill_md` returns an error or `run_skill_script` fails, explain to the user and try to provide an alternative via general reasoning mode.") + lines.append("5. **Skill Combination**: If a task needs multiple skills, load and execute in logical dependency order. 
The output of one skill can be input to the next.") + + return "\n".join(lines) + + +def _format_agent_fallback( + managed_agents: Dict[str, Any], + external_a2a_agents: Dict[str, Any], + language: str = "zh", +) -> str: + """Format fallback message when no agents are available.""" + if managed_agents or external_a2a_agents: + return "" + + return "- 当前没有可用的助手" if language == "zh" else "- No agents are currently available" + + +def _format_app_context(app_name: str, app_description: str, user_id: str, time_str: str) -> str: + """Format application context for system prompt injection.""" + lines = [ + f"Application: {app_name}", + f"Description: {app_description}", + f"Current user: {user_id}", + f"Current time: {time_str}", + ] + return "\n".join(lines) + + +# ============================================================================= +# SECTION 2: Skeleton component builders +# These build SystemPromptComponent instances for fixed text sections +# ============================================================================= + + +def build_skeleton_header_component( + app_name: str, + app_description: str, + time_str: str, + user_id: str, + language: str = "zh", + priority: int = 100, +) -> "SystemPromptComponent": + """Build SystemPromptComponent for the header section. + + Section: "### 基本信息" / "### Basic Information" + Content: Agent identity, app name/description, time, user_id + """ + from nexent.core.agents.agent_model import SystemPromptComponent + + if language == "zh": + content = f"### 基本信息\n你是{app_name},{app_description},现在是{time_str},用户ID为{user_id}" + else: + content = f"### Basic Information\nYou are {app_name}, {app_description}, it is {time_str} now" + + return SystemPromptComponent( + content=content, + template_name="header", + priority=priority, + ) + + +def build_skeleton_duty_component( + duty: str, + language: str = "zh", + priority: int = 80, +) -> "SystemPromptComponent": + """Build SystemPromptComponent for the duty section. 
+ + Section: "### 核心职责" / "### Core Responsibilities" + Content: Agent's primary duty + 5 safety principles + """ + from nexent.core.agents.agent_model import SystemPromptComponent + + if language == "zh": + content = f"### 核心职责\n{duty}\n\n请注意,你应该遵守以下原则:\n行为安全:文件操作必须使用平台提供的专用工具,禁止使用代码直接修改工作空间中的文件;\n法律合规:遵守业务所在国家/地区的法律法规;\n政治中立:保持政治中立,不主动讨论政治话题;\n安全防护:不响应涉及武器制造、网络攻击、欺诈、恶意软件等危险行为的请求;\n伦理准则:拒绝仇恨言论、歧视性内容及违反社会公德和公认伦理标准的请求。" + else: + content = f"### Core Responsibilities\n{duty}\n\nPlease note that you should follow these principles:\nBehavioral Safety: File operations must use the platform-provided dedicated tools; direct code modification of workspace files is prohibited;\nLegal Compliance: Comply with laws and regulations of the business operating jurisdiction;\nPolitical Neutrality: Maintain political neutrality and avoid initiating political discussions;\nSecurity Protection: Do not respond to requests involving weapon manufacturing, cyberattacks, fraud, malware, or other dangerous activities;\nEthical Guidelines: Refuse hate speech, discriminatory content, and any requests that violate social morals and commonly accepted ethical standards." + + return SystemPromptComponent( + content=content, + template_name="duty", + priority=priority, + ) + + +def build_skeleton_execution_flow_component( + memory_list: Optional[List[Any]] = None, + language: str = "zh", + is_manager: bool = True, + priority: int = 60, +) -> "SystemPromptComponent": + """Build SystemPromptComponent for the execution flow section. 
+ + Section: "### 执行流程" / "### Execution Process" + Content: Think/Code loop instructions + output format specs + Note: memory_list affects one line in the Think section (manager only) + """ + from nexent.core.agents.agent_model import SystemPromptComponent + + has_memory = memory_list and len(memory_list) > 0 + + if language == "zh": + lines = ["### 执行流程"] + lines.append("要解决任务,你必须通过一系列步骤向前规划,以'思考:'和'代码:'序列循环进行。**注意:禁止在代码执行前输出'观察结果:',观察结果只能由代码执行后产生。**") + lines.append("") + lines.append("1. 思考:") + lines.append(" - 分析当前任务状态和进展") + if is_manager and has_memory: + lines.append(" - 合理参考之前交互中的上下文记忆信息") + lines.append(" - 定下一步最佳行动(使用工具或分配给助手)") + lines.append(" - 解释你的决策逻辑和预期结果") + lines.append("") + lines.append("2. 代码:") + lines.append(" - 用简单的Python编写代码") + lines.append(" - 遵循python代码规范和python语法") + lines.append(" - 正确调用工具或助手解决问题") + lines.append(" - 考虑到代码执行与展示用户代码的区别,使用'代码'表达运行代码,使用'代码'表达展示代码") + lines.append(" - 注意运行的代码不会被用户看到,所以如果用户需要看到代码,你需要使用'代码'表达展示代码。") + lines.append(" - **重要**:代码执行后,系统会返回 \"Observation:\" 标记的内容(这是真实的执行结果)。请基于这些真实结果继续下一步思考,**不要在代码执行前自行编造观察结果**。") + lines.append("") + lines.append("在思考结束后,当你认为可以回答用户问题,那么可以不生成代码,直接生成最终回答给到用户并停止循环。") + lines.append("") + lines.append("生成最终回答时,你需要遵循以下规范:") + lines.append("1. Markdown格式要求:") + lines.append(" - 使用标准Markdown语法格式化输出,支持标题、列表、表格、代码块、链接等") + lines.append(" - 展示图片和视频使用链接方式,不需要外套代码块,格式:[链接文本](URL),图片格式:![alt文本](图片URL),视频格式:") + lines.append(" - 段落之间使用单个空行分隔,避免多个连续空行") + lines.append(" - 数学公式使用标准Markdown格式:行内公式用 $公式$,块级公式用 $$公式$$") + lines.append("") + lines.append("2. 引用标记规范(仅在使用了检索工具时):") + lines.append(" - 引用标记格式必须严格为:`[[字母+数字]]`,例如:`[[a1]]`、`[[b2]]`、`[[c3]]`") + lines.append(" - 字母部分必须是单个小写字母(a-e),数字部分必须是整数") + lines.append(" - 引用标记的字母和数字必须与检索工具的检索结果一一对应") + lines.append(" - 引用标记应紧跟在相关信息或句子之后,通常放在句末或段落末尾") + lines.append(" - 多个引用标记可以连续使用,例如:`[[a1]][[b2]]`") + lines.append(" - **重要**:仅添加引用标记,不要添加链接、参考文献列表等多余内容") + lines.append(" - 如果检索结果中没有匹配的引用,则不显示该引用标记") + lines.append("") + lines.append("3. 
格式细节要求:") + lines.append(" - 避免在Markdown中使用HTML标签,优先使用Markdown原生语法") + lines.append(" - 代码块中的代码应保持原始格式,不要添加额外的转义字符") + lines.append(" - 若未使用检索工具,则不添加任何引用标记") + else: + lines = ["### Execution Process"] + lines.append("To solve tasks, you must plan forward through a series of steps in a loop of 'Think:' and 'Code:' sequences. **IMPORTANT: You must NOT output 'Observe Results:' before code execution. Observation results can ONLY be generated after code execution.**") + lines.append("") + lines.append("1. Think:") + lines.append(" - Analyze current task status and progress") + if is_manager and has_memory: + lines.append(" - Reference relevant contextual memories from previous interactions when applicable") + lines.append(" - Determine the best next action (use tools or delegate to agents)") + lines.append(" - Explain your decision logic and expected results") + lines.append("") + lines.append("2. Code:") + lines.append(" - Write code in simple Python") + lines.append(" - Follow Python coding standards and Python syntax") + lines.append(" - Correctly call tools or agents to solve problems") + lines.append(" - To distinguish between code execution and displaying user code, use 'code' for executing code and 'code' for displaying code") + lines.append(" - Note that executed code is not visible to users. If users need to see the code, use 'code' for displaying code.") + lines.append(" - **IMPORTANT**: After code execution, the system will return content with \"Observation:\" marker (this is the real execution result). Please continue your next thinking based on these real results. **Do NOT fabricate observation results before code execution.**") + lines.append("") + lines.append("After thinking, when you believe you can answer the user's question, you can generate a final answer directly to the user without generating code and stop the loop.") + lines.append("") + lines.append("When generating the final answer, you need to follow these specifications:") + lines.append("1. 
**Markdown Format Requirements**:") + lines.append(" - Use standard Markdown syntax to format your output, supporting headings, lists, tables, code blocks, and links.") + lines.append(" - Display images and videos using links instead of wrapping them in code blocks. Use `[link text](URL)` for links, `![alt text](image URL)` for images, and `` for videos.") + lines.append(" - Use a single blank line between paragraphs, avoid multiple consecutive blank lines") + lines.append(" - Mathematical formulas use standard Markdown format: inline formulas use $formula$, block formulas use $$formula$$") + lines.append("") + lines.append("2. **Reference Mark Specifications** (only when retrieval tools are used):") + lines.append(" - Reference mark format must strictly be: `[[letter+number]]`, for example: `[[a1]]`, `[[b2]]`, `[[c3]]`") + lines.append(" - The letter part must be a single lowercase letter (a-e), the number part must be an integer") + lines.append(" - The letters and numbers of reference marks must correspond one-to-one with the retrieval results of retrieval tools") + lines.append(" - Reference marks should be placed immediately after relevant information or sentences, usually at the end of sentences or paragraphs") + lines.append(" - Multiple reference marks can be used consecutively, for example: `[[a1]][[b2]]`") + lines.append(" - **Important**: Only add reference marks, do not add links, reference lists, or other extraneous content") + lines.append(" - If there is no matching reference in the retrieval results, do not display that reference mark") + lines.append("") + lines.append("3. 
**Format Detail Requirements**:") + lines.append(" - Avoid using HTML tags in Markdown, prioritize native Markdown syntax") + lines.append(" - Code in code blocks should maintain original format, do not add extra escape characters") + lines.append(" - If no retrieval tools are used, do not add any reference marks") + + content = "\n".join(lines) + + return SystemPromptComponent( + content=content, + template_name="execution_flow", + priority=priority, + ) + + +def build_skeleton_constraint_component( + constraint: str, + language: str = "zh", + priority: int = 30, +) -> "SystemPromptComponent": + """Build SystemPromptComponent for the constraint section. + + Section: "### 资源使用要求" / "### Resource Usage Requirements" + Content: User-defined constraint text + """ + from nexent.core.agents.agent_model import SystemPromptComponent + + if language == "zh": + content = f"### 资源使用要求\n{constraint}" + else: + content = f"### Resource Usage Requirements\n{constraint}" + + return SystemPromptComponent( + content=content, + template_name="constraint", + priority=priority, + ) + + +def build_skeleton_code_norms_component( + language: str = "zh", + is_manager: bool = True, + priority: int = 20, +) -> "SystemPromptComponent": + """Build SystemPromptComponent for the Python code norms section. + + Section: "### python代码规范" / "### Python Code Specifications" + Content: 12 fixed code rules (11 for managed agents) + """ + from nexent.core.agents.agent_model import SystemPromptComponent + + if language == "zh": + lines = ["### python代码规范"] + lines.append("1. 如果认为是需要执行的代码,使用'代码'格式;如果是不需要执行仅用于展示的代码,使用'代码'格式,其中语言类型例如python、java、javascript等;") + lines.append("2. 只使用已定义的变量,变量将在多次调用之间持续保持;") + lines.append("3. 使用\"print()\"函数让下一次的模型调用看到对应变量信息;") + lines.append("4. 正确使用工具/助手的入参,使用关键字参数,不要用字典形式;") + lines.append("5. 避免在一轮对话中进行过多的工具/助手调用,这会导致输出格式难以预测;") + lines.append("6. 只在需要时调用工具/助手,不重复相同参数的调用;") + lines.append("7. 
使用变量名保存函数调用结果,在每个中间步骤中,您可以使用\"print()\"来保存您需要的任何重要信息。被保存的信息在代码执行之间保持。print()输出的内容应被视为字符串,不要对其进行字典相关操作如.get()、[]等,避免类型错误;") + lines.append("9. 示例中的代码避免出现**if**、**for**等逻辑,仅调用工具/助手,示例中的每一次的行动都是确定事件。如果有不同的条件,你应该给出不同条件下的示例;") + lines.append("10. 工具调用使用关键字参数,如:tool_name(param1=\"value1\", param2=\"value2\");") + if is_manager: + lines.append("11. 助手调用必须使用task参数,如:assistant_name(task=\"任务描述\");") + lines.append("12. 不要放弃!你负责解决任务,而不是提供解决方向。") + else: + lines = ["### Python Code Specifications"] + lines.append("1. If it is considered to be code that needs to be executed, use 'code'. If the code does not need to be executed for display only, use 'code', where language_type can be python, java, javascript, etc;") + lines.append("2. Only use defined variables, variables will persist between multiple calls;") + lines.append("3. Use \"print()\" function to let the next model call see corresponding variable information;") + lines.append("4. Use tool/agent input parameters correctly, use keyword arguments, not dictionary format;") + lines.append("5. Avoid making too many tool/agent calls in one round of conversation, as this will make the output format unpredictable;") + lines.append("6. Only call tools/agents when needed, do not repeat calls with the same parameters;") + lines.append("7. Use variable names to save function call results. In each intermediate step, you can use \"print()\" to save any important information you need. The saved information persists between code executions. The content printed by print() should be treated as a string, do not perform dictionary-related operations such as .get(), [] etc., to avoid type errors;") + lines.append("8. Avoid **if**, **for** and other logic in example code, only call tools/agents. Each action in the example is a deterministic event. If there are different conditions, you should provide examples under different conditions;") + lines.append("9. 
Tool calls use keyword arguments, such as: tool_name(param1=\"value1\", param2=\"value2\");") + if is_manager: + lines.append("10. Agent calls must use task parameter, such as: agent_name(task=\"task description\");") + lines.append("11. Don't give up! You are responsible for solving the task, not providing solution directions.") + + content = "\n".join(lines) + + return SystemPromptComponent( + content=content, + template_name="code_norms", + priority=priority, + ) + + +def build_skeleton_footer_component( + few_shots: str, + language: str = "zh", + priority: int = 10, +) -> "SystemPromptComponent": + """Build SystemPromptComponent for the footer section. + + Section: "### 示例模板" + ending + Content: few_shots + "$1M reward" ending + """ + from nexent.core.agents.agent_model import SystemPromptComponent + + if language == "zh": + content = f"### 示例模板\n{few_shots}\n\n现在开始!如果你正确解决任务,你将获得100万美元的奖励。" + else: + content = f"### Example Templates\n{few_shots}\n\nNow start! If you solve the task correctly, you will receive a reward of 1 million dollars." + + return SystemPromptComponent( + content=content, + template_name="footer", + priority=priority, + ) + + +# ============================================================================= +# SECTION 3: Piecewise component builders (existing, enhanced) +# ============================================================================= + + +def build_tools_component( + tools: Dict[str, Any], + knowledge_base_summary: Optional[str] = None, + language: str = "zh", + is_manager: bool = True, + priority: int = 50, +) -> "ToolsComponent": + """Build ToolsComponent from tool configurations. 
+ + Args: + tools: Dict of tool name -> ToolConfig or tool dict + knowledge_base_summary: Summary text from knowledge bases + language: Language code ('zh' or 'en') + is_manager: Whether this is a manager agent + priority: Component priority for selection + + Returns: + ToolsComponent instance + """ + from nexent.core.agents.agent_model import ToolsComponent + + tool_list = [] + for name, tool in tools.items(): + if hasattr(tool, 'description'): + tool_dict = { + "name": name, + "description": tool.description, + "inputs": getattr(tool, 'inputs', ''), + "output_type": getattr(tool, 'output_type', ''), + "source": getattr(tool, 'source', 'local'), + } + else: + tool_dict = { + "name": name, + "description": tool.get('description', ''), + "inputs": tool.get('inputs', ''), + "output_type": tool.get('output_type', ''), + "source": tool.get('source', 'local'), + } + tool_list.append(tool_dict) + + formatted_desc = _format_tools_description( + tools, + knowledge_base_summary=knowledge_base_summary, + language=language, + is_manager=is_manager, + ) + return ToolsComponent( + tools=tool_list, + formatted_description=formatted_desc, + priority=priority, + ) + + +def build_skills_component( + skills: List[Dict[str, str]], + language: str = "zh", + priority: int = 70, +) -> "SkillsComponent": + """Build SkillsComponent from skill configurations. 
+ + Args: + skills: List of skill dicts with name and description + language: Language code ('zh' or 'en') + priority: Component priority for selection + + Returns: + SkillsComponent instance + """ + from nexent.core.agents.agent_model import SkillsComponent + + formatted_desc = _format_skills_description(skills, language=language) + return SkillsComponent( + skills=skills, + formatted_description=formatted_desc, + priority=priority, + ) + + +def build_memory_component( + memory_list: List[Any], + search_query: Optional[str] = None, + language: str = "zh", + priority: int = 90, +) -> "MemoryComponent": + """Build MemoryComponent from memory search results. + + Args: + memory_list: List of memory search results + search_query: Query used to search memory + language: Language code ('zh' or 'en') + priority: Component priority for selection + + Returns: + MemoryComponent instance + """ + from nexent.core.agents.agent_model import MemoryComponent + + memories = [] + for mem in memory_list: + if isinstance(mem, dict): + memories.append({ + "content": mem.get('memory', '') or mem.get('content', ''), + "memory_type": mem.get('memory_type', 'user'), + "metadata": mem.get('metadata', {}), + }) + elif isinstance(mem, str): + memories.append({ + "content": mem, + "memory_type": "user", + "metadata": {}, + }) + + formatted_content = _format_memory_context(memory_list, language=language) + return MemoryComponent( + memories=memories, + formatted_content=formatted_content, + search_query=search_query, + priority=priority, + ) + + +def build_knowledge_base_component( + knowledge_base_summary: str, + kb_ids: Optional[List[str]] = None, + priority: int = 10, +) -> "KnowledgeBaseComponent": + """Build KnowledgeBaseComponent from knowledge base summary. 
+ + Args: + knowledge_base_summary: Summary text from knowledge bases + kb_ids: List of knowledge base IDs used + priority: Component priority for selection + + Returns: + KnowledgeBaseComponent instance + """ + from nexent.core.agents.agent_model import KnowledgeBaseComponent + + return KnowledgeBaseComponent( + summary=knowledge_base_summary, + kb_ids=kb_ids or [], + priority=priority, + ) + + +def build_managed_agents_component( + managed_agents: Dict[str, Any], + language: str = "zh", + priority: int = 45, +) -> "ManagedAgentsComponent": + """Build ManagedAgentsComponent from managed sub-agent configurations. + + Args: + managed_agents: Dict of agent name -> AgentConfig + language: Language code ('zh' or 'en') + priority: Component priority for selection + + Returns: + ManagedAgentsComponent instance + """ + from nexent.core.agents.agent_model import ManagedAgentsComponent + + agent_list = [] + for name, agent in managed_agents.items(): + if hasattr(agent, 'description'): + agent_dict = { + "name": name, + "description": agent.description, + "tools": [], + } + if hasattr(agent, 'tools'): + agent_dict["tools"] = [t.name for t in agent.tools if hasattr(t, 'name')] + else: + agent_dict = { + "name": name, + "description": agent.get('description', ''), + "tools": [], + } + agent_list.append(agent_dict) + + formatted_desc = _format_managed_agents_description(managed_agents, language=language) + return ManagedAgentsComponent( + agents=agent_list, + formatted_description=formatted_desc, + priority=priority, + ) + + +def build_external_agents_component( + external_a2a_agents: Dict[str, Any], + language: str = "zh", + priority: int = 44, +) -> "ExternalAgentsComponent": + """Build ExternalAgentsComponent from external A2A agent configurations. 
+ + Args: + external_a2a_agents: Dict of agent_id -> ExternalA2AAgentConfig + language: Language code ('zh' or 'en') + priority: Component priority for selection + + Returns: + ExternalAgentsComponent instance + """ + from nexent.core.agents.agent_model import ExternalAgentsComponent + + agent_list = [] + for agent_id, agent in external_a2a_agents.items(): + if hasattr(agent, 'agent_id'): + agent_dict = { + "agent_id": str(agent.agent_id), + "name": agent.name, + "description": agent.description, + "url": getattr(agent, 'url', ''), + } + else: + agent_dict = { + "agent_id": str(agent_id), + "name": agent.get('name', ''), + "description": agent.get('description', ''), + "url": agent.get('url', ''), + } + agent_list.append(agent_dict) + + formatted_desc = _format_external_agents_description(external_a2a_agents, language=language) + return ExternalAgentsComponent( + agents=agent_list, + formatted_description=formatted_desc, + priority=priority, + ) + + +def build_system_prompt_component( + content: str, + template_name: Optional[str] = None, + priority: int = 100, +) -> "SystemPromptComponent": + """Build SystemPromptComponent with rendered content. + + Args: + content: Rendered system prompt content + template_name: Source template name for reference + priority: Component priority (highest by default) + + Returns: + SystemPromptComponent instance + """ + from nexent.core.agents.agent_model import SystemPromptComponent + + return SystemPromptComponent( + content=content, + template_name=template_name, + priority=priority, + ) + + +def build_skills_usage_component( + skills: List[Dict[str, str]], + language: str = "zh", + priority: int = 40, +) -> "SystemPromptComponent": + """Build SystemPromptComponent for skills usage requirements. + + This is a skeleton-like component but its content depends on + whether skills exist, so it's built dynamically. 
+ + Args: + skills: List of skill dicts + language: Language code ('zh' or 'en') + priority: Component priority + + Returns: + SystemPromptComponent instance + """ + from nexent.core.agents.agent_model import SystemPromptComponent + + content = _format_skills_usage_requirements(skills, language=language) + return SystemPromptComponent( + content=content, + template_name="skills_usage", + priority=priority, + ) + + +def build_agent_fallback_component( + managed_agents: Dict[str, Any], + external_a2a_agents: Dict[str, Any], + language: str = "zh", + priority: int = 5, +) -> "SystemPromptComponent": + """Build SystemPromptComponent for agent fallback message. + + Only emits content when no agents are available. + + Args: + managed_agents: Dict of managed agents + external_a2a_agents: Dict of external agents + language: Language code + priority: Component priority + + Returns: + SystemPromptComponent instance (may have empty content) + """ + from nexent.core.agents.agent_model import SystemPromptComponent + + content = _format_agent_fallback(managed_agents, external_a2a_agents, language=language) + return SystemPromptComponent( + content=content, + template_name="agent_fallback", + priority=priority, + ) + + +# ============================================================================= +# SECTION 4: Main assembly function - build_context_components +# ============================================================================= + + +def build_context_components( + # Raw params for piecewise assembly (NEW in Goal 3) + duty: Optional[str] = None, + constraint: Optional[str] = None, + few_shots: Optional[str] = None, + app_name: Optional[str] = None, + app_description: Optional[str] = None, + time_str: Optional[str] = None, + user_id: Optional[str] = None, + language: str = "zh", + is_manager: bool = True, + # Piecewise data sources + tools: Optional[Dict[str, Any]] = None, + skills: Optional[List[Dict[str, str]]] = None, + managed_agents: Optional[Dict[str, Any]] = 
None, + external_a2a_agents: Optional[Dict[str, Any]] = None, + memory_list: Optional[List[Any]] = None, + memory_search_query: Optional[str] = None, + knowledge_base_summary: Optional[str] = None, + kb_ids: Optional[List[str]] = None, + # Legacy param for fallback (removed short-circuit in Goal 3) + system_prompt: Optional[str] = None, + # Inclusion flags (kept for backward compatibility) + include_tools: bool = True, + include_skills: bool = True, + include_memory: bool = True, + include_knowledge_base: bool = True, + include_managed_agents: bool = True, + include_external_agents: bool = True, + include_app_context: bool = True, +) -> List["ContextComponent"]: + """Build list of ContextComponents from agent configuration data. + + Piecewise assembly: Each semantic section is emitted as a dedicated + ContextComponent, assembled in the exact order matching Jinja2 templates. + + Assembly order (12 sections): + 1. Header (基本信息) + 2. Memory (上下文记忆) - if memory_list exists + 3. Duty (核心职责 + 安全准则) + 4. Skills (可用技能 + 6步流程) - if skills exist + 5. Execution Flow (执行流程 + 输出规范) + 6. Tools (可用资源/1. 工具 + 文件链接指南) + 7. Managed Agents (可用资源/2. 助手) - if managed_agents exist + 8. External Agents (外部助手) - if external_a2a_agents exist + 9. Agent Fallback (当前没有可用的助手) - if no agents + 10. Skills Usage (可用资源/3. 技能 + 使用要求) + 11. Constraint (资源使用要求) + 12. Code Norms (python代码规范) + 13. Footer (示例模板 + 结尾) + + Note: The a330d815 short-circuit (if system_prompt: return [single]) + has been REMOVED. All callers must provide raw params for piecewise assembly. + The system_prompt param is kept for future fallback use but not currently + used in the piecewise path. 
+ + Args: + duty: Agent's primary duty text + constraint: Resource usage constraint text + few_shots: Example templates text + app_name: Application name + app_description: Application description + time_str: Current time string + user_id: Current user ID + language: Language code ('zh' or 'en') + is_manager: Whether this is a manager agent + tools: Dict of tool name -> ToolConfig + skills: List of skill dicts with name and description + managed_agents: Dict of agent name -> AgentConfig + external_a2a_agents: Dict of agent_id -> ExternalA2AAgentConfig + memory_list: List of memory search results + memory_search_query: Query used to search memory + knowledge_base_summary: Summary text from knowledge bases + kb_ids: List of knowledge base IDs + system_prompt: (Legacy) Pre-rendered system prompt - NOT USED in piecewise path + include_*: Flags for backward compatibility + + Returns: + List of ContextComponent instances ready for ContextManager + """ + components: List = [] + + # 1. Header + if app_name and app_description and time_str and user_id: + components.append( + build_skeleton_header_component( + app_name=app_name, + app_description=app_description, + time_str=time_str, + user_id=user_id, + language=language, + ) + ) + + # 2. Memory (if exists) + if include_memory and memory_list: + components.append( + build_memory_component( + memory_list=memory_list, + search_query=memory_search_query, + language=language, + ) + ) + + # 3. Duty + Safety Principles + if duty: + components.append( + build_skeleton_duty_component( + duty=duty, + language=language, + ) + ) + + # 4. Skills (if exists) - includes 6-step process + if include_skills and skills: + components.append( + build_skills_component( + skills=skills, + language=language, + ) + ) + + # 5. Execution Flow + components.append( + build_skeleton_execution_flow_component( + memory_list=memory_list, + language=language, + is_manager=is_manager, + ) + ) + + # 6. 
Tools + File URL Guide + if include_tools and tools: + components.append( + build_tools_component( + tools=tools, + knowledge_base_summary=knowledge_base_summary, + language=language, + is_manager=is_manager, + ) + ) + + # 7. Managed Agents (if exists) - manager only + if is_manager and include_managed_agents and managed_agents: + components.append( + build_managed_agents_component( + managed_agents=managed_agents, + language=language, + ) + ) + + # 8. External Agents (if exists) - manager only + if is_manager and include_external_agents and external_a2a_agents: + components.append( + build_external_agents_component( + external_a2a_agents=external_a2a_agents, + language=language, + ) + ) + + # 9. Agent Fallback (if no agents available) - manager only + if is_manager and not managed_agents and not external_a2a_agents: + fallback_comp = build_agent_fallback_component( + managed_agents=managed_agents or {}, + external_a2a_agents=external_a2a_agents or {}, + language=language, + ) + if fallback_comp.content: # Only add if has content + components.append(fallback_comp) + + # 10. Skills Usage Requirements + if include_skills: + components.append( + build_skills_usage_component( + skills=skills or [], + language=language, + ) + ) + + # 11. Constraint + if constraint: + components.append( + build_skeleton_constraint_component( + constraint=constraint, + language=language, + ) + ) + + # 12. Code Norms + components.append( + build_skeleton_code_norms_component( + language=language, + is_manager=is_manager, + ) + ) + + # 13. Footer + if few_shots: + components.append( + build_skeleton_footer_component( + few_shots=few_shots, + language=language, + ) + ) + + return components + + +def build_app_context_string( + app_name: str, + app_description: str, + user_id: str, +) -> str: + """Build app context string for template injection. 
+ + Args: + app_name: Application name + app_description: Application description + user_id: Current user ID + + Returns: + Formatted app context string + """ + time_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + return _format_app_context(app_name, app_description, user_id, time_str) \ No newline at end of file diff --git a/backend/utils/file_management_utils.py b/backend/utils/file_management_utils.py index 7d31a74bb..83c3957e7 100644 --- a/backend/utils/file_management_utils.py +++ b/backend/utils/file_management_utils.py @@ -2,6 +2,7 @@ import logging import os import subprocess +import time import traceback from pathlib import Path from typing import List @@ -15,7 +16,6 @@ from consts.model import ProcessParams from database.attachment_db import get_file_size_from_minio from utils.auth_utils import get_current_user_id -from utils.config_utils import tenant_config_manager logger = logging.getLogger("file_management_utils") @@ -45,18 +45,13 @@ async def trigger_data_process(files: List[dict], process_params: ProcessParams) if not files: return None - # Get chunking size according to the embedding model - embedding_model_id = None + # Get tenant_id from authorization for downstream task processing + embedding_model_id = process_params.model_id tenant_id = None try: _, tenant_id = get_current_user_id(process_params.authorization) - # Get embedding model ID from tenant config - tenant_config = tenant_config_manager.load_config(tenant_id) - embedding_model_id_str = tenant_config.get("EMBEDDING_ID") if tenant_config else None - if embedding_model_id_str: - embedding_model_id = int(embedding_model_id_str) except Exception as e: - logger.warning(f"Failed to get embedding model ID for tenant: {e}") + logger.warning(f"Failed to get tenant_id from authorization: {e}") # Build headers with authorization headers = { @@ -134,19 +129,23 @@ async def trigger_data_process(files: List[dict], process_params: ProcessParams) async def get_all_files_status(index_name: str): 
""" - Get status for all files according to index_name, matching corresponding tasks, + Get status for all files according to index_name, matching corresponding tasks, and then convert to custom state - + Args: index_name: Index name to filter tasks - + Returns: Dictionary with path_or_url as keys and dict values: {state, latest_task_id} """ + start_time = time.time() try: try: async with httpx.AsyncClient() as client: response = await client.get(f"{DATA_PROCESS_SERVICE}/tasks/indices/{index_name}", timeout=10.0) + http_duration = time.time() - start_time + logger.info(f"[get_all_files_status] HTTP request to {DATA_PROCESS_SERVICE}/tasks/indices/{index_name} " + f"completed in {http_duration:.3f}s, status={response.status_code}") if response.status_code == 200: tasks_list = response.json() else: @@ -214,41 +213,46 @@ async def get_all_files_status(index_name: str): file_state['total_chunks'] = task_info.get( 'total_chunks', file_state.get('total_chunks')) result = {} + # Use local fallback logic for state conversion (avoiding HTTP call to external service) + # The conversion logic is simple and can be done locally + step_local_start = time.time() + + # Batch fetch progress info from Redis for all task_ids (single round-trip) + redis_progress_batch = {} + if file_states: + try: + from services.redis_service import get_redis_service + redis_service = get_redis_service() + all_task_ids = [fs.get('latest_task_id', '') for fs in file_states.values()] + all_task_ids = [tid for tid in all_task_ids if tid] + if all_task_ids: + redis_progress_batch = redis_service.batch_get_progress_info(all_task_ids) or {} + except Exception as e: + logger.debug(f"Failed to batch get Redis progress info: {e}") + for path_or_url, file_state in file_states.items(): - # Call remote state conversion API so this service no longer depends on Celery - custom_state = await _convert_to_custom_state( + custom_state = _convert_to_custom_state_local( process_celery_state=file_state['process_state'] or 
'', forward_celery_state=file_state['forward_state'] or '' ) - # Try to get progress from Redis - always check Redis for real-time progress - # especially when task is in progress (FORWARDING or PROCESSING) + + # Get progress from pre-fetched batch Redis data processed_chunks = file_state.get('processed_chunks') total_chunks = file_state.get('total_chunks') task_id = file_state['latest_task_id'] or '' - # Always try to get latest progress from Redis if task_id exists - # Redis has the most up-to-date progress during vectorization - if task_id: - try: - from services.redis_service import get_redis_service - redis_service = get_redis_service() - progress_info = redis_service.get_progress_info(task_id) - if progress_info: - # Use Redis progress as primary source (it's updated in real-time) - redis_processed = progress_info.get('processed_chunks') - redis_total = progress_info.get('total_chunks') - if redis_processed is not None: - processed_chunks = redis_processed - if redis_total is not None: - total_chunks = redis_total - logger.debug( - f"Retrieved progress from Redis for task {task_id}: {processed_chunks}/{total_chunks}") - else: - logger.debug( - f"No progress info in Redis for task {task_id}, using task state values: {processed_chunks}/{total_chunks}") - except Exception as e: + # Use pre-fetched batch Redis data for progress + if task_id and task_id in redis_progress_batch: + progress_info = redis_progress_batch.get(task_id) + if progress_info: + redis_processed = progress_info.get('processed_chunks') + redis_total = progress_info.get('total_chunks') + if redis_processed is not None: + processed_chunks = redis_processed + if redis_total is not None: + total_chunks = redis_total logger.debug( - f"Failed to get progress from Redis for task {task_id}: {str(e)}") + f"Retrieved progress from batch Redis for task {task_id}: {processed_chunks}/{total_chunks}") result[path_or_url] = { 'state': custom_state, @@ -259,41 +263,26 @@ async def 
get_all_files_status(index_name: str): 'processed_chunks': processed_chunks, 'total_chunks': total_chunks, } + step_local_duration = time.time() - step_local_start + logger.info(f"[get_all_files_status] Local processing: {len(result)} files in {step_local_duration:.3f}s") + total_duration = time.time() - start_time + logger.info(f"[get_all_files_status] Complete: {len(result)} files processed in {total_duration:.3f}s") return result except Exception as e: logger.error(f"Error getting all files status for index {index_name}, details: {str(e)} {traceback.format_exc()}") return {} # Return empty dict on error -async def _convert_to_custom_state(process_celery_state: str, forward_celery_state: str) -> str: - """Delegates Celery-state conversion to the data-process service. - - This removes the direct dependency on the *celery* package for callers of - `file_management_utils`. +def _convert_to_custom_state_local(process_celery_state: str, forward_celery_state: str) -> str: + """ + Local state conversion logic - handles all known Celery states. + Returns "UNKNOWN" only if the states are not recognized. 
""" - try: - payload = { - "process_state": process_celery_state, - "forward_state": forward_celery_state, - } - - async with httpx.AsyncClient() as client: - response = await client.post(f"{DATA_PROCESS_SERVICE}/tasks/convert_state", json=payload, timeout=5.0) - - if response.status_code == 200: - return response.json().get("state", "WAIT_FOR_PROCESSING") - else: - logger.warning( - "State conversion service error: %s - %s", response.status_code, response.text - ) - except Exception as e: - logger.warning("Failed to convert state via service: %s", str(e)) - - # Fallback mapping without Celery dependency (string comparison only) success = "SUCCESS" failure = "FAILURE" pending = "PENDING" started = "STARTED" + unknown = "UNKNOWN" if process_celery_state == failure: return "PROCESS_FAILED" @@ -304,6 +293,11 @@ async def _convert_to_custom_state(process_celery_state: str, forward_celery_sta if not process_celery_state and not forward_celery_state: return "WAIT_FOR_PROCESSING" + # Check if states are known Celery states + known_states = {success, failure, pending, started, ""} + if process_celery_state not in known_states or forward_celery_state not in known_states: + return unknown + forward_state_map = { pending: "WAIT_FOR_FORWARDING", started: "FORWARDING", diff --git a/backend/utils/llm_utils.py b/backend/utils/llm_utils.py index e99b9f384..f7caba37d 100644 --- a/backend/utils/llm_utils.py +++ b/backend/utils/llm_utils.py @@ -73,6 +73,8 @@ def call_llm_for_system_prompt( set_monitoring_operation("system_prompt_generation", display_name=display_name or None) + timeout_seconds = llm_model_config.get("timeout_seconds") if llm_model_config else None + llm = OpenAIModel( model_id=get_model_name_from_config(llm_model_config) if llm_model_config else "", api_base=llm_model_config.get("base_url", "") if llm_model_config else "", @@ -82,6 +84,7 @@ def call_llm_for_system_prompt( model_factory=llm_model_config.get("model_factory") if llm_model_config else None, 
ssl_verify=llm_model_config.get("ssl_verify", True) if llm_model_config else True, display_name=display_name or None, + timeout_seconds=timeout_seconds, ) messages = [ {"role": MESSAGE_ROLE["SYSTEM"], "content": system_prompt}, @@ -100,9 +103,21 @@ def call_llm_for_system_prompt( reasoning_content_seen = False content_tokens_seen = 0 for chunk in current_request: - delta = chunk.choices[0].delta + choices = getattr(chunk, "choices", None) + if choices is None: + logger.warning("Received non-standard chunk without choices during prompt generation.") + continue + if not choices: + logger.debug("Received empty choices chunk during prompt generation; skipping.") + continue + + delta = getattr(choices[0], "delta", None) + if delta is None: + logger.debug("Skipping LLM stream chunk without delta") + continue + reasoning_content = getattr(delta, "reasoning_content", None) - new_token = delta.content + new_token = getattr(delta, "content", None) # Note: reasoning_content is separate metadata and doesn't affect content filtering # We only filter content based on tags in delta.content diff --git a/backend/utils/monitoring.py b/backend/utils/monitoring.py index eb20d88ec..e6da57041 100644 --- a/backend/utils/monitoring.py +++ b/backend/utils/monitoring.py @@ -2,12 +2,12 @@ Global Monitoring Manager for Backend This module initializes and configures the global monitoring manager instance -with backend environment variables. All other backend modules should import -`monitoring_manager` directly from this module. +with backend environment variables using OTLP protocol. All other backend modules +should import `monitoring_manager` directly from this module. 
Usage: from utils.monitoring import monitoring_manager - + @monitoring_manager.monitor_endpoint("my_service.my_function") async def my_function(): return {"status": "ok"} @@ -17,67 +17,88 @@ async def my_function(): MonitoringConfig, get_monitoring_manager ) -# Import configuration from backend (support both relative and absolute imports) try: - # Try relative import first (when running from backend directory) from consts.const import ( ENABLE_TELEMETRY, - SERVICE_NAME, - JAEGER_ENDPOINT, - PROMETHEUS_PORT, - TELEMETRY_SAMPLE_RATE, - LLM_SLOW_REQUEST_THRESHOLD_SECONDS, - LLM_SLOW_TOKEN_RATE_THRESHOLD + MONITORING_PROVIDER, + MONITORING_PROJECT_NAME, + OTEL_SERVICE_NAME, + OTEL_EXPORTER_OTLP_ENDPOINT, + OTEL_EXPORTER_OTLP_TRACES_ENDPOINT, + OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, + OTEL_EXPORTER_OTLP_PROTOCOL, + OTEL_EXPORTER_OTLP_METRICS_ENABLED, + MONITORING_INSTRUMENT_REQUESTS, + MONITORING_FASTAPI_INCLUDED_URLS, + MONITORING_FASTAPI_EXCLUDED_URLS, + MONITORING_FASTAPI_EXCLUDE_SPANS, + MONITORING_TRACE_CONTENT_MODE, + MONITORING_TRACE_MAX_CHARS, + MONITORING_TRACE_MAX_ITEMS, + OTLP_HEADERS, + TELEMETRY_SAMPLE_RATE ) except ImportError: - # Fallback to absolute import (when running from project root) from backend.consts.const import ( ENABLE_TELEMETRY, - SERVICE_NAME, - JAEGER_ENDPOINT, - PROMETHEUS_PORT, - TELEMETRY_SAMPLE_RATE, - LLM_SLOW_REQUEST_THRESHOLD_SECONDS, - LLM_SLOW_TOKEN_RATE_THRESHOLD + MONITORING_PROVIDER, + MONITORING_PROJECT_NAME, + OTEL_SERVICE_NAME, + OTEL_EXPORTER_OTLP_ENDPOINT, + OTEL_EXPORTER_OTLP_TRACES_ENDPOINT, + OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, + OTEL_EXPORTER_OTLP_PROTOCOL, + OTEL_EXPORTER_OTLP_METRICS_ENABLED, + MONITORING_INSTRUMENT_REQUESTS, + MONITORING_FASTAPI_INCLUDED_URLS, + MONITORING_FASTAPI_EXCLUDED_URLS, + MONITORING_FASTAPI_EXCLUDE_SPANS, + MONITORING_TRACE_CONTENT_MODE, + MONITORING_TRACE_MAX_CHARS, + MONITORING_TRACE_MAX_ITEMS, + OTLP_HEADERS, + TELEMETRY_SAMPLE_RATE ) import logging logger = logging.getLogger(__name__) -# 
============================================================================ -# Global Monitoring Manager Instance -# ============================================================================ - -# Get the global monitoring manager instance monitoring_manager = get_monitoring_manager() -# Initialize monitoring configuration immediately when this module is imported - def _initialize_monitoring(): - """Initialize monitoring configuration with backend environment variables.""" + """Initialize monitoring configuration with OTLP settings.""" config = MonitoringConfig( enable_telemetry=ENABLE_TELEMETRY, - service_name=SERVICE_NAME, - jaeger_endpoint=JAEGER_ENDPOINT, - prometheus_port=PROMETHEUS_PORT, + service_name=OTEL_SERVICE_NAME, + provider=MONITORING_PROVIDER or "otlp", + otlp_endpoint=OTEL_EXPORTER_OTLP_ENDPOINT, + otlp_traces_endpoint=OTEL_EXPORTER_OTLP_TRACES_ENDPOINT or None, + otlp_metrics_endpoint=OTEL_EXPORTER_OTLP_METRICS_ENDPOINT or None, + otlp_protocol=OTEL_EXPORTER_OTLP_PROTOCOL, + otlp_headers=OTLP_HEADERS, + export_metrics=OTEL_EXPORTER_OTLP_METRICS_ENABLED, + instrument_requests=MONITORING_INSTRUMENT_REQUESTS, + fastapi_included_urls=MONITORING_FASTAPI_INCLUDED_URLS, + fastapi_excluded_urls=MONITORING_FASTAPI_EXCLUDED_URLS, + fastapi_exclude_spans=MONITORING_FASTAPI_EXCLUDE_SPANS, + project_name=MONITORING_PROJECT_NAME or None, telemetry_sample_rate=TELEMETRY_SAMPLE_RATE, - llm_slow_request_threshold_seconds=LLM_SLOW_REQUEST_THRESHOLD_SECONDS, - llm_slow_token_rate_threshold=LLM_SLOW_TOKEN_RATE_THRESHOLD + trace_content_mode=MONITORING_TRACE_CONTENT_MODE, + trace_max_chars=MONITORING_TRACE_MAX_CHARS, + trace_max_items=MONITORING_TRACE_MAX_ITEMS ) - # Configure the SDK monitoring system using the singleton monitoring_manager.configure(config) logger.info( - f"Global monitoring initialized: service_name={SERVICE_NAME}, enable_telemetry={ENABLE_TELEMETRY}") + f"OTLP monitoring initialized: service_name={OTEL_SERVICE_NAME}, " + 
f"enable_telemetry={config.enable_telemetry}, provider={config.provider}, " + f"endpoint={config.otlp_endpoint}, trace_endpoint={config.get_trace_endpoint()}, " + f"protocol={OTEL_EXPORTER_OTLP_PROTOCOL}" + ) -# Initialize monitoring when module is imported _initialize_monitoring() - -# Export the global monitoring manager instance -__all__ = [ - 'monitoring_manager' -] +__all__ = ['monitoring_manager'] diff --git a/backend/utils/prompt_template_utils.py b/backend/utils/prompt_template_utils.py index cf83bfa60..8822e5fd4 100644 --- a/backend/utils/prompt_template_utils.py +++ b/backend/utils/prompt_template_utils.py @@ -5,9 +5,56 @@ import yaml from consts.const import LANGUAGE +from consts.prompt_template import ( + PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP, + PROMPT_GENERATE_TEMPLATE_FIELDS, +) logger = logging.getLogger("prompt_template_utils") +PROMPT_GENERATE_TEMPLATE_KEY_MAP = PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP +PROMPT_GENERATE_TEMPLATE_KEYS = PROMPT_GENERATE_TEMPLATE_FIELDS + + +def get_prompt_generate_template_keys() -> list[str]: + """Return the supported prompt generation template keys.""" + return list(PROMPT_GENERATE_TEMPLATE_FIELDS) + + +def normalize_prompt_generate_template_content( + template_content: Optional[Dict[str, Any]] +) -> Dict[str, str]: + """Normalize prompt generation template content and keep non-empty fields only.""" + normalized: Dict[str, str] = {} + if not isinstance(template_content, dict): + return normalized + + for key in PROMPT_GENERATE_TEMPLATE_FIELDS: + legacy_key = PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP[key] + value = template_content.get(key) + if value is None: + value = template_content.get(legacy_key) + if isinstance(value, str) and value.strip(): + normalized[key] = value + + return normalized + + +def merge_prompt_generate_templates( + *template_contents: Optional[Dict[str, Any]] +) -> Dict[str, str]: + """Merge multiple prompt generation templates with first-non-empty priority.""" + merged: Dict[str, str] = {} 
+ + for template_content in template_contents: + normalized = normalize_prompt_generate_template_content(template_content) + for key in PROMPT_GENERATE_TEMPLATE_FIELDS: + value = normalized.get(key) + if value and key not in merged: + merged[key] = value + + return merged + def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kwargs) -> Dict[str, Any]: """ @@ -16,6 +63,7 @@ def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kw Args: template_type: Template type, supports the following values: - 'prompt_generate': Prompt generation template + - 'prompt_optimize': Prompt section optimization template - 'agent': Agent template including manager and managed agents - 'generate_title': Title generation template - 'document_summary': Document summary template (Map stage) @@ -33,6 +81,10 @@ def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kw LANGUAGE["ZH"]: 'backend/prompts/utils/prompt_generate_zh.yaml', LANGUAGE["EN"]: 'backend/prompts/utils/prompt_generate_en.yaml' }, + 'prompt_optimize': { + LANGUAGE["ZH"]: 'backend/prompts/utils/prompt_optimize_zh.yaml', + LANGUAGE["EN"]: 'backend/prompts/utils/prompt_optimize_en.yaml' + }, 'agent': { LANGUAGE["ZH"]: { 'manager': 'backend/prompts/manager_system_prompt_template_zh.yaml', @@ -101,6 +153,19 @@ def get_prompt_generate_prompt_template(language: str = LANGUAGE["ZH"]) -> Dict[ return get_prompt_template('prompt_generate', language) +def get_prompt_optimize_prompt_template(language: str = LANGUAGE["ZH"]) -> Dict[str, Any]: + """ + Get prompt optimization template. 
+ + Args: + language: Language code ('zh' or 'en') + + Returns: + dict: Loaded prompt optimization template configuration + """ + return get_prompt_template('prompt_optimize', language) + + def get_agent_prompt_template(is_manager: bool, language: str = LANGUAGE["ZH"]) -> Dict[str, Any]: """ Get agent prompt template diff --git a/backend/utils/tool_utils.py b/backend/utils/tool_utils.py index f06f36bc3..f1d9147e3 100644 --- a/backend/utils/tool_utils.py +++ b/backend/utils/tool_utils.py @@ -46,7 +46,8 @@ def get_local_tools_description_zh() -> Dict[str, Dict]: if hasattr(param.default, 'exclude') and param.default.exclude: continue - param_description_zh = param.default.description_zh if hasattr(param.default, 'description_zh') else None + # Note: Pydantic Field doesn't have description_zh attribute + param_description_zh = getattr(param.default, 'description_zh', None) if hasattr(param.default, 'description_zh') else None if param_description_zh is None and param_name in init_param_descriptions: param_description_zh = init_param_descriptions[param_name].get('description_zh') diff --git a/doc/docs/.vitepress/config.mts b/doc/docs/.vitepress/config.mts index 6ee76ff5d..87e79a831 100644 --- a/doc/docs/.vitepress/config.mts +++ b/doc/docs/.vitepress/config.mts @@ -385,6 +385,7 @@ export default defineConfig({ ], }, { text: "性能监控", link: "/zh/sdk/monitoring" }, + { text: "OpenTelemetry 设计", link: "/zh/sdk/opentelemetry-design" }, { text: "向量数据库", link: "/zh/sdk/vector-database" }, { text: "数据处理", link: "/zh/sdk/data-process" }, ], diff --git a/doc/docs/en/deployment/devcontainer.md b/doc/docs/en/deployment/devcontainer.md index 84a49f47e..ce6efe7be 100644 --- a/doc/docs/en/deployment/devcontainer.md +++ b/doc/docs/en/deployment/devcontainer.md @@ -25,7 +25,7 @@ This development container configuration sets up a complete Nexent development e 1. Clone the project locally 2. Open project folder in Cursor/VS Code -3. 
Run `docker/deploy.sh` script in `infrastructure` mode to start containers +3. Run `./deploy.sh --components infrastructure,application --port-policy development` from the `docker` directory to start base containers 4. Enter `nexent-minio` and `nexent-elasticsearch` containers, copy `MINIO_ACCESS_KEY`, `MINIO_SECRET_KEY`, `ELASTICSEARCH_API_KEY` environment variables to corresponding positions in `docker/docker-compose.dev.yml` 5. Press `F1` or `Ctrl+Shift+P`, type `Dev Containers: Reopen in Container ...` 6. Cursor will start the development container based on configuration in `.devcontainer` directory diff --git a/doc/docs/en/deployment/docker-build.md b/doc/docs/en/deployment/docker-build.md index 47f51d891..bf36dc5d4 100644 --- a/doc/docs/en/deployment/docker-build.md +++ b/doc/docs/en/deployment/docker-build.md @@ -178,6 +178,11 @@ Notes: ## 🚀 Deployment Recommendations -After building is complete, you can use the docker/deploy.sh script for deployment, or directly start the services using docker-compose. +After building is complete, you can deploy local images from the `docker` directory: -> When starting a test of locally built images, you need to change APP_VERSION="$(get_app_version)" to APP_VERSION="latest" in docker/deploy.sh, because the deployment will default to using the image corresponding to the current version. +```bash +cd docker +bash deploy.sh --image-source local-latest +``` + +> `local-latest` uses local `latest` Nexent application images and avoids pulling those images again. You do not need to modify `docker/deploy.sh`. 
diff --git a/doc/docs/en/developer-guide/environment-setup.md b/doc/docs/en/developer-guide/environment-setup.md index 21f3cb6af..e2b0b9ed3 100644 --- a/doc/docs/en/developer-guide/environment-setup.md +++ b/doc/docs/en/developer-guide/environment-setup.md @@ -23,7 +23,7 @@ Before backend work, start core services (PostgreSQL, Redis, Elasticsearch, MinI ```bash # Run from the docker directory at the project root cd docker -./deploy.sh --mode infrastructure +./deploy.sh --components infrastructure --port-policy development ``` :::: info Important Notes @@ -139,4 +139,3 @@ This adds: - Testing framework (pytest) - Data processing dependencies (unstructured) - Other developer utilities - diff --git a/doc/docs/en/getting-started/software-architecture.md b/doc/docs/en/getting-started/software-architecture.md index dde7f8525..99e38a5f9 100644 --- a/doc/docs/en/getting-started/software-architecture.md +++ b/doc/docs/en/getting-started/software-architecture.md @@ -274,7 +274,7 @@ Real-time Input → Streaming Endpoint → Async Processing - **High Availability**: Multi-service redundancy, health checks, auto-restart - **High Performance**: Async processing, Redis caching, vector search optimization - **High Concurrency**: Distributed architecture, load balancing -- **Monitoring Friendly**: Prometheus metrics, Jaeger tracing, structured logging +- **Monitoring Friendly**: OpenTelemetry observability, Grafana Tempo tracing, structured logging ### 🔧 Developer Friendly - **Modular Development**: Clean layered architecture (App → Service → Database) diff --git a/doc/docs/en/quick-start/installation.md b/doc/docs/en/quick-start/installation.md index 50c2f0b59..3156a1e06 100644 --- a/doc/docs/en/quick-start/installation.md +++ b/doc/docs/en/quick-start/installation.md @@ -19,10 +19,9 @@ ```bash git clone https://github.com/ModelEngine-Group/nexent.git cd nexent/docker -cp .env.example .env # Configure environment variables ``` -> **💡 Tip**: If there are no special requirements, you 
can directly use `.env.example` for deployment without making any changes. If you need to configure voice models (STT/TTS), you will need to set the relevant parameters in `.env`. We will work on making this configuration available through the frontend soon—stay tuned. +> **💡 Tip**: `deploy.sh` automatically copies `.env.example` to `docker/.env` when `docker/.env` does not exist. If you need to configure voice models (STT/TTS), update the related values in `docker/.env` before or after deployment. ### 2. Deployment Options @@ -32,20 +31,42 @@ Run the following command to start deployment: bash deploy.sh ``` -After executing this command, the system will provide two different versions for you to choose from: +After running the command, the script opens Bash TUI menus for deployment options. Use arrow keys or `j/k` to move, Space to toggle multi-select items, Enter to confirm, `b`/Backspace to go back, and `q` to quit. -**Version Selection:** -- **Speed version (Lightweight & Fast Deployment, Default)**: Quick startup of core features, suitable for individual users and small teams -- **Full version (Complete Feature Edition)**: Provides enterprise-level tenant management and resource isolation features, but takes longer to install, suitable for enterprise users +**Deployment Components:** +- **infrastructure (required)**: Elasticsearch, PostgreSQL, Redis, MinIO +- **application (selected by default, optional)**: config, runtime, mcp, northbound, web +- **data-process (optional)**: data processing service +- **supabase (optional)**: enables user, tenant, and authentication features +- **terminal (optional)**: enables the OpenSSH terminal tool +- **monitoring (optional)**: enables observability components and then prompts for a provider -**Deployment Modes:** -- **Development mode (default)**: Exposes all service ports for debugging -- **Infrastructure mode**: Only starts infrastructure services -- **Production mode**: Only exposes port 3000 for security +**Port 
Policy:** +- **development (default)**: publishes debug and internal service ports for local troubleshooting +- **production**: publishes only production entry ports -**Optional Components:** -- **Terminal Tool**: Enables openssh-server for AI agent shell command execution -- **Regional optimization**: Mainland China users can use optimized image sources +**Image Source:** +- **general (default)**: uses standard public registries +- **mainland**: uses mainland China mirrors +- **local-latest**: uses local `latest` Nexent images and avoids pulling Nexent application images + +You can also pass options directly: + +```bash +# Default component set, development port policy, standard image source +bash deploy.sh --components infrastructure,application --port-policy development --image-source general + +# Enable user/tenant features, data processing, and terminal +bash deploy.sh --components infrastructure,application,supabase,data-process,terminal + +# Use mainland China image sources +bash deploy.sh --image-source mainland + +# Use local latest images +bash deploy.sh --image-source local-latest +``` + +After a successful deployment, non-sensitive choices are saved to `docker/deploy.options`. The next interactive deployment can reuse the local config or run a full reconfiguration. #### ⚠️ Important Notes @@ -102,7 +123,7 @@ Nexent uses a microservices architecture deployed via Docker Compose. | nexent-minio | S3-compatible object storage | | redis | Caching layer | -**Supabase Services (Full Version Only):** +**Supabase Services (when `supabase` is selected):** | Service | Description | |---------|-------------| | supabase-kong | API Gateway | @@ -113,6 +134,7 @@ Nexent uses a microservices architecture deployed via Docker Compose. | Service | Description | |---------|-------------| | nexent-openssh-server | SSH terminal for AI agents | +| nexent-monitoring | Optional observability stack | Internal services communicate using the Docker internal network. 
@@ -126,10 +148,12 @@ Nexent uses Docker volumes for data persistence: | Elasticsearch | nexent-elasticsearch-data | `{dataDir}/elasticsearch` | | Redis | nexent-redis-data | `{dataDir}/redis` | | MinIO | nexent-minio-data | `{dataDir}/minio` | -| Supabase DB (Full) | nexent-supabase-db-data | `{dataDir}/supabase-db` | +| Supabase DB (when `supabase` is selected) | nexent-supabase-db-data | `{dataDir}/supabase-db` | Default `dataDir` is `./volumes` (configurable via `ROOT_DIR` in `.env`). +Uninstall is handled by `docker/uninstall.sh`. It prompts before deleting persistent data by default; you can also pass `--delete-volumes true|false`, `--remove-volumes`, `--keep-volumes`, or use `bash uninstall.sh delete-all` to remove containers and persistent data. + ## 🔌 Port Mapping | Service | Internal Port | External Port | Description | @@ -183,4 +207,4 @@ NORTHBOUND_EXTERNAL_URL=https://api.yourdomain.com/api Want to build from source or add new features? Check the [Docker Build Guide](../deployment/docker-build) for step-by-step instructions. -For detailed setup instructions and customization options, see our [Developer Guide](../developer-guide/overview). \ No newline at end of file +For detailed setup instructions and customization options, see our [Developer Guide](../developer-guide/overview). diff --git a/doc/docs/en/quick-start/kubernetes-installation.md b/doc/docs/en/quick-start/kubernetes-installation.md index 44ca3c993..895ce4a41 100644 --- a/doc/docs/en/quick-start/kubernetes-installation.md +++ b/doc/docs/en/quick-start/kubernetes-installation.md @@ -35,21 +35,29 @@ cd nexent/k8s/helm Run the deployment script: ```bash -./deploy-helm.sh apply +./deploy.sh ``` -After executing this command, the system will prompt for configuration options: +After running the command, the script opens Bash TUI menus for configuration. Use arrow keys or `j/k` to move, Space to toggle multi-select items, Enter to confirm, `b`/Backspace to go back, and `q` to quit. 
-**Version Selection:** -- **Speed version (Lightweight & Fast Deployment, Default)**: Quick startup of core features, suitable for individual users and small teams -- **Full version (Complete Feature Edition)**: Provides enterprise-level tenant management and resource isolation features, includes Supabase authentication +**Deployment Components:** +- **infrastructure (required)**: Elasticsearch, PostgreSQL, Redis, MinIO +- **application (selected by default, optional)**: config, runtime, mcp, northbound, web +- **data-process (optional)**: data processing service +- **supabase (optional)**: enables user, tenant, and authentication features +- **terminal (optional)**: enables the OpenSSH terminal tool +- **monitoring (optional)**: enables observability components and then prompts for a provider -**Image Source Selection:** -- **Mainland China**: Uses optimized regional mirrors for faster image pulling -- **General**: Uses standard Docker Hub registries +**Port Policy:** +- **development (default)**: uses NodePort for Web and selected debug/internal services +- **production**: keeps internal services as ClusterIP and exposes only production entrypoints -**Optional Components:** -- **Terminal Tool**: Enables openssh-server for AI agent shell command execution +**Image Source:** +- **general (default)**: uses standard public registries +- **mainland**: uses mainland China mirrors +- **local-latest**: uses local `latest` images and local-friendly pull policies for Nexent application images + +After a successful deployment, non-sensitive choices are saved to `k8s/helm/deploy.options`. The next interactive deployment can reuse the local config or run a full reconfiguration. ### ⚠️ Important Notes @@ -72,7 +80,7 @@ kubectl exec -it -n nexent deploy/nexent-postgresql -- psql -U root -d nexent -c "DELETE FROM nexent.user_tenant_t WHERE user_id='your_user_id';" # Step 3: Re-deploy and record the su account password -./deploy-helm.sh apply +./deploy.sh ``` ### 4. 
Access Your Installation @@ -113,7 +121,7 @@ Nexent uses a microservices architecture deployed via Helm charts: | nexent-redis | Caching layer | | nexent-minio | S3-compatible object storage | -**Supabase Services (Full Version Only):** +**Supabase Services (when `supabase` is selected):** | Service | Description | |---------|-------------| | nexent-supabase-kong | API Gateway | @@ -124,13 +132,14 @@ Nexent uses a microservices architecture deployed via Helm charts: | Service | Description | |---------|-------------| | nexent-openssh-server | SSH terminal for AI agents | +| nexent-monitoring | Optional observability stack | ## 🔌 Port Mapping | Service | Internal Port | NodePort | Description | |---------|---------------|----------|-------------| | Web Interface | 3000 | 30000 | Main application access | -| Northbound API | 5010 | 30013 | Northbound API service | +| Northbound API | 5013 | 30013 | Northbound API service | | SSH Server | 22 | 30022 | Terminal tool access | For internal service communication, services use Kubernetes internal DNS (e.g., `http://nexent-config:5010`). 
@@ -141,34 +150,49 @@ Nexent uses PersistentVolumes for data persistence: | Data Type | PersistentVolume | Default Host Path | |-----------|------------------|-------------------| -| Elasticsearch | nexent-elasticsearch-pv | `{dataDir}/elasticsearch` | -| PostgreSQL | nexent-postgresql-pv | `{dataDir}/postgresql` | -| Redis | nexent-redis-pv | `{dataDir}/redis` | -| MinIO | nexent-minio-pv | `{dataDir}/minio` | -| Supabase DB (Full) | nexent-supabase-db-pv | `{dataDir}/supabase-db` | +| Elasticsearch | nexent-elasticsearch-pv | `/var/lib/nexent-data/nexent-elasticsearch` | +| PostgreSQL | nexent-postgresql-pv | `/var/lib/nexent-data/nexent-postgresql` | +| Redis | nexent-redis-pv | `/var/lib/nexent-data/nexent-redis` | +| MinIO | nexent-minio-pv | `/var/lib/nexent-data/nexent-minio` | +| Supabase DB (when `supabase` is selected) | nexent-supabase-db-pv | `/var/lib/nexent-data/nexent-supabase-db` | -Default `dataDir` is `/var/lib/nexent-data` (configurable in `values.yaml`). +Helm uninstall does not delete local hostPath data by default. Use `./uninstall.sh --delete-local-data true` to delete known Nexent local volume contents under `/var/lib/nexent-data/nexent-*`, or `--keep-local-data` to preserve them explicitly. 
## 🔧 Deployment Commands ```bash # Deploy with interactive prompts -./deploy-helm.sh apply +./deploy.sh + +# Non-interactive deployment with the default component set +./deploy.sh --components infrastructure,application --port-policy development --image-source general + +# Enable user/tenant features, data processing, and terminal +./deploy.sh --components infrastructure,application,supabase,data-process,terminal # Deploy with mainland China image sources -./deploy-helm.sh apply --is-mainland Y +./deploy.sh --image-source mainland -# Deploy full version (with Supabase) -./deploy-helm.sh apply --deployment-version full +# Use local latest images +./deploy.sh --image-source local-latest # Clean helm state only (fixes stuck releases) -./deploy-helm.sh clean +./uninstall.sh clean + +# Uninstall; local data is preserved by default, with interactive prompts for namespace and local data deletion +./uninstall.sh + +# Uninstall and delete the namespace +./uninstall.sh --delete-namespace true + +# Uninstall and delete local hostPath data +./uninstall.sh --delete-local-data true -# Uninstall but preserve data -./deploy-helm.sh delete +# Complete uninstall including namespace and local hostPath data +./uninstall.sh delete-all -# Complete uninstall including all data -./deploy-helm.sh delete-all +# Complete uninstall but preserve local hostPath data +./uninstall.sh delete-all --keep-local-data ``` ## 🔍 Troubleshooting diff --git a/doc/docs/en/quick-start/kubernetes-upgrade-guide.md b/doc/docs/en/quick-start/kubernetes-upgrade-guide.md index 293358d2f..75afcfba9 100644 --- a/doc/docs/en/quick-start/kubernetes-upgrade-guide.md +++ b/doc/docs/en/quick-start/kubernetes-upgrade-guide.md @@ -15,7 +15,7 @@ Follow these steps to upgrade Nexent on Kubernetes safely: Before updating, record the current deployment version and data directory information. 
- Current Deployment Version Location: `APP_VERSION` in `backend/consts/const.py` -- Data Directory Location: `global.dataDir` in `k8s/helm/nexent/values.yaml` +- Local volume directories: each Helm sub-chart's `storage.hostPath`, defaulting to `/var/lib/nexent-data/nexent-*` **Code downloaded via git** @@ -28,7 +28,7 @@ git pull **Code downloaded via ZIP package or other means** 1. Re-download the latest code from GitHub and extract it. -2. Copy the `.deploy.options` file from the `k8s/helm` directory of your previous deployment to the new code directory. (If the file doesn't exist, you can ignore this step). +2. Copy the `deploy.options` file from the `k8s/helm` directory of your previous deployment to the new code directory. (If the file doesn't exist, you can ignore this step). ## 🔄 Step 2: Execute the Upgrade @@ -36,10 +36,10 @@ Navigate to the k8s/helm directory of the updated code and run the deployment sc ```bash cd k8s/helm -./deploy-helm.sh apply +./deploy.sh ``` -The script will detect your previous deployment settings (version, image source, etc.) from the `.deploy.options` file. If the file is missing, you will be prompted to enter configuration details. +The script will detect your saved deployment settings (components, port policy, image source, etc.) from `deploy.options`. If the file is missing, you will be prompted to enter configuration details. > 💡 Tip > If you need to configure voice models (STT/TTS), please edit the corresponding values in `values.yaml` or pass them via command line. 
@@ -137,7 +137,7 @@ kubectl exec -i $POSTGRES_POD -n nexent -- psql -U root -d nexent < ./sql/v2.0.0 kubectl exec nexent/$POSTGRES_POD -n nexent -- pg_dump -U root nexent > backup_$(date +%F).sql ``` -> - For Supabase database (full version only), use `nexent-supabase-db` pod instead: +> - For the Supabase database (when `supabase` is selected), use the `nexent-supabase-db` pod instead: ```bash SUPABASE_POD=$(kubectl get pods -n nexent -l app=nexent-supabase-db -o jsonpath='{.items[0].metadata.name}') diff --git a/doc/docs/en/quick-start/upgrade-guide.md b/doc/docs/en/quick-start/upgrade-guide.md index 497212e06..3bc22f254 100644 --- a/doc/docs/en/quick-start/upgrade-guide.md +++ b/doc/docs/en/quick-start/upgrade-guide.md @@ -38,11 +38,11 @@ Navigate to the docker directory of the updated code and run the upgrade script: bash upgrade.sh ``` -If deploy.options is missing, the script will prompt you to manually enter configuration details from the previous deployment, such as the current version and data directory. Enter the information you recorded earlier. +If `deploy.options` is missing, the script will prompt you to select deployment settings again, such as components, port policy, and image source. Choose the same options you used for the previous deployment. >💡 Tip -> The default scenario is quick deployment, which uses .env.example. -> If you need to configure voice models (STT/TTS), please add the relevant variables to .env.example in advance. We will provide a front-end configuration interface as soon as possible. +> If `docker/.env` is missing, the deploy script automatically copies it from `.env.example`. +> If you need to configure voice models (STT/TTS), add the relevant variables to `docker/.env`. We will provide a front-end configuration interface as soon as possible.
## 🌐 Step 3: Verify the deployment diff --git a/doc/docs/en/sdk/monitoring.md b/doc/docs/en/sdk/monitoring.md index 4aa625132..bb7c1db13 100644 --- a/doc/docs/en/sdk/monitoring.md +++ b/doc/docs/en/sdk/monitoring.md @@ -1,289 +1,327 @@ -# 🚀 Nexent LLM Monitoring System +# Nexent Agent Observability (OTLP) -Enterprise-grade monitoring solution specifically designed for monitoring LLM token generation speed and performance. +Enterprise-grade observability for AI agents using OpenTelemetry OTLP protocol. Supports integration with observability platforms like Arize Phoenix, Langfuse, LangSmith, Grafana Tempo, Zipkin, and more. -## 📊 System Architecture +## Architecture ``` -┌─────────────────────────────────────────────────────────┐ -│ Nexent LLM Monitoring System │ -├─────────────────────────────────────────────────────────┤ -│ │ -│ Nexent API ──► OpenTelemetry ──► Jaeger (Tracing) │ -│ │ │ │ -│ │ └──────► Prometheus (Metrics) │ -│ │ │ │ -│ └─► OpenAI LLM └──► Grafana (Visualization) │ -│ (Token Monitoring) │ -└─────────────────────────────────────────────────────────┘ +NexentAgent ──► OpenTelemetry SDK ──► OTLP Collector ──► Arize Phoenix / Langfuse / LangSmith / Grafana Tempo / Zipkin / OTLP Backend + │ │ + │ OpenInference Semantics │ + │ (llm.*, agent.* attributes) │ + └────────────────────────────────────────┘ ``` -## ⚡ Quick Start (5 minutes) +## Quick Start ```bash -# 1. Start monitoring services -./docker/start-monitoring.sh +cd docker +[ -f .env ] || cp .env.example .env +cp monitoring/monitoring.env.example monitoring/monitoring.env -# 2. Install performance monitoring dependencies -uv sync --extra performance +vim .env +ENABLE_TELEMETRY=true +MONITORING_PROVIDER=otlp +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http -# 3. Enable monitoring -export ENABLE_TELEMETRY=true +vim monitoring/monitoring.env +MONITORING_PROVIDER=otlp -# 4. 
Start backend service -python backend/config_service.py -python backend/runtime_service.py +./start-monitoring.sh --stack collector ``` -## 📊 Access Monitoring Interfaces +## AI Observability Platforms -| Interface | URL | Purpose | -|-----------|-----|---------| -| **Grafana Dashboard** | http://localhost:3005 | LLM Performance Monitoring | -| **Jaeger Tracing** | http://localhost:16686 | Request Trace Analysis | -| **Prometheus Metrics** | http://localhost:9090 | Raw Monitoring Data | +### Arize Phoenix -### 🔐 Grafana Login Information +Arize Phoenix provides AI-specific observability with OpenInference semantic support. -When first accessing Grafana (http://localhost:3005), you need to login: +**Configuration:** +```bash +MONITORING_PROVIDER=phoenix +OTEL_EXPORTER_OTLP_ENDPOINT=https://app.phoenix.arize.com/s/YOUR_SPACE +OTEL_EXPORTER_OTLP_AUTHORIZATION="Bearer YOUR_PHOENIX_API_KEY" +OTEL_EXPORTER_OTLP_PROTOCOL=http +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false ``` -Username: admin -Password: admin -``` - -**After first login, you'll be prompted to change password:** -- Set a new password (recommended) -- Click "Skip" to skip (development environment) -**After login, you can see:** -- 📊 **LLM Performance Dashboard** - Pre-configured performance dashboard -- 📈 **Data Source Configuration** - Auto-connected to Prometheus and Jaeger -- 🎯 **Real-time Monitoring Panel** - Key metrics like token generation speed, latency +**Features:** +- LLM trace visualization with prompt/completion +- Token-level performance metrics +- Agent step tracing +- Cost analysis -## 🎯 Core Features +### Langfuse -### ⚡ LLM-Specific Monitoring -- **Token Generation Speed**: Real-time monitoring of tokens generated per second -- **TTFT (Time to First Token)**: First token return latency -- **Streaming Response Analysis**: Generation timestamp for each token -- **Model Performance Comparison**: Performance benchmarks across different models +Langfuse offers prompt management and LLM observability 
with OTLP support. -### 🔍 Distributed Tracing -- **Complete Request Chain**: End-to-end tracing from HTTP to LLM -- **Performance Bottleneck Detection**: Automatically identify slow queries and anomalies -- **Error Root Cause Analysis**: Quickly locate problem sources +**Configuration:** -### 🛠️ Developer-Friendly Design -- **One-Line Integration**: Quick monitoring with decorators -- **Zero-Dependency Degradation**: Auto-skip when monitoring dependencies are missing -- **Zero-Touch Usage**: No need to manually check monitoring status, handled automatically -- **Flexible Configuration**: Environment variable controlled behavior - -## 🛠️ Adding Monitoring to Code +```bash +MONITORING_PROVIDER=langfuse +OTEL_EXPORTER_OTLP_ENDPOINT=https://cloud.langfuse.com/api/public/otel -### 🎯 Recommended Approach: Singleton Pattern (v2.1+) +LANGFUSE_PUBLIC_KEY=pk-xxx +LANGFUSE_SECRET_KEY=sk-xxx -```python -# Backend service usage - directly use globally configured monitoring_manager -from utils.monitoring import monitoring_manager - -# API endpoint monitoring -@monitoring_manager.monitor_endpoint("my_service.my_function") -async def my_api_function(): - return {"status": "ok"} +OTEL_EXPORTER_OTLP_AUTHORIZATION=Basic BASE64_ENCODED_KEY +OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION=4 +``` -# LLM call monitoring -@monitoring_manager.monitor_llm_call("gpt-4", "chat_completion") -def call_llm(messages): - # Automatically get token-level monitoring - return llm_response +Generate the encoded key: -# Manual monitoring events -monitoring_manager.add_span_event("custom_event", {"key": "value"}) -monitoring_manager.set_span_attributes(user_id="123", action="process") +```bash +echo -n "$LANGFUSE_PUBLIC_KEY:$LANGFUSE_SECRET_KEY" | base64 ``` -### 📦 Direct SDK Usage +**Features:** +- Prompt versioning and management +- Session-based trace grouping +- User feedback collection +- Model cost tracking -```python -from nexent.monitor import get_monitoring_manager - -# Get global monitoring 
manager - already configured in backend -monitor = get_monitoring_manager() - -# Use decorators -@monitor.monitor_llm_call("claude-3", "completion") -def my_llm_function(): - return "response" - -# Or use directly in business logic -with monitor.trace_llm_request("custom_operation", "my_model") as span: - # Execute business logic - result = process_data() - monitor.add_span_event("processing_completed") - return result -``` +### LangSmith -### ✨ Global Configuration Automation +LangSmith supports online OTLP trace ingestion through the OpenTelemetry endpoint. Nexent can send traces to a local Collector first, and the Collector forwards them to LangSmith. -Monitoring configuration is auto-initialized in `backend/utils/monitoring.py`: +**Collector forwarding:** -```python -# No manual configuration needed - auto-completed at system startup -# monitoring_manager already configured with environment variables -from utils.monitoring import monitoring_manager +```bash +cd docker +vim monitoring/monitoring.env -# Direct usage without checking if enabled -@monitoring_manager.monitor_endpoint("my_function") -def my_function(): - pass +MONITORING_PROVIDER=langsmith +LANGSMITH_API_KEY=lsv2_xxx +LANGSMITH_PROJECT=nexent +LANGSMITH_OTLP_TRACES_ENDPOINT=https://api.smith.langchain.com/otel/v1/traces -# FastAPI application initialization -monitoring_manager.setup_fastapi_app(app) +./start-monitoring.sh --stack langsmith ``` -### 🔒 Auto Start/Stop Design - -- **Smart Monitoring**: Auto start/stop based on `ENABLE_TELEMETRY` environment variable -- **Zero-Touch Usage**: External code doesn't need to check monitoring status, use all features directly -- **Graceful Degradation**: Silent no-effect when disabled, normal operation when enabled -- **Default Off**: Auto-disabled when not configured +Nexent backend configuration when it sends OTLP to the Collector: ```bash -# Enable monitoring -export ENABLE_TELEMETRY=true - -# Disable monitoring -export ENABLE_TELEMETRY=false 
+ENABLE_TELEMETRY=true +MONITORING_PROVIDER=langsmith +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false ``` -## 📊 Core Monitoring Metrics +For direct backend-to-LangSmith export, set `OTEL_EXPORTER_OTLP_ENDPOINT=https://api.smith.langchain.com/otel`, `LANGSMITH_API_KEY`, and optionally `LANGSMITH_PROJECT`. -| Metric | Description | Importance | -|--------|-------------|------------| -| `llm_token_generation_rate` | Token generation speed (tokens/s) | ⭐⭐⭐ | -| `llm_time_to_first_token_seconds` | First token latency | ⭐⭐⭐ | -| `llm_request_duration_seconds` | Complete request duration | ⭐⭐⭐ | -| `llm_total_tokens` | Input/output token count | ⭐⭐ | -| `llm_error_count` | LLM call error count | ⭐⭐⭐ | +### Zipkin -## 🔧 Environment Configuration +Zipkin provides a lightweight local trace query UI. For local deployment, Nexent sends OTLP to the Collector, and the Collector forwards traces to Zipkin. ```bash -# Add to .env file -cat >> .env << EOF -ENABLE_TELEMETRY=true -SERVICE_NAME=nexent-backend -JAEGER_ENDPOINT=http://localhost:14268/api/traces -LLM_SLOW_REQUEST_THRESHOLD_SECONDS=5.0 -LLM_SLOW_TOKEN_RATE_THRESHOLD=10.0 -TELEMETRY_SAMPLE_RATE=1.0 # Development environment, production recommended 0.1 -EOF +MONITORING_PROVIDER=zipkin +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +MONITORING_DASHBOARD_URL=http://localhost:9411 ``` -## 🛠️ System Verification +Set `MONITORING_DASHBOARD_URL` to the browser-accessible monitoring UI URL. The backend returns this value to the frontend top bar without deriving a provider-specific path. 
```bash -# Check metrics endpoint -curl http://localhost:8000/metrics - -# Verify dependency installation -python -c "from backend.utils.monitoring import MONITORING_AVAILABLE; print(f'Monitoring Available: {MONITORING_AVAILABLE}')" +MONITORING_DASHBOARD_URL=http://localhost:6006 +MONITORING_DASHBOARD_URL=http://localhost:3001/project/nexent +MONITORING_DASHBOARD_URL=http://localhost:3002/d/nexent-llm-agent/nexent-agent-trace-monitoring?orgId=1 +MONITORING_DASHBOARD_URL=http://localhost:9411 ``` -## 🆘 Troubleshooting +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `ENABLE_TELEMETRY` | `false` | Enable/disable monitoring | +| `MONITORING_PROVIDER` | `otlp` | Provider profile: `otlp`, `phoenix`, `langfuse`, `langsmith`, `grafana`, `zipkin` | +| `MONITORING_DASHBOARD_URL` | (empty) | Browser-accessible monitoring UI URL used by the frontend top bar | +| `MONITORING_PROJECT_NAME` | `nexent` | Observability platform project name | +| `MONITORING_TRACE_CONTENT_MODE` | `summary` | Trace payload mode: `summary` records bounded previews plus metadata, `metrics` records only structure/size metadata, `full` keeps full payloads subject to `MONITORING_TRACE_MAX_CHARS` | +| `MONITORING_TRACE_MAX_CHARS` | `4000` | Maximum characters for each payload preview written to trace attributes | +| `MONITORING_TRACE_MAX_ITEMS` | `20` | Maximum dict keys/list items included in payload previews | +| `OTEL_SERVICE_NAME` | `nexent-backend` | Service identifier | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | `http://localhost:4318` | OTLP base endpoint; SDK derives `/v1/traces` and `/v1/metrics` | +| `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` | (empty) | Optional trace-specific endpoint | +| `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` | (empty) | Optional metric-specific endpoint | +| `OTEL_EXPORTER_OTLP_PROTOCOL` | `http` | Protocol: `http` or `grpc` | +| `OTEL_EXPORTER_OTLP_HEADERS` | (empty) | Generic auth headers (comma-separated) | +| 
`OTEL_EXPORTER_OTLP_AUTHORIZATION` | (empty) | `Authorization` header, commonly used by Phoenix bearer auth and Langfuse | +| `OTEL_EXPORTER_OTLP_X_API_KEY` | (empty) | `x-api-key` header for platforms that require it | +| `OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION` | (empty) | Langfuse ingestion version, for example `4` | +| `OTEL_EXPORTER_OTLP_METRICS_ENABLED` | `true` | Whether to export OTLP metrics | +| `LANGSMITH_API_KEY` | (empty) | LangSmith API key; mapped to the `x-api-key` OTLP header | +| `LANGSMITH_PROJECT` | (empty) | Optional LangSmith project header | +| `LANGSMITH_OTLP_TRACES_ENDPOINT` | `https://api.smith.langchain.com/otel/v1/traces` | Collector trace endpoint for online LangSmith | + +## Code Integration + +### Agent Boundary Context + +At the request boundary, business code only binds the resolved user and Agent metadata once. The SDK then creates Agent, LLM, and Tool spans from the runtime lifecycle: -### No monitoring data? -```bash -# Check service status -docker-compose -f docker/docker-compose-monitoring.yml ps +```python +from nexent.monitor.agent_observability import AgentRunMetadata +from utils.monitoring import monitoring_manager -# Check dependency installation -python -c "import opentelemetry; print('✅ Monitoring dependencies installed')" +monitoring_manager.bind_agent_context(AgentRunMetadata( + tenant_id=tenant_id, + user_id=user_id, + agent_id=agent_request.agent_id, + conversation_id=agent_request.conversation_id, + query=agent_request.query, + is_debug=agent_request.is_debug, + language=language, +)) ``` -### Port conflicts? -```bash -# Check port usage -lsof -i :3005 -i :9090 -i :16686 +`monitor_endpoint` is still kept as a compatibility API and low-level escape hatch, but it is no longer the recommended way to add normal Agent observability. + +### Trace Payload Policy + +Tool input/output, retriever output, and Langfuse-compatible `input.value` / `output.value` attributes share the same payload policy. 
By default Nexent writes a bounded preview plus structured metadata such as `type`, `size_chars`, `item_count`, `truncated`, and `keys`. Memory search spans intentionally record only result summaries and statistics, not full memory text bodies. + +Agent context metrics are emitted from the SDK lifecycle. Each action step records an `agent.step.metrics` event with estimated context tokens, compression calls, cache hits, compression ratio, and token threshold. The final Agent span also receives aggregate step count, max context size, average compression ratio, total compression calls, and cache hit totals. + +### LLM Call Monitoring + +```python +@monitoring_manager.monitor_llm_call("gpt-4", "chat_completion") +def call_llm(messages): + return llm_response ``` -### Dependency installation issues? -```bash -# Reinstall performance dependencies -uv sync --extra performance +### Agent Step Tracing -# Check performance configuration in pyproject.toml -cat backend/pyproject.toml | grep -A 20 "performance" +```python +with monitoring_manager.trace_agent_step("agent.run.loop", step_type="agent_loop") as span: + result = execute_tool() + monitoring_manager.set_tool_output(result) ``` -### Service name shows as unknown_service? -```bash -# Check environment variable configuration -echo "SERVICE_NAME: $SERVICE_NAME" +### Tool Call Tracing -# Restart monitoring service to apply new configuration -./docker/start-monitoring.sh +```python +with monitoring_manager.trace_tool_call("web_search", "agent_name", {"query": "test"}) as span: + results = search_web("test") + monitoring_manager.set_tool_output({"results": results}) ``` -## 🧹 Data Management +### Retriever Call Tracing -### Clean Jaeger Trace Data -```bash -# Method 1: Restart Jaeger container (simplest) -docker-compose -f docker/docker-compose-monitoring.yml restart nexent-jaeger +Knowledge-base search tools are classified as retriever spans automatically by the SDK. 
Custom retriever integrations can use the same semantics directly: -# Method 2: Completely rebuild Jaeger container and data -docker-compose -f docker/docker-compose-monitoring.yml stop nexent-jaeger -docker-compose -f docker/docker-compose-monitoring.yml rm -f nexent-jaeger -docker-compose -f docker/docker-compose-monitoring.yml up -d nexent-jaeger +```python +with monitoring_manager.trace_retriever_call("knowledge_base_search", "agent_name", {"query": "test"}) as span: + documents = search_knowledge_base("test") + monitoring_manager.set_retriever_output(documents) +``` -# Method 3: Clean all monitoring data (rebuild all containers) -docker-compose -f docker/docker-compose-monitoring.yml down -docker-compose -f docker/docker-compose-monitoring.yml up -d +## OpenInference Semantic Attributes + +The system uses OpenInference semantic conventions for AI-specific observability: + +### LLM Attributes + +| Attribute | Description | +|-----------|-------------| +| `llm.model_name` | Model identifier (e.g., `gpt-4`) | +| `llm.operation.name` | Operation type (e.g., `chat_completion`) | +| `llm.token_count.prompt` | Input token count | +| `llm.token_count.completion` | Output token count | +| `llm.invocation_parameters` | Model parameters (JSON) | +| `llm.time_to_first_token` | TTFT in seconds | + +### Agent Attributes + +| Attribute | Description | +|-----------|-------------| +| `agent.name` | Agent identifier | +| `agent.step.name` | Step name (e.g., `web_search`) | +| `agent.step.type` | Step type: `tool_call`, `reasoning`, `action_selection` | +| `agent.tool.name` | Tool name | +| `agent.tool.input` | Tool input preview using the configured trace payload policy | +| `agent.tool.input.*` | Structured tool input metadata: type, size, item count, truncation, keys | +| `agent.tool.output` | Tool output preview using the configured trace payload policy | +| `agent.tool.output.*` | Structured tool output metadata: type, size, item count, truncation, keys | +| 
`agent.tool.success` | Whether the tool call completed successfully | +| `agent.tool.duration_ms` | Tool call duration | +| `retriever.name` | Retriever name | +| `retrieval.query` | Retriever query | +| `retrieval.results.count` | Retriever result count | +| `retrieval.top_score` | Highest numeric result score when available | +| `retriever.input.*` | Structured retriever input metadata | +| `retriever.output` | Retriever output preview using the configured trace payload policy | +| `retriever.output.*` | Structured retriever output metadata | +| `context.tokens.estimated_input` | Estimated context input tokens per Agent step event | +| `context.tokens.uncompressed_estimated` | Estimated uncompressed context tokens per Agent step event | +| `context.compression.calls` | Compression calls per Agent step event | +| `context.compression.cache_hits` | Compression cache hits per Agent step event | +| `context.compression.ratio` | Compression ratio per Agent step event | + +## Metrics + +| Metric | Description | +|--------|-------------| +| `llm.request.duration` | Request latency | +| `llm.token.generation_rate` | Tokens per second | +| `llm.time_to_first_token` | TTFT | +| `llm.token_count.prompt` | Input tokens | +| `llm.token_count.completion` | Output tokens | +| `agent.step.count` | Agent step count | +| `agent.execution.duration` | Agent execution time | +| `agent.error.count` | Agent errors | + +## Collector Configuration + +By default, the OpenTelemetry Collector only logs data through the debug exporter. This avoids forwarding data back into itself when no external backend is configured. 
To forward through the Collector, add a platform exporter: + +```yaml +exporters: + otlphttp/langsmith: + traces_endpoint: https://api.smith.langchain.com/otel/v1/traces + headers: + x-api-key: YOUR_LANGSMITH_API_KEY + Langsmith-Project: nexent + +service: + pipelines: + traces: + exporters: [otlphttp/langsmith, debug] ``` -### Clean Prometheus Metrics Data -```bash -# Restart Prometheus container -docker-compose -f docker/docker-compose-monitoring.yml restart nexent-prometheus +See `docker/monitoring/otel-collector-config.yml` for full configuration with platform examples. -# Completely clean Prometheus data -docker-compose -f docker/docker-compose-monitoring.yml stop nexent-prometheus -docker volume rm docker_prometheus_data 2>/dev/null || true -docker-compose -f docker/docker-compose-monitoring.yml up -d nexent-prometheus -``` +## Graceful Degradation -### Clean Grafana Configuration -```bash -# Reset Grafana configuration and dashboards -docker-compose -f docker/docker-compose-monitoring.yml stop nexent-grafana -docker volume rm docker_grafana_data 2>/dev/null || true -docker-compose -f docker/docker-compose-monitoring.yml up -d nexent-grafana +When OpenTelemetry dependencies are not installed, monitoring gracefully disables: + +```bash +pip install nexent # Basic package - no monitoring +pip install nexent[performance] # With OTLP support ``` -## 📈 Typical Problem Analysis +All monitoring methods work without errors when disabled - decorators pass through, context managers yield None. -### Slow token generation (< 5 tokens/s) -1. **Analysis**: Grafana → Token Generation Rate panel -2. **Solution**: Check model service load, optimize input prompt length +## Troubleshooting -### Slow request response (> 10s) -1. **Analysis**: Jaeger → View complete trace chain -2. **Solution**: Locate bottleneck (database/LLM/network) +### No data appearing -### Error rate spike (> 10%) -1. **Analysis**: Prometheus → llm_error_count metric -2.
**Solution**: Check model service availability, verify API keys +1. Check `ENABLE_TELEMETRY=true` in `.env` +2. Verify OTLP endpoint is reachable +3. Check authentication headers are correct -## 🎉 Getting Started +### Connection errors -After setup completion, you can: +1. Test endpoint: `curl -v $OTEL_EXPORTER_OTLP_ENDPOINT/v1/traces` +2. Verify protocol matches endpoint (`http` vs `grpc`) +3. Check Collector logs: `docker logs nexent-otel-collector` -1. 📊 View **LLM Performance Dashboard** in Grafana -2. 🔍 Trace complete request chains in Jaeger -3. 📈 Analyze token generation speed and performance bottlenecks -4. 🚨 Set performance alerts and thresholds +### Wrong attributes -Enjoy efficient LLM performance monitoring! 🚀 +1. Verify OpenInference attributes in platform UI +2. Check span attribute naming: `llm.model_name` not `model_name` +3. Review platform-specific attribute requirements diff --git a/doc/docs/en/user-guide/agent-development.md b/doc/docs/en/user-guide/agent-development.md index 109674273..7637cd620 100644 --- a/doc/docs/en/user-guide/agent-development.md +++ b/doc/docs/en/user-guide/agent-development.md @@ -55,7 +55,7 @@ Nexent supports communication with third-party agents through the A2A protocol. If you know the Agent Card address of the target agent, you can use the URL discovery method:
- +
1. In the External A2A Agent list, click the "Add External Agent" button @@ -72,7 +72,7 @@ If you know the Agent Card address of the target agent, you can use the URL disc If your agent is registered with the Nacos service discovery platform, you can use the Nacos discovery method:
- +
1. In the External A2A Agent list, click the "Add External Agent" button @@ -94,7 +94,7 @@ If your agent is registered with the Nacos service discovery platform, you can u In the External A2A Agent list, you can view and manage all discovered external agents:
- +
1. **View Agent Details**: Click on the agent card to view its complete information, including name, description, URL, capability list, etc. diff --git a/doc/docs/en/user-guide/local-tools/index.md b/doc/docs/en/user-guide/local-tools/index.md index 27dc72ebc..9006f415c 100644 --- a/doc/docs/en/user-guide/local-tools/index.md +++ b/doc/docs/en/user-guide/local-tools/index.md @@ -9,6 +9,8 @@ Local tools let agents interact with the workspace, remote hosts, and external s - [Search Tools](./search-tools): Local/DataMate KB search plus Exa/Tavily/Linkup web search. - [Multimodal Tools](./multimodal-tools): Download/parse/analyze text files and images. - [Terminal Tool](./terminal-tool): Persistent SSH sessions for remote commands. +- [SQL Tools](./sql-tools): Connect to MySQL, PostgreSQL, SQL Server to execute SQL queries. +- [Skills](../skills): Nexent's built-in tool combinations or custom capability packs with NL generation and version management. ## ⚙️ Configuration Entry diff --git a/doc/docs/en/user-guide/local-tools/sql-tools.md b/doc/docs/en/user-guide/local-tools/sql-tools.md new file mode 100644 index 000000000..859b5fbba --- /dev/null +++ b/doc/docs/en/user-guide/local-tools/sql-tools.md @@ -0,0 +1,78 @@ +--- +title: SQL Database Tools +--- + +# SQL Database Tools + +The SQL database toolset enables AI agents to connect to and query relational databases such as MySQL, PostgreSQL, and SQL Server, allowing direct data access and manipulation. 
+ +## Tool List + +- `mysql_database`: Connect to MySQL and execute SQL queries +- `postgres_database`: Connect to PostgreSQL and execute SQL queries +- `mssql_database`: Connect to SQL Server and execute SQL queries + +## Usage Scenarios + +- Query report data from business databases for agent analysis and summarization +- Cross-database joins to retrieve related information scattered across multiple tables +- Real-time queries of business status to provide agents with up-to-date data + +## Parameters and Behavior + +### Common Parameters + +- `sql`: The SQL query to execute (required) +- `parameters`: Parameter values for parameterized queries (optional) +- `max_rows`: Maximum number of rows to return (default: 100) +- `timeout`: Query timeout in seconds (default: 10) + +### Database Connection Parameters + +| Database | Connection Parameters | +|-------------|---------------------------------------------------------------------------| +| MySQL | `host`, `user`, `password`, `database`, `port` (default 3306) | +| PostgreSQL | `host`, `user`, `password`, `database`, `port` (default 5432) | +| SQL Server | `host`, `user`, `password`, `database`, `port` (default 1433) | + +### Security Restrictions + +- Forbidden operations: `DROP DATABASE`, `GRANT`, `REVOKE`, `CREATE USER`, `INTO OUTFILE`, `LOAD DATA INFILE` +- `UPDATE` and `DELETE` statements must include a `WHERE` clause +- `LIMIT` is automatically added to restrict returned rows + +### Response Format + +```json +{ + "status": "success", + "columns": ["id", "name", "email"], + "rows": [[1, "John Doe", "john@example.com"]], + "row_count": 1, + "execution_time_ms": 45.23 +} +``` + +## Getting Started + +1. **Prepare connection info**: Obtain host address, port, database name, username, and password +2. **Configure the tool**: Add the appropriate database tool in agent configuration and fill in connection parameters +3. **Test connection**: Use a simple query to verify connectivity +4. 
**Construct queries**: Let the agent understand natural language requirements and generate corresponding SQL + +## Security Best Practices + +- Use read-only accounts in production to limit operation permissions +- Store sensitive information like database passwords in a key management service +- Set reasonable `max_rows` values to avoid returning excessive data at once +- Enable SSL/TLS encryption for database connections + +## Common Database Connection Examples + +| Database | Connection Example | Parameter Placeholder | +|-------------|-------------------|---------------------| +| MySQL | `localhost:3306` | `?` | +| PostgreSQL | `localhost:5432` | `$1, $2, ...` | +| SQL Server | `localhost:1433` | `?` | + +> Note: Different databases use different parameter placeholder formats. PostgreSQL uses `$1, $2`, while others use `?`. diff --git a/doc/docs/en/user-guide/mcp-tools.md b/doc/docs/en/user-guide/mcp-tools.md index b55859cbe..cd1190e0e 100644 --- a/doc/docs/en/user-guide/mcp-tools.md +++ b/doc/docs/en/user-guide/mcp-tools.md @@ -1,28 +1,159 @@ # MCP Tools -The upcoming MCP Tools management module will let you centrally manage MCP servers and tools on a single page, easily completing connection configuration, tool synchronization, and health status monitoring. +In the MCP Tools module, you can centrally manage all MCP (Model Context Protocol) servers and tools. It supports custom addition, Registry import, and Community import, covering connection configuration, tool synchronization, health monitoring, and community sharing. -## 🎯 Feature Preview +The MCP Tools page has two parallel tabs: -1. Register and manage multiple MCP servers -2. Quickly sync, view, and organize MCP tool lists -3. Monitor MCP connection status and usage in real time +- **Imported Services**: Manage MCP services already accessed by the current tenant — configure, monitor, and maintain your MCP services here. 
+- **Published Services**: Manage the MCP services you have published to the community — browse, edit, and unpublish. -## ⏳ Stay Tuned +--- -The MCP Tools management feature is under development. We are committed to building an efficient and intuitive management platform that enables you to: +## ➕ Add MCP Services -1. Centrally manage all MCP servers -2. Conveniently sync and organize tools -3. Monitor server connections and tool runtime status in real time +Click the **Add MCP Service** button to open the add dialog. The dialog provides three tabs, each corresponding to a different source. -## 🚀 Related Features +### Local Add -While waiting for **MCP Tools** to launch, you can: +The **Local Add** tab lets you manually configure an MCP service with two transport types. -1. Manage your MCP tools in **[Agent Development](./agent-development)** -2. View agent and MCP collaboration relationships through **[Agent Space](./agent-space)** -3. Experience platform features in **[Start Chat](./start-chat)** +#### Add via URL -If you encounter any issues during use, please refer to our **[FAQ](../quick-start/faq)** or ask for support in [GitHub Discussions](https://github.com/ModelEngine-Group/nexent/discussions). +For independently deployed MCP services (HTTP / SSE), connect by entering the endpoint URL. + +1. In the **Local Add** tab, set **Transport Type** to "URL" +2. Fill in the service details: + - **Service Name (required)**: A recognizable name for the MCP service + - **Service URL (required)**: The MCP service endpoint address + - **Description** (optional): A brief description of the service + - **Authorization Token** (optional): Bearer token if the service requires authentication +3. 
Click **Confirm** — the system will connect to the service and retrieve the available tool list + +#### Add via Container Configuration + +For MCP services that need to run locally in a container (e.g., services launched via npx), the system automatically creates and manages a container based on your JSON configuration. + +1. In the **Local Add** tab, set **Transport Type** to "Container" +2. Fill in the container configuration: + - **Service Name (required)**: A recognizable name for the MCP service + - **Description** (optional): A brief description of the service + - **Container Configuration JSON (required)**: Enter the standard MCP configuration format, for example: + ```json + { + "mcpServers": { + "service-name": { + "args": ["mcp-package-name@version"], + "command": "npx", + "env": { + "API_KEY": "xxxx" + } + } + } + } + ``` + - **Port**: The port exposed by the container service — the system automatically detects port conflicts and suggests available ports +3. Click **Confirm** — the system parses the JSON, creates the container, and registers the service + +### Import from MCP Registry + +Nexent integrates with the MCP Registry, allowing you to browse and import community-maintained MCP services in one click. + +1. Switch to the **MCP Registry** tab +2. Browse the available MCP services — search by name or tags +3. Click a service to view its details (description, version, required parameters, etc.) +4. Configure required parameters (e.g., API Key and other environment variables) +5. Click **Import** — the system automatically installs and configures the service + +### Import from Community + +Browse MCP services published by other Nexent users and quickly import them. + +1. Switch to the **Community Market** tab +2. Browse published community MCP services — filter by name, tags, or transport type +3. 
Click a service to view details, then click **Import** to add it to your service list + +--- + +## 📋 Imported Services + +The **Imported Services** tab displays all MCP services accessed by the current tenant as cards. View, edit, monitor, and publish your services here. + +### View & Filter + +Each service card shows: + +- Service name and description +- Source indicator (Custom / Registry / Community) +- Enable / Disable toggle +- Tags + +Use the filter bar at the top to filter by **Source**, **Transport Type**, and **Tags**, or use the search box to quickly locate services by name. + +### Edit Service Details + +Click any service card to open the detail modal, where you can: + +- **Edit basic info**: Modify name, description, URL, Authorization Token, and tags +- **Enable / Disable**: Toggle the service on or off — tools from a disabled service will not appear in agent tool selection +- **Delete**: Remove the MCP service record — containerized services will also have their container resources cleaned up + +### View Tool List + +In the service detail modal, click **Tool List** to view all tools provided by this MCP service. + +### Health Check + +Click the **Health Check** button in the detail modal to test the connection to the MCP service. Possible statuses: + +- **Healthy**: The service is reachable +- **Unhealthy**: The service cannot be reached or responded abnormally +- **Unchecked**: A health check has not been performed yet + +### Container Management + +For containerized MCP services, the detail modal also provides: + +- **View Container Logs**: Real-time logs from the running container for troubleshooting +- **View Container Config**: The configuration JSON used when creating the container + +### Publish to Community + +In the service detail modal, click **Publish to Community**: + +1. Review or edit the publication info (name, description, tags, etc.) +2. Click **Confirm Publish** — the service will be published to the community +3. 
Other users can then browse and import it from the **Community Market** tab in the add dialog + +--- + +## 🌐 Published Services + +The **Published Services** tab shows all MCP services you have published to the community. Manage your published content here. + +Each card shows the service name, description, version, and tags. Filter by name, tags, and transport type. + +Click a service card to view details, where you can: + +- **Edit published service**: Modify the published service's name, description, and tags +- **Delete published service**: Withdraw the service from the community — it will no longer be visible to other users + +--- + +## 🔗 Integrating with Agents + +Once an MCP service is added, its tools are automatically synced to the agent tool selection list. When configuring an agent on the **[Agent Development](./agent-development)** page: + +1. In the **Select Agent Tools** tab, locate the corresponding MCP service group +2. Click a tool name to enable it +3. Click ⚙️ to view the tool description and configure its parameters + +## 🚀 Next Steps + +After configuring MCP services, we recommend: + +1. **[Agent Development](./agent-development)** — Assign MCP tools to your agents +2. **[Agent Space](./agent-space)** — View collaboration between agents and MCP services +3. **[Start Chat](./start-chat)** — Experience agents calling MCP tools in conversations + +If you encounter any issues, please refer to our **[FAQ](../quick-start/faq)** or ask for support in [GitHub Discussions](https://github.com/ModelEngine-Group/nexent/discussions). diff --git a/doc/docs/zh/deployment/devcontainer.md b/doc/docs/zh/deployment/devcontainer.md index 2ce184901..b5b934187 100644 --- a/doc/docs/zh/deployment/devcontainer.md +++ b/doc/docs/zh/deployment/devcontainer.md @@ -25,7 +25,7 @@ 1. 克隆项目到本地 2. 在 Cursor 中打开项目文件夹 -3. 运行 `docker/deploy.sh` 脚本,在`infrastructure` 模式下启动容器 +3. 在 `docker` 目录运行 `./deploy.sh --components infrastructure,application --port-policy development` 启动基础容器 4. 
进入 `nexent-minio` 与 `nexent-elasticsearch` 容器, 将 `MINIO_ACCESS_KEY`, `MINIO_SECRET_KEY`, `ELASTICSEARCH_API_KEY` 环境变量复制到 `docker/docker-compose.dev.yml` 中的相应环境变量位置 5. 按下 `F1` 或 `Ctrl+Shift+P`,输入 `Dev Containers: Reopen in Container ...` 6. Cursor 将根据 `.devcontainer` 目录中的配置启动开发容器 diff --git a/doc/docs/zh/deployment/docker-build.md b/doc/docs/zh/deployment/docker-build.md index 8dad0612e..8e360d95d 100644 --- a/doc/docs/zh/deployment/docker-build.md +++ b/doc/docs/zh/deployment/docker-build.md @@ -160,6 +160,11 @@ docker rm nexent-docs ## 🚀 部署建议 -构建完成后,可以使用 `docker/deploy.sh` 脚本进行部署,或者直接使用 `docker-compose` 启动服务。 +构建完成后,可以进入 `docker` 目录使用部署脚本启动本地镜像: -> 启动测试本地构建的镜像时,需要修改下`docker/deploy.sh`中的`APP_VERSION="$(get_app_version)"` -> `APP_VERSION="latest"`,因为部署时默认会使用当前版本对应的镜像。 \ No newline at end of file +```bash +cd docker +bash deploy.sh --image-source local-latest +``` + +> `local-latest` 会使用本地 `latest` Nexent 应用镜像并避免重新拉取这些镜像,无需修改 `docker/deploy.sh`。 diff --git a/doc/docs/zh/developer-guide/environment-setup.md b/doc/docs/zh/developer-guide/environment-setup.md index 0a81ca10d..cc98ff58a 100644 --- a/doc/docs/zh/developer-guide/environment-setup.md +++ b/doc/docs/zh/developer-guide/environment-setup.md @@ -23,7 +23,7 @@ title: 环境准备 ```bash # 在项目根目录的 docker 目录执行 cd docker -./deploy.sh --mode infrastructure +./deploy.sh --components infrastructure --port-policy development ``` :::: info 重要提示 @@ -131,4 +131,3 @@ uv pip install -e ".[dev]" - 测试框架(pytest) - 数据处理依赖(unstructured) - 其他开发辅助依赖 - diff --git a/doc/docs/zh/getting-started/software-architecture.md b/doc/docs/zh/getting-started/software-architecture.md index 24c83152d..8676992a4 100644 --- a/doc/docs/zh/getting-started/software-architecture.md +++ b/doc/docs/zh/getting-started/software-architecture.md @@ -274,7 +274,7 @@ Docker Compose 编排: - **高可用性**:多服务冗余、健康检查、自动重启 - **高性能**:异步处理、Redis 缓存、向量搜索优化 - **高并发**:分布式架构、负载均衡 -- **监控友好**:Prometheus 指标、Jaeger 追踪、结构化日志 +- **监控友好**:OpenTelemetry 可观测性、Grafana Tempo 追踪、结构化日志 ### 🔧 
开发友好 - **模块化开发**:清晰的分层架构(App → Service → Database) diff --git a/doc/docs/zh/quick-start/installation.md b/doc/docs/zh/quick-start/installation.md index f9c0e1708..85c84fad4 100644 --- a/doc/docs/zh/quick-start/installation.md +++ b/doc/docs/zh/quick-start/installation.md @@ -19,10 +19,9 @@ ```bash git clone https://github.com/ModelEngine-Group/nexent.git cd nexent/docker -cp .env.example .env # 复制环境变量配置文件 ``` -> **💡 提示**: 若无特殊需求,您可直接使用 `.env.example` 进行部署,无需进行任何修改。若您需要配置语音模型(STT/TTS),则需要在 `.env` 中配置相关参数。我们会尽快将此部分配置前端化,敬请期待。 +> **💡 提示**: `deploy.sh` 会在 `docker/.env` 不存在时自动从 `.env.example` 复制一份。若无特殊需求,可直接部署;若需要配置语音模型(STT/TTS),请部署前或部署后修改 `docker/.env` 中的相关参数。 ### 2. 部署选项 @@ -32,20 +31,42 @@ cp .env.example .env # 复制环境变量配置文件 bash deploy.sh ``` -执行此命令后,系统会提供两个不同的版本供您选择: +执行此命令后,系统会通过 Bash TUI 选择部署参数。可使用方向键或 `j/k` 移动,空格切换多选项,回车确认,`b`/Backspace 返回上一步,`q` 退出。 -**版本选择:** -- **Speed version(轻量快速部署,默认)**: 快速启动核心功能,适合个人用户和小团队使用 -- **Full version(完整功能版)**: 提供企业级租户管理和资源隔离等高级功能,但安装时间略长,适合企业用户 +**组件组合:** +- **infrastructure(必选)**: Elasticsearch、PostgreSQL、Redis、MinIO +- **application(默认选中,可取消)**: config、runtime、mcp、northbound、web +- **data-process(可选)**: 数据处理服务 +- **supabase(可选)**: 启用用户、租户和认证能力 +- **terminal(可选)**: 启用 OpenSSH 终端工具 +- **monitoring(可选)**: 启用观测组件,选择后会继续选择 provider -**部署模式:** -- **开发模式 (默认)**: 暴露所有服务端口以便调试 -- **基础设施模式**: 仅启动基础设施服务 -- **生产模式**: 为安全起见仅暴露端口 3000 +**端口策略:** +- **development(默认)**: 暴露调试和内部服务端口,便于本地排查 +- **production**: 仅发布生产入口端口 -**可选组件:** -- **终端工具**: 启用 openssh-server 供 AI 智能体执行 shell 命令 -- **区域优化**: 中国大陆用户可使用优化的镜像源 +**镜像来源:** +- **general(默认)**: 使用标准公开镜像仓库 +- **mainland**: 使用中国大陆镜像源 +- **local-latest**: 使用本地 `latest` 镜像,避免拉取 Nexent 应用镜像 + +您也可以通过参数跳过交互: + +```bash +# 默认组件组合,development 端口策略,标准镜像源 +bash deploy.sh --components infrastructure,application --port-policy development --image-source general + +# 启用用户/租户能力、数据处理和终端工具 +bash deploy.sh --components infrastructure,application,supabase,data-process,terminal + +# 使用中国大陆镜像源 +bash deploy.sh 
--image-source mainland + +# 使用本地 latest 镜像 +bash deploy.sh --image-source local-latest +``` + +部署成功后,非敏感部署选项会保存到 `docker/deploy.options`。下次交互部署时可选择复用本地配置或重新全量配置。 #### ⚠️ 重要提示 @@ -100,7 +121,7 @@ Nexent 采用微服务架构,通过 Docker Compose 进行部署。 | nexent-minio | S3 兼容对象存储 | | redis | 缓存层 | -**Supabase 服务(完整版独有):** +**Supabase 服务(选择 `supabase` 组件时):** | 服务 | 描述 | |---------|-------------| | supabase-kong | API 网关 | @@ -111,6 +132,7 @@ Nexent 采用微服务架构,通过 Docker Compose 进行部署。 | 服务 | 描述 | |---------|-------------| | nexent-openssh-server | AI 智能体 SSH 终端 | +| nexent-monitoring | 可选观测组件 | ## 💾 数据持久化 @@ -122,10 +144,12 @@ Nexent 使用 Docker volumes 进行数据持久化: | Elasticsearch | nexent-elasticsearch-data | `{dataDir}/elasticsearch` | | Redis | nexent-redis-data | `{dataDir}/redis` | | MinIO | nexent-minio-data | `{dataDir}/minio` | -| Supabase DB(完整版)| nexent-supabase-db-data | `{dataDir}/supabase-db` | +| Supabase DB(选择 supabase 时)| nexent-supabase-db-data | `{dataDir}/supabase-db` | 默认 `dataDir` 为 `./volumes`(可在 `.env` 中配置 `ROOT_DIR`)。 +卸载由 `docker/uninstall.sh` 负责。默认交互询问是否删除持久化数据;也可使用 `--delete-volumes true|false`、`--remove-volumes`、`--keep-volumes`,或使用 `bash uninstall.sh delete-all` 删除容器和持久化数据。 + ## 🔌 端口映射 | 服务 | 内部端口 | 外部端口 | 描述 | @@ -179,4 +203,4 @@ NORTHBOUND_EXTERNAL_URL=https://api.yourdomain.com/api 想要从源码构建或添加新功能?查看 [Docker 构建指南](../deployment/docker-build) 获取详细说明。 -有关详细的安装说明和自定义选项,请查看我们的 [开发者指南](../developer-guide/overview)。 \ No newline at end of file +有关详细的安装说明和自定义选项,请查看我们的 [开发者指南](../developer-guide/overview)。 diff --git a/doc/docs/zh/quick-start/kubernetes-installation.md b/doc/docs/zh/quick-start/kubernetes-installation.md index be7857fb2..2c5382d36 100644 --- a/doc/docs/zh/quick-start/kubernetes-installation.md +++ b/doc/docs/zh/quick-start/kubernetes-installation.md @@ -35,21 +35,29 @@ cd nexent/k8s/helm 运行部署脚本: ```bash -./deploy-helm.sh apply +./deploy.sh ``` -执行此命令后,系统会提示您选择配置选项: +执行此命令后,系统会通过 Bash TUI 选择配置选项。可使用方向键或 `j/k` 移动,空格切换多选项,回车确认,`b`/Backspace 返回上一步,`q` 退出。 
-**版本选择:** -- **Speed version(轻量快速部署,默认)**: 快速启动核心功能,适合个人用户和小团队使用 -- **Full version(完整功能版)**: 提供企业级租户管理和资源隔离等高级功能,包含 Supabase 认证服务 +**组件组合:** +- **infrastructure(必选)**: Elasticsearch、PostgreSQL、Redis、MinIO +- **application(默认选中,可取消)**: config、runtime、mcp、northbound、web +- **data-process(可选)**: 数据处理服务 +- **supabase(可选)**: 启用用户、租户和认证能力 +- **terminal(可选)**: 启用 OpenSSH 终端工具 +- **monitoring(可选)**: 启用观测组件,选择后会继续选择 provider -**镜像源选择:** -- **中国大陆**: 使用优化的区域镜像源,加快镜像拉取速度 -- **通用**: 使用标准 Docker Hub 镜像源 +**端口策略:** +- **development(默认)**: 使用 NodePort 暴露 Web 和调试/内部服务 +- **production**: 内部服务使用 ClusterIP,仅暴露生产入口 -**可选组件:** -- **终端工具**: 启用 openssh-server 供 AI 智能体执行 shell 命令 +**镜像来源:** +- **general(默认)**: 使用标准公开镜像仓库 +- **mainland**: 使用中国大陆镜像源 +- **local-latest**: 使用本地 `latest` 镜像,并将 Nexent 应用镜像的拉取策略设为本地优先 + +部署成功后,非敏感部署选项会保存到 `k8s/helm/deploy.options`。下次交互部署时可选择复用本地配置或重新全量配置。 ### ⚠️ 重要提示 @@ -72,7 +80,7 @@ kubectl exec -it -n nexent deploy/nexent-postgresql -- psql -U root -d nexent -c "DELETE FROM nexent.user_tenant_t WHERE user_id='your_user_id';" # Step 3: 重新部署并记录 su 账号密码 -./deploy-helm.sh apply +./deploy.sh ``` ### 4. 
访问您的安装 @@ -113,7 +121,7 @@ Nexent 采用微服务架构,通过 Helm Chart 进行部署: | nexent-redis | 缓存层 | | nexent-minio | S3 兼容对象存储 | -**Supabase 服务(完整版独有):** +**Supabase 服务(选择 `supabase` 组件时):** | 服务 | 描述 | |---------|-------------| | nexent-supabase-kong | API 网关 | @@ -124,13 +132,14 @@ Nexent 采用微服务架构,通过 Helm Chart 进行部署: | 服务 | 描述 | |---------|-------------| | nexent-openssh-server | AI 智能体 SSH 终端 | +| nexent-monitoring | 可选观测组件 | ## 🔌 端口映射 | 服务 | 内部端口 | NodePort | 描述 | |---------|---------------|----------|-------------| | Web 界面 | 3000 | 30000 | 主应用程序访问 | -| Northbound API | 5010 | 30013 | 北向 API 服务 | +| Northbound API | 5013 | 30013 | 北向 API 服务 | | SSH 服务器 | 22 | 30022 | 终端工具访问 | 内部服务通信使用 Kubernetes 内部 DNS(例如 `http://nexent-config:5010`)。 @@ -141,34 +150,49 @@ Nexent 使用 PersistentVolume 进行数据持久化: | 数据类型 | PersistentVolume | 默认宿主机路径 | |-----------|------------------|-------------------| -| Elasticsearch | nexent-elasticsearch-pv | `{dataDir}/elasticsearch` | -| PostgreSQL | nexent-postgresql-pv | `{dataDir}/postgresql` | -| Redis | nexent-redis-pv | `{dataDir}/redis` | -| MinIO | nexent-minio-pv | `{dataDir}/minio` | -| Supabase DB(完整版)| nexent-supabase-db-pv | `{dataDir}/supabase-db` | +| Elasticsearch | nexent-elasticsearch-pv | `/var/lib/nexent-data/nexent-elasticsearch` | +| PostgreSQL | nexent-postgresql-pv | `/var/lib/nexent-data/nexent-postgresql` | +| Redis | nexent-redis-pv | `/var/lib/nexent-data/nexent-redis` | +| MinIO | nexent-minio-pv | `/var/lib/nexent-data/nexent-minio` | +| Supabase DB(选择 supabase 时)| nexent-supabase-db-pv | `/var/lib/nexent-data/nexent-supabase-db` | -默认 `dataDir` 为 `/var/lib/nexent-data`(可在 `values.yaml` 中配置)。 +卸载 Helm release 默认不会删除本地 hostPath 数据。可使用 `./uninstall.sh --delete-local-data true` 删除 `/var/lib/nexent-data/nexent-*` 下的 Nexent 本地卷内容,使用 `--keep-local-data` 显式保留。 ## 🔧 部署命令 ```bash # 交互式部署 -./deploy-helm.sh apply +./deploy.sh + +# 非交互式部署默认组件 +./deploy.sh --components infrastructure,application --port-policy development --image-source 
general + +# 启用用户/租户能力、数据处理和终端工具 +./deploy.sh --components infrastructure,application,supabase,data-process,terminal # 使用中国大陆镜像源部署 -./deploy-helm.sh apply --is-mainland Y +./deploy.sh --image-source mainland -# 部署完整版本(包含 Supabase) -./deploy-helm.sh apply --deployment-version full +# 使用本地 latest 镜像 +./deploy.sh --image-source local-latest # 仅清理 Helm 状态(修复卡住的发布) -./deploy-helm.sh clean +./uninstall.sh clean + +# 卸载,默认保留本地数据;交互确认是否删除 namespace 和本地数据 +./uninstall.sh + +# 卸载并删除 namespace +./uninstall.sh --delete-namespace true + +# 卸载并删除本地 hostPath 数据 +./uninstall.sh --delete-local-data true -# 卸载但保留数据 -./deploy-helm.sh delete +# 完全卸载,包括 namespace 和本地 hostPath 数据 +./uninstall.sh delete-all -# 完全卸载包括所有数据 -./deploy-helm.sh delete-all +# 完全卸载但保留本地 hostPath 数据 +./uninstall.sh delete-all --keep-local-data ``` ## 🔍 故障排查 diff --git a/doc/docs/zh/quick-start/kubernetes-upgrade-guide.md b/doc/docs/zh/quick-start/kubernetes-upgrade-guide.md index 43f5c1d49..f2ec9226a 100644 --- a/doc/docs/zh/quick-start/kubernetes-upgrade-guide.md +++ b/doc/docs/zh/quick-start/kubernetes-upgrade-guide.md @@ -15,7 +15,7 @@ 更新之前,先记录下当前部署的版本和数据目录信息。 - 当前部署版本信息的位置:`backend/consts/const.py` 中的 `APP_VERSION` -- 数据目录信息的位置:`k8s/helm/nexent/values.yaml` 中的 `global.dataDir` +- 本地卷目录信息的位置:各 Helm 子 chart 的 `storage.hostPath`,默认位于 `/var/lib/nexent-data/nexent-*` **git 方式下载的代码** @@ -28,7 +28,7 @@ git pull **zip 包等方式下载的代码** 1. 需要去 GitHub 上重新下载一份最新代码,并解压缩。 -2. 将之前执行部署脚本目录下 `k8s/helm` 目录中的 `.deploy.options` 文件拷贝到新代码目录的 `k8s/helm` 目录中。(如果不存在该文件则忽略此步骤)。 +2. 
将之前执行部署脚本目录下 `k8s/helm` 目录中的 `deploy.options` 文件拷贝到新代码目录的 `k8s/helm` 目录中。(如果不存在该文件则忽略此步骤)。 ## 🔄 步骤二:执行升级 @@ -36,10 +36,10 @@ git pull ```bash cd k8s/helm -./deploy-helm.sh apply +./deploy.sh ``` -脚本会自动检测您之前的部署设置(版本、镜像源等)。如果 `.deploy.options` 文件不存在,系统会提示您输入配置信息。 +脚本会自动检测您之前保存的部署设置(组件组合、端口策略、镜像来源等)。如果 `deploy.options` 文件不存在,系统会提示您输入配置信息。 > 💡 提示 > - 若需配置语音模型(STT/TTS),请在对应的 `values.yaml` 中修改相关配置,或通过命令行参数传入。 @@ -137,7 +137,7 @@ kubectl exec -i $POSTGRES_POD -n nexent -- psql -U root -d nexent < ./sql/v2.0.0 kubectl exec $POSTGRES_POD -n nexent -- pg_dump -U root nexent > backup_$(date +%F).sql ``` -> - 对于 Supabase 数据库(仅完整版本),请使用 `nexent-supabase-db` Pod: +> - 对于 Supabase 数据库(选择 `supabase` 组件时),请使用 `nexent-supabase-db` Pod: ```bash SUPABASE_POD=$(kubectl get pods -n nexent -l app=nexent-supabase-db -o jsonpath='{.items[0].metadata.name}') diff --git a/doc/docs/zh/quick-start/upgrade-guide.md b/doc/docs/zh/quick-start/upgrade-guide.md index b888e2ada..4f8b429e0 100644 --- a/doc/docs/zh/quick-start/upgrade-guide.md +++ b/doc/docs/zh/quick-start/upgrade-guide.md @@ -37,11 +37,11 @@ git pull bash upgrade.sh ``` -缺少 deploy.options 的情况下,会提示需要手动输入之前部署的一些配置,比如:当前部署版本、数据目录等。按照提示输入之前记录的信息即可。 +缺少 deploy.options 的情况下,会提示需要重新选择部署配置,例如组件组合、端口策略、镜像来源等。按照您之前的部署方式重新选择即可。 > 💡 提示 -> - 默认为快速部署场景,使用 `.env.example`。 -> - 若需配置语音模型(STT/TTS),请提前在 `.env.example` 中补充相关变量,我们将尽快提供前端配置入口。 +> - 若 `docker/.env` 不存在,部署脚本会从 `.env.example` 自动复制一份。 +> - 若需配置语音模型(STT/TTS),请在 `docker/.env` 中补充相关变量,我们将尽快提供前端配置入口。 ## 🌐 步骤三:验证部署 diff --git a/doc/docs/zh/sdk/monitoring.md b/doc/docs/zh/sdk/monitoring.md index c592df267..2483b505b 100644 --- a/doc/docs/zh/sdk/monitoring.md +++ b/doc/docs/zh/sdk/monitoring.md @@ -1,289 +1,473 @@ -# 🚀 Nexent LLM 监控系统 +# Nexent Agent 可观测性(OTLP) -专门监控大模型 Token 生成速度和性能的企业级监控解决方案。 +基于 OpenTelemetry OTLP 协议的 AI Agent 企业级可观测性方案。支持对接 Arize Phoenix、Langfuse、LangSmith、Grafana Tempo、Zipkin 等可观测性平台。 -## 📊 系统架构 +## 系统架构 ``` -┌─────────────────────────────────────────────────────────┐ -│
Nexent LLM 监控系统 │ -├─────────────────────────────────────────────────────────┤ -│ │ -│ Nexent API ──► OpenTelemetry ──► Jaeger (链路追踪) │ -│ │ │ │ -│ │ └──────► Prometheus (指标收集) │ -│ │ │ │ -│ └─► OpenAI LLM └──► Grafana (可视化) │ -│ (Token 监控) │ -└─────────────────────────────────────────────────────────┘ +NexentAgent ──► OpenTelemetry SDK ──► OTLP Collector ──► Arize Phoenix / Langfuse / LangSmith / Grafana Tempo / Zipkin / OTLP Backend + │ │ + │ OpenInference 语义约定 │ + │ (llm.*, agent.* 属性) │ + └────────────────────────────────────────┘ ``` -## ⚡ 快速启动(5分钟) +## 快速启动 ```bash -# 1. 启动监控服务 -./docker/start-monitoring.sh +cd docker +[ -f .env ] || cp .env.example .env +cp monitoring/monitoring.env.example monitoring/monitoring.env -# 2. 安装性能监控依赖 -uv sync --extra performance +vim .env +ENABLE_TELEMETRY=true +MONITORING_PROVIDER=otlp +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http -# 3. 启用监控 -export ENABLE_TELEMETRY=true +vim monitoring/monitoring.env +MONITORING_PROVIDER=otlp -# 4. 
启动后端服务 -python backend/config_service.py -python backend/runtime_service.py +./start-monitoring.sh --stack collector ``` -## 📊 访问监控界面 +## 本地化部署形态 -| 界面 | 地址 | 用途 | -|------|------|------| -| **Grafana 仪表板** | http://localhost:3005 | LLM 性能监控 | -| **Jaeger 链路追踪** | http://localhost:16686 | 请求链路分析 | -| **Prometheus 指标** | http://localhost:9090 | 原始监控数据 | +`docker/start-monitoring.sh` 支持多种形态,均以 OpenTelemetry Collector 作为统一入口。业务服务只需要把 OTLP 发到 Collector,不需要感知后端平台差异。 -### 🔐 Grafana 登录信息 +| 形态 | 命令 | 包含服务 | 适用场景 | +|------|------|----------|----------| +| `collector` | `./start-monitoring.sh --stack collector` | OpenTelemetry Collector | 只验证埋点、或转发到外部云端平台 | +| `phoenix` | `./start-monitoring.sh --stack phoenix` | Collector + Phoenix | 本地 trace 调试、OpenInference 属性查看、实验分析 | +| `langfuse` | `./start-monitoring.sh --stack langfuse` | Collector + Langfuse Web/Worker + Postgres + ClickHouse + MinIO + Redis | 本地完整 LLMOps 体验、会话/用户/反馈/成本分析 | +| `langsmith` | `./start-monitoring.sh --stack langsmith` | OpenTelemetry Collector | 转发 traces 到在线 LangSmith 平台 | +| `grafana` | `./start-monitoring.sh --stack grafana` | Collector + Grafana + Tempo | 本地 Tempo trace 查询 | +| `zipkin` | `./start-monitoring.sh --stack zipkin` | Collector + Zipkin | 本地 trace 查询 | -首次访问 Grafana (http://localhost:3005) 时需要登录: +也可以在 `docker/monitoring/monitoring.env` 中设置默认形态: +```bash +MONITORING_PROVIDER=phoenix ``` -用户名: admin -密码: admin + +### 本地 Phoenix + +Phoenix 本地部署使用 `arizephoenix/phoenix` 镜像,默认 UI 端口为 `6006`,gRPC OTLP 端口映射为 `4319`,数据持久化到 Docker volume `phoenix-data`。 + +```bash +cd docker +./start-monitoring.sh --stack phoenix ``` -**首次登录后会要求修改密码,可以:** -- 设置新密码(推荐) -- 点击 "Skip" 跳过(开发环境) +访问地址: -**登录后可以看到:** -- 📊 **LLM Performance Dashboard** - 预配置的性能仪表板 -- 📈 **数据源配置** - 自动连接到 Prometheus 和 Jaeger -- 🎯 **实时监控面板** - Token 生成速度、延迟等关键指标 +- Phoenix UI:`http://localhost:6006` +- Collector OTLP HTTP:`http://localhost:4318` +- Collector OTLP gRPC:`localhost:4317` -## 🎯 核心功能特性 +Nexent 后端在 Docker 网络内运行时: -### ⚡ LLM 
专用监控 -- **Token 生成速度**: 实时监控每秒生成的 token 数量 -- **TTFT (Time to First Token)**: 首个 token 返回延迟 -- **流式响应分析**: 每个 token 的生成时间戳 -- **模型性能对比**: 不同模型的性能基准 +```bash +ENABLE_TELEMETRY=true +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false +``` -### 🔍 分布式链路追踪 -- **完整请求链路**: 从 HTTP 到 LLM 的端到端追踪 -- **性能瓶颈识别**: 自动定位慢查询和异常 -- **错误根因分析**: 快速定位问题根源 +后端直接在宿主机运行时,把 endpoint 改为 `http://localhost:4318`。 -### 🛠️ 开发友好设计 -- **一行代码接入**: 使用装饰器快速添加监控 -- **零依赖降级**: 未安装监控依赖时自动跳过 -- **零感知使用**: 无需手动检查监控状态,自动处理 -- **灵活配置**: 环境变量控制监控行为 +### 本地 Langfuse -## 🛠️ 添加监控到代码 +Langfuse 本地部署使用 v3 架构:Web、Worker、Postgres、ClickHouse、MinIO、Redis。默认 UI 端口为 `3001`,初始化项目和 API Key 来自 `monitoring.env`。 -### 🎯 推荐方式:单例模式 (v2.1+) +```bash +cd docker +./start-monitoring.sh --stack langfuse +``` -```python -# 后端服务中使用 - 直接使用全局配置好的 monitoring_manager -from utils.monitoring import monitoring_manager +访问地址: -# API 端点监控 -@monitoring_manager.monitor_endpoint("my_service.my_function") -async def my_api_function(): - return {"status": "ok"} +- Langfuse UI:`http://localhost:3001` +- 默认管理员:`admin@nexent.local` / `nexent-langfuse-admin` +- 默认项目 Key:`pk-lf-nexent-local` / `sk-lf-nexent-local` -# LLM 调用监控 -@monitoring_manager.monitor_llm_call("gpt-4", "chat_completion") -def call_llm(messages): - # 自动获得 Token 级别监控 - return llm_response +启动脚本会在 `LANGFUSE_OTLP_AUTH_HEADER` 为空时自动生成 `Basic base64(public_key:secret_key)`,并让 Collector 将 trace 转发到 `http://langfuse-web:3000/api/public/otel`。本地默认密钥只适合开发验证,生产部署必须替换 `LANGFUSE_NEXTAUTH_SECRET`、`LANGFUSE_SALT`、`LANGFUSE_ENCRYPTION_KEY`、数据库密码和对象存储密钥。 + +### 在线 LangSmith + +LangSmith 支持通过在线 OTLP endpoint 摄取 traces。Nexent 可以先把 OTLP 发到本地 Collector,再由 Collector 转发到 LangSmith,业务服务无需直接保存 LangSmith API Key。 + +```bash +cd docker +vim monitoring/monitoring.env + +MONITORING_PROVIDER=langsmith +LANGSMITH_API_KEY=lsv2_xxx +LANGSMITH_PROJECT=nexent +LANGSMITH_OTLP_TRACES_ENDPOINT=https://api.smith.langchain.com/otel/v1/traces 
-# 手动添加监控事件 -monitoring_manager.add_span_event("custom_event", {"key": "value"}) -monitoring_manager.set_span_attributes(user_id="123", action="process") +./start-monitoring.sh --stack langsmith ``` -### 📦 SDK中直接使用 +后端在 Docker 网络内运行时: -```python -from nexent.monitor import get_monitoring_manager - -# 获取全局监控管理器 - 在backend已自动配置 -monitor = get_monitoring_manager() - -# 使用装饰器 -@monitor.monitor_llm_call("claude-3", "completion") -def my_llm_function(): - return "response" - -# 或者在业务逻辑中直接使用 -with monitor.trace_llm_request("custom_operation", "my_model") as span: - # 执行业务逻辑 - result = process_data() - monitor.add_span_event("processing_completed") - return result +```bash +ENABLE_TELEMETRY=true +MONITORING_PROVIDER=langsmith +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false ``` -### ✨ 全局配置自动化 +LangSmith 当前配置只转发 traces,OTLP metrics 会留在 Collector debug pipeline。若需要后端直接写入 LangSmith,可设置 `OTEL_EXPORTER_OTLP_ENDPOINT=https://api.smith.langchain.com/otel`、`LANGSMITH_API_KEY` 和可选的 `LANGSMITH_PROJECT`。 -监控配置已在 `backend/utils/monitoring.py` 中自动初始化: +### 本地 Grafana + Tempo -```python -# 无需手动配置 - 系统启动时自动完成 -# monitoring_manager 已经使用环境变量配置完成 -from utils.monitoring import monitoring_manager +Grafana 本地部署使用 Grafana Tempo 存储 traces,并启用 Tempo `metrics-generator` 的 `local-blocks` processor 支持 Grafana trace breakdown 中的 TraceQL metrics 查询。Collector 接收 Nexent 后端的 OTLP traces/metrics,其中 traces 通过 OTLP gRPC 转发到 Tempo;OTLP metrics 只进入 Collector debug pipeline,不提供独立指标存储或指标 dashboard。 + +```bash +cd docker +./start-monitoring.sh --stack grafana +``` -# 直接使用即可,无需检查是否开启 -@monitoring_manager.monitor_endpoint("my_function") -def my_function(): - pass +后端 `.env` 使用 `MONITORING_DASHBOARD_URL` 控制前端顶栏监控入口: -# FastAPI应用初始化 -monitoring_manager.setup_fastapi_app(app) +```bash +ENABLE_TELEMETRY=true +MONITORING_PROVIDER=grafana 
+MONITORING_DASHBOARD_URL=http://localhost:3002/d/nexent-llm-agent/nexent-agent-trace-monitoring?orgId=1 +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 ``` -### 🔒 自动启停设计 +访问地址: -- **智能监控**: 根据 `ENABLE_TELEMETRY` 环境变量自动启停 -- **零感知使用**: 外部代码无需检查监控状态,直接使用所有功能 -- **优雅降级**: 未开启时静默无效果,开启时正常工作 -- **默认关闭**: 未配置时自动视为关闭状态 +- Grafana UI:`http://localhost:3002` +- 默认管理员:`admin` / `nexent-grafana-admin` +- Tempo API:`http://localhost:3200` -```bash -# 开启监控 -export ENABLE_TELEMETRY=true +Grafana 会自动预置 Tempo datasource,并加载 `Nexent Agent Trace Monitoring` dashboard。Trace 查询入口在 Grafana Explore 中选择 `Tempo` datasource,示例 TraceQL 为 `{ resource.service.name = "nexent-backend" }`。 -# 关闭监控 -export ENABLE_TELEMETRY=false -``` +### 本地 Zipkin -## 📊 核心监控指标 +Zipkin 本地部署使用 `openzipkin/zipkin` 镜像。Collector 接收 Nexent 后端的 OTLP traces/metrics,其中 traces 转发到 Zipkin v2 spans endpoint;OTLP metrics 当前只进入 Collector debug pipeline。 -| 指标 | 描述 | 重要性 | -|------|------|-------| -| `llm_token_generation_rate` | Token 生成速度 (tokens/s) | ⭐⭐⭐ | -| `llm_time_to_first_token_seconds` | 首 Token 延迟 | ⭐⭐⭐ | -| `llm_request_duration_seconds` | 完整请求耗时 | ⭐⭐⭐ | -| `llm_total_tokens` | 输入/输出 Token 数量 | ⭐⭐ | -| `llm_error_count` | LLM 调用错误数 | ⭐⭐⭐ | +```bash +cd docker +./start-monitoring.sh --stack zipkin +``` -## 🔧 环境配置 +后端 `.env`: ```bash -# 添加到 .env 文件 -cat >> .env << EOF ENABLE_TELEMETRY=true -SERVICE_NAME=nexent-backend -JAEGER_ENDPOINT=http://localhost:14268/api/traces -LLM_SLOW_REQUEST_THRESHOLD_SECONDS=5.0 -LLM_SLOW_TOKEN_RATE_THRESHOLD=10.0 -TELEMETRY_SAMPLE_RATE=1.0 # 开发环境,生产环境推荐 0.1 -EOF +MONITORING_PROVIDER=zipkin +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false +MONITORING_DASHBOARD_URL=http://localhost:9411 ``` -## 🛠️ 验证系统 +访问地址: -```bash -# 检查指标端点 -curl http://localhost:8000/metrics +- Zipkin UI:`http://localhost:9411` + +## AI 可观测性平台对接 + +### Arize Phoenix -# 验证依赖安装 -python -c "from backend.utils.monitoring import 
MONITORING_AVAILABLE; print(f'监控可用: {MONITORING_AVAILABLE}')" +Arize Phoenix 提供针对 AI 的专业可观测性,原生支持 OpenInference 语义。 + +**配置:** + +```bash +MONITORING_PROVIDER=phoenix +OTEL_EXPORTER_OTLP_ENDPOINT=https://app.phoenix.arize.com/s/YOUR_SPACE +OTEL_EXPORTER_OTLP_AUTHORIZATION="Bearer YOUR_PHOENIX_API_KEY" +OTEL_EXPORTER_OTLP_PROTOCOL=http +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false ``` -## 🆘 故障排除 +**功能特性:** +- LLM 调用链可视化(Prompt/Completion) +- Token 级性能指标 +- Agent 步骤追踪 +- 成本分析 + +### Langfuse + +Langfuse 提供 Prompt 管理和 LLM 可观测性,支持 OTLP 协议。 + +**配置:** -### 监控数据为空? ```bash -# 检查服务状态 -docker-compose -f docker/docker-compose-monitoring.yml ps +MONITORING_PROVIDER=langfuse +OTEL_EXPORTER_OTLP_ENDPOINT=https://cloud.langfuse.com/api/public/otel + +LANGFUSE_PUBLIC_KEY=pk-xxx +LANGFUSE_SECRET_KEY=sk-xxx -# 检查依赖安装 -python -c "import opentelemetry; print('✅ 监控依赖已安装')" +OTEL_EXPORTER_OTLP_AUTHORIZATION="Basic BASE64_ENCODED_KEY" +OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION=4 ``` -### 端口冲突? +生成认证 Key: + ```bash -# 检查端口占用 -lsof -i :3005 -i :9090 -i :16686 +echo -n "$LANGFUSE_PUBLIC_KEY:$LANGFUSE_SECRET_KEY" | base64 ``` -### 依赖安装问题? 
-```bash -# 重新安装性能依赖 -uv sync --extra performance +**功能特性:** +- Prompt 版本管理 +- 会话级 Trace 分组 +- 用户反馈收集 +- 模型成本追踪 + +## 环境变量 + +| 变量 | 默认值 | 说明 | +|------|--------|------| +| `ENABLE_TELEMETRY` | `false` | 启用/禁用监控 | +| `MONITORING_PROVIDER` | `otlp` | 平台配置和本地部署形态:`otlp`、`phoenix`、`langfuse`、`langsmith`、`grafana`、`zipkin` | +| `MONITORING_DASHBOARD_URL` | (空) | 前端顶栏监控入口跳转 URL,需配置为浏览器可访问地址 | +| `MONITORING_PROJECT_NAME` | `nexent` | 监控平台项目名 | +| `MONITORING_TRACE_CONTENT_MODE` | `summary` | Trace payload 记录模式:`summary` 写入有界预览和结构元数据,`metrics` 只写结构/大小元数据,`full` 在 `MONITORING_TRACE_MAX_CHARS` 限制内保留完整 payload | +| `MONITORING_TRACE_MAX_CHARS` | `4000` | 每个 payload 预览最多写入的字符数 | +| `MONITORING_TRACE_MAX_ITEMS` | `20` | dict/list 预览最多写入的 key 或 item 数 | +| `OTEL_SERVICE_NAME` | `nexent-backend` | 服务标识 | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | `http://localhost:4318` | OTLP base endpoint,SDK 会派生 `/v1/traces` 和 `/v1/metrics` | +| `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` | (空) | 可选 trace 专用 endpoint | +| `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` | (空) | 可选 metric 专用 endpoint | +| `OTEL_EXPORTER_OTLP_PROTOCOL` | `http` | 协议:`http` 或 `grpc` | +| `OTEL_EXPORTER_OTLP_HEADERS` | (空) | 通用认证头(逗号分隔) | +| `OTEL_EXPORTER_OTLP_AUTHORIZATION` | (空) | `Authorization` header,常用于 Phoenix bearer auth 和 Langfuse | +| `OTEL_EXPORTER_OTLP_X_API_KEY` | (空) | `x-api-key` header,用于兼容需要该 header 的平台 | +| `OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION` | (空) | Langfuse 实时摄取版本,例如 `4` | +| `OTEL_EXPORTER_OTLP_METRICS_ENABLED` | `true` | 是否导出 OTLP metrics | +| `LANGSMITH_API_KEY` | (空) | LangSmith API Key,会映射为 OTLP `x-api-key` header | +| `LANGSMITH_PROJECT` | (空) | 可选 LangSmith project header | +| `LANGSMITH_OTLP_TRACES_ENDPOINT` | `https://api.smith.langchain.com/otel/v1/traces` | Collector 转发到在线 LangSmith 的 trace endpoint | +| `MONITORING_INSTRUMENT_REQUESTS` | `false` | 是否启用 requests 自动 HTTP client span;默认关闭,避免 AI trace 被普通 HTTP 请求刷屏 | +| `MONITORING_FASTAPI_EXCLUDED_URLS` | (空) | FastAPI 自动埋点排除 URL,逗号分隔正则;例如只看 
agent 业务 span 时可设为 `/agent/run` | +| `MONITORING_FASTAPI_EXCLUDE_SPANS` | `receive,send` | 排除 ASGI 内部 `receive/send` span;流式接口建议保持默认值 | +| `OTEL_COLLECTOR_VERSION` | `0.150.0` | 本地 OpenTelemetry Collector Contrib 镜像版本 | +| `PHOENIX_VERSION` | `15` | 本地 Phoenix 镜像版本 | +| `LANGFUSE_VERSION` | `3` | 本地 Langfuse Web/Worker 镜像版本 | +| `LANGFUSE_POSTGRES_VERSION` | `15-alpine` | 本地 Langfuse Postgres 镜像版本 | +| `LANGFUSE_CLICKHOUSE_VERSION` | `26.3-alpine` | 本地 Langfuse ClickHouse 镜像版本 | +| `LANGFUSE_MINIO_VERSION` | `RELEASE.2023-12-20T01-00-02Z` | 本地 Langfuse MinIO 镜像版本 | +| `LANGFUSE_REDIS_VERSION` | `alpine` | 本地 Langfuse Redis 镜像版本 | +| `GRAFANA_VERSION` | `12.4` | 本地 Grafana 镜像版本 | +| `GRAFANA_PORT` | `3002` | 本地 Grafana UI 端口 | +| `GRAFANA_ADMIN_USER` | `admin` | 本地 Grafana 管理员用户名 | +| `GRAFANA_ADMIN_PASSWORD` | `nexent-grafana-admin` | 本地 Grafana 管理员密码 | +| `GRAFANA_DEFAULT_LANGUAGE` | `zh-Hans` | 本地 Grafana 默认界面语言 | +| `TEMPO_VERSION` | `2.10.5` | 本地 Tempo 镜像版本,避免浮动 tag 带来的配置兼容性漂移 | +| `TEMPO_PORT` | `3200` | 本地 Tempo HTTP API 端口 | +| `ZIPKIN_VERSION` | `latest` | 本地 Zipkin 镜像版本 | +| `ZIPKIN_PORT` | `9411` | 本地 Zipkin UI/API 端口 | + +## 代码集成 + +### Agent 边界上下文 + +业务层只需要在请求入口解析出用户和 Agent 信息后绑定一次上下文,后续 Agent、LLM、Tool span 由 SDK 生命周期自动生成: -# 检查 pyproject.toml 中的 performance 配置 -cat backend/pyproject.toml | grep -A 20 "performance" +```python +from nexent.monitor.agent_observability import AgentRunMetadata +from utils.monitoring import monitoring_manager + +monitoring_manager.bind_agent_context(AgentRunMetadata( + tenant_id=tenant_id, + user_id=user_id, + agent_id=agent_request.agent_id, + conversation_id=agent_request.conversation_id, + query=agent_request.query, + is_debug=agent_request.is_debug, + language=language, +)) ``` -### 服务名显示为 unknown_service? 
-```bash -# 检查环境变量配置 -echo "SERVICE_NAME: $SERVICE_NAME" +`monitor_endpoint` 仍保留为兼容 API 和低层 escape hatch,不建议业务层新增常规埋点时继续使用。 + +### Trace Payload 策略 + +工具输入输出、检索输出,以及 OpenInference 的 `input.value` / `output.value` 属性统一使用同一套 payload 策略。默认写入有界预览,并额外写入 `type`、`size_chars`、`item_count`、`truncated`、`keys` 等结构化属性。记忆检索 span 只记录结果摘要和统计信息,不写完整 memory 正文。 -# 重启监控服务以应用新配置 -./docker/start-monitoring.sh +Agent 上下文指标由 SDK 生命周期自动写入。每个 action step 会产生 `agent.step.metrics` event,包含上下文 token 估算、压缩调用数、缓存命中、压缩率和 token 阈值。Agent 结束时还会在顶层 span 写入聚合 step 数、最大上下文 token、平均压缩率、压缩调用总数和缓存命中总数。 + +### LLM 调用监控 + +```python +@monitoring_manager.monitor_llm_call("gpt-4", "chat_completion") +def call_llm(messages): + return llm_response ``` -## 🧹 数据管理 +### Agent 步骤追踪 -### 清理 Jaeger 追踪数据 -```bash -# 方法1: 重启 Jaeger 容器(最简单) -docker-compose -f docker/docker-compose-monitoring.yml restart nexent-jaeger +```python +with monitoring_manager.trace_agent_step("web_search", step_type="tool_call") as span: + result = execute_tool() + monitoring_manager.set_tool_output(result) +``` -# 方法2: 完全重建 Jaeger 容器和数据 -docker-compose -f docker/docker-compose-monitoring.yml stop nexent-jaeger -docker-compose -f docker/docker-compose-monitoring.yml rm -f nexent-jaeger -docker-compose -f docker/docker-compose-monitoring.yml up -d nexent-jaeger +### 工具调用追踪 -# 方法3: 清理所有监控数据(重建所有容器) -docker-compose -f docker/docker-compose-monitoring.yml down -docker-compose -f docker/docker-compose-monitoring.yml up -d +```python +with monitoring_manager.trace_tool_call("web_search", "agent_name", {"query": "test"}) as span: + results = search_web("test") + monitoring_manager.set_tool_output({"results": results}) ``` -### 清理 Prometheus 指标数据 -```bash -# 重启 Prometheus 容器 -docker-compose -f docker/docker-compose-monitoring.yml restart nexent-prometheus +### Phoenix 自定义层级埋点 + +如果希望 Phoenix 展示 `agent -> chain -> llm/retriever/tool` 的层级结构,使用 SDK Agent 生命周期入口和 OpenInference span kind 封装方法: + +```python +from nexent.monitor.agent_observability import 
AgentRunMetadata, get_monitoring_manager + +monitoring_manager = get_monitoring_manager() + +metadata = AgentRunMetadata( + tenant_id="tenant_id", + user_id="user_id", + agent_id=1, + conversation_id=1001, + agent_name="TestAgent", + query="你好", +) + +with monitoring_manager.start_agent_run(metadata): + with monitoring_manager.trace_agent_step("Step 0", metadata, step_type="agent_loop"): + with monitoring_manager.trace_llm_request("OpenAIModel.generate", "gpt-4"): + result = call_llm() + + with monitoring_manager.trace_retriever_call( + "knowledge_base_search", + "TestAgent", + {"query": "你好"}, + ): + documents = search_knowledge_base("你好") + monitoring_manager.set_retriever_output(documents) + + with monitoring_manager.trace_tool_call("FinalAnswerTool", "TestAgent", {"query": "你好"}): + monitoring_manager.set_tool_output({"answer": result}) + + monitoring_manager.set_openinference_output({"answer": result}) +``` -# 完全清理 Prometheus 数据 -docker-compose -f docker/docker-compose-monitoring.yml stop nexent-prometheus -docker volume rm docker_prometheus_data 2>/dev/null || true -docker-compose -f docker/docker-compose-monitoring.yml up -d nexent-prometheus +Phoenix 左侧的 `agent`、`chain`、`llm`、`retriever`、`tool` 标签来自 `openinference.span.kind`。span 必须通过嵌套 `with` 创建,Phoenix 才会显示成树形结构。 + +同一套方法只写入通用 OpenInference / Nexent 属性,不再写入 Langfuse 专用 span 字段。Langfuse provider 仍通过 OTLP endpoint 接收 trace,但展示和过滤以通用 OTLP/OpenInference 属性为准。 + +## OpenInference 语义属性 + +系统使用 OpenInference 语义约定,专为 AI 可观测性设计: + +### LLM 属性 + +| 属性 | 说明 | +|------|------| +| `llm.model_name` | 模型标识(如 `gpt-4`) | +| `llm.operation.name` | 操作类型(如 `chat_completion`) | +| `llm.token_count.prompt` | 输入 Token 数 | +| `llm.token_count.completion` | 输出 Token 数 | +| `llm.invocation_parameters` | 模型参数(JSON) | +| `llm.time_to_first_token` | TTFT(秒) | + +### Agent 属性 + +| 属性 | 说明 | +|------|------| +| `agent.name` | Agent 标识 | +| `agent.step.name` | 步骤名称(如 `web_search`) | +| `agent.step.type` | 
步骤类型:`tool_call`、`reasoning`、`action_selection` | +| `agent.tool.name` | 工具名称 | +| `agent.tool.input` | 按 trace payload 策略处理后的工具输入预览 | +| `agent.tool.input.*` | 工具输入结构化元数据:类型、大小、item 数、截断状态、keys | +| `agent.tool.output` | 按 trace payload 策略处理后的工具输出预览 | +| `agent.tool.output.*` | 工具输出结构化元数据:类型、大小、item 数、截断状态、keys | +| `agent.tool.success` | 工具调用是否成功 | +| `agent.tool.duration_ms` | 工具调用耗时 | +| `retriever.name` | 检索器名称 | +| `retrieval.query` | 检索查询 | +| `retrieval.results.count` | 检索结果数量 | +| `retrieval.top_score` | 可用时记录最高检索分数 | +| `retriever.input.*` | 检索输入结构化元数据 | +| `retriever.output` | 按 trace payload 策略处理后的检索输出预览 | +| `retriever.output.*` | 检索输出结构化元数据 | +| `context.tokens.estimated_input` | 每个 Agent step event 的上下文输入 token 估算 | +| `context.tokens.uncompressed_estimated` | 每个 Agent step event 的未压缩上下文 token 估算 | +| `context.compression.calls` | 每个 Agent step event 的压缩调用数 | +| `context.compression.cache_hits` | 每个 Agent step event 的压缩缓存命中数 | +| `context.compression.ratio` | 每个 Agent step event 的压缩率 | + +## 指标 + +| 指标 | 说明 | +|------|------| +| `llm.request.duration` | 请求延迟 | +| `llm.token.generation_rate` | Token 生成速率 | +| `llm.time_to_first_token` | TTFT | +| `llm.token_count.prompt` | 输入 Token | +| `llm.token_count.completion` | 输出 Token | +| `agent.step.count` | Agent 步骤数 | +| `agent.execution.duration` | Agent 执行时间 | +| `agent.error.count` | Agent 错误数 | + +## Collector 配置 + +OpenTelemetry Collector 默认只通过 debug exporter 打印数据,避免没有外部后端时把数据转发回自身。需要通过 Collector 转发到平台时,增加对应 exporter: + +```yaml +exporters: + otlphttp/langsmith: + traces_endpoint: https://api.smith.langchain.com/otel/v1/traces + headers: + x-api-key: YOUR_LANGSMITH_API_KEY + Langsmith-Project: nexent + +service: + pipelines: + traces: + exporters: [otlphttp/langsmith, debug] ``` -### 清理 Grafana 配置 -```bash -# 重置 Grafana 配置和仪表板 -docker-compose -f docker/docker-compose-monitoring.yml stop nexent-grafana -docker volume rm docker_grafana_data 2>/dev/null || true -docker-compose -f 
docker/docker-compose-monitoring.yml up -d nexent-grafana +本地 Phoenix、本地 Langfuse 和在线 LangSmith 分别使用独立 Collector 配置: + +- `docker/monitoring/otel-collector-phoenix-config.yml` +- `docker/monitoring/otel-collector-langfuse-config.yml` +- `docker/monitoring/otel-collector-langsmith-config.yml` + +基础 debug 配置见 `docker/monitoring/otel-collector-config.yml`。 + +## 优雅降级 + +未安装 OpenTelemetry 依赖时,监控自动禁用: + +```bash +pip install nexent # 基础包 - 无监控 +pip install nexent[performance] # 包含 OTLP 支持 ``` -## 📈 典型问题分析 +禁用时所有监控方法均正常工作 - 装饰器透传,上下文管理器返回 None。 -### Token 生成速度慢 (< 5 tokens/s) -1. **分析**: Grafana → Token Generation Rate 面板 -2. **解决**: 检查模型服务负载、优化输入 prompt 长度 +## 故障排除 -### 请求响应慢 (> 10s) -1. **分析**: Jaeger → 查看完整链路追踪 -2. **解决**: 定位瓶颈环节(数据库/LLM/网络) +### 数据未显示 -### 错误率突增 (> 10%) -1. **分析**: Prometheus → llm_error_count 指标 -2. **解决**: 检查模型服务可用性、验证 API 密钥 +1. 检查 `.env` 中 `ENABLE_TELEMETRY=true` +2. 验证 OTLP 端点可访问 +3. 检查认证头配置正确 -## 🎉 开始使用 +### 连接错误 -设置完成后你可以: +1. 测试端点:`curl -v $OTEL_EXPORTER_OTLP_ENDPOINT/v1/traces` +2. 确认协议匹配端点(`http` vs `grpc`) +3. 查看 Collector 日志:`docker logs nexent-otel-collector` -1. 📊 在 Grafana 中查看 **LLM Performance Dashboard** -2. 🔍 在 Jaeger 中追踪每个请求的完整链路 -3. 📈 分析 Token 生成速度和性能瓶颈 -4. 🚨 设置性能告警和阈值 +### 属性错误 -享受高效的 LLM 性能监控! 🚀 +1. 在平台 UI 中验证 OpenInference 属性 +2. 检查 Span 属性命名:使用 `llm.model_name` 而非 `model_name` +3. 查看平台特定属性要求 diff --git a/doc/docs/zh/sdk/opentelemetry-design.md b/doc/docs/zh/sdk/opentelemetry-design.md new file mode 100644 index 000000000..2f8f0a678 --- /dev/null +++ b/doc/docs/zh/sdk/opentelemetry-design.md @@ -0,0 +1,699 @@ +# Nexent OpenTelemetry 可观测性设计 + +生成日期:2026-05-06 +基准分支:当前 OpenTelemetry 功能分支 + +## 可观测性基础 + +可观测性关注的是系统在运行过程中是否能够被理解和定位问题。相比只回答“系统是否还活着”的传统监控,可观测性更强调从运行时信号反推出系统内部状态,帮助研发和运维回答以下问题: + +- 当前请求为什么慢? +- Agent 在哪一步失败? +- 大模型调用耗时、首 token 时间和 token 速率是否异常? +- 某个用户、会话或 Agent 的完整执行链路是什么? +- 问题发生时有哪些输入、输出、工具调用和错误上下文? 
+ +业界通常把可观测性拆成三大支柱:Metrics、Logs、Traces。三者解决的问题不同,需要组合使用。 + +| 支柱 | 核心问题 | 典型数据 | 适合场景 | 在 Nexent 中的作用 | +|------|----------|----------|----------|--------------------| +| Metrics | “整体是否异常?” | 计数器、直方图、速率、分位数 | 看趋势、告警、容量评估、SLO/SLA | 统计 LLM 请求耗时、TTFT、token 速率、错误数、Agent step/tool 调用数 | +| Logs | “当时发生了什么?” | 按时间顺序输出的文本或结构化事件 | 查看异常上下文、排查单点错误、审计关键行为 | 保留运行日志,并通过 span event/attribute 记录关键 Agent、LLM、Tool 事件 | +| Traces | “一次请求经历了哪些步骤?” | trace、span、span event、上下游关系 | 分布式调用链、流式 Agent 执行链路、跨服务耗时定位 | 串联 HTTP 接口、Agent run、LLM generate、Tool call 和最终答案 | + +三大支柱之间不是替代关系。Metrics 适合发现问题,例如某段时间 LLM 错误数上升;Traces 适合定位问题,例如找到某次 `agent.run` 卡在某个 tool;Logs 适合补充细节,例如错误堆栈、原始提示词摘要或工具返回内容。对于 LLM Agent 场景,单纯的 HTTP 接口指标不足以解释 Agent 行为,因此必须把 Agent、LLM、Tool 等业务语义写入 trace 层级中。 + +## 智能体可观测性行业洞察 + +截至当前,智能体可观测性正在从传统 APM 的“接口是否健康、服务是否变慢”,扩展到“智能体为什么这样决策、哪一步引入了错误上下文、工具或检索是否误导了模型、成本和质量是否可控”。这类系统的核心难点不是单次 LLM 调用本身,而是一次用户请求会跨越路由、记忆、规划、检索、工具调用、模型生成、最终答案和反馈评价等多个阶段,并且每个阶段都可能影响最终结果。 + +智能体可观测性的接入路径通常有几类: + +| 接入路径 | 典型方式 | 适合场景 | 需要注意 | +|----------|----------|----------|----------| +| 平台 SDK 直连 | Langfuse SDK、LangSmith SDK、Datadog / New Relic SDK、框架 callback | 快速接入某个平台的专有能力,例如 prompt 管理、评分、评估、成本分析 | 平台绑定更强,后续迁移或双写到其他后端成本较高 | +| OpenTelemetry SDK 直连平台 OTLP endpoint | 应用直接用 OTLP HTTP/gRPC exporter 写入 Phoenix、Langfuse、LangSmith、Datadog 等兼容入口 | 希望保留 OTel 埋点模型,同时减少本地组件 | 鉴权、脱敏、采样、多后端分发逻辑会落在应用配置或平台侧 | +| OpenTelemetry Collector 中转 | 应用只写 Collector,由 Collector 转发到 Phoenix、Langfuse、LangSmith、Grafana Tempo、Zipkin 或企业 APM | 需要统一批处理、采样、脱敏、header 注入、多后端转发和私有化部署 | 多一个运行组件,需要维护 Collector 配置和部署可用性 | +| 平台 agent / 网关中转 | Datadog Agent、New Relic agent 或企业内部 telemetry gateway | 企业已有 APM 基础设施、权限、网络出口和审计要求明确 | 数据模型可能会被平台转换,AI 语义字段需要确认兼容性 | + +从知名 Agent/LLM 框架和平台的公开文档看,可观测性方案已经明显分成两层:框架或平台负责表达 Agent/LLM 运行时语义,OpenTelemetry/OTLP 负责把 trace、metric、log 导出到后端。差异主要在于:有些框架原生使用 OTel,有些通过 OpenInference/OpenLIT/OpenLLMetry 等 instrumentation 转成 OTel span,有些则先进入自有 tracing SDK,再通过 processor、callback 或平台集成转发。 + +| Agent / 平台 | 原生可观测性能力 | 
常用观测框架 / SDK | OTel / OTLP 路径 | 语义覆盖重点 | 局限与注意 | +|--------------|------------------|--------------------|------------------|--------------|------------| +| LangChain / LangGraph | LangSmith tracing、thread、feedback、evaluation,面向 chain、graph、run 的调试和评估 | LangSmith SDK、LangSmith OTel、OpenTelemetry SDK、Collector | `LANGSMITH_OTEL_ENABLED=true` 后可生成 OTel spans;LangSmith 提供 OTLP traces endpoint;也支持经 Collector fan-out 到多后端 | chain、graph node、LLM、tool、retriever、thread、feedback、eval | LangSmith 语义最完整;若只使用通用 OTel 后端,需要自行补齐 graph/thread/eval 维度 | +| LlamaIndex | 内置 instrumentation/callback 体系,官方观测页覆盖 LlamaTrace、Phoenix、SigNoz、MLflow、Langfuse、OpenLLMetry、OpenLIT、AgentOps 等 | OpenInference LlamaIndex instrumentation、LlamaTrace/Phoenix、Langfuse、OpenLLMetry、OpenLIT、MLflow | Phoenix/LlamaTrace、SigNoz、Langfuse、OpenLIT 等路径都可通过 OTel/OTLP 导出;常见方式是 `openinference-instrumentation-llama-index` + OTLP exporter | RAG query engine、retriever、index、agent workflow、LLM、tool、token、latency | RAG 语义强,但不同集成对属性映射和评估能力不完全一致 | +| OpenAI Agents SDK | SDK 内置 tracing,默认记录 runner、agent、generation、function tool、guardrail、handoff、speech 等 span | OpenAI Traces dashboard、custom trace processor、外部 tracing processors(Phoenix、MLflow、LangSmith、Langfuse、AgentOps、Datadog 等) | 默认不是 OTel span,而是 OpenAI Agents tracing 模型;要进入 OTLP 通常需要外部 tracing processor 或自定义 processor 做 OTel/OTLP 适配 | agent run、LLM generation、function tool、handoff、guardrail、自定义事件、会话分组 | Agent 语义完整,但与标准 OTel 数据模型之间需要转换层;敏感输入输出默认可能被采集,需显式配置 | +| AutoGen | 新版 AutoGen 内置 tracing/observability,运行时支持 OpenTelemetry,并遵循 agent/tool 与 GenAI 语义约定;旧版 0.2 主要是 logging 和 partner providers | OpenTelemetry SDK、OTLP exporter、Jaeger/Zipkin、OpenAI instrumentor、AgentOps 等 | 可直接配置 OTel `TracerProvider` 和 OTLP exporter,把 AgentChat/GroupChat 运行时事件发到 OTel 兼容后端 | 多 Agent 消息、agent runtime、tool、LLM 调用、group chat、消息元数据 | 版本差异明显;需确认使用的是新版 AgentChat/Core 还是旧版 0.2 logging 集成 | +| Dify | 产品内置 Monitoring Dashboard 和 Run History,可查看应用指标、workflow/node tracing;外部监控支持 Langfuse、LangSmith 
| Dify 内置监控、Langfuse integration、LangSmith integration | 官方文档主要体现为平台到 Langfuse/LangSmith 的集成和字段映射 | app、workflow/chatflow、node、message、dataset retrieval、tool、moderation、token、user/session | 产品语义强,适合低代码应用监控;开放 OTLP 可迁移性弱于原生 OTel instrumentation | +| CrewAI | CrewAI AMP 内置 tracing,可通过 `tracing=True` 或 `CREWAI_TRACING_ENABLED=true` 追踪 crew/flow;官方观测页列出多种外部平台 | CrewAI AMP、OpenLIT、Langfuse、LangSmith OTel、Langtrace、Arize Phoenix、MLflow、Opik、Weave、Portkey 等 | OpenLIT 是 OTel-native,可配置 `OTEL_EXPORTER_OTLP_ENDPOINT`;LangSmith/CrewAI 集成使用 `opentelemetry-instrumentation-crewai`;Langfuse 可通过 OpenInference CrewAI instrumentation 产生 OTel spans | agent、task、crew、flow、tool、LLM、任务序列、成本、延迟 | 集成选择多但语义不完全统一;CrewAI AMP 与第三方 OTel 路径需要明确数据归属和脱敏策略 | +| smolagents | 官方“Inspecting runs with OpenTelemetry”明确采用 OpenTelemetry 标准记录 agent runs | `smolagents[telemetry]`、OpenInference `SmolagentsInstrumentor`、Phoenix、Langfuse、OpenTelemetry SDK | 使用 `SmolagentsInstrumentor` 生成 OTel spans,可通过 `OTLPSpanExporter` 写 Phoenix,也可通过 Langfuse/其他 OTel 兼容平台接收 | CodeAgent、ToolCallingAgent、managed agents、工具调用、LLM 交互、多步执行 | 轻量、OTel 路径清晰;复杂评估、反馈和产品内权限仍依赖后端平台补齐 | + +从对比结果看,行业并不是简单地“统一使用某一个观测平台”,而是在向三种形态收敛: + +- 框架原生 OTel:AutoGen 新版、smolagents、Vercel AI SDK、Semantic Kernel 这类更容易直接进入 OTLP/Collector/企业 APM。 +- OTel instrumentation 桥接:LlamaIndex、CrewAI、LangChain/LangGraph 常通过 OpenInference、OpenLIT、OpenLLMetry、LangSmith OTel 等层把框架语义转成 OTel span。 +- 平台私有 tracing 再导出:OpenAI Agents SDK、Dify、CrewAI AMP 这类先保留自有产品语义,再通过 processor、callback、外部平台集成或字段映射与 OTel/LLMOps 平台互通。 + +对 Nexent 来说,比较稳妥的策略是:核心埋点直接生成 OpenTelemetry span,并在 span 属性上兼容 OpenInference、OpenTelemetry GenAI、Langfuse/LangSmith 等主流语义;对外只承诺 OTLP 可导出,不把业务链路绑定到某一个平台 SDK。这样既能接入 Phoenix/Langfuse/LangSmith 这类 LLMOps 平台,也能接入 Grafana Tempo、Zipkin、Datadog、New Relic、Elastic、Honeycomb 等通用或企业级观测后端。 + +因此,智能体可观测性的关键不是选择一个“唯一平台”,也不是强制所有链路都经过 Collector,而是先把遥测数据建模成可迁移、可组合、可扩展的结构:底层用标准 trace/metric/log 表达运行路径和性能,上层用 Agent/LLM/Tool/Retriever/Session/User/Evaluation 等语义补足业务解释能力。这样既能直连 
Phoenix、Langfuse、LangSmith 等 AI 可观测平台,也能通过 Collector 接入 Grafana Tempo、Zipkin 或企业已有 APM,避免在产品早期把监控能力锁死在某个供应商或某套私有 SDK 中。 + +## 为什么使用 OpenTelemetry + +```mermaid +timeline + title 可观测性框架与协议演进时间线 + 2010 : Google 发表 Dapper 论文 + 2012 : Prometheus 在 SoundCloud 起步 + 2015 : Jaeger 在 Uber 内部形成并发展 + 2016 : OpenTracing 进入 CNCF + 2017 : OpenCensus 推广 tracing + stats/metrics + tags + 2019 : OpenTracing 与 OpenCensus 合并为 OpenTelemetry + 2021 : OpenTelemetry 晋升 CNCF Incubating + 2022 : OpenTracing 被归档;OpenTelemetry Metrics 发布 RC 并进入 GA 周期 + 2023 : OpenCensus 于 7 月 31 日后停止维护 + 2024 : Prometheus 持续增强对 OpenTelemetry/OTLP 的互操作 + 2026 : OpenTelemetry 于 5 月 11 日 Graduated;OpenTracing compatibility 于 3 月被 deprecated +``` + +OpenTelemetry 是当前主流的可观测性开放标准,提供统一的 API、SDK、语义约定和 OTLP 传输协议。Nexent 选择 OpenTelemetry 作为监控主干,主要基于以下原因: + +- 标准化:用统一的 span、event、metric 表达 HTTP、Agent、LLM、Tool 等运行时信号,减少平台私有模型对业务代码的侵入。 +- 可移植:同一套埋点可以通过 OTLP 上报到 Phoenix、Langfuse、LangSmith、Grafana Tempo、Zipkin 或其他兼容后端,切换平台主要调整配置和 Collector pipeline。 +- 可扩展:OpenTelemetry Collector 可以在不改业务代码的情况下完成转发、过滤、批处理、认证 header 注入和多后端分发。 +- 生态成熟:FastAPI、requests 等基础组件已有自动埋点能力,Nexent 只需要补充 Agent/LLM/Tool 的业务 span。 +- 避免锁定:监控平台 SDK 可以作为增强层,但核心链路不依赖某一家平台 SDK,避免平台迁移或本地化部署时重写埋点。 +- 适合 Agent 场景:trace 的父子 span 结构天然适合表达 `agent.run -> chain step -> LLM generate/tool call -> final answer` 这类多步骤执行过程。 + +因此,Nexent 的实现原则是:业务代码只产生 OpenTelemetry 标准信号和少量平台兼容属性,平台差异收敛在配置、Collector 和展示层。 + +## OTel 规范概要 + +本文中的 OTel 规范通常指 OpenTelemetry Specification 及其配套规范。它不是某个 SDK,也不是某个监控平台,而是一套兼容性契约:规定可观测性数据应该如何生成、命名、传播、处理和导出。各语言 SDK、Collector、后端平台和自动埋点库按这套契约实现,才能保证跨语言、跨框架、跨后端互通。 + +一句话概括:OTel 规范是 OpenTelemetry 为 traces、metrics、logs 等可观测性数据制定的一套标准,保证不同语言、框架、Collector 和后端之间能够互通。 + +OpenTelemetry 规范按 signal 维度独立演进。Tracing、Metrics、Logs、Baggage 是当前主要 signal;Profiles 正在发展中,Events 通常作为 Logs 的特定事件形态讨论。每个成熟 signal 通常由 API、SDK、OTLP、Collector 和 instrumentation/contrib 生态共同组成,语义约定用于保证不同语言和组件在观测同类操作时输出一致的数据。 + +从实现视角看,OTel 规范可以拆成七个常用层面: + +| 规范领域 | 核心概念 | 作用 | 
+|----------|----------|------| +| Signals | Traces、Metrics、Logs、Baggage、Profiles | 定义可观测性数据类型。Nexent 当前重点使用 Traces 和 Metrics,Logs 通过应用日志与 span event 补充上下文;Profiles 暂不接入 | +| API | Tracer、Meter、Logger、Context、Propagator | 面向业务代码和 instrumentation 的稳定接口,业务埋点只依赖 API,不直接绑定具体 exporter | +| SDK | TracerProvider、MeterProvider、SpanProcessor、MetricReader、Sampler、Resource | 提供采样、批处理、资源描述、导出等运行时能力 | +| Data Model | Span、Metric、LogRecord、Resource、Instrumentation Scope | 定义 telemetry 数据结构,确保不同语言和平台对数据有一致理解 | +| Context Propagation | Context、SpanContext、Baggage、Propagator | 在服务、线程、异步任务和下游请求之间传递 trace 上下文,保证调用链可以串起来 | +| OTLP | OTLP HTTP、OTLP gRPC、protobuf payload | OpenTelemetry 原生传输协议,负责把 traces、metrics、logs 从应用或 Collector 发到后端 | +| Semantic Conventions | 标准属性名、span name、metric name、单位和枚举值 | 统一 HTTP、数据库、RPC、Messaging 等通用语义;AI 场景中 Nexent 额外兼容 OpenInference 和 Langfuse 属性 | + +### Signals + +OTel 把可观测性数据抽象为多个 signal。每个 signal 有独立 API 和数据模型,但共享 Resource、Context 和传播机制。 + +- Traces:由一组具有父子关系的 span 构成,用于描述一次逻辑操作的完整路径。Nexent 用 trace 表达 `agent.run` 到 LLM、Tool、Final Answer 的执行链路。 +- Metrics:由 counter、histogram、gauge 等 instrument 产生,用于描述聚合后的趋势和分布。Nexent 用 metrics 统计 LLM 延迟、TTFT、token 速率和错误数。 +- Logs:以 LogRecord 或传统日志集成的方式表达离散事件。Nexent 当前不把 Logs signal 作为主链路 exporter,但会通过应用日志和 span event 补充错误上下文。 +- Baggage:跨进程传播的键值上下文,适合传递租户、用户、实验分组等需要参与过滤和关联的业务标签。使用时需要控制基数和敏感信息。 +- Profiles:用于记录代码级资源消耗画像,当前在 OpenTelemetry 体系中仍处于发展阶段。Nexent 暂不采集 profiles,避免引入额外运行时开销。 + +Nexent 的当前落地策略是:Traces 优先,因为 Agent 运行链路需要父子 span 表达;Metrics 保留,用于趋势、告警和 dashboard;Logs 暂以应用日志和 span event 形态承载,后续如需统一日志采集,可以通过 Collector 增加 Logs pipeline。 + +### API 与 SDK + +OTel 区分 API 和 SDK: + +- API 是埋点代码依赖的稳定接口,例如 `trace.get_tracer()`、`start_as_current_span()`、`meter.create_counter()`。 +- SDK 是运行时实现,负责创建 provider、处理 span/metric、采样、批量导出和错误处理。 + +这种分层让库代码可以只依赖 API,而应用在启动时统一配置 SDK。Nexent 的 SDK 埋点遵循这个模型:业务函数只创建 span、event、metric;是否启用、导出到哪里、使用 HTTP 还是 gRPC,全部由 `MonitoringConfig` 和环境变量决定。 + +这种分层也决定了 Nexent 的边界: + +- 业务代码不直接创建 
exporter,也不直接引用 Phoenix、Langfuse、Tempo 等平台客户端。 +- 初始化层负责创建 SDK provider、resource、processor、reader 和 exporter。 +- 平台差异通过 provider profile、OTLP endpoint、header 和 Collector pipeline 表达。 + +### Resource 与 Instrumentation Scope + +Resource 描述 telemetry 来源实体,例如服务名、版本、实例、部署环境、项目名。Nexent 当前写入: + +- `service.name`:默认 `nexent-backend` +- `service.version`:当前固定为 `1.0.0` +- `service.instance.id`:当前固定为 `nexent-instance-1` +- `telemetry.provider`:当前 provider profile,例如 `otlp`、`phoenix`、`langfuse`、`grafana`、`zipkin` +- `project.name`:当配置 `MONITORING_PROJECT_NAME` 时写入 + +Instrumentation Scope 描述产生 telemetry 的 instrumentation 库或模块。后续如果需要区分 Nexent SDK、FastAPI 自动埋点、第三方库埋点,可以在 scope 层面辅助过滤。 + +### Context Propagation + +Trace 的核心是上下文传播。一个请求从 HTTP 入口进入后,后续 Agent step、LLM 调用、Tool 调用必须处在同一个 trace 上下文中,监控页面才能显示正确的父子层级。 + +OTel 的 Context 是执行范围内的不可变上下文容器,用于承载当前 span、baggage 等跨切面数据。Propagator 负责把这些上下文编码到请求边界,例如 HTTP header,再由下游服务还原。对 Nexent 来说,同进程内的 async、generator、线程和工具调用上下文保持比跨服务 header 传播更关键。 + +Nexent 的关键处理包括: + +- 业务入口只绑定一次 `AgentRunMetadata`,保存 tenant、user、agent、conversation、query、language、memory 等请求级元数据。 +- SDK 在 `NexentAgent.agent_run_with_observer` 中创建顶层 `agent.run` span,并在 Agent loop、LLM、Tool 等生命周期中自动继承上下文。 +- `monitor_endpoint` 保留为兼容 API 和低层 escape hatch,不再作为业务层新增埋点的推荐方式。 +- Agent、LLM、Tool span 统一写入 OpenInference 和 Nexent 自定义属性,避免业务 trace 绑定到单一平台字段。 + +### Semantic Conventions + +Semantic Conventions 规定常见遥测字段的命名和含义,例如 HTTP 方法、URL、状态码、错误类型、metric 单位等。使用语义约定的价值是让不同服务、语言和平台对同一类数据有一致理解。 + +Nexent 采用两层语义: + +- OTel 通用语义:用于 service、resource、HTTP 自动埋点、metric instrument 等基础字段。 +- OpenInference 语义:用于 AI span 类型,例如 `openinference.span.kind=AGENT|CHAIN|LLM|TOOL|RETRIEVER`,适配 Phoenix 等 AI observability 平台。 + +当平台展示存在差异时,Nexent 优先保持业务 span 的通用 OpenTelemetry / OpenInference 语义,不写入平台专用字段。 + +### OTLP 与 Collector Pipeline + +OTLP 是 OpenTelemetry 原生传输协议,支持 HTTP 和 gRPC。Nexent 后端只需要把数据发到 OTLP endpoint,后端平台差异交给 Collector 处理。 + +Collector pipeline 通常由三部分组成: + +- Receiver:接收应用上报的 OTLP 
traces/metrics/logs。 +- Processor:执行批处理、内存限制、资源属性补充、过滤、采样等处理。 +- Exporter:把数据转发到 Phoenix、Langfuse、Tempo 或其他 OTLP 兼容后端。 + +OTLP 是 request/response 风格协议,客户端发送 export 请求,服务端返回成功、部分成功或失败响应。Nexent 当前支持: + +- OTLP HTTP:默认协议,便于通过网关、云平台和本地 Collector 接入。 +- OTLP gRPC:适合内部网络或偏高吞吐场景。 +- base endpoint 与 signal endpoint:支持配置 base endpoint,再由 SDK 推导 `/v1/traces` 和 `/v1/metrics`,也支持直接配置 signal-specific endpoint,避免路径重复拼接。 + +这种架构的好处是:应用侧配置保持稳定,平台迁移和本地化部署主要改 Collector 配置。例如 `grafana` 形态下 traces 转发到 Tempo;`phoenix` 形态下 traces 转发到 Phoenix;`otlp` 形态下先通过 debug exporter 验证数据是否产生。 + +## 设计目标 + +Nexent 的监控能力以 OpenTelemetry 为主干,SDK 和后端只负责生成标准 span、event、metric,并通过 OTLP 导出。Phoenix、Langfuse、LangSmith、Grafana Tempo、Zipkin 和标准 OTLP 后端作为可配置 exporter 接入,业务代码不绑定单一平台。 + +核心目标: + +- Agent 流式运行期间保持 trace 上下文,覆盖 API、服务准备、Agent 异步 generator、Agent 线程、LLM 流式输出、Python 解释器执行、真实工具调用和最终答案。 +- 通过 OpenInference 属性描述 Agent/LLM/Tool/Retriever 语义,同一套业务埋点可服务多个 OTLP 后端。 +- 支持 `otlp`、`phoenix`、`langfuse`、`langsmith`、`grafana`、`zipkin` provider profile。 +- 通过环境变量统一控制后端导出配置、本地部署形态和前端监控入口。 +- 支持 base endpoint 和 signal-specific endpoint,避免 `/v1/traces`、`/v1/metrics` 路径重复拼接。 +- FastAPI/requests 自动埋点可配置,默认压制流式接口中的 ASGI `receive/send` 噪声。 + +## 技术栈 + +| 分类 | 实现 | +|------|------| +| 标准框架 | OpenTelemetry API/SDK | +| 导出协议 | OTLP HTTP、OTLP gRPC | +| Trace exporter | `opentelemetry-exporter-otlp` HTTP/gRPC trace exporter | +| Metric exporter | `opentelemetry-exporter-otlp` HTTP/gRPC metric exporter | +| 自动埋点 | FastAPI instrumentation、requests instrumentation;requests 默认关闭 | +| AI 语义 | OpenInference 属性、Langfuse OTel 属性、Nexent 自定义业务属性 | +| Agent 框架 | SmolAgents `CodeAgent` 扩展、Nexent `CoreAgent`、`NexentAgent` | +| 配置 | 环境变量 | +| Collector | `otel/opentelemetry-collector-contrib`,支持 debug、Phoenix、Langfuse、LangSmith、Grafana/Tempo、Zipkin 部署形态 | + +## 总体架构 + +```mermaid +flowchart LR + Backend[Nexent Backend / SDK] --> OTel[OpenTelemetry TracerProvider / MeterProvider] + OTel --> Exporter[OTLP Trace / Metric Exporter] + Exporter 
--> Collector[OpenTelemetry Collector] + Collector --> Phoenix[Arize Phoenix] + Collector --> Langfuse[Langfuse] + Collector --> Tempo[Grafana Tempo] + Collector --> Zipkin[Zipkin] + Collector --> Other[OTLP Backend] + + Backend --> FastAPI[FastAPI Auto Instrumentation] + Backend --> Manual[Manual AI Spans] + Manual --> OI[OpenInference Attributes] + Manual --> LF[Langfuse Attributes] +``` + +## 配置模型 + +### 环境变量 + +| 变量 | 默认值 | 说明 | +|------|--------|------| +| `ENABLE_TELEMETRY` | `false` | 监控总开关 | +| `MONITORING_PROVIDER` | `otlp` | 监控 provider 和部署形态:`otlp`、`phoenix`、`langfuse`、`langsmith`、`grafana`、`zipkin` | +| `MONITORING_DASHBOARD_URL` | 空 | 前端顶栏监控入口跳转 URL,后端只读取并透传该值 | +| `MONITORING_PROJECT_NAME` | `nexent` | 平台项目名 | +| `OTEL_SERVICE_NAME` | `nexent-backend` | OpenTelemetry service name | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | `http://localhost:4318` | OTLP base endpoint | +| `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` | 空 | 可选 trace 专用 endpoint | +| `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` | 空 | 可选 metric 专用 endpoint | +| `OTEL_EXPORTER_OTLP_PROTOCOL` | `http` | `http` 或 `grpc` | +| `OTEL_EXPORTER_OTLP_HEADERS` | 空 | 通用 `key=value,key2=value2` header | +| `OTEL_EXPORTER_OTLP_AUTHORIZATION` | 空 | `Authorization` header,常用于 Phoenix bearer auth 和 Langfuse Basic Auth | +| `OTEL_EXPORTER_OTLP_X_API_KEY` | 空 | `x-api-key` header,用于兼容需要该 header 的平台 | +| `OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION` | 空 | Langfuse 摄取版本,例如 `4` | +| `LANGSMITH_API_KEY` | 空 | LangSmith API Key,后端直连时映射为 `x-api-key`,Collector 转发时注入 exporter header | +| `LANGSMITH_PROJECT` | 空 | 可选 LangSmith project header | +| `LANGSMITH_OTLP_TRACES_ENDPOINT` | `https://api.smith.langchain.com/otel/v1/traces` | Collector 转发到在线 LangSmith 的 trace endpoint | +| `OTEL_EXPORTER_OTLP_METRICS_ENABLED` | `true` | 是否导出 metric | +| `MONITORING_INSTRUMENT_REQUESTS` | `false` | 是否启用 requests 自动 HTTP client span | +| `MONITORING_FASTAPI_EXCLUDED_URLS` | 空 | FastAPI 自动埋点排除 URL,逗号分隔正则 | +| `MONITORING_FASTAPI_EXCLUDE_SPANS` | 
`receive,send` | 排除 ASGI 内部 `receive/send` span,流式接口建议保持默认 | +| `OTEL_COLLECTOR_VERSION` | `0.150.0` | 本地 OpenTelemetry Collector Contrib 镜像版本 | +| `PHOENIX_VERSION` | `15` | 本地 Phoenix 镜像版本 | +| `LANGFUSE_VERSION` | `3` | 本地 Langfuse Web/Worker 镜像版本 | +| `LANGFUSE_POSTGRES_VERSION` | `15-alpine` | 本地 Langfuse Postgres 镜像版本 | +| `LANGFUSE_CLICKHOUSE_VERSION` | `26.3-alpine` | 本地 Langfuse ClickHouse 镜像版本 | +| `LANGFUSE_MINIO_VERSION` | `RELEASE.2023-12-20T01-00-02Z` | 本地 Langfuse MinIO 镜像版本 | +| `LANGFUSE_REDIS_VERSION` | `alpine` | 本地 Langfuse Redis 镜像版本 | +| `GRAFANA_VERSION` | `12.4` | 本地 Grafana 镜像版本 | +| `GRAFANA_PORT` | `3002` | 本地 Grafana UI 端口 | +| `GRAFANA_DEFAULT_LANGUAGE` | `zh-Hans` | 本地 Grafana 默认界面语言 | +| `TEMPO_VERSION` | `2.10.5` | 本地 Tempo 镜像版本,避免浮动 tag 带来的配置兼容性漂移 | +| `TEMPO_PORT` | `3200` | 本地 Tempo HTTP API 端口 | +| `ZIPKIN_VERSION` | `latest` | 本地 Zipkin 镜像版本 | +| `ZIPKIN_PORT` | `9411` | 本地 Zipkin UI/API 端口 | + +## Endpoint 规则 + +HTTP exporter 支持两种输入: + +- base endpoint:`https://cloud.langfuse.com/api/public/otel` +- signal endpoint:`https://cloud.langfuse.com/api/public/otel/v1/traces` + +SDK 会按 signal 派生最终地址: + +| 输入 | Trace endpoint | Metric endpoint | +|------|----------------|-----------------| +| `https://host/api/public/otel` | `https://host/api/public/otel/v1/traces` | `https://host/api/public/otel/v1/metrics` | +| `https://host/api/public/otel/v1/traces` | 原值 | `https://host/api/public/otel/v1/metrics` | +| `https://host/api/public/otel/v1/metrics` | `https://host/api/public/otel/v1/traces` | 原值 | + +## 平台接入 + +### 纯 OTLP / 自建 Collector + +```bash +MONITORING_PROVIDER=otlp +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +``` + +前端顶栏监控入口不再根据 provider 在代码中映射 UI 端口和路径。后端读取 `MONITORING_DASHBOARD_URL` 并通过 `/monitoring/status` 返回给前端;该值为空时前端不显示监控入口。因此本地 Grafana 形态需要在后端 `.env` 中设置: + +```bash +MONITORING_PROVIDER=grafana 
+MONITORING_DASHBOARD_URL=http://localhost:3002/d/nexent-llm-agent/nexent-agent-trace-monitoring?orgId=1 +``` + +### Phoenix + +Phoenix 通过 OpenInference 属性识别 AI span 类型,核心字段是 `openinference.span.kind`。 + +```bash +MONITORING_PROVIDER=phoenix +OTEL_EXPORTER_OTLP_ENDPOINT=https://app.phoenix.arize.com/s/YOUR_SPACE +OTEL_EXPORTER_OTLP_AUTHORIZATION="Bearer YOUR_PHOENIX_API_KEY" +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false +MONITORING_PROJECT_NAME=nexent-production +``` + +### Langfuse + +Langfuse 的 OTLP HTTP base endpoint 是 `/api/public/otel`,使用 Basic Auth。实时摄取建议带 `x-langfuse-ingestion-version=4`。 + +```bash +MONITORING_PROVIDER=langfuse +OTEL_EXPORTER_OTLP_ENDPOINT=https://cloud.langfuse.com/api/public/otel +OTEL_EXPORTER_OTLP_AUTHORIZATION="Basic BASE64_PUBLIC_SECRET" +OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION=4 +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false +``` + +当前实现不写入 `langfuse.*` 专用 span 属性,Langfuse 通过 OTLP 接收通用 OpenTelemetry / OpenInference span。 + +### LangSmith + +LangSmith 的在线 OTLP trace endpoint 为 `https://api.smith.langchain.com/otel/v1/traces`,使用 `x-api-key` header 认证,可通过 `Langsmith-Project` header 指定项目。推荐仍让 Nexent 后端上报到本地 Collector,由 Collector 注入 LangSmith API Key 并转发 traces: + +```bash +MONITORING_PROVIDER=langsmith +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false +``` + +Collector 侧配置 `LANGSMITH_API_KEY`、`LANGSMITH_PROJECT` 和 `LANGSMITH_OTLP_TRACES_ENDPOINT`。LangSmith 当前形态只转发 traces,metrics 进入 Collector debug pipeline。 + +### Zipkin + +Zipkin 通过 Collector 的 Zipkin exporter 接收 traces。推荐 Nexent 后端仍然只上报到本地 Collector,由 Collector 转发到 Zipkin v2 spans endpoint: + +```bash +MONITORING_PROVIDER=zipkin +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http +OTEL_EXPORTER_OTLP_METRICS_ENABLED=false +MONITORING_DASHBOARD_URL=http://localhost:9411 +``` + +Zipkin 当前本地形态只转发 traces;metrics 进入 Collector debug pipeline。 + +## 本地化部署设计 + +本地化部署通过 
`docker/start-monitoring.sh` 选择形态。所有形态都保留 OpenTelemetry Collector 作为入口,Nexent 后端统一上报到 `http://otel-collector:4318` 或宿主机的 `http://localhost:4318`,平台差异只体现在 Collector exporter 和本地服务组合上。 + +| 形态 | Collector 配置 | 本地服务 | 数据去向 | 说明 | +|------|----------------|----------|----------|------| +| `otlp` | `otel-collector-config.yml` | Collector | debug exporter | 最小形态,用于验证 span/metric 是否产生,或手动改配置转发到云端平台;`collector` 仅作为启动脚本兼容别名 | +| `phoenix` | `otel-collector-phoenix-config.yml` | Collector + Phoenix | `http://phoenix:6006/v1/traces` | Phoenix 容器同时提供 UI 和 OTLP HTTP/gRPC trace collector,适合本地 trace debug | +| `langfuse` | `otel-collector-langfuse-config.yml` | Collector + Langfuse Web/Worker + Postgres + ClickHouse + MinIO + Redis | `http://langfuse-web:3000/api/public/otel/v1/traces` | Langfuse v3 依赖多组件,适合完整 LLMOps 能力验证 | +| `langsmith` | `otel-collector-langsmith-config.yml` | Collector | `https://api.smith.langchain.com/otel/v1/traces` | 在线 LangSmith trace 分析;API Key 只配置在 Collector 环境 | +| `grafana` | `otel-collector-grafana-config.yml` | Collector + Grafana + Tempo | traces 转发到 `tempo:4317`,metrics 只进入 Collector debug pipeline | Grafana + Tempo trace 查询 | +| `zipkin` | `otel-collector-zipkin-config.yml` | Collector + Zipkin | traces 转发到 `zipkin:9411/api/v2/spans`,metrics 只进入 Collector debug pipeline | Zipkin trace 查询 | + +启动命令: + +```bash +cd docker +./start-monitoring.sh --stack otlp +./start-monitoring.sh --stack phoenix +./start-monitoring.sh --stack langfuse +./start-monitoring.sh --stack langsmith +./start-monitoring.sh --stack grafana +./start-monitoring.sh --stack zipkin +``` + +部署脚本职责: + +- 创建或复用 `nexent-network`。 +- 首次启动时从 `monitoring.env.example` 生成 `monitoring.env`。 +- 根据 `MONITORING_PROVIDER` 或 `--stack` 选择 Docker Compose profile。 +- 根据部署形态设置 `OTEL_COLLECTOR_CONFIG_FILE`。 +- Langfuse 本地形态下,如果 `LANGFUSE_OTLP_AUTH_HEADER` 未显式配置,则使用初始化项目的 public/secret key 生成 Basic Auth header。 +- LangSmith 在线形态要求 `LANGSMITH_API_KEY`,启动时会校验该变量,避免 Collector 静默丢弃鉴权失败的 trace。 + +### 
Phoenix 本地形态 + +Phoenix 使用 `arizephoenix/phoenix` 镜像,默认暴露: + +| 端口 | 用途 | +|------|------| +| `6006` | Phoenix UI 和 OTLP HTTP `/v1/traces` | +| `4319` | 映射到容器内 gRPC OTLP `4317`,避免与 Collector gRPC 端口冲突 | + +Compose 中设置 `PHOENIX_WORKING_DIR=/mnt/data` 并挂载 `phoenix-data` volume,确保本地重启后 trace 数据不丢失。Collector 使用 `otlphttp/phoenix` exporter 的 base endpoint `http://phoenix:6006`,由 Collector 按 OTLP HTTP 规则追加 `/v1/traces`。 + +### Langfuse 本地形态 + +Langfuse v3 本地形态按自托管架构拆分为应用容器和存储组件: + +| 组件 | 用途 | +|------|------| +| `langfuse-web` | UI、API、OTLP HTTP ingestion | +| `langfuse-worker` | 异步消费和处理 trace 事件 | +| `langfuse-postgres` | 事务型元数据 | +| `langfuse-clickhouse` | trace/observation/score 分析数据 | +| `langfuse-minio` | S3 兼容对象存储,保存事件和大对象 | +| `langfuse-redis` | 队列和缓存 | + +初始化参数通过 `LANGFUSE_INIT_*` 配置,默认创建 `nexent-local` 项目和本地 API Key。Collector 使用 `otlphttp/langfuse` exporter,endpoint 为 `http://langfuse-web:3000/api/public/otel`,并携带: + +```yaml +headers: + Authorization: ${env:LANGFUSE_OTLP_AUTH_HEADER} + x-langfuse-ingestion-version: "4" +``` + +默认密钥仅用于本地验证。生产或共享环境必须替换认证密钥、数据库密码、对象存储密钥和 `LANGFUSE_ENCRYPTION_KEY`,并补充备份、高可用和升级策略。 + +### Grafana 本地形态 + +Grafana 本地形态面向 trace 调试: + +| 组件 | 用途 | +|------|------| +| `grafana` | 展示 Nexent Agent trace dashboard,并预置 Tempo datasource | +| `tempo` | 接收 Collector 转发的 OTLP traces,并提供 Grafana Explore 查询后端 | + +Collector trace pipeline 使用 `otlp/tempo` exporter 转发到 `tempo:4317`。Tempo 启用 `metrics-generator` 的 `local-blocks` processor,用于支持 Grafana trace breakdown 中的 TraceQL metrics 查询。Collector metrics pipeline 保留为 debug exporter,用于兼容后端仍开启 OTLP metrics 的场景,但本地 Grafana 形态不提供独立指标存储和指标 dashboard。 + +### Zipkin 本地形态 + +Zipkin 本地形态面向轻量 trace 查询: + +| 组件 | 用途 | +|------|------| +| `zipkin` | 接收 Collector 转发的 traces,并提供 trace 查询 UI | + +Collector trace pipeline 使用 `zipkin` exporter 转发到 `http://zipkin:9411/api/v2/spans`。Collector metrics pipeline 保留为 debug exporter。 + +默认访问地址: + +- Zipkin UI:`http://localhost:9411` + +## Span 语义映射 + +| Nexent 场景 | 
OpenInference | +|-------------|---------------| +| Agent 入口 | `openinference.span.kind=AGENT` | +| 服务准备、流式生成、线程执行、普通步骤 | `openinference.span.kind=CHAIN` | +| LLM 调用 | `openinference.span.kind=LLM` | +| 工具调用 | `openinference.span.kind=TOOL` | +| 检索类调用 | `openinference.span.kind=RETRIEVER` | + +上下文属性: + +| 属性 | 说明 | +|------|------| +| `input.value` / `output.value` | OpenInference 输入输出 | +| `metadata` | OpenInference JSON metadata | +| `session.id` / `user.id` | OpenInference 会话和用户 | +| `tag.tags` | OpenInference tags | + +## 埋点信息 + +| 埋点 | 位置 | 类型 | 内容 | 目的 | +|------|------|------|------|------| +| FastAPI 自动 span | `MonitoringManager.setup_fastapi_app` | HTTP server | route、method、status、duration | API 入口耗时和错误定位 | +| FastAPI `receive/send` 排除 | `fastapi_exclude_spans` | 降噪配置 | 默认 `receive,send` | 避免 SSE 流式接口生成大量 `unknown POST /agent/run http ...` | +| requests 自动 span | `MonitoringConfig.instrument_requests` | HTTP client | 外部请求 URL、method、status | 默认关闭;需要分析外部 HTTP 依赖时开启 | +| `AgentRunMetadata` | `run_agent_stream` 边界 | context | tenant、user、agent、conversation、query、language、memory、文件数 | 业务层只绑定一次请求上下文,后续 span 由 SDK 自动继承 | +| `agent.run` | `NexentAgent.agent_run_with_observer` | AGENT | query、session、user、tenant、agent、metadata、tags | 作为一次 Agent 运行的顶层业务 trace | +| `agent.run.loop` | `NexentAgent.agent_run_with_observer` | CHAIN | Agent loop、step、最终输出 | 追踪实际 Agent 执行生命周期 | +| `{display_name or model_id}.generate` | `sdk/nexent/core/models/openai_llm.py` | LLM / generation | 模型、温度、top_p、消息、输入输出、token、TTFT、chunk 数 | LLM 性能、成本、输出和异常分析 | +| `python_interpreter` | `sdk/nexent/core/agents/core_agent.py` | TOOL | 生成代码、step number、执行输出、日志、是否最终答案 | 观测 CodeAgent 解释器执行 | +| 真实工具名 | `sdk/nexent/core/agents/nexent_agent.py` | TOOL | local/MCP/langchain/builtin 工具输入输出 | 观测真实工具可用性、延迟、错误和输入输出 | +| `FinalAnswerTool` | `sdk/nexent/core/agents/core_agent.py` | TOOL | 最终答案输出 | 让 Phoenix/Langfuse 中能明确看到最终答案节点 | +| `monitor_endpoint` | SDK 兼容 API | AGENT / CHAIN | 自定义 operation、参数、错误 | 
低层 escape hatch;不推荐业务层新增常规埋点 | +| `start_agent_run` / `trace_agent_step` / `trace_retriever_call` | SDK 公共 API | AGENT / CHAIN / RETRIEVER | Agent metadata、输入输出、session、user | SDK 生命周期埋点和少量自定义层级埋点 | +| `trace_tool_call` | SDK 公共 API | TOOL | 工具名、输入、输出、耗时、错误 | SDK 用户自定义工具埋点 | + +### 事件清单 + +| Span / 位置 | Event | 主要属性 | 目的 | +|-------------|-------|----------|------| +| `agent.run` | `agent.run.started` / `agent.run.completed` / `agent.run.error` | `error.*` | 观测一次 Agent 运行的开始、结束和异常 | +| LLM span | `completion_started` / `first_token_received` / `token_generated` / `completion_finished` / `model_stopped` / `error_occurred` | `model_id`、`temperature`、`top_p`、`message_count`、`total_duration`、`output_length`、`chunk_count`、`error.*` | 分析模型参数、流式输出耗时、停止和异常 | +| Tool span | span 属性 `agent.tool.input` / `agent.tool.output` | JSON 字符串、`agent.tool.duration_ms`、`error.*` | 分析工具输入输出、耗时和异常 | + +## 指标 + +| 指标 | 类型 | 维度 | 用途 | +|------|------|------|------| +| `llm.request.duration` | histogram | model、operation | LLM 请求延迟 | +| `llm.token.generation_rate` | histogram | model | token/s | +| `llm.time_to_first_token` | histogram | model | 首 token 延迟 | +| `llm.token_count.prompt` | counter | model | 输入 token 成本 | +| `llm.token_count.completion` | counter | model | 输出 token 成本 | +| `llm.error.count` | counter | model、operation | LLM 错误率 | +| `agent.step.count` | counter | agent、step type、tool | Agent 步骤和工具调用量 | +| `agent.execution.duration` | histogram | agent、status | Agent 总耗时 | +| `agent.error.count` | counter | agent、error type | Agent 异常统计 | + +## Agent 运行数据流 + +```mermaid +flowchart TD + U[用户] --> FE[前端 Chat] + FE --> API[POST /agent/run] + API --> HTTP[FastAPI HTTP span: 可配置隐藏] + API --> Bind[绑定 AgentRunMetadata] + Bind --> Mem[解析 memory 开关] + Mem --> Strategy{with_memory / no_memory} + Strategy -->|with_memory| G1[generate_stream_with_memory] + Strategy -->|no_memory| G2[generate_stream_no_memory] + G1 --> AR[agent_run async generator] + G2 --> AR + AR --> 
Thread[agent_run_thread] + Thread --> NX[NexentAgent / CoreAgent] + NX --> A0[agent.run span: AGENT] + A0 --> Step[agent.run.loop: CHAIN] + Step --> LLM[Model.generate: LLM / generation] + Step --> PY[python_interpreter: TOOL] + PY --> Tool[Real local / MCP / langchain / builtin tool: TOOL] + PY --> Final[FinalAnswerTool: TOOL] + LLM --> Attr1[OpenInference + Langfuse attrs] + Tool --> Attr1 + Final --> Attr1 + Attr1 --> OTel[OpenTelemetry Tracer/Meter Provider] + OTel --> Collector[OTLP Collector] + Collector --> Phoenix[Phoenix] + Collector --> Langfuse[Langfuse] + Collector --> Tempo[Grafana Tempo] + Collector --> Zipkin[Zipkin] + Collector --> Other[OTLP Backend] +``` + +预期平台树形结构: + +```text +agent.run agent +└─ agent.run.loop chain + ├─ Model.generate llm / generation + ├─ python_interpreter tool + │ └─ RealTool tool + └─ FinalAnswerTool tool +``` + +FastAPI HTTP span 可以保留在最上层用于接口视角,也可以通过 `MONITORING_FASTAPI_EXCLUDED_URLS=/agent/run` 在 AI trace 视图中隐藏。 + +## 监控页面结构 + +```mermaid +flowchart TB + Page[Agent 监控页] --> Filters[筛选区: 时间 / 租户 / 用户 / Agent / 会话 / 模型 / 状态] + Page --> KPIs[指标区: 成功率 / P95 / TTFT / tokens/s / token 成本 / 工具错误数] + Page --> TraceList[Trace 列表: Agent / 会话 / 用户 / 状态 / 耗时 / Token / 模型 / 最后错误] + Page --> Detail[Trace 详情] + Detail --> Waterfall[Span 瀑布图: agent / chain / llm / tool] + Detail --> Timeline[Agent 时间线: 准备 / 记忆 / LLM / 工具 / 最终答案] + Detail --> LLMPanel[LLM 面板: prompt / output / token / TTFT / generation rate] + Detail --> ToolPanel[工具面板: 工具名 / 输入 / 输出 / 耗时 / 错误] + Detail --> Session[会话和用户上下文] + Detail --> Raw[原始 OTel 属性和 events] + Detail --> Eval[反馈、评分和评估] +``` + +监控平台之间不能只按“是否能收 trace”比较。对智能体场景,更关键的是是否理解 LLM/Agent 语义、是否支持评估和反馈、是否适合本地化部署、是否能与企业已有 APM 合流。下面按 Nexent 可能接入的平台做比较: + +| 平台 | 类型 | 部署形态 | 主要接入方式 | AI / Agent 语义 | Metrics / Logs | 评估 / 反馈 | 适合场景 | Nexent 当前适配 | +|------|------|----------|--------------|-----------------|----------------|-------------|----------|----------------| +| Phoenix | AI 原生可观测性 / 实验分析 | 云服务或自托管 | 
OTLP、OpenInference、Phoenix SDK | OpenInference 生态匹配好,适合展示 LLM、retriever、agent、tool 等语义 | 重点在 trace 和实验分析,通用 infra 监控不是核心 | 支持 eval、dataset、实验分析 | 本地 trace debug、RAG/LLM 质量分析、OpenInference 语义验证 | 写入 OpenInference 属性;支持本地 Phoenix stack 和 OTLP 转发 | +| Langfuse | LLMOps / Prompt 与 Trace 平台 | 云服务或自托管 | OTLP、Langfuse SDK、API | 对 trace、observation、session、user、prompt、metadata 支持完整 | 提供 LLM 应用维度 dashboard,通用 infra 监控不是重点 | 支持 score、feedback、eval、prompt 管理 | 需要 prompt 管理、用户会话、反馈和成本闭环的 LLM 应用 | 支持本地 Langfuse stack 和 OTLP 转发;业务 span 不写入 `langfuse.*` 专用属性 | +| LangSmith | LangChain / LangGraph 生态观测与评估 | 云服务为主 | LangSmith SDK、OTLP endpoint | 与 LangChain/LangGraph run、thread、feedback、evaluation 生态贴合 | 重点在应用 trace 和评估,不替代通用 APM | 评估、dataset、反馈、回归测试能力强 | 使用 LangChain/LangGraph 或需要在线评估闭环 | 支持 Collector 注入 `x-api-key` 和 `Langsmith-Project` 转发 traces | +| Grafana Tempo + Grafana | 通用 trace 后端 / Dashboard | 自托管或云服务 | OTLP、Jaeger、Zipkin 等,经 Collector 常见 | 不内置 LLM/Agent 专用语义,需要 dashboard 和属性约定补充 | Grafana 生态可接 Prometheus、Loki、Tempo 组合 | 不提供原生 LLM 评估,需要外部系统 | 私有化、本地化、已有 Grafana/Prometheus/Loki 体系 | 支持本地 Tempo + Grafana stack,预置 Tempo datasource 和 trace dashboard | +| Zipkin | 轻量分布式 tracing | 自托管 | Zipkin API,通常由 Collector exporter 转发 | 只理解通用 trace/span,不理解 LLM/Agent 语义 | 不提供 metrics/logs 平台能力 | 不提供评估能力 | 最小本地 trace 查询、验证转发链路、低成本调试 | 支持本地 Zipkin stack,Collector 转发 traces | +| Datadog LLM Observability | 全栈 APM + LLM Observability | 云服务 / Agent | Datadog SDK、Agent、OTel/OTLP 等 | 支持 LLM 应用 traces、prompt/completion、成本、质量和安全维度 | 全栈 metrics/logs/traces/APM/infra 能力强 | 支持 LLM evaluations、质量和安全监控 | 企业已有 Datadog,需把 AI 应用纳入统一生产监控 | 可通过标准 OTLP/Collector 或平台 SDK 接入,当前未内置本地 stack | +| New Relic AI Monitoring | 全栈 APM + AI Monitoring | 云服务 / Agent | New Relic agent、OTel/OTLP 等 | 关注 LLM app 性能、错误、成本和模型交互 | 全栈 APM、infra、logs、browser/mobile 生态完整 | 提供 AI 应用监控与分析能力,评估深度依赖平台能力 | 企业已有 New Relic,关注生产运行和统一告警 | 可通过标准 OTLP/Collector 或平台 agent 接入,当前未内置本地 stack | +| Elastic Observability | 全栈可观测性 / 搜索分析 | 云服务或自托管 | 
Elastic APM agent、OTel/OTLP、EDOT | 支持 LLM observability 和 OTel 语义,适合把 AI trace 与日志、指标、搜索分析合并 | logs、metrics、traces、搜索分析能力强 | 侧重监控、分析和 dashboard,业务评估闭环仍需额外设计 | 已有 Elastic Stack、重视日志检索、私有化和统一搜索分析 | 可通过 OTLP/Collector 对接,当前未内置本地 stack | +| Honeycomb | 事件驱动可观测性 / 高基数分析 | 云服务 | OTLP、OpenTelemetry SDK、Events API / libhoney | 擅长高基数 trace/event 分析,AI 语义通过属性和 OTel GenAI 约定表达 | 强在 trace/event 和指标分析,日志通常通过事件化方式分析 | 不提供完整 LLMOps 评估闭环 | 需要按租户、用户、agent、tool 做高维切片分析 | 可通过 OTLP/Collector 对接,当前未内置本地 stack | +| Nexent 自建页 | 产品内业务观测 | 自建 | 复用 OTel 属性和业务数据库 | 最能理解租户、会话、Agent 配置、权限、版本和业务动作 | 需要自建指标、查询、存储和告警 | 可与产品反馈、评分和评估闭环深度结合 | 产品内闭环、权限隔离、面向终端用户或运维角色的监控页 | 当前先通过 OTLP 对接外部平台,后续可基于同一批属性构建自有页面 | + +从选型上可以把平台分成三类: + +- AI 原生平台优先解决“Agent 为什么这样回答、prompt/tool/retrieval 是否有效、质量如何评估”的问题,适合研发调试和 LLMOps 闭环。 +- 通用 trace 后端优先解决“链路是否完整、哪一步慢、部署是否轻量和可私有化”的问题,适合本地调试和私有化基础能力。 +- 全栈 APM 优先解决“生产系统整体是否健康、AI 服务如何纳入企业统一监控、告警和审计”的问题,适合已有企业监控体系的团队。 + +按使用场景选择时,可以简化成下面的矩阵: + +| 场景 | 优先平台 | 原因 | 代价 | +|------|----------|------|------| +| 本地开发和快速看 trace | Phoenix、Zipkin、Grafana Tempo | 自托管简单,能快速验证 span 层级、Collector 转发和属性是否正确 | 对质量评估、prompt 管理和业务闭环支持有限 | +| RAG / Agent 质量分析 | Phoenix、Langfuse、LangSmith | 更理解 prompt、completion、retriever、tool、session、feedback 和 eval | 平台语义差异较大,需要保留可迁移的 OTel 属性 | +| 企业生产统一监控 | Datadog、New Relic、Elastic、Honeycomb | 能和服务、基础设施、日志、指标、告警、权限体系合流 | AI 业务语义需要通过 OTel GenAI/OpenInference/自定义属性补齐 | +| 产品内用户态监控页 | Nexent 自建页 + 外部 trace 后端 | 能结合租户、权限、Agent 配置、会话、反馈和产品操作 | 需要自建查询、聚合、权限隔离和可视化能力 | + +因此 Nexent 的策略不是只绑定一个平台,而是以 OpenTelemetry/OTLP 和兼容语义属性作为主干:本地默认支持 Phoenix、Langfuse、Grafana Tempo、Zipkin 等便于验证的形态;线上或企业环境可以把同一批 traces 转发到 LangSmith、Datadog、New Relic、Elastic、Honeycomb 或其他 OTLP 兼容后端。 + +推荐路径: + +1. 短期使用 OTLP 对接 Phoenix/Langfuse/LangSmith,满足调试和分析。 +2. 中期在 Nexent 增加 trace 跳转、轻量指标概览和异常聚合。 +3. 
长期按租户、会话、Agent 版本建立自有监控页,同时保留 OTLP 双写能力。 + +## 已修复的设计风险 + +| 风险 | 修复 | +|------|------| +| 业务层埋点耦合过高 | 业务入口只绑定 `AgentRunMetadata`,Agent/LLM/Tool 语义 span 下沉到 SDK 生命周期 | +| `/v1/traces` 路径重复拼接 | SDK 支持 base endpoint 和 signal endpoint 自动归一化 | +| Collector header 无法兼容平台 | Collector 默认只 debug;平台转发配置拆分 `Authorization`、`x-api-key`、`x-langfuse-ingestion-version` | +| Phoenix 只看到接口看不到 Agent | SDK 顶层 `agent.run` 标记为 AGENT,内部 `agent.run.loop` 标记为 CHAIN | +| Phoenix/Langfuse 中出现大量 `unknown POST /agent/run http ...` | 默认排除 FastAPI ASGI `receive/send` span;requests 自动埋点默认关闭;可配置隐藏 `/agent/run` HTTP span | +| Langfuse 字段耦合过重 | 不写入 `langfuse.*` 专用 span 属性,仅保留 OTLP 转发和 OpenInference 语义 | +| LLM span 不明显或缺输出 | LLM span 命名为 `{display_name or model_id}.generate`,并写入 `output.value` | +| 工具 span 缺失 | 在 `NexentAgent.create_single_agent` 统一包装 local/MCP/langchain/builtin 工具,并在 `CoreAgent` 增加 `python_interpreter` 和 `FinalAnswerTool` span | +| 单测漏掉 SDK 生命周期路径 | 增加 AgentRunMetadata、Agent/chain、LLM/Tool 继承上下文测试 | + +## 使用建议 + +只看 Agent 业务链路时: + +```bash +MONITORING_FASTAPI_EXCLUDE_SPANS=receive,send +MONITORING_FASTAPI_EXCLUDED_URLS=/agent/run +MONITORING_INSTRUMENT_REQUESTS=false +``` + +同时看接口入口和 Agent 业务链路时: + +```bash +MONITORING_FASTAPI_EXCLUDE_SPANS=receive,send +MONITORING_FASTAPI_EXCLUDED_URLS= +MONITORING_INSTRUMENT_REQUESTS=false +``` + +需要排查外部 HTTP 依赖时: + +```bash +MONITORING_INSTRUMENT_REQUESTS=true +``` + +## 参考 + +- OpenTelemetry Collector: https://opentelemetry.io/docs/collector/ +- OpenTelemetry OTLP Specification: https://opentelemetry.io/docs/specs/otlp/ +- OpenTelemetry GenAI Semantic Conventions: https://opentelemetry.io/docs/specs/semconv/gen-ai/ +- OpenInference Semantic Conventions: https://arize-ai.github.io/openinference/spec/semantic_conventions.html +- LangSmith Trace with OpenTelemetry: https://docs.langchain.com/langsmith/trace-with-opentelemetry +- LangGraph Observability: https://docs.langchain.com/langgraph-platform/langsmith-observability +- LlamaIndex 
Observability: https://docs.llamaindex.ai/en/stable/module_guides/observability/ +- LlamaIndex OpenTelemetry Integration: https://docs.llamaindex.ai/en/stable/api_reference/observability/otel/ +- OpenAI Agents SDK Tracing: https://openai.github.io/openai-agents-python/tracing/ +- Semantic Kernel Telemetry: https://learn.microsoft.com/en-us/semantic-kernel/concepts/enterprise-readiness/observability/telemetry-with-console +- CrewAI Tracing: https://docs.crewai.com/en/observability/tracing +- CrewAI OpenTelemetry Export: https://docs.crewai.com/en/enterprise/guides/capture_telemetry_logs +- CrewAI OpenLIT Integration: https://docs.crewai.com/en/observability/openlit +- AgentOps CrewAI Integration: https://docs.agentops.ai/v1/integrations/crewai +- AutoGen Agent Observability: https://microsoft.github.io/autogen/stable/user-guide/agentchat-user-guide/agent-observability.html +- AutoGen Tracing and Observability: https://microsoft.github.io/autogen/stable/user-guide/agentchat-user-guide/tracing.html +- Dify Monitoring Dashboard: https://docs.dify.ai/en/use-dify/monitor/analysis +- Dify Langfuse Integration: https://docs.dify.ai/en/use-dify/monitor/integrations/integrate-langfuse +- Dify LangSmith Integration: https://docs.dify.ai/en/use-dify/monitor/integrations/integrate-langsmith +- Dify Agent Node: https://docs.dify.ai/en/guides/workflow/node/agent +- smolagents Inspecting runs with OpenTelemetry: https://huggingface.co/docs/smolagents/en/tutorials/inspect_runs +- smolagents Phoenix tracing guide: https://huggingface.co/blog/smolagents-phoenix +- Vercel AI SDK Telemetry: https://ai-sdk.dev/docs/ai-sdk-core/telemetry +- Haystack Tracing: https://docs.haystack.deepset.ai/docs/tracing +- Phoenix Setup Tracing: https://arize.com/docs/phoenix/tracing/how-to-tracing/setup-tracing +- Phoenix Setup OTEL: https://arize.com/docs/phoenix/tracing/how-to-tracing/setup-tracing/setup-using-phoenix-otel +- Phoenix Authentication: 
https://arize.com/docs/phoenix/deployment/authentication +- Phoenix Self-Hosting: https://arize.com/docs/phoenix/self-hosting +- Phoenix Docker Deployment: https://arize.com/docs/phoenix/self-hosting/deployment-options/docker +- Langfuse OpenTelemetry: https://langfuse.com/integrations/native/opentelemetry +- Langfuse Self-Hosting: https://langfuse.com/self-hosting +- Langfuse Docker Compose: https://langfuse.com/self-hosting/local +- Langfuse Overview: https://langfuse.com/docs +- LangSmith OpenTelemetry: https://docs.langchain.com/langsmith/otel-gateway-trace-redaction +- Datadog LLM Observability: https://docs.datadoghq.com/llm_observability/ +- New Relic AI Monitoring: https://docs.newrelic.com/docs/ai-monitoring/intro-to-ai-monitoring/ +- Elastic OpenTelemetry: https://www.elastic.co/docs/solutions/observability/apm/opentelemetry/ +- Elastic EDOT data streams: https://www.elastic.co/docs/reference/opentelemetry/data-streams +- Honeycomb Send Data: https://docs.honeycomb.io/send-data/ +- Honeycomb for LLMs: https://docs.honeycomb.io/send-data/llm/ +- Grafana Tempo: https://grafana.com/docs/tempo/latest/ +- Zipkin OpenTelemetry Collector exporter: https://opentelemetry.io/docs/collector/configuration/#exporters +- Zipkin Docker image: https://hub.docker.com/r/openzipkin/zipkin diff --git a/doc/docs/zh/user-guide/agent-development.md b/doc/docs/zh/user-guide/agent-development.md index a8cca4a33..3edf31de7 100644 --- a/doc/docs/zh/user-guide/agent-development.md +++ b/doc/docs/zh/user-guide/agent-development.md @@ -55,7 +55,7 @@ Nexent 支持通过 A2A 协议与第三方 Agent 进行通信。您可以通过 如果您知道目标 Agent 的 Agent Card 地址,可以使用 URL 发现方式:
- +
1. 在外部 A2A Agent 列表中,点击"添加外部 Agent"按钮 @@ -72,7 +72,7 @@ Nexent 支持通过 A2A 协议与第三方 Agent 进行通信。您可以通过 如果您的 Agent 注册在 Nacos 服务发现平台,可以使用 Nacos 发现方式:
- +
1. 在外部 A2A Agent 列表中,点击"添加外部 Agent"按钮 @@ -96,7 +96,7 @@ Nexent 支持通过 A2A 协议与第三方 Agent 进行通信。您可以通过
- +
1. **查看 Agent 详情**:点击 Agent 卡片,可以查看其完整信息,包括名称、描述、URL、能力列表等 diff --git a/doc/docs/zh/user-guide/local-tools/index.md b/doc/docs/zh/user-guide/local-tools/index.md index ceaac3f54..71ba3e950 100644 --- a/doc/docs/zh/user-guide/local-tools/index.md +++ b/doc/docs/zh/user-guide/local-tools/index.md @@ -9,6 +9,7 @@ - [搜索工具](./search-tools):本地/DataMate/Dify 知识库检索与 Exa/Tavily/Linkup 公网搜索。 - [多模态工具](./multimodal-tools):文本文件与图片的下载、解析、模型分析。 - [终端工具](./terminal-tool):持久化 SSH 会话,远程执行命令。 +- [SQL 工具](./sql-tools):连接 MySQL、PostgreSQL、SQL Server 执行 SQL 查询。 - [技能(Skills)](../skills):Nexent内置工具组合或自定义能力包,支持 NL 生成与版本管理。 ## ⚙️ 配置入口 diff --git a/doc/docs/zh/user-guide/local-tools/sql-tools.md b/doc/docs/zh/user-guide/local-tools/sql-tools.md new file mode 100644 index 000000000..b5b50af59 --- /dev/null +++ b/doc/docs/zh/user-guide/local-tools/sql-tools.md @@ -0,0 +1,75 @@ +--- +title: SQL 数据库工具 +--- + +# SQL 数据库工具 + +SQL 数据库工具组支持连接和查询 MySQL、PostgreSQL、SQL Server 等关系型数据库,让 AI 智能体能够直接读取和操作数据库数据。 + +## 工具清单 + +- `mysql_database`:连接 MySQL 数据库执行 SQL 查询 +- `postgres_database`:连接 PostgreSQL 数据库执行 SQL 查询 +- `mssql_database`:连接 SQL Server 数据库执行 SQL 查询 + +## 使用场景示例 + +- 从业务数据库中查询报表数据,供智能体分析汇总 +- 跨数据库关联查询,获取分散在多个表中的关联信息 +- 实时查询业务状态,为智能体提供最新数据参考 + +## 参数要求与行为 + +### 通用参数 +- `sql`:要执行的 SQL 查询语句,必填 +- `parameters`:参数化查询的参数值列表,可选 +- `max_rows`:最大返回行数,默认 100 +- `timeout`:查询超时时间(秒),默认 10 + +### 数据库连接参数 + +| 数据库 | 连接参数 | +|--------|----------| +| MySQL | `host`、`user`、`password`、`database`、`port`(默认 3306) | +| PostgreSQL | `host`、`user`、`password`、`database`、`port`(默认 5432) | +| SQL Server | `host`、`user`、`password`、`database`、`port`(默认 1433) | + +### 安全限制 +- 禁止执行 `DROP DATABASE`、`GRANT`、`REVOKE`、`CREATE USER`、`INTO OUTFILE`、`LOAD DATA INFILE` 等危险操作 +- `UPDATE` 和 `DELETE` 语句必须包含 `WHERE` 子句 +- 自动添加 `LIMIT` 限制返回行数 + +### 返回格式 +```json +{ + "status": "success", + "columns": ["id", "name", "email"], + "rows": [[1, "张三", "zhang@example.com"]], + "row_count": 1, + "execution_time_ms": 45.23 +} +``` + +## 操作指引 
+ +1. **准备数据库连接信息**:获取主机地址、端口、数据库名、用户名和密码 +2. **配置工具**:在智能体工具配置中添加对应数据库工具,填写连接参数 +3. **测试连接**:使用简单查询验证连接是否正常 +4. **构造查询**:让智能体理解自然语言需求,生成对应 SQL 执行 + +## 安全与最佳实践 + +- 生产环境建议使用只读账号,限制操作权限 +- 敏感信息如数据库密码可通过密钥管理服务存储 +- 合理设置 `max_rows` 避免一次性返回过多数据 +- 建议开启数据库连接的 SSL/TLS 加密选项 + +## 常见数据库连接示例 + +| 数据库 | 连接地址示例 | 参数占位符 | +|--------|-------------|------------| +| MySQL | `localhost:3306` | `?` | +| PostgreSQL | `localhost:5432` | `$1, $2, ...` | +| SQL Server | `localhost:1433` | `?` | + +> 不同数据库的参数占位符格式不同,PostgreSQL 使用 `$1, $2` 格式,其他使用 `?`。 diff --git a/doc/docs/zh/user-guide/mcp-tools.md b/doc/docs/zh/user-guide/mcp-tools.md index 912306284..94bf7c656 100755 --- a/doc/docs/zh/user-guide/mcp-tools.md +++ b/doc/docs/zh/user-guide/mcp-tools.md @@ -1,27 +1,158 @@ # MCP 工具 -即将推出的 MCP 工具管理模块将让您在一个页面集中管理 MCP 服务器与工具,轻松完成连接配置、工具同步和健康状态监控 +在 MCP 工具模块中,您可以集中管理所有 MCP(Model Context Protocol)服务器与工具,支持自定义添加、注册表导入和社区导入等多种接入方式,完成连接配置、工具同步、健康监控以及社区共享。 -## 🎯 功能预览 +MCP 工具页面包含两个并列页签: -1. 注册并管理多个 MCP 服务器 -2. 快速同步、查看并整理 MCP 工具列表 -3. 实时监控 MCP 连接状态和使用情况 +- **导入的服务**:管理当前租户已接入的 MCP 服务,在此配置、监控和维护您的 MCP 服务。 +- **发布的服务**:管理当前租户发布到社区的 MCP 服务,支持浏览、编辑和取消发布。 -## ⏳ 敬请期待 +--- -MCP 工具管理功能正在开发中,我们致力于打造一个高效、直观的管理平台,让您能够: +## ➕ 添加 MCP 服务 -1. 集中管理所有 MCP 服务器 -2. 便捷同步和组织工具 -3. 实时掌握服务器连接与工具运行状态 +点击页面上的"添加 MCP 服务"按钮,打开添加弹窗。弹窗提供三个页签,对应不同的接入来源。 -## 🚀 相关功能 +### 自定义添加 -在等待 **MCP 工具** 上线期间,您可以: +"自定义添加"页签支持手动配置 MCP 服务,分为两种传输类型。 -1. 在 **[智能体开发](./agent-development)** 中管理您的 MCP 工具 -2. 通过 **[智能体空间](./agent-market)** 查看智能体与 MCP 的协作关系 -3. 在 **[开始问答](./start-chat)** 中体验平台功能 +#### 通过 URL 添加 -如果您在使用过程中遇到任何问题,请参考我们的 **[常见问题](../quick-start/faq)** 或在[GitHub Discussions](https://github.com/ModelEngine-Group/nexent/discussions)中进行提问获取支持。 \ No newline at end of file +适用于已有独立部署的 MCP 服务(支持 HTTP / SSE 协议),通过输入端点 URL 直接接入。 + +1. 在"本地添加"页签中,**传输类型**选择"URL" +2. 填写服务信息: + - **服务名称(必填)**:为 MCP 服务设置一个易于识别的名称 + - **服务 URL(必填)**:输入 MCP 服务的端点地址 + - **描述**:可选,填写服务的用途说明 + - **Authorization Token**:可选,若服务需要认证,在此填入 Bearer Token +3. 
点击"确定"完成添加,系统会自动连接服务并获取可用工具列表 + +#### 通过容器配置添加 + +适用于需要本地容器化运行的 MCP 服务(如通过 npx 启动的服务),系统会根据您提供的 JSON 配置自动创建并管理容器。 + +1. 在"本地添加"页签中,**传输类型**选择"容器" +2. 填写容器配置信息: + - **服务名称(必填)**:为 MCP 服务设置一个易于识别的名称 + - **描述**:可选,填写服务的用途说明 + - **容器配置 JSON(必填)**:按标准 MCP 配置格式填写,例如: + ```json + { + "mcpServers": { + "service-name": { + "args": ["mcp-package-name@version"], + "command": "npx", + "env": { + "API_KEY": "xxxx" + } + } + } + } + ``` + - **端口号**:填写容器服务暴露的端口,系统会自动检测端口冲突并提示可用端口 +3. 点击"确定",系统将解析 JSON 配置、创建容器并完成服务注册 + +### 从 MCP Registry 导入 + +Nexent 集成了 MCP Registry,您可以浏览并一键导入社区维护的 MCP 服务。 + +1. 切换到"外部市场"页签 +2. 浏览可用的 MCP 服务列表,支持按名称或标签搜索 +3. 点击目标服务,查看服务详情(描述、版本、所需参数等) +4. 配置必填参数(如 API Key 等环境变量) +5. 点击"导入",系统会自动安装并配置该 MCP 服务 + +### 从社区导入 + +浏览其他用户在 Nexent 平台内发布的 MCP 服务,快速导入使用。 + +1. 切换到"社区市场"页签 +2. 浏览社区已发布的 MCP 服务,支持按名称、标签或传输协议筛选 +3. 点击目标服务查看详情,点击"导入"即可添加到您的服务列表中 + +--- + +## 📋 导入的服务 + +"导入的服务"页签以卡片形式展示当前租户所有已接入的 MCP 服务,您可以在此查看、编辑、监控和发布。 + +### 查看与筛选 + +每张服务卡片展示以下信息: + +- 服务名称与描述 +- 来源标识(本地 / 注册表 / 社区) +- 启用 / 禁用开关 +- 标签 + +您可以使用顶部的筛选栏,按**来源**、**传输类型**和**标签**进行过滤,也可以通过搜索框按名称快速定位服务。 + +### 编辑服务详情 + +点击任意服务卡片,打开详情弹窗,可以进行以下操作: + +- **编辑基本信息**:修改服务名称、描述、URL、Authorization Token 和标签 +- **启用 / 禁用服务**:通过开关控制服务的启用状态,禁用后该服务的工具将不会出现在智能体工具选择中 +- **删除服务**:移除 MCP 服务记录,容器化服务会同步清理容器资源 + +### 查看工具列表 + +在服务详情弹窗中,点击"工具列表"按钮,可以查看该 MCP 服务提供的所有工具。 + +### 健康检查 + +点击详情弹窗中的"健康检查"按钮,系统会对 MCP 服务发起连接测试并返回当前状态: + +- **正常**:服务可正常连接 +- **异常**:服务无法连接或响应异常 +- **未检测**:尚未进行健康检查 + +### 容器管理 + +对于容器化部署的 MCP 服务,详情弹窗中还提供以下操作: + +- **查看容器日志**:实时查看运行中容器的输出日志,方便排查问题 +- **查看容器配置**:查看创建容器时使用的配置 JSON + +### 发布到社区 + +在服务详情弹窗中,点击"发布到社区"按钮: + +1. 确认或修改发布信息(名称、描述、标签等) +2. 点击"确认发布",该服务将发布到社区 +3. 
发布后其他用户可在添加服务的"社区市场"页签中浏览和导入 + +--- + +## 🌐 发布的服务 + +"发布的服务"页签展示您自己发布到社区的所有 MCP 服务,您可以在此集中管理已发布的内容。 + +每张卡片展示服务名称、描述、版本和标签,支持按名称、标签和传输协议进行筛选。 + +点击服务卡片可查看详细信息,您可以: + +- **编辑发布的服务**:修改已发布服务的名称、描述和标签 +- **删除发布的服务**:将服务从社区撤回,不再对其他用户可见 + +--- + +## 🔗 与智能体协作 + +添加 MCP 服务后,其提供的工具会自动同步到智能体的工具选择列表中。在 **[智能体开发](./agent-development)** 页面配置智能体时: + +1. 在"选择智能体的工具"页签下,找到对应 MCP 服务分组 +2. 点击工具名称即可启用该工具 +3. 可点击 ⚙️ 查看工具描述并进行参数配置 + +## 🚀 下一步 + +完成 MCP 服务配置后,建议您: + +1. **[智能体开发](./agent-development)** - 将 MCP 工具配置给智能体使用 +2. **[智能体空间](./agent-space)** - 查看智能体与 MCP 的协作关系 +3. **[开始问答](./start-chat)** - 在对话中体验智能体调用 MCP 工具的效果 + +如果您在使用过程中遇到任何问题,请参考我们的 **[常见问题](../quick-start/faq)** 或在 [GitHub Discussions](https://github.com/ModelEngine-Group/nexent/discussions) 中进行提问获取支持。 \ No newline at end of file diff --git a/doc/docs/zh/user-guide/model-management.md b/doc/docs/zh/user-guide/model-management.md index c8f07c0c3..6870f5544 100644 --- a/doc/docs/zh/user-guide/model-management.md +++ b/doc/docs/zh/user-guide/model-management.md @@ -238,7 +238,7 @@ Nexent 支持任何 **遵循OpenAI API规范** 的大语言模型供应商,包 - **网站**: [volcengine.com/product/voice-tech](https://www.volcengine.com/product/voice-tech) - **免费额度**: 个人使用可用 - **特色**: 高质量中英文语音合成 - +- **推荐模型**: 豆包语音合成模型 2.0 和大模型流式语音识别模型 - **开始使用**: 1. 注册火山引擎账户 @@ -248,7 +248,7 @@ Nexent 支持任何 **遵循OpenAI API规范** 的大语言模型供应商,包 **阿里灵积** - **网站**: [aliyun.com/benefit/scene/voice](https://www.aliyun.com/benefit/scene/voice) - +- **推荐模型**: 千问3-TTS-Instruct-Flash-Realtime / 千问3-TTS-Flash-Realtime 和千问3-ASR-Flash-Realtime - **开始使用**: 1. 
注册阿里云账户 diff --git a/docker/.env.bak b/docker/.env.bak deleted file mode 100644 index 24b53751b..000000000 --- a/docker/.env.bak +++ /dev/null @@ -1,168 +0,0 @@ -# ===== Necessary Configs (Necessary till now, will be migrated to frontend page) ===== - -# Voice Service Config -APPID=app_id -TOKEN=token - -# ===== Non-essential Configs (Modify if you know what you are doing) ===== - -CLUSTER=volcano_tts -VOICE_TYPE=zh_male_jieshuonansheng_mars_bigtts -SPEED_RATIO=1.3 - -# ===== Proxy Configuration (Optional) ===== - -# HTTP_PROXY=http://proxy-server:port -# HTTPS_PROXY=http://proxy-server:port -# NO_PROXY=localhost,127.0.0.1 - -# ===== Backend Configuration (No need to modify at all) ===== - -# Model Path Config -CLIP_MODEL_PATH=/opt/models/clip-vit-base-patch32 -NLTK_DATA=/opt/models/nltk_data - -# Elasticsearch Service -ELASTICSEARCH_HOST=http://nexent-elasticsearch:9200 -ELASTIC_PASSWORD=nexent@2025 - -# Elasticsearch Memory Configuration -ES_JAVA_OPTS="-Xms2g -Xmx2g" - -# Elasticsearch Disk Watermark Configuration -ES_DISK_WATERMARK_LOW=85% -ES_DISK_WATERMARK_HIGH=90% -ES_DISK_WATERMARK_FLOOD_STAGE=95% - -# Main Services -# Config service (port 5010) - Main API service for config operations -CONFIG_SERVICE_URL=http://nexent-config:5010 -ELASTICSEARCH_SERVICE=http://nexent-config:5010/api - -# Runtime service (port 5014) - Runtime execution service for agent operations -RUNTIME_SERVICE_URL=http://nexent-runtime:5014 - -# MCP service (port 5011) - MCP protocol service -NEXENT_MCP_SERVER=http://nexent-mcp:5011 -MCP_MANAGEMENT_API=http://nexent-mcp:5015 - -# Data process service (port 5012) - Data processing service -DATA_PROCESS_SERVICE=http://nexent-data-process:5012/api - -# Northbound service (port 5013) - Northbound API service -NORTHBOUND_API_SERVER=http://nexent-northbound:5013/api - -# Postgres Config -POSTGRES_HOST=nexent-postgresql -POSTGRES_USER=root -NEXENT_POSTGRES_PASSWORD=nexent@4321 -POSTGRES_DB=nexent -POSTGRES_PORT=5432 - -# Minio Config 
-MINIO_ENDPOINT=http://nexent-minio:9000 -MINIO_ROOT_USER=nexent -MINIO_ROOT_PASSWORD=nexent@4321 -MINIO_REGION=cn-north-1 -MINIO_DEFAULT_BUCKET=nexent - -# Redis Config -REDIS_URL=redis://redis:6379/0 -REDIS_BACKEND_URL=redis://redis:6379/1 - -# Model Engine Config -MODEL_ENGINE_ENABLED=false - -# Supabase Config -DASHBOARD_USERNAME=supabase -DASHBOARD_PASSWORD=Huawei123 - -# Supabase db Config -SUPABASE_POSTGRES_PASSWORD=Huawei123 -SUPABASE_POSTGRES_HOST=db -SUPABASE_POSTGRES_DB=supabase -SUPABASE_POSTGRES_PORT=5436 - -# Supabase Auth Config -SITE_URL=http://localhost:3011 -SUPABASE_URL=http://supabase-kong-mini:8000 -API_EXTERNAL_URL=http://supabase-kong-mini:8000 -DISABLE_SIGNUP=false -JWT_EXPIRY=3600 -DEBUG_JWT_EXPIRE_SECONDS=0 - -# Supabase Configuration -ENABLE_EMAIL_SIGNUP=true -ENABLE_EMAIL_AUTOCONFIRM=true -ENABLE_ANONYMOUS_USERS=false - -# Supabase Phone Config -ENABLE_PHONE_SIGNUP=false -ENABLE_PHONE_AUTOCONFIRM=false - -MAILER_URLPATHS_CONFIRMATION="/auth/v1/verify" -MAILER_URLPATHS_INVITE="/auth/v1/verify" -MAILER_URLPATHS_RECOVERY="/auth/v1/verify" -MAILER_URLPATHS_EMAIL_CHANGE="/auth/v1/verify" - -INVITE_CODE=nexent2025 - -# Terminal Tool SSH Key Path -SSH_PRIVATE_KEY_PATH=/path/to/openssh-server/ssh-keys/openssh_server_key - -# ===== Data Processing Service Configuration ===== - -# Redis Port -REDIS_PORT=6379 - -# Flower Monitoring -FLOWER_PORT=5555 - -# Ray Configuration -RAY_ACTOR_NUM_CPUS=2 -RAY_DASHBOARD_PORT=8265 -RAY_DASHBOARD_HOST=0.0.0.0 -RAY_NUM_CPUS=4 -RAY_OBJECT_STORE_MEMORY_GB=0.25 -RAY_TEMP_DIR=/tmp/ray -RAY_LOG_LEVEL=INFO - -# Service Control Flags -DISABLE_RAY_DASHBOARD=true -DISABLE_CELERY_FLOWER=true -DOCKER_ENVIRONMENT=false -ENABLE_UPLOAD_IMAGE=false - -# Celery Configuration -CELERY_WORKER_PREFETCH_MULTIPLIER=1 -CELERY_TASK_TIME_LIMIT=3600 -ELASTICSEARCH_REQUEST_TIMEOUT=30 - -# Worker Configuration -QUEUES=process_q,forward_q -WORKER_NAME= -WORKER_CONCURRENCY=4 - -# Skills Configuration -SKILLS_PATH=/mnt/nexent/skills - -# 
Telemetry and Monitoring Configuration -ENABLE_TELEMETRY=false -SERVICE_NAME=nexent-backend -JAEGER_ENDPOINT=http://localhost:14268/api/traces -PROMETHEUS_PORT=8000 -TELEMETRY_SAMPLE_RATE=1.0 -LLM_SLOW_REQUEST_THRESHOLD_SECONDS=5.0 -LLM_SLOW_TOKEN_RATE_THRESHOLD=10.0 - -# Market Backend Address -MARKET_BACKEND=http://60.204.251.153:8010 -DEPLOYMENT_VERSION="speed" -# Root dir -ROOT_DIR="/c/Users/18270/nexent-data" -TERMINAL_MOUNT_DIR="/opt/terminal" -SSH_USERNAME="root" -SSH_PASSWORD="731215" -NEXENT_MCP_DOCKER_IMAGE="ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:v2.0.1" -MINIO_ACCESS_KEY="72c31cb5b521511cea652723" -MINIO_SECRET_KEY="m5gcSuKzZnp84CqmG7z5VKnd2C+H5U3PSr7eoJeygmI=" diff --git a/docker/.env.example b/docker/.env.example index e55bba45a..c34300523 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -22,6 +22,13 @@ SPEED_RATIO=1.3 CLIP_MODEL_PATH=/opt/models/clip-vit-base-patch32 NLTK_DATA=/opt/models/nltk_data +# ===== Table and Structure Recognition Models ===== + +# Table Transformer and YOLOX models for extracting tables and layout structure from PDF/DOC/DOCX files. +# Both paths must be set to valid directories/files to enable extraction; if either is left empty, the feature is disabled. 
+TABLE_TRANSFORMER_MODEL_PATH=/opt/models/table-transformer-structure-recognition +UNSTRUCTURED_DEFAULT_MODEL_INITIALIZE_PARAMS_JSON_PATH=/opt/models/yolox/config.json + # Elasticsearch Service ELASTICSEARCH_HOST=http://nexent-elasticsearch:9200 ELASTIC_PASSWORD=nexent@2025 @@ -151,16 +158,47 @@ WORKER_NAME= WORKER_CONCURRENCY=4 # Skills Configuration -SKILLS_PATH=/mnt/nexent/skills +SKILLS_PATH=/mnt/nexent-data/skills -# Telemetry and Monitoring Configuration +# Telemetry and Monitoring Configuration (OTLP Protocol) +# Enable OpenTelemetry monitoring for agent observability ENABLE_TELEMETRY=false -SERVICE_NAME=nexent-backend -JAEGER_ENDPOINT=http://localhost:14268/api/traces -PROMETHEUS_PORT=8000 +# Provider profile: otlp, phoenix, langfuse, langsmith, grafana, zipkin +MONITORING_PROVIDER=otlp +MONITORING_PROJECT_NAME=nexent +# Browser-accessible monitoring UI URL. Leave empty to hide the frontend entry. +MONITORING_DASHBOARD_URL= +# Trace payload capture mode: +# summary: bounded preview + type/size/count metadata; metrics: metadata only; full: full preview capped by max chars. +# MAX_CHARS limits preview length; MAX_ITEMS limits dict/list preview items. +MONITORING_TRACE_CONTENT_MODE=full +MONITORING_TRACE_MAX_CHARS=4000 +MONITORING_TRACE_MAX_ITEMS=20 +# Service name for identifying traces in observability platforms +OTEL_SERVICE_NAME=nexent-backend +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 +# Optional signal-specific endpoints. Leave empty unless the backend requires them. +OTEL_EXPORTER_OTLP_TRACES_ENDPOINT= +OTEL_EXPORTER_OTLP_METRICS_ENDPOINT= +# Protocol: "http" or "grpc" +OTEL_EXPORTER_OTLP_PROTOCOL=http + +# Authentication headers (format: key1=value1,key2=value2) +# Prefer platform-specific variables when using the Collector. 
+OTEL_EXPORTER_OTLP_HEADERS= +OTEL_EXPORTER_OTLP_AUTHORIZATION= +OTEL_EXPORTER_OTLP_X_API_KEY= +OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION= +OTEL_EXPORTER_OTLP_METRICS_ENABLED=true +MONITORING_INSTRUMENT_REQUESTS=false +# FastAPI endpoint monitoring filters. Values are comma-separated regex patterns. +# Excluded URLs are always skipped. If included URLs is empty, all non-excluded endpoints are monitored. +# If included URLs is non-empty, only matching endpoints are monitored. +MONITORING_FASTAPI_INCLUDED_URLS= +MONITORING_FASTAPI_EXCLUDED_URLS= +MONITORING_FASTAPI_EXCLUDE_SPANS=receive,send + TELEMETRY_SAMPLE_RATE=1.0 -LLM_SLOW_REQUEST_THRESHOLD_SECONDS=5.0 -LLM_SLOW_TOKEN_RATE_THRESHOLD=10.0 # Market Backend Address MARKET_BACKEND=http://60.204.251.153:8010 @@ -173,6 +211,10 @@ GITHUB_OAUTH_CLIENT_SECRET= GDE_URL= GDE_OAUTH_CLIENT_ID= GDE_OAUTH_CLIENT_SECRET= +# Link App OAuth +LINK_APP_URL= +LINK_APP_OAUTH_CLIENT_ID= +LINK_APP_OAUTH_CLIENT_SECRET= # WeChat OAuth (set ENABLE_WECHAT_OAUTH=true to enable) ENABLE_WECHAT_OAUTH=false WECHAT_OAUTH_APP_ID= @@ -181,3 +223,6 @@ WECHAT_OAUTH_APP_SECRET= OAUTH_SSL_VERIFY=true OAUTH_CA_BUNDLE= OAUTH_CALLBACK_BASE_URL=http://localhost:3000 + +# Asset owner role (opt-in; default false). Set true to enable ASSET_OWNER. 
+ENABLE_ASSET_OWNER_ROLE=false diff --git a/docker/create-su.sh b/docker/create-su.sh old mode 100644 new mode 100755 index 27d3c6033..639e64553 --- a/docker/create-su.sh +++ b/docker/create-su.sh @@ -95,16 +95,6 @@ create_default_super_admin_user() { echo " 🔏 Password: ${password}" fi - # Extract access_token from RESPONSE JSON for skill installation - local access_token - access_token=$(echo "$RESPONSE" | grep -o '"access_token":"[^"]*"' | sed -n 's/.*"access_token":"\([^"]*\)".*/\1/p') - if [ -n "$access_token" ]; then - # Save access_token to a temp file for skill installation - local token_file="${SCRIPT_DIR}/.access_token" - echo "$access_token" > "$token_file" - echo " 💡 Access token saved for skill installation" - fi - # Extract user.id from RESPONSE JSON local user_id # Try using jq first (if available in the container or on host) diff --git a/docker/deploy.sh b/docker/deploy.sh index 7fb78aa90..d2edbb446 100755 --- a/docker/deploy.sh +++ b/docker/deploy.sh @@ -13,6 +13,16 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" CONST_FILE="$PROJECT_ROOT/backend/consts/const.py" DEPLOY_OPTIONS_FILE="$SCRIPT_DIR/deploy.options" +DEPLOYMENT_COMMON="$PROJECT_ROOT/scripts/deployment/common.sh" +ORIGINAL_ARGS=("$@") + +if [ -f "$DEPLOYMENT_COMMON" ]; then + # shellcheck source=/dev/null + source "$DEPLOYMENT_COMMON" +else + echo "❌ Shared deployment helper not found: $DEPLOYMENT_COMMON" + exit 1 +fi MODE_CHOICE_SAVED="" VERSION_CHOICE_SAVED="" @@ -24,6 +34,16 @@ APP_VERSION="" cd "$SCRIPT_DIR" +if [ ! 
-f ".env" ]; then + if [ -f ".env.example" ]; then + cp .env.example .env + echo "✅ Created docker/.env from docker/.env.example" + else + echo "❌ .env not found and .env.example is missing in $SCRIPT_DIR" + exit 1 + fi +fi + set -a source .env @@ -31,7 +51,6 @@ source .env MODE_CHOICE="" IS_MAINLAND="" ENABLE_TERMINAL="" -ENABLE_SKILLS="" VERSION_CHOICE="" ROOT_DIR_PARAM="" @@ -40,6 +59,25 @@ export COMPOSE_IGNORE_ORPHANS=True while [[ $# -gt 0 ]]; do case "$1" in + delete|delete-all|--delete-volumes|--remove-volumes|--keep-volumes) + echo "❌ Docker uninstall has moved to uninstall.sh. Use: bash uninstall.sh" + exit 1 + ;; + --help|-h) + echo "Usage: $0 [options]" + echo "" + echo "Deploy options:" + echo " --components LIST" + echo " --port-policy development|production" + echo " --image-source general|mainland|local-latest" + echo " --use-local-config" + echo " --reconfigure" + echo " --config PATH" + echo " --root-dir PATH" + echo "" + echo "Uninstall: bash uninstall.sh" + exit 0 + ;; --mode) MODE_CHOICE="$2" shift 2 @@ -52,10 +90,6 @@ while [[ $# -gt 0 ]]; do ENABLE_TERMINAL="$2" shift 2 ;; - --enable-skills) - ENABLE_SKILLS="$2" - shift 2 - ;; --version) VERSION_CHOICE="$2" shift 2 @@ -117,6 +151,49 @@ is_port_in_use() { return 1 } +is_nexent_container_name() { + local container_name="$1" + + case "$container_name" in + nexent-*|nexent_*|supabase-*-mini) + return 0 + ;; + *) + return 1 + ;; + esac +} + +docker_containers_using_host_port() { + local port="$1" + + if ! 
command -v docker >/dev/null 2>&1; then + return 0 + fi + + while IFS=$'\t' read -r container_name published_ports; do + if [ -n "$container_name" ] && [[ "$published_ports" == *":${port}->"* ]]; then + echo "$container_name" + fi + done < <(docker ps --format '{{.Names}}\t{{.Ports}}' 2>/dev/null) +} + +is_port_used_by_nexent_only() { + local port="$1" + local container_name + local found="false" + + while IFS= read -r container_name; do + [ -n "$container_name" ] || continue + found="true" + if ! is_nexent_container_name "$container_name"; then + return 1 + fi + done < <(docker_containers_using_host_port "$port") + + [ "$found" = "true" ] +} + add_port_if_new() { # Helper to add a port to global arrays only if not already present local port="$1" @@ -199,6 +276,8 @@ check_ports_in_env_files() { echo "🔍 Checking port availability defined in environment files..." local occupied_ports=() local occupied_sources=() + local ignored_nexent_ports=0 + local free_ports=0 local idx for idx in "${!PORTS_TO_CHECK[@]}"; do @@ -206,14 +285,26 @@ check_ports_in_env_files() { local source="${PORT_SOURCES[$idx]}" if is_port_in_use "$port"; then + if is_port_used_by_nexent_only "$port"; then + ignored_nexent_ports=$((ignored_nexent_ports + 1)) + continue + fi occupied_ports+=("$port") occupied_sources+=("$source") echo " ❌ Port $port is already in use." else - echo " ✅ Port $port is free." + free_ports=$((free_ports + 1)) fi done + if [ "$free_ports" -gt 0 ]; then + echo " ✅ $free_ports port(s) available." + fi + + if [ "$ignored_nexent_ports" -gt 0 ]; then + echo " ↺ Ignored $ignored_nexent_ports port(s) already used by Nexent containers." + fi + if [ ${#occupied_ports[@]} -gt 0 ]; then echo "" echo "❌ Port conflict detected. 
The following ports required by Nexent are already in use:" @@ -242,6 +333,72 @@ check_ports_in_env_files() { echo "" } +check_deployment_ports() { + PORTS_TO_CHECK=() + PORT_SOURCES=() + + local port + for port in $DEPLOYMENT_DOCKER_PORTS; do + add_port_if_new "$port" "deployment port policy: $DEPLOYMENT_PORT_POLICY" + done + + if [ ${#PORTS_TO_CHECK[@]} -eq 0 ]; then + echo "🔍 No host ports are published by the selected deployment configuration." + echo "" + echo "--------------------------------" + echo "" + return 0 + fi + + echo "🔍 Checking port availability for selected deployment policy..." + local occupied_ports=() + local ignored_nexent_ports=0 + local free_ports=0 + local idx + for idx in "${!PORTS_TO_CHECK[@]}"; do + local selected_port="${PORTS_TO_CHECK[$idx]}" + if is_port_in_use "$selected_port"; then + if is_port_used_by_nexent_only "$selected_port"; then + ignored_nexent_ports=$((ignored_nexent_ports + 1)) + continue + fi + occupied_ports+=("$selected_port") + echo " ❌ Port $selected_port is already in use." + else + free_ports=$((free_ports + 1)) + fi + done + + if [ "$free_ports" -gt 0 ]; then + echo " ✅ $free_ports port(s) available." + fi + + if [ "$ignored_nexent_ports" -gt 0 ]; then + echo " ↺ Ignored $ignored_nexent_ports port(s) already used by Nexent containers." + fi + + if [ ${#occupied_ports[@]} -gt 0 ]; then + echo "" + echo "❌ Port conflict detected for selected deployment policy:" + local occupied + for occupied in "${occupied_ports[@]}"; do + echo " - Port $occupied" + done + echo "" + local confirm_continue + read -p "👉 Do you still want to continue deployment even though some ports are in use? [y/N]: " confirm_continue + confirm_continue=$(sanitize_input "$confirm_continue") + if ! [[ "$confirm_continue" =~ ^[Yy]$ ]]; then + echo "🚫 Deployment aborted due to port conflicts." 
+ exit 1 + fi + fi + + echo "" + echo "--------------------------------" + echo "" +} + trim_quotes() { local value="$1" value="${value%$'\r'}" @@ -372,7 +529,7 @@ generate_elasticsearch_api_key() { generate_env_for_infrastructure() { # Function to generate complete environment file for infrastructure mode using generate_env.sh - echo "🔑 Generating complete environment file in root directory..." + echo "🔑 Updating docker/.env for infrastructure mode..." echo " 🚀 Running generate_env.sh..." # Check if generate_env.sh exists @@ -388,16 +545,14 @@ generate_env_for_infrastructure() { export DEPLOYMENT_VERSION if ./generate_env.sh; then - echo " ✅ Environment file generated successfully for infrastructure mode!" - # Source the generated .env file to make variables available - if [ -f "../.env" ]; then - echo " ⏏️ Sourcing generated root .env file..." + echo " ✅ docker/.env updated successfully for infrastructure mode!" + if [ -f ".env" ]; then set -a - source ../.env + source .env set +a - echo " ✅ Environment variables loaded from ../.env" + echo " ✅ Environment variables loaded from docker/.env" else - echo " ⚠️ Warning: ../.env file not found after generation" + echo " ⚠️ Warning: docker/.env file not found after generation" return 1 fi else @@ -414,7 +569,6 @@ get_compose_version() { # Function to get the version of docker compose if command -v docker &> /dev/null; then version_output=$(docker compose version 2>/dev/null) - # 修改点:放宽正则匹配,允许版本号后面跟随其他字符(如 -desktop.1) if [[ $version_output =~ v([0-9]+\.[0-9]+\.[0-9]+) ]]; then echo "v2 ${BASH_REMATCH[1]}" return 0 @@ -423,7 +577,6 @@ get_compose_version() { if command -v docker-compose &> /dev/null; then version_output=$(docker-compose --version 2>/dev/null) - # 同样放宽这里的匹配规则,以防万一 if [[ $version_output =~ ([0-9]+\.[0-9]+\.[0-9]+) ]]; then echo "v1 ${BASH_REMATCH[1]}" return 0 @@ -440,6 +593,14 @@ disable_dashboard() { } pull_mcp_image() { + if [ "$DEPLOYMENT_IMAGE_SOURCE" = "local-latest" ]; then + echo "🔄 Skipping MCP 
image pull because image source is local-latest." + echo "" + echo "--------------------------------" + echo "" + return 0 + fi + echo "🔄 Checking MCP Docker image..." # Get MCP image name from environment or use default @@ -547,9 +708,6 @@ clean() { if [ -f ".env.bak" ]; then rm .env.bak fi - if [ -f "../.env.bak" ]; then - rm ../.env.bak - fi } update_env_var() { @@ -633,8 +791,23 @@ prepare_directory_and_data() { deploy_core_services() { # Function to deploy core services - echo "👀 Starting core services..." - if ! ${docker_compose_command} -p nexent -f "docker-compose${COMPOSE_FILE_SUFFIX}" up -d nexent-config nexent-runtime nexent-mcp nexent-northbound nexent-web nexent-data-process; then + local core_services=() + local service + for service in $DEPLOYMENT_SELECTED_DOCKER_SERVICES; do + case "$service" in + nexent-config|nexent-runtime|nexent-mcp|nexent-northbound|nexent-web|nexent-data-process) + core_services+=("$service") + ;; + esac + done + + if [ ${#core_services[@]} -eq 0 ]; then + echo "👀 No core services selected, skipping core service startup." + return 0 + fi + + echo "👀 Starting core services: ${core_services[*]}" + if ! ${docker_compose_command} -p nexent -f "docker-compose${COMPOSE_FILE_SUFFIX}" up -d "${core_services[@]}"; then echo " ❌ ERROR Failed to start core services" return 1 fi @@ -643,25 +816,33 @@ deploy_core_services() { deploy_infrastructure() { # Start infrastructure services (basic services only) echo "🔧 Starting infrastructure services..." 
- INFRA_SERVICES="nexent-elasticsearch nexent-postgresql nexent-minio redis" + INFRA_SERVICES="" + + if deployment_csv_contains "$DEPLOYMENT_COMPONENTS" "infrastructure"; then + INFRA_SERVICES="nexent-elasticsearch nexent-postgresql nexent-minio redis" + fi # Add openssh-server if Terminal tool container is enabled - if [ "$ENABLE_TERMINAL_TOOL_CONTAINER" = "true" ]; then + if deployment_csv_contains "$DEPLOYMENT_COMPONENTS" "terminal"; then INFRA_SERVICES="$INFRA_SERVICES nexent-openssh-server" echo "🔧 Terminal tool container enabled - openssh-server will be included in infrastructure" fi - if ! ${docker_compose_command} -p nexent -f "docker-compose${COMPOSE_FILE_SUFFIX}" up -d $INFRA_SERVICES; then - echo " ❌ ERROR Failed to start infrastructure services" - return 1 + if [ -n "$INFRA_SERVICES" ]; then + if ! ${docker_compose_command} -p nexent -f "docker-compose${COMPOSE_FILE_SUFFIX}" up -d $INFRA_SERVICES; then + echo " ❌ ERROR Failed to start infrastructure services" + return 1 + fi + else + echo "🔧 No infrastructure services selected, skipping infrastructure startup." fi - if [ "$ENABLE_TERMINAL_TOOL_CONTAINER" = "true" ]; then + if deployment_csv_contains "$DEPLOYMENT_COMPONENTS" "terminal"; then echo "🔧 Terminal tool container (openssh-server) is now available for AI agents" fi - # Deploy Supabase services based on DEPLOYMENT_VERSION - if [ "$DEPLOYMENT_VERSION" = "full" ]; then + # Deploy Supabase services based on selected components + if deployment_csv_contains "$DEPLOYMENT_COMPONENTS" "supabase"; then echo "" echo "🔧 Starting Supabase services..." # Check if the supabase compose file exists @@ -684,6 +865,104 @@ deploy_infrastructure() { echo " ✅ Infrastructure services started successfully" } +deploy_monitoring() { + deployment_csv_contains "$DEPLOYMENT_COMPONENTS" "monitoring" || return 0 + + if [ ! 
-f "docker-compose-monitoring.yml" ]; then + echo " ❌ ERROR Monitoring compose file not found: docker-compose-monitoring.yml" + return 1 + fi + + local profile_args=() + case "$DEPLOYMENT_MONITORING_PROVIDER" in + phoenix|grafana|zipkin|langfuse) + profile_args+=(--profile "$DEPLOYMENT_MONITORING_PROVIDER") + ;; + esac + + echo "🔭 Starting monitoring services..." + if ! ${docker_compose_command} "${profile_args[@]}" -f "docker-compose-monitoring.yml" up -d; then + echo " ❌ ERROR Failed to start monitoring services" + return 1 + fi +} + +configure_root_dir_from_env() { + if [ -n "$ROOT_DIR_PARAM" ]; then + ROOT_DIR="$ROOT_DIR_PARAM" + echo " 📁 Using ROOT_DIR from parameter: $ROOT_DIR" + update_env_var "ROOT_DIR" "$ROOT_DIR" + elif grep -q "^ROOT_DIR=" .env; then + ROOT_DIR="$(grep "^ROOT_DIR=" .env | cut -d'=' -f2 | sed 's/^"//;s/"$//')" + echo " 📁 Use existing ROOT_DIR path: $ROOT_DIR" + else + local default_root_dir="$HOME/nexent-data" + if [ -t 0 ]; then + local user_root_dir + read -p " 📁 Enter ROOT_DIR path (default: $default_root_dir): " user_root_dir + ROOT_DIR="${user_root_dir:-$default_root_dir}" + else + ROOT_DIR="$default_root_dir" + fi + update_env_var "ROOT_DIR" "$ROOT_DIR" + fi + export ROOT_DIR + echo "" + echo "--------------------------------" + echo "" +} + +apply_deployment_common_config() { + deployment_prepare_config "${ORIGINAL_ARGS[@]}" || return 1 + + if deployment_csv_contains "$DEPLOYMENT_COMPONENTS" "supabase"; then + export DEPLOYMENT_VERSION="full" + else + export DEPLOYMENT_VERSION="speed" + fi + update_env_var "DEPLOYMENT_VERSION" "$DEPLOYMENT_VERSION" + + if [ "$DEPLOYMENT_PORT_POLICY" = "production" ]; then + export DEPLOYMENT_MODE="production" + export COMPOSE_FILE_SUFFIX=".prod.yml" + disable_dashboard + elif [ "$DEPLOYMENT_COMPONENTS" = "infrastructure" ]; then + export DEPLOYMENT_MODE="infrastructure" + export COMPOSE_FILE_SUFFIX=".yml" + else + export DEPLOYMENT_MODE="development" + export COMPOSE_FILE_SUFFIX=".yml" + fi + + if 
deployment_csv_contains "$DEPLOYMENT_COMPONENTS" "terminal"; then + ENABLE_TERMINAL_SAVED="Y" + export ENABLE_TERMINAL_TOOL_CONTAINER="true" + export COMPOSE_PROFILES="${COMPOSE_PROFILES:+$COMPOSE_PROFILES,}terminal" + else + ENABLE_TERMINAL_SAVED="N" + export ENABLE_TERMINAL_TOOL_CONTAINER="false" + fi + + export APP_VERSION="$DEPLOYMENT_APP_VERSION" + case "$DEPLOYMENT_REGISTRY_PROFILE" in + mainland) + IS_MAINLAND_SAVED="Y" + source .env.mainland + ;; + general|local-latest) + IS_MAINLAND_SAVED="N" + source .env.general + ;; + esac + + deployment_apply_image_source + deployment_render_docker_env "$SCRIPT_DIR/.env.generated" + set -a + source "$SCRIPT_DIR/.env.generated" + set +a + deployment_print_summary docker +} + select_deployment_version() { # Function to select deployment version echo "🚀 Please select deployment version:" @@ -914,210 +1193,6 @@ check_super_admin_user_exists() { fi } -get_access_token_by_credentials() { - # Get access token by signing in with email and password - local email="$1" - local password="$2" - - # Suppress echo messages when capturing output - set +x 2>/dev/null - - local response - response=$(docker exec nexent-config bash -c "curl -s -X POST http://kong:8000/auth/v1/token?grant_type=password -H \"apikey: ${SUPABASE_KEY}\" -H \"Content-Type: application/json\" -d '{\"email\":\"${email}\",\"password\":\"${password}\"}'" 2>/dev/null) - - if echo "$response" | grep -q '"access_token"'; then - local access_token - access_token=$(echo "$response" | grep -o '"access_token":"[^"]*"' | sed -n 's/.*"access_token":"\([^"]*\)".*/\1/p') - # Only output the token, no other text - echo "$access_token" - return 0 - else - # Output error to stderr, not stdout - echo " ❌ Failed to get access token: $response" >&2 - return 1 - fi -} - -prompt_skill_credentials() { - # Prompt user for email and password for skill installation with retry loop - local default_email="suadmin@nexent.com" - local max_attempts=5 - local attempts=0 - - echo "" - echo "🔐 
Skills Installation - Authentication Required" - echo " Please provide credentials for an existing admin account." - echo "" - - while [ $attempts -lt $max_attempts ]; do - attempts=$((attempts + 1)) - - # Prompt for email - read -p " 📧 Enter email [${default_email}]: " user_email - user_email=$(sanitize_input "$user_email") - if [ -z "$user_email" ]; then - user_email="$default_email" - fi - - # Prompt for password - echo -n " 🔐 Enter password: " - read -s user_password - echo "" - - if [ -z "$user_password" ]; then - echo " ❌ Error: Password cannot be empty. Please try again." - echo "" - continue - fi - - # Return credentials via global variables - SKILL_AUTH_EMAIL="$user_email" - SKILL_AUTH_PASSWORD="$user_password" - return 0 - done - - echo " ❌ Too many failed attempts. Aborting skills installation." - return 1 -} - -install_builtin_skills() { - # Install built-in skills if enabled - if [ "$ENABLE_SKILLS_SAVED" != "Y" ]; then - return 0 - fi - - echo "" - echo "--------------------------------" - echo "📦 Installing built-in skills..." - echo "" - - local install_script="$SCRIPT_DIR/install-skills.sh" - chmod +x "$install_script" - - # Export necessary environment variables - export SUPABASE_KEY - export DEPLOYMENT_VERSION - export DEPLOYMENT_MODE - export SUPABASE_POSTGRES_DB - - # Get access token for skill installation - local access_token="" - local email="suadmin@nexent.com" - local max_attempts=3 - local attempts=0 - - # Check if super admin user exists first - local check_result - check_super_admin_user_exists "$email" - check_result=$? - - if [ $check_result -eq 0 ]; then - # User exists, prompt for credentials with retry loop - echo " 🔐 Please provide credentials to install skills." - echo "" - - while [ $attempts -lt $max_attempts ]; do - attempts=$((attempts + 1)) - - prompt_skill_credentials || { - echo " ❌ Failed to get credentials" - return 1 - } - - echo -n " 🔐 Signing in... 
" - access_token=$(get_access_token_by_credentials "$SKILL_AUTH_EMAIL" "$SKILL_AUTH_PASSWORD") - - if [ -n "$access_token" ]; then - echo "✅" - echo " ✅ Credentials verified." - break - else - echo "❌" - echo " ❌ Invalid email or password." - echo "" - # Clear sensitive data - unset SKILL_AUTH_PASSWORD access_token - fi - done - - if [ -z "$access_token" ]; then - echo " ❌ Too many failed attempts. Aborting skills installation." - unset SKILL_AUTH_PASSWORD - return 1 - fi - - elif [ $check_result -eq 1 ]; then - # User does not exist - this is a fresh deployment - echo " ℹ️ Super admin user will be created during deployment." - echo " 💡 Skills will be installed after user creation." - unset SKILL_AUTH_PASSWORD - return 0 - else - echo " ⚠️ Warning: Could not determine if user exists" - unset SKILL_AUTH_PASSWORD - return 1 - fi - - # Clear password from memory as soon as possible - unset SKILL_AUTH_PASSWORD - - # Install skills using the access token - if bash "$install_script" "$access_token"; then - echo " ✅ Built-in skills installed successfully" - else - echo " ⚠️ Built-in skills installation failed" - return 1 - fi - - # Clean up access token - unset access_token - - echo "" - echo "--------------------------------" - echo "" -} - -install_skills_after_user_creation() { - # Install skills after user creation - called with access_token as first argument - if [ "$ENABLE_SKILLS_SAVED" != "Y" ]; then - return 0 - fi - - if [ "$DEPLOYMENT_VERSION" != "full" ]; then - return 0 - fi - - local access_token="$1" - - if [ -z "$access_token" ]; then - echo " ⚠️ Warning: No access token provided for skill installation" - return 1 - fi - - local install_script="$SCRIPT_DIR/install-skills.sh" - if [ ! -f "$install_script" ]; then - echo " ❌ Error: install-skills.sh not found" - return 1 - fi - - export SUPABASE_KEY - export DEPLOYMENT_VERSION - export DEPLOYMENT_MODE - export SUPABASE_POSTGRES_DB - - echo "" - echo "📦 Installing built-in skills..." 
- if bash "$install_script" "$access_token"; then - echo " ✅ Built-in skills installed successfully" - else - echo " ⚠️ Built-in skills installation failed" - return 1 - fi - - # Clean up access token from memory - unset access_token -} - prompt_super_admin_password() { # Prompt user to enter password for super admin user with confirmation # Note: All prompts go to stderr, only password is returned via stdout @@ -1216,8 +1291,10 @@ create_default_super_admin_user() { # Execute the script with password as argument if bash "$script_path" "$password"; then + unset password return 0 else + unset password return 1 fi } @@ -1247,36 +1324,6 @@ choose_image_env() { echo "" } -select_skills_installation() { - # Ask user whether to install built-in skills - if [ -n "$ENABLE_SKILLS" ]; then - enable_skills="$ENABLE_SKILLS" - echo "👉 Using enable_skills from argument: $enable_skills" - else - read -p "👉 Do you want to install built-in skills? [Y/N] (default Y): " enable_skills - fi - - # Sanitize potential Windows CR in input - enable_skills=$(sanitize_input "$enable_skills") - - # Default to Y if no input - if [ -z "$enable_skills" ]; then - enable_skills="Y" - fi - - if [[ "$enable_skills" =~ ^[Yy]$ ]]; then - ENABLE_SKILLS_SAVED="Y" - echo "✅ Built-in skills will be installed later on." - else - ENABLE_SKILLS_SAVED="N" - echo "🚫 Built-in skills installation skipped." - fi - - echo "" - echo "--------------------------------" - echo "" -} - main_deploy() { # Main deployment function echo "🚀 Nexent Deployment Script 🚀" @@ -1291,15 +1338,13 @@ main_deploy() { fi echo "🌐 App version: $APP_VERSION" - # Check all relevant ports from environment files before starting deployment - check_ports_in_env_files + # Select deployment components, port policy and image source via shared config. + apply_deployment_common_config || { echo "❌ Deployment configuration failed"; exit 1; } + + # Check only the ports published by the selected deployment configuration. 
+ check_deployment_ports - # Select deployment version, mode and image source - select_deployment_version || { echo "❌ Deployment version selection failed"; exit 1; } - select_deployment_mode || { echo "❌ Deployment mode selection failed"; exit 1; } - select_terminal_tool || { echo "❌ Terminal tool container configuration failed"; exit 1; } - choose_image_env || { echo "❌ Image environment setup failed"; exit 1; } - select_skills_installation || { echo "❌ Skills installation selection failed"; exit 1; } + configure_root_dir_from_env || { echo "❌ ROOT_DIR configuration failed"; exit 1; } # Set NEXENT_MCP_DOCKER_IMAGE in .env file if [ -n "${NEXENT_MCP_DOCKER_IMAGE:-}" ]; then @@ -1320,6 +1365,8 @@ main_deploy() { # Deploy infrastructure services deploy_infrastructure || { echo "❌ Infrastructure deployment failed"; exit 1; } + deploy_monitoring || { echo "❌ Monitoring deployment failed"; exit 1; } + # Generate Elasticsearch API key generate_elasticsearch_api_key || { echo "❌ Elasticsearch API key generation failed"; exit 1; } @@ -1334,43 +1381,18 @@ main_deploy() { # Create default super admin user (only for full version) if [ "$DEPLOYMENT_VERSION" = "full" ]; then create_default_super_admin_user || { echo "❌ Default super admin user creation failed"; exit 1; } - - # Install skills after user creation (if enabled) - if [ "$ENABLE_SKILLS_SAVED" = "Y" ]; then - echo "" - echo "--------------------------------" - echo "📦 Checking if skills installation is needed..." - - # Read access token from file (saved by create-su.sh) - local token_file="$SCRIPT_DIR/.access_token" - if [ -f "$token_file" ]; then - local access_token - access_token=$(cat "$token_file" | tr -d '[:space:]') - rm -f "$token_file" # Clean up after reading - - if [ -n "$access_token" ]; then - echo " 💡 Found access token, proceeding with skills installation..." 
- install_skills_after_user_creation "$access_token" || { - echo " ⚠️ Warning: Skills installation encountered issues" - } - fi - else - echo " ℹ️ No access token file found. Infrastructure mode may need manual skill installation." - fi - echo "" - echo "--------------------------------" - fi fi echo "🎉 Infrastructure deployment completed successfully!" echo " You can now start the core services manually using dev containers" - echo " Environment file available at: $(cd .. && pwd)/.env" - echo "💡 Use 'source .env' to load environment variables in your development shell" + echo " Environment file available at: $SCRIPT_DIR/.env" + echo "💡 Use 'source docker/.env' from the project root to load environment variables" # Pull MCP image for later use pull_mcp_image persist_deploy_options + deployment_persist_local_config return 0 fi @@ -1385,56 +1407,10 @@ main_deploy() { # Create default super admin user if [ "$DEPLOYMENT_VERSION" = "full" ]; then create_default_super_admin_user || { echo "❌ Default super admin user creation failed"; exit 1; } - - # Install skills after user creation (if enabled) - if [ "$ENABLE_SKILLS_SAVED" = "Y" ]; then - echo "" - echo "--------------------------------" - echo "📦 Checking if skills installation is needed..." - - # Read access token from file (saved by create-su.sh) - local token_file="$SCRIPT_DIR/.access_token" - if [ -f "$token_file" ]; then - local access_token - access_token=$(cat "$token_file" | tr -d '[:space:]') - rm -f "$token_file" # Clean up after reading - - if [ -n "$access_token" ]; then - echo " 💡 Found access token, proceeding with skills installation..." - install_skills_after_user_creation "$access_token" || { - echo " ⚠️ Warning: Skills installation encountered issues" - } - fi - else - echo " ℹ️ No access token file found. Checking if skills installation is needed..." - # Check if super admin user already exists (was created previously) - check_super_admin_user_exists "suadmin@nexent.com" - local check_result=$? 
- if [ $check_result -eq 0 ]; then - # User exists, prompt for credentials - echo " ℹ️ Super admin user exists from previous deployment." - echo " 💡 Please provide credentials to install skills." - if prompt_skill_credentials; then - local access_token - access_token=$(get_access_token_by_credentials "$SKILL_AUTH_EMAIL" "$SKILL_AUTH_PASSWORD") || { - echo " ⚠️ Warning: Could not get access token, skipping skills installation" - } - if [ -n "$access_token" ]; then - install_skills_after_user_creation "$access_token" || { - echo " ⚠️ Warning: Skills installation encountered issues" - } - fi - fi - else - echo " ⚠️ Warning: Could not determine user status, skipping skills installation" - fi - fi - echo "" - echo "--------------------------------" - fi fi persist_deploy_options + deployment_persist_local_config # Pull MCP image for later use pull_mcp_image @@ -1459,7 +1435,7 @@ docker_compose_command="" case $version_type in "v1") echo "Detected Docker Compose V1, version: $version_number" - # The version ​​v1.28.0​​ is the minimum requirement in Docker Compose v1 that explicitly supports interpolation syntax with default values like ${VAR:-default} + # The version 1.28.0 is the minimum requirement in Docker Compose v1 for default interpolation syntax. 
if [[ $version_number < "1.28.0" ]]; then echo "Warning: V1 version is too old, consider upgrading to V2" exit 1 diff --git a/docker/docker-compose-monitoring.yml b/docker/docker-compose-monitoring.yml index fb4aa5eaf..976a57c97 100644 --- a/docker/docker-compose-monitoring.yml +++ b/docker/docker-compose-monitoring.yml @@ -1,88 +1,268 @@ +name: monitor + services: - # Jaeger - Distributed Tracing - jaeger: - image: jaegertracing/all-in-one:1.52 - container_name: nexent-jaeger - ports: - - "16686:16686" # Jaeger UI - - "14268:14268" # Jaeger collector HTTP - - "14250:14250" # Jaeger collector gRPC - - "6831:6831/udp" # Agent UDP - - "6832:6832/udp" # Agent UDP + otel-collector: + image: otel/opentelemetry-collector-contrib:${OTEL_COLLECTOR_VERSION:-0.151.0} + container_name: nexent-otel-collector + command: ["--config=/etc/otel-collector-config.yml"] environment: - - COLLECTOR_OTLP_ENABLED=true - - COLLECTOR_ZIPKIN_HOST_PORT=:9411 + LANGFUSE_OTLP_AUTH_HEADER: ${LANGFUSE_OTLP_AUTH_HEADER:-} + LANGSMITH_API_KEY: ${LANGSMITH_API_KEY:-} + LANGSMITH_PROJECT: ${LANGSMITH_PROJECT:-nexent} + LANGSMITH_OTLP_TRACES_ENDPOINT: ${LANGSMITH_OTLP_TRACES_ENDPOINT:-https://api.smith.langchain.com/otel/v1/traces} + volumes: + - ${OTEL_COLLECTOR_CONFIG_FILE:-./monitoring/otel-collector-config.yml}:/etc/otel-collector-config.yml + ports: + - "${OTEL_COLLECTOR_GRPC_PORT:-4317}:4317" + - "${OTEL_COLLECTOR_HTTP_PORT:-4318}:4318" networks: - - nexent-network + - nexent restart: unless-stopped - volumes: - - jaeger-data:/tmp - # Prometheus - Metrics Collection - prometheus: - image: prom/prometheus:v2.48.0 - container_name: nexent-prometheus + phoenix: + image: arizephoenix/phoenix:${PHOENIX_VERSION:-15} + container_name: nexent-phoenix + profiles: ["phoenix"] + environment: + PHOENIX_WORKING_DIR: /mnt/data + volumes: + - phoenix-data:/mnt/data ports: - - "9090:9090" - command: - - '--config.file=/etc/prometheus/prometheus.yml' - - '--storage.tsdb.path=/prometheus' - - 
'--web.console.libraries=/etc/prometheus/console_libraries' - - '--web.console.templates=/etc/prometheus/consoles' - - '--storage.tsdb.retention.time=15d' - - '--web.enable-lifecycle' - - '--web.enable-admin-api' + - "${PHOENIX_PORT:-6006}:6006" + - "${PHOENIX_GRPC_HOST_PORT:-4319}:4317" + networks: + - nexent + restart: unless-stopped + + tempo: + image: grafana/tempo:${TEMPO_VERSION:-2.10.5} + container_name: nexent-tempo + profiles: ["grafana"] + command: ["--config.file=/etc/tempo.yml"] volumes: - - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml - - prometheus-data:/prometheus + - ./monitoring/tempo.yml:/etc/tempo.yml:ro + - tempo-data:/var/tempo + ports: + - "${TEMPO_PORT:-3200}:3200" networks: - - nexent-network + - nexent restart: unless-stopped - # Grafana - Metrics Visualization grafana: - image: grafana/grafana:10.2.0 + image: grafana/grafana:${GRAFANA_VERSION:-12.4} container_name: nexent-grafana - ports: - - "3005:3000" + profiles: ["grafana"] environment: - - GF_SECURITY_ADMIN_PASSWORD=admin - - GF_USERS_ALLOW_SIGN_UP=false - - GF_INSTALL_PLUGINS=grafana-piechart-panel + GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER:-admin} + GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-nexent-grafana-admin} + GF_USERS_ALLOW_SIGN_UP: "false" + GF_USERS_DEFAULT_LANGUAGE: ${GRAFANA_DEFAULT_LANGUAGE:-zh-Hans} + GF_PLUGINS_PREINSTALL_AUTO_UPDATE: "false" volumes: - grafana-data:/var/lib/grafana - - ./monitoring/grafana/provisioning:/etc/grafana/provisioning - - ./monitoring/grafana/dashboards:/var/lib/grafana/dashboards + - ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro + - ./monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro + ports: + - "${GRAFANA_PORT:-3002}:3000" + depends_on: + - tempo networks: - - nexent-network + - nexent restart: unless-stopped - depends_on: - - prometheus - # OpenTelemetry Collector (Optional - for advanced setups) - otel-collector: - image: otel/opentelemetry-collector-contrib:0.89.0 - 
container_name: nexent-otel-collector - command: ["--config=/etc/otel-collector-config.yml"] + zipkin: + image: openzipkin/zipkin:${ZIPKIN_VERSION:-latest} + container_name: nexent-zipkin + profiles: ["zipkin"] + ports: + - "${ZIPKIN_PORT:-9411}:9411" + networks: + - nexent + restart: unless-stopped + + langfuse-worker: + image: docker.io/langfuse/langfuse-worker:${LANGFUSE_VERSION:-3} + container_name: nexent-langfuse-worker + profiles: ["langfuse"] + restart: unless-stopped + depends_on: &langfuse-depends-on + langfuse-postgres: + condition: service_healthy + langfuse-minio: + condition: service_healthy + langfuse-redis: + condition: service_healthy + langfuse-clickhouse: + condition: service_healthy + environment: &langfuse-env + NEXTAUTH_URL: ${LANGFUSE_NEXTAUTH_URL:-http://localhost:3001} + NEXTAUTH_SECRET: ${LANGFUSE_NEXTAUTH_SECRET:-nexent-langfuse-secret} + DATABASE_URL: postgresql://${LANGFUSE_POSTGRES_USER:-postgres}:${LANGFUSE_POSTGRES_PASSWORD:-postgres}@langfuse-postgres:5432/${LANGFUSE_POSTGRES_DB:-postgres} + SALT: ${LANGFUSE_SALT:-nexent-langfuse-salt} + ENCRYPTION_KEY: ${LANGFUSE_ENCRYPTION_KEY:-0000000000000000000000000000000000000000000000000000000000000000} + TELEMETRY_ENABLED: ${LANGFUSE_TELEMETRY_ENABLED:-false} + LANGFUSE_ENABLE_EXPERIMENTAL_FEATURES: ${LANGFUSE_ENABLE_EXPERIMENTAL_FEATURES:-false} + CLICKHOUSE_MIGRATION_URL: clickhouse://langfuse-clickhouse:9000 + CLICKHOUSE_URL: http://langfuse-clickhouse:8123 + CLICKHOUSE_USER: ${LANGFUSE_CLICKHOUSE_USER:-clickhouse} + CLICKHOUSE_PASSWORD: ${LANGFUSE_CLICKHOUSE_PASSWORD:-clickhouse} + CLICKHOUSE_CLUSTER_ENABLED: ${LANGFUSE_CLICKHOUSE_CLUSTER_ENABLED:-false} + REDIS_HOST: langfuse-redis + REDIS_PORT: 6379 + REDIS_AUTH: ${LANGFUSE_REDIS_AUTH:-myredissecret} + REDIS_TLS_ENABLED: "false" + LANGFUSE_USE_AZURE_BLOB: "false" + LANGFUSE_USE_OCI_NATIVE_OBJECT_STORAGE: "false" + LANGFUSE_S3_EVENT_UPLOAD_BUCKET: ${LANGFUSE_S3_BUCKET:-langfuse} + LANGFUSE_S3_EVENT_UPLOAD_REGION: auto + 
LANGFUSE_S3_EVENT_UPLOAD_ACCESS_KEY_ID: ${LANGFUSE_MINIO_ROOT_USER:-minio} + LANGFUSE_S3_EVENT_UPLOAD_SECRET_ACCESS_KEY: ${LANGFUSE_MINIO_ROOT_PASSWORD:-miniosecret} + LANGFUSE_S3_EVENT_UPLOAD_ENDPOINT: http://langfuse-minio:9000 + LANGFUSE_S3_EVENT_UPLOAD_FORCE_PATH_STYLE: "true" + LANGFUSE_S3_EVENT_UPLOAD_PREFIX: events/ + LANGFUSE_S3_MEDIA_UPLOAD_BUCKET: ${LANGFUSE_S3_BUCKET:-langfuse} + LANGFUSE_S3_MEDIA_UPLOAD_REGION: auto + LANGFUSE_S3_MEDIA_UPLOAD_ACCESS_KEY_ID: ${LANGFUSE_MINIO_ROOT_USER:-minio} + LANGFUSE_S3_MEDIA_UPLOAD_SECRET_ACCESS_KEY: ${LANGFUSE_MINIO_ROOT_PASSWORD:-miniosecret} + LANGFUSE_S3_MEDIA_UPLOAD_ENDPOINT: http://langfuse-minio:9000 + LANGFUSE_S3_MEDIA_UPLOAD_FORCE_PATH_STYLE: "true" + LANGFUSE_S3_MEDIA_UPLOAD_PREFIX: media/ + LANGFUSE_S3_BATCH_EXPORT_ENABLED: "false" + LANGFUSE_S3_BATCH_EXPORT_BUCKET: ${LANGFUSE_S3_BUCKET:-langfuse} + LANGFUSE_S3_BATCH_EXPORT_REGION: auto + LANGFUSE_S3_BATCH_EXPORT_ENDPOINT: http://langfuse-minio:9000 + LANGFUSE_S3_BATCH_EXPORT_EXTERNAL_ENDPOINT: http://localhost:${LANGFUSE_MINIO_API_PORT:-9092} + LANGFUSE_S3_BATCH_EXPORT_ACCESS_KEY_ID: ${LANGFUSE_MINIO_ROOT_USER:-minio} + LANGFUSE_S3_BATCH_EXPORT_SECRET_ACCESS_KEY: ${LANGFUSE_MINIO_ROOT_PASSWORD:-miniosecret} + LANGFUSE_S3_BATCH_EXPORT_FORCE_PATH_STYLE: "true" + networks: + - nexent + + langfuse-web: + image: docker.io/langfuse/langfuse:${LANGFUSE_VERSION:-3} + container_name: nexent-langfuse-web + profiles: ["langfuse"] + restart: unless-stopped + depends_on: *langfuse-depends-on + environment: + <<: *langfuse-env + LANGFUSE_INIT_ORG_ID: ${LANGFUSE_INIT_ORG_ID:-nexent} + LANGFUSE_INIT_ORG_NAME: ${LANGFUSE_INIT_ORG_NAME:-Nexent} + LANGFUSE_INIT_PROJECT_ID: ${LANGFUSE_INIT_PROJECT_ID:-nexent-local} + LANGFUSE_INIT_PROJECT_NAME: ${LANGFUSE_INIT_PROJECT_NAME:-Nexent Local} + LANGFUSE_INIT_PROJECT_PUBLIC_KEY: ${LANGFUSE_INIT_PROJECT_PUBLIC_KEY:-pk-lf-nexent-local} + LANGFUSE_INIT_PROJECT_SECRET_KEY: ${LANGFUSE_INIT_PROJECT_SECRET_KEY:-sk-lf-nexent-local} + 
LANGFUSE_INIT_USER_EMAIL: ${LANGFUSE_INIT_USER_EMAIL:-admin@nexent.local} + LANGFUSE_INIT_USER_NAME: ${LANGFUSE_INIT_USER_NAME:-Nexent Admin} + LANGFUSE_INIT_USER_PASSWORD: ${LANGFUSE_INIT_USER_PASSWORD:-nexent-langfuse-admin} + ports: + - "${LANGFUSE_PORT:-3001}:3000" + networks: + - nexent + + langfuse-clickhouse: + image: docker.io/clickhouse/clickhouse-server:${LANGFUSE_CLICKHOUSE_VERSION:-26.3-alpine} + container_name: nexent-langfuse-clickhouse + profiles: ["langfuse"] + restart: unless-stopped + user: "101:101" + environment: + CLICKHOUSE_DB: default + CLICKHOUSE_USER: ${LANGFUSE_CLICKHOUSE_USER:-clickhouse} + CLICKHOUSE_PASSWORD: ${LANGFUSE_CLICKHOUSE_PASSWORD:-clickhouse} volumes: - - ./monitoring/otel-collector-config.yml:/etc/otel-collector-config.yml + - langfuse-clickhouse-data:/var/lib/clickhouse + - langfuse-clickhouse-logs:/var/log/clickhouse-server ports: - - "4317:4317" # OTLP gRPC receiver - - "4318:4318" # OTLP HTTP receiver - - "8888:8888" # Prometheus metrics exposed by the collector - - "8889:8889" # Prometheus exporter metrics - depends_on: - - jaeger - - prometheus + - "127.0.0.1:${LANGFUSE_CLICKHOUSE_HTTP_PORT:-8124}:8123" + - "127.0.0.1:${LANGFUSE_CLICKHOUSE_NATIVE_PORT:-9002}:9000" + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://127.0.0.1:8123/ping || exit 1"] + interval: 5s + timeout: 5s + retries: 10 + start_period: 1s networks: - - nexent-network + - nexent + + langfuse-minio: + image: docker.io/minio/minio:${LANGFUSE_MINIO_VERSION:-RELEASE.2023-12-20T01-00-02Z} + container_name: nexent-langfuse-minio + profiles: ["langfuse"] restart: unless-stopped + entrypoint: sh + command: -c 'mkdir -p /data/${LANGFUSE_S3_BUCKET:-langfuse} && minio server --address ":9000" --console-address ":9001" /data' + environment: + MINIO_ROOT_USER: ${LANGFUSE_MINIO_ROOT_USER:-minio} + MINIO_ROOT_PASSWORD: ${LANGFUSE_MINIO_ROOT_PASSWORD:-miniosecret} + ports: + - "${LANGFUSE_MINIO_API_PORT:-9092}:9000" + - 
"127.0.0.1:${LANGFUSE_MINIO_CONSOLE_PORT:-9093}:9001" + volumes: + - langfuse-minio-data:/data + healthcheck: + test: ["CMD", "mc", "ready", "local"] + interval: 1s + timeout: 5s + retries: 5 + start_period: 1s + networks: + - nexent -volumes: - jaeger-data: - prometheus-data: - grafana-data: + langfuse-redis: + image: docker.io/redis:${LANGFUSE_REDIS_VERSION:-alpine} + container_name: nexent-langfuse-redis + profiles: ["langfuse"] + restart: unless-stopped + command: > + --requirepass ${LANGFUSE_REDIS_AUTH:-myredissecret} + --maxmemory-policy noeviction + ports: + - "127.0.0.1:${LANGFUSE_REDIS_PORT:-6380}:6379" + volumes: + - langfuse-redis-data:/data + healthcheck: + test: ["CMD-SHELL", "redis-cli -a ${LANGFUSE_REDIS_AUTH:-myredissecret} ping | grep PONG"] + interval: 3s + timeout: 10s + retries: 10 + networks: + - nexent + + langfuse-postgres: + image: docker.io/postgres:${LANGFUSE_POSTGRES_VERSION:-15-alpine} + container_name: nexent-langfuse-postgres + profiles: ["langfuse"] + restart: unless-stopped + environment: + POSTGRES_USER: ${LANGFUSE_POSTGRES_USER:-postgres} + POSTGRES_PASSWORD: ${LANGFUSE_POSTGRES_PASSWORD:-postgres} + POSTGRES_DB: ${LANGFUSE_POSTGRES_DB:-postgres} + TZ: UTC + PGTZ: UTC + ports: + - "127.0.0.1:${LANGFUSE_POSTGRES_PORT:-5440}:5432" + volumes: + - langfuse-postgres-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${LANGFUSE_POSTGRES_USER:-postgres}"] + interval: 3s + timeout: 3s + retries: 10 + networks: + - nexent networks: - nexent-network: + nexent: + name: nexent_network external: true + +volumes: + phoenix-data: + langfuse-postgres-data: + langfuse-clickhouse-data: + langfuse-clickhouse-logs: + langfuse-minio-data: + langfuse-redis-data: + grafana-data: + tempo-data: diff --git a/docker/docker-compose-supabase.prod.yml b/docker/docker-compose-supabase.prod.yml index 234185b0b..6ad7ac134 100644 --- a/docker/docker-compose-supabase.prod.yml +++ b/docker/docker-compose-supabase.prod.yml @@ -142,4 
+142,5 @@ volumes: networks: nexent: - driver: bridge \ No newline at end of file + name: nexent_network + driver: bridge diff --git a/docker/docker-compose-supabase.yml b/docker/docker-compose-supabase.yml index 21a4e6958..b781b4444 100644 --- a/docker/docker-compose-supabase.yml +++ b/docker/docker-compose-supabase.yml @@ -147,4 +147,5 @@ volumes: networks: nexent: - driver: bridge \ No newline at end of file + name: nexent_network + driver: bridge diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index cfb20f6e8..f23e4210c 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -95,4 +95,5 @@ services: networks: nexent: + name: nexent_network driver: bridge diff --git a/docker/docker-compose.prod.yml b/docker/docker-compose.prod.yml index 934fe8b2f..29bd41d9f 100644 --- a/docker/docker-compose.prod.yml +++ b/docker/docker-compose.prod.yml @@ -75,6 +75,7 @@ services: restart: always volumes: - ${NEXENT_USER_DIR:-$HOME/nexent}:/mnt/nexent + - ${ROOT_DIR}/skills:/mnt/nexent-data/skills - ${ROOT_DIR}/openssh-server/ssh-keys:/opt/ssh-keys:ro - ${ROOT_DIR}/scripts/sync_user_supabase2pg.py:/opt/sync_user_supabase2pg.py:ro - /var/run/docker.sock:/var/run/docker.sock:ro # Docker socket for MCP container management @@ -103,6 +104,7 @@ services: restart: always volumes: - ${NEXENT_USER_DIR:-$HOME/nexent}:/mnt/nexent + - ${ROOT_DIR}/skills:/mnt/nexent-data/skills - ${ROOT_DIR}/openssh-server/ssh-keys:/opt/ssh-keys:ro environment: <<: [*minio-vars, *es-vars] @@ -155,6 +157,7 @@ services: restart: always volumes: - ${NEXENT_USER_DIR:-$HOME/nexent}:/mnt/nexent + - ${ROOT_DIR}/skills:/mnt/nexent-data/skills - ${ROOT_DIR}/openssh-server/ssh-keys:/opt/ssh-keys:ro environment: <<: [*minio-vars, *es-vars] @@ -300,6 +303,7 @@ services: networks: nexent: + name: nexent_network driver: bridge volumes: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 89088f2c3..fd3851ab4 100644 --- a/docker/docker-compose.yml 
+++ b/docker/docker-compose.yml @@ -86,6 +86,7 @@ services: - "5010:5010" # Config service port volumes: - ${NEXENT_USER_DIR:-$HOME/nexent}:/mnt/nexent + - ${ROOT_DIR}/skills:/mnt/nexent-data/skills - ${ROOT_DIR}/openssh-server/ssh-keys:/opt/ssh-keys:ro - ${ROOT_DIR}/scripts/sync_user_supabase2pg.py:/opt/sync_user_supabase2pg.py:ro - /var/run/docker.sock:/var/run/docker.sock:ro # Docker socket for MCP container management @@ -116,6 +117,7 @@ services: - "5014:5014" # Runtime service port volumes: - ${NEXENT_USER_DIR:-$HOME/nexent}:/mnt/nexent + - ${ROOT_DIR}/skills:/mnt/nexent-data/skills - ${ROOT_DIR}/openssh-server/ssh-keys:/opt/ssh-keys:ro environment: <<: [*minio-vars, *es-vars] @@ -173,6 +175,7 @@ services: - "5013:5013" # Northbound service port volumes: - ${NEXENT_USER_DIR:-$HOME/nexent}:/mnt/nexent + - ${ROOT_DIR}/skills:/mnt/nexent-data/skills - ${ROOT_DIR}/openssh-server/ssh-keys:/opt/ssh-keys:ro environment: <<: [*minio-vars, *es-vars] @@ -327,6 +330,7 @@ services: networks: nexent: + name: nexent_network driver: bridge volumes: diff --git a/docker/generate_env.sh b/docker/generate_env.sh index 962102f1d..c6b20f0b1 100755 --- a/docker/generate_env.sh +++ b/docker/generate_env.sh @@ -2,34 +2,18 @@ # Exit immediately if a command exits with a non-zero status set -e -echo " 📁 Target .env location: Root directory (../)" +echo " 📁 Target .env location: docker/.env" # Function to copy and prepare .env file prepare_env_file() { - echo " 📝 Preparing root .env file..." - - # Check if .env already exists in root directory (parent directory) - if [ -f "../.env" ]; then - echo " ⚠️ .env already exists in root directory" - echo "" - read -p "👉 Do you want to overwrite it? [Y/N] (default: Y): " overwrite - # If input is empty, use default "Y" - overwrite=${overwrite:-Y} - if [[ ! "$overwrite" =~ ^[Yy]$ ]]; then - echo " Using existing .env file" - return 0 - fi - fi + echo " 📝 Preparing docker/.env file..." 
- # Check if .env exists in current docker directory if [ -f ".env" ]; then - echo " 📋 Copying docker/.env to root directory..." - cp ".env" "../.env" - echo " ✅ Copied docker/.env to ../.env" + echo " ✅ Using existing docker/.env" elif [ -f ".env.example" ]; then - echo " 📋 docker/.env not found, copying .env.example to root directory..." - cp ".env.example" "../.env" - echo " ✅ Copied docker/.env.example to ../.env" + echo " 📋 docker/.env not found, copying docker/.env.example..." + cp ".env.example" ".env" + echo " ✅ Created docker/.env from docker/.env.example" else echo " ❌ ERROR Neither docker/.env nor docker/.env.example exists in docker directory" ERROR_OCCURRED=1 @@ -39,57 +23,57 @@ prepare_env_file() { # Function to update .env file with generated keys update_env_file() { - echo " 📝 Updating root .env file with generated keys..." + echo " 📝 Updating docker/.env file with generated keys..." - if [ ! -f "../.env" ]; then - echo " ❌ ERROR .env file does not exist in root directory" + if [ ! 
-f ".env" ]; then + echo " ❌ ERROR docker/.env file does not exist" ERROR_OCCURRED=1 return 1 fi # Update or add MINIO_ACCESS_KEY - if grep -q "^MINIO_ACCESS_KEY=" ../.env; then - sed -i.bak "s~^MINIO_ACCESS_KEY=.*~MINIO_ACCESS_KEY=$MINIO_ACCESS_KEY~" ../.env + if grep -q "^MINIO_ACCESS_KEY=" .env; then + sed -i.bak "s~^MINIO_ACCESS_KEY=.*~MINIO_ACCESS_KEY=$MINIO_ACCESS_KEY~" .env else - echo "" >> ../.env - echo "# Generated MinIO Keys" >> ../.env - echo "MINIO_ACCESS_KEY=$MINIO_ACCESS_KEY" >> ../.env + echo "" >> .env + echo "# Generated MinIO Keys" >> .env + echo "MINIO_ACCESS_KEY=$MINIO_ACCESS_KEY" >> .env fi # Update or add MINIO_SECRET_KEY - if grep -q "^MINIO_SECRET_KEY=" ../.env; then - sed -i.bak "s~^MINIO_SECRET_KEY=.*~MINIO_SECRET_KEY=$MINIO_SECRET_KEY~" ../.env + if grep -q "^MINIO_SECRET_KEY=" .env; then + sed -i.bak "s~^MINIO_SECRET_KEY=.*~MINIO_SECRET_KEY=$MINIO_SECRET_KEY~" .env else - echo "MINIO_SECRET_KEY=$MINIO_SECRET_KEY" >> ../.env + echo "MINIO_SECRET_KEY=$MINIO_SECRET_KEY" >> .env fi # Update or add ELASTICSEARCH_API_KEY (only if it was generated successfully) if [ -n "$ELASTICSEARCH_API_KEY" ]; then - if grep -q "^ELASTICSEARCH_API_KEY=" ../.env; then - sed -i.bak "s~^ELASTICSEARCH_API_KEY=.*~ELASTICSEARCH_API_KEY=$ELASTICSEARCH_API_KEY~" ../.env + if grep -q "^ELASTICSEARCH_API_KEY=" .env; then + sed -i.bak "s~^ELASTICSEARCH_API_KEY=.*~ELASTICSEARCH_API_KEY=$ELASTICSEARCH_API_KEY~" .env else - echo "" >> ../.env - echo "# Generated Elasticsearch API Key" >> ../.env - echo "ELASTICSEARCH_API_KEY=$ELASTICSEARCH_API_KEY" >> ../.env + echo "" >> .env + echo "# Generated Elasticsearch API Key" >> .env + echo "ELASTICSEARCH_API_KEY=$ELASTICSEARCH_API_KEY" >> .env fi fi # Update or add SSH credentials (only if they were set) if [ -n "$SSH_USERNAME" ]; then - if grep -q "^SSH_USERNAME=" ../.env; then - sed -i.bak "s~^SSH_USERNAME=.*~SSH_USERNAME=$SSH_USERNAME~" ../.env + if grep -q "^SSH_USERNAME=" .env; then + sed -i.bak 
"s~^SSH_USERNAME=.*~SSH_USERNAME=$SSH_USERNAME~" .env else - echo "" >> ../.env - echo "# SSH Terminal Tool Credentials" >> ../.env - echo "SSH_USERNAME=$SSH_USERNAME" >> ../.env + echo "" >> .env + echo "# SSH Terminal Tool Credentials" >> .env + echo "SSH_USERNAME=$SSH_USERNAME" >> .env fi fi if [ -n "$SSH_PASSWORD" ]; then - if grep -q "^SSH_PASSWORD=" ../.env; then - sed -i.bak "s~^SSH_PASSWORD=.*~SSH_PASSWORD=$SSH_PASSWORD~" ../.env + if grep -q "^SSH_PASSWORD=" .env; then + sed -i.bak "s~^SSH_PASSWORD=.*~SSH_PASSWORD=$SSH_PASSWORD~" .env else - echo "SSH_PASSWORD=$SSH_PASSWORD" >> ../.env + echo "SSH_PASSWORD=$SSH_PASSWORD" >> .env fi fi echo " ✅ Generated keys updated successfully" @@ -98,145 +82,145 @@ update_env_file() { echo " 🔧 Updating service URLs for localhost development environment..." # ELASTICSEARCH_HOST - if grep -q "^ELASTICSEARCH_HOST=" ../.env; then - sed -i.bak "s~^ELASTICSEARCH_HOST=.*~ELASTICSEARCH_HOST=http://localhost:9210~" ../.env + if grep -q "^ELASTICSEARCH_HOST=" .env; then + sed -i.bak "s~^ELASTICSEARCH_HOST=.*~ELASTICSEARCH_HOST=http://localhost:9210~" .env else - echo "" >> ../.env - echo "# Development Environment URLs" >> ../.env - echo "ELASTICSEARCH_HOST=http://localhost:9210" >> ../.env + echo "" >> .env + echo "# Development Environment URLs" >> .env + echo "ELASTICSEARCH_HOST=http://localhost:9210" >> .env fi # Main Services # CONFIG_SERVICE_URL - if grep -q "^CONFIG_SERVICE_URL=" ../.env; then - sed -i.bak "s~^CONFIG_SERVICE_URL=.*~CONFIG_SERVICE_URL=http://localhost:5010~" ../.env + if grep -q "^CONFIG_SERVICE_URL=" .env; then + sed -i.bak "s~^CONFIG_SERVICE_URL=.*~CONFIG_SERVICE_URL=http://localhost:5010~" .env else - echo "" >> ../.env - echo "# Main Services" >> ../.env - echo "CONFIG_SERVICE_URL=http://localhost:5010" >> ../.env + echo "" >> .env + echo "# Main Services" >> .env + echo "CONFIG_SERVICE_URL=http://localhost:5010" >> .env fi # RUNTIME_SERVICE_URL - if grep -q "^RUNTIME_SERVICE_URL=" ../.env; then - sed 
-i.bak "s~^RUNTIME_SERVICE_URL=.*~RUNTIME_SERVICE_URL=http://localhost:5014~" ../.env + if grep -q "^RUNTIME_SERVICE_URL=" .env; then + sed -i.bak "s~^RUNTIME_SERVICE_URL=.*~RUNTIME_SERVICE_URL=http://localhost:5014~" .env else - echo "RUNTIME_SERVICE_URL=http://localhost:5014" >> ../.env + echo "RUNTIME_SERVICE_URL=http://localhost:5014" >> .env fi # ELASTICSEARCH_SERVICE - if grep -q "^ELASTICSEARCH_SERVICE=" ../.env; then - sed -i.bak "s~^ELASTICSEARCH_SERVICE=.*~ELASTICSEARCH_SERVICE=http://localhost:5010/api~" ../.env + if grep -q "^ELASTICSEARCH_SERVICE=" .env; then + sed -i.bak "s~^ELASTICSEARCH_SERVICE=.*~ELASTICSEARCH_SERVICE=http://localhost:5010/api~" .env else - echo "ELASTICSEARCH_SERVICE=http://localhost:5010/api" >> ../.env + echo "ELASTICSEARCH_SERVICE=http://localhost:5010/api" >> .env fi # NEXENT_MCP_SERVER - if grep -q "^NEXENT_MCP_SERVER=" ../.env; then - sed -i.bak "s~^NEXENT_MCP_SERVER=.*~NEXENT_MCP_SERVER=http://localhost:5011~" ../.env + if grep -q "^NEXENT_MCP_SERVER=" .env; then + sed -i.bak "s~^NEXENT_MCP_SERVER=.*~NEXENT_MCP_SERVER=http://localhost:5011~" .env else - echo "NEXENT_MCP_SERVER=http://localhost:5011" >> ../.env + echo "NEXENT_MCP_SERVER=http://localhost:5011" >> .env fi # DATA_PROCESS_SERVICE - if grep -q "^DATA_PROCESS_SERVICE=" ../.env; then - sed -i.bak "s~^DATA_PROCESS_SERVICE=.*~DATA_PROCESS_SERVICE=http://localhost:5012/api~" ../.env + if grep -q "^DATA_PROCESS_SERVICE=" .env; then + sed -i.bak "s~^DATA_PROCESS_SERVICE=.*~DATA_PROCESS_SERVICE=http://localhost:5012/api~" .env else - echo "DATA_PROCESS_SERVICE=http://localhost:5012/api" >> ../.env + echo "DATA_PROCESS_SERVICE=http://localhost:5012/api" >> .env fi # NORTHBOUND_API_SERVER - if grep -q "^NORTHBOUND_API_SERVER=" ../.env; then - sed -i.bak "s~^NORTHBOUND_API_SERVER=.*~NORTHBOUND_API_SERVER=http://localhost:5013/api~" ../.env + if grep -q "^NORTHBOUND_API_SERVER=" .env; then + sed -i.bak 
"s~^NORTHBOUND_API_SERVER=.*~NORTHBOUND_API_SERVER=http://localhost:5013/api~" .env else - echo "NORTHBOUND_API_SERVER=http://localhost:5013/api" >> ../.env + echo "NORTHBOUND_API_SERVER=http://localhost:5013/api" >> .env fi # MCP_MANAGEMENT_API - if grep -q "^MCP_MANAGEMENT_API=" ../.env; then - sed -i.bak "s~^MCP_MANAGEMENT_API=.*~MCP_MANAGEMENT_API=http://localhost:5015~" ../.env + if grep -q "^MCP_MANAGEMENT_API=" .env; then + sed -i.bak "s~^MCP_MANAGEMENT_API=.*~MCP_MANAGEMENT_API=http://localhost:5015~" .env else - echo "MCP_MANAGEMENT_API=http://localhost:5015" >> ../.env + echo "MCP_MANAGEMENT_API=http://localhost:5015" >> .env fi # MINIO_ENDPOINT - if grep -q "^MINIO_ENDPOINT=" ../.env; then - sed -i.bak "s~^MINIO_ENDPOINT=.*~MINIO_ENDPOINT=http://localhost:9010~" ../.env + if grep -q "^MINIO_ENDPOINT=" .env; then + sed -i.bak "s~^MINIO_ENDPOINT=.*~MINIO_ENDPOINT=http://localhost:9010~" .env else - echo "MINIO_ENDPOINT=http://localhost:9010" >> ../.env + echo "MINIO_ENDPOINT=http://localhost:9010" >> .env fi # REDIS_URL - if grep -q "^REDIS_URL=" ../.env; then - sed -i.bak "s~^REDIS_URL=.*~REDIS_URL=redis://localhost:6379/0~" ../.env + if grep -q "^REDIS_URL=" .env; then + sed -i.bak "s~^REDIS_URL=.*~REDIS_URL=redis://localhost:6379/0~" .env else - echo "REDIS_URL=redis://localhost:6379/0" >> ../.env + echo "REDIS_URL=redis://localhost:6379/0" >> .env fi # REDIS_BACKEND_URL - if grep -q "^REDIS_BACKEND_URL=" ../.env; then - sed -i.bak "s~^REDIS_BACKEND_URL=.*~REDIS_BACKEND_URL=redis://localhost:6379/1~" ../.env + if grep -q "^REDIS_BACKEND_URL=" .env; then + sed -i.bak "s~^REDIS_BACKEND_URL=.*~REDIS_BACKEND_URL=redis://localhost:6379/1~" .env else - echo "REDIS_BACKEND_URL=redis://localhost:6379/1" >> ../.env + echo "REDIS_BACKEND_URL=redis://localhost:6379/1" >> .env fi # POSTGRES_HOST - if grep -q "^POSTGRES_HOST=" ../.env; then - sed -i.bak "s~^POSTGRES_HOST=.*~POSTGRES_HOST=localhost~" ../.env + if grep -q "^POSTGRES_HOST=" .env; then + sed -i.bak 
"s~^POSTGRES_HOST=.*~POSTGRES_HOST=localhost~" .env else - echo "POSTGRES_HOST=localhost" >> ../.env + echo "POSTGRES_HOST=localhost" >> .env fi # POSTGRES_PORT - if grep -q "^POSTGRES_PORT=" ../.env; then - sed -i.bak "s~^POSTGRES_PORT=.*~POSTGRES_PORT=5434~" ../.env + if grep -q "^POSTGRES_PORT=" .env; then + sed -i.bak "s~^POSTGRES_PORT=.*~POSTGRES_PORT=5434~" .env else - echo "POSTGRES_PORT=5434" >> ../.env + echo "POSTGRES_PORT=5434" >> .env fi # Supabase Configuration (Only for full version) if [ "$DEPLOYMENT_VERSION" = "full" ]; then if [ -n "$SUPABASE_KEY" ]; then - if grep -q "^SUPABASE_KEY=" ../.env; then - sed -i.bak "s~^SUPABASE_KEY=.*~SUPABASE_KEY=$SUPABASE_KEY~" ../.env + if grep -q "^SUPABASE_KEY=" .env; then + sed -i.bak "s~^SUPABASE_KEY=.*~SUPABASE_KEY=$SUPABASE_KEY~" .env else - echo "" >> ../.env - echo "# Supabase Keys" >> ../.env - echo "SUPABASE_KEY=$SUPABASE_KEY" >> ../.env + echo "" >> .env + echo "# Supabase Keys" >> .env + echo "SUPABASE_KEY=$SUPABASE_KEY" >> .env fi fi if [ -n "$SERVICE_ROLE_KEY" ]; then - if grep -q "^SERVICE_ROLE_KEY=" ../.env; then - sed -i.bak "s~^SERVICE_ROLE_KEY=.*~SERVICE_ROLE_KEY=$SERVICE_ROLE_KEY~" ../.env + if grep -q "^SERVICE_ROLE_KEY=" .env; then + sed -i.bak "s~^SERVICE_ROLE_KEY=.*~SERVICE_ROLE_KEY=$SERVICE_ROLE_KEY~" .env else - echo "SERVICE_ROLE_KEY=$SERVICE_ROLE_KEY" >> ../.env + echo "SERVICE_ROLE_KEY=$SERVICE_ROLE_KEY" >> .env fi fi # Additional Supabase configuration - if grep -q "^SUPABASE_URL=" ../.env; then - sed -i.bak "s~^SUPABASE_URL=.*~SUPABASE_URL=http://localhost:8000~" ../.env + if grep -q "^SUPABASE_URL=" .env; then + sed -i.bak "s~^SUPABASE_URL=.*~SUPABASE_URL=http://localhost:8000~" .env else - echo "SUPABASE_URL=http://localhost:8000" >> ../.env + echo "SUPABASE_URL=http://localhost:8000" >> .env fi - if grep -q "^API_EXTERNAL_URL=" ../.env; then - sed -i.bak "s~^API_EXTERNAL_URL=.*~API_EXTERNAL_URL=http://localhost:8000~" ../.env + if grep -q "^API_EXTERNAL_URL=" .env; then + sed -i.bak 
"s~^API_EXTERNAL_URL=.*~API_EXTERNAL_URL=http://localhost:8000~" .env else - echo "API_EXTERNAL_URL=http://localhost:8000" >> ../.env + echo "API_EXTERNAL_URL=http://localhost:8000" >> .env fi - if grep -q "^SITE_URL=" ../.env; then - sed -i.bak "s~^SITE_URL=.*~SITE_URL=http://localhost:3011~" ../.env + if grep -q "^SITE_URL=" .env; then + sed -i.bak "s~^SITE_URL=.*~SITE_URL=http://localhost:3011~" .env else - echo "SITE_URL=http://localhost:3011" >> ../.env + echo "SITE_URL=http://localhost:3011" >> .env fi fi # Remove backup file - rm -f ../.env.bak + rm -f .env.bak - echo " ✅ Root .env file updated successfully with localhost development URLs" + echo " ✅ docker/.env updated successfully with localhost development URLs" } # Function to show summary diff --git a/docker/init.sql b/docker/init.sql index 2e494fc72..0668def01 100644 --- a/docker/init.sql +++ b/docker/init.sql @@ -177,6 +177,8 @@ CREATE TABLE IF NOT EXISTS "model_record_t" ( "tenant_id" varchar(100) COLLATE "pg_catalog"."default" DEFAULT 'tenant_id', "model_appid" varchar(100) COLLATE "pg_catalog"."default" DEFAULT '', "access_token" varchar(100) COLLATE "pg_catalog"."default" DEFAULT '', + "concurrency_limit" INTEGER DEFAULT NULL, + "timeout_seconds" INTEGER DEFAULT 120, CONSTRAINT "nexent_models_t_pk" PRIMARY KEY ("model_id") ); ALTER TABLE "model_record_t" OWNER TO "root"; @@ -202,6 +204,8 @@ COMMENT ON COLUMN "model_record_t"."created_by" IS 'Creator ID, audit field'; COMMENT ON COLUMN "model_record_t"."tenant_id" IS 'Tenant ID for filtering'; COMMENT ON COLUMN "model_record_t"."model_appid" IS 'Application ID for model authentication.'; COMMENT ON COLUMN "model_record_t"."access_token" IS 'Access token for model authentication.'; +COMMENT ON COLUMN "model_record_t"."concurrency_limit" IS 'Maximum concurrent requests for this model. Default is NULL (unlimited).'; +COMMENT ON COLUMN "model_record_t"."timeout_seconds" IS 'Request timeout in seconds for this model. 
Default is 120 seconds.'; COMMENT ON TABLE "model_record_t" IS 'List of models defined by users in the configuration page'; INSERT INTO "nexent"."model_record_t" ("model_repo", "model_name", "model_factory", "model_type", "api_key", "base_url", "max_tokens", "used_token", "display_name", "connect_status") VALUES ('', 'volcano_tts', 'OpenAI-API-Compatible', 'tts', '', '', 0, 0, 'volcano_tts', 'unavailable'); @@ -320,6 +324,8 @@ CREATE TABLE IF NOT EXISTS nexent.ag_tenant_agent_t ( model_id INTEGER, business_logic_model_name VARCHAR(100), business_logic_model_id INTEGER, + prompt_template_id INTEGER, + prompt_template_name VARCHAR(100), max_steps INTEGER, duty_prompt TEXT, constraint_prompt TEXT, @@ -370,6 +376,8 @@ COMMENT ON COLUMN nexent.ag_tenant_agent_t.model_name IS '[DEPRECATED] Name of t COMMENT ON COLUMN nexent.ag_tenant_agent_t.model_id IS 'Model ID, foreign key reference to model_record_t.model_id'; COMMENT ON COLUMN nexent.ag_tenant_agent_t.business_logic_model_name IS 'Model name used for business logic prompt generation'; COMMENT ON COLUMN nexent.ag_tenant_agent_t.business_logic_model_id IS 'Model ID used for business logic prompt generation, foreign key reference to model_record_t.model_id'; +COMMENT ON COLUMN nexent.ag_tenant_agent_t.prompt_template_id IS 'Prompt template ID used for business logic prompt generation'; +COMMENT ON COLUMN nexent.ag_tenant_agent_t.prompt_template_name IS 'Prompt template name used for business logic prompt generation'; COMMENT ON COLUMN nexent.ag_tenant_agent_t.max_steps IS 'Maximum number of steps'; COMMENT ON COLUMN nexent.ag_tenant_agent_t.duty_prompt IS 'Duty prompt'; COMMENT ON COLUMN nexent.ag_tenant_agent_t.constraint_prompt IS 'Constraint prompt'; @@ -395,6 +403,97 @@ CREATE INDEX IF NOT EXISTS idx_ag_tenant_agent_t_is_new ON nexent.ag_tenant_agent_t (tenant_id, is_new) WHERE delete_flag = 'N'; +CREATE TABLE IF NOT EXISTS nexent.ag_prompt_template_t ( + template_id SERIAL PRIMARY KEY, + template_name VARCHAR(100) 
NOT NULL, + description VARCHAR(500), + template_type VARCHAR(50) NOT NULL DEFAULT 'agent_generate', + tenant_id VARCHAR(100) NOT NULL, + user_id VARCHAR(100) NOT NULL, + template_content_zh JSONB NOT NULL, + template_content_en JSONB, + create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + update_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + created_by VARCHAR(100), + updated_by VARCHAR(100), + delete_flag VARCHAR(1) DEFAULT 'N' +); + +ALTER TABLE nexent.ag_prompt_template_t OWNER TO "root"; + +CREATE OR REPLACE FUNCTION update_ag_prompt_template_update_time() +RETURNS TRIGGER AS $$ +BEGIN + NEW.update_time = CURRENT_TIMESTAMP; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER update_ag_prompt_template_update_time_trigger +BEFORE UPDATE ON nexent.ag_prompt_template_t +FOR EACH ROW +EXECUTE FUNCTION update_ag_prompt_template_update_time(); + +COMMENT ON TABLE nexent.ag_prompt_template_t IS 'Prompt template table for user-defined business logic generation prompts'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_id IS 'Prompt template ID'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_name IS 'Prompt template name'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.description IS 'Prompt template description'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_type IS 'Prompt template type'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.tenant_id IS 'Tenant ID'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.user_id IS 'User ID'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_content_zh IS 'Chinese prompt template content'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_content_en IS 'English prompt template content'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.create_time IS 'Creation time'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.update_time IS 'Update time'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.created_by IS 'Creator'; +COMMENT ON COLUMN 
nexent.ag_prompt_template_t.updated_by IS 'Updater'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.delete_flag IS 'Whether it is deleted. Optional values: Y/N'; + +CREATE UNIQUE INDEX IF NOT EXISTS uq_prompt_template_user_name_active +ON nexent.ag_prompt_template_t (tenant_id, user_id, template_name) +WHERE delete_flag = 'N'; + +CREATE INDEX IF NOT EXISTS idx_ag_prompt_template_t_user +ON nexent.ag_prompt_template_t (tenant_id, user_id, template_type) +WHERE delete_flag = 'N'; + +INSERT INTO nexent.ag_prompt_template_t ( + template_id, + template_name, + description, + template_type, + tenant_id, + user_id, + template_content_zh, + template_content_en, + created_by, + updated_by, + delete_flag +) +VALUES ( + 0, + 'system_default', + 'System default prompt template', + 'agent_generate', + 'tenant_id', + 'user_id', + '{}'::jsonb, + '{}'::jsonb, + 'user_id', + 'user_id', + 'N' +) +ON CONFLICT (template_id) DO UPDATE SET + template_name = EXCLUDED.template_name, + description = EXCLUDED.description, + template_type = EXCLUDED.template_type, + tenant_id = EXCLUDED.tenant_id, + user_id = EXCLUDED.user_id, + template_content_zh = EXCLUDED.template_content_zh, + template_content_en = EXCLUDED.template_content_en, + updated_by = EXCLUDED.updated_by, + delete_flag = 'N'; + -- Create the ag_tool_instance_t table in the nexent schema CREATE TABLE IF NOT EXISTS nexent.ag_tool_instance_t ( @@ -506,6 +605,14 @@ CREATE TABLE IF NOT EXISTS nexent.mcp_record_t ( status BOOLEAN DEFAULT NULL, container_id VARCHAR(200) DEFAULT NULL, authorization_token VARCHAR(500) DEFAULT NULL, + custom_headers JSON DEFAULT NULL, + source VARCHAR(30), + registry_json JSONB, + config_json JSON, + enabled BOOLEAN DEFAULT TRUE, + tags TEXT[], + description TEXT, + container_port INTEGER, create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, update_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, created_by VARCHAR(100), @@ -525,11 +632,19 @@ COMMENT ON COLUMN 
nexent.mcp_record_t.mcp_server IS 'MCP server address'; COMMENT ON COLUMN nexent.mcp_record_t.status IS 'MCP server connection status, true=connected, false=disconnected, null=unknown'; COMMENT ON COLUMN nexent.mcp_record_t.container_id IS 'Docker container ID for MCP service, NULL for non-containerized MCP'; COMMENT ON COLUMN nexent.mcp_record_t.authorization_token IS 'Authorization token for MCP server authentication (e.g., Bearer token)'; +COMMENT ON COLUMN nexent.mcp_record_t.custom_headers IS 'Custom HTTP headers as JSON object for MCP server requests'; COMMENT ON COLUMN nexent.mcp_record_t.create_time IS 'Creation time, audit field'; COMMENT ON COLUMN nexent.mcp_record_t.update_time IS 'Update time, audit field'; COMMENT ON COLUMN nexent.mcp_record_t.created_by IS 'Creator ID, audit field'; COMMENT ON COLUMN nexent.mcp_record_t.updated_by IS 'Last updater ID, audit field'; COMMENT ON COLUMN nexent.mcp_record_t.delete_flag IS 'When deleted by user frontend, delete flag will be set to true, achieving soft delete effect. 
Optional values Y/N'; +COMMENT ON COLUMN nexent.mcp_record_t.source IS 'Source type: local/mcp_registry/community'; +COMMENT ON COLUMN nexent.mcp_record_t.registry_json IS 'Full MCP registry server.json snapshot'; +COMMENT ON COLUMN nexent.mcp_record_t.config_json IS 'MCP config data'; +COMMENT ON COLUMN nexent.mcp_record_t.enabled IS 'Enabled'; +COMMENT ON COLUMN nexent.mcp_record_t.tags IS 'Tags'; +COMMENT ON COLUMN nexent.mcp_record_t.description IS 'Description'; +COMMENT ON COLUMN nexent.mcp_record_t.container_port IS 'Host port bound for containerized MCP service'; -- Create a function to update the update_time column CREATE OR REPLACE FUNCTION update_mcp_record_update_time() @@ -552,6 +667,19 @@ EXECUTE FUNCTION update_mcp_record_update_time(); -- Add comment to the trigger COMMENT ON TRIGGER update_mcp_record_update_time_trigger ON nexent.mcp_record_t IS 'Trigger to call update_mcp_record_update_time function before each update on mcp_record_t table'; +-- Add indexes for common management queries +CREATE INDEX IF NOT EXISTS idx_mcp_record_t_tenant_delete + ON nexent.mcp_record_t (tenant_id, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_record_t_tenant_name + ON nexent.mcp_record_t (tenant_id, mcp_name, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_record_t_tenant_server + ON nexent.mcp_record_t (tenant_id, mcp_server, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_record_t_tags_gin + ON nexent.mcp_record_t USING GIN (tags); + -- Create user tenant relationship table CREATE TABLE IF NOT EXISTS nexent.user_tenant_t ( user_tenant_id SERIAL PRIMARY KEY, @@ -694,7 +822,7 @@ COMMENT ON COLUMN nexent.tenant_invitation_code_t.group_ids IS 'Associated group COMMENT ON COLUMN nexent.tenant_invitation_code_t.capacity IS 'Invitation code capacity'; COMMENT ON COLUMN nexent.tenant_invitation_code_t.expiry_date IS 'Invitation code expiry date'; COMMENT ON COLUMN nexent.tenant_invitation_code_t.status IS 'Invitation code status: IN_USE, EXPIRE, DISABLE, 
RUN_OUT'; -COMMENT ON COLUMN nexent.tenant_invitation_code_t.code_type IS 'Invitation code type: ADMIN_INVITE, DEV_INVITE, USER_INVITE'; +COMMENT ON COLUMN nexent.tenant_invitation_code_t.code_type IS 'Invitation code type: ADMIN_INVITE, DEV_INVITE, USER_INVITE, ASSET_OWNER_INVITE'; COMMENT ON COLUMN nexent.tenant_invitation_code_t.create_time IS 'Create time'; COMMENT ON COLUMN nexent.tenant_invitation_code_t.update_time IS 'Update time'; COMMENT ON COLUMN nexent.tenant_invitation_code_t.created_by IS 'Created by'; @@ -975,7 +1103,42 @@ INSERT INTO nexent.role_permission_t (role_permission_id, user_role, permission_ (184, 'SPEED', 'RESOURCE', 'TENANT.INVITE', 'CREATE'), (185, 'SPEED', 'RESOURCE', 'TENANT.INVITE', 'READ'), (186, 'SPEED', 'RESOURCE', 'TENANT.INVITE', 'UPDATE'), -(187, 'SPEED', 'RESOURCE', 'TENANT.INVITE', 'DELETE'); +(187, 'SPEED', 'RESOURCE', 'TENANT.INVITE', 'DELETE'), +(188, 'SU', 'RESOURCE', 'INVITE.ASSET_OWNER', 'CREATE'), +(189, 'SU', 'RESOURCE', 'INVITE.ASSET_OWNER', 'READ'), +(190, 'SU', 'RESOURCE', 'INVITE.ASSET_OWNER', 'UPDATE'), +(191, 'SU', 'RESOURCE', 'INVITE.ASSET_OWNER', 'DELETE'), +(192, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/'), +(193, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/agents'), +(194, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/knowledges'), +(195, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/chat'), +(196, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/space'), +(197, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/market'), +(198, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/models'), +(199, 'ASSET_OWNER', 'RESOURCE', 'AGENT', 'CREATE'), +(200, 'ASSET_OWNER', 'RESOURCE', 'AGENT', 'READ'), +(201, 'ASSET_OWNER', 'RESOURCE', 'AGENT', 'UPDATE'), +(202, 'ASSET_OWNER', 'RESOURCE', 'AGENT', 'DELETE'), +(203, 'ASSET_OWNER', 'RESOURCE', 'SKILL', 'CREATE'), +(204, 'ASSET_OWNER', 'RESOURCE', 'SKILL', 'READ'), +(205, 'ASSET_OWNER', 'RESOURCE', 'SKILL', 'UPDATE'), +(206, 'ASSET_OWNER', 'RESOURCE', 'SKILL', 
'DELETE'), +(207, 'ASSET_OWNER', 'RESOURCE', 'KB', 'CREATE'), +(208, 'ASSET_OWNER', 'RESOURCE', 'KB', 'READ'), +(209, 'ASSET_OWNER', 'RESOURCE', 'KB', 'UPDATE'), +(210, 'ASSET_OWNER', 'RESOURCE', 'KB', 'DELETE'), +(211, 'ASSET_OWNER', 'RESOURCE', 'MCP', 'CREATE'), +(212, 'ASSET_OWNER', 'RESOURCE', 'MCP', 'READ'), +(213, 'ASSET_OWNER', 'RESOURCE', 'MCP', 'UPDATE'), +(214, 'ASSET_OWNER', 'RESOURCE', 'MCP', 'DELETE'), +(215, 'ASSET_OWNER', 'RESOURCE', 'MODEL', 'CREATE'), +(216, 'ASSET_OWNER', 'RESOURCE', 'MODEL', 'READ'), +(217, 'ASSET_OWNER', 'RESOURCE', 'MODEL', 'UPDATE'), +(218, 'ASSET_OWNER', 'RESOURCE', 'MODEL', 'DELETE'), +(219, 'ASSET_OWNER', 'RESOURCE', 'USER.ROLE', 'READ'), +(220, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/users'), +(221, 'SU', 'VISIBILITY', 'LEFT_NAV_MENU', '/asset-owner-resources') +; -- Insert SPEED role user into user_tenant_t table if not exists INSERT INTO nexent.user_tenant_t (user_id, tenant_id, user_role, user_email, created_by, updated_by) @@ -1090,11 +1253,14 @@ COMMENT ON COLUMN nexent.user_token_usage_log_t.delete_flag IS 'Soft delete flag CREATE TABLE IF NOT EXISTS nexent.ag_skill_info_t ( skill_id SERIAL4 PRIMARY KEY NOT NULL, skill_name VARCHAR(100) NOT NULL, + tenant_id VARCHAR(100), skill_description VARCHAR(1000), skill_tags JSON, skill_content TEXT, - params JSON, + config_schemas JSON, + config_values JSON, source VARCHAR(30) DEFAULT 'official', + tenant_id VARCHAR(100), created_by VARCHAR(100), create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_by VARCHAR(100), @@ -1109,11 +1275,13 @@ COMMENT ON TABLE nexent.ag_skill_info_t IS 'Skill information table for managing -- Add comments to the columns COMMENT ON COLUMN nexent.ag_skill_info_t.skill_id IS 'Skill ID, unique primary key'; -COMMENT ON COLUMN nexent.ag_skill_info_t.skill_name IS 'Skill name, globally unique'; +COMMENT ON COLUMN nexent.ag_skill_info_t.skill_name IS 'Skill name, unique within tenant'; +COMMENT ON COLUMN 
nexent.ag_skill_info_t.tenant_id IS 'Tenant ID for multi-tenancy. NULL for pre-existing skills.'; COMMENT ON COLUMN nexent.ag_skill_info_t.skill_description IS 'Skill description text'; COMMENT ON COLUMN nexent.ag_skill_info_t.skill_tags IS 'Skill tags stored as JSON array'; COMMENT ON COLUMN nexent.ag_skill_info_t.skill_content IS 'Skill content or prompt text'; -COMMENT ON COLUMN nexent.ag_skill_info_t.params IS 'Skill configuration parameters stored as JSON object'; +COMMENT ON COLUMN nexent.ag_skill_info_t.config_schemas IS 'Parameter metadata from config/schema.yaml'; +COMMENT ON COLUMN nexent.ag_skill_info_t.config_values IS 'Runtime parameter values from config/config.yaml'; COMMENT ON COLUMN nexent.ag_skill_info_t.source IS 'Skill source: official, custom, or partner'; COMMENT ON COLUMN nexent.ag_skill_info_t.created_by IS 'Creator ID'; COMMENT ON COLUMN nexent.ag_skill_info_t.create_time IS 'Creation timestamp'; @@ -1159,6 +1327,8 @@ CREATE TABLE IF NOT EXISTS nexent.ag_skill_instance_t ( tenant_id VARCHAR(100), enabled BOOLEAN DEFAULT TRUE, version_no INTEGER DEFAULT 0 NOT NULL, + config_values JSON, + config_schemas JSON, created_by VARCHAR(100), create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_by VARCHAR(100), @@ -1180,6 +1350,8 @@ COMMENT ON COLUMN nexent.ag_skill_instance_t.user_id IS 'User ID'; COMMENT ON COLUMN nexent.ag_skill_instance_t.tenant_id IS 'Tenant ID'; COMMENT ON COLUMN nexent.ag_skill_instance_t.enabled IS 'Whether this skill is enabled for the agent'; COMMENT ON COLUMN nexent.ag_skill_instance_t.version_no IS 'Version number. 
0 = draft/editing state, >=1 = published snapshot'; +COMMENT ON COLUMN nexent.ag_skill_instance_t.config_values IS 'Per-agent runtime parameter values from config/config.yaml'; +COMMENT ON COLUMN nexent.ag_skill_instance_t.config_schemas IS 'Per-agent parameter schema overrides from config/schema.yaml'; COMMENT ON COLUMN nexent.ag_skill_instance_t.created_by IS 'Creator ID'; COMMENT ON COLUMN nexent.ag_skill_instance_t.create_time IS 'Creation timestamp'; COMMENT ON COLUMN nexent.ag_skill_instance_t.updated_by IS 'Last updater ID'; @@ -1639,7 +1811,7 @@ EXECUTE FUNCTION update_user_oauth_account_t_update_time(); COMMENT ON TABLE nexent.user_oauth_account_t IS 'User OAuth account table - third-party login bindings'; COMMENT ON COLUMN nexent.user_oauth_account_t.oauth_account_id IS 'OAuth account ID, primary key'; COMMENT ON COLUMN nexent.user_oauth_account_t.user_id IS 'Nexent user ID (Supabase UUID)'; -COMMENT ON COLUMN nexent.user_oauth_account_t.provider IS 'OAuth provider name: github, wechat'; +COMMENT ON COLUMN nexent.user_oauth_account_t.provider IS 'OAuth provider name: github, wechat, gde, link_app'; COMMENT ON COLUMN nexent.user_oauth_account_t.provider_user_id IS 'User ID from the OAuth provider'; COMMENT ON COLUMN nexent.user_oauth_account_t.provider_email IS 'Email from the OAuth provider'; COMMENT ON COLUMN nexent.user_oauth_account_t.provider_username IS 'Display name from the OAuth provider'; @@ -1653,3 +1825,78 @@ COMMENT ON COLUMN nexent.user_oauth_account_t.delete_flag IS 'Whether it is dele -- Create index for user_id queries CREATE INDEX IF NOT EXISTS idx_user_oauth_account_t_user_id ON nexent.user_oauth_account_t (user_id); + +-- mcp_community_record_t: Community MCP market table +CREATE TABLE IF NOT EXISTS nexent.mcp_community_record_t ( + community_id SERIAL PRIMARY KEY NOT NULL, + tenant_id VARCHAR(100), + user_id VARCHAR(100), + mcp_name VARCHAR(100) NOT NULL, + mcp_server VARCHAR(500) NOT NULL, + source VARCHAR(30) DEFAULT 'community', + 
version VARCHAR(50), + registry_json JSONB, + transport_type VARCHAR(30), + config_json JSON, + tags TEXT[], + description TEXT, + create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + update_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + created_by VARCHAR(100), + updated_by VARCHAR(100), + delete_flag VARCHAR(1) DEFAULT 'N' +); + +ALTER TABLE nexent.mcp_community_record_t OWNER TO root; + +COMMENT ON TABLE nexent.mcp_community_record_t IS 'Community MCP market records, publishable from tenant MCP services'; +COMMENT ON COLUMN nexent.mcp_community_record_t.community_id IS 'Community record ID, unique primary key'; +COMMENT ON COLUMN nexent.mcp_community_record_t.tenant_id IS 'Publisher tenant ID'; +COMMENT ON COLUMN nexent.mcp_community_record_t.user_id IS 'Publisher user ID'; +COMMENT ON COLUMN nexent.mcp_community_record_t.mcp_name IS 'MCP name'; +COMMENT ON COLUMN nexent.mcp_community_record_t.mcp_server IS 'MCP server URL'; +COMMENT ON COLUMN nexent.mcp_community_record_t.source IS 'Source type, fixed to community for this table'; +COMMENT ON COLUMN nexent.mcp_community_record_t.version IS 'MCP version'; +COMMENT ON COLUMN nexent.mcp_community_record_t.registry_json IS 'Full MCP server metadata JSON for discovery and quick import'; +COMMENT ON COLUMN nexent.mcp_community_record_t.transport_type IS 'Transport type: url/container'; +COMMENT ON COLUMN nexent.mcp_community_record_t.config_json IS 'Public-shareable MCP configuration JSON'; +COMMENT ON COLUMN nexent.mcp_community_record_t.tags IS 'Tags'; +COMMENT ON COLUMN nexent.mcp_community_record_t.description IS 'Description'; +COMMENT ON COLUMN nexent.mcp_community_record_t.create_time IS 'Creation time'; +COMMENT ON COLUMN nexent.mcp_community_record_t.update_time IS 'Update time'; +COMMENT ON COLUMN nexent.mcp_community_record_t.created_by IS 'Creator ID'; +COMMENT ON COLUMN nexent.mcp_community_record_t.updated_by IS 'Updater ID'; +COMMENT ON COLUMN 
nexent.mcp_community_record_t.delete_flag IS 'Soft delete flag: Y/N'; + +CREATE INDEX IF NOT EXISTS idx_mcp_community_tenant_delete + ON nexent.mcp_community_record_t (tenant_id, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_community_name_delete + ON nexent.mcp_community_record_t (mcp_name, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_community_transport_delete + ON nexent.mcp_community_record_t (transport_type, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_community_user_delete + ON nexent.mcp_community_record_t (user_id, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_community_tags_gin + ON nexent.mcp_community_record_t USING GIN (tags); + +CREATE OR REPLACE FUNCTION update_mcp_community_record_update_time() +RETURNS TRIGGER AS $$ +BEGIN + NEW.update_time = CURRENT_TIMESTAMP; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION update_mcp_community_record_update_time() IS 'Auto-update update_time for mcp_community_record_t'; + +DROP TRIGGER IF EXISTS update_mcp_community_record_update_time_trigger ON nexent.mcp_community_record_t; +CREATE TRIGGER update_mcp_community_record_update_time_trigger +BEFORE UPDATE ON nexent.mcp_community_record_t +FOR EACH ROW +EXECUTE FUNCTION update_mcp_community_record_update_time(); + +COMMENT ON TRIGGER update_mcp_community_record_update_time_trigger ON nexent.mcp_community_record_t IS 'Trigger to maintain update_time'; diff --git a/docker/install-skills.sh b/docker/install-skills.sh deleted file mode 100644 index 565887df8..000000000 --- a/docker/install-skills.sh +++ /dev/null @@ -1,347 +0,0 @@ -#!/bin/bash - -# Script to install built-in skills from official-skills-zip directory -# This script should be called from deploy.sh with necessary environment variables - -# Note: We don't use set -e here because we want to handle errors gracefully - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -SKILLS_ZIP_DIR="$SCRIPT_DIR/official-skills-zip" -TOKEN_FILE="$SCRIPT_DIR/.access_token" - 
-# Source environment variables if .env file exists -if [ -f "$SCRIPT_DIR/.env" ]; then - set -a - source "$SCRIPT_DIR/.env" - set +a -fi - -sanitize_input() { - local input="$1" - printf "%s" "$input" | tr -d '\r' -} - -cleanup_token() { - # Securely remove access token files and clear variables - if [ -f "$TOKEN_FILE" ]; then - shred -f -u "$TOKEN_FILE" 2>/dev/null || rm -f "$TOKEN_FILE" - fi - unset ACCESS_TOKEN USER_PASSWORD -} - -# Cleanup on exit -trap cleanup_token EXIT INT TERM - -get_access_token() { - # Get access token based on user existence - # Returns: access_token ONLY (no log messages to stdout) - - local email="$1" - local password="$2" - - # Check if super admin user exists - local check_result - check_super_admin_user_exists "$email" - check_result=$? - - if [ $check_result -eq 0 ]; then - # User exists, sign in to get access token - local response - response=$(docker exec nexent-config bash -c "curl -s -X POST http://kong:8000/auth/v1/token?grant_type=password -H \"apikey: ${SUPABASE_KEY}\" -H \"Content-Type: application/json\" -d '{\"email\":\"${email}\",\"password\":\"${password}\"}'" 2>/dev/null) - - if echo "$response" | grep -q '"access_token"'; then - # Extract access_token ONLY - local access_token - access_token=$(echo "$response" | grep -o '"access_token":"[^"]*"' | sed -n 's/.*"access_token":"\([^"]*\)".*/\1/p') - unset response - echo "$access_token" - return 0 - else - unset response - echo " ❌ Failed to get access token from sign in response." >&2 - return 1 - fi - else - echo " ❌ Super admin user does not exist. Cannot get access token." >&2 - return 1 - fi -} - -check_super_admin_user_exists() { - # Check if super admin user exists in Supabase - local email="${1:-suadmin@nexent.com}" - - # Determine which container to use for curl command - local curl_container="nexent-config" - if [ "$DEPLOYMENT_MODE" = "infrastructure" ] || ! 
docker ps | grep -q "nexent-config"; then - if docker ps | grep -q "supabase-db-mini"; then - curl_container="supabase-db-mini" - else - return 2 # Unknown status - fi - fi - - # Try to query Supabase auth.users table directly (most reliable) - if [ "$DEPLOYMENT_VERSION" = "full" ] && docker ps | grep -q "supabase-db-mini"; then - local user_exists - user_exists=$(docker exec supabase-db-mini psql -U postgres -d "$SUPABASE_POSTGRES_DB" -t -c "SELECT COUNT(*) FROM auth.users WHERE email = '${email}';" 2>/dev/null | tr -d '[:space:]') - if [ "$user_exists" = "1" ]; then - return 0 # User exists - elif [ "$user_exists" = "0" ]; then - return 1 # User does not exist - fi - fi - - # Fallback: Try to sign in with a dummy password to check if user exists - local test_response - test_response=$(docker exec "$curl_container" bash -c "curl -s -X POST http://kong:8000/auth/v1/token?grant_type=password -H \"apikey: ${SUPABASE_KEY}\" -H \"Content-Type: application/json\" -d '{\"email\":\"${email}\",\"password\":\"dummy_password_check\"}'" 2>/dev/null) - - if echo "$test_response" | grep -q '"error_code":"invalid_credentials"'; then - return 0 # User exists (wrong password means user exists) - elif echo "$test_response" | grep -q '"error_code":"email_not_confirmed"'; then - return 0 # User exists - else - return 1 # User likely does not exist - fi -} - -install_skills() { - # Main function to install built-in skills - local access_token="$1" - - echo "🔧 Installing built-in skills..." - - # Check if skills zip directory exists - if [ ! -d "$SKILLS_ZIP_DIR" ]; then - echo " ⚠️ Warning: official-skills-zip directory not found at $SKILLS_ZIP_DIR" - echo " 💡 Please ensure the skills zip files are available." 
- return 1 - fi - - # Collect all zip files into an array - local skills_to_install=() - local skill_file - for skill_file in "$SKILLS_ZIP_DIR"/*.zip; do - if [ -f "$skill_file" ]; then - skills_to_install+=("$skill_file") - fi - done - - if [ ${#skills_to_install[@]} -eq 0 ]; then - echo " ⚠️ Warning: No skill zip files found in $SKILLS_ZIP_DIR" - return 1 - fi - - echo " 📦 Found ${#skills_to_install[@]} skills to install:" - local idx - for idx in "${!skills_to_install[@]}"; do - local skill_name - skill_name=$(basename "${skills_to_install[$idx]}" .zip) - echo " $((idx + 1)). $skill_name" - done - echo "" - - # Wait for nexent-config container to be ready - echo " ⏳ Waiting for nexent-config container to be ready..." - local retries=0 - local max_retries=60 - while ! docker exec nexent-config echo "ready" >/dev/null 2>&1 && [ $retries -lt $max_retries ]; do - echo " ⏳ Waiting for nexent-config... (attempt $((retries + 1))/$max_retries)" - sleep 5 - retries=$((retries + 1)) - done - - if [ $retries -eq $max_retries ]; then - echo " ❌ Error: nexent-config container is not available" - return 1 - fi - echo " ✅ nexent-config container is ready" - - # Query installed skills to skip already installed ones - echo "" - echo " 📋 Checking installed skills..." 
- local installed_skills="" - local list_result - list_result=$(docker exec nexent-config bash -c \ - "curl -s -X GET 'http://localhost:5010/skills' \ - -H \"Authorization: Bearer ${access_token}\" \ - -H 'Content-Type: application/json' 2>&1") - - if echo "$list_result" | grep -q '"skills"'; then - # Extract skill names from the response - installed_skills=$(echo "$list_result" | grep -o '"name":"[^"]*"' | sed 's/"name":"//g' | sed 's/"//g' | tr '\n' ' ') - echo " ✅ Found $(echo "$installed_skills" | wc -w) installed skills" - else - echo " ⚠️ Could not fetch installed skills list, will install all" - # Log for debugging - echo " [DEBUG] List response: $list_result" >> /tmp/install-debug.log 2>/dev/null - fi - - # Copy skills zip files to container's temp directory - local temp_dir="/tmp/official-skills-zip" - echo "" - echo " 📦 Copying skill files to container..." - local all_copied=true - local skip_copy_count=0 - for skill_file in "${skills_to_install[@]}"; do - local skill_name - skill_name=$(basename "$skill_file" .zip) - - # Check if skill is already installed - if echo "$installed_skills" | grep -qw "$skill_name"; then - echo " ⏭️ $skill_name - skipped" - skip_copy_count=$((skip_copy_count + 1)) - continue - fi - - # Create temp directory first - docker exec nexent-config bash -c "mkdir -p $temp_dir && chmod 777 $temp_dir" >/dev/null 2>&1 - - # Copy file - if docker cp "$skill_file" "nexent-config:${temp_dir}/${skill_name}.zip" 2>/dev/null; then - echo -n " Copying $skill_name... ✅" - echo "" - else - echo -n " Copying $skill_name... ❌" - echo "" - echo " Failed to copy file to container" - all_copied=false - fi - done - - if [ "$all_copied" = false ]; then - echo " ⚠️ Some files failed to copy" - fi - - # Install each skill - echo "" - echo " 🚀 Installing skills..." 
- local success_count=0 - local fail_count=0 - local skip_count=0 - - for skill_file in "${skills_to_install[@]}"; do - local skill_name - skill_name=$(basename "$skill_file" .zip) - local full_path="${temp_dir}/${skill_name}.zip" - - # Check if skill is already installed - if echo "$installed_skills" | grep -qw "$skill_name"; then - echo " ⏭️ $skill_name - skipped" - skip_count=$((skip_count + 1)) - continue - fi - - echo -n " Installing $skill_name... " - - # Check if file exists in container - local file_exists - local file_size - file_exists=$(docker exec nexent-config bash -c "test -f '${full_path}' && echo 'yes' || echo 'no'" 2>/dev/null) - file_size=$(docker exec nexent-config bash -c "stat -c%s '${full_path}' 2>/dev/null || stat -f%z '${full_path}' 2>/dev/null || echo 'unknown'" 2>/dev/null) - - if [ "$file_exists" != "yes" ]; then - echo "❌" - echo " File not found in container at ${full_path}" - fail_count=$((fail_count + 1)) - continue - fi - - if [ "$file_size" = "0" ] || [ "$file_size" = "unknown" ]; then - echo "❌" - echo " File is empty or size unknown (${file_size} bytes)" - fail_count=$((fail_count + 1)) - continue - fi - - # Call the upload API with source="官方" - local result - local debug_log="/tmp/install-debug.log" - - # Log the request details - echo " [DEBUG] Uploading: $skill_name" >> "$debug_log" - echo " File: $full_path" >> "$debug_log" - echo " Token prefix: ${access_token:0:20}..." >> "$debug_log" - - # Run curl - variables must be in double quotes to expand - result=$(docker exec nexent-config bash -c \ - "curl -v -X POST 'http://localhost:5010/skills/upload' \ - -H \"Authorization: Bearer ${access_token}\" \ - -F \"file=@${full_path}\" \ - -F 'source=官方' 2>&1") - local curl_exit_code=$? 
- - echo " Curl exit code: $curl_exit_code" >> "$debug_log" - echo " Response: $result" >> "$debug_log" - echo "---" >> "$debug_log" - - # Check if installation was successful - if echo "$result" | grep -q '"success":true\|"id"\|"name"\|"skill_id"'; then - echo "✅" - success_count=$((success_count + 1)) - elif echo "$result" | grep -q '"error"\|"message"\|"detail"'; then - echo "❌" - # Extract error message - local error_msg - error_msg=$(echo "$result" | grep -o '"message":"[^"]*"\|"detail":"[^"]*"' | head -1 | sed 's/"//g' | cut -d':' -f2-) - if [ -z "$error_msg" ]; then - error_msg="$result" - fi - echo " $error_msg" - fail_count=$((fail_count + 1)) - elif echo "$result" | grep -q '{.*}' 2>/dev/null; then - echo "✅" - success_count=$((success_count + 1)) - else - echo "❌" - echo " Unknown response: $result" - fail_count=$((fail_count + 1)) - fi - done - - # Cleanup temp directory - docker exec nexent-config bash -c "rm -rf $temp_dir" 2>/dev/null - - echo "" - echo " 📊 Installation Summary:" - echo " ⏭️ Skipped: $skip_count" - echo " ✅ Success: $success_count" - echo " ❌ Failed: $fail_count" - echo "" -} - -# Main execution -if [ $# -lt 1 ]; then - echo "Usage: $0 [email] [password]" - echo " access_token: Bearer token for API authentication (required)" - echo " email: User email for sign-in (optional, for existing users)" - echo " password: User password for sign-in (optional, for existing users)" - exit 1 -fi - -ACCESS_TOKEN="$1" -USER_EMAIL="${2:-suadmin@nexent.com}" -USER_PASSWORD="$3" - -# If access token is "GET_TOKEN", we need to get it via sign-in -if [ "$ACCESS_TOKEN" = "GET_TOKEN" ]; then - if [ -z "$USER_PASSWORD" ]; then - echo "❌ Error: Password required to get access token for existing user." - exit 1 - fi - - echo -n "🔐 Getting access token... " - ACCESS_TOKEN=$(get_access_token "$USER_EMAIL" "$USER_PASSWORD") - if [ -z "$ACCESS_TOKEN" ]; then - echo "❌" - echo "❌ Error: Failed to get access token." 
- exit 1 - fi - echo "✅" -fi - -if install_skills "$ACCESS_TOKEN"; then - exit 0 -else - exit 1 -fi diff --git a/docker/monitoring/grafana/dashboards/nexent-llm-agent.json b/docker/monitoring/grafana/dashboards/nexent-llm-agent.json new file mode 100644 index 000000000..d4e2c321b --- /dev/null +++ b/docker/monitoring/grafana/dashboards/nexent-llm-agent.json @@ -0,0 +1,150 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Nexent Agent traces backed by Grafana Tempo.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [ + { + "asDropdown": false, + "icon": "external link", + "includeVars": false, + "keepTime": true, + "tags": [], + "targetBlank": false, + "title": "Open Tempo Explore", + "tooltip": "Open Grafana Explore with the Tempo datasource", + "type": "link", + "url": "/explore?left=%7B%22datasource%22:%22Tempo%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22query%22:%22%7B%20resource.service.name%20%3D%20%5C%22nexent-backend%5C%22%20%7D%22,%22queryType%22:%22traceql%22%7D%5D%7D" + } + ], + "panels": [ + { + "datasource": { + "type": "tempo", + "uid": "Tempo" + }, + "description": "Recent traces for Nexent backend. 
Open a trace row to inspect the agent, chain, LLM, and tool span waterfall.", + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 16, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "tempo", + "uid": "Tempo" + }, + "limit": 100, + "query": "{ resource.service.name = \"nexent-backend\" }", + "queryType": "traceql", + "refId": "A", + "tableType": "traces" + } + ], + "title": "Recent Agent Traces", + "type": "table" + }, + { + "description": "TraceQL shortcuts for common Nexent views.", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 2, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "Service traces:\n{ resource.service.name = \"nexent-backend\" }\n\nAgent spans:\n{ resource.service.name = \"nexent-backend\" && span.openinference.span.kind = \"AGENT\" }\n\nLLM spans:\n{ resource.service.name = \"nexent-backend\" && span.openinference.span.kind = \"LLM\" }\n\nTool spans:\n{ resource.service.name = \"nexent-backend\" && span.openinference.span.kind = \"TOOL\" }\n\nError traces:\n{ resource.service.name = \"nexent-backend\" && status = error }", + "mode": "markdown" + }, + "pluginVersion": "11.0.0", + "title": "TraceQL Examples", + "type": "text" + } + ], + "preload": false, + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "nexent", + "agent", + "tempo" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + 
"timepicker": {}, + "timezone": "browser", + "title": "Nexent Agent Trace Monitoring", + "uid": "nexent-llm-agent", + "version": 1, + "weekStart": "" +} diff --git a/docker/monitoring/grafana/dashboards/nexent-llm-performance.json b/docker/monitoring/grafana/dashboards/nexent-llm-performance.json deleted file mode 100644 index ec8d0434a..000000000 --- a/docker/monitoring/grafana/dashboards/nexent-llm-performance.json +++ /dev/null @@ -1,544 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": null, - "links": [], - "liveNow": false, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 1, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": 
"none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "histogram_quantile(0.95, rate(llm_request_duration_seconds_bucket[5m]))", - "interval": "", - "legendFormat": "95th percentile", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "histogram_quantile(0.50, rate(llm_request_duration_seconds_bucket[5m]))", - "interval": "", - "legendFormat": "50th percentile (median)", - "refId": "B" - } - ], - "title": "LLM Request Duration", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "tokens/s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "histogram_quantile(0.95, rate(llm_token_generation_rate_bucket[5m]))", - "interval": "", - "legendFormat": "95th percentile", - "refId": "A" - }, - { - 
"datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "histogram_quantile(0.50, rate(llm_token_generation_rate_bucket[5m]))", - "interval": "", - "legendFormat": "50th percentile (median)", - "refId": "B" - } - ], - "title": "Token Generation Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 - }, - "id": 3, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "histogram_quantile(0.95, rate(llm_time_to_first_token_seconds_bucket[5m]))", - "interval": "", - "legendFormat": "95th percentile TTFT", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "histogram_quantile(0.50, rate(llm_time_to_first_token_seconds_bucket[5m]))", - "interval": "", - "legendFormat": "50th percentile TTFT", - "refId": "B" - } - ], - "title": "Time to First Token 
(TTFT)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "tokens" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 - }, - "id": 4, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "rate(llm_total_tokens_total{type=\"input\"}[5m])", - "interval": "", - "legendFormat": "Input tokens/sec", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "rate(llm_total_tokens_total{type=\"output\"}[5m])", - "interval": "", - "legendFormat": "Output tokens/sec", - "refId": "B" - } - ], - "title": "Token Throughput", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - 
"drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "errors/sec" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 16 - }, - "id": 5, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "rate(llm_error_count_total[5m])", - "interval": "", - "legendFormat": "Error rate by model: {{model}}", - "refId": "A" - } - ], - "title": "LLM Error Rate", - "type": "timeseries" - } - ], - "refresh": "5s", - "schemaVersion": 37, - "style": "dark", - "tags": ["nexent", "llm", "performance"], - "templating": { - "list": [] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "Nexent LLM Performance Dashboard", - "uid": "nexent-llm-perf", - "version": 1, - "weekStart": "" -} - diff --git a/docker/monitoring/grafana/provisioning/dashboards/dashboards.yml b/docker/monitoring/grafana/provisioning/dashboards/dashboards.yml index b89a1fa81..b863e9d16 100644 --- a/docker/monitoring/grafana/provisioning/dashboards/dashboards.yml +++ b/docker/monitoring/grafana/provisioning/dashboards/dashboards.yml @@ -1,13 +1,12 @@ apiVersion: 1 providers: - - name: 'Nexent LLM Monitoring' + - name: Nexent Monitoring orgId: 1 - folder: 'Nexent' + folder: Nexent type: file 
disableDeletion: false - updateIntervalSeconds: 10 + updateIntervalSeconds: 30 allowUiUpdates: true options: path: /var/lib/grafana/dashboards - diff --git a/docker/monitoring/grafana/provisioning/datasources/datasources.yml b/docker/monitoring/grafana/provisioning/datasources/datasources.yml index 9bdc40d61..d23e4cba9 100644 --- a/docker/monitoring/grafana/provisioning/datasources/datasources.yml +++ b/docker/monitoring/grafana/provisioning/datasources/datasources.yml @@ -1,16 +1,23 @@ apiVersion: 1 datasources: - - name: Prometheus - type: prometheus + - name: Tempo + uid: Tempo + type: tempo access: proxy - url: http://prometheus:9090 + url: http://nexent-tempo:3200 isDefault: true editable: true - - - name: Jaeger - type: jaeger - access: proxy - url: http://jaeger:16686 - editable: true - + basicAuth: false + jsonData: + nodeGraph: + enabled: true + search: + hide: false + traceQuery: + timeShiftEnabled: true + spanStartTimeShift: "-1h" + spanEndTimeShift: "1h" + streamingEnabled: + search: false + metrics: false diff --git a/docker/monitoring/monitoring.env b/docker/monitoring/monitoring.env deleted file mode 100644 index 2506c03a6..000000000 --- a/docker/monitoring/monitoring.env +++ /dev/null @@ -1,21 +0,0 @@ -# Telemetry and Monitoring Configuration -ENABLE_TELEMETRY=true -SERVICE_NAME=nexent-backend -JAEGER_ENDPOINT=http://localhost:14268/api/traces -PROMETHEUS_PORT=8000 -TELEMETRY_SAMPLE_RATE=1.0 - -# Performance monitoring thresholds -LLM_SLOW_REQUEST_THRESHOLD_SECONDS=5.0 -LLM_SLOW_TOKEN_RATE_THRESHOLD=10.0 - -# Grafana Configuration -GF_SECURITY_ADMIN_PASSWORD=admin -GF_USERS_ALLOW_SIGN_UP=false - -# Service ports -JAEGER_UI_PORT=16686 -PROMETHEUS_UI_PORT=9090 -GRAFANA_UI_PORT=3000 -OTEL_COLLECTOR_GRPC_PORT=4317 -OTEL_COLLECTOR_HTTP_PORT=4318 diff --git a/docker/monitoring/monitoring.env.example b/docker/monitoring/monitoring.env.example index 26ab041c8..17f75a3c9 100644 --- a/docker/monitoring/monitoring.env.example +++ 
b/docker/monitoring/monitoring.env.example @@ -1,22 +1,72 @@ -# Telemetry and Monitoring Configuration -ENABLE_TELEMETRY=true -SERVICE_NAME=nexent-backend -JAEGER_ENDPOINT=http://localhost:14268/api/traces -PROMETHEUS_PORT=8000 -TELEMETRY_SAMPLE_RATE=1.0 +# Monitoring stack selector for ./start-monitoring.sh. +# Supported values: otlp, collector, phoenix, langfuse, langsmith, grafana, zipkin. +MONITORING_PROVIDER=otlp -# Performance monitoring thresholds -LLM_SLOW_REQUEST_THRESHOLD_SECONDS=5.0 -LLM_SLOW_TOKEN_RATE_THRESHOLD=10.0 - -# Grafana Configuration -GF_SECURITY_ADMIN_PASSWORD=admin -GF_USERS_ALLOW_SIGN_UP=false - -# Service ports -JAEGER_UI_PORT=16686 -PROMETHEUS_UI_PORT=9090 -GRAFANA_UI_PORT=3000 OTEL_COLLECTOR_GRPC_PORT=4317 OTEL_COLLECTOR_HTTP_PORT=4318 +OTEL_COLLECTOR_CONFIG_FILE= +OTEL_COLLECTOR_VERSION=0.151.0 + +# Local Phoenix stack. Used by: ./start-monitoring.sh --stack phoenix +PHOENIX_VERSION=15 +PHOENIX_PORT=6006 +PHOENIX_GRPC_HOST_PORT=4319 + +# Local Langfuse stack. Used by: ./start-monitoring.sh --stack langfuse +# Defaults are for local development only. Replace secrets before production use. 
+LANGFUSE_VERSION=3 +LANGFUSE_PORT=3001 +LANGFUSE_NEXTAUTH_URL=http://localhost:3001 +LANGFUSE_NEXTAUTH_SECRET=nexent-langfuse-secret +LANGFUSE_SALT=nexent-langfuse-salt +LANGFUSE_ENCRYPTION_KEY=0000000000000000000000000000000000000000000000000000000000000000 +LANGFUSE_TELEMETRY_ENABLED=false +LANGFUSE_ENABLE_EXPERIMENTAL_FEATURES=false +LANGFUSE_INIT_ORG_ID=nexent +LANGFUSE_INIT_ORG_NAME=Nexent +LANGFUSE_INIT_PROJECT_ID=nexent +LANGFUSE_INIT_PROJECT_NAME=Nexent +LANGFUSE_INIT_PROJECT_PUBLIC_KEY=pk-lf-nexent-local +LANGFUSE_INIT_PROJECT_SECRET_KEY=sk-lf-nexent-local +LANGFUSE_INIT_USER_EMAIL=admin@nexent.com +LANGFUSE_INIT_USER_NAME=admin +LANGFUSE_INIT_USER_PASSWORD=nexent@4321 +LANGFUSE_OTLP_AUTH_HEADER= +LANGFUSE_POSTGRES_VERSION=15-alpine +LANGFUSE_POSTGRES_USER=postgres +LANGFUSE_POSTGRES_PASSWORD=nexent@4321 +LANGFUSE_POSTGRES_DB=postgres +LANGFUSE_POSTGRES_PORT=5440 +LANGFUSE_CLICKHOUSE_VERSION=26.3-alpine +LANGFUSE_CLICKHOUSE_USER=clickhouse +LANGFUSE_CLICKHOUSE_PASSWORD=clickhouse +LANGFUSE_CLICKHOUSE_HTTP_PORT=8124 +LANGFUSE_CLICKHOUSE_NATIVE_PORT=9002 +LANGFUSE_MINIO_VERSION=RELEASE.2023-12-20T01-00-02Z +LANGFUSE_MINIO_ROOT_USER=minio +LANGFUSE_MINIO_ROOT_PASSWORD=miniosecret +LANGFUSE_MINIO_API_PORT=9092 +LANGFUSE_MINIO_CONSOLE_PORT=9093 +LANGFUSE_S3_BUCKET=langfuse +LANGFUSE_REDIS_AUTH=myredissecret +LANGFUSE_REDIS_VERSION=alpine +LANGFUSE_REDIS_PORT=6380 + +# Online LangSmith forwarding. Used by: ./start-monitoring.sh --stack langsmith +# LangSmith currently ingests OTLP traces. Metrics remain in the Collector debug pipeline. +LANGSMITH_API_KEY= +LANGSMITH_PROJECT=nexent +LANGSMITH_OTLP_TRACES_ENDPOINT=https://api.smith.langchain.com/otel/v1/traces + +# Local Grafana stack. Used by: ./start-monitoring.sh --stack grafana +GRAFANA_VERSION=12.4 +GRAFANA_PORT=3002 +GRAFANA_ADMIN_USER=admin +GRAFANA_ADMIN_PASSWORD=nexent@4321 +GRAFANA_DEFAULT_LANGUAGE=zh-Hans +TEMPO_VERSION=2.10.5 +TEMPO_PORT=3200 +# Local Zipkin stack. 
Used by: ./start-monitoring.sh --stack zipkin +ZIPKIN_VERSION=latest +ZIPKIN_PORT=9411 diff --git a/docker/monitoring/otel-collector-config.yml b/docker/monitoring/otel-collector-config.yml index f14f427b5..8d2332361 100644 --- a/docker/monitoring/otel-collector-config.yml +++ b/docker/monitoring/otel-collector-config.yml @@ -5,22 +5,16 @@ receivers: endpoint: 0.0.0.0:4317 http: endpoint: 0.0.0.0:4318 - - # Prometheus receiver to collect metrics from instrumented apps - prometheus: - config: - scrape_configs: - - job_name: 'nexent-backend-otel' - static_configs: - - targets: ['host.docker.internal:8000'] - scrape_interval: 5s processors: batch: timeout: 1s send_batch_size: 512 - - # Resource processor to add common attributes + + memory_limiter: + limit_mib: 256 + check_interval: 1s + resource: attributes: - key: service.name @@ -30,51 +24,71 @@ processors: from_attribute: version action: insert - # Memory limiter to prevent OOM - memory_limiter: - limit_mib: 256 - check_interval: 1s - - # Add attributes specifically for LLM monitoring - attributes: - actions: - - key: llm.system - value: openai - action: insert - - key: deployment.environment - value: development - action: insert - exporters: - # Export traces to Jaeger via OTLP - otlp/jaeger: - endpoint: jaeger:14250 - tls: - insecure: true - - # Export metrics to Prometheus - prometheus: - endpoint: "0.0.0.0:8889" - resource_to_telemetry_conversion: - enabled: true - - # Logging exporter for debugging - logging: + debug: verbosity: normal service: - extensions: [] pipelines: traces: receivers: [otlp] processors: [memory_limiter, resource, batch] - exporters: [otlp/jaeger, logging] - + exporters: [debug] + metrics: - receivers: [otlp, prometheus] - processors: [memory_limiter, resource, attributes, batch] - exporters: [prometheus, logging] - + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [debug] + telemetry: logs: level: "info" + +# Example configurations for AI observability 
platforms: +# +# === Arize Phoenix === +# Set environment variables: +# OTEL_EXPORTER_OTLP_ENDPOINT=https://app.phoenix.arize.com/s/YOUR_SPACE +# OTEL_EXPORTER_OTLP_AUTHORIZATION=Bearer YOUR_PHOENIX_API_KEY +# OTEL_EXPORTER_OTLP_METRICS_ENABLED=false +# +# Or configure directly in exporters section: +# otlphttp/arize: +# endpoint: https://app.phoenix.arize.com/s/YOUR_SPACE +# headers: +# Authorization: Bearer YOUR_PHOENIX_API_KEY +# Then add otlphttp/arize to the traces pipeline exporters. +# +# === Langfuse === +# Set environment variables: +# OTEL_EXPORTER_OTLP_ENDPOINT=https://cloud.langfuse.com/api/public/otel +# OTEL_EXPORTER_OTLP_AUTHORIZATION=Basic BASE64_ENCODED_KEY +# OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION=4 +# +# Where BASE64_ENCODED_KEY = base64(public_key:secret_key) +# +# Or configure directly: +# otlphttp/langfuse: +# endpoint: https://cloud.langfuse.com/api/public/otel +# headers: +# Authorization: Basic BASE64_ENCODED_KEY +# x-langfuse-ingestion-version: "4" +# Then add otlphttp/langfuse to the traces pipeline exporters. +# +# === LangSmith === +# Set environment variables: +# LANGSMITH_API_KEY=lsv2_... +# LANGSMITH_PROJECT=nexent +# +# Or configure directly: +# otlphttp/langsmith: +# traces_endpoint: https://api.smith.langchain.com/otel/v1/traces +# headers: +# x-api-key: YOUR_LANGSMITH_API_KEY +# Langsmith-Project: nexent +# Then add otlphttp/langsmith to the traces pipeline exporters. 
+# +# === Multiple Exporters === +# To export to multiple backends simultaneously, create multiple exporters +# and add them to the pipelines: +# exporters: [otlphttp/arize, otlphttp/langfuse, otlphttp/langsmith, debug] diff --git a/docker/monitoring/otel-collector-grafana-config.yml b/docker/monitoring/otel-collector-grafana-config.yml new file mode 100644 index 000000000..d69e69811 --- /dev/null +++ b/docker/monitoring/otel-collector-grafana-config.yml @@ -0,0 +1,50 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + batch: + timeout: 1s + send_batch_size: 512 + + memory_limiter: + limit_mib: 256 + check_interval: 1s + + resource: + attributes: + - key: service.name + value: nexent-backend + action: upsert + - key: service.version + from_attribute: version + action: insert + +exporters: + debug: + verbosity: normal + + otlp/tempo: + endpoint: tempo:4317 + tls: + insecure: true + +service: + pipelines: + traces: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [otlp/tempo, debug] + + metrics: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [debug] + + telemetry: + logs: + level: "info" diff --git a/docker/monitoring/otel-collector-langfuse-config.yml b/docker/monitoring/otel-collector-langfuse-config.yml new file mode 100644 index 000000000..9304d93e9 --- /dev/null +++ b/docker/monitoring/otel-collector-langfuse-config.yml @@ -0,0 +1,69 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + batch: + timeout: 1s + send_batch_size: 512 + + memory_limiter: + limit_mib: 256 + check_interval: 1s + + resource: + attributes: + - key: service.name + value: nexent-backend + action: upsert + - key: service.version + from_attribute: version + action: insert + +exporters: + debug: + verbosity: normal + + otlphttp/langfuse: + endpoint: http://langfuse-web:3000/api/public/otel + 
headers: + Authorization: ${env:LANGFUSE_OTLP_AUTH_HEADER} + x-langfuse-ingestion-version: "4" + # 1. Timeout + # Keeps the Collector from waiting too long on a slow backend, which would let goroutines pile up + timeout: 5s + + # 2. Sending queue + # Buffers data in Collector memory while the backend is slow + sending_queue: + enabled: true + num_consumers: 10 # Number of concurrent sender workers (raises export throughput) + queue_size: 5000 # Maximum number of batches the queue can hold; when it is full, new data is dropped! + + # 3. Retry on failure + # Retries with exponential backoff on transient errors such as network jitter or a 503 from the backend + retry_on_failure: + enabled: true + initial_interval: 1s # First retry after 1s + max_interval: 30s # Retry interval is capped at 30s + max_elapsed_time: 300s # Each item is retried for at most 5 minutes, then given up on and dropped + +service: + pipelines: + traces: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [otlphttp/langfuse, debug] + + metrics: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [debug] + + telemetry: + logs: + level: "info" diff --git a/docker/monitoring/otel-collector-langsmith-config.yml b/docker/monitoring/otel-collector-langsmith-config.yml new file mode 100644 index 000000000..28222c1cf --- /dev/null +++ b/docker/monitoring/otel-collector-langsmith-config.yml @@ -0,0 +1,63 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + batch: + timeout: 1s + send_batch_size: 512 + + memory_limiter: + limit_mib: 256 + check_interval: 1s + + resource: + attributes: + - key: service.name + value: nexent-backend + action: upsert + - key: service.version + from_attribute: version + action: insert + +exporters: + debug: + verbosity: normal + + otlphttp/langsmith: + traces_endpoint: ${env:LANGSMITH_OTLP_TRACES_ENDPOINT} + headers: + x-api-key: ${env:LANGSMITH_API_KEY} + Langsmith-Project: ${env:LANGSMITH_PROJECT} + timeout: 10s + + sending_queue: + enabled: true + num_consumers: 10 + queue_size: 5000 + + retry_on_failure: + enabled: true + initial_interval: 1s + max_interval: 30s + max_elapsed_time: 300s + +service: + pipelines: + traces: + receivers: [otlp] + processors:
[memory_limiter, resource, batch] + exporters: [otlphttp/langsmith, debug] + + metrics: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [debug] + + telemetry: + logs: + level: "info" diff --git a/docker/monitoring/otel-collector-phoenix-config.yml b/docker/monitoring/otel-collector-phoenix-config.yml new file mode 100644 index 000000000..0682a6e4d --- /dev/null +++ b/docker/monitoring/otel-collector-phoenix-config.yml @@ -0,0 +1,66 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + batch: + timeout: 1s + send_batch_size: 512 + + memory_limiter: + limit_mib: 256 + check_interval: 1s + + resource: + attributes: + - key: service.name + value: nexent-backend + action: upsert + - key: service.version + from_attribute: version + action: insert + +exporters: + debug: + verbosity: normal + + otlphttp/phoenix: + endpoint: http://phoenix:6006 + # 1. Timeout + # Keeps the Collector from waiting too long on a slow backend, which would let goroutines pile up + timeout: 5s + + # 2. Sending queue + # Buffers data in Collector memory while the backend is slow + sending_queue: + enabled: true + num_consumers: 10 # Number of concurrent sender workers (raises export throughput) + queue_size: 5000 # Maximum number of batches the queue can hold; when it is full, new data is dropped! + + # 3.
Retry on failure + # Retries with exponential backoff on transient errors such as network jitter or a 503 from the backend + retry_on_failure: + enabled: true + initial_interval: 1s # First retry after 1s + max_interval: 30s # Retry interval is capped at 30s + max_elapsed_time: 300s # Each item is retried for at most 5 minutes, then given up on and dropped + +service: + pipelines: + traces: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [otlphttp/phoenix, debug] + + metrics: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [debug] + + telemetry: + logs: + level: "info" diff --git a/docker/monitoring/otel-collector-zipkin-config.yml b/docker/monitoring/otel-collector-zipkin-config.yml new file mode 100644 index 000000000..ab26a84a9 --- /dev/null +++ b/docker/monitoring/otel-collector-zipkin-config.yml @@ -0,0 +1,49 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + batch: + timeout: 1s + send_batch_size: 512 + + memory_limiter: + limit_mib: 256 + check_interval: 1s + + resource: + attributes: + - key: service.name + value: nexent-backend + action: upsert + - key: service.version + from_attribute: version + action: insert + +exporters: + debug: + verbosity: normal + + zipkin: + endpoint: http://zipkin:9411/api/v2/spans + format: proto + +service: + pipelines: + traces: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [zipkin, debug] + + metrics: + receivers: [otlp] + processors: [memory_limiter, resource, batch] + exporters: [debug] + + telemetry: + logs: + level: "info" \ No newline at end of file diff --git a/docker/monitoring/prometheus.yml b/docker/monitoring/prometheus.yml deleted file mode 100644 index 49258c097..000000000 --- a/docker/monitoring/prometheus.yml +++ /dev/null @@ -1,39 +0,0 @@ -global: - scrape_interval: 15s - evaluation_interval: 15s - -rule_files: - # Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
- - "nexent_alerts.yml" - -scrape_configs: - # Nexent Backend - LLM Metrics - - job_name: 'nexent-backend' - static_configs: - - targets: ['host.docker.internal:8000'] # Adjust based on your backend service - scrape_interval: 15s - metrics_path: /metrics - scrape_timeout: 10s - - # OpenTelemetry Collector - - job_name: 'otel-collector' - static_configs: - - targets: ['otel-collector:8888'] - scrape_interval: 10s - - # Prometheus self-monitoring - - job_name: 'prometheus' - static_configs: - - targets: ['localhost:9090'] - - # Jaeger Metrics - - job_name: 'jaeger' - static_configs: - - targets: ['jaeger:14269'] - -# Alertmanager configuration (optional) -# alerting: -# alertmanagers: -# - static_configs: -# - targets: -# - alertmanager:9093 diff --git a/docker/monitoring/tempo.yml b/docker/monitoring/tempo.yml new file mode 100644 index 000000000..414ea42b9 --- /dev/null +++ b/docker/monitoring/tempo.yml @@ -0,0 +1,43 @@ +target: all +multitenancy_enabled: false +stream_over_http_enabled: true + +server: + http_listen_port: 3200 + +distributor: + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +metrics_generator: + ring: + kvstore: + store: inmemory + storage: + path: /var/tempo/generator/wal + remote_write: [] + traces_storage: + path: /var/tempo/generator/traces + processor: + local_blocks: + filter_server_spans: false + flush_to_storage: true + +storage: + trace: + backend: local + wal: + path: /var/tempo/wal + local: + path: /var/tempo/blocks + +overrides: + defaults: + metrics_generator: + processors: + - local-blocks diff --git a/docker/official-skills-zip/analyze-image.zip b/docker/official-skills-zip/analyze-image.zip index a7fb09e15..9ec4c2fb1 100644 Binary files a/docker/official-skills-zip/analyze-image.zip and b/docker/official-skills-zip/analyze-image.zip differ diff --git a/docker/official-skills-zip/analyze-text-file.zip b/docker/official-skills-zip/analyze-text-file.zip index 0cd1beb19..8c4478872 
100644 Binary files a/docker/official-skills-zip/analyze-text-file.zip and b/docker/official-skills-zip/analyze-text-file.zip differ diff --git a/docker/official-skills-zip/create-file-directory.zip b/docker/official-skills-zip/create-file-directory.zip index 0995449b9..1e2d21ef0 100644 Binary files a/docker/official-skills-zip/create-file-directory.zip and b/docker/official-skills-zip/create-file-directory.zip differ diff --git a/docker/official-skills-zip/delete-file-directory.zip b/docker/official-skills-zip/delete-file-directory.zip index 0da9ba8fc..0f0067d02 100644 Binary files a/docker/official-skills-zip/delete-file-directory.zip and b/docker/official-skills-zip/delete-file-directory.zip differ diff --git a/docker/official-skills-zip/email-utils.zip b/docker/official-skills-zip/email-utils.zip index c83f8fea9..c708a252c 100644 Binary files a/docker/official-skills-zip/email-utils.zip and b/docker/official-skills-zip/email-utils.zip differ diff --git a/docker/official-skills-zip/list-directory.zip b/docker/official-skills-zip/list-directory.zip index 5798fc178..e3eaeba27 100644 Binary files a/docker/official-skills-zip/list-directory.zip and b/docker/official-skills-zip/list-directory.zip differ diff --git a/docker/official-skills-zip/move-file-directory.zip b/docker/official-skills-zip/move-file-directory.zip index c370b1186..d01897231 100644 Binary files a/docker/official-skills-zip/move-file-directory.zip and b/docker/official-skills-zip/move-file-directory.zip differ diff --git a/docker/official-skills-zip/read-file.zip b/docker/official-skills-zip/read-file.zip index e26552bd5..b394c2b38 100644 Binary files a/docker/official-skills-zip/read-file.zip and b/docker/official-skills-zip/read-file.zip differ diff --git a/docker/official-skills-zip/run-shell-ssh.zip b/docker/official-skills-zip/run-shell-ssh.zip index d8fc28aa7..868eee7c5 100644 Binary files a/docker/official-skills-zip/run-shell-ssh.zip and b/docker/official-skills-zip/run-shell-ssh.zip differ 
diff --git a/docker/official-skills-zip/search-datamate.zip b/docker/official-skills-zip/search-datamate.zip index ae1f76b28..0cb18ded6 100644 Binary files a/docker/official-skills-zip/search-datamate.zip and b/docker/official-skills-zip/search-datamate.zip differ diff --git a/docker/official-skills-zip/search-dify.zip b/docker/official-skills-zip/search-dify.zip index 1e2aac422..2bd7c8ccf 100644 Binary files a/docker/official-skills-zip/search-dify.zip and b/docker/official-skills-zip/search-dify.zip differ diff --git a/docker/official-skills-zip/search-idata.zip b/docker/official-skills-zip/search-idata.zip index 679293db5..85a7e1b72 100644 Binary files a/docker/official-skills-zip/search-idata.zip and b/docker/official-skills-zip/search-idata.zip differ diff --git a/docker/official-skills-zip/search-knowledge-base.zip b/docker/official-skills-zip/search-knowledge-base.zip index 28a4a9905..48fabec2a 100644 Binary files a/docker/official-skills-zip/search-knowledge-base.zip and b/docker/official-skills-zip/search-knowledge-base.zip differ diff --git a/docker/official-skills-zip/search-web-exa.zip b/docker/official-skills-zip/search-web-exa.zip index bef88ec5b..19c209588 100644 Binary files a/docker/official-skills-zip/search-web-exa.zip and b/docker/official-skills-zip/search-web-exa.zip differ diff --git a/docker/official-skills-zip/search-web-linkup.zip b/docker/official-skills-zip/search-web-linkup.zip index 640fdb4e1..4657bc165 100644 Binary files a/docker/official-skills-zip/search-web-linkup.zip and b/docker/official-skills-zip/search-web-linkup.zip differ diff --git a/docker/official-skills-zip/search-web-tavily.zip b/docker/official-skills-zip/search-web-tavily.zip index 7c438dfbf..628f73ef6 100644 Binary files a/docker/official-skills-zip/search-web-tavily.zip and b/docker/official-skills-zip/search-web-tavily.zip differ diff --git a/docker/scripts/sync_skill_directory.py b/docker/scripts/sync_skill_directory.py new file mode 100644 index 
000000000..d5819d251 --- /dev/null +++ b/docker/scripts/sync_skill_directory.py @@ -0,0 +1,659 @@ +#!/usr/bin/env python3 +""" +Skills Directory Migration Script for v2.2.0 upgrade. + +This script migrates skills from the legacy flat directory structure to +tenant-isolated directories. + +Migration: + FROM: ${ROOT_DIR}/skills/ (flat directory, skills directly under skills/) + TO: ${ROOT_DIR}/skills/{tenant_id}/ + +The tenant_id is determined by querying user_tenant_t for the first record +where user_role = 'ADMIN'. + +Usage (run on host machine): + python sync_skill_directory.py [--dry-run] + +Options: + --dry-run: Show what would be migrated without making changes + --verbose: Enable verbose debug output +""" + +import os +import sys +import argparse +import logging +import shutil +import subprocess +import base64 +import tempfile +from pathlib import Path +from typing import Optional + +# Setup logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + +# Constants +CONTAINER_NAME = "nexent-config" +DEFAULT_TENANT_ID = "tenant_id" + + +def get_env(key: str, default: str = "") -> str: + """Get environment variable with optional default.""" + return os.environ.get(key, default) + + +def load_environment_from_host(): + """ + Load environment variables from host .env file. + Looks for .env in the same directory as this script's parent (docker/). 
+ """ + script_dir = Path(__file__).resolve().parent + docker_dir = script_dir.parent + env_file = docker_dir / ".env" + + if env_file.is_file(): + logger.info(f"Loading environment from: {env_file}") + with open(env_file, 'r') as f: + for line in f: + line = line.strip() + if line and not line.startswith('#') and '=' in line: + key, _, value = line.partition('=') + key = key.strip() + value = value.strip().strip('"').strip("'") + if key and key not in os.environ: + os.environ[key] = value + return True + else: + logger.warning(f".env file not found at: {env_file}") + logger.info("Will use existing environment variables or defaults") + return False + + +def get_root_dir() -> str: + """Get ROOT_DIR from environment, normalized for the current OS.""" + root_dir = get_env("ROOT_DIR") + if not root_dir: + script_dir = Path(__file__).resolve().parent + docker_dir = script_dir.parent + env_file = docker_dir / ".env" + if env_file.is_file(): + with open(env_file, 'r') as f: + for line in f: + if line.startswith("ROOT_DIR="): + root_dir = line.split("=", 1)[1].strip().strip('"').strip("'") + break + + # Normalize path separators for current OS + if root_dir: + root_dir = str(Path(root_dir)) + return root_dir + + +def check_container_running(): + """Check if nexent-config container is running.""" + try: + result = subprocess.run( + ['docker', 'ps', '--format', '{{.Names}}'], + capture_output=True, + text=True, + timeout=10 + ) + + if result.returncode == 0: + containers = result.stdout.strip().split('\n') + if CONTAINER_NAME in containers: + logger.info(f"Container '{CONTAINER_NAME}' is running") + return True + else: + logger.error(f"Container '{CONTAINER_NAME}' is not running") + logger.info("Please start the containers with: cd docker && docker compose up -d") + return False + else: + logger.error("Could not query Docker containers") + return False + except FileNotFoundError: + logger.error("Docker not available on this system") + return False + except Exception as e: + 
logger.error(f"Error checking Docker containers: {e}") + return False + + +def exec_python_in_container(python_code: str) -> tuple: + """ + Execute Python code inside the container using base64 encoding. + + This approach avoids shell escaping issues by encoding the Python code + as base64 and decoding it inside the container. + + Args: + python_code: Python code to execute inside the container + + Returns: + Tuple of (return_code, stdout, stderr) + """ + # Encode Python code as base64 + encoded = base64.b64encode(python_code.encode('utf-8')).decode('ascii') + + # Create the shell command that decodes and executes the Python code + shell_cmd = f'python3 -c "import base64, sys; exec(base64.b64decode(sys.stdin.read()).decode(\'utf-8\'))"' + + try: + # Use stdin for the base64 data + full_cmd = ['docker', 'exec', '-i', CONTAINER_NAME, 'sh', '-c', shell_cmd] + result = subprocess.run( + full_cmd, + input=encoded, + capture_output=True, + text=True, + timeout=30 + ) + return result.returncode, result.stdout, result.stderr + except subprocess.TimeoutExpired: + logger.error("Command timed out") + return -1, "", "Command timed out" + except Exception as e: + logger.error(f"Failed to execute command in container: {e}") + return -1, "", str(e) + + +def test_postgres_connection_in_container() -> bool: + """ + Test PostgreSQL connection from inside the container using Python. 
+ + Returns: + True if connection successful, False otherwise + """ + logger.info("Testing PostgreSQL connection from inside container...") + + python_code = ''' +import os +import sys +try: + import psycopg2 + conn = psycopg2.connect( + host=os.getenv('POSTGRES_HOST', 'nexent-postgresql'), + port=os.getenv('POSTGRES_PORT', '5432'), + database=os.getenv('POSTGRES_DB', 'nexent'), + user=os.getenv('POSTGRES_USER', 'nexent'), + password=os.getenv('NEXENT_POSTGRES_PASSWORD', '') + ) + conn.close() + print("Connection successful") + sys.exit(0) +except Exception as e: + print(f"Connection failed: {e}", file=sys.stderr) + sys.exit(1) +''' + + returncode, stdout, stderr = exec_python_in_container(python_code) + + if returncode == 0: + logger.info("PostgreSQL connection test: SUCCESS") + return True + else: + logger.warning(f"PostgreSQL connection test failed: {stderr.strip()}") + return False + + +def get_admin_tenant_id_in_container() -> Optional[str]: + """ + Get tenant_id from the first user_tenant_t record where user_role = 'ADMIN'. + + Executes the query inside the container using Python. 
+ + Returns: + tenant_id string or None if not found + """ + logger.info("Querying admin tenant_id from inside container...") + + python_code = ''' +import os +import sys + +try: + import psycopg2 + + conn = psycopg2.connect( + host=os.getenv('POSTGRES_HOST', 'nexent-postgresql'), + port=os.getenv('POSTGRES_PORT', '5432'), + database=os.getenv('POSTGRES_DB', 'nexent'), + user=os.getenv('POSTGRES_USER', 'nexent'), + password=os.getenv('NEXENT_POSTGRES_PASSWORD', '') + ) + + cur = conn.cursor() + cur.execute(""" + SELECT tenant_id + FROM nexent.user_tenant_t + WHERE user_role = 'ADMIN' + AND delete_flag = 'N' + AND tenant_id IS NOT NULL + AND tenant_id != '' + ORDER BY user_tenant_id ASC + LIMIT 1 + """) + + result = cur.fetchone() + cur.close() + conn.close() + + if result: + print(result[0]) + sys.exit(0) + else: + print("No ADMIN user found", file=sys.stderr) + sys.exit(1) + +except Exception as e: + print(f"Query failed: {e}", file=sys.stderr) + sys.exit(1) +''' + + returncode, stdout, stderr = exec_python_in_container(python_code) + + if returncode == 0: + tenant_id = stdout.strip() + if tenant_id: + logger.info(f"Found ADMIN tenant_id: {tenant_id}") + return tenant_id + else: + logger.warning("No user with user_role='ADMIN' found in user_tenant_t") + return None + else: + logger.error(f"Failed to query admin tenant_id: {stderr.strip()}") + return None + + +def discover_legacy_skills_dir(root_dir: str) -> str: + """ + Discover the legacy skills directory. + + The legacy skills are located in the old nexent folder (sibling to nexent-data). + The new skills base is under {root_dir}/skills/{tenant_id}. 
+ + Legacy path: {root_dir}/../nexent/skills (old nexent folder) + New base: {root_dir}/skills + + Returns: + Path to the legacy skills directory (normalized for current OS) + """ + candidates = [] + if root_dir: + # Legacy path FIRST: check old nexent folder (nexent-data's sibling) + # This is the actual source of legacy skills + root_path = Path(root_dir) + legacy_candidate = root_path.parent / "nexent" / "skills" + candidates.append(str(legacy_candidate)) + # New base path (NOT the legacy, this is the destination base) + candidates.append(str(Path(root_dir) / "skills")) + candidates.append("skills") + candidates.append("./skills") + + for candidate in candidates: + if Path(candidate).is_dir(): + logger.info(f"Found legacy skills directory: {candidate}") + return candidate + + logger.warning("Could not find legacy skills directory") + return candidates[0] if candidates[0] else "skills" + + +def discover_skill_directories(skills_path: str) -> list: + """ + List all skill directories under the given base path. + + A valid skill directory contains at least a SKILL.md file. + + Args: + skills_path: Base skills directory path + + Returns: + List of skill directory names (not full paths) + """ + skills_path_obj = Path(skills_path) + if not skills_path_obj.is_dir(): + logger.warning(f"Skills directory does not exist: {skills_path}") + return [] + + skills = [] + try: + for item in skills_path_obj.iterdir(): + if item.is_dir(): + if (item / "SKILL.md").is_file(): + skills.append(item.name) + else: + logger.debug(f"Skipping non-skill directory: {item.name}") + except Exception as e: + logger.error(f"Error listing skills directory: {e}") + + return skills + + +def validate_skill_directory(skill_dir: str) -> dict: + """ + Validate a skill directory structure. 
+ + Args: + skill_dir: Path to the skill directory + + Returns: + Dict with validation results + """ + skill_dir_obj = Path(skill_dir) + result = { + "is_valid": True, + "skill_name": skill_dir_obj.name, + "files": [], + "errors": [] + } + + if not skill_dir_obj.is_dir(): + result["is_valid"] = False + result["errors"].append("Directory does not exist") + return result + + skill_md = skill_dir_obj / "SKILL.md" + if not skill_md.is_file(): + result["is_valid"] = False + result["errors"].append("SKILL.md not found") + + try: + for item in skill_dir_obj.rglob('*'): + if item.is_file(): + rel_path = item.relative_to(skill_dir_obj) + result["files"].append(str(rel_path)) + except Exception as e: + result["errors"].append(f"Error scanning files: {e}") + + return result + + +def migrate_skills( + legacy_dir: str, + target_dir: str, + skills: list, + dry_run: bool = False +) -> dict: + """ + Migrate skills from legacy directory to target directory. + + Args: + legacy_dir: Source directory path (host path) + target_dir: Target directory path (host path) + skills: List of skill names to migrate + dry_run: If True, only show what would be done + + Returns: + Migration results dict + """ + results = { + "total": len(skills), + "migrated": 0, + "skipped": 0, + "failed": 0, + "details": [] + } + + legacy_dir_obj = Path(legacy_dir) + target_dir_obj = Path(target_dir) + + for skill_name in skills: + source = legacy_dir_obj / skill_name + target = target_dir_obj / skill_name + + logger.info(f"Processing skill: {skill_name}") + + validation = validate_skill_directory(str(source)) + if not validation["is_valid"]: + logger.warning(f" Invalid skill directory: {', '.join(validation['errors'])}") + results["skipped"] += 1 + results["details"].append({ + "skill": skill_name, + "status": "skipped", + "reason": f"Validation failed: {', '.join(validation['errors'])}" + }) + continue + + if target.exists(): + logger.info(f" Target already exists, skipping: {target}") + results["skipped"] += 1 
+ results["details"].append({ + "skill": skill_name, + "status": "skipped", + "reason": "Already exists in target directory" + }) + continue + + if dry_run: + logger.info(f" [DRY-RUN] Would migrate to: {target}") + logger.info(f" Files: {', '.join(validation['files'])}") + results["migrated"] += 1 + results["details"].append({ + "skill": skill_name, + "status": "dry-run", + "source": str(source), + "target": str(target), + "files_count": len(validation["files"]) + }) + else: + try: + target.mkdir(parents=True, exist_ok=True) + + for item in source.rglob('*'): + if item.is_file(): + rel_path = item.relative_to(source) + dst_file = target / rel_path + dst_file.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(item, dst_file) + + logger.info(f" Migrated successfully: {len(validation['files'])} files") + results["migrated"] += 1 + results["details"].append({ + "skill": skill_name, + "status": "success", + "source": str(source), + "target": str(target), + "files_count": len(validation["files"]) + }) + + except Exception as e: + logger.error(f" Failed to migrate: {e}") + results["failed"] += 1 + results["details"].append({ + "skill": skill_name, + "status": "failed", + "reason": str(e) + }) + + return results + + +def print_results(results: dict): + """Print migration results summary.""" + logger.info("=" * 60) + logger.info("Migration Results:") + logger.info(f" Total skills found: {results['total']}") + logger.info(f" Migrated: {results['migrated']}") + logger.info(f" Skipped: {results['skipped']}") + logger.info(f" Failed: {results['failed']}") + logger.info("=" * 60) + + if results['details']: + logger.info("\nDetails:") + for detail in results['details']: + status = detail['status'] + skill = detail['skill'] + if status == 'success': + logger.info(f" [OK] {skill}: {detail.get('files_count', 0)} files -> {detail.get('target', 'N/A')}") + elif status == 'dry-run': + logger.info(f" [DRY-RUN] {skill}: would migrate {detail.get('files_count', 0)} files to 
{detail.get('target', 'N/A')}") + elif status == 'skipped': + logger.info(f" [SKIP] {skill}: {detail.get('reason', 'unknown reason')}") + else: + logger.info(f" [FAIL] {skill}: {detail.get('reason', 'unknown error')}") + + +def main(): + """Main function.""" + parser = argparse.ArgumentParser( + description='Migrate skills directory for v2.2.0 upgrade (run on host)' + ) + parser.add_argument( + '--dry-run', + action='store_true', + help='Show what would be migrated without making changes' + ) + parser.add_argument( + '--verbose', + action='store_true', + help='Enable verbose debug output' + ) + parser.add_argument( + '--legacy-dir', + type=str, + default=None, + help='Override legacy skills directory path (host path)' + ) + parser.add_argument( + '--target-dir', + type=str, + default=None, + help='Override target skills directory path (host path)' + ) + parser.add_argument( + '--skip-db', + action='store_true', + help='Skip database connection and use existing tenant directories' + ) + args = parser.parse_args() + + if args.verbose: + logging.getLogger().setLevel(logging.DEBUG) + + logger.info("=" * 60) + logger.info("Skills Directory Migration Script (v2.2.0)") + logger.info("=" * 60) + + if args.dry_run: + logger.info("Mode: DRY-RUN (no changes will be made)") + + # Step 1: Load environment from .env file + logger.info("\n[Step 1/6] Loading environment variables...") + load_environment_from_host() + + # Get ROOT_DIR + root_dir = get_root_dir() + if root_dir: + logger.info(f" ROOT_DIR: {root_dir}") + else: + logger.warning(" ROOT_DIR not set, using current directory") + + # Determine host paths + skills_base = str(Path(root_dir) / "skills") if root_dir else "skills" + + # Step 2: Check if container is running + logger.info("\n[Step 2/6] Checking container status...") + container_running = check_container_running() + if not container_running: + logger.error("nexent-config container is not running") + sys.exit(1) + + # Step 3: Test PostgreSQL connection and get 
tenant_id from container + tenant_id = None + if not args.skip_db: + logger.info("\n[Step 3/6] Testing PostgreSQL connection from inside container...") + + if test_postgres_connection_in_container(): + logger.info("\n[Step 4/6] Querying admin tenant_id...") + tenant_id = get_admin_tenant_id_in_container() + + if not tenant_id: + logger.warning("Could not determine tenant_id from database") + else: + logger.warning("Could not connect to PostgreSQL") + else: + logger.info("\n[Step 3/6] Skipping database connection (--skip-db)") + + # Fallback: check existing tenant directories on host + if not tenant_id: + logger.info("Checking for existing tenant directories...") + skills_base_obj = Path(skills_base) + if skills_base_obj.is_dir(): + existing_tenants = [ + d.name for d in skills_base_obj.iterdir() + if d.is_dir() and d.name not in ['.', '..'] + ] + if existing_tenants: + tenant_id = existing_tenants[0] + logger.info(f"Using existing tenant directory: {tenant_id}") + + # Step 5: Determine directories + legacy_dir = args.legacy_dir or discover_legacy_skills_dir(root_dir or ".") + logger.info(f"\n[Step 5/6] Migration paths:") + logger.info(f" Legacy directory (host): {legacy_dir}") + logger.info(f" Skills base (host): {skills_base}") + + if args.target_dir: + target_base = args.target_dir + logger.info(f" Target directory (host): {target_base}") + elif tenant_id: + target_base = str(Path(skills_base) / tenant_id) + logger.info(f" Target directory (host): {target_base}") + else: + logger.error("Cannot determine target directory: no tenant_id found") + logger.info("Options:") + logger.info(" 1. Ensure user_tenant_t has at least one ADMIN user") + logger.info(" 2. Provide --target-dir explicitly") + logger.info(" 3. 
Use --skip-db and ensure existing tenant directories exist") + sys.exit(1) + + # Step 6: Discover and migrate skills + logger.info("\n[Step 6/6] Discovering skills in legacy directory...") + + if not Path(legacy_dir).is_dir(): + logger.warning(f"Legacy directory does not exist: {legacy_dir}") + logger.info("No migration needed (source directory not found)") + return + + skills = discover_skill_directories(legacy_dir) + if not skills: + logger.info("No skills found in legacy directory") + logger.info("Migration complete (nothing to migrate)") + return + + logger.info(f"Found {len(skills)} skill(s): {', '.join(skills)}") + + # Execute migration + results = migrate_skills( + legacy_dir=legacy_dir, + target_dir=target_base, + skills=skills, + dry_run=args.dry_run + ) + + print_results(results) + + # Final summary + logger.info("\n" + "=" * 60) + if args.dry_run: + logger.info("DRY-RUN complete. To apply migration, run without --dry-run") + else: + logger.info("Migration completed") + if results['migrated'] > 0: + logger.info(f"\nSuccessfully migrated {results['migrated']} skill(s)") + logger.info(f"Skills are now available at: {target_base}") + logger.info("\nNote: The legacy directory has been preserved.") + logger.info("You can remove it manually after verifying the migration:") + logger.info(f" rm -rf {legacy_dir}") + logger.info("=" * 60) + + +if __name__ == "__main__": + main() diff --git a/docker/scripts/v220_sync_skill_directory.sh b/docker/scripts/v220_sync_skill_directory.sh new file mode 100644 index 000000000..572ffeb30 --- /dev/null +++ b/docker/scripts/v220_sync_skill_directory.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# +# v2.2.0 Skills Directory Migration Script +# Migrates skills from legacy location to tenant-isolated directories. 
+# +# Migration: +# FROM: ${ROOT_DIR}/skills/ (flat directory, skills directly under skills/) +# TO: ${ROOT_DIR}/skills/{tenant_id}/ +# +# The tenant_id is determined by querying user_tenant_t for the first record +# with user_role = 'ADMIN'. +# +# Usage: +# ./v220_sync_skill_directory.sh [--dry-run] +# +# Options: +# --dry-run Show what would be migrated without making changes +# + +set -e + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +SCRIPT_PATH="${SCRIPT_DIR}/sync_skill_directory.py" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' + +log_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +DRY_RUN=false +for arg in "$@"; do + case $arg in + --dry-run) + DRY_RUN=true + shift + ;; + *) + ;; + esac +done + +if [ ! -f "$SCRIPT_PATH" ]; then + log_error "Script not found: $SCRIPT_PATH" + exit 1 +fi + +# Load environment from .env if exists +ENV_FILE="${SCRIPT_DIR}/../.env" +if [ -f "$ENV_FILE" ]; then + log_info "Loading environment from: $ENV_FILE" + set -a + source "$ENV_FILE" + set +a +fi + +log_info "Executing migration script..." + +if [ "$DRY_RUN" = true ]; then + log_info "Mode: DRY-RUN (no changes will be made)" + python "$SCRIPT_PATH" --dry-run "$@" +else + python "$SCRIPT_PATH" "$@" +fi + +EXIT_CODE=$? 
+ +if [ $EXIT_CODE -eq 0 ]; then + log_info "Migration completed successfully" +else + log_error "Migration failed with exit code: $EXIT_CODE" + exit $EXIT_CODE +fi diff --git a/docker/sql/v2.0.3_0430_add_user_oauth_account_t.sql b/docker/sql/v2.0.3_0430_add_user_oauth_account_t.sql index 18ca52dc3..faa9adab2 100644 --- a/docker/sql/v2.0.3_0430_add_user_oauth_account_t.sql +++ b/docker/sql/v2.0.3_0430_add_user_oauth_account_t.sql @@ -36,7 +36,7 @@ EXECUTE FUNCTION update_user_oauth_account_t_update_time(); COMMENT ON TABLE nexent.user_oauth_account_t IS 'User OAuth account table - third-party login bindings'; COMMENT ON COLUMN nexent.user_oauth_account_t.oauth_account_id IS 'OAuth account ID, primary key'; COMMENT ON COLUMN nexent.user_oauth_account_t.user_id IS 'Nexent user ID (Supabase UUID)'; -COMMENT ON COLUMN nexent.user_oauth_account_t.provider IS 'OAuth provider name: github, wechat'; +COMMENT ON COLUMN nexent.user_oauth_account_t.provider IS 'OAuth provider name: github, wechat, gde, link_app'; COMMENT ON COLUMN nexent.user_oauth_account_t.provider_user_id IS 'User ID from the OAuth provider'; COMMENT ON COLUMN nexent.user_oauth_account_t.provider_email IS 'Email from the OAuth provider'; COMMENT ON COLUMN nexent.user_oauth_account_t.provider_username IS 'Display name from the OAuth provider'; diff --git a/docker/sql/v2.1.0_0503_add_prompt_template_t.sql b/docker/sql/v2.1.0_0503_add_prompt_template_t.sql new file mode 100644 index 000000000..3db9a9701 --- /dev/null +++ b/docker/sql/v2.1.0_0503_add_prompt_template_t.sql @@ -0,0 +1,115 @@ +-- Migration: Add prompt template table and agent prompt template fields +-- Date: 2026-05-03 +-- Description: Add user-scoped prompt template storage and bind selected prompt template to agents + +ALTER TABLE nexent.ag_tenant_agent_t +ADD COLUMN IF NOT EXISTS prompt_template_id INTEGER; + +ALTER TABLE nexent.ag_tenant_agent_t +ADD COLUMN IF NOT EXISTS prompt_template_name VARCHAR(100); + +COMMENT ON COLUMN 
nexent.ag_tenant_agent_t.prompt_template_id IS 'Prompt template ID used for business logic prompt generation'; +COMMENT ON COLUMN nexent.ag_tenant_agent_t.prompt_template_name IS 'Prompt template name used for business logic prompt generation'; + +UPDATE nexent.ag_tenant_agent_t +SET prompt_template_id = 0, + prompt_template_name = 'system_default' +WHERE delete_flag = 'N' + AND (prompt_template_id IS NULL OR prompt_template_name IS NULL); + +CREATE TABLE IF NOT EXISTS nexent.ag_prompt_template_t ( + template_id SERIAL PRIMARY KEY, + template_name VARCHAR(100) NOT NULL, + description VARCHAR(500), + template_type VARCHAR(50) NOT NULL DEFAULT 'agent_generate', + tenant_id VARCHAR(100) NOT NULL, + user_id VARCHAR(100) NOT NULL, + template_content_zh JSONB NOT NULL, + template_content_en JSONB, + create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + update_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + created_by VARCHAR(100), + updated_by VARCHAR(100), + delete_flag VARCHAR(1) DEFAULT 'N' +); + +ALTER TABLE nexent.ag_prompt_template_t OWNER TO "root"; + +CREATE OR REPLACE FUNCTION update_ag_prompt_template_update_time() +RETURNS TRIGGER AS $$ +BEGIN + NEW.update_time = CURRENT_TIMESTAMP; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +DROP TRIGGER IF EXISTS update_ag_prompt_template_update_time_trigger ON nexent.ag_prompt_template_t; + +CREATE TRIGGER update_ag_prompt_template_update_time_trigger +BEFORE UPDATE ON nexent.ag_prompt_template_t +FOR EACH ROW +EXECUTE FUNCTION update_ag_prompt_template_update_time(); + +ALTER TABLE nexent.ag_prompt_template_t +DROP CONSTRAINT IF EXISTS uq_prompt_template_user_name; + +COMMENT ON TABLE nexent.ag_prompt_template_t IS 'Prompt template table for user-defined business logic generation prompts'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_id IS 'Prompt template ID'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_name IS 'Prompt template name'; +COMMENT ON COLUMN 
nexent.ag_prompt_template_t.description IS 'Prompt template description'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_type IS 'Prompt template type'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.tenant_id IS 'Tenant ID'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.user_id IS 'User ID'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_content_zh IS 'Chinese prompt template content'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.template_content_en IS 'English prompt template content'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.create_time IS 'Creation time'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.update_time IS 'Update time'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.created_by IS 'Creator'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.updated_by IS 'Updater'; +COMMENT ON COLUMN nexent.ag_prompt_template_t.delete_flag IS 'Whether it is deleted. Optional values: Y/N'; + +DROP INDEX IF EXISTS nexent.uq_prompt_template_user_name_active; +CREATE UNIQUE INDEX IF NOT EXISTS uq_prompt_template_user_name_active +ON nexent.ag_prompt_template_t (tenant_id, user_id, template_name) +WHERE delete_flag = 'N'; + +CREATE INDEX IF NOT EXISTS idx_ag_prompt_template_t_user +ON nexent.ag_prompt_template_t (tenant_id, user_id, template_type) +WHERE delete_flag = 'N'; + +INSERT INTO nexent.ag_prompt_template_t ( + template_id, + template_name, + description, + template_type, + tenant_id, + user_id, + template_content_zh, + template_content_en, + created_by, + updated_by, + delete_flag +) +VALUES ( + 0, + 'system_default', + 'System default prompt template', + 'agent_generate', + 'tenant_id', + 'user_id', + '{}'::jsonb, + '{}'::jsonb, + 'user_id', + 'user_id', + 'N' +) +ON CONFLICT (template_id) DO UPDATE SET + template_name = EXCLUDED.template_name, + description = EXCLUDED.description, + template_type = EXCLUDED.template_type, + tenant_id = EXCLUDED.tenant_id, + user_id = EXCLUDED.user_id, + template_content_zh = EXCLUDED.template_content_zh, 
+ template_content_en = EXCLUDED.template_content_en, + updated_by = EXCLUDED.updated_by, + delete_flag = 'N'; diff --git a/docker/sql/v2.2.0_0514_skill_config_schema.sql b/docker/sql/v2.2.0_0514_skill_config_schema.sql new file mode 100644 index 000000000..ff3f0da1a --- /dev/null +++ b/docker/sql/v2.2.0_0514_skill_config_schema.sql @@ -0,0 +1,24 @@ +-- Rename params -> config_values, add config_schemas to ag_skill_info_t +-- Add tenant_id column for multi-tenancy support +ALTER TABLE nexent.ag_skill_info_t ADD COLUMN IF NOT EXISTS tenant_id VARCHAR(100); + +-- Add config_values and config_schemas to ag_skill_info_t +ALTER TABLE nexent.ag_skill_info_t RENAME COLUMN params TO config_values; +ALTER TABLE nexent.ag_skill_info_t ADD COLUMN IF NOT EXISTS config_schemas JSON; + +-- Ensure config_values and config_schemas exist on ag_skill_info_t (no-ops after the statements above) +ALTER TABLE nexent.ag_skill_info_t ADD COLUMN IF NOT EXISTS config_values JSON; +ALTER TABLE nexent.ag_skill_info_t ADD COLUMN IF NOT EXISTS config_schemas JSON; + +-- Comments for ag_skill_info_t columns +COMMENT ON COLUMN nexent.ag_skill_info_t.tenant_id IS 'Tenant ID for multi-tenancy. 
NULL for pre-existing skills.'; +COMMENT ON COLUMN nexent.ag_skill_info_t.config_values IS 'Runtime parameter values from config/config.yaml'; +COMMENT ON COLUMN nexent.ag_skill_info_t.config_schemas IS 'Parameter metadata list from config/schema.yaml'; + +-- Add config_values and config_schemas to ag_skill_instance_t +ALTER TABLE nexent.ag_skill_instance_t ADD COLUMN IF NOT EXISTS config_values JSON; +ALTER TABLE nexent.ag_skill_instance_t ADD COLUMN IF NOT EXISTS config_schemas JSON; + +-- Comments for ag_skill_instance_t columns +COMMENT ON COLUMN nexent.ag_skill_instance_t.config_values IS 'Per-agent runtime parameter values from config/config.yaml'; +COMMENT ON COLUMN nexent.ag_skill_instance_t.config_schemas IS 'Per-agent parameter schema overrides from config/schema.yaml'; diff --git a/docker/sql/v2.2.0_0520_add_concurrency_and_timeout_to_model_record_t.sql b/docker/sql/v2.2.0_0520_add_concurrency_and_timeout_to_model_record_t.sql new file mode 100644 index 000000000..59632f8ed --- /dev/null +++ b/docker/sql/v2.2.0_0520_add_concurrency_and_timeout_to_model_record_t.sql @@ -0,0 +1,13 @@ +-- Add concurrency_limit column to model_record_t table +ALTER TABLE nexent.model_record_t +ADD COLUMN IF NOT EXISTS concurrency_limit INTEGER DEFAULT NULL; + +-- Add comment to the column +COMMENT ON COLUMN nexent.model_record_t.concurrency_limit IS 'Maximum concurrent requests for this model. Default is NULL (unlimited).'; + +-- Add timeout_seconds column to model_record_t table +ALTER TABLE nexent.model_record_t +ADD COLUMN IF NOT EXISTS timeout_seconds INTEGER DEFAULT 120; + +-- Add comment to the column +COMMENT ON COLUMN nexent.model_record_t.timeout_seconds IS 'Request timeout in seconds for this model. 
Default is 120 seconds.'; diff --git a/docker/sql/v2.2.0_0521_add_mcp_community_record_t.sql b/docker/sql/v2.2.0_0521_add_mcp_community_record_t.sql new file mode 100644 index 000000000..83f9d9a56 --- /dev/null +++ b/docker/sql/v2.2.0_0521_add_mcp_community_record_t.sql @@ -0,0 +1,83 @@ +-- Migration: Add mcp_community_record_t table +-- Date: 2026-03-26 +-- Description: Community MCP market table aligned with public-shareable fields from mcp_record_t. + +SET search_path TO nexent; + +BEGIN; + +CREATE TABLE IF NOT EXISTS nexent.mcp_community_record_t ( + community_id SERIAL PRIMARY KEY NOT NULL, + tenant_id VARCHAR(100), + user_id VARCHAR(100), + mcp_name VARCHAR(100) NOT NULL, + mcp_server VARCHAR(500) NOT NULL, + source VARCHAR(30) DEFAULT 'community', + version VARCHAR(50), + registry_json JSONB, + transport_type VARCHAR(30), + config_json JSON, + tags TEXT[], + description TEXT, + create_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + update_time TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, + created_by VARCHAR(100), + updated_by VARCHAR(100), + delete_flag VARCHAR(1) DEFAULT 'N' +); + +ALTER TABLE nexent.mcp_community_record_t OWNER TO root; + +COMMENT ON TABLE nexent.mcp_community_record_t IS 'Community MCP market records, publishable from tenant MCP services'; +COMMENT ON COLUMN nexent.mcp_community_record_t.community_id IS 'Community record ID, unique primary key'; +COMMENT ON COLUMN nexent.mcp_community_record_t.tenant_id IS 'Publisher tenant ID'; +COMMENT ON COLUMN nexent.mcp_community_record_t.user_id IS 'Publisher user ID'; +COMMENT ON COLUMN nexent.mcp_community_record_t.mcp_name IS 'MCP name'; +COMMENT ON COLUMN nexent.mcp_community_record_t.mcp_server IS 'MCP server URL'; +COMMENT ON COLUMN nexent.mcp_community_record_t.source IS 'Source type, fixed to community for this table'; +COMMENT ON COLUMN nexent.mcp_community_record_t.version IS 'MCP version'; +COMMENT ON COLUMN nexent.mcp_community_record_t.registry_json IS 'Full MCP 
server metadata JSON for discovery and quick import'; +COMMENT ON COLUMN nexent.mcp_community_record_t.transport_type IS 'Transport type: url/container'; +COMMENT ON COLUMN nexent.mcp_community_record_t.config_json IS 'Public-shareable MCP configuration JSON'; +COMMENT ON COLUMN nexent.mcp_community_record_t.tags IS 'Tags'; +COMMENT ON COLUMN nexent.mcp_community_record_t.description IS 'Description'; +COMMENT ON COLUMN nexent.mcp_community_record_t.create_time IS 'Creation time'; +COMMENT ON COLUMN nexent.mcp_community_record_t.update_time IS 'Update time'; +COMMENT ON COLUMN nexent.mcp_community_record_t.created_by IS 'Creator ID'; +COMMENT ON COLUMN nexent.mcp_community_record_t.updated_by IS 'Updater ID'; +COMMENT ON COLUMN nexent.mcp_community_record_t.delete_flag IS 'Soft delete flag: Y/N'; + +CREATE INDEX IF NOT EXISTS idx_mcp_community_tenant_delete + ON nexent.mcp_community_record_t (tenant_id, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_community_name_delete + ON nexent.mcp_community_record_t (mcp_name, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_community_transport_delete + ON nexent.mcp_community_record_t (transport_type, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_community_user_delete + ON nexent.mcp_community_record_t (user_id, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_community_tags_gin + ON nexent.mcp_community_record_t USING GIN (tags); + +CREATE OR REPLACE FUNCTION update_mcp_community_record_update_time() +RETURNS TRIGGER AS $$ +BEGIN + NEW.update_time = CURRENT_TIMESTAMP; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION update_mcp_community_record_update_time() IS 'Auto-update update_time for mcp_community_record_t'; + +DROP TRIGGER IF EXISTS update_mcp_community_record_update_time_trigger ON nexent.mcp_community_record_t; +CREATE TRIGGER update_mcp_community_record_update_time_trigger +BEFORE UPDATE ON nexent.mcp_community_record_t +FOR EACH ROW +EXECUTE FUNCTION 
update_mcp_community_record_update_time(); + +COMMENT ON TRIGGER update_mcp_community_record_update_time_trigger ON nexent.mcp_community_record_t IS 'Trigger to maintain update_time'; + +COMMIT; diff --git a/docker/sql/v2.2.0_0521_expand_mcp_record_t.sql b/docker/sql/v2.2.0_0521_expand_mcp_record_t.sql new file mode 100644 index 000000000..6c92a392e --- /dev/null +++ b/docker/sql/v2.2.0_0521_expand_mcp_record_t.sql @@ -0,0 +1,41 @@ +-- Migration: Extend mcp_record_t for MCP tools (direct schema) +-- Date: 2026-03-18 +-- Description: One-step schema extension for mcp_record_t. No table merge, no data migration. + +SET search_path TO nexent; + +BEGIN; + +-- 1) Extend mcp_record_t with final column names (idempotent) +ALTER TABLE IF EXISTS nexent.mcp_record_t + ADD COLUMN IF NOT EXISTS source VARCHAR(30), + ADD COLUMN IF NOT EXISTS registry_json JSONB, + ADD COLUMN IF NOT EXISTS config_json JSON, + ADD COLUMN IF NOT EXISTS enabled BOOLEAN DEFAULT TRUE, + ADD COLUMN IF NOT EXISTS tags TEXT[], + ADD COLUMN IF NOT EXISTS description TEXT, + ADD COLUMN IF NOT EXISTS container_port INTEGER; + +-- 2) Add comments for new columns +COMMENT ON COLUMN nexent.mcp_record_t.source IS 'Source type: local/mcp_registry/community'; +COMMENT ON COLUMN nexent.mcp_record_t.registry_json IS 'Full MCP registry server.json snapshot'; +COMMENT ON COLUMN nexent.mcp_record_t.config_json IS 'MCP config data'; +COMMENT ON COLUMN nexent.mcp_record_t.enabled IS 'Enabled'; +COMMENT ON COLUMN nexent.mcp_record_t.tags IS 'Tags'; +COMMENT ON COLUMN nexent.mcp_record_t.description IS 'Description'; +COMMENT ON COLUMN nexent.mcp_record_t.container_port IS 'Host port bound for containerized MCP service'; + +-- 3) Add indexes for common management queries +CREATE INDEX IF NOT EXISTS idx_mcp_record_t_tenant_delete + ON nexent.mcp_record_t (tenant_id, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_record_t_tenant_name + ON nexent.mcp_record_t (tenant_id, mcp_name, delete_flag); + +CREATE INDEX IF NOT 
EXISTS idx_mcp_record_t_tenant_server + ON nexent.mcp_record_t (tenant_id, mcp_server, delete_flag); + +CREATE INDEX IF NOT EXISTS idx_mcp_record_t_tags_gin + ON nexent.mcp_record_t USING GIN (tags); + +COMMIT; diff --git a/docker/sql/v2.2.0_0527_add_custom_headers_to_mcp_record_t.sql b/docker/sql/v2.2.0_0527_add_custom_headers_to_mcp_record_t.sql new file mode 100644 index 000000000..00933c523 --- /dev/null +++ b/docker/sql/v2.2.0_0527_add_custom_headers_to_mcp_record_t.sql @@ -0,0 +1,26 @@ +-- Migration: Add custom_headers column to mcp_record_t +-- Date: 2026-05-26 +-- Description: Add custom_headers field to store custom HTTP headers for MCP server requests + +SET search_path TO nexent; + +BEGIN; + +-- Add custom_headers column if it doesn't exist +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_schema = 'nexent' + AND table_name = 'mcp_record_t' + AND column_name = 'custom_headers' + ) THEN + ALTER TABLE nexent.mcp_record_t + ADD COLUMN custom_headers JSON DEFAULT NULL; + END IF; +END $$; + +-- Add comment to the column +COMMENT ON COLUMN nexent.mcp_record_t.custom_headers IS 'Custom HTTP headers as JSON object for MCP server requests'; + +COMMIT; diff --git a/docker/sql/v2.2.0_0529_add_asset_owner_role_permissions.sql b/docker/sql/v2.2.0_0529_add_asset_owner_role_permissions.sql new file mode 100644 index 000000000..8f21b110b --- /dev/null +++ b/docker/sql/v2.2.0_0529_add_asset_owner_role_permissions.sql @@ -0,0 +1,53 @@ +-- Migration: ASSET_OWNER role permissions and invitation type comment +-- Date: 2026-05-29 +-- Description: Add ASSET_OWNER role permissions, SU asset-owner invite permissions, +-- update invitation code_type comment, and ensure ag_skill_info_t.tenant_id exists +-- Source: commit 15cece97692db2372a978cbdf21b5d5316e79f30 (init.sql) + +SET search_path TO nexent; + +BEGIN; + +COMMENT ON COLUMN nexent.tenant_invitation_code_t.code_type IS + 'Invitation code type: ADMIN_INVITE, DEV_INVITE, USER_INVITE, 
ASSET_OWNER_INVITE'; + +INSERT INTO nexent.role_permission_t + (role_permission_id, user_role, permission_category, permission_type, permission_subtype) +VALUES + (188, 'SU', 'RESOURCE', 'INVITE.ASSET_OWNER', 'CREATE'), + (189, 'SU', 'RESOURCE', 'INVITE.ASSET_OWNER', 'READ'), + (190, 'SU', 'RESOURCE', 'INVITE.ASSET_OWNER', 'UPDATE'), + (191, 'SU', 'RESOURCE', 'INVITE.ASSET_OWNER', 'DELETE'), + (192, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/'), + (193, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/agents'), + (194, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/knowledges'), + (195, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/chat'), + (196, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/space'), + (197, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/market'), + (198, 'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/models'), + (199, 'ASSET_OWNER', 'RESOURCE', 'AGENT', 'CREATE'), + (200, 'ASSET_OWNER', 'RESOURCE', 'AGENT', 'READ'), + (201, 'ASSET_OWNER', 'RESOURCE', 'AGENT', 'UPDATE'), + (202, 'ASSET_OWNER', 'RESOURCE', 'AGENT', 'DELETE'), + (203, 'ASSET_OWNER', 'RESOURCE', 'SKILL', 'CREATE'), + (204, 'ASSET_OWNER', 'RESOURCE', 'SKILL', 'READ'), + (205, 'ASSET_OWNER', 'RESOURCE', 'SKILL', 'UPDATE'), + (206, 'ASSET_OWNER', 'RESOURCE', 'SKILL', 'DELETE'), + (207, 'ASSET_OWNER', 'RESOURCE', 'KB', 'CREATE'), + (208, 'ASSET_OWNER', 'RESOURCE', 'KB', 'READ'), + (209, 'ASSET_OWNER', 'RESOURCE', 'KB', 'UPDATE'), + (210, 'ASSET_OWNER', 'RESOURCE', 'KB', 'DELETE'), + (211, 'ASSET_OWNER', 'RESOURCE', 'MCP', 'CREATE'), + (212, 'ASSET_OWNER', 'RESOURCE', 'MCP', 'READ'), + (213, 'ASSET_OWNER', 'RESOURCE', 'MCP', 'UPDATE'), + (214, 'ASSET_OWNER', 'RESOURCE', 'MCP', 'DELETE'), + (215, 'ASSET_OWNER', 'RESOURCE', 'MODEL', 'CREATE'), + (216, 'ASSET_OWNER', 'RESOURCE', 'MODEL', 'READ'), + (217, 'ASSET_OWNER', 'RESOURCE', 'MODEL', 'UPDATE'), + (218, 'ASSET_OWNER', 'RESOURCE', 'MODEL', 'DELETE'), + (219, 'ASSET_OWNER', 'RESOURCE', 'USER.ROLE', 'READ'), + (220, 
'ASSET_OWNER', 'VISIBILITY', 'LEFT_NAV_MENU', '/users'), + (221, 'SU', 'VISIBILITY', 'LEFT_NAV_MENU', '/asset-owner-resources') +ON CONFLICT (role_permission_id) DO NOTHING; + +COMMIT; diff --git a/docker/start-monitoring.sh b/docker/start-monitoring.sh index 8cd8561f0..48ca6cd3f 100755 --- a/docker/start-monitoring.sh +++ b/docker/start-monitoring.sh @@ -1,53 +1,420 @@ #!/bin/bash # Nexent LLM Performance Monitoring Setup Script -# This script sets up OpenTelemetry + Jaeger + Prometheus + Grafana for monitoring +# This script starts the OpenTelemetry Collector alone, or with a local +# Phoenix/Langfuse/Grafana/Zipkin observability backend, or forwards to +# online LangSmith. set -e SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" MONITORING_DIR="$SCRIPT_DIR/monitoring" +COMPOSE_FILE="$SCRIPT_DIR/docker-compose-monitoring.yml" -echo "🚀 Starting Nexent LLM Performance Monitoring Setup..." +SUPPORTED_STACKS="otlp, collector, phoenix, langfuse, langsmith, grafana, zipkin" -# Check if Docker is running -if ! docker info > /dev/null 2>&1; then - echo "❌ Error: Docker is not running. Please start Docker first." - exit 1 -fi +usage() { + cat < + $(basename "$0") [stack] + $(basename "$0") [stack] + $(basename "$0") [stack] -# Create external network if it doesn't exist -if ! docker network ls | grep -q nexent-network; then - echo "🔗 Creating nexent-network..." - docker network create nexent-network -else - echo "✅ nexent-network already exists" -fi +Stacks are mutually exclusive. Starting one stack removes containers from the +other monitoring stacks while preserving their data volumes. + +Stacks: + otlp Start OpenTelemetry Collector only. This is the default. + collector Alias for otlp. + phoenix Start Collector and local Arize Phoenix. + langfuse Start Collector and local Langfuse self-host stack. + langsmith Start Collector and forward traces to online LangSmith. + grafana Start Collector, Grafana, and Tempo. + zipkin Start Collector and local Zipkin. 
+ +Actions: + start/up Start the selected stack and stop containers from other stacks. + stop/down Stop and remove containers for the selected stack. Data is kept. + uninstall Stop and remove containers and data volumes for the selected stack. + +Set MONITORING_PROVIDER in monitoring/monitoring.env to change the default stack. +EOF +} + +ACTION="start" +STACK_ARG="" + +set_stack_arg() { + local value="$1" + if [ -n "$STACK_ARG" ] && [ "$STACK_ARG" != "$value" ]; then + echo "❌ Error: multiple monitoring stacks specified: '$STACK_ARG' and '$value'." + usage + exit 1 + fi + STACK_ARG="$value" +} + +while [ $# -gt 0 ]; do + case "$1" in + --stack) + if [ $# -lt 2 ]; then + echo "❌ Error: --stack requires a value." + usage + exit 1 + fi + set_stack_arg "$2" + shift 2 + ;; + --stop|--down) + ACTION="stop" + shift + ;; + --uninstall|--remove) + ACTION="uninstall" + shift + ;; + start|up) + ACTION="start" + shift + ;; + stop|down) + ACTION="stop" + shift + ;; + uninstall|remove) + ACTION="uninstall" + shift + ;; + -h|--help) + usage + exit 0 + ;; + otlp|collector|phoenix|langfuse|langsmith|grafana|zipkin) + set_stack_arg "$1" + shift + ;; + *) + echo "❌ Error: unknown argument '$1'." + usage + exit 1 + ;; + esac +done -# Copy environment file if it doesn't exist -if [ ! -f "$MONITORING_DIR/monitoring.env" ]; then - echo "📋 Creating monitoring.env from example..." - cp "$MONITORING_DIR/monitoring.env.example" "$MONITORING_DIR/monitoring.env" - echo "⚠️ Please review and update $MONITORING_DIR/monitoring.env as needed" +normalize_stack() { + case "$1" in + ""|otlp|collector) + echo "collector" + ;; + phoenix|langfuse|langsmith|grafana|zipkin) + echo "$1" + ;; + *) + echo "❌ Error: unsupported monitoring provider '$1'. Supported: $SUPPORTED_STACKS." >&2 + exit 1 + ;; + esac +} + +if [ -n "$STACK_ARG" ]; then + normalize_stack "$STACK_ARG" > /dev/null fi -# Start monitoring services -echo "🐳 Starting monitoring services..." 
-docker-compose -f "$SCRIPT_DIR/docker-compose-monitoring.yml" --env-file "$MONITORING_DIR/monitoring.env" up -d +remove_containers() { + if [ "$#" -eq 0 ]; then + return + fi + + local existing=() + local container + for container in "$@"; do + if docker ps -a --format '{{.Names}}' | grep -qx "$container"; then + existing+=("$container") + fi + done -# Wait for services to be ready -echo "⏳ Waiting for services to start..." -sleep 10 + if [ "${#existing[@]}" -gt 0 ]; then + docker rm -f "${existing[@]}" > /dev/null + echo "🧹 Removed containers: ${existing[*]}" + fi +} -# Check service health with timeout -echo "🔍 Checking service health..." +remove_volumes() { + if [ "$#" -eq 0 ]; then + return + fi + + local existing=() + local volume + for volume in "$@"; do + if docker volume ls --format '{{.Name}}' | grep -qx "$volume"; then + existing+=("$volume") + fi + done + + if [ "${#existing[@]}" -gt 0 ]; then + docker volume rm "${existing[@]}" > /dev/null + echo "🧹 Removed volumes: ${existing[*]}" + fi +} + +stack_containers() { + case "$1" in + collector|langsmith) + echo "nexent-otel-collector" + ;; + phoenix) + echo "nexent-otel-collector nexent-phoenix" + ;; + langfuse) + echo "nexent-otel-collector nexent-langfuse-worker nexent-langfuse-web nexent-langfuse-clickhouse nexent-langfuse-minio nexent-langfuse-redis nexent-langfuse-postgres" + ;; + grafana) + echo "nexent-otel-collector nexent-grafana nexent-tempo" + ;; + zipkin) + echo "nexent-otel-collector nexent-zipkin" + ;; + esac +} + +stack_data_volumes() { + case "$1" in + phoenix) + echo "monitor_phoenix-data" + ;; + langfuse) + echo "monitor_langfuse-postgres-data monitor_langfuse-clickhouse-data monitor_langfuse-clickhouse-logs monitor_langfuse-minio-data monitor_langfuse-redis-data" + ;; + grafana) + echo "monitor_grafana-data monitor_tempo-data" + ;; + collector|langsmith|zipkin) + echo "" + ;; + esac +} + +all_backend_containers() { + echo "nexent-phoenix nexent-langfuse-worker nexent-langfuse-web 
nexent-langfuse-clickhouse nexent-langfuse-minio nexent-langfuse-redis nexent-langfuse-postgres nexent-grafana nexent-tempo nexent-zipkin" +} + +incompatible_containers() { + local stack="$1" + local containers + containers="$(all_backend_containers)" + case "$stack" in + phoenix) + echo "$containers" | sed 's/nexent-phoenix//g' + ;; + langfuse) + echo "$containers" | sed 's/nexent-langfuse-worker//g; s/nexent-langfuse-web//g; s/nexent-langfuse-clickhouse//g; s/nexent-langfuse-minio//g; s/nexent-langfuse-redis//g; s/nexent-langfuse-postgres//g' + ;; + grafana) + echo "$containers" | sed 's/nexent-grafana//g; s/nexent-tempo//g' + ;; + zipkin) + echo "$containers" | sed 's/nexent-zipkin//g' + ;; + collector|langsmith) + echo "$containers" + ;; + esac +} + +configure_stack() { + MONITORING_PROVIDER="${STACK_ARG:-${MONITORING_PROVIDER:-otlp}}" + LOCAL_STACK="$(normalize_stack "$MONITORING_PROVIDER")" + + case "$LOCAL_STACK" in + collector) + BACKEND_MONITORING_PROVIDER="otlp" + OTEL_COLLECTOR_CONFIG_FILE="${OTEL_COLLECTOR_CONFIG_FILE:-./monitoring/otel-collector-config.yml}" + COMPOSE_PROFILES=() + ;; + phoenix) + BACKEND_MONITORING_PROVIDER="phoenix" + OTEL_COLLECTOR_CONFIG_FILE="${OTEL_COLLECTOR_CONFIG_FILE:-./monitoring/otel-collector-phoenix-config.yml}" + COMPOSE_PROFILES=(--profile phoenix) + ;; + langfuse) + BACKEND_MONITORING_PROVIDER="langfuse" + OTEL_COLLECTOR_CONFIG_FILE="${OTEL_COLLECTOR_CONFIG_FILE:-./monitoring/otel-collector-langfuse-config.yml}" + COMPOSE_PROFILES=(--profile langfuse) + LANGFUSE_INIT_PROJECT_PUBLIC_KEY="${LANGFUSE_INIT_PROJECT_PUBLIC_KEY:-pk-lf-nexent-local}" + LANGFUSE_INIT_PROJECT_SECRET_KEY="${LANGFUSE_INIT_PROJECT_SECRET_KEY:-sk-lf-nexent-local}" + if [ -z "${LANGFUSE_OTLP_AUTH_HEADER:-}" ]; then + LANGFUSE_OTLP_AUTH_HEADER="Basic $(printf "%s:%s" "$LANGFUSE_INIT_PROJECT_PUBLIC_KEY" "$LANGFUSE_INIT_PROJECT_SECRET_KEY" | base64 | tr -d '\n')" + fi + export LANGFUSE_OTLP_AUTH_HEADER + ;; + langsmith) + 
BACKEND_MONITORING_PROVIDER="langsmith" + OTEL_COLLECTOR_CONFIG_FILE="${OTEL_COLLECTOR_CONFIG_FILE:-./monitoring/otel-collector-langsmith-config.yml}" + COMPOSE_PROFILES=() + LANGSMITH_OTLP_TRACES_ENDPOINT="${LANGSMITH_OTLP_TRACES_ENDPOINT:-https://api.smith.langchain.com/otel/v1/traces}" + LANGSMITH_PROJECT="${LANGSMITH_PROJECT:-nexent}" + if [ "$ACTION" = "start" ] && [ -z "${LANGSMITH_API_KEY:-}" ]; then + echo "❌ Error: LANGSMITH_API_KEY is required for the langsmith stack." + echo " Set it in $MONITORING_DIR/monitoring.env or export it before running this script." + exit 1 + fi + export LANGSMITH_API_KEY LANGSMITH_PROJECT LANGSMITH_OTLP_TRACES_ENDPOINT + ;; + grafana) + BACKEND_MONITORING_PROVIDER="grafana" + OTEL_COLLECTOR_CONFIG_FILE="${OTEL_COLLECTOR_CONFIG_FILE:-./monitoring/otel-collector-grafana-config.yml}" + COMPOSE_PROFILES=(--profile grafana) + ;; + zipkin) + BACKEND_MONITORING_PROVIDER="zipkin" + OTEL_COLLECTOR_CONFIG_FILE="${OTEL_COLLECTOR_CONFIG_FILE:-./monitoring/otel-collector-zipkin-config.yml}" + COMPOSE_PROFILES=(--profile zipkin) + ;; + esac + export OTEL_COLLECTOR_CONFIG_FILE +} + +dashboard_url() { + case "$LOCAL_STACK" in + phoenix) + echo "http://localhost:${PHOENIX_PORT:-6006}" + ;; + langfuse) + echo "http://localhost:${LANGFUSE_PORT:-3001}" + ;; + langsmith) + echo "https://smith.langchain.com/" + ;; + grafana) + echo "http://localhost:${GRAFANA_PORT:-3002}/d/nexent-llm-agent/nexent-agent-trace-monitoring?orgId=1" + ;; + zipkin) + echo "http://localhost:${ZIPKIN_PORT:-9411}" + ;; + collector) + echo "" + ;; + esac +} + +print_access_hints() { + local dashboard + dashboard="$(dashboard_url)" + + echo "" + echo "📊 Access your monitoring tools:" + echo " • OTLP HTTP receiver: http://localhost:${OTEL_COLLECTOR_HTTP_PORT:-4318}" + echo " • OTLP gRPC receiver: localhost:${OTEL_COLLECTOR_GRPC_PORT:-4317}" + echo " • Docker backend endpoint: http://otel-collector:4318" + + case "$LOCAL_STACK" in + phoenix) + echo " • Phoenix UI: $dashboard" + 
echo " • Phoenix direct gRPC ingest: localhost:${PHOENIX_GRPC_HOST_PORT:-4319}" + ;; + langfuse) + echo " • Langfuse UI: $dashboard" + echo " • Langfuse admin: ${LANGFUSE_INIT_USER_EMAIL:-admin@nexent.com} / ${LANGFUSE_INIT_USER_PASSWORD:-nexent@4321}" + echo " • Langfuse project keys: ${LANGFUSE_INIT_PROJECT_PUBLIC_KEY:-pk-lf-nexent-local} / ${LANGFUSE_INIT_PROJECT_SECRET_KEY:-sk-lf-nexent-local}" + echo " • MinIO API: http://localhost:${LANGFUSE_MINIO_API_PORT:-9092}" + echo " • MinIO console: http://localhost:${LANGFUSE_MINIO_CONSOLE_PORT:-9093}" + ;; + langsmith) + echo " • LangSmith UI: $dashboard" + echo " • LangSmith project: ${LANGSMITH_PROJECT:-nexent}" + echo " • LangSmith OTLP traces endpoint: ${LANGSMITH_OTLP_TRACES_ENDPOINT:-https://api.smith.langchain.com/otel/v1/traces}" + echo " • No local LangSmith UI is started; open the hosted UI and select the project above." + ;; + grafana) + echo " • Grafana dashboard: $dashboard" + echo " • Grafana home: http://localhost:${GRAFANA_PORT:-3002}" + echo " • Grafana admin: ${GRAFANA_ADMIN_USER:-admin} / ${GRAFANA_ADMIN_PASSWORD:-nexent@4321}" + echo " • Tempo API: http://localhost:${TEMPO_PORT:-3200}" + ;; + zipkin) + echo " • Zipkin UI: $dashboard" + ;; + collector) + echo " • Collector-only mode has no monitoring UI." + echo " • View Collector logs: docker logs -f nexent-otel-collector" + echo " • Configure Phoenix, Langfuse, LangSmith, Grafana/Tempo, Zipkin, or another OTLP backend when you need a UI." + ;; + esac + + echo "" + echo "🔗 Frontend monitoring entry:" + if [ -n "$dashboard" ]; then + echo " Set MONITORING_DASHBOARD_URL=$dashboard" + else + echo " Leave MONITORING_DASHBOARD_URL empty to hide the monitoring entry." + fi +} + +print_backend_hints() { + echo "" + echo "🔧 To enable monitoring in your Nexent backend:" + echo " 1. Set ENABLE_TELEMETRY=true in docker/.env" + echo " 2. Set MONITORING_PROVIDER=$BACKEND_MONITORING_PROVIDER in docker/.env" + echo " 3. 
Set OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 for Docker services" + echo " or http://localhost:${OTEL_COLLECTOR_HTTP_PORT:-4318} for a backend running on the host" + echo " 4. Set MONITORING_DASHBOARD_URL as shown above when a UI is available" + echo " 5. Install performance dependencies:" + echo " uv sync --extra performance" + echo " 6. Restart your Nexent backend service" +} + +print_uninstall_hints() { + echo "" + echo "🛑 Stop or uninstall this monitoring stack:" + echo " • Stop containers and keep data:" + echo " $(basename "$0") stop $LOCAL_STACK" + echo " • Remove containers and this stack's data volumes:" + echo " $(basename "$0") uninstall $LOCAL_STACK" + echo "" + echo " Stacks are mutually exclusive; do not run multiple monitoring providers in parallel." +} + +load_env_for_start() { + if [ ! -f "$MONITORING_DIR/monitoring.env" ]; then + echo "📋 Creating monitoring.env from example..." + cp "$MONITORING_DIR/monitoring.env.example" "$MONITORING_DIR/monitoring.env" + echo "⚠️ Please review and update $MONITORING_DIR/monitoring.env as needed" + fi + + set -a + # shellcheck disable=SC1091 + . "$MONITORING_DIR/monitoring.env" + set +a +} + +load_env_if_present() { + if [ -f "$MONITORING_DIR/monitoring.env" ]; then + set -a + # shellcheck disable=SC1091 + . "$MONITORING_DIR/monitoring.env" + set +a + fi +} + +resolve_compose_cmd() { + if docker compose version > /dev/null 2>&1; then + COMPOSE_CMD=(docker compose) + elif command -v docker-compose > /dev/null 2>&1; then + COMPOSE_CMD=(docker-compose) + else + echo "❌ Error: Docker Compose is not installed." 
+ exit 1 + fi +} -# Function to check service health with timeout check_service() { local name=$1 local url=$2 local port=$3 - + if curl -s --max-time 5 --connect-timeout 3 "$url" > /dev/null 2>&1; then echo "✅ $name is running at http://localhost:$port" return 0 @@ -57,33 +424,123 @@ check_service() { fi } -# Check Jaeger -check_service "Jaeger" "http://localhost:16686/api/services" "16686" || true - -# Check Prometheus -check_service "Prometheus" "http://localhost:9090/-/healthy" "9090" || true - -# Check Grafana -check_service "Grafana" "http://localhost:3005/api/health" "3005" || true - -echo "" -echo "🎉 Monitoring setup complete!" -echo "" -echo "📊 Access your monitoring tools:" -echo " • Jaeger UI: http://localhost:16686" -echo " • Prometheus: http://localhost:9090" -echo " • Grafana: http://localhost:3005 (admin/admin)" -echo "" -echo "🔧 To enable monitoring in your Nexent backend:" -echo " 1. Set ENABLE_TELEMETRY=true in your .env file" -echo " 2. Install performance dependencies:" -echo " uv sync --extra performance" -echo " 3. Restart your Nexent backend service" -echo "" -echo "📈 Key Metrics to Monitor:" -echo " • Token Generation Rate (tokens/second)" -echo " • Time to First Token (TTFT)" -echo " • Request Duration" -echo " • Error Rates" -echo "" -echo "🛑 To stop monitoring services: docker-compose -f docker-compose-monitoring.yml down" +check_stack_health() { + echo "🔍 Checking service health..." 
+ check_service "OpenTelemetry Collector HTTP receiver" "http://localhost:${OTEL_COLLECTOR_HTTP_PORT:-4318}" "${OTEL_COLLECTOR_HTTP_PORT:-4318}" || true + + case "$LOCAL_STACK" in + phoenix) + check_service "Phoenix UI" "http://localhost:${PHOENIX_PORT:-6006}" "${PHOENIX_PORT:-6006}" || true + ;; + langfuse) + check_service "Langfuse UI" "http://localhost:${LANGFUSE_PORT:-3001}" "${LANGFUSE_PORT:-3001}" || true + ;; + langsmith) + echo "✅ LangSmith forwarding is configured for project: ${LANGSMITH_PROJECT:-nexent}" + ;; + grafana) + check_service "Grafana" "http://localhost:${GRAFANA_PORT:-3002}/api/health" "${GRAFANA_PORT:-3002}" || true + check_service "Tempo API" "http://localhost:${TEMPO_PORT:-3200}/ready" "${TEMPO_PORT:-3200}" || true + ;; + zipkin) + check_service "Zipkin UI" "http://localhost:${ZIPKIN_PORT:-9411}" "${ZIPKIN_PORT:-9411}" || true + ;; + esac +} + +start_stack() { + echo "🚀 Starting Nexent LLM Performance Monitoring Setup..." + + if ! docker info > /dev/null 2>&1; then + echo "❌ Error: Docker is not running. Please start Docker first." + exit 1 + fi + + resolve_compose_cmd + + if ! docker network ls --format '{{.Name}}' | grep -qx nexent_network; then + echo "🔗 Creating nexent_network..." + docker network create nexent_network + else + echo "✅ nexent_network already exists" + fi + + load_env_for_start + configure_stack + + local incompatible + incompatible="$(incompatible_containers "$LOCAL_STACK")" + if [ -n "$incompatible" ]; then + # shellcheck disable=SC2086 + remove_containers $incompatible + fi + + echo "🐳 Starting monitoring services with provider: $MONITORING_PROVIDER" + echo " Selected stack: $LOCAL_STACK" + "${COMPOSE_CMD[@]}" -f "$COMPOSE_FILE" --env-file "$MONITORING_DIR/monitoring.env" "${COMPOSE_PROFILES[@]}" up -d --remove-orphans + + echo "⏳ Waiting for services to start..." + sleep 10 + check_stack_health + + echo "" + echo "🎉 Monitoring setup complete!" 
+ print_access_hints + print_backend_hints + echo "" + echo "🔎 Key Trace Data to Inspect:" + echo " • Agent span hierarchy" + echo " • LLM generation spans" + echo " • Retriever and memory spans" + echo " • Tool call spans" + echo " • Error events" + print_uninstall_hints +} + +stop_or_uninstall_stack() { + local remove_data="$1" + + if ! docker info > /dev/null 2>&1; then + echo "❌ Error: Docker is not running. Please start Docker first." + exit 1 + fi + + load_env_if_present + configure_stack + + local containers + containers="$(stack_containers "$LOCAL_STACK")" + echo "🛑 Removing monitoring containers for stack: $LOCAL_STACK" + # shellcheck disable=SC2086 + remove_containers $containers + + if [ "$remove_data" = "true" ]; then + local volumes + volumes="$(stack_data_volumes "$LOCAL_STACK")" + if [ -n "$volumes" ]; then + echo "🧹 Removing data volumes for stack: $LOCAL_STACK" + # shellcheck disable=SC2086 + remove_volumes $volumes + else + echo "ℹ️ Stack '$LOCAL_STACK' has no dedicated local data volumes." + fi + echo "✅ Monitoring stack '$LOCAL_STACK' has been uninstalled." + else + echo "✅ Monitoring stack '$LOCAL_STACK' has been stopped. Data volumes were kept." + fi + + echo "" + echo "ℹ️ The shared Docker network 'nexent_network' is kept because it is also used by Nexent services." 
+} + +case "$ACTION" in + start) + start_stack + ;; + stop) + stop_or_uninstall_stack false + ;; + uninstall) + stop_or_uninstall_stack true + ;; +esac diff --git a/docker/uninstall.sh b/docker/uninstall.sh old mode 100644 new mode 100755 index ad9490f46..801a9f4f7 --- a/docker/uninstall.sh +++ b/docker/uninstall.sh @@ -1,16 +1,240 @@ #!/bin/bash -docker rm -f nexent-config -docker rm -f nexent-runtime -docker rm -f nexent-mcp -docker rm -f nexent-northbound -docker rm -f nexent-postgresql -docker rm -f nexent-minio -docker rm -f nexent-elasticsearch -docker rm -f nexent-data-process -docker rm -f nexent-web -docker rm -f nexent-redis -docker rm -f supabase-kong-mini -docker rm -f supabase-auth-mini -docker rm -f supabase-db-mini -docker network rm nexent_nexent \ No newline at end of file +if [ -z "$BASH_VERSION" ]; then + echo "❌ This script must be run with bash. Please use: bash uninstall.sh or ./uninstall.sh" + exit 1 +fi + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +DELETE_VOLUMES="" + +print_usage() { + echo "Usage: $0 [delete-all] [options]" + echo "" + echo "Uninstall Docker deployment for Nexent." + echo "" + echo "Options:" + echo " --delete-volumes true|false Control whether persistent data is removed" + echo " --remove-volumes Alias for --delete-volumes true" + echo " --keep-volumes Alias for --delete-volumes false" + echo " --help, -h Show this help message" + echo "" + echo "Examples:" + echo " bash uninstall.sh" + echo " bash uninstall.sh --delete-volumes false" + echo " bash uninstall.sh --delete-volumes true" + echo " bash uninstall.sh delete-all" +} + +sanitize_input() { + local input="$1" + printf "%s" "$input" | tr -d '\r' +} + +parse_bool_option() { + local value + value="$(sanitize_input "${1:-}")" + case "$value" in + true|TRUE|True|yes|YES|Yes|y|Y|1) return 0 ;; + false|FALSE|False|no|NO|No|n|N|0) return 1 ;; + *) + echo "❌ Invalid boolean value: $value. Use true or false." 
+      exit 1
+      ;;
+  esac
+}
+
+# Parse command-line arguments; a later flag overrides an earlier one.
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    delete-all)
+      DELETE_VOLUMES="true"
+      shift
+      ;;
+    --delete-volumes)
+      # Guard the value: with only one argument left, `shift 2` fails and
+      # `set -e` would end the script without any message.
+      if [[ $# -lt 2 ]]; then
+        echo "❌ --delete-volumes requires a value: true or false."
+        print_usage
+        exit 1
+      fi
+      DELETE_VOLUMES="$2"
+      shift 2
+      ;;
+    --remove-volumes)
+      DELETE_VOLUMES="true"
+      shift
+      ;;
+    --keep-volumes)
+      DELETE_VOLUMES="false"
+      shift
+      ;;
+    --help|-h)
+      print_usage
+      exit 0
+      ;;
+    *)
+      echo "❌ Unknown option: $1"
+      print_usage
+      exit 1
+      ;;
+  esac
+done
+
+# Export every variable defined in .env / .env.generated (when present) so
+# ROOT_DIR and the compose files' variables are visible below.
+if [ -f ".env" ]; then
+  set -a
+  # shellcheck source=/dev/null
+  source .env
+  set +a
+fi
+
+if [ -f ".env.generated" ]; then
+  set -a
+  # shellcheck source=/dev/null
+  source .env.generated
+  set +a
+fi
+
+# Print "v2 X.Y.Z" when `docker compose version` reports a version,
+# otherwise "v1 X.Y.Z" taken from `docker-compose --version`,
+# otherwise "unknown". Always returns 0.
+get_compose_version() {
+  if command -v docker &> /dev/null; then
+    local version_output
+    version_output=$(docker compose version 2>/dev/null)
+    if [[ $version_output =~ v([0-9]+\.[0-9]+\.[0-9]+) ]]; then
+      echo "v2 ${BASH_REMATCH[1]}"
+      return 0
+    fi
+  fi
+
+  if command -v docker-compose &> /dev/null; then
+    local version_output
+    version_output=$(docker-compose --version 2>/dev/null)
+    if [[ $version_output =~ ([0-9]+\.[0-9]+\.[0-9]+) ]]; then
+      echo "v1 ${BASH_REMATCH[1]}"
+      return 0
+    fi
+  fi
+
+  echo "unknown"
+  return 0
+}
+
+# Set docker_compose_command to the Compose invocation to use.
+# Exits with status 1 when Compose is not found, the version type is
+# unrecognized, or a V1 binary is older than 1.28.0.
+resolve_compose_command() {
+  local version_info
+  version_info="$(get_compose_version)"
+  if [[ $version_info == "unknown" ]]; then
+    echo "❌ Docker Compose not found or version detection failed"
+    exit 1
+  fi
+
+  local version_type version_number
+  version_type="$(echo "$version_info" | awk '{print $1}')"
+  version_number="$(echo "$version_info" | awk '{print $2}')"
+
+  case "$version_type" in
+    v1)
+      # Compare the components numerically. `[[ a < b ]]` compares strings,
+      # which ranks 1.9.0 above 1.28.0 and lets old releases through.
+      # The 10# prefix forces base 10 so a leading zero is not read as octal.
+      local v_major v_minor _v_patch
+      IFS=. read -r v_major v_minor _v_patch <<< "$version_number"
+      if (( 10#$v_major < 1 || (10#$v_major == 1 && 10#$v_minor < 28) )); then
+        echo "❌ Docker Compose V1 version is too old; please upgrade to V1.28.0+ or V2."
+        exit 1
+      fi
+      docker_compose_command="docker-compose"
+      ;;
+    v2)
+      docker_compose_command="docker compose"
+      ;;
+    *)
+      echo "❌ Unknown Docker Compose version type: $version_type"
+      exit 1
+      ;;
+  esac
+}
+
+# Return 0 when persistent data should be deleted, non-zero otherwise.
+# An explicit DELETE_VOLUMES value wins; without one the user is asked, and
+# a non-interactive stdin means "keep data".
+resolve_delete_volumes() {
+  if [ -n "$DELETE_VOLUMES" ]; then
+    parse_bool_option "$DELETE_VOLUMES"
+    return $?
+  fi
+
+  [ -t 0 ] || return 1
+
+  echo ""
+  echo "🧹 Delete Docker volumes and Nexent data directories?"
+  echo " This removes persistent data under ROOT_DIR, including elasticsearch, postgresql, redis, minio, scripts, and supabase volumes."
+  local answer
+  read -r -p " Delete data volumes? [y/N]: " answer
+  answer="$(sanitize_input "$answer")"
+  [[ "$answer" =~ ^[Yy]$ ]]
+}
+
+# Run `down --remove-orphans` for one compose file, skipping files that do
+# not exist.
+#   $1  compose file path
+#   $2  "true" to pass `-p nexent`
+#   $3  "true" to also pass `-v` (remove volumes)
+# A failing `down` is ignored so the remaining files are still processed.
+docker_compose_down_file() {
+  local compose_file="$1"
+  local use_project_name="$2"
+  local remove_volumes="$3"
+
+  [ -f "$compose_file" ] || return 0
+
+  local volume_args=()
+  if [ "$remove_volumes" = "true" ]; then
+    volume_args=(-v)
+  fi
+
+  # $docker_compose_command is left unquoted on purpose: "docker compose"
+  # must split into two words.
+  if [ "$use_project_name" = "true" ]; then
+    $docker_compose_command -p nexent -f "$compose_file" down --remove-orphans "${volume_args[@]}" || true
+  else
+    $docker_compose_command -f "$compose_file" down --remove-orphans "${volume_args[@]}" || true
+  fi
+}
+
+# Delete the known data subdirectories under ROOT_DIR (default:
+# $HOME/nexent-data). Returns 1 without deleting anything when the root
+# resolves to an empty string or "/".
+remove_nexent_data_dirs() {
+  local root_dir="${ROOT_DIR:-$HOME/nexent-data}"
+  root_dir="${root_dir%/}"
+
+  if [ -z "$root_dir" ] || [ "$root_dir" = "/" ]; then
+    echo "❌ Refusing to remove unsafe ROOT_DIR: ${root_dir:-}"
+    return 1
+  fi
+
+  local dirs=(
+    "$root_dir/elasticsearch"
+    "$root_dir/postgresql"
+    "$root_dir/redis"
+    "$root_dir/minio"
+    "$root_dir/volumes"
+    "$root_dir/openssh-server"
+    "$root_dir/scripts"
+  )
+
+  local dir
+  for dir in "${dirs[@]}"; do
+    if [ -e "$dir" ]; then
+      echo "🧹 Removing data directory: $dir"
+      rm -rf "$dir"
+    fi
+  done
+}
+
+main() {
+  local remove_volumes="false"
+  if resolve_delete_volumes; then
+    remove_volumes="true"
+  fi
+
+  resolve_compose_command
+
+  echo "🛑 Stopping and removing Docker deployment..."
+ if [ "$remove_volumes" = "true" ]; then + echo "⚠️ Data volumes will be deleted." + else + echo "ℹ️ Data volumes will be preserved." + fi + + docker_compose_down_file "docker-compose-monitoring.yml" false "$remove_volumes" + docker_compose_down_file "docker-compose-supabase.prod.yml" true "$remove_volumes" + docker_compose_down_file "docker-compose-supabase.yml" true "$remove_volumes" + docker_compose_down_file "docker-compose.prod.yml" true "$remove_volumes" + docker_compose_down_file "docker-compose.yml" true "$remove_volumes" + + if [ "$remove_volumes" = "true" ]; then + remove_nexent_data_dirs + fi + + echo "✅ Docker deployment removed." +} + +main diff --git a/frontend/app/[locale]/agents/AgentVersionCard.tsx b/frontend/app/[locale]/agents/AgentVersionCard.tsx index 756268f8c..4ef6f052e 100644 --- a/frontend/app/[locale]/agents/AgentVersionCard.tsx +++ b/frontend/app/[locale]/agents/AgentVersionCard.tsx @@ -45,6 +45,7 @@ import { searchAgentInfo } from "@/services/agentConfigService"; import { useAgentConfigStore } from "@/stores/agentConfigStore"; import { useAuthorizationContext } from "@/components/providers/AuthorizationProvider"; import log from "@/lib/logger"; +import { resolveAgentListTenantKey } from "@/lib/agentListTenant"; import { message } from "antd"; import { useQueryClient } from "@tanstack/react-query"; import AgentVersionCompareModal from "./versions/AgentVersionCompareModal"; @@ -148,7 +149,7 @@ export function VersionCardItem({ ); const { tools: toolList } = useToolList(); - const { agents: agentList } = useAgentList(user?.tenantId ?? null); + const { agents: agentList } = useAgentList(""); // Get current agent's permission from agent list const currentAgent = useMemo(() => { @@ -254,11 +255,7 @@ export function VersionCardItem({ if (store.currentAgentId === agentId) { const agentResult = await searchAgentInfo(agentId); if (agentResult.success && agentResult.data) { - const permissionFromList = currentAgent?.permission ?? 
undefined; - store.setCurrentAgent({ - ...agentResult.data, - permission: permissionFromList, - }); + store.setCurrentAgent(agentResult.data); store.triggerForceRefresh(); } } diff --git a/frontend/app/[locale]/agents/components/AgentConfigComp.tsx b/frontend/app/[locale]/agents/components/AgentConfigComp.tsx index 3a60e146d..f1bf5e0b2 100644 --- a/frontend/app/[locale]/agents/components/AgentConfigComp.tsx +++ b/frontend/app/[locale]/agents/components/AgentConfigComp.tsx @@ -1,6 +1,6 @@ "use client"; -import { useState, useCallback, useEffect } from "react"; +import { useState, useCallback } from "react"; import { useTranslation } from "react-i18next"; import { App, Button, Row, Col, Flex, Tooltip, Badge, Divider } from "antd"; import CollaborativeAgent from "./agentConfig/CollaborativeAgent"; @@ -12,12 +12,12 @@ import { updateToolList } from "@/services/mcpService"; import { useAgentConfigStore } from "@/stores/agentConfigStore"; import { useToolList } from "@/hooks/agent/useToolList"; import { useSkillList } from "@/hooks/agent/useSkillList"; -import { useAgentSkillInstances } from "@/hooks/agent/useAgentSkillInstances"; import { useExternalAgents } from "@/hooks/agent/useExternalAgents"; import McpConfigModal from "./agentConfig/McpConfigModal"; import A2AAgentDiscoveryModal from "./a2a/A2AAgentDiscoveryModal"; import { RefreshCw, Lightbulb, Plug, BlocksIcon, Globe } from "lucide-react"; +import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs"; interface AgentConfigCompProps {} @@ -28,26 +28,19 @@ export default function AgentConfigComp({}: AgentConfigCompProps) { // Get state from store const currentAgentId = useAgentConfigStore((state) => state.currentAgentId); const isCreatingMode = useAgentConfigStore((state) => state.isCreatingMode); + const isReadOnly = useAgentConfigStore((state) => state.isReadOnly()); const [isMcpModalOpen, setIsMcpModalOpen] = useState(false); const [isSkillModalOpen, setIsSkillModalOpen] = useState(false); 
const [isRefreshing, setIsRefreshing] = useState(false); const [isRefreshingSkill, setIsRefreshingSkill] = useState(false); const [showA2ADiscovery, setShowA2ADiscovery] = useState(false); + const showLegacyMcpConfig = false; + + // Use tool list hook for data management const { groupedTools, invalidate } = useToolList(); const { groupedSkills, invalidate: invalidateSkills } = useSkillList(); - const { skillInstances, invalidate: invalidateSkillInstances } = useAgentSkillInstances( - currentAgentId ?? null - ); const { invalidate: invalidateExternalAgents } = useExternalAgents(); - const setInitialSkills = useAgentConfigStore((state) => state.setInitialSkills); - - // Load skill instances when agent changes - useEffect(() => { - if (currentAgentId && skillInstances.length > 0) { - setInitialSkills(skillInstances); - } - }, [currentAgentId, skillInstances, setInitialSkills]); const handleRefreshTools = useCallback(async () => { setIsRefreshing(true); @@ -72,21 +65,17 @@ export default function AgentConfigComp({}: AgentConfigCompProps) { setIsRefreshingSkill(true); try { invalidateSkills(); - invalidateSkillInstances(); message.success(t("skillManagement.message.refreshSuccess")); } catch (error) { message.error(t("skillManagement.message.refreshFailed")); } finally { setIsRefreshingSkill(false); } - }, [invalidateSkills, invalidateSkillInstances]); + }, [invalidateSkills]); const handleSkillBuildSuccess = useCallback(() => { invalidateSkills(); - if (currentAgentId) { - invalidateSkillInstances(); - } - }, [invalidateSkills, invalidateSkillInstances, currentAgentId]); + }, [invalidateSkills]); return ( <> @@ -95,15 +84,15 @@ export default function AgentConfigComp({}: AgentConfigCompProps) { - -

{t("businessLogic.config.title")}

+ +

{t("businessLogic.config.title")}

- +

{t("collaborativeAgent.title")}

@@ -116,7 +105,6 @@ export default function AgentConfigComp({}: AgentConfigCompProps) { size="small" icon={} onClick={() => setShowA2ADiscovery(true)} - loading={isRefreshing} className="text-green-500 hover:!text-green-600 hover:!bg-green-50" title={t("toolManagement.refresh.title")} > @@ -126,118 +114,122 @@ export default function AgentConfigComp({}: AgentConfigCompProps) {
- + - - - -

{t("toolPool.title")}

- {t("toolPool.tooltip.functionGuide")}} - color="#ffffff" - styles={{ - root: { - backgroundColor: "#ffffff", - border: "1px solid #e5e7eb", - borderRadius: "6px", - boxShadow: "0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06)", - maxWidth: "800px", - minWidth: "700px", - width: "fit-content", - }, - }} - > - - -
- - - - - - - -
- - - - - - - - - - - -

{t("skillPool.title")}

-
- - - - - - - -
- - - - - - - - + {/* Tool/Skill Tabs */} + + + + {t("toolPool.title")} + {t("toolPool.tooltip.functionGuide")}} + color="#ffffff" + styles={{ + root: { + backgroundColor: "#ffffff", + border: "1px solid #e5e7eb", + borderRadius: "6px", + boxShadow: "0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06)", + maxWidth: "800px", + minWidth: "700px", + width: "fit-content", + }, + }} + > + + + + {t("skillPool.title")} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setIsMcpModalOpen(false)} /> diff --git a/frontend/app/[locale]/agents/components/AgentInfoComp.tsx b/frontend/app/[locale]/agents/components/AgentInfoComp.tsx index 9250af77f..b49842fb7 100644 --- a/frontend/app/[locale]/agents/components/AgentInfoComp.tsx +++ b/frontend/app/[locale]/agents/components/AgentInfoComp.tsx @@ -16,22 +16,12 @@ import { useAgentVersionDetail } from "@/hooks/agent/useAgentVersionDetail"; import { useAgentInfo } from "@/hooks/agent/useAgentInfo"; import AgentVersionPubulishModal from "../versions/AgentVersionPubulishModal"; -export interface AgentInfoCompProps { - isShowVersionManagePanel: boolean; - openVersionManagePanel: () => void; - closeVersionManagementPanel: () => void; -} - -export default function AgentInfoComp({ - isShowVersionManagePanel, - openVersionManagePanel, - closeVersionManagementPanel, -}: AgentInfoCompProps) { +export default function AgentInfoComp() { const { t } = useTranslation("common"); const isCreatingMode = useAgentConfigStore((state) => state.isCreatingMode); - const currentAgentPermission = useAgentConfigStore((state) => state.currentAgentPermission); const currentAgentId = useAgentConfigStore((state) => state.currentAgentId); + const isGenerating = useAgentConfigStore((state) => state.isGenerating); const isPanelActive = (currentAgentId != null && currentAgentId != undefined) || isCreatingMode; const { agentVersionList, total, invalidate: invalidateAgentVersionList } = useAgentVersionList(currentAgentId); @@ 
-42,8 +32,7 @@ export default function AgentInfoComp({ currentAgentId, agentInfo?.current_version_no ); - const isReadOnly = isPanelActive && !isCreatingMode && currentAgentPermission === "READ_ONLY"; - const isEditable = isPanelActive && !isReadOnly; + const isReadOnly = useAgentConfigStore((state) => state.isReadOnly()); // Save guard hook const saveGuard = useSaveGuard(); @@ -51,9 +40,6 @@ export default function AgentInfoComp({ // Debug drawer state const [isDebugDrawerOpen, setIsDebugDrawerOpen] = useState(false); - // Generation state shared with AgentGenerateDetail - const [isGenerating, setIsGenerating] = useState(false); - const [isPublishModalOpen, setIsPublishModalOpen] = useState(false); const handlePublishClick = () => { @@ -83,54 +69,21 @@ export default function AgentInfoComp({ className="w-full" > - -

+ +

{t("guide.steps.describeBusinessLogic.title")}

- - {!isCreatingMode && agentInfo?.current_version_no !== 0 && total > 0 && ( - - - - - - - {t("agent.version.currentVersion")} : - - {agentVersionDetail?.version.version_name} - - - {t("agent.version.totalVersions", { count: total ?? 0 })} - - - - - )} - + diff --git a/frontend/app/[locale]/agents/components/AgentManageComp.tsx b/frontend/app/[locale]/agents/components/AgentManageComp.tsx index c636486ab..7dabff4dd 100644 --- a/frontend/app/[locale]/agents/components/AgentManageComp.tsx +++ b/frontend/app/[locale]/agents/components/AgentManageComp.tsx @@ -7,20 +7,22 @@ import { FileInput, Plus, X } from "lucide-react"; import AgentList from "./agentManage/AgentList"; import { useAgentConfigStore } from "@/stores/agentConfigStore"; -import { importAgent } from "@/services/agentConfigService"; -import { useMutation, useQueryClient } from "@tanstack/react-query"; import { useAgentList } from "@/hooks/agent/useAgentList"; import { useAuthorizationContext } from "@/components/providers/AuthorizationProvider"; import log from "@/lib/logger"; import { useState } from "react"; -import { ImportAgentData } from "@/hooks/useAgentImport"; +import { + parseAgentImportFile, + selectFile, + type ImportAgentData, +} from "@/lib/agentImportUtils"; import AgentImportWizard from "@/components/agent/AgentImportWizard"; export default function AgentManageComp() { const { t } = useTranslation("common"); const { message } = App.useApp(); - const { user } = useAuthorizationContext(); + useAuthorizationContext(); // Get state from store const isCreatingMode = useAgentConfigStore((state) => state.isCreatingMode); @@ -32,51 +34,27 @@ export default function AgentManageComp() { const [importWizardData, setImportWizardData] = useState(null); - // Shared agent list via React Query - const { agents: agentList, isLoading: loading, refetch } = useAgentList(user?.tenantId ?? 
null); + // Always resolve tenant from auth on the agent dev page (matches published_list; avoids stale/wrong tenant_id query params) + const { agents: agentList, isLoading: loading, refetch } = useAgentList(""); // Handle import agent for space view - open wizard instead of direct import - const handleImportAgent = () => { - const fileInput = document.createElement("input"); - fileInput.type = "file"; - fileInput.accept = ".json"; - fileInput.onchange = async (event) => { - const file = (event.target as HTMLInputElement).files?.[0]; - if (!file) return; - - if (!file.name.endsWith(".json")) { - message.error(t("businessLogic.config.error.invalidFileType")); - return; - } - - try { - // Read and parse file - const fileContent = await file.text(); - let agentData: ImportAgentData; - - try { - agentData = JSON.parse(fileContent); - } catch (parseError) { - message.error(t("businessLogic.config.error.invalidFileType")); - return; - } - - // Validate structure - if (!agentData.agent_id || !agentData.agent_info) { - message.error(t("businessLogic.config.error.invalidFileType")); - return; - } - - // Open wizard with parsed data - setImportWizardData(agentData); - setImportWizardVisible(true); - } catch (error) { + const handleImportAgent = async () => { + const file = await selectFile(".json"); + if (!file) return; + + const agentData = await parseAgentImportFile(file, { + onParseError: (msgKey) => message.error(t(msgKey)), + onValidationError: (msgKey) => message.error(t(msgKey)), + onGenericError: (error) => { log.error("Failed to read import file:", error); message.error(t("businessLogic.config.error.agentImportFailed")); - } - }; + }, + }); - fileInput.click(); + if (!agentData) return; + + setImportWizardData(agentData); + setImportWizardVisible(true); }; return ( @@ -160,7 +138,7 @@ export default function AgentManageComp() {
void handleImportAgent()} > void; + isShowVersionManagePanel?: boolean; + onCloseVersionManagePanel?: () => void; +} + +export default function AgentSelectorHeader({ + onOpenVersionManage, + isShowVersionManagePanel = false, + onCloseVersionManagePanel, +}: AgentSelectorHeaderProps) { + const { t } = useTranslation("common"); + const { message } = App.useApp(); + const queryClient = useQueryClient(); + const checkUnsavedChanges = useSaveGuard(); + const confirm = useConfirmModal(); + const { token } = theme?.useToken?.() || {}; + const { user } = useAuthorizationContext(); + + // Fetch agent list internally + const { agents } = useAgentList(user?.tenantId ?? null); + + // Store state + const currentAgentId = useAgentConfigStore((state) => state.currentAgentId); + const setCurrentAgent = useAgentConfigStore((state) => state.setCurrentAgent); + const isCreatingMode = useAgentConfigStore((state) => state.isCreatingMode); + const enterCreateMode = useAgentConfigStore((state) => state.enterCreateMode); + const reset = useAgentConfigStore((state) => state.reset); + const hasUnsavedChanges = useAgentConfigStore((state) => state.hasUnsavedChanges); + + const { agentInfo } = useAgentInfo(currentAgentId); + const { agentVersionList, total } = useAgentVersionList(currentAgentId); + const { agentVersionDetail } = useAgentVersionDetail(currentAgentId, agentInfo?.current_version_no); + + // Call relationship modal state + const [callRelationshipModalVisible, setCallRelationshipModalVisible] = useState(false); + const [selectedAgentForRelationship, setSelectedAgentForRelationship] = useState(null); + + // A2A settings modal state + const [showA2ASettings, setShowA2ASettings] = useState(false); + const [selectedAgentForA2A, setSelectedAgentForA2A] = useState(null); + + // Dropdown open state + const [dropdownOpen, setDropdownOpen] = useState(false); + + // Mutations + const updateAgentMutation = useMutation({ + mutationFn: (payload: any) => updateAgentInfo(payload), + }); + + 
const deleteAgentMutation = useMutation({ + mutationFn: (agentId: number) => deleteAgent(agentId), + }); + + // Fetch A2A Server Settings when modal opens + const { data: a2aSettingsData, isLoading: isLoadingA2ASettings } = useQuery({ + queryKey: ["a2aServerSettings", selectedAgentForA2A?.id], + queryFn: () => a2aClientService.getServerSettings(Number(selectedAgentForA2A!.id)), + enabled: showA2ASettings && !!selectedAgentForA2A, + }); + + // Construct a2aAgentCard from supported_interfaces + const constructedA2AAgentCard = (() => { + const data = a2aSettingsData?.data; + if (!data?.supported_interfaces) return undefined; + + const interfaces = data.supported_interfaces; + const endpointId = data.endpoint_id; + const restEndpoints = interfaces.filter( + (iface: any) => iface.protocolBinding.toLowerCase() === "http+json" || iface.protocolBinding.toLowerCase() === "httprest" + ); + const jsonrpcEndpoints = interfaces.filter( + (iface: any) => + iface.protocolBinding.toLowerCase() === "http-json-rpc" || + iface.protocolBinding.toLowerCase() === "jsonrpc" || + iface.protocolBinding.toLowerCase() === "httpjsonrpc" + ); + + return { + endpoint_id: endpointId, + name: data.name || "", + description: data.description, + version: data.version, + streaming: data.streaming, + agent_card_url: `/nb/a2a/${endpointId}/.well-known/agent-card.json`, + rest_endpoints: { + message_send: `${restEndpoints[0]?.url}/message:send`, + message_stream: `${restEndpoints[0]?.url}/message:stream`, + tasks_get: `${restEndpoints[0]?.url}/tasks/{task_id}`, + }, + jsonrpc_url: jsonrpcEndpoints[0]?.url || "", + jsonrpc_methods: ["SendMessage", "SendStreamingMessage", "GetTask"], + }; + })(); + + // Import wizard state + const [importWizardVisible, setImportWizardVisible] = useState(false); + const [importWizardData, setImportWizardData] = useState(null); + + // Get current selected agent + const currentAgent = agents.find( + (agent: Agent) => currentAgentId !== null && String(agent.id) === 
String(currentAgentId) + ); + + // Handle import agent + const handleImportAgent = () => { + const fileInput = document.createElement("input"); + fileInput.type = "file"; + fileInput.accept = ".json"; + fileInput.onchange = async (event) => { + const file = (event.target as HTMLInputElement).files?.[0]; + if (!file) return; + + if (!file.name.endsWith(".json")) { + message.error(t("businessLogic.config.error.invalidFileType")); + return; + } + + try { + const fileContent = await file.text(); + let agentData: ImportAgentData; + + try { + agentData = JSON.parse(fileContent); + } catch (parseError) { + message.error(t("businessLogic.config.error.invalidFileType")); + return; + } + + if (!agentData.agent_id || !agentData.agent_info) { + message.error(t("businessLogic.config.error.invalidFileType")); + return; + } + + setImportWizardData(agentData); + setImportWizardVisible(true); + } catch (error) { + log.error("Failed to read import file:", error); + message.error(t("businessLogic.config.error.agentImportFailed")); + } + }; + + fileInput.click(); + }; + + // Handle view call relationship + const handleViewCallRelationship = (agent: Agent) => { + setSelectedAgentForRelationship(agent); + setCallRelationshipModalVisible(true); + setDropdownOpen(false); + }; + + const handleCloseCallRelationshipModal = () => { + setCallRelationshipModalVisible(false); + setSelectedAgentForRelationship(null); + }; + + // Handle view A2A agent settings + const handleViewA2AAgentSettings = (agent: Agent) => { + setSelectedAgentForA2A(agent); + setShowA2ASettings(true); + setDropdownOpen(false); + }; + + // Handle export agent + const handleExportAgent = async (agent: Agent) => { + try { + const result = await exportAgent(Number(agent.id)); + if (result.success && result.data) { + const blob = new Blob([JSON.stringify(result.data, null, 2)], { + type: "application/json", + }); + const url = URL.createObjectURL(blob); + const link = document.createElement("a"); + link.href = url; + 
link.download = `${agent.name || "agent"}.json`; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + URL.revokeObjectURL(url); + message.success(t("businessLogic.config.message.agentExportSuccess")); + } else { + message.error( + result.message || t("businessLogic.config.error.agentImportFailed") + ); + } + } catch (error) { + message.error(t("businessLogic.config.error.agentExportFailed")); + } + }; + + // Handle copy agent + const handleCopyAgent = async (agent: Agent) => { + try { + const detailResult = await searchAgentInfo(Number(agent.id)); + if (!detailResult.success || !detailResult.data) { + message.error(detailResult.message); + return; + } + const detail = detailResult.data; + + const copyName = `${detail.name || "agent"}_copy`; + const copyDisplayName = `${ + detail.display_name || t("agentConfig.agents.defaultDisplayName") + }${t("agent.copySuffix")}`; + + const tools = Array.isArray(detail.tools) ? detail.tools : []; + const unavailableTools = tools.filter( + (tool: any) => tool && tool.is_available === false + ); + const unavailableToolNames = unavailableTools + .map( + (tool: any) => + tool?.display_name || tool?.name || tool?.tool_name || "" + ) + .filter((name: string) => Boolean(name)); + + const enabledToolIds = tools + .filter((tool: any) => tool && tool.is_available !== false) + .map((tool: any) => Number(tool.id)) + .filter((id: number) => Number.isFinite(id)); + + const subAgentIds = ( + Array.isArray(detail.sub_agent_id_list) ? detail.sub_agent_id_list : [] + ) + .map((id: any) => Number(id)) + .filter((id: number) => Number.isFinite(id)); + + const createResult = await updateAgentMutation.mutateAsync({ + agent_id: undefined, // create + name: copyName, + display_name: copyDisplayName, + description: detail.description, + author: detail.author, + model_name: detail.model, + model_id: detail.model_id ?? 
undefined, + max_steps: detail.max_step, + provide_run_summary: detail.provide_run_summary, + enabled: detail.enabled, + business_description: detail.business_description, + duty_prompt: detail.duty_prompt, + constraint_prompt: detail.constraint_prompt, + few_shots_prompt: detail.few_shots_prompt, + business_logic_model_name: detail.business_logic_model_name ?? undefined, + business_logic_model_id: detail.business_logic_model_id ?? undefined, + enabled_tool_ids: enabledToolIds, + related_agent_ids: subAgentIds, + }); + + if (!createResult.success || !createResult.data?.agent_id) { + message.error( + createResult.message || t("agentConfig.agents.copyFailed") + ); + return; + } + const newAgentId = Number(createResult.data.agent_id); + + // Copy tool configuration + for (const tool of tools) { + if (!tool || tool.is_available === false) continue; + const params = + tool.initParams?.reduce((acc: Record, param: any) => { + acc[param.name] = param.value; + return acc; + }, {}) || {}; + try { + await updateToolConfig(Number(tool.id), newAgentId, params, true); + } catch (error) { + log.error("Failed to copy tool configuration:", error); + message.error(t("agentConfig.agents.copyFailed")); + return; + } + } + + // Refresh agent list + queryClient.invalidateQueries({ queryKey: ["agents"] }); + message.success(t("agentConfig.agents.copySuccess")); + + if (unavailableTools.length > 0) { + const names = + unavailableToolNames.join(", ") || + unavailableTools + .map((tool: any) => Number(tool?.id)) + .filter((id: number) => !Number.isNaN(id)) + .join(", "); + message.warning( + t("agentConfig.agents.copyUnavailableTools", { + count: unavailableTools.length, + names, + }) + ); + } + } catch (error) { + log.error("Failed to copy agent:", error); + message.error(t("agentConfig.agents.copyFailed")); + } + }; + + // Handle copy with confirmation + const handleCopyAgentWithConfirm = (agent: Agent) => { + confirm.confirm({ + title: t("agentConfig.agents.copyConfirmTitle"), + content: 
t("agentConfig.agents.copyConfirmContent", { + name: agent?.display_name || agent?.name || "", + }), + onOk: () => handleCopyAgent(agent), + }); + }; + + // Handle delete agent + const handleDeleteAgent = async (agent: Agent) => { + deleteAgentMutation.mutate(Number(agent.id), { + onSuccess: () => { + message.success( + t("businessLogic.config.error.agentDeleteSuccess", { + name: agent.display_name || agent.name || "", + }) + ); + + // Clear current agent if this was the selected agent + if ( + currentAgentId !== null && + String(currentAgentId) === String(agent.id) + ) { + setCurrentAgent(null); + } + + // Refresh agent list + queryClient.invalidateQueries({ queryKey: ["agents"] }); + }, + onError: () => { + message.error(t("businessLogic.config.error.agentDeleteFailed")); + }, + }); + }; + + // Handle delete with confirmation + const handleDeleteAgentWithConfirm = (agent: Agent) => { + confirm.confirm({ + title: t("businessLogic.config.modal.deleteTitle"), + content: t("businessLogic.config.modal.deleteContent", { + name: agent.display_name || agent.name || "", + }), + onOk: () => handleDeleteAgent(agent), + }); + }; + + // Handle select agent from dropdown + const handleSelectAgent = async (agentId: number | null) => { + if (agentId === null) return; + + const agent = agents.find((a: Agent) => String(a.id) === String(agentId)); + if (!agent) return; + + // Clear NEW mark when agent is selected for editing + if (agent.is_new === true) { + try { + const res = await clearAgentNewMark(agent.id); + if (!res?.success) { + log.warn("Failed to clear NEW mark on select:", res); + queryClient.invalidateQueries({ queryKey: ["agents"] }); + } + } catch (err) { + log.error("Failed to clear NEW mark on select:", err); + } + } + + // Guard unsaved changes + if (currentAgentId !== null || isCreatingMode) { + const canSwitch = await checkUnsavedChanges.saveWithModal(); + if (!canSwitch) return; + } + + // Load and set agent + try { + const result = await 
searchAgentInfo(Number(agent.id)); + if (result.success && result.data) { + setCurrentAgent(result.data); + } else { + message.error(result.message || t("agentConfig.agents.detailsLoadFailed")); + } + } catch (error) { + log.error("Failed to load agent detail:", error); + message.error(t("agentConfig.agents.detailsLoadFailed")); + } + }; + + // Dropdown menu items (only agents) + const agentMenuItems = agents.flatMap((agent: Agent, index: number) => { + const isAvailable = agent.is_available !== false; + const displayName = agent.display_name || ""; + const name = agent.name || ""; + + const agentItem = { + key: `agent-${agent.id}`, + label: ( +
+ + {/* Row 1: Name + Status */} +
+
+ + {!isAvailable && ( + { + const reasons = agent.unavailable_reasons || []; + if (reasons.includes('agent_not_found')) { + return t('subAgentPool.tooltip.unavailableAgent'); + } else if (reasons.includes('tool_unavailable')) { + return t('toolPool.tooltip.unavailableTool'); + } else if (reasons.includes('duplicate_name')) { + return t('agent.error.nameExists', { name }); + } else if (reasons.includes('duplicate_display_name')) { + return t('agent.error.displayNameExists', { displayName }); + } else if (reasons.includes('model_unavailable')) { + return t('agent.error.modelUnavailable'); + } + return t('subAgentPool.tooltip.unavailableAgent'); + })()} + > + + + )} + {agent.is_new && ( + + + {t("space.new", "NEW")} + + + )} + {displayName && ( + {displayName} + )} + +
+ {agent.is_a2a_server && ( + + +
+
+
+ {/* Row 2: Description */} +
+ {agent.description} +
+
+
+ ), + onClick: () => handleSelectAgent(Number(agent.id)), + }; + + // Add divider after each item except the last one + const divider = index < agents.length - 1 + ? { key: `divider-${agent.id}`, type: 'divider' as const } + : null; + + return divider ? [agentItem, divider] : [agentItem]; + }); + + return ( + <> +
+ + {/* Left column: Agent Config */} + + triggerNode.parentNode as HTMLElement} + styles={{ + root: { + minWidth: 0, + width: '100%', + } + }} + > +
+
+ {hasUnsavedChanges && ( + + + + )} + {!hasUnsavedChanges && } +
+
+
+ {isCreatingMode + ? t("subAgentPool.button.create") + : currentAgent?.display_name || currentAgent?.name || t("agentConfig.agents.selectAgent")} +
+
+ {isCreatingMode + ? t("subAgentPool.button.create") + : currentAgent?.description || t("agentConfig.agents.noAgentSelected")} +
+
+ +
+
+ + + + {/* Right column: Agent Info */} + + {currentAgentId != null && agentInfo?.current_version_no !== 0 && total > 0 && ( + + + + + {agentVersionDetail?.version.version_name} + + + / {t("agent.version.totalVersions", { count: total ?? 0 })} + + + )} + {/* Right side: Agent count + Version management button */} + + {/* Create and Import buttons outside dropdown */} + + + + + + + + +
+ +
+ + {/* Import Wizard Modal */} + { + setImportWizardVisible(false); + setImportWizardData(null); + }} + initialData={importWizardData} + onImportComplete={() => { + setImportWizardVisible(false); + setImportWizardData(null); + queryClient.invalidateQueries({ queryKey: ["agents"] }); + }} + /> + + {/* Call Relationship Modal */} + {selectedAgentForRelationship && ( + + )} + + {/* A2A Server Settings Modal */} + { + setShowA2ASettings(false); + setSelectedAgentForA2A(null); + }} + loading={isLoadingA2ASettings} + footer={null} + zIndex={1050} + > + {selectedAgentForA2A && constructedA2AAgentCard ? ( + + ) : ( +
+ {t("a2a.service.getServerSettingsFailed", "Failed to load A2A settings")} +
+ )} +
+ + ); +} diff --git a/frontend/app/[locale]/agents/components/agentConfig/CollaborativeAgent.tsx b/frontend/app/[locale]/agents/components/agentConfig/CollaborativeAgent.tsx index 9c664e8c3..d3090b369 100644 --- a/frontend/app/[locale]/agents/components/agentConfig/CollaborativeAgent.tsx +++ b/frontend/app/[locale]/agents/components/agentConfig/CollaborativeAgent.tsx @@ -16,7 +16,6 @@ export default function CollaborativeAgent() { const currentAgentId = useAgentConfigStore((state) => state.currentAgentId); const isCreatingMode = useAgentConfigStore((state) => state.isCreatingMode); - const currentAgentPermission = useAgentConfigStore((state) => state.currentAgentPermission); const editedAgent = useAgentConfigStore((state) => state.editedAgent); const updateSubAgentIds = useAgentConfigStore((state) => state.updateSubAgentIds); const updateExternalSubAgentIds = useAgentConfigStore((state) => state.updateExternalSubAgentIds); @@ -35,7 +34,8 @@ export default function CollaborativeAgent() { (agent: A2AExternalAgent) => externalSubAgentIdList.includes(agent.id) ); - const editable = !!isCreatingMode || (currentAgentId != null && currentAgentPermission !== "READ_ONLY"); + // isReadOnly from store: isCreatingMode → false, READ_ONLY permission → true + const isReadOnly = useAgentConfigStore((state) => state.isReadOnly()); // Related internal agent IDs const relatedAgentIds = Array.isArray(editedAgent?.sub_agent_id_list) ? 
editedAgent.sub_agent_id_list : []; @@ -93,6 +93,8 @@ export default function CollaborativeAgent() { const result = await a2aClientService.addRelation(Number(currentAgentId), externalAgentId); if (result.success) { messageApi.success(t("a2a.service.addRelationSuccess")); + // Sync the store so save() sends the updated external_sub_agent_id_list + updateExternalSubAgentIds([...externalSubAgentIdList, externalAgentId]); loadExternalRelatedAgents(); } else { messageApi.error(result.message || t("a2a.service.addRelationFailed")); @@ -117,6 +119,8 @@ export default function CollaborativeAgent() { const result = await a2aClientService.removeRelation(Number(currentAgentId), agentId); if (result.success) { messageApi.success(t("a2a.service.removeRelationSuccess")); + // Sync the store so save() sends the updated external_sub_agent_id_list + updateExternalSubAgentIds(externalSubAgentIdList.filter((id) => id !== agentId)); loadExternalRelatedAgents(); } else { messageApi.error(result.message || t("a2a.service.removeRelationFailed")); @@ -163,14 +167,14 @@ export default function CollaborativeAgent() {
@@ -183,8 +187,8 @@ export default function CollaborativeAgent() { {relatedInternalAgents.map((agent: Agent) => ( handleRemoveInternalAgent(Number(agent.id)) : undefined} + closable={!isReadOnly} + onClose={!isReadOnly ? () => handleRemoveInternalAgent(Number(agent.id)) : undefined} className="bg-blue-50 text-blue-700 border-blue-200" > {agent.display_name || agent.name} @@ -199,8 +203,8 @@ export default function CollaborativeAgent() { {displayExternalAgents.map((agent) => ( handleRemoveExternalAgent(agent.id) : undefined} + closable={!isReadOnly} + onClose={!isReadOnly ? () => handleRemoveExternalAgent(agent.id) : undefined} className="bg-green-50 text-green-700 border-green-200" > diff --git a/frontend/app/[locale]/agents/components/agentConfig/McpConfigModal.tsx b/frontend/app/[locale]/agents/components/agentConfig/McpConfigModal.tsx index fc14a89af..277e85d3d 100644 --- a/frontend/app/[locale]/agents/components/agentConfig/McpConfigModal.tsx +++ b/frontend/app/[locale]/agents/components/agentConfig/McpConfigModal.tsx @@ -16,6 +16,7 @@ import { App, Upload, Tabs, + Tag, } from "antd"; import { Trash, @@ -88,6 +89,7 @@ export default function McpConfigModal({ const [newServerName, setNewServerName] = useState(""); const [newServerUrl, setNewServerUrl] = useState(""); const [newServerAuthorizationToken, setNewServerAuthorizationToken] = useState(""); + const [newServerCustomHeaders, setNewServerCustomHeaders] = useState(""); const [toolsModalVisible, setToolsModalVisible] = useState(false); const [currentServerTools, setCurrentServerTools] = useState([]); @@ -104,6 +106,7 @@ export default function McpConfigModal({ const [containerPort, setContainerPort] = useState( undefined ); + const [containerServiceName, setContainerServiceName] = useState(""); const [logsModalVisible, setLogsModalVisible] = useState(false); const [currentContainerId, setCurrentContainerId] = useState(""); @@ -172,16 +175,33 @@ export default function McpConfigModal({ return; } + // Parse 
custom headers + let parsedCustomHeaders: Record | null = null; + if (newServerCustomHeaders.trim()) { + try { + parsedCustomHeaders = JSON.parse(newServerCustomHeaders.trim()); + if (typeof parsedCustomHeaders !== 'object' || parsedCustomHeaders === null || Array.isArray(parsedCustomHeaders)) { + message.error(t("mcpConfig.message.invalidCustomHeaders")); + return; + } + } catch { + message.error(t("mcpConfig.message.invalidCustomHeadersJson")); + return; + } + } + setAddingServer(true); const result = await handleAddServer( newServerUrl.trim(), serverName, - newServerAuthorizationToken.trim() || null + newServerAuthorizationToken.trim() || null, + parsedCustomHeaders ); if (result.success) { setNewServerName(""); setNewServerUrl(""); setNewServerAuthorizationToken(""); + setNewServerCustomHeaders(""); message.success(result.messageKey ? t(result.messageKey) : t("mcpService.message.addServerSuccess")); } else { message.error(result.messageKey ? t(result.messageKey) : (result.message || t("mcpConfig.message.addServerFailed"))); @@ -278,6 +298,7 @@ export default function McpConfigModal({ service_name: result.data.mcp_name, mcp_url: result.data.mcp_server, authorization_token: result.data.authorization_token, + custom_headers: result.data.custom_headers, }); } else { message.error(result.messageKey ? 
t(result.messageKey) : (result.message || t("mcpConfig.message.getMcpRecordFailed"))); @@ -286,7 +307,7 @@ export default function McpConfigModal({ setLoadingMcpRecord(false); }; - const onSaveEditedServer = async (name: string, url: string, authorizationToken?: string | null) => { + const onSaveEditedServer = async (name: string, url: string, authorizationToken?: string | null, customHeaders?: Record | null) => { if (!editingServer) return; if (!name.trim() || !url.trim()) { message.error(t("mcpConfig.message.nameAndUrlRequired")); @@ -306,11 +327,11 @@ export default function McpConfigModal({ setUpdatingServer(true); const result = await handleUpdateServer( - editingServer.service_name, - editingServer.mcp_url, + editingServer.mcp_id, name.trim(), url.trim(), - authorizationToken + authorizationToken, + customHeaders ); if (result.success) { setEditServerModalVisible(false); @@ -347,12 +368,13 @@ export default function McpConfigModal({ } setAddingContainer(true); - const result = await handleAddContainer(config, containerPort); + const result = await handleAddContainer(config, containerPort, containerServiceName.trim() || undefined); if (!result.success) { message.error(result.messageKey ? t(result.messageKey) : (result.message || t("mcpConfig.message.addContainerFailed"))); } else { setContainerConfigJson(""); setContainerPort(undefined); + setContainerServiceName(""); message.success(result.messageKey ? t(result.messageKey) : t("mcpService.message.addContainerSuccess")); } setAddingContainer(false); @@ -561,9 +583,28 @@ export default function McpConfigModal({ title: t("mcpConfig.serverList.column.url"), dataIndex: "mcp_url", key: "mcp_url", - width: "40%", + width: "30%", ellipsis: true, }, + { + title: t("mcpConfig.serverList.column.enabled"), + key: "enabled", + width: "10%", + render: (_: any, record: any) => { + const isEnabled = record.enabled; + return isEnabled ? 
( + + {t("mcpConfig.serverList.enabled.yes")} + + ) : ( + + + {t("mcpConfig.serverList.enabled.no")} + + + ); + }, + }, { title: t("mcpConfig.serverList.column.action"), key: "action", @@ -831,7 +872,7 @@ export default function McpConfigModal({ children: ( - +
+ setNewServerCustomHeaders(e.target.value)} + rows={2} + disabled={actionsLocked || addingServer} + style={{ fontSize: 14 }} + />
+ {t("mcpConfig.addContainer.serviceName")}: + + setContainerServiceName(e.target.value)} + style={{ width: 150 }} + maxLength={20} + disabled={actionsLocked} + /> + {t("mcpConfig.addContainer.port")}: @@ -1226,7 +1288,6 @@ export default function McpConfigModal({ size="small" pagination={false} locale={{ emptyText: t("mcpConfig.serverList.empty") }} - scroll={{ y: 300 }} style={{ width: "100%" }} />
@@ -1253,7 +1314,6 @@ export default function McpConfigModal({ size="small" pagination={false} locale={{ emptyText: t("mcpConfig.containerList.empty") }} - scroll={{ y: 300 }} style={{ width: "100%" }} />
@@ -1277,7 +1337,6 @@ export default function McpConfigModal({ size="small" pagination={false} locale={{ emptyText: t("mcpConfig.openapiService.list.empty") }} - scroll={{ y: 300 }} style={{ width: "100%" }} />
@@ -1304,6 +1363,7 @@ export default function McpConfigModal({ initialName={editingServer?.service_name || ""} initialUrl={editingServer?.mcp_url || ""} initialAuthorizationToken={editingServer?.authorization_token || null} + initialCustomHeaders={editingServer?.custom_headers || null} loading={updatingServer || loadingMcpRecord} /> diff --git a/frontend/app/[locale]/agents/components/agentConfig/SkillBuildModal.tsx b/frontend/app/[locale]/agents/components/agentConfig/SkillBuildModal.tsx index 81704ac68..7f969edb9 100644 --- a/frontend/app/[locale]/agents/components/agentConfig/SkillBuildModal.tsx +++ b/frontend/app/[locale]/agents/components/agentConfig/SkillBuildModal.tsx @@ -53,6 +53,7 @@ import { import { fetchSkillFiles, fetchSkillFileContent, + SkillFilesAccessDeniedError, type SkillFileNode, } from "@/services/agentConfigService"; import { MarkdownRenderer } from "@/components/ui/markdownRenderer"; @@ -520,6 +521,10 @@ export default function SkillBuildModal({ setActiveSkillTab("SKILL.md"); } catch (error) { log.error("Failed to load skill files:", error); + if (error instanceof SkillFilesAccessDeniedError) { + message.warning(error.message); + return; + } // Fallback to basic content const skill = allSkills.find((s) => s.name === skillName); if (skill?.content) { diff --git a/frontend/app/[locale]/agents/components/agentConfig/SkillDetailModal.tsx b/frontend/app/[locale]/agents/components/agentConfig/SkillDetailModal.tsx index 075229d57..4161a3b1a 100644 --- a/frontend/app/[locale]/agents/components/agentConfig/SkillDetailModal.tsx +++ b/frontend/app/[locale]/agents/components/agentConfig/SkillDetailModal.tsx @@ -2,10 +2,14 @@ import { useState, useEffect } from "react"; import { useTranslation } from "react-i18next"; -import { Modal, Descriptions, Tag, Tree } from "antd"; +import { Alert, Modal, Descriptions, Tag, Tree } from "antd"; import type { TreeProps } from "antd/es/tree"; import { Skill } from "@/types/agentConfig"; -import { fetchSkillFiles, 
fetchSkillFileContent } from "@/services/agentConfigService"; +import { + fetchSkillFiles, + fetchSkillFileContent, + SkillFilesAccessDeniedError, +} from "@/services/agentConfigService"; import { MarkdownRenderer } from "@/components/ui/markdownRenderer"; import { buildTreeData, @@ -19,6 +23,7 @@ import { } from "@/lib/skillFileUtils"; import type { ExtendedSkillFileNode } from "@/types/skill"; import { SKILL_DETAIL_CONTENT_HEIGHT } from "@/types/skill"; +import log from "@/lib/logger"; interface SkillDetailModalProps { skill: Skill | null; @@ -35,6 +40,7 @@ export default function SkillDetailModal({ skill, open, onClose }: SkillDetailMo const [loadingContent, setLoadingContent] = useState(false); const [loadingTree, setLoadingTree] = useState(false); const [expandedKeys, setExpandedKeys] = useState([]); + const [fileTreeMessage, setFileTreeMessage] = useState(null); useEffect(() => { if (skill && open) { @@ -51,6 +57,7 @@ export default function SkillDetailModal({ skill, open, onClose }: SkillDetailMo const loadSkillFiles = async () => { if (!skill) return; setLoadingTree(true); + setFileTreeMessage(null); try { const files = await fetchSkillFiles(skill.name); const normalizedFiles = normalizeSkillFiles(files); @@ -59,7 +66,11 @@ export default function SkillDetailModal({ skill, open, onClose }: SkillDetailMo setTreeData(built); setExpandedKeys(collectDirKeys(built)); } catch (error) { - console.error("Failed to load skill files:", error); + if (error instanceof SkillFilesAccessDeniedError) { + setFileTreeMessage(error.message); + } else { + log.error("Failed to load skill files:", error); + } setTreeData([]); } finally { setLoadingTree(false); @@ -76,7 +87,7 @@ export default function SkillDetailModal({ skill, open, onClose }: SkillDetailMo const content = await fetchSkillFileContent(skill.name, relativePath); setFileContent(content || ""); } catch (error) { - console.error("Failed to load file content:", error); + log.error("Failed to load file content:", 
error); setFileContent(""); } finally { setLoadingContent(false); @@ -88,6 +99,7 @@ export default function SkillDetailModal({ skill, open, onClose }: SkillDetailMo setFileContent(""); setTreeData([]); setExpandedKeys([]); + setFileTreeMessage(null); onClose(); }; @@ -249,6 +261,13 @@ export default function SkillDetailModal({ skill, open, onClose }: SkillDetailMo
{t("common.loading")}
+ ) : fileTreeMessage ? ( + ) : treeData.length > 0 ? ( state.currentAgentPermission - ); - - const isReadOnly = !isCreatingMode && currentAgentId !== undefined && currentAgentPermission === "READ_ONLY"; - - const editable = (currentAgentId || isCreatingMode) && !isReadOnly; + // Use prop if provided, otherwise fall back to store + const storeIsReadOnly = useAgentConfigStore((state) => state.isReadOnly()); + const isReadOnly = isReadOnlyProp ?? storeIsReadOnly; const originalSelectedSkills = useAgentConfigStore( (state) => state.editedAgent.skills @@ -47,6 +47,9 @@ export default function SkillManagement({ const [activeTabKey, setActiveTabKey] = useState(""); const [selectedSkill, setSelectedSkill] = useState(null); const [isDetailModalOpen, setIsDetailModalOpen] = useState(false); + const [configModalSkill, setConfigModalSkill] = useState(null); + const [configModalOpen, setConfigModalOpen] = useState(false); + const [skillInstanceMap, setSkillInstanceMap] = useState>>({}); useEffect(() => { if (groupedSkills.length > 0 && !activeTabKey) { @@ -54,8 +57,38 @@ export default function SkillManagement({ } }, [groupedSkills, activeTabKey]); + // Fetch per-agent skill instances to get saved config_values + useEffect(() => { + if (!currentAgentId || isCreatingMode) { + setSkillInstanceMap({}); + return; + } + + let cancelled = false; + (async () => { + try { + const result = await fetchSkillInstances(Number(currentAgentId), 0); + if (result.success && result.data) { + const map: Record> = {}; + for (const instance of result.data) { + if (instance.config_values && typeof instance.config_values === "object") { + map[instance.skill_id] = instance.config_values; + } + } + if (!cancelled) { + setSkillInstanceMap(map); + } + } + } catch (err) { + log.error("Failed to fetch skill instances:", err); + } + })(); + + return () => { cancelled = true; }; + }, [currentAgentId, isCreatingMode]); + const handleSkillClick = (skill: Skill) => { - if (!editable || isReadOnly) return; + if 
(isReadOnly) return; const currentSkills = useAgentConfigStore.getState().editedAgent.skills; const isCurrentlySelected = currentSkills.some( @@ -68,8 +101,36 @@ export default function SkillManagement({ ); updateSkills(newSelectedSkills); } else { - const newSelectedSkills = [...currentSkills, skill]; - updateSkills(newSelectedSkills); + // In uninstantiated mode, skillInstanceMap is empty — preserve skill.config_values (template defaults) + const savedConfigValues = skillInstanceMap[skill.skill_id] || null; + const skillWithValues: Skill = { + ...skill, + config_values: savedConfigValues !== null ? savedConfigValues : (skill.config_values || {}), + }; + + // Check if skill has required params (optional: false) without saved values. + // In uninstantiated mode, fall back to skill.config_values (template defaults). + const effectiveConfigValues = savedConfigValues !== null ? savedConfigValues : (skill.config_values || {}); + const hasRequiredParams = (skill.config_schemas || []).some( + (schema: SkillParam) => + schema.required && + (effectiveConfigValues[schema.name] === undefined || + effectiveConfigValues[schema.name] === null || + effectiveConfigValues[schema.name] === "") + ); + + // Special case: search-knowledge-base always opens the config modal for mandatory KB selection. 
+ const isKnowledgeBaseSkill = skill.name === "search-knowledge-base"; + + if (hasRequiredParams || isKnowledgeBaseSkill) { + // Force open config modal + setConfigModalSkill(skillWithValues); + setConfigModalOpen(true); + } else { + // No required params missing — add directly to selected skills + const newSelectedSkills = [...currentSkills, skillWithValues]; + updateSkills(newSelectedSkills); + } } }; @@ -98,6 +159,53 @@ export default function SkillManagement({ }); }; + const handleConfigClick = (skill: Skill, e: React.MouseEvent) => { + e.stopPropagation(); + const savedConfigValues = skillInstanceMap[skill.skill_id] || null; + // In uninstantiated mode, skillInstanceMap is empty — preserve skill.config_values (template defaults) + setConfigModalSkill({ + ...skill, + config_values: savedConfigValues !== null ? savedConfigValues : (skill.config_values || {}), + }); + setConfigModalOpen(true); + }; + + const handleSkillConfigSave = (skill: Skill, savedParams: SkillParam[]) => { + // Build the config_values dict from saved params + const configValues: Record = {}; + for (const p of savedParams) { + configValues[p.name] = p.value; + } + + // Update skillInstanceMap so the map stays in sync with saved data + setSkillInstanceMap((prev) => ({ + ...prev, + [skill.skill_id]: configValues, + })); + + // Update the skill in the edited agent's skills list with the new params + const currentSkills = useAgentConfigStore.getState().editedAgent.skills; + const existingIndex = currentSkills.findIndex( + (s) => s.skill_id === skill.skill_id + ); + + const updatedSkill: Skill = { + ...skill, + config_values: configValues, + }; + + let updatedSkills: Skill[]; + if (existingIndex >= 0) { + // Replace existing entry with updated config + updatedSkills = [...currentSkills]; + updatedSkills[existingIndex] = updatedSkill; + } else { + // Skill not yet in list — add it (came from forced modal open) + updatedSkills = [...currentSkills, updatedSkill]; + } + updateSkills(updatedSkills); + 
}; + const tabItems = skillGroups.map((group) => { return { key: group.key, @@ -106,7 +214,7 @@ export default function SkillManagement({ {group.skills.map((skill) => { const isSelected = originalSelectedSkillIdsSet.has(skill.skill_id); - const isDisabled = isReadOnly; + const hasConfigurableParams = + Array.isArray(skill.config_schemas) && skill.config_schemas.length > 0; return (
handleSkillClick(skill)} > {skill.name}
+ {isSelected && hasConfigurableParams && ( + handleConfigClick(skill, e)} + /> + )} +
{skillGroups.length === 0 ? ( -
+
{t("skillPool.noSkills")}
) : ( @@ -179,8 +295,8 @@ export default function SkillManagement({ height: "100%", }} tabBarStyle={{ - minWidth: "80px", - maxWidth: "100px", + minWidth: "120px", + maxWidth: "120px", padding: "4px 0", margin: 0, }} @@ -195,6 +311,25 @@ export default function SkillManagement({ setSelectedSkill(null); }} /> + + {configModalSkill && ( + { + setConfigModalOpen(false); + setConfigModalSkill(null); + }} + onSave={(params) => { + if (configModalSkill) { + handleSkillConfigSave(configModalSkill, params); + } + }} + skill={configModalSkill} + initialParams={configModalSkill.config_schemas || []} + currentAgentId={currentAgentId} + isCreatingMode={isCreatingMode} + /> + )}
); } diff --git a/frontend/app/[locale]/agents/components/agentConfig/ToolManagement.tsx b/frontend/app/[locale]/agents/components/agentConfig/ToolManagement.tsx index 909592345..993795c98 100644 --- a/frontend/app/[locale]/agents/components/agentConfig/ToolManagement.tsx +++ b/frontend/app/[locale]/agents/components/agentConfig/ToolManagement.tsx @@ -18,6 +18,7 @@ interface ToolManagementProps { toolGroups: ToolGroup[]; isCreatingMode?: boolean; currentAgentId?: number | undefined; + isReadOnly?: boolean; } // Tool types that require knowledge base selection @@ -34,11 +35,17 @@ const TOOLS_REQUIRING_EMBEDDING = [ "knowledge_base_search", ]; -// Tool types that require VLM model -const TOOLS_REQUIRING_VLM = [ +// Tool types that require the image understanding model +const TOOLS_REQUIRING_IMAGE_UNDERSTANDING = [ "analyze_image", ]; +// Tool types that require the video understanding model +const TOOLS_REQUIRING_VIDEO_UNDERSTANDING = [ + "analyze_audio", + "analyze_video", +]; + function getToolKbType( toolName: string ): "knowledge_base_search" | "dify_search" | "datamate_search" | "idata_search" | "haotian_search" | null { @@ -53,9 +60,18 @@ function getToolKbType( /** * Check if a tool requires VLM model but VLM is not available */ -function isToolDisabledDueToVlm(toolName: string, vlmAvailable: boolean): boolean { - if (!TOOLS_REQUIRING_VLM.includes(toolName)) return false; - return !vlmAvailable; +function isToolDisabledDueToVlm( + toolName: string, + imageUnderstandingAvailable: boolean, + videoUnderstandingAvailable: boolean +): boolean { + if (TOOLS_REQUIRING_IMAGE_UNDERSTANDING.includes(toolName)) { + return !imageUnderstandingAvailable; + } + if (TOOLS_REQUIRING_VIDEO_UNDERSTANDING.includes(toolName)) { + return !videoUnderstandingAvailable; + } + return false; } /** @@ -74,20 +90,15 @@ export default function ToolManagement({ toolGroups, isCreatingMode, currentAgentId, + isReadOnly: isReadOnlyProp, }: ToolManagementProps) { const { t } = 
useTranslation("common"); const queryClient = useQueryClient(); const { confirm } = useConfirmModal(); - // Get current agent permission from store - const currentAgentPermission = useAgentConfigStore( - (state) => state.currentAgentPermission - ); - - // Check if current agent is read-only (only when agent is selected and permission is READ_ONLY) - const isReadOnly = !isCreatingMode && currentAgentId !== undefined && currentAgentPermission === "READ_ONLY"; - - const editable = (currentAgentId || isCreatingMode) && !isReadOnly; + // Use prop if provided, otherwise fall back to store + const storeIsReadOnly = useAgentConfigStore((state) => state.isReadOnly()); + const isReadOnly = isReadOnlyProp ?? storeIsReadOnly; // Get state from store const originalSelectedTools = useAgentConfigStore( @@ -102,7 +113,11 @@ export default function ToolManagement({ // Use tool list hook for data management const { availableTools } = useToolList(); - const { isVlmAvailable, isEmbeddingAvailable } = useConfig(); + const { + isImageUnderstandingAvailable, + isVideoUnderstandingAvailable, + isEmbeddingAvailable, + } = useConfig(); // Prefetch knowledge bases for KB tools const { prefetchKnowledgeBases } = usePrefetchKnowledgeBases(); @@ -304,7 +319,7 @@ export default function ToolManagement({ VLM > Embedding @@ -380,9 +399,9 @@ export default function ToolManagement({ isSelected ? "bg-blue-100 border-blue-400 shadow-md" : "border-gray-200 hover:border-blue-300 hover:shadow-md" - } ${editable && !isDisabled ? "cursor-pointer" : "cursor-not-allowed opacity-60"}`} + } ${!isReadOnly && !isDisabled ? "cursor-pointer" : "cursor-not-allowed opacity-60"}`} onClick={ - editable && !isDisabled + !isReadOnly && !isDisabled ? () => handleToolClick(tool.id) : undefined } @@ -428,9 +447,9 @@ export default function ToolManagement({
{ e.stopPropagation(); handleToolSettingsClick(tool); @@ -467,7 +486,11 @@ export default function ToolManagement({ > {group.tools.map((tool) => { const isSelected = originalSelectedToolIdsSet.has(tool.id); - const isDisabledDueToVlm = isToolDisabledDueToVlm(tool.name, isVlmAvailable); + const isDisabledDueToVlm = isToolDisabledDueToVlm( + tool.name, + isImageUnderstandingAvailable, + isVideoUnderstandingAvailable + ); const isDisabledDueToEmbedding = isToolDisabledDueToEmbedding(tool.name, isEmbeddingAvailable); const isDisabled = isDisabledDueToVlm || isDisabledDueToEmbedding || isReadOnly; // Tooltip priority: permission > VLM > Embedding @@ -485,9 +508,9 @@ export default function ToolManagement({ isSelected ? "bg-blue-100 border-blue-400 shadow-md" : "border-gray-200 hover:border-blue-300 hover:shadow-md" - } ${editable && !isDisabled ? "cursor-pointer" : "cursor-not-allowed opacity-60"}`} + } ${!isReadOnly && !isDisabled ? "cursor-pointer" : "cursor-not-allowed opacity-60"}`} onClick={ - editable && !isDisabled ? () => handleToolClick(tool.id) : undefined + !isReadOnly && !isDisabled ? () => handleToolClick(tool.id) : undefined } >
@@ -531,9 +554,9 @@ export default function ToolManagement({
{ e.stopPropagation(); handleToolSettingsClick(tool); @@ -575,8 +598,8 @@ export default function ToolManagement({ height: "100%", }} tabBarStyle={{ - minWidth: "80px", - maxWidth: "100px", + minWidth: "120px", + maxWidth: "120px", padding: "4px 0", margin: 0, }} diff --git a/frontend/app/[locale]/agents/components/agentConfig/skill/SkillConfigModal.tsx b/frontend/app/[locale]/agents/components/agentConfig/skill/SkillConfigModal.tsx new file mode 100644 index 000000000..6f372e2b4 --- /dev/null +++ b/frontend/app/[locale]/agents/components/agentConfig/skill/SkillConfigModal.tsx @@ -0,0 +1,652 @@ +"use client"; + +import { useState, useEffect, useMemo, useCallback } from "react"; +import { useTranslation } from "react-i18next"; +import { + Modal, + Form, + Input, + Switch, + InputNumber, + Button, + message, + Tag, + Skeleton, +} from "antd"; +import { Settings } from "lucide-react"; +import { CloseOutlined } from "@ant-design/icons"; + +import { Skill, SkillParam } from "@/types/agentConfig"; +import { KnowledgeBase } from "@/types/knowledgeBase"; +import { Tooltip } from "@/components/ui/tooltip"; +import { saveSkillInstance } from "@/services/agentConfigService"; +import KnowledgeBaseSelectorModal from "@/components/tool-config/KnowledgeBaseSelectorModal"; +import { + getToolTypeForSkill, + skillRequiresKbSelection as checkSkillRequiresKb, + getKbParamNameForSkill, + ToolKbType, +} from "@/components/tool-config"; +import { useKnowledgeBasesForToolConfig, useSyncKnowledgeBases } from "@/hooks/useKnowledgeBaseSelector"; +import log from "@/lib/logger"; +import { isZhLocale, getKbDisplayName, mapKbIdsToDisplayNames, parseKbIds } from "@/lib/utils"; + +export interface SkillConfigModalProps { + isOpen: boolean; + onCancel: () => void; + onSave?: (params: SkillParam[]) => void; + skill: Skill; + initialParams: SkillParam[]; + currentAgentId?: number; + isCreatingMode?: boolean; +} + +function extractDefaultValue(value: any, type: string): any { + if (value !== 
undefined && value !== null) return value; + switch (type) { + case "string": + case "Optional": + return ""; + case "number": + return undefined; + case "boolean": + return false; + case "array": + return []; + case "object": + return {}; + default: + return undefined; + } +} + +export default function SkillConfigModal({ + isOpen, + onCancel, + onSave, + skill, + initialParams, + currentAgentId, + isCreatingMode, +}: SkillConfigModalProps) { + const [form] = Form.useForm(); + const [isLoading, setIsLoading] = useState(false); + const [currentParams, setCurrentParams] = useState([]); + const { t } = useTranslation("common"); + const isZh = isZhLocale(); + + // Check if this skill requires knowledge base selection (has index_names or dataset_ids param) + const skillRequiresKbSelection = useMemo(() => { + return checkSkillRequiresKb(initialParams || []); + }, [initialParams]); + + // Derive the correct toolType based on skill name + const skillToolType = useMemo((): ToolKbType => { + return getToolTypeForSkill(skill?.name || ""); + }, [skill?.name]); + + // Get the KB param name for the current skill (index_names or dataset_ids) + const kbParamName = useMemo(() => { + return getKbParamNameForSkill(skill?.name || ""); + }, [skill?.name]); + + // Compute the set of param indices that should be visible, based on depends_on. + // A param is hidden when its dependency's current value is falsy. 
+ const visibleIndices = useMemo>(() => { + const hidden = new Set(); + currentParams.forEach((param, idx) => { + if (param.depends_on) { + const depIdx = currentParams.findIndex((p) => p.name === param.depends_on); + if (depIdx !== -1) { + const depVal = currentParams[depIdx].value; + if (!depVal) { + hidden.add(idx); + } + } + } + }); + return new Set( + currentParams.map((_, i) => i).filter((i) => !hidden.has(i)) + ); + }, [currentParams]); + + // Knowledge base selector state + const [kbSelectorVisible, setKbSelectorVisible] = useState(false); + const [currentKbParamIndex, setCurrentKbParamIndex] = useState(null); + const [selectedKbIds, setSelectedKbIds] = useState([]); + const [selectedKbDisplayNames, setSelectedKbDisplayNames] = useState([]); + const [hasSubmitted, setHasSubmitted] = useState(false); + + // Fetch knowledge bases based on skill tool type + const { + data: knowledgeBases = [], + isLoading: kbLoading, + refetch: refetchKnowledgeBases, + } = useKnowledgeBasesForToolConfig(skillToolType); + + // Sync knowledge bases based on skill tool type + const { syncKnowledgeBases, isSyncing } = useSyncKnowledgeBases(); + + // Sync selectedKbDisplayNames when knowledgeBases or selectedKbIds changes + useEffect(() => { + if (selectedKbIds.length > 0 && knowledgeBases.length > 0) { + setSelectedKbDisplayNames(mapKbIdsToDisplayNames(selectedKbIds, knowledgeBases)); + } + }, [knowledgeBases, selectedKbIds]); + + // Reset state when modal opens + useEffect(() => { + if (isOpen) { + setSelectedKbIds([]); + setSelectedKbDisplayNames([]); + setHasSubmitted(false); + setKbSelectorVisible(false); + setCurrentKbParamIndex(null); + } + }, [isOpen]); + useEffect(() => { + if (selectedKbIds.length > 0 && knowledgeBases.length > 0) { + const validKbIds = selectedKbIds.filter((id) => + knowledgeBases.some((kb) => String(kb.id).trim() === String(id).trim()) + ); + if (validKbIds.length !== selectedKbIds.length) { + setSelectedKbIds(validKbIds); + 
setSelectedKbDisplayNames(mapKbIdsToDisplayNames(validKbIds, knowledgeBases)); + } + } + }, [knowledgeBases, selectedKbIds]); + + // Build currentParams: merge saved config_values with schema defaults. + // config_values from the database (skill.config_values) takes precedence over schema defaults. + useEffect(() => { + if (!isOpen) return; + + const schema = initialParams && Array.isArray(initialParams) ? initialParams : []; + + // Saved config_values from database (per-agent instance values) + const savedConfigValues = + skill.config_values && typeof skill.config_values === "object" + ? skill.config_values + : {}; + + const merged: SkillParam[] = schema.map((param) => { + if (savedConfigValues[param.name] !== undefined) { + return { ...param, value: savedConfigValues[param.name] }; + } + return { ...param, value: extractDefaultValue(param.value, param.type) }; + }); + + setCurrentParams(merged); + + // Initialize form with indexed field names + const formValues: Record = {}; + merged.forEach((param, index) => { + formValues[`param_${index}`] = param.value; + }); + form.setFieldsValue(formValues); + + // Parse initial knowledge base IDs from the relevant param (index_names or dataset_ids) + if (skillRequiresKbSelection && kbParamName) { + const kbParam = merged.find((p) => p.name === kbParamName); + if (kbParam?.value) { + const ids = parseKbIds(kbParam.value); + if (ids.length > 0) { + setSelectedKbIds(ids); + } + } + } + }, [isOpen, initialParams, skill.config_values, form, skillRequiresKbSelection, kbParamName]); + + // Watch all form values and sync to currentParams + const formValues = Form.useWatch([], form); + useEffect(() => { + if (!formValues) return; + const newParams = [...currentParams]; + Object.entries(formValues).forEach(([fieldName, value]) => { + const index = parseInt(fieldName.replace("param_", "")); + if (!isNaN(index) && newParams[index]) { + // Skip knowledge base selector field (controlled by selectedKbIds) + if (newParams[index].name === 
kbParamName) { + return; + } + newParams[index] = { ...newParams[index], value }; + } + }); + setCurrentParams(newParams); + }, [formValues]); + + const handleSave = async () => { + if (!currentAgentId && !isCreatingMode) { + message.error(t("agentConfig.skill.noAgentSelected")); + return; + } + + setIsLoading(true); + setHasSubmitted(true); + try { + // Force sync form values before validation + const latestFormValues = form.getFieldsValue(); + if (latestFormValues) { + const newParams = [...currentParams]; + Object.entries(latestFormValues).forEach(([fieldName, value]) => { + const index = parseInt(fieldName.replace("param_", "")); + if (!isNaN(index) && newParams[index]) { + newParams[index] = { ...newParams[index], value }; + } + }); + setCurrentParams(newParams); + } + + // Check if knowledge base selector has valid selection + if (skillRequiresKbSelection && selectedKbIds.length === 0) { + const kbParam = currentParams.find( + (p) => p.required && p.name === kbParamName + ); + if (kbParam) { + message.error(t("toolConfig.validation.selectKb")); + setIsLoading(false); + return; + } + } + + await form.validateFields(); + + const paramsToSave = currentParams.map((param) => ({ + ...param, + value: param.value, + })); + + const configValues = paramsToSave.reduce>((acc, p) => { + acc[p.name] = p.value; + return acc; + }, {}); + + if (!isCreatingMode && currentAgentId) { + const result = await saveSkillInstance( + Number(skill.skill_id), + Number(currentAgentId), + true, + 0, + configValues + ); + + if (!result.success) { + message.error(result.message || t("agentConfig.skill.saveFailed")); + setIsLoading(false); + return; + } + } + + if (onSave) { + onSave(paramsToSave); + } + message.success(t("toolConfig.message.saveSuccess")); + onCancel(); + } catch { + // Validation failed - error shown by antd Form + } finally { + setIsLoading(false); + } + }; + + const getLocalizedDescription = useCallback( + (param: SkillParam) => { + return isZh ? 
param.description_zh || param.description_en : param.description_en; + }, + [isZh] + ); + + // Open knowledge base selector for index_names parameter + const openKbSelector = (paramIndex: number) => { + setCurrentKbParamIndex(paramIndex); + setKbSelectorVisible(true); + }; + + // Handle knowledge base selection confirm + const handleKbConfirm = (selectedKnowledgeBases: KnowledgeBase[]) => { + const ids = selectedKnowledgeBases.map((kb) => kb.id); + const displayNames = selectedKnowledgeBases.map((kb) => getKbDisplayName(kb)); + + setSelectedKbIds(ids); + setSelectedKbDisplayNames(displayNames); + setHasSubmitted(false); + + // Update form value + if (currentKbParamIndex !== null) { + const param = currentParams[currentKbParamIndex]; + if (param) { + const formFieldName = `param_${currentKbParamIndex}`; + form.setFieldValue(formFieldName, ids); + + // Also update currentParams directly since Form.Item has no name for KB param + const updatedParams = [...currentParams]; + updatedParams[currentKbParamIndex] = { + ...updatedParams[currentKbParamIndex], + name: param.name, + value: ids, + }; + setCurrentParams(updatedParams); + } + } + + setKbSelectorVisible(false); + setCurrentKbParamIndex(null); + }; + + // Remove a single knowledge base from selection + const removeKbFromSelection = (indexToRemove: number, paramIndex: number) => { + const newIds = selectedKbIds.filter((_, i) => i !== indexToRemove); + const newDisplayNames = selectedKbDisplayNames.filter( + (_, i) => i !== indexToRemove + ); + + setSelectedKbIds(newIds); + setSelectedKbDisplayNames(newDisplayNames); + setHasSubmitted(false); + + // Update form value + const formFieldName = `param_${paramIndex}`; + form.setFieldValue(formFieldName, newIds); + + // Also update currentParams directly + const updatedParams = [...currentParams]; + if (updatedParams[paramIndex]) { + updatedParams[paramIndex] = { + ...updatedParams[paramIndex], + value: newIds, + }; + setCurrentParams(updatedParams); + } + }; + + // Render 
knowledge base selector input (clickable input that opens selector modal) + const renderKbSelectorInput = useCallback( + (param: SkillParam, index: number) => { + const fieldName = `param_${index}`; + const formValue = form.getFieldValue(fieldName); + + // Get display names based on current form value and knowledgeBases + let displayNames: string[] = []; + let ids: string[] = []; + if (formValue) { + ids = parseKbIds(formValue); + + if (ids.length > 0 && knowledgeBases.length > 0) { + displayNames = mapKbIdsToDisplayNames(ids, knowledgeBases); + } + } + + // Fallback to selectedKbDisplayNames if displayNames is empty + if (displayNames.length === 0 && selectedKbDisplayNames.length > 0) { + displayNames = selectedKbDisplayNames; + ids = selectedKbIds; + } + + const placeholder = t( + "toolConfig.input.knowledgeBaseSelector.placeholder", + { + name: getLocalizedDescription(param) || param.name, + } + ); + + // Check if this field has validation error + const hasError = + hasSubmitted && param.required && selectedKbIds.length === 0; + + return ( +
+
openKbSelector(index)} + style={{ + width: "100%", + minHeight: "32px", + display: "flex", + flexWrap: "wrap", + alignItems: "center", + gap: "4px", + }} + title={displayNames.join(", ")} + > + {kbLoading && knowledgeBases.length === 0 ? ( +
+ +
+ ) : displayNames.length > 0 ? ( + displayNames.map((name, i) => ( + + + + } + onClose={(e) => { + e.stopPropagation(); + removeKbFromSelection(i, index); + }} + style={{ marginRight: 0 }} + > + + {name} + + + )) + ) : ( + + {placeholder} + + )} +
+ {hasError && ( +
+ {t("toolConfig.validation.selectKb")} +
+ )} +
+ ); + }, + [ + form, + knowledgeBases, + selectedKbIds, + selectedKbDisplayNames, + hasSubmitted, + kbLoading, + openKbSelector, + removeKbFromSelection, + getLocalizedDescription, + t, + kbParamName, + ] + ); + + const renderParamInput = (param: SkillParam, index: number) => { + const inputStyle = { width: "100%" }; + + // For knowledge base selector, use custom input + if (skillRequiresKbSelection && param.name === kbParamName) { + return renderKbSelectorInput(param, index); + } + + switch (param.type) { + case "number": + return ( + + ); + + case "boolean": + return ( + { + const updatedParams = [...currentParams]; + updatedParams[index] = { ...updatedParams[index], value: checked }; + setCurrentParams(updatedParams); + form.setFieldValue(`param_${index}`, checked); + }} + /> + ); + + case "array": + case "object": + return ( + + ); + + case "string": + case "Optional": + default: + return ( + + ); + } + }; + + return ( + + + {skill.name} +
+ } + open={isOpen} + onCancel={onCancel} + width={600} + destroyOnClose + footer={ +
+ + +
+ } + > + {currentParams.length > 0 ? ( + <> +
+ {t("agentConfig.skill.config.parameters") || "Parameters"} +
+
+
+ {currentParams.map((param, index) => { + const fieldName = `param_${index}`; + const rules: any[] = []; + + if (param.required) { + rules.push({ + required: true, + message: t("toolConfig.validation.required"), + }); + } + + // Add custom validator for knowledge base selector field (index_names/dataset_ids) + // Since this field uses custom display without form control, we need custom validation + if ( + skillRequiresKbSelection && + param.name === kbParamName + ) { + rules.push({ + validator: async () => { + if (selectedKbIds.length === 0) { + throw new Error(t("toolConfig.validation.selectKb")); + } + }, + }); + } + + const isVisible = visibleIndices.has(index); + + return ( + + {param.name} + + } + name={ + skillRequiresKbSelection && param.name === kbParamName + ? undefined + : fieldName + } + rules={rules} + tooltip={{ + title: getLocalizedDescription(param), + placement: "topLeft", + styles: { root: { maxWidth: 400 } }, + }} + style={{ display: isVisible ? undefined : "none" }} + > + {renderParamInput(param, index)} + + ); + })} +
+
+ + ) : ( +
+ {t("agentConfig.skill.noParams")} +
+ )} + + {/* Knowledge Base Selector Modal */} + setKbSelectorVisible(false)} + onConfirm={handleKbConfirm} + selectedIds={selectedKbIds} + toolType={skillToolType} + knowledgeBases={knowledgeBases} + isLoading={kbLoading} + showCheckbox={true} + onSync={async () => { + try { + await syncKnowledgeBases(skillToolType); + message.success(t("knowledgeBase.message.syncSuccess")); + } catch (error) { + log.error("Failed to sync knowledge bases:", error); + message.error(t("knowledgeBase.message.syncError")); + } + }} + syncLoading={!!kbLoading || !!isSyncing} + /> + + ); +} diff --git a/frontend/app/[locale]/agents/components/agentConfig/tool/ToolConfigModal.tsx b/frontend/app/[locale]/agents/components/agentConfig/tool/ToolConfigModal.tsx index 53c6d3f03..a1974ae7e 100644 --- a/frontend/app/[locale]/agents/components/agentConfig/tool/ToolConfigModal.tsx +++ b/frontend/app/[locale]/agents/components/agentConfig/tool/ToolConfigModal.tsx @@ -1,4 +1,4 @@ -"use client"; +"use client"; import { useState, useEffect, useCallback, useMemo, useRef } from "react"; import { useTranslation } from "react-i18next"; @@ -35,7 +35,11 @@ import { import { API_ENDPOINTS } from "@/services/api"; import knowledgeBaseService from "@/services/knowledgeBaseService"; import log from "@/lib/logger"; -import { isZhLocale, getLocalizedDescription } from "@/lib/utils"; +import { + isEmbeddingModelCompatible as isEmbeddingModelCompatibleBase, + isMultimodalConstraintMismatch as isMultimodalConstraintMismatchBase, +} from "@/lib/knowledgeBaseCompatibility"; +import { isZhLocale, getLocalizedDescription, getKbDisplayName, mapKbIdsToDisplayNames, parseKbIds } from "@/lib/utils"; export interface ToolConfigModalProps { isOpen: boolean; @@ -524,6 +528,86 @@ export default function ToolConfigModal({ } }, [configData]); + const currentMultiEmbeddingModel = useMemo(() => { + try { + const modelConfig = configData?.models; + return ( + modelConfig?.multiEmbedding?.modelName || + 
modelConfig?.multiEmbedding?.displayName || + null + ); + } catch { + return null; + } + }, [configData]); + + const hasEmbeddingModel = Boolean(currentEmbeddingModel); + const hasMultiEmbeddingModel = Boolean(currentMultiEmbeddingModel); + const canToggleMultimodalParam = hasEmbeddingModel && hasMultiEmbeddingModel; + const forcedMultimodalValue = useMemo(() => { + if (!hasEmbeddingModel && hasMultiEmbeddingModel) { + return true; + } + if (hasEmbeddingModel && !hasMultiEmbeddingModel) { + return false; + } + return null; + }, [hasEmbeddingModel, hasMultiEmbeddingModel]); + + const toolMultimodal = useMemo(() => { + const multimodalParam = currentParams.find( + (param) => param.name === "multimodal" + ); + const value = multimodalParam?.value; + if (typeof value === "boolean") { + return value; + } + if (typeof value === "string") { + const normalized = value.trim().toLowerCase(); + if (["true", "1", "yes", "y"].includes(normalized)) return true; + if (["false", "0", "no", "n"].includes(normalized)) return false; + } + return null; + }, [currentParams]); + + useEffect(() => { + if (tool?.name !== "knowledge_base_search") return; + if (forcedMultimodalValue === null) return; + + const index = currentParams.findIndex( + (param) => param.name === "multimodal" + ); + if (index < 0) return; + + const param = currentParams[index]; + if (param.value === forcedMultimodalValue) return; + + const updatedParams = [...currentParams]; + updatedParams[index] = { ...param, value: forcedMultimodalValue }; + setCurrentParams(updatedParams); + + const fieldName = `param_${index}`; + form.setFieldValue(fieldName, forcedMultimodalValue); + }, [tool?.name, forcedMultimodalValue, currentParams, form]); + + const isMultimodalConstraintMismatch = useCallback( + (kb: KnowledgeBase) => { + return isMultimodalConstraintMismatchBase(kb, toolMultimodal); + }, + [toolMultimodal] + ); + + const isEmbeddingModelCompatible = useCallback( + (kb: KnowledgeBase) => { + return 
isEmbeddingModelCompatibleBase( + kb, + currentEmbeddingModel, + currentMultiEmbeddingModel + ); + }, + [currentEmbeddingModel, currentMultiEmbeddingModel] + ); + // Check if a knowledge base can be selected const canSelectKnowledgeBase = useCallback( (kb: KnowledgeBase): boolean => { @@ -534,9 +618,16 @@ export default function ToolConfigModal({ return false; } + if (kb.source === "nexent") { + if (isMultimodalConstraintMismatch(kb)) { + return false; + } + return isEmbeddingModelCompatible(kb); + } + return true; }, - [currentEmbeddingModel] + [isEmbeddingModelCompatible, isMultimodalConstraintMismatch] ); // Track whether this is the first time opening the modal (reset when modal closes) @@ -1132,10 +1223,7 @@ export default function ToolConfigModal({ // Handle knowledge base selection confirm const handleKbConfirm = (selectedKnowledgeBases: KnowledgeBase[]) => { const ids = selectedKnowledgeBases.map((kb) => kb.id); - // Use display_name if available, otherwise fall back to name - const displayNames = selectedKnowledgeBases.map( - (kb) => kb.display_name || kb.name - ); + const displayNames = selectedKnowledgeBases.map((kb) => getKbDisplayName(kb)); setSelectedKbIds(ids); setSelectedKbDisplayNames(displayNames); @@ -1235,18 +1323,7 @@ export default function ToolConfigModal({ let ids: string[] = []; if (formValue) { // Value can be an array or a JSON string - if (Array.isArray(formValue)) { - ids = formValue.map((id) => String(id)); - } else if (typeof formValue === "string") { - try { - const parsed = JSON.parse(formValue); - if (Array.isArray(parsed)) { - ids = parsed.map((id) => String(id)); - } - } catch { - ids = formValue.split(",").filter(Boolean); - } - } + ids = parseKbIds(formValue); // Map IDs to display names if (ids.length > 0) { @@ -1263,11 +1340,7 @@ export default function ToolConfigModal({ return cleanId; }); } else if (knowledgeBases.length > 0) { - displayNames = ids.map((id) => { - const cleanId = id.trim(); - const kb = 
knowledgeBases.find((k) => k.id === cleanId); - return kb?.display_name || kb?.name || cleanId; - }); + displayNames = mapKbIdsToDisplayNames(ids, knowledgeBases); } } } @@ -1451,7 +1524,7 @@ export default function ToolConfigModal({ })} options={options.map((option) => ({ value: option, - label: option, + label: String(option), }))} /> ); @@ -1474,10 +1547,21 @@ export default function ToolConfigModal({ case TOOL_PARAM_TYPES.ARRAY: case TOOL_PARAM_TYPES.OBJECT: default: - // Check if parameter name contains "password" for secure input - const isPasswordType = param.name.toLowerCase().includes("password"); + // Check if parameter name indicates a secure/sensitive field + const sensitivePatterns = [ + "password", + "authorization", + "api_key", + "apikey", + "api-key", + "secret", + "token", + ]; + const isSecureField = sensitivePatterns.some((pattern) => + param.name.toLowerCase().includes(pattern) + ); - if (isPasswordType) { + if (isSecureField) { return ( (""); const [isParseSuccessful, setIsParseSuccessful] = useState(false); + const isKnowledgeBaseSearchTool = + tool?.origin_name === "knowledge_base_search" || + tool?.name === "knowledge_base_search"; // Reset form initialization flag when modal is closed or tool changes useEffect(() => { @@ -141,9 +139,9 @@ export default function ToolTestPanel({ const paramType = paramInfo?.type || DEFAULT_TYPE; // Check if this is the KB selector parameter and KB selection is enabled - // Haotian uses dataset_ids, others use index_names - const isKbSelectorParam = paramName === "index_names" && toolRequiresKbSelection && toolKbType !== "haotian_search" - || paramName === "dataset_ids" && toolRequiresKbSelection && toolKbType === "haotian_search"; + // Haotian and iData use dataset_ids, others use index_names + const isKbSelectorParam = paramName === "index_names" && toolRequiresKbSelection && toolKbType !== "haotian_search" && toolKbType !== "idata_search" + || paramName === "dataset_ids" && toolRequiresKbSelection && 
(toolKbType === "haotian_search" || toolKbType === "idata_search"); if (isKbSelectorParam && selectedKbIds.length > 0) { // Use the selected KB IDs from configParams as default @@ -213,9 +211,9 @@ export default function ToolTestPanel({ if (!toolRequiresKbSelection) return; // Determine which field to sync based on tool type - const isHaotian = toolKbType === "haotian_search"; - const fieldName = isHaotian ? `param_dataset_ids` : `param_index_names`; - const stateKey = isHaotian ? "dataset_ids" : "index_names"; + const isHaotianOrIdata = toolKbType === "haotian_search" || toolKbType === "idata_search"; + const fieldName = isHaotianOrIdata ? `param_dataset_ids` : `param_index_names`; + const stateKey = isHaotianOrIdata ? "dataset_ids" : "index_names"; const currentValue = form.getFieldValue(fieldName); // Only update if the value is different @@ -257,7 +255,7 @@ export default function ToolTestPanel({ if (!tool) return; // Validate that knowledge base is selected when required - if (toolRequiresKbSelection && selectedKbIds.length === 0) { + if (toolRequiresKbSelection && !isKnowledgeBaseSearchTool && selectedKbIds.length === 0) { setTestResult(`Test failed: Please select at least one knowledge base`); return; } @@ -291,7 +289,7 @@ export default function ToolTestPanel({ const isKbSelectorParam = (paramName === "index_names" || paramName === "dataset_ids") && toolRequiresKbSelection; // Skip KB selector parameters - they will be handled separately - if (isKbSelectorParam) { + if (isKbSelectorParam && !isKnowledgeBaseSearchTool) { return; } @@ -332,6 +330,13 @@ export default function ToolTestPanel({ }); } + if (isKnowledgeBaseSearchTool) { + if (!Array.isArray(toolParams.index_names) || toolParams.index_names.length === 0) { + setTestResult(`Test failed: Please provide non-empty index_names in input params`); + return; + } + } + // Prepare KB selection parameter based on tool type // These are init-time configuration parameters, not forward() parameters let 
kbSelectionConfig: Record = {}; @@ -340,11 +345,11 @@ export default function ToolTestPanel({ // Determine the correct parameter name based on tool type if (tool?.name === "dify_search") { kbSelectionConfig = { dataset_ids: JSON.stringify(selectedKbIds) }; - } else if (tool?.name === "haotian_search") { - // Haotian uses dataset_ids as an array (not JSON string) + } else if (tool?.name === "haotian_search" || tool?.name === "idata_search") { + // Haotian and iData use dataset_ids as an array (not JSON string) kbSelectionConfig = { dataset_ids: selectedKbIds }; - } else { - // knowledge_base_search, datamate_search, idata_search use index_names + } else if (!isKnowledgeBaseSearchTool) { + // datamate_search uses index_names in config kbSelectionConfig = { index_names: selectedKbIds }; } } @@ -355,13 +360,13 @@ export default function ToolTestPanel({ const configs = (configParams || []).reduce( (acc: Record, param: ToolParam) => { // Skip index_names when KB selection is enabled (provided via kbSelectionConfig) - // For haotian_search: skip only index_names (dataset_ids is handled by kbSelectionConfig) + // For haotian_search and idata_search: skip only index_names (dataset_ids is handled by kbSelectionConfig) // For other KB tools: skip both index_names and dataset_ids if (toolRequiresKbSelection) { - if (param.name === "index_names") { + if (param.name === "index_names" && !isKnowledgeBaseSearchTool) { return acc; } - if (param.name === "dataset_ids" && tool?.name !== "haotian_search") { + if (param.name === "dataset_ids" && tool?.name !== "haotian_search" && tool?.name !== "idata_search") { return acc; } } @@ -453,10 +458,10 @@ export default function ToolTestPanel({ const formValue = currentFormValues[`param_${paramName}`]; // Check if this is a KB selector parameter - const isKbSelectorParam = paramName === "index_names" && toolRequiresKbSelection; + const isKbSelectorParam = (paramName === "index_names" || paramName === "dataset_ids") && 
toolRequiresKbSelection; // Handle KB selector parameters - use selectedKbIds - if (isKbSelectorParam) { + if (isKbSelectorParam && !isKnowledgeBaseSearchTool) { if (selectedKbIds.length > 0) { currentParamsJson[paramName] = selectedKbIds; } @@ -515,7 +520,7 @@ export default function ToolTestPanel({ const paramType = paramInfo?.type || DEFAULT_TYPE; // Check if this is a KB selector parameter - const isKbSelectorParam = paramName === "index_names" && toolRequiresKbSelection; + const isKbSelectorParam = (paramName === "index_names" || paramName === "dataset_ids") && toolRequiresKbSelection; if (manualValue !== undefined) { // KB selector parameters should keep their array form @@ -604,28 +609,10 @@ export default function ToolTestPanel({ // Haotian uses dataset_ids, others use index_names const isKbSelectorParam = (paramName === "index_names" || paramName === "dataset_ids") && toolRequiresKbSelection; - // Get display names based on selected KB IDs and knowledge bases - let displayNames: string[] = []; - if (isKbSelectorParam && selectedKbIds.length > 0) { - if (toolKbType === "haotian_search" && haotianKnowledgeSets.length > 0) { - // Haotian: resolve names from haotianKnowledgeSets - displayNames = selectedKbIds.map((id) => { - const cleanId = id.trim(); - for (const ks of haotianKnowledgeSets) { - const kb = (ks.knowledge_bases || []).find( - (b) => String(b.dify_dataset_id) === cleanId - ); - if (kb) return kb.name; - } - return cleanId; - }); - } else if (knowledgeBases.length > 0) { - displayNames = selectedKbIds.map((id) => { - const cleanId = id.trim(); - const kb = knowledgeBases.find((k) => k.id === cleanId); - return kb?.display_name || kb?.name || cleanId; - }); - } + // KB selection is configured in the upper config area. + // Do not render duplicated KB params in the test input area. 
+ if (isKbSelectorParam && !isKnowledgeBaseSearchTool) { + return null; } // Add type-specific validation rules @@ -680,84 +667,17 @@ export default function ToolTestPanel({ break; } - // Render knowledge base selector for index_names parameter - if (isKbSelectorParam) { - return ( - - {paramName} - - } - name={fieldName} - rules={rules} - tooltip={{ - title: getLocalizedDescription(description, description_zh), - placement: "topLeft", - styles: { root: { maxWidth: 400 } }, - }} - > -
-
onOpenKbSelector?.(-1)} // -1 indicates this is from test panel - style={{ - width: "100%", - minHeight: "32px", - display: "flex", - flexWrap: "wrap", - alignItems: "center", - gap: "4px", - }} - title={displayNames.join(", ")} - > - {kbLoading && knowledgeBases.length === 0 ? ( -
- -
- ) : displayNames.length > 0 ? ( - displayNames.map((name, i) => ( - - - - } - onClose={(e) => { - e.stopPropagation(); - onRemoveKb?.(i, -1); // -1 indicates this is from test panel - }} - style={{ - marginRight: 0, - display: "inline-flex", - alignItems: "center", - lineHeight: "20px", - padding: "0 8px", - fontSize: "13px", - }} - > - {name} - - )) - ) : ( - - {t("toolConfig.input.knowledgeBaseSelector.placeholder", { - name: getLocalizedDescription(description, description_zh) || paramName, - })} - - )} -
-
-
- ); - } - return ( + (() => { + const kbPlaceholder = t( + "toolConfig.input.knowledgeBaseSelector.placeholder", + { + name: + getLocalizedDescription(description, description_zh) || + paramName, + } + ); + return ( - + {isKnowledgeBaseSearchTool && paramName === "index_names" ? ( +
+
onOpenKbSelector?.(-1)} + > + {selectedKbIds.length > 0 ? ( + selectedKbIds.map((id, i) => ( + { + e.preventDefault(); + onRemoveKb?.(i, -1); + }} + style={{ marginBottom: 4 }} + > + {selectedKbDisplayNames[i] || id} + + )) + ) : ( + {kbPlaceholder} + )} +
+
+ ) : ( + + )}
+ ); + })() ); })} diff --git a/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx b/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx index c7c238a83..8b6cd82d7 100644 --- a/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx +++ b/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx @@ -1,11 +1,9 @@ "use client"; -import { useState, useEffect, useMemo, useRef } from "react"; +import { useState, useEffect, useMemo, useRef, useCallback } from "react"; import { useTranslation } from "react-i18next"; import { Button, - Tooltip, - Tabs, Form, Input, Select, @@ -15,185 +13,129 @@ import { Flex, Card, App, + Alert, } from "antd"; -import type { TabsProps } from "antd"; -import { Zap, Maximize2 } from "lucide-react"; +import { Tabs, TabsList, TabsTrigger, TabsContent } from "@/components/ui/tabs"; +import { Zap, Maximize2, Settings2, Sparkles } from "lucide-react"; -import log from "@/lib/logger"; -import { AgentProfileInfo, AgentBusinessInfo } from "@/types/agentConfig"; import { - getAgentGenerationCache, - setAgentGenerationStatus, - saveGeneratedField, - clearAgentGenerationCache, + AgentConfigUpdate, + PromptTemplate, +} from "@/types/agentConfig"; +import { clearExpiredGenerationCaches } from "@/lib/agentGenerationCache"; +import { GENERATE_PROMPT_STREAM_TYPES } from "@/const/agentConfig"; import { useAgentList } from "@/hooks/agent/useAgentList"; -import { - GENERATE_PROMPT_STREAM_TYPES, -} from "@/const/agentConfig"; -import { generatePromptStream } from "@/services/promptService"; +import { useAgentGeneration } from "@/hooks/agent/useAgentGeneration"; import { useAuthorizationContext } from "@/components/providers/AuthorizationProvider"; import { useDeployment } from "@/components/providers/deploymentProvider"; import { useModelList } from "@/hooks/model/useModelList"; import { useConfig } from "@/hooks/useConfig"; -import { useTenantList } from 
"@/hooks/tenant/useTenantList"; -import { useGroupList } from "@/hooks/group/useGroupList"; -import { USER_ROLES } from "@/const/auth"; +import { useGroupList, useGroupDetails } from "@/hooks/group/useGroupList"; +import { usePromptTemplateList } from "@/hooks/agent/usePromptTemplateList"; import { Can } from "@/components/permission/Can"; import { useAgentConfigStore } from "@/stores/agentConfigStore"; import ExpandEditModal from "./ExpandEditModal"; +import PromptTemplateManagerModal from "./PromptTemplateManagerModal"; +import PromptOptimizeModal from "./PromptOptimizeModal"; +import { isAgentPromptsHidden } from "@/lib/agentPromptVisibility"; const { TextArea } = Input; -export interface AgentGenerateDetailProps { - editable: boolean; - currentAgentId?: number | null; - isGenerating: boolean; - setIsGenerating: (value: boolean) => void; -} - -export default function AgentGenerateDetail({ - editable = false, - isGenerating, - setIsGenerating, -}: AgentGenerateDetailProps) { +export default function AgentGenerateDetail({}) { const { t } = useTranslation("common"); const { message } = App.useApp(); - const { user, groupIds: allowedGroupIds } = useAuthorizationContext(); + const { user, getAccessibleGroupIds } = useAuthorizationContext(); const { isSpeedMode } = useDeployment(); const [form] = Form.useForm(); + // Group data - get all groups for tenant, then filter to accessible ones + const { data: groupData } = useGroupList(user?.tenantId ?? null); + const allGroups = groupData?.groups ?? 
[]; + const accessibleGroupIds = getAccessibleGroupIds(); + const { groups: filteredGroups } = useGroupDetails(allGroups, accessibleGroupIds); + const isCreatingMode = useAgentConfigStore((state) => state.isCreatingMode); const editedAgent = useAgentConfigStore((state) => state.editedAgent); const currentAgentId = useAgentConfigStore((state) => state.currentAgentId); const forceRefreshKey = useAgentConfigStore((state) => state.forceRefreshKey); - const updateBusinessInfo = useAgentConfigStore((state) => state.updateBusinessInfo); - const updateProfileInfo = useAgentConfigStore((state) => state.updateProfileInfo); + const isReadOnly = useAgentConfigStore((state) => state.isReadOnly()); + const updateAgentConfig = useAgentConfigStore((state) => state.updateAgentConfig); + const isGenerating = useAgentConfigStore((state) => state.isGenerating); + + // Determine if form should be editable (based on isReadOnly only, isGenerating handled separately) + const editable = !isReadOnly; - // Model data: default LLM name from config, resolve to full model from model list - const { defaultLlmModelName } = useConfig(); + const { defaultLlmModelConfig } = useConfig(); const { availableLlmModels, models, isLoading: loadingModels } = useModelList(); - const defaultLlmModel = useMemo(() => { - if (defaultLlmModelName) { - const found = availableLlmModels.find( - (m) => m.name === defaultLlmModelName || m.displayName === defaultLlmModelName - ); - if (found) return found; - return models.find( - (m) => - m.type === "llm" && - (m.name === defaultLlmModelName || m.displayName === defaultLlmModelName) - ); - } - // No default configured: use the first available LLM, or undefined if none - return availableLlmModels[0]; - }, [defaultLlmModelName, availableLlmModels, models]); + const { + templates: promptTemplates, + isLoading: loadingPromptTemplates, + invalidate: invalidatePromptTemplates, + } = usePromptTemplateList(); - // Tenant & group data for group selection - const { data: 
tenantData } = useTenantList(); - const tenantId = user?.tenantId ?? tenantData?.data?.[0]?.tenant_id ?? null; - const { data: groupData } = useGroupList(tenantId); + const defaultLlmModel = useMemo(() => { + if (!defaultLlmModelConfig) return undefined; + const configName = defaultLlmModelConfig.modelName || defaultLlmModelConfig.displayName || ""; + if (!configName) return undefined; + const found = availableLlmModels.find( + (m) => m.name === configName || m.displayName === configName + ); + if (found) return found; + return models.find( + (m) => + m.type === "llm" && + (m.name === configName || m.displayName === configName) + ); + }, [defaultLlmModelConfig, availableLlmModels, models]); - // Agent list for name uniqueness validation (use local data instead of API call) - const { agents: agentList } = useAgentList(tenantId); - const groups = groupData?.groups || []; + // Agent list for name uniqueness validation (auth-scoped, same as agent dev sidebar) + const { agents: agentList } = useAgentList(""); // State management const [activeTab, setActiveTab] = useState("agent-info"); - // Local state to track generated content (fix for stream data not syncing with form state) - const [generatedContent, setGeneratedContent] = useState({ - dutyPrompt: "", - constraintPrompt: "", - fewShotsPrompt: "", - agentName: "", - agentDescription: "", - agentDisplayName: "", - }); + // Streaming field values (accumulated from SSE, bypasses Form disabled state) // Modal states const [expandModalOpen, setExpandModalOpen] = useState(false); const [expandModalType, setExpandModalType] = useState<'duty' | 'constraint' | 'few-shots' | null>(null); - - // Use ref to track generation initiator - this doesn't trigger re-renders - // but is accessible in closures - const generationInitiatorRef = useRef(null); + const [promptTemplateManagerOpen, setPromptTemplateManagerOpen] = useState(false); + const [optimizeModalOpen, setOptimizeModalOpen] = useState(false); + const [optimizeModalType, 
setOptimizeModalType] = useState<'duty' | 'constraint' | 'few-shots' | null>(null); // Cleanup invalid cache on mount to prevent stuck "generating" state useEffect(() => { - // Clean up expired caches on startup to prevent stuck states - // Only removes entries that have exceeded their expiry time - // Does not interfere with legitimate in-progress caches clearExpiredGenerationCaches(); }, []); - // Sync businessInfo local state with store when editedAgent changes - // This handles navigation scenarios where component remounts but store persists + + // (e.g. business_description from a previously edited agent) useEffect(() => { - if (editedAgent.business_description !== businessInfo.businessDescription || - editedAgent.business_logic_model_name !== businessInfo.businessLogicModelName || - editedAgent.business_logic_model_id !== businessInfo.businessLogicModelId) { - setBusinessInfo({ - businessDescription: editedAgent.business_description || "", - businessLogicModelName: editedAgent.business_logic_model_name || "", - businessLogicModelId: editedAgent.business_logic_model_id || 0, - }); + if (isCreatingMode) { + form.resetFields(); } - }, [editedAgent.business_description, editedAgent.business_logic_model_name, editedAgent.business_logic_model_id]); - - // Only show "no edit permission" tooltip when the panel is active and agent is read-only. - // Note: when no agent is selected, AgentInfoComp shows an overlay and we should not show - // this tooltip in that state. - const showNoEditPermissionTip = - !editable && currentAgentId !== null && currentAgentId !== undefined; - - const noEditPermissionTitle = showNoEditPermissionTip - ? 
t("agent.noEditPermission") - : undefined; - - const wrapNoEditTooltipBlock = (node: React.ReactNode) => { - return ( - - {node} - - ); - }; - - const wrapNoEditTooltipInline = (node: React.ReactNode) => { - return ( - - {node} - - ); - }; - - - const stylesObject: TabsProps["styles"] = { - root: {}, - header: {}, - item: { - fontWeight: "500", - color: "#000", - padding: `6px 10px`, - textAlign: "center", - backgroundColor: "#fff", - }, - indicator: { height: 4 }, - content: { - backgroundColor: "#fff", - borderWidth: 1, - padding: "8px ", - borderRadius: "0 0 8px 8px", - height: "100%", + }, [isCreatingMode]); + + // Use agent generation hook + const { handleGenerateAgent } = useAgentGeneration({ + setActiveTab, + onStreamUpdate: ({ type, content }) => { + const fieldMap: Record = { + [GENERATE_PROMPT_STREAM_TYPES.DUTY]: 'dutyPrompt', + [GENERATE_PROMPT_STREAM_TYPES.CONSTRAINT]: 'constraintPrompt', + [GENERATE_PROMPT_STREAM_TYPES.FEW_SHOTS]: 'fewShotsPrompt', + [GENERATE_PROMPT_STREAM_TYPES.AGENT_VAR_NAME]: 'agentName', + [GENERATE_PROMPT_STREAM_TYPES.AGENT_DESCRIPTION]: 'agentDescription', + [GENERATE_PROMPT_STREAM_TYPES.AGENT_DISPLAY_NAME]: 'agentDisplayName', + }; + + const fieldName = fieldMap[type]; + if (fieldName) { + form.setFieldsValue({ [fieldName]: content }); + } }, - }; - - // Local state for business info to avoid frequent updates - const [businessInfo, setBusinessInfo] = useState({ - businessDescription: "", - businessLogicModelName: "", - businessLogicModelId: 0, }); const normalizeNumberArray = (value: unknown): number[] => { @@ -204,67 +146,22 @@ export default function AgentGenerateDetail({ }; const groupSelectOptions = useMemo(() => { - const selectedIds = normalizeNumberArray(editedAgent.group_ids || []); - const allowedSet = new Set(normalizeNumberArray(allowedGroupIds || [])); - const canSelectAllGroups = - user?.role === USER_ROLES.SU || - user?.role === USER_ROLES.ADMIN || - user?.role === USER_ROLES.SPEED; - - const baseGroups = 
canSelectAllGroups - ? groups - : groups.filter((g) => allowedSet.has(g.group_id)); - - const baseSet = new Set(baseGroups.map((g) => g.group_id)); - const groupById = new Map(groups.map((g) => [g.group_id, g] as const)); - - const options: Array<{ label: string; value: number; disabled?: boolean }> = - baseGroups.map((g) => ({ - label: g.group_name, - value: g.group_id, - })); - - // Keep already-selected groups visible even if they are not selectable (disabled). - for (const id of selectedIds) { - if (baseSet.has(id)) continue; - const g = groupById.get(id); - options.push({ - label: g?.group_name ?? `Group ${id}`, - value: id, - disabled: true, - }); - } - - return options; - }, [allowedGroupIds, editedAgent.group_ids, groups, user?.role]); + return filteredGroups.map((g) => ({ + label: g.group_name, + value: g.group_id, + })); + }, [filteredGroups]); - // Initialize form values when component mounts or currentAgentId changes + // Initialize form values when currentAgentId changes or forceRefreshKey updates + // Cached generation data is already merged into editedAgent by setCurrentAgent useEffect(() => { - const effectiveAgentId = currentAgentId ?? 0; - - // Skip form initialization if we're currently generating for this agent - // Use generationInitiatorRef to avoid stale closure issues - if (generationInitiatorRef.current === effectiveAgentId) { - return; - } - - // Check if this agent has cached generation content in progress - const cached = getAgentGenerationCache(effectiveAgentId); - const hasCachedGeneration = cached?.isGenerating === true; - - // Skip form initialization if we're resuming a cached generation - // This prevents overwriting the generated content - if (hasCachedGeneration) { - return; - } - const initialAgentInfo: Record = { agentName: editedAgent.name || "", agentDisplayName: editedAgent.display_name || "", agentAuthor: editedAgent.author || user?.email || (isSpeedMode ? 
"Default User" : ""), - mainAgentModel: - editedAgent.model || defaultLlmModel?.displayName || "", - mainAgentMaxStep: editedAgent.max_step || 5, + mainAgentModel: editedAgent.model, + mainAgentModelId: editedAgent.model_id, + mainAgentMaxStep: editedAgent.max_step || 15, agentDescription: editedAgent.description || "", group_ids: normalizeNumberArray(editedAgent.group_ids || []), ingroup_permission: editedAgent.ingroup_permission || "READ_ONLY", @@ -272,134 +169,21 @@ export default function AgentGenerateDetail({ constraintPrompt: editedAgent.constraint_prompt || "", fewShotsPrompt: editedAgent.few_shots_prompt || "", provideRunSummary: editedAgent.provide_run_summary || false, - }; - - if (isCreatingMode) { - delete initialAgentInfo.group_ids; - } - - const initialBusinessInfo = { businessDescription: editedAgent.business_description || "", - businessLogicModelName: - editedAgent.business_logic_model_name || - defaultLlmModel?.displayName || - "", - businessLogicModelId: - editedAgent.business_logic_model_id || defaultLlmModel?.id || 0, + businessLogicModelName:editedAgent.business_logic_model_name, + businessLogicModelId: editedAgent.business_logic_model_id, + promptTemplateId: editedAgent.prompt_template_id, + promptTemplateName: editedAgent.prompt_template_name || "system_default", }; - // Initialize local business description state - setBusinessInfo(initialBusinessInfo); - form.setFieldsValue(initialAgentInfo); - // Sync model to store if not already set (e.g., in create mode with default model) - if (isCreatingMode && defaultLlmModel) { - updateProfileInfo({ - model: defaultLlmModel.displayName || "", - model_id: defaultLlmModel.id || 0, - }); - } - // Sync max_step to store in create mode (default to 5) - if (isCreatingMode && !editedAgent.max_step) { - updateProfileInfo({ max_step: 5 }); - } - // Sync author to store if not already set (e.g., in create mode with default user email) - const defaultAuthor = editedAgent.author || user?.email || (isSpeedMode ? 
"Default User" : ""); - if (!editedAgent.author && defaultAuthor) { - updateProfileInfo({ - author: defaultAuthor, - }); - } - - }, [currentAgentId, defaultLlmModel?.id, isCreatingMode, forceRefreshKey]); - - // Default to selecting all groups when creating a new agent. - // Only applies when groups are loaded and no group is selected yet. - useEffect(() => { - const isCreateMode = editable && (currentAgentId === null || currentAgentId === undefined); - if (!isCreateMode) return; - if (!groups || groups.length === 0) return; - - const currentGroupIds = normalizeNumberArray(editedAgent.group_ids || []); - if (currentGroupIds.length > 0) return; - - const allowedSet = new Set(normalizeNumberArray(allowedGroupIds || [])); - const canSelectAllGroups = - user?.role === USER_ROLES.SU || - user?.role === USER_ROLES.ADMIN || - user?.role === USER_ROLES.SPEED; - const selectableGroups = canSelectAllGroups - ? groups - : groups.filter((g) => allowedSet.has(g.group_id)); - - const allGroupIds = normalizeNumberArray(selectableGroups.map((g) => g.group_id)); - if (allGroupIds.length === 0) return; - - form.setFieldsValue({ group_ids: allGroupIds }); - updateProfileInfo - ({ group_ids: allGroupIds }); - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [editable, currentAgentId, groups, allowedGroupIds, user?.role]); - - // Load cached generation content when switching to a different agent - useEffect(() => { - const effectiveAgentId = currentAgentId ?? 
0; - - // Check if this agent has cached generation content - const cached = getAgentGenerationCache(effectiveAgentId); - - // Helper to check if cache has any meaningful content - const hasContent = cached?.dutyPrompt || cached?.constraintPrompt || cached?.fewShotsPrompt || - cached?.agentName || cached?.agentDescription || cached?.agentDisplayName; - // If cache has isGenerating=true, it means a previous session was interrupted - // Clear it and return - user will need to regenerate - if (cached?.isGenerating) { - clearAgentGenerationCache(effectiveAgentId); - return; - } - - // For completed generation (isGenerating was cleared), restore the content - if (cached && hasContent) { - // Restore cached content to form and local state - setGeneratedContent({ - dutyPrompt: cached.dutyPrompt, - constraintPrompt: cached.constraintPrompt, - fewShotsPrompt: cached.fewShotsPrompt, - agentName: cached.agentName, - agentDescription: cached.agentDescription, - agentDisplayName: cached.agentDisplayName, - }); - - // Apply to form fields - form.setFieldsValue({ - dutyPrompt: cached.dutyPrompt, - constraintPrompt: cached.constraintPrompt, - fewShotsPrompt: cached.fewShotsPrompt, - agentName: cached.agentName, - agentDescription: cached.agentDescription, - agentDisplayName: cached.agentDisplayName, - }); - - // Update the store's editedAgent so hasUnsavedChanges is correctly set - // This will trigger hasUnsavedChanges = true when it differs from baselineAgent - updateProfileInfo({ - name: cached.agentName, - display_name: cached.agentDisplayName, - description: cached.agentDescription, - duty_prompt: cached.dutyPrompt, - constraint_prompt: cached.constraintPrompt, - few_shots_prompt: cached.fewShotsPrompt, - }); - } - // If no valid cache, do nothing - this agent wasn't being generated - }, [currentAgentId]); + }, [form, currentAgentId, editedAgent, isCreatingMode, defaultLlmModel, accessibleGroupIds, forceRefreshKey]); // Handle business description change const 
handleBusinessDescriptionChange = (value: string) => { - updateBusinessInfo({ + + updateAgentConfig({ business_description: value, - business_logic_model_id: businessInfo.businessLogicModelId, - business_logic_model_name: businessInfo.businessLogicModelName, }); }; @@ -408,16 +192,28 @@ export default function AgentGenerateDetail({ const selectedModel = availableLlmModels.find( (m) => m.name === modelName || m.displayName === modelName ); - // Update local state so the Select component reflects the change - setBusinessInfo((prev) => ({ - ...prev, - businessLogicModelName: modelName, - businessLogicModelId: selectedModel?.id || 0, - })); - updateBusinessInfo({ - business_description: businessInfo.businessDescription || "", - business_logic_model_id: selectedModel?.id || 0, - business_logic_model_name: modelName, + + updateAgentConfig({ + business_logic_model_id: selectedModel?.id, + business_logic_model_name: modelName + }); + }; + + const handlePromptTemplateChange = (templateId: number) => { + const selectedTemplate = promptTemplates.find( + (template) => template.template_id === templateId + ); + if (!selectedTemplate) { + return; + } + handleSelectPromptTemplate(selectedTemplate); + }; + + const handleSelectPromptTemplate = (template: PromptTemplate) => { + + updateAgentConfig({ + prompt_template_id: template.template_id, + prompt_template_name: template.template_name, }); }; @@ -428,19 +224,58 @@ export default function AgentGenerateDetail({ setExpandModalOpen(true); }; + const handleOpenOptimizeModal = (type: 'duty' | 'constraint' | 'few-shots') => { + const modelId = form.getFieldValue("businessLogicModelId") || editedAgent.business_logic_model_id || 0; + if (!editable || isGenerating || !modelId) { + return; + } + setOptimizeModalType(type); + setOptimizeModalOpen(true); + }; + const renderExpandButton = (type: "duty" | "constraint" | "few-shots") => { - return wrapNoEditTooltipInline( + return (