diff --git a/.github/workflows/auto-build-data-process-dev.yml b/.github/workflows/auto-build-data-process-dev.yml
index c9885170e..6be8bf638 100644
--- a/.github/workflows/auto-build-data-process-dev.yml
+++ b/.github/workflows/auto-build-data-process-dev.yml
@@ -7,14 +7,14 @@ concurrency:
on:
workflow_dispatch:
pull_request:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']  # PRs whose base is develop or any release/ or hotfix/ branch
paths:
- 'backend/**'
- 'sdk/**'
- 'make/data_process/**'
- '.github/workflows/**'
push:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']  # pushes to develop or any release/ or hotfix/ branch
paths:
- 'backend/**'
- 'sdk/**'
diff --git a/.github/workflows/auto-build-doc-dev.yml b/.github/workflows/auto-build-doc-dev.yml
index 697aa0204..7c2cd46d7 100644
--- a/.github/workflows/auto-build-doc-dev.yml
+++ b/.github/workflows/auto-build-doc-dev.yml
@@ -7,12 +7,12 @@ concurrency:
on:
workflow_dispatch:
pull_request:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']  # PRs whose base is develop or any release/ or hotfix/ branch
paths:
- 'doc/**'
- '.github/workflows/**'
push:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']  # pushes to develop or any release/ or hotfix/ branch
paths:
- 'doc/**'
- '.github/workflows/**'
diff --git a/.github/workflows/auto-build-main-dev.yml b/.github/workflows/auto-build-main-dev.yml
index dbd69ac12..2815c50df 100644
--- a/.github/workflows/auto-build-main-dev.yml
+++ b/.github/workflows/auto-build-main-dev.yml
@@ -7,14 +7,14 @@ concurrency:
on:
workflow_dispatch:
pull_request:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']  # PRs whose base is develop or any release/ or hotfix/ branch
paths:
- 'backend/**'
- 'sdk/**'
- 'make/main/**'
- '.github/workflows/**'
push:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']  # pushes to develop or any release/ or hotfix/ branch
paths:
- 'backend/**'
- 'sdk/**'
diff --git a/.github/workflows/auto-build-mcp-dev.yml b/.github/workflows/auto-build-mcp-dev.yml
index dacf04749..03aea08b2 100644
--- a/.github/workflows/auto-build-mcp-dev.yml
+++ b/.github/workflows/auto-build-mcp-dev.yml
@@ -7,14 +7,14 @@ concurrency:
on:
workflow_dispatch:
pull_request:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']  # PRs whose base is develop or any release/ or hotfix/ branch
paths:
- 'backend/**'
- 'sdk/**'
- 'make/mcp/**'
- '.github/workflows/**'
push:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']  # pushes to develop or any release/ or hotfix/ branch
paths:
- 'backend/**'
- 'sdk/**'
diff --git a/.github/workflows/auto-build-terminal-dev.yml b/.github/workflows/auto-build-terminal-dev.yml
index fbc251edb..62fc20165 100644
--- a/.github/workflows/auto-build-terminal-dev.yml
+++ b/.github/workflows/auto-build-terminal-dev.yml
@@ -7,12 +7,12 @@ concurrency:
on:
workflow_dispatch:
pull_request:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']  # PRs whose base is develop or any release/ or hotfix/ branch
paths:
- 'make/terminal/**'
- '.github/workflows/**'
push:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']  # pushes to develop or any release/ or hotfix/ branch
paths:
- 'make/terminal/**'
- '.github/workflows/**'
diff --git a/.github/workflows/auto-build-web-dev.yml b/.github/workflows/auto-build-web-dev.yml
index 28f967894..a5abeb0b3 100644
--- a/.github/workflows/auto-build-web-dev.yml
+++ b/.github/workflows/auto-build-web-dev.yml
@@ -7,13 +7,13 @@ concurrency:
on:
workflow_dispatch:
pull_request:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']  # PRs whose base is develop or any release/ or hotfix/ branch
paths:
- 'frontend/**'
- 'make/web/**'
- '.github/workflows/**'
push:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']  # pushes to develop or any release/ or hotfix/ branch
paths:
- 'frontend/**'
- 'make/web/**'
diff --git a/.github/workflows/auto-unit-test.yml b/.github/workflows/auto-unit-test.yml
index 1e853dd25..1595fc769 100644
--- a/.github/workflows/auto-unit-test.yml
+++ b/.github/workflows/auto-unit-test.yml
@@ -12,14 +12,14 @@ on:
required: false
default: '["ubuntu-24.04-arm"]'
pull_request:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']  # PRs whose base is develop or any release/ or hotfix/ branch
paths:
- 'backend/**'
- 'sdk/**'
- 'test/**'
- '.github/workflows/**'
push:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']  # pushes to develop or any release/ or hotfix/ branch
paths:
- 'backend/**'
- 'sdk/**'
diff --git a/.github/workflows/auto-web-check-dev.yml b/.github/workflows/auto-web-check-dev.yml
index cd107b6e5..ae831a3fb 100644
--- a/.github/workflows/auto-web-check-dev.yml
+++ b/.github/workflows/auto-web-check-dev.yml
@@ -11,12 +11,12 @@ on:
description: 'runner array in json format (e.g. ["ubuntu-latest"] or ["self-hosted"])'
default: '["ubuntu-latest"]'
pull_request:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']  # PRs whose base is develop or any release/ or hotfix/ branch
paths:
- 'frontend/**'
- '.github/workflows/**'
push:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']  # pushes to develop or any release/ or hotfix/ branch
paths:
- 'frontend/**'
- '.github/workflows/**'
diff --git a/.github/workflows/docker-build-push-mainland.yml b/.github/workflows/docker-build-push-mainland.yml
index 1aa41b560..8c215c7ec 100644
--- a/.github/workflows/docker-build-push-mainland.yml
+++ b/.github/workflows/docker-build-push-mainland.yml
@@ -16,10 +16,15 @@ on:
description: 'runner array in json format (e.g. ["ubuntu-latest"] or ["self-hosted"])'
required: true
default: '["ubuntu-latest"]'
+ push:  # automatic builds: pushes to main and tags matching v*
+ branches:
+ - main
+ tags:
+ - 'v*'
jobs:
build-and-push-main-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}  # push events carry no inputs, so default to ubuntu-latest
steps:
- name: Set up Docker Buildx
run: |
@@ -32,20 +37,20 @@ jobs:
uses: actions/checkout@v4
- name: Build main image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-amd64 -f make/main/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
+ docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/main/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push main image (amd64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-amd64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64  # tag = dispatch version input, else 'latest' on main pushes, else the pushed ref name
- name: Tag main image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # manual dispatch with push_latest == 'true', or any push to main
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent:amd64  # NOTE(review): expression is expanded inline into the shell command; consider passing the tag via env
- name: Push latest main image (amd64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # same gate as the tagging step above
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:amd64
build-and-push-main-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}  # push events carry no inputs, so default to ubuntu-latest
steps:
- name: Set up Docker Buildx
run: |
@@ -58,20 +63,20 @@ jobs:
uses: actions/checkout@v4
- name: Build main image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-arm64 -f make/main/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
+ docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/main/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push main image (arm64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-arm64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64  # tag = dispatch version input, else 'latest' on main pushes, else the pushed ref name
- name: Tag main image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # manual dispatch with push_latest == 'true', or any push to main
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent:arm64  # NOTE(review): expression is expanded inline into the shell command; consider passing the tag via env
- name: Push latest main image (arm64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # same gate as the tagging step above
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:arm64
build-and-push-data-process-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}  # push events carry no inputs, so default to ubuntu-latest
steps:
- name: Free up disk space on GitHub runner
run: |
@@ -93,20 +98,20 @@ jobs:
rm -rf .git .gitattributes
- name: Build data process image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-amd64 -f make/data_process/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
+ docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/data_process/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push data process image (amd64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-amd64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64  # tag = dispatch version input, else 'latest' on main pushes, else the pushed ref name
- name: Tag data process image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # manual dispatch with push_latest == 'true', or any push to main
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:amd64  # NOTE(review): expression is expanded inline into the shell command; consider passing the tag via env
- name: Push latest data process image (amd64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # same gate as the tagging step above
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:amd64
build-and-push-data-process-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}  # push events carry no inputs, so default to ubuntu-latest
steps:
- name: Free up disk space on GitHub runner
run: |
@@ -128,20 +133,20 @@ jobs:
rm -rf .git .gitattributes
- name: Build data process image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-arm64 -f make/data_process/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
+ docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/data_process/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push data process image (arm64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-arm64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64  # tag = dispatch version input, else 'latest' on main pushes, else the pushed ref name
- name: Tag data process image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # manual dispatch with push_latest == 'true', or any push to main
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:arm64  # NOTE(review): expression is expanded inline into the shell command; consider passing the tag via env
- name: Push latest data process image (arm64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # same gate as the tagging step above
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:arm64
build-and-push-web-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}  # push events carry no inputs, so default to ubuntu-latest
steps:
- name: Set up Docker Buildx
run: |
@@ -154,20 +159,20 @@ jobs:
uses: actions/checkout@v4
- name: Build web image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-amd64 -f make/web/Dockerfile --build-arg MIRROR=https://registry.npmmirror.com --build-arg APK_MIRROR=tsinghua .
+ docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/web/Dockerfile --build-arg MIRROR=https://registry.npmmirror.com --build-arg APK_MIRROR=tsinghua .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push web image (amd64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-amd64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64  # tag = dispatch version input, else 'latest' on main pushes, else the pushed ref name
- name: Tag web image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-web:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # manual dispatch with push_latest == 'true', or any push to main
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-web:amd64  # NOTE(review): expression is expanded inline into the shell command; consider passing the tag via env
- name: Push latest web image (amd64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # same gate as the tagging step above
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:amd64
build-and-push-web-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}  # push events carry no inputs, so default to ubuntu-latest
steps:
- name: Set up Docker Buildx
run: |
@@ -180,20 +185,20 @@ jobs:
uses: actions/checkout@v4
- name: Build web image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-arm64 -f make/web/Dockerfile --build-arg MIRROR=https://registry.npmmirror.com --build-arg APK_MIRROR=tsinghua .
+ docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/web/Dockerfile --build-arg MIRROR=https://registry.npmmirror.com --build-arg APK_MIRROR=tsinghua .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push web image (arm64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-arm64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64  # tag = dispatch version input, else 'latest' on main pushes, else the pushed ref name
- name: Tag web image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-web:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # manual dispatch with push_latest == 'true', or any push to main
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-web:arm64  # NOTE(review): expression is expanded inline into the shell command; consider passing the tag via env
- name: Push latest web image (arm64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # same gate as the tagging step above
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:arm64
build-and-push-terminal-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}  # push events carry no inputs, so default to ubuntu-latest
steps:
- name: Set up Docker Buildx
run: |
@@ -206,20 +211,20 @@ jobs:
uses: actions/checkout@v4
- name: Build terminal image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 -f make/terminal/Dockerfile .
+ docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/terminal/Dockerfile .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push terminal image (amd64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-amd64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64  # tag = dispatch version input, else 'latest' on main pushes, else the pushed ref name
- name: Tag terminal image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # manual dispatch with push_latest == 'true', or any push to main
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:amd64  # NOTE(review): expression is expanded inline into the shell command; consider passing the tag via env
- name: Push latest terminal image (amd64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # same gate as the tagging step above
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:amd64
build-and-push-terminal-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}  # push events carry no inputs, so default to ubuntu-latest
steps:
- name: Set up Docker Buildx
run: |
@@ -232,20 +237,20 @@ jobs:
uses: actions/checkout@v4
- name: Build terminal image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 -f make/terminal/Dockerfile .
+ docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/terminal/Dockerfile .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push terminal image (arm64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-arm64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64  # tag = dispatch version input, else 'latest' on main pushes, else the pushed ref name
- name: Tag terminal image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # manual dispatch with push_latest == 'true', or any push to main
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:arm64  # NOTE(review): expression is expanded inline into the shell command; consider passing the tag via env
- name: Push latest terminal image (arm64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # same gate as the tagging step above
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:arm64
build-and-push-mcp-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}  # push events carry no inputs, so default to ubuntu-latest
steps:
- name: Set up Docker Buildx
run: |
@@ -258,20 +263,20 @@ jobs:
uses: actions/checkout@v4
- name: Build MCP image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-amd64 -f make/mcp/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
+ docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/mcp/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push MCP image (amd64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-amd64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64  # tag = dispatch version input, else 'latest' on main pushes, else the pushed ref name
- name: Tag MCP image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # manual dispatch with push_latest == 'true', or any push to main
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:amd64  # NOTE(review): expression is expanded inline into the shell command; consider passing the tag via env
- name: Push latest MCP image (amd64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # same gate as the tagging step above
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:amd64
build-and-push-mcp-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}  # push events carry no inputs, so default to ubuntu-latest
steps:
- name: Set up Docker Buildx
run: |
@@ -284,16 +289,16 @@ jobs:
uses: actions/checkout@v4
- name: Build MCP image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-arm64 -f make/mcp/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
+ docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/mcp/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push MCP image (arm64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-arm64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64  # tag = dispatch version input, else 'latest' on main pushes, else the pushed ref name
- name: Tag MCP image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # manual dispatch with push_latest == 'true', or any push to main
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:arm64  # NOTE(review): expression is expanded inline into the shell command; consider passing the tag via env
- name: Push latest MCP image (arm64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # same gate as the tagging step above
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:arm64
manifest-push-main:
@@ -305,13 +310,14 @@ jobs:
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Create and push manifest for main (Tencent Cloud)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'  # skipped only for pushes to main, where the latest-manifest step below runs instead
run: |
- docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }} \
- ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-amd64 \
- ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-arm64
- docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}
+ docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for main (Tencent Cloud)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # manual dispatch with push_latest == 'true', or any push to main
run: |
docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent:latest \
ccr.ccs.tencentyun.com/nexent-hub/nexent:amd64 \
@@ -327,13 +333,14 @@ jobs:
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Create and push manifest for data-process (Tencent Cloud)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'  # skipped only for pushes to main, where the latest-manifest step below runs instead
run: |
- docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }} \
- ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-amd64 \
- ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-arm64
- docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}
+ docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for data-process (Tencent Cloud)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')  # manual dispatch with push_latest == 'true', or any push to main
run: |
docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:latest \
ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:amd64 \
@@ -349,13 +356,14 @@ jobs:
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Create and push manifest for web (Tencent Cloud)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }} \
- ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-amd64 \
- ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-arm64
- docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}
+ docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for web (Tencent Cloud)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-web:latest \
ccr.ccs.tencentyun.com/nexent-hub/nexent-web:amd64 \
@@ -371,13 +379,14 @@ jobs:
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Create and push manifest for terminal (Tencent Cloud)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }} \
- ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 \
- ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-arm64
- docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}
+ docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for terminal (Tencent Cloud)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:latest \
ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:amd64 \
@@ -393,13 +402,14 @@ jobs:
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Create and push manifest for mcp (Tencent Cloud)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }} \
- ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-amd64 \
- ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-arm64
- docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}
+ docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for mcp (Tencent Cloud)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:latest \
ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:amd64 \
diff --git a/.github/workflows/docker-build-push-overseas.yml b/.github/workflows/docker-build-push-overseas.yml
index d19c2600a..dcbe9d642 100644
--- a/.github/workflows/docker-build-push-overseas.yml
+++ b/.github/workflows/docker-build-push-overseas.yml
@@ -16,10 +16,15 @@ on:
description: 'runner array in json format (e.g. ["ubuntu-latest"] or ["self-hosted"])'
required: true
default: '["ubuntu-latest"]'
+ push:
+ branches:
+ - main
+ tags:
+ - 'v*'
jobs:
build-and-push-main-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -32,20 +37,20 @@ jobs:
uses: actions/checkout@v4
- name: Build main image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 -t nexent/nexent:${{ inputs.version }}-amd64 --load -f make/main/Dockerfile .
+ docker buildx build --platform linux/amd64 -t nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/main/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push main image (amd64) to DockerHub
- run: docker push nexent/nexent:${{ inputs.version }}-amd64
+ run: docker push nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64
- name: Tag main image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent:${{ inputs.version }}-amd64 nexent/nexent:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent:amd64
- name: Push latest main image (amd64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent:amd64
build-and-push-main-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -58,20 +63,20 @@ jobs:
uses: actions/checkout@v4
- name: Build main image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 -t nexent/nexent:${{ inputs.version }}-arm64 --load -f make/main/Dockerfile .
+ docker buildx build --platform linux/arm64 -t nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/main/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push main image (arm64) to DockerHub
- run: docker push nexent/nexent:${{ inputs.version }}-arm64
+ run: docker push nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
- name: Tag main image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent:${{ inputs.version }}-arm64 nexent/nexent:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent:arm64
- name: Push latest main image (arm64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent:arm64
build-and-push-data-process-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Free up disk space on GitHub runner
run: |
@@ -93,20 +98,20 @@ jobs:
rm -rf .git .gitattributes
- name: Build data process image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 -t nexent/nexent-data-process:${{ inputs.version }}-amd64 --load -f make/data_process/Dockerfile .
+ docker buildx build --platform linux/amd64 -t nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/data_process/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push data process image (amd64) to DockerHub
- run: docker push nexent/nexent-data-process:${{ inputs.version }}-amd64
+ run: docker push nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64
- name: Tag data process image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent-data-process:${{ inputs.version }}-amd64 nexent/nexent-data-process:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent-data-process:amd64
- name: Push latest data process image (amd64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent-data-process:amd64
build-and-push-data-process-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Free up disk space on GitHub runner
run: |
@@ -128,20 +133,20 @@ jobs:
rm -rf .git .gitattributes
- name: Build data process image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 -t nexent/nexent-data-process:${{ inputs.version }}-arm64 --load -f make/data_process/Dockerfile .
+ docker buildx build --platform linux/arm64 -t nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/data_process/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push data process image (arm64) to DockerHub
- run: docker push nexent/nexent-data-process:${{ inputs.version }}-arm64
+ run: docker push nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
- name: Tag data process image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent-data-process:${{ inputs.version }}-arm64 nexent/nexent-data-process:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent-data-process:arm64
- name: Push latest data process image (arm64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent-data-process:arm64
build-and-push-web-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -154,20 +159,20 @@ jobs:
uses: actions/checkout@v4
- name: Build web image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 -t nexent/nexent-web:${{ inputs.version }}-amd64 --load -f make/web/Dockerfile .
+ docker buildx build --platform linux/amd64 -t nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/web/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push web image (amd64) to DockerHub
- run: docker push nexent/nexent-web:${{ inputs.version }}-amd64
+ run: docker push nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64
- name: Tag web image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent-web:${{ inputs.version }}-amd64 nexent/nexent-web:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent-web:amd64
- name: Push latest web image (amd64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent-web:amd64
build-and-push-web-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -180,20 +185,20 @@ jobs:
uses: actions/checkout@v4
- name: Build web image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 -t nexent/nexent-web:${{ inputs.version }}-arm64 --load -f make/web/Dockerfile .
+ docker buildx build --platform linux/arm64 -t nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/web/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push web image (arm64) to DockerHub
- run: docker push nexent/nexent-web:${{ inputs.version }}-arm64
+ run: docker push nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
- name: Tag web image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent-web:${{ inputs.version }}-arm64 nexent/nexent-web:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent-web:arm64
- name: Push latest web image (arm64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent-web:arm64
build-and-push-terminal-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -206,20 +211,20 @@ jobs:
uses: actions/checkout@v4
- name: Build terminal image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 -t nexent/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 --load -f make/terminal/Dockerfile .
+ docker buildx build --platform linux/amd64 -t nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/terminal/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push terminal image (amd64) to DockerHub
- run: docker push nexent/nexent-ubuntu-terminal:${{ inputs.version }}-amd64
+ run: docker push nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64
- name: Tag terminal image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 nexent/nexent-ubuntu-terminal:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent-ubuntu-terminal:amd64
- name: Push latest terminal image (amd64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent-ubuntu-terminal:amd64
build-and-push-terminal-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -232,20 +237,20 @@ jobs:
uses: actions/checkout@v4
- name: Build terminal image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 -t nexent/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 --load -f make/terminal/Dockerfile .
+ docker buildx build --platform linux/arm64 -t nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/terminal/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push terminal image (arm64) to DockerHub
- run: docker push nexent/nexent-ubuntu-terminal:${{ inputs.version }}-arm64
+ run: docker push nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
- name: Tag terminal image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 nexent/nexent-ubuntu-terminal:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent-ubuntu-terminal:arm64
- name: Push latest terminal image (arm64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent-ubuntu-terminal:arm64
build-and-push-mcp-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -258,20 +263,20 @@ jobs:
uses: actions/checkout@v4
- name: Build MCP image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 -t nexent/nexent-mcp:${{ inputs.version }}-amd64 --load -f make/mcp/Dockerfile .
+ docker buildx build --platform linux/amd64 -t nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/mcp/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push MCP image (amd64) to DockerHub
- run: docker push nexent/nexent-mcp:${{ inputs.version }}-amd64
+ run: docker push nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64
- name: Tag MCP image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent-mcp:${{ inputs.version }}-amd64 nexent/nexent-mcp:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent-mcp:amd64
- name: Push latest MCP image (amd64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent-mcp:amd64
build-and-push-mcp-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -284,16 +289,16 @@ jobs:
uses: actions/checkout@v4
- name: Build MCP image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 -t nexent/nexent-mcp:${{ inputs.version }}-arm64 --load -f make/mcp/Dockerfile .
+ docker buildx build --platform linux/arm64 -t nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/mcp/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push MCP image (arm64) to DockerHub
- run: docker push nexent/nexent-mcp:${{ inputs.version }}-arm64
+ run: docker push nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
- name: Tag MCP image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent-mcp:${{ inputs.version }}-arm64 nexent/nexent-mcp:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent-mcp:arm64
- name: Push latest MCP image (arm64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent-mcp:arm64
manifest-push-main:
@@ -305,13 +310,14 @@ jobs:
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Create and push manifest for main (DockerHub)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create nexent/nexent:${{ inputs.version }} \
- nexent/nexent:${{ inputs.version }}-amd64 \
- nexent/nexent:${{ inputs.version }}-arm64
- docker manifest push nexent/nexent:${{ inputs.version }}
+ docker manifest create nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for main (DockerHub)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create nexent/nexent:latest \
nexent/nexent:amd64 \
@@ -327,13 +333,14 @@ jobs:
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Create and push manifest for data-process (DockerHub)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create nexent/nexent-data-process:${{ inputs.version }} \
- nexent/nexent-data-process:${{ inputs.version }}-amd64 \
- nexent/nexent-data-process:${{ inputs.version }}-arm64
- docker manifest push nexent/nexent-data-process:${{ inputs.version }}
+ docker manifest create nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for data-process (DockerHub)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create nexent/nexent-data-process:latest \
nexent/nexent-data-process:amd64 \
@@ -349,13 +356,14 @@ jobs:
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Create and push manifest for web (DockerHub)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create nexent/nexent-web:${{ inputs.version }} \
- nexent/nexent-web:${{ inputs.version }}-amd64 \
- nexent/nexent-web:${{ inputs.version }}-arm64
- docker manifest push nexent/nexent-web:${{ inputs.version }}
+ docker manifest create nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for web (DockerHub)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create nexent/nexent-web:latest \
nexent/nexent-web:amd64 \
@@ -371,13 +379,14 @@ jobs:
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Create and push manifest for terminal (DockerHub)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create nexent/nexent-ubuntu-terminal:${{ inputs.version }} \
- nexent/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 \
- nexent/nexent-ubuntu-terminal:${{ inputs.version }}-arm64
- docker manifest push nexent/nexent-ubuntu-terminal:${{ inputs.version }}
+ docker manifest create nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for terminal (DockerHub)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create nexent/nexent-ubuntu-terminal:latest \
nexent/nexent-ubuntu-terminal:amd64 \
@@ -393,13 +402,14 @@ jobs:
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Create and push manifest for mcp (DockerHub)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create nexent/nexent-mcp:${{ inputs.version }} \
- nexent/nexent-mcp:${{ inputs.version }}-amd64 \
- nexent/nexent-mcp:${{ inputs.version }}-arm64
- docker manifest push nexent/nexent-mcp:${{ inputs.version }}
+ docker manifest create nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for mcp (DockerHub)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create nexent/nexent-mcp:latest \
nexent/nexent-mcp:amd64 \
diff --git a/.gitignore b/.gitignore
index d1b2af30b..20de73e8a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,3 +42,12 @@ model-assets/
.opencode/
openspec/
logs/
+
+.devspace/
+devspace.yaml
+k8s/helm/**/*.tgz
+k8s/helm/nexent/Chart.lock
+
+MAC_DEVELOPMENT_GUIDE.md
+# Mac本地开发数据持久化(无需提交)
+data/
diff --git a/README.md b/README.md
index 894cd1862..51eb0927b 100644
--- a/README.md
+++ b/README.md
@@ -11,111 +11,106 @@ Nexent is a zero-code platform for auto-generating production-grade AI agents, b
> One prompt. Endless reach.
-### 🌐 Visit our [official website](https://nexent.tech/)
+
-
+# 🚀 Get Started Now
-https://github.com/user-attachments/assets/db6b7f5a-9ee8-4327-ae6f-c5af896126b4
+> ⭐ Before you get started, please star us on [GitHub](https://github.com/ModelEngine-Group/nexent) — your support drives us forward!
-# ⚡ Have a try first
+## Option 1: Try Our Official Demo
-### 📋 Prerequisites
+No installation required — jump right in with our **[online demo environment](http://60.204.251.153:3000/en)** to experience Nexent's capabilities instantly.
-| Resource | Minimum |
-|----------|---------|
-| **CPU** | 2 cores |
-| **RAM** | 6 GiB |
-| **Software** | Docker & Docker Compose installed |
+## Option 2: Deploy on Your Own
-### 🛠️ Quick start with Docker Compose
+If you need to run Nexent locally or in your private infrastructure, we offer two deployment options:
-```bash
-git clone https://github.com/ModelEngine-Group/nexent.git
-cd nexent/docker
-cp .env.example .env # fill only necessary configs
-bash deploy.sh
-```
-
-When the containers are running, open **http://localhost:3000** in your browser and follow the setup wizard.
-
-# 🤝 Join Our Community
-
-> *If you want to go fast, go alone; if you want to go far, go together.*
-
-We have released **Nexent v1**, and the platform is now relatively stable. However, there may still be some bugs, and we are continuously improving and adding new features. Stay tuned: we will announce **v2.0** soon!
-
-* **🗺️ Check our [Feature Map](https://github.com/orgs/ModelEngine-Group/projects/6)** to explore current and upcoming features.
-* **🔍 Try the current build** and leave ideas or bugs in the [Issues](https://github.com/ModelEngine-Group/nexent/issues) tab.
-* **🐛 Check our [Known Issues page](https://github.com/orgs/ModelEngine-Group/projects/9)** for the latest issue status and solutions.
-
-> *Rome wasn't built in a day.*
-
-If our vision speaks to you, jump in via the **[Contribution Guide](https://modelengine-group.github.io/nexent/en/contributing)** and shape Nexent with us.
-
-Early contributors won't go unnoticed: from special badges and swag to other tangible rewards, we're committed to thanking the pioneers who help bring Nexent to life.
+### System Requirements
-Most of all, we need visibility. Star ⭐ and watch the repo, share it with friends, and help more developers discover Nexent — your click brings new hands to the project and keeps the momentum growing.
-
-## 💬 Community & contact
-
-- Browse the [Documentation](https://modelengine-group.github.io/nexent) for more information.
-- Join our [Discord community](https://discord.gg/tb5H3S3wyv) to chat with other developers and get help!
-- Conntact us by Wechat, find our QR Code in our [website](https://nexent.tech/en/contact)
+| Resource | Docker | Kubernetes |
+|----------|--------|-------------|
+| **CPU** | 4 cores (min) / 8 cores (rec.) | 4 cores (min) / 8 cores (rec.) |
+| **Memory** | 8 GiB (min) / 16 GiB (rec.) | 16 GiB (min) / 64 GiB (rec.) |
+| **Disk** | 40 GiB (min) / 100 GiB (rec.) | 100 GiB (min) / 200 GiB (rec.) |
+| **Architecture** | x86_64 / ARM64 | x86_64 / ARM64 |
+| **Software** | Docker 24+, Docker Compose v2+ | Kubernetes 1.24+, Helm 3+ |
-# ✨ Key Features
+> **Note:** Recommended configurations ensure optimal performance in production environments.
-`1` **Smart agent prompt generation**
- Turn plain language into runnable prompts. Nexent automatically chooses the right tools and plans the best action path for every request.
+### Docker Deployment (Recommended for Individuals/Small Teams)
- 
+Quick and straightforward for most users. Prerequisites: Docker 24+ and Docker Compose v2+:
-`2` **Scalable data process engine**
- Process 20+ data formats with fast OCR and table structure extraction, scaling smoothly from a single process to large-batch pipelines.
-
- 
+```bash
+git clone https://github.com/ModelEngine-Group/nexent.git
+cd nexent/docker
+cp .env.example .env
+bash deploy.sh
+```
-`3` **Personal-grade knowledge base**
- Import files in real time, auto-summarise them, and let agents access both personal and global knowledge instantly, also knowing what it can get from each knowledge base.
+For detailed deployment instructions, see [Docker Installation](https://modelengine-group.github.io/nexent/en/quick-start/installation.html).
- 
+### Kubernetes Deployment (For Enterprise Production)
-`4` **Internet knowledge search**
- Connect to 5+ web search providers so agents can mix fresh internet facts with your private data.
+Ideal for enterprise scenarios requiring high availability and elastic scaling. Prerequisites: Kubernetes 1.24+ and Helm 3+:
- 
+```bash
+git clone https://github.com/ModelEngine-Group/nexent.git
+cd nexent/k8s/helm
+./deploy-helm.sh apply
+```
-`5` **Knowledge-level traceability**
- Serve answers with precise citations from web and knowledge-base sources, making every fact verifiable.
+For detailed deployment instructions, see [Kubernetes Installation](https://modelengine-group.github.io/nexent/en/quick-start/kubernetes-installation.html).
- 
+# ✨ Core Features
-`6` **Multimodal understanding & dialogue**
- Speak, type, files, or show images. Nexent understands voice, text, and pictures, and can even generate new images on demand.
+Nexent provides a comprehensive feature set for building powerful AI agents:
- 
+| Feature | Description |
+|---------|-------------|
+| **⚙️ Multi-Model Integration** | OpenAI-compatible with any provider, full LLM/Embedding/VLM/STT/TTS coverage, supports flexible model switching |
+| **🤖 Zero-Code Agent Generation** | Describe requirements in natural language, generate executable agents instantly, what you think is what you get |
+| **🤝 A2A Agent Collaboration** | Agent-to-Agent protocol enables seamless multi-agent cooperation and distributed workflows |
+| **🧠 Layered Memory Mechanism** | Two-tier memory (user-level + user-agent-level) for persistent context across conversations |
+| **📝 Progressive Skill Disclosure** | Dynamically loads Skills into context, maximizing context window efficiency |
+| **🗄️ Personal-Grade Knowledge Base** | Real-time import and intelligent retrieval for 20+ document formats, auto summaries, fine-grained access control |
+| **🔧 MCP Tool Ecosystem** | Plug-and-play extension system with custom development and third-party MCP service support |
+| **🌐 Internet Knowledge Integration** | Multi-source search blending real-time information with private data |
+| **🔍 Knowledge-Level Traceability** | Precise citations and source verification, full transparency for every fact |
+| **🎭 Multimodal Interaction** | Voice, text, images, files — comprehensive natural dialogue |
+| **🔢 Agent Version Management** | Version iteration and history rollback, safe and controllable |
+| **🏪 Agent Marketplace** | Official and community-curated agents, one-click install and use |
+| **👥 Multi-Tenancy & RBAC** | Multi-tenant isolation, role-based access control, fine-grained resource management |
-`7` **MCP tool ecosystem**
- Drop in or build Python plug-ins that follow the MCP spec; swap models, tools, and chains without touching core code.
+# 🤝 Join Our Community
- 
+> *If you want to go fast, go alone; if you want to go far, go together.*
-# 🌱 MCP Tool Ecosystem
+We have released **Nexent v2.0**! A comprehensive upgrade from v1.0, featuring A2A protocol support, progressive Skill disclosure, layered memory mechanism, user management with multi-tenancy, agent version management, agent marketplace, and more.
-Check our [MCP Ecosystem page](https://modelengine-group.github.io/nexent/en/mcp-ecosystem/overview.html) for detailed information about the MCP tool ecosystem, including community hubs, recommended tools, and integration guides.
+- **🗺️ Check our [Feature Map](https://github.com/orgs/ModelEngine-Group/projects/6)** to explore current and upcoming features.
+- **🔍 Try the current build** and leave ideas or bugs in the [Issues](https://github.com/ModelEngine-Group/nexent/issues) tab.
-# 🛠️ Developer Guide
+> *Rome wasn't built in a day.*
-### 🤖 Model Configuration & Provider Recommendations
+If our vision speaks to you, jump in via the **[Contribution Guide](https://modelengine-group.github.io/nexent/en/contributing)** and shape Nexent with us.
-Check our [Model Providers page](https://modelengine-group.github.io/nexent/en/getting-started/model-providers.html) for detailed model configuration guides and recommended provider information.
+Early contributors won't go unnoticed: from special badges and swag to other tangible rewards, we're committed to thanking the pioneers who help bring Nexent to life.
-### 🔧 Hack on Nexent
+Most of all, we need visibility. Star ⭐ and watch the repo, share it with friends, and help more developers discover Nexent — your click brings new hands to the project and keeps the momentum growing.
-Want to build from source or add new features? Check the [Contribution Guide](https://modelengine-group.github.io/nexent/en/contributing) for step-by-step instructions.
+# 📖 What's Next
-### 🛠️ Build from Source
+Ready to dive deeper? Here are the main documentation entry points:
-Prefer to run Nexent from source code? Follow our [Developer Guide](https://modelengine-group.github.io/nexent/en/getting-started/development-guide) for detailed setup instructions and customization options.
+- **[Quick Start](https://modelengine-group.github.io/nexent/en/quick-start/installation.html)** — System requirements and deployment guide
+- **[Core Features](https://modelengine-group.github.io/nexent/en/getting-started/features.html)** — Comprehensive feature documentation
+- **[User Guide](https://modelengine-group.github.io/nexent/en/user-guide/home-page.html)** — Agent development and usage
+- **[Developer Guide](https://modelengine-group.github.io/nexent/en/developer-guide/overview)** — Build from source and customization
+- **[FAQ](https://modelengine-group.github.io/nexent/en/quick-start/faq.html)** — Common questions and troubleshooting
# 📄 License
diff --git a/README_CN.md b/README_CN.md
index c16de5d32..032776418 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -11,111 +11,104 @@ Nexent 是一个基于 **Harness Engineering** 原则打造的零代码智能体
> 一个提示词,无限种可能。
-### 🌐 访问我们的[官方网站](https://nexent.tech/)
+
-
+# 🚀 先来试试看
-https://github.com/user-attachments/assets/b844e05d-5277-4509-9463-1c5b3516f11e
+> ⭐ 在您开始使用前,请您顺手在 [GitHub](https://github.com/ModelEngine-Group/nexent) 为我们点个 Star,您的支持是我们前进的动力!
-# ⚡ 先来试试看
+## 方式一:使用官方体验环境
-### 📋 系统要求
+无需安装,直接访问我们的 **[在线体验环境](http://60.204.251.153:3000/zh)**,快速体验 Nexent 的强大功能。
-| 资源 | 最低要求 |
-|----------|---------|
-| **CPU** | 2 核 |
-| **内存** | 6 GiB |
-| **软件** | 已安装 Docker 和 Docker Compose |
+## 方式二:自行部署
-### 🛠️ 使用 Docker Compose 快速开始
+如果需要在本地或私有环境中部署 Nexent,我们提供两种部署方式:
-```bash
-git clone https://github.com/ModelEngine-Group/nexent.git
-cd nexent/docker
-cp .env.example .env # fill only necessary configs
-bash deploy.sh
-```
-
-当容器运行后,在浏览器中打开 **http://localhost:3000** 并按照设置向导操作。
-
-# 🤝 加入我们的社区
-
-> *If you want to go fast, go alone; if you want to go far, go together.*
-
-我们已经发布了 **Nexent v1**,平台现在相对稳定。但是,可能仍然存在一些 bug,我们正在持续改进并添加新功能。敬请期待:我们很快将宣布 **v2.0**!
-
-* **🗺️ 查看我们的 [功能地图](https://github.com/orgs/ModelEngine-Group/projects/6)** 探索当前和即将推出的功能。
-* **🔍 试用当前版本** 并在 [问题反馈](https://github.com/ModelEngine-Group/nexent/issues) 中留下想法或报告错误。
-* **🐛 查看我们的[已知问题页面](https://github.com/orgs/ModelEngine-Group/projects/9)** 了解最新的问题状态和解决方案。
-
-> *Rome wasn't built in a day.*
-
-如果我们的愿景与您产生共鸣,请通过 **[贡献指南](https://modelengine-group.github.io/nexent/zh/contributing)** 加入我们,共同塑造 Nexent。
-
-早期贡献者不会被忽视:从特殊徽章和纪念品到其他实质性奖励,我们致力于感谢那些帮助 Nexent 诞生的先驱者。
+### 系统要求
-最重要的是,我们需要关注度。请为仓库点星 ⭐ 并关注,与朋友分享,帮助更多开发者发现 Nexent —— 您的每一次点击都能为项目带来新的参与者,保持发展势头。
+| 资源 | Docker 部署 | Kubernetes 部署 |
+|------|------------|----------------|
+| **CPU** | 4 核(最低)/ 8 核(推荐) | 4 核(最低)/ 8 核(推荐) |
+| **内存** | 8 GiB(最低)/ 16 GiB(推荐) | 16 GiB(最低)/ 64 GiB(推荐) |
+| **磁盘** | 40 GiB(最低)/ 100 GiB(推荐) | 100 GiB(最低)/ 200 GiB(推荐) |
+| **架构** | x86_64 / ARM64 | x86_64 / ARM64 |
+| **软件** | Docker 24+, Docker Compose v2+ | Kubernetes 1.24+, Helm 3+ |
-## 💬 社区与联系方式
+> **注意:** 推荐配置可确保生产环境下的最佳性能。
-- 浏览 [文档](https://modelengine-group.github.io/nexent) 了解更多信息。
-- 加入我们的 [Discord 社区](https://discord.gg/tb5H3S3wyv) 与其他开发者交流并获取帮助!
-- 通过微信联系我们,在我们的[网站](https://nexent.tech/zh/contact)找到二维码
+### Docker 部署(推荐个人/小团队使用)
-# ✨ 主要特性
+适用于大多数用户,快速简单。部署前需准备 Docker 24+ 和 Docker Compose v2+:
-`1` **智能体提示词自动生成**
- 将自然语言转化为可被Agent执行的提示词。Nexent可以根据你的需要自动选择正确的工具并为每个请求规划最佳执行路径。
-
- 
-
-`2` **可扩展数据处理引擎**
- 支持 20+ 数据格式的快速 OCR 和表格结构提取,从单进程到大规模批处理管道都能平滑扩展。
-
- 
-
-`3` **个人级知识库**
- 实时导入文件,自动总结,让智能体能够即时访问个人和全局知识,并了解每个知识库能提供什么。
+```bash
+git clone https://github.com/ModelEngine-Group/nexent.git
+cd nexent/docker
+cp .env.example .env
+bash deploy.sh
+```
- 
+详细部署指南请参考 [Docker 安装部署](https://modelengine-group.github.io/nexent/zh/quick-start/installation.html)。
-`4` **互联网知识搜索**
- 连接 5+ 个网络搜索提供商,让智能体能够将最新的互联网信息与您的私有数据结合。
+### Kubernetes 部署(适合企业级生产环境)
- 
+适用于需要高可用、弹性扩展的企业场景。部署前需准备 Kubernetes 集群(1.24+)和 Helm 3+:
-`5` **知识级可追溯性**
- 提供来自网络和知识库来源的精确引用,使每个事实都可验证。
+```bash
+git clone https://github.com/ModelEngine-Group/nexent.git
+cd nexent/k8s/helm
+./deploy-helm.sh apply
+```
- 
+详细部署指南请参考 [Kubernetes 安装部署](https://modelengine-group.github.io/nexent/zh/quick-start/kubernetes-installation.html)。
-`6` **多模态理解与对话**
- 说话、打字、文件或展示图片。Nexent 理解语音、文本和图片,甚至可以根据需求生成新图像。
+# ✨ 核心特性
- 
+Nexent 为构建强大的 AI 智能体提供全面的功能集:
-`7` **MCP 工具生态系统**
- 插入或构建符合 MCP 规范的 Python 插件;无需修改核心代码即可更换模型、工具和链。
+| 特性 | 描述 |
+|------|------|
+| **⚙️ 多模型集成** | OpenAI 兼容任意提供商,LLM/Embedding/VLM/STT/TTS 全覆盖,支持灵活切换 |
+| **🤖 零代码智能体生成** | 纯自然语言描述需求,一键生成可执行智能体,所想即所得 |
+| **🤝 A2A 智能体协作** | Agent-to-Agent 协议支持多智能体无缝协作,构建分布式工作流 |
+| **🧠 分层记忆机制** | 两层记忆体系(用户级+用户-智能体级),跨对话持续积累上下文 |
+| **📝 Skill 渐进式披露** | 动态加载 Skill 内容至上下文,高效利用上下文窗口 |
+| **🗄️ 个人级知识库** | 20+ 文档格式实时导入与智能检索,自动摘要,细粒度权限控制 |
+| **🔧 MCP 工具生态** | 即插即用的扩展工具体系,支持自定义开发和第三方 MCP 服务 |
+| **🌐 互联网知识集成** | 多搜索源混合,实时信息与私有数据融合 |
+| **🔍 知识级溯源** | 精确引用与来源验证,每个事实透明可查 |
+| **🎭 多模态交互** | 语音、文字、图像、文件,全方位自然对话 |
+| **🔢 智能体版本管理** | 版本迭代与历史回溯,安全可控 |
+| **🏪 智能体市场** | 官方与社区优质智能体一键安装即用 |
+| **👥 分权分域管理** | 多租户隔离,RBAC 权限体系,资源级精细管控 |
- 
+# 🤝 加入我们的社区
-# 🌱 MCP 工具生态
+> *If you want to go fast, go alone; if you want to go far, go together.*
-查看我们的[MCP 生态系统页面](https://modelengine-group.github.io/nexent/zh/mcp-ecosystem/overview.html)了解 MCP 工具生态系统的详细信息,包括社区中心、推荐工具和集成指南。
+- **🗺️ 查看我们的 [功能地图](https://github.com/orgs/ModelEngine-Group/projects/6)** 探索当前和即将推出的功能。
+- **🔍 试用当前版本** 并在 [问题反馈](https://github.com/ModelEngine-Group/nexent/issues) 中留下想法或报告错误。
-# 🛠️ 开发者指南
+> *Rome wasn't built in a day.*
-### 🤖 模型配置与模型提供商推荐
+如果我们的愿景与您产生共鸣,请通过 **[贡献指南](https://modelengine-group.github.io/nexent/zh/contributing)** 加入我们,共同塑造 Nexent。
-查看我们的[模型提供商页面](https://modelengine-group.github.io/nexent/zh/getting-started/model-providers.html)了解详细的模型配置指南和推荐的提供商信息。
+早期贡献者不会被忽视:从特殊徽章和纪念品到其他实质性奖励,我们致力于感谢那些帮助 Nexent 诞生的先驱者。
-### 🔧 开发 Nexent
+最重要的是,我们需要关注度。请 [前往 GitHub](https://github.com/ModelEngine-Group/nexent) 为我们点星 ⭐ 并关注,与朋友分享,帮助更多开发者发现 Nexent —— 您的每一次点击都能为项目带来新的参与者,保持发展势头。
-想要从源代码构建或添加新功能?查看 [贡献指南](https://modelengine-group.github.io/nexent/zh/contributing) 获取分步说明。
+# 📖 下一步
-### 🛠️ 从源码构建
+准备好深入了解了吗?以下是主要文档入口:
-想要从源码运行 Nexent?查看我们的[开发者指南](https://modelengine-group.github.io/nexent/zh/getting-started/development-guide)获取详细的设置说明和自定义选项。
+- **[快速开始](https://modelengine-group.github.io/nexent/zh/quick-start/installation.html)** — 系统要求和部署指南
+- **[核心特性详解](https://modelengine-group.github.io/nexent/zh/getting-started/features.html)** — 完整的功能说明
+- **[用户指南](https://modelengine-group.github.io/nexent/zh/user-guide/home-page.html)** — 智能体开发与使用
+- **[开发者指南](https://modelengine-group.github.io/nexent/zh/developer-guide/overview)** — 从源码构建和自定义
+- **[常见问题](https://modelengine-group.github.io/nexent/zh/quick-start/faq.html)** — 常见问题和故障排除
# 📄 许可证
diff --git a/backend/agents/create_agent_info.py b/backend/agents/create_agent_info.py
index 933fcd129..5a11b550b 100644
--- a/backend/agents/create_agent_info.py
+++ b/backend/agents/create_agent_info.py
@@ -14,7 +14,7 @@
from services.vectordatabase_service import (
ElasticSearchService,
get_vector_db_core,
- get_embedding_model,
+ get_embedding_model_by_index_name,
get_rerank_model,
)
from services.remote_mcp_service import get_remote_mcp_server_list
@@ -32,7 +32,7 @@
from utils.prompt_template_utils import get_agent_prompt_template
from utils.config_utils import tenant_config_manager, get_model_name_from_config
from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE, DATA_PROCESS_SERVICE
-import re
+from consts.exceptions import ValidationError
logger = logging.getLogger("create_agent_info")
logger.setLevel(logging.DEBUG)
@@ -488,11 +488,23 @@ async def create_tool_config_list(agent_id, tenant_id, user_id, version_no: int
tool_config.metadata = {
"vdb_core": get_vector_db_core(),
- "embedding_model": get_embedding_model(tenant_id=tenant_id),
+ "embedding_model": None,
"rerank_model": rerank_model,
"display_name_to_index_map": display_name_to_index_map,
"index_name_to_display_map": index_name_to_display_map,
}
+
+ # Must have embedding model for knowledge base search
+ if not index_names:
+ raise ValidationError(
+ "Embedding model is required for knowledge_base_search but index_names is empty")
+
+ embedding_model, _, _ = get_embedding_model_by_index_name(tenant_id, index_names[0])
+ if not embedding_model:
+ raise ValidationError(
+ f"No embedding model found for index '{index_names[0]}'. "
+ f"Please configure an embedding model for this knowledge base.")
+ tool_config.metadata["embedding_model"] = embedding_model
elif tool_config.class_name in ["DifySearchTool", "DataMateSearchTool"]:
rerank = param_dict.get("rerank", False)
rerank_model_name = param_dict.get("rerank_model_name", "")
diff --git a/backend/apps/a2a_client_app.py b/backend/apps/a2a_client_app.py
index db7acd108..ea149ac31 100644
--- a/backend/apps/a2a_client_app.py
+++ b/backend/apps/a2a_client_app.py
@@ -5,6 +5,7 @@
Used internally for configuring A2A sub-agents.
"""
import logging
+import uuid
from typing import Annotated, List, Optional
from http import HTTPStatus
@@ -45,6 +46,14 @@ class UpdateAgentProtocolRequest(BaseModel):
)
+class TestNacosConnectionRequest(BaseModel):
+    """Request to test Nacos connectivity without saving the config."""
+    # Address of the Nacos server to probe (required).
+    nacos_addr: str = Field(description="Nacos server address (e.g., http://nacos-server:8848)")
+    # Optional credentials; both default to None when the caller omits them.
+    nacos_username: Optional[str] = None
+    nacos_password: Optional[str] = None
+    # Namespace used for the connectivity test; defaults to "public".
+    namespace_id: Optional[str] = "public"
+
+
# =============================================================================
# External Agent Discovery
# =============================================================================
@@ -102,7 +111,7 @@ async def discover_from_nacos(
results = await a2a_client_service.discover_from_nacos(
nacos_config_id=request.nacos_config_id,
- agent_names=request.agent_names,
+ agent_names=[name.strip() for name in request.agent_names],
tenant_id=tenant_id,
user_id=user_id,
namespace=request.namespace
@@ -482,6 +491,17 @@ class CreateNacosConfigRequest(BaseModel):
description: Optional[str] = None
+class UpdateNacosConfigRequest(BaseModel):
+    """Request to update a Nacos config.
+
+    Every field is optional and defaults to None.
+    """
+    # NOTE(review): presumably a None field means "leave this column unchanged"
+    # in the database layer — confirm against a2a_agent_db.update_nacos_config.
+    name: Optional[str] = None
+    nacos_addr: Optional[str] = None
+    nacos_username: Optional[str] = None
+    nacos_password: Optional[str] = None
+    namespace_id: Optional[str] = None
+    description: Optional[str] = None
+    is_active: Optional[bool] = None
+
+
@router.post("/nacos-configs")
async def create_nacos_config(
request: CreateNacosConfigRequest,
@@ -577,6 +597,51 @@ async def get_nacos_config(
)
+@router.put("/nacos-configs/{config_id}")
+async def update_nacos_config(
+    config_id: str,
+    request: UpdateNacosConfigRequest,
+    authorization: Annotated[Optional[str], Header()] = None,
+    http_request: Request = None
+):
+    """Apply the submitted field values to an existing Nacos configuration.
+
+    Responds with 200 and the database result on success, 404 when the
+    database layer returns a falsy result, and 500 on any unexpected error.
+    """
+    try:
+        user_id, tenant_id, _ = get_current_user_info(authorization, http_request)
+
+        # Forward every request field unchanged; fields the caller omitted
+        # reach the database layer as None.
+        submitted_fields = {
+            "name": request.name,
+            "nacos_addr": request.nacos_addr,
+            "nacos_username": request.nacos_username,
+            "nacos_password": request.nacos_password,
+            "namespace_id": request.namespace_id,
+            "description": request.description,
+            "is_active": request.is_active,
+        }
+        updated = a2a_agent_db.update_nacos_config(
+            config_id=config_id,
+            tenant_id=tenant_id,
+            user_id=user_id,
+            **submitted_fields
+        )
+
+        if updated:
+            return JSONResponse(
+                status_code=HTTPStatus.OK,
+                content={"status": "success", "data": updated}
+            )
+
+        raise HTTPException(
+            status_code=HTTPStatus.NOT_FOUND,
+            detail=f"Nacos config {config_id} not found"
+        )
+
+    except HTTPException:
+        # Deliberate HTTP errors (such as the 404 above) pass through untouched.
+        raise
+    except Exception as e:
+        logger.error(f"Update Nacos config failed: {e}", exc_info=True)
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to update Nacos config"
+        )
+
+
@router.delete("/nacos-configs/{config_id}")
async def delete_nacos_config(
config_id: str,
@@ -610,6 +675,62 @@ async def delete_nacos_config(
)
+@router.post("/nacos-configs/test-connection")
+async def test_nacos_connection(
+    request: TestNacosConnectionRequest,
+    authorization: Annotated[Optional[str], Header()] = None,
+    http_request: Request = None
+):
+    """Test connectivity to Nacos server without saving the configuration.
+
+    The probe outcome is always reported with HTTP 200; the nested
+    ``data.success`` flag tells the caller whether Nacos was reachable and
+    ``data.message`` carries the detail. Authentication failures are NOT
+    folded into that payload: they propagate to the framework so an
+    unauthenticated caller is not told "connection failed" with a 200.
+    """
+    from utils.nacos_client import NacosClient, NacosConnectionError
+
+    # Authenticate before entering the probe's try block; otherwise the broad
+    # `except Exception` below would turn an auth error into a 200 response.
+    get_current_user_info(authorization, http_request)
+
+    try:
+        async with NacosClient(
+            nacos_addr=request.nacos_addr,
+            username=request.nacos_username,
+            password=request.nacos_password
+        ) as client:
+            result = await client.test_connectivity(namespace=request.namespace_id or "public")
+            success, message = result["success"], result["message"]
+    except NacosConnectionError as e:
+        # Expected failure mode of a probe: log quietly and report it.
+        logger.warning(f"Nacos connection test failed: {e}")
+        success, message = False, str(e)
+    except Exception as e:
+        logger.error(f"Test Nacos connection failed: {e}", exc_info=True)
+        success, message = False, f"Failed to test Nacos connection: {e}"
+
+    # Single response shape for every probe outcome.
+    return JSONResponse(
+        status_code=HTTPStatus.OK,
+        content={
+            "status": "success",
+            "data": {
+                "success": success,
+                "message": message
+            }
+        }
+    )
+
+
# =============================================================================
# External Agent Chat
# =============================================================================
@@ -648,11 +769,11 @@ async def chat_with_external_agent(
# Build A2A message format following A2A protocol with parts array
a2a_message = {
+ "message_id": f"msg_{uuid.uuid4().hex}",
"role": "ROLE_USER",
"parts": [
{
"text": request_body.message.strip(),
- "mediaType": "text/plain"
}
],
}
diff --git a/backend/apps/knowledge_summary_app.py b/backend/apps/knowledge_summary_app.py
index e4e11ace9..ab45170fb 100644
--- a/backend/apps/knowledge_summary_app.py
+++ b/backend/apps/knowledge_summary_app.py
@@ -8,6 +8,7 @@
from consts.model import ChangeSummaryRequest
from services.vectordatabase_service import ElasticSearchService, get_vector_db_core
from utils.auth_utils import get_current_user_id, get_current_user_info
+from utils.config_utils import tenant_config_manager
router = APIRouter(prefix="/summary")
logger = logging.getLogger("knowledge_summary_app")
@@ -31,6 +32,19 @@ async def auto_summary(
authorization, http_request)
service = ElasticSearchService()
+ # Get model_id from tenant config if not provided
+ if model_id is None and tenant_id:
+ try:
+ tenant_config = tenant_config_manager.load_config(tenant_id)
+ model_id_str = tenant_config.get("LLM_ID")
+ if model_id_str:
+ model_id = int(model_id_str)
+ logger.info(f"Using LLM_ID {model_id} from tenant config for auto-summary")
+ else:
+ logger.warning(f"No LLM_ID configured for tenant {tenant_id}, summary may be placeholder")
+ except Exception as e:
+ logger.warning(f"Failed to get LLM_ID from tenant config: {e}")
+
return await service.summary_index_name(
index_name=index_name,
batch_size=batch_size,
diff --git a/backend/apps/model_managment_app.py b/backend/apps/model_managment_app.py
index 0a5a04139..278b729e8 100644
--- a/backend/apps/model_managment_app.py
+++ b/backend/apps/model_managment_app.py
@@ -372,7 +372,10 @@ async def manage_check_model_health(
f"Start to check model connectivity for tenant, user_id: {user_id}, "
f"target_tenant_id: {request.tenant_id}, display_name: {request.display_name}")
- result = await check_model_connectivity(request.display_name, request.tenant_id)
+ result = await check_model_connectivity(
+ request.display_name,
+ request.tenant_id
+ )
return JSONResponse(status_code=HTTPStatus.OK, content={
"message": "Successfully checked model connectivity",
"data": result
diff --git a/backend/apps/vectordatabase_app.py b/backend/apps/vectordatabase_app.py
index 872b5387b..6f4232afd 100644
--- a/backend/apps/vectordatabase_app.py
+++ b/backend/apps/vectordatabase_app.py
@@ -1,30 +1,47 @@
import logging
import json
from http import HTTPStatus
-from typing import Any, Dict, List, Optional
+from typing import Annotated, Any, Dict, List, Optional
from fastapi import APIRouter, Body, Depends, Header, HTTPException, Path, Query
from fastapi.responses import JSONResponse
import re
from consts.model import ChunkCreateRequest, ChunkUpdateRequest, HybridSearchRequest, IndexingResponse
+from consts.scheduler import VALID_SUMMARY_FREQUENCIES, SUMMARY_FREQUENCY_OPTIONS_FOR_API
from nexent.vector_database.base import VectorDatabaseCore
from services.vectordatabase_service import (
ElasticSearchService,
- get_embedding_model,
+ get_embedding_model_by_id,
get_vector_db_core,
check_knowledge_base_exist_impl,
+ KnowledgeBaseNeedsModelConfigError,
)
from services.redis_service import get_redis_service
from utils.auth_utils import get_current_user_id
from utils.file_management_utils import get_all_files_status
from database.knowledge_db import get_index_name_by_knowledge_name, get_knowledge_record
+from database.model_management_db import get_model_by_model_id
router = APIRouter(prefix="/indices")
service = ElasticSearchService()
logger = logging.getLogger("vectordatabase_app")
+@router.get("/summary_frequency_options")
+async def get_summary_frequency_options():
+    """
+    Return the summary frequency choices the frontend may offer.
+
+    The response holds the option list under "options" and the accepted raw
+    values under "valid_values".
+    """
+    payload = {
+        "options": SUMMARY_FREQUENCY_OPTIONS_FOR_API,
+        "valid_values": VALID_SUMMARY_FREQUENCIES,
+    }
+    return JSONResponse(status_code=HTTPStatus.OK, content=payload)
+
@router.post("/check_exist")
async def check_knowledge_base_exist(
request: Dict[str, str] = Body(
@@ -160,6 +177,186 @@ async def update_index(
status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error updating index: {str(exc)}")
+@router.patch("/{index_name}/summary_frequency")
+async def update_summary_frequency_endpoint(
+    index_name: Annotated[str, Path(..., description="Name of the index to update")],
+    request: Annotated[Dict[str, Any], Body(..., description="Update payload with summary_frequency")],
+    authorization: Annotated[Optional[str], Header()] = None,
+):
+    """Set the auto-summary frequency of a knowledge base.
+
+    Responds with 400 when the submitted value is not in
+    VALID_SUMMARY_FREQUENCIES, 404 when the database update reports failure,
+    and 500 on any unexpected error.
+    """
+    try:
+        user_id, tenant_id = get_current_user_id(authorization)
+        requested_frequency = request.get("summary_frequency")
+
+        if requested_frequency not in VALID_SUMMARY_FREQUENCIES:
+            raise HTTPException(
+                status_code=HTTPStatus.BAD_REQUEST,
+                detail=f"Invalid summary_frequency. Must be one of: {VALID_SUMMARY_FREQUENCIES}"
+            )
+
+        from database.knowledge_db import update_summary_frequency
+        updated = update_summary_frequency(
+            index_name=index_name,
+            summary_frequency=requested_frequency,
+            _tenant_id=tenant_id,
+            user_id=user_id
+        )
+
+        # A falsy result from the database layer is reported as "not found".
+        if not updated:
+            raise HTTPException(
+                status_code=HTTPStatus.NOT_FOUND,
+                detail=f"Knowledge base '{index_name}' not found"
+            )
+
+        return JSONResponse(
+            status_code=HTTPStatus.OK,
+            content={"message": "Summary frequency updated successfully", "status": "success"}
+        )
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.exception("Error updating summary frequency")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error updating summary frequency: {str(exc)}"
+        )
+
+
+@router.get("/{index_name}/embedding-model-status")
+def get_embedding_model_status(
+    index_name: str = Path(..., description="Name of the index to check"),
+    authorization: Optional[str] = Header(None)
+):
+    """
+    Report whether a knowledge base has a usable embedding model configured.
+
+    The knowledge base is looked up by its internal index_name within the
+    caller's tenant. The response carries one of three statuses:
+
+    - "configured": the stored model id resolves to a model record.
+    - "legacy": no resolvable model id, but a model name is stored.
+    - "missing": neither a resolvable model id nor a model name is stored.
+
+    For "legacy" and "missing" the needs_config flag is True, which the
+    frontend uses to prompt the user to select an embedding model.
+
+    Raises:
+        HTTPException: 404 when no knowledge base record matches, 500 on any
+        unexpected error.
+    """
+    try:
+        _, tenant_id = get_current_user_id(authorization)
+
+        record = get_knowledge_record({
+            "index_name": index_name,
+            "tenant_id": tenant_id
+        })
+        if not record:
+            raise HTTPException(
+                status_code=HTTPStatus.NOT_FOUND,
+                detail=f"Knowledge base '{index_name}' not found"
+            )
+
+        model_id = record.get("embedding_model_id")
+        embedding_model_name = record.get("embedding_model_name")
+
+        # Resolve the stored id to a model summary; model_info stays None when
+        # the id is unset or the lookup returns nothing.
+        model = get_model_by_model_id(model_id, tenant_id) if model_id else None
+        model_info = None
+        if model:
+            model_info = {
+                field: model.get(field)
+                for field in ("model_id", "model_name", "display_name", "model_type")
+            }
+
+        if model_info:
+            status, needs_config = "configured", False
+            message = f"Embedding model '{model_info.get('display_name', model_info.get('model_name'))}' is configured"
+        elif embedding_model_name:
+            # Only a model name is available, no resolvable model id.
+            status, needs_config = "legacy", True
+            message = "This knowledge base was created with an older version. Please select an embedding model to ensure proper functionality."
+        else:
+            status, needs_config = "missing", True
+            message = "No embedding model configured. Please select an embedding model."
+
+        return {
+            "status": status,
+            "needs_config": needs_config,
+            # Echo the index name as stored in the database record.
+            "index_name": record.get("index_name"),
+            "knowledge_name": record.get("knowledge_name"),
+            "model_id": model_id,
+            "embedding_model_name": embedding_model_name,
+            "model_info": model_info,
+            "message": message,
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Error getting embedding model status for '{index_name}': {e}", exc_info=True)
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail=f"Error checking embedding model status: {str(e)}"
+        )
+
+
+@router.put("/{index_name}/embedding-model")
+def update_embedding_model(
+    index_name: str = Path(..., description="Internal index name of the knowledge base to update"),
+    request: Dict[str, Any] = Body(..., description="Update payload with model_id"),
+    authorization: Optional[str] = Header(None)
+):
+    """
+    Assign an embedding model to a knowledge base.
+
+    Called after the user picks a model in the dialog shown for knowledge
+    bases that have no model configured.
+
+    Raises:
+        HTTPException: 400 when model_id is absent or falsy, 404 when the
+        service raises ValueError, 500 on any other error.
+    """
+    try:
+        user_id, tenant_id = get_current_user_id(authorization)
+
+        selected_model_id = request.get("model_id")
+        if not selected_model_id:
+            raise HTTPException(
+                status_code=HTTPStatus.BAD_REQUEST,
+                detail="model_id is required"
+            )
+
+        outcome = ElasticSearchService.update_embedding_model(
+            index_name=index_name,
+            model_id=selected_model_id,
+            tenant_id=tenant_id,
+            user_id=user_id,
+        )
+        return JSONResponse(status_code=HTTPStatus.OK, content=outcome)
+
+    except HTTPException:
+        # Keep deliberate HTTP errors (such as the 400 above) intact.
+        raise
+    except ValueError as exc:
+        # The service's ValueError is surfaced to the client as 404.
+        raise HTTPException(
+            status_code=HTTPStatus.NOT_FOUND,
+            detail=str(exc)
+        )
+    except Exception as exc:
+        logger.error(f"Error updating embedding model for '{index_name}': {exc}", exc_info=True)
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail=f"Error updating embedding model: {str(exc)}"
+        )
+
+
@router.get("")
def get_list_indices(
pattern: str = Query("*", description="Pattern to match index names"),
@@ -191,6 +388,8 @@ def create_index_documents(
authorization: Optional[str] = Header(None),
task_id: Optional[str] = Header(
None, alias="X-Task-Id", description="Task ID for progress tracking"),
+ large_mode: bool = Query(
+ False, description="Force large-batch path when current request chunk count is below threshold"),
):
"""
Index documents with embeddings, creating the index if it doesn't exist.
@@ -198,22 +397,24 @@ def create_index_documents(
"""
try:
user_id, tenant_id = get_current_user_id(authorization)
-
+
# Get the knowledge base record to retrieve the saved embedding model
knowledge_record = get_knowledge_record({'index_name': index_name})
- saved_embedding_model_name = None
+ saved_embedding_model_id = None
if knowledge_record:
- saved_embedding_model_name = knowledge_record.get('embedding_model_name')
-
- # Use the saved model from knowledge base, fallback to tenant default if not set
- embedding_model = get_embedding_model(tenant_id, saved_embedding_model_name)
-
+ saved_embedding_model_id = knowledge_record.get('embedding_model_id')
+
+ # Use the saved model from knowledge base by model_id
+ embedding_model, _ = get_embedding_model_by_id(tenant_id, saved_embedding_model_id) if saved_embedding_model_id else (None, None)
+
return ElasticSearchService.index_documents(
embedding_model=embedding_model,
index_name=index_name,
data=data,
vdb_core=vdb_core,
task_id=task_id,
+ large_mode=large_mode,
+ model_id=saved_embedding_model_id,
)
except Exception as e:
error_msg = str(e)
@@ -538,9 +739,19 @@ async def hybrid_search(
vdb_core=vdb_core,
)
return JSONResponse(status_code=HTTPStatus.OK, content=result)
+ except KnowledgeBaseNeedsModelConfigError as exc:
+ # Return a specific error that frontend can detect to show the config dialog
+ raise HTTPException(
+ status_code=HTTPStatus.CONFLICT,
+ detail={
+ "error_type": "KNOWLEDGE_BASE_NEEDS_MODEL_CONFIG",
+ "index_name": exc.index_name,
+ "message": exc.message,
+ "suggestion": "Please select an embedding model for this knowledge base before searching."
+ }
+ )
except ValueError as exc:
- raise HTTPException(status_code=HTTPStatus.BAD_REQUEST,
- detail=str(exc))
+ raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
except Exception as exc:
logger.error(f"Hybrid search failed: {exc}", exc_info=True)
raise HTTPException(
diff --git a/backend/apps/voice_app.py b/backend/apps/voice_app.py
index 8f517cd07..7451a95c4 100644
--- a/backend/apps/voice_app.py
+++ b/backend/apps/voice_app.py
@@ -1,15 +1,12 @@
-import asyncio
import logging
from http import HTTPStatus
-from fastapi import APIRouter, WebSocket, HTTPException, Body, Query
+from fastapi import APIRouter, WebSocket, HTTPException
from fastapi.responses import JSONResponse
from consts.exceptions import (
VoiceServiceException,
STTConnectionException,
- TTSConnectionException,
- VoiceConfigException
)
from consts.model import VoiceConnectivityRequest, VoiceConnectivityResponse
from services.voice_service import get_voice_service
@@ -26,10 +23,29 @@ async def stt_websocket(websocket: WebSocket):
logger.info("STT WebSocket connection attempt...")
await websocket.accept()
logger.info("STT WebSocket connection accepted")
-
+
+    # Receive the initial JSON config frame (text or binary); log keys only, it may hold credentials
+    client_config = {}
+    try:
+        import json
+        msg = await websocket.receive()
+        # ASGI frames have type "websocket.receive" with a "text" or "bytes" key; no type is "bytes"
+        if msg.get("type") == "websocket.receive":
+            payload = msg.get("text")
+            if payload is None and msg.get("bytes") is not None:
+                payload = msg["bytes"].decode("utf-8")
+            if payload is not None:
+                client_config = json.loads(payload)
+                if not isinstance(client_config, dict):
+                    raise ValueError("client config must be a JSON object")
+                logger.info(f"Received client config keys: {list(client_config)}")
+    except Exception as e:
+        logger.error(f"Error receiving config: {e}")
+        client_config = {}
+
try:
voice_service = get_voice_service()
- await voice_service.start_stt_streaming_session(websocket)
+ await voice_service.start_stt_streaming_session(websocket, stt_config=client_config)
except STTConnectionException as e:
logger.error(f"STT WebSocket error: {str(e)}")
await websocket.send_json({"error": str(e)})
@@ -40,55 +56,12 @@ async def stt_websocket(websocket: WebSocket):
logger.info("STT WebSocket connection closed")
-@voice_runtime_router.websocket("/tts/ws")
-async def tts_websocket(websocket: WebSocket):
- """WebSocket endpoint for streaming TTS"""
- logger.info("TTS WebSocket connection attempt...")
- await websocket.accept()
- logger.info("TTS WebSocket connection accepted")
-
- try:
- # Receive text from client (single request)
- data = await websocket.receive_json()
- text = data.get("text")
-
- if not text:
- if websocket.client_state.name == "CONNECTED":
- await websocket.send_json({"error": "No text provided"})
- return
-
- # Stream TTS audio to WebSocket
- voice_service = get_voice_service()
- await voice_service.stream_tts_to_websocket(websocket, text)
-
- except TTSConnectionException as e:
- logger.error(f"TTS WebSocket error: {str(e)}")
- await websocket.send_json({"error": str(e)})
- except Exception as e:
- logger.error(f"TTS WebSocket error: {str(e)}")
- await websocket.send_json({"error": str(e)})
- finally:
- logger.info("TTS WebSocket connection closed")
- # Ensure connection is properly closed
- if websocket.client_state.name == "CONNECTED":
- await websocket.close()
-
-
@voice_config_router.post("/connectivity")
async def check_voice_connectivity(request: VoiceConnectivityRequest):
- """
- Check voice service connectivity
-
- Args:
- request: VoiceConnectivityRequest containing model_type
-
- Returns:
- VoiceConnectivityResponse with connectivity status
- """
+ """Check voice service connectivity."""
try:
voice_service = get_voice_service()
connected = await voice_service.check_voice_connectivity(request.model_type)
-
return JSONResponse(
status_code=HTTPStatus.OK,
content=VoiceConnectivityResponse(
@@ -99,25 +72,10 @@ async def check_voice_connectivity(request: VoiceConnectivityRequest):
)
except VoiceServiceException as e:
logger.error(f"Voice service error: {str(e)}")
- raise HTTPException(
- status_code=HTTPStatus.BAD_REQUEST,
- detail=str(e)
- )
- except (STTConnectionException, TTSConnectionException) as e:
+ raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e))
+ except STTConnectionException as e:
logger.error(f"Voice connectivity error: {str(e)}")
- raise HTTPException(
- status_code=HTTPStatus.SERVICE_UNAVAILABLE,
- detail=str(e)
- )
- except VoiceConfigException as e:
- logger.error(f"Voice configuration error: {str(e)}")
- raise HTTPException(
- status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- detail=str(e)
- )
+ raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, detail=str(e))
except Exception as e:
logger.error(f"Unexpected voice service error: {str(e)}")
- raise HTTPException(
- status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- detail="Voice service error"
- )
+ raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Voice service error")
diff --git a/backend/assets/test_voice.pcm b/backend/assets/test_voice.pcm
new file mode 100644
index 000000000..0a78f9a15
Binary files /dev/null and b/backend/assets/test_voice.pcm differ
diff --git a/backend/consts/const.py b/backend/consts/const.py
index db1e69184..77e86a185 100644
--- a/backend/consts/const.py
+++ b/backend/consts/const.py
@@ -7,9 +7,12 @@
load_dotenv(override=True)
# TODO: Analyze every variable if this is used
-# Test voice file path
+# Test voice file path (WAV format for volcengine STT)
TEST_VOICE_PATH = os.path.join(os.path.dirname(
os.path.dirname(__file__)), 'assets', 'test.wav')
+# Test PCM file path (raw PCM format for Ali STT)
+TEST_PCM_PATH = os.path.join(os.path.dirname(
+ os.path.dirname(__file__)), 'assets', 'test_voice.pcm')
# Vector database providers
@@ -36,6 +39,11 @@ class VectorDatabaseType(str, Enum):
UPLOAD_FOLDER = os.getenv('UPLOAD_FOLDER', 'uploads')
ROOT_DIR = os.getenv("ROOT_DIR")
+PER_WAVE_TIMEOUT = int(os.getenv("DP_SPLIT_WAIT_TIMEOUT_PER_WAVE_S", "30"))
+MAX_TIMEOUT = int(os.getenv("DP_SPLIT_WAIT_TIMEOUT_MAX_S", "1800"))
+
+
+
# Container-internal skills storage path
CONTAINER_SKILLS_PATH = os.getenv("SKILLS_PATH")
@@ -149,7 +157,7 @@ class VectorDatabaseType(str, Enum):
RAY_ACTOR_NUM_CPUS = int(os.getenv("RAY_ACTOR_NUM_CPUS", "2"))
RAY_DASHBOARD_PORT = int(os.getenv("RAY_DASHBOARD_PORT", "8265"))
RAY_DASHBOARD_HOST = os.getenv("RAY_DASHBOARD_HOST", "0.0.0.0")
-RAY_NUM_CPUS = os.getenv("RAY_NUM_CPUS")
+RAY_NUM_CPUS = int(os.getenv("RAY_NUM_CPUS", "4"))
RAY_OBJECT_STORE_MEMORY_GB = float(
os.getenv("RAY_OBJECT_STORE_MEMORY_GB", "0.25"))
RAY_TEMP_DIR = os.getenv("RAY_TEMP_DIR", "/tmp/ray")
@@ -182,10 +190,22 @@ class VectorDatabaseType(str, Enum):
# Worker Configuration
RAY_ADDRESS = os.getenv("RAY_ADDRESS", "auto")
-QUEUES = os.getenv("QUEUES", "process_q,forward_q")
+QUEUES = os.getenv("QUEUES", "process_q,process_part_q,forward_q")
# Will be dynamically set based on PID if not provided
WORKER_NAME = os.getenv("WORKER_NAME")
WORKER_CONCURRENCY = int(os.getenv("WORKER_CONCURRENCY", "4"))
+RAY_WARM_ACTOR_POOL_SIZE_PART = int(os.getenv("RAY_WARM_ACTOR_POOL_SIZE_PART", "2"))
+RAY_WARM_ACTOR_POOL_SIZE_PROCESS = int(os.getenv("RAY_WARM_ACTOR_POOL_SIZE_PROCESS", "1"))
+# Global Ray actor pool (shared by process_q/process_part_q workers)
+RAY_GLOBAL_ACTOR_POOL_SIZE = int(os.getenv("RAY_GLOBAL_ACTOR_POOL_SIZE", "3"))
+RAY_ACTOR_WARM_TIMEOUT_S = float(os.getenv("RAY_ACTOR_WARM_TIMEOUT_S", "60"))
+RAY_GLOBAL_ACTOR_POOL_NAME = os.getenv(
+ "RAY_GLOBAL_ACTOR_POOL_NAME", "nexent_global_data_processor_pool")
+RAY_GLOBAL_ACTOR_POOL_NAMESPACE = os.getenv(
+ "RAY_GLOBAL_ACTOR_POOL_NAMESPACE", "nexent-data-process")
+
+
+
# Voice Service Configuration
@@ -348,7 +368,7 @@ class VectorDatabaseType(str, Enum):
# APP Version
-APP_VERSION = "v2.1.0"
+APP_VERSION = "v2.1.1"
# Skill Creation Streaming Configuration
diff --git a/backend/consts/error_message.py b/backend/consts/error_message.py
index 4ff1141c7..27ac33d00 100644
--- a/backend/consts/error_message.py
+++ b/backend/consts/error_message.py
@@ -5,6 +5,8 @@
Frontend should use i18n for localized messages.
"""
+from typing import Dict, Tuple
+
from .error_code import ErrorCode
@@ -145,11 +147,11 @@ def get_message(cls, error_code: ErrorCode) -> str:
return cls._MESSAGES.get(error_code, "An error occurred. Please try again later.")
@classmethod
- def get_message_with_code(cls, error_code: ErrorCode) -> tuple[int, str]:
+ def get_message_with_code(cls, error_code: ErrorCode) -> Tuple[int, str]:
"""Get error code and message as tuple."""
return (error_code.value, cls.get_message(error_code))
@classmethod
- def get_all_messages(cls) -> dict:
+ def get_all_messages(cls) -> Dict:
"""Get all error code to message mappings."""
return {code.value: msg for code, msg in cls._MESSAGES.items()}
diff --git a/backend/consts/exceptions.py b/backend/consts/exceptions.py
index 9481ebab2..a32f0282e 100644
--- a/backend/consts/exceptions.py
+++ b/backend/consts/exceptions.py
@@ -190,18 +190,6 @@ class STTConnectionException(Exception):
pass
-class TTSConnectionException(Exception):
- """Raised when TTS service connection fails."""
-
- pass
-
-
-class VoiceConfigException(Exception):
- """Raised when voice configuration is invalid."""
-
- pass
-
-
class ToolExecutionException(Exception):
"""Raised when mcp tool execution failed."""
diff --git a/backend/consts/model.py b/backend/consts/model.py
index 7cea3fdb5..bcaffcae7 100644
--- a/backend/consts/model.py
+++ b/backend/consts/model.py
@@ -118,6 +118,9 @@ class ModelRequest(BaseModel):
expected_chunk_size: Optional[int] = None
maximum_chunk_size: Optional[int] = None
chunk_batch: Optional[int] = None
+ # STT specific fields
+ model_appid: Optional[str] = None
+ access_token: Optional[str] = None
class ProviderModelRequest(BaseModel):
@@ -147,14 +150,23 @@ class SingleModelConfig(BaseModel):
dimension: Optional[int] = None
+class STTModelConfig(BaseModel):
+ """STT model configuration: model/display names plus optional API config, factory, app ID and access token."""
+ modelName: str
+ displayName: str
+ apiConfig: Optional[ModelApiConfig] = None
+ modelFactory: Optional[str] = None
+ modelAppid: Optional[str] = None
+ accessToken: Optional[str] = None
+
+
class ModelConfig(BaseModel):
llm: SingleModelConfig
embedding: SingleModelConfig
multiEmbedding: SingleModelConfig
rerank: SingleModelConfig
vlm: SingleModelConfig
- stt: SingleModelConfig
- tts: SingleModelConfig
+ stt: STTModelConfig
class AppConfig(BaseModel):
@@ -334,6 +346,7 @@ class AgentInfoRequest(BaseModel):
enabled_tool_ids: Optional[List[int]] = None
enabled_skill_ids: Optional[List[int]] = None
related_agent_ids: Optional[List[int]] = None
+ related_external_agent_ids: Optional[List[int]] = None
group_ids: Optional[List[int]] = None
ingroup_permission: Optional[str] = None
enable_context_manager: Optional[bool] = None
@@ -492,7 +505,7 @@ def default(cls) -> "MemoryAgentShareMode":
class VoiceConnectivityRequest(BaseModel):
"""Request model for voice service connectivity check"""
model_type: str = Field(...,
- description="Type of model to check ('stt' or 'tts')")
+ description="Type of model to check ('stt')")
class VoiceConnectivityResponse(BaseModel):
@@ -503,19 +516,6 @@ class VoiceConnectivityResponse(BaseModel):
message: str = Field(..., description="Status message")
-class TTSRequest(BaseModel):
- """Request model for TTS text-to-speech conversion"""
- text: str = Field(..., min_length=1,
- description="Text to convert to speech")
- stream: bool = Field(True, description="Whether to stream the audio")
-
-
-class TTSResponse(BaseModel):
- """Response model for TTS conversion"""
- status: str = Field(..., description="Status of the TTS conversion")
- message: Optional[str] = Field(None, description="Additional message")
-
-
class ToolValidateRequest(BaseModel):
"""Request model for tool validation"""
name: str = Field(..., description="Tool name to validate")
@@ -744,15 +744,18 @@ class ManageTenantModelCreateRequest(BaseModel):
tenant_id: str = Field(..., min_length=1, description="Target tenant ID to create model for")
model_repo: Optional[str] = Field('', description="Model repository path")
model_name: str = Field(..., description="Model name")
- model_type: str = Field(..., description="Model type (e.g., 'llm', 'embedding', 'vlm', 'tts', 'stt')")
+ model_type: str = Field(..., description="Model type (e.g., 'llm', 'embedding', 'vlm', 'stt')")
api_key: Optional[str] = Field('', description="API key for the model")
base_url: Optional[str] = Field('', description="Base URL for the model API")
max_tokens: Optional[int] = Field(0, description="Maximum tokens for the model")
display_name: Optional[str] = Field('', description="Display name for the model")
- model_factory: Optional[str] = Field('OpenAI-API-Compatible', description="Model factory/provider name")
+ model_factory: Optional[str] = Field(None, description="Model factory/vendor for the model")
expected_chunk_size: Optional[int] = Field(None, description="Expected chunk size for embedding models")
maximum_chunk_size: Optional[int] = Field(None, description="Maximum chunk size for embedding models")
chunk_batch: Optional[int] = Field(None, description="Batch size for chunking")
+ # STT specific fields
+ model_appid: Optional[str] = Field(None, description="Application ID for STT models (e.g., Volcano Engine)")
+ access_token: Optional[str] = Field(None, description="Access token for STT models (e.g., Volcano Engine)")
class ManageTenantModelUpdateRequest(BaseModel):
@@ -766,10 +769,13 @@ class ManageTenantModelUpdateRequest(BaseModel):
base_url: Optional[str] = Field(None, description="Base URL for the model API")
max_tokens: Optional[int] = Field(None, description="Maximum tokens for the model")
display_name: Optional[str] = Field(None, description="New display name for the model")
- model_factory: Optional[str] = Field(None, description="Model factory/provider name")
+ model_factory: Optional[str] = Field(None, description="Model factory/vendor for the model")
expected_chunk_size: Optional[int] = Field(None, description="Expected chunk size for embedding models")
maximum_chunk_size: Optional[int] = Field(None, description="Maximum chunk size for embedding models")
chunk_batch: Optional[int] = Field(None, description="Batch size for chunking")
+ # STT specific fields
+ model_appid: Optional[str] = Field(None, description="Application ID for STT models")
+ access_token: Optional[str] = Field(None, description="Access token for STT models")
class ManageTenantModelDeleteRequest(BaseModel):
diff --git a/backend/consts/scheduler.py b/backend/consts/scheduler.py
new file mode 100644
index 000000000..6820a9687
--- /dev/null
+++ b/backend/consts/scheduler.py
@@ -0,0 +1,28 @@
+"""
+Scheduler frequency constants
+Centralized definition for auto-summary frequency options
+"""
+from datetime import timedelta
+
+# Core frequency config: includes value, timedelta, and label; this is the single source of truth
+SUMMARY_FREQUENCY_CONFIG = [
+    {"value": "1h", "timedelta": timedelta(hours=1), "label": "1h"},
+    {"value": "3h", "timedelta": timedelta(hours=3), "label": "3h"},
+    {"value": "6h", "timedelta": timedelta(hours=6), "label": "6h"},
+    {"value": "1d", "timedelta": timedelta(days=1), "label": "1d"},
+    {"value": "1w", "timedelta": timedelta(weeks=1), "label": "1w"},
+]
+
+# Valid frequency values for validation, derived from the config; None is also accepted
+VALID_SUMMARY_FREQUENCIES = [item["value"] for item in SUMMARY_FREQUENCY_CONFIG] + [None]
+
+# Mapping from frequency value to its timedelta, derived from the config
+FREQUENCY_MAP = {item["value"]: item["timedelta"] for item in SUMMARY_FREQUENCY_CONFIG}
+
+# API options for the frontend: a "disabled" entry, then each configured value with its own label
+SUMMARY_FREQUENCY_OPTIONS_FOR_API = [
+    {"value": "disabled", "label": "Disabled"},
+] + [{"value": item["value"], "label": item["label"]} for item in SUMMARY_FREQUENCY_CONFIG]
+
+# Scheduler check interval (seconds)
+SCHEDULER_CHECK_INTERVAL_SECONDS = 30 * 60
diff --git a/backend/data_process/ray_actors.py b/backend/data_process/ray_actors.py
index 2fa590bec..0dea828ce 100644
--- a/backend/data_process/ray_actors.py
+++ b/backend/data_process/ray_actors.py
@@ -1,5 +1,6 @@
import logging
import json
+import time
from typing import Any, Dict, List, Optional
import ray
@@ -27,6 +28,84 @@ def __init__(self):
f"Ray actor initialized using {RAY_ACTOR_NUM_CPUS} CPU cores...")
self._processor = DataProcessCore()
+ def ping(self) -> bool:
+ """Lightweight health check used by prewarm logic; always returns True."""
+ return True
+
+    def _prepare_process_params(
+        self,
+        task_id: Optional[str],
+        model_id: Optional[int],
+        tenant_id: Optional[str],
+        params: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """
+        Normalize task/model-related processing params.
+
+        Returns a copy of ``params`` (the caller's dict is not mutated) with
+        ``task_id`` added when given and, when both ``model_id`` and ``tenant_id``
+        are set and the model record is found, ``max_characters`` /
+        ``new_after_n_chars`` taken from that record. Lookup failures are logged
+        and the params are returned without chunk-size overrides.
+        """
+        process_params = dict(params)
+        if task_id:
+            process_params["task_id"] = task_id
+        if not (model_id and tenant_id):
+            return process_params
+        try:
+            model_record = get_model_by_model_id(model_id=model_id, tenant_id=tenant_id)
+            if not model_record:
+                logger.warning(
+                    f"[RayActor] Embedding model with ID {model_id} not found for tenant '{tenant_id}', using default chunk sizes")
+                return process_params
+            # dict.get() only falls back when the key is absent; use `or` so a
+            # record that stores None/0 for a chunk size also gets the default.
+            expected_chunk_size = model_record.get("expected_chunk_size") or DEFAULT_EXPECTED_CHUNK_SIZE
+            maximum_chunk_size = model_record.get("maximum_chunk_size") or DEFAULT_MAXIMUM_CHUNK_SIZE
+            model_name = model_record.get("display_name")
+            process_params["max_characters"] = maximum_chunk_size
+            process_params["new_after_n_chars"] = expected_chunk_size
+            logger.info(
+                f"[RayActor] Using chunk sizes from embedding model '{model_name}' (ID: {model_id}): "
+                f"max_characters={maximum_chunk_size}, new_after_n_chars={expected_chunk_size}")
+        except Exception as e:
+            logger.warning(
+                f"[RayActor] Failed to retrieve chunk sizes from embedding model ID {model_id}: {e}. Using default chunk sizes")
+        return process_params
+
+    def _run_file_process(
+        self,
+        file_data: bytes,
+        filename: str,
+        chunking_strategy: str,
+        process_params: Dict[str, Any],
+        log_subject: str,
+    ) -> List[Dict[str, Any]]:
+        """
+        Run DataProcessCore.file_process and normalize its result to a list.
+
+        None, non-list and empty results are logged and returned as []; otherwise the
+        chunk list is returned unchanged. ``log_subject`` only labels the log messages.
+        """
+        chunks = self._processor.file_process(
+            file_data=file_data, filename=filename,
+            chunking_strategy=chunking_strategy, **process_params)
+        if chunks is None:
+            logger.warning(
+                f"[RayActor] file_process returned None for {log_subject}='{filename}'")
+        elif not isinstance(chunks, list):
+            logger.error(
+                f"[RayActor] file_process returned non-list type {type(chunks)} for {log_subject}='{filename}'")
+        elif not chunks:
+            logger.warning(
+                f"[RayActor] file_process returned empty list for {log_subject}='{filename}'")
+        else:
+            logger.info(
+                f"[RayActor] Processing done: produced {len(chunks)} chunks for {log_subject}='{filename}'")
+            return chunks
+        return []
+
def process_file(
self,
source: str,
@@ -54,70 +133,125 @@ def process_file(
"""
logger.info(
f"[RayActor] Processing start: source='{source}', destination='{destination}', strategy='{chunking_strategy}', task_id='{task_id}', model_id='{model_id}'")
-
- if task_id:
- params['task_id'] = task_id
-
- # Get chunk size parameters from embedding model if model_id is provided
- if model_id and tenant_id:
- try:
- # Get embedding model details directly by model_id
- model_record = get_model_by_model_id(
- model_id=model_id, tenant_id=tenant_id)
- if model_record:
- expected_chunk_size = model_record.get(
- 'expected_chunk_size', DEFAULT_EXPECTED_CHUNK_SIZE)
- maximum_chunk_size = model_record.get(
- 'maximum_chunk_size', DEFAULT_MAXIMUM_CHUNK_SIZE)
- model_name = model_record.get('display_name')
-
- # Pass chunk sizes to processing parameters
- params['max_characters'] = maximum_chunk_size
- params['new_after_n_chars'] = expected_chunk_size
-
- logger.info(
- f"[RayActor] Using chunk sizes from embedding model '{model_name}' (ID: {model_id}): "
- f"max_characters={maximum_chunk_size}, new_after_n_chars={expected_chunk_size}")
- else:
- logger.warning(
- f"[RayActor] Embedding model with ID {model_id} not found for tenant '{tenant_id}', using default chunk sizes")
- except Exception as e:
- logger.warning(
- f"[RayActor] Failed to retrieve chunk sizes from embedding model ID {model_id}: {e}. Using default chunk sizes")
+ process_params = self._prepare_process_params(
+ task_id=task_id,
+ model_id=model_id,
+ tenant_id=tenant_id,
+ params=params,
+ )
try:
+ fetch_start = time.perf_counter()
file_stream = get_file_stream(source)
if file_stream is None:
raise FileNotFoundError(
f"Unable to fetch file from URL: {source}")
file_data = file_stream.read()
+ fetch_elapsed = time.perf_counter() - fetch_start
+ logger.info(
+ f"[RayActor] Fetch file bytes done: destination='{destination}', source='{source}', "
+ f"bytes={len(file_data)}, elapsed={fetch_elapsed:.3f}s")
except Exception as e:
logger.error(f"Failed to fetch file from {source}: {e}")
raise
- chunks = self._processor.file_process(
+ return self._run_file_process(
file_data=file_data,
filename=source,
chunking_strategy=chunking_strategy,
+ process_params=process_params,
+ log_subject="source",
+ )
+
+    def process_bytes(
+        self,
+        file_bytes: bytes,
+        filename: str,
+        chunking_strategy: str,
+        task_id: Optional[str] = None,
+        model_id: Optional[int] = None,
+        tenant_id: Optional[str] = None,
+        **params
+    ) -> List[Dict[str, Any]]:
+        """
+        Chunk a file that is already in memory.
+
+        Logs the request, resolves processing params via _prepare_process_params
+        (task id plus model-derived chunk sizes), then delegates to
+        _run_file_process, which calls DataProcessCore.file_process and returns
+        the chunk list, or [] when that yields None, a non-list or an empty list.
+        Extra keyword arguments are forwarded as processing params.
+        """
+        logger.info(
+            f"[RayActor] Processing bytes: filename='{filename}', strategy='{chunking_strategy}', task_id='{task_id}', model_id='{model_id}'"
+        )
+        return self._run_file_process(
+            file_data=file_bytes,
+            filename=filename,
+            chunking_strategy=chunking_strategy,
+            process_params=self._prepare_process_params(
+                task_id=task_id, model_id=model_id, tenant_id=tenant_id, params=params),
+            log_subject="filename",
+        )
+
+ def split_file(
+ self,
+ source: str,
+ destination: str,
+ task_id: Optional[str] = None,
+ max_size: int = 5 * 1024 * 1024,
+ file_data: Optional[bytes] = None,
+ **params
+ ) -> List[bytes]:
+ """
+ Split file into parts using DataProcessCore.file_split and return raw bytes list.
+ """
+ logger.info(
+ f"[RayActor] Splitting file: source='{source}', destination='{destination}', task_id='{task_id}', max_size={max_size}"
+ )
+
+ if file_data is None:
+ try:
+ fetch_start = time.perf_counter()
+ file_stream = get_file_stream(source)
+ if file_stream is None:
+ raise FileNotFoundError(
+ f"Unable to fetch file from URL: {source}")
+ file_data = file_stream.read()
+ fetch_elapsed = time.perf_counter() - fetch_start
+ logger.info(
+ f"[RayActor] Fetch file bytes for split done: destination='{destination}', source='{source}', "
+ f"bytes={len(file_data)}, elapsed={fetch_elapsed:.3f}s")
+ except Exception as e:
+ logger.error(f"Failed to fetch file from {source}: {e}")
+ raise
+
+ split_start = time.perf_counter()
+ parts = self._processor.file_split(
+ file_data=file_data,
+ filename=source,
+ max_size=max_size,
**params
)
+ split_elapsed = time.perf_counter() - split_start
- if chunks is None:
- logger.warning(
- f"[RayActor] file_process returned None for source='{source}'")
- return []
- if not isinstance(chunks, list):
- logger.error(
- f"[RayActor] file_process returned non-list type {type(chunks)} for source='{source}'")
- return []
- if len(chunks) == 0:
- logger.warning(
- f"[RayActor] file_process returned empty list for source='{source}'")
+ if not parts:
+ logger.info(
+ f"[RayActor] Split done: destination='{destination}', source='{source}', "
+ f"parts=0, elapsed={split_elapsed:.3f}s")
return []
+        bytes_parts: List[bytes] = []
+        for part in parts:
+            try:
+                bytes_parts.append(part.getvalue())
+            except Exception as e:  # best-effort: skip the part, but leave a trace instead of dropping it silently
+                logger.warning(f"[RayActor] Skipping split part of '{source}' without readable bytes: {e}")
+
logger.info(
- f"[RayActor] Processing done: produced {len(chunks)} chunks for source='{source}'")
- return chunks
+ f"[RayActor] Split done: destination='{destination}', source='{source}', "
+ f"parts={len(bytes_parts)}, elapsed={split_elapsed:.3f}s")
+ return bytes_parts
def store_chunks_in_redis(self, redis_key: str, chunks: List[Dict[str, Any]]) -> bool:
"""
diff --git a/backend/data_process/tasks.py b/backend/data_process/tasks.py
index 50414b711..f2a30f9b7 100644
--- a/backend/data_process/tasks.py
+++ b/backend/data_process/tasks.py
@@ -4,32 +4,180 @@
import asyncio
import json
import logging
+import math
import os
import threading
import time
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, List, Tuple
import aiohttp
import re
import ray
-from celery import Task, chain, states
+from celery import Task, chain, states, group, chord
from celery.exceptions import Retry
+from celery.result import allow_join_result
-from consts.const import ELASTICSEARCH_SERVICE
from utils.file_management_utils import get_file_size
+from database.attachment_db import get_file_stream
from services.redis_service import get_redis_service
from .app import app
from .ray_actors import DataProcessorRayActor
from consts.const import (
+ ELASTICSEARCH_SERVICE,
REDIS_BACKEND_URL,
FORWARD_REDIS_RETRY_DELAY_S,
FORWARD_REDIS_RETRY_MAX,
+ DP_REDIS_CHUNKS_WAIT_TIMEOUT_S,
+ DP_REDIS_CHUNKS_POLL_INTERVAL_MS,
+ RAY_ACTOR_NUM_CPUS,
+ RAY_NUM_CPUS,
DISABLE_RAY_DASHBOARD,
ROOT_DIR,
+ PER_WAVE_TIMEOUT,
+ MAX_TIMEOUT,
+ RAY_GLOBAL_ACTOR_POOL_SIZE,
+ RAY_ACTOR_WARM_TIMEOUT_S,
+ RAY_GLOBAL_ACTOR_POOL_NAME,
+ RAY_GLOBAL_ACTOR_POOL_NAMESPACE
)
logger = logging.getLogger("data_process.tasks")
+ASYNC_SPLIT_RETRY_MAX = max(FORWARD_REDIS_RETRY_MAX * 5, FORWARD_REDIS_RETRY_MAX)
+FORWARD_ES_CHUNK_BATCH_SIZE = 64
+IMAGE_METADATA_PROCESS_SOURCE = "UniversalImageExtractor"
+
+def _wait_for_split_ready(redis_key: str, timeout_s: int, poll_interval_ms: int) -> int:
+    """
+    Poll Redis until the "<redis_key>:ready" marker is set, then count the cached chunks.
+
+    Returns:
+        Length of the JSON list stored at ``redis_key``; 0 when the value is
+        missing, is not a list, or cannot be parsed.
+
+    Raises:
+        RuntimeError: REDIS_BACKEND_URL is not configured.
+        TimeoutError: the ready marker did not appear within ``timeout_s`` seconds.
+    """
+    if not REDIS_BACKEND_URL:
+        raise RuntimeError("REDIS_BACKEND_URL not configured")
+    import redis
+    client = redis.Redis.from_url(REDIS_BACKEND_URL, decode_responses=True)
+    ready_key = f"{redis_key}:ready"
+    deadline = time.time() + timeout_s
+    while time.time() < deadline:
+        if not client.get(ready_key):
+            time.sleep(max(0.01, poll_interval_ms / 1000.0))
+            continue
+        cached = client.get(redis_key)
+        try:
+            parsed = json.loads(cached) if cached else None
+        except Exception:
+            return 0
+        return len(parsed) if isinstance(parsed, list) else 0
+    raise TimeoutError(
+        f"Timed out waiting for async split aggregation at key '{ready_key}' after {timeout_s}s")
+
+
+def _estimate_parallel_parts() -> int:
+    """Estimate how many Ray actors can run at once: total CPUs // CPUs per actor, at least 1."""
+    try:
+        total_cpus = int(RAY_NUM_CPUS)  # int() is what can fail on a None/invalid setting
+    except (TypeError, ValueError):
+        total_cpus = os.cpu_count() or 1
+    return max(1, total_cpus // max(1, int(RAY_ACTOR_NUM_CPUS)))
+
+
+def _compute_split_wait_timeout(parts_count: int) -> int:
+    """Base wait timeout plus PER_WAVE_TIMEOUT for each processing wave after the first, capped at MAX_TIMEOUT."""
+    extra_waves = max(0, math.ceil(max(1, parts_count) / _estimate_parallel_parts()) - 1)
+    scaled = DP_REDIS_CHUNKS_WAIT_TIMEOUT_S + extra_waves * max(1, PER_WAVE_TIMEOUT)
+    return min(MAX_TIMEOUT, max(DP_REDIS_CHUNKS_WAIT_TIMEOUT_S, scaled))
+
+
+def _count_image_metadata_chunks(chunks: Optional[List[Dict[str, Any]]]) -> int:
+    """Count chunks whose "process_source" equals IMAGE_METADATA_PROCESS_SOURCE (0 for None/empty)."""
+    count = 0
+    for item in chunks or []:
+        # non-dict entries are ignored
+        if isinstance(item, dict) and item.get("process_source") == IMAGE_METADATA_PROCESS_SOURCE:
+            count += 1
+    return count
+
+
+def _get_next_available_batch_index(batches: List[List[Dict[str, Any]]], start_idx: int, batch_size: int) -> int:
+    """
+    Return the index of the first batch holding fewer than ``batch_size`` items,
+    scanning circularly from ``start_idx`` and visiting each batch at most once.
+    Raises RuntimeError when every batch is full (or there are no batches).
+    """
+    idx, remaining = start_idx, len(batches)
+    while remaining > 0:
+        if len(batches[idx]) < batch_size:
+            return idx
+        idx, remaining = (idx + 1) % len(batches), remaining - 1
+    raise RuntimeError("No available batch capacity")
+
+
+def _distribute_chunks_round_robin(
+    batches: List[List[Dict[str, Any]]], chunks: List[Dict[str, Any]], batch_size: int, error_context: str,
+) -> None:
+    """
+    Append ``chunks`` to ``batches`` in place, one per batch in rotation, skipping full batches.
+    Raises RuntimeError naming ``error_context`` when no batch has room left.
+    """
+    cursor = 0
+    for item in chunks:
+        try:
+            slot = _get_next_available_batch_index(batches, cursor, batch_size)
+        except RuntimeError as exc:
+            raise RuntimeError(
+                f"No available batch capacity while distributing {error_context}") from exc
+        batches[slot].append(item)
+        cursor = (slot + 1) % len(batches)
+
+
+def _build_balanced_batches(
+    formatted_chunks: List[Dict[str, Any]],
+    batch_size: int = FORWARD_ES_CHUNK_BATCH_SIZE,
+) -> List[List[Dict[str, Any]]]:
+    """
+    Split chunks into batches of at most ``batch_size`` items, spreading
+    image-metadata chunks across batches before filling in the rest.
+
+    Args:
+        formatted_chunks: Chunk dicts; "process_source" marks image metadata.
+        batch_size: Maximum number of chunks per batch.
+
+    Returns:
+        [] for empty input, [formatted_chunks] (the same list object) when
+        everything fits in one batch, otherwise ceil(total / batch_size) batches
+        filled round-robin: image-metadata chunks first, then all other chunks.
+        Original chunk order is therefore not preserved across batches.
+    """
+    total = len(formatted_chunks)
+    if total == 0:
+        return []
+    if total <= batch_size:
+        return [formatted_chunks]
+
+    # Partition in one pass, keeping relative order inside each group
+    image_chunks: List[Dict[str, Any]] = []
+    text_chunks: List[Dict[str, Any]] = []
+    for chunk in formatted_chunks:
+        is_image = chunk.get("process_source") == IMAGE_METADATA_PROCESS_SOURCE
+        (image_chunks if is_image else text_chunks).append(chunk)
+    batches: List[List[Dict[str, Any]]] = [[] for _ in range(math.ceil(total / batch_size))]
+    groups = (
+        (image_chunks, "image metadata chunks"),
+        (text_chunks, "text chunks"),
+    )
+    for group, label in groups:
+        _distribute_chunks_round_robin(
+            batches=batches, chunks=group, batch_size=batch_size, error_context=label)
+    return batches
+
+
# Thread lock for initializing Ray to prevent race conditions
ray_init_lock = threading.Lock()
@@ -179,23 +327,257 @@ def run_in_thread():
raise
-# Initialize the data processing core LAZILY
-# This will be initialized on first task run by a worker process
-def get_ray_actor() -> Any:
+def _build_forward_error(
+    message: str,
+    index_name: str,
+    source: Optional[str],
+    original_filename: Optional[str],
+) -> Exception:
+    """
+    Build (but do not raise) an Exception whose text is a JSON error payload
+    for the forward task.
+
+    The payload carries message, index_name, task_name ("forward"), source and
+    original_filename, serialized without ASCII escaping.
+    """
+    payload = {
+        "message": message,
+        "index_name": index_name,
+        "task_name": "forward",
+        "source": source,
+        "original_filename": original_filename
+    }
+    serialized = json.dumps(payload, ensure_ascii=False)
+    return Exception(serialized)
+
+
+def _parse_json_or_none(text: str) -> Optional[Dict[str, Any]]:
+    """
+    Decode text as JSON and return it only when the top-level value is an object.
+
+    Returns None when decoding fails for any reason or when the decoded value
+    is not a dict (for example a list, number or string).
+    """
+    try:
+        decoded = json.loads(text)
+    except Exception:
+        return None
+    if isinstance(decoded, dict):
+        return decoded
+    return None
+
+
+def _extract_error_code_from_es_response(
+    parsed_body: Optional[Dict[str, Any]],
+    text: str,
+) -> Optional[str]:
+    """
+    Pull an error_code out of an error response.
+
+    Lookup order:
+      1. parsed_body["error_code"], overridden by an error_code found inside
+         parsed_body["detail"] (either a dict, or a string holding a JSON object).
+      2. A regex search for an `error_code: value` pair in the raw text.
+
+    Returns:
+        The error code, or None when nothing could be found.
+    """
+    code = None
+    if isinstance(parsed_body, dict):
+        code = parsed_body.get("error_code")
+        detail = parsed_body.get("detail")
+        if isinstance(detail, str):
+            # detail may itself be a JSON-encoded object.
+            nested = _parse_json_or_none(detail)
+            if isinstance(nested, dict):
+                code = nested.get("error_code", code)
+        elif isinstance(detail, dict):
+            nested_code = detail.get("error_code")
+            if nested_code:
+                code = nested_code
+
+    if code:
+        return code
+
+    # Last resort: scan the raw body for a quoted error_code pair.
+    try:
+        found = re.search(
+            r'["\']error_code["\']\s*:\s*["\']([^"\']+)["\']', text)
+    except Exception:
+        return None
+    if found:
+        return found.group(1)
+    return None
+
+
+def _send_chunks_to_es(
+    chunks: List[Dict[str, Any]],
+    index_name: str,
+    authorization: str | None,
+    task_id: Optional[str] = None,
+    source: str = "",
+    original_filename: str = "",
+    large_mode: bool = False,
+) -> Dict[str, Any]:
+    """
+    POST a batch of chunks to `{ELASTICSEARCH_SERVICE}/indices/{index_name}/documents`
+    and return the decoded JSON response.
+
+    Args:
+        chunks: Chunk payload sent as the JSON request body.
+        index_name: Target index, interpolated into the route.
+        authorization: Optional value for the Authorization header.
+        task_id: Optional task id, sent as X-Task-Id and used as a log prefix.
+        source: Source path/URL, only used to enrich error payloads.
+        original_filename: Original file name, only used to enrich error payloads.
+        large_mode: When true, adds the query parameter large_mode=true.
+
+    Returns:
+        The response body decoded as JSON.
+
+    Raises:
+        Exception: A JSON-encoded forward error (see _build_forward_error) when the
+            service URL is not configured, on connection failure, on timeout, or on
+            any other failure. HTTP >= 400 responses are raised inside the request
+            block and therefore reach the caller wrapped by the generic handler,
+            with the error_code (if any) embedded in the message text.
+    """
+    async def _post():
+        elasticsearch_url = ELASTICSEARCH_SERVICE
+        if not elasticsearch_url:
+            raise _build_forward_error(
+                message="ELASTICSEARCH_SERVICE env is not set",
+                index_name=index_name,
+                source=source,
+                original_filename=original_filename,
+            )
+        route_url = f"/indices/{index_name}/documents"
+        full_url = elasticsearch_url + route_url
+        headers = {"Content-Type": "application/json"}
+        if authorization:
+            headers["Authorization"] = authorization
+        if task_id:
+            headers["X-Task-Id"] = task_id
+        try:
+            # `ssl=False` skips certificate verification; it replaces the
+            # deprecated `verify_ssl=False` spelling.
+            connector = aiohttp.TCPConnector(ssl=False)
+            timeout = aiohttp.ClientTimeout(total=600)
+
+            request_params: Dict[str, str] = {}
+
+            if large_mode:
+                request_params["large_mode"] = "true"
+
+            async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
+                async with session.post(
+                    full_url,
+                    headers=headers,
+                    json=chunks,
+                    params=request_params,
+                    raise_for_status=False
+                ) as response:
+                    text = await response.text()
+                    status = response.status
+                    parsed_body = _parse_json_or_none(text)
+
+                    if status >= 400:
+                        error_code = _extract_error_code_from_es_response(parsed_body, text)
+                        if error_code:
+                            raise Exception(json.dumps({
+                                "error_code": error_code
+                            }, ensure_ascii=False))
+
+                        raise Exception(
+                            f"ElasticSearch service returned HTTP {status}")
+
+                    # Fall back to aiohttp's own decoder when the body is not a JSON object.
+                    result = parsed_body if isinstance(parsed_body, dict) else await response.json()
+                    return result
+
+        except aiohttp.ClientConnectorError as e:
+            logger.error(
+                f"[{task_id}] FORWARD TASK: Connection error to {full_url}: {str(e)}")
+            raise _build_forward_error(
+                message=f"Failed to connect to API: {str(e)}",
+                index_name=index_name,
+                source=source,
+                original_filename=original_filename,
+            ) from e
+        except asyncio.TimeoutError as e:
+            logger.warning(
+                f"[{task_id}] FORWARD TASK: Timeout when indexing documents: {str(e)}.")
+            raise _build_forward_error(
+                message=f"Timeout when indexing documents: {str(e)}",
+                index_name=index_name,
+                source=source,
+                original_filename=original_filename,
+            ) from e
+        except Exception as e:
+            logger.error(
+                f"[{task_id}] FORWARD TASK: Unexpected error when indexing documents: {str(e)}.")
+            raise _build_forward_error(
+                message=f"Unexpected error when indexing documents: {str(e)}",
+                index_name=index_name,
+                source=source,
+                original_filename=original_filename,
+            ) from e
+
+    return run_async(_post())
+
+
+@ray.remote(num_cpus=0)
+class GlobalRayActorPoolManager:
"""
- Creates a new, anonymous DataProcessorRayActor instance for each call.
- This allows for parallel execution of data processing tasks, with each
- task running in its own actor.
+ Cluster-wide shared actor pool manager.
+ A single detached manager serves all Celery worker processes.
+
+ Keeps a list of warmed DataProcessorRayActor handles and hands them out
+ in round-robin order. The pool only grows; actors are never removed here.
"""
+
+ def __init__(self, warm_timeout_s: float):
+ """Store the warm-up timeout (seconds) and start with an empty pool."""
+ self.warm_timeout_s = warm_timeout_s
+ self.actors: List[Any] = []
+ # Monotonic counter; reduced modulo the pool size on every get_actor() call.
+ self.rr_index = 0
+
+ def _create_and_warm_actor(self) -> Optional[Any]:
+ """Create one actor and wait for its ping; return None if warm-up fails."""
+ actor = DataProcessorRayActor.remote()
+ try:
+ ray.get(actor.ping.remote(), timeout=self.warm_timeout_s)
+ return actor
+ except Exception as exc:
+ # Warm-up failed: best-effort kill so the half-started actor does not linger.
+ try:
+ ray.kill(actor, no_restart=True)
+ except Exception:
+ pass
+ logger.warning(
+ f"[GlobalRayActorPoolManager] Warm actor failed in {self.warm_timeout_s:.1f}s: {exc}"
+ )
+ return None
+
+ def ensure_pool(self, desired: int, max_allowed: int) -> int:
+ """
+ Grow the pool towards `desired` actors, capped at `max_allowed` (at least 1).
+ Returns the resulting pool size, which may be below the target when some
+ warm-ups fail. An oversized pool is left untouched.
+ """
+ desired = max(0, int(desired))
+ max_allowed = max(1, int(max_allowed))
+ desired = min(desired, max_allowed)
+ missing = max(0, desired - len(self.actors))
+ for _ in range(missing):
+ actor = self._create_and_warm_actor()
+ if actor is not None:
+ self.actors.append(actor)
+ return len(self.actors)
+
+ def get_actor(self) -> Any:
+ """
+ Return the next actor in round-robin order, creating one on demand when
+ the pool is empty. Raises RuntimeError if that on-demand warm-up fails.
+ """
+ if not self.actors:
+ actor = self._create_and_warm_actor()
+ if actor is None:
+ raise RuntimeError("Global actor pool is empty and actor warm-up failed")
+ self.actors.append(actor)
+ idx = self.rr_index % len(self.actors)
+ self.rr_index += 1
+ return self.actors[idx]
+
+
+def _get_or_create_global_pool_manager() -> Any:
+ """
+ Return a handle to the named, detached GlobalRayActorPoolManager, creating it
+ if it does not exist yet.
+
+ Strategy, in order:
+ 1. options(..., get_if_exists=True) for an atomic get-or-create.
+ 2. On TypeError (presumably a Ray version without get_if_exists; confirm),
+ look the actor up by name.
+ 3. If the lookup fails, create it; if creation fails too, look it up again,
+ since another worker may have won the naming race.
+ """
with ray_init_lock:
init_ray_in_worker()
- actor = DataProcessorRayActor.remote()
- logger.debug(
- "Successfully created a new DataProcessorRayActor for a task.")
- return actor
+ # NOTE(review): whether the steps below run while ray_init_lock is held cannot
+ # be told from this view; confirm against the real indentation.
+ # Prefer atomic get/create when supported.
+ try:
+ return GlobalRayActorPoolManager.options(
+ name=RAY_GLOBAL_ACTOR_POOL_NAME,
+ namespace=RAY_GLOBAL_ACTOR_POOL_NAMESPACE,
+ lifetime="detached",
+ get_if_exists=True,
+ ).remote(RAY_ACTOR_WARM_TIMEOUT_S)
+ except TypeError:
+ pass
+
+ # Fallback 1: the manager may already exist under its well-known name.
+ try:
+ return ray.get_actor(
+ RAY_GLOBAL_ACTOR_POOL_NAME, namespace=RAY_GLOBAL_ACTOR_POOL_NAMESPACE)
+ except Exception:
+ pass
+
+ # Fallback 2: create it ourselves without get_if_exists.
+ try:
+ return GlobalRayActorPoolManager.options(
+ name=RAY_GLOBAL_ACTOR_POOL_NAME,
+ namespace=RAY_GLOBAL_ACTOR_POOL_NAMESPACE,
+ lifetime="detached",
+ ).remote(RAY_ACTOR_WARM_TIMEOUT_S)
+ except Exception:
+ # Name race: another worker may have created it in the meantime.
+ return ray.get_actor(
+ RAY_GLOBAL_ACTOR_POOL_NAME, namespace=RAY_GLOBAL_ACTOR_POOL_NAMESPACE)
+
+
+def prewarm_ray_actors(target_size: Optional[int] = None) -> int:
+    """
+    Make sure the global shared pool holds warm Ray actors so tasks start quickly.
+
+    Args:
+        target_size: Desired pool size; negative values are treated as 0. When
+            None, RAY_GLOBAL_ACTOR_POOL_SIZE is used.
+
+    Returns:
+        The pool size reported by the manager after the call.
+    """
+    if target_size is None:
+        desired = RAY_GLOBAL_ACTOR_POOL_SIZE
+    else:
+        desired = max(0, int(target_size))
+
+    pool_manager = _get_or_create_global_pool_manager()
+    # The manager caps the pool at the estimated number of parallel parts.
+    pool_size_ref = pool_manager.ensure_pool.remote(
+        desired=desired,
+        max_allowed=_estimate_parallel_parts(),
+    )
+    pool_size = ray.get(pool_size_ref)
+    logger.info(
+        f"Global Ray actor pool ready: current={pool_size}, desired={desired}"
+    )
+    return pool_size
+
+
+def get_ray_actor() -> Any:
+    """
+    Fetch one warm actor handle from the global shared pool.
+
+    Selection is delegated to the pool manager's get_actor(), which cycles
+    through its actors in round-robin order.
+    """
+    pool_manager = _get_or_create_global_pool_manager()
+    actor_ref = pool_manager.get_actor.remote()
+    return ray.get(actor_ref)
+
+
+def _get_split_actor() -> Any:
+    """
+    Return the actor used for split operations.
+
+    Split work draws from the same prewarmed pool as processing, so this is a
+    thin alias for get_ray_actor().
+    """
+    split_actor = get_ray_actor()
+    return split_actor
+
class LoggingTask(Task):
"""Base task class with enhanced logging"""
@@ -221,6 +603,472 @@ def on_retry(self, exc, task_id, args, kwargs, einfo):
return super().on_retry(exc, task_id, args, kwargs, einfo)
+@app.task(bind=True, base=LoggingTask, name='data_process.tasks.process_part', queue='process_part_q')
+def process_part(
+    self,
+    part_bytes: bytes,
+    filename: str,
+    chunking_strategy: str,
+    part_redis_key: str,
+    source: Optional[str] = None,
+    source_type: Optional[str] = None,
+    model_id: Optional[int] = None,
+    tenant_id: Optional[str] = None,
+    **params
+) -> Dict[str, Any]:
+    """
+    Hidden sub-task to process a file part with Ray.
+
+    Runs process_bytes on a pooled actor and stores the resulting chunks as JSON
+    in Redis under part_redis_key with a 2-hour TTL.
+
+    Args:
+        part_bytes: Raw bytes of this file part.
+        filename: File name handed to the processor.
+        chunking_strategy: Chunking strategy handed to the processor.
+        part_redis_key: Redis key that receives this part's chunks.
+        source, source_type: Accepted for signature parity; not used here.
+        model_id, tenant_id: Forwarded to the processor.
+        **params: Extra keyword arguments forwarded to the processor.
+
+    Returns:
+        {"part_redis_key": ..., "chunks_count": N}. Any failure during processing
+        or storing is logged and reported as chunks_count 0 instead of raising,
+        so that one bad part does not abort the whole chord.
+    """
+    actor = get_ray_actor()
+    try:
+        chunks_ref = actor.process_bytes.remote(
+            part_bytes,
+            filename,
+            chunking_strategy,
+            task_id=None,
+            model_id=model_id,
+            tenant_id=tenant_id,
+            **params
+        )
+        chunks = ray.get(chunks_ref) or []
+
+        if not REDIS_BACKEND_URL:
+            raise RuntimeError("REDIS_BACKEND_URL not configured")
+
+        import redis
+        client = redis.Redis.from_url(REDIS_BACKEND_URL, decode_responses=True)
+        # Write value and TTL in one command: a crash between a separate SET and
+        # EXPIRE would leave the payload in Redis without any expiry.
+        client.set(
+            part_redis_key,
+            json.dumps(chunks, ensure_ascii=False),
+            ex=2 * 60 * 60,
+        )
+
+        return {
+            "part_redis_key": part_redis_key,
+            "chunks_count": len(chunks),
+        }
+    except Exception as e:
+        logger.error(f"[process_part] Failed to process part for '{filename}': {str(e)}")
+        return {
+            "part_redis_key": part_redis_key,
+            "chunks_count": 0,
+        }
+
+
+@app.task(bind=True, base=LoggingTask, name='data_process.tasks.aggregate_parts', queue='process_part_q')
+def aggregate_parts(
+    self,
+    parts_results: List[List[Dict[str, Any]]],
+    source: Optional[str] = None,
+    index_name: Optional[str] = None,
+    original_filename: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    Hidden sub-task that flattens per-part chunk lists into one list.
+
+    Empty or missing part results are skipped; part order is preserved.
+
+    Returns:
+        Dict with the merged "chunks" plus the source, index_name and
+        original_filename passed in.
+    """
+    flattened: List[Dict[str, Any]] = [
+        chunk
+        for part_chunks in (parts_results or [])
+        if part_chunks
+        for chunk in part_chunks
+    ]
+    return {
+        "chunks": flattened,
+        "source": source,
+        "index_name": index_name,
+        "original_filename": original_filename
+    }
+
+
+@app.task(bind=True, base=LoggingTask, name='data_process.tasks.aggregate_store_chunks', queue='process_part_q')
+def aggregate_store_chunks(
+    self,
+    parts_results: List[Dict[str, Any]],
+    redis_key: str,
+    source: Optional[str] = None,
+    index_name: Optional[str] = None,
+    original_filename: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    Hidden sub-task to aggregate part chunks and store into Redis for forward task.
+
+    For each part result, loads the JSON list stored under its "part_redis_key",
+    appends it to the merged list and deletes the part key (best effort). The
+    merged list is written to redis_key and a "<redis_key>:ready" flag is set
+    afterwards; both keys get a 2-hour TTL.
+
+    Args:
+        parts_results: Results of the process_part sub-tasks.
+        redis_key: Key that receives the merged chunk list.
+        source, index_name, original_filename: Echoed into results and errors.
+
+    Returns:
+        Dict with chunks_count, redis_key, source, index_name, original_filename.
+
+    Raises:
+        Exception: JSON-encoded "process" error when Redis is not configured or
+            any Redis operation fails.
+    """
+    if not REDIS_BACKEND_URL:
+        raise Exception(json.dumps({
+            "message": "REDIS_BACKEND_URL not configured to store chunks",
+            "index_name": index_name,
+            "task_name": "process",
+            "source": source,
+            "original_filename": original_filename
+        }, ensure_ascii=False))
+
+    ttl_seconds = 2 * 60 * 60
+    try:
+        import redis
+        client = redis.Redis.from_url(
+            REDIS_BACKEND_URL, decode_responses=True)
+
+        merged: List[Dict[str, Any]] = []
+        for part_result in parts_results or []:
+            part_key = (part_result or {}).get("part_redis_key")
+            if not part_key:
+                continue
+            cached = client.get(part_key)
+            if not cached:
+                continue
+            try:
+                part_chunks = json.loads(cached)
+                if isinstance(part_chunks, list):
+                    merged.extend(part_chunks)
+            except Exception as decode_exc:
+                # Keep aggregating the remaining parts, but leave a trace of the loss.
+                logger.warning(
+                    f"[{self.request.id}] PROCESS TASK: Skipping undecodable part payload "
+                    f"at key '{part_key}': {decode_exc}")
+                continue
+            # best-effort cleanup for part payload key
+            try:
+                client.delete(part_key)
+            except Exception:
+                pass
+
+        serialized = json.dumps(merged, ensure_ascii=False)
+        # Value and TTL are written atomically so a crash cannot leave a key
+        # without expiry. The ready flag is set last: readers that see it can
+        # rely on the payload already being in place.
+        client.set(redis_key, serialized, ex=ttl_seconds)
+        ready_key = f"{redis_key}:ready"
+        client.set(ready_key, "1", ex=ttl_seconds)
+        logger.info(
+            f"[{self.request.id}] PROCESS TASK: Stored aggregated chunks in Redis at key '{redis_key}', count={len(merged)}")
+    except Exception as exc:
+        raise Exception(json.dumps({
+            "message": f"Failed to store chunks to Redis: {str(exc)}",
+            "index_name": index_name,
+            "task_name": "process",
+            "source": source,
+            "original_filename": original_filename
+        }, ensure_ascii=False)) from exc
+
+    return {
+        "chunks_count": len(merged),
+        "redis_key": redis_key,
+        "source": source,
+        "index_name": index_name,
+        "original_filename": original_filename
+    }
+
+
+@app.task(bind=True, base=LoggingTask, name='data_process.tasks.forward_part', queue='forward_q')
+def forward_part(
+    self,
+    chunks: List[Dict[str, Any]],
+    index_name: str,
+    authorization: Optional[str] = None,
+    parent_task_id: Optional[str] = None,
+    parent_total_chunks: Optional[int] = None,
+    source: Optional[str] = None,
+    original_filename: Optional[str] = None,
+    batch_index: Optional[int] = None,
+    total_batches: Optional[int] = None,
+    large_mode: Optional[bool] = False,
+) -> Dict[str, Any]:
+    """
+    Forward sub-task that indexes a chunk batch.
+
+    Args:
+        chunks: Batch of formatted chunks to index.
+        index_name: Target index.
+        authorization: Optional Authorization header value.
+        parent_task_id: Id of the parent forward task; enables the cancellation
+            check and per-batch progress updates.
+        parent_total_chunks: Total chunk count reported with progress updates.
+        source, original_filename: Used to enrich error payloads.
+        batch_index, total_batches: Batch position, echoed in logs and the result.
+        large_mode: Forwarded to the indexing request.
+
+    Returns:
+        Dict with success, total_indexed, total_submitted, batch_index, total_batches.
+
+    Raises:
+        RuntimeError: If the parent task is marked as cancelled. This is raised
+            before any indexing and is not retried.
+        celery.exceptions.Retry: On any indexing failure, up to
+            FORWARD_REDIS_RETRY_MAX retries.
+    """
+    # Respect cancellation from parent task if available. The lookup itself is
+    # best-effort (a Redis hiccup must not block indexing), but a positive answer
+    # has to stop the batch, so the raise lives outside the guarded lookup.
+    parent_cancelled = False
+    if parent_task_id:
+        try:
+            redis_service = get_redis_service()
+            parent_cancelled = bool(
+                redis_service.is_task_cancelled(parent_task_id))
+        except Exception as cancel_exc:
+            logger.warning(
+                f"[{self.request.id}] FORWARD PART: Failed to check cancellation "
+                f"for task {parent_task_id}: {cancel_exc}"
+            )
+    if parent_cancelled:
+        raise RuntimeError(
+            f"Parent task {parent_task_id} marked as cancelled")
+
+    try:
+        es_result = _send_chunks_to_es(
+            chunks=chunks,
+            index_name=index_name,
+            authorization=authorization,
+            task_id=None,
+            source=source,
+            original_filename=original_filename,
+            large_mode=large_mode,
+        )
+
+        if not isinstance(es_result, dict) or not es_result.get("success"):
+            error_message = es_result.get(
+                "message", "Unknown error from main_server") if isinstance(es_result, dict) else "Unknown error"
+            raise Exception(json.dumps({
+                "message": f"main_server API error: {error_message}",
+                "index_name": index_name,
+                "task_name": "forward_part",
+                "source": source,
+                "original_filename": original_filename
+            }, ensure_ascii=False))
+
+        # Update parent task progress per finished batch so frontend can show real-time indexing count.
+        if parent_task_id:
+            try:
+                processed_delta = int(es_result.get("total_indexed", 0) or 0)
+                redis_service = get_redis_service()
+                redis_service.increment_progress_info(
+                    task_id=parent_task_id,
+                    delta_processed=processed_delta,
+                    total_chunks=parent_total_chunks,
+                )
+            except Exception as progress_exc:
+                # Progress reporting is cosmetic; never fail an indexed batch over it.
+                logger.warning(
+                    f"[{self.request.id}] FORWARD PART: Failed to update parent progress "
+                    f"for task {parent_task_id}: {progress_exc}"
+                )
+
+        return {
+            "success": True,
+            "total_indexed": es_result.get("total_indexed", 0),
+            "total_submitted": es_result.get("total_submitted", len(chunks)),
+            "batch_index": batch_index,
+            "total_batches": total_batches,
+        }
+    except Exception as e:
+        retry_num = getattr(self.request, 'retries', 0)
+        logger.warning(
+            f"[{self.request.id}] FORWARD PART: Failed batch {batch_index}/{total_batches} "
+            f"(retry {retry_num + 1}/{FORWARD_REDIS_RETRY_MAX}): {str(e)}"
+        )
+        raise self.retry(
+            countdown=FORWARD_REDIS_RETRY_DELAY_S,
+            max_retries=FORWARD_REDIS_RETRY_MAX,
+            exc=e
+        )
+
+
+@app.task(bind=True, base=LoggingTask, name='data_process.tasks.aggregate_forward_parts', queue='forward_q')
+def aggregate_forward_parts(
+    self,
+    parts_results: List[Dict[str, Any]],
+    source: Optional[str] = None,
+    index_name: Optional[str] = None,
+    original_filename: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    Sum the indexing counters reported by the forward_part sub-tasks.
+
+    Falsy entries in parts_results are ignored, and missing or falsy counters
+    count as 0.
+
+    Returns:
+        Dict with success=True, the summed total_indexed / total_submitted and
+        the source, index_name and original_filename passed in.
+    """
+    indexed_sum = 0
+    submitted_sum = 0
+    for part in filter(None, parts_results or []):
+        indexed_sum += int(part.get("total_indexed", 0) or 0)
+        submitted_sum += int(part.get("total_submitted", 0) or 0)
+
+    summary: Dict[str, Any] = {
+        "success": True,
+        "total_indexed": indexed_sum,
+        "total_submitted": submitted_sum,
+        "source": source,
+        "index_name": index_name,
+        "original_filename": original_filename
+    }
+    return summary
+
+
+def _split_file_for_processing(
+    request_id: str,
+    source: str,
+    source_type: str,
+    task_id: str,
+    params: Dict[str, Any],
+    file_data: Optional[bytes] = None,
+) -> List[bytes]:
+    """
+    Split the source into parts of at most 5 MiB via a pooled actor's split_file.
+
+    Args:
+        request_id: Id used as the log prefix.
+        source: File path or URL to split.
+        source_type: Passed to the actor as `destination`.
+        task_id: Task id passed to the actor.
+        params: Extra keyword arguments for split_file. NOTE: any "max_size" entry
+            is removed from this dict in place; the fixed limit is always used.
+        file_data: Optional pre-fetched file content, passed only when not None.
+
+    Returns:
+        Whatever split_file returns (expected: a list of byte parts).
+    """
+    max_size = 5 * 1024 * 1024
+    # Drop a caller-supplied limit; this mutates the caller's dict on purpose or not,
+    # and later users of `params` no longer see the key.
+    params.pop("max_size", None)
+    logger.info(
+        f"[{request_id}] PROCESS TASK: Splitting file before processing (max_size={max_size})")
+
+    actor_wait_started = time.perf_counter()
+    split_actor = _get_split_actor()
+    split_actor_get_elapsed = time.perf_counter() - actor_wait_started
+    logger.info(
+        f"[{request_id}] PROCESS TASK: split actor ready in {split_actor_get_elapsed:.3f}s")
+
+    rpc_started = time.perf_counter()
+    split_kwargs: Dict[str, Any] = dict(
+        source=source,
+        destination=source_type,
+        task_id=task_id,
+        max_size=max_size,
+    )
+    split_kwargs.update(params)
+    if file_data is not None:
+        split_kwargs["file_data"] = file_data
+
+    parts = ray.get(split_actor.split_file.remote(**split_kwargs))
+    split_call_elapsed = time.perf_counter() - rpc_started
+    logger.info(
+        f"[{request_id}] PROCESS TASK: split_file RPC done in {split_call_elapsed:.3f}s "
+        f"(source_type={source_type})")
+
+    if not parts:
+        return parts
+
+    # Size statistics are for observability only.
+    part_sizes = [len(p) for p in parts]
+    total_bytes = sum(part_sizes)
+    min_size = min(part_sizes)
+    max_part_size = max(part_sizes)
+    avg_size = total_bytes / len(part_sizes)
+    logger.info(
+        f"[{request_id}] PROCESS TASK: Split stats: parts={len(part_sizes)}, "
+        f"total={total_bytes/1024/1024:.2f}MB, "
+        f"min={min_size/1024:.2f}KB, max={max_part_size/1024:.2f}KB, avg={avg_size/1024:.2f}KB")
+
+    return parts
+
+
+def _run_processing_for_parts(
+    request_id: str,
+    source: str,
+    source_type: str,
+    task_id: str,
+    chunking_strategy: str,
+    filename_for_processing: str,
+    parts: List[bytes],
+    index_name: Optional[str],
+    original_filename: Optional[str],
+    embedding_model_id: Optional[int],
+    tenant_id: Optional[str],
+    params: Dict[str, Any],
+) -> Tuple[bool, Optional[List[Dict[str, Any]]], Optional[int]]:
+    """
+    Process the split parts, choosing a strategy by the number of parts.
+
+    - No parts: process the whole file with process_file on one actor.
+    - One part: process it with process_bytes on one actor.
+    - Several parts: dispatch one process_part task per part as a Celery chord
+      with aggregate_store_chunks as callback, then wait for the aggregated
+      result to become ready under "dp:<task_id>:chunks".
+
+    Returns:
+        (split_async, chunks, split_chunk_count): (False, chunks, None) for the
+        two synchronous paths, (True, None, count) for the chord path.
+    """
+    if not parts:
+        logger.warning(
+            f"[{request_id}] PROCESS TASK: Split returned no parts; fallback to full-file processing")
+        whole_file_actor = get_ray_actor()
+        whole_file_ref = whole_file_actor.process_file.remote(
+            source,
+            chunking_strategy,
+            destination=source_type,
+            task_id=task_id,
+            model_id=embedding_model_id,
+            tenant_id=tenant_id,
+            **params
+        )
+        logger.info(
+            f"[{request_id}] PROCESS TASK: Waiting for Ray processing to complete...")
+        return False, ray.get(whole_file_ref), None
+
+    part_count = len(parts)
+    if part_count == 1:
+        single_part_actor = get_ray_actor()
+        single_part_ref = single_part_actor.process_bytes.remote(
+            parts[0],
+            filename_for_processing,
+            chunking_strategy,
+            task_id=None,
+            model_id=embedding_model_id,
+            tenant_id=tenant_id,
+            **params
+        )
+        logger.info(
+            f"[{request_id}] PROCESS TASK: Waiting for Ray processing to complete...")
+        return False, ray.get(single_part_ref), None
+
+    # Several parts: fan out one task per part and aggregate through Redis.
+    redis_key = f"dp:{task_id}:chunks"
+    part_signatures = [
+        process_part.s(
+            part_bytes=part,
+            filename=filename_for_processing,
+            chunking_strategy=chunking_strategy,
+            part_redis_key=f"dp:{task_id}:part:{idx}",
+            source=source,
+            source_type=source_type,
+            model_id=embedding_model_id,
+            tenant_id=tenant_id,
+            **params
+        )
+        for idx, part in enumerate(parts)
+    ]
+    group_tasks = group(part_signatures)
+    callback = aggregate_store_chunks.s(
+        redis_key=redis_key,
+        source=source,
+        index_name=index_name,
+        original_filename=original_filename
+    ).set(queue='process_part_q')
+    logger.info(
+        f"[{request_id}] PROCESS TASK: Dispatching {len(parts)} part tasks...")
+    chord(group_tasks)(callback)
+
+    # Block until the callback has published the aggregated chunks (or time out).
+    split_wait_timeout = _compute_split_wait_timeout(part_count)
+    logger.info(
+        f"[{request_id}] PROCESS TASK: Waiting split aggregation, timeout={split_wait_timeout}s, "
+        f"parts={len(parts)}, est_parallel={_estimate_parallel_parts()}")
+    split_chunk_count = _wait_for_split_ready(
+        redis_key=redis_key,
+        timeout_s=split_wait_timeout,
+        poll_interval_ms=DP_REDIS_CHUNKS_POLL_INTERVAL_MS,
+    )
+    return True, None, split_chunk_count
+
+
+def _process_source_with_split(
+    request_id: str,
+    source: str,
+    source_type: str,
+    task_id: str,
+    chunking_strategy: str,
+    index_name: Optional[str],
+    original_filename: Optional[str],
+    embedding_model_id: Optional[int],
+    tenant_id: Optional[str],
+    params: Dict[str, Any],
+    file_data: Optional[bytes] = None,
+) -> Tuple[bool, Optional[List[Dict[str, Any]]], Optional[int]]:
+    """
+    Split the source, process its parts and make the chunks available in Redis.
+
+    On the synchronous paths the chunks are handed to an actor's
+    store_chunks_in_redis under "dp:<task_id>:chunks"; that call is submitted
+    without waiting for its result. On the async (chord) path the aggregation
+    callback has already stored them.
+
+    Returns:
+        (split_async, chunks, split_chunk_count) as produced by
+        _run_processing_for_parts.
+    """
+    parts = _split_file_for_processing(
+        request_id=request_id,
+        source=source,
+        source_type=source_type,
+        task_id=task_id,
+        params=params,
+        file_data=file_data,
+    )
+    # Prefer the user-facing file name; fall back to the last path component.
+    filename_for_processing = original_filename or os.path.basename(source)
+    outcome = _run_processing_for_parts(
+        request_id=request_id,
+        source=source,
+        source_type=source_type,
+        task_id=task_id,
+        chunking_strategy=chunking_strategy,
+        filename_for_processing=filename_for_processing,
+        parts=parts,
+        index_name=index_name,
+        original_filename=original_filename,
+        embedding_model_id=embedding_model_id,
+        tenant_id=tenant_id,
+        params=params,
+    )
+    split_async, chunks, split_chunk_count = outcome
+
+    if split_async:
+        logger.info(
+            f"[{request_id}] PROCESS TASK: Async split finished with {split_chunk_count or 0} chunks")
+        return split_async, chunks, split_chunk_count
+
+    logger.info(
+        f"[{request_id}] PROCESS TASK: Ray processing completed, got {len(chunks) if chunks else 0} chunks")
+    redis_key = f"dp:{task_id}:chunks"
+    storing_actor = get_ray_actor()
+    storing_actor.store_chunks_in_redis.remote(redis_key, chunks)
+    logger.info(
+        f"[{request_id}] PROCESS TASK: Stored chunks in Redis at key '{redis_key}'")
+
+    return split_async, chunks, split_chunk_count
+
+
+def _build_no_valid_chunks_error(
+    split_async: bool,
+    index_name: Optional[str],
+    source: str,
+    original_filename: Optional[str],
+) -> Exception:
+    """
+    Build (but do not raise) the JSON-encoded "no_valid_chunks" process error.
+
+    The message names the path that produced zero chunks: the async split when
+    split_async is true, the direct Ray processing otherwise.
+    """
+    if split_async:
+        message = "Async split completed but produced 0 chunks"
+    else:
+        message = "Ray processing completed but produced 0 chunks"
+
+    payload = {
+        "message": message,
+        "index_name": index_name,
+        "task_name": "process",
+        "source": source,
+        "original_filename": original_filename,
+        "error_code": "no_valid_chunks"
+    }
+    return Exception(json.dumps(payload, ensure_ascii=False))
+
+
@app.task(bind=True, base=LoggingTask, name='data_process.tasks.process', queue='process_q')
def process(
self,
@@ -248,6 +1096,7 @@ def process(
"""
start_time = time.time()
task_id = self.request.id
+ # _warn_if_queue_mismatch("PROCESS TASK", "process_q", self.request)
logger.info(
f"[{self.request.id}] PROCESS TASK: source_type: {source_type}")
@@ -264,51 +1113,39 @@ def process(
'stage': 'extracting_text'
}
)
- # Get the data processor instance
- actor = get_ray_actor()
-
try:
# Process the file based on the source type
file_size_mb = 0
+ split_chunk_count = None
+ image_metadata_chunk_count = 0
+ elapsed_time = 0.0
+ chunks: Optional[List[Dict[str, Any]]] = None
+ split_async = False
+
if source_type == "local":
# Check file existence and size for optimization
if not os.path.exists(source):
raise FileNotFoundError(f"File does not exist: {source}")
file_size = os.path.getsize(source)
- file_size_mb = file_size / (1024 * 1024)
+ # file_size is in bytes; divide by 1 MiB so the logged MB value and the MB/s speed are correct.
+ file_size_mb = file_size / (1024 * 1024)
logger.info(
f"[{self.request.id}] PROCESS TASK: File size: {file_size_mb:.2f}MB")
- # The unified actor call, mapping 'file' source_type to 'local' destination
- # Submit Ray work and WAIT for processing to complete
- logger.info(
- f"[{self.request.id}] PROCESS TASK: Submitting Ray processing for source='{source}', strategy='{chunking_strategy}', destination='{source_type}', model_id={embedding_model_id}")
- chunks_ref = actor.process_file.remote(
- source,
- chunking_strategy,
- destination=source_type,
+ split_async, chunks, split_chunk_count = _process_source_with_split(
+ request_id=self.request.id,
+ source=source,
+ source_type=source_type,
task_id=task_id,
- model_id=embedding_model_id,
+ chunking_strategy=chunking_strategy,
+ index_name=index_name,
+ original_filename=original_filename,
+ embedding_model_id=embedding_model_id,
tenant_id=tenant_id,
- **params
+ params=params,
)
- # Wait for Ray processing to complete (this keeps task in STARTED/"PROCESSING" state)
- logger.info(
- f"[{self.request.id}] PROCESS TASK: Waiting for Ray processing to complete...")
- chunks = ray.get(chunks_ref)
- logger.info(
- f"[{self.request.id}] PROCESS TASK: Ray processing completed, got {len(chunks) if chunks else 0} chunks")
-
- # Persist chunks into Redis via Ray (synchronous to ensure data is ready before forward task)
- redis_key = f"dp:{task_id}:chunks"
- actor.store_chunks_in_redis.remote(redis_key, chunks)
- logger.info(
- f"[{self.request.id}] PROCESS TASK: Stored chunks in Redis at key '{redis_key}'")
-
- end_time = time.time()
- elapsed_time = end_time - start_time
+ elapsed_time = time.time() - start_time
processing_speed = file_size_mb / \
elapsed_time if file_size_mb > 0 and elapsed_time > 0 else 0
logger.info(
@@ -318,33 +1155,31 @@ def process(
logger.info(
f"[{self.request.id}] PROCESS TASK: Processing from URL: {source}")
- # For URL source, core.py expects a non-local destination to trigger URL fetching
+ # Measure MinIO fetch time in process worker logs for observability
+ fetch_start = time.perf_counter()
+ file_stream = get_file_stream(source)
+ if file_stream is None:
+ raise FileNotFoundError(f"Unable to fetch file from URL: {source}")
+ file_data = file_stream.read()
+ fetch_elapsed = time.perf_counter() - fetch_start
logger.info(
- f"[{self.request.id}] PROCESS TASK: Submitting Ray processing for URL='{source}', strategy='{chunking_strategy}', destination='{source_type}', model_id={embedding_model_id}")
- chunks_ref = actor.process_file.remote(
- source,
- chunking_strategy,
- destination=source_type,
+ f"[{self.request.id}] PROCESS TASK: MinIO fetch done in {fetch_elapsed:.3f}s, "
+ f"bytes={len(file_data)}")
+
+ split_async, chunks, split_chunk_count = _process_source_with_split(
+ request_id=self.request.id,
+ source=source,
+ source_type=source_type,
task_id=task_id,
- model_id=embedding_model_id,
+ chunking_strategy=chunking_strategy,
+ index_name=index_name,
+ original_filename=original_filename,
+ embedding_model_id=embedding_model_id,
tenant_id=tenant_id,
- **params
+ params=params,
+ file_data=file_data,
)
- # Wait for Ray processing to complete (this keeps task in STARTED/"PROCESSING" state)
- logger.info(
- f"[{self.request.id}] PROCESS TASK: Waiting for Ray processing to complete...")
- chunks = ray.get(chunks_ref)
- logger.info(
- f"[{self.request.id}] PROCESS TASK: Ray processing completed, got {len(chunks) if chunks else 0} chunks")
-
- # Persist chunks into Redis via Ray (synchronous to ensure data is ready before forward task)
- redis_key = f"dp:{task_id}:chunks"
- actor.store_chunks_in_redis.remote(redis_key, chunks)
- logger.info(
- f"[{self.request.id}] PROCESS TASK: Stored chunks in Redis at key '{redis_key}'")
-
- end_time = time.time()
- elapsed_time = end_time - start_time
+ elapsed_time = time.time() - start_time
logger.info(
f"[{self.request.id}] PROCESS TASK: URL processing completed in {elapsed_time:.2f}s")
@@ -353,33 +1188,61 @@ def process(
raise NotImplementedError(
f"Source type '{source_type}' not yet supported")
- chunk_count = len(chunks) if chunks else 0
- if chunk_count == 0:
- raise Exception(json.dumps({
- "message": "Ray processing completed but produced 0 chunks",
- "index_name": index_name,
- "task_name": "process",
- "source": source,
- "original_filename": original_filename,
- "error_code": "no_valid_chunks"
- }, ensure_ascii=False))
+ if split_async:
+ chunk_count = split_chunk_count or 0
+ if chunk_count == 0:
+ raise _build_no_valid_chunks_error(
+ split_async=True,
+ index_name=index_name,
+ source=source,
+ original_filename=original_filename,
+ )
+ # For async split, chunks are persisted in Redis; count image-metadata chunks from cached payload.
+ try:
+ if REDIS_BACKEND_URL:
+ import redis
+ redis_key = f"dp:{task_id}:chunks"
+ client = redis.Redis.from_url(
+ REDIS_BACKEND_URL, decode_responses=True)
+ cached = client.get(redis_key)
+ if cached:
+ cached_chunks = json.loads(cached)
+ if isinstance(cached_chunks, list):
+ image_metadata_chunk_count = _count_image_metadata_chunks(cached_chunks)
+ except Exception as image_count_exc:
+ logger.warning(
+ f"[{self.request.id}] PROCESS TASK: Failed counting image metadata chunks for async split: {image_count_exc}")
+ else:
+ chunk_count = len(chunks) if chunks else 0
+ if chunk_count == 0:
+ raise _build_no_valid_chunks_error(
+ split_async=False,
+ index_name=index_name,
+ source=source,
+ original_filename=original_filename,
+ )
+ image_metadata_chunk_count = _count_image_metadata_chunks(chunks)
+
+ logger.info(
+ f"[{self.request.id}] PROCESS TASK: Chunk composition: total={chunk_count}, "
+ f"image_metadata={image_metadata_chunk_count}, text={max(0, chunk_count - image_metadata_chunk_count)}")
# Update task state to SUCCESS after Ray processing completes
# This transitions from STARTED (PROCESSING) to SUCCESS (WAIT_FOR_FORWARDING)
self.update_state(
state=states.SUCCESS,
meta={
- 'chunks_count': len(chunks) if chunks else 0,
- 'processing_time': elapsed_time,
- 'source': source,
- 'index_name': index_name,
- 'original_filename': original_filename,
- 'task_name': 'process',
- 'stage': 'text_extracted',
- 'file_size_mb': file_size_mb,
- 'processing_speed_mb_s': file_size_mb / elapsed_time if file_size_mb > 0 and elapsed_time > 0 else 0
- }
- )
+ 'chunks_count': chunk_count,
+ 'processing_time': elapsed_time,
+ 'source': source,
+ 'index_name': index_name,
+ 'original_filename': original_filename,
+ 'task_name': 'process',
+ 'stage': 'text_extracted',
+ 'file_size_mb': file_size_mb,
+ 'processing_speed_mb_s': file_size_mb / elapsed_time if file_size_mb > 0 and elapsed_time > 0 else 0
+ }
+ )
logger.info(
f"[{self.request.id}] PROCESS TASK: Processing complete, waiting for forward task")
@@ -391,7 +1254,9 @@ def process(
'source': source,
'index_name': index_name,
'original_filename': original_filename,
- 'task_id': task_id
+ 'task_id': task_id,
+ 'split_async': split_async,
+ 'image_metadata_chunk_count': image_metadata_chunk_count,
}
return returned_data
@@ -537,6 +1402,7 @@ def forward(
"""
start_time = time.time()
task_id = self.request.id
+ # _warn_if_queue_mismatch("FORWARD TASK", "forward_q", self.request)
original_source = source
original_index_name = index_name
filename = original_filename
@@ -575,6 +1441,7 @@ def forward(
)
chunks = processed_data.get('chunks')
+ split_async = bool(processed_data.get('split_async'))
# If chunks are not in payload, try loading from Redis via the redis_key
if (not chunks) and processed_data.get('redis_key'):
redis_key = processed_data.get('redis_key')
@@ -590,6 +1457,24 @@ def forward(
import redis
client = redis.Redis.from_url(
REDIS_BACKEND_URL, decode_responses=True)
+ ready_key = f"{redis_key}:ready"
+ if split_async:
+ ready_flag = client.get(ready_key)
+ if not ready_flag:
+ retry_num = getattr(self.request, 'retries', 0)
+ logger.info(
+ f"[{self.request.id}] FORWARD TASK: Async split not ready for key {redis_key}. Retry {retry_num + 1}/{ASYNC_SPLIT_RETRY_MAX} in {FORWARD_REDIS_RETRY_DELAY_S}s")
+ raise self.retry(
+ countdown=FORWARD_REDIS_RETRY_DELAY_S,
+ max_retries=ASYNC_SPLIT_RETRY_MAX,
+ exc=Exception(json.dumps({
+ "message": "Async split not ready; will retry",
+ "index_name": original_index_name,
+ "task_name": "forward",
+ "source": original_source,
+ "original_filename": filename
+ }, ensure_ascii=False))
+ )
cached = client.get(redis_key)
if cached:
try:
@@ -604,6 +1489,21 @@ def forward(
f"[{self.request.id}] FORWARD TASK: JSON decode error for key '{redis_key}': {str(jde)}; raw_prefix={raw_preview!r}")
raise
else:
+ if split_async:
+ retry_num = getattr(self.request, 'retries', 0)
+ logger.info(
+ f"[{self.request.id}] FORWARD TASK: Async split ready but chunks missing for key {redis_key}. Retry {retry_num + 1}/{ASYNC_SPLIT_RETRY_MAX} in {FORWARD_REDIS_RETRY_DELAY_S}s")
+ raise self.retry(
+ countdown=FORWARD_REDIS_RETRY_DELAY_S,
+ max_retries=ASYNC_SPLIT_RETRY_MAX,
+ exc=Exception(json.dumps({
+ "message": "Async split ready but chunks missing; will retry",
+ "index_name": original_index_name,
+ "task_name": "forward",
+ "source": original_source,
+ "original_filename": filename
+ }, ensure_ascii=False))
+ )
# No busy-wait: release the worker slot and retry later
retry_num = getattr(self.request, 'retries', 0)
logger.info(
@@ -650,9 +1550,29 @@ def forward(
"original_filename": original_filename
}, ensure_ascii=False))
if len(chunks) == 0:
+ if split_async and processed_data.get('redis_key'):
+ retry_num = getattr(self.request, 'retries', 0)
+ logger.info(
+ f"[{self.request.id}] FORWARD TASK: Empty chunks while waiting for async split. Retry {retry_num + 1}/{ASYNC_SPLIT_RETRY_MAX} in {FORWARD_REDIS_RETRY_DELAY_S}s")
+ raise self.retry(
+ countdown=FORWARD_REDIS_RETRY_DELAY_S,
+ max_retries=ASYNC_SPLIT_RETRY_MAX,
+ exc=Exception(json.dumps({
+ "message": "Chunks not ready in Redis (empty); will retry",
+ "index_name": original_index_name,
+ "task_name": "forward",
+ "source": original_source,
+ "original_filename": filename
+ }, ensure_ascii=False))
+ )
logger.warning(
f"[{self.request.id}] FORWARD TASK: Empty chunks list received for source {original_source}")
formatted_chunks = []
+ # Compute once per file to avoid repeated IO/MinIO calls inside loop
+ file_size = get_file_size(source_type, original_source) if isinstance(
+ original_source, str) else 0
+ filename_resolved = filename or (os.path.basename(original_source) if original_source and isinstance(
+ original_source, str) else "")
for i, chunk in enumerate(chunks):
# Extract text and metadata
content = chunk.get("content", "")
@@ -664,20 +1584,18 @@ def forward(
f"[{self.request.id}] FORWARD TASK: Chunk {i+1} has empty text content, skipping")
continue
- file_size = get_file_size(source_type, original_source) if isinstance(
- original_source, str) else 0
-
# Format as expected by the Elasticsearch API
formatted_chunk = {
"metadata": metadata,
- "filename": filename or (os.path.basename(original_source) if original_source and isinstance(original_source, str) else ""),
+ "filename": filename_resolved,
"path_or_url": original_source,
"content": content,
- "process_source": "Unstructured",
+ "process_source": chunk.get("process_source", "Unstructured"),
"source_type": source_type,
"file_size": file_size,
"create_time": metadata.get("creation_date"),
"date": metadata.get("date"),
+ "index": i,
}
formatted_chunks.append(formatted_chunk)
@@ -691,112 +1609,6 @@ def forward(
"error_code": "no_valid_chunks"
}, ensure_ascii=False))
- async def index_documents():
- elasticsearch_url = ELASTICSEARCH_SERVICE
- if not elasticsearch_url:
- raise Exception(json.dumps({
- "message": "ELASTICSEARCH_SERVICE env is not set",
- "index_name": original_index_name,
- "task_name": "forward",
- "source": original_source,
- "original_filename": original_filename
- }, ensure_ascii=False))
- route_url = f"/indices/{original_index_name}/documents"
- full_url = elasticsearch_url + route_url
- headers = {"Content-Type": "application/json"}
- if authorization:
- headers["Authorization"] = authorization
- # Add task_id header for progress tracking
- headers["X-Task-Id"] = task_id
-
- try:
- connector = aiohttp.TCPConnector(verify_ssl=False)
- timeout = aiohttp.ClientTimeout(total=600)
-
- async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
- async with session.post(
- full_url,
- headers=headers,
- json=formatted_chunks,
- raise_for_status=False
- ) as response:
- text = await response.text()
- status = response.status
- # Try parse JSON body for structured error_code/message
- parsed_body = None
- try:
- parsed_body = json.loads(text)
- except Exception:
- parsed_body = None
-
- if status >= 400:
- error_code = None
- if isinstance(parsed_body, dict):
- error_code = parsed_body.get("error_code")
- detail = parsed_body.get("detail")
- if isinstance(detail, dict) and detail.get("error_code"):
- error_code = detail.get("error_code")
- elif isinstance(detail, str):
- try:
- parsed_detail = json.loads(detail)
- if isinstance(parsed_detail, dict):
- error_code = parsed_detail.get(
- "error_code", error_code)
- except Exception:
- pass
-
- if not error_code:
- try:
- match = re.search(
- r'["\']error_code["\']\s*:\s*["\']([^"\']+)["\']', text)
- if match:
- error_code = match.group(1)
- except Exception:
- pass
-
- if error_code:
- # Raise flat payload to avoid nested JSON and preserve error_code
- raise Exception(json.dumps({
- "error_code": error_code
- }, ensure_ascii=False))
-
- raise Exception(
- f"ElasticSearch service returned HTTP {status}")
-
- result = parsed_body if isinstance(parsed_body, dict) else await response.json()
- return result
-
- except aiohttp.ClientConnectorError as e:
- logger.error(
- f"[{self.request.id}] FORWARD TASK: Connection error to {full_url}: {str(e)}")
- raise Exception(json.dumps({
- "message": f"Failed to connect to API: {str(e)}",
- "index_name": original_index_name,
- "task_name": "forward",
- "source": original_source,
- "original_filename": original_filename
- }, ensure_ascii=False))
- except asyncio.TimeoutError as e:
- logger.warning(
- f"[{self.request.id}] FORWARD TASK: Timeout when indexing documents: {str(e)}.")
- raise Exception(json.dumps({
- "message": f"Timeout when indexing documents: {str(e)}",
- "index_name": original_index_name,
- "task_name": "forward",
- "source": original_source,
- "original_filename": original_filename
- }, ensure_ascii=False))
- except Exception as e:
- logger.error(
- f"[{self.request.id}] FORWARD TASK: Unexpected error when indexing documents: {str(e)}.")
- raise Exception(json.dumps({
- "message": f"Unexpected error when indexing documents: {str(e)}",
- "index_name": original_index_name,
- "task_name": "forward",
- "source": original_source,
- "original_filename": original_filename
- }, ensure_ascii=False))
-
logger.info(
f"[{self.request.id}] FORWARD TASK: Starting ES indexing for {len(formatted_chunks)} chunks to index '{original_index_name}'...")
@@ -814,8 +1626,69 @@ async def index_documents():
'processed_chunks': 0 # Will be updated during vectorization via Redis
}
)
+ try:
+ redis_service = get_redis_service()
+ redis_service.save_progress_info(task_id, 0, total_chunks)
+ except Exception as progress_init_exc:
+ logger.warning(
+ f"[{self.request.id}] FORWARD TASK: Failed to initialize progress in Redis: "
+ f"{progress_init_exc}"
+ )
- es_result = run_async(index_documents())
+ if len(formatted_chunks) < FORWARD_ES_CHUNK_BATCH_SIZE:
+ es_result = _send_chunks_to_es(
+ chunks=formatted_chunks,
+ index_name=original_index_name,
+ authorization=authorization,
+ task_id=task_id,
+ source=original_source,
+ original_filename=original_filename,
+ large_mode=False,
+ )
+ else:
+ batches = _build_balanced_batches(
+ formatted_chunks=formatted_chunks,
+ batch_size=FORWARD_ES_CHUNK_BATCH_SIZE,
+ )
+ total_batches = len(batches)
+ image_chunks_total = sum(
+ 1 for chunk in formatted_chunks if chunk.get("process_source") == IMAGE_METADATA_PROCESS_SOURCE
+ )
+ image_distribution = [
+ sum(
+ 1
+ for chunk in batch
+ if chunk.get("process_source") == IMAGE_METADATA_PROCESS_SOURCE
+ )
+ for batch in batches
+ ]
+ logger.info(
+ f"[{self.request.id}] FORWARD TASK: Batch distribution ready: total_batches={total_batches}, "
+ f"batch_size={FORWARD_ES_CHUNK_BATCH_SIZE}, image_metadata_total={image_chunks_total}, "
+ f"image_per_batch={image_distribution}")
+ group_tasks = group(
+ forward_part.s(
+ chunks=batch,
+ index_name=original_index_name,
+ authorization=authorization,
+ parent_task_id=task_id,
+ parent_total_chunks=total_chunks,
+ source=original_source,
+ original_filename=original_filename,
+ batch_index=idx + 1,
+ total_batches=total_batches,
+ # If request was split into multiple groups, force all groups to use large path.
+ large_mode=True,
+ ).set(queue='forward_q') for idx, batch in enumerate(batches)
+ )
+ callback = aggregate_forward_parts.s(
+ source=original_source,
+ index_name=original_index_name,
+ original_filename=original_filename
+ ).set(queue='forward_q')
+ result = chord(group_tasks)(callback)
+ with allow_join_result():
+ es_result = result.get()
logger.debug(
f"[{self.request.id}] FORWARD TASK: API response from main_server for source '{original_source}': {es_result}")
diff --git a/backend/data_process/worker.py b/backend/data_process/worker.py
index a5f5f4a27..48323869b 100644
--- a/backend/data_process/worker.py
+++ b/backend/data_process/worker.py
@@ -1,4 +1,4 @@
-"""
+"""
Celery worker script for data processing tasks
This script is used to start Celery workers for processing data
@@ -21,6 +21,7 @@
import os
import sys
import time
+import threading
import traceback
import ray
@@ -44,6 +45,7 @@
REDIS_URL,
WORKER_CONCURRENCY,
WORKER_NAME,
+ RAY_GLOBAL_ACTOR_POOL_SIZE,
)
from .app import app
@@ -200,6 +202,60 @@ def worker_ready_handler(**kwargs):
# Register health check endpoints, start monitoring, etc.
logger.debug("🔍 Worker is ready to receive tasks")
+ # Prewarm Ray actors for process-related queues to reduce first-task latency.
+ # IMPORTANT: run asynchronously so worker queue registration is never blocked.
+ try:
+ queue_set = {q.strip() for q in QUEUES.split(",") if q.strip()}
+ if "process_q" in queue_set or "process_part_q" in queue_set:
+ from data_process.tasks import prewarm_ray_actors
+
+ # Prewarm a cluster-global shared actor pool once at startup.
+ # Multiple workers may trigger this, but pool manager is idempotent.
+ target = RAY_GLOBAL_ACTOR_POOL_SIZE
+
+ def _prewarm_in_background():
+ try:
+ warmed = prewarm_ray_actors(target_size=target)
+ logger.info(
+ f"Prewarmed Ray actor pool in background, warmed_actors={warmed}, target={target}, queues={sorted(queue_set)}"
+ )
+ except Exception as exc:
+ logger.warning(f"Background prewarm failed: {exc}")
+
+ threading.Thread(target=_prewarm_in_background, daemon=True).start()
+ except Exception as exc:
+ logger.warning(f"Failed to schedule Ray actor prewarm on worker ready: {exc}")
+
+ # Periodic concurrency + Ray CPU availability log for process_part_q.
+ try:
+ queue_set = {q.strip() for q in QUEUES.split(",") if q.strip()}
+ if "process_part_q" in queue_set:
+ def _log_part_concurrency():
+ while True:
+ try:
+ inspector = app.control.inspect(timeout=1)
+ active = inspector.active() or {}
+ part_active = 0
+ for _, tasks in active.items():
+ for t in tasks or []:
+ if t.get("name") == "data_process.tasks.process_part":
+ part_active += 1
+ try:
+ ray_available = ray.available_resources() if ray.is_initialized() else {}
+ except Exception:
+ ray_available = {}
+ avail_cpu = ray_available.get("CPU", 0.0)
+ logger.info(
+ f"[process_part] active={part_active}, ray_available_cpu={avail_cpu}"
+ )
+ except Exception as exc:
+ logger.debug(f"Failed to collect process_part concurrency stats: {exc}")
+ time.sleep(5)
+
+ threading.Thread(target=_log_part_concurrency, daemon=True).start()
+ except Exception as exc:
+ logger.warning(f"Failed to start process_part concurrency logger: {exc}")
+
@worker_shutting_down.connect
def worker_shutdown_handler(**kwargs):
@@ -289,9 +345,9 @@ def validate_redis_connection() -> bool:
def start_worker():
"""Start Celery worker with appropriate settings"""
- # Get configuration parameters
+ # Read from runtime env first, so launcher-assigned values always win.
queues = QUEUES
- worker_name = WORKER_NAME or f'worker-{os.getpid()}'
+ worker_name = WORKER_NAME
concurrency = WORKER_CONCURRENCY
logger.info(f"Start Celery worker '{worker_name}' with queues: {queues}")
diff --git a/backend/data_process_service.py b/backend/data_process_service.py
index 0576e01fc..23d3497d9 100644
--- a/backend/data_process_service.py
+++ b/backend/data_process_service.py
@@ -206,13 +206,21 @@ def start_workers(self):
logger.debug(f"Process-worker concurrency set to: {process_worker_concurrency}")
logger.debug(f"Forward-worker concurrency set to: {forward_worker_concurrency}")
- # Define worker configurations based on new architecture
+ # Define worker configurations based on split architecture:
+ # - process-worker handles orchestration (process_q)
+ # - process-part-worker handles split sub-tasks (process_part_q)
+ # - forward-worker handles vectorization/storage (forward_q)
workers_config = [
{
'name': 'process-worker',
'queue': 'process_q',
'concurrency': process_worker_concurrency
},
+ {
+ 'name': 'process-part-worker',
+ 'queue': 'process_part_q',
+ 'concurrency': process_worker_concurrency
+ },
{
'name': 'forward-worker',
'queue': 'forward_q',
@@ -243,7 +251,7 @@ def start_workers(self):
logging.basicConfig(level=logging.INFO, format='[%(asctime)s: %(levelname)s/%(name)s] %(message)s')
logger = logging.getLogger("data_process.worker_launcher")
-os.environ["QUEUES"] = "{config['queue']}"
+os.environ["QUEUES"] = "{config['queue']}" # backward compatibility
os.environ["WORKER_NAME"] = "{config['name']}"
os.environ["WORKER_CONCURRENCY"] = "{config['concurrency']}"
@@ -254,6 +262,10 @@ def start_workers(self):
logger.debug(f"Celery app instance: {{celery_app}}")
logger.debug(f"Attempting to start worker for queue: {config['queue']}")
from data_process.worker import start_worker
+ # Re-apply launcher values after imports in case .env override changed them.
+ os.environ["QUEUES"] = "{config['queue']}"
+ os.environ["WORKER_NAME"] = "{config['name']}"
+ os.environ["WORKER_CONCURRENCY"] = "{config['concurrency']}"
start_worker()
except ImportError as e:
logger.error(f"Import error: {{e}}")
@@ -564,7 +576,11 @@ def start_all_services(self):
if success_count > 0:
self.log_service_info()
-
+
+ # Start auto-summary scheduler
+ from services.auto_summary_scheduler import auto_summary_scheduler
+ auto_summary_scheduler.start()
+
return success_count == enabled_count
def log_service_info(self):
@@ -700,7 +716,11 @@ def stop_all_services(self):
logger.error(f"Final attempt to kill Flower process failed: {final_e}")
finally:
service_processes['flower'] = None
-
+
+ # Stop auto-summary scheduler
+ from services.auto_summary_scheduler import auto_summary_scheduler
+ auto_summary_scheduler.stop()
+
# Stop Redis last
if service_processes['redis']:
try:
diff --git a/backend/database/a2a_agent_db.py b/backend/database/a2a_agent_db.py
index 9becdd67b..c1d998272 100644
--- a/backend/database/a2a_agent_db.py
+++ b/backend/database/a2a_agent_db.py
@@ -29,6 +29,22 @@ def _get_db_session():
# Default cache TTL in seconds (24 hours)
DEFAULT_CACHE_TTL_HOURS = 24
+
+def _extract_base_url(url: str) -> str:
+    """Extract base URL (scheme + host + port) from a full URL.
+
+    Args:
+        url: Full URL, e.g., http://example.com/path/to/agent.json
+
+    Returns:
+        Base URL, e.g., http://example.com (IPv6 literals keep their brackets).
+    """
+    from urllib.parse import urlparse
+    parsed = urlparse(url)
+    # urlparse().hostname strips the [] around IPv6 literals; restore them for a valid URL.
+    host = f"[{parsed.hostname}]" if ":" in (parsed.hostname or "") else parsed.hostname
+    return f"{parsed.scheme}://{host}:{parsed.port}" if parsed.port else f"{parsed.scheme}://{host}"
+
# Standard human-readable protocol label
PROTOCOL_HTTP_JSON = "HTTP+JSON"
PROTOCOL_JSONRPC = "JSONRPC"
@@ -51,27 +67,19 @@ def _generate_endpoint_id(agent_id: int) -> str:
def _extract_primary_interface(supported_interfaces: List[Dict[str, Any]]) -> tuple[str, str]:
- """Extract the primary interface (HTTP+JSON) from supported interfaces.
+ """Extract the primary interface (first one) from supported interfaces.
Args:
supported_interfaces: List of interface objects with protocolBinding, url, protocolVersion.
Returns:
Tuple of (agent_url, protocol_version).
- Falls back to first interface if HTTP+JSON not found.
+ Returns empty string for url if no interfaces found.
"""
if not supported_interfaces:
return "", "1.0"
- # Prefer HTTP+JSON
- for iface in supported_interfaces:
- if iface.get("protocolBinding", "").upper() in (PROTOCOL_HTTP_JSON, PROTOCOL_JSONRPC, PROTOCOL_GRPC):
- return (
- iface.get("url", ""),
- iface.get("protocolVersion", "1.0")
- )
-
- # Fall back to first interface
+ # Return the first interface to ensure URL and protocol are from the same interface
first = supported_interfaces[0]
return (
first.get("url", ""),
@@ -148,6 +156,7 @@ def create_external_agent_from_url(
version: Optional[str] = None,
streaming: bool = False,
supported_interfaces: Optional[List[Dict[str, Any]]] = None,
+ base_url: Optional[str] = None,
) -> Dict[str, Any]:
"""Create or update an external A2A agent discovered from URL.
@@ -162,6 +171,7 @@ def create_external_agent_from_url(
version: Agent version from Agent Card.
streaming: Whether this agent supports SSE streaming.
supported_interfaces: All supported protocol interfaces.
+ base_url: Base URL for health checks (service root address).
Returns:
Created agent information dict.
@@ -170,6 +180,10 @@ def create_external_agent_from_url(
expires_at = now + timedelta(hours=DEFAULT_CACHE_TTL_HOURS)
protocol_type = _extract_protocol_type(supported_interfaces)
+ # Extract base_url from source_url if not provided
+ if not base_url and source_url:
+ base_url = _extract_base_url(source_url)
+
with _get_db_session() as session:
# Check if agent already exists by source_url
existing = session.query(A2AExternalAgent).filter(
@@ -191,6 +205,8 @@ def create_external_agent_from_url(
existing.cached_at = now
existing.cache_expires_at = expires_at
existing.updated_by = user_id
+ if base_url:
+ existing.base_url = base_url
agent = existing
else:
# Create new record
@@ -210,6 +226,7 @@ def create_external_agent_from_url(
raw_card=raw_card,
cached_at=now,
cache_expires_at=expires_at,
+ base_url=base_url,
delete_flag='N'
)
session.add(agent)
@@ -226,6 +243,7 @@ def create_external_agent_from_url(
"streaming": agent.streaming,
"supported_interfaces": agent.supported_interfaces,
"source_type": agent.source_type,
+ "base_url": agent.base_url,
"is_available": agent.is_available,
"cached_at": agent.cached_at.isoformat() if agent.cached_at else None,
"cache_expires_at": agent.cache_expires_at.isoformat() if agent.cache_expires_at else None,
@@ -244,6 +262,7 @@ def create_external_agent_from_nacos(
version: Optional[str] = None,
streaming: bool = False,
supported_interfaces: Optional[List[Dict[str, Any]]] = None,
+ base_url: Optional[str] = None,
) -> Dict[str, Any]:
"""Create or update an external A2A agent discovered from Nacos.
@@ -259,6 +278,7 @@ def create_external_agent_from_nacos(
version: Agent version from Agent Card.
streaming: Whether this agent supports SSE streaming.
supported_interfaces: All supported protocol interfaces.
+ base_url: Base URL for health checks (service root address).
Returns:
Created agent information dict.
@@ -267,6 +287,10 @@ def create_external_agent_from_nacos(
expires_at = now + timedelta(hours=DEFAULT_CACHE_TTL_HOURS)
protocol_type = _extract_protocol_type(supported_interfaces)
+ # Extract base_url from agent_url if not provided
+ if not base_url and agent_url:
+ base_url = _extract_base_url(agent_url)
+
with _get_db_session() as session:
# Check if agent already exists by nacos_config_id + nacos_agent_name
existing = session.query(A2AExternalAgent).filter(
@@ -288,6 +312,8 @@ def create_external_agent_from_nacos(
existing.cached_at = now
existing.cache_expires_at = expires_at
existing.updated_by = user_id
+ if base_url:
+ existing.base_url = base_url
agent = existing
else:
agent = A2AExternalAgent(
@@ -307,6 +333,7 @@ def create_external_agent_from_nacos(
raw_card=raw_card,
cached_at=now,
cache_expires_at=expires_at,
+ base_url=base_url,
delete_flag='N'
)
session.add(agent)
@@ -323,6 +350,7 @@ def create_external_agent_from_nacos(
"streaming": agent.streaming,
"supported_interfaces": agent.supported_interfaces,
"source_type": agent.source_type,
+ "base_url": agent.base_url,
"is_available": agent.is_available,
"cached_at": agent.cached_at.isoformat() if agent.cached_at else None,
"cache_expires_at": agent.cache_expires_at.isoformat() if agent.cache_expires_at else None,
@@ -360,6 +388,7 @@ def get_external_agent_by_id(external_agent_id: int, tenant_id: str) -> Optional
"supported_interfaces": agent.supported_interfaces,
"source_type": agent.source_type,
"source_url": agent.source_url,
+ "base_url": agent.base_url,
"nacos_config_id": agent.nacos_config_id,
"nacos_agent_name": agent.nacos_agent_name,
"raw_card": agent.raw_card,
@@ -416,6 +445,8 @@ def list_external_agents(
"protocol_type": agent.protocol_type,
"supported_interfaces": agent.supported_interfaces,
"source_type": agent.source_type,
+ "source_url": agent.source_url,
+ "base_url": agent.base_url,
"is_available": agent.is_available,
"last_check_result": agent.last_check_result,
"create_time": agent.create_time.isoformat() if agent.create_time else None,
@@ -1714,6 +1745,7 @@ def get_nacos_config_by_id(config_id: str, tenant_id: str) -> Optional[Dict[str,
"name": config.name,
"nacos_addr": config.nacos_addr,
"nacos_username": config.nacos_username,
+ "nacos_password": config.nacos_password,
"namespace_id": config.namespace_id,
"description": config.description,
"is_active": config.is_active,
@@ -1749,6 +1781,8 @@ def list_nacos_configs(tenant_id: str, is_active: Optional[bool] = None) -> List
"name": config.name,
"nacos_addr": config.nacos_addr,
"namespace_id": config.namespace_id,
+ "nacos_username": config.nacos_username,
+ "nacos_password": config.nacos_password,
"is_active": config.is_active,
"last_scan_at": config.last_scan_at.isoformat() if config.last_scan_at else None,
}
@@ -1804,6 +1838,75 @@ def delete_nacos_config(config_id: str, tenant_id: str) -> bool:
return True
+def update_nacos_config(
+    config_id: str,
+    tenant_id: str,
+    user_id: str,
+    name: Optional[str] = None,
+    nacos_addr: Optional[str] = None,
+    nacos_username: Optional[str] = None,
+    nacos_password: Optional[str] = None,
+    namespace_id: Optional[str] = None,
+    description: Optional[str] = None,
+    is_active: Optional[bool] = None
+) -> Optional[Dict[str, Any]]:
+    """Apply a partial update to one tenant's Nacos config.
+
+    Arguments left as None are skipped, so the stored value is kept.
+
+    Args:
+        config_id: Identifier of the config to modify.
+        tenant_id: Tenant that must own the config.
+        user_id: Recorded as ``updated_by`` on the row.
+        name: Replacement display name, if given.
+        nacos_addr: Replacement Nacos server address, if given.
+        nacos_username: Replacement Nacos username, if given.
+        nacos_password: Replacement Nacos password, if given.
+        namespace_id: Replacement Nacos namespace, if given.
+        description: Replacement description, if given.
+        is_active: Replacement active flag, if given.
+
+    Returns:
+        Dict of the config's fields after the update, or None when no
+        non-deleted config matches ``config_id`` and ``tenant_id``.
+    """
+    # Column name -> requested value, in the order the assignments are applied.
+    requested = {
+        "name": name,
+        "nacos_addr": nacos_addr,
+        "nacos_username": nacos_username,
+        "nacos_password": nacos_password,
+        "namespace_id": namespace_id,
+        "description": description,
+        "is_active": is_active,
+    }
+    # Attributes echoed back to the caller.
+    returned_fields = (
+        "id", "config_id", "name", "nacos_addr",
+        "namespace_id", "nacos_username", "nacos_password", "is_active",
+    )
+
+    with _get_db_session() as session:
+        lookup = session.query(A2ANacosConfig).filter(
+            A2ANacosConfig.config_id == config_id,
+            A2ANacosConfig.tenant_id == tenant_id,
+            A2ANacosConfig.delete_flag != 'Y'
+        )
+        config = lookup.first()
+        if not config:
+            return None
+
+        for column, value in requested.items():
+            if value is not None:
+                setattr(config, column, value)
+
+        config.updated_by = user_id
+        # Flush so the pending changes are sent to the database before reading back.
+        session.flush()
+
+        return {field: getattr(config, field) for field in returned_fields}
+
+
# =============================================================================
# A2A Artifact Operations
# =============================================================================
diff --git a/backend/database/attachment_db.py b/backend/database/attachment_db.py
index fbfc83583..187381cd2 100644
--- a/backend/database/attachment_db.py
+++ b/backend/database/attachment_db.py
@@ -396,6 +396,7 @@ def get_content_type(file_path: str) -> str:
'.html': 'text/html',
'.htm': 'text/html',
'.json': 'application/json',
+        '.epub': 'application/epub+zip',
'.xml': 'application/xml',
'.zip': 'application/zip',
'.rar': 'application/x-rar-compressed',
diff --git a/backend/database/db_models.py b/backend/database/db_models.py
index 947c0a812..baa8e903e 100644
--- a/backend/database/db_models.py
+++ b/backend/database/db_models.py
@@ -1,4 +1,4 @@
-from sqlalchemy import BigInteger, Boolean, Column, ForeignKey, ForeignKeyConstraint, Integer, JSON, Numeric, PrimaryKeyConstraint, Sequence, String, Text, TIMESTAMP, UniqueConstraint, Index, Float
+from sqlalchemy import BigInteger, Boolean, Column, Integer, JSON, Numeric, Sequence, String, Text, TIMESTAMP, UniqueConstraint, Index, Float
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import DeclarativeBase
from sqlalchemy.sql import func
@@ -178,6 +178,10 @@ class ModelRecord(TableBase):
Boolean, default=True, doc="Whether to verify SSL certificates when connecting to this model API. Default is true. Set to false for local services without SSL support.")
chunk_batch = Column(
Integer, doc="Batch size for concurrent embedding requests during document chunking")
+ model_appid = Column(
+ String(100), doc="Application ID for model authentication (used by some STT/TTS providers like Volcano Engine)")
+ access_token = Column(
+ String(100), doc="Access token for model authentication (used by some STT/TTS providers like Volcano Engine)")
class ModelMonitoringRecord(SimpleTableBase):
@@ -353,10 +357,17 @@ class KnowledgeRecord(TableBase):
knowledge_describe = Column(String(3000), doc="Knowledge base description")
knowledge_sources = Column(String(300), doc="Knowledge base sources")
embedding_model_name = Column(String(200), doc="Embedding model name, used to record the embedding model used by the knowledge base")
+ embedding_model_id = Column(Integer, doc="Embedding model ID, foreign key reference to model_record_t.model_id")
tenant_id = Column(String(100), doc="Tenant ID")
group_ids = Column(String, doc="Knowledge base group IDs list")
ingroup_permission = Column(
String(30), doc="In-group permission: EDIT, READ_ONLY, PRIVATE")
+ summary_frequency = Column(String(10), nullable=True,
+ doc="Auto-summary frequency: '3h', '5h', '1d', '1w', or NULL (disabled)")
+ last_summary_time = Column(TIMESTAMP(timezone=False), nullable=True,
+ doc="Timestamp of last summary generation")
+ last_doc_update_time = Column(TIMESTAMP(timezone=False), nullable=True,
+ doc="Timestamp of last document add/delete operation")
class TenantConfig(TableBase):
@@ -775,6 +786,9 @@ class A2AExternalAgent(TableBase):
nacos_config_id = Column(String(64), doc="Reference to Nacos config used for discovery")
nacos_agent_name = Column(String(255), doc="Original name used for Nacos query")
+ # Base URL for infrastructure health checks
+ base_url = Column(String(512), doc="Base URL for health checks (service root address), e.g., http://agent:8080")
+
# Tenant isolation
tenant_id = Column(String(100), nullable=False, doc=_TENANT_ID_DOC)
@@ -803,12 +817,6 @@ class A2AExternalAgentRelation(TableBase):
name="uq_local_external_agent",
deferrable=True,
),
- ForeignKeyConstraint(
- ["external_agent_id"],
- [f"{SCHEMA}.ag_a2a_external_agent_t.id"],
- name="fk_external_agent",
- deferrable=True,
- ),
{"schema": SCHEMA},
)
@@ -919,7 +927,7 @@ class A2AMessage(SimpleTableBase):
# Core identifiers (following A2A spec)
message_id = Column(String(64), primary_key=True, doc="Message ID (A2A spec: messageId)")
- task_id = Column(String(64), ForeignKey(f"{SCHEMA}.ag_a2a_task_t.id", ondelete="CASCADE"), nullable=True, doc="Task ID this message belongs to (nullable for standalone/simple requests)")
+ task_id = Column(String(64), nullable=True, doc="Task ID this message belongs to (nullable for standalone/simple requests)")
# Message attributes
message_index = Column(Integer, nullable=False, doc="Order of message in the conversation")
@@ -947,7 +955,7 @@ class A2AArtifact(SimpleTableBase):
# Core identifiers (following A2A spec)
id = Column(String(64), primary_key=True, doc="Internal primary key")
artifact_id = Column(String(64), nullable=False, doc="Artifact ID (A2A spec: artifactId)")
- task_id = Column(String(64), ForeignKey(f"{SCHEMA}.ag_a2a_task_t.id", ondelete="CASCADE"), nullable=False, doc="Task ID this artifact belongs to")
+ task_id = Column(String(64), nullable=False, doc="Task ID this artifact belongs to")
# Artifact attributes
name = Column(String(255), doc="Human-readable artifact name")
diff --git a/backend/database/knowledge_db.py b/backend/database/knowledge_db.py
index 0d13eb9f7..8674bb4fb 100644
--- a/backend/database/knowledge_db.py
+++ b/backend/database/knowledge_db.py
@@ -1,5 +1,6 @@
from typing import Any, Dict, List, Optional
+import logging
import uuid
from sqlalchemy import func
from sqlalchemy.exc import SQLAlchemyError
@@ -7,6 +8,9 @@
from database.client import as_dict, get_db_session
from database.db_models import KnowledgeRecord
from utils.str_utils import convert_list_to_string
+from consts.scheduler import VALID_SUMMARY_FREQUENCIES
+
+logger = logging.getLogger("knowledge_db")
def _generate_index_name(knowledge_id: int) -> str:
@@ -49,6 +53,7 @@ def create_knowledge_record(query: Dict[str, Any]) -> Dict[str, Any]:
"knowledge_sources": query.get("knowledge_sources", "elasticsearch"),
"tenant_id": query.get("tenant_id"),
"embedding_model_name": query.get("embedding_model_name"),
+ "embedding_model_id": query.get("embedding_model_id"),
"knowledge_name": knowledge_name,
"group_ids": convert_list_to_string(group_ids) if isinstance(group_ids, list) else group_ids,
"ingroup_permission": query.get("ingroup_permission"),
@@ -116,6 +121,7 @@ def upsert_knowledge_record(query: Dict[str, Any]) -> Dict[str, Any]:
existing_record.knowledge_describe = query.get('knowledge_describe', '')
existing_record.knowledge_sources = query.get('knowledge_sources', 'elasticsearch')
existing_record.embedding_model_name = query.get('embedding_model_name')
+ existing_record.embedding_model_id = query.get('embedding_model_id')
existing_record.updated_by = query.get('user_id')
existing_record.update_time = func.current_timestamp()
@@ -345,6 +351,43 @@ def update_model_name_by_index_name(index_name: str, embedding_model_name: str,
raise e
+def update_embedding_model_by_index_name(
+    index_name: str,
+    embedding_model_id: int,
+    embedding_model_name: str,
+    tenant_id: str,
+    user_id: str
+) -> bool:
+    """Update the embedding model (both ID and name) for a knowledge base.
+
+    Args:
+        index_name: Internal index name of the knowledge base
+        embedding_model_id: New embedding model ID
+        embedding_model_name: New embedding model name
+        tenant_id: Tenant ID; only records of this tenant are matched
+        user_id: User ID making the update, stored as updated_by
+
+    Returns:
+        bool: True when at least one non-deleted record was updated
+    """
+    try:
+        with get_db_session() as session:
+            result = session.query(KnowledgeRecord).filter(
+                KnowledgeRecord.index_name == index_name,
+                KnowledgeRecord.delete_flag != 'Y',
+                KnowledgeRecord.tenant_id == tenant_id
+            ).update({
+                "embedding_model_id": embedding_model_id,
+                "embedding_model_name": embedding_model_name,
+                "updated_by": user_id
+            })
+            session.commit()
+            return result > 0
+    except SQLAlchemyError:
+        logger.exception("Update embedding model error")
+        raise
+
+
def get_index_name_by_knowledge_name(knowledge_name: str, tenant_id: str) -> str:
"""
Get the internal index_name from user-facing knowledge_name.
@@ -411,5 +454,77 @@ def get_knowledge_name_map_by_index_names(index_names: List[str]) -> Dict[str, s
knowledge_name_map[index_name] = index_name
return knowledge_name_map
- except SQLAlchemyError as e:
- raise e
+ except SQLAlchemyError:
+ logger.exception("Query knowledge name map error")
+ raise
+
+
+def update_summary_frequency(index_name: str, summary_frequency: Optional[str],
+                             _tenant_id: str, user_id: str) -> bool:
+    """Persist a new auto-summary frequency; returns False if no active record matches."""
+    if summary_frequency not in VALID_SUMMARY_FREQUENCIES:
+        raise ValueError(f"Invalid summary_frequency: {summary_frequency}")
+    try:
+        with get_db_session() as session:
+            # NOTE: _tenant_id is not used; the lookup is keyed on index_name only.
+            kb_record = session.query(KnowledgeRecord).filter(
+                KnowledgeRecord.index_name == index_name,
+                KnowledgeRecord.delete_flag != 'Y'
+            ).first()
+            if kb_record:
+                kb_record.summary_frequency = summary_frequency
+                kb_record.updated_by = user_id
+                session.commit()
+                return True
+            return False
+    except SQLAlchemyError:
+        logger.exception("Update summary frequency error")
+        raise
+
+
+def update_last_summary_time(index_name: str):
+    """Set last_summary_time of the non-deleted knowledge base `index_name` to datetime.now()."""
+    from datetime import datetime
+    try:
+        with get_db_session() as session:
+            active_records = session.query(KnowledgeRecord).filter(
+                KnowledgeRecord.index_name == index_name, KnowledgeRecord.delete_flag != 'Y')
+            kb_record = active_records.first()
+            if not kb_record:
+                return  # nothing to stamp; no commit is issued
+            kb_record.last_summary_time = datetime.now()
+            session.commit()
+    except SQLAlchemyError:
+        logger.exception("Update last summary time error")
+        raise
+
+
+def update_last_doc_update_time(index_name: str):
+    """Set last_doc_update_time of the non-deleted knowledge base `index_name` to datetime.now()."""
+    from datetime import datetime
+    try:
+        with get_db_session() as session:
+            active_records = session.query(KnowledgeRecord).filter(
+                KnowledgeRecord.index_name == index_name, KnowledgeRecord.delete_flag != 'Y')
+            kb_record = active_records.first()
+            if not kb_record:
+                return  # no matching record; no commit is issued
+            kb_record.last_doc_update_time = datetime.now()
+            session.commit()
+    except SQLAlchemyError:
+        logger.exception("Update last doc update time error")
+        raise
+
+
+def get_knowledge_bases_for_auto_summary() -> List[Dict[str, Any]]:
+    """Return every non-deleted knowledge base whose summary_frequency is set, as dicts."""
+    try:
+        with get_db_session() as session:
+            scheduled = session.query(KnowledgeRecord).filter(
+                KnowledgeRecord.summary_frequency.isnot(None),
+                KnowledgeRecord.delete_flag != 'Y'
+            )
+            return list(map(as_dict, scheduled.all()))
+    except SQLAlchemyError:
+        logger.exception("Get knowledge bases error")
+        raise
diff --git a/backend/nexent_context_metrics.log b/backend/nexent_context_metrics.log
deleted file mode 100644
index ebd63bcae..000000000
--- a/backend/nexent_context_metrics.log
+++ /dev/null
@@ -1,39 +0,0 @@
-Step 1: main_i=2291 main_o=54 | comp_i=0 comp_o=0 | mem_est_input=2897 |mem_est_output=88
-Total: main_i=2291 main_o=54 | comp_i=0 comp_o=0 | all_i=2291 all_o=54 | mem_est_input=2897 |mem_est_output=88
------
-Step 1: main_i=1422 main_o=48 | comp_i=0 comp_o=0 | mem_est_input=1807 |mem_est_output=69
-Total: main_i=1422 main_o=48 | comp_i=0 comp_o=0 | all_i=1422 all_o=48 | mem_est_input=1807 |mem_est_output=69
------
-Step 1: main_i=1486 main_o=444 | comp_i=0 comp_o=0 | mem_est_input=1891 |mem_est_output=555
-Total: main_i=1486 main_o=444 | comp_i=0 comp_o=0 | all_i=1486 all_o=444 | mem_est_input=1891 |mem_est_output=555
------
-Step 1: main_i=1423 main_o=15 | comp_i=0 comp_o=0 | mem_est_input=1811 |mem_est_output=10
-Total: main_i=1423 main_o=15 | comp_i=0 comp_o=0 | all_i=1423 all_o=15 | mem_est_input=1811 |mem_est_output=10
------
-Step 1: main_i=1450 main_o=298 | comp_i=0 comp_o=0 | mem_est_input=1835 |mem_est_output=330
-Total: main_i=1450 main_o=298 | comp_i=0 comp_o=0 | all_i=1450 all_o=298 | mem_est_input=1835 |mem_est_output=330
------
-Step 1: main_i=1422 main_o=46 | comp_i=0 comp_o=0 | mem_est_input=1807 |mem_est_output=63
-Total: main_i=1422 main_o=46 | comp_i=0 comp_o=0 | all_i=1422 all_o=46 | mem_est_input=1807 |mem_est_output=63
------
-Step 1: main_i=1425 main_o=47 | comp_i=0 comp_o=0 | mem_est_input=1810 |mem_est_output=62
-Total: main_i=1425 main_o=47 | comp_i=0 comp_o=0 | all_i=1425 all_o=47 | mem_est_input=1810 |mem_est_output=62
------
-Step 1: main_i=1480 main_o=30 | comp_i=0 comp_o=0 | mem_est_input=1876 |mem_est_output=37
-Total: main_i=1480 main_o=30 | comp_i=0 comp_o=0 | all_i=1480 all_o=30 | mem_est_input=1876 |mem_est_output=37
------
-Step 1: main_i=1422 main_o=48 | comp_i=0 comp_o=0 | mem_est_input=1807 |mem_est_output=67
-Total: main_i=1422 main_o=48 | comp_i=0 comp_o=0 | all_i=1422 all_o=48 | mem_est_input=1807 |mem_est_output=67
------
-Step 1: main_i=1518 main_o=104 | comp_i=0 comp_o=0 | mem_est_input=1918 |mem_est_output=140
-Total: main_i=1518 main_o=104 | comp_i=0 comp_o=0 | all_i=1518 all_o=104 | mem_est_input=1918 |mem_est_output=140
------
-Step 1: main_i=1758 main_o=36 | comp_i=0 comp_o=0 | mem_est_input=2171 |mem_est_output=51
-Total: main_i=1758 main_o=36 | comp_i=0 comp_o=0 | all_i=1758 all_o=36 | mem_est_input=2171 |mem_est_output=51
------
-Step 1: main_i=1479 main_o=61 | comp_i=0 comp_o=0 | mem_est_input=1879 |mem_est_output=80
-Total: main_i=1479 main_o=61 | comp_i=0 comp_o=0 | all_i=1479 all_o=61 | mem_est_input=1879 |mem_est_output=80
------
-Step 1: main_i=1551 main_o=467 | comp_i=0 comp_o=0 | mem_est_input=1970 |mem_est_output=607
-Total: main_i=1551 main_o=467 | comp_i=0 comp_o=0 | all_i=1551 all_o=467 | mem_est_input=1970 |mem_est_output=607
------
diff --git a/backend/prompts/managed_system_prompt_template_en.yaml b/backend/prompts/managed_system_prompt_template_en.yaml
index 1d555a907..67da8305c 100644
--- a/backend/prompts/managed_system_prompt_template_en.yaml
+++ b/backend/prompts/managed_system_prompt_template_en.yaml
@@ -166,5 +166,14 @@ planning:
final_answer:
pre_messages: |-
+ You have reached the maximum step limit. Please provide a comprehensive summary of:
+ 1. What has been accomplished so far
+ 2. Key findings or results
+ 3. Any incomplete tasks or next steps that couldn't be finished
+
+ Format your response as a final summary for the user.
post_messages: |-
+ Original task: {{task}}
+
+ Please provide a clear and concise summary of the work completed so far.
diff --git a/backend/prompts/managed_system_prompt_template_zh.yaml b/backend/prompts/managed_system_prompt_template_zh.yaml
index 971737862..231eee325 100644
--- a/backend/prompts/managed_system_prompt_template_zh.yaml
+++ b/backend/prompts/managed_system_prompt_template_zh.yaml
@@ -83,7 +83,7 @@ system_prompt: |-
value = config["key1"]["key2"]
print(value)
- 3. **遵循技能指南**:技能内容注入后,严格按其中的步骤执行。不要跳过技能指南中的步骤,也不要用自行编写的代码替代技能定义的��程。
+ 3. **遵循技能指南**:技能内容注入后,严格按其中的步骤执行。不要跳过技能指南中的步骤,也不要用自行编写的代码替代技能定义的流程。
4. **执行技能脚本**:如果技能指南中引用了附加脚本(形如 ``),使用以下格式调用:
代码:
@@ -211,11 +211,11 @@ system_prompt: |-
### python代码规范
1. 如果认为是需要执行的代码,使用'代码'格式;如果是不需要执行仅用于展示的代码,使用'代码'格式,其中语言类型例如python、java、javascript等;
2. 只使用已定义的变量,变量将在多次调用之间持续保持;
- 3. 使用“print()”函数让下一次的模型调用看到对应变量信息;
+ 3. 使用"print()"函数让下一次的模型调用看到对应变量信息;
4. 正确使用工具的入参,使用关键字参数,不要用字典形式;
5. 避免在一轮对话中进行过多的工具调用,这会导致输出格式难以预测;
6. 只在需要时调用工具,不重复相同参数的调用;
- 7. 使用变量名保存函数调用结果,在每个中间步骤中,您可以使用“print()”来保存您需要的任何重要信息。被保存的信息在代码执行之间保持。print()输出的内容应被视为字符串,不要对其进行字典相关操作如.get()、[]等,避免类型错误;
+ 7. 使用变量名保存函数调用结果,在每个中间步骤中,您可以使用"print()"来保存您需要的任何重要信息。被保存的信息在代码执行之间保持。print()输出的内容应被视为字符串,不要对其进行字典相关操作如.get()、[]等,避免类型错误;
9. 示例中的代码避免出现**if**、**for**等逻辑,仅调用工具,示例中的每一次的行动都是确定事件。如果有不同的条件,你应该给出不同条件下的示例;
10. 工具调用使用关键字参数,如:tool_name(param1="value1", param2="value2");
11. 不要放弃!你负责解决任务,而不是提供解决方向。
@@ -259,5 +259,14 @@ planning:
final_answer:
pre_messages: |-
+ 你已达到最大步数限制。请提供一份全面的工作总结,内容包括:
+ 1. 到目前为止已完成的工作
+ 2. 主要发现或结果
+ 3. 未能完成的任务或后续步骤
+
+ 请以最终总结的格式呈现给用户。
post_messages: |-
+ 原始任务:{{task}}
+
+ 请对迄今为止完成的工作进行清晰、简洁的总结。
diff --git a/backend/prompts/manager_system_prompt_template_en.yaml b/backend/prompts/manager_system_prompt_template_en.yaml
index 50cfbc411..a4ffae074 100644
--- a/backend/prompts/manager_system_prompt_template_en.yaml
+++ b/backend/prompts/manager_system_prompt_template_en.yaml
@@ -210,5 +210,14 @@ planning:
final_answer:
pre_messages: |-
+ You have reached the maximum step limit. Please provide a comprehensive summary of:
+ 1. What has been accomplished so far
+ 2. Key findings or results
+ 3. Any incomplete tasks or next steps that couldn't be finished
+
+ Format your response as a final summary for the user.
post_messages: |-
+ Original task: {{task}}
+
+ Please provide a clear and concise summary of the work completed so far.
diff --git a/backend/prompts/manager_system_prompt_template_zh.yaml b/backend/prompts/manager_system_prompt_template_zh.yaml
index 3c7144cad..6743316e3 100644
--- a/backend/prompts/manager_system_prompt_template_zh.yaml
+++ b/backend/prompts/manager_system_prompt_template_zh.yaml
@@ -287,5 +287,14 @@ planning:
final_answer:
pre_messages: |-
+ 你已达到最大步数限制。请提供一份全面的工作总结,内容包括:
+ 1. 到目前为止已完成的工作
+ 2. 主要发现或结果
+ 3. 未能完成的任务或后续步骤
+
+ 请以最终总结的格式呈现给用户。
post_messages: |-
+ 原始任务:{{task}}
+
+ 请对迄今为止完成的工作进行清晰、简洁的总结。
diff --git a/backend/services/a2a_agent_adapter.py b/backend/services/a2a_agent_adapter.py
index c052b5d37..b6fddc500 100644
--- a/backend/services/a2a_agent_adapter.py
+++ b/backend/services/a2a_agent_adapter.py
@@ -261,7 +261,7 @@ def build_a2a_message_response(
A2A Message response dict wrapped in {"message": {...}}.
"""
if not message_id:
- message_id = f"msg_{uuid4().hex[:16]}"
+ message_id = f"msg_{uuid4().hex}"
if parts:
message_parts = parts
diff --git a/backend/services/a2a_client_service.py b/backend/services/a2a_client_service.py
index 14f721ffd..e4e81fec5 100644
--- a/backend/services/a2a_client_service.py
+++ b/backend/services/a2a_client_service.py
@@ -88,15 +88,24 @@ async def discover_from_url(
# Extract endpoint URL - prioritize supportedInterfaces (A2A v1.0 standard)
agent_url = self._extract_agent_url(card)
- # Extract protocol info and supported interfaces
- capabilities = card.get("capabilities", {})
- protocol_version = capabilities.get("protocolVersion", "1.0")
- streaming = capabilities.get("streaming", False)
- transport_type = "http-streaming" if streaming else "http-polling"
-
# Extract supported interfaces (A2A v1.0 standard format)
supported_interfaces = card.get("supportedInterfaces", [])
+ # Extract protocol info from supported_interfaces (A2A 1.0 spec)
+ # protocol_version and streaming are properties of each interface, not top-level
+ first_interface = supported_interfaces[0] if supported_interfaces else {}
+ interface_capabilities = first_interface.get("capabilities", {})
+ protocol_version = first_interface.get("protocolVersion", "1.0")
+ streaming = interface_capabilities.get("streaming", False)
+
+ # Fallback to top-level capabilities if no supported_interfaces
+ if not supported_interfaces:
+ card_capabilities = card.get("capabilities", {})
+ if protocol_version == "1.0" and card_capabilities.get("protocolVersion"):
+ protocol_version = card_capabilities.get("protocolVersion")
+ if not streaming and card_capabilities.get("streaming"):
+ streaming = card_capabilities.get("streaming")
+
# Store in database
result = a2a_agent_db.create_external_agent_from_url(
source_url=url,
@@ -104,7 +113,7 @@ async def discover_from_url(
description=description,
agent_url=agent_url,
version=protocol_version,
- streaming=(transport_type == "http-streaming"),
+ streaming=streaming,
tenant_id=tenant_id,
user_id=user_id,
raw_card=card,
@@ -222,50 +231,95 @@ async def _discover_single_from_nacos(
client = NacosClient(nacos_addr, username, password)
try:
- # Query service instance from Nacos
- instance = await client.query_service_instance(agent_name, namespace)
- if not instance:
- logger.warning(f"No instance found for agent '{agent_name}' in Nacos")
+ # Query A2A agent from Nacos using dedicated A2A endpoint
+ agent_info = await client.query_a2a_agent(agent_name, namespace)
+ if not agent_info:
+ logger.warning(f"No A2A agent found for '{agent_name}' in Nacos")
return None
- # Fetch Agent Card from instance
- agent_card_url = instance.get("metadata", {}).get("a2a_card_url")
- if not agent_card_url:
- # Construct URL from instance host/port
- host = instance.get("ip")
- port = instance.get("port")
- if host and port:
- agent_card_url = f"http://{host}:{port}/.well-known/agent-{agent_name}.json"
-
- if not agent_card_url:
- logger.warning(f"No Agent Card URL found for agent '{agent_name}'")
+ # Extract agent URL from A2A response
+ agent_url = agent_info.get("agent_url") or agent_info.get("url")
+ if not agent_url:
+ logger.warning(f"No agent URL found for A2A agent '{agent_name}'")
return None
- # Fetch Agent Card
- try:
- async with A2AHttpClient() as http_client:
- card = await http_client.get_json(agent_card_url)
- except aiohttp.ClientError:
- # Network errors retrieving agent card should result in None
- logger.warning(f"Failed to retrieve agent card from {agent_card_url}")
- return None
+ # Get metadata and extract description from Nacos response
+ metadata = agent_info.get("metadata") or {}
+ description = agent_info.get("description") or metadata.get("description", "")
+ nacos_interfaces = metadata.get("supported_interfaces", [])
+ supported_interfaces = nacos_interfaces.copy() if nacos_interfaces else []
+ protocol_version = "1.0"
+ streaming = False
+ agent_card_fetched = False
+
+ # Fetch Agent Card from agent_url to get supported_interfaces (A2A v1.0 spec)
+ # Try common Agent Card endpoints (order matters - try more specific paths first)
+ card_urls = [
+ f"{agent_url.rstrip('/')}/.well-known/agent-card.json",
+ f"{agent_url.rstrip('/')}/.well-known/agent.json",
+ f"{agent_url.rstrip('/')}/.well-known/agent-1.0.json",
+ f"{agent_url.rstrip('/')}/agent-card.json",
+ f"{agent_url.rstrip('/')}/agent.json",
+ ]
+
+ for card_url in card_urls:
+ try:
+ async with A2AHttpClient() as http_client:
+ card = await http_client.get_json(card_url, headers=build_a2a_headers())
+
+ if card and (card.get("name") or card.get("agent_id")):
+ logger.info(f"Fetched Agent Card from {card_url}")
+
+ # Extract supported_interfaces from Agent Card
+ card_interfaces = card.get("supportedInterfaces", [])
+
+ # Always update from Agent Card if present
+ if card_interfaces:
+ supported_interfaces = card_interfaces
+ agent_card_fetched = True
+
+ # Extract description from Agent Card if not found in Nacos
+ if not description:
+ description = card.get("description", "")
+
+ # Extract protocol info from supported_interfaces
+ first_interface = supported_interfaces[0] if supported_interfaces else {}
+ capabilities = first_interface.get("capabilities", {})
+ protocol_version = first_interface.get("protocolVersion", "1.0")
+ streaming = capabilities.get("streaming", False)
+
+ # Merge raw_card: Agent Card takes precedence over Nacos info
+ agent_info = card
+ break
+
+ except Exception as e:
+ logger.warning(f"Failed to fetch Agent Card from {card_url}: {e}")
+ continue
+
+ if not agent_card_fetched:
+ logger.warning(
+ f"[Nacos Discovery] Failed to fetch Agent Card for '{agent_name}', "
+ f"using Nacos interfaces: {supported_interfaces}"
+ )
- # Extract endpoint URL and supported interfaces
- agent_url = self._extract_agent_url(card)
- supported_interfaces = card.get("supportedInterfaces", [])
+ logger.info(
+ f"[Nacos Discovery] Storing agent: name={agent_name}, "
+ f"agent_url={agent_url}, supported_interfaces_count={len(supported_interfaces) if supported_interfaces else 0}, "
+ f"protocol_version={protocol_version}, streaming={streaming}"
+ )
# Store in database
result = a2a_agent_db.create_external_agent_from_nacos(
- name=card.get("name", agent_name),
- description=card.get("description", ""),
+ name=agent_name,
+ description=description,
agent_url=agent_url,
- protocol_version=card.get("capabilities", {}).get("protocolVersion", "1.0"),
- transport_type="http-streaming" if card.get("capabilities", {}).get("streaming") else "http-polling",
+ version=protocol_version,
+ streaming=streaming,
nacos_config_id=nacos_config["config_id"],
nacos_agent_name=agent_name,
tenant_id=tenant_id,
user_id=user_id,
- raw_card=card,
+ raw_card=agent_info,
supported_interfaces=supported_interfaces
)
@@ -312,13 +366,10 @@ def _extract_agent_url(self, card: Dict[str, Any]) -> str:
return ""
def _find_url_in_interfaces(self, interfaces: List[Any]) -> str:
- """Find URL from supportedInterfaces array, preferring http-json-rpc."""
- json_rpc_protocols = ("http-json-rpc", "jsonrpc", "httpjsonrpc")
- for iface in interfaces:
- if iface.get("protocolBinding", "").lower() in json_rpc_protocols:
- url = iface.get("url", "")
- if url:
- return url
+ """Find URL from supportedInterfaces array - return the first interface's URL.
+
+ This ensures protocol and URL are always from the same interface.
+ """
for iface in interfaces:
url = iface.get("url", "")
if url:
@@ -426,46 +477,128 @@ async def refresh_agent_card(
if not agent:
raise AgentDiscoveryError(f"Agent {external_agent_id} not found")
+ source_type = agent.get("source_type")
+ source_url = agent.get("source_url")
+ agent_url = agent.get("agent_url")
+ base_url = agent.get("base_url")
+
try:
- # Fetch fresh Agent Card
- source_url = agent.get("source_url")
- if not source_url:
- raise AgentDiscoveryError("No source URL available for refresh")
+ if source_type == "nacos":
+ # Nacos discovered agents: use /health endpoint to check availability
+ if not base_url:
+ raise AgentDiscoveryError("No base_url available for health check")
- async with A2AHttpClient() as client:
- card = await client.get_json(source_url)
+ health_url = f"{base_url.rstrip('/')}/health"
+ logger.info(f"Checking health for Nacos agent: {health_url}")
- # Extract updated info - use _extract_agent_url for A2A v1.0 standard
- new_url = self._extract_agent_url(card)
- new_name = card.get("name")
- new_description = card.get("description")
- new_supported_interfaces = card.get("supportedInterfaces", [])
+ async with A2AHttpClient() as client:
+ health_response = await client.get_json(health_url)
- # Note: Do NOT update protocol_type and agent_url during refresh
- # These are user-configured values and should not be overwritten
- # The refresh should only update metadata (name, description, supported_interfaces, raw_card)
+ # Update availability based on health check
+ a2a_agent_db.update_agent_availability(
+ external_agent_id=external_agent_id,
+ tenant_id=tenant_id,
+ is_available=True,
+ check_result="OK"
+ )
- # Update cache
- result = a2a_agent_db.refresh_external_agent_cache(
- external_agent_id=external_agent_id,
- tenant_id=tenant_id,
- user_id=user_id,
- new_raw_card=card,
- new_name=new_name,
- new_description=new_description,
- new_supported_interfaces=new_supported_interfaces
- )
+ # Update cache timestamp
+ a2a_agent_db.refresh_external_agent_cache(
+ external_agent_id=external_agent_id,
+ tenant_id=tenant_id,
+ user_id=user_id
+ )
- # Update availability
- a2a_agent_db.update_agent_availability(
- external_agent_id=external_agent_id,
- tenant_id=tenant_id,
- is_available=True,
- check_result="OK"
- )
+ logger.info(f"Health check passed for agent {external_agent_id}")
+ return {
+ "agent_id": external_agent_id,
+ "source_type": source_type,
+ "health_url": health_url,
+ "health_response": health_response,
+ "status": "available"
+ }
- logger.info(f"Refreshed agent {external_agent_id}")
- return result
+ else:
+ # URL discovered agents: fetch fresh Agent Card from source_url
+ if not source_url:
+ raise AgentDiscoveryError("No source URL available for refresh")
+
+ async with A2AHttpClient() as client:
+ card = await client.get_json(source_url)
+
+ # Extract updated info - use _extract_agent_url for A2A v1.0 standard
+ new_url = self._extract_agent_url(card)
+ new_name = card.get("name")
+ new_description = card.get("description")
+ new_supported_interfaces = card.get("supportedInterfaces", [])
+
+ # Extract new protocol type from the card
+ new_protocol_type = _extract_protocol_type(new_supported_interfaces)
+ current_protocol_type = agent.get("protocol_type")
+
+ # Determine if we need to update agent_url and protocol_type
+ # Update agent_url if it changed in the remote card
+ update_agent_url = new_url is not None and new_url != agent_url
+
+ # Update protocol_type if it changed in the remote card
+ update_protocol_type = new_protocol_type != current_protocol_type
+
+ # When protocol_type changes, we need to find the corresponding interface URL
+ if update_protocol_type:
+ logger.info(
+ f"Protocol type changed for agent {external_agent_id}: "
+ f"{current_protocol_type} -> {new_protocol_type}"
+ )
+ # The database function will handle finding the correct interface URL
+ result = a2a_agent_db.refresh_external_agent_cache(
+ external_agent_id=external_agent_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ new_raw_card=card,
+ new_agent_url=new_url if update_agent_url else None,
+ new_name=new_name,
+ new_description=new_description,
+ new_supported_interfaces=new_supported_interfaces,
+ new_protocol_type=new_protocol_type
+ )
+ elif update_agent_url:
+ # Only agent_url changed
+ logger.info(
+ f"Agent URL changed for agent {external_agent_id}: "
+ f"{agent_url} -> {new_url}"
+ )
+ result = a2a_agent_db.refresh_external_agent_cache(
+ external_agent_id=external_agent_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ new_raw_card=card,
+ new_agent_url=new_url,
+ new_name=new_name,
+ new_description=new_description,
+ new_supported_interfaces=new_supported_interfaces
+ )
+ else:
+ # No changes to agent_url or protocol_type, just update metadata
+ result = a2a_agent_db.refresh_external_agent_cache(
+ external_agent_id=external_agent_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ new_raw_card=card,
+ new_name=new_name,
+ new_description=new_description,
+ new_supported_interfaces=new_supported_interfaces
+ )
+
+ # Update availability
+ a2a_agent_db.update_agent_availability(
+ external_agent_id=external_agent_id,
+ tenant_id=tenant_id,
+ is_available=True,
+ check_result="OK"
+ )
+
+ logger.info(f"Refreshed agent {external_agent_id}")
+ return result
except aiohttp.ClientError as e:
logger.error(f"Failed to refresh agent {external_agent_id}: {e}")
diff --git a/backend/services/agent_service.py b/backend/services/agent_service.py
index 73c6a4640..02fa7d8c6 100644
--- a/backend/services/agent_service.py
+++ b/backend/services/agent_service.py
@@ -46,6 +46,7 @@
update_related_agents,
clear_agent_new_mark
)
+from database import a2a_agent_db
from database.model_management_db import get_model_by_model_id, get_model_id_by_display_name
from database.remote_mcp_db import get_mcp_server_by_name_and_tenant
from database.tool_db import (
@@ -967,6 +968,49 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str =
logger.error(f"Failed to update related agents: {str(e)}")
raise ValueError(f"Failed to update related agents: {str(e)}")
+ # Handle related external agents saving when provided
+ try:
+ if request.related_external_agent_ids is not None and agent_id is not None:
+ related_external_agent_ids = request.related_external_agent_ids
+ # Query current relations
+ current_relations = a2a_agent_db.list_external_relations_by_local_agent(
+ local_agent_id=agent_id,
+ tenant_id=tenant_id
+ )
+ current_external_ids = {
+ rel["external_agent_id"] for rel in current_relations
+ }
+ new_external_ids = set(related_external_agent_ids) if related_external_agent_ids else set()
+
+ # Find IDs to delete (in current but not in new)
+ ids_to_delete = current_external_ids - new_external_ids
+ # Find IDs to add (in new but not in current)
+ ids_to_add = new_external_ids - current_external_ids
+
+ # Soft delete removed relations
+ for ext_agent_id in ids_to_delete:
+ a2a_agent_db.remove_external_agent_relation(
+ local_agent_id=agent_id,
+ external_agent_id=ext_agent_id,
+ tenant_id=tenant_id
+ )
+
+ # Add new relations
+ for ext_agent_id in ids_to_add:
+ try:
+ a2a_agent_db.add_external_agent_relation(
+ local_agent_id=agent_id,
+ external_agent_id=ext_agent_id,
+ tenant_id=tenant_id,
+ user_id=user_id
+ )
+ except ValueError:
+ # Relation already exists, skip
+ pass
+ except Exception as e:
+ logger.error(f"Failed to update related external agents: {str(e)}")
+ raise ValueError(f"Failed to update related external agents: {str(e)}")
+
return {"agent_id": agent_id}
diff --git a/backend/services/agent_version_service.py b/backend/services/agent_version_service.py
index 067fd0e1c..69163dbc6 100644
--- a/backend/services/agent_version_service.py
+++ b/backend/services/agent_version_service.py
@@ -817,7 +817,8 @@ async def list_published_agents_impl(
# Apply visibility filter for DEV/USER based on group overlap
if not can_edit_all:
agent_group_ids = set(convert_string_to_list(agent.get("group_ids")))
- if len(user_group_ids.intersection(agent_group_ids)) == 0:
+ is_creator = str(agent.get("created_by")) == str(user_id)
+ if not is_creator and len(user_group_ids.intersection(agent_group_ids)) == 0:
continue
agent_id = agent.get("agent_id")
diff --git a/backend/services/auto_summary_scheduler.py b/backend/services/auto_summary_scheduler.py
new file mode 100644
index 000000000..5bc44e442
--- /dev/null
+++ b/backend/services/auto_summary_scheduler.py
@@ -0,0 +1,211 @@
+"""
+Background scheduler that periodically checks knowledge bases with
+auto-summary enabled and regenerates summaries as needed.
+"""
+import logging
+import threading
+import time
+from datetime import datetime, timedelta
+from typing import Optional
+
+from consts.scheduler import (
+ FREQUENCY_MAP,
+ SCHEDULER_CHECK_INTERVAL_SECONDS,
+)
+from database.knowledge_db import get_knowledge_bases_for_auto_summary
+from services.vectordatabase_service import ElasticSearchService, get_vector_db_core
+from utils.config_utils import tenant_config_manager
+
+logger = logging.getLogger(__name__)
+
+# Check interval from centralized config
+CHECK_INTERVAL_SECONDS = SCHEDULER_CHECK_INTERVAL_SECONDS
+
+# Track knowledge bases currently being processed to avoid duplicates
+_in_flight: set = set()
+
+
+def _parse_last_summary_time(last_summary_time) -> Optional[datetime]:
+    """Normalize a datetime or ISO-8601 string to a naive datetime; None if unusable."""
+    if isinstance(last_summary_time, str):
+        try:
+            last_summary_time = datetime.fromisoformat(last_summary_time)
+        except (ValueError, TypeError):
+            return None
+    if isinstance(last_summary_time, datetime):
+        # Drop tzinfo (no conversion) for string inputs too; an aware value would
+        # make the subtraction from naive datetime.now() raise TypeError.
+        return last_summary_time.replace(tzinfo=None)
+    return None
+
+
+def _is_due_for_summary(last_summary_time, frequency: str, last_doc_update_time) -> bool:
+    """Decide whether a knowledge base should have its summary regenerated now.
+
+    Args:
+        last_summary_time: Timestamp of the most recent summary generation
+        frequency: Key looked up in FREQUENCY_MAP (e.g., '3h', '1d')
+        last_doc_update_time: Timestamp of the most recent document add/delete
+
+    Returns:
+        True when no usable last-summary timestamp exists, or when the interval
+        has elapsed and the last document update is unknown or newer than the
+        last summary; False otherwise.
+    """
+    # Frequencies missing from FREQUENCY_MAP are never due.
+    interval = FREQUENCY_MAP.get(frequency)
+    if interval is None:
+        return False
+
+    summarized_at = _parse_last_summary_time(last_summary_time)
+    if summarized_at is None:
+        return True  # never summarized (or timestamp unusable): do it now
+
+    elapsed = datetime.now() - summarized_at
+    if elapsed < interval:
+        return False  # configured interval has not elapsed yet
+
+    # A missing document timestamp is treated as "changed"; only a timestamp
+    # at or before the last summary lets the regeneration be skipped.
+    docs_changed_at = _parse_last_summary_time(last_doc_update_time)
+    if docs_changed_at is not None and docs_changed_at <= summarized_at:
+        logger.info(f"Skipping summary: no document changes since last summary")
+        return False
+
+    return True
+
+
+def _run_auto_summary_for_kb(index_name: str, tenant_id: str):
+    """Generate and store a fresh summary for a single knowledge base.
+
+    Returns immediately if index_name is already in _in_flight. Exceptions are
+    logged and not propagated; index_name is always removed from _in_flight.
+    """
+    if index_name in _in_flight:
+        logger.info(f"Skipping {index_name}: already being processed")
+        return
+
+    _in_flight.add(index_name)
+    try:
+        logger.info(f"Starting auto-summary for knowledge base: {index_name}")
+        vector_core = get_vector_db_core()
+        summary_service = ElasticSearchService()
+
+        # Imported at call time rather than at module import.
+        from utils.document_vector_utils import (
+            process_documents_for_clustering,
+            kmeans_cluster_documents,
+            summarize_clusters_map_reduce,
+            merge_cluster_summaries,
+        )
+
+        # Resolve the tenant's LLM model id; it stays None if unset or unreadable.
+        model_id = None
+        if tenant_id:
+            try:
+                configured_llm_id = tenant_config_manager.load_config(tenant_id).get("LLM_ID")
+                if not configured_llm_id:
+                    logger.warning(f"No LLM_ID configured for tenant {tenant_id}, summary will be placeholder only")
+                else:
+                    model_id = int(configured_llm_id)
+                    logger.info(f"Using LLM model ID {model_id} for auto-summary (tenant: {tenant_id})")
+            except Exception as e:
+                logger.warning(f"Failed to get LLM_ID from tenant config: {e}")
+
+        # A smaller sample (40 documents) is used for auto-summary.
+        document_samples, doc_embeddings = process_documents_for_clustering(
+            index_name=index_name,
+            vdb_core=vector_core,
+            sample_doc_count=40,
+        )
+        if not document_samples:
+            logger.warning(f"No documents found for auto-summary: {index_name}")
+            return
+
+        clusters = kmeans_cluster_documents(doc_embeddings, k=None)
+        cluster_summaries = summarize_clusters_map_reduce(
+            document_samples=document_samples,
+            clusters=clusters,
+            language="zh",
+            doc_max_words=100,
+            cluster_max_words=150,
+            model_id=model_id,
+            tenant_id=tenant_id,
+        )
+        merged_summary = merge_cluster_summaries(cluster_summaries)
+        # NOTE(review): change_summary is expected to update last_summary_time too - confirm.
+        summary_service.change_summary(
+            index_name=index_name,
+            summary_result=merged_summary,
+            user_id="auto_scheduler",
+        )
+        logger.info(f"Auto-summary completed for knowledge base: {index_name}")
+    except Exception as e:
+        logger.error(f"Auto-summary failed for {index_name}: {e}", exc_info=True)
+    finally:
+        _in_flight.discard(index_name)
+
+
+def _scheduler_loop(stop_event: threading.Event):
+    """Thread target: summarize due knowledge bases each interval until stop_event is set."""
+    logger.info("Auto-summary scheduler started")
+    while not stop_event.is_set():
+        try:
+            candidates = get_knowledge_bases_for_auto_summary()
+            logger.info(f"Checking {len(candidates)} knowledge bases for auto-summary")
+
+            for kb in candidates:
+                if stop_event.is_set():
+                    break
+                is_due = _is_due_for_summary(
+                    kb.get("last_summary_time"),
+                    kb.get("summary_frequency"),
+                    kb.get("last_doc_update_time"),
+                )
+                if not is_due:
+                    continue
+                _run_auto_summary_for_kb(
+                    index_name=kb["index_name"],
+                    tenant_id=kb.get("tenant_id", ""),
+                )
+        except Exception as e:
+            logger.error(f"Auto-summary scheduler check failed: {e}", exc_info=True)
+
+        # Wait for the next pass; returns early once stop_event is set.
+        stop_event.wait(timeout=CHECK_INTERVAL_SECONDS)
+
+    logger.info("Auto-summary scheduler stopped")
+
+
+class AutoSummaryScheduler:
+    """Owns the daemon thread that runs `_scheduler_loop` and the event that stops it."""
+
+    def __init__(self):
+        self._stop_event = threading.Event()
+        self._thread: Optional[threading.Thread] = None
+
+    def start(self):
+        """Start the scheduler thread; only logs a warning if it is already running."""
+        if self._thread and self._thread.is_alive():
+            logger.warning("Auto-summary scheduler is already running")
+            return
+        self._stop_event.clear()
+        self._thread = threading.Thread(
+            target=_scheduler_loop, args=(self._stop_event,),
+            daemon=True, name="auto-summary-scheduler")
+        self._thread.start()
+        logger.info("Auto-summary scheduler thread started")
+
+    def stop(self):
+        """Signal the thread to stop, wait up to 60s, and warn if it is still alive afterwards."""
+        self._stop_event.set()
+        if self._thread:
+            self._thread.join(timeout=60)
+            if self._thread.is_alive():
+                logger.warning("Auto-summary scheduler thread did not stop within 60 seconds")
+                return
+        logger.info("Auto-summary scheduler thread stopped")
+
+
+# Singleton instance
+auto_summary_scheduler = AutoSummaryScheduler()
diff --git a/backend/services/config_sync_service.py b/backend/services/config_sync_service.py
index 9fe50813a..0ed29bfc5 100644
--- a/backend/services/config_sync_service.py
+++ b/backend/services/config_sync_service.py
@@ -112,6 +112,21 @@ async def save_config_impl(config, tenant_id, user_id):
embedding_api_config = model_config.get("apiConfig", {})
env_config[f"{model_prefix}_API_KEY"] = safe_value(
embedding_api_config.get("apiKey"))
+
+ # Save STT specific fields for speech recognition models
+ if model_type == "stt":
+ if model_config.get("modelFactory"):
+ stt_factory_key = "STT_MODEL_FACTORY"
+ tenant_config_manager.set_single_config(
+ user_id, tenant_id, stt_factory_key, model_config.get("modelFactory"))
+ if model_config.get("modelAppid"):
+ stt_appid_key = "STT_MODEL_APPID"
+ tenant_config_manager.set_single_config(
+ user_id, tenant_id, stt_appid_key, model_config.get("modelAppid"))
+ if model_config.get("accessToken"):
+ stt_token_key = "STT_ACCESS_TOKEN"
+ tenant_config_manager.set_single_config(
+ user_id, tenant_id, stt_token_key, model_config.get("accessToken"))
logger.info("Configuration saved successfully")
@@ -187,4 +202,11 @@ def build_model_config(model_config: dict) -> dict:
if "embedding" in model_config.get("model_type", ""):
config["dimension"] = model_config.get("max_tokens", 0)
+ # Add STT model specific fields
+ model_type = model_config.get("model_type", "")
+ if model_type == "stt":
+ config["modelFactory"] = model_config.get("model_factory", "")
+ config["modelAppid"] = model_config.get("model_appid", "")
+ config["accessToken"] = model_config.get("access_token", "")
+
return config
diff --git a/backend/services/data_process_service.py b/backend/services/data_process_service.py
index 2b222a584..a024089a3 100644
--- a/backend/services/data_process_service.py
+++ b/backend/services/data_process_service.py
@@ -148,8 +148,28 @@ async def get_all_tasks(self, filter: bool = True) -> List[Dict[str, Any]]:
logger.debug(
f"⏰ Inspector initialization took {time.time() - start_time}s")
- # Collect task IDs from different sources
+ # Collect task IDs from different sources and keep runtime metadata
task_ids = set()
+ runtime_task_meta: Dict[str, Dict[str, Any]] = {}
+
+ def _normalize_runtime_meta(task: Dict[str, Any]) -> Dict[str, Any]:
+ task_name_full = task.get('name', '') or ''
+ task_name = task_name_full.split('.')[-1] if task_name_full else ''
+ kwargs = task.get('kwargs') or {}
+ if isinstance(kwargs, str):
+ try:
+ import json as _json
+ kwargs = _json.loads(kwargs)
+ except Exception:
+ kwargs = {}
+ if not isinstance(kwargs, dict):
+ kwargs = {}
+ return {
+ 'task_name': task_name,
+ 'index_name': kwargs.get('index_name', ''),
+ 'path_or_url': kwargs.get('source', ''),
+ 'original_filename': kwargs.get('original_filename', ''),
+ }
def get_active():
return inspector.active()
@@ -169,12 +189,15 @@ def get_reserved():
task_id = task.get('id')
if task_id:
task_ids.add(task_id)
+ runtime_task_meta[task_id] = _normalize_runtime_meta(task)
if reserved_tasks_dict:
for worker, tasks in reserved_tasks_dict.items():
for task in tasks:
task_id = task.get('id')
if task_id:
task_ids.add(task_id)
+ # Keep active metadata if already present
+ runtime_task_meta.setdefault(task_id, _normalize_runtime_meta(task))
# Currently, we don't have scheduled tasks, so skip getting scheduled tasks here
start_time = time.time()
@@ -192,15 +215,33 @@ def get_reserved():
f"Failed to query Redis for stored task IDs: {str(redis_error)}")
logger.debug(
f"Total unique task IDs collected (inspector + Redis): {len(task_ids)}")
- tasks = [get_task_info(task_id) for task_id in task_ids]
+ task_id_list = list(task_ids)
+ tasks = [get_task_info(task_id) for task_id in task_id_list]
all_task_infos = await asyncio.gather(*tasks, return_exceptions=True)
- for task_info in all_task_infos:
+ for idx, task_info in enumerate(all_task_infos):
if isinstance(task_info, Exception):
logger.warning(
f"Failed to get status for a task: {task_info}")
continue
+ task_id = task_id_list[idx]
+ runtime_meta = runtime_task_meta.get(task_id, {})
+ # Backfill runtime info for pending/reserved tasks that do not have result metadata yet
+ if runtime_meta:
+ if not task_info.get('task_name') and runtime_meta.get('task_name'):
+ task_info['task_name'] = runtime_meta.get('task_name')
+ if not task_info.get('index_name') and runtime_meta.get('index_name'):
+ task_info['index_name'] = runtime_meta.get('index_name')
+ if not task_info.get('path_or_url') and runtime_meta.get('path_or_url'):
+ task_info['path_or_url'] = runtime_meta.get('path_or_url')
+ if not task_info.get('original_filename') and runtime_meta.get('original_filename'):
+ task_info['original_filename'] = runtime_meta.get('original_filename')
+
if filter and not (task_info.get('index_name') and task_info.get('task_name')):
- continue
+ # Keep user-visible queued tasks even before worker updates task meta.
+ if task_info.get('task_name') not in {'process', 'forward', 'process_and_forward'}:
+ continue
+ if not task_info.get('index_name'):
+ continue
all_tasks.append(task_info)
logger.debug(f"Retrieved {len(all_tasks)} tasks.")
except Exception as e:
diff --git a/backend/services/model_health_service.py b/backend/services/model_health_service.py
index 626e19007..a20b2a6ca 100644
--- a/backend/services/model_health_service.py
+++ b/backend/services/model_health_service.py
@@ -67,6 +67,9 @@ async def _perform_connectivity_check(
model_base_url: str,
model_api_key: str,
ssl_verify: bool = True,
+ model_factory: Optional[str] = None,
+ model_appid: Optional[str] = None,
+ access_token: Optional[str] = None,
display_name: Optional[str] = None,
) -> bool:
"""
@@ -133,9 +136,34 @@ async def _perform_connectivity_check(
api_key=model_api_key,
ssl_verify=ssl_verify
).check_connectivity()
- elif model_type in ["tts", "stt"]:
+ elif model_type == 'stt':
voice_service = get_voice_service()
- connectivity = await voice_service.check_voice_connectivity(model_type)
+
+
+ # Determine STT provider based on model_factory
+ use_volc = model_factory and model_factory.lower() in ["volcengine", "volcano", "volcengine", "火山引擎"]
+
+ if use_volc:
+ # Use Volcano STT with appid and access_token
+ connectivity = await voice_service.check_voice_connectivity(
+ model_type="stt",
+ stt_config={
+ "model_factory": model_factory,
+ "model_appid": model_appid,
+ "access_token": access_token,
+ "base_url": model_base_url
+ }
+ )
+ else:
+ # Use Ali STT (default) with api_key and model name
+ connectivity = await voice_service.check_voice_connectivity(
+ model_type="stt",
+ stt_config={
+ "api_key": model_api_key,
+ "base_url": model_base_url,
+ "model": model_name
+ }
+ )
else:
raise ValueError(f"Unsupported model type: {model_type}")
@@ -150,13 +178,10 @@ async def check_model_connectivity(display_name: str, tenant_id: str) -> dict:
raise LookupError(
f"Model configuration not found for {display_name}")
- # Still use repo/name concatenation for model instantiation
repo, name = model.get("model_repo", ""), model.get("model_name", "")
model_name = f"{repo}/{name}" if repo else name
- # Set model to "detecting" status
- update_data = {
- "connect_status": ModelConnectStatusEnum.DETECTING.value}
+ update_data = {"connect_status": ModelConnectStatusEnum.DETECTING.value}
update_model_record(model["model_id"], update_data)
model_type = model["model_type"]
@@ -164,13 +189,16 @@ async def check_model_connectivity(display_name: str, tenant_id: str) -> dict:
model_api_key = model["api_key"]
# Default to True if not present
ssl_verify = model.get("ssl_verify", True)
+ model_factory = model.get("model_factory")
+ model_appid = model.get("model_appid")
+ access_token = model.get("access_token")
try:
set_monitoring_context(tenant_id=tenant_id)
connectivity = await _perform_connectivity_check(
model_name, model_type, model_base_url, model_api_key, ssl_verify,
- display_name=display_name,
+ model_factory, model_appid, access_token,display_name=display_name,
)
except Exception as e:
update_data = {
@@ -198,36 +226,38 @@ async def check_model_connectivity(display_name: str, tenant_id: str) -> dict:
update_data = {
"connect_status": ModelConnectStatusEnum.UNAVAILABLE.value}
update_model_record(model["model_id"], update_data)
- # Propagate for app layer to translate into HTTP
raise e
+
+
async def verify_model_config_connectivity(model_config: dict):
"""
- Verify the connectivity of the model configuration, do not save to the database
- Args:
- model_config: Model configuration dictionary, containing necessary connection parameters
- Returns:
- dict: Contains the result of the connectivity test and error message if failed
+ Verify the connectivity of the model configuration, do not save to the database.
"""
try:
model_name = model_config.get("model_name", "")
model_type = model_config["model_type"]
- model_base_url = model_config["base_url"]
+ model_base_url = model_config.get("base_url", "")
model_api_key = model_config["api_key"]
# Default to True if not present
ssl_verify = model_config.get("ssl_verify", True)
+ model_factory = model_config.get("model_factory")
+ model_appid = model_config.get("model_appid")
+ access_token = model_config.get("access_token")
try:
- # Use the common connectivity check function
connectivity = await _perform_connectivity_check(
- model_name, model_type, model_base_url, model_api_key, ssl_verify
+ model_name, model_type, model_base_url, model_api_key, ssl_verify,
+ model_factory, model_appid, access_token
)
if not connectivity and ssl_verify:
connectivity = await _perform_connectivity_check(
- model_name, model_type, model_base_url, model_api_key, False
+ model_name, model_type, model_base_url, model_api_key, False,
+ model_factory, model_appid, access_token
)
if not connectivity:
+ error_msg = f"Failed to connect to model '{model_name}' at {model_base_url}. Please verify the URL, API key, and network connection."
return {
"connectivity": False,
"model_name": model_name,
diff --git a/backend/services/providers/dashscope_provider.py b/backend/services/providers/dashscope_provider.py
index b9fb7ab7b..69096fb15 100644
--- a/backend/services/providers/dashscope_provider.py
+++ b/backend/services/providers/dashscope_provider.py
@@ -68,7 +68,7 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
# Extract key fields for logical determination (lowercased for robustness)
m_id = model_obj.get('model', '').lower()
desc = model_obj.get('description', '')
- metadata = model_obj.get('inference_metadata', {})
+ metadata = model_obj.get('inference_metadata') or {}
req_mod = metadata.get('request_modality', [])
res_mod = metadata.get('response_modality', [])
model_obj.setdefault("object", model_obj.get("object", "model"))
diff --git a/backend/services/redis_service.py b/backend/services/redis_service.py
index efd2c0a7b..dae617f60 100644
--- a/backend/services/redis_service.py
+++ b/backend/services/redis_service.py
@@ -1,6 +1,7 @@
import json
import logging
-from typing import Dict, Any, Optional
+import re
+from typing import Dict, Any, Optional, Tuple, Set
import redis
@@ -215,7 +216,7 @@ def delete_document_records(self, index_name: str, path_or_url: str) -> Dict[str
return result
- def _recursively_delete_task_and_parents(self, task_id: str) -> tuple[int, set]:
+ def _recursively_delete_task_and_parents(self, task_id: str) -> Tuple[int, Set[str]]:
"""
Iteratively delete a Celery task and all its parent tasks from Redis.
A single task chain is deleted, and the IDs of the deleted tasks are returned.
@@ -309,16 +310,11 @@ def _cleanup_celery_tasks(self, index_name: str) -> int:
# Check for failed tasks where metadata is in the exception message
if task_index_name is None and 'exc_message' in result:
- try:
- exc_str = str(result['exc_message'])
- if '{' in exc_str and '}' in exc_str:
- json_part = exc_str[exc_str.find('{'):exc_str.rfind('}')+1]
- cleaned_json_part = json_part.replace('\\"', '"')
- error_data = json.loads(cleaned_json_part)
- task_index_name = error_data.get('index_name')
- except (json.JSONDecodeError, TypeError, IndexError) as e:
- key_str = key.decode('utf-8') if isinstance(key, bytes) else key
- logger.warning(f"Could not parse exception metadata for task key {key_str}: {e}")
+ error_data = self._extract_error_metadata_from_exc_message(
+ result.get("exc_message")
+ )
+ if error_data:
+ task_index_name = error_data.get('index_name')
if task_index_name == index_name:
key_str = key.decode('utf-8') if isinstance(key, bytes) else key
@@ -366,15 +362,11 @@ def _cleanup_celery_tasks(self, index_name: str) -> int:
)
if task_index_name is None and 'exc_message' in result:
- try:
- exc_str = str(result['exc_message'])
- if '{' in exc_str and '}' in exc_str:
- json_part = exc_str[exc_str.find('{'):exc_str.rfind('}')+1]
- cleaned_json_part = json_part.replace('\\"', '"')
- error_data = json.loads(cleaned_json_part)
- task_index_name = error_data.get('index_name')
- except (json.JSONDecodeError, TypeError, IndexError):
- pass
+ error_data = self._extract_error_metadata_from_exc_message(
+ result.get("exc_message")
+ )
+ if error_data:
+ task_index_name = error_data.get('index_name')
if task_index_name == index_name:
key_str = key.decode('utf-8') if isinstance(key, bytes) else key
@@ -497,16 +489,12 @@ def _cleanup_document_celery_tasks(self, index_name: str, path_or_url: str) -> i
# Check for failed tasks where metadata is in the exception message
if task_index_name is None and 'exc_message' in result:
- try:
- exc_str = str(result['exc_message'])
- if '{' in exc_str and '}' in exc_str:
- json_part = exc_str[exc_str.find('{'):exc_str.rfind('}')+1]
- cleaned_json_part = json_part.replace('\\"', '"')
- error_data = json.loads(cleaned_json_part)
- task_index_name = error_data.get('index_name')
- task_source = error_data.get('source') or error_data.get('path_or_url')
- except (json.JSONDecodeError, TypeError, IndexError) as e:
- logger.warning(f"Could not parse exception metadata for task {task_id}: {e}")
+ error_data = self._extract_error_metadata_from_exc_message(
+ result.get("exc_message")
+ )
+ if error_data:
+ task_index_name = error_data.get('index_name')
+ task_source = error_data.get('source') or error_data.get('path_or_url')
# Match both index name and document path/source
if task_index_name == index_name and task_source == path_or_url:
@@ -728,6 +716,122 @@ def save_progress_info(self, task_id: str, processed_chunks: int, total_chunks:
logger.error(f"Failed to save progress info for task {task_id}: {str(e)}")
return False
+    def increment_progress_info(self, task_id: str, delta_processed: int, total_chunks: Optional[int] = None, ttl_hours: int = 24) -> bool:
+        """
+        Atomically increment processed chunks for a task.
+
+        Progress is stored under the Redis key ``progress:{task_id}`` as a JSON
+        object with ``processed_chunks`` and ``total_chunks``. The update is a
+        WATCH / MULTI / EXEC read-modify-write that is retried, up to 5 times,
+        whenever the transaction raises ``redis.WatchError``.
+
+        Args:
+            task_id: Task whose progress is updated; must be non-empty.
+            delta_processed: Number of chunks to add. Values <= 0 are a no-op.
+            total_chunks: Optional total, forwarded to ``_parse_progress`` and
+                ``_compute_next_progress``.
+            ttl_hours: Expiry, in hours, set on the key with every successful write.
+
+        Returns:
+            True if the new value was written or there was nothing to add.
+            False if ``task_id`` is empty, an unexpected error occurred, or
+            every attempt ended in a ``redis.WatchError``.
+        """
+        if not task_id:
+            logger.error("Cannot increment progress info: task_id is empty")
+            return False
+        if delta_processed <= 0:
+            # Nothing to add; report success without touching Redis
+            return True
+
+        progress_key = f"progress:{task_id}"
+        ttl_seconds = ttl_hours * 3600
+        max_retries = 5
+
+        for attempt in range(max_retries):
+            pipe = self.client.pipeline()
+            try:
+                pipe.watch(progress_key)
+                # NOTE(review): relies on the client executing commands immediately
+                # after watch(), so get() returns the stored value - confirm
+                # against the redis-py version in use
+                raw = pipe.get(progress_key)
+                current_processed, current_total = self._parse_progress(raw, total_chunks)
+                new_processed, current_total = self._compute_next_progress(
+                    current_processed=current_processed,
+                    delta_processed=delta_processed,
+                    current_total=current_total,
+                    total_chunks=total_chunks,
+                )
+
+                payload = json.dumps({
+                    "processed_chunks": new_processed,
+                    "total_chunks": current_total,
+                })
+
+                # Queue the write and commit it as one transaction
+                pipe.multi()
+                pipe.setex(progress_key, ttl_seconds, payload)
+                pipe.execute()
+                logger.info(
+                    f"[REDIS PROGRESS] Incremented progress for task {task_id}: "
+                    f"+{delta_processed}, now {new_processed}/{current_total}"
+                )
+                return True
+            except redis.WatchError:
+                # Transaction was aborted; re-read and try again
+                continue
+            except Exception as exc:
+                logger.warning(f"Failed to increment progress for task {task_id}: {exc}")
+                return False
+            finally:
+                # Runs on every exit path of the attempt, including return and continue
+                pipe.reset()
+
+        logger.warning(f"Failed to increment progress for task {task_id}: too many concurrent updates")
+        return False
+
+    def _parse_progress(self, raw: Any, total_chunks: Optional[int]) -> Tuple[int, int]:
+        """
+        Decode a stored progress payload into ``(processed, total)``.
+
+        Args:
+            raw: Value read from Redis; ``bytes`` are decoded as UTF-8 before
+                being parsed as JSON. A falsy value means nothing is stored.
+            total_chunks: Caller-supplied total. When truthy it overrides the
+                total found in the payload.
+
+        Returns:
+            ``(processed_chunks, total_chunks)``. If nothing is stored or the
+            payload cannot be parsed, ``(0, int(total_chunks or 0))``.
+        """
+        fallback = (0, int(total_chunks or 0))
+        if not raw:
+            return fallback
+
+        text = raw.decode("utf-8") if isinstance(raw, bytes) else raw
+
+        try:
+            payload = json.loads(text)
+            done = int(payload.get("processed_chunks", 0) or 0)
+            if total_chunks:
+                total = fallback[1]
+            else:
+                total = int(payload.get("total_chunks", 0) or 0)
+        except Exception:
+            # Tolerate malformed or unexpected payloads by starting from zero
+            return fallback
+        return done, total
+
+    def _compute_next_progress(
+        self,
+        current_processed: int,
+        delta_processed: int,
+        current_total: int,
+        total_chunks: Optional[int],
+    ) -> Tuple[int, int]:
+        """
+        Work out the progress values to store after adding ``delta_processed``.
+
+        The stored total is kept when it is positive; otherwise a truthy
+        ``total_chunks`` is used. When the resulting total is positive, the
+        processed count is capped at it.
+
+        Returns:
+            ``(processed, total)`` to persist.
+        """
+        processed = current_processed + int(delta_processed)
+        total = int(current_total or 0)
+
+        # Only fall back to the caller's total when no positive total is known yet
+        if total_chunks and total <= 0:
+            total = int(total_chunks)
+
+        if total > 0 and processed > total:
+            processed = total
+
+        return processed, total
+
+    def _extract_error_metadata_from_exc_message(self, exc_message: Any) -> Optional[Dict[str, Any]]:
+        """
+        Pull a JSON object out of an exception message, tolerating bad escaping.
+
+        The text between the first ``{`` and the last ``}`` is tried three ways:
+        as-is, with ``\\"`` replaced by ``"``, and with every backslash that does
+        not start a JSON escape doubled. The first attempt that parses to a dict
+        is returned.
+
+        Returns:
+            The parsed dict, or None if the message has no braces, no attempt
+            yields a dict, or any error occurs.
+        """
+        try:
+            text = str(exc_message or "")
+            if not ("{" in text and "}" in text):
+                return None
+
+            start = text.find("{")
+            end = text.rfind("}") + 1
+            blob = text[start:end]
+            attempts = (
+                blob,
+                blob.replace('\\"', '"'),
+                re.sub(r'\\(?!["\\/bfnrtu])', r'\\\\', blob),
+            )
+            for attempt in attempts:
+                try:
+                    decoded = json.loads(attempt)
+                except Exception:
+                    continue
+                if isinstance(decoded, dict):
+                    return decoded
+            return None
+        except Exception:
+            return None
+
def get_progress_info(self, task_id: str) -> Optional[Dict[str, int]]:
"""
Get progress information for a specific task
diff --git a/backend/services/tool_configuration_service.py b/backend/services/tool_configuration_service.py
index 88edfba17..5e5229ff6 100644
--- a/backend/services/tool_configuration_service.py
+++ b/backend/services/tool_configuration_service.py
@@ -15,7 +15,6 @@
from consts.const import DATA_PROCESS_SERVICE, LOCAL_MCP_SERVER, MCP_MANAGEMENT_API
from consts.exceptions import MCPConnectionError, NotFoundException, ToolExecutionException
from consts.model import ToolInstanceInfoRequest, ToolInfo, ToolSourceEnum, ToolValidateRequest
-from database.client import minio_client
from database.outer_api_tool_db import (
upsert_openapi_service,
query_openapi_services_by_tenant,
@@ -37,11 +36,11 @@
from database.knowledge_db import get_knowledge_name_map_by_index_names
from mcpadapt.smolagents_adapter import _sanitize_function_name
from services.file_management_service import get_llm_model, validate_urls_access
-from services.vectordatabase_service import get_embedding_model, get_rerank_model, get_vector_db_core
+from services.vectordatabase_service import get_embedding_model_by_index_name, get_rerank_model
from database.client import minio_client
from services.image_service import get_vlm_model
from nexent.monitor import set_monitoring_context, set_monitoring_operation
-from services.vectordatabase_service import get_embedding_model, get_vector_db_core
+from services.vectordatabase_service import get_vector_db_core
from utils.langchain_utils import discover_langchain_modules
from utils.tool_utils import get_local_tools_classes, get_local_tools_description_zh
@@ -704,7 +703,19 @@ def _validate_local_tool(
instantiation_params[param_name] = param.default
if tool_name == "knowledge_base_search":
- embedding_model = get_embedding_model(tenant_id=tenant_id)
+ index_names = instantiation_params.get("index_names", [])
+
+ # Must have embedding model for knowledge base search
+ if not index_names or not tenant_id:
+ raise ToolExecutionException(
+ "Embedding model is required for knowledge_base_search but index_names or tenant_id is missing")
+
+ embedding_model, model_id, _ = get_embedding_model_by_index_name(tenant_id, index_names[0])
+ if not embedding_model:
+ raise ToolExecutionException(
+ f"No embedding model found for index '{index_names[0]}'. "
+ f"Please configure an embedding model for this knowledge base.")
+
vdb_core = get_vector_db_core()
# Get rerank configuration
@@ -715,7 +726,6 @@ def _validate_local_tool(
rerank_model = get_rerank_model(tenant_id=tenant_id, model_name=rerank_model_name)
# Build display_name to index_name mapping for LLM parameter conversion
- index_names = instantiation_params.get("index_names", [])
display_name_to_index_map = {}
if index_names:
knowledge_name_map = get_knowledge_name_map_by_index_names(index_names)
diff --git a/backend/services/vectordatabase_service.py b/backend/services/vectordatabase_service.py
index 5639103de..8ad9b54e2 100644
--- a/backend/services/vectordatabase_service.py
+++ b/backend/services/vectordatabase_service.py
@@ -36,11 +36,14 @@
update_knowledge_record,
get_knowledge_info_by_tenant_id,
update_model_name_by_index_name,
+ update_last_doc_update_time,
+ update_last_summary_time,
+ update_embedding_model_by_index_name,
)
from utils.str_utils import convert_list_to_string
from database.user_tenant_db import get_user_tenant_by_user_id
from database.group_db import query_group_ids_by_user
-from database.model_management_db import get_model_records
+from database.model_management_db import get_model_by_display_name, get_model_by_model_id, get_model_records
from services.redis_service import get_redis_service
from services.group_service import get_tenant_default_group_id
from utils.config_utils import tenant_config_manager, get_model_name_from_config
@@ -76,6 +79,111 @@ def _update_progress(task_id: str, processed: int, total: int):
f"[PROGRESS CALLBACK] Exception updating progress for task {task_id}: {str(e)}")
+def _get_embedding_model_display_name(model_id: Optional[int], tenant_id: str) -> str:
+    """
+    Look up the display name of a model record.
+
+    Args:
+        model_id: ID of the model to look up; ``None`` short-circuits the lookup
+        tenant_id: Tenant ID passed to the model lookup
+
+    Returns:
+        The record's ``display_name`` when a record is found. An empty string
+        when ``model_id`` is None, no record is found, or the lookup raises
+        (the error is logged as a warning).
+    """
+    if model_id is not None:
+        try:
+            record = get_model_by_model_id(model_id, tenant_id)
+            if record:
+                return record.get("display_name", "")
+        except Exception as e:
+            logger.warning(f"Failed to get display_name for model_id {model_id}: {e}")
+    return ""
+
+
+class KnowledgeBaseNeedsModelConfigError(Exception):
+    """Raised when a knowledge base needs an embedding model to be configured.
+
+    The instance exposes ``index_name`` and ``message``; when no message is
+    given, a default one naming the knowledge base is used.
+    """
+
+    def __init__(self, index_name: str, message: str = None):
+        if not message:
+            message = f"Knowledge base '{index_name}' needs an embedding model to be configured"
+        self.index_name = index_name
+        self.message = message
+        super().__init__(message)
+
+
+def get_embedding_model_by_index_name(tenant_id: str, index_name: str) -> tuple[Optional[Any], Optional[int], dict]:
+    """
+    Resolve the embedding model configured for a knowledge base.
+
+    Args:
+        tenant_id: Tenant ID
+        index_name: The index name of the knowledge base
+
+    Returns:
+        Tuple of (embedding model instance or None, model_id or None, metadata dict).
+        The metadata dict always has exactly these keys:
+            "status": "ok" when a model was resolved, "needs_config" when the
+                record has no usable ``embedding_model_id``, "error" when the
+                knowledge base is not found or an exception was raised
+            "needs_update": always False
+            "message": human-readable status text
+
+    Design principles:
+        - Only the record's ``embedding_model_id`` is used to resolve a model
+        - A record with just ``embedding_model_name`` is reported as "needs_config"
+        - No tenant default model is substituted
+    """
+
+    def _meta(status: str, message: str) -> dict:
+        # Every outcome reports needs_update=False
+        return {"status": status, "needs_update": False, "message": message}
+
+    try:
+        record = get_knowledge_record({
+            "index_name": index_name,
+            "tenant_id": tenant_id
+        })
+        if not record:
+            return None, None, _meta("error", f"Knowledge base '{index_name}' not found")
+
+        model_id = record.get("embedding_model_id")
+        if model_id:
+            model, _resolved_id = get_embedding_model_by_id(tenant_id, model_id)
+            if model:
+                return model, model_id, _meta("ok", "Embedding model found")
+            # The ID is set but does not resolve; report it and fall through to needs_config
+            logger.warning(f"Model ID {model_id} specified for index '{index_name}' but model not found")
+
+        if record.get("embedding_model_name"):
+            # A name is recorded but there is no resolvable model_id
+            logger.warning(f"Index '{index_name}' has embedding_model_name but no valid model_id, needs explicit configuration")
+        else:
+            logger.error(f"Index '{index_name}' has no embedding model configured")
+
+        return None, None, _meta(
+            "needs_config",
+            f"No embedding model configured for knowledge base '{index_name}'. Please select a model.",
+        )
+    except Exception as e:
+        logger.warning(f"Failed to get embedding model for index {index_name}: {e}")
+        return None, None, _meta("error", str(e))
+
+
ALLOWED_CHUNK_FIELDS = {
"id",
"title",
@@ -176,70 +284,105 @@ def check_knowledge_base_exist_impl(knowledge_name: str, vdb_core: VectorDatabas
return {"status": "available"}
-def get_embedding_model(tenant_id: str, model_name: Optional[str] = None):
+def get_embedding_model(tenant_id: str, model_name: Optional[str] = None) -> tuple[Optional[Any], Optional[int]]:
"""
Get the embedding model for the tenant, optionally using a specific model name.
Args:
tenant_id: Tenant ID
- model_name: Optional specific model name to use (format: "model_repo/model_name" or just "model_name")
- If provided, will try to find the model in the tenant's model list.
+ model_name: Optional display name of the embedding model to use.
+ If provided, will find the model by display_name in the tenant's model list.
Returns:
- Embedding model instance or None
+ Tuple of (embedding model instance or None, model_id or None)
"""
- # If model_name is provided, try to find it in the tenant's models
+ # If model_name is provided, find the model by display_name
if model_name:
try:
- models = get_model_records({"model_type": "embedding"}, tenant_id)
- for model in models:
- model_display_name = model.get("model_repo") + "/" + model["model_name"] if model.get("model_repo") else model["model_name"]
- if model_display_name == model_name:
- # Found the model, create embedding instance
- model_config = {
- "model_repo": model.get("model_repo", ""),
- "model_name": model["model_name"],
- "api_key": model.get("api_key", ""),
- "base_url": model.get("base_url", ""),
- "model_type": "embedding",
- "max_tokens": model.get("max_tokens", 1024),
- "ssl_verify": model.get("ssl_verify", True),
- }
- return OpenAICompatibleEmbedding(
+ model = get_model_by_display_name(model_name, tenant_id)
+ if model and model.get("model_type") in ["embedding", "multi_embedding"]:
+ model_config = {
+ "model_repo": model.get("model_repo", ""),
+ "model_name": model["model_name"],
+ "api_key": model.get("api_key", ""),
+ "base_url": model.get("base_url", ""),
+ "model_type": model.get("model_type", "embedding"),
+ "max_tokens": model.get("max_tokens", 1024),
+ "ssl_verify": model.get("ssl_verify", True),
+ }
+ model_type = model.get("model_type", "embedding")
+ if model_type == "multi_embedding":
+ embedding_model = JinaEmbedding(
+ api_key=model_config.get("api_key", ""),
+ base_url=model_config.get("base_url", ""),
+ model_name=get_model_name_from_config(model_config) or "",
+ embedding_dim=model_config.get("max_tokens", 1024),
+ ssl_verify=model_config.get("ssl_verify", True),
+ )
+ else:
+ embedding_model = OpenAICompatibleEmbedding(
api_key=model_config.get("api_key", ""),
base_url=model_config.get("base_url", ""),
model_name=get_model_name_from_config(model_config) or "",
embedding_dim=model_config.get("max_tokens", 1024),
ssl_verify=model_config.get("ssl_verify", True),
)
+ return embedding_model, model.get("model_id")
+ else:
+ logger.warning(f"Model '{model_name}' not found or is not an embedding model")
except Exception as e:
logger.warning(f"Failed to get embedding model by name {model_name}: {e}")
- # Fall back to default embedding model (current behavior)
- model_config = tenant_config_manager.get_model_config(
- key="EMBEDDING_ID", tenant_id=tenant_id)
+ # No default fallback - return None, None when no model is specified or found
+ return None, None
- model_type = model_config.get("model_type", "")
- if model_type == "embedding":
- # Get the es core
- return OpenAICompatibleEmbedding(
- api_key=model_config.get("api_key", ""),
- base_url=model_config.get("base_url", ""),
- model_name=get_model_name_from_config(model_config) or "",
- embedding_dim=model_config.get("max_tokens", 1024),
- ssl_verify=model_config.get("ssl_verify", True),
- )
- elif model_type == "multi_embedding":
- return JinaEmbedding(
- api_key=model_config.get("api_key", ""),
- base_url=model_config.get("base_url", ""),
- model_name=get_model_name_from_config(model_config) or "",
- embedding_dim=model_config.get("max_tokens", 1024),
- ssl_verify=model_config.get("ssl_verify", True),
- )
- else:
- return None
+def get_embedding_model_by_id(tenant_id: str, model_id: int) -> tuple[Optional[Any], Optional[int]]:
+    """
+    Build an embedding client from the model record with the given ID.
+
+    Records of type "multi_embedding" produce a ``JinaEmbedding``; records of
+    type "embedding" produce an ``OpenAICompatibleEmbedding``. The record's
+    ``max_tokens`` (default 1024) is passed as the embedding dimension.
+
+    Args:
+        tenant_id: Tenant ID
+        model_id: Model ID to query
+
+    Returns:
+        Tuple of (embedding model instance, the record's model_id), or
+        (None, None) when the record is missing, is not an embedding model,
+        or any error occurs (a warning is logged in each case).
+    """
+    try:
+        record = get_model_by_model_id(model_id, tenant_id)
+        if not record or record.get("model_type") not in ["embedding", "multi_embedding"]:
+            logger.warning(f"Model with id {model_id} not found or is not an embedding model")
+            return None, None
+
+        model_type = record.get("model_type", "embedding")
+        model_config = {
+            "model_repo": record.get("model_repo", ""),
+            "model_name": record["model_name"],
+            "api_key": record.get("api_key", ""),
+            "base_url": record.get("base_url", ""),
+            "model_type": model_type,
+            "max_tokens": record.get("max_tokens", 1024),
+            "ssl_verify": record.get("ssl_verify", True),
+        }
+
+        # Both client classes are constructed with the same keyword arguments
+        embedding_cls = JinaEmbedding if model_type == "multi_embedding" else OpenAICompatibleEmbedding
+        embedding_model = embedding_cls(
+            api_key=model_config["api_key"],
+            base_url=model_config["base_url"],
+            model_name=get_model_name_from_config(model_config) or "",
+            embedding_dim=model_config["max_tokens"],
+            ssl_verify=model_config["ssl_verify"],
+        )
+        return embedding_model, record.get("model_id")
+    except Exception as e:
+        logger.warning(f"Failed to get embedding model by id {model_id}: {e}")
+    return None, None
def get_rerank_model(tenant_id: str, model_name: Optional[str] = None):
@@ -415,11 +558,19 @@ def create_index(
None, description="ID of the user creating the knowledge base"),
tenant_id: Optional[str] = Body(
None, description="ID of the tenant creating the knowledge base"),
+ model_id: Optional[int] = Body(
+ None, description="ID of the embedding model to use"),
):
try:
if vdb_core.check_index_exists(index_name):
raise Exception(f"Index {index_name} already exists")
- embedding_model = get_embedding_model(tenant_id)
+
+ # Get embedding model by model_id if provided
+ if model_id:
+ embedding_model, actual_model_id = get_embedding_model_by_id(tenant_id, model_id)
+ else:
+ embedding_model, actual_model_id = None, None
+
success = vdb_core.create_index(index_name, embedding_dim=embedding_dim or (
embedding_model.embedding_dim if embedding_model else 1024))
if not success:
@@ -427,7 +578,8 @@ def create_index(
knowledge_data = {"index_name": index_name,
"created_by": user_id,
"tenant_id": tenant_id,
- "embedding_model_name": embedding_model.model}
+ "embedding_model_name": embedding_model.model if embedding_model else None,
+ "embedding_model_id": actual_model_id}
create_knowledge_record(knowledge_data)
return {"status": "success", "message": f"Index {index_name} created successfully"}
except Exception as e:
@@ -468,7 +620,7 @@ def create_knowledge_base(
"""
try:
# Get embedding model - use user-selected model if provided, otherwise use tenant default
- embedding_model = get_embedding_model(tenant_id, embedding_model_name)
+ embedding_model, model_id = get_embedding_model(tenant_id, embedding_model_name)
# Determine the embedding model name to save: use user-provided name if available,
# otherwise use the model's display name
@@ -483,6 +635,7 @@ def create_knowledge_base(
"user_id": user_id,
"tenant_id": tenant_id,
"embedding_model_name": saved_embedding_model_name,
+ "embedding_model_id": model_id,
}
# Add group permission and group IDs if provided
@@ -570,6 +723,77 @@ def update_knowledge_base(
return result
+    @staticmethod
+    def update_embedding_model(
+        index_name: str,
+        model_id: int,
+        tenant_id: str,
+        user_id: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Update the embedding model for a knowledge base.
+
+        The model is looked up by ``model_id`` for the tenant and must have a
+        ``model_type`` of "embedding" or "multi_embedding". On success the
+        knowledge base record is updated with the model id and the model's
+        display name.
+
+        Args:
+            index_name: Internal index name of the knowledge base
+            model_id: ID of the embedding model to use
+            tenant_id: Tenant ID
+            user_id: ID of the user making the update (stored as "" when omitted)
+
+        Returns:
+            Dict with the keys "status", "index_name", "model_id",
+            "model_name", "model_display_name" and "message"
+
+        Raises:
+            ValueError: If model is not found or is not an embedding model
+            Exception: If update fails; the underlying error is chained as
+                ``__cause__``
+        """
+        try:
+            # Validate the model exists and is an embedding model
+            model = get_model_by_model_id(model_id, tenant_id)
+            if not model:
+                raise ValueError(f"Model with id {model_id} not found")
+
+            if model.get("model_type") not in ["embedding", "multi_embedding"]:
+                raise ValueError(
+                    f"Model '{model.get('display_name', model_id)}' is not an embedding model. "
+                    f"Please select an embedding model."
+                )
+
+            # The display name is what gets persisted as embedding_model_name and
+            # what is reported back to the caller, so resolve it exactly once.
+            model_display_name = model.get("display_name")
+            success = update_embedding_model_by_index_name(
+                index_name=index_name,
+                embedding_model_id=model_id,
+                embedding_model_name=model_display_name,
+                tenant_id=tenant_id,
+                user_id=user_id or ""
+            )
+
+            if not success:
+                raise Exception(f"Failed to update embedding model for index '{index_name}'")
+
+            logger.info(
+                f"Embedding model updated for knowledge base '{index_name}' "
+                f"to model '{model.get('display_name', model_id)}' (id: {model_id}) by user '{user_id}'"
+            )
+
+            return {
+                "status": "success",
+                "index_name": index_name,
+                "model_id": model_id,
+                # Both keys carry the display name; both are kept so existing
+                # consumers of either key continue to work.
+                "model_name": model_display_name,
+                "model_display_name": model_display_name,
+                "message": f"Embedding model updated successfully to '{model_display_name}'"
+            }
+
+        except ValueError:
+            # Validation errors are surfaced to the caller unchanged.
+            raise
+        except Exception as e:
+            logger.error(f"Failed to update embedding model for index '{index_name}': {e}")
+            # Chain explicitly so the root cause stays attached to the new error.
+            raise Exception(f"Failed to update embedding model: {str(e)}") from e
+
+
@staticmethod
async def delete_index(
index_name: str = Path(...,
@@ -774,6 +998,11 @@ def list_indices(
index_name = record["index_name"]
index_stats = indice_stats.get(index_name, {})
+ # Get embedding model display_name from model_id
+ model_id = record.get("embedding_model_id")
+ tenant_id = record.get("tenant_id") or target_tenant_id
+ embedding_model_display_name = _get_embedding_model_display_name(model_id, tenant_id)
+
stats_info.append({
# Internal index name (used as ID)
"name": index_name,
@@ -785,8 +1014,14 @@ def list_indices(
"knowledge_sources": record["knowledge_sources"],
"ingroup_permission": record["ingroup_permission"],
"tenant_id": record.get("tenant_id"),
+ # Embedding model info: display_name from model_id
+ "embedding_model_name": embedding_model_display_name or record.get("embedding_model_name", ""),
+ "embedding_model_id": model_id,
# Update time for sorting and display
"update_time": record.get("update_time"),
+ # Auto-summary settings
+ "summary_frequency": record.get("summary_frequency"),
+ "last_summary_time": record.get("last_summary_time"),
"stats": index_stats,
})
@@ -812,6 +1047,9 @@ def index_documents(
] = Body(..., description="Document List to process"),
vdb_core: VectorDatabaseCore = Depends(get_vector_db_core),
task_id: Optional[str] = None,
+ model_id: Optional[int] = Body(
+ None, description="ID of the embedding model to use"),
+ large_mode: bool = False,
):
"""
Index documents and create vector embeddings, create index if it doesn't exist
@@ -821,6 +1059,8 @@ def index_documents(
index_name: Index name
data: List containing document data to be indexed
vdb_core: VectorDatabaseCore instance
+ task_id: Optional task ID for progress tracking
+ model_id: Optional model ID for the embedding model
Returns:
IndexingResponse object containing indexing result information
@@ -833,7 +1073,7 @@ def index_documents(
if not vdb_core.check_index_exists(index_name):
try:
ElasticSearchService.create_index(
- index_name, vdb_core=vdb_core)
+ index_name, vdb_core=vdb_core, model_id=model_id)
logger.info(f"Created new index {index_name}")
except Exception as create_error:
raise Exception(
@@ -939,6 +1179,7 @@ def index_documents(
embedding_model=embedding_model,
documents=documents,
embedding_batch_size=embedding_batch_size,
+ large_mode=large_mode,
progress_callback=lambda processed, total: _update_progress(
task_id, processed, total) if task_id else None
)
@@ -959,6 +1200,9 @@ def index_documents(
logger.warning(
f"[REDIS PROGRESS] Exception updating final progress for task {task_id}: {str(e)}")
+ # Update last_doc_update_time for auto-summary tracking
+ update_last_doc_update_time(index_name)
+
return {
"success": True,
"message": f"Successfully indexed {total_indexed} documents",
@@ -1228,6 +1472,10 @@ def delete_documents(
index_name, path_or_url)
# 2. Delete MinIO file
minio_result = delete_file(path_or_url)
+
+ # Update last_doc_update_time for auto-summary tracking
+ update_last_doc_update_time(index_name)
+
return {"status": "success", "deleted_es_count": deleted_count, "deleted_minio": minio_result.get("success")}
@staticmethod
@@ -1450,6 +1698,8 @@ def change_summary(
"index_name": index_name
}
update_knowledge_record(update_data)
+ # Update last_summary_time for auto-summary tracking
+ update_last_summary_time(index_name)
return {"status": "success", "message": f"Index {index_name} summary updated successfully",
"summary": summary_result}
except Exception as e:
@@ -1550,23 +1800,23 @@ def create_chunk(
Automatically generates and stores embedding for semantic search.
"""
try:
- # Get knowledge base's embedding model name
- embedding_model_name = None
+ # Get knowledge base's embedding model by model_id
+ embedding_model_id = None
if tenant_id:
try:
knowledge_record = get_knowledge_record({
"index_name": index_name,
"tenant_id": tenant_id
})
- embedding_model_name = knowledge_record.get("embedding_model_name") if knowledge_record else None
+ embedding_model_id = knowledge_record.get("embedding_model_id") if knowledge_record else None
except Exception as e:
- logger.warning(f"Failed to get embedding model name for index {index_name}: {e}")
+ logger.warning(f"Failed to get embedding model id for index {index_name}: {e}")
# Generate embedding if we have content and can get embedding model
embedding_vector = None
if chunk_request.content:
try:
- embedding_model = get_embedding_model(tenant_id, embedding_model_name) if tenant_id else None
+ embedding_model = get_embedding_model_by_id(tenant_id, embedding_model_id)[0] if tenant_id and embedding_model_id else None
if embedding_model:
embeddings = embedding_model.get_embeddings(chunk_request.content)
if embeddings and len(embeddings) > 0:
@@ -1596,8 +1846,8 @@ def create_chunk(
# Add embedding if generated
if embedding_vector:
chunk_payload["embedding"] = embedding_vector
- if embedding_model_name:
- chunk_payload["embedding_model_name"] = embedding_model_name
+ if embedding_model_id:
+ chunk_payload["embedding_model_id"] = embedding_model_id
result = vdb_core.create_chunk(index_name, chunk_payload)
return {
@@ -1700,10 +1950,23 @@ def search_hybrid(
if weight_accurate < 0 or weight_accurate > 1:
raise ValueError("weight_accurate must be between 0 and 1")
- embedding_model = get_embedding_model(tenant_id)
+ # Get embedding model from the first index's knowledge base record
+ if not index_names:
+ raise ValueError("At least one index name is required")
+
+ embedding_model, model_id, meta = get_embedding_model_by_index_name(tenant_id, index_names[0])
+
if not embedding_model:
- raise ValueError(
- "No embedding model configured for the current tenant")
+ if meta.get("status") == "needs_config":
+ # Return a clear error indicating model needs to be configured
+ raise KnowledgeBaseNeedsModelConfigError(
+ index_name=index_names[0],
+ message=f"Knowledge base '{index_names[0]}' does not have an embedding model configured. Please select a model in the knowledge base settings."
+ )
+ else:
+ raise ValueError(
+ f"No embedding model found for index '{index_names[0]}'. "
+ f"Please configure an embedding model for this knowledge base.")
start_time = time.perf_counter()
raw_results = vdb_core.hybrid_search(
@@ -1729,6 +1992,8 @@ def search_hybrid(
"total": len(formatted_results),
"query_time_ms": elapsed_ms,
}
+ except KnowledgeBaseNeedsModelConfigError:
+ raise
except ValueError:
raise
except Exception as exc:
diff --git a/backend/services/voice_service.py b/backend/services/voice_service.py
index 05dba6231..80d6264db 100644
--- a/backend/services/voice_service.py
+++ b/backend/services/voice_service.py
@@ -1,147 +1,219 @@
-import asyncio
import logging
-from typing import Any, Optional
+from typing import Any, Dict, Optional
-from nexent.core.models.stt_model import STTConfig, STTModel
-from nexent.core.models.tts_model import TTSConfig, TTSModel
+from nexent.core.models.stt_model import BaseSTTModel
+from nexent.core.models.volc_stt_model import VolcSTTConfig, VolcSTTModel
+from nexent.core.models.ali_stt_model import AliSTTConfig, AliSTTModel
-from consts.const import APPID, CLUSTER, SPEED_RATIO, TEST_VOICE_PATH, TOKEN, VOICE_TYPE
+from consts.const import TEST_PCM_PATH
from consts.exceptions import (
VoiceServiceException,
STTConnectionException,
- TTSConnectionException,
- VoiceConfigException
)
+from database.model_management_db import get_model_records
+from utils.config_utils import tenant_config_manager
logger = logging.getLogger("voice_service")
class VoiceService:
- """Voice service that handles STT and TTS operations"""
-
- def __init__(self):
- """Initialize the voice service with configurations from const.py"""
- try:
- # Initialize STT configuration
- self.stt_config = STTConfig(
- appid=APPID,
- token=TOKEN
- )
-
- # Initialize TTS configuration
- self.tts_config = TTSConfig(
- appid=APPID,
- token=TOKEN,
- cluster=CLUSTER,
- voice_type=VOICE_TYPE,
- speed_ratio=SPEED_RATIO
- )
-
- # Initialize models
- self.stt_model = STTModel(self.stt_config, TEST_VOICE_PATH)
- self.tts_model = TTSModel(self.tts_config)
-
- except Exception as e:
- logger.error(f"Failed to initialize voice service: {str(e)}")
- raise VoiceConfigException(f"Voice service initialization failed: {str(e)}") from e
-
- async def start_stt_streaming_session(self, websocket) -> None:
+ """Voice service that handles STT operations"""
+
+ def _get_stt_model_from_config(
+ self,
+ model_factory: Optional[str] = None,
+ model_name: Optional[str] = None,
+ api_key: Optional[str] = None,
+ model_appid: Optional[str] = None,
+ access_token: Optional[str] = None,
+ base_url: Optional[str] = None,
+ language: str = "zh"
+ ) -> BaseSTTModel:
"""
- Start STT streaming session
+ Get the appropriate STT model based on model factory configuration.
+ A Volcano model is built when model_factory matches one of the Volcano
+ aliases (case-insensitive); any other value, including None or "",
+ yields an Ali model.
Args:
- websocket: WebSocket connection for real-time audio streaming
+ model_factory: Model factory/vendor name
+ model_name: Model name (Ali STT only; defaults to "qwen3-asr-flash-realtime")
+ api_key: API key (for Ali STT)
+ model_appid: Application ID (for Volcano STT)
+ access_token: Access token (for Volcano STT)
+ base_url: Custom WebSocket URL (optional)
+ language: Language for speech recognition (passed to Ali STT only)
- Raises:
- STTConnectionException: If STT streaming fails
+ Returns:
+ STT model instance based on configuration
"""
- try:
- logger.info("Starting STT streaming session")
- await self.stt_model.start_streaming_session(websocket)
- except Exception as e:
- logger.error(f"STT streaming session failed: {str(e)}")
- raise STTConnectionException(f"STT streaming failed: {str(e)}") from e
+ # Falsy (None or "") when no factory is given, otherwise the result of the alias membership test.
+ use_volc = model_factory and model_factory.lower() in ["volc", "volcano", "volcengine", "火山引擎"]
+
+ if use_volc:
+ # Missing credentials are passed through as empty strings rather than None.
+ volc_config = VolcSTTConfig(
+ appid=model_appid or "",
+ access_token=access_token or "",
+ ws_url=base_url if base_url else "wss://openspeech.bytedance.com/api/v3/sauc/bigmodel",
+ format="pcm",
+ rate=16000
+ )
+ return VolcSTTModel(volc_config, TEST_PCM_PATH)
+ else:
+ # An empty base_url is normalised to None here.
+ # NOTE(review): presumably AliSTTConfig then applies its own default endpoint - confirm.
+ ali_config = AliSTTConfig(
+ api_key=api_key or "",
+ model=model_name or "qwen3-asr-flash-realtime",
+ language=language,
+ ws_url=base_url if base_url else None,
+ format="pcm",
+ rate=16000,
+ enable_vad=True,
+ timeout=5
+ )
+ return AliSTTModel(ali_config, TEST_PCM_PATH)
- async def generate_tts_speech(self, text: str, stream: bool = True) -> Any:
+ def _get_stt_model_from_tenant_config(
+ self,
+ tenant_id: str,
+ language: str = "zh"
+ ) -> BaseSTTModel:
"""
- Generate TTS speech from text
+ Get STT model based on tenant's model configuration.
+ Lookup order: the tenant's "stt" model config, then the first "stt"
+ model record, then the default configuration. Any error raised during
+ the lookup is logged and also falls back to the default configuration.
Args:
- text: Text to convert to speech
- stream: Whether to stream the audio or return complete audio
+ tenant_id: Tenant ID
+ language: Language for speech recognition
Returns:
- Audio data (streaming or complete)
-
- Raises:
- TTSConnectionException: If TTS generation fails
+ STT model instance based on tenant's configuration
"""
- if not text:
- raise VoiceServiceException("No text provided for TTS generation")
-
try:
- logger.info(f"Generating TTS speech for text: {text[:50]}...")
- speech_result = await self.tts_model.generate_speech(text, stream=stream)
- return speech_result
- except Exception as e:
- logger.error(f"TTS generation failed: {str(e)}")
- raise TTSConnectionException(f"TTS generation failed: {str(e)}") from e
+            # Prefer the tenant's configured STT model; the model records are
+            # only queried when that config is missing or empty.
+            source = tenant_config_manager.get_model_config(tenant_id, "stt") or None
+            if source is None:
+                model_records = get_model_records({"model_type": "stt"}, tenant_id)
+                source = model_records[0] if model_records else None
+
+            if source is not None:
+                # Both sources expose the same field names, so one extraction
+                # path serves the tenant config and the model record alike.
+                return self._get_stt_model_from_config(
+                    model_factory=source.get("model_factory", ""),
+                    model_name=source.get("model_name", ""),
+                    api_key=source.get("api_key", ""),
+                    model_appid=source.get("model_appid", ""),
+                    access_token=source.get("access_token", ""),
+                    base_url=source.get("base_url", ""),
+                    language=language
+                )
+
+            logger.warning(f"No STT model configuration found for tenant {tenant_id}, using default config")
+            return self._get_stt_model_from_config(language=language)
- async def stream_tts_to_websocket(self, websocket, text: str) -> None:
+ except Exception as e:
+ logger.error(f"Error getting STT model config for tenant {tenant_id}: {str(e)}")
+ return self._get_stt_model_from_config(language=language)
+
+ async def start_stt_streaming_session(
+ self,
+ websocket,
+ stt_config: Optional[Dict[str, Any]] = None,
+ tenant_id: Optional[str] = None,
+ language: str = "zh"
+ ) -> None:
"""
- Stream TTS audio to WebSocket with proper error handling and fallback
+ Start STT streaming session.
+ Model selection order: explicit client config (when it carries a factory,
+ API key or app id), then the tenant's stored config, then the default model.
Args:
- websocket: WebSocket connection to stream to
- text: Text to convert to speech
+ websocket: WebSocket connection for real-time audio streaming
+ stt_config: STT configuration dict from client (preferred)
+ tenant_id: Tenant ID for model lookup
+ language: Language for speech recognition (default: zh)
Raises:
- TTSConnectionException: If TTS service connection fails
- VoiceServiceException: If TTS streaming fails
+ STTConnectionException: If STT streaming fails
"""
try:
- # Generate and stream audio chunks
- speech_result = await self.generate_tts_speech(text, stream=True)
-
- # Check if it's an async iterator or a regular iterable
- if hasattr(speech_result, '__aiter__'):
- # It's an async iterator, use async for
- async for chunk in speech_result:
- if websocket.client_state.name == "CONNECTED":
- await websocket.send_bytes(chunk)
- else:
- break
- elif hasattr(speech_result, '__iter__'):
- # It's a regular iterator, use normal for
- for chunk in speech_result:
- if websocket.client_state.name == "CONNECTED":
- await websocket.send_bytes(chunk)
- else:
- break
+ model_factory = None
+ model_name = None
+ api_key = None
+ model_appid = None
+ access_token = None
+ base_url = None
+
+ if stt_config:
+ # Several fields are accepted under two spellings (e.g. "api_key" / "apiKey").
+ model_factory = stt_config.get("model_factory")
+ model_name = stt_config.get("model") or stt_config.get("model_name")
+ api_key = stt_config.get("api_key") or stt_config.get("apiKey")
+ model_appid = stt_config.get("model_appid") or stt_config.get("appid")
+ access_token = stt_config.get("access_token")
+ base_url = stt_config.get("base_url") or stt_config.get("baseUrl")
+ # Keep the caller's default when the client sends no, null or empty language.
+ language = stt_config.get("language") or language
else:
- # It's a single chunk, send it directly
- if websocket.client_state.name == "CONNECTED":
- await websocket.send_bytes(speech_result)
-
- await asyncio.sleep(0.1)
-
- except TypeError as te:
- # If speech_result is still a coroutine, try calling it directly without stream=True
- if "async for" in str(te) and "requires an object with __aiter__" in str(te):
- logger.error("Falling back to non-streaming TTS")
- speech_data = await self.generate_tts_speech(text, stream=False)
- if websocket.client_state.name == "CONNECTED":
- await websocket.send_bytes(speech_data)
+ logger.warning("No stt_config provided, will use tenant model config if available")
+
+ # Client config is used only when it identifies a vendor or carries credentials.
+ if model_factory or api_key or model_appid:
+ stt_model = self._get_stt_model_from_config(
+ model_factory=model_factory,
+ model_name=model_name,
+ api_key=api_key,
+ model_appid=model_appid,
+ access_token=access_token,
+ base_url=base_url,
+ language=language
+ )
+ elif tenant_id:
+ stt_model = self._get_stt_model_from_tenant_config(tenant_id, language)
else:
- raise
+ logger.warning("No tenant_id provided and no explicit config, using default Ali STT")
+ # api_key is necessarily empty/None here, since the first branch was not taken.
+ stt_model = self._get_stt_model_from_config(
+ api_key=api_key,
+ language=language
+ )
- # Send end marker after successful TTS generation
- if websocket.client_state.name == "CONNECTED":
- await websocket.send_json({"status": "completed"})
+ await stt_model.start_streaming_session(websocket)
+ except Exception as e:
+ logger.error(f"STT streaming session failed: {str(e)}")
+ raise STTConnectionException(f"STT streaming failed: {str(e)}") from e
- async def check_stt_connectivity(self) -> bool:
+ async def check_stt_connectivity(
+ self,
+ model_factory: Optional[str] = None,
+ api_key: Optional[str] = None,
+ model_appid: Optional[str] = None,
+ access_token: Optional[str] = None,
+ language: str = "zh",
+ model: str = "qwen3-asr-flash-realtime",
+ base_url: Optional[str] = None
+ ) -> bool:
"""
- Check STT service connectivity
+ Check STT service connectivity.
+
+ Args:
+ model_factory: Model factory/vendor name (e.g., "volc", "dashscope")
+ api_key: API key for Ali STT
+ model_appid: Application ID for Volcano STT
+ access_token: Access token for Volcano STT
+ language: Language for speech recognition (default: zh)
+ model: STT model name (default: qwen3-asr-flash-realtime)
+ base_url: Custom WebSocket URL (optional)
Returns:
bool: True if STT service is connected, False otherwise
@@ -150,8 +222,18 @@ async def check_stt_connectivity(self) -> bool:
STTConnectionException: If connectivity check fails
"""
try:
- logger.info(f"Checking STT connectivity with config: {self.stt_config}")
- connected = await self.stt_model.check_connectivity()
+ stt_model = self._get_stt_model_from_config(
+ model_factory=model_factory,
+ model_name=model,
+ api_key=api_key,
+ model_appid=model_appid,
+ access_token=access_token,
+ base_url=base_url,
+ language=language
+ )
+
+ connected = await stt_model.check_connectivity()
+
if not connected:
logger.error("STT service connection failed")
raise STTConnectionException("STT service connection failed")
@@ -162,53 +244,48 @@ async def check_stt_connectivity(self) -> bool:
logger.error(f"STT connectivity check failed: {str(e)}")
raise STTConnectionException(f"STT connectivity check failed: {str(e)}") from e
- async def check_tts_connectivity(self) -> bool:
+ async def check_voice_connectivity(
+ self,
+ model_type: str,
+ stt_config: Optional[Dict[str, Any]] = None
+ ) -> bool:
"""
- Check TTS service connectivity
-
- Returns:
- bool: True if TTS service is connected, False otherwise
-
- Raises:
- TTSConnectionException: If connectivity check fails
- """
- try:
- logger.info(f"Checking TTS connectivity with config: {self.tts_config}")
- connected = await self.tts_model.check_connectivity()
- if not connected:
- logger.error("TTS service connection failed")
- raise TTSConnectionException("TTS service connection failed")
- return connected
- except TTSConnectionException:
- raise
- except Exception as e:
- logger.error(f"TTS connectivity check failed: {str(e)}")
- raise TTSConnectionException(f"TTS connectivity check failed: {str(e)}") from e
-
- async def check_voice_connectivity(self, model_type: str) -> bool:
- """
- Check voice service connectivity based on model type
+ Check voice service connectivity based on model type.
Args:
- model_type: Type of model to check ('stt' or 'tts')
+ model_type: Type of model to check ('stt' only)
+ stt_config: Optional STT configuration dict
Returns:
- bool: True if the specified service is connected, False otherwise
+ bool: True if the service is connected, False otherwise
Raises:
VoiceServiceException: If model_type is invalid
STTConnectionException: If STT connectivity check fails
- TTSConnectionException: If TTS connectivity check fails
"""
+ if model_type != "stt":
+ logger.error(f"Unsupported model type: {model_type}")
+ raise VoiceServiceException(f"Unsupported model type: {model_type}")
+
try:
- if model_type == 'stt':
- return await self.check_stt_connectivity()
- elif model_type == 'tts':
- return await self.check_tts_connectivity()
- else:
- logger.error(f"Unknown model type: {model_type}")
- raise VoiceServiceException(f"Unknown model type: {model_type}")
- except (STTConnectionException, TTSConnectionException):
+ model_factory = stt_config.get("model_factory") if stt_config else None
+ api_key = stt_config.get("api_key") if stt_config else None
+ model_appid = stt_config.get("model_appid") if stt_config else None
+ access_token = stt_config.get("access_token") if stt_config else None
+ language = stt_config.get("language", "zh") if stt_config else "zh"
+ model = stt_config.get("model", "qwen3-asr-flash-realtime") if stt_config else "qwen3-asr-flash-realtime"
+ base_url = stt_config.get("base_url") if stt_config else None
+
+ return await self.check_stt_connectivity(
+ model_factory=model_factory,
+ api_key=api_key,
+ model_appid=model_appid,
+ access_token=access_token,
+ language=language,
+ model=model,
+ base_url=base_url
+ )
+ except STTConnectionException:
raise
except Exception as e:
logger.error(f"Voice service connectivity check failed: {str(e)}")
@@ -220,12 +297,7 @@ async def check_voice_connectivity(self, model_type: str) -> bool:
def get_voice_service() -> VoiceService:
- """
- Get the global voice service instance
-
- Returns:
- VoiceService: The global voice service instance
- """
+ """Get the global voice service instance."""
global _voice_service_instance
if _voice_service_instance is None:
_voice_service_instance = VoiceService()
diff --git a/backend/utils/a2a_http_client.py b/backend/utils/a2a_http_client.py
index 2bc829403..8b7c55d9f 100644
--- a/backend/utils/a2a_http_client.py
+++ b/backend/utils/a2a_http_client.py
@@ -134,6 +134,7 @@ async def get_json(
"User-Agent": "Nexent-A2A-Client/1.0",
"Accept": CONTENT_TYPE_JSON,
"Connection": "close",
+ "A2A-Version": "1.0",
}
if headers:
request_headers.update(headers)
@@ -141,14 +142,24 @@ async def get_json(
logger.debug(f"A2A GET request: url={url}")
try:
- _, body = await self._request_with_retry(
+ status, body = await self._request_with_retry(
"GET",
url,
headers=request_headers
)
+ # Decode body and handle empty responses
+ body_text = body.decode('utf-8') if body else ""
+
+ if not body_text.strip():
+ logger.error(
+ f"A2A GET received empty response for {url}: HTTP status={status}. "
+ f"Expected JSON response but got empty body."
+ )
+ raise ValueError(f"Empty response from {url} (HTTP {status})")
+
# Parse JSON from body
import json
- data = json.loads(body.decode('utf-8'))
+ data = json.loads(body_text)
return data
except asyncio.TimeoutError as e:
logger.error(f"A2A GET timeout for {url}: {e}")
@@ -156,6 +167,9 @@ async def get_json(
except aiohttp.ClientResponseError as e:
logger.error(f"A2A GET HTTP error for {url}: {e.status}")
raise
+ except ValueError:
+ # Re-raise ValueError unchanged: the empty-response error above, plus json.JSONDecodeError / UnicodeDecodeError (both ValueError subclasses)
+ raise
except Exception as e:
import traceback
logger.error(f"A2A GET request failed for {url}: {type(e).__name__}: {e}\n{traceback.format_exc()}")
@@ -176,6 +190,7 @@ async def post_json(
"Content-Type": CONTENT_TYPE_JSON,
"Accept": CONTENT_TYPE_JSON,
"Connection": "close",
+ "A2A-Version": "1.0",
}
if headers:
request_headers.update(headers)
@@ -183,15 +198,29 @@ async def post_json(
logger.info(f"A2A POST request: url={url}, payload={payload}")
try:
- _, body = await self._request_with_retry(
+ status, body = await self._request_with_retry(
"POST",
url,
json=payload,
headers=request_headers
)
+ # Decode body and handle empty responses
+ body_text = body.decode('utf-8') if body else ""
+
+ if not body_text.strip():
+ logger.error(
+ f"A2A POST received empty response for {url}: HTTP status={status}. "
+ f"This usually indicates the remote agent is not responding correctly. "
+ f"Check that the agent URL '{url}' is correct and the agent is running."
+ )
+ raise ValueError(
+ f"Empty response from agent at {url} (HTTP {status}). "
+ f"The agent may be unreachable, still processing, or the endpoint URL is incorrect."
+ )
+
# Parse JSON from body
import json
- data = json.loads(body.decode('utf-8'))
+ data = json.loads(body_text)
return data
except asyncio.TimeoutError as e:
logger.error(f"A2A POST timeout for {url}: {e}")
@@ -199,6 +228,9 @@ async def post_json(
except aiohttp.ClientResponseError as e:
logger.error(f"A2A POST HTTP error for {url}: {e.status}")
raise
+ except ValueError:
+ # Re-raise ValueError unchanged: the empty-response error above, plus json.JSONDecodeError / UnicodeDecodeError (both ValueError subclasses)
+ raise
except Exception as e:
import traceback
logger.error(f"A2A POST request failed for {url}: {type(e).__name__}: {e}\n{traceback.format_exc()}")
@@ -249,6 +281,7 @@ def build_a2a_headers(api_key: Optional[str] = None) -> Dict[str, str]:
headers = {
"Content-Type": CONTENT_TYPE_JSON,
"Accept": CONTENT_TYPE_JSON,
+ "A2A-Version": "1.0",
}
if api_key:
headers["Authorization"] = f"Bearer {api_key}"
diff --git a/backend/utils/nacos_client.py b/backend/utils/nacos_client.py
new file mode 100644
index 000000000..0fa87410a
--- /dev/null
+++ b/backend/utils/nacos_client.py
@@ -0,0 +1,624 @@
+"""
+Nacos Client for service discovery.
+
+Provides functionality to query service instances from Nacos service registry.
+Used by A2A agent discovery to find external A2A agents registered in Nacos.
+"""
+import logging
+from typing import Any, Dict, Optional
+
+import aiohttp
+
+logger = logging.getLogger(__name__)
+
+
+class NacosClientError(Exception):
+ """Base exception for Nacos client errors."""
+ pass
+
+
+class NacosConnectionError(NacosClientError):
+ """Raised when connection to Nacos fails."""
+ pass
+
+
+class NacosServiceNotFoundError(NacosClientError):
+ """Raised when the requested service is not found in Nacos."""
+ pass
+
+
+class NacosClient:
+ """Async client for Nacos service registry operations.
+
+ Provides methods to query service instances for A2A agent discovery.
+ """
+
+ def __init__(
+ self,
+ nacos_addr: str,
+ username: Optional[str] = None,
+ password: Optional[str] = None
+ ):
+ """Initialize Nacos client.
+
+ Args:
+ nacos_addr: Nacos server address (e.g., http://nacos-server:8848).
+ username: Optional Nacos username for authentication.
+ password: Optional Nacos password for authentication.
+ """
+ self.nacos_addr = nacos_addr.rstrip("/")
+ self.username = username
+ self.password = password
+ self._session: Optional[aiohttp.ClientSession] = None
+ self._access_token: Optional[str] = None
+
+ async def _get_session(self) -> aiohttp.ClientSession:
+ """Get or create an aiohttp session."""
+ if self._session is None or self._session.closed:
+ timeout = aiohttp.ClientTimeout(total=30)
+ self._session = aiohttp.ClientSession(timeout=timeout)
+ return self._session
+
+ async def close(self) -> None:
+ """Close the client session."""
+ if self._session and not self._session.closed:
+ await self._session.close()
+ self._session = None
+
+ def _build_auth_params(self) -> Dict[str, str]:
+ """Build authentication parameters for Nacos API requests."""
+ params = {}
+ if self.username:
+ params["username"] = self.username
+ if self.password:
+ params["password"] = self.password
+ return params
+
+ async def query_a2a_agent(
+ self,
+ agent_name: str,
+ namespace: str = "public"
+ ) -> Optional[Dict[str, Any]]:
+ """Query A2A agent info from Nacos using the dedicated A2A endpoint.
+
+ Args:
+ agent_name: The name of the A2A agent to query.
+ namespace: Nacos namespace ID (defaults to "public").
+
+ Returns:
+ A dict containing agent information:
+ - agent_name: Agent name
+ - agent_url: A2A agent endpoint URL
+ - metadata: Additional metadata
+ Or None if no agent is found.
+
+ Raises:
+ NacosConnectionError: If connection to Nacos fails.
+ """
+ params = self._build_auth_params()
+ agent_name = agent_name.strip()
+ params["agentName"] = agent_name
+ params["namespaceId"] = namespace.strip() if namespace else "public"
+
+ url = f"{self.nacos_addr}/nacos/v3/admin/ai/a2a"
+
+ try:
+ session = await self._get_session()
+ async with session.get(url, params=params) as response:
+ text = await response.text()
+
+ if response.status == 200:
+ data = await response.json()
+ return self._parse_a2a_response(data, agent_name)
+ elif response.status == 404:
+ logger.warning(
+ f"A2A agent '{agent_name}' not found in Nacos namespace '{namespace}'"
+ )
+ return None
+ else:
+ raise NacosConnectionError(
+ f"Nacos A2A API returned status {response.status}: {text}"
+ )
+
+ except aiohttp.ClientError as e:
+ logger.error(f"Failed to connect to Nacos at {self.nacos_addr}: {e}")
+ raise NacosConnectionError(f"Failed to connect to Nacos: {e}") from e
+
+ def _parse_a2a_response(
+ self,
+ response_data: Dict[str, Any],
+ agent_name: str
+ ) -> Optional[Dict[str, Any]]:
+ """Parse Nacos A2A agent response.
+
+ Args:
+ response_data: Response data from Nacos A2A API.
+ agent_name: Agent name for logging.
+
+ Returns:
+ Agent info dict or None if no agent found.
+ """
+ if response_data.get("code") != 0:
+ msg = response_data.get("message", "unknown error")
+ logger.warning(f"Nacos A2A API error for '{agent_name}': {msg}")
+ return None
+
+ data = response_data.get("data")
+ if not data:
+ logger.info(f"No A2A agent data found for '{agent_name}'")
+ return None
+
+ logger.info(f"[Nacos A2A Parse] Found agent: {data}")
+ return data
+
+    async def query_service_instance(
+        self,
+        service_name: str,
+        namespace: str = "public",
+        clusters: Optional[str] = None,
+        healthy_only: bool = False,
+        group_name: str = "DEFAULT_GROUP"
+    ) -> Optional[Dict[str, Any]]:
+        """Query service instance(s) from Nacos using v3 client API.
+
+        Args:
+            service_name: The name of the service to query.
+            namespace: Nacos namespace ID (defaults to "public").
+            clusters: Comma-separated cluster names (optional).
+            healthy_only: If True, only return healthy instances.
+            group_name: Nacos group name (defaults to "DEFAULT_GROUP").
+
+        Returns:
+            A dict containing instance information with keys:
+                - ip: Instance IP address
+                - port: Instance port
+                - metadata: Instance metadata dict (may contain 'a2a_card_url')
+            Or None if no instance is found or Nacos answers with HTTP 404.
+
+        Raises:
+            NacosConnectionError: If the request fails at the aiohttp level or
+                Nacos answers with a status other than 200 or 404.
+        """
+        params = self._build_auth_params()
+        service_name = service_name.strip()
+        params["serviceName"] = service_name
+        params["namespaceId"] = namespace.strip() if namespace else "public"
+        params["groupName"] = group_name
+        if clusters:
+            params["clusterName"] = clusters
+        if healthy_only:
+            params["healthyOnly"] = "true"
+
+        url = f"{self.nacos_addr}/nacos/v3/client/ns/instance/list"
+
+        logger.info(
+            f"[Nacos Query] URL: {url}, params: "
+            f"serviceName='{service_name}', namespaceId='{namespace}', groupName='{group_name}'"
+        )
+
+        try:
+            session = await self._get_session()
+            async with session.get(url, params=params) as response:
+                text = await response.text()
+                logger.info(
+                    f"[Nacos Response] status={response.status}, "
+                    f"body_len={len(text)}, body={text[:300]}"
+                )
+
+                if response.status == 200:
+                    data = await response.json()
+                    return self._parse_v3_instance_response(data, service_name)
+                elif response.status == 404:
+                    logger.warning(
+                        f"Service '{service_name}' not found in Nacos namespace '{namespace}'"
+                    )
+                    return None
+                else:
+                    raise NacosConnectionError(
+                        f"Nacos API returned status {response.status}: {text}"
+                    )
+
+        except aiohttp.ClientError as e:
+            logger.error(f"Failed to connect to Nacos at {self.nacos_addr}: {e}")
+            raise NacosConnectionError(f"Failed to connect to Nacos: {e}") from e
+
+    def _parse_v3_instance_response(
+        self,
+        response_data: Dict[str, Any],
+        service_name: str
+    ) -> Optional[Dict[str, Any]]:
+        """Parse Nacos v3 client API instance list response.
+
+        Nacos v3 API returns: { "code": 0, "message": "success", "data": [...] }
+
+        Args:
+            response_data: Response data from Nacos v3 API.
+            service_name: Service name, used only in log messages.
+
+        Returns:
+            First enabled and healthy instance, else the first listed one; None if there are none.
+        """
+        if response_data.get("code") != 0:
+            msg = response_data.get("message", "unknown error")
+            logger.warning(f"Nacos API error for '{service_name}': {msg}")
+            return None
+
+        data = response_data.get("data")
+        if data is None:
+            logger.info(f"[Nacos Parse] No data field in response for service '{service_name}'")
+            return None
+
+        hosts = data if isinstance(data, list) else []
+        logger.info(f"[Nacos Parse] Found {len(hosts)} instances for service '{service_name}'")
+
+        if not hosts:
+            logger.info(f"[Nacos Parse] No hosts found for service '{service_name}'")
+            return None
+
+        for instance in hosts:
+            instance_data = {
+                "ip": instance.get("ip"),
+                "port": instance.get("port"),
+                "healthy": instance.get("healthy", False),
+                "weight": instance.get("weight", 1.0),
+                "enabled": instance.get("enabled", True),
+                "metadata": instance.get("metadata") or {}
+            }
+
+            if instance_data["enabled"] and instance_data.get("healthy", False):
+                logger.info(
+                    f"[Nacos Parse] Found healthy instance for '{service_name}': "
+                    f"{instance_data['ip']}:{instance_data['port']}"
+                )
+                return instance_data
+
+        first_instance = hosts[0]
+        logger.info(
+            f"[Nacos Parse] No healthy instance found, returning first instance for '{service_name}': "
+            f"{first_instance.get('ip')}:{first_instance.get('port')}"
+        )
+        return {
+            "ip": first_instance.get("ip"),
+            "port": first_instance.get("port"),
+            "healthy": first_instance.get("healthy", False),
+            "weight": first_instance.get("weight", 1.0),
+            "enabled": first_instance.get("enabled", True),
+            "metadata": first_instance.get("metadata") or {}
+        }
+
+    def _parse_instance_response(
+        self,
+        data: Dict[str, Any],
+        service_name: str
+    ) -> Optional[Dict[str, Any]]:
+        """Parse Nacos instance list response (v1 API legacy format).
+
+        Args:
+            data: Response data from Nacos /instance/list API.
+            service_name: Service name, used only in log messages.
+
+        Returns:
+            First enabled and healthy instance, else the first listed one; None if there are none.
+        """
+        hosts = data.get("hosts") or []
+
+        if not hosts:
+            logger.debug(f"No hosts found for service '{service_name}'")
+            return None
+
+        for instance in hosts:
+            instance_data = {
+                "ip": instance.get("ip"),
+                "port": instance.get("port"),
+                "healthy": instance.get("healthy", False),
+                "weight": instance.get("weight", 1.0),
+                "enabled": instance.get("enabled", True),
+                "metadata": instance.get("metadata") or {}
+            }
+
+            if instance_data["enabled"] and instance_data.get("healthy", False):
+                logger.debug(
+                    f"Found healthy instance for '{service_name}': "
+                    f"{instance_data['ip']}:{instance_data['port']}"
+                )
+                return instance_data
+
+        first_instance = hosts[0]
+        return {
+            "ip": first_instance.get("ip"),
+            "port": first_instance.get("port"),
+            "healthy": first_instance.get("healthy", False),
+            "weight": first_instance.get("weight", 1.0),
+            "enabled": first_instance.get("enabled", True),
+            "metadata": first_instance.get("metadata") or {}
+        }
+
+ async def list_services(
+ self,
+ namespace: str = "public",
+ page_no: int = 1,
+ page_size: int = 100,
+ group_name: str = "DEFAULT_GROUP"
+ ) -> Dict[str, Any]:
+ """List all services in a namespace using v3 Admin API.
+
+ Args:
+ namespace: Nacos namespace ID (defaults to "public").
+ page_no: Page number (1-indexed).
+ page_size: Number of services per page.
+ group_name: Group name filter (defaults to "DEFAULT_GROUP").
+
+ Returns:
+ Dict containing:
+ - count: Total number of services
+ - services: List of service names
+
+ Raises:
+ NacosConnectionError: If connection to Nacos fails.
+ """
+ session = await self._get_session()
+ access_token = None
+ if self.username and self.password:
+ access_token = await self._get_access_token(session)
+ if not access_token:
+ raise NacosConnectionError("Authentication failed. Please check username and password.")
+
+ params = {
+ "pageNo": page_no,
+ "pageSize": page_size,
+ "namespaceId": namespace,
+ "groupName": group_name
+ }
+ headers = {}
+ if access_token:
+ headers["AccessToken"] = access_token
+
+ url = f"{self.nacos_addr}/nacos/v3/admin/ns/service"
+
+ try:
+ async with session.get(url, params=params, headers=headers) as response:
+ if response.status == 200:
+ data = await response.json()
+ if data.get("code") == 0:
+ return {
+ "count": data.get("data", {}).get("count", 0),
+ "services": data.get("data", {}).get("doms", [])
+ }
+ elif data.get("code") == 403:
+ self._clear_access_token()
+ raise NacosConnectionError("Authentication failed. Please check username and password.")
+ else:
+ raise NacosConnectionError(
+ f"Nacos API error: {data.get('message', 'unknown')}"
+ )
+ elif response.status == 403:
+ self._clear_access_token()
+ raise NacosConnectionError("Authentication failed. Please check username and password.")
+ else:
+ text = await response.text()
+ raise NacosConnectionError(
+ f"Nacos API returned status {response.status}: {text}"
+ )
+
+ except aiohttp.ClientError as e:
+ logger.error(f"Failed to list services from Nacos: {e}")
+ raise NacosConnectionError(f"Failed to list services from Nacos: {e}") from e
+
+ async def get_service_detail(
+ self,
+ service_name: str,
+ namespace: str = "public",
+ group_name: str = "DEFAULT_GROUP"
+ ) -> Optional[Dict[str, Any]]:
+ """Get detailed information about a service using v3 Admin API.
+
+ Args:
+ service_name: The name of the service.
+ namespace: Nacos namespace ID (defaults to "public").
+ group_name: Nacos group name (defaults to "DEFAULT_GROUP").
+
+ Returns:
+ Service detail dict or None if not found.
+
+ Raises:
+ NacosConnectionError: If connection to Nacos fails.
+ """
+ session = await self._get_session()
+ access_token = None
+ if self.username and self.password:
+ access_token = await self._get_access_token(session)
+ if not access_token:
+ raise NacosConnectionError("Authentication failed. Please check username and password.")
+
+ params = {
+ "serviceName": service_name,
+ "namespaceId": namespace,
+ "groupName": group_name
+ }
+ headers = {}
+ if access_token:
+ headers["AccessToken"] = access_token
+
+ url = f"{self.nacos_addr}/nacos/v3/admin/ns/service"
+
+ try:
+ async with session.get(url, params=params, headers=headers) as response:
+ if response.status == 200:
+ data = await response.json()
+ if data.get("code") == 0:
+ return data.get("data")
+ elif data.get("code") == 403:
+ self._clear_access_token()
+ raise NacosConnectionError("Authentication failed. Please check username and password.")
+ else:
+ msg = data.get("message", "")
+ if "not found" in msg.lower() or "not exist" in msg.lower():
+ return None
+ raise NacosConnectionError(
+ f"Nacos API error: {msg}"
+ )
+ elif response.status == 404:
+ return None
+ elif response.status == 403:
+ self._clear_access_token()
+ raise NacosConnectionError("Authentication failed. Please check username and password.")
+ else:
+ text = await response.text()
+ raise NacosConnectionError(
+ f"Nacos API returned status {response.status}: {text}"
+ )
+
+ except aiohttp.ClientError as e:
+ logger.error(f"Failed to get service detail from Nacos: {e}")
+ raise NacosConnectionError(
+ f"Failed to get service detail from Nacos: {e}"
+ ) from e
+
+    async def check_health(
+        self,
+        host: str,
+        port: int,
+        namespace: str = "public"
+    ) -> bool:
+        """Check if an instance is healthy.
+
+        Args:
+            host: Instance IP address.
+            port: Instance port.
+            namespace: Nacos namespace ID.
+
+        Returns:
+            True only when Nacos answers HTTP 200 with body "ok" (any case).
+
+        Note:
+            aiohttp client errors are logged and reported as False, not raised.
+        """
+        params = self._build_auth_params()
+        params["serviceName"] = "__nacos^naming*"  # NOTE(review): fixed name; confirm the endpoint expects it
+        params["ip"] = host
+        params["port"] = port
+        params["namespaceId"] = namespace
+
+        url = f"{self.nacos_addr}/nacos/v1/ns/instance/health"
+
+        try:
+            session = await self._get_session()
+            async with session.get(url, params=params) as response:
+                if response.status == 200:
+                    text = await response.text()
+                    return text.lower() == "ok"
+                return False
+
+        except aiohttp.ClientError as e:
+            logger.error(f"Failed to check instance health: {e}")
+            return False
+
+ async def test_connectivity(
+ self,
+ namespace: str = "public"
+ ) -> Dict[str, Any]:
+ """Test connectivity to the Nacos server.
+
+ Args:
+ namespace: Nacos namespace ID to test connectivity with.
+
+ Returns:
+ Dict containing:
+ - success: Whether the connection was successful
+ - message: Human-readable message about the result
+ """
+ try:
+ session = await self._get_session()
+
+ access_token = None
+ if self.username and self.password:
+ access_token = await self._get_access_token(session)
+ if not access_token:
+ return {
+ "success": False,
+ "message": "Authentication failed. Please check username and password."
+ }
+
+ url = f"{self.nacos_addr}/nacos/v3/admin/ns/ops/metrics"
+ headers = {}
+ if access_token:
+ headers["AccessToken"] = access_token
+
+ async with session.get(url, headers=headers) as response:
+ if response.status == 200:
+ data = await response.json()
+ if data.get("code") == 0:
+ return {
+ "success": True,
+ "message": "Successfully connected to Nacos server"
+ }
+ else:
+ return {
+ "success": False,
+ "message": f"Nacos API error: {data.get('message', 'unknown')}"
+ }
+ elif response.status == 403:
+ return {
+ "success": False,
+ "message": "Authentication failed. Please check username and password."
+ }
+ else:
+ text = await response.text()
+ return {
+ "success": False,
+ "message": f"Nacos server returned status {response.status}: {text}"
+ }
+
+ except aiohttp.ClientError as e:
+ logger.error(f"Failed to connect to Nacos at {self.nacos_addr}: {e}")
+ return {
+ "success": False,
+ "message": f"Failed to connect to Nacos server: {e}"
+ }
+
+ async def _get_access_token(self, session: aiohttp.ClientSession) -> Optional[str]:
+ """Get access token from Nacos authentication endpoint with caching.
+
+ Args:
+ session: aiohttp session to use for the request.
+
+ Returns:
+ Access token string if authentication successful, None otherwise.
+ """
+ if self._access_token:
+ return self._access_token
+
+ try:
+ url = f"{self.nacos_addr}/nacos/v1/auth/login"
+ form_data = aiohttp.FormData()
+ form_data.add_field("username", self.username)
+ form_data.add_field("password", self.password)
+
+ async with session.post(url, data=form_data) as response:
+ if response.status == 200:
+ result = await response.json()
+ token = result.get("accessToken")
+ if token:
+ self._access_token = token
+ return token
+ logger.warning(f"Nacos login failed: {result.get('message', 'unknown')}")
+ else:
+ text = await response.text()
+ logger.warning(f"Nacos login request returned status {response.status}: {text}")
+ return None
+
+ except aiohttp.ClientError as e:
+ logger.error(f"Failed to login to Nacos: {e}")
+ return None
+
+ def _clear_access_token(self) -> None:
+ """Clear the cached access token."""
+ self._access_token = None
+
+ async def __aenter__(self) -> "NacosClient":
+ """Async context manager entry."""
+ return self
+
+ async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
+ """Async context manager exit."""
+ await self.close()
diff --git a/doc/docs/en/backend/overview.md b/doc/docs/en/backend/overview.md
index 962233f18..d77dfee3c 100644
--- a/doc/docs/en/backend/overview.md
+++ b/doc/docs/en/backend/overview.md
@@ -202,4 +202,6 @@ python backend/mcp_service.py # MCP service
- Resource pool management
- Auto-scaling capabilities
-For detailed backend development guidelines, see the [Developer Guide](../developer-guide/overview).
\ No newline at end of file
+For detailed backend development guidelines, see the [Developer Guide](../developer-guide/overview).
+
+For skill development and management, see the [Skills System Documentation](./skills/index).
\ No newline at end of file
diff --git a/doc/docs/en/backend/skills/index.md b/doc/docs/en/backend/skills/index.md
new file mode 100644
index 000000000..7824260fa
--- /dev/null
+++ b/doc/docs/en/backend/skills/index.md
@@ -0,0 +1,37 @@
+# Backend Skills Documentation
+
+This section covers Nexent's Skills system in the backend infrastructure, including skill definitions, skill package structures, and system architecture.
+
+## Available Documentation
+
+### Overview and Architecture
+- [Skills System Overview](./overview): Skill types, lifecycle, and version management
+
+## Skills vs. Tools
+
+In Nexent, **Tools** and **Skills** are two distinct layers:
+
+- **Tool**: A single atomic operation the agent can call, such as `read_file` or `tavily_search`. When enabled, the LLM searches through the tool list on every turn — meaning even if a tool is completely unnecessary for this conversation, the LLM still consumes context tokens to "see" it.
+- **Skill**: A workflow of multiple tools bundled with parameter configuration and usage documentation via `SKILL.md`. The LLM does not need to "see" all tools in advance; it decides whether to activate a skill based on the user's actual needs. The corresponding toolset is only loaded when activated — effectively reducing token consumption.
+
+## Quick Start
+
+1. **Explore capabilities**: Read [Skills System Overview](./overview) to understand the supported skill types
+2. **Try creation**: Experience NL-to-Skill creation on the [Skill Management](../../user-guide/skills) page
+3. **Create manually**: Upload `SKILL.md` or a ZIP package to create a custom skill
+4. **Configure for agents**: Enable skills in the agent's tool configuration
+
+## Related References
+
+- [Skill Management (User Guide)](../../user-guide/skills)
+- [Agent Development Guide](../../user-guide/agent-development)
+- [Local Tools Overview](../../user-guide/local-tools/index)
+- [SDK Tool Development Guide](../../sdk/core/tools)
+- [MCP Tool Development](../tools/mcp)
+- [FAQ](../../quick-start/faq)
+
+## Getting Help
+
+- Check the [FAQ](../../quick-start/faq) for common skill usage questions
+- Ask questions in [GitHub Discussions](https://github.com/ModelEngine-Group/nexent/discussions)
+- Review [GitHub Issues](https://github.com/ModelEngine-Group/nexent/issues) for known issues
diff --git a/doc/docs/en/backend/skills/overview.md b/doc/docs/en/backend/skills/overview.md
new file mode 100644
index 000000000..34fbd2f97
--- /dev/null
+++ b/doc/docs/en/backend/skills/overview.md
@@ -0,0 +1,138 @@
+# Skills System Overview
+
+A Skill is Nexent's way of extending an agent's capabilities. Each skill consists of:
+
+- **Skill description**: What this skill does and when to use it
+- **Tool bundle**: A package of one or more Nexent SDK methods or user-defined tools
+- **Parameter template**: Which parameters users can fill in for this skill
+- **Usage examples**: How this skill is typically used
+
+Compared to selecting tools one by one, skills make configuring complex capabilities simple — install one skill package instead of configuring each tool separately.
+
+## Skill Package Structure
+
+A skill can be a single `SKILL.md` file or a ZIP package with multiple files:
+
+```
+skill-name/
+├── SKILL.md # Skill definition file (required)
+├── config/
+│ ├── config.yaml # Default parameter values (optional)
+│ └── schema.yaml # Parameter types and descriptions (optional)
+├── scripts/
+│ └── *.py # Python scripts (optional)
+├── examples.md # Usage examples (optional)
+└── assets/ # Static assets (optional)
+```
+
+### SKILL.md Structure
+
+Each skill must have a `SKILL.md` file, consisting of two parts:
+
+**Part 1: YAML Frontmatter (required)**
+
+```yaml
+---
+name: skill-name
+description: |
+ A description of what this skill does and when to use it.
+ Write in third person, e.g., "This skill is used for..."
+tags:
+ - tag1
+ - tag2
+---
+```
+
+**Part 2: Skill Body**
+
+Below the frontmatter, you can write Markdown content including:
+- Detailed usage instructions and guidelines
+- Example code for tool invocation
+- Error handling instructions
+- Usage limits and caveats
+
+### Two Skill Types
+
+Skills fall into two categories based on their purpose:
+
+**Tool Skills**: Used to expose the capabilities of one or more Nexent SDK methods. The body should include tool parameter descriptions, usage examples, return formats, and error handling. Once the user configures the parameters, the agent can call these tools directly.
+
+**Agent Skills**: Used to teach an agent how to perform a complex task. The body should include workflow instructions, domain knowledge, best practices, and sometimes helper scripts. The body will contain detailed step-by-step guidance.
+
+## Official Skills Overview
+
+### File Operations
+
+| Skill Name | Description |
+|-----------|-------------|
+| `read-file` | Read file content and metadata within the workspace |
+| `create-file-directory` | Create files or directories |
+| `delete-file-directory` | Delete files or directories |
+| `move-file-directory` | Move or rename files/directories |
+| `list-directory` | List directory structure in a tree view |
+
+### Knowledge Base Search
+
+| Skill Name | Description |
+|-----------|-------------|
+| `search-knowledge-base` | Local knowledge base semantic search (supports hybrid / accurate / semantic modes) |
+| `search-dify` | Dify knowledge base search |
+| `search-idata` | iData knowledge base search |
+| `search-datamate` | DataMate knowledge base search (with similarity threshold control) |
+
+### Web Search
+
+| Skill Name | Description |
+|-----------|-------------|
+| `search-web-tavily` | Tavily real-time web search |
+| `search-web-linkup` | Linkup image and text mixed search |
+| `search-web-exa` | Exa deep web search |
+
+### Multimodal Analysis
+
+| Skill Name | Description |
+|-----------|-------------|
+| `analyze-image` | VLM-based image content analysis and Q&A |
+| `analyze-text-file` | PDF/Word/Excel file content extraction and Q&A |
+
+### Communication and Remote Operations
+
+| Skill Name | Description |
+|-----------|-------------|
+| `email-utils` | IMAP receive / SMTP send (supports HTML / CC / BCC) |
+| `run-shell-ssh` | Persistent SSH session for remote command execution |
+
+## Skill Lifecycle
+
+### Version Management
+
+Each skill supports two version states:
+
+- **Draft version (version=0)**: Development and debugging stage, changes take effect immediately, suitable for iterative adjustments
+- **Published version (version>=1)**: Production use, parameters locked to prevent accidental changes
+
+### Skill Instances
+
+The same skill can be configured with different parameter values for different agents, independently.
+
+For example, a search skill can be configured for a "Technical Documentation Agent" to search only the technical knowledge base, and for a "Customer Service Agent" to search only the customer service knowledge base.
+
+### Common Workflow
+
+```
+Create skill → Configure parameters → Select skill for agent → Debug → Publish
+ ↓
+ Edit draft version
+```
+
+## Security Notes
+
+- **Path isolation**: Files within a skill package can only be accessed within the skill directory scope
+- **Parameter validation**: Parameters defined in schema.yaml are validated by the frontend form
+- **Permission control**: Skill instances are tenant-isolated; APIs require authentication tokens
+
+## Related References
+
+- [Skill Management (User Guide)](../../user-guide/skills)
+- [Agent Development Guide](../../user-guide/agent-development)
+- [Local Tools Overview](../../user-guide/local-tools/index)
diff --git a/doc/docs/en/backend/tools/index.md b/doc/docs/en/backend/tools/index.md
index 2d2d2c185..82d73b82c 100644
--- a/doc/docs/en/backend/tools/index.md
+++ b/doc/docs/en/backend/tools/index.md
@@ -12,6 +12,10 @@ Integrate with the LangChain ecosystem for advanced AI workflows.
Model Context Protocol tools for standardized AI agent communication.
→ [MCP Tools Development](./mcp)
+### Skills System
+Create reusable skill packages through natural language or ZIP files, giving agents more flexible tool-calling capabilities.
+→ [Skills Documentation](../skills/index)
+
## Quick Start
1. **Choose your tool type**: LangChain for general AI workflows, MCP for standardized agent communication
diff --git a/doc/docs/en/getting-started/features.md b/doc/docs/en/getting-started/features.md
index c0b6b4703..2216d7163 100644
--- a/doc/docs/en/getting-started/features.md
+++ b/doc/docs/en/getting-started/features.md
@@ -25,7 +25,7 @@ The system automatically extracts key information from conversations to generate
## 📝 Progressive Skill Disclosure
-Nexent introduces a **Progressive Skill Disclosure** mechanism. As users input tasks, the system dynamically reveals the most relevant Skill suggestions based on the current context — helping users quickly find the tools and methods best suited to the current task. This mechanism enables newcomers to progressively explore system capabilities without adding operational complexity for advanced users.
+Nexent introduces a **Progressive Skill Disclosure** mechanism. As users input tasks, the system dynamically reveals the most relevant Skill suggestions based on the current context — helping users quickly find the tools and methods best suited to the current task. This mechanism helps prevent context explosion and maximize context window efficiency.
## 🗄️ Personal-Grade Knowledge Base
diff --git a/doc/docs/en/sdk/data-process.md b/doc/docs/en/sdk/data-process.md
index 2d11202b1..614c4b438 100644
--- a/doc/docs/en/sdk/data-process.md
+++ b/doc/docs/en/sdk/data-process.md
@@ -43,10 +43,10 @@ def file_process(self,
## 📁 Supported File Formats
-- **Text files**: .txt, .md, .csv
-- **Documents**: .pdf, .docx, .pptx
+- **Text files**: .txt, .md, .csv, .json
+- **Documents**: .pdf, .docx, .pptx, .epub
- **Images**: .jpg, .png, .gif (with OCR)
-- **Web content**: HTML, URLs
+- **Web content**: HTML, URLs, XML
- **Archives**: .zip, .tar
## 💡 Usage Examples
diff --git a/doc/docs/en/user-guide/agent-development.md b/doc/docs/en/user-guide/agent-development.md
index db2614f7d..109674273 100644
--- a/doc/docs/en/user-guide/agent-development.md
+++ b/doc/docs/en/user-guide/agent-development.md
@@ -31,15 +31,86 @@ You can configure other collaborative agents for your created agent, as well as
### 🤝 Collaborative Agents
+Collaborative agents help the current agent complete complex tasks. The sources of collaborative agents are divided into two categories:
+
+- **Internal Agents**: Published agents on the platform
+- **External A2A Agents**: Third-party agents discovered through the A2A protocol
+
1. Click the plus sign under the "Collaborative Agent" tab to open the selectable agent list
-2. Select the agents you want to add from the dropdown list
-3. Multiple collaborative agents can be selected
-4. Click × to remove an agent from the selection
+2. The agent list is divided into two tabs: "Internal Agent" and "External A2A Agent". You can choose based on your needs
+3. Select the agent you want to add from the dropdown list
+4. Multiple collaborative agents can be selected
+5. Click × to remove an agent from the selection
+
+
+
+
+
+#### 🌐 Add External A2A Agents
+
+Nexent supports communication with third-party agents through the A2A protocol. You can discover external A2A agents in the following two ways:
+
+##### Discover Agent via URL
+
+If you know the Agent Card address of the target agent, you can use the URL discovery method:
+
+
+
+
+
+1. In the External A2A Agent list, click the "Add External Agent" button
+2. Select the "URL Discovery" tab
+3. Fill in the Agent Card URL address, for example: `https://example.com/.well-known/agent.json`
+4. Click the "Discover" button; the system will automatically retrieve the agent's related information
+5. After successful discovery, you can view the agent's name, description, capabilities and other information
+6. Click "Add to List" to complete the addition
+
+> 💡 **Tip**: The Agent Card is an Agent description file that complies with the A2A 1.0 specification, containing the agent's name, description, calling address, capabilities and other information.
+
+##### Discover Agent via Nacos
+
+If your agent is registered with the Nacos service discovery platform, you can use the Nacos discovery method:
-
+
+1. In the External A2A Agent list, click the "Add External Agent" button
+2. Select the "Nacos Discovery" tab
+3. For first-time use, you need to configure the Nacos connection information:
+ - **Nacos Server Address**: Fill in the Nacos server address, such as `http://127.0.0.1:8848`
+ - **Namespace ID**: Fill in the Nacos namespace ID (optional)
+ - **Group Name**: Fill in the service group name, default is `DEFAULT_GROUP`
+ - **Username/Password**: Fill in the Nacos access credentials (optional)
+4. Click "Save Configuration" to save the Nacos connection information
+5. Fill in the Agent service name to scan
+6. Click the "Scan" button; the system will obtain matching Agent information from Nacos
+7. The scan results will list all matching Agents. You can select the agents you need and add them to the list
+
+> ⚠️ **Note**: Make sure the Nacos service is running properly and the target Agent is correctly registered with Nacos.
+
+##### Manage Discovered External Agents
+
+In the External A2A Agent list, you can view and manage all discovered external agents:
+
+
+
+
+
+1. **View Agent Details**: Click on the agent card to view its complete information, including name, description, URL, capability list, etc.
+2. **Test Agent**: Click the "Test" button to send a test message to the agent and verify if it is working properly
+3. **Chat with Agent**: Click the "Chat" button to open a chat window and interact with the agent in real time
+4. **Configure Calling Protocol**: Click the "Protocol Configuration" button to select the calling protocol for this agent:
+ - **HTTP + JSON**: Use REST API style calls
+ - **JSON-RPC**: Use JSON-RPC protocol calls
+5. **Refresh Agent Information**: If the agent information changes, click the "Refresh" button to re-fetch the latest Agent Card
+6. **Remove Agent**: Click the "Remove" button to delete the agent from the discovered list
+
+> 💡 **Use Cases**:
+> - Quickly integrate known third-party agent services through URL discovery
+> - Batch integrate all agents from the same service registry through Nacos discovery
+> - Configure protocols to meet the requirements of different agent service providers
+
### 🛠️ Select Agent Tools
Agents can use various tools to complete tasks, such as knowledge base search, file parsing, image parsing, email sending/receiving, file management, and other local tools. They can also integrate third-party MCP tools or custom tools.
@@ -60,6 +131,8 @@ Agents can use various tools to complete tasks, such as knowledge base search, f
> 2. Please select the `analyze_text_file` tool to enable the parsing function for document and text files.
> 3. Please select the `analyze_image` tool to enable the parsing function for image files.
>
+> ⚠️ **Embedding Model Configuration**: When using the `knowledge_base_search` tool, ensure that the knowledge base has an embedding model configured. For existing knowledge bases, the system will prompt you to select an embedding model. Make sure to select **the same embedding model used when creating the knowledge base**. If the selected model differs from the one used during knowledge base creation, it may cause search failures or inaccurate results.
+>
> 📚 Want to learn about all the built-in local tools available in the system? Please refer to [Local Tools Overview](./local-tools/index.md).
### 🔌 Add MCP Tools
@@ -108,6 +181,39 @@ You can add MCP services to Nexent in the following two ways:
Many third-party services such as [ModelScope](https://www.modelscope.cn/mcp) provide MCP services, which you can quickly integrate and use.
You can also develop your own MCP services and connect them to Nexent; see [MCP Tool Development](../backend/tools/mcp).
+**3️⃣ Convert Existing API to MCP Service**
+
+🔔 This method is suitable for quickly converting existing REST API endpoints into MCP tools without additional development, allowing agents to call existing API capabilities:
+
+>1. In the MCP Config module, select **"API to MCP"** as the access type
+>
+>2. Fill in the API basic information in the input box below:
+> - **Service Name**: Display name for the MCP service
+> - **OpenAPI JSON**: OpenAPI 3.x specification in JSON format
+> - **Base Service URL**: Base address of the API service (supports http/https)
+>
+>3. Click the **+ Add** button in the lower right corner to complete the MCP service conversion
+
+
+
+
+
+>4. After conversion, you can view all externally converted MCP tools in the **Outer APIs** tab
+
+
+
+
+
+
+
+
+
+>💡 **Use Cases**:
+>- Quickly integrate internal enterprise REST API endpoints
+>- Convert third-party service HTTP APIs into MCP tools
+>- Generate tools directly from OpenAPI specifications without writing MCP Server code
+
+
### ⚙️ Custom Tools
You can refer to the following guides to develop your own tools and integrate them into Nexent to enrich agent capabilities:
@@ -129,7 +235,7 @@ Nexent provides a "Tool Testing" capability for all types of tools—whether the
- The test `query`, such as "benefits of vitamin C"
- The search `search_mode` (default is `hybrid`)
- The target index list `index_names`, such as `["Medical", "Vitamin Encyclopedia"]`
- - If `index_names` is not entered, it will default to searching all knowledge bases selected on the knowledge base page
+ - If `index_names` is not entered, it will default to searching all knowledge bases selected on the knowledge base page
6. After entering the parameters, click "Execute Test" to start the test and view the test results below
@@ -181,6 +287,134 @@ After completing the initial agent configuration, you can debug the agent and fi
After successful debugging, click the "Save" button in the lower right corner, and the agent will be saved and appear in the agent list.
+## 📋 Version Management
+
+Nexent supports agent version management. You can save different versions of agent configurations during the debugging process.
+
+Once the agent configuration is verified, you can publish the agent. After publishing, the agent will be visible in the Agent Space and Start Chat pages.
+
+
+
+If you need to roll back to a previous version, click the "Rollback" button on the version management page.
+
+
+
+### 🚀 Publish as A2A Agent
+
+Nexent supports exposing published agents as A2A Agents for external systems to call. When publishing a version, you can check the "Publish as A2A Agent" option to register the current agent as an A2A 1.0 compliant Agent.
+
+
+
+
+
+After successful publishing, the system will display the A2A Agent's call information:
+
+
+
+
+
+| Field | Description |
+|-------|-------------|
+| **Endpoint ID** | Unique identifier for the A2A Agent |
+| **Agent Card URL** | Agent discovery endpoint; external systems use this address to retrieve Agent descriptions |
+| **Protocol Version** | A2A protocol version; currently 1.0 |
+| **REST Endpoints** | REST-style API endpoints |
+| **JSON-RPC Endpoint** | JSON-RPC 2.0 protocol calling endpoint |
+
+#### Calling Methods
+
+The published A2A Agent supports the following two calling protocols:
+
+##### REST API
+
+```bash
+# Get Agent Card (for Agent discovery)
+GET /nb/a2a/{endpoint_id}/.well-known/agent-card.json
+
+# Send synchronous message
+POST /nb/a2a/{endpoint_id}/message:send
+Content-Type: application/json
+
+{
+ "message": {
+ "role": "user",
+ "content": "Please help me complete a task"
+ }
+}
+
+# Send streaming message (SSE)
+POST /nb/a2a/{endpoint_id}/message:stream
+Content-Type: application/json
+
+{
+ "message": {
+ "role": "user",
+ "content": "Please help me complete a task"
+ }
+}
+
+# Get task status
+GET /nb/a2a/{endpoint_id}/tasks/{task_id}
+```
+
+##### JSON-RPC 2.0
+
+```bash
+POST /nb/a2a/{endpoint_id}/v1
+Content-Type: application/json
+
+# Send synchronous message
+{
+ "jsonrpc": "2.0",
+ "method": "SendMessage",
+ "params": {
+ "message": {
+ "role": "user",
+ "content": "Please help me complete a task"
+ }
+ },
+ "id": 1
+}
+
+# Send streaming message
+{
+ "jsonrpc": "2.0",
+ "method": "SendStreamingMessage",
+ "params": {
+ "message": {
+ "role": "user",
+ "content": "Please help me complete a task"
+ }
+ },
+ "id": 2
+}
+
+# Get task status
+{
+ "jsonrpc": "2.0",
+ "method": "GetTask",
+ "params": {
+ "taskId": "task_abc123"
+ },
+ "id": 3
+}
+```
+
+> 💡 **Tips**:
+> - For local development, replace the `/nb/a2a` prefix with `http://localhost:5013/nb/a2a`
+> - For production environments, replace the prefix with your server domain name or public IP address
+
+> ⚠️ **Notes**:
+> - Calling A2A Agents requires valid authentication information in the request headers
+> - Agent Card information is cached with a refresh interval of 1 hour
+> - If you need to update Agent information, you need to republish the agent version
+
+When an agent is published as an A2A-compliant Agent, users can view the detailed A2A Agent calling information by clicking the button shown below in the agent list:
+
+
+
+
+
## 📋 Manage Agents
In the agent list on the left, you can perform the following operations on existing agents:
diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-detail.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-detail.jpg
new file mode 100644
index 000000000..399af1c56
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-detail.jpg differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-discovery-list.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-discovery-list.jpg
new file mode 100644
index 000000000..5c523f7b1
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-discovery-list.jpg differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-find-detail.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-find-detail.jpg
new file mode 100644
index 000000000..4c42104ec
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-find-detail.jpg differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-nacos-discovery.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-nacos-discovery.jpg
new file mode 100644
index 000000000..fdfa2e826
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-nacos-discovery.jpg differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-published-as.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-published-as.jpg
new file mode 100644
index 000000000..5c523f7b1
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-published-as.jpg differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-url-discovery.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-url-discovery.jpg
new file mode 100644
index 000000000..4632206fb
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-url-discovery.jpg differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api.png b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api.png
new file mode 100644
index 000000000..2cce2a44a
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api.png differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_1.png b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_1.png
new file mode 100644
index 000000000..12e9358c5
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_1.png differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_2.png b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_2.png
new file mode 100644
index 000000000..4221b41f5
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_2.png differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/set-collaboration.jpg b/doc/docs/en/user-guide/assets/agent-development/set-collaboration.jpg
new file mode 100644
index 000000000..fdfa2e826
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/set-collaboration.jpg differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/set-collaboration.png b/doc/docs/en/user-guide/assets/agent-development/set-collaboration.png
deleted file mode 100644
index 7f47ba1a2..000000000
Binary files a/doc/docs/en/user-guide/assets/agent-development/set-collaboration.png and /dev/null differ
diff --git a/doc/docs/en/user-guide/knowledge-base.md b/doc/docs/en/user-guide/knowledge-base.md
index e5e5714ff..05456e5fa 100644
--- a/doc/docs/en/user-guide/knowledge-base.md
+++ b/doc/docs/en/user-guide/knowledge-base.md
@@ -26,12 +26,14 @@ Create and manage knowledge bases, upload documents, and generate summaries. Kno
### Supported File Formats
Nexent supports multiple file formats, including:
-- **Text:** .txt, .md
+- **Text:** .txt, .md, .json
- **PDF:** .pdf
- **Word:** .docx
- **PowerPoint:** .pptx
+- **EPUB:** .epub
- **Excel:** .xlsx
- **Data files:** .csv
+- **Web content:** .html, .xml
## 📊 Knowledge Base Summary
diff --git a/doc/docs/en/user-guide/skills.md b/doc/docs/en/user-guide/skills.md
new file mode 100644
index 000000000..0cdc2a288
--- /dev/null
+++ b/doc/docs/en/user-guide/skills.md
@@ -0,0 +1,572 @@
+---
+title: Skill Management
+---
+
+# Skill Management
+
+A Skill is a core mechanism in Nexent for extending agent capabilities. Each skill packages multiple tools with usage documentation into a reusable unit of capability, enabling agents to handle complex tasks like assembling building blocks — without consuming excessive context space.
+
+## Table of Contents
+
+- [Skills vs. Tools](#the-relationship-between-skills-and-tools): Understanding the core concepts
+- [Using Skills](#using-skills): How to use skills in agent development
+- [Skill Management](#skill-management-1): Create, edit, import, and export skills
+- [Skill Upload Guide](#skill-upload-guide): SKILL.md format, ZIP structure, special tags, and writing standards
+- [NL-to-Skill](#nl-to-skill): Automatically generate skills from natural language descriptions
+- [Official Skills Overview](#official-skills-overview): Built-in skills and their capabilities
+
+## The Relationship Between Skills and Tools
+
+In Nexent, **Tools** and **Skills** are two distinct layers. Understanding their differences helps you configure agent capabilities more effectively.
+
+A **Tool** is a single atomic operation the agent can call, such as `read_file` or `tavily_search`. When a tool is enabled for an agent, the LLM searches through the tool list on every turn — meaning even if a tool is completely unnecessary for the current conversation, the LLM still consumes context tokens to "see" it.
+
+A **Skill** bundles the capabilities of multiple tools into a complete workflow, along with parameter configuration and usage documentation, via `SKILL.md`. The LLM does not need to "see" all tools in advance. Based on the user's actual needs, it decides whether to activate a skill. Only when a skill is activated does the system load the corresponding toolset — effectively reducing token consumption.
+
+| Dimension | Tool | Skill |
+|-----------|------|-------|
+| Granularity | Single atomic operation | Bundle of multiple tools + configuration + documentation |
+| Token consumption | Occupies context on every turn | Loaded only when activated |
+| Parameters | Fixed parameter schema | Customizable parameter templates |
+| Versioning | No version management | Supports draft/published versions |
+| Distribution | Code-level | ZIP package distribution, plug-and-play |
+
+**Analogy**: Tools are individual items like a screwdriver, hammer, or saw. A Skill is a toolbox — with tools pre-matched for a work scenario and accompanied by usage instructions. Open the right toolbox for the task at hand.
+
+## Using Skills
+
+### Configuring Skills for an Agent
+
+1. Open the **[Agent Development](./agent-development)** page
+2. On the "Select Tools" tab, find the **Skills** group
+3. Click a skill name to select it; click again to deselect
+4. After selecting a skill, click the ⚙️ button next to it to configure skill parameters
+5. Save the agent configuration
+
+
+
+
+
+> 💡 **Tip**: If a skill has required parameters that are not configured, a guided parameter-filling prompt will appear upon selection.
+
+### Skill Parameters
+
+Each skill's parameter definitions come from the `config/schema.yaml` file in the skill package. The configuration interface auto-generates a parameter form based on the schema, including:
+
+- **Parameter name and description** (bilingual: English and Chinese)
+- **Required/optional markers**
+- **Default values**
+- **Parameter types** (string, number, boolean, array, object)
+- **Tooltips auto-generated from YAML comments**
+
+### Skill Versions
+
+Each skill supports multi-version management:
+
+- **Draft version (version=0)**: Development and debugging stage; changes take effect immediately
+- **Published version (version>=1)**: Production use; parameters are locked
+
+When configuring the same skill for different agents, you can set different parameter values independently.
+
+## Skill Management
+
+### Viewing Installed Skills
+
+The "Select Tools" skill group displays all installed skills, including:
+- Official skills (`official` source)
+- Custom skills (`custom` source)
+
+### Creating Custom Skills
+
+Nexent supports two ways to create custom skills: uploading a skill package file, or generating one automatically from a natural language description.
+
+#### Method 1: Upload SKILL.md or ZIP
+
+1. Go to the skill configuration interface
+2. Click the "Upload Skill" button
+3. Select a `SKILL.md` file (single file) or a `.zip` package (complete skill package)
+4. The system automatically parses and creates the skill
+
+#### Method 2: NL-to-Skill Natural Language Creation
+
+Click the **"NL Create Skill"** button on the skill management page. See the [NL-to-Skill](#nl-to-skill) section below for details.
+
+### Editing Skills
+
+1. Find the target skill in the skill list
+2. Click the skill card to enter the edit page
+3. Modify the skill name, description, tags, parameter configuration, etc.
+4. Save changes
+
+### Importing/Exporting Skills
+
+- **Export**: Click "Export" on the skill detail page to download as a JSON configuration file
+- **Import**: Click "Import Skill" on the Agent Development page to upload a JSON configuration file
+
+> ⚠️ **Note**: When importing skills containing knowledge base tools (such as `knowledge_base_search`), these tools will only search **knowledge bases that the currently logged-in user is permitted to access in this environment**. The original skill's knowledge base configuration will not be automatically inherited.
+
+## Skill Upload Guide
+
+### Skill Package Structure
+
+A skill can be a single file or a ZIP package containing multiple files:
+
+```
+skill-name/
+├── SKILL.md # Skill definition file (required)
+├── config/
+│ ├── config.yaml # Default parameter values
+│ └── schema.yaml # Parameter types and descriptions
+├── scripts/
+│ └── *.py # Python scripts
+├── examples.md # Usage examples
+└── assets/ # Static assets
+```
+
+### SKILL.md Format in Detail
+
+`SKILL.md` is the core file of a skill, consisting of a YAML frontmatter section and a body section.
+
+**YAML Frontmatter (required)**
+
+The file must start with YAML frontmatter:
+
+```yaml
+---
+name: skill-name
+description: |
+ A description of what this skill does and when to use it.
+ Write in third person.
+tags:
+ - tag1
+ - tag2
+---
+```
+
+| Field | Required | Description | Example |
+|-------|----------|-------------|---------|
+| `name` | Yes | Skill name; English only, lowercase, hyphenated | `github-repo-analyzer` |
+| `description` | Yes | Skill function description; 1-3 sentences, include use case | `This skill analyzes GitHub repositories and extracts key metrics` |
+| `tags` | No | Skill tag list for categorization and search | `["code", "github", "analysis"]` |
+| `allowed-tools` | No | List of allowed tools (all available by default) | `[file_read, web_search]` |
+| `always` | No | Whether to auto-activate on every turn (default: false) | `false` |
+
+**Body (optional)**
+
+Below the frontmatter, you can write Markdown content including usage instructions, best practices, example code, and more.
+
+### Two Skill Types
+
+Based on their purpose, skills fall into two categories with different writing styles:
+
+**Tool Skills**: Used to expose tool capabilities. The body should include tool parameter descriptions, usage examples, return formats, and error handling.
+
+**Agent Skills**: Used to teach the agent how to perform a complex task. The body should include workflow instructions, domain knowledge, boundary conditions, and best practices.
+
+### config/schema.yaml: Defining Parameter Forms
+
+If a skill requires user-supplied parameters, create a `config/schema.yaml` file. The system will auto-generate a parameter configuration form in the frontend based on this file.
+
+```yaml
+param_name:
+ type: string | number | boolean | array | object
+ required: true | false
+ default:
+ description: "English description of the parameter"
+ description_zh: "Chinese description of the parameter"
+```
+
+**Supported types**: `string`, `number`, `boolean`, `array`, `object`
+
+**Complete example**:
+
+```yaml
+query:
+ type: string
+ required: true
+ description: "Search query string"
+ description_zh: "Search keyword"
+ default: ""
+
+top_k:
+ type: number
+ required: false
+ description: "Number of results to return"
+ description_zh: "Number of returned results"
+ default: 3
+
+enable_rerank:
+ type: boolean
+ required: false
+ description: "Enable result reranking"
+ description_zh: "Whether to enable result reranking"
+ default: false
+```
+
+### config/config.yaml: Setting Parameter Defaults
+
+If you want certain parameters to have default values, create `config/config.yaml`:
+
+```yaml
+# Initial workspace path
+init_path: "/mnt/nexent"
+
+# Maximum number of results
+top_k: 5
+```
+
+### Special Tags
+
+You can use the following special tags in the SKILL.md body:
+
+#### ``: Lazy-loading Example Files
+
+Use the `` tag to reference external files. The referenced file is loaded only when needed, keeping the main `SKILL.md` file lightweight.
+
+```markdown
+## Example Reference
+
+> **Note**: Only load the reference example file when the default Usage examples cannot meet your needs.
+
+
+```
+
+#### ``: Declaring Bundled Scripts
+
+If the skill package contains Python or Shell scripts, declare them in `SKILL.md`:
+
+```markdown
+
+```
+
+#### ``: Displaying Executable Code Examples
+
+Use the `` tag to wrap executable code examples (usually Python code):
+
+```markdown
+
+result = run_skill_script(
+ "code-reviewer",
+ "scripts/analyze.py",
+ {"--target": "/path/to/file.py", "--verbose": True}
+)
+print(result)
+
+```
+
+### Helper Functions
+
+In agent skill bodies and examples, you can use the following functions:
+
+**`run_skill_script(skill_name, script_path, params)`**: Execute a script bundled in the skill package
+
+```python
+# Execute a Python script
+result = run_skill_script(
+ "code-reviewer",
+ "scripts/analyze.py",
+ {"--target": "/path/to/file.py"}
+)
+
+# Execute a Shell script
+result = run_skill_script(
+ "database-migration",
+ "scripts/migrate.sh",
+ {"--direction": "up", "--steps": 1}
+)
+```
+
+**`read_skill_md(skill_name, files)`**: Read files from the skill package
+
+```python
+# By default, only reads SKILL.md (referenced files are not auto-included)
+content = read_skill_md("my-skill")
+
+# Explicitly specify which files to read
+full_content = read_skill_md("my-skill", [
+ "SKILL.md",
+ "reference/api-reference.md"
+])
+```
+
+### Writing Standards and Best Practices
+
+**SKILL.md Writing Standards**:
+
+1. **Be specific**: Explain when to use the skill, not just what it does
+ - ✓ "Used when you need to analyze GitHub repository popularity metrics"
+ - ✗ "GitHub search function"
+
+2. **Avoid time-sensitive information**: Do not include specific dates, version numbers, or other content that will become outdated
+
+3. **Stay concise**: Keep the `SKILL.md` body under 500 lines. Use `` for complex content that can be lazy-loaded
+
+4. **Path format**: Always use forward slashes `/`, even on Windows
+ - ✓ `src/services/payment_service.py`
+ - ✗ `src\services\payment_service.py`
+
+5. **Consistent parameter naming**: Use the same terminology and naming style throughout
+
+6. **Include boundary conditions**: Explain the skill's scope and limitations
+
+**Parameter Description Best Practices**:
+
+```yaml
+# ✓ Good: Clearly specify purpose and format
+query:
+ type: string
+ required: true
+ description: "GitHub repository owner/name or full URL"
+ description_zh: "GitHub repository in owner/name format or full URL"
+
+# ✗ Bad: Too vague
+query:
+ type: string
+ required: true
+ description: "Search query"
+ description_zh: "Query"
+```
+
+**Code Example Best Practices**:
+
+- Provide at least 2 different-scenario examples for each tool
+- Include common parameter combinations in examples
+- Demonstrate both successful calls and common error handling
+
+### Learning from Existing Skills
+
+The system includes several complete skill reference examples in `test_skill_examples/official-skills/`:
+
+| Skill Name | Reference Value |
+|-----------|-----------------|
+| `create-file-directory` | Standard writing for tool skills, with complete parameter tables, usage examples, and error handling tables |
+| `search-knowledge-base` | Parameter configuration for search skills, with complete `schema.yaml` and `config.yaml` examples |
+| `analyze-image` | Multimodal tool example with `` call format |
+| `code_review_expert` | Agent skill reference with bundled scripts and `` tag usage |
+
+### FAQ
+
+**Q: Upload reports "SKILL.md not found"**
+
+Make sure the `SKILL.md` file is in the ZIP package's root directory, not inside a subfolder.
+
+**Q: Parameter form didn't generate correctly**
+
+Check that `config/schema.yaml` is formatted correctly. Ensure each parameter has both `type` and `description` fields.
+
+**Q: Skill description isn't taking effect**
+
+The skill description should be written in the YAML frontmatter's `description` field, not in the Markdown body section. Body content is not parsed as the skill description.
+
+## NL-to-Skill
+
+NL-to-Skill is an intelligent creation feature provided by Nexent. You simply describe a skill requirement in natural language, and the system automatically generates a complete skill package — including skill definition, parameter configuration, and even accompanying script code. The entire generation process is visible in real time, as if an AI assistant is writing code for you.
+
+In simple terms:
+
+> You say "I want a skill that can search GitHub repositories and extract Star counts," and the system automatically generates a complete, usable skill for you.
+
+### Quick Start
+
+#### Step 1: Describe Your Requirement
+
+In the input box, describe the skill you want in natural language. The clearer your description, the better the generated result.
+
+**Good examples**:
+- "Create a skill that searches GitHub repositories by keywords and returns Star counts, descriptions, and links"
+- "Create a skill that reads an Excel file, calculates statistics for each column, and generates a chart"
+- "Create a skill that extracts order numbers, amounts, and dates from emails and compiles them into a table"
+
+**Bad examples**:
+- "Help me make a chat skill" (too vague)
+- "Search tool" (lacks specific capability description)
+
+#### Step 2: Watch the Generation Process
+
+After clicking "Generate," the page displays the AI's thinking and writing process in real time:
+- See the AI analyzing your requirement
+- See it writing the skill definition file
+- See it planning the parameter structure
+
+This process is like watching AI write code live. You can click "Stop" at any time to interrupt.
+
+#### Step 3: Preview and Save
+
+After generation completes, the system displays the complete skill content:
+- Skill name and description
+- Parameter list (what each parameter is, whether required)
+- Usage examples
+
+Check the preview carefully:
+- To make adjustments, click "Edit" to fine-tune
+- If it meets your expectations, click "Save" to add the skill to your skill library
+
+### Writing Tips
+
+#### How to Write a Good Skill Description
+
+**1. Clarify inputs and outputs**
+
+Tell the system what information the skill needs and what it will return.
+
+```
+✓ "Input a GitHub repository address; return the repository name, Star count, Fork count, and last update time"
+✗ "Search GitHub" (too vague)
+```
+
+**2. Explain the use case**
+
+Help the AI understand in what situations this skill would be used.
+
+```
+✓ "Used to quickly query the popularity of open-source projects and assist with technical selection decisions"
+✗ "Get data" (no context)
+```
+
+**3. Describe boundary conditions**
+
+If there is any special processing logic or there are limitations, mention them.
+
+```
+✓ "If the repository doesn't exist, return a friendly message instead of an error"
+✓ "Skip invalid image URLs and log them"
+```
+
+**4. Explicitly request examples**
+
+If the skill has complex usage scenarios with high accuracy requirements, explicitly request detailed examples.
+
+```
+✓ "Generate comprehensive and detailed usage examples"
+```
+
+#### Usage Scenario Examples
+
+| Scenario | Description Example |
+|---------|-------------------|
+| **Data collection** | "Search Zhihu for Q&A related to the keywords and extract summaries of the highest-liked answers" |
+| **File processing** | "Upload a CSV file; automatically calculate statistics for each column and generate a line chart" |
+| **API encapsulation** | "Create a skill that calls a weather API and returns a three-day forecast" |
+| **Multi-tool combination** | "Input a product link; automatically compare prices (calling multiple e-commerce searches) and return the lowest-price link" |
+| **Data cleaning** | "Read a messy text block; extract emails, phone numbers, and dates, and format the output" |
+
+### What You Can Do During Generation
+
+#### Real-time Preview
+
+During generation, skill content progressively appears in the preview area:
+- `SKILL.md` content: skill definition, description, tags
+- `examples.md`: skill usage examples
+- `scripts/*.py`: tool scripts (in complex mode)
+
+#### Stop Anytime
+
+If the generation direction deviates from expectations:
+- Click the "Stop" button; the AI immediately stops
+- Existing generated results are preserved; you can review or discard them
+
+#### Multiple Attempts
+
+If the first generation result is unsatisfactory:
+- Add more requirement details to refine the existing result
+- Or manually adjust in the preview
+- If you want to start completely fresh, click the "trash" icon in the upper right corner to clear all skill content
+
+### Limitations and Notes
+
+#### Model Capability Affects Quality
+
+NL-to-Skill uses the LLM configured for your tenant to generate skills. The model's capability directly determines the generation quality:
+- Smarter models accurately understand requirements and generate well-structured, easy-to-understand skills
+- Weaker models may produce incomplete or misleading content, affecting agent efficiency and accuracy
+
+If the generation result is unsatisfactory, try:
+1. Simplify the requirement description
+2. Switch to a smarter, more capable model
+3. Create in steps (make a simple version first, then manually expand)
+
+#### Token Consumption
+
+Complex skill generation consumes more tokens:
+- **Simple mode**: Usually consumes less; suitable for quick validation
+- **Complex mode**: Consumes more; suitable for formally creating complete skills
+
+It is recommended to first test the idea in simple mode, then use complex mode for formal creation after confirming feasibility.
+
+#### Not All Requirements Can Be Realized
+
+NL-to-Skill excels at generating skills for:
+- Single tool wrapping (e.g., encapsulating a search capability)
+- Simple multi-tool chaining (e.g., search → read → summarize)
+- Common data processing flows (e.g., file format conversion, data extraction)
+
+The following types of skills may be beyond its capabilities:
+- Requiring external APIs that are not integrated
+- Involving complex state management or concurrency logic
+- Requiring access to underlying platform interfaces that are not open
+
+When encountering requirements that cannot be fulfilled, the system will provide a prompt. You can consider creating manually or contacting technical support.
+
+#### Modifying Skills
+
+In the NL-to-Skill interface, you can select an existing skill. After you select it, the skill information loads automatically. You can then describe the desired changes in natural language in the dialog on the left to update the skill.
+
+If the name of the skill you create conflicts with an existing skill, Nexent will automatically switch from skill creation mode to skill update mode, and the newly generated content will overwrite the original skill.
+
+## Official Skills Overview
+
+### File Operations
+
+| Skill Name | Description | Main Tools |
+|-----------|-------------|------------|
+| `read-file` | Read file content and metadata within the workspace | `read_file` |
+| `create-file-directory` | Create files or directories | `create_file`, `create_directory` |
+| `delete-file-directory` | Delete files or directories (irreversible) | `delete_file`, `delete_directory` |
+| `move-file-directory` | Move or rename files/directories | `move_item` |
+| `list-directory` | List directory structure in a tree view | `list_directory` |
+
+### Knowledge Base Search
+
+| Skill Name | Description | Main Tools |
+|-----------|-------------|------------|
+| `search-knowledge-base` | Local knowledge base semantic search | `knowledge_base_search` |
+| `search-dify` | Dify knowledge base search (supports semantic / keyword / full_text / hybrid modes) | `dify_search` |
+| `search-idata` | iData knowledge base search | `idata_search` |
+| `search-datamate` | DataMate knowledge base search (with similarity threshold control) | `datamate_search` |
+
+### Web Search
+
+| Skill Name | Description | Main Tools |
+|-----------|-------------|------------|
+| `search-web-tavily` | Tavily real-time web search | `tavily_search` |
+| `search-web-linkup` | Linkup mixed image-and-text search | `linkup_search` |
+| `search-web-exa` | Exa deep web search | `exa_search` |
+
+### Multimodal Analysis
+
+| Skill Name | Description | Main Tools |
+|-----------|-------------|------------|
+| `analyze-image` | VLM-based image content analysis and Q&A | `analyze_image` |
+| `analyze-text-file` | PDF/Word/Excel file content extraction and Q&A | `analyze_text_file` |
+
+### Communication and Remote Operations
+
+| Skill Name | Description | Main Tools |
+|-----------|-------------|------------|
+| `email-utils` | IMAP receive / SMTP send (supports HTML / CC / BCC) | `get_email`, `send_email` |
+| `run-shell-ssh` | Persistent SSH session for remote command execution | `terminal` |
+
+## Security and Best Practices
+
+- **Knowledge base access control**: When importing skills containing knowledge base tools, actual search scope is limited by the current user's permissions
+- **Web search**: Tavily / Linkup / Exa web search requires the corresponding API Key to be configured in the platform security settings first
+- **Path security**: File operations within skill packages are limited to the skill directory scope and cannot access arbitrary system paths
+- **Irreversible operations**: Delete and move operations are irreversible; confirm the target before executing
+- **NL-to-Skill Token consumption**: Complex skill generation consumes more model tokens; it is recommended to test in simple mode first
+
+## Related References
+
+- [Agent Development](./agent-development)
+- [Local Tools Overview](./local-tools/index)
+- [MCP Tool Configuration](./mcp-tools)
+- [Skills System Overview](../backend/skills/overview)
diff --git a/doc/docs/en/user-guide/start-chat.md b/doc/docs/en/user-guide/start-chat.md
index 9593cb6ec..5834521ea 100644
--- a/doc/docs/en/user-guide/start-chat.md
+++ b/doc/docs/en/user-guide/start-chat.md
@@ -79,8 +79,8 @@ You can upload files during a chat so the agent can reason over their content:
- Or drag files directly into the chat area
2. **Supported File Formats**
- - **Documents:** PDF, Word (.docx), PowerPoint (.pptx), Excel (.xlsx)
- - **Text:** Markdown (.md), Plain text (.txt)
+ - **Documents:** PDF, Word (.docx), PowerPoint (.pptx), Excel (.xlsx), EPUB (.epub), HTML (.html), XML (.xml)
+ - **Text & Data:** Markdown (.md), Plain text (.txt), JSON (.json), CSV (.csv)
- **Images:** JPG, PNG, GIF, and other common formats
3. **File Processing Flow**
diff --git a/doc/docs/zh/backend/skills/index.md b/doc/docs/zh/backend/skills/index.md
new file mode 100644
index 000000000..10b37bc90
--- /dev/null
+++ b/doc/docs/zh/backend/skills/index.md
@@ -0,0 +1,37 @@
+# 后端技能(Skill)文档
+
+本节介绍 Nexent 后端基础设施中 Skills 技能系统的完整生态,包括技能定义、技能包结构与系统架构。
+
+## 可用文档
+
+### 概览与架构
+- [技能系统概览](./overview):技能类型、生命周期与版本管理
+
+## 技能与工具的关系
+
+在 Nexent 中,**工具(Tool)** 与 **技能(Skill)** 是两个不同层次的概念:
+
+- **工具**:智能体可调用的单个原子操作。启用后,LLM 的每次思考都会在工具列表中搜索——即使本次对话完全不需要某个工具,LLM 仍然会消耗上下文额度。
+- **技能**:通过 `SKILL.md` 将多个工具的能力组合为一个完整的工作流,并附带参数配置与使用文档。LLM 根据用户实际需求自行判断是否激活技能,激活后才加载对应工具集——有效节省 Token 消耗。
+
+## 快速开始
+
+1. **了解能力**:阅读 [技能系统概览](./overview) 了解已支持的技能类型
+2. **体验创建**:在 [技能管理](../../user-guide/skills) 页面体验 NL-to-Skill 创建
+3. **手动创建**:上传 `SKILL.md` 或 ZIP 包创建自定义技能
+4. **为智能体配置**:在智能体工具配置中勾选技能
+
+## 相关参考
+
+- [技能管理(用户指南)](../../user-guide/skills)
+- [智能体开发指南](../../user-guide/agent-development)
+- [本地工具概览](../../user-guide/local-tools/index)
+- [SDK 工具开发规范](../../sdk/core/tools)
+- [MCP 工具开发](../tools/mcp)
+- [常见问题](../../quick-start/faq)
+
+## 获取帮助
+
+- 查看 [常见问题](../../quick-start/faq) 了解常见技能使用问题
+- 在 [GitHub Discussions](https://github.com/ModelEngine-Group/nexent/discussions) 中提问
+- 查看 [GitHub Issues](https://github.com/ModelEngine-Group/nexent/issues) 了解已知问题
diff --git a/doc/docs/zh/backend/skills/overview.md b/doc/docs/zh/backend/skills/overview.md
new file mode 100644
index 000000000..f3d866f78
--- /dev/null
+++ b/doc/docs/zh/backend/skills/overview.md
@@ -0,0 +1,138 @@
+# 技能系统概览
+
+技能(Skill)是 Nexent 为智能体扩展能力的方式。每个技能由以下部分组成:
+
+- **技能描述**:这个技能是做什么的、什么时候该用它
+- **工具组合**:一个或多个 Nexent SDK 方法或用户自定义工具的打包
+- **参数模板**:用户可为技能填写哪些参数
+- **使用示例**:这个技能通常怎么用
+
+与逐个选择工具相比,技能让复杂能力的配置变得简单——只需安装一个技能包,无需分别配置每个工具。
+
+## 技能包结构
+
+技能包可以是单个 `SKILL.md` 文件,也可以是包含多个文件的 ZIP 包:
+
+```
+skill-name/
+├── SKILL.md # 技能定义文件(必需)
+├── config/
+│ ├── config.yaml # 参数默认值(可选)
+│ └── schema.yaml # 参数类型与说明(可选)
+├── scripts/
+│ └── *.py # Python 脚本(可选)
+├── examples.md # 使用示例(可选)
+└── assets/ # 静态资源(可选)
+```
+
+### SKILL.md 的结构
+
+每个技能必须有一个 `SKILL.md` 文件,分为两部分:
+
+**第一部分:YAML 元数据(必须)**
+
+```yaml
+---
+name: skill-name
+description: |
+ 一段描述,说明这个技能是做什么的、什么时候该用它。
+ 建议用第三人称书写,如:"这个技能用于..."
+tags:
+ - tag1
+ - tag2
+---
+```
+
+**第二部分:技能正文**
+
+元数据下方可以继续写 Markdown 内容,包括:
+- 技能的详细说明与使用指南
+- 工具调用方式的示例代码
+- 错误处理说明
+- 使用限制与注意事项
+
+### 两种技能类型
+
+根据用途,技能分为两类:
+
+**工具类技能**:用于暴露一个或多个 Nexent SDK 方法的能力,包含工具的参数说明、调用示例、返回格式、错误处理等。用户配置好参数后,智能体即可调用这些工具。
+
+**智能体类技能**:用于教智能体如何执行一个复杂任务,包含工作流程说明、领域知识、最佳实践,有时附带辅助脚本。这类技能的正文会包含详细的步骤指引。
+
+## 官方技能一览
+
+### 文件操作类
+
+| 技能名称 | 能力说明 |
+|---------|---------|
+| `read-file` | 读取工作空间内文件内容与元信息 |
+| `create-file-directory` | 创建文件或目录 |
+| `delete-file-directory` | 删除文件或目录 |
+| `move-file-directory` | 移动或重命名文件/目录 |
+| `list-directory` | 树形列出目录结构 |
+
+### 知识库搜索类
+
+| 技能名称 | 能力说明 |
+|---------|---------|
+| `search-knowledge-base` | 本地知识库语义检索(支持 hybrid / accurate / semantic 模式) |
+| `search-dify` | Dify 知识库检索 |
+| `search-idata` | iData 知识库检索 |
+| `search-datamate` | DataMate 知识库检索(支持相似度阈值控制) |
+
+### 公网搜索类
+
+| 技能名称 | 能力说明 |
+|---------|---------|
+| `search-web-tavily` | Tavily 公网实时搜索 |
+| `search-web-linkup` | Linkup 图文混合搜索 |
+| `search-web-exa` | Exa 深度网页搜索 |
+
+### 多模态分析类
+
+| 技能名称 | 能力说明 |
+|---------|---------|
+| `analyze-image` | 基于 VLM 的图片内容分析问答 |
+| `analyze-text-file` | PDF/Word/Excel 等文件内容提取与问答 |
+
+### 通信与远程操作类
+
+| 技能名称 | 能力说明 |
+|---------|---------|
+| `email-utils` | IMAP 收件 / SMTP 发件(支持 HTML / CC / BCC) |
+| `run-shell-ssh` | 持久化 SSH 会话远程执行命令 |
+
+## 技能生命周期
+
+### 版本管理
+
+每个技能支持两个版本状态:
+
+- **草稿版本(version=0)**:开发调试阶段,修改即时生效,适合反复调整
+- **已发布版本(version>=1)**:正式使用,参数锁定,防止误改
+
+### 技能实例
+
+同一个技能可以为不同的智能体配置不同的参数值,互不影响。
+
+例如,搜索技能可以为"技术文档 Agent"配置只搜索技术知识库,为"客服 Agent"配置只搜索客服知识库。
+
+### 常见操作流程
+
+```
+创建技能 → 配置参数 → 为智能体选择技能 → 调试 → 发布
+ ↓
+ 修改草稿版本
+```
+
+## 安全说明
+
+- **路径隔离**:技能包内文件仅能在技能目录范围内访问
+- **参数校验**:`schema.yaml` 中定义的参数均经过前端表单校验
+- **权限控制**:技能实例按租户隔离,API 需携带认证 Token
+
+## 相关参考
+
+- [技能管理(用户指南)](../../user-guide/skills)
+- [智能体开发指南](../../user-guide/agent-development)
+- [本地工具概览](../../user-guide/local-tools/index)
diff --git a/doc/docs/zh/backend/tools/index.md b/doc/docs/zh/backend/tools/index.md
index 94e1fe36e..88560fdcf 100644
--- a/doc/docs/zh/backend/tools/index.md
+++ b/doc/docs/zh/backend/tools/index.md
@@ -12,6 +12,10 @@
模型上下文协议工具,用于标准化 AI 智能体通信。
→ [MCP 工具开发](./mcp)
+### Skills 技能系统
+通过自然语言或 ZIP 包创建可复用的技能包,为智能体赋予更加灵活的工具调用能力。
+→ [Skills 技能文档](../skills/index)
+
## 快速开始
1. **选择工具类型**: LangChain 用于通用 AI 工作流,MCP 用于标准化智能体通信
@@ -28,4 +32,4 @@
- 查看我们的 [常见问题](../../quick-start/faq) 了解常见工具集成问题
- 加入我们的 [Discord 社区](https://discord.gg/tb5H3S3wyv) 获取实时支持
-- 查看 [GitHub Issues](https://github.com/ModelEngine-Group/nexent/issues) 了解已知问题
\ No newline at end of file
+- 查看 [GitHub Issues](https://github.com/ModelEngine-Group/nexent/issues) 了解已知问题
diff --git a/doc/docs/zh/getting-started/features.md b/doc/docs/zh/getting-started/features.md
index 15db67357..658a89e18 100644
--- a/doc/docs/zh/getting-started/features.md
+++ b/doc/docs/zh/getting-started/features.md
@@ -25,7 +25,7 @@ Nexent 支持 **Agent-to-Agent(A2A)** 通信协议,让多个智能体能
## 📝 Skill 渐进式披露
-Nexent 引入了 **渐进式 Skill 披露**机制。当用户输入任务时,系统会根据当前上下文动态揭示最相关的 Skill 建议,帮助用户快速找到适合当前任务的工具和方法。这一机制让新用户能够渐进式地探索系统能力,同时不增加高级用户的操作复杂度。
+Nexent 引入了 **渐进式 Skill 披露**机制。当用户输入任务时,系统会根据当前上下文动态揭示最相关的 Skill 建议,帮助用户快速找到适合当前任务的工具和方法。这一机制能够防止上下文爆炸,高效利用上下文窗口。
## 🗄️ 个人级知识库
diff --git a/doc/docs/zh/sdk/data-process.md b/doc/docs/zh/sdk/data-process.md
index a887c8442..1f1c27fde 100644
--- a/doc/docs/zh/sdk/data-process.md
+++ b/doc/docs/zh/sdk/data-process.md
@@ -98,6 +98,9 @@ def file_process(self,
- `.odt` - OpenDocument文本
- `.pptx` - PowerPoint 2007及更高版本
- `.ppt` - PowerPoint 97-2003版本
+- `.xml` - XML数据文件
+- `.json` - JSON数据文件
+- `.csv` - 逗号分隔值文件
## 💡 使用示例
diff --git a/doc/docs/zh/user-guide/agent-development.md b/doc/docs/zh/user-guide/agent-development.md
index 67d3c8311..a8cca4a33 100644
--- a/doc/docs/zh/user-guide/agent-development.md
+++ b/doc/docs/zh/user-guide/agent-development.md
@@ -31,15 +31,88 @@
### 🤝 协作 Agent
+协作智能体用于帮助当前智能体完成复杂任务。协作智能体的来源分为两类:
+
+- **内部 Agent**:平台已发布的智能体
+- **外部 A2A Agent**:通过 A2A 协议发现的第三方 Agent
+
1. 点击"协作 Agent"页签下的加号,弹出可选择的智能体列表
-2. 在下拉列表中选择要添加的智能体
-3. 允许选择多个协作智能体
-4. 可点击 × 取消选择此智能体
+2. 智能体列表分为"内部 Agent"和"外部 A2A Agent"两个页签,您可以根据需要选择
+3. 在下拉列表中选择要添加的智能体
+4. 允许选择多个协作智能体
+5. 可点击 × 取消选择此智能体
+
+