diff --git a/.github/workflows/auto-build-data-process-dev.yml b/.github/workflows/auto-build-data-process-dev.yml index c9885170e..6be8bf638 100644 --- a/.github/workflows/auto-build-data-process-dev.yml +++ b/.github/workflows/auto-build-data-process-dev.yml @@ -7,14 +7,14 @@ concurrency: on: workflow_dispatch: pull_request: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'backend/**' - 'sdk/**' - 'make/data_process/**' - '.github/workflows/**' push: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'backend/**' - 'sdk/**' diff --git a/.github/workflows/auto-build-doc-dev.yml b/.github/workflows/auto-build-doc-dev.yml index 697aa0204..7c2cd46d7 100644 --- a/.github/workflows/auto-build-doc-dev.yml +++ b/.github/workflows/auto-build-doc-dev.yml @@ -7,12 +7,12 @@ concurrency: on: workflow_dispatch: pull_request: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'doc/**' - '.github/workflows/**' push: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'doc/**' - '.github/workflows/**' diff --git a/.github/workflows/auto-build-main-dev.yml b/.github/workflows/auto-build-main-dev.yml index dbd69ac12..2815c50df 100644 --- a/.github/workflows/auto-build-main-dev.yml +++ b/.github/workflows/auto-build-main-dev.yml @@ -7,14 +7,14 @@ concurrency: on: workflow_dispatch: pull_request: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'backend/**' - 'sdk/**' - 'make/main/**' - '.github/workflows/**' push: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'backend/**' - 'sdk/**' diff --git a/.github/workflows/auto-build-mcp-dev.yml b/.github/workflows/auto-build-mcp-dev.yml index dacf04749..03aea08b2 100644 --- a/.github/workflows/auto-build-mcp-dev.yml +++ b/.github/workflows/auto-build-mcp-dev.yml @@ -7,14 +7,14 @@ concurrency: on: workflow_dispatch: pull_request: - branches: [develop] + branches: 
[develop, 'release/**', 'hotfix/**'] paths: - 'backend/**' - 'sdk/**' - 'make/mcp/**' - '.github/workflows/**' push: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'backend/**' - 'sdk/**' diff --git a/.github/workflows/auto-build-terminal-dev.yml b/.github/workflows/auto-build-terminal-dev.yml index fbc251edb..62fc20165 100644 --- a/.github/workflows/auto-build-terminal-dev.yml +++ b/.github/workflows/auto-build-terminal-dev.yml @@ -7,12 +7,12 @@ concurrency: on: workflow_dispatch: pull_request: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'make/terminal/**' - '.github/workflows/**' push: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'make/terminal/**' - '.github/workflows/**' diff --git a/.github/workflows/auto-build-web-dev.yml b/.github/workflows/auto-build-web-dev.yml index 28f967894..a5abeb0b3 100644 --- a/.github/workflows/auto-build-web-dev.yml +++ b/.github/workflows/auto-build-web-dev.yml @@ -7,13 +7,13 @@ concurrency: on: workflow_dispatch: pull_request: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'frontend/**' - 'make/web/**' - '.github/workflows/**' push: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'frontend/**' - 'make/web/**' diff --git a/.github/workflows/auto-unit-test.yml b/.github/workflows/auto-unit-test.yml index 1e853dd25..1595fc769 100644 --- a/.github/workflows/auto-unit-test.yml +++ b/.github/workflows/auto-unit-test.yml @@ -12,14 +12,14 @@ on: required: false default: '["ubuntu-24.04-arm"]' pull_request: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'backend/**' - 'sdk/**' - 'test/**' - '.github/workflows/**' push: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'backend/**' - 'sdk/**' diff --git a/.github/workflows/auto-web-check-dev.yml b/.github/workflows/auto-web-check-dev.yml index 
cd107b6e5..ae831a3fb 100644 --- a/.github/workflows/auto-web-check-dev.yml +++ b/.github/workflows/auto-web-check-dev.yml @@ -11,12 +11,12 @@ on: description: 'runner array in json format (e.g. ["ubuntu-latest"] or ["self-hosted"])' default: '["ubuntu-latest"]' pull_request: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'frontend/**' - '.github/workflows/**' push: - branches: [develop] + branches: [develop, 'release/**', 'hotfix/**'] paths: - 'frontend/**' - '.github/workflows/**' diff --git a/.github/workflows/docker-build-push-mainland.yml b/.github/workflows/docker-build-push-mainland.yml index 1aa41b560..8c215c7ec 100644 --- a/.github/workflows/docker-build-push-mainland.yml +++ b/.github/workflows/docker-build-push-mainland.yml @@ -16,10 +16,15 @@ on: description: 'runner array in json format (e.g. ["ubuntu-latest"] or ["self-hosted"])' required: true default: '["ubuntu-latest"]' + push: + branches: + - main + tags: + - 'v*' jobs: build-and-push-main-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -32,20 +37,20 @@ jobs: uses: actions/checkout@v4 - name: Build main image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-amd64 -f make/main/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . + docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/main/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . 
- name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push main image (amd64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-amd64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag main image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent:amd64 - name: Push latest main image (amd64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:amd64 build-and-push-main-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -58,20 +63,20 @@ jobs: uses: actions/checkout@v4 - name: Build main image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-arm64 -f make/main/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . 
+ docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/main/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push main image (arm64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-arm64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag main image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent:arm64 - name: Push latest main image (arm64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:arm64 build-and-push-data-process-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Free up disk space 
on GitHub runner run: | @@ -93,20 +98,20 @@ jobs: rm -rf .git .gitattributes - name: Build data process image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-amd64 -f make/data_process/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . + docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/data_process/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push data process image (amd64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-amd64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag data process image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:amd64 - name: 
Push latest data process image (amd64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:amd64 build-and-push-data-process-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Free up disk space on GitHub runner run: | @@ -128,20 +133,20 @@ jobs: rm -rf .git .gitattributes - name: Build data process image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-arm64 -f make/data_process/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . + docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/data_process/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . 
- name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push data process image (arm64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-arm64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag data process image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:arm64 - name: Push latest data process image (arm64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:arm64 build-and-push-web-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -154,20 +159,20 @@ jobs: uses: actions/checkout@v4 - name: Build web image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-amd64 
-f make/web/Dockerfile --build-arg MIRROR=https://registry.npmmirror.com --build-arg APK_MIRROR=tsinghua . + docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/web/Dockerfile --build-arg MIRROR=https://registry.npmmirror.com --build-arg APK_MIRROR=tsinghua . - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push web image (amd64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-amd64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag web image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-web:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-web:amd64 - name: Push latest web image (amd64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:amd64 build-and-push-web-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + 
runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -180,20 +185,20 @@ jobs: uses: actions/checkout@v4 - name: Build web image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-arm64 -f make/web/Dockerfile --build-arg MIRROR=https://registry.npmmirror.com --build-arg APK_MIRROR=tsinghua . + docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/web/Dockerfile --build-arg MIRROR=https://registry.npmmirror.com --build-arg APK_MIRROR=tsinghua . - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push web image (arm64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-arm64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag web image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-web:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-web:arm64 - name: Push latest web image (arm64) 
to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:arm64 build-and-push-terminal-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -206,20 +211,20 @@ jobs: uses: actions/checkout@v4 - name: Build terminal image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 -f make/terminal/Dockerfile . + docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/terminal/Dockerfile . 
- name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push terminal image (amd64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag terminal image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:amd64 - name: Push latest terminal image (amd64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:amd64 build-and-push-terminal-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -232,20 +237,20 @@ jobs: uses: actions/checkout@v4 - name: Build terminal image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 --load -t 
ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 -f make/terminal/Dockerfile . + docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/terminal/Dockerfile . - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push terminal image (arm64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag terminal image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:arm64 - name: Push latest terminal image (arm64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:arm64 build-and-push-mcp-amd64: - runs-on: ${{ 
fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -258,20 +263,20 @@ jobs: uses: actions/checkout@v4 - name: Build MCP image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-amd64 -f make/mcp/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . + docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/mcp/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push MCP image (amd64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-amd64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag MCP image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 
ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:amd64 - name: Push latest MCP image (amd64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:amd64 build-and-push-mcp-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -284,16 +289,16 @@ jobs: uses: actions/checkout@v4 - name: Build MCP image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-arm64 -f make/mcp/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . + docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/mcp/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua . 
- name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Push MCP image (arm64) to Tencent Cloud - run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-arm64 + run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag MCP image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:arm64 - name: Push latest MCP image (arm64) to Tencent Cloud - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:arm64 manifest-push-main: @@ -305,13 +310,14 @@ jobs: - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Create and push manifest for main (Tencent Cloud) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }} \ - ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-amd64 \ - 
ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-arm64 - docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }} + docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for main (Tencent Cloud) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent:latest \ ccr.ccs.tencentyun.com/nexent-hub/nexent:amd64 \ @@ -327,13 +333,14 @@ jobs: - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Create and push manifest for data-process (Tencent Cloud) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }} \ - ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-amd64 \ - ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-arm64 - docker manifest push 
ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }} + docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for data-process (Tencent Cloud) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:latest \ ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:amd64 \ @@ -349,13 +356,14 @@ jobs: - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Create and push manifest for web (Tencent Cloud) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }} \ - ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-amd64 \ - ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-arm64 - docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }} + docker manifest 
create ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for web (Tencent Cloud) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-web:latest \ ccr.ccs.tencentyun.com/nexent-hub/nexent-web:amd64 \ @@ -371,13 +379,14 @@ jobs: - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Create and push manifest for terminal (Tencent Cloud) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }} \ - ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 \ - ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 - docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }} + docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || 
(github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for terminal (Tencent Cloud) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:latest \ ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:amd64 \ @@ -393,13 +402,14 @@ jobs: - name: Login to Tencent Cloud run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin - name: Create and push manifest for mcp (Tencent Cloud) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }} \ - ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-amd64 \ - ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-arm64 - docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }} + docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || 
github.ref_name }} \ + ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for mcp (Tencent Cloud) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:latest \ ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:amd64 \ diff --git a/.github/workflows/docker-build-push-overseas.yml b/.github/workflows/docker-build-push-overseas.yml index d19c2600a..dcbe9d642 100644 --- a/.github/workflows/docker-build-push-overseas.yml +++ b/.github/workflows/docker-build-push-overseas.yml @@ -16,10 +16,15 @@ on: description: 'runner array in json format (e.g. ["ubuntu-latest"] or ["self-hosted"])' required: true default: '["ubuntu-latest"]' + push: + branches: + - main + tags: + - 'v*' jobs: build-and-push-main-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -32,20 +37,20 @@ jobs: uses: actions/checkout@v4 - name: Build main image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 -t nexent/nexent:${{ inputs.version }}-amd64 --load -f make/main/Dockerfile . 
+ docker buildx build --platform linux/amd64 -t nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/main/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push main image (amd64) to DockerHub - run: docker push nexent/nexent:${{ inputs.version }}-amd64 + run: docker push nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag main image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent:${{ inputs.version }}-amd64 nexent/nexent:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent:amd64 - name: Push latest main image (amd64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent:amd64 build-and-push-main-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -58,20 +63,20 @@ jobs: uses: actions/checkout@v4 - name: Build main image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 -t nexent/nexent:${{ inputs.version }}-arm64 --load -f make/main/Dockerfile . 
+ docker buildx build --platform linux/arm64 -t nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/main/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push main image (arm64) to DockerHub - run: docker push nexent/nexent:${{ inputs.version }}-arm64 + run: docker push nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag main image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent:${{ inputs.version }}-arm64 nexent/nexent:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent:arm64 - name: Push latest main image (arm64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent:arm64 build-and-push-data-process-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Free up disk space on GitHub runner run: | @@ -93,20 +98,20 @@ jobs: rm -rf .git .gitattributes - name: Build data process image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 -t nexent/nexent-data-process:${{ inputs.version }}-amd64 --load -f make/data_process/Dockerfile . 
+ docker buildx build --platform linux/amd64 -t nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/data_process/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push data process image (amd64) to DockerHub - run: docker push nexent/nexent-data-process:${{ inputs.version }}-amd64 + run: docker push nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag data process image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent-data-process:${{ inputs.version }}-amd64 nexent/nexent-data-process:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent-data-process:amd64 - name: Push latest data process image (amd64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent-data-process:amd64 build-and-push-data-process-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Free up disk space on GitHub runner run: | @@ -128,20 +133,20 @@ jobs: rm -rf .git .gitattributes - name: Build data process image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 -t nexent/nexent-data-process:${{ 
inputs.version }}-arm64 --load -f make/data_process/Dockerfile . + docker buildx build --platform linux/arm64 -t nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/data_process/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push data process image (arm64) to DockerHub - run: docker push nexent/nexent-data-process:${{ inputs.version }}-arm64 + run: docker push nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag data process image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent-data-process:${{ inputs.version }}-arm64 nexent/nexent-data-process:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent-data-process:arm64 - name: Push latest data process image (arm64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent-data-process:arm64 build-and-push-web-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -154,20 +159,20 @@ jobs: uses: actions/checkout@v4 - name: Build web image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 -t 
nexent/nexent-web:${{ inputs.version }}-amd64 --load -f make/web/Dockerfile . + docker buildx build --platform linux/amd64 -t nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/web/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push web image (amd64) to DockerHub - run: docker push nexent/nexent-web:${{ inputs.version }}-amd64 + run: docker push nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag web image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent-web:${{ inputs.version }}-amd64 nexent/nexent-web:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent-web:amd64 - name: Push latest web image (amd64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent-web:amd64 build-and-push-web-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -180,20 +185,20 @@ jobs: uses: actions/checkout@v4 - name: Build web image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 -t nexent/nexent-web:${{ inputs.version }}-arm64 --load -f make/web/Dockerfile . 
+ docker buildx build --platform linux/arm64 -t nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/web/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push web image (arm64) to DockerHub - run: docker push nexent/nexent-web:${{ inputs.version }}-arm64 + run: docker push nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag web image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent-web:${{ inputs.version }}-arm64 nexent/nexent-web:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent-web:arm64 - name: Push latest web image (arm64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent-web:arm64 build-and-push-terminal-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -206,20 +211,20 @@ jobs: uses: actions/checkout@v4 - name: Build terminal image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 -t nexent/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 --load -f make/terminal/Dockerfile . 
+ docker buildx build --platform linux/amd64 -t nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/terminal/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push terminal image (amd64) to DockerHub - run: docker push nexent/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 + run: docker push nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag terminal image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 nexent/nexent-ubuntu-terminal:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent-ubuntu-terminal:amd64 - name: Push latest terminal image (amd64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent-ubuntu-terminal:amd64 build-and-push-terminal-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -232,20 +237,20 @@ jobs: uses: actions/checkout@v4 - name: Build terminal image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 -t nexent/nexent-ubuntu-terminal:${{ inputs.version 
}}-arm64 --load -f make/terminal/Dockerfile . + docker buildx build --platform linux/arm64 -t nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/terminal/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push terminal image (arm64) to DockerHub - run: docker push nexent/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 + run: docker push nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag terminal image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 nexent/nexent-ubuntu-terminal:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent-ubuntu-terminal:arm64 - name: Push latest terminal image (arm64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent-ubuntu-terminal:arm64 build-and-push-mcp-amd64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -258,20 +263,20 @@ jobs: uses: actions/checkout@v4 - name: Build MCP image (amd64) and load locally run: | - docker buildx build --platform linux/amd64 -t 
nexent/nexent-mcp:${{ inputs.version }}-amd64 --load -f make/mcp/Dockerfile . + docker buildx build --platform linux/amd64 -t nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/mcp/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push MCP image (amd64) to DockerHub - run: docker push nexent/nexent-mcp:${{ inputs.version }}-amd64 + run: docker push nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 - name: Tag MCP image (amd64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent-mcp:${{ inputs.version }}-amd64 nexent/nexent-mcp:amd64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent-mcp:amd64 - name: Push latest MCP image (amd64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent-mcp:amd64 build-and-push-mcp-arm64: - runs-on: ${{ fromJson(inputs.runner_label_json) }} + runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }} steps: - name: Set up Docker Buildx run: | @@ -284,16 +289,16 @@ jobs: uses: actions/checkout@v4 - name: Build MCP image (arm64) and load locally run: | - docker buildx build --platform linux/arm64 -t nexent/nexent-mcp:${{ inputs.version }}-arm64 --load -f make/mcp/Dockerfile . 
+ docker buildx build --platform linux/arm64 -t nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/mcp/Dockerfile . - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Push MCP image (arm64) to DockerHub - run: docker push nexent/nexent-mcp:${{ inputs.version }}-arm64 + run: docker push nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 - name: Tag MCP image (arm64) as latest - if: inputs.push_latest == 'true' - run: docker tag nexent/nexent-mcp:${{ inputs.version }}-arm64 nexent/nexent-mcp:arm64 + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') + run: docker tag nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent-mcp:arm64 - name: Push latest MCP image (arm64) to DockerHub - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: docker push nexent/nexent-mcp:arm64 manifest-push-main: @@ -305,13 +310,14 @@ jobs: - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Create and push manifest for main (DockerHub) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create nexent/nexent:${{ inputs.version }} \ - nexent/nexent:${{ inputs.version }}-amd64 \ - nexent/nexent:${{ inputs.version }}-arm64 - docker manifest push nexent/nexent:${{ inputs.version }} + docker manifest create nexent/nexent:${{ 
github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for main (DockerHub) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create nexent/nexent:latest \ nexent/nexent:amd64 \ @@ -327,13 +333,14 @@ jobs: - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Create and push manifest for data-process (DockerHub) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create nexent/nexent-data-process:${{ inputs.version }} \ - nexent/nexent-data-process:${{ inputs.version }}-amd64 \ - nexent/nexent-data-process:${{ inputs.version }}-arm64 - docker manifest push nexent/nexent-data-process:${{ inputs.version }} + docker manifest create nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || 
github.ref_name }}-arm64 + docker manifest push nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for data-process (DockerHub) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create nexent/nexent-data-process:latest \ nexent/nexent-data-process:amd64 \ @@ -349,13 +356,14 @@ jobs: - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Create and push manifest for web (DockerHub) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create nexent/nexent-web:${{ inputs.version }} \ - nexent/nexent-web:${{ inputs.version }}-amd64 \ - nexent/nexent-web:${{ inputs.version }}-arm64 - docker manifest push nexent/nexent-web:${{ inputs.version }} + docker manifest create nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for web (DockerHub) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 
'refs/heads/main') run: | docker manifest create nexent/nexent-web:latest \ nexent/nexent-web:amd64 \ @@ -371,13 +379,14 @@ jobs: - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Create and push manifest for terminal (DockerHub) + if: github.event_name != 'push' || github.ref != 'refs/heads/main' run: | - docker manifest create nexent/nexent-ubuntu-terminal:${{ inputs.version }} \ - nexent/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 \ - nexent/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 - docker manifest push nexent/nexent-ubuntu-terminal:${{ inputs.version }} + docker manifest create nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for terminal (DockerHub) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create nexent/nexent-ubuntu-terminal:latest \ nexent/nexent-ubuntu-terminal:amd64 \ @@ -393,13 +402,14 @@ jobs: - name: Login to DockerHub run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin - name: Create and push manifest for mcp (DockerHub) + if: github.event_name != 'push' || github.ref != 
'refs/heads/main' run: | - docker manifest create nexent/nexent-mcp:${{ inputs.version }} \ - nexent/nexent-mcp:${{ inputs.version }}-amd64 \ - nexent/nexent-mcp:${{ inputs.version }}-arm64 - docker manifest push nexent/nexent-mcp:${{ inputs.version }} + docker manifest create nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \ + nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \ + nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 + docker manifest push nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} - name: Create and push latest manifest for mcp (DockerHub) - if: inputs.push_latest == 'true' + if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main') run: | docker manifest create nexent/nexent-mcp:latest \ nexent/nexent-mcp:amd64 \ diff --git a/.gitignore b/.gitignore index d1b2af30b..20de73e8a 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,12 @@ model-assets/ .opencode/ openspec/ logs/ + +.devspace/ +devspace.yaml +k8s/helm/**/*.tgz +k8s/helm/nexent/Chart.lock + +MAC_DEVELOPMENT_GUIDE.md +# Mac本地开发数据持久化(无需提交) +data/ diff --git a/README.md b/README.md index 894cd1862..51eb0927b 100644 --- a/README.md +++ b/README.md @@ -11,111 +11,106 @@ Nexent is a zero-code platform for auto-generating production-grade AI agents, b > One prompt. Endless reach. 
-### 🌐 Visit our [official website](https://nexent.tech/) + -![Nexent Banner](./assets/architecture_en.png) +# 🚀 Get Started Now -https://github.com/user-attachments/assets/db6b7f5a-9ee8-4327-ae6f-c5af896126b4 +> ⭐ Before you get started, please star us on [GitHub](https://github.com/ModelEngine-Group/nexent) — your support drives us forward! -# ⚡ Have a try first +## Option 1: Try Our Official Demo -### 📋 Prerequisites +No installation required — jump right in with our **[online demo environment](http://60.204.251.153:3000/en)** to experience Nexent's capabilities instantly. -| Resource | Minimum | -|----------|---------| -| **CPU** | 2 cores | -| **RAM** | 6 GiB | -| **Software** | Docker & Docker Compose installed | +## Option 2: Deploy on Your Own -### 🛠️ Quick start with Docker Compose +If you need to run Nexent locally or in your private infrastructure, we offer two deployment options: -```bash -git clone https://github.com/ModelEngine-Group/nexent.git -cd nexent/docker -cp .env.example .env # fill only necessary configs -bash deploy.sh -``` - -When the containers are running, open **http://localhost:3000** in your browser and follow the setup wizard. - -# 🤝 Join Our Community - -> *If you want to go fast, go alone; if you want to go far, go together.* - -We have released **Nexent v1**, and the platform is now relatively stable. However, there may still be some bugs, and we are continuously improving and adding new features. Stay tuned: we will announce **v2.0** soon! - -* **🗺️ Check our [Feature Map](https://github.com/orgs/ModelEngine-Group/projects/6)** to explore current and upcoming features. -* **🔍 Try the current build** and leave ideas or bugs in the [Issues](https://github.com/ModelEngine-Group/nexent/issues) tab. -* **🐛 Check our [Known Issues page](https://github.com/orgs/ModelEngine-Group/projects/9)** for the latest issue status and solutions. 
- -> *Rome wasn't built in a day.* - -If our vision speaks to you, jump in via the **[Contribution Guide](https://modelengine-group.github.io/nexent/en/contributing)** and shape Nexent with us. - -Early contributors won't go unnoticed: from special badges and swag to other tangible rewards, we're committed to thanking the pioneers who help bring Nexent to life. +### System Requirements -Most of all, we need visibility. Star ⭐ and watch the repo, share it with friends, and help more developers discover Nexent — your click brings new hands to the project and keeps the momentum growing. - -## 💬 Community & contact - -- Browse the [Documentation](https://modelengine-group.github.io/nexent) for more information. -- Join our [Discord community](https://discord.gg/tb5H3S3wyv) to chat with other developers and get help! -- Conntact us by Wechat, find our QR Code in our [website](https://nexent.tech/en/contact) +| Resource | Docker | Kubernetes | +|----------|--------|-------------| +| **CPU** | 4 cores (min) / 8 cores (rec.) | 4 cores (min) / 8 cores (rec.) | +| **Memory** | 8 GiB (min) / 16 GiB (rec.) | 16 GiB (min) / 64 GiB (rec.) | +| **Disk** | 40 GiB (min) / 100 GiB (rec.) | 100 GiB (min) / 200 GiB (rec.) | +| **Architecture** | x86_64 / ARM64 | x86_64 / ARM64 | +| **Software** | Docker 24+, Docker Compose v2+ | Kubernetes 1.24+, Helm 3+ | -# ✨ Key Features +> **Note:** Recommended configurations ensure optimal performance in production environments. -`1` **Smart agent prompt generation** - Turn plain language into runnable prompts. Nexent automatically chooses the right tools and plans the best action path for every request. +### Docker Deployment (Recommended for Individuals/Small Teams) - ![Feature 1](./assets/Feature1.png) +Quick and straightforward for most users. 
Prerequisites: Docker 24+ and Docker Compose v2+: -`2` **Scalable data process engine** - Process 20+ data formats with fast OCR and table structure extraction, scaling smoothly from a single process to large-batch pipelines. - - ![Feature 2](./assets/Feature2.png) +```bash +git clone https://github.com/ModelEngine-Group/nexent.git +cd nexent/docker +cp .env.example .env +bash deploy.sh +``` -`3` **Personal-grade knowledge base** - Import files in real time, auto-summarise them, and let agents access both personal and global knowledge instantly, also knowing what it can get from each knowledge base. +For detailed deployment instructions, see [Docker Installation](https://modelengine-group.github.io/nexent/en/quick-start/installation.html). - ![Feature 3](./assets/Feature3.png) +### Kubernetes Deployment (For Enterprise Production) -`4` **Internet knowledge search** - Connect to 5+ web search providers so agents can mix fresh internet facts with your private data. +Ideal for enterprise scenarios requiring high availability and elastic scaling. Prerequisites: Kubernetes 1.24+ and Helm 3+: - ![Feature 4](./assets/Feature4.png) +```bash +git clone https://github.com/ModelEngine-Group/nexent.git +cd nexent/k8s/helm +./deploy-helm.sh apply +``` -`5` **Knowledge-level traceability** - Serve answers with precise citations from web and knowledge-base sources, making every fact verifiable. +For detailed deployment instructions, see [Kubernetes Installation](https://modelengine-group.github.io/nexent/en/quick-start/kubernetes-installation.html). - ![Feature 5](./assets/Feature5.png) +# ✨ Core Features -`6` **Multimodal understanding & dialogue** - Speak, type, files, or show images. Nexent understands voice, text, and pictures, and can even generate new images on demand. 
+Nexent provides a comprehensive feature set for building powerful AI agents: - ![Feature 6](./assets/Feature6.png) +| Feature | Description | +|---------|-------------| +| **⚙️ Multi-Model Integration** | OpenAI-compatible with any provider, full LLM/Embedding/VLM/STT/TTS coverage, supports domestic model switching | +| **🤖 Zero-Code Agent Generation** | Describe requirements in natural language, generate executable agents instantly, what you think is what you get | +| **🤝 A2A Agent Collaboration** | Agent-to-Agent protocol enables seamless multi-agent cooperation and distributed workflows | +| **🧠 Layered Memory Mechanism** | Two-tier memory (user-level + user-agent-level) for persistent context across conversations | +| **📝 Progressive Skill Disclosure** | Dynamically loads Skill into context, maximizing context window efficiency | +| **🗄️ Personal-Grade Knowledge Base** | Real-time import and intelligent retrieval for 20+ document formats, auto summaries, fine-grained access control | +| **🔧 MCP Tool Ecosystem** | Plug-and-play extension system with custom development and third-party MCP service support | +| **🌐 Internet Knowledge Integration** | Multi-source search blending real-time information with private data | +| **🔍 Knowledge-Level Traceability** | Precise citations and source verification, full transparency for every fact | +| **🎭 Multimodal Interaction** | Voice, text, images, files — comprehensive natural dialogue | +| **🔢 Agent Version Management** | Version iteration and history rollback, safe and controllable | +| **🏪 Agent Marketplace** | Official and community curated agents, one-click install and use | +| **👥 Multi-Tenancy & RBAC** | Multi-tenant isolation, role-based access control, fine-grained resource management | -`7` **MCP tool ecosystem** - Drop in or build Python plug-ins that follow the MCP spec; swap models, tools, and chains without touching core code. 
+# 🤝 Join Our Community - ![Feature 7](./assets/Feature7.png) +> *If you want to go fast, go alone; if you want to go far, go together.* -# 🌱 MCP Tool Ecosystem +We have released **Nexent v2.0**! A comprehensive upgrade from v1.0, featuring A2A protocol support, progressive Skill disclosure, layered memory mechanism, user management with multi-tenancy, agent version management, agent marketplace, and more. -Check our [MCP Ecosystem page](https://modelengine-group.github.io/nexent/en/mcp-ecosystem/overview.html) for detailed information about the MCP tool ecosystem, including community hubs, recommended tools, and integration guides. +- **🗺️ Check our [Feature Map](https://github.com/orgs/ModelEngine-Group/projects/6)** to explore current and upcoming features. +- **🔍 Try the current build** and leave ideas or bugs in the [Issues](https://github.com/ModelEngine-Group/nexent/issues) tab. -# 🛠️ Developer Guide +> *Rome wasn't built in a day.* -### 🤖 Model Configuration & Provider Recommendations +If our vision speaks to you, jump in via the **[Contribution Guide](https://modelengine-group.github.io/nexent/en/contributing)** and shape Nexent with us. -Check our [Model Providers page](https://modelengine-group.github.io/nexent/en/getting-started/model-providers.html) for detailed model configuration guides and recommended provider information. +Early contributors won't go unnoticed: from special badges and swag to other tangible rewards, we're committed to thanking the pioneers who help bring Nexent to life. -### 🔧 Hack on Nexent +Most of all, we need visibility. Star ⭐ and watch the repo, share it with friends, and help more developers discover Nexent — your click brings new hands to the project and keeps the momentum growing. -Want to build from source or add new features? Check the [Contribution Guide](https://modelengine-group.github.io/nexent/en/contributing) for step-by-step instructions. +# 📖 What's Next -### 🛠️ Build from Source +Ready to dive deeper? 
Here are the main documentation entry points: -Prefer to run Nexent from source code? Follow our [Developer Guide](https://modelengine-group.github.io/nexent/en/getting-started/development-guide) for detailed setup instructions and customization options. +- **[Quick Start](https://modelengine-group.github.io/nexent/en/quick-start/installation.html)** — System requirements and deployment guide +- **[Core Features](https://modelengine-group.github.io/nexent/en/getting-started/features.html)** — Comprehensive feature documentation +- **[User Guide](https://modelengine-group.github.io/nexent/en/user-guide/home-page.html)** — Agent development and usage +- **[Developer Guide](https://modelengine-group.github.io/nexent/en/developer-guide/overview)** — Build from source and customization +- **[FAQ](https://modelengine-group.github.io/nexent/en/quick-start/faq.html)** — Common questions and troubleshooting # 📄 License diff --git a/README_CN.md b/README_CN.md index c16de5d32..032776418 100644 --- a/README_CN.md +++ b/README_CN.md @@ -11,111 +11,104 @@ Nexent 是一个基于 **Harness Engineering** 原则打造的零代码智能体 > 一个提示词,无限种可能。 -### 🌐 访问我们的[官方网站](https://nexent.tech/) + -![Nexent Banner](./assets/architecture_zh.png) +# 🚀 先来试试看 -https://github.com/user-attachments/assets/b844e05d-5277-4509-9463-1c5b3516f11e +> ⭐ 在您开始使用前,请您顺手在 [GitHub](https://github.com/ModelEngine-Group/nexent) 为我们点个 Star,您的支持是我们前进的动力! 
-# ⚡ 先来试试看 +## 方式一:使用官方体验环境 -### 📋 系统要求 +无需安装,直接访问我们的 **[在线体验环境](http://60.204.251.153:3000/zh)**,快速体验 Nexent 的强大功能。 -| 资源 | 最低要求 | -|----------|---------| -| **CPU** | 2 核 | -| **内存** | 6 GiB | -| **软件** | 已安装 Docker 和 Docker Compose | +## 方式二:自行部署 -### 🛠️ 使用 Docker Compose 快速开始 +如果需要在本地或私有环境中部署 Nexent,我们提供两种部署方式: -```bash -git clone https://github.com/ModelEngine-Group/nexent.git -cd nexent/docker -cp .env.example .env # fill only necessary configs -bash deploy.sh -``` - -当容器运行后,在浏览器中打开 **http://localhost:3000** 并按照设置向导操作。 - -# 🤝 加入我们的社区 - -> *If you want to go fast, go alone; if you want to go far, go together.* - -我们已经发布了 **Nexent v1**,平台现在相对稳定。但是,可能仍然存在一些 bug,我们正在持续改进并添加新功能。敬请期待:我们很快将宣布 **v2.0**! - -* **🗺️ 查看我们的 [功能地图](https://github.com/orgs/ModelEngine-Group/projects/6)** 探索当前和即将推出的功能。 -* **🔍 试用当前版本** 并在 [问题反馈](https://github.com/ModelEngine-Group/nexent/issues) 中留下想法或报告错误。 -* **🐛 查看我们的[已知问题页面](https://github.com/orgs/ModelEngine-Group/projects/9)** 了解最新的问题状态和解决方案。 - -> *Rome wasn't built in a day.* - -如果我们的愿景与您产生共鸣,请通过 **[贡献指南](https://modelengine-group.github.io/nexent/zh/contributing)** 加入我们,共同塑造 Nexent。 - -早期贡献者不会被忽视:从特殊徽章和纪念品到其他实质性奖励,我们致力于感谢那些帮助 Nexent 诞生的先驱者。 +### 系统要求 -最重要的是,我们需要关注度。请为仓库点星 ⭐ 并关注,与朋友分享,帮助更多开发者发现 Nexent —— 您的每一次点击都能为项目带来新的参与者,保持发展势头。 +| 资源 | Docker 部署 | Kubernetes 部署 | +|------|------------|----------------| +| **CPU** | 4 核(最低)/ 8 核(推荐) | 4 核(最低)/ 8 核(推荐) | +| **内存** | 8 GiB(最低)/ 16 GiB(推荐) | 16 GiB(最低)/ 64 GiB(推荐) | +| **磁盘** | 40 GiB(最低)/ 100 GiB(推荐) | 100 GiB(最低)/ 200 GiB(推荐) | +| **架构** | x86_64 / ARM64 | x86_64 / ARM64 | +| **软件** | Docker 24+, Docker Compose v2+ | Kubernetes 1.24+, Helm 3+ | -## 💬 社区与联系方式 +> **注意:** 推荐配置可确保生产环境下的最佳性能。 -- 浏览 [文档](https://modelengine-group.github.io/nexent) 了解更多信息。 -- 加入我们的 [Discord 社区](https://discord.gg/tb5H3S3wyv) 与其他开发者交流并获取帮助! 
-- 通过微信联系我们,在我们的[网站](https://nexent.tech/zh/contact)找到二维码 +### Docker 部署(推荐个人/小团队使用) -# ✨ 主要特性 +适用于大多数用户,快速简单。部署前需准备Docker 24+, Docker Compose v2+: -`1` **智能体提示词自动生成** - 将自然语言转化为可被Agent执行的提示词。Nexent可以根据你的需要自动选择正确的工具并为每个请求规划最佳执行路径。 - - ![Feature 1](./assets/Feature1.png) - -`2` **可扩展数据处理引擎** - 支持 20+ 数据格式的快速 OCR 和表格结构提取,从单进程到大规模批处理管道都能平滑扩展。 - - ![Feature 2](./assets/Feature2.png) - -`3` **个人级知识库** - 实时导入文件,自动总结,让智能体能够即时访问个人和全局知识,并了解每个知识库能提供什么。 +```bash +git clone https://github.com/ModelEngine-Group/nexent.git +cd nexent/docker +cp .env.example .env +bash deploy.sh +``` - ![Feature 3](./assets/Feature3.png) +详细部署指南请参考 [Docker 安装部署](https://modelengine-group.github.io/nexent/zh/quick-start/installation.html)。 -`4` **互联网知识搜索** - 连接 5+ 个网络搜索提供商,让智能体能够将最新的互联网信息与您的私有数据结合。 +### Kubernetes 部署(适合企业级生产环境) - ![Feature 4](./assets/Feature4.png) +适用于需要高可用、弹性扩展的企业场景。部署前需准备 Kubernetes 集群(1.24+)和 Helm 3+: -`5` **知识级可追溯性** - 提供来自网络和知识库来源的精确引用,使每个事实都可验证。 +```bash +git clone https://github.com/ModelEngine-Group/nexent.git +cd nexent/k8s/helm +./deploy-helm.sh apply +``` - ![Feature 5](./assets/Feature5.png) +详细部署指南请参考 [Kubernetes 安装部署](https://modelengine-group.github.io/nexent/zh/quick-start/kubernetes-installation.html)。 -`6` **多模态理解与对话** - 说话、打字、文件或展示图片。Nexent 理解语音、文本和图片,甚至可以根据需求生成新图像。 +# ✨ 核心特性 - ![Feature 6](./assets/Feature6.png) +Nexent 为构建强大的 AI 智能体提供全面的功能集: -`7` **MCP 工具生态系统** - 插入或构建符合 MCP 规范的 Python 插件;无需修改核心代码即可更换模型、工具和链。 +| 特性 | 描述 | +|------|------| +| **⚙️ 多模型集成** | OpenAI 兼容任意提供商,LLM/Embedding/VLM/STT/TTS 全覆盖,支持灵活切换 | +| **🤖 零代码智能体生成** | 纯自然语言描述需求,一键生成可执行智能体,所想即所得 | +| **🤝 A2A 智能体协作** | Agent-to-Agent 协议支持多智能体无缝协作,构建分布式工作流 | +| **🧠 分层记忆机制** | 两层记忆体系(用户级+用户-智能体级),跨对话持续积累上下文 | +| **📝 Skill 渐进式披露** | 动态加载 Skill 内容至上下文,高效利用上下文窗口 | +| **🗄️ 个人级知识库** | 20+ 文档格式实时导入与智能检索,自动摘要,细粒度权限控制 | +| **🔧 MCP 工具生态** | 即插即用的扩展工具体系,支持自定义开发和第三方 MCP 服务 | +| **🌐 互联网知识集成** | 多搜索源混合,实时信息与私有数据融合 | +| **🔍 知识级溯源** | 精确引用与来源验证,每个事实透明可查 | +| **🎭 多模态交互** | 语音、文字、图像、文件,全方位自然对话 | +| **🔢 智能体版本管理** | 
版本迭代与历史回溯,安全可控 | +| **🏪 智能体市场** | 官方与社区优质智能体一键安装即用 | +| **👥 分权分域管理** | 多租户隔离,RBAC 权限体系,资源级精细管控 | - ![Feature 7](./assets/Feature7.png) +# 🤝 加入我们的社区 -# 🌱 MCP 工具生态 +> *If you want to go fast, go alone; if you want to go far, go together.* -查看我们的[MCP 生态系统页面](https://modelengine-group.github.io/nexent/zh/mcp-ecosystem/overview.html)了解 MCP 工具生态系统的详细信息,包括社区中心、推荐工具和集成指南。 +- **🗺️ 查看我们的 [功能地图](https://github.com/orgs/ModelEngine-Group/projects/6)** 探索当前和即将推出的功能。 +- **🔍 试用当前版本** 并在 [问题反馈](https://github.com/ModelEngine-Group/nexent/issues) 中留下想法或报告错误。 -# 🛠️ 开发者指南 +> *Rome wasn't built in a day.* -### 🤖 模型配置与模型提供商推荐 +如果我们的愿景与您产生共鸣,请通过 **[贡献指南](https://modelengine-group.github.io/nexent/zh/contributing)** 加入我们,共同塑造 Nexent。 -查看我们的[模型提供商页面](https://modelengine-group.github.io/nexent/zh/getting-started/model-providers.html)了解详细的模型配置指南和推荐的提供商信息。 +早期贡献者不会被忽视:从特殊徽章和纪念品到其他实质性奖励,我们致力于感谢那些帮助 Nexent 诞生的先驱者。 -### 🔧 开发 Nexent +最重要的是,我们需要关注度。请 [前往 GitHub](https://github.com/ModelEngine-Group/nexent) 为我们点星 ⭐ 并关注,与朋友分享,帮助更多开发者发现 Nexent —— 您的每一次点击都能为项目带来新的参与者,保持发展势头。 -想要从源代码构建或添加新功能?查看 [贡献指南](https://modelengine-group.github.io/nexent/zh/contributing) 获取分步说明。 +# 📖 下一步 -### 🛠️ 从源码构建 +准备好深入了解了吗?以下是主要文档入口: -想要从源码运行 Nexent?查看我们的[开发者指南](https://modelengine-group.github.io/nexent/zh/getting-started/development-guide)获取详细的设置说明和自定义选项。 +- **[快速开始](https://modelengine-group.github.io/nexent/zh/quick-start/installation.html)** — 系统要求和部署指南 +- **[核心特性详解](https://modelengine-group.github.io/nexent/zh/getting-started/features.html)** — 完整的功能说明 +- **[用户指南](https://modelengine-group.github.io/nexent/zh/user-guide/home-page.html)** — 智能体开发与使用 +- **[开发者指南](https://modelengine-group.github.io/nexent/zh/developer-guide/overview)** — 从源码构建和自定义 +- **[常见问题](https://modelengine-group.github.io/nexent/zh/quick-start/faq.html)** — 常见问题和故障排除 # 📄 许可证 diff --git a/backend/agents/create_agent_info.py b/backend/agents/create_agent_info.py index 933fcd129..5a11b550b 100644 --- a/backend/agents/create_agent_info.py +++ 
b/backend/agents/create_agent_info.py @@ -14,7 +14,7 @@ from services.vectordatabase_service import ( ElasticSearchService, get_vector_db_core, - get_embedding_model, + get_embedding_model_by_index_name, get_rerank_model, ) from services.remote_mcp_service import get_remote_mcp_server_list @@ -32,7 +32,7 @@ from utils.prompt_template_utils import get_agent_prompt_template from utils.config_utils import tenant_config_manager, get_model_name_from_config from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE, DATA_PROCESS_SERVICE -import re +from consts.exceptions import ValidationError logger = logging.getLogger("create_agent_info") logger.setLevel(logging.DEBUG) @@ -488,11 +488,23 @@ async def create_tool_config_list(agent_id, tenant_id, user_id, version_no: int tool_config.metadata = { "vdb_core": get_vector_db_core(), - "embedding_model": get_embedding_model(tenant_id=tenant_id), + "embedding_model": None, "rerank_model": rerank_model, "display_name_to_index_map": display_name_to_index_map, "index_name_to_display_map": index_name_to_display_map, } + + # Must have embedding model for knowledge base search + if not index_names: + raise ValidationError( + "Embedding model is required for knowledge_base_search but index_names is empty") + + embedding_model, _, _ = get_embedding_model_by_index_name(tenant_id, index_names[0]) + if not embedding_model: + raise ValidationError( + f"No embedding model found for index '{index_names[0]}'. 
" + f"Please configure an embedding model for this knowledge base.") + tool_config.metadata["embedding_model"] = embedding_model elif tool_config.class_name in ["DifySearchTool", "DataMateSearchTool"]: rerank = param_dict.get("rerank", False) rerank_model_name = param_dict.get("rerank_model_name", "") diff --git a/backend/apps/a2a_client_app.py b/backend/apps/a2a_client_app.py index db7acd108..ea149ac31 100644 --- a/backend/apps/a2a_client_app.py +++ b/backend/apps/a2a_client_app.py @@ -5,6 +5,7 @@ Used internally for configuring A2A sub-agents. """ import logging +import uuid from typing import Annotated, List, Optional from http import HTTPStatus @@ -45,6 +46,14 @@ class UpdateAgentProtocolRequest(BaseModel): ) +class TestNacosConnectionRequest(BaseModel): + """Request to test Nacos connectivity without saving the config.""" + nacos_addr: str = Field(description="Nacos server address (e.g., http://nacos-server:8848)") + nacos_username: Optional[str] = None + nacos_password: Optional[str] = None + namespace_id: Optional[str] = "public" + + # ============================================================================= # External Agent Discovery # ============================================================================= @@ -102,7 +111,7 @@ async def discover_from_nacos( results = await a2a_client_service.discover_from_nacos( nacos_config_id=request.nacos_config_id, - agent_names=request.agent_names, + agent_names=[name.strip() for name in request.agent_names], tenant_id=tenant_id, user_id=user_id, namespace=request.namespace @@ -482,6 +491,17 @@ class CreateNacosConfigRequest(BaseModel): description: Optional[str] = None +class UpdateNacosConfigRequest(BaseModel): + """Request to update a Nacos config.""" + name: Optional[str] = None + nacos_addr: Optional[str] = None + nacos_username: Optional[str] = None + nacos_password: Optional[str] = None + namespace_id: Optional[str] = None + description: Optional[str] = None + is_active: Optional[bool] = None + + 
@router.post("/nacos-configs") async def create_nacos_config( request: CreateNacosConfigRequest, @@ -577,6 +597,51 @@ async def get_nacos_config( ) +@router.put("/nacos-configs/{config_id}") +async def update_nacos_config( + config_id: str, + request: UpdateNacosConfigRequest, + authorization: Annotated[Optional[str], Header()] = None, + http_request: Request = None +): + """Update a Nacos configuration.""" + try: + user_id, tenant_id, _ = get_current_user_info(authorization, http_request) + + result = a2a_agent_db.update_nacos_config( + config_id=config_id, + tenant_id=tenant_id, + user_id=user_id, + name=request.name, + nacos_addr=request.nacos_addr, + nacos_username=request.nacos_username, + nacos_password=request.nacos_password, + namespace_id=request.namespace_id, + description=request.description, + is_active=request.is_active + ) + + if not result: + raise HTTPException( + status_code=HTTPStatus.NOT_FOUND, + detail=f"Nacos config {config_id} not found" + ) + + return JSONResponse( + status_code=HTTPStatus.OK, + content={"status": "success", "data": result} + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Update Nacos config failed: {e}", exc_info=True) + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Failed to update Nacos config" + ) + + @router.delete("/nacos-configs/{config_id}") async def delete_nacos_config( config_id: str, @@ -610,6 +675,62 @@ async def delete_nacos_config( ) +@router.post("/nacos-configs/test-connection") +async def test_nacos_connection( + request: TestNacosConnectionRequest, + authorization: Annotated[Optional[str], Header()] = None, + http_request: Request = None +): + """Test connectivity to Nacos server without saving the configuration.""" + from utils.nacos_client import NacosClient, NacosConnectionError + + try: + get_current_user_info(authorization, http_request) + + async with NacosClient( + nacos_addr=request.nacos_addr, + username=request.nacos_username, + 
password=request.nacos_password + ) as client: + result = await client.test_connectivity(namespace=request.namespace_id or "public") + + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "status": "success", + "data": { + "success": result["success"], + "message": result["message"] + } + } + ) + + except NacosConnectionError as e: + logger.warning(f"Nacos connection test failed: {e}") + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "status": "success", + "data": { + "success": False, + "message": str(e) + } + } + ) + except Exception as e: + logger.error(f"Test Nacos connection failed: {e}", exc_info=True) + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "status": "success", + "data": { + "success": False, + "message": f"Failed to test Nacos connection: {e}" + } + } + ) + + # ============================================================================= # External Agent Chat # ============================================================================= @@ -648,11 +769,11 @@ async def chat_with_external_agent( # Build A2A message format following A2A protocol with parts array a2a_message = { + "message_id": f"msg_{uuid.uuid4().hex}", "role": "ROLE_USER", "parts": [ { "text": request_body.message.strip(), - "mediaType": "text/plain" } ], } diff --git a/backend/apps/knowledge_summary_app.py b/backend/apps/knowledge_summary_app.py index e4e11ace9..ab45170fb 100644 --- a/backend/apps/knowledge_summary_app.py +++ b/backend/apps/knowledge_summary_app.py @@ -8,6 +8,7 @@ from consts.model import ChangeSummaryRequest from services.vectordatabase_service import ElasticSearchService, get_vector_db_core from utils.auth_utils import get_current_user_id, get_current_user_info +from utils.config_utils import tenant_config_manager router = APIRouter(prefix="/summary") logger = logging.getLogger("knowledge_summary_app") @@ -31,6 +32,19 @@ async def auto_summary( authorization, http_request) service = ElasticSearchService() + # Get 
model_id from tenant config if not provided + if model_id is None and tenant_id: + try: + tenant_config = tenant_config_manager.load_config(tenant_id) + model_id_str = tenant_config.get("LLM_ID") + if model_id_str: + model_id = int(model_id_str) + logger.info(f"Using LLM_ID {model_id} from tenant config for auto-summary") + else: + logger.warning(f"No LLM_ID configured for tenant {tenant_id}, summary may be placeholder") + except Exception as e: + logger.warning(f"Failed to get LLM_ID from tenant config: {e}") + return await service.summary_index_name( index_name=index_name, batch_size=batch_size, diff --git a/backend/apps/model_managment_app.py b/backend/apps/model_managment_app.py index 0a5a04139..278b729e8 100644 --- a/backend/apps/model_managment_app.py +++ b/backend/apps/model_managment_app.py @@ -372,7 +372,10 @@ async def manage_check_model_health( f"Start to check model connectivity for tenant, user_id: {user_id}, " f"target_tenant_id: {request.tenant_id}, display_name: {request.display_name}") - result = await check_model_connectivity(request.display_name, request.tenant_id) + result = await check_model_connectivity( + request.display_name, + request.tenant_id + ) return JSONResponse(status_code=HTTPStatus.OK, content={ "message": "Successfully checked model connectivity", "data": result diff --git a/backend/apps/vectordatabase_app.py b/backend/apps/vectordatabase_app.py index 872b5387b..6f4232afd 100644 --- a/backend/apps/vectordatabase_app.py +++ b/backend/apps/vectordatabase_app.py @@ -1,30 +1,47 @@ import logging import json from http import HTTPStatus -from typing import Any, Dict, List, Optional +from typing import Annotated, Any, Dict, List, Optional from fastapi import APIRouter, Body, Depends, Header, HTTPException, Path, Query from fastapi.responses import JSONResponse import re from consts.model import ChunkCreateRequest, ChunkUpdateRequest, HybridSearchRequest, IndexingResponse +from consts.scheduler import VALID_SUMMARY_FREQUENCIES, 
SUMMARY_FREQUENCY_OPTIONS_FOR_API from nexent.vector_database.base import VectorDatabaseCore from services.vectordatabase_service import ( ElasticSearchService, - get_embedding_model, + get_embedding_model_by_id, get_vector_db_core, check_knowledge_base_exist_impl, + KnowledgeBaseNeedsModelConfigError, ) from services.redis_service import get_redis_service from utils.auth_utils import get_current_user_id from utils.file_management_utils import get_all_files_status from database.knowledge_db import get_index_name_by_knowledge_name, get_knowledge_record +from database.model_management_db import get_model_by_model_id router = APIRouter(prefix="/indices") service = ElasticSearchService() logger = logging.getLogger("vectordatabase_app") +@router.get("/summary_frequency_options") +async def get_summary_frequency_options(): + """ + Get valid summary frequency options for frontend. + Frontend should call this API to get the list of valid frequencies. + """ + return JSONResponse( + status_code=HTTPStatus.OK, + content={ + "options": SUMMARY_FREQUENCY_OPTIONS_FOR_API, + "valid_values": VALID_SUMMARY_FREQUENCIES, + } + ) + @router.post("/check_exist") async def check_knowledge_base_exist( request: Dict[str, str] = Body( @@ -160,6 +177,186 @@ async def update_index( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error updating index: {str(exc)}") +@router.patch("/{index_name}/summary_frequency") +async def update_summary_frequency_endpoint( + index_name: Annotated[str, Path(..., description="Name of the index to update")], + request: Annotated[Dict[str, Any], Body(..., description="Update payload with summary_frequency")], + authorization: Annotated[Optional[str], Header()] = None, +): + """Update the auto-summary frequency for a knowledge base.""" + try: + user_id, tenant_id = get_current_user_id(authorization) + summary_frequency = request.get("summary_frequency") + + valid_frequencies = VALID_SUMMARY_FREQUENCIES + if summary_frequency not in valid_frequencies: + 
raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail=f"Invalid summary_frequency. Must be one of: {valid_frequencies}" + ) + + from database.knowledge_db import update_summary_frequency + success = update_summary_frequency( + index_name=index_name, + summary_frequency=summary_frequency, + _tenant_id=tenant_id, + user_id=user_id + ) + + if success: + return JSONResponse( + status_code=HTTPStatus.OK, + content={"message": "Summary frequency updated successfully", "status": "success"} + ) + else: + raise HTTPException( + status_code=HTTPStatus.NOT_FOUND, + detail=f"Knowledge base '{index_name}' not found" + ) + except HTTPException: + raise + except Exception as exc: + logger.exception("Error updating summary frequency") + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error updating summary frequency: {str(exc)}" + ) + + +@router.get("/{index_name}/embedding-model-status") +def get_embedding_model_status( + index_name: str = Path(..., description="Name of the index to check"), + authorization: Optional[str] = Header(None) +): + """ + Check the embedding model status of a knowledge base. + Returns information about whether a model is configured and if an update is needed. + + This endpoint is used by the frontend to determine whether to show + a dialog prompting the user to select an embedding model for knowledge bases + that were created before the model ID feature was added. + + Note: The path parameter is the internal index_name. 
+ """ + try: + _, tenant_id = get_current_user_id(authorization) + + # Get the knowledge base record by index_name + knowledge_record = get_knowledge_record({ + "index_name": index_name, + "tenant_id": tenant_id + }) + + if not knowledge_record: + raise HTTPException( + status_code=HTTPStatus.NOT_FOUND, + detail=f"Knowledge base '{index_name}' not found" + ) + + # Check if model_id exists + model_id = knowledge_record.get("embedding_model_id") + embedding_model_name = knowledge_record.get("embedding_model_name") + + # Get model info if model_id exists + model_info = None + if model_id: + model = get_model_by_model_id(model_id, tenant_id) + if model: + model_info = { + "model_id": model.get("model_id"), + "model_name": model.get("model_name"), + "display_name": model.get("display_name"), + "model_type": model.get("model_type"), + } + + # Determine status + if model_id and model_info: + status = "configured" + message = f"Embedding model '{model_info.get('display_name', model_info.get('model_name'))}' is configured" + needs_config = False + elif embedding_model_name: + # Has model name but no model_id (legacy data) + status = "legacy" + message = "This knowledge base was created with an older version. Please select an embedding model to ensure proper functionality." + needs_config = True + else: + # No model configured at all + status = "missing" + message = "No embedding model configured. Please select an embedding model." 
+ needs_config = True + + # Get actual internal index_name from the database record + actual_index_name = knowledge_record.get("index_name") + + return { + "status": status, + "needs_config": needs_config, + "index_name": actual_index_name, + "knowledge_name": knowledge_record.get("knowledge_name"), + "model_id": model_id, + "embedding_model_name": embedding_model_name, + "model_info": model_info, + "message": message, + } + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error getting embedding model status for '{index_name}': {e}", exc_info=True) + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail=f"Error checking embedding model status: {str(e)}" + ) + + +@router.put("/{index_name}/embedding-model") +def update_embedding_model( + index_name: str = Path(..., description="Internal index name of the knowledge base to update"), + request: Dict[str, Any] = Body(..., + description="Update payload with model_id"), + authorization: Optional[str] = Header(None) +): + """ + Update the embedding model for a knowledge base. + This is used when a user selects an embedding model from the dialog + for knowledge bases that don't have a model configured. 
+ """ + try: + user_id, tenant_id = get_current_user_id(authorization) + + model_id = request.get("model_id") + if not model_id: + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail="model_id is required" + ) + + result = ElasticSearchService.update_embedding_model( + index_name=index_name, + model_id=model_id, + tenant_id=tenant_id, + user_id=user_id, + ) + + return JSONResponse( + status_code=HTTPStatus.OK, + content=result + ) + + except ValueError as exc: + raise HTTPException( + status_code=HTTPStatus.NOT_FOUND, + detail=str(exc) + ) + except HTTPException: + raise + except Exception as exc: + logger.error(f"Error updating embedding model for '{index_name}': {exc}", exc_info=True) + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail=f"Error updating embedding model: {str(exc)}" + ) + + @router.get("") def get_list_indices( pattern: str = Query("*", description="Pattern to match index names"), @@ -191,6 +388,8 @@ def create_index_documents( authorization: Optional[str] = Header(None), task_id: Optional[str] = Header( None, alias="X-Task-Id", description="Task ID for progress tracking"), + large_mode: bool = Query( + False, description="Force large-batch path when current request chunk count is below threshold"), ): """ Index documents with embeddings, creating the index if it doesn't exist. 
@@ -198,22 +397,24 @@ def create_index_documents( """ try: user_id, tenant_id = get_current_user_id(authorization) - + # Get the knowledge base record to retrieve the saved embedding model knowledge_record = get_knowledge_record({'index_name': index_name}) - saved_embedding_model_name = None + saved_embedding_model_id = None if knowledge_record: - saved_embedding_model_name = knowledge_record.get('embedding_model_name') - - # Use the saved model from knowledge base, fallback to tenant default if not set - embedding_model = get_embedding_model(tenant_id, saved_embedding_model_name) - + saved_embedding_model_id = knowledge_record.get('embedding_model_id') + + # Use the saved model from knowledge base by model_id + embedding_model, _ = get_embedding_model_by_id(tenant_id, saved_embedding_model_id) if saved_embedding_model_id else (None, None) + return ElasticSearchService.index_documents( embedding_model=embedding_model, index_name=index_name, data=data, vdb_core=vdb_core, task_id=task_id, + large_mode=large_mode, + model_id=saved_embedding_model_id, ) except Exception as e: error_msg = str(e) @@ -538,9 +739,19 @@ async def hybrid_search( vdb_core=vdb_core, ) return JSONResponse(status_code=HTTPStatus.OK, content=result) + except KnowledgeBaseNeedsModelConfigError as exc: + # Return a specific error that frontend can detect to show the config dialog + raise HTTPException( + status_code=HTTPStatus.CONFLICT, + detail={ + "error_type": "KNOWLEDGE_BASE_NEEDS_MODEL_CONFIG", + "index_name": exc.index_name, + "message": exc.message, + "suggestion": "Please select an embedding model for this knowledge base before searching." 
+ } + ) except ValueError as exc: - raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, - detail=str(exc)) + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) except Exception as exc: logger.error(f"Hybrid search failed: {exc}", exc_info=True) raise HTTPException( diff --git a/backend/apps/voice_app.py b/backend/apps/voice_app.py index 8f517cd07..7451a95c4 100644 --- a/backend/apps/voice_app.py +++ b/backend/apps/voice_app.py @@ -1,15 +1,12 @@ -import asyncio import logging from http import HTTPStatus -from fastapi import APIRouter, WebSocket, HTTPException, Body, Query +from fastapi import APIRouter, WebSocket, HTTPException from fastapi.responses import JSONResponse from consts.exceptions import ( VoiceServiceException, STTConnectionException, - TTSConnectionException, - VoiceConfigException ) from consts.model import VoiceConnectivityRequest, VoiceConnectivityResponse from services.voice_service import get_voice_service @@ -26,10 +23,29 @@ async def stt_websocket(websocket: WebSocket): logger.info("STT WebSocket connection attempt...") await websocket.accept() logger.info("STT WebSocket connection accepted") - + + # Receive config from client + client_config = {} + try: + msg = await websocket.receive() + if msg["type"] == "websocket.receive": + import json + client_config = json.loads(msg["text"]) + logger.info(f"Received client config: {client_config}") + elif msg["type"] == "bytes": + try: + import json + client_config = json.loads(msg["bytes"].decode('utf-8')) + logger.info(f"Received client config from bytes: {client_config}") + except Exception as e: + logger.warning(f"Failed to parse bytes as JSON: {e}") + except Exception as e: + logger.error(f"Error receiving config: {e}") + client_config = {} + try: voice_service = get_voice_service() - await voice_service.start_stt_streaming_session(websocket) + await voice_service.start_stt_streaming_session(websocket, stt_config=client_config) except STTConnectionException as e: 
logger.error(f"STT WebSocket error: {str(e)}") await websocket.send_json({"error": str(e)}) @@ -40,55 +56,12 @@ async def stt_websocket(websocket: WebSocket): logger.info("STT WebSocket connection closed") -@voice_runtime_router.websocket("/tts/ws") -async def tts_websocket(websocket: WebSocket): - """WebSocket endpoint for streaming TTS""" - logger.info("TTS WebSocket connection attempt...") - await websocket.accept() - logger.info("TTS WebSocket connection accepted") - - try: - # Receive text from client (single request) - data = await websocket.receive_json() - text = data.get("text") - - if not text: - if websocket.client_state.name == "CONNECTED": - await websocket.send_json({"error": "No text provided"}) - return - - # Stream TTS audio to WebSocket - voice_service = get_voice_service() - await voice_service.stream_tts_to_websocket(websocket, text) - - except TTSConnectionException as e: - logger.error(f"TTS WebSocket error: {str(e)}") - await websocket.send_json({"error": str(e)}) - except Exception as e: - logger.error(f"TTS WebSocket error: {str(e)}") - await websocket.send_json({"error": str(e)}) - finally: - logger.info("TTS WebSocket connection closed") - # Ensure connection is properly closed - if websocket.client_state.name == "CONNECTED": - await websocket.close() - - @voice_config_router.post("/connectivity") async def check_voice_connectivity(request: VoiceConnectivityRequest): - """ - Check voice service connectivity - - Args: - request: VoiceConnectivityRequest containing model_type - - Returns: - VoiceConnectivityResponse with connectivity status - """ + """Check voice service connectivity.""" try: voice_service = get_voice_service() connected = await voice_service.check_voice_connectivity(request.model_type) - return JSONResponse( status_code=HTTPStatus.OK, content=VoiceConnectivityResponse( @@ -99,25 +72,10 @@ async def check_voice_connectivity(request: VoiceConnectivityRequest): ) except VoiceServiceException as e: logger.error(f"Voice service 
error: {str(e)}") - raise HTTPException( - status_code=HTTPStatus.BAD_REQUEST, - detail=str(e) - ) - except (STTConnectionException, TTSConnectionException) as e: + raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e)) + except STTConnectionException as e: logger.error(f"Voice connectivity error: {str(e)}") - raise HTTPException( - status_code=HTTPStatus.SERVICE_UNAVAILABLE, - detail=str(e) - ) - except VoiceConfigException as e: - logger.error(f"Voice configuration error: {str(e)}") - raise HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail=str(e) - ) + raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, detail=str(e)) except Exception as e: logger.error(f"Unexpected voice service error: {str(e)}") - raise HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Voice service error" - ) + raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Voice service error") diff --git a/backend/assets/test_voice.pcm b/backend/assets/test_voice.pcm new file mode 100644 index 000000000..0a78f9a15 Binary files /dev/null and b/backend/assets/test_voice.pcm differ diff --git a/backend/consts/const.py b/backend/consts/const.py index db1e69184..77e86a185 100644 --- a/backend/consts/const.py +++ b/backend/consts/const.py @@ -7,9 +7,12 @@ load_dotenv(override=True) # TODO: Analyze every variable if this is used -# Test voice file path +# Test voice file path (WAV format for volcengine STT) TEST_VOICE_PATH = os.path.join(os.path.dirname( os.path.dirname(__file__)), 'assets', 'test.wav') +# Test PCM file path (raw PCM format for Ali STT) +TEST_PCM_PATH = os.path.join(os.path.dirname( + os.path.dirname(__file__)), 'assets', 'test_voice.pcm') # Vector database providers @@ -36,6 +39,11 @@ class VectorDatabaseType(str, Enum): UPLOAD_FOLDER = os.getenv('UPLOAD_FOLDER', 'uploads') ROOT_DIR = os.getenv("ROOT_DIR") +PER_WAVE_TIMEOUT = int(os.getenv("DP_SPLIT_WAIT_TIMEOUT_PER_WAVE_S", "30")) +MAX_TIMEOUT = 
int(os.getenv("DP_SPLIT_WAIT_TIMEOUT_MAX_S", "1800")) + + + # Container-internal skills storage path CONTAINER_SKILLS_PATH = os.getenv("SKILLS_PATH") @@ -149,7 +157,7 @@ class VectorDatabaseType(str, Enum): RAY_ACTOR_NUM_CPUS = int(os.getenv("RAY_ACTOR_NUM_CPUS", "2")) RAY_DASHBOARD_PORT = int(os.getenv("RAY_DASHBOARD_PORT", "8265")) RAY_DASHBOARD_HOST = os.getenv("RAY_DASHBOARD_HOST", "0.0.0.0") -RAY_NUM_CPUS = os.getenv("RAY_NUM_CPUS") +RAY_NUM_CPUS = int(os.getenv("RAY_NUM_CPUS", "4")) RAY_OBJECT_STORE_MEMORY_GB = float( os.getenv("RAY_OBJECT_STORE_MEMORY_GB", "0.25")) RAY_TEMP_DIR = os.getenv("RAY_TEMP_DIR", "/tmp/ray") @@ -182,10 +190,22 @@ class VectorDatabaseType(str, Enum): # Worker Configuration RAY_ADDRESS = os.getenv("RAY_ADDRESS", "auto") -QUEUES = os.getenv("QUEUES", "process_q,forward_q") +QUEUES = os.getenv("QUEUES", "process_q,process_part_q,forward_q") # Will be dynamically set based on PID if not provided WORKER_NAME = os.getenv("WORKER_NAME") WORKER_CONCURRENCY = int(os.getenv("WORKER_CONCURRENCY", "4")) +RAY_WARM_ACTOR_POOL_SIZE_PART = int(os.getenv("RAY_WARM_ACTOR_POOL_SIZE_PART", "2")) +RAY_WARM_ACTOR_POOL_SIZE_PROCESS = int(os.getenv("RAY_WARM_ACTOR_POOL_SIZE_PROCESS", "1")) +# Global Ray actor pool (shared by process_q/process_part_q workers) +RAY_GLOBAL_ACTOR_POOL_SIZE = int(os.getenv("RAY_GLOBAL_ACTOR_POOL_SIZE", "3")) +RAY_ACTOR_WARM_TIMEOUT_S = float(os.getenv("RAY_ACTOR_WARM_TIMEOUT_S", "60")) +RAY_GLOBAL_ACTOR_POOL_NAME = os.getenv( + "RAY_GLOBAL_ACTOR_POOL_NAME", "nexent_global_data_processor_pool") +RAY_GLOBAL_ACTOR_POOL_NAMESPACE = os.getenv( + "RAY_GLOBAL_ACTOR_POOL_NAMESPACE", "nexent-data-process") + + + # Voice Service Configuration @@ -348,7 +368,7 @@ class VectorDatabaseType(str, Enum): # APP Version -APP_VERSION = "v2.1.0" +APP_VERSION = "v2.1.1" # Skill Creation Streaming Configuration diff --git a/backend/consts/error_message.py b/backend/consts/error_message.py index 4ff1141c7..27ac33d00 100644 --- 
a/backend/consts/error_message.py +++ b/backend/consts/error_message.py @@ -5,6 +5,8 @@ Frontend should use i18n for localized messages. """ +from typing import Dict, Tuple + from .error_code import ErrorCode @@ -145,11 +147,11 @@ def get_message(cls, error_code: ErrorCode) -> str: return cls._MESSAGES.get(error_code, "An error occurred. Please try again later.") @classmethod - def get_message_with_code(cls, error_code: ErrorCode) -> tuple[int, str]: + def get_message_with_code(cls, error_code: ErrorCode) -> Tuple[int, str]: """Get error code and message as tuple.""" return (error_code.value, cls.get_message(error_code)) @classmethod - def get_all_messages(cls) -> dict: + def get_all_messages(cls) -> Dict: """Get all error code to message mappings.""" return {code.value: msg for code, msg in cls._MESSAGES.items()} diff --git a/backend/consts/exceptions.py b/backend/consts/exceptions.py index 9481ebab2..a32f0282e 100644 --- a/backend/consts/exceptions.py +++ b/backend/consts/exceptions.py @@ -190,18 +190,6 @@ class STTConnectionException(Exception): pass -class TTSConnectionException(Exception): - """Raised when TTS service connection fails.""" - - pass - - -class VoiceConfigException(Exception): - """Raised when voice configuration is invalid.""" - - pass - - class ToolExecutionException(Exception): """Raised when mcp tool execution failed.""" diff --git a/backend/consts/model.py b/backend/consts/model.py index 7cea3fdb5..bcaffcae7 100644 --- a/backend/consts/model.py +++ b/backend/consts/model.py @@ -118,6 +118,9 @@ class ModelRequest(BaseModel): expected_chunk_size: Optional[int] = None maximum_chunk_size: Optional[int] = None chunk_batch: Optional[int] = None + # STT specific fields + model_appid: Optional[str] = None + access_token: Optional[str] = None class ProviderModelRequest(BaseModel): @@ -147,14 +150,23 @@ class SingleModelConfig(BaseModel): dimension: Optional[int] = None +class STTModelConfig(BaseModel): + """STT model specific configuration with 
factory, appid, and access token fields""" + modelName: str + displayName: str + apiConfig: Optional[ModelApiConfig] = None + modelFactory: Optional[str] = None + modelAppid: Optional[str] = None + accessToken: Optional[str] = None + + class ModelConfig(BaseModel): llm: SingleModelConfig embedding: SingleModelConfig multiEmbedding: SingleModelConfig rerank: SingleModelConfig vlm: SingleModelConfig - stt: SingleModelConfig - tts: SingleModelConfig + stt: STTModelConfig class AppConfig(BaseModel): @@ -334,6 +346,7 @@ class AgentInfoRequest(BaseModel): enabled_tool_ids: Optional[List[int]] = None enabled_skill_ids: Optional[List[int]] = None related_agent_ids: Optional[List[int]] = None + related_external_agent_ids: Optional[List[int]] = None group_ids: Optional[List[int]] = None ingroup_permission: Optional[str] = None enable_context_manager: Optional[bool] = None @@ -492,7 +505,7 @@ def default(cls) -> "MemoryAgentShareMode": class VoiceConnectivityRequest(BaseModel): """Request model for voice service connectivity check""" model_type: str = Field(..., - description="Type of model to check ('stt' or 'tts')") + description="Type of model to check ('stt')") class VoiceConnectivityResponse(BaseModel): @@ -503,19 +516,6 @@ class VoiceConnectivityResponse(BaseModel): message: str = Field(..., description="Status message") -class TTSRequest(BaseModel): - """Request model for TTS text-to-speech conversion""" - text: str = Field(..., min_length=1, - description="Text to convert to speech") - stream: bool = Field(True, description="Whether to stream the audio") - - -class TTSResponse(BaseModel): - """Response model for TTS conversion""" - status: str = Field(..., description="Status of the TTS conversion") - message: Optional[str] = Field(None, description="Additional message") - - class ToolValidateRequest(BaseModel): """Request model for tool validation""" name: str = Field(..., description="Tool name to validate") @@ -744,15 +744,18 @@ class 
ManageTenantModelCreateRequest(BaseModel): tenant_id: str = Field(..., min_length=1, description="Target tenant ID to create model for") model_repo: Optional[str] = Field('', description="Model repository path") model_name: str = Field(..., description="Model name") - model_type: str = Field(..., description="Model type (e.g., 'llm', 'embedding', 'vlm', 'tts', 'stt')") + model_type: str = Field(..., description="Model type (e.g., 'llm', 'embedding', 'vlm', 'stt')") api_key: Optional[str] = Field('', description="API key for the model") base_url: Optional[str] = Field('', description="Base URL for the model API") max_tokens: Optional[int] = Field(0, description="Maximum tokens for the model") display_name: Optional[str] = Field('', description="Display name for the model") - model_factory: Optional[str] = Field('OpenAI-API-Compatible', description="Model factory/provider name") + model_factory: Optional[str] = Field(None, description="Model factory/vendor for the model") expected_chunk_size: Optional[int] = Field(None, description="Expected chunk size for embedding models") maximum_chunk_size: Optional[int] = Field(None, description="Maximum chunk size for embedding models") chunk_batch: Optional[int] = Field(None, description="Batch size for chunking") + # STT specific fields + model_appid: Optional[str] = Field(None, description="Application ID for STT models (e.g., Volcano Engine)") + access_token: Optional[str] = Field(None, description="Access token for STT models (e.g., Volcano Engine)") class ManageTenantModelUpdateRequest(BaseModel): @@ -766,10 +769,13 @@ class ManageTenantModelUpdateRequest(BaseModel): base_url: Optional[str] = Field(None, description="Base URL for the model API") max_tokens: Optional[int] = Field(None, description="Maximum tokens for the model") display_name: Optional[str] = Field(None, description="New display name for the model") - model_factory: Optional[str] = Field(None, description="Model factory/provider name") + model_factory: 
Optional[str] = Field(None, description="Model factory/vendor for the model") expected_chunk_size: Optional[int] = Field(None, description="Expected chunk size for embedding models") maximum_chunk_size: Optional[int] = Field(None, description="Maximum chunk size for embedding models") chunk_batch: Optional[int] = Field(None, description="Batch size for chunking") + # STT specific fields + model_appid: Optional[str] = Field(None, description="Application ID for STT models") + access_token: Optional[str] = Field(None, description="Access token for STT models") class ManageTenantModelDeleteRequest(BaseModel): diff --git a/backend/consts/scheduler.py b/backend/consts/scheduler.py new file mode 100644 index 000000000..6820a9687 --- /dev/null +++ b/backend/consts/scheduler.py @@ -0,0 +1,28 @@ +""" +Scheduler frequency constants +Centralized definition for auto-summary frequency options +""" +from datetime import timedelta + +# Core frequency config: includes value, timedelta, and label; this is the single source of truth +SUMMARY_FREQUENCY_CONFIG = [ + {"value": "1h", "timedelta": timedelta(hours=1), "label": "1h"}, + {"value": "3h", "timedelta": timedelta(hours=3), "label": "3h"}, + {"value": "6h", "timedelta": timedelta(hours=6), "label": "6h"}, + {"value": "1d", "timedelta": timedelta(days=1), "label": "1d"}, + {"value": "1w", "timedelta": timedelta(weeks=1), "label": "1w"}, +] + +# Generate valid frequency list from config (for validation) +VALID_SUMMARY_FREQUENCIES = [item["value"] for item in SUMMARY_FREQUENCY_CONFIG] + [None] + +# Generate frequency to timedelta mapping from config (direct value, no loop conversion needed) +FREQUENCY_MAP = {item["value"]: item["timedelta"] for item in SUMMARY_FREQUENCY_CONFIG} + +# Generate API options from config (for frontend) +SUMMARY_FREQUENCY_OPTIONS_FOR_API = [ + {"value": "disabled", "label": "Disabled"}, +] + [{"value": item["value"], "label": item["value"]} for item in SUMMARY_FREQUENCY_CONFIG] + +# Scheduler check 
interval (seconds) +SCHEDULER_CHECK_INTERVAL_SECONDS = 30 * 60 diff --git a/backend/data_process/ray_actors.py b/backend/data_process/ray_actors.py index 2fa590bec..0dea828ce 100644 --- a/backend/data_process/ray_actors.py +++ b/backend/data_process/ray_actors.py @@ -1,5 +1,6 @@ import logging import json +import time from typing import Any, Dict, List, Optional import ray @@ -27,6 +28,84 @@ def __init__(self): f"Ray actor initialized using {RAY_ACTOR_NUM_CPUS} CPU cores...") self._processor = DataProcessCore() + def ping(self) -> bool: + """Lightweight health check used by prewarm logic.""" + return True + + def _prepare_process_params( + self, + task_id: Optional[str], + model_id: Optional[int], + tenant_id: Optional[str], + params: Dict[str, Any], + ) -> Dict[str, Any]: + """ + Normalize task/model-related processing params. + """ + process_params = dict(params) + if task_id: + process_params["task_id"] = task_id + + if not (model_id and tenant_id): + return process_params + + try: + model_record = get_model_by_model_id( + model_id=model_id, tenant_id=tenant_id) + if not model_record: + logger.warning( + f"[RayActor] Embedding model with ID {model_id} not found for tenant '{tenant_id}', using default chunk sizes") + return process_params + + expected_chunk_size = model_record.get( + "expected_chunk_size", DEFAULT_EXPECTED_CHUNK_SIZE) + maximum_chunk_size = model_record.get( + "maximum_chunk_size", DEFAULT_MAXIMUM_CHUNK_SIZE) + model_name = model_record.get("display_name") + + process_params["max_characters"] = maximum_chunk_size + process_params["new_after_n_chars"] = expected_chunk_size + + logger.info( + f"[RayActor] Using chunk sizes from embedding model '{model_name}' (ID: {model_id}): " + f"max_characters={maximum_chunk_size}, new_after_n_chars={expected_chunk_size}") + except Exception as e: + logger.warning( + f"[RayActor] Failed to retrieve chunk sizes from embedding model ID {model_id}: {e}. 
Using default chunk sizes") + return process_params + + def _run_file_process( + self, + file_data: bytes, + filename: str, + chunking_strategy: str, + process_params: Dict[str, Any], + log_subject: str, + ) -> List[Dict[str, Any]]: + chunks = self._processor.file_process( + file_data=file_data, + filename=filename, + chunking_strategy=chunking_strategy, + **process_params + ) + + if chunks is None: + logger.warning( + f"[RayActor] file_process returned None for {log_subject}='{filename}'") + return [] + if not isinstance(chunks, list): + logger.error( + f"[RayActor] file_process returned non-list type {type(chunks)} for {log_subject}='{filename}'") + return [] + if len(chunks) == 0: + logger.warning( + f"[RayActor] file_process returned empty list for {log_subject}='{filename}'") + return [] + + logger.info( + f"[RayActor] Processing done: produced {len(chunks)} chunks for {log_subject}='{filename}'") + return chunks + def process_file( self, source: str, @@ -54,70 +133,125 @@ def process_file( """ logger.info( f"[RayActor] Processing start: source='{source}', destination='{destination}', strategy='{chunking_strategy}', task_id='{task_id}', model_id='{model_id}'") - - if task_id: - params['task_id'] = task_id - - # Get chunk size parameters from embedding model if model_id is provided - if model_id and tenant_id: - try: - # Get embedding model details directly by model_id - model_record = get_model_by_model_id( - model_id=model_id, tenant_id=tenant_id) - if model_record: - expected_chunk_size = model_record.get( - 'expected_chunk_size', DEFAULT_EXPECTED_CHUNK_SIZE) - maximum_chunk_size = model_record.get( - 'maximum_chunk_size', DEFAULT_MAXIMUM_CHUNK_SIZE) - model_name = model_record.get('display_name') - - # Pass chunk sizes to processing parameters - params['max_characters'] = maximum_chunk_size - params['new_after_n_chars'] = expected_chunk_size - - logger.info( - f"[RayActor] Using chunk sizes from embedding model '{model_name}' (ID: {model_id}): " - 
f"max_characters={maximum_chunk_size}, new_after_n_chars={expected_chunk_size}") - else: - logger.warning( - f"[RayActor] Embedding model with ID {model_id} not found for tenant '{tenant_id}', using default chunk sizes") - except Exception as e: - logger.warning( - f"[RayActor] Failed to retrieve chunk sizes from embedding model ID {model_id}: {e}. Using default chunk sizes") + process_params = self._prepare_process_params( + task_id=task_id, + model_id=model_id, + tenant_id=tenant_id, + params=params, + ) try: + fetch_start = time.perf_counter() file_stream = get_file_stream(source) if file_stream is None: raise FileNotFoundError( f"Unable to fetch file from URL: {source}") file_data = file_stream.read() + fetch_elapsed = time.perf_counter() - fetch_start + logger.info( + f"[RayActor] Fetch file bytes done: destination='{destination}', source='{source}', " + f"bytes={len(file_data)}, elapsed={fetch_elapsed:.3f}s") except Exception as e: logger.error(f"Failed to fetch file from {source}: {e}") raise - chunks = self._processor.file_process( + return self._run_file_process( file_data=file_data, filename=source, chunking_strategy=chunking_strategy, + process_params=process_params, + log_subject="source", + ) + + def process_bytes( + self, + file_bytes: bytes, + filename: str, + chunking_strategy: str, + task_id: Optional[str] = None, + model_id: Optional[int] = None, + tenant_id: Optional[str] = None, + **params + ) -> List[Dict[str, Any]]: + """ + Process in-memory file bytes, auto-detecting its type using DataProcessCore.file_process. 
+ """ + logger.info( + f"[RayActor] Processing bytes: filename='{filename}', strategy='{chunking_strategy}', task_id='{task_id}', model_id='{model_id}'" + ) + process_params = self._prepare_process_params( + task_id=task_id, + model_id=model_id, + tenant_id=tenant_id, + params=params, + ) + + return self._run_file_process( + file_data=file_bytes, + filename=filename, + chunking_strategy=chunking_strategy, + process_params=process_params, + log_subject="filename", + ) + + def split_file( + self, + source: str, + destination: str, + task_id: Optional[str] = None, + max_size: int = 5 * 1024 * 1024, + file_data: Optional[bytes] = None, + **params + ) -> List[bytes]: + """ + Split file into parts using DataProcessCore.file_split and return raw bytes list. + """ + logger.info( + f"[RayActor] Splitting file: source='{source}', destination='{destination}', task_id='{task_id}', max_size={max_size}" + ) + + if file_data is None: + try: + fetch_start = time.perf_counter() + file_stream = get_file_stream(source) + if file_stream is None: + raise FileNotFoundError( + f"Unable to fetch file from URL: {source}") + file_data = file_stream.read() + fetch_elapsed = time.perf_counter() - fetch_start + logger.info( + f"[RayActor] Fetch file bytes for split done: destination='{destination}', source='{source}', " + f"bytes={len(file_data)}, elapsed={fetch_elapsed:.3f}s") + except Exception as e: + logger.error(f"Failed to fetch file from {source}: {e}") + raise + + split_start = time.perf_counter() + parts = self._processor.file_split( + file_data=file_data, + filename=source, + max_size=max_size, **params ) + split_elapsed = time.perf_counter() - split_start - if chunks is None: - logger.warning( - f"[RayActor] file_process returned None for source='{source}'") - return [] - if not isinstance(chunks, list): - logger.error( - f"[RayActor] file_process returned non-list type {type(chunks)} for source='{source}'") - return [] - if len(chunks) == 0: - logger.warning( - f"[RayActor] 
file_process returned empty list for source='{source}'") + if not parts: + logger.info( + f"[RayActor] Split done: destination='{destination}', source='{source}', " + f"parts=0, elapsed={split_elapsed:.3f}s") return [] + bytes_parts: List[bytes] = [] + for part in parts: + try: + bytes_parts.append(part.getvalue()) + except Exception: + continue + logger.info( - f"[RayActor] Processing done: produced {len(chunks)} chunks for source='{source}'") - return chunks + f"[RayActor] Split done: destination='{destination}', source='{source}', " + f"parts={len(bytes_parts)}, elapsed={split_elapsed:.3f}s") + return bytes_parts def store_chunks_in_redis(self, redis_key: str, chunks: List[Dict[str, Any]]) -> bool: """ diff --git a/backend/data_process/tasks.py b/backend/data_process/tasks.py index 50414b711..f2a30f9b7 100644 --- a/backend/data_process/tasks.py +++ b/backend/data_process/tasks.py @@ -4,32 +4,180 @@ import asyncio import json import logging +import math import os import threading import time -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, List, Tuple import aiohttp import re import ray -from celery import Task, chain, states +from celery import Task, chain, states, group, chord from celery.exceptions import Retry +from celery.result import allow_join_result -from consts.const import ELASTICSEARCH_SERVICE from utils.file_management_utils import get_file_size +from database.attachment_db import get_file_stream from services.redis_service import get_redis_service from .app import app from .ray_actors import DataProcessorRayActor from consts.const import ( + ELASTICSEARCH_SERVICE, REDIS_BACKEND_URL, FORWARD_REDIS_RETRY_DELAY_S, FORWARD_REDIS_RETRY_MAX, + DP_REDIS_CHUNKS_WAIT_TIMEOUT_S, + DP_REDIS_CHUNKS_POLL_INTERVAL_MS, + RAY_ACTOR_NUM_CPUS, + RAY_NUM_CPUS, DISABLE_RAY_DASHBOARD, ROOT_DIR, + PER_WAVE_TIMEOUT, + MAX_TIMEOUT, + RAY_GLOBAL_ACTOR_POOL_SIZE, + RAY_ACTOR_WARM_TIMEOUT_S, + RAY_GLOBAL_ACTOR_POOL_NAME, + 
RAY_GLOBAL_ACTOR_POOL_NAMESPACE ) logger = logging.getLogger("data_process.tasks") +ASYNC_SPLIT_RETRY_MAX = max(FORWARD_REDIS_RETRY_MAX * 5, FORWARD_REDIS_RETRY_MAX) +FORWARD_ES_CHUNK_BATCH_SIZE = 64 +IMAGE_METADATA_PROCESS_SOURCE = "UniversalImageExtractor" + +def _wait_for_split_ready(redis_key: str, timeout_s: int, poll_interval_ms: int) -> int: + """ + Wait until async split aggregation is marked ready in Redis. + Returns aggregated chunk count. + Raises TimeoutError on timeout. + """ + if not REDIS_BACKEND_URL: + raise RuntimeError("REDIS_BACKEND_URL not configured") + + import redis + + client = redis.Redis.from_url(REDIS_BACKEND_URL, decode_responses=True) + ready_key = f"{redis_key}:ready" + deadline = time.time() + timeout_s + + while time.time() < deadline: + if client.get(ready_key): + cached = client.get(redis_key) + if cached: + try: + chunks = json.loads(cached) + return len(chunks) if isinstance(chunks, list) else 0 + except Exception: + return 0 + return 0 + time.sleep(max(0.01, poll_interval_ms / 1000.0)) + + raise TimeoutError( + f"Timed out waiting for async split aggregation at key '{ready_key}' after {timeout_s}s" + ) + + +def _estimate_parallel_parts() -> int: + try: + total_cpus = RAY_NUM_CPUS + except Exception: + total_cpus = os.cpu_count() or 1 + actor_cpus = max(1, int(RAY_ACTOR_NUM_CPUS)) + return max(1, total_cpus // actor_cpus) + + +def _compute_split_wait_timeout(parts_count: int) -> int: + base_timeout = DP_REDIS_CHUNKS_WAIT_TIMEOUT_S + waves = math.ceil(max(1, parts_count) / _estimate_parallel_parts()) + dynamic_timeout = base_timeout + max(0, waves - 1) * max(1, PER_WAVE_TIMEOUT) + return min(MAX_TIMEOUT, max(base_timeout, dynamic_timeout)) + + +def _count_image_metadata_chunks(chunks: Optional[List[Dict[str, Any]]]) -> int: + if not chunks: + return 0 + return sum( + 1 + for chunk in chunks + if isinstance(chunk, dict) and chunk.get("process_source") == IMAGE_METADATA_PROCESS_SOURCE + ) + + +def _get_next_available_batch_index( + 
batches: List[List[Dict[str, Any]]], + start_idx: int, + batch_size: int, +) -> int: + total_batches = len(batches) + idx = start_idx + for _ in range(total_batches): + if len(batches[idx]) < batch_size: + return idx + idx = (idx + 1) % total_batches + raise RuntimeError("No available batch capacity") + + +def _distribute_chunks_round_robin( + batches: List[List[Dict[str, Any]]], + chunks: List[Dict[str, Any]], + batch_size: int, + error_context: str, +) -> None: + idx = 0 + for chunk in chunks: + try: + idx = _get_next_available_batch_index(batches, idx, batch_size) + except RuntimeError as exc: + raise RuntimeError( + f"No available batch capacity while distributing {error_context}" + ) from exc + batches[idx].append(chunk) + idx = (idx + 1) % len(batches) + + +def _build_balanced_batches( + formatted_chunks: List[Dict[str, Any]], + batch_size: int = FORWARD_ES_CHUNK_BATCH_SIZE, +) -> List[List[Dict[str, Any]]]: + """ + Split chunks into max-size batches and spread image-metadata chunks evenly. 
+ """ + total = len(formatted_chunks) + if total == 0: + return [] + if total <= batch_size: + return [formatted_chunks] + + total_batches = math.ceil(total / batch_size) + image_chunks = [ + chunk for chunk in formatted_chunks + if chunk.get("process_source") == IMAGE_METADATA_PROCESS_SOURCE + ] + text_chunks = [ + chunk for chunk in formatted_chunks + if chunk.get("process_source") != IMAGE_METADATA_PROCESS_SOURCE + ] + + batches: List[List[Dict[str, Any]]] = [[] for _ in range(total_batches)] + + _distribute_chunks_round_robin( + batches=batches, + chunks=image_chunks, + batch_size=batch_size, + error_context="image metadata chunks", + ) + _distribute_chunks_round_robin( + batches=batches, + chunks=text_chunks, + batch_size=batch_size, + error_context="text chunks", + ) + + return batches + + # Thread lock for initializing Ray to prevent race conditions ray_init_lock = threading.Lock() @@ -179,23 +327,257 @@ def run_in_thread(): raise -# Initialize the data processing core LAZILY -# This will be initialized on first task run by a worker process -def get_ray_actor() -> Any: +def _build_forward_error( + message: str, + index_name: str, + source: Optional[str], + original_filename: Optional[str], +) -> Exception: + return Exception(json.dumps({ + "message": message, + "index_name": index_name, + "task_name": "forward", + "source": source, + "original_filename": original_filename + }, ensure_ascii=False)) + + +def _parse_json_or_none(text: str) -> Optional[Dict[str, Any]]: + try: + parsed = json.loads(text) + return parsed if isinstance(parsed, dict) else None + except Exception: + return None + + +def _extract_error_code_from_es_response( + parsed_body: Optional[Dict[str, Any]], + text: str, +) -> Optional[str]: + error_code = None + if isinstance(parsed_body, dict): + error_code = parsed_body.get("error_code") + detail = parsed_body.get("detail") + if isinstance(detail, dict) and detail.get("error_code"): + error_code = detail.get("error_code") + elif 
isinstance(detail, str): + parsed_detail = _parse_json_or_none(detail) + if isinstance(parsed_detail, dict): + error_code = parsed_detail.get("error_code", error_code) + + if error_code: + return error_code + + try: + match = re.search( + r'["\']error_code["\']\s*:\s*["\']([^"\']+)["\']', text) + return match.group(1) if match else None + except Exception: + return None + + +def _send_chunks_to_es( + chunks: List[Dict[str, Any]], + index_name: str, + authorization: str | None, + task_id: Optional[str] = None, + source: str = "", + original_filename: str = "", + large_mode: bool = False, +) -> Dict[str, Any]: + async def _post(): + elasticsearch_url = ELASTICSEARCH_SERVICE + if not elasticsearch_url: + raise _build_forward_error( + message="ELASTICSEARCH_SERVICE env is not set", + index_name=index_name, + source=source, + original_filename=original_filename, + ) + route_url = f"/indices/{index_name}/documents" + full_url = elasticsearch_url + route_url + headers = {"Content-Type": "application/json"} + if authorization: + headers["Authorization"] = authorization + if task_id: + headers["X-Task-Id"] = task_id + try: + connector = aiohttp.TCPConnector(verify_ssl=False) + timeout = aiohttp.ClientTimeout(total=600) + + request_params: Dict[str, str] = {} + + if large_mode: + request_params["large_mode"] = "true" + + async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session: + async with session.post( + full_url, + headers=headers, + json=chunks, + params=request_params, + raise_for_status=False + ) as response: + text = await response.text() + status = response.status + parsed_body = _parse_json_or_none(text) + + if status >= 400: + error_code = _extract_error_code_from_es_response(parsed_body, text) + if error_code: + raise Exception(json.dumps({ + "error_code": error_code + }, ensure_ascii=False)) + + raise Exception( + f"ElasticSearch service returned HTTP {status}") + + result = parsed_body if isinstance(parsed_body, dict) else await 
response.json() + return result + + except aiohttp.ClientConnectorError as e: + logger.error( + f"[{task_id}] FORWARD TASK: Connection error to {full_url}: {str(e)}") + raise _build_forward_error( + message=f"Failed to connect to API: {str(e)}", + index_name=index_name, + source=source, + original_filename=original_filename, + ) + except asyncio.TimeoutError as e: + logger.warning( + f"[{task_id}] FORWARD TASK: Timeout when indexing documents: {str(e)}.") + raise _build_forward_error( + message=f"Timeout when indexing documents: {str(e)}", + index_name=index_name, + source=source, + original_filename=original_filename, + ) + except Exception as e: + logger.error( + f"[{task_id}] FORWARD TASK: Unexpected error when indexing documents: {str(e)}.") + raise _build_forward_error( + message=f"Unexpected error when indexing documents: {str(e)}", + index_name=index_name, + source=source, + original_filename=original_filename, + ) + + return run_async(_post()) + + +@ray.remote(num_cpus=0) +class GlobalRayActorPoolManager: """ - Creates a new, anonymous DataProcessorRayActor instance for each call. - This allows for parallel execution of data processing tasks, with each - task running in its own actor. + Cluster-wide shared actor pool manager. + A single detached manager serves all Celery worker processes. 
""" + + def __init__(self, warm_timeout_s: float): + self.warm_timeout_s = warm_timeout_s + self.actors: List[Any] = [] + self.rr_index = 0 + + def _create_and_warm_actor(self) -> Optional[Any]: + actor = DataProcessorRayActor.remote() + try: + ray.get(actor.ping.remote(), timeout=self.warm_timeout_s) + return actor + except Exception as exc: + try: + ray.kill(actor, no_restart=True) + except Exception: + pass + logger.warning( + f"[GlobalRayActorPoolManager] Warm actor failed in {self.warm_timeout_s:.1f}s: {exc}" + ) + return None + + def ensure_pool(self, desired: int, max_allowed: int) -> int: + desired = max(0, int(desired)) + max_allowed = max(1, int(max_allowed)) + desired = min(desired, max_allowed) + missing = max(0, desired - len(self.actors)) + for _ in range(missing): + actor = self._create_and_warm_actor() + if actor is not None: + self.actors.append(actor) + return len(self.actors) + + def get_actor(self) -> Any: + if not self.actors: + actor = self._create_and_warm_actor() + if actor is None: + raise RuntimeError("Global actor pool is empty and actor warm-up failed") + self.actors.append(actor) + idx = self.rr_index % len(self.actors) + self.rr_index += 1 + return self.actors[idx] + + +def _get_or_create_global_pool_manager() -> Any: with ray_init_lock: init_ray_in_worker() - actor = DataProcessorRayActor.remote() - logger.debug( - "Successfully created a new DataProcessorRayActor for a task.") - return actor + # Prefer atomic get/create when supported. 
+ try: + return GlobalRayActorPoolManager.options( + name=RAY_GLOBAL_ACTOR_POOL_NAME, + namespace=RAY_GLOBAL_ACTOR_POOL_NAMESPACE, + lifetime="detached", + get_if_exists=True, + ).remote(RAY_ACTOR_WARM_TIMEOUT_S) + except TypeError: + pass + + try: + return ray.get_actor( + RAY_GLOBAL_ACTOR_POOL_NAME, namespace=RAY_GLOBAL_ACTOR_POOL_NAMESPACE) + except Exception: + pass + + try: + return GlobalRayActorPoolManager.options( + name=RAY_GLOBAL_ACTOR_POOL_NAME, + namespace=RAY_GLOBAL_ACTOR_POOL_NAMESPACE, + lifetime="detached", + ).remote(RAY_ACTOR_WARM_TIMEOUT_S) + except Exception: + # Name race: another worker may have created it in the meantime. + return ray.get_actor( + RAY_GLOBAL_ACTOR_POOL_NAME, namespace=RAY_GLOBAL_ACTOR_POOL_NAMESPACE) + + +def prewarm_ray_actors(target_size: Optional[int] = None) -> int: + """ + Ensure a global shared pool of warm Ray actors exists for low-latency task execution. + """ + desired = RAY_GLOBAL_ACTOR_POOL_SIZE if target_size is None else max(0, int(target_size)) + manager = _get_or_create_global_pool_manager() + current_after = ray.get( + manager.ensure_pool.remote(desired=desired, max_allowed=_estimate_parallel_parts()) + ) + logger.info( + f"Global Ray actor pool ready: current={current_after}, desired={desired}" + ) + return current_after + + +def get_ray_actor() -> Any: + """ + Return a warm actor from the global shared pool with round-robin selection. + """ + manager = _get_or_create_global_pool_manager() + return ray.get(manager.get_actor.remote()) +def _get_split_actor() -> Any: + """ + Reuse warm DataProcessorRayActor instances for split operations. + This keeps split path aligned with prewarmed actor pool. 
+ """ + return get_ray_actor() + class LoggingTask(Task): """Base task class with enhanced logging""" @@ -221,6 +603,472 @@ def on_retry(self, exc, task_id, args, kwargs, einfo): return super().on_retry(exc, task_id, args, kwargs, einfo) +@app.task(bind=True, base=LoggingTask, name='data_process.tasks.process_part', queue='process_part_q') +def process_part( + self, + part_bytes: bytes, + filename: str, + chunking_strategy: str, + part_redis_key: str, + source: Optional[str] = None, + source_type: Optional[str] = None, + model_id: Optional[int] = None, + tenant_id: Optional[str] = None, + **params +) -> Dict[str, Any]: + """ + Hidden sub-task to process a file part with Ray. + """ + actor = get_ray_actor() + try: + chunks_ref = actor.process_bytes.remote( + part_bytes, + filename, + chunking_strategy, + task_id=None, + model_id=model_id, + tenant_id=tenant_id, + **params + ) + chunks = ray.get(chunks_ref) or [] + + if not REDIS_BACKEND_URL: + raise RuntimeError("REDIS_BACKEND_URL not configured") + + import redis + client = redis.Redis.from_url(REDIS_BACKEND_URL, decode_responses=True) + client.set(part_redis_key, json.dumps(chunks, ensure_ascii=False)) + client.expire(part_redis_key, 2 * 60 * 60) + + return { + "part_redis_key": part_redis_key, + "chunks_count": len(chunks), + } + except Exception as e: + logger.error(f"[process_part] Failed to process part for '{filename}': {str(e)}") + return { + "part_redis_key": part_redis_key, + "chunks_count": 0, + } + + +@app.task(bind=True, base=LoggingTask, name='data_process.tasks.aggregate_parts', queue='process_part_q') +def aggregate_parts( + self, + parts_results: List[List[Dict[str, Any]]], + source: Optional[str] = None, + index_name: Optional[str] = None, + original_filename: Optional[str] = None +) -> Dict[str, Any]: + """ + Hidden sub-task to aggregate part chunks. 
+ """ + merged: List[Dict[str, Any]] = [] + for part_chunks in parts_results or []: + if part_chunks: + merged.extend(part_chunks) + return { + "chunks": merged, + "source": source, + "index_name": index_name, + "original_filename": original_filename + } + + +@app.task(bind=True, base=LoggingTask, name='data_process.tasks.aggregate_store_chunks', queue='process_part_q') +def aggregate_store_chunks( + self, + parts_results: List[Dict[str, Any]], + redis_key: str, + source: Optional[str] = None, + index_name: Optional[str] = None, + original_filename: Optional[str] = None +) -> Dict[str, Any]: + """ + Hidden sub-task to aggregate part chunks and store into Redis for forward task. + """ + if not REDIS_BACKEND_URL: + raise Exception(json.dumps({ + "message": "REDIS_BACKEND_URL not configured to store chunks", + "index_name": index_name, + "task_name": "process", + "source": source, + "original_filename": original_filename + }, ensure_ascii=False)) + + try: + import redis + client = redis.Redis.from_url( + REDIS_BACKEND_URL, decode_responses=True) + + merged: List[Dict[str, Any]] = [] + for part_result in parts_results or []: + part_key = (part_result or {}).get("part_redis_key") + if not part_key: + continue + cached = client.get(part_key) + if not cached: + continue + try: + part_chunks = json.loads(cached) + if isinstance(part_chunks, list): + merged.extend(part_chunks) + except Exception: + continue + # best-effort cleanup for part payload key + try: + client.delete(part_key) + except Exception: + pass + + serialized = json.dumps(merged, ensure_ascii=False) + client.set(redis_key, serialized) + client.expire(redis_key, 2 * 60 * 60) + ready_key = f"{redis_key}:ready" + client.set(ready_key, "1") + client.expire(ready_key, 2 * 60 * 60) + logger.info( + f"[{self.request.id}] PROCESS TASK: Stored aggregated chunks in Redis at key '{redis_key}', count={len(merged)}") + except Exception as exc: + raise Exception(json.dumps({ + "message": f"Failed to store chunks to Redis: 
{str(exc)}", + "index_name": index_name, + "task_name": "process", + "source": source, + "original_filename": original_filename + }, ensure_ascii=False)) + + return { + "chunks_count": len(merged), + "redis_key": redis_key, + "source": source, + "index_name": index_name, + "original_filename": original_filename + } + + +@app.task(bind=True, base=LoggingTask, name='data_process.tasks.forward_part', queue='forward_q') +def forward_part( + self, + chunks: List[Dict[str, Any]], + index_name: str, + authorization: Optional[str] = None, + parent_task_id: Optional[str] = None, + parent_total_chunks: Optional[int] = None, + source: Optional[str] = None, + original_filename: Optional[str] = None, + batch_index: Optional[int] = None, + total_batches: Optional[int] = None, + large_mode: Optional[bool] = False, +) -> Dict[str, Any]: + """ + Forward sub-task that indexes a chunk batch. + """ + try: + # Respect cancellation from parent task if available + if parent_task_id: + try: + redis_service = get_redis_service() + if redis_service.is_task_cancelled(parent_task_id): + raise RuntimeError( + f"Parent task {parent_task_id} marked as cancelled") + except Exception: + pass + + es_result = _send_chunks_to_es( + chunks=chunks, + index_name=index_name, + authorization=authorization, + task_id=None, + source=source, + original_filename=original_filename, + large_mode=large_mode, + ) + + if not isinstance(es_result, dict) or not es_result.get("success"): + error_message = es_result.get( + "message", "Unknown error from main_server") if isinstance(es_result, dict) else "Unknown error" + raise Exception(json.dumps({ + "message": f"main_server API error: {error_message}", + "index_name": index_name, + "task_name": "forward_part", + "source": source, + "original_filename": original_filename + }, ensure_ascii=False)) + + # Update parent task progress per finished batch so frontend can show real-time indexing count. 
+ if parent_task_id: + try: + processed_delta = int(es_result.get("total_indexed", 0) or 0) + redis_service = get_redis_service() + redis_service.increment_progress_info( + task_id=parent_task_id, + delta_processed=processed_delta, + total_chunks=parent_total_chunks, + ) + except Exception as progress_exc: + logger.warning( + f"[{self.request.id}] FORWARD PART: Failed to update parent progress " + f"for task {parent_task_id}: {progress_exc}" + ) + + return { + "success": True, + "total_indexed": es_result.get("total_indexed", 0), + "total_submitted": es_result.get("total_submitted", len(chunks)), + "batch_index": batch_index, + "total_batches": total_batches, + } + except Exception as e: + retry_num = getattr(self.request, 'retries', 0) + logger.warning( + f"[{self.request.id}] FORWARD PART: Failed batch {batch_index}/{total_batches} " + f"(retry {retry_num + 1}/{FORWARD_REDIS_RETRY_MAX}): {str(e)}" + ) + raise self.retry( + countdown=FORWARD_REDIS_RETRY_DELAY_S, + max_retries=FORWARD_REDIS_RETRY_MAX, + exc=e + ) + + +@app.task(bind=True, base=LoggingTask, name='data_process.tasks.aggregate_forward_parts', queue='forward_q') +def aggregate_forward_parts( + self, + parts_results: List[Dict[str, Any]], + source: Optional[str] = None, + index_name: Optional[str] = None, + original_filename: Optional[str] = None +) -> Dict[str, Any]: + """ + Aggregate forward_part results. 
+ """ + total_indexed = 0 + total_submitted = 0 + for result in parts_results or []: + if not result: + continue + total_indexed += int(result.get("total_indexed", 0) or 0) + total_submitted += int(result.get("total_submitted", 0) or 0) + + return { + "success": True, + "total_indexed": total_indexed, + "total_submitted": total_submitted, + "source": source, + "index_name": index_name, + "original_filename": original_filename + } + + +def _split_file_for_processing( + request_id: str, + source: str, + source_type: str, + task_id: str, + params: Dict[str, Any], + file_data: Optional[bytes] = None, +) -> List[bytes]: + max_size = 5 * 1024 * 1024 + params.pop("max_size", None) + logger.info( + f"[{request_id}] PROCESS TASK: Splitting file before processing (max_size={max_size})") + + split_actor_get_start = time.perf_counter() + split_actor = _get_split_actor() + split_actor_get_elapsed = time.perf_counter() - split_actor_get_start + logger.info( + f"[{request_id}] PROCESS TASK: split actor ready in {split_actor_get_elapsed:.3f}s") + + split_call_start = time.perf_counter() + split_kwargs = { + "source": source, + "destination": source_type, + "task_id": task_id, + "max_size": max_size, + **params, + } + if file_data is not None: + split_kwargs["file_data"] = file_data + + parts_ref = split_actor.split_file.remote(**split_kwargs) + parts = ray.get(parts_ref) + split_call_elapsed = time.perf_counter() - split_call_start + logger.info( + f"[{request_id}] PROCESS TASK: split_file RPC done in {split_call_elapsed:.3f}s " + f"(source_type={source_type})") + + if parts: + part_sizes = [len(p) for p in parts] + total_bytes = sum(part_sizes) + min_size = min(part_sizes) + max_part_size = max(part_sizes) + avg_size = total_bytes / len(part_sizes) + logger.info( + f"[{request_id}] PROCESS TASK: Split stats: parts={len(part_sizes)}, " + f"total={total_bytes/1024/1024:.2f}MB, " + f"min={min_size/1024:.2f}KB, max={max_part_size/1024:.2f}KB, avg={avg_size/1024:.2f}KB") + + return 
parts + + +def _run_processing_for_parts( + request_id: str, + source: str, + source_type: str, + task_id: str, + chunking_strategy: str, + filename_for_processing: str, + parts: List[bytes], + index_name: Optional[str], + original_filename: Optional[str], + embedding_model_id: Optional[int], + tenant_id: Optional[str], + params: Dict[str, Any], +) -> Tuple[bool, Optional[List[Dict[str, Any]]], Optional[int]]: + if not parts: + logger.warning( + f"[{request_id}] PROCESS TASK: Split returned no parts; fallback to full-file processing") + process_actor = get_ray_actor() + chunks_ref = process_actor.process_file.remote( + source, + chunking_strategy, + destination=source_type, + task_id=task_id, + model_id=embedding_model_id, + tenant_id=tenant_id, + **params + ) + logger.info( + f"[{request_id}] PROCESS TASK: Waiting for Ray processing to complete...") + return False, ray.get(chunks_ref), None + + if len(parts) == 1: + process_actor = get_ray_actor() + chunks_ref = process_actor.process_bytes.remote( + parts[0], + filename_for_processing, + chunking_strategy, + task_id=None, + model_id=embedding_model_id, + tenant_id=tenant_id, + **params + ) + logger.info( + f"[{request_id}] PROCESS TASK: Waiting for Ray processing to complete...") + return False, ray.get(chunks_ref), None + + redis_key = f"dp:{task_id}:chunks" + group_tasks = group( + process_part.s( + part_bytes=part, + filename=filename_for_processing, + chunking_strategy=chunking_strategy, + part_redis_key=f"dp:{task_id}:part:{idx}", + source=source, + source_type=source_type, + model_id=embedding_model_id, + tenant_id=tenant_id, + **params + ) for idx, part in enumerate(parts) + ) + callback = aggregate_store_chunks.s( + redis_key=redis_key, + source=source, + index_name=index_name, + original_filename=original_filename + ).set(queue='process_part_q') + logger.info( + f"[{request_id}] PROCESS TASK: Dispatching {len(parts)} part tasks...") + chord(group_tasks)(callback) + + split_wait_timeout = 
_compute_split_wait_timeout(len(parts)) + logger.info( + f"[{request_id}] PROCESS TASK: Waiting split aggregation, timeout={split_wait_timeout}s, " + f"parts={len(parts)}, est_parallel={_estimate_parallel_parts()}") + split_chunk_count = _wait_for_split_ready( + redis_key=redis_key, + timeout_s=split_wait_timeout, + poll_interval_ms=DP_REDIS_CHUNKS_POLL_INTERVAL_MS, + ) + return True, None, split_chunk_count + + +def _process_source_with_split( + request_id: str, + source: str, + source_type: str, + task_id: str, + chunking_strategy: str, + index_name: Optional[str], + original_filename: Optional[str], + embedding_model_id: Optional[int], + tenant_id: Optional[str], + params: Dict[str, Any], + file_data: Optional[bytes] = None, +) -> Tuple[bool, Optional[List[Dict[str, Any]]], Optional[int]]: + parts = _split_file_for_processing( + request_id=request_id, + source=source, + source_type=source_type, + task_id=task_id, + params=params, + file_data=file_data, + ) + filename_for_processing = original_filename or os.path.basename(source) + split_async, chunks, split_chunk_count = _run_processing_for_parts( + request_id=request_id, + source=source, + source_type=source_type, + task_id=task_id, + chunking_strategy=chunking_strategy, + filename_for_processing=filename_for_processing, + parts=parts, + index_name=index_name, + original_filename=original_filename, + embedding_model_id=embedding_model_id, + tenant_id=tenant_id, + params=params, + ) + + if split_async: + logger.info( + f"[{request_id}] PROCESS TASK: Async split finished with {split_chunk_count or 0} chunks") + else: + logger.info( + f"[{request_id}] PROCESS TASK: Ray processing completed, got {len(chunks) if chunks else 0} chunks") + + if not split_async: + redis_key = f"dp:{task_id}:chunks" + process_actor = get_ray_actor() + process_actor.store_chunks_in_redis.remote(redis_key, chunks) + logger.info( + f"[{request_id}] PROCESS TASK: Stored chunks in Redis at key '{redis_key}'") + + return split_async, chunks, 
split_chunk_count + + +def _build_no_valid_chunks_error( + split_async: bool, + index_name: Optional[str], + source: str, + original_filename: Optional[str], +) -> Exception: + message = ( + "Async split completed but produced 0 chunks" + if split_async else + "Ray processing completed but produced 0 chunks" + ) + return Exception(json.dumps({ + "message": message, + "index_name": index_name, + "task_name": "process", + "source": source, + "original_filename": original_filename, + "error_code": "no_valid_chunks" + }, ensure_ascii=False)) + + @app.task(bind=True, base=LoggingTask, name='data_process.tasks.process', queue='process_q') def process( self, @@ -248,6 +1096,7 @@ def process( """ start_time = time.time() task_id = self.request.id + # _warn_if_queue_mismatch("PROCESS TASK", "process_q", self.request) logger.info( f"[{self.request.id}] PROCESS TASK: source_type: {source_type}") @@ -264,51 +1113,39 @@ def process( 'stage': 'extracting_text' } ) - # Get the data processor instance - actor = get_ray_actor() - try: # Process the file based on the source type file_size_mb = 0 + split_chunk_count = None + image_metadata_chunk_count = 0 + elapsed_time = 0.0 + chunks: Optional[List[Dict[str, Any]]] = None + split_async = False + if source_type == "local": # Check file existence and size for optimization if not os.path.exists(source): raise FileNotFoundError(f"File does not exist: {source}") file_size = os.path.getsize(source) - file_size_mb = file_size / (1024 * 1024) + file_size_mb = file_size / (5 * 1024 * 1024) logger.info( f"[{self.request.id}] PROCESS TASK: File size: {file_size_mb:.2f}MB") - # The unified actor call, mapping 'file' source_type to 'local' destination - # Submit Ray work and WAIT for processing to complete - logger.info( - f"[{self.request.id}] PROCESS TASK: Submitting Ray processing for source='{source}', strategy='{chunking_strategy}', destination='{source_type}', model_id={embedding_model_id}") - chunks_ref = actor.process_file.remote( - 
source, - chunking_strategy, - destination=source_type, + split_async, chunks, split_chunk_count = _process_source_with_split( + request_id=self.request.id, + source=source, + source_type=source_type, task_id=task_id, - model_id=embedding_model_id, + chunking_strategy=chunking_strategy, + index_name=index_name, + original_filename=original_filename, + embedding_model_id=embedding_model_id, tenant_id=tenant_id, - **params + params=params, ) - # Wait for Ray processing to complete (this keeps task in STARTED/"PROCESSING" state) - logger.info( - f"[{self.request.id}] PROCESS TASK: Waiting for Ray processing to complete...") - chunks = ray.get(chunks_ref) - logger.info( - f"[{self.request.id}] PROCESS TASK: Ray processing completed, got {len(chunks) if chunks else 0} chunks") - - # Persist chunks into Redis via Ray (synchronous to ensure data is ready before forward task) - redis_key = f"dp:{task_id}:chunks" - actor.store_chunks_in_redis.remote(redis_key, chunks) - logger.info( - f"[{self.request.id}] PROCESS TASK: Stored chunks in Redis at key '{redis_key}'") - - end_time = time.time() - elapsed_time = end_time - start_time + elapsed_time = time.time() - start_time processing_speed = file_size_mb / \ elapsed_time if file_size_mb > 0 and elapsed_time > 0 else 0 logger.info( @@ -318,33 +1155,31 @@ def process( logger.info( f"[{self.request.id}] PROCESS TASK: Processing from URL: {source}") - # For URL source, core.py expects a non-local destination to trigger URL fetching + # Measure MinIO fetch time in process worker logs for observability + fetch_start = time.perf_counter() + file_stream = get_file_stream(source) + if file_stream is None: + raise FileNotFoundError(f"Unable to fetch file from URL: {source}") + file_data = file_stream.read() + fetch_elapsed = time.perf_counter() - fetch_start logger.info( - f"[{self.request.id}] PROCESS TASK: Submitting Ray processing for URL='{source}', strategy='{chunking_strategy}', destination='{source_type}', 
model_id={embedding_model_id}") - chunks_ref = actor.process_file.remote( - source, - chunking_strategy, - destination=source_type, + f"[{self.request.id}] PROCESS TASK: MinIO fetch done in {fetch_elapsed:.3f}s, " + f"bytes={len(file_data)}") + + split_async, chunks, split_chunk_count = _process_source_with_split( + request_id=self.request.id, + source=source, + source_type=source_type, task_id=task_id, - model_id=embedding_model_id, + chunking_strategy=chunking_strategy, + index_name=index_name, + original_filename=original_filename, + embedding_model_id=embedding_model_id, tenant_id=tenant_id, - **params + params=params, + file_data=file_data, ) - # Wait for Ray processing to complete (this keeps task in STARTED/"PROCESSING" state) - logger.info( - f"[{self.request.id}] PROCESS TASK: Waiting for Ray processing to complete...") - chunks = ray.get(chunks_ref) - logger.info( - f"[{self.request.id}] PROCESS TASK: Ray processing completed, got {len(chunks) if chunks else 0} chunks") - - # Persist chunks into Redis via Ray (synchronous to ensure data is ready before forward task) - redis_key = f"dp:{task_id}:chunks" - actor.store_chunks_in_redis.remote(redis_key, chunks) - logger.info( - f"[{self.request.id}] PROCESS TASK: Stored chunks in Redis at key '{redis_key}'") - - end_time = time.time() - elapsed_time = end_time - start_time + elapsed_time = time.time() - start_time logger.info( f"[{self.request.id}] PROCESS TASK: URL processing completed in {elapsed_time:.2f}s") @@ -353,33 +1188,61 @@ def process( raise NotImplementedError( f"Source type '{source_type}' not yet supported") - chunk_count = len(chunks) if chunks else 0 - if chunk_count == 0: - raise Exception(json.dumps({ - "message": "Ray processing completed but produced 0 chunks", - "index_name": index_name, - "task_name": "process", - "source": source, - "original_filename": original_filename, - "error_code": "no_valid_chunks" - }, ensure_ascii=False)) + if split_async: + chunk_count = split_chunk_count or 0 
+ if chunk_count == 0: + raise _build_no_valid_chunks_error( + split_async=True, + index_name=index_name, + source=source, + original_filename=original_filename, + ) + # For async split, chunks are persisted in Redis; count image-metadata chunks from cached payload. + try: + if REDIS_BACKEND_URL: + import redis + redis_key = f"dp:{task_id}:chunks" + client = redis.Redis.from_url( + REDIS_BACKEND_URL, decode_responses=True) + cached = client.get(redis_key) + if cached: + cached_chunks = json.loads(cached) + if isinstance(cached_chunks, list): + image_metadata_chunk_count = _count_image_metadata_chunks(cached_chunks) + except Exception as image_count_exc: + logger.warning( + f"[{self.request.id}] PROCESS TASK: Failed counting image metadata chunks for async split: {image_count_exc}") + else: + chunk_count = len(chunks) if chunks else 0 + if chunk_count == 0: + raise _build_no_valid_chunks_error( + split_async=False, + index_name=index_name, + source=source, + original_filename=original_filename, + ) + image_metadata_chunk_count = _count_image_metadata_chunks(chunks) + + logger.info( + f"[{self.request.id}] PROCESS TASK: Chunk composition: total={chunk_count}, " + f"image_metadata={image_metadata_chunk_count}, text={max(0, chunk_count - image_metadata_chunk_count)}") # Update task state to SUCCESS after Ray processing completes # This transitions from STARTED (PROCESSING) to SUCCESS (WAIT_FOR_FORWARDING) self.update_state( state=states.SUCCESS, meta={ - 'chunks_count': len(chunks) if chunks else 0, - 'processing_time': elapsed_time, - 'source': source, - 'index_name': index_name, - 'original_filename': original_filename, - 'task_name': 'process', - 'stage': 'text_extracted', - 'file_size_mb': file_size_mb, - 'processing_speed_mb_s': file_size_mb / elapsed_time if file_size_mb > 0 and elapsed_time > 0 else 0 - } - ) + 'chunks_count': chunk_count, + 'processing_time': elapsed_time, + 'source': source, + 'index_name': index_name, + 'original_filename': original_filename, 
+ 'task_name': 'process', + 'stage': 'text_extracted', + 'file_size_mb': file_size_mb, + 'processing_speed_mb_s': file_size_mb / elapsed_time if file_size_mb > 0 and elapsed_time > 0 else 0 + } + ) logger.info( f"[{self.request.id}] PROCESS TASK: Processing complete, waiting for forward task") @@ -391,7 +1254,9 @@ def process( 'source': source, 'index_name': index_name, 'original_filename': original_filename, - 'task_id': task_id + 'task_id': task_id, + 'split_async': split_async, + 'image_metadata_chunk_count': image_metadata_chunk_count, } return returned_data @@ -537,6 +1402,7 @@ def forward( """ start_time = time.time() task_id = self.request.id + # _warn_if_queue_mismatch("FORWARD TASK", "forward_q", self.request) original_source = source original_index_name = index_name filename = original_filename @@ -575,6 +1441,7 @@ def forward( ) chunks = processed_data.get('chunks') + split_async = bool(processed_data.get('split_async')) # If chunks are not in payload, try loading from Redis via the redis_key if (not chunks) and processed_data.get('redis_key'): redis_key = processed_data.get('redis_key') @@ -590,6 +1457,24 @@ def forward( import redis client = redis.Redis.from_url( REDIS_BACKEND_URL, decode_responses=True) + ready_key = f"{redis_key}:ready" + if split_async: + ready_flag = client.get(ready_key) + if not ready_flag: + retry_num = getattr(self.request, 'retries', 0) + logger.info( + f"[{self.request.id}] FORWARD TASK: Async split not ready for key {redis_key}. 
Retry {retry_num + 1}/{ASYNC_SPLIT_RETRY_MAX} in {FORWARD_REDIS_RETRY_DELAY_S}s") + raise self.retry( + countdown=FORWARD_REDIS_RETRY_DELAY_S, + max_retries=ASYNC_SPLIT_RETRY_MAX, + exc=Exception(json.dumps({ + "message": "Async split not ready; will retry", + "index_name": original_index_name, + "task_name": "forward", + "source": original_source, + "original_filename": filename + }, ensure_ascii=False)) + ) cached = client.get(redis_key) if cached: try: @@ -604,6 +1489,21 @@ def forward( f"[{self.request.id}] FORWARD TASK: JSON decode error for key '{redis_key}': {str(jde)}; raw_prefix={raw_preview!r}") raise else: + if split_async: + retry_num = getattr(self.request, 'retries', 0) + logger.info( + f"[{self.request.id}] FORWARD TASK: Async split ready but chunks missing for key {redis_key}. Retry {retry_num + 1}/{ASYNC_SPLIT_RETRY_MAX} in {FORWARD_REDIS_RETRY_DELAY_S}s") + raise self.retry( + countdown=FORWARD_REDIS_RETRY_DELAY_S, + max_retries=ASYNC_SPLIT_RETRY_MAX, + exc=Exception(json.dumps({ + "message": "Async split ready but chunks missing; will retry", + "index_name": original_index_name, + "task_name": "forward", + "source": original_source, + "original_filename": filename + }, ensure_ascii=False)) + ) # No busy-wait: release the worker slot and retry later retry_num = getattr(self.request, 'retries', 0) logger.info( @@ -650,9 +1550,29 @@ def forward( "original_filename": original_filename }, ensure_ascii=False)) if len(chunks) == 0: + if split_async and processed_data.get('redis_key'): + retry_num = getattr(self.request, 'retries', 0) + logger.info( + f"[{self.request.id}] FORWARD TASK: Empty chunks while waiting for async split. 
Retry {retry_num + 1}/{ASYNC_SPLIT_RETRY_MAX} in {FORWARD_REDIS_RETRY_DELAY_S}s") + raise self.retry( + countdown=FORWARD_REDIS_RETRY_DELAY_S, + max_retries=ASYNC_SPLIT_RETRY_MAX, + exc=Exception(json.dumps({ + "message": "Chunks not ready in Redis (empty); will retry", + "index_name": original_index_name, + "task_name": "forward", + "source": original_source, + "original_filename": filename + }, ensure_ascii=False)) + ) logger.warning( f"[{self.request.id}] FORWARD TASK: Empty chunks list received for source {original_source}") formatted_chunks = [] + # Compute once per file to avoid repeated IO/MinIO calls inside loop + file_size = get_file_size(source_type, original_source) if isinstance( + original_source, str) else 0 + filename_resolved = filename or (os.path.basename(original_source) if original_source and isinstance( + original_source, str) else "") for i, chunk in enumerate(chunks): # Extract text and metadata content = chunk.get("content", "") @@ -664,20 +1584,18 @@ def forward( f"[{self.request.id}] FORWARD TASK: Chunk {i+1} has empty text content, skipping") continue - file_size = get_file_size(source_type, original_source) if isinstance( - original_source, str) else 0 - # Format as expected by the Elasticsearch API formatted_chunk = { "metadata": metadata, - "filename": filename or (os.path.basename(original_source) if original_source and isinstance(original_source, str) else ""), + "filename": filename_resolved, "path_or_url": original_source, "content": content, - "process_source": "Unstructured", + "process_source": chunk.get("process_source", "Unstructured"), "source_type": source_type, "file_size": file_size, "create_time": metadata.get("creation_date"), "date": metadata.get("date"), + "index": i, } formatted_chunks.append(formatted_chunk) @@ -691,112 +1609,6 @@ def forward( "error_code": "no_valid_chunks" }, ensure_ascii=False)) - async def index_documents(): - elasticsearch_url = ELASTICSEARCH_SERVICE - if not elasticsearch_url: - raise 
Exception(json.dumps({ - "message": "ELASTICSEARCH_SERVICE env is not set", - "index_name": original_index_name, - "task_name": "forward", - "source": original_source, - "original_filename": original_filename - }, ensure_ascii=False)) - route_url = f"/indices/{original_index_name}/documents" - full_url = elasticsearch_url + route_url - headers = {"Content-Type": "application/json"} - if authorization: - headers["Authorization"] = authorization - # Add task_id header for progress tracking - headers["X-Task-Id"] = task_id - - try: - connector = aiohttp.TCPConnector(verify_ssl=False) - timeout = aiohttp.ClientTimeout(total=600) - - async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session: - async with session.post( - full_url, - headers=headers, - json=formatted_chunks, - raise_for_status=False - ) as response: - text = await response.text() - status = response.status - # Try parse JSON body for structured error_code/message - parsed_body = None - try: - parsed_body = json.loads(text) - except Exception: - parsed_body = None - - if status >= 400: - error_code = None - if isinstance(parsed_body, dict): - error_code = parsed_body.get("error_code") - detail = parsed_body.get("detail") - if isinstance(detail, dict) and detail.get("error_code"): - error_code = detail.get("error_code") - elif isinstance(detail, str): - try: - parsed_detail = json.loads(detail) - if isinstance(parsed_detail, dict): - error_code = parsed_detail.get( - "error_code", error_code) - except Exception: - pass - - if not error_code: - try: - match = re.search( - r'["\']error_code["\']\s*:\s*["\']([^"\']+)["\']', text) - if match: - error_code = match.group(1) - except Exception: - pass - - if error_code: - # Raise flat payload to avoid nested JSON and preserve error_code - raise Exception(json.dumps({ - "error_code": error_code - }, ensure_ascii=False)) - - raise Exception( - f"ElasticSearch service returned HTTP {status}") - - result = parsed_body if isinstance(parsed_body, 
dict) else await response.json() - return result - - except aiohttp.ClientConnectorError as e: - logger.error( - f"[{self.request.id}] FORWARD TASK: Connection error to {full_url}: {str(e)}") - raise Exception(json.dumps({ - "message": f"Failed to connect to API: {str(e)}", - "index_name": original_index_name, - "task_name": "forward", - "source": original_source, - "original_filename": original_filename - }, ensure_ascii=False)) - except asyncio.TimeoutError as e: - logger.warning( - f"[{self.request.id}] FORWARD TASK: Timeout when indexing documents: {str(e)}.") - raise Exception(json.dumps({ - "message": f"Timeout when indexing documents: {str(e)}", - "index_name": original_index_name, - "task_name": "forward", - "source": original_source, - "original_filename": original_filename - }, ensure_ascii=False)) - except Exception as e: - logger.error( - f"[{self.request.id}] FORWARD TASK: Unexpected error when indexing documents: {str(e)}.") - raise Exception(json.dumps({ - "message": f"Unexpected error when indexing documents: {str(e)}", - "index_name": original_index_name, - "task_name": "forward", - "source": original_source, - "original_filename": original_filename - }, ensure_ascii=False)) - logger.info( f"[{self.request.id}] FORWARD TASK: Starting ES indexing for {len(formatted_chunks)} chunks to index '{original_index_name}'...") @@ -814,8 +1626,69 @@ async def index_documents(): 'processed_chunks': 0 # Will be updated during vectorization via Redis } ) + try: + redis_service = get_redis_service() + redis_service.save_progress_info(task_id, 0, total_chunks) + except Exception as progress_init_exc: + logger.warning( + f"[{self.request.id}] FORWARD TASK: Failed to initialize progress in Redis: " + f"{progress_init_exc}" + ) - es_result = run_async(index_documents()) + if len(formatted_chunks) < FORWARD_ES_CHUNK_BATCH_SIZE: + es_result = _send_chunks_to_es( + chunks=formatted_chunks, + index_name=original_index_name, + authorization=authorization, + 
task_id=task_id, + source=original_source, + original_filename=original_filename, + large_mode=False, + ) + else: + batches = _build_balanced_batches( + formatted_chunks=formatted_chunks, + batch_size=FORWARD_ES_CHUNK_BATCH_SIZE, + ) + total_batches = len(batches) + image_chunks_total = sum( + 1 for chunk in formatted_chunks if chunk.get("process_source") == IMAGE_METADATA_PROCESS_SOURCE + ) + image_distribution = [ + sum( + 1 + for chunk in batch + if chunk.get("process_source") == IMAGE_METADATA_PROCESS_SOURCE + ) + for batch in batches + ] + logger.info( + f"[{self.request.id}] FORWARD TASK: Batch distribution ready: total_batches={total_batches}, " + f"batch_size={FORWARD_ES_CHUNK_BATCH_SIZE}, image_metadata_total={image_chunks_total}, " + f"image_per_batch={image_distribution}") + group_tasks = group( + forward_part.s( + chunks=batch, + index_name=original_index_name, + authorization=authorization, + parent_task_id=task_id, + parent_total_chunks=total_chunks, + source=original_source, + original_filename=original_filename, + batch_index=idx + 1, + total_batches=total_batches, + # If request was split into multiple groups, force all groups to use large path. 
+ large_mode=True, + ).set(queue='forward_q') for idx, batch in enumerate(batches) + ) + callback = aggregate_forward_parts.s( + source=original_source, + index_name=original_index_name, + original_filename=original_filename + ).set(queue='forward_q') + result = chord(group_tasks)(callback) + with allow_join_result(): + es_result = result.get() logger.debug( f"[{self.request.id}] FORWARD TASK: API response from main_server for source '{original_source}': {es_result}") diff --git a/backend/data_process/worker.py b/backend/data_process/worker.py index a5f5f4a27..48323869b 100644 --- a/backend/data_process/worker.py +++ b/backend/data_process/worker.py @@ -1,4 +1,4 @@ -""" +""" Celery worker script for data processing tasks This script is used to start Celery workers for processing data @@ -21,6 +21,7 @@ import os import sys import time +import threading import traceback import ray @@ -44,6 +45,7 @@ REDIS_URL, WORKER_CONCURRENCY, WORKER_NAME, + RAY_GLOBAL_ACTOR_POOL_SIZE, ) from .app import app @@ -200,6 +202,60 @@ def worker_ready_handler(**kwargs): # Register health check endpoints, start monitoring, etc. logger.debug("🔍 Worker is ready to receive tasks") + # Prewarm Ray actors for process-related queues to reduce first-task latency. + # IMPORTANT: run asynchronously so worker queue registration is never blocked. + try: + queue_set = {q.strip() for q in QUEUES.split(",") if q.strip()} + if "process_q" in queue_set or "process_part_q" in queue_set: + from data_process.tasks import prewarm_ray_actors + + # Prewarm a cluster-global shared actor pool once at startup. + # Multiple workers may trigger this, but pool manager is idempotent. 
+ target = RAY_GLOBAL_ACTOR_POOL_SIZE + + def _prewarm_in_background(): + try: + warmed = prewarm_ray_actors(target_size=target) + logger.info( + f"Prewarmed Ray actor pool in background, warmed_actors={warmed}, target={target}, queues={sorted(queue_set)}" + ) + except Exception as exc: + logger.warning(f"Background prewarm failed: {exc}") + + threading.Thread(target=_prewarm_in_background, daemon=True).start() + except Exception as exc: + logger.warning(f"Failed to schedule Ray actor prewarm on worker ready: {exc}") + + # Periodic concurrency + Ray CPU availability log for process_part_q. + try: + queue_set = {q.strip() for q in QUEUES.split(",") if q.strip()} + if "process_part_q" in queue_set: + def _log_part_concurrency(): + while True: + try: + inspector = app.control.inspect(timeout=1) + active = inspector.active() or {} + part_active = 0 + for _, tasks in active.items(): + for t in tasks or []: + if t.get("name") == "data_process.tasks.process_part": + part_active += 1 + try: + ray_available = ray.available_resources() if ray.is_initialized() else {} + except Exception: + ray_available = {} + avail_cpu = ray_available.get("CPU", 0.0) + logger.info( + f"[process_part] active={part_active}, ray_available_cpu={avail_cpu}" + ) + except Exception as exc: + logger.debug(f"Failed to collect process_part concurrency stats: {exc}") + time.sleep(5) + + threading.Thread(target=_log_part_concurrency, daemon=True).start() + except Exception as exc: + logger.warning(f"Failed to start process_part concurrency logger: {exc}") + @worker_shutting_down.connect def worker_shutdown_handler(**kwargs): @@ -289,9 +345,9 @@ def validate_redis_connection() -> bool: def start_worker(): """Start Celery worker with appropriate settings""" - # Get configuration parameters + # Read from runtime env first, so launcher-assigned values always win. 
queues = QUEUES - worker_name = WORKER_NAME or f'worker-{os.getpid()}' + worker_name = WORKER_NAME concurrency = WORKER_CONCURRENCY logger.info(f"Start Celery worker '{worker_name}' with queues: {queues}") diff --git a/backend/data_process_service.py b/backend/data_process_service.py index 0576e01fc..23d3497d9 100644 --- a/backend/data_process_service.py +++ b/backend/data_process_service.py @@ -206,13 +206,21 @@ def start_workers(self): logger.debug(f"Process-worker concurrency set to: {process_worker_concurrency}") logger.debug(f"Forward-worker concurrency set to: {forward_worker_concurrency}") - # Define worker configurations based on new architecture + # Define worker configurations based on split architecture: + # - process-worker handles orchestration (process_q) + # - process-part-worker handles split sub-tasks (process_part_q) + # - forward-worker handles vectorization/storage (forward_q) workers_config = [ { 'name': 'process-worker', 'queue': 'process_q', 'concurrency': process_worker_concurrency }, + { + 'name': 'process-part-worker', + 'queue': 'process_part_q', + 'concurrency': process_worker_concurrency + }, { 'name': 'forward-worker', 'queue': 'forward_q', @@ -243,7 +251,7 @@ def start_workers(self): logging.basicConfig(level=logging.INFO, format='[%(asctime)s: %(levelname)s/%(name)s] %(message)s') logger = logging.getLogger("data_process.worker_launcher") -os.environ["QUEUES"] = "{config['queue']}" +os.environ["QUEUES"] = "{config['queue']}" # backward compatibility os.environ["WORKER_NAME"] = "{config['name']}" os.environ["WORKER_CONCURRENCY"] = "{config['concurrency']}" @@ -254,6 +262,10 @@ def start_workers(self): logger.debug(f"Celery app instance: {{celery_app}}") logger.debug(f"Attempting to start worker for queue: {config['queue']}") from data_process.worker import start_worker + # Re-apply launcher values after imports in case .env override changed them. 
+ os.environ["QUEUES"] = "{config['queue']}" + os.environ["WORKER_NAME"] = "{config['name']}" + os.environ["WORKER_CONCURRENCY"] = "{config['concurrency']}" start_worker() except ImportError as e: logger.error(f"Import error: {{e}}") @@ -564,7 +576,11 @@ def start_all_services(self): if success_count > 0: self.log_service_info() - + + # Start auto-summary scheduler + from services.auto_summary_scheduler import auto_summary_scheduler + auto_summary_scheduler.start() + return success_count == enabled_count def log_service_info(self): @@ -700,7 +716,11 @@ def stop_all_services(self): logger.error(f"Final attempt to kill Flower process failed: {final_e}") finally: service_processes['flower'] = None - + + # Stop auto-summary scheduler + from services.auto_summary_scheduler import auto_summary_scheduler + auto_summary_scheduler.stop() + # Stop Redis last if service_processes['redis']: try: diff --git a/backend/database/a2a_agent_db.py b/backend/database/a2a_agent_db.py index 9becdd67b..c1d998272 100644 --- a/backend/database/a2a_agent_db.py +++ b/backend/database/a2a_agent_db.py @@ -29,6 +29,22 @@ def _get_db_session(): # Default cache TTL in seconds (24 hours) DEFAULT_CACHE_TTL_HOURS = 24 + +def _extract_base_url(url: str) -> str: + """Extract base URL (scheme + host + port) from a full URL. + + Args: + url: Full URL, e.g., http://example.com/path/to/agent.json + + Returns: + Base URL, e.g., http://example.com + """ + from urllib.parse import urlparse + parsed = urlparse(url) + if parsed.port: + return f"{parsed.scheme}://{parsed.hostname}:{parsed.port}" + return f"{parsed.scheme}://{parsed.hostname}" + # Standard human-readable protocol label PROTOCOL_HTTP_JSON = "HTTP+JSON" PROTOCOL_JSONRPC = "JSONRPC" @@ -51,27 +67,19 @@ def _generate_endpoint_id(agent_id: int) -> str: def _extract_primary_interface(supported_interfaces: List[Dict[str, Any]]) -> tuple[str, str]: - """Extract the primary interface (HTTP+JSON) from supported interfaces. 
+ """Extract the primary interface (first one) from supported interfaces. Args: supported_interfaces: List of interface objects with protocolBinding, url, protocolVersion. Returns: Tuple of (agent_url, protocol_version). - Falls back to first interface if HTTP+JSON not found. + Returns empty string for url if no interfaces found. """ if not supported_interfaces: return "", "1.0" - # Prefer HTTP+JSON - for iface in supported_interfaces: - if iface.get("protocolBinding", "").upper() in (PROTOCOL_HTTP_JSON, PROTOCOL_JSONRPC, PROTOCOL_GRPC): - return ( - iface.get("url", ""), - iface.get("protocolVersion", "1.0") - ) - - # Fall back to first interface + # Return the first interface to ensure URL and protocol are from the same interface first = supported_interfaces[0] return ( first.get("url", ""), @@ -148,6 +156,7 @@ def create_external_agent_from_url( version: Optional[str] = None, streaming: bool = False, supported_interfaces: Optional[List[Dict[str, Any]]] = None, + base_url: Optional[str] = None, ) -> Dict[str, Any]: """Create or update an external A2A agent discovered from URL. @@ -162,6 +171,7 @@ def create_external_agent_from_url( version: Agent version from Agent Card. streaming: Whether this agent supports SSE streaming. supported_interfaces: All supported protocol interfaces. + base_url: Base URL for health checks (service root address). Returns: Created agent information dict. 
@@ -170,6 +180,10 @@ def create_external_agent_from_url( expires_at = now + timedelta(hours=DEFAULT_CACHE_TTL_HOURS) protocol_type = _extract_protocol_type(supported_interfaces) + # Extract base_url from source_url if not provided + if not base_url and source_url: + base_url = _extract_base_url(source_url) + with _get_db_session() as session: # Check if agent already exists by source_url existing = session.query(A2AExternalAgent).filter( @@ -191,6 +205,8 @@ def create_external_agent_from_url( existing.cached_at = now existing.cache_expires_at = expires_at existing.updated_by = user_id + if base_url: + existing.base_url = base_url agent = existing else: # Create new record @@ -210,6 +226,7 @@ def create_external_agent_from_url( raw_card=raw_card, cached_at=now, cache_expires_at=expires_at, + base_url=base_url, delete_flag='N' ) session.add(agent) @@ -226,6 +243,7 @@ def create_external_agent_from_url( "streaming": agent.streaming, "supported_interfaces": agent.supported_interfaces, "source_type": agent.source_type, + "base_url": agent.base_url, "is_available": agent.is_available, "cached_at": agent.cached_at.isoformat() if agent.cached_at else None, "cache_expires_at": agent.cache_expires_at.isoformat() if agent.cache_expires_at else None, @@ -244,6 +262,7 @@ def create_external_agent_from_nacos( version: Optional[str] = None, streaming: bool = False, supported_interfaces: Optional[List[Dict[str, Any]]] = None, + base_url: Optional[str] = None, ) -> Dict[str, Any]: """Create or update an external A2A agent discovered from Nacos. @@ -259,6 +278,7 @@ def create_external_agent_from_nacos( version: Agent version from Agent Card. streaming: Whether this agent supports SSE streaming. supported_interfaces: All supported protocol interfaces. + base_url: Base URL for health checks (service root address). Returns: Created agent information dict. 
@@ -267,6 +287,10 @@ def create_external_agent_from_nacos( expires_at = now + timedelta(hours=DEFAULT_CACHE_TTL_HOURS) protocol_type = _extract_protocol_type(supported_interfaces) + # Extract base_url from agent_url if not provided + if not base_url and agent_url: + base_url = _extract_base_url(agent_url) + with _get_db_session() as session: # Check if agent already exists by nacos_config_id + nacos_agent_name existing = session.query(A2AExternalAgent).filter( @@ -288,6 +312,8 @@ def create_external_agent_from_nacos( existing.cached_at = now existing.cache_expires_at = expires_at existing.updated_by = user_id + if base_url: + existing.base_url = base_url agent = existing else: agent = A2AExternalAgent( @@ -307,6 +333,7 @@ def create_external_agent_from_nacos( raw_card=raw_card, cached_at=now, cache_expires_at=expires_at, + base_url=base_url, delete_flag='N' ) session.add(agent) @@ -323,6 +350,7 @@ def create_external_agent_from_nacos( "streaming": agent.streaming, "supported_interfaces": agent.supported_interfaces, "source_type": agent.source_type, + "base_url": agent.base_url, "is_available": agent.is_available, "cached_at": agent.cached_at.isoformat() if agent.cached_at else None, "cache_expires_at": agent.cache_expires_at.isoformat() if agent.cache_expires_at else None, @@ -360,6 +388,7 @@ def get_external_agent_by_id(external_agent_id: int, tenant_id: str) -> Optional "supported_interfaces": agent.supported_interfaces, "source_type": agent.source_type, "source_url": agent.source_url, + "base_url": agent.base_url, "nacos_config_id": agent.nacos_config_id, "nacos_agent_name": agent.nacos_agent_name, "raw_card": agent.raw_card, @@ -416,6 +445,8 @@ def list_external_agents( "protocol_type": agent.protocol_type, "supported_interfaces": agent.supported_interfaces, "source_type": agent.source_type, + "source_url": agent.source_url, + "base_url": agent.base_url, "is_available": agent.is_available, "last_check_result": agent.last_check_result, "create_time": 
agent.create_time.isoformat() if agent.create_time else None, @@ -1714,6 +1745,7 @@ def get_nacos_config_by_id(config_id: str, tenant_id: str) -> Optional[Dict[str, "name": config.name, "nacos_addr": config.nacos_addr, "nacos_username": config.nacos_username, + "nacos_password": config.nacos_password, "namespace_id": config.namespace_id, "description": config.description, "is_active": config.is_active, @@ -1749,6 +1781,8 @@ def list_nacos_configs(tenant_id: str, is_active: Optional[bool] = None) -> List "name": config.name, "nacos_addr": config.nacos_addr, "namespace_id": config.namespace_id, + "nacos_username": config.nacos_username, + "nacos_password": config.nacos_password, "is_active": config.is_active, "last_scan_at": config.last_scan_at.isoformat() if config.last_scan_at else None, } @@ -1804,6 +1838,75 @@ def delete_nacos_config(config_id: str, tenant_id: str) -> bool: return True +def update_nacos_config( + config_id: str, + tenant_id: str, + user_id: str, + name: Optional[str] = None, + nacos_addr: Optional[str] = None, + nacos_username: Optional[str] = None, + nacos_password: Optional[str] = None, + namespace_id: Optional[str] = None, + description: Optional[str] = None, + is_active: Optional[bool] = None +) -> Optional[Dict[str, Any]]: + """Update a Nacos config. + + Args: + config_id: The config ID. + tenant_id: Tenant ID. + user_id: User who is updating this config. + name: Optional new display name. + nacos_addr: Optional new Nacos server address. + nacos_username: Optional new Nacos username. + nacos_password: Optional new Nacos password. + namespace_id: Optional new Nacos namespace. + description: Optional new description. + is_active: Optional active status. + + Returns: + Updated config information dict, or None if not found. 
+ """ + with _get_db_session() as session: + config = session.query(A2ANacosConfig).filter( + A2ANacosConfig.config_id == config_id, + A2ANacosConfig.tenant_id == tenant_id, + A2ANacosConfig.delete_flag != 'Y' + ).first() + + if not config: + return None + + if name is not None: + config.name = name + if nacos_addr is not None: + config.nacos_addr = nacos_addr + if nacos_username is not None: + config.nacos_username = nacos_username + if nacos_password is not None: + config.nacos_password = nacos_password + if namespace_id is not None: + config.namespace_id = namespace_id + if description is not None: + config.description = description + if is_active is not None: + config.is_active = is_active + + config.updated_by = user_id + session.flush() + + return { + "id": config.id, + "config_id": config.config_id, + "name": config.name, + "nacos_addr": config.nacos_addr, + "namespace_id": config.namespace_id, + "nacos_username": config.nacos_username, + "nacos_password": config.nacos_password, + "is_active": config.is_active, + } + + # ============================================================================= # A2A Artifact Operations # ============================================================================= diff --git a/backend/database/attachment_db.py b/backend/database/attachment_db.py index fbfc83583..187381cd2 100644 --- a/backend/database/attachment_db.py +++ b/backend/database/attachment_db.py @@ -396,6 +396,7 @@ def get_content_type(file_path: str) -> str: '.html': 'text/html', '.htm': 'text/html', '.json': 'application/json', + '.epub': 'application/epub', '.xml': 'application/xml', '.zip': 'application/zip', '.rar': 'application/x-rar-compressed', diff --git a/backend/database/db_models.py b/backend/database/db_models.py index 947c0a812..baa8e903e 100644 --- a/backend/database/db_models.py +++ b/backend/database/db_models.py @@ -1,4 +1,4 @@ -from sqlalchemy import BigInteger, Boolean, Column, ForeignKey, ForeignKeyConstraint, Integer, JSON, Numeric, 
PrimaryKeyConstraint, Sequence, String, Text, TIMESTAMP, UniqueConstraint, Index, Float +from sqlalchemy import BigInteger, Boolean, Column, Integer, JSON, Numeric, Sequence, String, Text, TIMESTAMP, UniqueConstraint, Index, Float from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.orm import DeclarativeBase from sqlalchemy.sql import func @@ -178,6 +178,10 @@ class ModelRecord(TableBase): Boolean, default=True, doc="Whether to verify SSL certificates when connecting to this model API. Default is true. Set to false for local services without SSL support.") chunk_batch = Column( Integer, doc="Batch size for concurrent embedding requests during document chunking") + model_appid = Column( + String(100), doc="Application ID for model authentication (used by some STT/TTS providers like Volcano Engine)") + access_token = Column( + String(100), doc="Access token for model authentication (used by some STT/TTS providers like Volcano Engine)") class ModelMonitoringRecord(SimpleTableBase): @@ -353,10 +357,17 @@ class KnowledgeRecord(TableBase): knowledge_describe = Column(String(3000), doc="Knowledge base description") knowledge_sources = Column(String(300), doc="Knowledge base sources") embedding_model_name = Column(String(200), doc="Embedding model name, used to record the embedding model used by the knowledge base") + embedding_model_id = Column(Integer, doc="Embedding model ID, foreign key reference to model_record_t.model_id") tenant_id = Column(String(100), doc="Tenant ID") group_ids = Column(String, doc="Knowledge base group IDs list") ingroup_permission = Column( String(30), doc="In-group permission: EDIT, READ_ONLY, PRIVATE") + summary_frequency = Column(String(10), nullable=True, + doc="Auto-summary frequency: '3h', '5h', '1d', '1w', or NULL (disabled)") + last_summary_time = Column(TIMESTAMP(timezone=False), nullable=True, + doc="Timestamp of last summary generation") + last_doc_update_time = Column(TIMESTAMP(timezone=False), nullable=True, + 
doc="Timestamp of last document add/delete operation") class TenantConfig(TableBase): @@ -775,6 +786,9 @@ class A2AExternalAgent(TableBase): nacos_config_id = Column(String(64), doc="Reference to Nacos config used for discovery") nacos_agent_name = Column(String(255), doc="Original name used for Nacos query") + # Base URL for infrastructure health checks + base_url = Column(String(512), doc="Base URL for health checks (service root address), e.g., http://agent:8080") + # Tenant isolation tenant_id = Column(String(100), nullable=False, doc=_TENANT_ID_DOC) @@ -803,12 +817,6 @@ class A2AExternalAgentRelation(TableBase): name="uq_local_external_agent", deferrable=True, ), - ForeignKeyConstraint( - ["external_agent_id"], - [f"{SCHEMA}.ag_a2a_external_agent_t.id"], - name="fk_external_agent", - deferrable=True, - ), {"schema": SCHEMA}, ) @@ -919,7 +927,7 @@ class A2AMessage(SimpleTableBase): # Core identifiers (following A2A spec) message_id = Column(String(64), primary_key=True, doc="Message ID (A2A spec: messageId)") - task_id = Column(String(64), ForeignKey(f"{SCHEMA}.ag_a2a_task_t.id", ondelete="CASCADE"), nullable=True, doc="Task ID this message belongs to (nullable for standalone/simple requests)") + task_id = Column(String(64), nullable=True, doc="Task ID this message belongs to (nullable for standalone/simple requests)") # Message attributes message_index = Column(Integer, nullable=False, doc="Order of message in the conversation") @@ -947,7 +955,7 @@ class A2AArtifact(SimpleTableBase): # Core identifiers (following A2A spec) id = Column(String(64), primary_key=True, doc="Internal primary key") artifact_id = Column(String(64), nullable=False, doc="Artifact ID (A2A spec: artifactId)") - task_id = Column(String(64), ForeignKey(f"{SCHEMA}.ag_a2a_task_t.id", ondelete="CASCADE"), nullable=False, doc="Task ID this artifact belongs to") + task_id = Column(String(64), nullable=False, doc="Task ID this artifact belongs to") # Artifact attributes name = Column(String(255), 
doc="Human-readable artifact name") diff --git a/backend/database/knowledge_db.py b/backend/database/knowledge_db.py index 0d13eb9f7..8674bb4fb 100644 --- a/backend/database/knowledge_db.py +++ b/backend/database/knowledge_db.py @@ -1,5 +1,6 @@ from typing import Any, Dict, List, Optional +import logging import uuid from sqlalchemy import func from sqlalchemy.exc import SQLAlchemyError @@ -7,6 +8,9 @@ from database.client import as_dict, get_db_session from database.db_models import KnowledgeRecord from utils.str_utils import convert_list_to_string +from consts.scheduler import VALID_SUMMARY_FREQUENCIES + +logger = logging.getLogger("knowledge_db") def _generate_index_name(knowledge_id: int) -> str: @@ -49,6 +53,7 @@ def create_knowledge_record(query: Dict[str, Any]) -> Dict[str, Any]: "knowledge_sources": query.get("knowledge_sources", "elasticsearch"), "tenant_id": query.get("tenant_id"), "embedding_model_name": query.get("embedding_model_name"), + "embedding_model_id": query.get("embedding_model_id"), "knowledge_name": knowledge_name, "group_ids": convert_list_to_string(group_ids) if isinstance(group_ids, list) else group_ids, "ingroup_permission": query.get("ingroup_permission"), @@ -116,6 +121,7 @@ def upsert_knowledge_record(query: Dict[str, Any]) -> Dict[str, Any]: existing_record.knowledge_describe = query.get('knowledge_describe', '') existing_record.knowledge_sources = query.get('knowledge_sources', 'elasticsearch') existing_record.embedding_model_name = query.get('embedding_model_name') + existing_record.embedding_model_id = query.get('embedding_model_id') existing_record.updated_by = query.get('user_id') existing_record.update_time = func.current_timestamp() @@ -345,6 +351,43 @@ def update_model_name_by_index_name(index_name: str, embedding_model_name: str, raise e +def update_embedding_model_by_index_name( + index_name: str, + embedding_model_id: int, + embedding_model_name: str, + tenant_id: str, + user_id: str +) -> bool: + """ + Update the embedding 
model (both ID and name) for a knowledge base. + + Args: + index_name: Internal index name of the knowledge base + embedding_model_id: New embedding model ID + embedding_model_name: New embedding model name + tenant_id: Tenant ID + user_id: User ID making the update + + Returns: + bool: Whether the update was successful + """ + try: + with get_db_session() as session: + result = session.query(KnowledgeRecord).filter( + KnowledgeRecord.index_name == index_name, + KnowledgeRecord.delete_flag != 'Y', + KnowledgeRecord.tenant_id == tenant_id + ).update({ + "embedding_model_id": embedding_model_id, + "embedding_model_name": embedding_model_name, + "updated_by": user_id + }) + session.commit() + return result > 0 + except SQLAlchemyError as e: + raise e + + def get_index_name_by_knowledge_name(knowledge_name: str, tenant_id: str) -> str: """ Get the internal index_name from user-facing knowledge_name. @@ -411,5 +454,77 @@ def get_knowledge_name_map_by_index_names(index_names: List[str]) -> Dict[str, s knowledge_name_map[index_name] = index_name return knowledge_name_map - except SQLAlchemyError as e: - raise e + except SQLAlchemyError: + logger.exception("Query knowledge name map error") + raise + + +def update_summary_frequency(index_name: str, summary_frequency: Optional[str], + _tenant_id: str, user_id: str) -> bool: + """Update the auto-summary frequency for a knowledge base.""" + valid_frequencies = VALID_SUMMARY_FREQUENCIES + if summary_frequency not in valid_frequencies: + raise ValueError(f"Invalid summary_frequency: {summary_frequency}") + try: + with get_db_session() as session: + record = session.query(KnowledgeRecord).filter( + KnowledgeRecord.index_name == index_name, + KnowledgeRecord.delete_flag != 'Y' + ).first() + if not record: + return False + record.summary_frequency = summary_frequency + record.updated_by = user_id + session.commit() + return True + except SQLAlchemyError: + logger.exception("Update summary frequency error") + raise + + +def 
update_last_summary_time(index_name: str): + """Update last_summary_time to now after a successful summary generation.""" + from datetime import datetime + try: + with get_db_session() as session: + record = session.query(KnowledgeRecord).filter( + KnowledgeRecord.index_name == index_name, + KnowledgeRecord.delete_flag != 'Y' + ).first() + if record: + record.last_summary_time = datetime.now() + session.commit() + except SQLAlchemyError: + logger.exception("Update last summary time error") + raise + + +def update_last_doc_update_time(index_name: str): + """Update last_doc_update_time to now after document add/delete operation.""" + from datetime import datetime + try: + with get_db_session() as session: + record = session.query(KnowledgeRecord).filter( + KnowledgeRecord.index_name == index_name, + KnowledgeRecord.delete_flag != 'Y' + ).first() + if record: + record.last_doc_update_time = datetime.now() + session.commit() + except SQLAlchemyError: + logger.exception("Update last doc update time error") + raise + + +def get_knowledge_bases_for_auto_summary() -> List[Dict[str, Any]]: + """Query all knowledge bases with non-null summary_frequency.""" + try: + with get_db_session() as session: + records = session.query(KnowledgeRecord).filter( + KnowledgeRecord.summary_frequency.isnot(None), + KnowledgeRecord.delete_flag != 'Y' + ).all() + return [as_dict(record) for record in records] + except SQLAlchemyError: + logger.exception("Get knowledge bases error") + raise diff --git a/backend/nexent_context_metrics.log b/backend/nexent_context_metrics.log deleted file mode 100644 index ebd63bcae..000000000 --- a/backend/nexent_context_metrics.log +++ /dev/null @@ -1,39 +0,0 @@ -Step 1: main_i=2291 main_o=54 | comp_i=0 comp_o=0 | mem_est_input=2897 |mem_est_output=88 -Total: main_i=2291 main_o=54 | comp_i=0 comp_o=0 | all_i=2291 all_o=54 | mem_est_input=2897 |mem_est_output=88 ------ -Step 1: main_i=1422 main_o=48 | comp_i=0 comp_o=0 | mem_est_input=1807 |mem_est_output=69 
-Total: main_i=1422 main_o=48 | comp_i=0 comp_o=0 | all_i=1422 all_o=48 | mem_est_input=1807 |mem_est_output=69 ------ -Step 1: main_i=1486 main_o=444 | comp_i=0 comp_o=0 | mem_est_input=1891 |mem_est_output=555 -Total: main_i=1486 main_o=444 | comp_i=0 comp_o=0 | all_i=1486 all_o=444 | mem_est_input=1891 |mem_est_output=555 ------ -Step 1: main_i=1423 main_o=15 | comp_i=0 comp_o=0 | mem_est_input=1811 |mem_est_output=10 -Total: main_i=1423 main_o=15 | comp_i=0 comp_o=0 | all_i=1423 all_o=15 | mem_est_input=1811 |mem_est_output=10 ------ -Step 1: main_i=1450 main_o=298 | comp_i=0 comp_o=0 | mem_est_input=1835 |mem_est_output=330 -Total: main_i=1450 main_o=298 | comp_i=0 comp_o=0 | all_i=1450 all_o=298 | mem_est_input=1835 |mem_est_output=330 ------ -Step 1: main_i=1422 main_o=46 | comp_i=0 comp_o=0 | mem_est_input=1807 |mem_est_output=63 -Total: main_i=1422 main_o=46 | comp_i=0 comp_o=0 | all_i=1422 all_o=46 | mem_est_input=1807 |mem_est_output=63 ------ -Step 1: main_i=1425 main_o=47 | comp_i=0 comp_o=0 | mem_est_input=1810 |mem_est_output=62 -Total: main_i=1425 main_o=47 | comp_i=0 comp_o=0 | all_i=1425 all_o=47 | mem_est_input=1810 |mem_est_output=62 ------ -Step 1: main_i=1480 main_o=30 | comp_i=0 comp_o=0 | mem_est_input=1876 |mem_est_output=37 -Total: main_i=1480 main_o=30 | comp_i=0 comp_o=0 | all_i=1480 all_o=30 | mem_est_input=1876 |mem_est_output=37 ------ -Step 1: main_i=1422 main_o=48 | comp_i=0 comp_o=0 | mem_est_input=1807 |mem_est_output=67 -Total: main_i=1422 main_o=48 | comp_i=0 comp_o=0 | all_i=1422 all_o=48 | mem_est_input=1807 |mem_est_output=67 ------ -Step 1: main_i=1518 main_o=104 | comp_i=0 comp_o=0 | mem_est_input=1918 |mem_est_output=140 -Total: main_i=1518 main_o=104 | comp_i=0 comp_o=0 | all_i=1518 all_o=104 | mem_est_input=1918 |mem_est_output=140 ------ -Step 1: main_i=1758 main_o=36 | comp_i=0 comp_o=0 | mem_est_input=2171 |mem_est_output=51 -Total: main_i=1758 main_o=36 | comp_i=0 comp_o=0 | all_i=1758 all_o=36 | mem_est_input=2171 
|mem_est_output=51 ------ -Step 1: main_i=1479 main_o=61 | comp_i=0 comp_o=0 | mem_est_input=1879 |mem_est_output=80 -Total: main_i=1479 main_o=61 | comp_i=0 comp_o=0 | all_i=1479 all_o=61 | mem_est_input=1879 |mem_est_output=80 ------ -Step 1: main_i=1551 main_o=467 | comp_i=0 comp_o=0 | mem_est_input=1970 |mem_est_output=607 -Total: main_i=1551 main_o=467 | comp_i=0 comp_o=0 | all_i=1551 all_o=467 | mem_est_input=1970 |mem_est_output=607 ------ diff --git a/backend/prompts/managed_system_prompt_template_en.yaml b/backend/prompts/managed_system_prompt_template_en.yaml index 1d555a907..67da8305c 100644 --- a/backend/prompts/managed_system_prompt_template_en.yaml +++ b/backend/prompts/managed_system_prompt_template_en.yaml @@ -166,5 +166,14 @@ planning: final_answer: pre_messages: |- + You have reached the maximum step limit. Please provide a comprehensive summary of: + 1. What has been accomplished so far + 2. Key findings or results + 3. Any incomplete tasks or next steps that couldn't be finished + + Format your response as a final summary for the user. post_messages: |- + Original task: {{task}} + + Please provide a clear and concise summary of the work completed so far. diff --git a/backend/prompts/managed_system_prompt_template_zh.yaml b/backend/prompts/managed_system_prompt_template_zh.yaml index 971737862..231eee325 100644 --- a/backend/prompts/managed_system_prompt_template_zh.yaml +++ b/backend/prompts/managed_system_prompt_template_zh.yaml @@ -83,7 +83,7 @@ system_prompt: |- value = config["key1"]["key2"] print(value) - 3. **遵循技能指南**:技能内容注入后,严格按其中的步骤执行。不要跳过技能指南中的步骤,也不要用自行编写的代码替代技能定义的��程。 + 3. **遵循技能指南**:技能内容注入后,严格按其中的步骤执行。不要跳过技能指南中的步骤,也不要用自行编写的代码替代技能定义的流程。 4. **执行技能脚本**:如果技能指南中引用了附加脚本(形如 ``),使用以下格式调用: 代码: @@ -211,11 +211,11 @@ system_prompt: |- ### python代码规范 1. 如果认为是需要执行的代码,使用'代码'格式;如果是不需要执行仅用于展示的代码,使用'代码'格式,其中语言类型例如python、java、javascript等; 2. 只使用已定义的变量,变量将在多次调用之间持续保持; - 3. 使用“print()”函数让下一次的模型调用看到对应变量信息; + 3. 使用"print()"函数让下一次的模型调用看到对应变量信息; 4. 
正确使用工具的入参,使用关键字参数,不要用字典形式; 5. 避免在一轮对话中进行过多的工具调用,这会导致输出格式难以预测; 6. 只在需要时调用工具,不重复相同参数的调用; - 7. 使用变量名保存函数调用结果,在每个中间步骤中,您可以使用“print()”来保存您需要的任何重要信息。被保存的信息在代码执行之间保持。print()输出的内容应被视为字符串,不要对其进行字典相关操作如.get()、[]等,避免类型错误; + 7. 使用变量名保存函数调用结果,在每个中间步骤中,您可以使用"print()"来保存您需要的任何重要信息。被保存的信息在代码执行之间保持。print()输出的内容应被视为字符串,不要对其进行字典相关操作如.get()、[]等,避免类型错误; 9. 示例中的代码避免出现**if**、**for**等逻辑,仅调用工具,示例中的每一次的行动都是确定事件。如果有不同的条件,你应该给出不同条件下的示例; 10. 工具调用使用关键字参数,如:tool_name(param1="value1", param2="value2"); 11. 不要放弃!你负责解决任务,而不是提供解决方向。 @@ -259,5 +259,14 @@ planning: final_answer: pre_messages: |- + 你已达到最大步数限制。请提供一份全面的工作总结,内容包括: + 1. 到目前为止已完成的工作 + 2. 主要发现或结果 + 3. 未能完成的任务或后续步骤 + + 请以最终总结的格式呈现给用户。 post_messages: |- + 原始任务:{{task}} + + 请对迄今为止完成的工作进行清晰、简洁的总结。 diff --git a/backend/prompts/manager_system_prompt_template_en.yaml b/backend/prompts/manager_system_prompt_template_en.yaml index 50cfbc411..a4ffae074 100644 --- a/backend/prompts/manager_system_prompt_template_en.yaml +++ b/backend/prompts/manager_system_prompt_template_en.yaml @@ -210,5 +210,14 @@ planning: final_answer: pre_messages: |- + You have reached the maximum step limit. Please provide a comprehensive summary of: + 1. What has been accomplished so far + 2. Key findings or results + 3. Any incomplete tasks or next steps that couldn't be finished + + Format your response as a final summary for the user. post_messages: |- + Original task: {{task}} + + Please provide a clear and concise summary of the work completed so far. diff --git a/backend/prompts/manager_system_prompt_template_zh.yaml b/backend/prompts/manager_system_prompt_template_zh.yaml index 3c7144cad..6743316e3 100644 --- a/backend/prompts/manager_system_prompt_template_zh.yaml +++ b/backend/prompts/manager_system_prompt_template_zh.yaml @@ -287,5 +287,14 @@ planning: final_answer: pre_messages: |- + 你已达到最大步数限制。请提供一份全面的工作总结,内容包括: + 1. 到目前为止已完成的工作 + 2. 主要发现或结果 + 3. 
未能完成的任务或后续步骤 + + 请以最终总结的格式呈现给用户。 post_messages: |- + 原始任务:{{task}} + + 请对迄今为止完成的工作进行清晰、简洁的总结。 diff --git a/backend/services/a2a_agent_adapter.py b/backend/services/a2a_agent_adapter.py index c052b5d37..b6fddc500 100644 --- a/backend/services/a2a_agent_adapter.py +++ b/backend/services/a2a_agent_adapter.py @@ -261,7 +261,7 @@ def build_a2a_message_response( A2A Message response dict wrapped in {"message": {...}}. """ if not message_id: - message_id = f"msg_{uuid4().hex[:16]}" + message_id = f"msg_{uuid4().hex}" if parts: message_parts = parts diff --git a/backend/services/a2a_client_service.py b/backend/services/a2a_client_service.py index 14f721ffd..e4e81fec5 100644 --- a/backend/services/a2a_client_service.py +++ b/backend/services/a2a_client_service.py @@ -88,15 +88,24 @@ async def discover_from_url( # Extract endpoint URL - prioritize supportedInterfaces (A2A v1.0 standard) agent_url = self._extract_agent_url(card) - # Extract protocol info and supported interfaces - capabilities = card.get("capabilities", {}) - protocol_version = capabilities.get("protocolVersion", "1.0") - streaming = capabilities.get("streaming", False) - transport_type = "http-streaming" if streaming else "http-polling" - # Extract supported interfaces (A2A v1.0 standard format) supported_interfaces = card.get("supportedInterfaces", []) + # Extract protocol info from supported_interfaces (A2A 1.0 spec) + # protocol_version and streaming are properties of each interface, not top-level + first_interface = supported_interfaces[0] if supported_interfaces else {} + interface_capabilities = first_interface.get("capabilities", {}) + protocol_version = first_interface.get("protocolVersion", "1.0") + streaming = interface_capabilities.get("streaming", False) + + # Fallback to top-level capabilities if no supported_interfaces + if not supported_interfaces: + card_capabilities = card.get("capabilities", {}) + if protocol_version == "1.0" and card_capabilities.get("protocolVersion"): + protocol_version 
= card_capabilities.get("protocolVersion") + if not streaming and card_capabilities.get("streaming"): + streaming = card_capabilities.get("streaming") + # Store in database result = a2a_agent_db.create_external_agent_from_url( source_url=url, @@ -104,7 +113,7 @@ async def discover_from_url( description=description, agent_url=agent_url, version=protocol_version, - streaming=(transport_type == "http-streaming"), + streaming=streaming, tenant_id=tenant_id, user_id=user_id, raw_card=card, @@ -222,50 +231,95 @@ async def _discover_single_from_nacos( client = NacosClient(nacos_addr, username, password) try: - # Query service instance from Nacos - instance = await client.query_service_instance(agent_name, namespace) - if not instance: - logger.warning(f"No instance found for agent '{agent_name}' in Nacos") + # Query A2A agent from Nacos using dedicated A2A endpoint + agent_info = await client.query_a2a_agent(agent_name, namespace) + if not agent_info: + logger.warning(f"No A2A agent found for '{agent_name}' in Nacos") return None - # Fetch Agent Card from instance - agent_card_url = instance.get("metadata", {}).get("a2a_card_url") - if not agent_card_url: - # Construct URL from instance host/port - host = instance.get("ip") - port = instance.get("port") - if host and port: - agent_card_url = f"http://{host}:{port}/.well-known/agent-{agent_name}.json" - - if not agent_card_url: - logger.warning(f"No Agent Card URL found for agent '{agent_name}'") + # Extract agent URL from A2A response + agent_url = agent_info.get("agent_url") or agent_info.get("url") + if not agent_url: + logger.warning(f"No agent URL found for A2A agent '{agent_name}'") return None - # Fetch Agent Card - try: - async with A2AHttpClient() as http_client: - card = await http_client.get_json(agent_card_url) - except aiohttp.ClientError: - # Network errors retrieving agent card should result in None - logger.warning(f"Failed to retrieve agent card from {agent_card_url}") - return None + # Get metadata and 
extract description from Nacos response + metadata = agent_info.get("metadata") or {} + description = agent_info.get("description") or metadata.get("description", "") + nacos_interfaces = metadata.get("supported_interfaces", []) + supported_interfaces = nacos_interfaces.copy() if nacos_interfaces else [] + protocol_version = "1.0" + streaming = False + agent_card_fetched = False + + # Fetch Agent Card from agent_url to get supported_interfaces (A2A v1.0 spec) + # Try common Agent Card endpoints (order matters - try more specific paths first) + card_urls = [ + f"{agent_url.rstrip('/')}/.well-known/agent-card.json", + f"{agent_url.rstrip('/')}/.well-known/agent.json", + f"{agent_url.rstrip('/')}/.well-known/agent-1.0.json", + f"{agent_url.rstrip('/')}/agent-card.json", + f"{agent_url.rstrip('/')}/agent.json", + ] + + for card_url in card_urls: + try: + async with A2AHttpClient() as http_client: + card = await http_client.get_json(card_url, headers=build_a2a_headers()) + + if card and (card.get("name") or card.get("agent_id")): + logger.info(f"Fetched Agent Card from {card_url}") + + # Extract supported_interfaces from Agent Card + card_interfaces = card.get("supportedInterfaces", []) + + # Always update from Agent Card if present + if card_interfaces: + supported_interfaces = card_interfaces + agent_card_fetched = True + + # Extract description from Agent Card if not found in Nacos + if not description: + description = card.get("description", "") + + # Extract protocol info from supported_interfaces + first_interface = supported_interfaces[0] if supported_interfaces else {} + capabilities = first_interface.get("capabilities", {}) + protocol_version = first_interface.get("protocolVersion", "1.0") + streaming = capabilities.get("streaming", False) + + # Merge raw_card: Agent Card takes precedence over Nacos info + agent_info = card + break + + except Exception as e: + logger.warning(f"Failed to fetch Agent Card from {card_url}: {e}") + continue + + if not 
agent_card_fetched: + logger.warning( + f"[Nacos Discovery] Failed to fetch Agent Card for '{agent_name}', " + f"using Nacos interfaces: {supported_interfaces}" + ) - # Extract endpoint URL and supported interfaces - agent_url = self._extract_agent_url(card) - supported_interfaces = card.get("supportedInterfaces", []) + logger.info( + f"[Nacos Discovery] Storing agent: name={agent_name}, " + f"agent_url={agent_url}, supported_interfaces_count={len(supported_interfaces) if supported_interfaces else 0}, " + f"protocol_version={protocol_version}, streaming={streaming}" + ) # Store in database result = a2a_agent_db.create_external_agent_from_nacos( - name=card.get("name", agent_name), - description=card.get("description", ""), + name=agent_name, + description=description, agent_url=agent_url, - protocol_version=card.get("capabilities", {}).get("protocolVersion", "1.0"), - transport_type="http-streaming" if card.get("capabilities", {}).get("streaming") else "http-polling", + version=protocol_version, + streaming=streaming, nacos_config_id=nacos_config["config_id"], nacos_agent_name=agent_name, tenant_id=tenant_id, user_id=user_id, - raw_card=card, + raw_card=agent_info, supported_interfaces=supported_interfaces ) @@ -312,13 +366,10 @@ def _extract_agent_url(self, card: Dict[str, Any]) -> str: return "" def _find_url_in_interfaces(self, interfaces: List[Any]) -> str: - """Find URL from supportedInterfaces array, preferring http-json-rpc.""" - json_rpc_protocols = ("http-json-rpc", "jsonrpc", "httpjsonrpc") - for iface in interfaces: - if iface.get("protocolBinding", "").lower() in json_rpc_protocols: - url = iface.get("url", "") - if url: - return url + """Find URL from supportedInterfaces array - return the first interface's URL. + + This ensures protocol and URL are always from the same interface. 
+ """ for iface in interfaces: url = iface.get("url", "") if url: @@ -426,46 +477,128 @@ async def refresh_agent_card( if not agent: raise AgentDiscoveryError(f"Agent {external_agent_id} not found") + source_type = agent.get("source_type") + source_url = agent.get("source_url") + agent_url = agent.get("agent_url") + base_url = agent.get("base_url") + try: - # Fetch fresh Agent Card - source_url = agent.get("source_url") - if not source_url: - raise AgentDiscoveryError("No source URL available for refresh") + if source_type == "nacos": + # Nacos discovered agents: use /health endpoint to check availability + if not base_url: + raise AgentDiscoveryError("No base_url available for health check") - async with A2AHttpClient() as client: - card = await client.get_json(source_url) + health_url = f"{base_url.rstrip('/')}/health" + logger.info(f"Checking health for Nacos agent: {health_url}") - # Extract updated info - use _extract_agent_url for A2A v1.0 standard - new_url = self._extract_agent_url(card) - new_name = card.get("name") - new_description = card.get("description") - new_supported_interfaces = card.get("supportedInterfaces", []) + async with A2AHttpClient() as client: + health_response = await client.get_json(health_url) - # Note: Do NOT update protocol_type and agent_url during refresh - # These are user-configured values and should not be overwritten - # The refresh should only update metadata (name, description, supported_interfaces, raw_card) + # Update availability based on health check + a2a_agent_db.update_agent_availability( + external_agent_id=external_agent_id, + tenant_id=tenant_id, + is_available=True, + check_result="OK" + ) - # Update cache - result = a2a_agent_db.refresh_external_agent_cache( - external_agent_id=external_agent_id, - tenant_id=tenant_id, - user_id=user_id, - new_raw_card=card, - new_name=new_name, - new_description=new_description, - new_supported_interfaces=new_supported_interfaces - ) + # Update cache timestamp + 
a2a_agent_db.refresh_external_agent_cache( + external_agent_id=external_agent_id, + tenant_id=tenant_id, + user_id=user_id + ) - # Update availability - a2a_agent_db.update_agent_availability( - external_agent_id=external_agent_id, - tenant_id=tenant_id, - is_available=True, - check_result="OK" - ) + logger.info(f"Health check passed for agent {external_agent_id}") + return { + "agent_id": external_agent_id, + "source_type": source_type, + "health_url": health_url, + "health_response": health_response, + "status": "available" + } - logger.info(f"Refreshed agent {external_agent_id}") - return result + else: + # URL discovered agents: fetch fresh Agent Card from source_url + if not source_url: + raise AgentDiscoveryError("No source URL available for refresh") + + async with A2AHttpClient() as client: + card = await client.get_json(source_url) + + # Extract updated info - use _extract_agent_url for A2A v1.0 standard + new_url = self._extract_agent_url(card) + new_name = card.get("name") + new_description = card.get("description") + new_supported_interfaces = card.get("supportedInterfaces", []) + + # Extract new protocol type from the card + new_protocol_type = _extract_protocol_type(new_supported_interfaces) + current_protocol_type = agent.get("protocol_type") + + # Determine if we need to update agent_url and protocol_type + # Update agent_url only if the card yields a non-empty URL that differs (_extract_agent_url returns "" when none is found) + update_agent_url = bool(new_url) and new_url != agent_url + + # Update protocol_type if it changed in the remote card + update_protocol_type = new_protocol_type != current_protocol_type + + # When protocol_type changes, we need to find the corresponding interface URL + if update_protocol_type: + logger.info( + f"Protocol type changed for agent {external_agent_id}: " + f"{current_protocol_type} -> {new_protocol_type}" + ) + # The database function will handle finding the correct interface URL + result = a2a_agent_db.refresh_external_agent_cache( + external_agent_id=external_agent_id, 
tenant_id=tenant_id, + user_id=user_id, + new_raw_card=card, + new_agent_url=new_url if update_agent_url else None, + new_name=new_name, + new_description=new_description, + new_supported_interfaces=new_supported_interfaces, + new_protocol_type=new_protocol_type + ) + elif update_agent_url: + # Only agent_url changed + logger.info( + f"Agent URL changed for agent {external_agent_id}: " + f"{agent_url} -> {new_url}" + ) + result = a2a_agent_db.refresh_external_agent_cache( + external_agent_id=external_agent_id, + tenant_id=tenant_id, + user_id=user_id, + new_raw_card=card, + new_agent_url=new_url, + new_name=new_name, + new_description=new_description, + new_supported_interfaces=new_supported_interfaces + ) + else: + # No changes to agent_url or protocol_type, just update metadata + result = a2a_agent_db.refresh_external_agent_cache( + external_agent_id=external_agent_id, + tenant_id=tenant_id, + user_id=user_id, + new_raw_card=card, + new_name=new_name, + new_description=new_description, + new_supported_interfaces=new_supported_interfaces + ) + + # Update availability + a2a_agent_db.update_agent_availability( + external_agent_id=external_agent_id, + tenant_id=tenant_id, + is_available=True, + check_result="OK" + ) + + logger.info(f"Refreshed agent {external_agent_id}") + return result except aiohttp.ClientError as e: logger.error(f"Failed to refresh agent {external_agent_id}: {e}") diff --git a/backend/services/agent_service.py b/backend/services/agent_service.py index 73c6a4640..02fa7d8c6 100644 --- a/backend/services/agent_service.py +++ b/backend/services/agent_service.py @@ -46,6 +46,7 @@ update_related_agents, clear_agent_new_mark ) +from database import a2a_agent_db from database.model_management_db import get_model_by_model_id, get_model_id_by_display_name from database.remote_mcp_db import get_mcp_server_by_name_and_tenant from database.tool_db import ( @@ -967,6 +968,49 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str = 
logger.error(f"Failed to update related agents: {str(e)}") raise ValueError(f"Failed to update related agents: {str(e)}") + # Handle related external agents saving when provided + try: + if request.related_external_agent_ids is not None and agent_id is not None: + related_external_agent_ids = request.related_external_agent_ids + # Query current relations + current_relations = a2a_agent_db.list_external_relations_by_local_agent( + local_agent_id=agent_id, + tenant_id=tenant_id + ) + current_external_ids = { + rel["external_agent_id"] for rel in current_relations + } + new_external_ids = set(related_external_agent_ids) if related_external_agent_ids else set() + + # Find IDs to delete (in current but not in new) + ids_to_delete = current_external_ids - new_external_ids + # Find IDs to add (in new but not in current) + ids_to_add = new_external_ids - current_external_ids + + # Soft delete removed relations + for ext_agent_id in ids_to_delete: + a2a_agent_db.remove_external_agent_relation( + local_agent_id=agent_id, + external_agent_id=ext_agent_id, + tenant_id=tenant_id + ) + + # Add new relations + for ext_agent_id in ids_to_add: + try: + a2a_agent_db.add_external_agent_relation( + local_agent_id=agent_id, + external_agent_id=ext_agent_id, + tenant_id=tenant_id, + user_id=user_id + ) + except ValueError: + # Relation already exists, skip + pass + except Exception as e: + logger.error(f"Failed to update related external agents: {str(e)}") + raise ValueError(f"Failed to update related external agents: {str(e)}") + return {"agent_id": agent_id} diff --git a/backend/services/agent_version_service.py b/backend/services/agent_version_service.py index 067fd0e1c..69163dbc6 100644 --- a/backend/services/agent_version_service.py +++ b/backend/services/agent_version_service.py @@ -817,7 +817,8 @@ async def list_published_agents_impl( # Apply visibility filter for DEV/USER based on group overlap if not can_edit_all: agent_group_ids = 
set(convert_string_to_list(agent.get("group_ids"))) - if len(user_group_ids.intersection(agent_group_ids)) == 0: + is_creator = str(agent.get("created_by")) == str(user_id) + if not is_creator and len(user_group_ids.intersection(agent_group_ids)) == 0: continue agent_id = agent.get("agent_id") diff --git a/backend/services/auto_summary_scheduler.py b/backend/services/auto_summary_scheduler.py new file mode 100644 index 000000000..5bc44e442 --- /dev/null +++ b/backend/services/auto_summary_scheduler.py @@ -0,0 +1,211 @@ +""" +Background scheduler that periodically checks knowledge bases with +auto-summary enabled and regenerates summaries as needed. +""" +import logging +import threading +import time +from datetime import datetime, timedelta +from typing import Optional + +from consts.scheduler import ( + FREQUENCY_MAP, + SCHEDULER_CHECK_INTERVAL_SECONDS, +) +from database.knowledge_db import get_knowledge_bases_for_auto_summary +from services.vectordatabase_service import ElasticSearchService, get_vector_db_core +from utils.config_utils import tenant_config_manager + +logger = logging.getLogger(__name__) + +# Check interval from centralized config +CHECK_INTERVAL_SECONDS = SCHEDULER_CHECK_INTERVAL_SECONDS + +# Track knowledge bases currently being processed to avoid duplicates +_in_flight: set = set() + + +def _parse_last_summary_time(last_summary_time) -> Optional[datetime]: + """Parse last_summary_time from a datetime or ISO string into a naive datetime (tzinfo dropped), or None if missing/unparseable.""" + if last_summary_time is None: + return None + if isinstance(last_summary_time, datetime): + return last_summary_time.replace(tzinfo=None) + if isinstance(last_summary_time, str): + try: + return datetime.fromisoformat(last_summary_time).replace(tzinfo=None) + except (ValueError, TypeError): + return None + return None + + +def _is_due_for_summary(last_summary_time, frequency: str, last_doc_update_time) -> bool: + """Check if a knowledge base is due for summary regeneration. 
+ + Args: + last_summary_time: Timestamp of last summary generation + frequency: Summary frequency (e.g., '3h', '1d') + last_doc_update_time: Timestamp of last document add/delete operation + + Returns: + True if summary should be regenerated, False otherwise + """ + interval = FREQUENCY_MAP.get(frequency) + if interval is None: + return False + + last = _parse_last_summary_time(last_summary_time) + if last is None: + return True # Never summarized, do it now + + # Check if time interval has elapsed + if (datetime.now() - last) < interval: + return False + + # Check if there are new document changes since last summary + doc_update = _parse_last_summary_time(last_doc_update_time) + if doc_update is None: + return True # No doc update time recorded, assume need summary + + # Skip if no new documents since last summary + if doc_update <= last: + logger.info(f"Skipping summary: no document changes since last summary") + return False + + return True + + +def _run_auto_summary_for_kb(index_name: str, tenant_id: str): + """Run the summary generation for a single knowledge base.""" + if index_name in _in_flight: + logger.info(f"Skipping {index_name}: already being processed") + return + + _in_flight.add(index_name) + try: + logger.info(f"Starting auto-summary for knowledge base: {index_name}") + vdb_core = get_vector_db_core() + service = ElasticSearchService() + + from utils.document_vector_utils import ( + process_documents_for_clustering, + kmeans_cluster_documents, + summarize_clusters_map_reduce, + merge_cluster_summaries, + ) + + # Get model_id from tenant config for LLM summarization + model_id = None + if tenant_id: + try: + tenant_config = tenant_config_manager.load_config(tenant_id) + model_id_str = tenant_config.get("LLM_ID") + if model_id_str: + model_id = int(model_id_str) + logger.info(f"Using LLM model ID {model_id} for auto-summary (tenant: {tenant_id})") + else: + logger.warning(f"No LLM_ID configured for tenant {tenant_id}, summary will be placeholder 
only") + except Exception as e: + logger.warning(f"Failed to get LLM_ID from tenant config: {e}") + + sample_count = 40 # Smaller sample for auto-summary + document_samples, doc_embeddings = process_documents_for_clustering( + index_name=index_name, + vdb_core=vdb_core, + sample_doc_count=sample_count, + ) + + if not document_samples: + logger.warning(f"No documents found for auto-summary: {index_name}") + return + + clusters = kmeans_cluster_documents(doc_embeddings, k=None) + cluster_summaries = summarize_clusters_map_reduce( + document_samples=document_samples, + clusters=clusters, + language="zh", + doc_max_words=100, + cluster_max_words=150, + model_id=model_id, + tenant_id=tenant_id, + ) + final_summary = merge_cluster_summaries(cluster_summaries) + + # Save the summary and update last_summary_time + service.change_summary( + index_name=index_name, + summary_result=final_summary, + user_id="auto_scheduler", + ) + # change_summary already calls update_last_summary_time + logger.info(f"Auto-summary completed for knowledge base: {index_name}") + + except Exception as e: + logger.error(f"Auto-summary failed for {index_name}: {e}", exc_info=True) + finally: + _in_flight.discard(index_name) + + +def _scheduler_loop(stop_event: threading.Event): + """Main scheduler loop that runs in a background thread.""" + logger.info("Auto-summary scheduler started") + while not stop_event.is_set(): + try: + kbs = get_knowledge_bases_for_auto_summary() + logger.info(f"Checking {len(kbs)} knowledge bases for auto-summary") + + for kb in kbs: + if stop_event.is_set(): + break + frequency = kb.get("summary_frequency") + if _is_due_for_summary( + kb.get("last_summary_time"), + frequency, + kb.get("last_doc_update_time") + ): + _run_auto_summary_for_kb( + index_name=kb["index_name"], + tenant_id=kb.get("tenant_id", ""), + ) + + except Exception as e: + logger.error(f"Auto-summary scheduler check failed: {e}", exc_info=True) + + # Wait for next check interval, but respond to stop_event 
+ stop_event.wait(timeout=CHECK_INTERVAL_SECONDS) + + logger.info("Auto-summary scheduler stopped") + + +class AutoSummaryScheduler: + """Manages the auto-summary background thread.""" + + def __init__(self): + self._stop_event = threading.Event() + self._thread: Optional[threading.Thread] = None + + def start(self): + """Start the scheduler thread.""" + if self._thread and self._thread.is_alive(): + logger.warning("Auto-summary scheduler is already running") + return + self._stop_event.clear() + self._thread = threading.Thread( + target=_scheduler_loop, + args=(self._stop_event,), + daemon=True, + name="auto-summary-scheduler", + ) + self._thread.start() + logger.info("Auto-summary scheduler thread started") + + def stop(self): + """Signal the scheduler thread to stop.""" + self._stop_event.set() + if self._thread: + self._thread.join(timeout=60) + logger.info("Auto-summary scheduler thread stopped") + + +# Singleton instance +auto_summary_scheduler = AutoSummaryScheduler() diff --git a/backend/services/config_sync_service.py b/backend/services/config_sync_service.py index 9fe50813a..0ed29bfc5 100644 --- a/backend/services/config_sync_service.py +++ b/backend/services/config_sync_service.py @@ -112,6 +112,21 @@ async def save_config_impl(config, tenant_id, user_id): embedding_api_config = model_config.get("apiConfig", {}) env_config[f"{model_prefix}_API_KEY"] = safe_value( embedding_api_config.get("apiKey")) + + # Save STT specific fields for speech recognition models + if model_type == "stt": + if model_config.get("modelFactory"): + stt_factory_key = "STT_MODEL_FACTORY" + tenant_config_manager.set_single_config( + user_id, tenant_id, stt_factory_key, model_config.get("modelFactory")) + if model_config.get("modelAppid"): + stt_appid_key = "STT_MODEL_APPID" + tenant_config_manager.set_single_config( + user_id, tenant_id, stt_appid_key, model_config.get("modelAppid")) + if model_config.get("accessToken"): + stt_token_key = "STT_ACCESS_TOKEN" + 
tenant_config_manager.set_single_config( + user_id, tenant_id, stt_token_key, model_config.get("accessToken")) logger.info("Configuration saved successfully") @@ -187,4 +202,11 @@ def build_model_config(model_config: dict) -> dict: if "embedding" in model_config.get("model_type", ""): config["dimension"] = model_config.get("max_tokens", 0) + # Add STT model specific fields + model_type = model_config.get("model_type", "") + if model_type == "stt": + config["modelFactory"] = model_config.get("model_factory", "") + config["modelAppid"] = model_config.get("model_appid", "") + config["accessToken"] = model_config.get("access_token", "") + return config diff --git a/backend/services/data_process_service.py b/backend/services/data_process_service.py index 2b222a584..a024089a3 100644 --- a/backend/services/data_process_service.py +++ b/backend/services/data_process_service.py @@ -148,8 +148,28 @@ async def get_all_tasks(self, filter: bool = True) -> List[Dict[str, Any]]: logger.debug( f"⏰ Inspector initialization took {time.time() - start_time}s") - # Collect task IDs from different sources + # Collect task IDs from different sources and keep runtime metadata task_ids = set() + runtime_task_meta: Dict[str, Dict[str, Any]] = {} + + def _normalize_runtime_meta(task: Dict[str, Any]) -> Dict[str, Any]: + task_name_full = task.get('name', '') or '' + task_name = task_name_full.split('.')[-1] if task_name_full else '' + kwargs = task.get('kwargs') or {} + if isinstance(kwargs, str): + try: + import json as _json + kwargs = _json.loads(kwargs) + except Exception: + kwargs = {} + if not isinstance(kwargs, dict): + kwargs = {} + return { + 'task_name': task_name, + 'index_name': kwargs.get('index_name', ''), + 'path_or_url': kwargs.get('source', ''), + 'original_filename': kwargs.get('original_filename', ''), + } def get_active(): return inspector.active() @@ -169,12 +189,15 @@ def get_reserved(): task_id = task.get('id') if task_id: task_ids.add(task_id) + 
runtime_task_meta[task_id] = _normalize_runtime_meta(task) if reserved_tasks_dict: for worker, tasks in reserved_tasks_dict.items(): for task in tasks: task_id = task.get('id') if task_id: task_ids.add(task_id) + # Keep active metadata if already present + runtime_task_meta.setdefault(task_id, _normalize_runtime_meta(task)) # Currently, we don't have scheduled tasks, so skip getting scheduled tasks here start_time = time.time() @@ -192,15 +215,33 @@ def get_reserved(): f"Failed to query Redis for stored task IDs: {str(redis_error)}") logger.debug( f"Total unique task IDs collected (inspector + Redis): {len(task_ids)}") - tasks = [get_task_info(task_id) for task_id in task_ids] + task_id_list = list(task_ids) + tasks = [get_task_info(task_id) for task_id in task_id_list] all_task_infos = await asyncio.gather(*tasks, return_exceptions=True) - for task_info in all_task_infos: + for idx, task_info in enumerate(all_task_infos): if isinstance(task_info, Exception): logger.warning( f"Failed to get status for a task: {task_info}") continue + task_id = task_id_list[idx] + runtime_meta = runtime_task_meta.get(task_id, {}) + # Backfill runtime info for pending/reserved tasks that do not have result metadata yet + if runtime_meta: + if not task_info.get('task_name') and runtime_meta.get('task_name'): + task_info['task_name'] = runtime_meta.get('task_name') + if not task_info.get('index_name') and runtime_meta.get('index_name'): + task_info['index_name'] = runtime_meta.get('index_name') + if not task_info.get('path_or_url') and runtime_meta.get('path_or_url'): + task_info['path_or_url'] = runtime_meta.get('path_or_url') + if not task_info.get('original_filename') and runtime_meta.get('original_filename'): + task_info['original_filename'] = runtime_meta.get('original_filename') + if filter and not (task_info.get('index_name') and task_info.get('task_name')): - continue + # Keep user-visible queued tasks even before worker updates task meta. 
+ if task_info.get('task_name') not in {'process', 'forward', 'process_and_forward'}: + continue + if not task_info.get('index_name'): + continue all_tasks.append(task_info) logger.debug(f"Retrieved {len(all_tasks)} tasks.") except Exception as e: diff --git a/backend/services/model_health_service.py b/backend/services/model_health_service.py index 626e19007..a20b2a6ca 100644 --- a/backend/services/model_health_service.py +++ b/backend/services/model_health_service.py @@ -67,6 +67,9 @@ async def _perform_connectivity_check( model_base_url: str, model_api_key: str, ssl_verify: bool = True, + model_factory: Optional[str] = None, + model_appid: Optional[str] = None, + access_token: Optional[str] = None, display_name: Optional[str] = None, ) -> bool: """ @@ -133,9 +136,34 @@ async def _perform_connectivity_check( api_key=model_api_key, ssl_verify=ssl_verify ).check_connectivity() - elif model_type in ["tts", "stt"]: + elif model_type == 'stt': voice_service = get_voice_service() - connectivity = await voice_service.check_voice_connectivity(model_type) + + + # Determine STT provider based on model_factory + use_volc = model_factory and model_factory.lower() in ["volcengine", "volcano", "volcengine", "火山引擎"] + + if use_volc: + # Use Volcano STT with appid and access_token + connectivity = await voice_service.check_voice_connectivity( + model_type="stt", + stt_config={ + "model_factory": model_factory, + "model_appid": model_appid, + "access_token": access_token, + "base_url": model_base_url + } + ) + else: + # Use Ali STT (default) with api_key and model name + connectivity = await voice_service.check_voice_connectivity( + model_type="stt", + stt_config={ + "api_key": model_api_key, + "base_url": model_base_url, + "model": model_name + } + ) else: raise ValueError(f"Unsupported model type: {model_type}") @@ -150,13 +178,10 @@ async def check_model_connectivity(display_name: str, tenant_id: str) -> dict: raise LookupError( f"Model configuration not found for 
{display_name}") - # Still use repo/name concatenation for model instantiation repo, name = model.get("model_repo", ""), model.get("model_name", "") model_name = f"{repo}/{name}" if repo else name - # Set model to "detecting" status - update_data = { - "connect_status": ModelConnectStatusEnum.DETECTING.value} + update_data = {"connect_status": ModelConnectStatusEnum.DETECTING.value} update_model_record(model["model_id"], update_data) model_type = model["model_type"] @@ -164,13 +189,16 @@ async def check_model_connectivity(display_name: str, tenant_id: str) -> dict: model_api_key = model["api_key"] # Default to True if not present ssl_verify = model.get("ssl_verify", True) + model_factory = model.get("model_factory") + model_appid = model.get("model_appid") + access_token = model.get("access_token") try: set_monitoring_context(tenant_id=tenant_id) connectivity = await _perform_connectivity_check( model_name, model_type, model_base_url, model_api_key, ssl_verify, - display_name=display_name, + model_factory, model_appid, access_token,display_name=display_name, ) except Exception as e: update_data = { @@ -198,36 +226,38 @@ async def check_model_connectivity(display_name: str, tenant_id: str) -> dict: update_data = { "connect_status": ModelConnectStatusEnum.UNAVAILABLE.value} update_model_record(model["model_id"], update_data) - # Propagate for app layer to translate into HTTP raise e + + async def verify_model_config_connectivity(model_config: dict): """ - Verify the connectivity of the model configuration, do not save to the database - Args: - model_config: Model configuration dictionary, containing necessary connection parameters - Returns: - dict: Contains the result of the connectivity test and error message if failed + Verify the connectivity of the model configuration, do not save to the database. 
""" try: model_name = model_config.get("model_name", "") model_type = model_config["model_type"] - model_base_url = model_config["base_url"] + model_base_url = model_config.get("base_url", "") model_api_key = model_config["api_key"] # Default to True if not present ssl_verify = model_config.get("ssl_verify", True) + model_factory = model_config.get("model_factory") + model_appid = model_config.get("model_appid") + access_token = model_config.get("access_token") try: - # Use the common connectivity check function connectivity = await _perform_connectivity_check( - model_name, model_type, model_base_url, model_api_key, ssl_verify + model_name, model_type, model_base_url, model_api_key, ssl_verify, + model_factory, model_appid, access_token ) if not connectivity and ssl_verify: connectivity = await _perform_connectivity_check( - model_name, model_type, model_base_url, model_api_key, False + model_name, model_type, model_base_url, model_api_key, False, + model_factory, model_appid, access_token ) if not connectivity: + error_msg = f"Failed to connect to model '{model_name}' at {model_base_url}. Please verify the URL, API key, and network connection." 
return { "connectivity": False, "model_name": model_name, diff --git a/backend/services/providers/dashscope_provider.py b/backend/services/providers/dashscope_provider.py index b9fb7ab7b..69096fb15 100644 --- a/backend/services/providers/dashscope_provider.py +++ b/backend/services/providers/dashscope_provider.py @@ -68,7 +68,7 @@ async def get_models(self, provider_config: Dict) -> List[Dict]: # Extract key fields for logical determination (lowercased for robustness) m_id = model_obj.get('model', '').lower() desc = model_obj.get('description', '') - metadata = model_obj.get('inference_metadata', {}) + metadata = model_obj.get('inference_metadata') or {} req_mod = metadata.get('request_modality', []) res_mod = metadata.get('response_modality', []) model_obj.setdefault("object", model_obj.get("object", "model")) diff --git a/backend/services/redis_service.py b/backend/services/redis_service.py index efd2c0a7b..dae617f60 100644 --- a/backend/services/redis_service.py +++ b/backend/services/redis_service.py @@ -1,6 +1,7 @@ import json import logging -from typing import Dict, Any, Optional +import re +from typing import Dict, Any, Optional, Tuple, Set import redis @@ -215,7 +216,7 @@ def delete_document_records(self, index_name: str, path_or_url: str) -> Dict[str return result - def _recursively_delete_task_and_parents(self, task_id: str) -> tuple[int, set]: + def _recursively_delete_task_and_parents(self, task_id: str) -> Tuple[int, Set[str]]: """ Iteratively delete a Celery task and all its parent tasks from Redis. A single task chain is deleted, and the IDs of the deleted tasks are returned. 
@@ -309,16 +310,11 @@ def _cleanup_celery_tasks(self, index_name: str) -> int: # Check for failed tasks where metadata is in the exception message if task_index_name is None and 'exc_message' in result: - try: - exc_str = str(result['exc_message']) - if '{' in exc_str and '}' in exc_str: - json_part = exc_str[exc_str.find('{'):exc_str.rfind('}')+1] - cleaned_json_part = json_part.replace('\\"', '"') - error_data = json.loads(cleaned_json_part) - task_index_name = error_data.get('index_name') - except (json.JSONDecodeError, TypeError, IndexError) as e: - key_str = key.decode('utf-8') if isinstance(key, bytes) else key - logger.warning(f"Could not parse exception metadata for task key {key_str}: {e}") + error_data = self._extract_error_metadata_from_exc_message( + result.get("exc_message") + ) + if error_data: + task_index_name = error_data.get('index_name') if task_index_name == index_name: key_str = key.decode('utf-8') if isinstance(key, bytes) else key @@ -366,15 +362,11 @@ def _cleanup_celery_tasks(self, index_name: str) -> int: ) if task_index_name is None and 'exc_message' in result: - try: - exc_str = str(result['exc_message']) - if '{' in exc_str and '}' in exc_str: - json_part = exc_str[exc_str.find('{'):exc_str.rfind('}')+1] - cleaned_json_part = json_part.replace('\\"', '"') - error_data = json.loads(cleaned_json_part) - task_index_name = error_data.get('index_name') - except (json.JSONDecodeError, TypeError, IndexError): - pass + error_data = self._extract_error_metadata_from_exc_message( + result.get("exc_message") + ) + if error_data: + task_index_name = error_data.get('index_name') if task_index_name == index_name: key_str = key.decode('utf-8') if isinstance(key, bytes) else key @@ -497,16 +489,12 @@ def _cleanup_document_celery_tasks(self, index_name: str, path_or_url: str) -> i # Check for failed tasks where metadata is in the exception message if task_index_name is None and 'exc_message' in result: - try: - exc_str = str(result['exc_message']) - if 
'{' in exc_str and '}' in exc_str: - json_part = exc_str[exc_str.find('{'):exc_str.rfind('}')+1] - cleaned_json_part = json_part.replace('\\"', '"') - error_data = json.loads(cleaned_json_part) - task_index_name = error_data.get('index_name') - task_source = error_data.get('source') or error_data.get('path_or_url') - except (json.JSONDecodeError, TypeError, IndexError) as e: - logger.warning(f"Could not parse exception metadata for task {task_id}: {e}") + error_data = self._extract_error_metadata_from_exc_message( + result.get("exc_message") + ) + if error_data: + task_index_name = error_data.get('index_name') + task_source = error_data.get('source') or error_data.get('path_or_url') # Match both index name and document path/source if task_index_name == index_name and task_source == path_or_url: @@ -728,6 +716,122 @@ def save_progress_info(self, task_id: str, processed_chunks: int, total_chunks: logger.error(f"Failed to save progress info for task {task_id}: {str(e)}") return False + def increment_progress_info(self, task_id: str, delta_processed: int, total_chunks: Optional[int] = None, ttl_hours: int = 24) -> bool: + """ + Atomically increment processed chunks for a task. 
+ """ + if not task_id: + logger.error("Cannot increment progress info: task_id is empty") + return False + if delta_processed <= 0: + return True + + progress_key = f"progress:{task_id}" + ttl_seconds = ttl_hours * 3600 + max_retries = 5 + + for attempt in range(max_retries): + pipe = self.client.pipeline() + try: + pipe.watch(progress_key) + raw = pipe.get(progress_key) + current_processed, current_total = self._parse_progress(raw, total_chunks) + new_processed, current_total = self._compute_next_progress( + current_processed=current_processed, + delta_processed=delta_processed, + current_total=current_total, + total_chunks=total_chunks, + ) + + payload = json.dumps({ + "processed_chunks": new_processed, + "total_chunks": current_total, + }) + + pipe.multi() + pipe.setex(progress_key, ttl_seconds, payload) + pipe.execute() + logger.info( + f"[REDIS PROGRESS] Incremented progress for task {task_id}: " + f"+{delta_processed}, now {new_processed}/{current_total}" + ) + return True + except redis.WatchError: + continue + except Exception as exc: + logger.warning(f"Failed to increment progress for task {task_id}: {exc}") + return False + finally: + pipe.reset() + + logger.warning(f"Failed to increment progress for task {task_id}: too many concurrent updates") + return False + + def _parse_progress(self, raw: Any, total_chunks: Optional[int]) -> Tuple[int, int]: + """ + Parse persisted progress payload from Redis with tolerant fallback. 
+ """ + default_total = int(total_chunks or 0) + if not raw: + return 0, default_total + + if isinstance(raw, bytes): + raw = raw.decode("utf-8") + + try: + data = json.loads(raw) + processed = int(data.get("processed_chunks", 0) or 0) + total = default_total if total_chunks else int(data.get("total_chunks", 0) or 0) + return processed, total + except Exception: + return 0, default_total + + def _compute_next_progress( + self, + current_processed: int, + delta_processed: int, + current_total: int, + total_chunks: Optional[int], + ) -> Tuple[int, int]: + """ + Compute new processed/total values, clamping to known total when available. + """ + next_processed = current_processed + int(delta_processed) + next_total = int(current_total or 0) + + if next_total <= 0 and total_chunks: + next_total = int(total_chunks) + + if next_total > 0: + next_processed = min(next_processed, next_total) + + return next_processed, next_total + + def _extract_error_metadata_from_exc_message(self, exc_message: Any) -> Optional[Dict[str, Any]]: + """ + Try to parse embedded JSON metadata from exception message with tolerant escaping. 
+ """ + try: + exc_str = str(exc_message or "") + if "{" not in exc_str or "}" not in exc_str: + return None + json_part = exc_str[exc_str.find("{"): exc_str.rfind("}") + 1] + candidates = [ + json_part, + json_part.replace('\\"', '"'), + re.sub(r'\\(?!["\\/bfnrtu])', r'\\\\', json_part), + ] + for candidate in candidates: + try: + parsed = json.loads(candidate) + if isinstance(parsed, dict): + return parsed + except Exception: + continue + return None + except Exception: + return None + def get_progress_info(self, task_id: str) -> Optional[Dict[str, int]]: """ Get progress information for a specific task diff --git a/backend/services/tool_configuration_service.py b/backend/services/tool_configuration_service.py index 88edfba17..5e5229ff6 100644 --- a/backend/services/tool_configuration_service.py +++ b/backend/services/tool_configuration_service.py @@ -15,7 +15,6 @@ from consts.const import DATA_PROCESS_SERVICE, LOCAL_MCP_SERVER, MCP_MANAGEMENT_API from consts.exceptions import MCPConnectionError, NotFoundException, ToolExecutionException from consts.model import ToolInstanceInfoRequest, ToolInfo, ToolSourceEnum, ToolValidateRequest -from database.client import minio_client from database.outer_api_tool_db import ( upsert_openapi_service, query_openapi_services_by_tenant, @@ -37,11 +36,11 @@ from database.knowledge_db import get_knowledge_name_map_by_index_names from mcpadapt.smolagents_adapter import _sanitize_function_name from services.file_management_service import get_llm_model, validate_urls_access -from services.vectordatabase_service import get_embedding_model, get_rerank_model, get_vector_db_core +from services.vectordatabase_service import get_embedding_model_by_index_name, get_rerank_model from database.client import minio_client from services.image_service import get_vlm_model from nexent.monitor import set_monitoring_context, set_monitoring_operation -from services.vectordatabase_service import get_embedding_model, get_vector_db_core +from 
services.vectordatabase_service import get_vector_db_core from utils.langchain_utils import discover_langchain_modules from utils.tool_utils import get_local_tools_classes, get_local_tools_description_zh @@ -704,7 +703,19 @@ def _validate_local_tool( instantiation_params[param_name] = param.default if tool_name == "knowledge_base_search": - embedding_model = get_embedding_model(tenant_id=tenant_id) + index_names = instantiation_params.get("index_names", []) + + # Must have embedding model for knowledge base search + if not index_names or not tenant_id: + raise ToolExecutionException( + "Embedding model is required for knowledge_base_search but index_names or tenant_id is missing") + + embedding_model, model_id, _ = get_embedding_model_by_index_name(tenant_id, index_names[0]) + if not embedding_model: + raise ToolExecutionException( + f"No embedding model found for index '{index_names[0]}'. " + f"Please configure an embedding model for this knowledge base.") + vdb_core = get_vector_db_core() # Get rerank configuration @@ -715,7 +726,6 @@ def _validate_local_tool( rerank_model = get_rerank_model(tenant_id=tenant_id, model_name=rerank_model_name) # Build display_name to index_name mapping for LLM parameter conversion - index_names = instantiation_params.get("index_names", []) display_name_to_index_map = {} if index_names: knowledge_name_map = get_knowledge_name_map_by_index_names(index_names) diff --git a/backend/services/vectordatabase_service.py b/backend/services/vectordatabase_service.py index 5639103de..8ad9b54e2 100644 --- a/backend/services/vectordatabase_service.py +++ b/backend/services/vectordatabase_service.py @@ -36,11 +36,14 @@ update_knowledge_record, get_knowledge_info_by_tenant_id, update_model_name_by_index_name, + update_last_doc_update_time, + update_last_summary_time, + update_embedding_model_by_index_name, ) from utils.str_utils import convert_list_to_string from database.user_tenant_db import get_user_tenant_by_user_id from database.group_db 
import query_group_ids_by_user -from database.model_management_db import get_model_records +from database.model_management_db import get_model_by_display_name, get_model_by_model_id, get_model_records from services.redis_service import get_redis_service from services.group_service import get_tenant_default_group_id from utils.config_utils import tenant_config_manager, get_model_name_from_config @@ -76,6 +79,111 @@ def _update_progress(task_id: str, processed: int, total: int): f"[PROGRESS CALLBACK] Exception updating progress for task {task_id}: {str(e)}") +def _get_embedding_model_display_name(model_id: Optional[int], tenant_id: str) -> str: + """ + Get embedding model display_name from model_id. + + Args: + model_id: The model ID to look up + tenant_id: Tenant ID for the lookup + + Returns: + The model's display_name if found, empty string otherwise + """ + if model_id is None: + return "" + try: + model = get_model_by_model_id(model_id, tenant_id) + if model: + return model.get("display_name", "") + except Exception as e: + logger.warning(f"Failed to get display_name for model_id {model_id}: {e}") + return "" + + +class KnowledgeBaseNeedsModelConfigError(Exception): + """Exception raised when a knowledge base needs an embedding model to be configured.""" + def __init__(self, index_name: str, message: str = None): + self.index_name = index_name + self.message = message or f"Knowledge base '{index_name}' needs an embedding model to be configured" + super().__init__(self.message) + + +def get_embedding_model_by_index_name(tenant_id: str, index_name: str) -> tuple[Optional[Any], Optional[int], dict]: + """ + Get the embedding model for a knowledge base by its index_name. 
+ + Args: + tenant_id: Tenant ID + index_name: The index name of the knowledge base + + Returns: + Tuple of (embedding model instance or None, model_id or None, metadata dict) + metadata contains: { + "status": str, # "ok" | "needs_config" | "error" + "needs_update": bool, # Whether the database needs to be updated + "update_info": dict, # Fields to update if needs_update is True + "message": str # Status message + } + + Design principles: + - Force explicit configuration: model_id must be explicitly set by user + - No auto-fix: never automatically use tenant default model + - Clear error guidance: return needs_config status for user action + """ + try: + knowledge_record = get_knowledge_record({ + "index_name": index_name, + "tenant_id": tenant_id + }) + + if not knowledge_record: + return None, None, { + "status": "error", + "needs_update": False, + "message": f"Knowledge base '{index_name}' not found" + } + + model_id = knowledge_record.get("embedding_model_id") + + # Case 1: model_id exists and is valid, use it + if model_id: + model, _ = get_embedding_model_by_id(tenant_id, model_id) + if model: + return model, model_id, { + "status": "ok", + "needs_update": False, + "message": "Embedding model found" + } + # Model ID exists but model not found - fall through to error + logger.warning(f"Model ID {model_id} specified for index '{index_name}' but model not found") + + # Case 2: model_id does not exist or is invalid + # Design principle: Force explicit configuration, no auto-fix + # Return needs_config to guide user to select a model + embedding_model_name = knowledge_record.get("embedding_model_name") + if embedding_model_name: + # Has model_name but no valid model_id (legacy data) + logger.warning(f"Index '{index_name}' has embedding_model_name but no valid model_id, needs explicit configuration") + else: + # No model configured at all + logger.error(f"Index '{index_name}' has no embedding model configured") + + return None, None, { + "status": "needs_config", 
+ "needs_update": False, + "message": f"No embedding model configured for knowledge base '{index_name}'. Please select a model." + } + + except Exception as e: + logger.warning(f"Failed to get embedding model for index {index_name}: {e}") + return None, None, { + "status": "error", + "needs_update": False, + "message": str(e) + } + + ALLOWED_CHUNK_FIELDS = { "id", "title", @@ -176,70 +284,105 @@ def check_knowledge_base_exist_impl(knowledge_name: str, vdb_core: VectorDatabas return {"status": "available"} -def get_embedding_model(tenant_id: str, model_name: Optional[str] = None): +def get_embedding_model(tenant_id: str, model_name: Optional[str] = None) -> tuple[Optional[Any], Optional[int]]: """ Get the embedding model for the tenant, optionally using a specific model name. Args: tenant_id: Tenant ID - model_name: Optional specific model name to use (format: "model_repo/model_name" or just "model_name") - If provided, will try to find the model in the tenant's model list. + model_name: Optional display name of the embedding model to use. + If provided, will find the model by display_name in the tenant's model list. 
Returns: - Embedding model instance or None + Tuple of (embedding model instance or None, model_id or None) """ - # If model_name is provided, try to find it in the tenant's models + # If model_name is provided, find the model by display_name if model_name: try: - models = get_model_records({"model_type": "embedding"}, tenant_id) - for model in models: - model_display_name = model.get("model_repo") + "/" + model["model_name"] if model.get("model_repo") else model["model_name"] - if model_display_name == model_name: - # Found the model, create embedding instance - model_config = { - "model_repo": model.get("model_repo", ""), - "model_name": model["model_name"], - "api_key": model.get("api_key", ""), - "base_url": model.get("base_url", ""), - "model_type": "embedding", - "max_tokens": model.get("max_tokens", 1024), - "ssl_verify": model.get("ssl_verify", True), - } - return OpenAICompatibleEmbedding( + model = get_model_by_display_name(model_name, tenant_id) + if model and model.get("model_type") in ["embedding", "multi_embedding"]: + model_config = { + "model_repo": model.get("model_repo", ""), + "model_name": model["model_name"], + "api_key": model.get("api_key", ""), + "base_url": model.get("base_url", ""), + "model_type": model.get("model_type", "embedding"), + "max_tokens": model.get("max_tokens", 1024), + "ssl_verify": model.get("ssl_verify", True), + } + model_type = model.get("model_type", "embedding") + if model_type == "multi_embedding": + embedding_model = JinaEmbedding( + api_key=model_config.get("api_key", ""), + base_url=model_config.get("base_url", ""), + model_name=get_model_name_from_config(model_config) or "", + embedding_dim=model_config.get("max_tokens", 1024), + ssl_verify=model_config.get("ssl_verify", True), + ) + else: + embedding_model = OpenAICompatibleEmbedding( api_key=model_config.get("api_key", ""), base_url=model_config.get("base_url", ""), model_name=get_model_name_from_config(model_config) or "", 
embedding_dim=model_config.get("max_tokens", 1024), ssl_verify=model_config.get("ssl_verify", True), ) + return embedding_model, model.get("model_id") + else: + logger.warning(f"Model '{model_name}' not found or is not an embedding model") except Exception as e: logger.warning(f"Failed to get embedding model by name {model_name}: {e}") - # Fall back to default embedding model (current behavior) - model_config = tenant_config_manager.get_model_config( - key="EMBEDDING_ID", tenant_id=tenant_id) + # No default fallback - return None, None when no model is specified or found + return None, None - model_type = model_config.get("model_type", "") - if model_type == "embedding": - # Get the es core - return OpenAICompatibleEmbedding( - api_key=model_config.get("api_key", ""), - base_url=model_config.get("base_url", ""), - model_name=get_model_name_from_config(model_config) or "", - embedding_dim=model_config.get("max_tokens", 1024), - ssl_verify=model_config.get("ssl_verify", True), - ) - elif model_type == "multi_embedding": - return JinaEmbedding( - api_key=model_config.get("api_key", ""), - base_url=model_config.get("base_url", ""), - model_name=get_model_name_from_config(model_config) or "", - embedding_dim=model_config.get("max_tokens", 1024), - ssl_verify=model_config.get("ssl_verify", True), - ) - else: - return None +def get_embedding_model_by_id(tenant_id: str, model_id: int) -> tuple[Optional[Any], Optional[int]]: + """ + Get the embedding model by model_id. 
+ + Args: + tenant_id: Tenant ID + model_id: Model ID to query + + Returns: + Tuple of (embedding model instance or None, model_id or None) + """ + try: + model = get_model_by_model_id(model_id, tenant_id) + if model and model.get("model_type") in ["embedding", "multi_embedding"]: + model_config = { + "model_repo": model.get("model_repo", ""), + "model_name": model["model_name"], + "api_key": model.get("api_key", ""), + "base_url": model.get("base_url", ""), + "model_type": model.get("model_type", "embedding"), + "max_tokens": model.get("max_tokens", 1024), + "ssl_verify": model.get("ssl_verify", True), + } + model_type = model.get("model_type", "embedding") + if model_type == "multi_embedding": + embedding_model = JinaEmbedding( + api_key=model_config.get("api_key", ""), + base_url=model_config.get("base_url", ""), + model_name=get_model_name_from_config(model_config) or "", + embedding_dim=model_config.get("max_tokens", 1024), + ssl_verify=model_config.get("ssl_verify", True), + ) + else: + embedding_model = OpenAICompatibleEmbedding( + api_key=model_config.get("api_key", ""), + base_url=model_config.get("base_url", ""), + model_name=get_model_name_from_config(model_config) or "", + embedding_dim=model_config.get("max_tokens", 1024), + ssl_verify=model_config.get("ssl_verify", True), + ) + return embedding_model, model.get("model_id") + else: + logger.warning(f"Model with id {model_id} not found or is not an embedding model") + except Exception as e: + logger.warning(f"Failed to get embedding model by id {model_id}: {e}") + return None, None def get_rerank_model(tenant_id: str, model_name: Optional[str] = None): @@ -415,11 +558,19 @@ def create_index( None, description="ID of the user creating the knowledge base"), tenant_id: Optional[str] = Body( None, description="ID of the tenant creating the knowledge base"), + model_id: Optional[int] = Body( + None, description="ID of the embedding model to use"), ): try: if vdb_core.check_index_exists(index_name): raise 
Exception(f"Index {index_name} already exists") - embedding_model = get_embedding_model(tenant_id) + + # Get embedding model by model_id if provided + if model_id: + embedding_model, actual_model_id = get_embedding_model_by_id(tenant_id, model_id) + else: + embedding_model, actual_model_id = None, None + success = vdb_core.create_index(index_name, embedding_dim=embedding_dim or ( embedding_model.embedding_dim if embedding_model else 1024)) if not success: @@ -427,7 +578,8 @@ def create_index( knowledge_data = {"index_name": index_name, "created_by": user_id, "tenant_id": tenant_id, - "embedding_model_name": embedding_model.model} + "embedding_model_name": embedding_model.model if embedding_model else None, + "embedding_model_id": actual_model_id} create_knowledge_record(knowledge_data) return {"status": "success", "message": f"Index {index_name} created successfully"} except Exception as e: @@ -468,7 +620,7 @@ def create_knowledge_base( """ try: # Get embedding model - use user-selected model if provided, otherwise use tenant default - embedding_model = get_embedding_model(tenant_id, embedding_model_name) + embedding_model, model_id = get_embedding_model(tenant_id, embedding_model_name) # Determine the embedding model name to save: use user-provided name if available, # otherwise use the model's display name @@ -483,6 +635,7 @@ def create_knowledge_base( "user_id": user_id, "tenant_id": tenant_id, "embedding_model_name": saved_embedding_model_name, + "embedding_model_id": model_id, } # Add group permission and group IDs if provided @@ -570,6 +723,77 @@ def update_knowledge_base( return result + @staticmethod + def update_embedding_model( + index_name: str, + model_id: int, + tenant_id: str, + user_id: Optional[str] = None, + ) -> Dict[str, Any]: + """ + Update the embedding model for a knowledge base. 
+ + Args: + index_name: Internal index name of the knowledge base + model_id: ID of the embedding model to use + tenant_id: Tenant ID + user_id: ID of the user making the update + + Returns: + Dict containing update result information + + Raises: + ValueError: If model is not found or is not an embedding model + Exception: If update fails + """ + try: + # Validate the model exists and is an embedding model + model = get_model_by_model_id(model_id, tenant_id) + if not model: + raise ValueError(f"Model with id {model_id} not found") + + if model.get("model_type") not in ["embedding", "multi_embedding"]: + raise ValueError( + f"Model '{model.get('display_name', model_id)}' is not an embedding model. " + f"Please select an embedding model." + ) + + # Update the database record + # Use display_name as embedding_model_name + embedding_model_name = model.get("display_name") + success = update_embedding_model_by_index_name( + index_name=index_name, + embedding_model_id=model_id, + embedding_model_name=embedding_model_name, + tenant_id=tenant_id, + user_id=user_id or "" + ) + + if not success: + raise Exception(f"Failed to update embedding model for index '{index_name}'") + + logger.info( + f"Embedding model updated for knowledge base '{index_name}' " + f"to model '{model.get('display_name', model_id)}' (id: {model_id}) by user '{user_id}'" + ) + + # Use display_name for consistency with database update + model_display_name = model.get("display_name") + return { + "status": "success", + "index_name": index_name, + "model_id": model_id, + "model_name": model_display_name, + "model_display_name": model.get("display_name"), + "message": f"Embedding model updated successfully to '{model_display_name}'" + } + + except ValueError: + raise + except Exception as e: + logger.error(f"Failed to update embedding model for index '{index_name}': {e}") + raise Exception(f"Failed to update embedding model: {str(e)}") + @staticmethod async def delete_index( index_name: str = Path(..., @@ 
-774,6 +998,11 @@ def list_indices( index_name = record["index_name"] index_stats = indice_stats.get(index_name, {}) + # Get embedding model display_name from model_id + model_id = record.get("embedding_model_id") + tenant_id = record.get("tenant_id") or target_tenant_id + embedding_model_display_name = _get_embedding_model_display_name(model_id, tenant_id) + stats_info.append({ # Internal index name (used as ID) "name": index_name, @@ -785,8 +1014,14 @@ def list_indices( "knowledge_sources": record["knowledge_sources"], "ingroup_permission": record["ingroup_permission"], "tenant_id": record.get("tenant_id"), + # Embedding model info: display_name from model_id + "embedding_model_name": embedding_model_display_name or record.get("embedding_model_name", ""), + "embedding_model_id": model_id, # Update time for sorting and display "update_time": record.get("update_time"), + # Auto-summary settings + "summary_frequency": record.get("summary_frequency"), + "last_summary_time": record.get("last_summary_time"), "stats": index_stats, }) @@ -812,6 +1047,9 @@ def index_documents( ] = Body(..., description="Document List to process"), vdb_core: VectorDatabaseCore = Depends(get_vector_db_core), task_id: Optional[str] = None, + model_id: Optional[int] = Body( + None, description="ID of the embedding model to use"), + large_mode: bool = False, ): """ Index documents and create vector embeddings, create index if it doesn't exist @@ -821,6 +1059,8 @@ def index_documents( index_name: Index name data: List containing document data to be indexed vdb_core: VectorDatabaseCore instance + task_id: Optional task ID for progress tracking + model_id: Optional model ID for the embedding model Returns: IndexingResponse object containing indexing result information @@ -833,7 +1073,7 @@ def index_documents( if not vdb_core.check_index_exists(index_name): try: ElasticSearchService.create_index( - index_name, vdb_core=vdb_core) + index_name, vdb_core=vdb_core, model_id=model_id) 
logger.info(f"Created new index {index_name}") except Exception as create_error: raise Exception( @@ -939,6 +1179,7 @@ def index_documents( embedding_model=embedding_model, documents=documents, embedding_batch_size=embedding_batch_size, + large_mode=large_mode, progress_callback=lambda processed, total: _update_progress( task_id, processed, total) if task_id else None ) @@ -959,6 +1200,9 @@ def index_documents( logger.warning( f"[REDIS PROGRESS] Exception updating final progress for task {task_id}: {str(e)}") + # Update last_doc_update_time for auto-summary tracking + update_last_doc_update_time(index_name) + return { "success": True, "message": f"Successfully indexed {total_indexed} documents", @@ -1228,6 +1472,10 @@ def delete_documents( index_name, path_or_url) # 2. Delete MinIO file minio_result = delete_file(path_or_url) + + # Update last_doc_update_time for auto-summary tracking + update_last_doc_update_time(index_name) + return {"status": "success", "deleted_es_count": deleted_count, "deleted_minio": minio_result.get("success")} @staticmethod @@ -1450,6 +1698,8 @@ def change_summary( "index_name": index_name } update_knowledge_record(update_data) + # Update last_summary_time for auto-summary tracking + update_last_summary_time(index_name) return {"status": "success", "message": f"Index {index_name} summary updated successfully", "summary": summary_result} except Exception as e: @@ -1550,23 +1800,23 @@ def create_chunk( Automatically generates and stores embedding for semantic search. 
""" try: - # Get knowledge base's embedding model name - embedding_model_name = None + # Get knowledge base's embedding model by model_id + embedding_model_id = None if tenant_id: try: knowledge_record = get_knowledge_record({ "index_name": index_name, "tenant_id": tenant_id }) - embedding_model_name = knowledge_record.get("embedding_model_name") if knowledge_record else None + embedding_model_id = knowledge_record.get("embedding_model_id") if knowledge_record else None except Exception as e: - logger.warning(f"Failed to get embedding model name for index {index_name}: {e}") + logger.warning(f"Failed to get embedding model id for index {index_name}: {e}") # Generate embedding if we have content and can get embedding model embedding_vector = None if chunk_request.content: try: - embedding_model = get_embedding_model(tenant_id, embedding_model_name) if tenant_id else None + embedding_model = get_embedding_model_by_id(tenant_id, embedding_model_id)[0] if tenant_id and embedding_model_id else None if embedding_model: embeddings = embedding_model.get_embeddings(chunk_request.content) if embeddings and len(embeddings) > 0: @@ -1596,8 +1846,8 @@ def create_chunk( # Add embedding if generated if embedding_vector: chunk_payload["embedding"] = embedding_vector - if embedding_model_name: - chunk_payload["embedding_model_name"] = embedding_model_name + if embedding_model_id: + chunk_payload["embedding_model_id"] = embedding_model_id result = vdb_core.create_chunk(index_name, chunk_payload) return { @@ -1700,10 +1950,23 @@ def search_hybrid( if weight_accurate < 0 or weight_accurate > 1: raise ValueError("weight_accurate must be between 0 and 1") - embedding_model = get_embedding_model(tenant_id) + # Get embedding model from the first index's knowledge base record + if not index_names: + raise ValueError("At least one index name is required") + + embedding_model, model_id, meta = get_embedding_model_by_index_name(tenant_id, index_names[0]) + if not embedding_model: - raise 
ValueError( - "No embedding model configured for the current tenant") + if meta.get("status") == "needs_config": + # Return a clear error indicating model needs to be configured + raise KnowledgeBaseNeedsModelConfigError( + index_name=index_names[0], + message=f"Knowledge base '{index_names[0]}' does not have an embedding model configured. Please select a model in the knowledge base settings." + ) + else: + raise ValueError( + f"No embedding model found for index '{index_names[0]}'. " + f"Please configure an embedding model for this knowledge base.") start_time = time.perf_counter() raw_results = vdb_core.hybrid_search( @@ -1729,6 +1992,8 @@ def search_hybrid( "total": len(formatted_results), "query_time_ms": elapsed_ms, } + except KnowledgeBaseNeedsModelConfigError: + raise except ValueError: raise except Exception as exc: diff --git a/backend/services/voice_service.py b/backend/services/voice_service.py index 05dba6231..80d6264db 100644 --- a/backend/services/voice_service.py +++ b/backend/services/voice_service.py @@ -1,147 +1,219 @@ -import asyncio import logging -from typing import Any, Optional +from typing import Any, Dict, Optional -from nexent.core.models.stt_model import STTConfig, STTModel -from nexent.core.models.tts_model import TTSConfig, TTSModel +from nexent.core.models.stt_model import BaseSTTModel +from nexent.core.models.volc_stt_model import VolcSTTConfig, VolcSTTModel +from nexent.core.models.ali_stt_model import AliSTTConfig, AliSTTModel -from consts.const import APPID, CLUSTER, SPEED_RATIO, TEST_VOICE_PATH, TOKEN, VOICE_TYPE +from consts.const import TEST_PCM_PATH from consts.exceptions import ( VoiceServiceException, STTConnectionException, - TTSConnectionException, - VoiceConfigException ) +from database.model_management_db import get_model_records +from utils.config_utils import tenant_config_manager logger = logging.getLogger("voice_service") class VoiceService: - """Voice service that handles STT and TTS operations""" - - def 
__init__(self): - """Initialize the voice service with configurations from const.py""" - try: - # Initialize STT configuration - self.stt_config = STTConfig( - appid=APPID, - token=TOKEN - ) - - # Initialize TTS configuration - self.tts_config = TTSConfig( - appid=APPID, - token=TOKEN, - cluster=CLUSTER, - voice_type=VOICE_TYPE, - speed_ratio=SPEED_RATIO - ) - - # Initialize models - self.stt_model = STTModel(self.stt_config, TEST_VOICE_PATH) - self.tts_model = TTSModel(self.tts_config) - - except Exception as e: - logger.error(f"Failed to initialize voice service: {str(e)}") - raise VoiceConfigException(f"Voice service initialization failed: {str(e)}") from e - - async def start_stt_streaming_session(self, websocket) -> None: + """Voice service that handles STT operations""" + + def _get_stt_model_from_config( + self, + model_factory: Optional[str] = None, + model_name: Optional[str] = None, + api_key: Optional[str] = None, + model_appid: Optional[str] = None, + access_token: Optional[str] = None, + base_url: Optional[str] = None, + language: str = "zh" + ) -> BaseSTTModel: """ - Start STT streaming session + Get the appropriate STT model based on model factory configuration. 
Args: - websocket: WebSocket connection for real-time audio streaming + model_factory: Model factory/vendor name + model_name: Model name + api_key: API key (for Ali STT) + model_appid: Application ID (for Volcano STT) + access_token: Access token (for Volcano STT) + base_url: Custom WebSocket URL (optional) + language: Language for speech recognition - Raises: - STTConnectionException: If STT streaming fails + Returns: + STT model instance based on configuration """ - try: - logger.info("Starting STT streaming session") - await self.stt_model.start_streaming_session(websocket) - except Exception as e: - logger.error(f"STT streaming session failed: {str(e)}") - raise STTConnectionException(f"STT streaming failed: {str(e)}") from e + use_volc = model_factory and model_factory.lower() in ["volc", "volcano", "volcengine", "火山引擎"] + + if use_volc: + volc_config = VolcSTTConfig( + appid=model_appid or "", + access_token=access_token or "", + ws_url=base_url if base_url else "wss://openspeech.bytedance.com/api/v3/sauc/bigmodel", + format="pcm", + rate=16000 + ) + return VolcSTTModel(volc_config, TEST_PCM_PATH) + else: + ali_config = AliSTTConfig( + api_key=api_key or "", + model=model_name or "qwen3-asr-flash-realtime", + language=language, + ws_url=base_url if base_url else None, + format="pcm", + rate=16000, + enable_vad=True, + timeout=5 + ) + return AliSTTModel(ali_config, TEST_PCM_PATH) - async def generate_tts_speech(self, text: str, stream: bool = True) -> Any: + def _get_stt_model_from_tenant_config( + self, + tenant_id: str, + language: str = "zh" + ) -> BaseSTTModel: """ - Generate TTS speech from text + Get STT model based on tenant's model configuration. 
Args: - text: Text to convert to speech - stream: Whether to stream the audio or return complete audio + tenant_id: Tenant ID + language: Language for speech recognition Returns: - Audio data (streaming or complete) - - Raises: - TTSConnectionException: If TTS generation fails + STT model instance based on tenant's configuration """ - if not text: - raise VoiceServiceException("No text provided for TTS generation") - try: - logger.info(f"Generating TTS speech for text: {text[:50]}...") - speech_result = await self.tts_model.generate_speech(text, stream=stream) - return speech_result - except Exception as e: - logger.error(f"TTS generation failed: {str(e)}") - raise TTSConnectionException(f"TTS generation failed: {str(e)}") from e + stt_config = tenant_config_manager.get_model_config(tenant_id, "stt") + + if stt_config: + model_factory = stt_config.get("model_factory", "") + model_name = stt_config.get("model_name", "") + api_key = stt_config.get("api_key", "") + base_url = stt_config.get("base_url", "") + model_appid = stt_config.get("model_appid", "") + access_token_val = stt_config.get("access_token", "") + + return self._get_stt_model_from_config( + model_factory=model_factory, + model_name=model_name, + api_key=api_key, + model_appid=model_appid, + access_token=access_token_val, + base_url=base_url, + language=language + ) + + model_records = get_model_records({"model_type": "stt"}, tenant_id) + if model_records: + record = model_records[0] + model_factory = record.get("model_factory", "") + model_name = record.get("model_name", "") + api_key = record.get("api_key", "") + base_url = record.get("base_url", "") + model_appid = record.get("model_appid", "") + access_token_val = record.get("access_token", "") + + return self._get_stt_model_from_config( + model_factory=model_factory, + model_name=model_name, + api_key=api_key, + model_appid=model_appid, + access_token=access_token_val, + base_url=base_url, + language=language + ) + + logger.warning(f"No STT model 
configuration found for tenant {tenant_id}, using default config") + return self._get_stt_model_from_config(language=language) - async def stream_tts_to_websocket(self, websocket, text: str) -> None: + except Exception as e: + logger.error(f"Error getting STT model config for tenant {tenant_id}: {str(e)}") + return self._get_stt_model_from_config(language=language) + + async def start_stt_streaming_session( + self, + websocket, + stt_config: Optional[Dict[str, Any]] = None, + tenant_id: Optional[str] = None, + language: str = "zh" + ) -> None: """ - Stream TTS audio to WebSocket with proper error handling and fallback + Start STT streaming session. Args: - websocket: WebSocket connection to stream to - text: Text to convert to speech + websocket: WebSocket connection for real-time audio streaming + stt_config: STT configuration dict from client (preferred) + tenant_id: Tenant ID for model lookup + language: Language for speech recognition (default: zh) Raises: - TTSConnectionException: If TTS service connection fails - VoiceServiceException: If TTS streaming fails + STTConnectionException: If STT streaming fails """ try: - # Generate and stream audio chunks - speech_result = await self.generate_tts_speech(text, stream=True) - - # Check if it's an async iterator or a regular iterable - if hasattr(speech_result, '__aiter__'): - # It's an async iterator, use async for - async for chunk in speech_result: - if websocket.client_state.name == "CONNECTED": - await websocket.send_bytes(chunk) - else: - break - elif hasattr(speech_result, '__iter__'): - # It's a regular iterator, use normal for - for chunk in speech_result: - if websocket.client_state.name == "CONNECTED": - await websocket.send_bytes(chunk) - else: - break + model_factory = None + model_name = None + api_key = None + model_appid = None + access_token = None + base_url = None + + if stt_config: + model_factory = stt_config.get("model_factory") + model_name = stt_config.get("model") or 
stt_config.get("model_name") + api_key = stt_config.get("api_key") or stt_config.get("apiKey") + model_appid = stt_config.get("model_appid") or stt_config.get("appid") + access_token = stt_config.get("access_token") + base_url = stt_config.get("base_url") or stt_config.get("baseUrl") + language = stt_config.get("language", language) else: - # It's a single chunk, send it directly - if websocket.client_state.name == "CONNECTED": - await websocket.send_bytes(speech_result) - - await asyncio.sleep(0.1) - - except TypeError as te: - # If speech_result is still a coroutine, try calling it directly without stream=True - if "async for" in str(te) and "requires an object with __aiter__" in str(te): - logger.error("Falling back to non-streaming TTS") - speech_data = await self.generate_tts_speech(text, stream=False) - if websocket.client_state.name == "CONNECTED": - await websocket.send_bytes(speech_data) + logger.warning("No stt_config provided, will use tenant model config if available") + + if model_factory or api_key or model_appid: + stt_model = self._get_stt_model_from_config( + model_factory=model_factory, + model_name=model_name, + api_key=api_key, + model_appid=model_appid, + access_token=access_token, + base_url=base_url, + language=language + ) + elif tenant_id: + stt_model = self._get_stt_model_from_tenant_config(tenant_id, language) else: - raise + logger.warning("No tenant_id provided and no explicit config, using default Ali STT") + stt_model = self._get_stt_model_from_config( + api_key=api_key, + language=language + ) - # Send end marker after successful TTS generation - if websocket.client_state.name == "CONNECTED": - await websocket.send_json({"status": "completed"}) + await stt_model.start_streaming_session(websocket) + except Exception as e: + logger.error(f"STT streaming session failed: {str(e)}") + raise STTConnectionException(f"STT streaming failed: {str(e)}") from e - async def check_stt_connectivity(self) -> bool: + async def check_stt_connectivity( 
+ self, + model_factory: Optional[str] = None, + api_key: Optional[str] = None, + model_appid: Optional[str] = None, + access_token: Optional[str] = None, + language: str = "zh", + model: str = "qwen3-asr-flash-realtime", + base_url: Optional[str] = None + ) -> bool: """ - Check STT service connectivity + Check STT service connectivity. + + Args: + model_factory: Model factory/vendor name (e.g., "volc", "dashscope") + api_key: API key for Ali STT + model_appid: Application ID for Volcano STT + access_token: Access token for Volcano STT + language: Language for speech recognition (default: zh) + model: STT model name (default: qwen3-asr-flash-realtime) + base_url: Custom WebSocket URL (optional) Returns: bool: True if STT service is connected, False otherwise @@ -150,8 +222,18 @@ async def check_stt_connectivity(self) -> bool: STTConnectionException: If connectivity check fails """ try: - logger.info(f"Checking STT connectivity with config: {self.stt_config}") - connected = await self.stt_model.check_connectivity() + stt_model = self._get_stt_model_from_config( + model_factory=model_factory, + model_name=model, + api_key=api_key, + model_appid=model_appid, + access_token=access_token, + base_url=base_url, + language=language + ) + + connected = await stt_model.check_connectivity() + if not connected: logger.error("STT service connection failed") raise STTConnectionException("STT service connection failed") @@ -162,53 +244,48 @@ async def check_stt_connectivity(self) -> bool: logger.error(f"STT connectivity check failed: {str(e)}") raise STTConnectionException(f"STT connectivity check failed: {str(e)}") from e - async def check_tts_connectivity(self) -> bool: + async def check_voice_connectivity( + self, + model_type: str, + stt_config: Optional[Dict[str, Any]] = None + ) -> bool: """ - Check TTS service connectivity - - Returns: - bool: True if TTS service is connected, False otherwise - - Raises: - TTSConnectionException: If connectivity check fails - """ - try: - 
logger.info(f"Checking TTS connectivity with config: {self.tts_config}") - connected = await self.tts_model.check_connectivity() - if not connected: - logger.error("TTS service connection failed") - raise TTSConnectionException("TTS service connection failed") - return connected - except TTSConnectionException: - raise - except Exception as e: - logger.error(f"TTS connectivity check failed: {str(e)}") - raise TTSConnectionException(f"TTS connectivity check failed: {str(e)}") from e - - async def check_voice_connectivity(self, model_type: str) -> bool: - """ - Check voice service connectivity based on model type + Check voice service connectivity based on model type. Args: - model_type: Type of model to check ('stt' or 'tts') + model_type: Type of model to check ('stt' only) + stt_config: Optional STT configuration dict Returns: - bool: True if the specified service is connected, False otherwise + bool: True if the service is connected, False otherwise Raises: VoiceServiceException: If model_type is invalid STTConnectionException: If STT connectivity check fails - TTSConnectionException: If TTS connectivity check fails """ + if model_type != "stt": + logger.error(f"Unsupported model type: {model_type}") + raise VoiceServiceException(f"Unsupported model type: {model_type}") + try: - if model_type == 'stt': - return await self.check_stt_connectivity() - elif model_type == 'tts': - return await self.check_tts_connectivity() - else: - logger.error(f"Unknown model type: {model_type}") - raise VoiceServiceException(f"Unknown model type: {model_type}") - except (STTConnectionException, TTSConnectionException): + model_factory = stt_config.get("model_factory") if stt_config else None + api_key = stt_config.get("api_key") if stt_config else None + model_appid = stt_config.get("model_appid") if stt_config else None + access_token = stt_config.get("access_token") if stt_config else None + language = stt_config.get("language", "zh") if stt_config else "zh" + model = 
stt_config.get("model", "qwen3-asr-flash-realtime") if stt_config else "qwen3-asr-flash-realtime" + base_url = stt_config.get("base_url") if stt_config else None + + return await self.check_stt_connectivity( + model_factory=model_factory, + api_key=api_key, + model_appid=model_appid, + access_token=access_token, + language=language, + model=model, + base_url=base_url + ) + except STTConnectionException: raise except Exception as e: logger.error(f"Voice service connectivity check failed: {str(e)}") @@ -220,12 +297,7 @@ async def check_voice_connectivity(self, model_type: str) -> bool: def get_voice_service() -> VoiceService: - """ - Get the global voice service instance - - Returns: - VoiceService: The global voice service instance - """ + """Get the global voice service instance.""" global _voice_service_instance if _voice_service_instance is None: _voice_service_instance = VoiceService() diff --git a/backend/utils/a2a_http_client.py b/backend/utils/a2a_http_client.py index 2bc829403..8b7c55d9f 100644 --- a/backend/utils/a2a_http_client.py +++ b/backend/utils/a2a_http_client.py @@ -134,6 +134,7 @@ async def get_json( "User-Agent": "Nexent-A2A-Client/1.0", "Accept": CONTENT_TYPE_JSON, "Connection": "close", + "A2A-Version": "1.0", } if headers: request_headers.update(headers) @@ -141,14 +142,24 @@ async def get_json( logger.debug(f"A2A GET request: url={url}") try: - _, body = await self._request_with_retry( + status, body = await self._request_with_retry( "GET", url, headers=request_headers ) + # Decode body and handle empty responses + body_text = body.decode('utf-8') if body else "" + + if not body_text.strip(): + logger.error( + f"A2A GET received empty response for {url}: HTTP status={status}. " + f"Expected JSON response but got empty body." 
+ ) + raise ValueError(f"Empty response from {url} (HTTP {status})") + # Parse JSON from body import json - data = json.loads(body.decode('utf-8')) + data = json.loads(body_text) return data except asyncio.TimeoutError as e: logger.error(f"A2A GET timeout for {url}: {e}") @@ -156,6 +167,9 @@ async def get_json( except aiohttp.ClientResponseError as e: logger.error(f"A2A GET HTTP error for {url}: {e.status}") raise + except ValueError: + # Re-raise empty response errors without wrapping + raise except Exception as e: import traceback logger.error(f"A2A GET request failed for {url}: {type(e).__name__}: {e}\n{traceback.format_exc()}") @@ -176,6 +190,7 @@ async def post_json( "Content-Type": CONTENT_TYPE_JSON, "Accept": CONTENT_TYPE_JSON, "Connection": "close", + "A2A-Version": "1.0", } if headers: request_headers.update(headers) @@ -183,15 +198,29 @@ async def post_json( logger.info(f"A2A POST request: url={url}, payload={payload}") try: - _, body = await self._request_with_retry( + status, body = await self._request_with_retry( "POST", url, json=payload, headers=request_headers ) + # Decode body and handle empty responses + body_text = body.decode('utf-8') if body else "" + + if not body_text.strip(): + logger.error( + f"A2A POST received empty response for {url}: HTTP status={status}. " + f"This usually indicates the remote agent is not responding correctly. " + f"Check that the agent URL '{url}' is correct and the agent is running." + ) + raise ValueError( + f"Empty response from agent at {url} (HTTP {status}). " + f"The agent may be unreachable, still processing, or the endpoint URL is incorrect." 
+ ) + # Parse JSON from body import json - data = json.loads(body.decode('utf-8')) + data = json.loads(body_text) return data except asyncio.TimeoutError as e: logger.error(f"A2A POST timeout for {url}: {e}") @@ -199,6 +228,9 @@ async def post_json( except aiohttp.ClientResponseError as e: logger.error(f"A2A POST HTTP error for {url}: {e.status}") raise + except ValueError: + # Re-raise empty response errors without wrapping + raise except Exception as e: import traceback logger.error(f"A2A POST request failed for {url}: {type(e).__name__}: {e}\n{traceback.format_exc()}") @@ -249,6 +281,7 @@ def build_a2a_headers(api_key: Optional[str] = None) -> Dict[str, str]: headers = { "Content-Type": CONTENT_TYPE_JSON, "Accept": CONTENT_TYPE_JSON, + "A2A-Version": "1.0", } if api_key: headers["Authorization"] = f"Bearer {api_key}" diff --git a/backend/utils/nacos_client.py b/backend/utils/nacos_client.py new file mode 100644 index 000000000..0fa87410a --- /dev/null +++ b/backend/utils/nacos_client.py @@ -0,0 +1,624 @@ +""" +Nacos Client for service discovery. + +Provides functionality to query service instances from Nacos service registry. +Used by A2A agent discovery to find external A2A agents registered in Nacos. +""" +import logging +from typing import Any, Dict, Optional + +import aiohttp + +logger = logging.getLogger(__name__) + + +class NacosClientError(Exception): + """Base exception for Nacos client errors.""" + pass + + +class NacosConnectionError(NacosClientError): + """Raised when connection to Nacos fails.""" + pass + + +class NacosServiceNotFoundError(NacosClientError): + """Raised when the requested service is not found in Nacos.""" + pass + + +class NacosClient: + """Async client for Nacos service registry operations. + + Provides methods to query service instances for A2A agent discovery. + """ + + def __init__( + self, + nacos_addr: str, + username: Optional[str] = None, + password: Optional[str] = None + ): + """Initialize Nacos client. 
+ + Args: + nacos_addr: Nacos server address (e.g., http://nacos-server:8848). + username: Optional Nacos username for authentication. + password: Optional Nacos password for authentication. + """ + self.nacos_addr = nacos_addr.rstrip("/") + self.username = username + self.password = password + self._session: Optional[aiohttp.ClientSession] = None + self._access_token: Optional[str] = None + + async def _get_session(self) -> aiohttp.ClientSession: + """Get or create an aiohttp session.""" + if self._session is None or self._session.closed: + timeout = aiohttp.ClientTimeout(total=30) + self._session = aiohttp.ClientSession(timeout=timeout) + return self._session + + async def close(self) -> None: + """Close the client session.""" + if self._session and not self._session.closed: + await self._session.close() + self._session = None + + def _build_auth_params(self) -> Dict[str, str]: + """Build authentication parameters for Nacos API requests.""" + params = {} + if self.username: + params["username"] = self.username + if self.password: + params["password"] = self.password + return params + + async def query_a2a_agent( + self, + agent_name: str, + namespace: str = "public" + ) -> Optional[Dict[str, Any]]: + """Query A2A agent info from Nacos using the dedicated A2A endpoint. + + Args: + agent_name: The name of the A2A agent to query. + namespace: Nacos namespace ID (defaults to "public"). + + Returns: + A dict containing agent information: + - agent_name: Agent name + - agent_url: A2A agent endpoint URL + - metadata: Additional metadata + Or None if no agent is found. + + Raises: + NacosConnectionError: If connection to Nacos fails. 
+ """ + params = self._build_auth_params() + agent_name = agent_name.strip() + params["agentName"] = agent_name + params["namespaceId"] = namespace.strip() if namespace else "public" + + url = f"{self.nacos_addr}/nacos/v3/admin/ai/a2a" + + try: + session = await self._get_session() + async with session.get(url, params=params) as response: + text = await response.text() + + if response.status == 200: + data = await response.json() + return self._parse_a2a_response(data, agent_name) + elif response.status == 404: + logger.warning( + f"A2A agent '{agent_name}' not found in Nacos namespace '{namespace}'" + ) + return None + else: + raise NacosConnectionError( + f"Nacos A2A API returned status {response.status}: {text}" + ) + + except aiohttp.ClientError as e: + logger.error(f"Failed to connect to Nacos at {self.nacos_addr}: {e}") + raise NacosConnectionError(f"Failed to connect to Nacos: {e}") from e + + def _parse_a2a_response( + self, + response_data: Dict[str, Any], + agent_name: str + ) -> Optional[Dict[str, Any]]: + """Parse Nacos A2A agent response. + + Args: + response_data: Response data from Nacos A2A API. + agent_name: Agent name for logging. + + Returns: + Agent info dict or None if no agent found. + """ + if response_data.get("code") != 0: + msg = response_data.get("message", "unknown error") + logger.warning(f"Nacos A2A API error for '{agent_name}': {msg}") + return None + + data = response_data.get("data") + if not data: + logger.info(f"No A2A agent data found for '{agent_name}'") + return None + + logger.info(f"[Nacos A2A Parse] Found agent: {data}") + return data + + async def query_service_instance( + self, + service_name: str, + namespace: str = "public", + clusters: Optional[str] = None, + healthy_only: bool = False, + group_name: str = "DEFAULT_GROUP" + ) -> Optional[Dict[str, Any]]: + """Query service instance(s) from Nacos using v3 client API. + + Args: + service_name: The name of the service to query. 
+ namespace: Nacos namespace ID (defaults to "public"). + clusters: Comma-separated cluster names (optional). + healthy_only: If True, only return healthy instances. + group_name: Nacos group name (defaults to "DEFAULT_GROUP"). + + Returns: + A dict containing instance information with keys: + - ip: Instance IP address + - port: Instance port + - metadata: Instance metadata dict (may contain 'a2a_card_url') + Or None if no instance is found. + + Raises: + NacosConnectionError: If connection to Nacos fails. + NacosServiceNotFoundError: If the service does not exist. + """ + params = self._build_auth_params() + service_name = service_name.strip() + params["serviceName"] = service_name + params["namespaceId"] = namespace.strip() if namespace else "public" + params["groupName"] = group_name + if clusters: + params["clusterName"] = clusters + if healthy_only: + params["healthyOnly"] = "true" + + url = f"{self.nacos_addr}/nacos/v3/client/ns/instance/list" + + logger.info( + f"[Nacos Query] URL: {url}, params: " + f"serviceName='{service_name}', namespaceId='{namespace}', groupName='{group_name}'" + ) + + try: + session = await self._get_session() + async with session.get(url, params=params) as response: + text = await response.text() + logger.info( + f"[Nacos Response] status={response.status}, " + f"body_len={len(text)}, body={text[:300]}" + ) + + if response.status == 200: + data = await response.json() + return self._parse_v3_instance_response(data, service_name) + elif response.status == 404: + logger.warning( + f"Service '{service_name}' not found in Nacos namespace '{namespace}'" + ) + return None + else: + raise NacosConnectionError( + f"Nacos API returned status {response.status}: {text}" + ) + + except aiohttp.ClientError as e: + logger.error(f"Failed to connect to Nacos at {self.nacos_addr}: {e}") + raise NacosConnectionError(f"Failed to connect to Nacos: {e}") from e + + def _parse_v3_instance_response( + self, + response_data: Dict[str, Any], + service_name: 
str + ) -> Optional[Dict[str, Any]]: + """Parse Nacos v3 client API instance list response. + + Nacos v3 API returns: { "code": 0, "message": "success", "data": [...] } + + Args: + response_data: Response data from Nacos v3 API. + service_name: Service name for fallback metadata. + + Returns: + First instance as a dict or None if no instances exist. + """ + if response_data.get("code") != 0: + msg = response_data.get("message", "unknown error") + logger.warning(f"Nacos API error for '{service_name}': {msg}") + return None + + data = response_data.get("data") + if data is None: + logger.info(f"[Nacos Parse] No data field in response for service '{service_name}'") + return None + + hosts = data if isinstance(data, list) else [] + logger.info(f"[Nacos Parse] Found {len(hosts)} instances for service '{service_name}'") + + if not hosts: + logger.info(f"[Nacos Parse] No hosts found for service '{service_name}'") + return None + + for instance in hosts: + instance_data = { + "ip": instance.get("ip"), + "port": instance.get("port"), + "healthy": instance.get("healthy", False), + "weight": instance.get("weight", 1.0), + "enabled": instance.get("enabled", True), + "metadata": instance.get("metadata") or {} + } + + if instance_data["enabled"] and instance_data.get("healthy", False): + logger.info( + f"[Nacos Parse] Found healthy instance for '{service_name}': " + f"{instance_data['ip']}:{instance_data['port']}" + ) + return instance_data + + first_instance = hosts[0] + logger.info( + f"[Nacos Parse] No healthy instance found, returning first instance for '{service_name}': " + f"{first_instance.get('ip')}:{first_instance.get('port')}" + ) + return { + "ip": first_instance.get("ip"), + "port": first_instance.get("port"), + "healthy": first_instance.get("healthy", False), + "weight": first_instance.get("weight", 1.0), + "enabled": first_instance.get("enabled", True), + "metadata": first_instance.get("metadata") or {} + } + + def _parse_instance_response( + self, + data: 
Dict[str, Any], + service_name: str + ) -> Optional[Dict[str, Any]]: + """Parse Nacos instance list response (v1 API legacy format). + + Args: + data: Response data from Nacos /instance/list API. + service_name: Service name for fallback metadata. + + Returns: + First instance as a dict or None if no instances exist. + """ + hosts = data.get("hosts") or [] + + if not hosts: + logger.debug(f"No hosts found for service '{service_name}'") + return None + + for instance in hosts: + instance_data = { + "ip": instance.get("ip"), + "port": instance.get("port"), + "healthy": instance.get("healthy", False), + "weight": instance.get("weight", 1.0), + "enabled": instance.get("enabled", True), + "metadata": instance.get("metadata") or {} + } + + if instance_data["enabled"] and instance_data.get("healthy", False): + logger.debug( + f"Found healthy instance for '{service_name}': " + f"{instance_data['ip']}:{instance_data['port']}" + ) + return instance_data + + first_instance = hosts[0] + return { + "ip": first_instance.get("ip"), + "port": first_instance.get("port"), + "healthy": first_instance.get("healthy", False), + "weight": first_instance.get("weight", 1.0), + "enabled": first_instance.get("enabled", True), + "metadata": first_instance.get("metadata") or {} + } + + async def list_services( + self, + namespace: str = "public", + page_no: int = 1, + page_size: int = 100, + group_name: str = "DEFAULT_GROUP" + ) -> Dict[str, Any]: + """List all services in a namespace using v3 Admin API. + + Args: + namespace: Nacos namespace ID (defaults to "public"). + page_no: Page number (1-indexed). + page_size: Number of services per page. + group_name: Group name filter (defaults to "DEFAULT_GROUP"). + + Returns: + Dict containing: + - count: Total number of services + - services: List of service names + + Raises: + NacosConnectionError: If connection to Nacos fails. 
+ """ + session = await self._get_session() + access_token = None + if self.username and self.password: + access_token = await self._get_access_token(session) + if not access_token: + raise NacosConnectionError("Authentication failed. Please check username and password.") + + params = { + "pageNo": page_no, + "pageSize": page_size, + "namespaceId": namespace, + "groupName": group_name + } + headers = {} + if access_token: + headers["AccessToken"] = access_token + + url = f"{self.nacos_addr}/nacos/v3/admin/ns/service" + + try: + async with session.get(url, params=params, headers=headers) as response: + if response.status == 200: + data = await response.json() + if data.get("code") == 0: + return { + "count": data.get("data", {}).get("count", 0), + "services": data.get("data", {}).get("doms", []) + } + elif data.get("code") == 403: + self._clear_access_token() + raise NacosConnectionError("Authentication failed. Please check username and password.") + else: + raise NacosConnectionError( + f"Nacos API error: {data.get('message', 'unknown')}" + ) + elif response.status == 403: + self._clear_access_token() + raise NacosConnectionError("Authentication failed. Please check username and password.") + else: + text = await response.text() + raise NacosConnectionError( + f"Nacos API returned status {response.status}: {text}" + ) + + except aiohttp.ClientError as e: + logger.error(f"Failed to list services from Nacos: {e}") + raise NacosConnectionError(f"Failed to list services from Nacos: {e}") from e + + async def get_service_detail( + self, + service_name: str, + namespace: str = "public", + group_name: str = "DEFAULT_GROUP" + ) -> Optional[Dict[str, Any]]: + """Get detailed information about a service using v3 Admin API. + + Args: + service_name: The name of the service. + namespace: Nacos namespace ID (defaults to "public"). + group_name: Nacos group name (defaults to "DEFAULT_GROUP"). + + Returns: + Service detail dict or None if not found. 
+ + Raises: + NacosConnectionError: If connection to Nacos fails. + """ + session = await self._get_session() + access_token = None + if self.username and self.password: + access_token = await self._get_access_token(session) + if not access_token: + raise NacosConnectionError("Authentication failed. Please check username and password.") + + params = { + "serviceName": service_name, + "namespaceId": namespace, + "groupName": group_name + } + headers = {} + if access_token: + headers["AccessToken"] = access_token + + url = f"{self.nacos_addr}/nacos/v3/admin/ns/service" + + try: + async with session.get(url, params=params, headers=headers) as response: + if response.status == 200: + data = await response.json() + if data.get("code") == 0: + return data.get("data") + elif data.get("code") == 403: + self._clear_access_token() + raise NacosConnectionError("Authentication failed. Please check username and password.") + else: + msg = data.get("message", "") + if "not found" in msg.lower() or "not exist" in msg.lower(): + return None + raise NacosConnectionError( + f"Nacos API error: {msg}" + ) + elif response.status == 404: + return None + elif response.status == 403: + self._clear_access_token() + raise NacosConnectionError("Authentication failed. Please check username and password.") + else: + text = await response.text() + raise NacosConnectionError( + f"Nacos API returned status {response.status}: {text}" + ) + + except aiohttp.ClientError as e: + logger.error(f"Failed to get service detail from Nacos: {e}") + raise NacosConnectionError( + f"Failed to get service detail from Nacos: {e}" + ) from e + + async def check_health( + self, + host: str, + port: int, + namespace: str = "public" + ) -> bool: + """Check if an instance is healthy. + + Args: + host: Instance IP address. + port: Instance port. + namespace: Nacos namespace ID. + + Returns: + True if the instance is healthy, False otherwise. + + Raises: + NacosConnectionError: If connection to Nacos fails. 
+ """ + params = self._build_auth_params() + params["serviceName"] = "__nacos^naming*" + params["ip"] = host + params["port"] = port + params["namespaceId"] = namespace + + url = f"{self.nacos_addr}/nacos/v1/ns/instance/health" + + try: + session = await self._get_session() + async with session.get(url, params=params) as response: + if response.status == 200: + text = await response.text() + return text.lower() == "ok" + return False + + except aiohttp.ClientError as e: + logger.error(f"Failed to check instance health: {e}") + return False + + async def test_connectivity( + self, + namespace: str = "public" + ) -> Dict[str, Any]: + """Test connectivity to the Nacos server. + + Args: + namespace: Nacos namespace ID to test connectivity with. + + Returns: + Dict containing: + - success: Whether the connection was successful + - message: Human-readable message about the result + """ + try: + session = await self._get_session() + + access_token = None + if self.username and self.password: + access_token = await self._get_access_token(session) + if not access_token: + return { + "success": False, + "message": "Authentication failed. Please check username and password." + } + + url = f"{self.nacos_addr}/nacos/v3/admin/ns/ops/metrics" + headers = {} + if access_token: + headers["AccessToken"] = access_token + + async with session.get(url, headers=headers) as response: + if response.status == 200: + data = await response.json() + if data.get("code") == 0: + return { + "success": True, + "message": "Successfully connected to Nacos server" + } + else: + return { + "success": False, + "message": f"Nacos API error: {data.get('message', 'unknown')}" + } + elif response.status == 403: + return { + "success": False, + "message": "Authentication failed. Please check username and password." 
+ } + else: + text = await response.text() + return { + "success": False, + "message": f"Nacos server returned status {response.status}: {text}" + } + + except aiohttp.ClientError as e: + logger.error(f"Failed to connect to Nacos at {self.nacos_addr}: {e}") + return { + "success": False, + "message": f"Failed to connect to Nacos server: {e}" + } + + async def _get_access_token(self, session: aiohttp.ClientSession) -> Optional[str]: + """Get access token from Nacos authentication endpoint with caching. + + Args: + session: aiohttp session to use for the request. + + Returns: + Access token string if authentication successful, None otherwise. + """ + if self._access_token: + return self._access_token + + try: + url = f"{self.nacos_addr}/nacos/v1/auth/login" + form_data = aiohttp.FormData() + form_data.add_field("username", self.username) + form_data.add_field("password", self.password) + + async with session.post(url, data=form_data) as response: + if response.status == 200: + result = await response.json() + token = result.get("accessToken") + if token: + self._access_token = token + return token + logger.warning(f"Nacos login failed: {result.get('message', 'unknown')}") + else: + text = await response.text() + logger.warning(f"Nacos login request returned status {response.status}: {text}") + return None + + except aiohttp.ClientError as e: + logger.error(f"Failed to login to Nacos: {e}") + return None + + def _clear_access_token(self) -> None: + """Clear the cached access token.""" + self._access_token = None + + async def __aenter__(self) -> "NacosClient": + """Async context manager entry.""" + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb) -> None: + """Async context manager exit.""" + await self.close() diff --git a/doc/docs/en/backend/overview.md b/doc/docs/en/backend/overview.md index 962233f18..d77dfee3c 100644 --- a/doc/docs/en/backend/overview.md +++ b/doc/docs/en/backend/overview.md @@ -202,4 +202,6 @@ python backend/mcp_service.py # 
MCP service - Resource pool management - Auto-scaling capabilities -For detailed backend development guidelines, see the [Developer Guide](../developer-guide/overview). \ No newline at end of file +For detailed backend development guidelines, see the [Developer Guide](../developer-guide/overview). + +For skill development and management, see the [Skills System Documentation](./skills/index). \ No newline at end of file diff --git a/doc/docs/en/backend/skills/index.md b/doc/docs/en/backend/skills/index.md new file mode 100644 index 000000000..7824260fa --- /dev/null +++ b/doc/docs/en/backend/skills/index.md @@ -0,0 +1,37 @@ +# Backend Skills Documentation + +This section covers Nexent's Skills system in the backend infrastructure, including skill definitions, skill package structures, and system architecture. + +## Available Documentation + +### Overview and Architecture +- [Skills System Overview](./overview): Skill types, lifecycle, and version management + +## Skills vs. Tools + +In Nexent, **Tools** and **Skills** are two distinct layers: + +- **Tool**: A single atomic operation the agent can call, such as `read_file` or `tavily_search`. When enabled, the LLM searches through the tool list on every turn — meaning even if a tool is completely unnecessary for this conversation, the LLM still consumes context tokens to "see" it. +- **Skill**: A workflow of multiple tools bundled with parameter configuration and usage documentation via `SKILL.md`. The LLM does not need to "see" all tools in advance; it decides whether to activate a skill based on the user's actual needs. The corresponding toolset is only loaded when activated — effectively saving Token consumption. + +## Quick Start + +1. **Explore capabilities**: Read [Skills System Overview](./overview) to understand the supported skill types +2. **Try creation**: Experience NL-to-Skill creation on the [Skill Management](../../user-guide/skills) page +3. 
**Create manually**: Upload `SKILL.md` or a ZIP package to create a custom skill +4. **Configure for agents**: Enable skills in the agent's tool configuration + +## Related References + +- [Skill Management (User Guide)](../../user-guide/skills) +- [Agent Development Guide](../../user-guide/agent-development) +- [Local Tools Overview](../../user-guide/local-tools/index) +- [SDK Tool Development Guide](../../sdk/core/tools) +- [MCP Tool Development](../tools/mcp) +- [FAQ](../../quick-start/faq) + +## Getting Help + +- Check the [FAQ](../../quick-start/faq) for common skill usage questions +- Ask questions in [GitHub Discussions](https://github.com/ModelEngine-Group/nexent/discussions) +- Review [GitHub Issues](https://github.com/ModelEngine-Group/nexent/issues) for known issues diff --git a/doc/docs/en/backend/skills/overview.md b/doc/docs/en/backend/skills/overview.md new file mode 100644 index 000000000..34fbd2f97 --- /dev/null +++ b/doc/docs/en/backend/skills/overview.md @@ -0,0 +1,138 @@ +# Skills System Overview + +A Skill is Nexent's way of extending an agent's capabilities. Each skill consists of: + +- **Skill description**: What this skill does and when to use it +- **Tool bundle**: A package of one or more Nexent SDK methods or user-defined tools +- **Parameter template**: Which parameters users can fill in for this skill +- **Usage examples**: How this skill is typically used + +Compared to selecting tools one by one, skills make configuring complex capabilities simple — install one skill package instead of configuring each tool separately. 
+ +## Skill Package Structure + +A skill can be a single `SKILL.md` file or a ZIP package with multiple files: + +``` +skill-name/ +├── SKILL.md # Skill definition file (required) +├── config/ +│ ├── config.yaml # Default parameter values (optional) +│ └── schema.yaml # Parameter types and descriptions (optional) +├── scripts/ +│ └── *.py # Python scripts (optional) +├── examples.md # Usage examples (optional) +└── assets/ # Static assets (optional) +``` + +### SKILL.md Structure + +Each skill must have a `SKILL.md` file, consisting of two parts: + +**Part 1: YAML Frontmatter (required)** + +```yaml +--- +name: skill-name +description: | + A description of what this skill does and when to use it. + Write in third person, e.g., "This skill is used for..." +tags: + - tag1 + - tag2 +--- +``` + +**Part 2: Skill Body** + +Below the frontmatter, you can write Markdown content including: +- Detailed usage instructions and guidelines +- Example code for tool invocation +- Error handling instructions +- Usage limits and caveats + +### Two Skill Types + +Skills fall into two categories based on their purpose: + +**Tool Skills**: Used to expose the capabilities of one or more Nexent SDK methods. The body should include tool parameter descriptions, usage examples, return formats, and error handling. Once the user configures the parameters, the agent can call these tools directly. + +**Agent Skills**: Used to teach an agent how to perform a complex task. The body should include workflow instructions, domain knowledge, best practices, and sometimes helper scripts. The body will contain detailed step-by-step guidance. 
+ +## Official Skills Overview + +### File Operations + +| Skill Name | Description | +|-----------|-------------| +| `read-file` | Read file content and metadata within the workspace | +| `create-file-directory` | Create files or directories | +| `delete-file-directory` | Delete files or directories | +| `move-file-directory` | Move or rename files/directories | +| `list-directory` | List directory structure in a tree view | + +### Knowledge Base Search + +| Skill Name | Description | +|-----------|-------------| +| `search-knowledge-base` | Local knowledge base semantic search (supports hybrid / accurate / semantic modes) | +| `search-dify` | Dify knowledge base search | +| `search-idata` | iData knowledge base search | +| `search-datamate` | DataMate knowledge base search (with similarity threshold control) | + +### Web Search + +| Skill Name | Description | +|-----------|-------------| +| `search-web-tavily` | Tavily real-time web search | +| `search-web-linkup` | Linkup image and text mixed search | +| `search-web-exa` | Exa deep web search | + +### Multimodal Analysis + +| Skill Name | Description | +|-----------|-------------| +| `analyze-image` | VLM-based image content analysis and Q&A | +| `analyze-text-file` | PDF/Word/Excel file content extraction and Q&A | + +### Communication and Remote Operations + +| Skill Name | Description | +|-----------|-------------| +| `email-utils` | IMAP receive / SMTP send (supports HTML / CC / BCC) | +| `run-shell-ssh` | Persistent SSH session for remote command execution | + +## Skill Lifecycle + +### Version Management + +Each skill supports two version states: + +- **Draft version (version=0)**: Development and debugging stage, changes take effect immediately, suitable for iterative adjustments +- **Published version (version>=1)**: Production use, parameters locked to prevent accidental changes + +### Skill Instances + +The same skill can be configured with different parameter values for different agents, 
independently. + +For example, a search skill can be configured for a "Technical Documentation Agent" to search only the technical knowledge base, and for a "Customer Service Agent" to search only the customer service knowledge base. + +### Common Workflow + +``` +Create skill → Configure parameters → Select skill for agent → Debug → Publish + ↓ + Edit draft version +``` + +## Security Notes + +- **Path isolation**: Files within a skill package can only be accessed within the skill directory scope +- **Parameter validation**: Parameters defined in schema.yaml are validated by the frontend form +- **Permission control**: Skill instances are tenant-isolated; APIs require authentication tokens + +## Related References + +- [Skill Management (User Guide)](../../user-guide/skills) +- [Agent Development Guide](../../user-guide/agent-development) +- [Local Tools Overview](../../user-guide/local-tools/index) diff --git a/doc/docs/en/backend/tools/index.md b/doc/docs/en/backend/tools/index.md index 2d2d2c185..82d73b82c 100644 --- a/doc/docs/en/backend/tools/index.md +++ b/doc/docs/en/backend/tools/index.md @@ -12,6 +12,10 @@ Integrate with the LangChain ecosystem for advanced AI workflows. Model Context Protocol tools for standardized AI agent communication. → [MCP Tools Development](./mcp) +### Skills System +Create reusable skill packages through natural language or ZIP files, giving agents more flexible tool-calling capabilities. +→ [Skills Documentation](../skills/index) + ## Quick Start 1. 
**Choose your tool type**: LangChain for general AI workflows, MCP for standardized agent communication diff --git a/doc/docs/en/getting-started/features.md b/doc/docs/en/getting-started/features.md index c0b6b4703..2216d7163 100644 --- a/doc/docs/en/getting-started/features.md +++ b/doc/docs/en/getting-started/features.md @@ -25,7 +25,7 @@ The system automatically extracts key information from conversations to generate ## 📝 Progressive Skill Disclosure -Nexent introduces a **Progressive Skill Disclosure** mechanism. As users input tasks, the system dynamically reveals the most relevant Skill suggestions based on the current context — helping users quickly find the tools and methods best suited to the current task. This mechanism enables newcomers to progressively explore system capabilities without adding operational complexity for advanced users. +Nexent introduces a **Progressive Skill Disclosure** mechanism. As users input tasks, the system dynamically reveals the most relevant Skill suggestions based on the current context — helping users quickly find the tools and methods best suited to the current task. This mechanism helps prevent context explosion and maximizes context window efficiency.
## 🗄️ Personal-Grade Knowledge Base diff --git a/doc/docs/en/sdk/data-process.md b/doc/docs/en/sdk/data-process.md index 2d11202b1..614c4b438 100644 --- a/doc/docs/en/sdk/data-process.md +++ b/doc/docs/en/sdk/data-process.md @@ -43,10 +43,10 @@ def file_process(self, ## 📁 Supported File Formats -- **Text files**: .txt, .md, .csv -- **Documents**: .pdf, .docx, .pptx +- **Text files**: .txt, .md, .csv, .json +- **Documents**: .pdf, .docx, .pptx, .epub - **Images**: .jpg, .png, .gif (with OCR) -- **Web content**: HTML, URLs +- **Web content**: HTML, URLs, XML - **Archives**: .zip, .tar ## 💡 Usage Examples diff --git a/doc/docs/en/user-guide/agent-development.md b/doc/docs/en/user-guide/agent-development.md index db2614f7d..109674273 100644 --- a/doc/docs/en/user-guide/agent-development.md +++ b/doc/docs/en/user-guide/agent-development.md @@ -31,15 +31,86 @@ You can configure other collaborative agents for your created agent, as well as ### 🤝 Collaborative Agents +Collaborative agents help the current agent complete complex tasks. The sources of collaborative agents are divided into two categories: + +- **Internal Agents**: Published agents on the platform +- **External A2A Agents**: Third-party agents discovered through the A2A protocol + 1. Click the plus sign under the "Collaborative Agent" tab to open the selectable agent list -2. Select the agents you want to add from the dropdown list -3. Multiple collaborative agents can be selected -4. Click × to remove an agent from the selection +2. The agent list is divided into two tabs: "Internal Agent" and "External A2A Agent". You can choose based on your needs +3. Select the agent you want to add from the dropdown list +4. Multiple collaborative agents can be selected +5. Click × to remove an agent from the selection + +
+ +
+ +#### 🌐 Add External A2A Agents + +Nexent supports communication with third-party agents through the A2A protocol. You can discover external A2A agents in the following two ways: + +##### Discover Agent via URL + +If you know the Agent Card address of the target agent, you can use the URL discovery method: + +
+ +
+ +1. In the External A2A Agent list, click the "Add External Agent" button +2. Select the "URL Discovery" tab +3. Fill in the Agent Card URL address, for example: `https://example.com/.well-known/agent-card.json` +4. Click the "Discover" button; the system will automatically retrieve the agent's related information +5. After successful discovery, you can view the agent's name, description, capabilities and other information +6. Click "Add to List" to complete the addition + +> 💡 **Tip**: The Agent Card is an Agent description file that complies with the A2A 1.0 specification, containing the agent's name, description, calling address, capabilities and other information. + +##### Discover Agent via Nacos + +If your agent is registered with the Nacos service discovery platform, you can use the Nacos discovery method:
- +
+1. In the External A2A Agent list, click the "Add External Agent" button +2. Select the "Nacos Discovery" tab +3. For first-time use, you need to configure the Nacos connection information: + - **Nacos Server Address**: Fill in the Nacos server address, such as `http://127.0.0.1:8848` + - **Namespace ID**: Fill in the Nacos namespace ID (optional) + - **Group Name**: Fill in the service group name, default is `DEFAULT_GROUP` + - **Username/Password**: Fill in the Nacos access credentials (optional) +4. Click "Save Configuration" to save the Nacos connection information +5. Fill in the Agent service name to scan +6. Click the "Scan" button; the system will obtain matching Agent information from Nacos +7. The scan results will list all matching Agents. You can select the agents you need and add them to the list + +> ⚠️ **Note**: Make sure the Nacos service is running properly and the target Agent is correctly registered with Nacos. + +##### Manage Discovered External Agents + +In the External A2A Agent list, you can view and manage all discovered external agents: + +
+ +
+ +1. **View Agent Details**: Click on the agent card to view its complete information, including name, description, URL, capability list, etc. +2. **Test Agent**: Click the "Test" button to send a test message to the agent and verify if it is working properly +3. **Chat with Agent**: Click the "Chat" button to open a chat window and interact with the agent in real time +4. **Configure Calling Protocol**: Click the "Protocol Configuration" button to select the calling protocol for this agent: + - **HTTP + JSON**: Use REST API style calls + - **JSON-RPC**: Use JSON-RPC protocol calls +5. **Refresh Agent Information**: If the agent information changes, click the "Refresh" button to re-fetch the latest Agent Card +6. **Remove Agent**: Click the "Remove" button to delete the agent from the discovered list + +> 💡 **Use Cases**: +> - Quickly integrate known third-party agent services through URL discovery +> - Batch integrate all agents from the same service registry through Nacos discovery +> - Configure protocols to meet the requirements of different agent service providers + ### 🛠️ Select Agent Tools Agents can use various tools to complete tasks, such as knowledge base search, file parsing, image parsing, email sending/receiving, file management, and other local tools. They can also integrate third-party MCP tools or custom tools. @@ -60,6 +131,8 @@ Agents can use various tools to complete tasks, such as knowledge base search, f > 2. Please select the `analyze_text_file` tool to enable the parsing function for document and text files. > 3. Please select the `analyze_image` tool to enable the parsing function for image files. > +> ⚠️ **Embedding Model Configuration**: When using the `knowledge_base_search` tool, ensure that the knowledge base has an embedding model configured. For existing knowledge bases, the system will prompt you to select an embedding model. Make sure to select **the same embedding model used when creating the knowledge base**. 
If the selected model differs from the one used during knowledge base creation, it may cause search failures or inaccurate results. +> > 📚 Want to learn about all the built-in local tools available in the system? Please refer to [Local Tools Overview](./local-tools/index.md). ### 🔌 Add MCP Tools @@ -108,6 +181,39 @@ You can add MCP services to Nexent in the following two ways: Many third-party services such as [ModelScope](https://www.modelscope.cn/mcp) provide MCP services, which you can quickly integrate and use. You can also develop your own MCP services and connect them to Nexent; see [MCP Tool Development](../backend/tools/mcp). +**3️⃣ Convert Existing API to MCP Service** + +🔔 This method is suitable for quickly converting existing REST API endpoints into MCP tools without additional development, allowing agents to call existing API capabilities: + +>1. In the MCP Config module, select **"API to MCP"** as the access type +> +>2. Fill in the API basic information in the input box below: +> - **Service Name**: Display name for the MCP service +> - **OpenAPI JSON**: OpenAPI 3.x specification in JSON format +> - **Base Service URL**: Base address of the API service (supports http/https) +> +>3. Click the **+ Add** button in the lower right corner to complete the MCP service conversion + +
+ +
+ +>4. After conversion, you can view all externally converted MCP tools in the **Outer APIs** tab + +
+ +
+ +
+ +
+ +>💡 **Use Cases**: +>- Quickly integrate internal enterprise REST API endpoints +>- Convert third-party service HTTP APIs into MCP tools +>- Generate tools directly from OpenAPI specifications without writing MCP Server code + + ### ⚙️ Custom Tools You can refer to the following guides to develop your own tools and integrate them into Nexent to enrich agent capabilities: @@ -129,7 +235,7 @@ Nexent provides a "Tool Testing" capability for all types of tools—whether the - The test `query`, such as "benefits of vitamin C" - The search `search_mode` (default is `hybrid`) - The target index list `index_names`, such as `["Medical", "Vitamin Encyclopedia"]` - - If `index_names` is not entered, it will default to searching all knowledge bases selected on the knowledge base page + - If `index_names` is not entered, it will default to searching all knowledge bases selected on the knowledge base page 6. After entering the parameters, click "Execute Test" to start the test and view the test results below
@@ -181,6 +287,134 @@ After completing the initial agent configuration, you can debug the agent and fi After successful debugging, click the "Save" button in the lower right corner, and the agent will be saved and appear in the agent list. +## 📋 Version Management + +Nexent supports agent version management. You can save different versions of agent configurations during the debugging process. + +Once the agent configuration is verified, you can publish the agent. After publishing, the agent will be visible in the Agent Space and Start Chat pages. + +![Version Management 1](./assets/agent-development/version_management_1.png) + +If you need to rollback to a previous version, click the "Rollback" button on the version management page. + +![Version Management 2](./assets/agent-development/version_management_2.png) + +### 🚀 Publish as A2A Agent + +Nexent supports exposing published agents as A2A Agents for external systems to call. When publishing a version, you can check the "Publish as A2A Agent" option to register the current agent as an A2A 1.0 compliant Agent. + +
+ +
+ +After successful publishing, the system will display the A2A Agent's call information: + +
+ +
+ +| Field | Description | +|-------|-------------| +| **Endpoint ID** | Unique identifier for the A2A Agent | +| **Agent Card URL** | Agent discovery endpoint; external systems use this address to retrieve Agent descriptions | +| **Protocol Version** | A2A protocol version; currently 1.0 | +| **REST Endpoints** | REST-style API endpoints | +| **JSON-RPC Endpoint** | JSON-RPC 2.0 protocol calling endpoint | + +#### Calling Methods + +The published A2A Agent supports the following two calling protocols: + +##### REST API + +```bash +# Get Agent Card (for Agent discovery) +GET /nb/a2a/{endpoint_id}/.well-known/agent-card.json + +# Send synchronous message +POST /nb/a2a/{endpoint_id}/message:send +Content-Type: application/json + +{ + "message": { + "role": "user", + "content": "Please help me complete a task" + } +} + +# Send streaming message (SSE) +POST /nb/a2a/{endpoint_id}/message:stream +Content-Type: application/json + +{ + "message": { + "role": "user", + "content": "Please help me complete a task" + } +} + +# Get task status +GET /nb/a2a/{endpoint_id}/tasks/{task_id} +``` + +##### JSON-RPC 2.0 + +```bash +POST /nb/a2a/{endpoint_id}/v1 +Content-Type: application/json + +# Send synchronous message +{ + "jsonrpc": "2.0", + "method": "SendMessage", + "params": { + "message": { + "role": "user", + "content": "Please help me complete a task" + } + }, + "id": 1 +} + +# Send streaming message +{ + "jsonrpc": "2.0", + "method": "SendStreamingMessage", + "params": { + "message": { + "role": "user", + "content": "Please help me complete a task" + } + }, + "id": 2 +} + +# Get task status +{ + "jsonrpc": "2.0", + "method": "GetTask", + "params": { + "taskId": "task_abc123" + }, + "id": 3 +} +``` + +> 💡 **Tips**: +> - For local development, replace the `/nb/a2a` prefix with `http://localhost:5013/nb/a2a` +> - For production environments, replace the prefix with your server domain name or public IP address + +> ⚠️ **Notes**: +> - Calling A2A Agents requires carrying valid 
authentication information in the request headers +> - Agent Card information is cached with a refresh interval of 1 hour +> - If you need to update Agent information, you need to republish the agent version + +When an agent is published as an A2A-compliant Agent, users can view the detailed A2A Agent calling information by clicking the button shown below in the agent list: + +
+ +
+ ## 📋 Manage Agents In the agent list on the left, you can perform the following operations on existing agents: diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-detail.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-detail.jpg new file mode 100644 index 000000000..399af1c56 Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-detail.jpg differ diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-discovery-list.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-discovery-list.jpg new file mode 100644 index 000000000..5c523f7b1 Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-discovery-list.jpg differ diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-find-detail.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-find-detail.jpg new file mode 100644 index 000000000..4c42104ec Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-find-detail.jpg differ diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-nacos-discovery.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-nacos-discovery.jpg new file mode 100644 index 000000000..fdfa2e826 Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-nacos-discovery.jpg differ diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-published-as.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-published-as.jpg new file mode 100644 index 000000000..5c523f7b1 Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-published-as.jpg differ diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-url-discovery.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-url-discovery.jpg new file mode 100644 index 000000000..4632206fb Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-url-discovery.jpg differ diff --git 
a/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api.png b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api.png new file mode 100644 index 000000000..2cce2a44a Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api.png differ diff --git a/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_1.png b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_1.png new file mode 100644 index 000000000..12e9358c5 Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_1.png differ diff --git a/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_2.png b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_2.png new file mode 100644 index 000000000..4221b41f5 Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_2.png differ diff --git a/doc/docs/en/user-guide/assets/agent-development/set-collaboration.jpg b/doc/docs/en/user-guide/assets/agent-development/set-collaboration.jpg new file mode 100644 index 000000000..fdfa2e826 Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/set-collaboration.jpg differ diff --git a/doc/docs/en/user-guide/assets/agent-development/set-collaboration.png b/doc/docs/en/user-guide/assets/agent-development/set-collaboration.png deleted file mode 100644 index 7f47ba1a2..000000000 Binary files a/doc/docs/en/user-guide/assets/agent-development/set-collaboration.png and /dev/null differ diff --git a/doc/docs/en/user-guide/knowledge-base.md b/doc/docs/en/user-guide/knowledge-base.md index e5e5714ff..05456e5fa 100644 --- a/doc/docs/en/user-guide/knowledge-base.md +++ b/doc/docs/en/user-guide/knowledge-base.md @@ -26,12 +26,14 @@ Create and manage knowledge bases, upload documents, and generate summaries. 
Kno ### Supported File Formats Nexent supports multiple file formats, including: -- **Text:** .txt, .md +- **Text:** .txt, .md, .csv, .json - **PDF:** .pdf - **Word:** .docx - **PowerPoint:** .pptx +- **EPUB:** .epub - **Excel:** .xlsx - **Data files:** .csv +- **Web content:** .html, .xml ## 📊 Knowledge Base Summary diff --git a/doc/docs/en/user-guide/skills.md b/doc/docs/en/user-guide/skills.md new file mode 100644 index 000000000..0cdc2a288 --- /dev/null +++ b/doc/docs/en/user-guide/skills.md @@ -0,0 +1,572 @@ +--- +title: Skill Management +--- + +# Skill Management + +A Skill is a core mechanism in Nexent for extending agent capabilities. Each skill packages multiple tools with usage documentation into a reusable unit of capability, enabling agents to handle complex tasks like assembling building blocks — without consuming excessive context space. + +## Table of Contents + +- [Skills vs. Tools](#-skills-vs-tools): Understanding the core concepts +- [Using Skills](#-using-skills): How to use skills in agent development +- [Skill Management](#-skill-management): Create, edit, import, and export skills +- [Skill Upload Guide](#-skill-upload-guide): SKILL.md format, ZIP structure, special tags, and writing standards +- [NL-to-Skill](#-nl-to-skill): Automatically generate skills from natural language descriptions +- [Official Skills Overview](#-official-skills-overview): Built-in skills and their capabilities + +## The Relationship Between Skills and Tools + +In Nexent, **Tools** and **Skills** are two distinct layers. Understanding their differences helps you configure agent capabilities more effectively. + +A **Tool** is a single atomic operation the agent can call, such as `read_file` or `tavily_search`. When a tool is enabled for an agent, the LLM searches through the tool list on every turn — meaning even if a tool is completely unnecessary for the current conversation, the LLM still consumes context tokens to "see" it. 
+ +A **Skill** bundles the capabilities of multiple tools into a complete workflow, complete with parameter configuration and usage documentation via `SKILL.md`. The LLM does not need to "see" all tools in advance. Based on the user's actual needs, it decides whether to activate a skill. Only when activated does the system load the corresponding toolset — effectively saving Token consumption. + +| Dimension | Tool | Skill | +|-----------|------|-------| +| Granularity | Single atomic operation | Bundle of multiple tools + configuration + documentation | +| Token consumption | Occupies context on every turn | Loaded only when activated | +| Parameters | Fixed parameter schema | Customizable parameter templates | +| Versioning | No version management | Supports draft/published versions | +| Distribution | Code-level | ZIP package distribution, plug-and-play | + +**Analogy**: Tools are individual items like a screwdriver, hammer, or saw. A Skill is a toolbox — with tools pre-matched for a work scenario and accompanied by usage instructions. Open the right toolbox for the task at hand. + +## Using Skills + +### Configuring Skills for an Agent + +1. Open the **[Agent Development](./agent-development)** page +2. On the "Select Tools" tab, find the **Skills** group +3. Click a skill name to select it; click again to deselect +4. After selecting a skill, click the ⚙️ button next to it to configure skill parameters +5. Save the agent configuration + +
+ +
+ +> 💡 **Tip**: If a skill has required parameters that are not configured, a guided parameter-filling prompt will appear upon selection. + +### Skill Parameters + +Each skill's parameter definitions come from the `config/schema.yaml` file in the skill package. The configuration interface auto-generates a parameter form based on the schema, including: + +- **Parameter name and description** (bilingual: English and Chinese) +- **Required/optional markers** +- **Default values** +- **Parameter types** (string, number, boolean, array, object) +- **Tooltips generated automatically from YAML comments** + +### Skill Versions + +Each skill supports multi-version management: + +- **Draft version (version=0)**: Development and debugging stage; changes take effect immediately +- **Published version (version>=1)**: Production use; parameters are locked + +When configuring the same skill for different agents, you can set different parameter values independently. + +## Skill Management + +### Viewing Installed Skills + +The **Skills** group on the "Select Tools" tab displays all installed skills, including: +- Official skills (`official` source) +- Custom skills (`custom` source) + +### Creating Custom Skills + +Nexent supports two ways to create custom skills: uploading a skill package file, or generating one automatically from a natural language description. + +#### Method 1: Upload SKILL.md or ZIP + +1. Go to the skill configuration interface +2. Click the "Upload Skill" button +3. Select a `SKILL.md` file (single file) or a `.zip` package (complete skill package) +4. The system automatically parses and creates the skill + +#### Method 2: NL-to-Skill Natural Language Creation + +Click the **"NL Create Skill"** button on the skill management page. See the [NL-to-Skill](#nl-to-skill) section below for details. + +### Editing Skills + +1. Find the target skill in the skill list +2. Click the skill card to enter the edit page +3. Modify the skill name, description, tags, parameter configuration, etc. +4. 
Save changes + +### Importing/Exporting Skills + +- **Export**: Click "Export" on the skill detail page to download as a JSON configuration file +- **Import**: Click "Import Skill" on the Agent Development page to upload a JSON configuration file + +> ⚠️ **Note**: When importing skills containing knowledge base tools (such as `knowledge_base_search`), these tools will only search **knowledge bases that the currently logged-in user is permitted to access in this environment**. The original skill's knowledge base configuration will not be automatically inherited. + +## Skill Upload Guide + +### Skill Package Structure + +A skill can be a single file or a ZIP package containing multiple files: + +``` +skill-name/ +├── SKILL.md # Skill definition file (required) +├── config/ +│ ├── config.yaml # Default parameter values +│ └── schema.yaml # Parameter types and descriptions +├── scripts/ +│ └── *.py # Python scripts +├── examples.md # Usage examples +└── assets/ # Static assets +``` + +### SKILL.md Format in Detail + +`SKILL.md` is the core file of a skill, consisting of a YAML frontmatter section and a body section. + +**YAML Frontmatter (required)** + +The file must start with YAML frontmatter: + +```yaml +--- +name: skill-name +description: | + A description of what this skill does and when to use it. + Write in third person. 
+tags: + - tag1 + - tag2 +--- +``` + +| Field | Required | Description | Example | +|-------|----------|-------------|---------| +| `name` | Yes | Skill name; English only, lowercase, hyphenated | `github-repo-analyzer` | +| `description` | Yes | Skill function description; 1-3 sentences, include use case | `This skill analyzes GitHub repositories and extracts key metrics` | +| `tags` | No | Skill tag list for categorization and search | `["code", "github", "analysis"]` | +| `allowed-tools` | No | List of allowed tools (all available by default) | `[file_read, web_search]` | +| `always` | No | Whether to auto-activate on every turn (default: false) | `false` | + +**Body (optional)** + +Below the frontmatter, you can write Markdown content including usage instructions, best practices, example code, and more. + +### Two Skill Types + +Based on their purpose, skills fall into two categories with different writing styles: + +**Tool Skills**: Used to expose tool capabilities. The body should include tool parameter descriptions, usage examples, return formats, and error handling. + +**Agent Skills**: Used to teach the agent how to perform a complex task. The body should include workflow instructions, domain knowledge, boundary conditions, and best practices. + +### config/schema.yaml: Defining Parameter Forms + +If a skill requires user-supplied parameters, create a `config/schema.yaml` file. The system will auto-generate a parameter configuration form in the frontend based on this file. 
+ +```yaml +param_name: + type: string | number | boolean | array | object + required: true | false + default: + description: "English description of the parameter" + description_zh: "Chinese description of the parameter" +``` + +**Supported types**: `string`, `number`, `boolean`, `array`, `object` + +**Complete example**: + +```yaml +query: + type: string + required: true + description: "Search query string" + description_zh: "Search keyword" + default: "" + +top_k: + type: number + required: false + description: "Number of results to return" + description_zh: "Number of returned results" + default: 3 + +enable_rerank: + type: boolean + required: false + description: "Enable result reranking" + description_zh: "Whether to enable result reranking" + default: false +``` + +### config/config.yaml: Setting Parameter Defaults + +If you want certain parameters to have default values, create `config/config.yaml`: + +```yaml +# Initial workspace path +init_path: "/mnt/nexent" + +# Maximum number of results +top_k: 5 +``` + +### Special Tags + +You can use the following special tags in the SKILL.md body: + +#### ``: Lazy-loading Example Files + +Use the `` tag to reference external files. The referenced file is loaded only when needed, keeping the main `SKILL.md` file lightweight. + +```markdown +## Example Reference + +> **Note**: Only load the reference example file when the default Usage examples cannot meet your needs. 
+ + +``` + +#### ``: Declaring Bundled Scripts + +If the skill package contains Python or Shell scripts, declare them in `SKILL.md`: + +```markdown + +``` + +#### ``: Displaying Executable Code Examples + +Use the `` tag to wrap executable code examples (usually Python code): + +```markdown + +result = run_skill_script( + "code-reviewer", + "scripts/analyze.py", + {"--target": "/path/to/file.py", "--verbose": True} +) +print(result) + +``` + +### Helper Functions + +In agent skill bodies and examples, you can use the following functions: + +**`run_skill_script(skill_name, script_path, params)`**: Execute a script bundled in the skill package + +```python +# Execute a Python script +result = run_skill_script( + "code-reviewer", + "scripts/analyze.py", + {"--target": "/path/to/file.py"} +) + +# Execute a Shell script +result = run_skill_script( + "database-migration", + "scripts/migrate.sh", + {"--direction": "up", "--steps": 1} +) +``` + +**`read_skill_md(skill_name, files)`**: Read files from the skill package + +```python +# By default, only reads SKILL.md (referenced files are not auto-included) +content = read_skill_md("my-skill") + +# Explicitly specify which files to read +full_content = read_skill_md("my-skill", [ + "SKILL.md", + "reference/api-reference.md" +]) +``` + +### Writing Standards and Best Practices + +**SKILL.md Writing Standards**: + +1. **Be specific**: Explain when to use the skill, not just what it does + - ✓ "Used when you need to analyze GitHub repository popularity metrics" + - ✗ "GitHub search function" + +2. **Avoid time-sensitive information**: Do not include specific dates, version numbers, or other content that will become outdated + +3. **Stay concise**: Keep the `SKILL.md` body under 500 lines. Use `` for complex content that can be lazy-loaded + +4. **Path format**: Always use forward slashes `/`, even on Windows + - ✓ `src/services/payment_service.py` + - ✗ `src\services\payment_service.py` + +5. 
**Consistent parameter naming**: Use the same terminology and naming style throughout + +6. **Include boundary conditions**: Explain the skill's scope and limitations + +**Parameter Description Best Practices**: + +```yaml +# ✓ Good: Clearly specify purpose and format +query: + type: string + required: true + description: "GitHub repository owner/name or full URL" + description_zh: "GitHub repository in owner/name format or full URL" + +# ✗ Bad: Too vague +query: + type: string + required: true + description: "Search query" + description_zh: "Query" +``` + +**Code Example Best Practices**: + +- Provide at least two examples covering different scenarios for each tool +- Include common parameter combinations in examples +- Demonstrate both successful calls and common error handling + +### Learning from Existing Skills + +The system includes several complete skill reference examples in `test_skill_examples/official-skills/`: + +| Skill Name | Reference Value | +|-----------|-----------------| +| `create-file-directory` | Standard structure for tool skills, with complete parameter tables, usage examples, and error handling tables | +| `search-knowledge-base` | Parameter configuration for search skills, with complete `schema.yaml` and `config.yaml` examples | +| `analyze-image` | Multimodal tool example with `` call format | +| `code_review_expert` | Agent skill reference with bundled scripts and `` tag usage | + +### FAQ + +**Q: Upload reports "SKILL.md not found"** + +Make sure the `SKILL.md` file is in the ZIP package's root directory, not inside a subfolder. + +**Q: Parameter form didn't generate correctly** + +Check that `config/schema.yaml` is formatted correctly. Ensure each parameter defines both `type` and `description`. + +**Q: Skill description isn't taking effect** + +The skill description should be written in the YAML frontmatter's `description` field, not in the Markdown body section. Body content is not parsed as the skill description. 
+ +## NL-to-Skill + +NL-to-Skill is an intelligent creation feature provided by Nexent. You simply describe a skill requirement in natural language, and the system automatically generates a complete skill package — including skill definition, parameter configuration, and even accompanying script code. The entire generation process is visible in real time, as if an AI assistant is writing code for you. + +In simple terms: + +> You say "I want a skill that can search GitHub repositories and extract Star counts," and the system automatically generates a complete, usable skill for you. + +### Quick Start + +#### Step 1: Describe Your Requirement + +In the input box, describe the skill you want in natural language. The clearer your description, the better the generated result. + +**Good examples**: +- "Create a skill that searches GitHub repositories by keywords and returns Star counts, descriptions, and links" +- "Create a skill that reads an Excel file, calculates statistics for each column, and generates a chart" +- "Create a skill that extracts order numbers, amounts, and dates from emails and compiles them into a table" + +**Bad examples**: +- "Help me make a chat skill" (too vague) +- "Search tool" (lacks specific capability description) + +#### Step 2: Watch the Generation Process + +After clicking "Generate," the page displays the AI's thinking and writing process in real time: +- See the AI analyzing your requirement +- See it writing the skill definition file +- See it planning the parameter structure + +This process is like watching AI write code live. You can click "Stop" at any time to interrupt. 
+ +#### Step 3: Preview and Save + +After generation completes, the system displays the complete skill content: +- Skill name and description +- Parameter list (what each parameter is and whether it is required) +- Usage examples + +Check the preview carefully: +- To make adjustments, click "Edit" to fine-tune +- If it meets your expectations, click "Save" to add the skill to your skill library + +### Writing Tips + +#### How to Write a Good Skill Description + +**1. Clarify inputs and outputs** + +Tell the system what information the skill needs and what it will return. + +``` +✓ "Input a GitHub repository address; return the repository name, Star count, Fork count, and last update time" +✗ "Search GitHub" (too vague) +``` + +**2. Explain the use case** + +Help the AI understand in what situations this skill would be used. + +``` +✓ "Used to quickly query the popularity of open-source projects and assist with technical selection decisions" +✗ "Get data" (no context) +``` + +**3. Describe boundary conditions** + +If the skill requires any special processing logic or has limitations, mention them. + +``` +✓ "If the repository doesn't exist, return a friendly message instead of an error" +✓ "Skip invalid image URLs and log them" +``` + +**4. Explicitly request examples** + +If the skill has complex usage scenarios with high accuracy requirements, explicitly request detailed examples. 
+ +``` +✓ "Generate comprehensive and detailed usage examples" +``` + +#### Usage Scenario Examples + +| Scenario | Description Example | +|---------|-------------------| +| **Data collection** | "Search Zhihu for Q&A related to the keywords and extract summaries of the highest-liked answers" | +| **File processing** | "Upload a CSV file; automatically calculate statistics for each column and generate a line chart" | +| **API encapsulation** | "Create a skill that calls a weather API and returns a three-day forecast" | +| **Multi-tool combination** | "Input a product link; automatically compare prices (calling multiple e-commerce searches) and return the lowest-price link" | +| **Data cleaning** | "Read a messy text block; extract emails, phone numbers, and dates, and format the output" | + +### What You Can Do During Generation + +#### Real-time Preview + +During generation, skill content progressively appears in the preview area: +- `SKILL.md` content: skill definition, description, tags +- `examples.md`: skill usage examples +- `scripts/*.py`: tool scripts (in complex mode) + +#### Stop Anytime + +If the generation direction deviates from expectations: +- Click the "Stop" button; the AI immediately stops +- Existing generated results are preserved; you can review or discard them + +#### Multiple Attempts + +If the first generation result is unsatisfactory: +- Directly add more requirement details; modify based on the existing result +- Or manually adjust in the preview +- If you want to start completely fresh, click the "trash" icon in the upper right corner to clear all skill content + +### Limitations and Notes + +#### Model Capability Affects Quality + +NL-to-Skill uses the LLM model configured for your tenant to generate skills. 
The model's capability directly determines the generation quality: +- Smarter models accurately understand requirements and generate well-structured, easy-to-understand skills +- Weaker models may produce incomplete or misleading content, affecting agent efficiency and accuracy + +If the generation result is unsatisfactory, try the following: +1. Simplify the requirement description +2. Switch to a smarter, more capable model +3. Create in steps (make a simple version first, then manually expand) + +#### Token Consumption + +Complex skill generation consumes more tokens: +- **Simple mode**: Usually consumes less; suitable for quick validation +- **Complex mode**: Consumes more; suitable for formally creating complete skills + +It is recommended to first test the idea in simple mode, then use complex mode for formal creation after confirming feasibility. + +#### Not All Requirements Can Be Realized + +NL-to-Skill excels at generating skills for: +- Single tool wrapping (e.g., encapsulating a search capability) +- Simple multi-tool chaining (e.g., search → read → summarize) +- Common data processing flows (e.g., file format conversion, data extraction) + +The following types of skills may be beyond its capabilities: +- Skills that require external APIs that are not integrated +- Skills that involve complex state management or concurrency logic +- Skills that require access to underlying platform interfaces that are not open + +When a requirement cannot be fulfilled, the system displays a notice. You can then create the skill manually or contact technical support. + +#### Modifying Skills + +In the NL-to-Skill interface, you can select an existing skill. Once selected, the skill information loads automatically. You can then describe the changes you want in natural language in the dialog on the left to update the skill. + +If the name of the skill you are creating conflicts with an existing skill, Nexent automatically switches from skill creation mode to skill update mode, and the newly generated content overwrites the original skill. 
+ +## Official Skills Overview + +### File Operations + +| Skill Name | Description | Main Tools | +|-----------|-------------|------------| +| `read-file` | Read file content and metadata within the workspace | `read_file` | +| `create-file-directory` | Create files or directories | `create_file`, `create_directory` | +| `delete-file-directory` | Delete files or directories (irreversible) | `delete_file`, `delete_directory` | +| `move-file-directory` | Move or rename files/directories | `move_item` | +| `list-directory` | List directory structure in a tree view | `list_directory` | + +### Knowledge Base Search + +| Skill Name | Description | Main Tools | +|-----------|-------------|------------| +| `search-knowledge-base` | Local knowledge base semantic search | `knowledge_base_search` | +| `search-dify` | Dify knowledge base search (supports semantic / keyword / full_text / hybrid modes) | `dify_search` | +| `search-idata` | iData knowledge base search | `idata_search` | +| `search-datamate` | DataMate knowledge base search (with similarity threshold control) | `datamate_search` | + +### Web Search + +| Skill Name | Description | Main Tools | +|-----------|-------------|------------| +| `search-web-tavily` | Tavily real-time web search | `tavily_search` | +| `search-web-linkup` | Linkup image and text mixed search | `linkup_search` | +| `search-web-exa` | Exa deep web search | `exa_search` | + +### Multimodal Analysis + +| Skill Name | Description | Main Tools | +|-----------|-------------|------------| +| `analyze-image` | VLM-based image content analysis and Q&A | `analyze_image` | +| `analyze-text-file` | PDF/Word/Excel file content extraction and Q&A | `analyze_text_file` | + +### Communication and Remote Operations + +| Skill Name | Description | Main Tools | +|-----------|-------------|------------| +| `email-utils` | IMAP receive / SMTP send (supports HTML / CC / BCC) | `get_email`, `send_email` | +| `run-shell-ssh` | Persistent SSH session for remote 
command execution | `terminal` | + +## Security and Best Practices + +- **Knowledge base access control**: When importing skills containing knowledge base tools, actual search scope is limited by the current user's permissions +- **Web search**: Tavily / Linkup / Exa web search requires the corresponding API Key to be configured in the platform security settings first +- **Path security**: File operations within skill packages are limited to the skill directory scope and cannot access arbitrary system paths +- **Irreversible operations**: Delete and move operations are irreversible; confirm the target before executing +- **NL-to-Skill Token consumption**: Complex skill generation consumes more model tokens; it is recommended to test in simple mode first + +## Related References + +- [Agent Development](./agent-development) +- [Local Tools Overview](./local-tools/index) +- [MCP Tool Configuration](./mcp-tools) +- [Skills System Overview](../backend/skills/overview) diff --git a/doc/docs/en/user-guide/start-chat.md b/doc/docs/en/user-guide/start-chat.md index 9593cb6ec..5834521ea 100644 --- a/doc/docs/en/user-guide/start-chat.md +++ b/doc/docs/en/user-guide/start-chat.md @@ -79,8 +79,8 @@ You can upload files during a chat so the agent can reason over their content: - Or drag files directly into the chat area 2. **Supported File Formats** - - **Documents:** PDF, Word (.docx), PowerPoint (.pptx), Excel (.xlsx) - - **Text:** Markdown (.md), Plain text (.txt) + - **Documents:** PDF, Word (.docx), PowerPoint (.pptx), Excel (.xlsx), EPUB (.epub), HTML (.html), XML (.xml) + - **Text & Data:** Markdown (.md), Plain text (.txt), JSON (.json), CSV (.csv) - **Images:** JPG, PNG, GIF, and other common formats 3. 
**File Processing Flow** diff --git a/doc/docs/zh/backend/skills/index.md b/doc/docs/zh/backend/skills/index.md new file mode 100644 index 000000000..10b37bc90 --- /dev/null +++ b/doc/docs/zh/backend/skills/index.md @@ -0,0 +1,37 @@ +# 后端技能(Skill)文档 + +本节介绍 Nexent 后端基础设施中 Skills 技能系统的完整生态,包括技能定义、技能包结构与系统架构。 + +## 可用文档 + +### 概览与架构 +- [技能系统概览](./overview):技能类型、生命周期与版本管理 + +## 技能与工具的关系 + +在 Nexent 中,**工具(Tool)** 与 **技能(Skill)** 是两个不同层次的概念: + +- **工具**:智能体可调用的单个原子操作。启用后,LLM 的每次思考都会在工具列表中搜索——即使本次对话完全不需要某个工具,LLM 仍然会消耗上下文额度。 +- **技能**:通过 `SKILL.md` 将多个工具的能力组合为一个完整的工作流,并附带参数配置与使用文档。LLM 根据用户实际需求自行判断是否激活技能,激活后才加载对应工具集——有效节省 Token 消耗。 + +## 快速开始 + +1. **了解能力**:阅读 [技能系统概览](./overview) 了解已支持的技能类型 +2. **体验创建**:在 [技能管理](../../user-guide/skills) 页面体验 NL-to-Skill 创建 +3. **手动创建**:上传 `SKILL.md` 或 ZIP 包创建自定义技能 +4. **为智能体配置**:在智能体工具配置中勾选技能 + +## 相关参考 + +- [技能管理(用户指南)](../../user-guide/skills) +- [智能体开发指南](../../user-guide/agent-development) +- [本地工具概览](../../user-guide/local-tools/index) +- [SDK 工具开发规范](../../sdk/core/tools) +- [MCP 工具开发](../tools/mcp) +- [常见问题](../../quick-start/faq) + +## 获取帮助 + +- 查看 [常见问题](../../quick-start/faq) 了解常见技能使用问题 +- 在 [GitHub Discussions](https://github.com/ModelEngine-Group/nexent/discussions) 中提问 +- 查看 [GitHub Issues](https://github.com/ModelEngine-Group/nexent/issues) 了解已知问题 diff --git a/doc/docs/zh/backend/skills/overview.md b/doc/docs/zh/backend/skills/overview.md new file mode 100644 index 000000000..f3d866f78 --- /dev/null +++ b/doc/docs/zh/backend/skills/overview.md @@ -0,0 +1,138 @@ +# 技能系统概览 + +技能(Skill)是 Nexent 为智能体扩展能力的方式。每个技能由以下部分组成: + +- **技能描述**:这个技能是做什么的、什么时候该用它 +- **工具组合**:一个或多个 nexent sdk方法或用户自定义工具的打包 +- **参数模板**:用户可为技能填写哪些参数 +- **使用示例**:这个技能通常怎么用 + +与直接选择一个一个工具相比,技能让复杂能力的配置变得简单——只需安装一个技能包,无需分别配置每个工具。 + +## 技能包结构 + +技能包可以是单个 `SKILL.md` 文件,也可以是包含多个文件的 ZIP 包: + +``` +skill-name/ +├── SKILL.md # 技能定义文件(必需) +├── config/ +│ ├── config.yaml # 参数默认值(可选) +│ └── schema.yaml # 参数类型与说明(可选) +├── scripts/ +│ └── *.py # Python 脚本(可选) +├── examples.md 
# 使用示例(可选) +└── assets/ # 静态资源(可选) +``` + +### SKILL.md 的结构 + +每个技能必须有一个 `SKILL.md` 文件,分为两部分: + +**第一部分:YAML 元数据(必须)** + +```yaml +--- +name: skill-name +description: | + 一段描述,说明这个技能是做什么的、什么时候该用它。 + 建议用第三人称书写,如:"这个技能用于..." +tags: + - tag1 + - tag2 +--- +``` + +**第二部分:技能正文** + +元数据下方可以继续写 Markdown 内容,包括: +- 技能的详细说明与使用指南 +- 工具调用方式的示例代码 +- 错误处理说明 +- 使用限制与注意事项 + +### 两种技能类型 + +根据用途,技能分为两类: + +**工具类技能**:用于暴露一个或多个 Nexent sdk方法的能力,包含工具的参数说明、调用示例、返回格式、错误处理等。用户配置好参数后,智能体即可调用这些工具。 + +**智能体类技能**:用于教智能体如何执行一个复杂任务,包含工作流程说明、领域知识、最佳实践,有时附带辅助脚本。这类技能的正文会包含详细的步骤指引。 + +## 官方技能一览 + +### 文件操作类 + +| 技能名称 | 能力说明 | +|---------|---------| +| `read-file` | 读取工作空间内文件内容与元信息 | +| `create-file-directory` | 创建文件或目录 | +| `delete-file-directory` | 删除文件或目录 | +| `move-file-directory` | 移动或重命名文件/目录 | +| `list-directory` | 树形列出目录结构 | + +### 知识库搜索类 + +| 技能名称 | 能力说明 | +|---------|---------| +| `search-knowledge-base` | 本地知识库语义检索(支持 hybrid / accurate / semantic 模式) | +| `search-dify` | Dify 知识库检索 | +| `search-idata` | iData 知识库检索 | +| `search-datamate` | DataMate 知识库检索(支持相似度阈值控制) | + +### 公网搜索类 + +| 技能名称 | 能力说明 | +|---------|---------| +| `search-web-tavily` | Tavily 公网实时搜索 | +| `search-web-linkup` | Linkup 图文混合搜索 | +| `search-web-exa` | Exa 深度网页搜索 | + +### 多模态分析类 + +| 技能名称 | 能力说明 | +|---------|---------| +| `analyze-image` | 基于 VLM 的图片内容分析问答 | +| `analyze-text-file` | PDF/Word/Excel 等文件内容提取与问答 | + +### 通信与远程操作类 + +| 技能名称 | 能力说明 | +|---------|---------| +| `email-utils` | IMAP 收件 / SMTP 发件(支持 HTML / CC / BCC) | +| `run-shell-ssh` | 持久化 SSH 会话远程执行命令 | + +## 技能生命周期 + +### 版本管理 + +每个技能支持两个版本状态: + +- **草稿版本(version=0)**:开发调试阶段,修改即时生效,适合反复调整 +- **已发布版本(version>=1)**:正式使用,参数锁定,防止误改 + +### 技能实例 + +同一个技能可以为不同的智能体配置不同的参数值,互不影响。 + +例如,搜索技能可以为"技术文档 Agent"配置只搜索技术知识库,为"客服 Agent"配置只搜索客服知识库。 + +### 常见操作流程 + +``` +创建技能 → 配置参数 → 为智能体选择技能 → 调试 → 发布 + ↓ + 修改草稿版本 +``` + +## 安全说明 + +- **路径隔离**:技能包内文件仅能在技能目录范围内访问 +- **参数校验**:schema.yaml 中定义的参数均经过前端表单校验 +- **权限控制**:技能实例按租户隔离,API 需携带认证 Token + +## 相关参考 + +- 
[技能管理(用户指南)](../../user-guide/skills) +- [智能体开发指南](../../user-guide/agent-development) +- [本地工具概览](../../user-guide/local-tools/index) diff --git a/doc/docs/zh/backend/tools/index.md b/doc/docs/zh/backend/tools/index.md index 94e1fe36e..88560fdcf 100644 --- a/doc/docs/zh/backend/tools/index.md +++ b/doc/docs/zh/backend/tools/index.md @@ -12,6 +12,10 @@ 模型上下文协议工具,用于标准化 AI 智能体通信。 → [MCP 工具开发](./mcp) +### Skills 技能系统 +通过自然语言或 ZIP 包创建可复用的技能包,为智能体赋予更加灵活的工具调用能力。 +→ [Skills 技能文档](../skills/index) + ## 快速开始 1. **选择工具类型**: LangChain 用于通用 AI 工作流,MCP 用于标准化智能体通信 @@ -28,4 +32,4 @@ - 查看我们的 [常见问题](../../quick-start/faq) 了解常见工具集成问题 - 加入我们的 [Discord 社区](https://discord.gg/tb5H3S3wyv) 获取实时支持 -- 查看 [GitHub Issues](https://github.com/ModelEngine-Group/nexent/issues) 了解已知问题 \ No newline at end of file +- 查看 [GitHub Issues](https://github.com/ModelEngine-Group/nexent/issues) 了解已知问题 diff --git a/doc/docs/zh/getting-started/features.md b/doc/docs/zh/getting-started/features.md index 15db67357..658a89e18 100644 --- a/doc/docs/zh/getting-started/features.md +++ b/doc/docs/zh/getting-started/features.md @@ -25,7 +25,7 @@ Nexent 支持 **Agent-to-Agent(A2A)** 通信协议,让多个智能体能 ## 📝 Skill 渐进式披露 -Nexent 引入了 **渐进式 Skill 披露**机制。当用户输入任务时,系统会根据当前上下文动态揭示最相关的 Skill 建议,帮助用户快速找到适合当前任务的工具和方法。这一机制让新用户能够渐进式地探索系统能力,同时不增加高级用户的操作复杂度。 +Nexent 引入了 **渐进式 Skill 披露**机制。当用户输入任务时,系统会根据当前上下文动态揭示最相关的 Skill 建议,帮助用户快速找到适合当前任务的工具和方法。这一机制能够防止上下文爆炸,高效利用上下文窗口。 ## 🗄️ 个人级知识库 diff --git a/doc/docs/zh/sdk/data-process.md b/doc/docs/zh/sdk/data-process.md index a887c8442..1f1c27fde 100644 --- a/doc/docs/zh/sdk/data-process.md +++ b/doc/docs/zh/sdk/data-process.md @@ -98,6 +98,9 @@ def file_process(self, - `.odt` - OpenDocument文本 - `.pptx` - PowerPoint 2007及更高版本 - `.ppt` - PowerPoint 97-2003版本 +- `.xml` - XML数据文件 +- `.json` - JSON数据文件 +- `.csv` - 逗号分隔值文件 ## 💡 使用示例 diff --git a/doc/docs/zh/user-guide/agent-development.md b/doc/docs/zh/user-guide/agent-development.md index 67d3c8311..a8cca4a33 100644 --- 
a/doc/docs/zh/user-guide/agent-development.md +++ b/doc/docs/zh/user-guide/agent-development.md @@ -31,15 +31,88 @@ ### 🤝 协作 Agent +协作智能体用于帮助当前智能体完成复杂任务。协作智能体的来源分为两类: + +- **内部 Agent**:平台已发布的智能体 +- **外部 A2A Agent**:通过 A2A 协议发现的第三方 Agent + 1. 点击"协作 Agent"页签下的加号,弹出可选择的智能体列表 -2. 在下拉列表中选择要添加的智能体 -3. 允许选择多个协作智能体 -4. 可点击 × 取消选择此智能体 +2. 智能体列表分为"内部 Agent"和"外部 A2A Agent"两个页签,您可以根据需要选择 +3. 在下拉列表中选择要添加的智能体 +4. 允许选择多个协作智能体 +5. 可点击 × 取消选择此智能体 + +
+ +
+ +#### 🌐 添加外部 A2A Agent + +Nexent 支持通过 A2A 协议与第三方 Agent 进行通信。您可以通过以下两种方式发现外部 A2A Agent: + +##### 通过 URL 发现 Agent + +如果您知道目标 Agent 的 Agent Card 地址,可以使用 URL 发现方式: + +
+ +
+ +1. 在外部 A2A Agent 列表中,点击"添加外部 Agent"按钮 +2. 选择"URL 发现"页签 +3. 填写 Agent Card URL 地址,例如:`https://example.com/.well-known/agent-card.json` +4. 点击"发现"按钮,系统会自动获取 Agent 的相关信息 +5. 发现成功后,可以查看 Agent 的名称、描述、能力等信息 +6. 点击"添加到列表"完成添加 + +> 💡 **提示**:Agent Card 是符合 A2A 1.0 规范的 Agent 描述文件,包含了 Agent 的名称、描述、调用地址、能力等信息。 + +##### 通过 Nacos 发现 Agent + +如果您的 Agent 注册在 Nacos 服务发现平台,可以使用 Nacos 发现方式:
- +
+1. 在外部 A2A Agent 列表中,点击"添加外部 Agent"按钮 +2. 选择"Nacos 发现"页签 +3. 首次使用时,需要先配置 Nacos 连接信息: + - **Nacos 服务器地址**:填写 Nacos 服务器地址,如 `http://127.0.0.1:8848` + - **命名空间 ID**:填写 Nacos 命名空间 ID(可选) + - **分组名**:填写服务分组名,默认为 `DEFAULT_GROUP` + - **用户名/密码**:填写 Nacos 访问凭证(可选) +4. 点击"保存配置"保存 Nacos 连接信息 +5. 填写要扫描的 Agent 服务名称 +6. 点击"扫描"按钮,系统会从 Nacos 中获取匹配的 Agent 信息 +7. 扫描结果会列出所有匹配的 Agent,可以选择需要的 Agent 添加到列表 + +> ⚠️ **注意**:确保 Nacos 服务正常运行,且目标 Agent 已正确注册到 Nacos。 + +##### 管理已发现的外部 Agent + +在外部 A2A Agent 列表中,您可以查看和管理所有已发现的外部 Agent: + + + +
+ +
+ +1. **查看 Agent 详情**:点击 Agent 卡片,可以查看其完整信息,包括名称、描述、URL、能力列表等 +2. **测试 Agent**:点击"测试"按钮,可以向该 Agent 发送测试消息,验证其是否正常工作 +3. **与 Agent 对话**:点击"对话"按钮,可以打开对话窗口,与该 Agent 进行实时交互 +4. **配置调用协议**:点击"协议配置"按钮,可以选择该 Agent 的调用协议: + - **HTTP + JSON**:使用 REST API 风格调用 + - **JSON-RPC**:使用 JSON-RPC 协议调用 +5. **刷新 Agent 信息**:如果 Agent 信息发生变化,可以点击"刷新"按钮重新获取最新的 Agent Card +6. **移除 Agent**:点击"移除"按钮,可以将该 Agent 从已发现列表中删除 + +> 💡 **使用场景**: +> - 通过 URL 发现快速接入已知的第三方 Agent 服务 +> - 通过 Nacos 发现批量接入同一服务注册中心的所有 Agent +> - 配置协议以兼容不同 Agent 服务提供商的要求 + ### 🛠️ 选择智能体的工具 智能体可以使用各种工具来完成任务,如知识库检索、文件解析、图片解析、收发邮件、文件管理等本地工具,也可接入第三方 MCP 工具,或自定义工具。 @@ -60,7 +133,10 @@ > 2. 请选择 `analyze_text_file` 工具,启用文档类、文本类文件的解析功能。 > 3. 请选择 `analyze_image` 工具,启用图片类文件的解析功能。 > +> ⚠️ **向量化模型配置**:使用 `knowledge_base_search` 工具时,需要确保知识库已配置向量化模型。对于存量知识库,系统会提示选择向量化模型,请务必选择**创建该知识库时使用的向量化模型**。若选择的模型与知识库创建时使用的模型不一致,可能导致检索失败或结果不准确。 +> > 📚 想了解系统已经内置的所有本地工具能力?请参阅 [本地工具概览](./local-tools/index.md)。 +> 📚 想了解技能能力?请参阅 [技能管理](./skills.md)。 ### 🔌 添加 MCP 工具 @@ -108,6 +184,40 @@ 有许多第三方服务如 [ModelScope](https://www.modelscope.cn/mcp) 提供了 MCP 服务,您可以快速接入使用。 您也可以自行开发 MCP 服务并接入 Nexent 使用,参考文档 [MCP 工具开发](../backend/tools/mcp)。 +**3️⃣ 存量 API 转换为 MCP 服务** + +🔔 该方法适用于将已有的 REST API 接口快速转换为 MCP 工具,无需额外开发即可让智能体调用现有 API 能力: + +>1. 在 MCP 配置模块选择 **"API 转换为 MCP"** 接入类型 +> +>2. 在下方的输入框中填写 API 基础信息: +> - **服务名称**:MCP 服务的展示名称 +> - **OpenAPI JSON**:OpenAPI 3.x 规范的 JSON 内容 +> - **基础服务 URL**:API 服务的基础地址(支持 http/https) +> +>3. 点击右下角 **+ 添加** 按钮,完成对应 MCP 服务的转换 + +
+ +
+ +> +>4. 转换完成后,可在 **Outer APIs** 页签下查看所有外部 API 转换的 MCP 工具 + +
+ +
+ +
+ +
+ +>💡 **使用场景**: +>- 快速接入企业内部的 REST API 接口 +>- 将第三方服务的 HTTP API 转换为 MCP 工具 +>- 无需编写 MCP Server 代码,直接通过 OpenAPI 规范生成工具 + + ### ⚙️ 自定义工具 您可参考以下指导文档,开发自己的工具,并接入 Nexent 使用,丰富智能体能力。 @@ -129,8 +239,8 @@ - 测试的 `query`,例如"维生素C的功效" - 检索的模式 `search_mode`(默认为 `hybrid`) - 目标检索的知识库列表 `index_names`,如 `["医疗", "维生素知识大全"]` - - 若不输入 `index_names`,则默认检索知识库页面所选中的全部知识库 - - 是否启用重排模型(默认为 `false`),启用后配置重排模型,实现对检索结果的重排优化 + - 若不输入 `index_names`,则默认检索知识库页面所选中的全部知识库 + - 是否启用重排模型(默认为 `false`),启用后配置重排模型,实现对检索结果的重排优化 6. 输入完成后点击"执行测试"开始测试,并在下方查看测试结果
@@ -172,7 +282,8 @@
-### 🐛 调试与保存 +## 🐛 调试与保存 + 在完成初步智能体配置后,您可以对智能体进行调试,根据调试结果微调提示词,持续提升智能体表现。 @@ -182,7 +293,7 @@ 调试成功后,可点击右下角"保存"按钮,此智能体将会被保存并出现在智能体列表中。 -### 🐛 版本管理 +## 🐛 版本管理 Nexent 支持智能体的版本管理,您可以在调试过程中,保存不同版本的智能体配置。 @@ -194,6 +305,121 @@ Nexent 支持智能体的版本管理,您可以在调试过程中,保存不 ![版本管理2](./assets/agent-development/version_management_2.png) +### 🚀 发布为 A2A Agent + +Nexent 支持将已发布的智能体作为 A2A Agent 暴露给外部系统调用。在发布版本时,您可以勾选"发布为 A2A Agent"选项,将当前智能体注册为符合 A2A 1.0 规范的 Agent。 + +
+ +
+ +发布成功后,系统会显示 A2A Agent 的调用信息,包括: + +
+ +
+ +| 信息项 | 说明 | +|--------|------| +| **Endpoint ID** | A2A Agent 的唯一标识符 | +| **Agent Card URL** | Agent 发现端点,外部系统通过此地址获取 Agent 描述 | +| **协议版本** | A2A 协议版本,当前为 1.0 | +| **REST 端点** | 基于 REST 风格的 API 端点 | +| **JSON-RPC 端点** | 基于 JSON-RPC 2.0 协议的调用端点 | + +#### 调用方式 + +发布后的 A2A Agent 支持以下两种调用协议: + +##### REST API + +```bash +# 获取 Agent Card(用于 Agent 发现) +GET /nb/a2a/{endpoint_id}/.well-known/agent-card.json + +# 发送同步消息 +POST /nb/a2a/{endpoint_id}/message:send +Content-Type: application/json + +{ + "message": { + "role": "user", + "content": "请帮我完成某个任务" + } +} + +# 发送流式消息(SSE) +POST /nb/a2a/{endpoint_id}/message:stream +Content-Type: application/json + +{ + "message": { + "role": "user", + "content": "请帮我完成某个任务" + } +} + +# 获取任务状态 +GET /nb/a2a/{endpoint_id}/tasks/{task_id} +``` + +##### JSON-RPC 2.0 + +```bash +POST /nb/a2a/{endpoint_id}/v1 +Content-Type: application/json + +# 发送同步消息 +{ + "jsonrpc": "2.0", + "method": "SendMessage", + "params": { + "message": { + "role": "user", + "content": "请帮我完成某个任务" + } + }, + "id": 1 +} + +# 发送流式消息 +{ + "jsonrpc": "2.0", + "method": "SendStreamingMessage", + "params": { + "message": { + "role": "user", + "content": "请帮我完成某个任务" + } + }, + "id": 2 +} + +# 获取任务状态 +{ + "jsonrpc": "2.0", + "method": "GetTask", + "params": { + "taskId": "task_abc123" + }, + "id": 3 +} +``` + +> 💡 **提示**: +> - 本地开发时,请将路径前面的 `/nb/a2a` 部分替换为 `http://localhost:5013/nb/a2a` +> - 生产环境中,请在路径前加上您的服务器域名或公网 IP 地址 + +> ⚠️ **注意事项**: +> - 调用 A2A Agent 需要在请求头中携带有效的认证信息 +> - Agent Card 信息会被缓存,刷新间隔为 1 小时 +> - 如需更新 Agent 信息,需要重新发布智能体版本 + +当发布的 Agent 为符合 A2A 协议的 Agent 时,用户可以在智能体列表中点击下面这个按钮,查看该 A2A Agent 的具体调用信息: + +
+ +
## 🔧 管理智能体 diff --git a/doc/docs/zh/user-guide/assets/agent-development/a2a-detail.jpg b/doc/docs/zh/user-guide/assets/agent-development/a2a-detail.jpg new file mode 100644 index 000000000..e0ce35f1f Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/a2a-detail.jpg differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/a2a-discovery-list.jpg b/doc/docs/zh/user-guide/assets/agent-development/a2a-discovery-list.jpg new file mode 100644 index 000000000..0464ce760 Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/a2a-discovery-list.jpg differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/a2a-find-detail.jpg b/doc/docs/zh/user-guide/assets/agent-development/a2a-find-detail.jpg new file mode 100644 index 000000000..ed9912627 Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/a2a-find-detail.jpg differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/a2a-nacos-discovery.jpg b/doc/docs/zh/user-guide/assets/agent-development/a2a-nacos-discovery.jpg new file mode 100644 index 000000000..f1fba231d Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/a2a-nacos-discovery.jpg differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/a2a-published-as.jpg b/doc/docs/zh/user-guide/assets/agent-development/a2a-published-as.jpg new file mode 100644 index 000000000..7bfc7d170 Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/a2a-published-as.jpg differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/a2a-url-discovery.jpg b/doc/docs/zh/user-guide/assets/agent-development/a2a-url-discovery.jpg new file mode 100644 index 000000000..a6e244ff1 Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/a2a-url-discovery.jpg differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api.png b/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api.png 
new file mode 100644 index 000000000..ed03af94f Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api.png differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api_1.png b/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api_1.png new file mode 100644 index 000000000..4dda4579d Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api_1.png differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api_2.png b/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api_2.png new file mode 100644 index 000000000..faba05fec Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/add_mcp_from_api_2.png differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/set-collaboration.jpg b/doc/docs/zh/user-guide/assets/agent-development/set-collaboration.jpg new file mode 100644 index 000000000..ccb8a2f6b Binary files /dev/null and b/doc/docs/zh/user-guide/assets/agent-development/set-collaboration.jpg differ diff --git a/doc/docs/zh/user-guide/assets/agent-development/set-collaboration.png b/doc/docs/zh/user-guide/assets/agent-development/set-collaboration.png deleted file mode 100644 index 719f9b6ac..000000000 Binary files a/doc/docs/zh/user-guide/assets/agent-development/set-collaboration.png and /dev/null differ diff --git a/doc/docs/zh/user-guide/knowledge-base.md b/doc/docs/zh/user-guide/knowledge-base.md index fa98eac62..b0ebb53f5 100644 --- a/doc/docs/zh/user-guide/knowledge-base.md +++ b/doc/docs/zh/user-guide/knowledge-base.md @@ -26,12 +26,14 @@ Nexent支持多种文件格式,包括: -- **文本**: .txt, .md文件 +- **文本**: .txt, .md, .json文件 - **PDF**: .pdf文件 - **Word**: .docx文件 - **PowerPoint**: .pptx文件 - **Excel**: .xlsx文件 +- **EPUB**: .epub文件 - **数据文件**: .csv文件 +- **网页内容**: .html, .xml文件 ## 📊 知识库总结 diff --git a/doc/docs/zh/user-guide/local-tools/index.md b/doc/docs/zh/user-guide/local-tools/index.md index 
ebd7de972..ceaac3f54 100644 --- a/doc/docs/zh/user-guide/local-tools/index.md +++ b/doc/docs/zh/user-guide/local-tools/index.md @@ -9,6 +9,7 @@ - [搜索工具](./search-tools):本地/DataMate/Dify 知识库检索与 Exa/Tavily/Linkup 公网搜索。 - [多模态工具](./multimodal-tools):文本文件与图片的下载、解析、模型分析。 - [终端工具](./terminal-tool):持久化 SSH 会话,远程执行命令。 +- [技能(Skills)](../skills):Nexent内置工具组合或自定义能力包,支持 NL 生成与版本管理。 ## ⚙️ 配置入口 @@ -21,4 +22,4 @@ - 路径类操作仅限工作空间范围,请使用相对路径。 - 公网搜索需先在平台安全配置中填写 API Key。 - 终端工具涉及远程主机,请确认网络与账号安全策略。 -- 删除、移动类操作不可恢复,执行前先确认目标。 \ No newline at end of file +- 删除、移动类操作不可恢复,执行前先确认目标。 diff --git a/doc/docs/zh/user-guide/model-management.md b/doc/docs/zh/user-guide/model-management.md index 46c1b25b4..c8f07c0c3 100644 --- a/doc/docs/zh/user-guide/model-management.md +++ b/doc/docs/zh/user-guide/model-management.md @@ -169,6 +169,14 @@ Nexent支持与ModelEngine平台的无缝对接
+#### 语音合成模型 +语音合成模型用于将文本内容即时转换为自然流畅的语音输出,使系统能够以接近真人的方式进行语音交互与反馈。通过低延迟、高拟真度的语音生成能力,确保用户在对话过程中获得连贯、自然的听觉体验。配置合适的实时语音合成模型,可以显著提升语音交互系统的表现力和用户体验。 +- 点击语音合成模型下拉框,从已添加的语音合成模型中选择一个。 + +#### 语音识别模型 +语音识别模型用于将用户输入的语音内容实时转换为文本,实现对语音指令和自然语言的准确理解与解析。通过高精度的语音转写与噪声鲁棒能力,确保在复杂环境下依然能够稳定识别用户意图。配置合适的语音识别模型,可以显著提升语音交互系统的理解能力和整体响应效率。 +- 点击语音识别模型下拉框,从已添加的语音识别模型中选择一个。 + ### ✅ 检查模型连通性 定期检查模型连通性是确保系统稳定运行的重要环节。通过连通性检查功能,您可以及时发现和解决模型连接问题,保证服务的连续性和可靠性。 @@ -224,18 +232,29 @@ Nexent 支持任何 **遵循OpenAI API规范** 的大语言模型供应商,包 使用与大语言模型相同的API Key,但模型URL一般会有所差异,一般以`/v1/rerank`为结尾。 #### 🎤 语音模型 -目前仅支持火山引擎语音,且需要在`.env`中进行配置 +目前支持阿里灵积和火山引擎语音模型,阿里灵积需配置与大语言模型相同的apikey,火山引擎模型需配置appid与token +**火山引擎** - **网站**: [volcengine.com/product/voice-tech](https://www.volcengine.com/product/voice-tech) - **免费额度**: 个人使用可用 - **特色**: 高质量中英文语音合成 -**开始使用**: +- **开始使用**: + + 1. 注册火山引擎账户 + 2. 访问语音技术服务 + 3. 创建应用并获取appid和token + 4. 在添加模型页面中配置 TTS/STT 设置 + +**阿里灵积** +- **网站**: [aliyun.com/benefit/scene/voice](https://www.aliyun.com/benefit/scene/voice) + +- **开始使用**: -1. 注册火山引擎账户 -2. 访问语音技术服务 -3. 创建应用并获取 API Key -4. 在环境中配置 TTS/STT 设置 + 1. 注册阿里云账户 + 2. 访问阿里千问实时语音技术服务 + 3. 创建应用并获取 API Key + 4. 
在添加模型页面中配置 TTS/STT 设置 ## 💡 需要帮助 diff --git a/doc/docs/zh/user-guide/skills.md b/doc/docs/zh/user-guide/skills.md new file mode 100644 index 000000000..54d0f97bb --- /dev/null +++ b/doc/docs/zh/user-guide/skills.md @@ -0,0 +1,476 @@ +--- +title: 技能管理 +--- + +# 技能管理 + +技能(Skill)是 Nexent 为智能体扩展能力的核心机制。每个技能将多个工具与使用文档打包为一个可复用的能力单元,可以像搭积木一样为智能体赋予复杂的工作能力。 + +## 目录 + +- [技能与工具的关系](#-技能与工具的关系):理解技能的核心概念 +- [技能使用指南](#-技能使用指南):如何在智能体开发中使用技能 +- [技能管理](#-技能管理):创建、编辑、安装外部技能 +- [技能上传指南](#-技能上传指南):SKILL.md 格式、ZIP 结构、特殊标签与书写规范 +- [NL-to-Skill](#-nl-to-skill):通过自然语言描述自动生成技能 +- [官方技能一览](#-官方技能一览):预置技能及其能力说明 + +## 技能与工具的关系 + +在 Nexent 中,**工具(Tool)** 与 **技能(Skill)** 是两个不同层次的概念,理解它们的区别有助于更好地为智能体配置能力。 + +**工具**是智能体可调用的单个原子操作。为智能体启用工具时,LLM 的每次思考都会在工具列表中搜索——这意味着即使某个工具本次对话完全不需要,LLM 仍然会消耗上下文额度去"看到"它。 + +**技能**则通过 `SKILL.md` 将多个工具的能力组合为一个完整的工作流,并附带参数配置与使用文档。LLM 不需要预先"看到"所有工具,而是根据用户的实际需求,自行判断是否激活某个技能。激活后,系统才会加载对应的工具集——从而有效节省 Token 消耗。 + +| 维度 | 工具 | 技能 | +|------|------|------| +| 粒度 | 单个原子操作 | 多个工具 + 配置 + 文档的组合 | +| Token 消耗 | 每次对话都占用上下文 | 仅在激活时才加载 | +| 参数 | 固定参数 schema | 可自定义参数模板 | +| 分发 | 代码级 | ZIP 包分发,即插即用 | + +## 技能使用指南 + +### 为智能体配置技能 + +1. 打开 **[智能体开发](./agent-development)** 页面 +2. 在"选择智能体的工具"页签中,找到 **技能(Skills)** 分组 +3. 点击技能名称即可选中,再次点击取消选择 +4. 保存智能体配置 + +## 技能管理 + +### 查看已安装的技能 + +在"选择智能体的工具"技能分组中,系统会展示所有已安装的技能列表,包括: +- 官方技能 +- 自定义技能 + +### 创建自定义技能 + +Nexent 支持两种方式创建自定义技能:上传技能包文件,或通过自然语言描述自动生成。 + +#### 方式一:上传 SKILL.md 或 ZIP + +1. 进入技能配置界面 +2. 点击"上传技能"按钮 +3. 选择 `SKILL.md` 文件(单文件)或 `.zip` 压缩包(完整技能包) +4. 
系统自动解析并创建技能 + +#### 方式二:NL-to-Skill 自然语言创建 + +在技能管理页面,点击"**NL 创建技能**"按钮即可进入。具体用法详见下方 [NL-to-Skill](#-nl-to-skill) 专区。 + +## 技能上传指南 + +### 技能包结构 + +技能包可以是单个文件,也可以是包含多个文件的 ZIP 包: + +``` +skill-name/ +├── SKILL.md # 技能定义文件(必需) +├── config/ +│ ├── config.yaml # 参数默认值 +│ └── schema.yaml # 参数类型与说明 +├── scripts/ +│ └── *.py # Python 脚本 +├── examples.md # 使用示例 +└── assets/ # 静态资源 +``` + +### SKILL.md 格式详解 + +`SKILL.md` 是技能的核心文件,分为 YAML 元数据区和正文两部分。 + +**YAML 元数据(必需)** + +文件顶部必须有 YAML frontmatter,格式如下: + +```yaml +--- +name: skill-name +description: | + 一段描述,说明这个技能是做什么的、什么时候该用它。 + 建议用第三人称书写。 +tags: + - tag1 + - tag2 +--- +``` + +| 字段 | 必填 | 说明 | 示例 | +|------|------|------|------| +| `name` | 是 | 技能名称,全英文、小写、单词间用连字符 | `github-repo-analyzer` | +| `description` | 是 | 技能功能描述,建议 1-3 句话,包含使用场景 | `这个技能用于分析 GitHub 仓库并提取关键指标` | +| `tags` | 否 | 技能标签列表,便于分类检索 | `["code", "github", "analysis"]` | + +**正文** + +元数据下方可以写 Markdown 正文,包含技能的使用说明、最佳实践、示例代码等。 + +### 两种技能类型 + +根据用途,技能分为两类,书写方式有所不同: + +**工具类技能**:用于暴露工具能力。正文应包含工具的参数说明、调用示例、返回格式、错误处理等。 + +**智能体类技能**:用于教智能体执行复杂任务。正文应包含工作流程、领域知识、边界条件、最佳实践等。 + +### config/schema.yaml:定义参数表单 + +如果技能需要用户填写参数,可以创建 `config/schema.yaml` 文件。系统会根据此文件在前端自动生成参数配置表单。 + +```yaml +param_name: + type: string | number | boolean | array | object + required: true | false + default: <默认值> + description: "参数的英文说明" + description_zh: "参数的中文说明" +``` + +**支持的类型**:`string`、`number`、`boolean`、`array`、`object` + +**完整示例**: + +```yaml +query: + type: string + required: true + description: "Search query string" + description_zh: "搜索关键词" + default: "" + +top_k: + type: number + required: false + description: "Number of results to return" + description_zh: "返回结果数量" + default: 3 + +enable_rerank: + type: boolean + required: false + description: "Enable result reranking" + description_zh: "是否启用结果重排序" + default: false +``` + +### config/config.yaml:设置参数默认值 + +如果希望某些参数有默认值,可以创建 `config/config.yaml`: + +```yaml +# Initial workspace path +init_path: "/mnt/nexent" + +# Maximum number 
of results +top_k: 5 +``` + +### 特殊标签 + +在 SKILL.md 正文中,可以使用以下特殊标签: + +#### ``:按需加载示例文件 + +使用 `` 标签引用外部文件,该文件仅在需要时才被加载,不会增加 SKILL.md 的主文件大小。 + +```markdown +## 示例参考 + + +``` + +#### ``:声明捆绑的脚本 + +如果技能包中包含 Python 或 Shell 脚本,需要在 SKILL.md 中声明: + +```markdown + +``` + +#### ``:展示可执行代码示例 + +使用 `` 标签包裹可执行的代码示例(通常为 Python 代码): + +```markdown + +result = run_skill_script( + "code-reviewer", + "scripts/analyze.py", + {"--target": "/path/to/file.py", "--verbose": True} +) +print(result) + +``` + +### 辅助函数 + +在智能体类技能的正文和示例中,可以使用以下函数: + +**`run_skill_script(skill_name, script_path, params)`**:执行技能包中的脚本 + +```python +# 执行 Python 脚本 +result = run_skill_script( + "code-reviewer", + "scripts/analyze.py", + {"--target": "/path/to/file.py"} +) + +# 执行 Shell 脚本 +result = run_skill_script( + "database-migration", + "scripts/migrate.sh", + {"--direction": "up", "--steps": 1} +) +``` + +**`read_skill_md(skill_name, files)`**:读取技能包中的文件内容 + +```python +# 默认只读取 SKILL.md(如果存在引用文件,不会自动包含) +content = read_skill_md("my-skill") + +# 显式指定要读取的文件 +full_content = read_skill_md("my-skill", [ + "SKILL.md", + "reference/api-reference.md" +]) +``` + +### 书写规范与最佳实践 + +**SKILL.md 书写规范**: + +1. **描述要具体**:说明技能在什么场景下使用,而不是仅仅描述功能 + - ✓ "当用户需要分析 GitHub 仓库的流行度指标时使用" + - ✗ "GitHub 搜索功能" + +2. **避免时间敏感信息**:不要包含具体日期、版本号等会过期的内容 + +3. **保持简洁**:SKILL.md 正文建议控制在 500 行以内。复杂内容用 `` 按需加载 + +4. **路径格式**:始终使用正斜杠 `/`,即使在 Windows 下也如此 + - ✓ `src/services/payment_service.py` + - ✗ `src\services\payment_service.py` + +5. **参数命名一致**:全文统一使用相同的术语和命名风格 + +6. 
**包含边界条件**:说明技能的适用范围和限制 + +**参数描述最佳实践**: + +```yaml +# ✓ 好:明确说明用途和格式 +query: + type: string + required: true + description: "GitHub repository owner/name or full URL" + description_zh: "GitHub 仓库的 owner/name 格式或完整 URL" + +# ✗ 差:过于模糊 +query: + type: string + required: true + description: "Search query" + description_zh: "查询" +``` + +**代码示例最佳实践**: + +- 每个工具至少提供 2 个不同场景的示例 +- 示例中包含常见参数组合 +- 示例展示成功调用和常见错误处理 + +### 从现有技能学习 + +系统内置了多个完整技能的参考示例,您可以在 `test_skill_examples/official-skills/` 目录下找到它们: + +| 技能名 | 参考价值 | +|--------|---------| +| `create-file-directory` | 工具类技能的标准写法,包含完整参数表、调用示例、错误处理表 | +| `search-knowledge-base` | 搜索类技能的参数配置,包含 schema.yaml 和 config.yaml 的完整示例 | +| `analyze-image` | 多模态工具的示例,包含 `` 调用格式 | +| `code_review_expert` | 智能体类技能的参考,包含捆绑脚本和 `` 标签用法 | + +### 常见问题 + +**Q: 上传 ZIP 包时报错"缺少 SKILL.md"** + +确保 ZIP 包根目录下包含 `SKILL.md` 文件,而不是将其放在子文件夹中。 + +**Q: 技能描述不生效** + +技能描述应写在 YAML frontmatter 的 `description` 字段中,而非正文的 Markdown 部分。正文内容不会被解析为技能描述。 + +## NL-to-Skill + +NL-to-Skill 是 Nexent 提供的一项智能创建功能。您只需要用**自然语言描述**一个技能的需求,系统就能自动生成完整的技能包,包括技能定义、参数配置、甚至配套的脚本代码。整个生成过程实时可见,就像有一个 AI 助手在帮您写代码一样。 + +简单来说: + +> 您说"我想要一个能搜索 GitHub 仓库并提取 Star 数的技能",系统就自动为您生成一个完整可用的技能。 + +### 快速上手 + +#### 第一步:描述您的需求 + +在输入框中,用自然语言描述您想要的技能。描述越清晰,生成效果越好。 + +**正例**: +- "创建一个技能,可以根据关键词搜索 GitHub 仓库并返回 Star 数、描述和链接" +- "创建一个读取 Excel 文件、统计各列数据并生成图表的技能" +- "创建一个技能,能从邮件中提取订单号、金额和日期,汇总成表格" + +**反例**: +- "帮我做一个聊天技能"(太模糊) +- "搜索工具"(缺少具体能力描述) + +#### 第二步:查看生成过程 + +点击"生成"后,页面会实时展示 AI 的思考和编写过程: +- 看到 AI 在分析您的需求 +- 看到它正在编写技能定义文件 +- 看到它在规划参数结构 + +这个过程就像看 AI 现场写代码,您可以随时点击"停止"中断。 + +#### 第三步:预览并保存 + +生成完成后,系统会展示技能的完整内容: +- 技能名称和描述 +- 参数列表(每个参数是什么、是否必填) +- 使用示例 + +仔细检查预览内容: +- 如需调整,点击"编辑"微调 +- 如符合预期,点击"保存"将技能添加到您的技能库 + +### 写作技巧 + +#### 如何写好技能描述 + +**1. 明确输入输出** + +告诉系统这个技能需要什么信息、会返回什么结果。 + +``` +✓ "输入一个 GitHub 仓库地址,返回仓库名称、Star 数、Fork 数和最新更新时间" +✗ "搜索 GitHub"(太模糊) +``` + +**2. 说明使用场景** + +让 AI 理解在什么情况下会用到这个技能。 + +``` +✓ "用于快速查询开源项目的流行程度,帮助做技术选型决策" +✗ "查数据"(没有场景) +``` + +**3. 
描述边界条件** + +如果有特殊的处理逻辑或限制,一并说明。 + +``` +✓ "如果仓库不存在,返回友好提示而不是报错" +✓ "图片 URL 无效时跳过该图片并记录日志" +``` + +**4. 显式要求生成示例** + +如果技能使用场景复杂,且对边缘场景响应准确率要求较高,则可以在要求中明确提出生成更详细的示例。 + +``` +✓ "生成全面且详细的使用示例" +``` + +#### 适用场景举例 + +| 场景 | 描述示例 | +|------|---------| +| **数据采集** | "输入关键词,在知乎上搜索相关问答并提取最高赞回答的摘要" | +| **文件处理** | "上传一个 CSV 文件,自动统计各列数据并生成折线图" | +| **API 封装** | "创建一个调用天气 API 并返回未来三天预报的技能" | +| **多工具组合** | "输入商品链接,自动比价(调用多个电商搜索)并返回最低价链接" | +| **数据清洗** | "读取一段混乱的文本,提取其中的邮箱、手机号、日期并格式化输出" | + +### 生成过程中可以做什么 + +#### 实时预览 + +生成过程中,技能内容会逐步显示在预览区域: +- `SKILL.md` 内容:技能定义、描述、标签 +- `examples.md`:技能使用示例 +- `scripts/*.py`:工具脚本(复杂模式下) + +#### 随时停止 + +如果生成方向偏离预期: +- 点击"停止"按钮,AI 立即停止 +- 已有生成结果会保留,您可以查看或放弃 + +#### 多次尝试 + +如果第一次生成结果不理想: +- 直接补充需求细节,在原有基础上直接修改 +- 或者在预览中手动调整 +- 不满意当前生成的技能,希望重新再来时,您可以点击右上角的"垃圾桶"图标清空所有技能内容 + +### 使用限制与注意事项 + +#### 模型能力影响质量 + +NL-to-Skill 使用您租户配置的 LLM 模型来生成技能。模型的能力直接决定生成质量: +- 聪明的模型能准确理解需求,生成结构清晰、易于理解的技能 +- 较弱的模型可能生成不完整或有误导性的内容,影响智能体的效率与准确率 + +如果生成结果不理想,可以尝试: +1. 简化需求描述 +2. 切换到更聪明、更强大的模型 +3. 
分步骤创建(先做简单版本,再手动扩展) + +#### Token 消耗 + +复杂技能生成会消耗更多 Token: +- **简单模式**:通常消耗较少,适合快速验证 +- **复杂模式**:消耗较多,适合正式创建完整技能 + +建议先用简单模式测试想法,确认可行后再用复杂模式正式创建。 + +#### 并非所有需求都能实现 + +NL-to-Skill 擅长生成以下类型的技能: +- 单一工具的包装(如封装一个搜索能力) +- 多工具的简单串联(如搜 → 读 → 总结) +- 常见数据处理流程(如文件格式转换、数据提取) + +以下类型的技能可能超出能力范围: +- 需要调用未接入的外部 API +- 涉及复杂的状态管理或并发逻辑 +- 需要访问平台未开放的底层接口 + +遇到无法实现的需求时,系统会给出提示,您可以考虑手动创建或联系技术支持。 + +#### 技能修改 + +在 NL-to-Skill 界面可以选中已经存在的技能。选中技能后,该技能信息将自动加载。您可以在左侧对话框中使用自然语言尝试对该技能进行更新。 + +如果您创建的技能名与已有技能重名,Nexent 将自动从技能创建模式切换为技能更新模式。所有内容将覆盖更新至原有技能。 + +## 安全与最佳实践 + +- **知识库访问控制**:导入包含知识库工具的技能时,实际检索范围受当前用户权限限制 +- **公网搜索**:Tavily / Linkup / Exa 等公网搜索需先在平台安全配置中填写对应 API Key +- **路径安全**:技能包内文件操作仅限技能目录范围内,无法访问系统任意路径 + +## 相关参考 + +- [智能体开发](./agent-development) +- [本地工具概览](./local-tools/index) +- [MCP 工具配置](./mcp-tools) +- [技能系统概览](../backend/skills/overview) diff --git a/doc/docs/zh/user-guide/start-chat.md b/doc/docs/zh/user-guide/start-chat.md index 4e9dce692..fb3e4f0c6 100644 --- a/doc/docs/zh/user-guide/start-chat.md +++ b/doc/docs/zh/user-guide/start-chat.md @@ -80,8 +80,8 @@ Nexent支持语音输入功能,让您可以通过语音与智能体交互。 - 或直接将文件拖拽到对话区域 2. **支持的文件格式** - - **文档类**:PDF、Word (.docx)、PowerPoint (.pptx)、Excel (.xlsx) - - **文本类**:Markdown (.md)、纯文本 (.txt) + - **文档类**:PDF、Word (.docx)、PowerPoint (.pptx)、Excel (.xlsx)、EPUB (.epub)、HTML (.html)、XML (.xml) + - **文本类**:Markdown (.md)、纯文本 (.txt)、JSON (.json)、CSV (.csv) - **图片类**:JPG、PNG、GIF 等常见图片格式 3. 
**文件处理流程** diff --git a/docker/init.sql b/docker/init.sql index 2df9665c7..2e494fc72 100644 --- a/docker/init.sql +++ b/docker/init.sql @@ -175,6 +175,8 @@ CREATE TABLE IF NOT EXISTS "model_record_t" ( "updated_by" varchar(100) COLLATE "pg_catalog"."default", "created_by" varchar(100) COLLATE "pg_catalog"."default", "tenant_id" varchar(100) COLLATE "pg_catalog"."default" DEFAULT 'tenant_id', + "model_appid" varchar(100) COLLATE "pg_catalog"."default" DEFAULT '', + "access_token" varchar(100) COLLATE "pg_catalog"."default" DEFAULT '', CONSTRAINT "nexent_models_t_pk" PRIMARY KEY ("model_id") ); ALTER TABLE "model_record_t" OWNER TO "root"; @@ -198,6 +200,8 @@ COMMENT ON COLUMN "model_record_t"."update_time" IS 'Update time, audit field'; COMMENT ON COLUMN "model_record_t"."updated_by" IS 'Last updater ID, audit field'; COMMENT ON COLUMN "model_record_t"."created_by" IS 'Creator ID, audit field'; COMMENT ON COLUMN "model_record_t"."tenant_id" IS 'Tenant ID for filtering'; +COMMENT ON COLUMN "model_record_t"."model_appid" IS 'Application ID for model authentication.'; +COMMENT ON COLUMN "model_record_t"."access_token" IS 'Access token for model authentication.'; COMMENT ON TABLE "model_record_t" IS 'List of models defined by users in the configuration page'; INSERT INTO "nexent"."model_record_t" ("model_repo", "model_name", "model_factory", "model_type", "api_key", "base_url", "max_tokens", "used_token", "display_name", "connect_status") VALUES ('', 'volcano_tts', 'OpenAI-API-Compatible', 'tts', '', '', 0, 0, 'volcano_tts', 'unavailable'); @@ -211,6 +215,7 @@ CREATE TABLE IF NOT EXISTS "knowledge_record_t" ( "tenant_id" varchar(100) COLLATE "pg_catalog"."default", "knowledge_sources" varchar(100) COLLATE "pg_catalog"."default", "embedding_model_name" varchar(200) COLLATE "pg_catalog"."default", + "embedding_model_id" INTEGER, "group_ids" varchar, "ingroup_permission" varchar(30), "create_time" timestamp(0) DEFAULT CURRENT_TIMESTAMP, @@ -218,6 +223,9 @@ CREATE TABLE IF 
NOT EXISTS "knowledge_record_t" ( "delete_flag" varchar(1) COLLATE "pg_catalog"."default" DEFAULT 'N'::character varying, "updated_by" varchar(100) COLLATE "pg_catalog"."default", "created_by" varchar(100) COLLATE "pg_catalog"."default", + "summary_frequency" varchar(10) COLLATE "pg_catalog"."default", + "last_summary_time" timestamp(0), + "last_doc_update_time" timestamp(0), CONSTRAINT "knowledge_record_t_pk" PRIMARY KEY ("knowledge_id") ); ALTER TABLE "knowledge_record_t" OWNER TO "root"; @@ -228,11 +236,17 @@ COMMENT ON COLUMN "knowledge_record_t"."knowledge_describe" IS 'Knowledge base d COMMENT ON COLUMN "knowledge_record_t"."tenant_id" IS 'Tenant ID'; COMMENT ON COLUMN "knowledge_record_t"."knowledge_sources" IS 'Knowledge base sources'; COMMENT ON COLUMN "knowledge_record_t"."embedding_model_name" IS 'Embedding model name, used to record the embedding model used by the knowledge base'; +COMMENT ON COLUMN "knowledge_record_t"."embedding_model_id" IS 'Embedding model ID, foreign key reference to model_record_t.model_id'; COMMENT ON COLUMN "knowledge_record_t"."group_ids" IS 'Knowledge base group IDs list'; COMMENT ON COLUMN "knowledge_record_t"."ingroup_permission" IS 'In-group permission: EDIT, READ_ONLY, PRIVATE'; COMMENT ON COLUMN "knowledge_record_t"."create_time" IS 'Creation time, audit field'; COMMENT ON COLUMN "knowledge_record_t"."update_time" IS 'Update time, audit field'; COMMENT ON COLUMN "knowledge_record_t"."delete_flag" IS 'When deleted by user frontend, delete flag will be set to true, achieving soft delete effect. 
Optional values Y/N'; +COMMENT ON COLUMN "knowledge_record_t"."updated_by" IS 'User who last updated the record, audit field'; +COMMENT ON COLUMN "knowledge_record_t"."created_by" IS 'User who created the record, audit field'; +COMMENT ON COLUMN "knowledge_record_t"."summary_frequency" IS 'Auto-summary frequency: 1h, 3h, 6h, 1d, 1w, or NULL (disabled)'; +COMMENT ON COLUMN "knowledge_record_t"."last_summary_time" IS 'Timestamp of last summary generation'; +COMMENT ON COLUMN "knowledge_record_t"."last_doc_update_time" IS 'Timestamp of last document add/delete operation, used for auto-summary optimization to skip unnecessary summary regeneration'; COMMENT ON COLUMN "knowledge_record_t"."updated_by" IS 'Last updater ID, audit field'; COMMENT ON COLUMN "knowledge_record_t"."created_by" IS 'Creator ID, audit field'; COMMENT ON TABLE "knowledge_record_t" IS 'Records knowledge base description and status information'; @@ -1306,6 +1320,9 @@ CREATE TABLE IF NOT EXISTS nexent.ag_a2a_external_agent_t ( nacos_config_id VARCHAR(64), nacos_agent_name VARCHAR(255), + -- Base URL for infrastructure health checks + base_url VARCHAR(512), + -- Tenant isolation tenant_id VARCHAR(100) NOT NULL, created_by VARCHAR(100) NOT NULL, @@ -1352,6 +1369,7 @@ COMMENT ON COLUMN nexent.ag_a2a_external_agent_t.last_check_result IS 'Last heal COMMENT ON COLUMN nexent.ag_a2a_external_agent_t.create_time IS 'Record creation timestamp'; COMMENT ON COLUMN nexent.ag_a2a_external_agent_t.update_time IS 'Record last update timestamp'; COMMENT ON COLUMN nexent.ag_a2a_external_agent_t.delete_flag IS 'Soft delete flag: Y/N'; -- NOSONAR +COMMENT ON COLUMN nexent.ag_a2a_external_agent_t.base_url IS 'Base URL for health checks (service root address)'; CREATE TABLE IF NOT EXISTS nexent.ag_a2a_external_agent_relation_t ( @@ -1365,8 +1383,7 @@ CREATE TABLE IF NOT EXISTS nexent.ag_a2a_external_agent_relation_t ( create_time TIMESTAMP(6) DEFAULT CURRENT_TIMESTAMP, update_time TIMESTAMP(6) DEFAULT CURRENT_TIMESTAMP, 
delete_flag VARCHAR(1) DEFAULT 'N', - CONSTRAINT uq_local_external_agent UNIQUE (local_agent_id, external_agent_id), - CONSTRAINT fk_external_agent FOREIGN KEY (external_agent_id) REFERENCES nexent.ag_a2a_external_agent_t(id) + CONSTRAINT uq_local_external_agent UNIQUE (local_agent_id, external_agent_id) ); ALTER TABLE nexent.ag_a2a_external_agent_relation_t OWNER TO "root"; @@ -1476,9 +1493,7 @@ CREATE TABLE IF NOT EXISTS nexent.ag_a2a_message_t ( extensions JSONB, -- Extension URI list reference_task_ids JSONB, -- Referenced task IDs array create_time TIMESTAMP(6) DEFAULT CURRENT_TIMESTAMP, - UNIQUE(task_id, message_index), - CONSTRAINT ag_a2a_message_t_task_id_fk FOREIGN KEY (task_id) - REFERENCES nexent.ag_a2a_task_t(id) ON DELETE CASCADE + UNIQUE(task_id, message_index) ); ALTER TABLE nexent.ag_a2a_message_t OWNER TO "root"; @@ -1504,8 +1519,6 @@ CREATE TABLE IF NOT EXISTS nexent.ag_a2a_artifact_t ( meta_data JSONB, -- Metadata extensions JSONB, -- Extension URI list create_time TIMESTAMP(6) DEFAULT CURRENT_TIMESTAMP, - CONSTRAINT fk_artifact_task FOREIGN KEY (task_id) - REFERENCES nexent.ag_a2a_task_t(id) ON DELETE CASCADE, UNIQUE(task_id, artifact_id) ); diff --git a/docker/sql/v2.0.4_0506_add_base_url_in_external_agent.sql b/docker/sql/v2.0.4_0506_add_base_url_in_external_agent.sql new file mode 100644 index 000000000..e4723bc96 --- /dev/null +++ b/docker/sql/v2.0.4_0506_add_base_url_in_external_agent.sql @@ -0,0 +1,13 @@ +ALTER TABLE nexent.ag_a2a_external_agent_t +ADD COLUMN IF NOT EXISTS base_url VARCHAR(512); + +COMMENT ON COLUMN nexent.ag_a2a_external_agent_t.base_url IS 'Base URL for health checks (service root address)'; + +ALTER TABLE nexent.ag_a2a_message_t + DROP CONSTRAINT IF EXISTS ag_a2a_message_t_task_id_fk; + +ALTER TABLE nexent.ag_a2a_external_agent_relation_t + DROP CONSTRAINT IF EXISTS fk_external_agent; + +ALTER TABLE nexent.ag_a2a_artifact_t + DROP CONSTRAINT IF EXISTS fk_artifact_task; \ No newline at end of file diff --git 
a/docker/sql/v2.0.5_0511_add_auto_summary_fields_to_knowledge_record_t.sql b/docker/sql/v2.0.5_0511_add_auto_summary_fields_to_knowledge_record_t.sql new file mode 100644 index 000000000..491f6b27b --- /dev/null +++ b/docker/sql/v2.0.5_0511_add_auto_summary_fields_to_knowledge_record_t.sql @@ -0,0 +1,21 @@ +-- Migration: Add auto-summary fields to knowledge_record_t table +-- Date: 2026-05-11 +-- Description: Add summary_frequency, last_summary_time, and last_doc_update_time fields for auto-summary feature +-- This SQL consolidates fields added in multiple commits for clean upgrade path + +-- Add summary_frequency column (auto-summary frequency configuration) +ALTER TABLE nexent.knowledge_record_t +ADD COLUMN IF NOT EXISTS summary_frequency VARCHAR(10); + +-- Add last_summary_time column (timestamp of last summary generation) +ALTER TABLE nexent.knowledge_record_t +ADD COLUMN IF NOT EXISTS last_summary_time TIMESTAMP; + +-- Add last_doc_update_time column (timestamp of last document add/delete operation) +ALTER TABLE nexent.knowledge_record_t +ADD COLUMN IF NOT EXISTS last_doc_update_time TIMESTAMP; + +-- Add comments to the columns +COMMENT ON COLUMN nexent.knowledge_record_t.summary_frequency IS 'Auto-summary frequency: 1h, 3h, 6h, 1d, 1w, or NULL (disabled)'; +COMMENT ON COLUMN nexent.knowledge_record_t.last_summary_time IS 'Timestamp of last summary generation'; +COMMENT ON COLUMN nexent.knowledge_record_t.last_doc_update_time IS 'Timestamp of last document add/delete operation, used for auto-summary optimization to skip unnecessary summary regeneration'; \ No newline at end of file diff --git a/docker/sql/v2.1.1_0508_add_embedding_model_id_to_knowledge_record_t.sql b/docker/sql/v2.1.1_0508_add_embedding_model_id_to_knowledge_record_t.sql new file mode 100644 index 000000000..0305a2590 --- /dev/null +++ b/docker/sql/v2.1.1_0508_add_embedding_model_id_to_knowledge_record_t.sql @@ -0,0 +1,9 @@ +-- Add embedding_model_id column to knowledge_record_t table +-- This 
field stores the ID of the embedding model used by the knowledge base + +-- Add embedding_model_id column +ALTER TABLE "knowledge_record_t" +ADD COLUMN IF NOT EXISTS "embedding_model_id" INTEGER; + +-- Add column comment +COMMENT ON COLUMN "knowledge_record_t"."embedding_model_id" IS 'Embedding model ID, foreign key reference to model_record_t.model_id'; diff --git a/docker/sql/v2.1.1_0509_add_model_appid_token_to_model_record_t.sql b/docker/sql/v2.1.1_0509_add_model_appid_token_to_model_record_t.sql new file mode 100644 index 000000000..521fa38a4 --- /dev/null +++ b/docker/sql/v2.1.1_0509_add_model_appid_token_to_model_record_t.sql @@ -0,0 +1,9 @@ +ALTER TABLE nexent.model_record_t +ADD COLUMN IF NOT EXISTS model_appid VARCHAR(100) DEFAULT ''; + + +ALTER TABLE nexent.model_record_t +ADD COLUMN IF NOT EXISTS access_token VARCHAR(100) DEFAULT ''; + +COMMENT ON COLUMN nexent.model_record_t.model_appid IS 'Application ID for model authentication.'; +COMMENT ON COLUMN nexent.model_record_t.access_token IS 'Access token for model authentication.'; diff --git a/frontend/app/[locale]/agents/components/a2a/A2AAgentDiscoveryModal.tsx b/frontend/app/[locale]/agents/components/a2a/A2AAgentDiscoveryModal.tsx index 1988d6a8d..bc9260a29 100644 --- a/frontend/app/[locale]/agents/components/a2a/A2AAgentDiscoveryModal.tsx +++ b/frontend/app/[locale]/agents/components/a2a/A2AAgentDiscoveryModal.tsx @@ -33,8 +33,9 @@ import { Settings, MessageCircle, } from "lucide-react"; -import { a2aClientService, A2AExternalAgent, NacosConfig } from "@/services/a2aService"; +import { a2aClientService, A2AExternalAgent } from "@/services/a2aService"; import A2AChatModal from "./A2AChatModal"; +import NacosDiscoveryPanel from "./NacosDiscoveryPanel"; import log from "@/lib/logger"; const { Text, Title } = Typography; @@ -195,7 +196,7 @@ export default function A2AAgentDiscoveryModal({ const [chatAgent, setChatAgent] = useState(null); // Discovery mode - const [mode, setMode] = useState<"url" | 
"nacos">("url"); + const [mode, setMode] = useState<"url" | "nacos" | "list">("url"); const [loading, setLoading] = useState(false); const [discoveredAgents, setDiscoveredAgents] = useState([]); @@ -203,47 +204,11 @@ export default function A2AAgentDiscoveryModal({ const [url, setUrl] = useState(""); const [selectedAgent, setSelectedAgent] = useState(null); - // Nacos mode state - Add new config form (toggleable) - const [showAddNacosForm, setShowAddNacosForm] = useState(false); - const [newNacosConfig, setNewNacosConfig] = useState({ - name: "", - nacos_addr: "", - username: "", - password: "", - namespace_id: "public", - }); - const [savingNacosConfig, setSavingNacosConfig] = useState(false); - - // Nacos mode state - Existing configs list - const [nacosConfigs, setNacosConfigs] = useState([]); - const [loadingNacosConfigs, setLoadingNacosConfigs] = useState(false); - const [selectedNacosConfigId, setSelectedNacosConfigId] = useState(null); - - // Nacos scan state - const [agentNames, setAgentNames] = useState([]); - const [scanning, setScanning] = useState(false); - // List mode state const [agents, setAgents] = useState([]); const [loadingAgents, setLoadingAgents] = useState(false); const [refreshingId, setRefreshingId] = useState(null); - // Load Nacos configs and existing agents on mount - useEffect(() => { - if (open) { - loadNacosConfigs(); - loadAgents(); - } - }, [open]); - - const loadNacosConfigs = async () => { - setLoadingNacosConfigs(true); - const result = await a2aClientService.listNacosConfigs(); - if (result.success && result.data) { - setNacosConfigs(result.data); - } - setLoadingNacosConfigs(false); - }; const loadAgents = async () => { setLoadingAgents(true); @@ -275,7 +240,6 @@ export default function A2AAgentDiscoveryModal({ if (result.success && result.data) { setSelectedAgent(result.data); setDiscoveredAgents([result.data]); - loadAgents(); if (onDiscoverSuccess) { onDiscoverSuccess(); } @@ -285,90 +249,6 @@ export default function 
A2AAgentDiscoveryModal({ } }; - // Add new Nacos config - const handleAddNacosConfig = async () => { - if (!newNacosConfig.name.trim()) { - messageApi.error(t("a2a.discovery.nacosNameRequired")); - return; - } - if (!newNacosConfig.nacos_addr.trim()) { - messageApi.error(t("a2a.discovery.nacosAddrRequired")); - return; - } - - setSavingNacosConfig(true); - try { - const result = await a2aClientService.createNacosConfig({ - name: newNacosConfig.name.trim(), - nacos_addr: newNacosConfig.nacos_addr.trim(), - namespace_id: newNacosConfig.namespace_id || "public", - nacos_username: newNacosConfig.username.trim() || undefined, - nacos_password: newNacosConfig.password.trim() || undefined, - }); - - if (result.success && result.data) { - messageApi.success(t("a2a.discovery.addNacosConfigSuccess")); - await loadNacosConfigs(); - setSelectedNacosConfigId(result.data.config_id); - setNewNacosConfig({ name: "", nacos_addr: "", username: "", password: "", namespace_id: "public" }); - } else { - messageApi.error(result.message || t("a2a.discovery.addNacosConfigFailed")); - } - } catch (error) { - log.error("Failed to add Nacos config:", error); - messageApi.error(t("a2a.discovery.addNacosConfigFailed")); - } - setSavingNacosConfig(false); - }; - - // Delete Nacos config - const handleDeleteNacosConfig = async (configId: string) => { - const result = await a2aClientService.deleteNacosConfig(configId); - if (result.success) { - messageApi.success(t("a2a.discovery.deleteNacosConfigSuccess")); - if (selectedNacosConfigId === configId) { - setSelectedNacosConfigId(null); - } - await loadNacosConfigs(); - } else { - messageApi.error(result.message || t("a2a.discovery.deleteNacosConfigFailed")); - } - }; - - // Discover from Nacos - const handleDiscoverFromNacos = async () => { - if (!selectedNacosConfigId) { - messageApi.error(t("a2a.discovery.selectNacosConfig")); - return; - } - - if (agentNames.length === 0) { - messageApi.error(t("a2a.discovery.enterAgentNames")); - return; - } - 
- setScanning(true); - const result = await a2aClientService.discoverFromNacos({ - nacos_config_id: selectedNacosConfigId, - agent_names: agentNames, - namespace: newNacosConfig.namespace_id || "public", - }); - setScanning(false); - - if (result.success && result.data) { - setDiscoveredAgents(result.data); - if (result.data.length === 0) { - messageApi.warning(t("a2a.discovery.noAgentsFound")); - } else { - messageApi.success( - t("a2a.discovery.foundAgents", { count: result.data.length }) - ); - } - } else { - messageApi.error(result.message || t("a2a.discovery.failed")); - } - }; - // Refresh agent card const handleRefresh = async (agentId: string) => { setRefreshingId(agentId); @@ -456,59 +336,6 @@ export default function A2AAgentDiscoveryModal({ ); }; - // Nacos config table columns - const nacosConfigColumns = [ - { - title: t("a2a.discovery.nacosName"), - dataIndex: "name", - key: "name", - width: "30%", - ellipsis: true, - render: (text: string) => {text}, - }, - { - title: t("a2a.discovery.nacosAddr"), - dataIndex: "nacos_addr", - key: "nacos_addr", - width: "40%", - ellipsis: true, - render: (text: string) => {text}, - }, - { - title: t("a2a.discovery.namespace"), - dataIndex: "namespace_id", - key: "namespace_id", - width: "15%", - render: (text: string) => {text}, - }, - { - title: t("common.actions"), - key: "action", - width: "15%", - render: (_: any, record: NacosConfig) => ( - - - - - - - - {/* Add Nacos Config Form - Toggleable */} - {showAddNacosForm && ( - -
- - - setNewNacosConfig({ ...newNacosConfig, name: e.target.value }) - } - disabled={savingNacosConfig} - /> - - - - - setNewNacosConfig({ ...newNacosConfig, nacos_addr: e.target.value }) - } - disabled={savingNacosConfig} - /> - - - - - setNewNacosConfig({ ...newNacosConfig, namespace_id: e.target.value }) - } - disabled={savingNacosConfig} - /> - - - - - setNewNacosConfig({ ...newNacosConfig, username: e.target.value }) - } - disabled={savingNacosConfig} - /> - - - - - setNewNacosConfig({ ...newNacosConfig, password: e.target.value }) - } - disabled={savingNacosConfig} - /> - - -
- - -
-
-
- )} - - - record.config_id === selectedNacosConfigId ? "bg-blue-50" : "" - } - onRow={(record) => ({ - onClick: () => setSelectedNacosConfigId(record.config_id), - style: { cursor: "pointer" }, - })} - /> - - - {/* Scan Section - Only show when config is selected */} - {selectedNacosConfigId && ( - -
- - + setNacosConfig({ ...nacosConfig, name: e.target.value }) + } + disabled={savingNacosConfig} + /> + + + + + setNacosConfig({ ...nacosConfig, nacos_addr: e.target.value }) + } + disabled={savingNacosConfig} + /> + + + + + setNacosConfig({ ...nacosConfig, namespace_id: e.target.value }) + } + disabled={savingNacosConfig} + /> + + + + + setNacosConfig({ ...nacosConfig, username: e.target.value }) + } + disabled={savingNacosConfig} + /> + + + + + setNacosConfig({ ...nacosConfig, password: e.target.value }) + } + disabled={savingNacosConfig} + /> + + +
+ + + +
+ +
+ )} + +
+ record.config_id === selectedNacosConfigId ? "bg-blue-50" : "" + } + onRow={(record) => ({ + onClick: () => setSelectedNacosConfigId(record.config_id), + style: { cursor: "pointer" }, + })} + /> + + + {/* Scan Section - Only show when config is selected */} + {selectedNacosConfigId && ( + +
+ + setDataMateUrl(e.target.value)} - onBlur={() => { - // Validate on blur - const error = validateDataMateUrl(dataMateUrl); - setDataMateUrlError(error); - }} - placeholder={t( - "knowledgeBase.modal.dataMateConfig.urlPlaceholder" - )} - /> - - +
+ + setDataMateUrl(e.target.value)} + onBlur={() => { + // Validate on blur + const error = validateDataMateUrl(dataMateUrl); + setDataMateUrlError(error); + }} + placeholder={t( + "knowledgeBase.modal.dataMateConfig.urlPlaceholder" + )} + /> + {dataMateUrlError && ( +
{dataMateUrlError}
+ )} +
diff --git a/frontend/app/[locale]/knowledges/components/document/DocumentList.tsx b/frontend/app/[locale]/knowledges/components/document/DocumentList.tsx index 06940d9f0..023f2205a 100644 --- a/frontend/app/[locale]/knowledges/components/document/DocumentList.tsx +++ b/frontend/app/[locale]/knowledges/components/document/DocumentList.tsx @@ -21,6 +21,7 @@ import { LAYOUT, DOCUMENT_STATUS, } from "@/const/knowledgeBase"; +import { SUMMARY_FREQUENCY_OPTIONS_API, FrequencyOption } from "@/const/scheduler"; import knowledgeBaseService from "@/services/knowledgeBaseService"; import { modelService } from "@/services/modelService"; import { getTenantDefaultGroupId } from "@/services/groupService"; @@ -80,6 +81,10 @@ interface DocumentListProps { selectedEmbeddingModel?: string; onEmbeddingModelChange?: (value: string) => void; permission?: string; // User's permission for this knowledge base (READ_ONLY, EDIT, etc.) + + // Auto-summary frequency + summaryFrequency?: string | null; + onSummaryFrequencyChange?: (frequency: string | null) => void; // Upload related props isDragging?: boolean; @@ -123,6 +128,10 @@ const DocumentListContainer = forwardRef( selectedEmbeddingModel, onEmbeddingModelChange, permission, + + // Auto-summary frequency + summaryFrequency, + onSummaryFrequencyChange, // Upload related props isDragging = false, @@ -227,13 +236,14 @@ const DocumentListContainer = forwardRef( const [showDetail, setShowDetail] = React.useState(false); const [showChunk, setShowChunk] = React.useState(false); const [summary, setSummary] = useState(""); - const [isSummarizing, setIsSummarizing] = useState(false); - const [isEditing, setIsEditing] = useState(false); - const [isSaving, setIsSaving] = useState(false); - const [selectedModel, setSelectedModel] = useState(0); - const [availableModels, setAvailableModels] = useState([]); - const [isLoadingModels, setIsLoadingModels] = useState(false); - const { t } = useTranslation(); +const [isSummarizing, setIsSummarizing] = 
useState(false); + const [isEditing, setIsEditing] = useState(false); + const [isSaving, setIsSaving] = useState(false); + const [selectedModel, setSelectedModel] = useState(0); + const [availableModels, setAvailableModels] = useState([]); + const [isLoadingModels, setIsLoadingModels] = useState(false); + const [frequencyOptions, setFrequencyOptions] = useState([]); + const { t } = useTranslation(); const isDataMate = (knowledgeBaseSource || "").toLowerCase() === "datamate"; // Determine if user has read-only permission @@ -304,10 +314,30 @@ const DocumentListContainer = forwardRef( // Check if group select should be disabled (when permission is PRIVATE) const isGroupSelectDisabled = ingroupPermission === "PRIVATE"; - // Load available models when showing detail - useEffect(() => { - const loadModels = async () => { - if (showDetail && availableModels.length === 0) { + // Load frequency options from backend API + useEffect(() => { + const loadFrequencyOptions = async () => { + if (showDetail && frequencyOptions.length === 0) { + try { + const response = await fetch(SUMMARY_FREQUENCY_OPTIONS_API); + const data = await response.json(); + setFrequencyOptions(data.options || []); + } catch (error) { + log.error("Failed to load frequency options:", error); + // Fallback to default options if API fails + setFrequencyOptions([ + { value: "disabled", label: t("knowledgeBase.tag.autoSummary.off") }, + ]); + } + } + }; + loadFrequencyOptions(); + }, [showDetail, frequencyOptions.length, t]); + + // Load available models when showing detail + useEffect(() => { + const loadModels = async () => { + if (showDetail && availableModels.length === 0) { setIsLoadingModels(true); try { const models = await modelService.getLLMModels(); @@ -625,7 +655,7 @@ const DocumentListContainer = forwardRef( /> ) : showDetail ? ( -
+
{t("document.summary.title")} @@ -649,6 +679,29 @@ const DocumentListContainer = forwardRef( }))} />
+
+ + {t("knowledgeBase.tag.autoSummary.label")} + + handleFormChange("sttProvider", value)} + > + + + +
+ )} + + {/* STT Fields for Volcano Engine */} + {!form.isBatchImport && isSTTModel && form.sttProvider === "volcengine" && ( + <> +
+ + handleFormChange("modelAppid", e.target.value)} + autoComplete="new-password" + /> +
+
+ + handleFormChange("accessToken", e.target.value)} + autoComplete="new-password" + /> +
+ + )} + + {/* API Key (for DashScope STT) */} + {!form.isBatchImport && isSTTModel && form.sttProvider === "dashscope" && ( +
+ + handleFormChange("apiKey", e.target.value)} + autoComplete="new-password" + /> +
+ )} + + {/* API Key (for non-STT, non-TTS models) */} + {!form.isBatchImport && !isSTTModel && ( +
+ + handleFormChange("apiKey", e.target.value)} + autoComplete="new-password" + /> +
+ )} {/* Chunk Size Slider (Embedding model only) */} {isEmbeddingModel && ( @@ -1006,7 +1173,7 @@ export const ModelAddDialog = ({ )} {/* Max Tokens */} - {!isEmbeddingModel && !isRerankModel && !form.isBatchImport && ( + {!isEmbeddingModel && !form.isBatchImport && !isSTTModel && (
-
- 💬 -
- {t("auth.inviteCodeHint.step2")} - - {t("auth.inviteCodeHint.contributionWallLink")} - - {t("auth.inviteCodeHint.step2Action")} - - - -
-
🎁
{t("auth.inviteCodeHint.step3")} void; + onConfigComplete: ( + indexNames: string, + modelId: string, + modelDisplayName?: string + ) => void; +} + +export default function EmbeddingModelConfigDialog({ + isOpen, + knowledgeBaseName, + indexName, + isModelMismatch = false, + kbIdsToUpdate = [], + onClose, + onConfigComplete, +}: EmbeddingModelConfigDialogProps) { + const { t } = useTranslation("common"); + const { message } = App.useApp(); + const { data: allModels = [], isLoading: modelsLoading } = useModelList(); + + const [selectedModelId, setSelectedModelId] = useState(null); + const [isSubmitting, setIsSubmitting] = useState(false); + + // Filter available embedding models + const embeddingModels = allModels.filter( + (model) => model.type === "embedding" && model.connect_status === "available" + ); + + // Reset state when dialog opens + useEffect(() => { + if (isOpen) { + setSelectedModelId(null); + setIsSubmitting(false); + } + }, [isOpen]); + + // Handle model selection + const handleModelChange = (value: string) => { + setSelectedModelId(value); + }; + + // Handle submit + const handleSubmit = async () => { + if (!selectedModelId) { + message.warning(t("knowledgeBase.embeddingModel.selectPlaceholder")); + return; + } + + setIsSubmitting(true); + try { + // Determine which index names to update + const indexNamesToUpdate = + kbIdsToUpdate.length > 0 + ? 
kbIdsToUpdate.join(",") + : indexName; + + // Get model display name + const selectedModel = embeddingModels.find( + (m) => String(m.id) === selectedModelId || m.name === selectedModelId + ); + const modelDisplayName = selectedModel?.displayName || selectedModel?.name || selectedModelId; + + // Call API to update embedding model for all indices + const indexNameList = indexNamesToUpdate.split(",").filter(Boolean); + for (const idxName of indexNameList) { + await knowledgeBaseService.updateEmbeddingModel(idxName.trim(), selectedModelId); + } + + message.success(t("knowledgeBase.embeddingModel.updateSuccess")); + // Save values before resetting state + const completedModelId = selectedModelId; + const completedModelDisplayName = modelDisplayName; + // Reset local UI state only — do NOT call onClose() here. + // Closing is handled exclusively by onConfigComplete to ensure + // the parent has processed the result before the dialog unmounts. + setSelectedModelId(null); + setIsSubmitting(false); + // Call onConfigComplete which handles closing and parent state updates + onConfigComplete(indexNamesToUpdate, completedModelId, completedModelDisplayName); + } catch (error) { + log.error("[EmbeddingModelConfigDialog] API failed:", error); + message.error( + error instanceof Error ? 
error.message : t("knowledgeBase.embeddingModel.updateFailed") + ); + setIsSubmitting(false); + } + }; + + // Handle cancel + const handleCancel = () => { + if (isSubmitting) return; + setSelectedModelId(null); + setIsSubmitting(false); + onClose(); + }; + + // Get dialog title based on mode + const getDialogTitle = () => { + if (isModelMismatch) { + return t("knowledgeBase.embeddingModel.modelMismatchTitle"); + } + return t("knowledgeBase.embeddingModel.configRequiredTitle"); + }; + + // Get dialog description based on mode + const getDialogDescription = () => { + if (isModelMismatch) { + return t("knowledgeBase.embeddingModel.mismatchDescription"); + } + return t("knowledgeBase.embeddingModel.configDescription", { + name: knowledgeBaseName, + }); + }; + + return ( + + + {getDialogTitle()} +
+ } + open={isOpen} + onCancel={handleCancel} + okText={t("common.confirm")} + cancelText={t("common.cancel")} + onOk={handleSubmit} + confirmLoading={isSubmitting} + okButtonProps={{ + disabled: !selectedModelId, + }} + cancelButtonProps={{ + disabled: isSubmitting, + }} + centered + > +
+

{getDialogDescription()}

+ + {modelsLoading ? ( +
+ +
+ ) : embeddingModels.length === 0 ? ( +
+

+ {t("knowledgeBase.embeddingModel.noModelsAvailable")} +

+

+ {t("knowledgeBase.embeddingModel.noModelsAvailableDesc")} +

+
+ ) : ( +
+ +