opea-project
diff --git a/‎.github/workflows/_run-docker-compose.yml‎
Lines changed: 10 additions & 9 deletions b/‎.github/workflows/_run-docker-compose.yml‎
Lines changed: 10 additions & 9 deletions
diff --git a/‎.github/workflows/dockerhub-description.yml‎
Lines changed: 9 additions & 9 deletions b/‎.github/workflows/dockerhub-description.yml‎
Lines changed: 9 additions & 9 deletions
diff --git a/‎AgentQnA/README.md‎
Lines changed: 9 additions & 2 deletions b/‎AgentQnA/README.md‎
Lines changed: 9 additions & 2 deletions
diff --git a/‎AgentQnA/assets/img/opea-agent-setting.png‎
71 KB b/‎AgentQnA/assets/img/opea-agent-setting.png‎
71 KB
diff --git a/‎AgentQnA/assets/img/opea-agent-test.png‎
99.4 KB b/‎AgentQnA/assets/img/opea-agent-test.png‎
99.4 KB
diff --git a/‎AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml‎
Lines changed: 1 addition & 3 deletions b/‎AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml‎
Lines changed: 1 addition & 3 deletions b/‎AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎AgentQnA/docker_compose/intel/hpu/gaudi/set_env.sh‎
Lines changed: 1 addition & 1 deletion b/‎AgentQnA/docker_compose/intel/hpu/gaudi/set_env.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎AgentQnA/kubernetes/helm/cpu-values.yaml‎
Lines changed: 22 additions & 0 deletions b/‎AgentQnA/kubernetes/helm/cpu-values.yaml‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎AgentQnA/kubernetes/helm/gaudi-values.yaml‎
Lines changed: 19 additions & 0 deletions b/‎AgentQnA/kubernetes/helm/gaudi-values.yaml‎
Lines changed: 19 additions & 0 deletions
@@ -116,13 +116,16 @@ jobs:
         run: |
           sudo rm -rf ${{github.workspace}}/* || true
 
-          # clean up containers use ports
+          echo "Cleaning up containers using ports..."
           cid=$(docker ps --format '{{.Names}} : {{.Ports}}' | grep -v ' : $' | grep -v 5000 | awk -F' : ' '{print $1}')
           if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
-
           docker system prune -f
-          docker rmi $(docker images --filter reference="*/*/*:latest" -q) || true
-          docker rmi $(docker images --filter reference="*/*:ci" -q) || true
+
+          echo "Cleaning up images ..."
+          docker images --filter reference="*/*/*:latest" -q | xargs -r docker rmi && sleep 1s
+          docker images --filter reference="*/*:ci" -q | xargs -r docker rmi && sleep 1s
+          docker images --filter reference="*:5000/*/*" -q | xargs -r docker rmi && sleep 1s
+          docker images
 
       - name: Checkout out Repo
         uses: actions/checkout@v4
@@ -182,17 +185,15 @@ jobs:
         shell: bash
         if: cancelled() || failure()
         run: |
+          set -x
+
           cd ${{ github.workspace }}/${{ inputs.example }}
           export test_case=${{ matrix.test_case }}
           export hardware=${{ inputs.hardware }}
           bash ${{ github.workspace }}/.github/workflows/scripts/docker_compose_clean_up.sh "containers"
-
-          # clean up containers use ports
-          cid=$(docker ps --format '{{.Names}} : {{.Ports}}' | grep -v ' : $' | grep -v 5000 | awk -F' : ' '{print $1}')
-          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
+          echo "Finish docker clean up."
 
           docker system prune -f
-          docker rmi $(docker images --filter reference="*:5000/*/*" -q) || true
 
       - name: Publish pipeline artifact
         if: ${{ !cancelled() }}
 
@@ -517,7 +517,7 @@ jobs:
         password: ${{ secrets.DOCKERHUB_TOKEN }}
         repository: opea/gpt-sovits
         short-description: "The docker image exposed the OPEA GPT-SoVITS service for GenAI application use."
-        readme-filepath: GenAIComps/comps/tts/src/integrations/dependency/gpt-sovits/README.md
+        readme-filepath: GenAIComps/comps/third_parties/gpt-sovits/src/README.md
         enable-url-completion: false
 
     - name: Description for
@@ -697,7 +697,7 @@ jobs:
         password: ${{ secrets.DOCKERHUB_TOKEN }}
         repository: opea/lvm-llava
         short-description: "The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) server for GenAI application use."
-        readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/llava/README.md
+        readme-filepath: GenAIComps/comps/third_parties/llava/src/README.md
         enable-url-completion: false
 
     - name: Description for
@@ -707,7 +707,7 @@ jobs:
         password: ${{ secrets.DOCKERHUB_TOKEN }}
         repository: opea/lvm-video-llama
         short-description: "The docker image exposed the OPEA microservice running Video-Llama as a large visual model (LVM) for GenAI application use."
-        readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/video-llama/README.md
+        readme-filepath: GenAIComps/comps/third_parties/video-llama/src/README.md
         enable-url-completion: false
 
     - name: Description for
@@ -717,7 +717,7 @@ jobs:
         password: ${{ secrets.DOCKERHUB_TOKEN }}
         repository: opea/lvm-predictionguard
         short-description: "The docker image exposed the OPEA microservice running predictionguard as a large visual model (LVM) server for GenAI application use."
-        readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/predictionguard/README.md
+        readme-filepath: GenAIComps/comps/third_parties/predictionguard/src/README.md
         enable-url-completion: false
 
     - name: Description for
@@ -727,7 +727,7 @@ jobs:
         password: ${{ secrets.DOCKERHUB_TOKEN }}
         repository: opea/llava-gaudi
         short-description: "The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) service for GenAI application use on the Gaudi2."
-        readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/llava/README.md
+        readme-filepath: GenAIComps/comps/third_parties/llava/src/README.md
         enable-url-completion: false
 
     - name: Description for
@@ -737,7 +737,7 @@ jobs:
         password: ${{ secrets.DOCKERHUB_TOKEN }}
         repository: opea/lvm-llama-vision
         short-description: "The docker image exposed the OPEA microservice running Llama Vision as the base large visual model service for GenAI application use."
-        readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/llama-vision/README.md
+        readme-filepath: GenAIComps/comps/third_parties/llama-vision/src/README.md
         enable-url-completion: false
 
     - name: Description for
@@ -747,7 +747,7 @@ jobs:
         password: ${{ secrets.DOCKERHUB_TOKEN }}
         repository: opea/lvm-llama-vision-tp
         short-description: "The docker image exposed the OPEA microservice running Llama Vision with deepspeed as the base large visual model service for GenAI application use."
-        readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/llama-vision/README.md
+        readme-filepath: GenAIComps/comps/third_parties/llama-vision/src/README.md
         enable-url-completion: false
 
     - name: Description for lvm-llama-vision-guard
@@ -757,7 +757,7 @@ jobs:
         password: ${{ secrets.DOCKERHUB_TOKEN }}
         repository: opea/lvm-llama-vision-guard
         short-description: "The docker image exposed the OPEA microservice running Llama Vision Guard as the base large visual model service for GenAI application use."
-        readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/llama-vision/README.md
+        readme-filepath: GenAIComps/comps/third_parties/llama-vision/src/README.md
         enable-url-completion: false
 
     - name: Description for promptregistry-mongo
@@ -857,7 +857,7 @@ jobs:
         password: ${{ secrets.DOCKERHUB_TOKEN }}
         repository: opea/gpt-sovits
         short-description: "The docker image exposed the OPEA gpt-sovits service for GenAI application use."
-        readme-filepath: GenAIComps/comps/tts/src/integrations/dependency/gpt-sovits/README.md
+        readme-filepath: GenAIComps/comps/third_parties/gpt-sovits/src/README.md
         enable-url-completion: false
 
     - name: Description for nginx
 
@@ -208,9 +208,16 @@ bash run_ingest_data.sh
 
 ## Launch the UI
 
-Open a web browser to http://localhost:5173 to access the UI. Ensure the environment variable `AGENT_URL` is set to http://$ip_address:9090/v1/chat/completions in [ui/svelte/.env](./ui/svelte/.env) or else the UI may not work properly.
+Open a web browser to http://localhost:5173 to access the UI.
 
-The AgentQnA UI can be deployed locally or using Docker. To customize deployment, refer to the [AgentQnA UI Guide](./ui/svelte/README.md).
+1. Select `create Admin Account` and fill it in with arbitrary values.
+2. Add the OPEA agent endpoint `http://$ip_address:9090/v1`, which is an OpenAI-compatible API.
+
+![opea-agent-setting](assets/img/opea-agent-setting.png)
+
+3. Test the OPEA agent through the UI.
+
+![opea-agent-test](assets/img/opea-agent-test.png)
 
 ## [Optional] Deploy using Helm Charts
 
 
@@ -103,10 +103,8 @@ services:
   agent-ui:
     image: opea/agent-ui
     container_name: agent-ui
-    volumes:
-      - ${WORKDIR}/GenAIExamples/AgentQnA/ui/svelte/.env:/home/user/svelte/.env # test db
     ports:
-      - "5173:5173"
+      - "5173:8080"
     ipc: host
 
 networks:
 
@@ -106,12 +106,10 @@ services:
   agent-ui:
     image: opea/agent-ui
     container_name: agent-ui
-    volumes:
-      - ${WORKDIR}/GenAIExamples/AgentQnA/ui/svelte/.env:/home/user/svelte/.env
     environment:
       host_ip: ${host_ip}
     ports:
-      - "5173:5173"
+      - "5173:8080"
     ipc: host
   vllm-service:
     image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
 
@@ -42,7 +42,7 @@ if [ ! -f $WORKDIR/GenAIExamples/AgentQnA/tests/Chinook_Sqlite.sqlite ]; then
 fi
 
 # configure agent ui
-echo "AGENT_URL = 'http://$ip_address:9090/v1/chat/completions'" | tee ${WORKDIR}/GenAIExamples/AgentQnA/ui/svelte/.env
+# echo "AGENT_URL = 'http://$ip_address:9090/v1/chat/completions'" | tee ${WORKDIR}/GenAIExamples/AgentQnA/ui/svelte/.env
 
 # retriever
 export host_ip=$(hostname -I | awk '{print $1}')
 
@@ -0,0 +1,22 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+tgi:
+  enabled: false  # TGI is switched off; vLLM below is the enabled engine
+vllm:
+  enabled: true
+  LLM_MODEL_ID: "meta-llama/Meta-Llama-3-8B-Instruct"  # same model id the three agents below reference
+  extraCmdArgs: ["--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]  # extra vLLM flags: automatic tool choice with the llama3_json tool-call parser
+
+supervisor:
+  llm_endpoint_url: http://{{ .Release.Name }}-vllm  # presumably rendered per Helm release to reach the vLLM service; confirm the chart templates this value
+  llm_engine: vllm
+  model: "meta-llama/Meta-Llama-3-8B-Instruct"  # must stay in sync with vllm.LLM_MODEL_ID above
+ragagent:
+  llm_endpoint_url: http://{{ .Release.Name }}-vllm  # same endpoint as supervisor
+  llm_engine: vllm
+  model: "meta-llama/Meta-Llama-3-8B-Instruct"
+sqlagent:
+  llm_endpoint_url: http://{{ .Release.Name }}-vllm  # same endpoint as supervisor
+  llm_engine: vllm
+  model: "meta-llama/Meta-Llama-3-8B-Instruct"
@@ -4,13 +4,32 @@
 # Accelerate inferencing in heaviest components to improve performance
 # by overriding their subchart values
 
+tgi:
+  enabled: false  # TGI is switched off; vLLM below is the enabled engine
 vllm:
   enabled: true
+  accelDevice: "gaudi"
   image:
     repository: opea/vllm-gaudi
+  resources:
+    limits:
+      habana.ai/gaudi: 4  # four Gaudi devices, matching --tensor-parallel-size 4 in extraCmdArgs
+  LLM_MODEL_ID: "meta-llama/Llama-3.3-70B-Instruct"  # same model id the three agents below reference
+  OMPI_MCA_btl_vader_single_copy_mechanism: none
+  PT_HPU_ENABLE_LAZY_COLLECTIVES: true  # NOTE(review): parsed as a YAML boolean, not the string "true"; confirm the chart quotes it if it is exported as an env var
+  VLLM_SKIP_WARMUP: true  # NOTE(review): YAML boolean as above; confirm the consumer accepts a non-string value
+  shmSize: 16Gi
+  extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]  # extra vLLM flags: 4-way tensor parallelism plus automatic tool choice with the llama3_json parser
+
 supervisor:
   llm_endpoint_url: http://{{ .Release.Name }}-vllm
+  llm_engine: vllm
+  model: "meta-llama/Llama-3.3-70B-Instruct"  # must stay in sync with vllm.LLM_MODEL_ID above
 ragagent:
   llm_endpoint_url: http://{{ .Release.Name }}-vllm
+  llm_engine: vllm
+  model: "meta-llama/Llama-3.3-70B-Instruct"
 sqlagent:
   llm_endpoint_url: http://{{ .Release.Name }}-vllm
+  llm_engine: vllm
+  model: "meta-llama/Llama-3.3-70B-Instruct"