Skip to content

Commit 5ea8491

Browse files
authored
Merge branch 'main' into videoqna-bug
2 parents 0e911fc + 46ebb78 commit 5ea8491

153 files changed

Lines changed: 1477 additions & 4158 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/_run-docker-compose.yml

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -116,13 +116,16 @@ jobs:
116116
run: |
117117
sudo rm -rf ${{github.workspace}}/* || true
118118
119-
# clean up containers use ports
119+
echo "Cleaning up containers using ports..."
120120
cid=$(docker ps --format '{{.Names}} : {{.Ports}}' | grep -v ' : $' | grep -v 5000 | awk -F' : ' '{print $1}')
121121
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
122-
123122
docker system prune -f
124-
docker rmi $(docker images --filter reference="*/*/*:latest" -q) || true
125-
docker rmi $(docker images --filter reference="*/*:ci" -q) || true
123+
124+
echo "Cleaning up images ..."
125+
docker images --filter reference="*/*/*:latest" -q | xargs -r docker rmi && sleep 1s
126+
docker images --filter reference="*/*:ci" -q | xargs -r docker rmi && sleep 1s
127+
docker images --filter reference="*:5000/*/*" -q | xargs -r docker rmi && sleep 1s
128+
docker images
126129
127130
- name: Checkout out Repo
128131
uses: actions/checkout@v4
@@ -182,17 +185,15 @@ jobs:
182185
shell: bash
183186
if: cancelled() || failure()
184187
run: |
188+
set -x
189+
185190
cd ${{ github.workspace }}/${{ inputs.example }}
186191
export test_case=${{ matrix.test_case }}
187192
export hardware=${{ inputs.hardware }}
188193
bash ${{ github.workspace }}/.github/workflows/scripts/docker_compose_clean_up.sh "containers"
189-
190-
# clean up containers use ports
191-
cid=$(docker ps --format '{{.Names}} : {{.Ports}}' | grep -v ' : $' | grep -v 5000 | awk -F' : ' '{print $1}')
192-
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
194+
echo "Finish docker clean up."
193195
194196
docker system prune -f
195-
docker rmi $(docker images --filter reference="*:5000/*/*" -q) || true
196197
197198
- name: Publish pipeline artifact
198199
if: ${{ !cancelled() }}

.github/workflows/dockerhub-description.yml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -517,7 +517,7 @@ jobs:
517517
password: ${{ secrets.DOCKERHUB_TOKEN }}
518518
repository: opea/gpt-sovits
519519
short-description: "The docker image exposed the OPEA GPT-SoVITS service for GenAI application use."
520-
readme-filepath: GenAIComps/comps/tts/src/integrations/dependency/gpt-sovits/README.md
520+
readme-filepath: GenAIComps/comps/third_parties/gpt-sovits/src/README.md
521521
enable-url-completion: false
522522

523523
- name: Description for
@@ -697,7 +697,7 @@ jobs:
697697
password: ${{ secrets.DOCKERHUB_TOKEN }}
698698
repository: opea/lvm-llava
699699
short-description: "The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) server for GenAI application use."
700-
readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/llava/README.md
700+
readme-filepath: GenAIComps/comps/third_parties/llava/src/README.md
701701
enable-url-completion: false
702702

703703
- name: Description for
@@ -707,7 +707,7 @@ jobs:
707707
password: ${{ secrets.DOCKERHUB_TOKEN }}
708708
repository: opea/lvm-video-llama
709709
short-description: "The docker image exposed the OPEA microservice running Video-Llama as a large visual model (LVM) for GenAI application use."
710-
readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/video-llama/README.md
710+
readme-filepath: GenAIComps/comps/third_parties/video-llama/src/README.md
711711
enable-url-completion: false
712712

713713
- name: Description for
@@ -717,7 +717,7 @@ jobs:
717717
password: ${{ secrets.DOCKERHUB_TOKEN }}
718718
repository: opea/lvm-predictionguard
719719
short-description: "The docker image exposed the OPEA microservice running predictionguard as a large visual model (LVM) server for GenAI application use."
720-
readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/predictionguard/README.md
720+
readme-filepath: GenAIComps/comps/third_parties/predictionguard/src/README.md
721721
enable-url-completion: false
722722

723723
- name: Description for
@@ -727,7 +727,7 @@ jobs:
727727
password: ${{ secrets.DOCKERHUB_TOKEN }}
728728
repository: opea/llava-gaudi
729729
short-description: "The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) service for GenAI application use on the Gaudi2."
730-
readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/llava/README.md
730+
readme-filepath: GenAIComps/comps/third_parties/llava/src/README.md
731731
enable-url-completion: false
732732

733733
- name: Description for
@@ -737,7 +737,7 @@ jobs:
737737
password: ${{ secrets.DOCKERHUB_TOKEN }}
738738
repository: opea/lvm-llama-vision
739739
short-description: "The docker image exposed the OPEA microservice running Llama Vision as the base large visual model service for GenAI application use."
740-
readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/llama-vision/README.md
740+
readme-filepath: GenAIComps/comps/third_parties/llama-vision/src/README.md
741741
enable-url-completion: false
742742

743743
- name: Description for
@@ -747,7 +747,7 @@ jobs:
747747
password: ${{ secrets.DOCKERHUB_TOKEN }}
748748
repository: opea/lvm-llama-vision-tp
749749
short-description: "The docker image exposed the OPEA microservice running Llama Vision with deepspeed as the base large visual model service for GenAI application use."
750-
readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/llama-vision/README.md
750+
readme-filepath: GenAIComps/comps/third_parties/llama-vision/src/README.md
751751
enable-url-completion: false
752752

753753
- name: Description for lvm-llama-vision-guard
@@ -757,7 +757,7 @@ jobs:
757757
password: ${{ secrets.DOCKERHUB_TOKEN }}
758758
repository: opea/lvm-llama-vision-guard
759759
short-description: "The docker image exposed the OPEA microservice running Llama Vision Guard as the base large visual model service for GenAI application use."
760-
readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/llama-vision/README.md
760+
readme-filepath: GenAIComps/comps/third_parties/llama-vision/src/README.md
761761
enable-url-completion: false
762762

763763
- name: Description for promptregistry-mongo
@@ -857,7 +857,7 @@ jobs:
857857
password: ${{ secrets.DOCKERHUB_TOKEN }}
858858
repository: opea/gpt-sovits
859859
short-description: "The docker image exposed the OPEA gpt-sovits service for GenAI application use."
860-
readme-filepath: GenAIComps/comps/tts/src/integrations/dependency/gpt-sovits/README.md
860+
readme-filepath: GenAIComps/comps/third_parties/gpt-sovits/src/README.md
861861
enable-url-completion: false
862862

863863
- name: Description for nginx

AgentQnA/README.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -208,9 +208,16 @@ bash run_ingest_data.sh
208208
209209
## Launch the UI
210210

211-
Open a web browser to http://localhost:5173 to access the UI. Ensure the environment variable `AGENT_URL` is set to http://$ip_address:9090/v1/chat/completions in [ui/svelte/.env](./ui/svelte/.env) or else the UI may not work properly.
211+
Open a web browser to http://localhost:5173 to access the UI.
212212

213-
The AgentQnA UI can be deployed locally or using Docker. To customize deployment, refer to the [AgentQnA UI Guide](./ui/svelte/README.md).
213+
1. Choose `create Admin Account` and fill it in with arbitrary values.
214+
2. Add the OPEA agent endpoint `http://$ip_address:9090/v1`, which is an OpenAI-compatible API.
215+
216+
![opea-agent-setting](assets/img/opea-agent-setting.png)
217+
218+
3. Test the OPEA agent with the UI.
219+
220+
![opea-agent-test](assets/img/opea-agent-test.png)
214221

215222
## [Optional] Deploy using Helm Charts
216223

71 KB
Loading
99.4 KB
Loading

AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,8 @@ services:
103103
agent-ui:
104104
image: opea/agent-ui
105105
container_name: agent-ui
106-
volumes:
107-
- ${WORKDIR}/GenAIExamples/AgentQnA/ui/svelte/.env:/home/user/svelte/.env # test db
108106
ports:
109-
- "5173:5173"
107+
- "5173:8080"
110108
ipc: host
111109

112110
networks:

AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,12 +106,10 @@ services:
106106
agent-ui:
107107
image: opea/agent-ui
108108
container_name: agent-ui
109-
volumes:
110-
- ${WORKDIR}/GenAIExamples/AgentQnA/ui/svelte/.env:/home/user/svelte/.env
111109
environment:
112110
host_ip: ${host_ip}
113111
ports:
114-
- "5173:5173"
112+
- "5173:8080"
115113
ipc: host
116114
vllm-service:
117115
image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}

AgentQnA/docker_compose/intel/hpu/gaudi/set_env.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ if [ ! -f $WORKDIR/GenAIExamples/AgentQnA/tests/Chinook_Sqlite.sqlite ]; then
4242
fi
4343

4444
# configure agent ui
45-
echo "AGENT_URL = 'http://$ip_address:9090/v1/chat/completions'" | tee ${WORKDIR}/GenAIExamples/AgentQnA/ui/svelte/.env
45+
# echo "AGENT_URL = 'http://$ip_address:9090/v1/chat/completions'" | tee ${WORKDIR}/GenAIExamples/AgentQnA/ui/svelte/.env
4646

4747
# retriever
4848
export host_ip=$(hostname -I | awk '{print $1}')
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Copyright (C) 2025 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
tgi:
5+
enabled: false
6+
vllm:
7+
enabled: true
8+
LLM_MODEL_ID: "meta-llama/Meta-Llama-3-8B-Instruct"
9+
extraCmdArgs: ["--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
10+
11+
supervisor:
12+
llm_endpoint_url: http://{{ .Release.Name }}-vllm
13+
llm_engine: vllm
14+
model: "meta-llama/Meta-Llama-3-8B-Instruct"
15+
ragagent:
16+
llm_endpoint_url: http://{{ .Release.Name }}-vllm
17+
llm_engine: vllm
18+
model: "meta-llama/Meta-Llama-3-8B-Instruct"
19+
sqlagent:
20+
llm_endpoint_url: http://{{ .Release.Name }}-vllm
21+
llm_engine: vllm
22+
model: "meta-llama/Meta-Llama-3-8B-Instruct"

AgentQnA/kubernetes/helm/gaudi-values.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,32 @@
44
# Accelerate inferencing in heaviest components to improve performance
55
# by overriding their subchart values
66

7+
tgi:
8+
enabled: false
79
vllm:
810
enabled: true
11+
accelDevice: "gaudi"
912
image:
1013
repository: opea/vllm-gaudi
14+
resources:
15+
limits:
16+
habana.ai/gaudi: 4
17+
LLM_MODEL_ID: "meta-llama/Llama-3.3-70B-Instruct"
18+
OMPI_MCA_btl_vader_single_copy_mechanism: none
19+
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
20+
VLLM_SKIP_WARMUP: true
21+
shmSize: 16Gi
22+
extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
23+
1124
supervisor:
1225
llm_endpoint_url: http://{{ .Release.Name }}-vllm
26+
llm_engine: vllm
27+
model: "meta-llama/Llama-3.3-70B-Instruct"
1328
ragagent:
1429
llm_endpoint_url: http://{{ .Release.Name }}-vllm
30+
llm_engine: vllm
31+
model: "meta-llama/Llama-3.3-70B-Instruct"
1532
sqlagent:
1633
llm_endpoint_url: http://{{ .Release.Name }}-vllm
34+
llm_engine: vllm
35+
model: "meta-llama/Llama-3.3-70B-Instruct"

0 commit comments

Comments
 (0)