Skip to content

Commit 42431c2

Browse files
Merge branch 'main' into fixes_for_gaudi
2 parents bc85086 + 13dd27e commit 42431c2

68 files changed

Lines changed: 2980 additions & 270 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/ISSUE_TEMPLATE/1_bug_template.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ body:
3232
- Mac
3333
- BSD
3434
- Other (Please let us know in description)
35+
- N/A
3536
validations:
3637
required: true
3738

@@ -56,6 +57,7 @@ body:
5657
- GPU-Nvidia
5758
- GPU-AMD
5859
- GPU-other (Please let us know in description)
60+
- N/A
5961
validations:
6062
required: true
6163

@@ -67,6 +69,7 @@ body:
6769
- label: Pull docker images from hub.docker.com
6870
- label: Build docker images from source
6971
- label: Other
72+
- label: N/A
7073
validations:
7174
required: true
7275

@@ -80,6 +83,7 @@ body:
8083
- label: Kubernetes Helm Charts
8184
- label: Kubernetes GMC
8285
- label: Other
86+
- label: N/A
8387
validations:
8488
required: true
8589

@@ -91,6 +95,7 @@ body:
9195
- Single Node
9296
- Multiple Nodes
9397
- Other
98+
- N/A
9499
default: 0
95100
validations:
96101
required: true

.github/ISSUE_TEMPLATE/2_feature_template.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ body:
3232
- Mac
3333
- BSD
3434
- Other (Please let us know in description)
35+
- N/A
3536
validations:
3637
required: true
3738

@@ -56,6 +57,7 @@ body:
5657
- GPU-Nvidia
5758
- GPU-AMD
5859
- GPU-other (Please let us know in description)
60+
- N/A
5961
validations:
6062
required: true
6163

@@ -67,6 +69,7 @@ body:
6769
- Single Node
6870
- Multiple Nodes
6971
- Other
72+
- N/A
7073
default: 0
7174
validations:
7275
required: true

.github/workflows/_helm-e2e.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ jobs:
8181
if [[ "${{ inputs.hardware }}" == "gaudi" ]]; then
8282
value_files="${value_files}\"${filename}\","
8383
fi
84+
elif [[ "$filename" == *"rocm"* ]]; then
85+
if [[ "${{ inputs.hardware }}" == "rocm" ]]; then
86+
value_files="${value_files}\"${filename}\","
87+
fi
8488
elif [[ "$filename" == *"nv"* ]]; then
8589
continue
8690
else

.github/workflows/nightly-docker-build-publish.yml

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ name: Nightly build/publish latest docker images
55

66
on:
77
schedule:
8-
- cron: "30 14 * * *" # UTC time
8+
- cron: "30 14 * * 1-5" # UTC time
99
workflow_dispatch:
1010

1111
env:
@@ -38,30 +38,44 @@ jobs:
3838
with:
3939
node: gaudi
4040

41-
build-and-test:
42-
needs: get-build-matrix
41+
build-images:
42+
needs: [get-build-matrix, build-comps-base]
43+
strategy:
44+
matrix:
45+
example: ${{ fromJSON(needs.get-build-matrix.outputs.examples_json) }}
46+
fail-fast: false
47+
uses: ./.github/workflows/_build_image.yml
48+
with:
49+
node: gaudi
50+
example: ${{ matrix.example }}
51+
inject_commit: true
52+
secrets: inherit
53+
54+
test-example:
55+
needs: [get-build-matrix]
4356
if: ${{ needs.get-build-matrix.outputs.examples_json != '' }}
4457
strategy:
4558
matrix:
4659
example: ${{ fromJSON(needs.get-build-matrix.outputs.examples_json) }}
4760
fail-fast: false
4861
uses: ./.github/workflows/_example-workflow.yml
4962
with:
50-
node: gaudi
63+
node: xeon
64+
build: false
5165
example: ${{ matrix.example }}
5266
test_compose: true
5367
inject_commit: true
5468
secrets: inherit
5569

5670
get-image-list:
57-
needs: get-build-matrix
71+
needs: [get-build-matrix]
5872
uses: ./.github/workflows/_get-image-list.yml
5973
with:
6074
examples: ${{ needs.get-build-matrix.outputs.EXAMPLES }}
6175

6276
publish:
63-
needs: [get-build-matrix, get-image-list, build-and-test]
64-
if: always() && ${{ needs.get-image-list.outputs.matrix != '' }}
77+
needs: [get-build-matrix, get-image-list, build-images]
78+
if: always()
6579
strategy:
6680
matrix:
6781
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}

.github/workflows/pr-chart-e2e.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ jobs:
4646
example=$(echo "$values_file" | cut -d'/' -f1) # CodeGen
4747
if [[ "$valuefile" == *"gaudi"* ]]; then
4848
hardware="gaudi"
49+
elif [[ "$valuefile" == *"rocm"* ]]; then
50+
hardware="rocm"
4951
elif [[ "$valuefile" == *"nv"* ]]; then
5052
continue
5153
else
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
name: Weekly test all examples on multiple HWs
5+
6+
on:
7+
schedule:
8+
- cron: "30 2 * * 6" # UTC time
9+
workflow_dispatch:
10+
11+
env:
12+
EXAMPLES: ${{ vars.NIGHTLY_RELEASE_EXAMPLES }}
13+
NODES: "gaudi,xeon,rocm,arc"
14+
15+
jobs:
16+
get-test-matrix:
17+
runs-on: ubuntu-latest
18+
outputs:
19+
examples: ${{ steps.get-matrix.outputs.examples }}
20+
nodes: ${{ steps.get-matrix.outputs.nodes }}
21+
steps:
22+
- name: Create Matrix
23+
id: get-matrix
24+
run: |
25+
examples=($(echo ${EXAMPLES} | tr ',' ' '))
26+
examples_json=$(printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.')
27+
echo "examples=$examples_json" >> $GITHUB_OUTPUT
28+
nodes=($(echo ${NODES} | tr ',' ' '))
29+
nodes_json=$(printf '%s\n' "${nodes[@]}" | sort -u | jq -R '.' | jq -sc '.')
30+
echo "nodes=$nodes_json" >> $GITHUB_OUTPUT
31+
32+
build-comps-base:
33+
needs: [get-test-matrix]
34+
strategy:
35+
matrix:
36+
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
37+
uses: ./.github/workflows/_build_comps_base_image.yml
38+
with:
39+
node: ${{ matrix.node }}
40+
41+
run-examples:
42+
needs: [get-test-matrix, build-comps-base]
43+
strategy:
44+
matrix:
45+
example: ${{ fromJson(needs.get-test-matrix.outputs.examples) }}
46+
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
47+
fail-fast: false
48+
uses: ./.github/workflows/_example-workflow.yml
49+
with:
50+
node: ${{ matrix.node }}
51+
example: ${{ matrix.example }}
52+
build: true
53+
test_compose: true
54+
test_helmchart: true
55+
secrets: inherit

AgentQnA/README.md

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
1. [Overview](#overview)
66
2. [Deploy with Docker](#deploy-with-docker)
7-
3. [Launch the UI](#launch-the-ui)
7+
3. [How to interact with the agent system with UI](#how-to-interact-with-the-agent-system-with-ui)
88
4. [Validate Services](#validate-services)
99
5. [Register Tools](#how-to-register-other-tools-with-the-ai-agent)
1010

@@ -144,21 +144,19 @@ source $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon/set_env.sh
144144

145145
### 2. Launch the multi-agent system. </br>
146146

147-
Two options are provided for the `llm_engine` of the agents: 1. open-source LLMs on Gaudi, 2. OpenAI models via API calls.
147+
We make it convenient to launch the whole system with Docker Compose, which includes microservices for the LLM, agents, UI, retrieval tool, vector database, dataprep, and telemetry. There are 3 docker compose files, which make it easy for users to pick and choose. Users can choose a retrieval tool other than the `DocIndexRetriever` example provided in our GenAIExamples repo, and can also choose not to launch the telemetry containers.
148148

149-
#### Gaudi
149+
#### Launch on Gaudi
150150

151-
On Gaudi, `meta-llama/Meta-Llama-3.1-70B-Instruct` will be served using vllm.
152-
By default, both the RAG agent and SQL agent will be launched to support the React Agent.
153-
The React Agent requires the DocIndexRetriever's [`compose.yaml`](../DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml) file, so two `compose.yaml` files need to be run with docker compose to start the multi-agent system.
154-
155-
> **Note**: To enable the web search tool, skip this step and proceed to the "[Optional] Web Search Tool Support" section.
151+
On Gaudi, `meta-llama/Meta-Llama-3.3-70B-Instruct` will be served using vllm. The command below will launch the multi-agent system with the `DocIndexRetriever` as the retrieval tool for the Worker RAG agent.
156152

157153
```bash
158154
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi/
159155
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose.yaml up -d
160156
```
161157

158+
> **Note**: To enable the web search tool, skip this step and proceed to the "[Optional] Web Search Tool Support" section.
159+
162160
To enable Open Telemetry Tracing, the `compose.telemetry.yaml` file needs to be merged with the default `compose.yaml` file.
163161
Gaudi example with Open Telemetry feature:
164162

@@ -183,11 +181,9 @@ docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/
183181

184182
</details>
185183

186-
#### Xeon
184+
#### Launch on Xeon
187185

188-
On Xeon, only OpenAI models are supported.
189-
By default, both the RAG Agent and SQL Agent will be launched to support the React Agent.
190-
The React Agent requires the DocIndexRetriever's [`compose.yaml`](../DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml) file, so two `compose yaml` files need to be run with docker compose to start the multi-agent system.
186+
On Xeon, only OpenAI models are supported. The command below will launch the multi-agent system with the `DocIndexRetriever` as the retrieval tool for the Worker RAG agent.
191187

192188
```bash
193189
export OPENAI_API_KEY=<your-openai-key>
@@ -206,9 +202,10 @@ bash run_ingest_data.sh
206202

207203
> **Note**: This is a one-time operation.
208204
209-
## Launch the UI
205+
## How to interact with the agent system with UI
210206

211-
Open a web browser to http://localhost:5173 to access the UI.
207+
The UI microservice is launched in the previous step with the other microservices.
208+
To access the UI, open `http://${ip_address}:5173` in a web browser. Note that `ip_address` here is the host IP of the UI microservice.
212209

213210
1. `create Admin Account` with a random value
214211
2. add the OPEA agent endpoint `http://$ip_address:9090/v1`, which is an OpenAI-compatible API

AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ services:
104104
- "8080:8000"
105105
ipc: host
106106
agent-ui:
107-
image: opea/agent-ui
107+
image: opea/agent-ui:latest
108108
container_name: agent-ui
109109
environment:
110110
host_ip: ${host_ip}
@@ -138,4 +138,4 @@ services:
138138
cap_add:
139139
- SYS_NICE
140140
ipc: host
141-
command: --model $LLM_MODEL_ID --tensor-parallel-size 4 --host 0.0.0.0 --port 8000 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 16384
141+
command: --model $LLM_MODEL_ID --tensor-parallel-size 4 --host 0.0.0.0 --port 8000 --block-size 128 --max-num-seqs 256 --max-seq-len-to-capture 16384

AgentQnA/kubernetes/helm/cpu-values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ tgi:
66
vllm:
77
enabled: true
88
LLM_MODEL_ID: "meta-llama/Meta-Llama-3-8B-Instruct"
9-
extraCmdArgs: ["--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
9+
extraCmdArgs: ["--max-seq-len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
1010

1111
supervisor:
1212
llm_endpoint_url: http://{{ .Release.Name }}-vllm

AgentQnA/kubernetes/helm/gaudi-values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ vllm:
1919
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
2020
VLLM_SKIP_WARMUP: true
2121
shmSize: 16Gi
22-
extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
22+
extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq-len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
2323

2424
supervisor:
2525
llm_endpoint_url: http://{{ .Release.Name }}-vllm

0 commit comments

Comments
 (0)