Skip to content

Commit aee357f

Browse files
authored
add EC-RAG feature (#2280)
Signed-off-by: Yongbozzz <yongbo.zhu@intel.com>
1 parent 3c7f1e4 commit aee357f

106 files changed

Lines changed: 5084 additions & 1208 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

EdgeCraftRAG/Dockerfile.server

Lines changed: 14 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,17 @@
11
FROM python:3.11-slim
22

33
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
4-
5-
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
6-
libjemalloc-dev \
7-
libmagic1 \
8-
libglib2.0-0 \
9-
poppler-utils \
10-
tesseract-ocr
11-
12-
RUN apt-get update && apt-get install -y gnupg wget git
13-
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
14-
gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
15-
RUN echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \
16-
tee /etc/apt/sources.list.d/intel-gpu-jammy.list
17-
RUN apt-get update && apt-get install -y \
18-
intel-opencl-icd intel-level-zero-gpu \
19-
intel-level-zero-gpu-raytracing \
20-
intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \
21-
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
22-
libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
23-
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
4+
RUN apt-get update && apt-get install -y gnupg2 wget git
5+
RUN apt-get remove -y libze-intel-gpu1 libigc1 libigdfcl1 libze-dev || true; \
6+
apt-get update; \
7+
apt-get install -y curl
8+
RUN curl -sL 'https://keyserver.ubuntu.com/pks/lookup?fingerprint=on&op=get&search=0x0C0E6AF955CE463C03FC51574D098D70AFBE5E1F' | tee /etc/apt/trusted.gpg.d/driver.asc
9+
RUN echo -e "Types: deb\nURIs: https://ppa.launchpadcontent.net/kobuk-team/intel-graphics/ubuntu/\nSuites: plucky\nComponents: main\nSigned-By: /etc/apt/trusted.gpg.d/driver.asc" > /etc/apt/sources.list.d/driver.sources
10+
RUN apt-get update && apt-get install -y libze-intel-gpu1 libze1 intel-metrics-discovery intel-opencl-icd clinfo intel-gsc && apt-get install -y libze-intel-gpu1 libze1 intel-metrics-discovery intel-opencl-icd clinfo intel-gsc && apt-get install -y libze-dev intel-ocloc libze-intel-gpu-raytracing
2411

2512
RUN useradd -m -s /bin/bash user && \
2613
mkdir -p /home/user && \
27-
chown -R user /home/user/
14+
chown -R user /home/user/
2815

2916
RUN mkdir /templates && \
3017
chown -R user /templates
@@ -33,17 +20,19 @@ RUN chown -R user /templates/default_prompt.txt
3320

3421
COPY ./edgecraftrag /home/user/edgecraftrag
3522

36-
RUN mkdir -p /home/user/ui_cache
23+
RUN mkdir -p /home/user/ui_cache
3724
ENV UI_UPLOAD_PATH=/home/user/ui_cache
3825

3926
USER user
4027

4128
WORKDIR /home/user/edgecraftrag
42-
RUN pip install --no-cache-dir --upgrade pip setuptools==70.0.0 && \
43-
pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt
29+
RUN pip3 install --no-cache-dir --upgrade setuptools==70.0.0 --break-system-packages && \
30+
pip3 install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt --break-system-packages
31+
32+
RUN pip3 install --no-cache-dir docarray==0.40.0 --break-system-packages
4433

4534
WORKDIR /home/user/
4635
RUN git clone https://github.com/openvinotoolkit/openvino.genai.git genai
4736
ENV PYTHONPATH="$PYTHONPATH:/home/user/genai/tools/llm_bench"
4837

49-
ENTRYPOINT ["python", "-m", "edgecraftrag.server"]
38+
ENTRYPOINT ["python3", "-m", "edgecraftrag.server"]

EdgeCraftRAG/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@ Retrieval-Augmented Generation system for edge solutions. It is designed to
55
curate the RAG pipeline to meet hardware requirements at edge with guaranteed
66
quality and performance.
77

8+
## What's New
9+
10+
1. Support Intel Arc B60 for model inference
11+
2. Support KBadmin for knowledge base management
12+
3. Support Experience Injection module in UI
13+
814
## Table of contents
915

1016
1. [Architecture](#architecture)
43.3 KB
Loading
21.4 KB
Loading
25.4 KB
Loading

EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md

Lines changed: 57 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -10,25 +10,27 @@ This document outlines the deployment process for Edge Craft Retrieval-Augmented
1010

1111
This section describes how to quickly deploy and test the EdgeCraftRAG service manually on Intel® Arc® platform. The basic steps are:
1212

13-
1. [Prerequisites](#prerequisites)
14-
2. [Access the Code](#access-the-code)
15-
3. [Prepare models](#prepare-models)
16-
4. [Prepare env variables and configurations](#prepare-env-variables-and-configurations)
17-
5. [Configure the Deployment Environment](#configure-the-deployment-environment)
18-
6. [Deploy the Service Using Docker Compose](#deploy-the-service-using-docker-compose)
19-
7. [Access UI](#access-ui)
20-
8. [Cleanup the Deployment](#cleanup-the-deployment)
13+
1. [Prerequisites](#1-prerequisites)
14+
2. [Access the Code](#2-access-the-code)
15+
3. [Prepare models](#3-prepare-models)
16+
4. [Prepare env variables and configurations](#4-prepare-env-variables-and-configurations)
17+
5. [Deploy the Service on Arc A770 Using Docker Compose](#5-deploy-the-service-on-intel-gpu-using-docker-compose)
18+
6. [Access UI](#6-access-ui)
19+
7. [Cleanup the Deployment](#7-cleanup-the-deployment)
2120

22-
### Prerequisites
21+
### 1. Prerequisites
2322

2423
EC-RAG supports vLLM deployment(default method) and local OpenVINO deployment for Intel Arc GPU. Prerequisites are shown as below:
2524
Hardware: Intel Arc A770
2625
OS: Ubuntu Server 22.04.1 or newer (at least 6.2 LTS kernel)
2726
Driver & libraries: please refer to [Installing GPU Drivers](https://dgpu-docs.intel.com/driver/installation-rolling.html#installing-gpu-drivers) for detailed driver & libraries setup
2827

28+
Hardware: Intel Arc B60
29+
Please refer to [Install Native Environment](https://github.com/intel/llm-scaler/tree/main/vllm#11-install-native-environment) for detailed setup
30+
2931
Below steps are based on **vLLM** as inference engine, if you want to choose **OpenVINO**, please refer to [OpenVINO Local Inference](../../../../docs/Advanced_Setup.md#openvino-local-inference)
3032

31-
### Access the Code
33+
### 2. Access the Code
3234

3335
Clone the GenAIExample repository and access the EdgeCraftRAG Intel® Arc® platform Docker Compose files and supporting scripts:
3436

@@ -43,7 +45,7 @@ Checkout a released version, such as v1.3:
4345
git checkout v1.3
4446
```
4547

46-
### Prepare models
48+
### 3. Prepare models
4749

4850
```bash
4951
# Prepare models for embedding, reranking:
@@ -62,7 +64,7 @@ modelscope download --model $LLM_MODEL --local_dir "${MODEL_PATH}/${LLM_MODEL}"
6264
# huggingface-cli download $LLM_MODEL --local-dir "${MODEL_PATH}/${LLM_MODEL}"
6365
```
6466

65-
### Prepare env variables and configurations
67+
### 4. Prepare env variables and configurations
6668

6769
Below steps are for single Intel Arc GPU inference, if you want to setup multi Intel Arc GPUs inference, please refer to [Multi-ARC Setup](../../../../docs/Advanced_Setup.md#multi-arc-setup)
6870

@@ -77,32 +79,23 @@ export HOST_IP=$ip_address # Your host ip
7779
export VIDEOGROUPID=$(getent group video | cut -d: -f3)
7880
export RENDERGROUPID=$(getent group render | cut -d: -f3)
7981

80-
# If you have a proxy configured, uncomment below line
81-
# export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server
82-
# export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server
82+
# If you have a proxy configured, execute below line
83+
export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server
84+
export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server
8385
# If you have a HF mirror configured, it will be imported to the container
8486
# export HF_ENDPOINT=https://hf-mirror.com # your HF mirror endpoint"
8587

8688
# Make sure all 3 folders have 1000:1000 permission, otherwise
87-
# chown 1000:1000 ${MODEL_PATH} ${PWD} # the default value of DOC_PATH and TMPFILE_PATH is PWD ,so here we give permission to ${PWD}
89+
chown 1000:1000 ${MODEL_PATH} ${PWD} # the default value of DOC_PATH and TMPFILE_PATH is PWD ,so here we give permission to ${PWD}
8890
# In addition, also make sure the .cache folder has 1000:1000 permission, otherwise
89-
# chown 1000:1000 -R $HOME/.cache
91+
chown 1000:1000 -R $HOME/.cache
9092
```
9193

9294
For more advanced env variables and configurations, please refer to [Prepare env variables for vLLM deployment](../../../../docs/Advanced_Setup.md#prepare-env-variables-for-vllm-deployment)
9395

94-
#### Generate nginx config file
95-
96-
```bash
97-
export VLLM_SERVICE_PORT_0=8100 # You can set your own port for vllm service
98-
# Generate your nginx config file
99-
# nginx-conf-generator.sh requires 2 parameters: DP_NUM and output filepath
100-
bash nginx/nginx-conf-generator.sh 1 nginx/nginx.conf
101-
# set NGINX_CONFIG_PATH
102-
export NGINX_CONFIG_PATH="${PWD}/nginx/nginx.conf"
103-
```
96+
### 5. Deploy the Service on Intel GPU Using Docker Compose
10497

105-
### Deploy the Service Using Docker Compose
98+
Set the Milvus DB and chat history round for inference:
10699

107100
```bash
108101
# EC-RAG support Milvus as persistent database, by default milvus is disabled, you can choose to set MILVUS_ENABLED=1 to enable it
@@ -112,12 +105,45 @@ export MILVUS_ENABLED=0
112105

113106
# EC-RAG support chat history round setting, by default chat history is disabled, you can set CHAT_HISTORY_ROUND to control it
114107
# export CHAT_HISTORY_ROUND= # change to your preference
108+
```
109+
110+
#### Option a. Deploy the Service on Arc A770 Using Docker Compose
111+
112+
```bash
113+
export VLLM_SERVICE_PORT_0=8100 # You can set your own port for vllm service
114+
# Generate your nginx config file
115+
# nginx-conf-generator.sh requires 2 parameters: DP_NUM and output filepath
116+
bash nginx/nginx-conf-generator.sh 1 nginx/nginx.conf
117+
# set NGINX_CONFIG_PATH
118+
export NGINX_CONFIG_PATH="${PWD}/nginx/nginx.conf"
115119

116120
# Launch EC-RAG service with compose
117121
docker compose -f docker_compose/intel/gpu/arc/compose_vllm.yaml up -d
118122
```
119123

120-
### Access UI
124+
#### Option b. Deploy the Service on Arc B60 Using Docker Compose
125+
126+
```bash
127+
# Besides MILVUS_ENABLED and CHAT_HISTORY_ROUND, below environments are exposed for vLLM config, you can change them to your preference:
128+
# export VLLM_SERVICE_PORT_B60=8086
129+
# export DTYPE=float16
130+
# export TP=1 # for multi GPU, you can change TP value
131+
# export DP=1
132+
# export ZE_AFFINITY_MASK=0 # for multi GPU, you can export ZE_AFFINITY_MASK=0,1,2...
133+
# export ENFORCE_EAGER=1
134+
# export TRUST_REMOTE_CODE=1
135+
# export DISABLE_SLIDING_WINDOW=1
136+
# export GPU_MEMORY_UTIL=0.8
137+
# export NO_ENABLE_PREFIX_CACHING=1
138+
# export MAX_NUM_BATCHED_TOKENS=8192
139+
# export DISABLE_LOG_REQUESTS=1
140+
# export MAX_MODEL_LEN=49152
141+
# export BLOCK_SIZE=64
142+
# export QUANTIZATION=fp8
143+
docker compose -f docker_compose/intel/gpu/arc/compose_vllm_b60.yaml up -d
144+
```
145+
146+
### 6. Access UI
121147

122148
Open your browser, access http://${HOST_IP}:8082
123149

@@ -126,12 +152,13 @@ Open your browser, access http://${HOST_IP}:8082
126152
Below is the UI front page, for detailed operations on UI and EC-RAG settings, please refer to [Explore_Edge_Craft_RAG](../../../../docs/Explore_Edge_Craft_RAG.md)
127153
![front_page](../../../../assets/img/front_page.png)
128154

129-
### Cleanup the Deployment
155+
### 7. Cleanup the Deployment
130156

131157
To stop the containers associated with the deployment, execute the following command:
132158

133159
```
134160
docker compose -f docker_compose/intel/gpu/arc/compose_vllm.yaml down
161+
# or docker compose -f docker_compose/intel/gpu/arc/compose_vllm_b60.yaml down
135162
```
136163

137164
All the EdgeCraftRAG containers will be stopped and then removed on completion of the "down" command.

EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ services:
7171
no_proxy: ${no_proxy}
7272
http_proxy: ${http_proxy}
7373
https_proxy: ${https_proxy}
74-
HF_ENDPOINT: ${HF_ENDPOINT}
7574
vLLM_ENDPOINT: ${vLLM_ENDPOINT:-http://${HOST_IP}:${NGINX_PORT:-8086}}
7675
ENABLE_BENCHMARK: ${ENABLE_BENCHMARK:-false}
7776
MAX_MODEL_LEN: ${MAX_MODEL_LEN:-5000}
@@ -80,7 +79,6 @@ services:
8079
- ${MODEL_PATH:-${PWD}}:/home/user/models
8180
- ${DOC_PATH:-${PWD}}:/home/user/docs
8281
- ${TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
83-
- ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
8482
- ${PROMPT_PATH:-${PWD}}:/templates/custom
8583
restart: always
8684
ports:

EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,17 +72,15 @@ services:
7272
no_proxy: ${no_proxy}
7373
http_proxy: ${http_proxy}
7474
https_proxy: ${https_proxy}
75-
HF_ENDPOINT: ${HF_ENDPOINT}
7675
vLLM_ENDPOINT: ${vLLM_ENDPOINT:-http://${HOST_IP}:${NGINX_PORT:-8086}}
7776
LLM_MODEL: ${LLM_MODEL}
7877
ENABLE_BENCHMARK: ${ENABLE_BENCHMARK:-false}
79-
MAX_MODEL_LEN: ${MAX_MODEL_LEN:-5000}
78+
MAX_MODEL_LEN: ${MAX_MODEL_LEN:-10240}
8079
CHAT_HISTORY_ROUND: ${CHAT_HISTORY_ROUND:-0}
8180
volumes:
8281
- ${MODEL_PATH:-${PWD}}:/home/user/models
8382
- ${DOC_PATH:-${PWD}}:/home/user/docs
8483
- ${TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
85-
- ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
8684
- ${PROMPT_PATH:-${PWD}}:/templates/custom
8785
restart: always
8886
ports:
@@ -157,7 +155,6 @@ services:
157155
no_proxy: ${no_proxy}
158156
http_proxy: ${http_proxy}
159157
https_proxy: ${https_proxy}
160-
HF_ENDPOINT: ${HF_ENDPOINT}
161158
MODEL_PATH: "/llm/models"
162159
SERVED_MODEL_NAME: ${LLM_MODEL}
163160
TENSOR_PARALLEL_SIZE: ${TENSOR_PARALLEL_SIZE:-1}

0 commit comments

Comments
 (0)