Skip to content

Commit e211b99

Browse files
committed
fix CI issues
Signed-off-by: Yongbozzz <yongbo.zhu@intel.com>
1 parent 7ac16dd commit e211b99

4 files changed

Lines changed: 30 additions & 130 deletions

File tree

.github/code_spell_ignore.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
ModelIn
22
modelin
33
pressEnter
4-
PromptIn
4+
PromptIn
5+
OT

EdgeCraftRAG/edgecraftrag/api/v1/data.py

Lines changed: 0 additions & 84 deletions
Original file line number | Diff line number | Diff line change
@@ -140,90 +140,6 @@ async def get_files():
140140
return ctx.get_file_mgr().get_files()
141141

142142

143-
# GET chunks by document name
@data_app.get(path="/v1/data/{document_name}/nodes")
async def get_nodes_by_document_name(document_name: str) -> List[dict]:
    """Return the dumped nodes of the active pipeline that belong to a document.

    A node matches when ``document_name`` is a substring of its ``file_name``
    or ``file_path`` metadata entry; an exact ``file_name`` match is simply
    the special case of that rule.

    Args:
        document_name: Name, or name fragment, taken from the request path.

    Returns:
        One ``model_dump()`` dict per matching node. The list is empty when
        the node manager returns no nodes for the pipeline's node parser or
        when nothing matches.

    Raises:
        HTTPException: 404 when there is no active pipeline.
    """
    pl = ctx.get_pipeline_mgr().get_active_pipeline()
    if pl is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="No active pipeline")

    nodelist = ctx.get_node_mgr().get_nodes(pl.node_parser.idx)
    if not nodelist:
        return []

    matching_nodes = []
    for node in nodelist:
        metadata = getattr(node, "metadata", None)
        if not metadata:
            continue
        # A key that is present but holds None (or a non-string value) would
        # make the substring test raise TypeError and turn the request into a
        # 500, so normalise both fields to str before comparing.
        node_file_name = str(metadata.get("file_name") or "")
        node_file_path = str(metadata.get("file_path") or "")
        if document_name in node_file_name or document_name in node_file_path:
            matching_nodes.append(node.model_dump())

    return matching_nodes
169-
170-
171-
# GET chunk by node id
@data_app.get(path="/v1/data/nodes/{node_id}")
async def get_node_by_id(node_id: str) -> dict:
    """Return the dumped node of the active pipeline whose ``node_id`` matches.

    Args:
        node_id: Identifier taken from the request path.

    Returns:
        The ``model_dump()`` dict of the first node whose ``node_id`` equals
        the requested one.

    Raises:
        HTTPException: 404 when there is no active pipeline, when the node
            manager returns no nodes for the pipeline's node parser, or when
            no node carries the requested id.
    """
    active_pipeline = ctx.get_pipeline_mgr().get_active_pipeline()
    if active_pipeline is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="No active pipeline")

    candidates = ctx.get_node_mgr().get_nodes(active_pipeline.node_parser.idx)
    if not candidates:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="No nodes found")

    # First hit wins, mirroring a linear scan that returns on the first match.
    found = next((item for item in candidates if item.node_id == node_id), None)
    if found is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Node {node_id} not found")
    return found.model_dump()
189-
190-
191-
# GET available document names
@data_app.get(path="/v1/data/documents")
async def get_document_names():
    """Summarise the documents behind the active pipeline's nodes.

    Nodes are grouped by their ``file_name`` metadata entry; nodes without
    metadata or without a ``file_name`` are ignored. The ``file_path`` and
    ``file_type`` of a document are taken from the first node seen for it.

    Returns:
        A dict with ``total_documents`` (number of distinct file names) and
        ``documents`` (one entry per file name holding ``file_name``,
        ``file_path``, ``file_type`` and ``chunk_count``). Both keys are
        always present, including when there are no nodes at all.

    Raises:
        HTTPException: 404 when there is no active pipeline.
    """
    pl = ctx.get_pipeline_mgr().get_active_pipeline()
    if pl is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="No active pipeline")

    nodelist = ctx.get_node_mgr().get_nodes(pl.node_parser.idx)

    documents = {}
    # `or []` lets the empty case fall through to the common return below, so
    # the response has the same keys whether or not any nodes exist.
    for node in nodelist or []:
        metadata = getattr(node, "metadata", None)
        if not metadata:
            continue
        file_name = metadata.get("file_name")
        if not file_name:
            continue
        if file_name not in documents:
            documents[file_name] = {
                "file_name": file_name,
                "file_path": metadata.get("file_path"),
                "file_type": metadata.get("file_type", "unknown"),
                "chunk_count": 0,
            }
        documents[file_name]["chunk_count"] += 1

    return {
        "total_documents": len(documents),
        "documents": list(documents.values()),
    }
225-
226-
227143
# GET a file
228144
@data_app.get(path="/v1/data/files/{name}")
229145
async def get_file_docs(name):

EdgeCraftRAG/edgecraftrag/api/v1/prompt.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from edgecraftrag.context import ctx
66
from edgecraftrag.api.v1.pipeline import save_pipeline_configurations
77
from fastapi import FastAPI, File, HTTPException, UploadFile, status
8-
8+
from edgecraftrag.utils import DEFAULT_TEMPLATE
99
prompt_app = FastAPI()
1010

1111

@@ -61,6 +61,9 @@ async def get_tagged_prompt():
6161
except Exception as e:
6262
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
6363

64+
# GET default prompt for LLM ChatQnA
@prompt_app.get(path="/v1/chatqna/prompt/default")
async def get_default_prompt():
    """Return ``DEFAULT_TEMPLATE`` as imported from ``edgecraftrag.utils``."""
    return DEFAULT_TEMPLATE
6467

6568
# Reset prompt for LLM ChatQnA
6669
@prompt_app.post(path="/v1/chatqna/prompt/reset")

EdgeCraftRAG/tools/quick_start.sh

Lines changed: 24 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ get_enable_function() {
2323
}
2424

2525
function start_vllm_services() {
26-
COMPOSE_FILE="compose_vllm.yaml"
26+
COMPOSE_FILE="compose.yaml"
2727
echo "stop former service..."
2828
docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE down
2929

@@ -63,24 +63,11 @@ function start_vllm_services() {
6363
sudo chown -R 1000:1000 ${HF_CACHE}
6464
HF_ENDPOINT=https://hf-mirror.com
6565
# vllm ENV
66-
export NGINX_PORT=8086
67-
export vLLM_ENDPOINT="http://${HOST_IP}:${NGINX_PORT}"
68-
read -p "DP number(how many containers to run vLLM) [1] , press Enter to confirm, or type a new value:" DP_NUM; DP_NUM=${DP_NUM:-1}
69-
read -p "Tensor parallel size(your tp size [1]), press Enter to confirm, or type a new value:" TENSOR_PARALLEL_SIZE; TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE:-1}
70-
71-
for (( x=0; x<DP_NUM; x++ )); do
72-
start_gpu=$(( x * TENSOR_PARALLEL_SIZE ))
73-
default_gpu_list=$(seq -s, $start_gpu $(( start_gpu + TENSOR_PARALLEL_SIZE - 1 )))
74-
75-
read -p "selected XPU(your selected_XPU_${x} [${default_gpu_list}]) , press Enter to confirm, or type a new value:" input_gpu_list
76-
selected_gpu_list=${input_gpu_list:-$default_gpu_list}
66+
export VLLM_SERVICE_PORT_A770=8086
7767

78-
export SELECTED_XPU_${x}="$selected_gpu_list"
79-
export VLLM_SERVICE_PORT_${x}="8$((x+1))00"
80-
done
68+
read -p "Tensor parallel size(your tp size [1]), press Enter to confirm, or type a new value:" TENSOR_PARALLEL_SIZE; TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE:-1}
8169
CCL_DG2_USM=$(get_user_input "Set USM (Core=1, Xeon=0, default=0)" 0)
8270
export HOST_IP=${HOST_IP}
83-
export VLLM_SERVICE_PORT_0=8100
8471
# export ENV
8572
export MODEL_PATH=${MODEL_PATH}
8673
export DOC_PATH=${DOC_PATH}
@@ -90,18 +77,14 @@ function start_vllm_services() {
9077
export no_proxy="localhost, 127.0.0.1, 192.168.1.1, ${HOST_IP}"
9178
export MILVUS_ENABLED=${MILVUS_ENABLED}
9279
export CHAT_HISTORY_ROUND=${CHAT_HISTORY_ROUND}
93-
export SELECTED_XPU_0=${SELECTED_XPU_0}
9480
export TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE}
9581
export CCL_DG2_USM=${CCL_DG2_USM}
9682
export VIDEOGROUPID=$(getent group video | cut -d: -f3)
9783
export RENDERGROUPID=$(getent group render | cut -d: -f3)
9884

99-
bash $WORKPATH/nginx/nginx-conf-generator.sh $DP_NUM $WORKPATH/nginx/nginx.conf
100-
export NGINX_CONFIG_PATH="${WORKPATH}/nginx/nginx.conf"
10185

10286
# Start Docker Containers
103-
bash $WORKPATH/docker_compose/intel/gpu/arc/multi-arc-yaml-generator.sh $DP_NUM $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE
104-
docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE up -d
87+
docker compose --profile a770 -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE up -d
10588
echo "ipex-llm-serving-xpu is booting, please wait..."
10689
n=0
10790
until [[ "$n" -ge 100 ]]; do
@@ -176,6 +159,7 @@ function start_services() {
176159
export CHAT_HISTORY_ROUND=${CHAT_HISTORY_ROUND}
177160
export VIDEOGROUPID=$(getent group video | cut -d: -f3)
178161
export RENDERGROUPID=$(getent group render | cut -d: -f3)
162+
export MAX_MODEL_LEN=5000
179163

180164
# Start Docker Containers
181165
COMPOSE_FILE="compose.yaml"
@@ -199,10 +183,11 @@ function check_baai_folder() {
199183

200184
function quick_start_vllm_services() {
201185
WORKPATH=$(dirname "$PWD")
202-
COMPOSE_FILE="compose_vllm.yaml"
186+
COMPOSE_FILE="compose.yaml"
203187
EC_RAG_SERVICE_PORT=16010
204188
docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE down
205189

190+
ip_address=$(hostname -I | awk '{print $1}')
206191
export HOST_IP=${HOST_IP:-"${ip_address}"}
207192
export MODEL_PATH=${MODEL_PATH:-"${PWD}/models"}
208193
export DOC_PATH=${DOC_PATH:-"$WORKPATH/tests"}
@@ -211,21 +196,17 @@ function quick_start_vllm_services() {
211196
export MILVUS_ENABLED=${MILVUS_ENABLED:-1}
212197
export CHAT_HISTORY_ROUND=${CHAT_HISTORY_ROUND:-2}
213198
export HF_ENDPOINT=${HF_ENDPOINT:-https://hf-mirror.com}
214-
export NGINX_PORT=${NGINX_PORT:-8086}
215-
export NGINX_PORT_0=${NGINX_PORT_0:-8100}
216-
export VLLM_SERVICE_PORT_0=${VLLM_SERVICE_PORT_0:-8100}
217199
export TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE:-1}
218-
export SELECTED_XPU_0=${SELECTED_XPU_0:-0}
219200
export MAX_NUM_SEQS=${MAX_NUM_SEQS:-64}
220-
export MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-4000}
221-
export MAX_MODEL_LEN=${MAX_MODEL_LEN:-3000}
201+
export MAX_MODEL_LEN=${MAX_MODEL_LEN:-10240}
202+
export MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-10240}
222203
export LOAD_IN_LOW_BIT=${LOAD_IN_LOW_BIT:-fp8}
223204
export CCL_DG2_USM=${CCL_DG2_USM:-0}
224-
export vLLM_ENDPOINT=${vLLM_ENDPOINT:-"http://${HOST_IP}:${NGINX_PORT}"}
225205
export LLM_MODEL=${LLM_MODEL:-Qwen/Qwen3-8B}
226206
export LLM_MODEL_PATH=${LLM_MODEL_PATH:-"${MODEL_PATH}/Qwen/Qwen3-8B"}
227207
export VIDEOGROUPID=$(getent group video | cut -d: -f3)
228208
export RENDERGROUPID=$(getent group render | cut -d: -f3)
209+
export VLLM_SERVICE_PORT_A770=8086
229210

230211
check_baai_folder
231212
export HF_CACHE=${HF_CACHE:-"${HOME}/.cache"}
@@ -237,11 +218,8 @@ function quick_start_vllm_services() {
237218
sudo chown -R 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${TMPFILE_PATH}
238219
sudo chown -R 1000:1000 ${HF_CACHE}
239220
cd $WORKPATH/docker_compose/intel/gpu/arc
240-
bash $WORKPATH/nginx/nginx-conf-generator.sh $DP_NUM $WORKPATH/nginx/nginx.conf
241-
export NGINX_CONFIG_PATH=${NGINX_CONFIG_PATH:-"$WORKPATH/nginx/nginx.conf"}
242221

243-
bash $WORKPATH/docker_compose/intel/gpu/arc/multi-arc-yaml-generator.sh $DP_NUM $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE
244-
docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE up -d
222+
docker compose --profile a770 -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE up -d
245223
echo "ipex-llm-serving-xpu is booting, please wait..."
246224
n=0
247225
until [[ "$n" -ge 100 ]]; do
@@ -272,6 +250,7 @@ function quick_start_ov_services() {
272250
export MODEL_PATH=${MODEL_PATH:-"${PWD}/models"}
273251
export VIDEOGROUPID=$(getent group video | cut -d: -f3)
274252
export RENDERGROUPID=$(getent group render | cut -d: -f3)
253+
export MAX_MODEL_LEN=5000
275254

276255
check_baai_folder
277256
export HF_CACHE=${HF_CACHE:-"${HOME}/.cache"}
@@ -292,7 +271,7 @@ function quick_start_ov_services() {
292271

293272

294273
function start_vLLM_B60_services() {
295-
COMPOSE_FILE="compose_vllm_b60.yaml"
274+
COMPOSE_FILE="compose.yaml"
296275
echo "stop former service..."
297276
export MODEL_PATH=${MODEL_PATH:-"${PWD}/models"}
298277
docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE down
@@ -339,7 +318,7 @@ function start_vLLM_B60_services() {
339318
NO_ENABLE_PREFIX_CACHING=$(get_user_input "NO_ENABLE_PREFIX_CACHING (disable prefix caching, 1=disable/0=enable)" "1")
340319
MAX_NUM_BATCHED_TOKENS=$(get_user_input "MAX_NUM_BATCHED_TOKENS (max number of batched tokens)" "8192")
341320
DISABLE_LOG_REQUESTS=$(get_user_input "DISABLE_LOG_REQUESTS (disable request logs, 1=disable/0=enable)" "1")
342-
MAX_MODEL_LEN=$(get_user_input "MAX_MODEL_LEN (max model context length, e.g. 49152/10240)" "49152")
321+
MAX_MODEL_LEN=$(get_user_input "MAX_MODEL_LEN (max model context length, e.g. 40000/10240)" "40000")
343322
BLOCK_SIZE=$(get_user_input "BLOCK_SIZE (vLLM block size)" "64")
344323
QUANTIZATION=$(get_user_input "QUANTIZATION (model quantization method, e.g. fp8/int4)" "fp8")
345324
# export ENV
@@ -371,7 +350,7 @@ function start_vLLM_B60_services() {
371350
export QUANTIZATION=${QUANTIZATION}
372351

373352
# Start Docker Containers
374-
docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE up -d
353+
docker compose --profile b60 -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE up -d
375354
echo "ipex-llm-serving-xpu is booting, please wait..."
376355
n=0
377356
until [[ "$n" -ge 100 ]]; do
@@ -389,10 +368,11 @@ function start_vLLM_B60_services() {
389368

390369
function quick_start_vllm_B60_services() {
391370
WORKPATH=$(dirname "$PWD")
392-
COMPOSE_FILE="compose_vllm_b60.yaml"
371+
COMPOSE_FILE="compose.yaml"
393372
EC_RAG_SERVICE_PORT=16010
394373
docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE down
395374

375+
ip_address=$(hostname -I | awk '{print $1}')
396376
export HOST_IP=${HOST_IP:-"${ip_address}"}
397377
export MODEL_PATH=${MODEL_PATH:-"${PWD}/models"}
398378
export DOC_PATH=${DOC_PATH:-"$WORKPATH/tests"}
@@ -403,26 +383,26 @@ function quick_start_vllm_B60_services() {
403383
export VIDEOGROUPID=$(getent group video | cut -d: -f3)
404384
export RENDERGROUPID=$(getent group render | cut -d: -f3)
405385
# export vllm ENV
406-
export DP=${DP:-4}
386+
export DP=${DP:-1}
407387
export TP=${TP:-1}
408388
export DTYPE=${DTYPE:-float16}
409-
export ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK:-0,1,2,3}
389+
export ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK:-0}
410390
export ENFORCE_EAGER=${ENFORCE_EAGER:-1}
411391
export TRUST_REMOTE_CODE=${TRUST_REMOTE_CODE:-1}
412392
export DISABLE_SLIDING_WINDOW=${DISABLE_SLIDING_WINDOW:-1}
413393
export GPU_MEMORY_UTIL=${GPU_MEMORY_UTIL:-0.8}
414394
export NO_ENABLE_PREFIX_CACHING=${NO_ENABLE_PREFIX_CACHING:-1}
415395
export MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-8192}
416396
# Shell variable names are case-sensitive: read the same DISABLE_LOG_REQUESTS
# that is exported so a caller-supplied value is kept; default to 1 otherwise.
export DISABLE_LOG_REQUESTS=${DISABLE_LOG_REQUESTS:-1}
417-
export MAX_MODEL_LEN=${MAX_MODEL_LEN:-49152}
397+
export MAX_MODEL_LEN=${MAX_MODEL_LEN:-40000}
418398
export BLOCK_SIZE=${BLOCK_SIZE:-64}
419399
export QUANTIZATION=${QUANTIZATION:-fp8}
420400

421401

422402
check_baai_folder
423403
export no_proxy="localhost, 127.0.0.1, 192.168.1.1, ${HOST_IP}"
424404
sudo chown -R 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${TMPFILE_PATH}
425-
docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE up -d
405+
docker compose --profile b60 -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE up -d
426406
echo "ipex-llm-serving-xpu is booting, please wait..."
427407
n=0
428408
until [[ "$n" -ge 100 ]]; do
@@ -450,10 +430,10 @@ function main {
450430
start_services
451431
fi
452432
else
453-
export SERVICE_TYPE=${SERVICE_TYPE:-"vLLM_A770"}
454-
if [[ "$SERVICE_TYPE" == "vLLM_A770" || "$SERVICE_TYPE" == "vLLM" ]]; then
433+
export COMPOSE_PROFILES=${COMPOSE_PROFILES:-""}
434+
if [[ "$COMPOSE_PROFILES" == "vLLM_A770" || "$COMPOSE_PROFILES" == "vLLM" || "$COMPOSE_PROFILES" == "vllm_on_a770" ]]; then
455435
quick_start_vllm_services
456-
elif [[ "$SERVICE_TYPE" == "vLLM_B60" || "$SERVICE_TYPE" == "vLLM_b60" ]]; then
436+
elif [[ "$COMPOSE_PROFILES" == "vLLM_B60" || "$COMPOSE_PROFILES" == "vLLM_b60" || "$COMPOSE_PROFILES" == "vllm_on_b60" ]]; then
457437
quick_start_vllm_B60_services
458438
else
459439
quick_start_ov_services

0 commit comments

Comments
 (0)