Skip to content

Commit f7b239f

Browse files
committed
update the script
1 parent 0fc8d0c commit f7b239f

2 files changed

Lines changed: 126 additions & 3 deletions

File tree

examples/sycl/start_svr.sh

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#!/bin/bash
2+
3+
# MIT license
4+
# Copyright (C) 2024 Intel Corporation
5+
# SPDX-License-Identifier: MIT
6+
7+
#######################################
# Print the usage text for this script.
# Only options that the argument parser of this script accepts are listed,
# and the stated defaults are the defaults this script passes to the server.
# Arguments: none
# Outputs:   writes the usage text to stdout
#######################################
Help() {
    cat << EOF
Usage: $(basename "$0") [OPTIONS]

This script starts llama-server on Intel GPUs (SYCL) with specified options.

Options:
  -h,  --help                   Display this help message and exit.
  -c,  --context <value>        Set context length. Bigger need more memory.
  -m,  --model <value>          Full model file path.
  -mg, --main-gpu <value>       Set main GPU ID (0 - n) for single GPU mode.
  -sm, --split-mode <value>     How to split the model across multiple GPUs, one of:
                                - none: use one GPU only
                                - layer (default): split layers and KV across GPUs
                                - row: split rows across GPUs
  -ngl,--n-gpu-layers <value>   Max. number of layers to store in VRAM (default: 99)
  -lv, --log-verbosity <value>  Set the verbosity threshold. Messages with a higher verbosity will be
                                ignored. Values:
                                - 0: generic output
                                - 1: error
                                - 2: warning
                                - 3: info
                                - 4: debug

EOF
}
35+
36+
# Default settings; the command-line options parsed below may override them.
BIN_FILE=./build/bin/llama-server
SEED=0
GPUS_SETTING=""

MODEL_FILE=../models/Qwen3.5-4B-Q4_0.gguf
NGL=99
CONTEXT=4096
GGML_SYCL_DEVICE=-1
SPLIT_MODE=layer
LOG_VERBOSE=3

#######################################
# Abort when an option that takes a value is the last argument.
# Arguments: the remaining command-line arguments, option flag first
# Outputs:   an error message on stderr when the value is missing
# Returns:   exits the script with status 1 when the value is missing
#######################################
require_value() {
    if [[ $# -lt 2 ]]; then
        echo "Option $1 requires a value" >&2
        exit 1
    fi
}

#######################################
# Parse the command-line options into the settings variables above.
# Globals:   CONTEXT, MODEL_FILE, GGML_SYCL_DEVICE, SPLIT_MODE, NGL,
#            LOG_VERBOSE (written)
# Arguments: the script's command-line arguments
# Outputs:   usage text for -h/--help; an error on stderr for bad input
# Returns:   exits 0 after printing help, exits 1 on an unknown option or
#            on an option that is missing its value
#######################################
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -c|--context)
                require_value "$@"
                CONTEXT=$2
                # Consume both the option flag and its value.
                shift 2
                ;;
            -m|--model)
                require_value "$@"
                MODEL_FILE="$2"
                shift 2
                ;;
            -mg|--main-gpu)
                require_value "$@"
                GGML_SYCL_DEVICE=$2
                # Choosing a main GPU switches to single-GPU mode; a later
                # -sm on the command line can still override this.
                SPLIT_MODE=none
                shift 2
                ;;
            -sm|--split-mode)
                require_value "$@"
                SPLIT_MODE=$2
                shift 2
                ;;
            -ngl|--n-gpu-layers)
                require_value "$@"
                NGL=$2
                shift 2
                ;;
            -lv|--log-verbosity)
                require_value "$@"
                LOG_VERBOSE=$2
                shift 2
                ;;
            -h|--help)
                Help
                exit 0
                ;;
            *)
                # Anything else is treated as an error; no positional
                # arguments are accepted.
                echo "Invalid option: $1" >&2
                exit 1
                ;;
        esac
    done
}

parse_args "$@"
97+
98+
99+
100+
# Load the Intel oneAPI environment. When the file is missing, warn and carry
# on in case the environment was already set up by the caller.
if [[ -f /opt/intel/oneapi/setvars.sh ]]; then
    # shellcheck disable=SC1091
    source /opt/intel/oneapi/setvars.sh
else
    echo "Warning: /opt/intel/oneapi/setvars.sh not found" >&2
fi

#export GGML_SYCL_DEBUG=1

# ZES_ENABLE_SYSMAN=1 enables querying the free memory of a GPU through
# sycl::aspect::ext_intel_free_memory. Recommended when --split-mode = layer.

# Allow device memory allocations larger than 4GB.
export UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1
echo "UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=${UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS}"

#######################################
# Restrict the run to one GPU when a main GPU was requested.
# Globals:   GGML_SYCL_DEVICE, SPLIT_MODE (read)
#            GPUS_SETTING, ONEAPI_DEVICE_SELECTOR (written, single GPU only)
# Arguments: none
# Outputs:   a summary of the GPU selection on stdout
#######################################
select_sycl_devices() {
    # An empty or unset device ID is treated like -1 (no main GPU requested).
    if [ "${GGML_SYCL_DEVICE:--1}" -ne -1 ]; then
        echo "Use $GGML_SYCL_DEVICE as main GPU"
        # Use a single GPU only.
        GPUS_SETTING="-mg $GGML_SYCL_DEVICE -sm ${SPLIT_MODE}"
        export ONEAPI_DEVICE_SELECTOR="level_zero:${GGML_SYCL_DEVICE}"
        echo "ONEAPI_DEVICE_SELECTOR=${ONEAPI_DEVICE_SELECTOR}"
    else
        echo "Use all Intel GPUs, including iGPU & dGPU"
    fi
}

select_sycl_devices
119+
120+
# Build the server command once so that the echoed line and the executed
# command cannot drift apart. GPUS_SETTING is a space-separated string of
# flags (possibly empty), so it is left unquoted on purpose to be split into
# separate arguments.
# NOTE(review): --host 0.0.0.0 listens on all network interfaces; confirm
# that exposing the server beyond localhost is intended.
# shellcheck disable=SC2206
server_cmd=("${BIN_FILE}" -m "${MODEL_FILE}" -ngl "${NGL}" -s "${SEED}" -c "${CONTEXT}" ${GPUS_SETTING} -lv "${LOG_VERBOSE}" --mmap --host 0.0.0.0 --port 8000)
echo "run cmd: ZES_ENABLE_SYSMAN=1 ${server_cmd[*]}"
ZES_ENABLE_SYSMAN=1 "${server_cmd[@]}"
122+
123+

examples/sycl/test.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ SEED=0
3838
GPUS_SETTING=""
3939

4040
INPUT_PROMPT="Building a website can be done in 10 simple steps:\nStep 1:"
41-
MODEL_FILE=models/llama-2-7b.Q4_0.gguf
41+
MODEL_FILE=../models/llama-2-7b.Q4_0.gguf
4242
NGL=99
4343
CONTEXT=4096
4444
GGML_SYCL_DEVICE=-1
@@ -125,6 +125,6 @@ else
125125
echo "Use all Intel GPUs, including iGPU & dGPU"
126126
fi
127127

128-
echo "run cmd: ZES_ENABLE_SYSMAN=1 ${BIN_FILE} -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s ${SEED} -c ${CONTEXT} ${GPUS_SETTING} -lv ${LOG_VERBOSE} --mmap "
129-
ZES_ENABLE_SYSMAN=1 ${BIN_FILE} -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s ${SEED} -c ${CONTEXT} ${GPUS_SETTING} -lv ${LOG_VERBOSE} --mmap
128+
echo "run cmd: ZES_ENABLE_SYSMAN=1 ${BIN_FILE} -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 200 -e -ngl ${NGL} -s ${SEED} -c ${CONTEXT} ${GPUS_SETTING} -lv ${LOG_VERBOSE} --mmap "
129+
ZES_ENABLE_SYSMAN=1 ${BIN_FILE} -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 200 -e -ngl ${NGL} -s ${SEED} -c ${CONTEXT} ${GPUS_SETTING} -lv ${LOG_VERBOSE} --mmap
130130

0 commit comments

Comments
 (0)