File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -22,9 +22,9 @@ if [ $# -gt 0 ]; then
2222 GGML_SYCL_DEVICE=$1
2323 echo " use $GGML_SYCL_DEVICE as main GPU"
2424 # use single GPU only
25- ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m ${MODEL_FILE} -p " ${INPUT_PROMPT} " -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT} -mg $GGML_SYCL_DEVICE -sm none
25+ ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -p " ${INPUT_PROMPT} " -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT} -mg $GGML_SYCL_DEVICE -sm none
2626
2727else
2828 # use multiple GPUs with same max compute units
29- ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m ${MODEL_FILE} -p " ${INPUT_PROMPT} " -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT}
29+ ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -p " ${INPUT_PROMPT} " -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT}
3030fi
Original file line number Diff line number Diff line change @@ -24,8 +24,8 @@ export UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1
2424if [ $# -gt 0 ]; then
2525 GGML_SYCL_DEVICE=$1
2626 echo " Using $GGML_SYCL_DEVICE as the main GPU"
27- ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m ${MODEL_FILE} -p " ${INPUT_PROMPT} " -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT} -mg $GGML_SYCL_DEVICE -sm none
27+ ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -p " ${INPUT_PROMPT} " -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT} -mg $GGML_SYCL_DEVICE -sm none
2828else
2929 # use multiple GPUs with same max compute units
30- ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m ${MODEL_FILE} -p " ${INPUT_PROMPT} " -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT}
30+ ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -p " ${INPUT_PROMPT} " -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT}
3131fi
Original file line number Diff line number Diff line change @@ -8,4 +8,4 @@ set INPUT2="Building a website can be done in 10 simple steps:\nStep 1:"
88:: support malloc device memory more than 4GB.
99set UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1
1010
11- .\build\bin\llama-cli.exe -m models\llama-2-7b.Q4_0.gguf -p %INPUT2% -n 400 -e -ngl 99 -s 0
11+ .\build\bin\llama-completion.exe -m models\llama-2-7b.Q4_0.gguf -p %INPUT2% -n 400 -e -ngl 99 -s 0
Original file line number Diff line number Diff line change @@ -8,4 +8,4 @@ set INPUT2="Building a website can be done in 10 simple steps:\nStep 1:"
88:: support malloc device memory more than 4GB.
99set UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1
1010
11- .\build\bin\llama-cli.exe -m models\Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf -p %INPUT2% -n 400 -s 0 -e -ngl 99
11+ .\build\bin\llama-completion.exe -m models\Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf -p %INPUT2% -n 400 -s 0 -e -ngl 99
You can’t perform that action at this time.
0 commit comments