Skip to content

Commit 7c4d434

Browse files
[prf/dec][ci] Extend CI workflow to include GPU prefill-decode and batched-prefill-decode test cases for Llama 3.2 1B FP16
1 parent 1183221 commit 7c4d434

1 file changed

Lines changed: 35 additions & 1 deletion

File tree

.github/workflows/build-and-run.yml

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,47 @@ jobs:
           export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
           tornado --version
           ./mvnw clean package -DskipTests
-      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf
+      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Standard
         run: |
           cd ${{ github.workspace }}
           export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
           ./llama-tornado --gpu --${{ matrix.backend.name }} \
             --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
             --prompt "Say hello"
+      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
+          ./llama-tornado --gpu --${{ matrix.backend.name }} \
+            --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
+            --prompt "Say hello" \
+            --with-prefill-decode \
+            --no-cuda-graphs
+      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
+          ./llama-tornado --gpu --${{ matrix.backend.name }} \
+            --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
+            --prompt "Say hello" \
+            --with-prefill-decode --batch-prefill-size 32 \
+            --no-cuda-graphs
+      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode-CUDA-Graphs
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
+          ./llama-tornado --gpu --${{ matrix.backend.name }} \
+            --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
+            --prompt "Say hello" \
+            --with-prefill-decode
+      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode-CUDA-Graphs
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
+          ./llama-tornado --gpu --${{ matrix.backend.name }} \
+            --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
+            --prompt "Say hello" \
+            --with-prefill-decode --batch-prefill-size 32
       - name: FP16 - Run Qwen3-4B-f16.gguf
         run: |
           cd ${{ github.workspace }}

0 commit comments

Comments
 (0)