Commit 11ecb11

[ci][prf/dec] Simplify CI workflow by unifying backend-specific prefill-decode and batch-prefill-decode steps
1 parent 1569343 commit 11ecb11

1 file changed: 10 additions & 12 deletions

File tree

.github/workflows/build-and-run.yml

@@ -101,26 +101,22 @@ jobs:
           ./llama-tornado --gpu --${{ matrix.backend.name }} \
             --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
             --prompt "Say hello"
-      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode - PTX
-        if: matrix.backend.name == 'ptx'
+      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode
         run: |
           cd ${{ github.workspace }}
           export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --ptx \
+          ./llama-tornado --gpu --${{ matrix.backend.name }} \
             --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
             --prompt "Say hello" \
-            --with-prefill-decode \
-            --no-cuda-graphs
-      - name: PTX- FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode
-        if: matrix.backend.name == 'ptx'
+            --with-prefill-decode
+      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode
         run: |
           cd ${{ github.workspace }}
           export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --ptx \
+          ./llama-tornado --gpu --${{ matrix.backend.name }} \
             --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
             --prompt "Say hello" \
-            --with-prefill-decode --batch-prefill-size 32 \
-            --no-cuda-graphs
+            --with-prefill-decode --batch-prefill-size 32
       - name: PTX- FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode-CUDA-Graphs
         if: matrix.backend.name == 'ptx'
         run: |
@@ -129,7 +125,8 @@ jobs:
           ./llama-tornado --gpu --ptx \
             --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
             --prompt "Say hello" \
-            --with-prefill-decode
+            --with-prefill-decode \
+            --cuda-graphs
       - name: PTX - FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode-CUDA-Graphs
         if: matrix.backend.name == 'ptx'
         run: |
@@ -138,7 +135,8 @@ jobs:
           ./llama-tornado --gpu --ptx \
             --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
             --prompt "Say hello" \
-            --with-prefill-decode --batch-prefill-size 32
+            --with-prefill-decode --batch-prefill-size 32 \
+            --cuda-graphs
       - name: FP16 - Run Qwen3-4B-f16.gguf
         run: |
           cd ${{ github.workspace }}
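
For reference, the unified Prefill-Decode step after this change reads roughly as follows (reassembled from the added lines of the first hunk; the exact indentation and the surrounding steps are assumptions, not shown in the diff):

      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode
        run: |
          cd ${{ github.workspace }}
          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
          ./llama-tornado --gpu --${{ matrix.backend.name }} \
            --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
            --prompt "Say hello" \
            --with-prefill-decode

Because the step no longer carries the if: matrix.backend.name == 'ptx' guard and takes the backend flag from the matrix, it runs once per backend leg rather than only on PTX; the --no-cuda-graphs flag is dropped here, while the dedicated CUDA-Graphs steps kept below now pass --cuda-graphs explicitly.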
