Skip to content

Commit 7c4d434

Browse files
[prf/dec][ci] Extend CI workflow to include GPU prefill-decode and batched-prefill-decode test cases for Llama 3.2 1B FP16
1 parent 1183221 commit 7c4d434

1 file changed

Lines changed: 35 additions & 1 deletion

File tree

.github/workflows/build-and-run.yml

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,47 @@ jobs:
           export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
           tornado --version
           ./mvnw clean package -DskipTests
-      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf
+      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Standard
         run: |
           cd ${{ github.workspace }}
           export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
           ./llama-tornado --gpu --${{ matrix.backend.name }} \
             --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
             --prompt "Say hello"
+      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
+          ./llama-tornado --gpu --${{ matrix.backend.name }} \
+            --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
+            --prompt "Say hello" \
+            --with-prefill-decode \
+            --no-cuda-graphs
+      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
+          ./llama-tornado --gpu --${{ matrix.backend.name }} \
+            --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
+            --prompt "Say hello" \
+            --with-prefill-decode --batch-prefill-size 32 \
+            --no-cuda-graphs
+      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode-CUDA-Graphs
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
+          ./llama-tornado --gpu --${{ matrix.backend.name }} \
+            --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
+            --prompt "Say hello" \
+            --with-prefill-decode
+      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode-CUDA-Graphs
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
+          ./llama-tornado --gpu --${{ matrix.backend.name }} \
+            --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
+            --prompt "Say hello" \
+            --with-prefill-decode --batch-prefill-size 32
       - name: FP16 - Run Qwen3-4B-f16.gguf
         run: |
           cd ${{ github.workspace }}

0 commit comments

Comments
 (0)