@@ -93,13 +93,47 @@ jobs:
9393 export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
9494 tornado --version
9595 ./mvnw clean package -DskipTests
96- - name : FP16 - Run Llama-3.2-1B-Instruct-F16.gguf
96+ - name : FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Standard
9797 run : |
9898 cd ${{ github.workspace }}
9999 export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
100100 ./llama-tornado --gpu --${{ matrix.backend.name }} \
101101 --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
102102 --prompt "Say hello"
103+ - name : FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode
104+ run : |
105+ cd ${{ github.workspace }}
106+ export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
107+ ./llama-tornado --gpu --${{ matrix.backend.name }} \
108+ --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
109+ --prompt "Say hello" \
110+ --with-prefill-decode \
111+ --no-cuda-graphs
112+ - name : FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode
113+ run : |
114+ cd ${{ github.workspace }}
115+ export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
116+ ./llama-tornado --gpu --${{ matrix.backend.name }} \
117+ --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
118+ --prompt "Say hello" \
119+ --with-prefill-decode --batch-prefill-size 32 \
120+ --no-cuda-graphs
121+ - name : FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode-CUDA-Graphs
122+ run : |
123+ cd ${{ github.workspace }}
124+ export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
125+ ./llama-tornado --gpu --${{ matrix.backend.name }} \
126+ --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
127+ --prompt "Say hello" \
128+ --with-prefill-decode
129+ - name : FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode-CUDA-Graphs
130+ run : |
131+ cd ${{ github.workspace }}
132+ export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
133+ ./llama-tornado --gpu --${{ matrix.backend.name }} \
134+ --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
135+ --prompt "Say hello" \
136+ --with-prefill-decode --batch-prefill-size 32
103137 - name : FP16 - Run Qwen3-4B-f16.gguf
104138 run : |
105139 cd ${{ github.workspace }}
0 commit comments