@@ -100,37 +100,37 @@ jobs:
100100 ./llama-tornado --gpu --${{ matrix.backend.name }} \
101101 --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
102102 --prompt "Say hello"
103- - name : FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode
103+ - name : PTX - FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode
104104 run : |
105105 cd ${{ github.workspace }}
106106 export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
107- ./llama-tornado --gpu --${{ matrix.backend.name }} \
107+ ./llama-tornado --gpu --ptx \
108108 --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
109109 --prompt "Say hello" \
110110 --with-prefill-decode \
111111 --no-cuda-graphs
112- - name : FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode
112+ - name : PTX - FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode
113113 run : |
114114 cd ${{ github.workspace }}
115115 export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
116- ./llama-tornado --gpu --${{ matrix.backend.name }} \
116+ ./llama-tornado --gpu --ptx \
117117 --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
118118 --prompt "Say hello" \
119119 --with-prefill-decode --batch-prefill-size 32 \
120120 --no-cuda-graphs
121- - name : FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode-CUDA-Graphs
121+ - name : PTX - FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode-CUDA-Graphs
122122 run : |
123123 cd ${{ github.workspace }}
124124 export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
125- ./llama-tornado --gpu --${{ matrix.backend.name }} \
125+ ./llama-tornado --gpu --ptx \
126126 --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
127127 --prompt "Say hello" \
128128 --with-prefill-decode
129- - name : FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode-CUDA-Graphs
129+ - name : PTX - FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode-CUDA-Graphs
130130 run : |
131131 cd ${{ github.workspace }}
132132 export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
133- ./llama-tornado --gpu --${{ matrix.backend.name }} \
133+ ./llama-tornado --gpu --ptx \
134134 --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
135135 --prompt "Say hello" \
136136 --with-prefill-decode --batch-prefill-size 32
0 commit comments