File tree Expand file tree Collapse file tree
src/main/java/org/beehive/gpullama3/tornadovm Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -93,7 +93,9 @@ class LlamaRunner:
9393 if args .batch_prefill_size is not None :
9494 cmd .append (f"-Dllama.prefillBatchSize={ args .batch_prefill_size } " )
9595
96- if args .no_cuda_graphs :
96+ if args .cuda_graphs :
97+ cmd .append ("-Dllama.cudaGraphs=true" )
98+ elif args .no_cuda_graphs :
9799 cmd .append ("-Dllama.cudaGraphs=false" )
98100
99101 # Debug options
@@ -505,11 +507,17 @@ def create_parser() -> argparse.ArgumentParser:
505507 "N>1: batched prefill processing N tokens per chunk (llama.prefillBatchSize=N)."
506508 ),
507509 )
510+ prefill_group .add_argument (
511+ "--cuda-graphs" ,
512+ dest = "cuda_graphs" ,
513+ action = "store_true" ,
514+ help = "Enable CUDA graph capture/replay (llama.cudaGraphs=true); PTX backend only" ,
515+ )
508516 prefill_group .add_argument (
509517 "--no-cuda-graphs" ,
510518 dest = "no_cuda_graphs" ,
511519 action = "store_true" ,
512- help = "Disable CUDA graph capture/replay (llama.cudaGraphs=false); useful for debugging " ,
520+ help = "Disable CUDA graph capture/replay (llama.cudaGraphs=false); no-op, disabled by default " ,
513521 )
514522
515523 # Advanced options
Original file line number Diff line number Diff line change @@ -26,9 +26,9 @@ public interface TornadoVMMasterPlan {
2626 boolean ENABLE_TORNADOVM_INIT_TIME = Boolean .parseBoolean (
2727 System .getProperty ("llama.EnableTimingForTornadoVMInit" , "False" ));
2828
29- /** When {@code false }, {@code withCUDAGraph()} is never called — useful for debugging . */
29+ /** When {@code true }, {@code withCUDAGraph()} is called — PTX/CUDA backend only . */
3030 boolean CUDA_GRAPHS = Boolean .parseBoolean (
31- System .getProperty ("llama.cudaGraphs" , "true " ));
31+ System .getProperty ("llama.cudaGraphs" , "false " ));
3232
3333 boolean WITH_PREFILL_DECODE = Boolean .getBoolean ("llama.withPrefillDecode" );
3434
You can’t perform that action at this time.
0 commit comments