@@ -499,17 +499,13 @@ def create_parser() -> argparse.ArgumentParser:
499499 help = "Execute the command after showing it (use with --show-command)" ,
500500 )
501501
502- # Prefill/ Decode optimization
503- prefill_group = parser .add_argument_group ("Prefill/ Decode Optimization " )
502+ # Prefill- Decode optimizations
503+ prefill_group = parser .add_argument_group ("Prefill- Decode Optimizations " )
504504 prefill_group .add_argument (
505505 "--with-prefill-decode" ,
506506 dest = "with_prefill_decode" ,
507507 action = "store_true" ,
508- help = (
509- "Enable prefill/decode separation. "
510- "Alone: sequential prefill (skip logits) + standard decode. "
511- "With --batch-prefill-size N (N>1): batched GPU prefill via TornadoVMMasterPlanWithBatchPrefillDecode."
512- ),
508+ help = "Enable single-token prefill decode" ,
513509 )
514510 prefill_group .add_argument (
515511 "--batch-prefill-size" ,
@@ -518,22 +514,17 @@ def create_parser() -> argparse.ArgumentParser:
518514 default = None ,
519515 metavar = "N" ,
520516 help = (
521- "Prefill chunk size (requires --with-prefill-decode). "
522- "N=1: sequential prefill (same as --with-prefill-decode alone). "
523- "N>1: batched prefill processing N tokens per chunk (llama.prefillBatchSize=N)."
517+ "Enable batching in prefill when --with-prefill-decode is active and N>1. "
524518 ),
525519 )
526- prefill_group .add_argument (
520+
521+ # Advanced CUDA features
522+ advanced_cuda_features_group = parser .add_argument_group ("Advanced CUDA Features" )
523+ advanced_cuda_features_group .add_argument (
527524 "--cuda-graphs" ,
528525 dest = "cuda_graphs" ,
529526 action = "store_true" ,
530- help = "Enable CUDA graph capture/replay (llama.cudaGraphs=true); PTX backend only" ,
531- )
532- prefill_group .add_argument (
533- "--no-cuda-graphs" ,
534- dest = "no_cuda_graphs" ,
535- action = "store_true" ,
536- help = "Disable CUDA graph capture/replay (llama.cudaGraphs=false); no-op, disabled by default" ,
527+ help = "Enable CUDA graph capture/replay (llama.cudaGraphs=true); PTX backend only." ,
537528 )
538529
539530 # Advanced options
0 commit comments