File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -499,17 +499,13 @@ def create_parser() -> argparse.ArgumentParser:
499499 help = "Execute the command after showing it (use with --show-command)" ,
500500 )
501501
502- # Prefill/ Decode optimization
503- prefill_group = parser .add_argument_group ("Prefill/ Decode Optimization " )
502+ # Prefill- Decode optimizations
503+ prefill_group = parser .add_argument_group ("Prefill- Decode Optimizations " )
504504 prefill_group .add_argument (
505505 "--with-prefill-decode" ,
506506 dest = "with_prefill_decode" ,
507507 action = "store_true" ,
508- help = (
509- "Enable prefill/decode separation. "
510- "Alone: sequential prefill (skip logits) + standard decode. "
511- "With --batch-prefill-size N (N>1): batched GPU prefill via TornadoVMMasterPlanWithBatchPrefillDecode."
512- ),
508+ help = "Enable single-token prefill decode" ,
513509 )
514510 prefill_group .add_argument (
515511 "--batch-prefill-size" ,
@@ -518,9 +514,7 @@ def create_parser() -> argparse.ArgumentParser:
518514 default = None ,
519515 metavar = "N" ,
520516 help = (
521- "Prefill chunk size (requires --with-prefill-decode). "
522- "N=1: sequential prefill (same as --with-prefill-decode alone). "
523- "N>1: batched prefill processing N tokens per chunk (llama.prefillBatchSize=N)."
517+ "Enable batching in prefill when --with-prefill-decode is active and N>1. "
524518 ),
525519 )
526520
You can’t perform that action at this time.
0 commit comments