Skip to content

Commit e0414a3

Browse files
Simplify prefill-decode argument help text and improve naming consistency
1 parent 4dba662 commit e0414a3

1 file changed

Lines changed: 4 additions & 10 deletions

File tree

llama-tornado

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -499,17 +499,13 @@ def create_parser() -> argparse.ArgumentParser:
499499
help="Execute the command after showing it (use with --show-command)",
500500
)
501501

502-
# Prefill/Decode optimization
503-
prefill_group = parser.add_argument_group("Prefill/Decode Optimization")
502+
# Prefill-Decode optimizations
503+
prefill_group = parser.add_argument_group("Prefill-Decode Optimizations")
504504
prefill_group.add_argument(
505505
"--with-prefill-decode",
506506
dest="with_prefill_decode",
507507
action="store_true",
508-
help=(
509-
"Enable prefill/decode separation. "
510-
"Alone: sequential prefill (skip logits) + standard decode. "
511-
"With --batch-prefill-size N (N>1): batched GPU prefill via TornadoVMMasterPlanWithBatchPrefillDecode."
512-
),
508+
help="Enable single-token prefill decode",
513509
)
514510
prefill_group.add_argument(
515511
"--batch-prefill-size",
@@ -518,9 +514,7 @@ def create_parser() -> argparse.ArgumentParser:
518514
default=None,
519515
metavar="N",
520516
help=(
521-
"Prefill chunk size (requires --with-prefill-decode). "
522-
"N=1: sequential prefill (same as --with-prefill-decode alone). "
523-
"N>1: batched prefill processing N tokens per chunk (llama.prefillBatchSize=N)."
517+
"Enable batching in prefill when --with-prefill-decode is active and N>1. "
524518
),
525519
)
526520

0 commit comments

Comments (0)