You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
help="Limit the number of examples per task. ""If <1, limit is a percentage of the total number of examples.",
233
+
help=("Limit examples per task: use -1 (or omit) for all samples, ""0 < limit < 1 for a fraction of the dataset, and limit >= 1 ""for an absolute sample count."),
help="Number of samples per question for model stability measurement. ""When n > 1, enables k-samples mode and computes EA, CA, IV, CR metrics.",
395
+
help=("Number of repeated generations per question for model stability ""measurement. Backward-compatible alias: --num_samples. ""When n > 1, enables k-samples ""mode and computes EA, CA, IV, CR metrics."),
394
396
)
395
397
# Optional baseline source for the paired t-test comparison; None when omitted.
parser.add_argument(
    "--baseline",
    type=str,
    default=None,
    help=(
        "Baseline for paired t-test comparison. "
        "Accepts: local JSONL path, hf://user/repo, "
        "or preset name (e.g., qwen25vl)."
    ),
)
0 commit comments