|
14 | 14 | --use_full_prompt |
15 | 15 |
|
16 | 16 | Usage: |
17 | | - # Run with default settings (evaluates on all tasks) |
18 | | - python mmlu_benchmark.py --model_id "meta-llama/Llama-2-7b-hf" --data_path /path/to/mmlu_prompts_examples.json |
| 17 | + # Run with default settings (evaluates on all tasks; loads data from arubique/flattened-MMLU unless --data_path is given) |
| 18 | + python mmlu_benchmark.py --model_id "meta-llama/Llama-2-7b-hf" |
19 | 19 |
|
20 | 20 | # Run with anchor points filtering (for DISCO prediction) |
21 | 21 | python mmlu_benchmark.py \\ |
22 | 22 | --model_id "alignment-handbook/zephyr-7b-sft-full" \\ |
23 | | - --data_path /path/to/mmlu_prompts_examples.json \\ |
24 | 23 | --anchor_points_path /path/to/anchor_points_disagreement.pkl |
25 | 24 |
|
26 | 25 | # Run with DISCO prediction (passing --disco_model_path enables it) |
27 | 26 | python mmlu_benchmark.py \\ |
28 | 27 | --model_id "alignment-handbook/zephyr-7b-sft-full" \\ |
29 | | - --data_path /path/to/mmlu_prompts_examples.json \\ |
30 | 28 | --anchor_points_path /path/to/anchor_points_disagreement.pkl \\ |
31 | 29 | --disco_model_path /path/to/fitted_weights.pkl \\ |
32 | 30 | --disco_transform_path /path/to/transform.pkl \\ |
33 | 31 | --pca 256 |
34 | 32 |
|
35 | 33 | # Run on a subset of tasks for testing |
36 | | - python mmlu_benchmark.py \\ |
37 | | - --model_id "meta-llama/Llama-2-7b-hf" \\ |
38 | | - --data_path /path/to/mmlu_prompts_examples.json \\ |
39 | | - --limit 10 |
| 34 | + python mmlu_benchmark.py --model_id "meta-llama/Llama-2-7b-hf" --limit 10 |
| 35 | +
|
| 36 | + # Override data source (path to JSON or Hugging Face repo id) |
| 37 | + python mmlu_benchmark.py --model_id "meta-llama/Llama-2-7b-hf" --data_path /path/to/mmlu_prompts_examples.json |
40 | 38 | """ |
41 | 39 |
|
42 | 40 | import argparse |
@@ -90,7 +88,7 @@ def parse_args(): |
90 | 88 | parser.add_argument( |
91 | 89 | "--data_path", |
92 | 90 | type=str, |
93 | | - required=True, |
| 91 | + default="arubique/flattened-MMLU", |
94 | 92 | help="Path to MMLU prompts JSON file, or Hugging Face dataset repo id (e.g. username/mmlu-prompts-examples)", |
95 | 93 | ) |
96 | 94 |
|
|
0 commit comments