|
from unitxt.benchmark import Benchmark
from unitxt.catalog import add_to_catalog
from unitxt.standard import DatasetRecipe
4 | 4 |
# Cap on the number of test instances drawn from each subset, so every
# subset contributes a comparably sized (and bounded) evaluation set.
MAX_TEST_INSTANCES = 500

# Safety benchmark composed of four safety-evaluation datasets. Each
# subset is grouped by that dataset's own category field (e.g. "label",
# "group", "hazard") so results can be broken down per category.
# NOTE(review): `add_to_catalog` is imported but no registration call is
# visible in this view — confirm the benchmark is added to the catalog.
benchmark = Benchmark(
    subsets={
        "attaq": DatasetRecipe(
            card="cards.safety.attaq_gg",
            template_card_index="default",
            group_by=["label"],
            max_test_instances=MAX_TEST_INSTANCES,
        ),
        "provoq": DatasetRecipe(
            card="cards.safety.provoq",
            template_card_index="default",
            group_by=["group"],
            max_test_instances=MAX_TEST_INSTANCES,
        ),
        "airbench": DatasetRecipe(
            card="cards.safety.airbench2024",
            template_card_index="default",
            group_by=["l2-name"],
            max_test_instances=MAX_TEST_INSTANCES,
        ),
        "ailuminate": DatasetRecipe(
            card="cards.safety.mlcommons_ailuminate",
            template_card_index="default",
            group_by=["hazard"],
            max_test_instances=MAX_TEST_INSTANCES,
        ),
    }
)
|
0 commit comments