-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
49 lines (37 loc) · 1.57 KB
/
main.py
File metadata and controls
49 lines (37 loc) · 1.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from src.parser import get_experiment_parameters, parse_args
from src.questions import download_questions
from src.rephrase_questions import get_rephrased_questions
from src.utils import evaluate_models_on_datasets, evaluate_rephased_data_adhoc
def main():
    """Run the standard evaluation pipeline.

    Calls ``download_questions()``, parses the command-line arguments with
    ``parse_args()``, turns them into experiment parameters via
    ``get_experiment_parameters()``, and passes those parameters to
    ``evaluate_models_on_datasets()``.
    """
    download_questions()

    cli_args = parse_args()
    experiment = get_experiment_parameters(cli_args)
    # The helper yields exactly four values, in this order.
    dataset_names, model_names, run_count, question_count = experiment

    evaluate_models_on_datasets(
        datasets=dataset_names,
        models=model_names,
        N_RUNS=run_count,
        N_QUESTIONS=question_count,
    )
def rephrased_run():
    """Run the ad hoc evaluation on rephrased questions.

    Builds a fixed list of datasets and model identifiers, calls
    ``get_rephrased_questions()`` to produce ``N_REPHRASES`` rephrasings for
    those datasets, and then calls ``evaluate_rephased_data_adhoc()`` with the
    same datasets, the model list and the rephrasing count.
    """
    data = ['medmcqa', 'tau_cqa', 'mmlu', 'mathqa']
    N_REPHRASES = 100
    models = [
        'anthropic/claude-3.5-sonnet',
        'google/gemini-2.0-flash-001',
        'openai/o3-mini-high',
        # Plain ASCII "o1": a look-alike Greek omicron in this id would not
        # match the intended model identifier.
        'openai/o1',
        'deepseek/deepseek-chat',
        'deepseek/deepseek-chat-v3-0324',
        'meta-llama/llama-3.3-70b-instruct',
        'qwen/qwq-32b',
        'deepseek/deepseek-r1',
        'openai/gpt-4o-2024-11-20',
        "openai/o1-mini-high",
    ]
    # Running once to get the rephrased questions
    get_rephrased_questions(datasets_list=data, no_of_rephrases=N_REPHRASES)
    evaluate_rephased_data_adhoc(
        datasets=data,
        models=models,
        no_of_rephrasings=N_REPHRASES,
    )
if __name__ == "__main__":
    # Script entry point: run the standard pipeline first.
    main()
    # Ad hoc comparison on the rephrased data.
    # NOTE(review): the earlier comment said to "uncomment" this call, but it
    # is active; comment it out if only the standard run is wanted. Assumed to
    # belong inside the __main__ guard -- confirm.
    # The output folders have also been renamed manually in the rephrased data
    # folder: google/gemini-2.0-flash-001 -> gemini_2_flash and
    # openai/gpt-4o-2024-11-20 -> gpt_4o.
    rephrased_run()