├── __init__.py # Python package initialization file
├── main.py # Entry-point script that runs the entire workflow
├── pyproject.toml # Project configuration for Python tools
├── uv.lock # Dependency lock file for the uv package manager
├── .gitignore # Specifies intentionally untracked files to ignore
├── .pre-commit-config.yaml # Configuration for pre-commit hooks
├── .python-version # Python version specification
├── README.md # Project overview and documentation
│
├── data/ # Data directory for the datasets (100 random questions)
│   ├── ai2_arc_questions.json # AI2 Reasoning Challenge dataset
│   ├── mathqa_questions.json # Math QA questions dataset
│   ├── medmcqa_questions.json # Medical MCQ questions dataset
│   ├── mmlu_questions.json # MMLU benchmark questions
│   ├── pubmedqa_questions.json # PubMed QA dataset
│   └── tau_cqa_questions.json # TAU CommonsenseQA dataset
│
├── rephrased_data/ # Data directory with rephrased versions of the questions from the data/ folder
│   └── ...
│
├── docs/ # Documentation files
│   ├── CodePractices.md # Code practices and structure documentation
│   ├── datasets.md # Dataset documentation
│   ├── Install.md # Installation instructions
│   └── models.md # Model documentation
│
├── plots/ # Directory for visualization plots
│   ├── individual/ # Individual plots directory
│   └── comparison/ # Comparison plots directory
│
├── results/ # Results output directory
│   └── ... # Result files containing the LLMs' answers to the questions
│
│
├── analysis_output_comparison_betabinom/ # Bayesian results for kappa, mu, and gamma, plus SVG images
│   └── ...
│
├── csv/ # Tables of simple summary statistics (mean, standard deviation) for 4 datasets across all models
│   └── ...
│
└── src/ # Source code directory
    ├── __init__.py # Package initialization file
    ├── constants.py # Project constants and configurations
    ├── custom_logger.py # Custom logging setup
    ├── example_prompts.py # Example prompts for different datasets
    ├── llm_utils.py # LLM API interaction utilities
    ├── parser.py # Command-line argument parser
    ├── plots.py # Visualization and plotting utilities
    ├── questions.py # Dataset loading and question processing
    ├── Bayesian_Benchmark_Inter_Intra.py # Bayesian Beta-Binomial framework processing
    ├── Plot_bt_distance.py # Plotting of Bhattacharyya distances
    └── utils.py # General utility functions and evaluation logic