#!/bin/bash

#######################################
# Print the command-line help text.
# Globals:   none
# Arguments: none
# Outputs:   writes the help text to stdout ($0 is expanded to the script
#            name as invoked, so the heredoc delimiter is left unquoted)
#######################################
usage() {
  cat << EOF
Usage: $0 [OPTIONS] <model_path1> <model_path2> ...

Options:
  -d, --devices DEVICES          CUDA devices to use (default: 0,1,2,3)
  -t, --tensor-parallel SIZE     Tensor parallel size (default: 4)
  -g, --gpu-memory UTILIZATION   GPU memory utilization (default: 0.9)
  -r, --result-dir DIR           Base result directory (default: ./results)
  -b, --batch-size SIZE          Batch size for auto tasks (default: auto)
      --tasks TASK1,TASK2,...    Comma-separated list of tasks to evaluate (default: ceval-valid,mmlu,gsm8k,humaneval)
  -n, --num-fewshot NUM          Number of few-shot examples (default: 0)
  -h, --help                     Show this help message

Examples:
  bash $0 -d 0,1 -t 2 --gpu-memory 0.8 /path/to/model1 /path/to/model2
  bash $0 --tasks ceval-valid,mmlu,gsm8k,humaneval /path/to/model1
EOF
}

# Default configuration. Every value below can be overridden by the
# corresponding command-line option handled in parse_args.
CUDA_VISIBLE_DEVICES="0,1,2,3"
INFERENCE_TP_SIZE=4
GPU_MEMORY_UTILIZATION=0.9
RESULT_BASE_DIR="./results"
BATCH_SIZE="auto"
TASKS=("ceval-valid" "mmlu" "gsm8k" "humaneval")
NUM_FEWSHOT=0

POSITIONAL_ARGS=()

#######################################
# Parse command-line options into the configuration globals.
# Globals:   CUDA_VISIBLE_DEVICES, INFERENCE_TP_SIZE, GPU_MEMORY_UTILIZATION,
#            RESULT_BASE_DIR, BATCH_SIZE, TASKS, NUM_FEWSHOT (written);
#            POSITIONAL_ARGS (reset, then filled with non-option arguments)
# Arguments: the script's command-line arguments
# Outputs:   error messages and the help text on stderr for bad input;
#            the help text on stdout for -h/--help
# Returns:   exits 0 after -h/--help; exits 1 on an unknown option or an
#            option that is missing its value
#######################################
parse_args() {
  POSITIONAL_ARGS=()

  while [[ $# -gt 0 ]]; do
    # Options that take a value must be followed by one. Without this check
    # a trailing option (e.g. "-d" as the last argument) makes "shift 2" fail
    # without shifting anything, and the loop would never terminate.
    case "$1" in
      -d|--devices|-t|--tensor-parallel|-g|--gpu-memory|-r|--result-dir|-b|--batch-size|--tasks|-n|--num-fewshot)
        if [[ $# -lt 2 ]]; then
          echo "Error: Option $1 requires an argument" >&2
          usage >&2
          exit 1
        fi
        ;;
    esac

    case "$1" in
      -d|--devices)
        CUDA_VISIBLE_DEVICES="$2"
        shift 2
        ;;
      -t|--tensor-parallel)
        INFERENCE_TP_SIZE="$2"
        shift 2
        ;;
      -g|--gpu-memory)
        GPU_MEMORY_UTILIZATION="$2"
        shift 2
        ;;
      -r|--result-dir)
        RESULT_BASE_DIR="$2"
        shift 2
        ;;
      -b|--batch-size)
        BATCH_SIZE="$2"
        shift 2
        ;;
      --tasks)
        # Split the comma-separated list into the TASKS array.
        IFS=',' read -ra TASKS <<< "$2"
        shift 2
        ;;
      -n|--num-fewshot)
        NUM_FEWSHOT="$2"
        shift 2
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      -*)
        echo "Error: Unknown option: $1" >&2
        usage >&2
        exit 1
        ;;
      *)
        POSITIONAL_ARGS+=("$1")
        shift
        ;;
    esac
  done
}

parse_args "$@"

# Replace the positional parameters with the model paths only.
set -- "${POSITIONAL_ARGS[@]}"

# Check if model paths are provided. At least one model path is required;
# report the problem on stderr together with the full help text.
if [[ $# -eq 0 ]]; then
  echo "Error: No model paths provided" >&2
  usage >&2
  exit 1
fi

# Set environment variables.
# CUDA_VISIBLE_DEVICES already holds the default or the -d/--devices value;
# exporting it makes it visible to the lm_eval child processes.
export CUDA_VISIBLE_DEVICES
export PYTHON_MULTIPROCESSING_METHOD=spawn
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export HF_ALLOW_CODE_EVAL=1

# Print the effective configuration before any evaluation starts.
echo "======================================================"
echo " Model Evaluation Configuration"
echo "======================================================"
echo "CUDA Visible Devices: $CUDA_VISIBLE_DEVICES"
echo "Tensor Parallel Size: $INFERENCE_TP_SIZE"
echo "GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
echo "Result Base Directory: $RESULT_BASE_DIR"
echo "Batch Size: $BATCH_SIZE"
echo "Number of Few-shot: $NUM_FEWSHOT"
echo "Tasks to Evaluate: ${TASKS[*]}"
echo "Number of Models: $#"
echo "Model Paths:"
for model_path in "$@"; do
  echo " - $model_path"
done
echo "======================================================"
echo

#######################################
# Run every configured task against a single model with lm_eval (vLLM
# backend), writing one JSON result file and one log file per task.
# Globals:   RESULT_BASE_DIR, TASKS, NUM_FEWSHOT, BATCH_SIZE,
#            GPU_MEMORY_UTILIZATION, INFERENCE_TP_SIZE (read)
# Arguments: $1 - path to the model; its last path component is used as the
#                 model name and as the result sub-directory name
# Outputs:   progress banners on stdout; lm_eval output on stdout and in
#            <result dir>/<task>.log; failures reported on stderr
# Returns:   0 if every task succeeded, 1 if the result directory could not
#            be created or at least one lm_eval run failed
#######################################
evaluate_model() {
  local model_path=$1
  local model_name result_path task status
  local -a eval_args
  local -a failed_tasks=()

  # Extract model name from path (last directory name)
  model_name=$(basename -- "$model_path")
  echo "======================================================"
  echo "Evaluating model: $model_name"
  echo "Model path: $model_path"

  # Create dedicated result directory for the model
  result_path="$RESULT_BASE_DIR/$model_name"
  if ! mkdir -p -- "$result_path"; then
    echo "Error: Cannot create result directory: $result_path" >&2
    return 1
  fi

  for task in "${TASKS[@]}"; do
    echo "=============================================="
    echo "Evaluating task: $task"
    echo "Number of few-shot: $NUM_FEWSHOT"
    echo "=============================================="

    # Build the command line as an array so that values containing spaces
    # (e.g. the model path) are passed to lm_eval as single arguments.
    eval_args=(
      --model vllm
      --model_args "pretrained=$model_path,add_bos_token=True,gpu_memory_utilization=$GPU_MEMORY_UTILIZATION,tensor_parallel_size=$INFERENCE_TP_SIZE"
      --tasks "$task"
      --num_fewshot "$NUM_FEWSHOT"
      --batch_size "$BATCH_SIZE"
    )
    # humaneval tasks are run with the explicit unsafe-code confirmation flag.
    if [[ "$task" == *"humaneval"* ]]; then
      eval_args+=(--confirm_run_unsafe_code)
    fi
    eval_args+=(--output_path "$result_path/$task.json")

    lm_eval "${eval_args[@]}" 2>&1 | tee "$result_path/$task.log"
    # tee's exit status would otherwise hide an lm_eval failure.
    status=${PIPESTATUS[0]}
    if [[ "$status" -ne 0 ]]; then
      echo "Error: lm_eval exited with status $status for task: $task" >&2
      failed_tasks+=("$task")
    fi
  done

  if [[ ${#failed_tasks[@]} -gt 0 ]]; then
    echo "Evaluation finished with failed tasks for $model_name: ${failed_tasks[*]}" >&2
    echo "Results saved to: $result_path"
    return 1
  fi

  echo "Evaluation completed for $model_name"
  echo "Results saved to: $result_path"
}

# Iterate over all provided model paths. A failure for one model is reported
# by evaluate_model and does not stop the remaining models from being run.
for MODEL_PATH in "$@"; do
  evaluate_model "$MODEL_PATH"
done