4545 PR_TITLE=$(echo "$$PR_DATA" | jq -r '.title')
4646
4747 # Label gate: jq -e exits non-zero unless at least one entry of .labels in pr_data.json has a .name exactly equal to one of the accepted labels.
# When no label matches, the message below is printed and the script exits with status 0 (success), so nothing after this point runs.
# NOTE(review): the condition is "jq did not succeed", so any jq failure (e.g. an unreadable pr_data.json) also takes this branch and exits 0 - confirm that is intended.
48- if ! jq -e '.labels | any(.name == "autorelease: pending" or .name == "ci:run-evals")' pr_data.json > /dev/null; then
49- echo "PR does not have 'autorelease: pending' or 'ci:run-evals' label . Skipping execution."
48+ if ! jq -e '.labels | any(.name == "autorelease: pending" or .name == "ci:run-evals" or .name == "ci:run-evals-gemini" or .name == "ci:run-evals-claude" )' pr_data.json > /dev/null; then
49+ echo "PR does not have required labels . Skipping execution."
5050 exit 0
5151 fi
5252 echo "Execution label detected. Processing release version context..."
7272 export GOOGLE_CLOUD_PROJECT=$PROJECT_ID
7373 export EVAL_REPORTING_PROJECT=$_EVAL_REPORTING_PROJECT
7474
75-
7675 # Set environment variables for extension
7776 export CLOUD_SQL_POSTGRES_PROJECT=$PROJECT_ID
7877 export CLOUD_SQL_POSTGRES_INSTANCE=$_CLOUD_SQL_INSTANCE
@@ -84,18 +83,28 @@ steps:
8483 # Maps the decrypted DB_PASSWORD to the exact variable expected by gemini_cli and extension skills
# NOTE(review): "$$" is presumably the build config's escape for a literal "$", so the shell receives $DB_PASSWORD and expands it at run time;
# in plain bash "$$" would expand to the shell's PID instead - confirm against the build system's substitution rules.
8584 export CLOUD_SQL_POSTGRES_PASSWORD=$$DB_PASSWORD
8685
87- # Combine CI metadata with run config
88- cat /workspace/evals/ci_metadata.yaml >> /workspace/evals/run_config.yaml
86+ # Append the shared CI metadata (/workspace/evals/ci_metadata.yaml) to every file matching /workspace/evals/*run_config.yaml.
# The pattern also matches a file named exactly run_config.yaml, because "*" can match an empty prefix.
# The -f test restricts the loop to regular files; with default globbing it also skips the unexpanded pattern bash leaves in place when nothing matches.
# The metadata is added with ">>", so each execution of this loop appends another copy to the same file.
87+ for config in /workspace/evals/*run_config.yaml; do
88+ if [ -f "$config" ]; then
89+ echo "Appending CI metadata to $config"
90+ cat /workspace/evals/ci_metadata.yaml >> "$config"
91+ fi
92+ done
8993
90- # Substitute environment variables in model_config.yaml
94+ # Substitute environment variables in all configs
9195 python3 /workspace/evals/substitute_env.py
9296
9397 cd /evalbench
9498 export PYTHONPATH=./evalbench:./evalbench/evalproto
9599 export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
96100
97- echo "Launching Standalone Evaluation..."
98- python3 evalbench/evalbench.py --experiment_config=/workspace/evals/run_config.yaml
101+ # Run evalbench once per file matching /workspace/evals/*run_config.yaml, passing each file as --experiment_config.
# The script path evalbench/evalbench.py is relative, so it resolves against the current working directory at this point.
# The -f test restricts the loop to regular files; with default globbing it also skips the unexpanded pattern left when nothing matches.
# NOTE(review): the exit status of each run is not checked here; unless errexit (set -e) is enabled earlier in the script,
# a failed run does not stop the loop and only the last run's status is left as the loop's result - confirm.
102+ for config in /workspace/evals/*run_config.yaml; do
103+ if [ -f "$config" ]; then
104+ echo "Launching Evaluation for config: $config"
105+ python3 evalbench/evalbench.py --experiment_config="$config"
106+ fi
107+ done
99108
100109
101110availableSecrets :
0 commit comments