Skip to content

Commit 85d070c

Browse files
ci: add AGY authentication secrets to cloudbuild and introduce evaluation configuration files
1 parent 5b9bc21 commit 85d070c

3 files changed

Lines changed: 78 additions & 1 deletion

File tree

cloudbuild.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ options:
1818
steps:
1919

2020
# --- Evaluation Step ---
21-
- name: 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:latest'
21+
- name: 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:test'
2222
entrypoint: 'bash'
2323
# Decrypts the secrets from Secret Manager into the DB_PASSWORD and GITHUB_TOKEN environment variables
2424
secretEnv: ['DB_PASSWORD', 'GITHUB_TOKEN']
@@ -113,3 +113,7 @@ availableSecrets:
113113
env: 'DB_PASSWORD'
114114
- versionName: projects/$PROJECT_ID/secrets/GITHUB_TOKEN/versions/latest
115115
env: 'GITHUB_TOKEN'
116+
- versionName: projects/$PROJECT_ID/secrets/AGY_OAUTH_TOKEN/versions/latest
117+
env: 'AGY_OAUTH_TOKEN'
118+
- versionName: projects/$PROJECT_ID/secrets/AGY_INSTALLATION_ID/versions/latest
119+
env: 'AGY_INSTALLATION_ID'

evals/agy_cli_model.yaml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
agy_cli_version: "agy"
16+
generator: agy_cli
17+
18+
model: "Gemini 3.1 Pro (High)"
19+
20+
env:
21+
GOOGLE_CLOUD_PROJECT: "ext-test-cloud-sql-postgres"
22+
GOOGLE_CLOUD_LOCATION: "global"
23+
GOOGLE_GENAI_USE_VERTEXAI: "true"
24+
25+
CLOUD_SQL_POSTGRES_PROJECT: "${CLOUD_SQL_POSTGRES_PROJECT}"
26+
CLOUD_SQL_POSTGRES_INSTANCE: "${CLOUD_SQL_POSTGRES_INSTANCE}"
27+
CLOUD_SQL_POSTGRES_REGION: "${CLOUD_SQL_POSTGRES_REGION}"
28+
CLOUD_SQL_POSTGRES_DATABASE: "${CLOUD_SQL_POSTGRES_DATABASE}"
29+
CLOUD_SQL_POSTGRES_USER: "${CLOUD_SQL_POSTGRES_USER}"
30+
CLOUD_SQL_POSTGRES_PASSWORD: '${CLOUD_SQL_POSTGRES_PASSWORD}'
31+
CLOUD_SQL_POSTGRES_IP_TYPE: "${CLOUD_SQL_POSTGRES_IP_TYPE}"
32+
33+
setup:
34+
skills:
35+
- "/workspace/cloud-sql-postgresql"
36+

evals/agy_run_config.yaml

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
dataset_config: /workspace/evals/gemini_dataset.json
16+
dataset_format: agent-format
17+
18+
orchestrator: agent
19+
model_config: /workspace/evals/agy_cli_model.yaml
20+
simulated_user_model_config: /workspace/evals/gemini_2.5_pro_model.yaml
21+
22+
scorers:
23+
trajectory_matcher: {}
24+
goal_completion:
25+
model_config: datasets/model_configs/gemini_2.5_pro_model.yaml
26+
behavioral_metrics:
27+
model_config: datasets/model_configs/gemini_2.5_pro_model.yaml
28+
parameter_analysis:
29+
model_config: datasets/model_configs/gemini_2.5_pro_model.yaml
30+
turn_count: {}
31+
end_to_end_latency: {}
32+
tool_call_latency: {}
33+
token_consumption: {}
34+
35+
reporting:
36+
bigquery:
37+
gcp_project_id: "${EVAL_REPORTING_PROJECT}"

0 commit comments

Comments
 (0)