From 85d070cfe4e228954da96bcdf3847a4f5c54e017 Mon Sep 17 00:00:00 2001 From: Omkar Gaikwad Date: Mon, 1 Jun 2026 13:47:56 +0000 Subject: [PATCH 1/4] ci: add AGY authentication secrets to cloudbuild and introduce evaluation configuration files --- cloudbuild.yaml | 6 +++++- evals/agy_cli_model.yaml | 36 ++++++++++++++++++++++++++++++++++++ evals/agy_run_config.yaml | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 evals/agy_cli_model.yaml create mode 100644 evals/agy_run_config.yaml diff --git a/cloudbuild.yaml b/cloudbuild.yaml index cceb631..fe8df76 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -18,7 +18,7 @@ options: steps: # --- Evaluation Step --- - - name: 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:latest' + - name: 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:test' entrypoint: 'bash' # Decrypts the secret from Secret Manager into the DB_PASSWORD environment variable secretEnv: ['DB_PASSWORD', 'GITHUB_TOKEN'] @@ -113,3 +113,7 @@ availableSecrets: env: 'DB_PASSWORD' - versionName: projects/$PROJECT_ID/secrets/GITHUB_TOKEN/versions/latest env: 'GITHUB_TOKEN' + - versionName: projects/$PROJECT_ID/secrets/AGY_OAUTH_TOKEN/versions/latest + env: 'AGY_OAUTH_TOKEN' + - versionName: projects/$PROJECT_ID/secrets/AGY_INSTALLATION_ID/versions/latest + env: 'AGY_INSTALLATION_ID' diff --git a/evals/agy_cli_model.yaml b/evals/agy_cli_model.yaml new file mode 100644 index 0000000..3ab0b30 --- /dev/null +++ b/evals/agy_cli_model.yaml @@ -0,0 +1,36 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +agy_cli_version: "agy" +generator: agy_cli + +model: "Gemini 3.1 Pro (High)" + +env: + GOOGLE_CLOUD_PROJECT: "ext-test-cloud-sql-postgres" + GOOGLE_CLOUD_LOCATION: "global" + GOOGLE_GENAI_USE_VERTEXAI: "true" + + CLOUD_SQL_POSTGRES_PROJECT: "${CLOUD_SQL_POSTGRES_PROJECT}" + CLOUD_SQL_POSTGRES_INSTANCE: "${CLOUD_SQL_POSTGRES_INSTANCE}" + CLOUD_SQL_POSTGRES_REGION: "${CLOUD_SQL_POSTGRES_REGION}" + CLOUD_SQL_POSTGRES_DATABASE: "${CLOUD_SQL_POSTGRES_DATABASE}" + CLOUD_SQL_POSTGRES_USER: "${CLOUD_SQL_POSTGRES_USER}" + CLOUD_SQL_POSTGRES_PASSWORD: '${CLOUD_SQL_POSTGRES_PASSWORD}' + CLOUD_SQL_POSTGRES_IP_TYPE: "${CLOUD_SQL_POSTGRES_IP_TYPE}" + +setup: + skills: + - "/workspace/cloud-sql-postgresql" + diff --git a/evals/agy_run_config.yaml b/evals/agy_run_config.yaml new file mode 100644 index 0000000..c06f9d4 --- /dev/null +++ b/evals/agy_run_config.yaml @@ -0,0 +1,37 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +dataset_config: /workspace/evals/gemini_dataset.json +dataset_format: agent-format + +orchestrator: agent +model_config: /workspace/evals/agy_cli_model.yaml +simulated_user_model_config: /workspace/evals/gemini_2.5_pro_model.yaml + +scorers: + trajectory_matcher: {} + goal_completion: + model_config: datasets/model_configs/gemini_2.5_pro_model.yaml + behavioral_metrics: + model_config: datasets/model_configs/gemini_2.5_pro_model.yaml + parameter_analysis: + model_config: datasets/model_configs/gemini_2.5_pro_model.yaml + turn_count: {} + end_to_end_latency: {} + tool_call_latency: {} + token_consumption: {} + +reporting: + bigquery: + gcp_project_id: "${EVAL_REPORTING_PROJECT}" \ No newline at end of file From 0d14498f2659001c17723b539a8e1f855640bf4b Mon Sep 17 00:00:00 2001 From: Omkar Gaikwad Date: Mon, 1 Jun 2026 13:50:33 +0000 Subject: [PATCH 2/4] ci: add AGY_OAUTH_TOKEN and AGY_INSTALLATION_ID to cloudbuild secretEnv --- cloudbuild.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudbuild.yaml b/cloudbuild.yaml index fe8df76..bd2c939 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -21,7 +21,7 @@ steps: - name: 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:test' entrypoint: 'bash' # Decrypts the secret from Secret Manager into the DB_PASSWORD environment variable - secretEnv: ['DB_PASSWORD', 'GITHUB_TOKEN'] + secretEnv: ['DB_PASSWORD', 'GITHUB_TOKEN', 'AGY_OAUTH_TOKEN', 'AGY_INSTALLATION_ID'] args: - '-c' - | From 1dab6ba3816f1fd3d7ada186121fe2e1512a33a4 Mon Sep 17 00:00:00 2001 From: Omkar Gaikwad Date: Mon, 1 Jun 2026 14:03:19 +0000 Subject: [PATCH 3/4] ci: seed antigravity CLI auth tokens in Cloud Build step --- cloudbuild.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cloudbuild.yaml b/cloudbuild.yaml index bd2c939..587d195 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -83,6 +83,15 @@ steps: # Maps the decrypted DB_PASSWORD to the exact variable expected by gemini_cli and extension skills export CLOUD_SQL_POSTGRES_PASSWORD=$$DB_PASSWORD + # Seed agy auth: this step overrides the image ENTRYPOINT (entrypoint: bash), + # so entrypoint.sh never runs -- seed the token files here instead. HOME is + # /builder/home in a Cloud Build step, which is exactly where the harness reads. + AGY_DIR="$$HOME/.gemini/antigravity-cli" + mkdir -p "$$AGY_DIR" + printf '%s' "$$AGY_OAUTH_TOKEN" > "$$AGY_DIR/antigravity-oauth-token" + printf '%s' "$$AGY_INSTALLATION_ID" > "$$AGY_DIR/installation_id" + chmod 600 "$$AGY_DIR/antigravity-oauth-token" "$$AGY_DIR/installation_id" + # Combine CI metadata with all available run configs for config in /workspace/evals/*run_config.yaml; do if [ -f "$config" ]; then From 9c71d66327eb2b653ab6bd6903b7299b8ccf0910 Mon Sep 17 00:00:00 2001 From: Omkar Gaikwad Date: Mon, 1 Jun 2026 16:22:42 +0000 Subject: [PATCH 4/4] ci: remove manual AGY auth token seeding from cloudbuild in favor of secret path configuration in agy_cli_model.yaml --- cloudbuild.yaml | 15 +-------------- evals/agy_cli_model.yaml | 7 +++++++ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/cloudbuild.yaml b/cloudbuild.yaml index 587d195..c652546 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -21,7 +21,7 @@ steps: - name: 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:test' entrypoint: 'bash' # Decrypts the secret from Secret Manager into the DB_PASSWORD environment variable - secretEnv: ['DB_PASSWORD', 'GITHUB_TOKEN', 'AGY_OAUTH_TOKEN', 'AGY_INSTALLATION_ID'] + secretEnv: ['DB_PASSWORD', 'GITHUB_TOKEN'] args: - '-c' - | @@ -83,15 +83,6 @@ steps: # Maps the decrypted DB_PASSWORD to the exact variable expected by gemini_cli and extension skills export CLOUD_SQL_POSTGRES_PASSWORD=$$DB_PASSWORD - # Seed agy auth: this step overrides the image ENTRYPOINT (entrypoint: bash), - # so entrypoint.sh never runs -- seed the token files here instead. HOME is - # /builder/home in a Cloud Build step, which is exactly where the harness reads. - AGY_DIR="$$HOME/.gemini/antigravity-cli" - mkdir -p "$$AGY_DIR" - printf '%s' "$$AGY_OAUTH_TOKEN" > "$$AGY_DIR/antigravity-oauth-token" - printf '%s' "$$AGY_INSTALLATION_ID" > "$$AGY_DIR/installation_id" - chmod 600 "$$AGY_DIR/antigravity-oauth-token" "$$AGY_DIR/installation_id" - # Combine CI metadata with all available run configs for config in /workspace/evals/*run_config.yaml; do if [ -f "$config" ]; then @@ -122,7 +113,3 @@ availableSecrets: env: 'DB_PASSWORD' - versionName: projects/$PROJECT_ID/secrets/GITHUB_TOKEN/versions/latest env: 'GITHUB_TOKEN' - - versionName: projects/$PROJECT_ID/secrets/AGY_OAUTH_TOKEN/versions/latest - env: 'AGY_OAUTH_TOKEN' - - versionName: projects/$PROJECT_ID/secrets/AGY_INSTALLATION_ID/versions/latest - env: 'AGY_INSTALLATION_ID' diff --git a/evals/agy_cli_model.yaml b/evals/agy_cli_model.yaml index 3ab0b30..8582b95 100644 --- a/evals/agy_cli_model.yaml +++ b/evals/agy_cli_model.yaml @@ -17,6 +17,13 @@ generator: agy_cli model: "Gemini 3.1 Pro (High)" +# agy is OAuth-only. The harness seeds these auth files into the sandbox from +# Secret Manager (needs ADC + secretAccessor on the build/runtime SA), so no +# interactive login or entrypoint seeding is required. Values are Secret +# Manager resource paths; `latest` is fine since OAuth tokens rotate. +agy_oauth_token_secret: "projects/${GOOGLE_CLOUD_PROJECT}/secrets/AGY_OAUTH_TOKEN/versions/latest" +agy_installation_id_secret: "projects/${GOOGLE_CLOUD_PROJECT}/secrets/AGY_INSTALLATION_ID/versions/latest" + env: GOOGLE_CLOUD_PROJECT: "ext-test-cloud-sql-postgres" GOOGLE_CLOUD_LOCATION: "global"