From bd08f2d554778bd908182b5d59069bb9f764306a Mon Sep 17 00:00:00 2001 From: Diego Orellana Date: Wed, 1 Jul 2026 19:37:34 -0700 Subject: [PATCH] Measure Time to Create & Ready for Agent Runtime. PiperOrigin-RevId: 941416315 --- .../ai_agent_benchmark_helper.py | 1 + .../data/agents/common_utils.py | 15 ++++- .../data/agents/deploy_agent_engine.py | 41 +++++++++++++ .../providers/gcp/gcp_ai_agent_service.py | 60 ++++++++++++++++++- .../resources/ai_agent_service.py | 17 ++++++ 5 files changed, 131 insertions(+), 3 deletions(-) diff --git a/perfkitbenchmarker/ai_agent_benchmark_helper.py b/perfkitbenchmarker/ai_agent_benchmark_helper.py index c671eca836..626eb2902f 100644 --- a/perfkitbenchmarker/ai_agent_benchmark_helper.py +++ b/perfkitbenchmarker/ai_agent_benchmark_helper.py @@ -122,6 +122,7 @@ def Prepare(self) -> None: self.agent_service.agent_config = self._GetDefaultAgentConfig() self.BeforeCreateAgent() + self.spec.always_call_cleanup = True self.agent_service.Create() self.UploadValidatorScript() self.PostPrepare() diff --git a/perfkitbenchmarker/data/agents/common_utils.py b/perfkitbenchmarker/data/agents/common_utils.py index 5069eea8f7..4e1705ae41 100644 --- a/perfkitbenchmarker/data/agents/common_utils.py +++ b/perfkitbenchmarker/data/agents/common_utils.py @@ -3,7 +3,7 @@ import abc import collections.abc import json -from typing import Any +from typing import Any, Self import urllib.parse from absl import logging @@ -38,6 +38,19 @@ class PromptConfig[AgentConfigT](BasePromptConfig): run_uri: str agent_config: AgentConfigT + @classmethod + def create_for_initial_prompt(cls, deployment_config: Any) -> Self: + return cls( + agent=deployment_config.agent, + framework=deployment_config.framework, + prompt=deployment_config.initial_prompt, + output_dir="", + session_id="timetoreadysession", + user_id="timetoreadyuser", + run_uri=deployment_config.run_uri, + agent_config=deployment_config.agent_config, + ) + class BaseEndpoint(abc.ABC): """An abstract interface for executing agents across different environments. diff --git a/perfkitbenchmarker/data/agents/deploy_agent_engine.py b/perfkitbenchmarker/data/agents/deploy_agent_engine.py index 28e1d71bc4..40c3651bd3 100644 --- a/perfkitbenchmarker/data/agents/deploy_agent_engine.py +++ b/perfkitbenchmarker/data/agents/deploy_agent_engine.py @@ -4,10 +4,13 @@ """ import argparse +import asyncio import importlib import os +import time from typing import Any +import common_utils import pydantic import vertexai import yaml @@ -33,6 +36,7 @@ class DeploymentConfig[AgentConfigT](BaseDeploymentConfig): staging_bucket: str run_uri: str agent_config: AgentConfigT + initial_prompt: str | None = None def _import_agent_module(agent: str, framework: str) -> Any: @@ -41,6 +45,31 @@ def _import_agent_module(agent: str, framework: str) -> Any: return importlib.import_module(module_name) +async def _measure_time_to_ready[AgentConfigT]( + remote_agent: Any, + handler: Any, + config: DeploymentConfig[AgentConfigT], +) -> float | None: + """Measures the time to first chunk.""" + print("Sending initial prompt...") + endpoint = handler.create_endpoint(remote_agent) + prompt_config = common_utils.PromptConfig[ + AgentConfigT + ].create_for_initial_prompt(config) + first_chunk_time = None + try: + async for _ in endpoint.stream_execute(prompt_config=prompt_config): + if first_chunk_time is None: + first_chunk_time = time.monotonic() + return first_chunk_time + except Exception as e: # pylint: disable=broad-exception-caught + # Since the agent has already been created, it's better to let this script + # finish normally, so the resource is marked as created and can be cleaned + # up normally. + print(f"Error measuring time to ready: {e}") + return None + + def run_deployment[AgentConfigT]( config: DeploymentConfig[AgentConfigT], module: Any ) -> None: @@ -75,13 +104,25 @@ def run_deployment[AgentConfigT]( "display_name": display_name, } + create_start = time.monotonic() remote_agent = client.agent_engines.create( agent=agent_to_deploy, config=deploy_config, ) + create_time = time.monotonic() - create_start + print(f"Time to Create: {create_time}") + print("Successfully deployed Agent Engine!") print(f"Resource name: {remote_agent.api_resource.name}") + if config.initial_prompt: + first_chunk_time = asyncio.run( + _measure_time_to_ready(remote_agent, handler, config) + ) + if first_chunk_time is not None: + ready_time = first_chunk_time - create_start + print(f"Time to Ready: {ready_time}") + def main() -> None: parser = argparse.ArgumentParser( diff --git a/perfkitbenchmarker/providers/gcp/gcp_ai_agent_service.py b/perfkitbenchmarker/providers/gcp/gcp_ai_agent_service.py index a3389b96c9..2fcef16147 100644 --- a/perfkitbenchmarker/providers/gcp/gcp_ai_agent_service.py +++ b/perfkitbenchmarker/providers/gcp/gcp_ai_agent_service.py @@ -12,6 +12,7 @@ from perfkitbenchmarker import context from perfkitbenchmarker import data from perfkitbenchmarker import errors +from perfkitbenchmarker import sample from perfkitbenchmarker import vm_util from perfkitbenchmarker.providers.gcp import gce_virtual_machine from perfkitbenchmarker.providers.gcp import gcs @@ -363,6 +364,8 @@ def __init__(self, client_vm, ai_agent_spec): self._remote_agent_name = None self._staging_bucket = self.base_dir self.spec = ai_agent_spec + self._time_to_create: float | None = None + self._time_to_ready: float | None = None @override def _StageAgentCode(self): @@ -419,6 +422,9 @@ def _GetDeploymentConfig(self) -> dict[str, Any]: 'staging_bucket': self._staging_bucket, 'agent_config': self.agent_config, }) + initial_prompt = self._GetInitialPromptText() + if initial_prompt: + config['initial_prompt'] = initial_prompt return config def _Create(self): @@ -446,12 +452,17 @@ def _Create(self): command = ' && '.join(command_parts) stdout, _ = self.client_vm.RemoteCommand(command) - # 5. Parse output to get remote agent name + # 5. Parse output to get remote agent name and latencies for line in stdout.split('\n'): if line.startswith('Resource name: '): _, _, agent_name = line.partition('Resource name: ') self._remote_agent_name = agent_name.strip() - break + elif line.startswith('Time to Create: '): + _, _, time_str = line.partition('Time to Create: ') + self._time_to_create = float(time_str.strip()) + elif line.startswith('Time to Ready: '): + _, _, time_str = line.partition('Time to Ready: ') + self._time_to_ready = float(time_str.strip()) if not self._remote_agent_name: raise errors.Benchmarks.PrepareException( @@ -464,6 +475,16 @@ def _Create(self): self._remote_agent_name, ) + def _PostCreate(self): + if ( + ai_agent_service.AI_AGENT_INITIAL_PROMPT_URL.value + and not self._time_to_ready + ): + raise errors.Benchmarks.PrepareException( + 'Initial prompt was explictly passed, but failed to get remote ready' + ' time from deploy script output.' + ) + def _Delete(self): """Deletes the remote agent.""" if not self._remote_agent_name: @@ -600,3 +621,38 @@ def Execute( 'Agent execution finished. Raw output:\n%s', stdout, ) + + @override + def GetSamples(self): + samples = super().GetSamples() + create_time_sample = [s for s in samples if s.metric == 'Time to Create'][0] + resource_type = create_time_sample.metadata.get('resource_type') + resource_class = create_time_sample.metadata.get('resource_class') + metadata = { + 'resource_type': resource_type, + 'resource_class': resource_class, + } + + # Remove existing samples for 'Time to Create' and 'Time to Ready' if any + samples = [ + s + for s in samples + if s.metric not in ('Time to Create', 'Time to Ready') + ] + + if self._time_to_create is not None: + samples.append( + sample.Sample( + 'Time to Create', + self._time_to_create, + 'seconds', + metadata=metadata, + ) + ) + if self._time_to_ready is not None: + samples.append( + sample.Sample( + 'Time to Ready', self._time_to_ready, 'seconds', metadata=metadata + ) + ) + return samples diff --git a/perfkitbenchmarker/resources/ai_agent_service.py b/perfkitbenchmarker/resources/ai_agent_service.py index 58770ac93f..ad784ee318 100644 --- a/perfkitbenchmarker/resources/ai_agent_service.py +++ b/perfkitbenchmarker/resources/ai_agent_service.py @@ -14,6 +14,12 @@ FLAGS = flags.FLAGS +AI_AGENT_INITIAL_PROMPT_URL = flags.DEFINE_string( + 'ai_agent_initial_prompt_url', + None, + 'Object storage URL (e.g. gs://bucket/prompt.txt) to an initial prompt.', +) + def GetAiAgentServiceClass(cloud: str, deployment_type: str): """Returns the correct AI agent service class based on cloud/type.""" @@ -66,6 +72,17 @@ def _GetDeploymentConfig(self) -> dict[str, Any]: """Gets config dict for deployment/creation.""" return {'run_uri': FLAGS.run_uri} + def _GetInitialPromptText(self) -> str | None: + """Fetches the initial prompt text from object storage.""" + url = AI_AGENT_INITIAL_PROMPT_URL.value + if not url: + return None + + local_path = vm_util.PrependTempDir('initial_prompt.txt') + self.storage_service.Copy(url, local_path) + with open(local_path, 'r') as f: + return f.read().strip() + def _GetRunConfig( self, output_dir: str,