Skip to content

Commit be85a80

Browse files
vertex-sdk-botcopybara-github
authored andcommitted
fix: Fix create_session AttributeError for agents without AdkApp
PiperOrigin-RevId: 899742948
1 parent e73d4e7 commit be85a80

2 files changed

Lines changed: 219 additions & 70 deletions

File tree

tests/unit/vertexai/genai/test_evals.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3144,6 +3144,94 @@ def test_run_inference_with_agent_engine_with_response_column_raises_error(
31443144
"'intermediate_events' or 'response' columns"
31453145
) in str(excinfo.value)
31463146

3147+
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
3148+
@mock.patch("vertexai._genai._evals_common.vertexai.Client")
3149+
def test_run_inference_with_agent_engine_falls_back_to_managed_sessions_api(
3150+
self,
3151+
mock_vertexai_client,
3152+
mock_eval_dataset_loader,
3153+
):
3154+
"""Tests that run_inference falls back to the managed Sessions API
3155+
when the agent engine does not have create_session registered."""
3156+
mock_df = pd.DataFrame(
3157+
{
3158+
"prompt": ["agent prompt"],
3159+
"session_inputs": [
3160+
{
3161+
"user_id": "123",
3162+
"state": {"a": "1"},
3163+
}
3164+
],
3165+
}
3166+
)
3167+
mock_eval_dataset_loader.return_value.load.return_value = mock_df.to_dict(
3168+
orient="records"
3169+
)
3170+
3171+
# Create a mock agent engine WITHOUT create_session (simulates agents
3172+
# deployed via Console, gcloud, or source code deployment).
3173+
mock_agent_engine = mock.Mock(
3174+
spec=["api_client", "api_resource", "stream_query"],
3175+
)
3176+
mock_agent_engine.api_resource.name = (
3177+
"projects/test-project/locations/us-central1/reasoningEngines/123"
3178+
)
3179+
3180+
# Mock the managed Sessions API to return a session.
3181+
mock_session_operation = mock.Mock()
3182+
mock_session_operation.response.name = (
3183+
"projects/test-project/locations/us-central1"
3184+
"/reasoningEngines/123/sessions/managed-session-1"
3185+
)
3186+
mock_agent_engine.api_client.sessions.create.return_value = (
3187+
mock_session_operation
3188+
)
3189+
3190+
stream_query_return_value = [
3191+
{
3192+
"id": "1",
3193+
"content": {"parts": [{"text": "intermediate1"}]},
3194+
"timestamp": 123,
3195+
"author": "model",
3196+
},
3197+
{
3198+
"id": "2",
3199+
"content": {"parts": [{"text": "agent response"}]},
3200+
"timestamp": 124,
3201+
"author": "model",
3202+
},
3203+
]
3204+
mock_agent_engine.stream_query.return_value = iter(stream_query_return_value)
3205+
mock_vertexai_client.return_value.agent_engines.get.return_value = (
3206+
mock_agent_engine
3207+
)
3208+
3209+
inference_result = self.client.evals.run_inference(
3210+
agent="projects/test-project/locations/us-central1/reasoningEngines/123",
3211+
src=mock_df,
3212+
)
3213+
3214+
# Verify the managed Sessions API was called as fallback.
3215+
mock_agent_engine.api_client.sessions.create.assert_called_once_with(
3216+
name="projects/test-project/locations/us-central1/reasoningEngines/123",
3217+
user_id="123",
3218+
config=vertexai_genai_types.CreateAgentEngineSessionConfig(
3219+
session_state={"a": "1"},
3220+
),
3221+
)
3222+
3223+
# Verify stream_query was called with the session ID extracted from
3224+
# the managed session's resource name.
3225+
mock_agent_engine.stream_query.assert_called_once_with(
3226+
user_id="123",
3227+
session_id="managed-session-1",
3228+
message="agent prompt",
3229+
)
3230+
3231+
# Verify the inference results are correct.
3232+
assert inference_result.eval_dataset_df["response"].iloc[0] == "agent response"
3233+
assert inference_result.candidate_name == "agent_engine_0"
3234+
31473235
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
31483236
@mock.patch("vertexai._genai._evals_common.InMemorySessionService") # fmt: skip
31493237
@mock.patch("vertexai._genai._evals_common.Runner")

vertexai/_genai/_evals_common.py

Lines changed: 131 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1895,35 +1895,35 @@ def _run_agent(
18951895
genai_types.GenerateContentResponse,
18961896
]
18971897
]:
1898-
"""Internal helper to run inference using Gemini model with concurrency."""
1899-
original_location = os.environ.get("GOOGLE_CLOUD_LOCATION")
1900-
location_overridden = False
1901-
1902-
if user_simulator_config and user_simulator_config.model_name:
1903-
model_name = user_simulator_config.model_name
1904-
if model_name.startswith("gemini-3") and "/" not in model_name:
1905-
current_location = original_location or api_client.location or "us-central1"
1906-
if current_location != "global" and not allow_cross_region_model:
1907-
raise ValueError(
1898+
"""Internal helper to run inference using Gemini model with concurrency."""
1899+
original_location = os.environ.get("GOOGLE_CLOUD_LOCATION")
1900+
location_overridden = False
1901+
1902+
if user_simulator_config and user_simulator_config.model_name:
1903+
model_name = user_simulator_config.model_name
1904+
if model_name.startswith("gemini-3") and "/" not in model_name:
1905+
current_location = original_location or api_client.location or "us-central1"
1906+
if current_location != "global" and not allow_cross_region_model:
1907+
raise ValueError(
19081908
f"The model '{model_name}' is currently only available in the"
19091909
" 'global' region. Because this request originated in"
19101910
f" '{current_location}', you must explicitly set "
19111911
"allow_cross_region_model=True to allow your data to be routed outside"
19121912
" of your request's region."
19131913
)
19141914

1915-
logger.warning(
1915+
logger.warning(
19161916
"Model %s is only available in the global region. Routing to global.",
19171917
model_name,
19181918
)
1919-
user_simulator_config.model_name = f"projects/{api_client.project}/locations/global/publishers/google/models/{model_name}"
1920-
if original_location != "global":
1921-
os.environ["GOOGLE_CLOUD_LOCATION"] = "global"
1922-
location_overridden = True
1923-
1924-
try:
1925-
if agent_engine:
1926-
return _execute_inference_concurrently(
1919+
user_simulator_config.model_name = f"projects/{api_client.project}/locations/global/publishers/google/models/{model_name}"
1920+
if original_location != "global":
1921+
os.environ["GOOGLE_CLOUD_LOCATION"] = "global"
1922+
location_overridden = True
1923+
1924+
try:
1925+
if agent_engine:
1926+
return _execute_inference_concurrently(
19271927
api_client=api_client,
19281928
agent_engine=agent_engine,
19291929
prompt_dataset=prompt_dataset,
@@ -1932,8 +1932,8 @@ def _run_agent(
19321932
user_simulator_config=None,
19331933
inference_fn=_execute_agent_run_with_retry,
19341934
)
1935-
elif agent:
1936-
return _execute_inference_concurrently(
1935+
elif agent:
1936+
return _execute_inference_concurrently(
19371937
api_client=api_client,
19381938
agent=agent,
19391939
prompt_dataset=prompt_dataset,
@@ -1942,14 +1942,74 @@ def _run_agent(
19421942
user_simulator_config=user_simulator_config,
19431943
inference_fn=_execute_local_agent_run_with_retry,
19441944
)
1945-
else:
1946-
raise ValueError("Neither agent_engine nor agent is provided.")
1947-
finally:
1948-
if location_overridden:
1949-
if original_location is None:
1950-
del os.environ["GOOGLE_CLOUD_LOCATION"]
1951-
else:
1952-
os.environ["GOOGLE_CLOUD_LOCATION"] = original_location
1945+
else:
1946+
raise ValueError("Neither agent_engine nor agent is provided.")
1947+
finally:
1948+
if location_overridden:
1949+
if original_location is None:
1950+
del os.environ["GOOGLE_CLOUD_LOCATION"]
1951+
else:
1952+
os.environ["GOOGLE_CLOUD_LOCATION"] = original_location
1953+
1954+
1955+
def _create_agent_engine_session(
1956+
*,
1957+
agent_engine: types.AgentEngine,
1958+
user_id: str,
1959+
session_state: Optional[dict[str, Any]] = None,
1960+
) -> str:
1961+
"""Creates a session for an agent engine and returns the session ID.
1962+
1963+
First attempts to use the agent engine's own `create_session` operation
1964+
(available for agents deployed via AdkApp). If the agent engine does not
1965+
have `create_session` registered, falls back to the managed Vertex AI
1966+
Sessions API.
1967+
1968+
Args:
1969+
agent_engine: The AgentEngine instance.
1970+
user_id: The user ID for the session.
1971+
session_state: Optional initial state for the session.
1972+
1973+
Returns:
1974+
The session ID string.
1975+
1976+
Raises:
1977+
RuntimeError: If the session could not be created via either path.
1978+
"""
1979+
try:
1980+
session = agent_engine.create_session( # type: ignore[attr-defined]
1981+
user_id=user_id,
1982+
state=session_state,
1983+
)
1984+
return session["id"]
1985+
except AttributeError as exc:
1986+
# Agent engine does not have create_session registered (e.g. deployed
1987+
# via Console, gcloud, or source code deployment without AdkApp).
1988+
# Fall back to the managed Vertex AI Sessions API.
1989+
logger.info(
1990+
"Agent engine does not have 'create_session' operation registered."
1991+
" Falling back to managed Sessions API."
1992+
)
1993+
operation = agent_engine.api_client.sessions.create(
1994+
name=agent_engine.api_resource.name,
1995+
user_id=user_id,
1996+
config=types.CreateAgentEngineSessionConfig(
1997+
session_state=session_state,
1998+
),
1999+
)
2000+
if operation.response and operation.response.name:
2001+
# Session name format:
2002+
# projects/{p}/locations/{l}/reasoningEngines/{re}/sessions/{id}
2003+
return operation.response.name.split("/")[-1]
2004+
elif operation.error:
2005+
raise RuntimeError(
2006+
f"Failed to create session via managed API: {operation.error}"
2007+
) from exc
2008+
else:
2009+
raise RuntimeError(
2010+
"Failed to create session via managed API: "
2011+
"operation returned no response."
2012+
) from exc
19532013

19542014

19552015
def _execute_agent_run_with_retry(
@@ -1958,54 +2018,55 @@ def _execute_agent_run_with_retry(
19582018
agent_engine: types.AgentEngine,
19592019
max_retries: int = 3,
19602020
) -> Union[list[dict[str, Any]], dict[str, Any]]:
1961-
"""Executes agent run over agent engine for a single prompt."""
2021+
"""Executes agent run over agent engine for a single prompt."""
2022+
try:
2023+
session_inputs = _get_session_inputs(row)
2024+
user_id = session_inputs.user_id
2025+
session_state = session_inputs.state
2026+
session_id = _create_agent_engine_session(
2027+
agent_engine=agent_engine,
2028+
user_id=user_id,
2029+
session_state=session_state,
2030+
)
2031+
except KeyError as e:
2032+
return {"error": f"Failed to get all required agent engine inputs: {e}"}
2033+
except Exception as e:
2034+
return {"error": f"Failed to create a new session : {e}"}
2035+
for attempt in range(max_retries):
19622036
try:
1963-
session_inputs = _get_session_inputs(row)
1964-
user_id = session_inputs.user_id
1965-
session_state = session_inputs.state
1966-
session = agent_engine.create_session( # type: ignore[attr-defined]
1967-
user_id=user_id,
1968-
state=session_state,
1969-
)
1970-
except KeyError as e:
1971-
return {"error": f"Failed to get all required agent engine inputs: {e}"}
1972-
except Exception as e:
1973-
return {"error": f"Failed to create a new session : {e}"}
1974-
for attempt in range(max_retries):
1975-
try:
1976-
responses = []
1977-
for event in agent_engine.stream_query( # type: ignore[attr-defined]
1978-
user_id=user_id,
1979-
session_id=session["id"],
1980-
message=contents,
1981-
):
1982-
if event and CONTENT in event and PARTS in event[CONTENT]:
1983-
responses.append(event)
1984-
return responses
1985-
except api_exceptions.ResourceExhausted as e:
1986-
logger.warning(
1987-
"Resource Exhausted error on attempt %d/%d: %s. Retrying in %s"
1988-
" seconds...",
1989-
attempt + 1,
1990-
max_retries,
1991-
e,
1992-
2**attempt,
1993-
)
1994-
if attempt == max_retries - 1:
1995-
return {"error": f"Resource exhausted after retries: {e}"}
1996-
time.sleep(2**attempt)
1997-
except Exception as e: # pylint: disable=broad-exception-caught
1998-
logger.error(
2037+
responses = []
2038+
for event in agent_engine.stream_query( # type: ignore[attr-defined]
2039+
user_id=user_id,
2040+
session_id=session_id,
2041+
message=contents,
2042+
):
2043+
if event and CONTENT in event and PARTS in event[CONTENT]:
2044+
responses.append(event)
2045+
return responses
2046+
except api_exceptions.ResourceExhausted as e:
2047+
logger.warning(
2048+
"Resource Exhausted error on attempt %d/%d: %s. Retrying in %s"
2049+
" seconds...",
2050+
attempt + 1,
2051+
max_retries,
2052+
e,
2053+
2**attempt,
2054+
)
2055+
if attempt == max_retries - 1:
2056+
return {"error": f"Resource exhausted after retries: {e}"}
2057+
time.sleep(2**attempt)
2058+
except Exception as e: # pylint: disable=broad-exception-caught
2059+
logger.error(
19992060
"Unexpected error during generate_content on attempt %d/%d: %s",
20002061
attempt + 1,
20012062
max_retries,
20022063
e,
20032064
)
20042065

2005-
if attempt == max_retries - 1:
2006-
return {"error": f"Failed after retries: {e}"}
2007-
time.sleep(1)
2008-
return {"error": f"Failed to get agent run results after {max_retries} retries"}
2066+
if attempt == max_retries - 1:
2067+
return {"error": f"Failed after retries: {e}"}
2068+
time.sleep(1)
2069+
return {"error": f"Failed to get agent run results after {max_retries} retries"}
20092070

20102071

20112072
def _execute_local_agent_run_with_retry(

0 commit comments

Comments
 (0)