fix: Fix create_session AttributeError for agents without AdkApp

vertex-sdk-bot · copybara-github · commit be85a803f0a2 · 2026-04-14T13:44:24.000-07:00
PiperOrigin-RevId: 899742948
diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py
@@ -3144,6 +3144,94 @@ def test_run_inference_with_agent_engine_with_response_column_raises_error(
             "'intermediate_events' or 'response' columns"
         ) in str(excinfo.value)
 
+    @mock.patch.object(_evals_utils, "EvalDatasetLoader")
+    @mock.patch("vertexai._genai._evals_common.vertexai.Client")
+    def test_run_inference_with_agent_engine_falls_back_to_managed_sessions_api(
+        self,
+        mock_vertexai_client,
+        mock_eval_dataset_loader,
+    ):
+        """Tests that run_inference falls back to the managed Sessions API
+        when the agent engine does not have create_session registered."""
+        mock_df = pd.DataFrame(
+            {
+                "prompt": ["agent prompt"],
+                "session_inputs": [
+                    {
+                        "user_id": "123",
+                        "state": {"a": "1"},
+                    }
+                ],
+            }
+        )
+        mock_eval_dataset_loader.return_value.load.return_value = mock_df.to_dict(
+            orient="records"
+        )
+
+        # spec= restricts the mock's attributes: accessing create_session raises
+        # AttributeError, as for agents deployed via Console, gcloud, or source code.
+        mock_agent_engine = mock.Mock(
+            spec=["api_client", "api_resource", "stream_query"],
+        )
+        mock_agent_engine.api_resource.name = (
+            "projects/test-project/locations/us-central1/reasoningEngines/123"
+        )
+
+        # Managed Sessions API mock: the operation's response carries the session name.
+        mock_session_operation = mock.Mock()
+        mock_session_operation.response.name = (
+            "projects/test-project/locations/us-central1"
+            "/reasoningEngines/123/sessions/managed-session-1"
+        )
+        mock_agent_engine.api_client.sessions.create.return_value = (
+            mock_session_operation
+        )
+
+        stream_query_return_value = [
+            {
+                "id": "1",
+                "content": {"parts": [{"text": "intermediate1"}]},
+                "timestamp": 123,
+                "author": "model",
+            },
+            {
+                "id": "2",
+                "content": {"parts": [{"text": "agent response"}]},
+                "timestamp": 124,
+                "author": "model",
+            },
+        ]
+        mock_agent_engine.stream_query.return_value = iter(stream_query_return_value)
+        mock_vertexai_client.return_value.agent_engines.get.return_value = (
+            mock_agent_engine
+        )
+
+        inference_result = self.client.evals.run_inference(
+            agent="projects/test-project/locations/us-central1/reasoningEngines/123",
+            src=mock_df,
+        )
+
+        # Verify the managed Sessions API was called as fallback.
+        mock_agent_engine.api_client.sessions.create.assert_called_once_with(
+            name="projects/test-project/locations/us-central1/reasoningEngines/123",
+            user_id="123",
+            config=vertexai_genai_types.CreateAgentEngineSessionConfig(
+                session_state={"a": "1"},
+            ),
+        )
+
+        # Verify stream_query was called with the session ID extracted from
+        # the managed session's resource name.
+        mock_agent_engine.stream_query.assert_called_once_with(
+            user_id="123",
+            session_id="managed-session-1",
+            message="agent prompt",
+        )
+
+        # Verify the inference results are correct.
+        assert inference_result.eval_dataset_df["response"].iloc[0] == "agent response"
+        assert inference_result.candidate_name == "agent_engine_0"
+
     @mock.patch.object(_evals_utils, "EvalDatasetLoader")
     @mock.patch("vertexai._genai._evals_common.InMemorySessionService")  # fmt: skip
     @mock.patch("vertexai._genai._evals_common.Runner")
diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py
@@ -1895,35 +1895,35 @@ def _run_agent(
         genai_types.GenerateContentResponse,
     ]
 ]:
-    """Internal helper to run inference using Gemini model with concurrency."""
-    original_location = os.environ.get("GOOGLE_CLOUD_LOCATION")
-    location_overridden = False
-
-    if user_simulator_config and user_simulator_config.model_name:
-        model_name = user_simulator_config.model_name
-        if model_name.startswith("gemini-3") and "/" not in model_name:
-            current_location = original_location or api_client.location or "us-central1"
-            if current_location != "global" and not allow_cross_region_model:
-                raise ValueError(
+  """Internal helper to run inference using Gemini model with concurrency."""
+  original_location = os.environ.get("GOOGLE_CLOUD_LOCATION")
+  location_overridden = False
+
+  if user_simulator_config and user_simulator_config.model_name:
+    model_name = user_simulator_config.model_name
+    if model_name.startswith("gemini-3") and "/" not in model_name:
+      current_location = original_location or api_client.location or "us-central1"
+      if current_location != "global" and not allow_cross_region_model:
+        raise ValueError(
                     f"The model '{model_name}' is currently only available in the"
                     " 'global' region. Because this request originated in"
                     f" '{current_location}', you must explicitly set "
                     "allow_cross_region_model=True to allow your data to be routed outside"
                     " of your request's region."
                 )
 
-            logger.warning(
+      logger.warning(
                 "Model %s is only available in the global region. Routing to global.",
                 model_name,
             )
-            user_simulator_config.model_name = f"projects/{api_client.project}/locations/global/publishers/google/models/{model_name}"
-            if original_location != "global":
-                os.environ["GOOGLE_CLOUD_LOCATION"] = "global"
-                location_overridden = True
-
-    try:
-        if agent_engine:
-            return _execute_inference_concurrently(
+      user_simulator_config.model_name = f"projects/{api_client.project}/locations/global/publishers/google/models/{model_name}"
+      if original_location != "global":
+        os.environ["GOOGLE_CLOUD_LOCATION"] = "global"
+        location_overridden = True
+
+  try:
+    if agent_engine:
+      return _execute_inference_concurrently(
                 api_client=api_client,
                 agent_engine=agent_engine,
                 prompt_dataset=prompt_dataset,
@@ -1932,8 +1932,8 @@ def _run_agent(
                 user_simulator_config=None,
                 inference_fn=_execute_agent_run_with_retry,
             )
-        elif agent:
-            return _execute_inference_concurrently(
+    elif agent:
+      return _execute_inference_concurrently(
                 api_client=api_client,
                 agent=agent,
                 prompt_dataset=prompt_dataset,
@@ -1942,14 +1942,74 @@ def _run_agent(
                 user_simulator_config=user_simulator_config,
                 inference_fn=_execute_local_agent_run_with_retry,
             )
-        else:
-            raise ValueError("Neither agent_engine nor agent is provided.")
-    finally:
-        if location_overridden:
-            if original_location is None:
-                del os.environ["GOOGLE_CLOUD_LOCATION"]
-            else:
-                os.environ["GOOGLE_CLOUD_LOCATION"] = original_location
+    else:
+      raise ValueError("Neither agent_engine nor agent is provided.")
+  finally:
+    if location_overridden:
+      if original_location is None:
+        del os.environ["GOOGLE_CLOUD_LOCATION"]
+      else:
+        os.environ["GOOGLE_CLOUD_LOCATION"] = original_location
+
+
+def _create_agent_engine_session(
+    *,
+    agent_engine: types.AgentEngine,
+    user_id: str,
+    session_state: Optional[dict[str, Any]] = None,
+) -> str:
+  """Creates a session for an agent engine and returns the session ID.
+
+  Uses the agent engine's own `create_session` operation when that attribute
+  exists (e.g. agents deployed via AdkApp). Only a missing attribute triggers
+  the fallback to the managed Vertex AI Sessions API; an AttributeError raised
+  while `create_session` itself runs propagates to the caller unchanged.
+
+  Args:
+      agent_engine: The AgentEngine instance.
+      user_id: The user ID for the session.
+      session_state: Optional initial state for the session.
+
+  Returns:
+      The session ID string.
+
+  Raises:
+      RuntimeError: If the managed Sessions API fallback yields no session.
+  """
+  try:
+    # Resolve the attribute only, so a bug inside a registered operation is
+    # never mistaken for "operation not registered".
+    create_session = agent_engine.create_session  # type: ignore[attr-defined]
+  except AttributeError as exc:
+    # Agent engine does not have create_session registered (e.g. deployed
+    # via Console, gcloud, or source code deployment without AdkApp).
+    # Fall back to the managed Vertex AI Sessions API.
+    logger.info(
+        "Agent engine does not have 'create_session' operation registered."
+        " Falling back to managed Sessions API."
+    )
+    operation = agent_engine.api_client.sessions.create(
+        name=agent_engine.api_resource.name,
+        user_id=user_id,
+        config=types.CreateAgentEngineSessionConfig(
+            session_state=session_state,
+        ),
+    )
+    if operation.response and operation.response.name:
+      # Session name format:
+      # projects/{p}/locations/{l}/reasoningEngines/{re}/sessions/{id}
+      return operation.response.name.split("/")[-1]
+    elif operation.error:
+      raise RuntimeError(
+          f"Failed to create session via managed API: {operation.error}"
+      ) from exc
+    else:
+      raise RuntimeError(
+          "Failed to create session via managed API: "
+          "operation returned no response."
+      ) from exc
+  session = create_session(user_id=user_id, state=session_state)
+  return session["id"]
 
 
 def _execute_agent_run_with_retry(
@@ -1958,54 +2018,55 @@ def _execute_agent_run_with_retry(
     agent_engine: types.AgentEngine,
     max_retries: int = 3,
 ) -> Union[list[dict[str, Any]], dict[str, Any]]:
-    """Executes agent run over agent engine for a single prompt."""
+  """Executes agent run over agent engine for a single prompt."""
+  try:
+    session_inputs = _get_session_inputs(row)
+    user_id = session_inputs.user_id
+    session_state = session_inputs.state
+    session_id = _create_agent_engine_session(
+        agent_engine=agent_engine,
+        user_id=user_id,
+        session_state=session_state,
+    )
+  except KeyError as e:
+    return {"error": f"Failed to get all required agent engine inputs: {e}"}
+  except Exception as e:
+    return {"error": f"Failed to create a new session : {e}"}
+  for attempt in range(max_retries):
     try:
-        session_inputs = _get_session_inputs(row)
-        user_id = session_inputs.user_id
-        session_state = session_inputs.state
-        session = agent_engine.create_session(  # type: ignore[attr-defined]
-            user_id=user_id,
-            state=session_state,
-        )
-    except KeyError as e:
-        return {"error": f"Failed to get all required agent engine inputs: {e}"}
-    except Exception as e:
-        return {"error": f"Failed to create a new session : {e}"}
-    for attempt in range(max_retries):
-        try:
-            responses = []
-            for event in agent_engine.stream_query(  # type: ignore[attr-defined]
-                user_id=user_id,
-                session_id=session["id"],
-                message=contents,
-            ):
-                if event and CONTENT in event and PARTS in event[CONTENT]:
-                    responses.append(event)
-            return responses
-        except api_exceptions.ResourceExhausted as e:
-            logger.warning(
-                "Resource Exhausted error on attempt %d/%d: %s. Retrying in %s"
-                " seconds...",
-                attempt + 1,
-                max_retries,
-                e,
-                2**attempt,
-            )
-            if attempt == max_retries - 1:
-                return {"error": f"Resource exhausted after retries: {e}"}
-            time.sleep(2**attempt)
-        except Exception as e:  # pylint: disable=broad-exception-caught
-            logger.error(
+      responses = []
+      for event in agent_engine.stream_query(  # type: ignore[attr-defined]
+          user_id=user_id,
+          session_id=session_id,
+          message=contents,
+      ):
+        if event and CONTENT in event and PARTS in event[CONTENT]:
+          responses.append(event)
+      return responses
+    except api_exceptions.ResourceExhausted as e:
+      logger.warning(
+          "Resource Exhausted error on attempt %d/%d: %s. Retrying in %s"
+          " seconds...",
+          attempt + 1,
+          max_retries,
+          e,
+          2**attempt,
+      )
+      if attempt == max_retries - 1:
+        return {"error": f"Resource exhausted after retries: {e}"}
+      time.sleep(2**attempt)
+    except Exception as e:  # pylint: disable=broad-exception-caught
+      logger.error(
                 "Unexpected error during generate_content on attempt %d/%d: %s",
                 attempt + 1,
                 max_retries,
                 e,
             )
 
-            if attempt == max_retries - 1:
-                return {"error": f"Failed after retries: {e}"}
-            time.sleep(1)
-    return {"error": f"Failed to get agent run results after {max_retries} retries"}
+      if attempt == max_retries - 1:
+        return {"error": f"Failed after retries: {e}"}
+      time.sleep(1)
+  return {"error": f"Failed to get agent run results after {max_retries} retries"}
 
 
 def _execute_local_agent_run_with_retry(