@@ -1895,35 +1895,35 @@ def _run_agent(
18951895 genai_types .GenerateContentResponse ,
18961896 ]
18971897]:
1898- """Internal helper to run inference using Gemini model with concurrency."""
1899- original_location = os .environ .get ("GOOGLE_CLOUD_LOCATION" )
1900- location_overridden = False
1901-
1902- if user_simulator_config and user_simulator_config .model_name :
1903- model_name = user_simulator_config .model_name
1904- if model_name .startswith ("gemini-3" ) and "/" not in model_name :
1905- current_location = original_location or api_client .location or "us-central1"
1906- if current_location != "global" and not allow_cross_region_model :
1907- raise ValueError (
1898+ """Internal helper to run inference using Gemini model with concurrency."""
1899+ original_location = os .environ .get ("GOOGLE_CLOUD_LOCATION" )
1900+ location_overridden = False
1901+
1902+ if user_simulator_config and user_simulator_config .model_name :
1903+ model_name = user_simulator_config .model_name
1904+ if model_name .startswith ("gemini-3" ) and "/" not in model_name :
1905+ current_location = original_location or api_client .location or "us-central1"
1906+ if current_location != "global" and not allow_cross_region_model :
1907+ raise ValueError (
19081908 f"The model '{ model_name } ' is currently only available in the"
19091909 " 'global' region. Because this request originated in"
19101910 f" '{ current_location } ', you must explicitly set "
19111911 "allow_cross_region_model=True to allow your data to be routed outside"
19121912 " of your request's region."
19131913 )
19141914
1915- logger .warning (
1915+ logger .warning (
19161916 "Model %s is only available in the global region. Routing to global." ,
19171917 model_name ,
19181918 )
1919- user_simulator_config .model_name = f"projects/{ api_client .project } /locations/global/publishers/google/models/{ model_name } "
1920- if original_location != "global" :
1921- os .environ ["GOOGLE_CLOUD_LOCATION" ] = "global"
1922- location_overridden = True
1923-
1924- try :
1925- if agent_engine :
1926- return _execute_inference_concurrently (
1919+ user_simulator_config .model_name = f"projects/{ api_client .project } /locations/global/publishers/google/models/{ model_name } "
1920+ if original_location != "global" :
1921+ os .environ ["GOOGLE_CLOUD_LOCATION" ] = "global"
1922+ location_overridden = True
1923+
1924+ try :
1925+ if agent_engine :
1926+ return _execute_inference_concurrently (
19271927 api_client = api_client ,
19281928 agent_engine = agent_engine ,
19291929 prompt_dataset = prompt_dataset ,
@@ -1932,8 +1932,8 @@ def _run_agent(
19321932 user_simulator_config = None ,
19331933 inference_fn = _execute_agent_run_with_retry ,
19341934 )
1935- elif agent :
1936- return _execute_inference_concurrently (
1935+ elif agent :
1936+ return _execute_inference_concurrently (
19371937 api_client = api_client ,
19381938 agent = agent ,
19391939 prompt_dataset = prompt_dataset ,
@@ -1942,14 +1942,74 @@ def _run_agent(
19421942 user_simulator_config = user_simulator_config ,
19431943 inference_fn = _execute_local_agent_run_with_retry ,
19441944 )
1945- else :
1946- raise ValueError ("Neither agent_engine nor agent is provided." )
1947- finally :
1948- if location_overridden :
1949- if original_location is None :
1950- del os .environ ["GOOGLE_CLOUD_LOCATION" ]
1951- else :
1952- os .environ ["GOOGLE_CLOUD_LOCATION" ] = original_location
1945+ else :
1946+ raise ValueError ("Neither agent_engine nor agent is provided." )
1947+ finally :
1948+ if location_overridden :
1949+ if original_location is None :
1950+ del os .environ ["GOOGLE_CLOUD_LOCATION" ]
1951+ else :
1952+ os .environ ["GOOGLE_CLOUD_LOCATION" ] = original_location
1953+
1954+
1955+ def _create_agent_engine_session (
1956+ * ,
1957+ agent_engine : types .AgentEngine ,
1958+ user_id : str ,
1959+ session_state : Optional [dict [str , Any ]] = None ,
1960+ ) -> str :
1961+ """Creates a session for an agent engine and returns the session ID.
1962+
1963+ First attempts to use the agent engine's own `create_session` operation
1964+ (available for agents deployed via AdkApp). If the agent engine does not
1965+ have `create_session` registered, falls back to the managed Vertex AI
1966+ Sessions API.
1967+
1968+ Args:
1969+ agent_engine: The AgentEngine instance.
1970+ user_id: The user ID for the session.
1971+ session_state: Optional initial state for the session.
1972+
1973+ Returns:
1974+ The session ID string.
1975+
1976+ Raises:
1977+ RuntimeError: If the session could not be created via either path.
1978+ """
1979+ try :
1980+ session = agent_engine .create_session ( # type: ignore[attr-defined]
1981+ user_id = user_id ,
1982+ state = session_state ,
1983+ )
1984+ return session ["id" ]
1985+ except AttributeError as exc :
1986+ # Agent engine does not have create_session registered (e.g. deployed
1987+ # via Console, gcloud, or source code deployment without AdkApp).
1988+ # Fall back to the managed Vertex AI Sessions API.
1989+ logger .info (
1990+ "Agent engine does not have 'create_session' operation registered."
1991+ " Falling back to managed Sessions API."
1992+ )
1993+ operation = agent_engine .api_client .sessions .create (
1994+ name = agent_engine .api_resource .name ,
1995+ user_id = user_id ,
1996+ config = types .CreateAgentEngineSessionConfig (
1997+ session_state = session_state ,
1998+ ),
1999+ )
2000+ if operation .response and operation .response .name :
2001+ # Session name format:
2002+ # projects/{p}/locations/{l}/reasoningEngines/{re}/sessions/{id}
2003+ return operation .response .name .split ("/" )[- 1 ]
2004+ elif operation .error :
2005+ raise RuntimeError (
2006+ f"Failed to create session via managed API: { operation .error } "
2007+ ) from exc
2008+ else :
2009+ raise RuntimeError (
2010+ "Failed to create session via managed API: "
2011+ "operation returned no response."
2012+ ) from exc
19532013
19542014
19552015def _execute_agent_run_with_retry (
@@ -1958,54 +2018,55 @@ def _execute_agent_run_with_retry(
19582018 agent_engine : types .AgentEngine ,
19592019 max_retries : int = 3 ,
19602020) -> Union [list [dict [str , Any ]], dict [str , Any ]]:
1961- """Executes agent run over agent engine for a single prompt."""
2021+ """Executes agent run over agent engine for a single prompt."""
2022+ try :
2023+ session_inputs = _get_session_inputs (row )
2024+ user_id = session_inputs .user_id
2025+ session_state = session_inputs .state
2026+ session_id = _create_agent_engine_session (
2027+ agent_engine = agent_engine ,
2028+ user_id = user_id ,
2029+ session_state = session_state ,
2030+ )
2031+ except KeyError as e :
2032+ return {"error" : f"Failed to get all required agent engine inputs: { e } " }
2033+ except Exception as e :
2034+ return {"error" : f"Failed to create a new session : { e } " }
2035+ for attempt in range (max_retries ):
19622036 try :
1963- session_inputs = _get_session_inputs (row )
1964- user_id = session_inputs .user_id
1965- session_state = session_inputs .state
1966- session = agent_engine .create_session ( # type: ignore[attr-defined]
1967- user_id = user_id ,
1968- state = session_state ,
1969- )
1970- except KeyError as e :
1971- return {"error" : f"Failed to get all required agent engine inputs: { e } " }
1972- except Exception as e :
1973- return {"error" : f"Failed to create a new session : { e } " }
1974- for attempt in range (max_retries ):
1975- try :
1976- responses = []
1977- for event in agent_engine .stream_query ( # type: ignore[attr-defined]
1978- user_id = user_id ,
1979- session_id = session ["id" ],
1980- message = contents ,
1981- ):
1982- if event and CONTENT in event and PARTS in event [CONTENT ]:
1983- responses .append (event )
1984- return responses
1985- except api_exceptions .ResourceExhausted as e :
1986- logger .warning (
1987- "Resource Exhausted error on attempt %d/%d: %s. Retrying in %s"
1988- " seconds..." ,
1989- attempt + 1 ,
1990- max_retries ,
1991- e ,
1992- 2 ** attempt ,
1993- )
1994- if attempt == max_retries - 1 :
1995- return {"error" : f"Resource exhausted after retries: { e } " }
1996- time .sleep (2 ** attempt )
1997- except Exception as e : # pylint: disable=broad-exception-caught
1998- logger .error (
2037+ responses = []
2038+ for event in agent_engine .stream_query ( # type: ignore[attr-defined]
2039+ user_id = user_id ,
2040+ session_id = session_id ,
2041+ message = contents ,
2042+ ):
2043+ if event and CONTENT in event and PARTS in event [CONTENT ]:
2044+ responses .append (event )
2045+ return responses
2046+ except api_exceptions .ResourceExhausted as e :
2047+ logger .warning (
2048+ "Resource Exhausted error on attempt %d/%d: %s. Retrying in %s"
2049+ " seconds..." ,
2050+ attempt + 1 ,
2051+ max_retries ,
2052+ e ,
2053+ 2 ** attempt ,
2054+ )
2055+ if attempt == max_retries - 1 :
2056+ return {"error" : f"Resource exhausted after retries: { e } " }
2057+ time .sleep (2 ** attempt )
2058+ except Exception as e : # pylint: disable=broad-exception-caught
2059+ logger .error (
19992060 "Unexpected error during generate_content on attempt %d/%d: %s" ,
20002061 attempt + 1 ,
20012062 max_retries ,
20022063 e ,
20032064 )
20042065
2005- if attempt == max_retries - 1 :
2006- return {"error" : f"Failed after retries: { e } " }
2007- time .sleep (1 )
2008- return {"error" : f"Failed to get agent run results after { max_retries } retries" }
2066+ if attempt == max_retries - 1 :
2067+ return {"error" : f"Failed after retries: { e } " }
2068+ time .sleep (1 )
2069+ return {"error" : f"Failed to get agent run results after { max_retries } retries" }
20092070
20102071
20112072def _execute_local_agent_run_with_retry (
0 commit comments