diff --git a/pyproject.toml b/pyproject.toml index 2251579d..45c6e921 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ dependencies = [ "a2a-sdk==0.3.7", # For Google Agent2Agent protocol "deprecated==1.2.18", "google-adk==1.19.0", # For basic agent architecture - "litellm==1.74.3", # For model inference + "litellm>=1.74.3", # For model inference "loguru==0.7.3", # For better logging "opentelemetry-exporter-otlp==1.37.0", "opentelemetry-instrumentation-logging>=0.56b0", @@ -73,6 +73,9 @@ dev = [ "pytest-xdist>=3.8.0", ] +responses = [ + "litellm>=1.79.3" +] [dependency-groups] dev = [ diff --git a/veadk/agent.py b/veadk/agent.py index 959139c5..c9b73a0e 100644 --- a/veadk/agent.py +++ b/veadk/agent.py @@ -15,7 +15,7 @@ from __future__ import annotations import os -from typing import Optional, Union +from typing import Optional, Union, AsyncGenerator # If user didn't set LITELLM_LOCAL_MODEL_COST_MAP, set it to True # to enable local model cost map. @@ -24,10 +24,12 @@ if not os.getenv("LITELLM_LOCAL_MODEL_COST_MAP"): os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" -from google.adk.agents import LlmAgent, RunConfig +from google.adk.agents import LlmAgent, RunConfig, InvocationContext from google.adk.agents.base_agent import BaseAgent +from google.adk.agents.context_cache_config import ContextCacheConfig from google.adk.agents.llm_agent import InstructionProvider, ToolUnion from google.adk.agents.run_config import StreamingMode +from google.adk.events import Event, EventActions from google.adk.models.lite_llm import LiteLlm from google.adk.runners import Runner from google.genai import types @@ -52,6 +54,7 @@ from veadk.tracing.base_tracer import BaseTracer from veadk.utils.logger import get_logger from veadk.utils.patches import patch_asyncio, patch_tracer +from veadk.utils.misc import check_litellm_version from veadk.version import VERSION patch_tracer() @@ -109,6 +112,10 @@ class Agent(LlmAgent): tracers: list[BaseTracer] = [] + enable_responses: bool = False + + context_cache_config: Optional[ContextCacheConfig] = None + run_processor: Optional[BaseRunProcessor] = Field(default=None, exclude=True) """Optional run processor for intercepting and processing agent execution flows. 
@@ -157,12 +164,31 @@ def model_post_init(self, __context: Any) -> None:
         logger.info(f"Model extra config: {self.model_extra_config}")
 
         if not self.model:
-            self.model = LiteLlm(
-                model=f"{self.model_provider}/{self.model_name}",
-                api_key=self.model_api_key,
-                api_base=self.model_api_base,
-                **self.model_extra_config,
-            )
+            if self.enable_responses:
+                min_version = "1.79.3"
+                check_litellm_version(min_version)
+
+                from veadk.models.ark_llm import ArkLlm
+
+                self.model = ArkLlm(
+                    model=f"{self.model_provider}/{self.model_name}",
+                    api_key=self.model_api_key,
+                    api_base=self.model_api_base,
+                    **self.model_extra_config,
+                )
+                if not self.context_cache_config:
+                    self.context_cache_config = ContextCacheConfig(
+                        cache_intervals=100,  # maximum value ADK allows
+                        ttl_seconds=315360000,  # ~10 years, effectively no expiry
+                        min_tokens=0,  # cache even the smallest prompts
+                    )
+            else:
+                self.model = LiteLlm(
+                    model=f"{self.model_provider}/{self.model_name}",
+                    api_key=self.model_api_key,
+                    api_base=self.model_api_base,
+                    **self.model_extra_config,
+                )
         logger.debug(
             f"LiteLLM client created with config: {self.model_extra_config}"
         )
@@ -218,6 +244,28 @@ def model_post_init(self, __context: Any) -> None:
             f"Agent: {self.model_dump(include={'name', 'model_name', 'model_api_base', 'tools'})}"
         )
 
+    async def _run_async_impl(
+        self, ctx: InvocationContext
+    ) -> AsyncGenerator[Event, None]:
+        if self.enable_responses and not ctx.context_cache_config:
+            ctx.context_cache_config = self.context_cache_config
+
+        async for event in super()._run_async_impl(ctx):
+            yield event
+            if self.enable_responses and event.cache_metadata:
+                # Persist the response id in session state so that short-term
+                # memory backends can rebuild cache metadata on session load
+                # (see enable_responses_api_for_session_service).
+                session_state_event = Event(
+                    invocation_id=event.invocation_id,
+                    author=event.author,
+                    actions=EventActions(
+                        state_delta={
+                            "response_id": event.cache_metadata.cache_name,
+                        }
+                    ),
+                )
+                yield session_state_event
+
     async def _run(
         self,
         runner,
diff --git a/veadk/memory/short_term_memory.py b/veadk/memory/short_term_memory.py
index 6b537e82..259ea928 100644
--- a/veadk/memory/short_term_memory.py
+++ b/veadk/memory/short_term_memory.py
@@ -32,6 +32,7 @@ from veadk.memory.short_term_memory_backends.sqlite_backend import (
     SQLiteSTMBackend,
 )
 
+from veadk.models.ark_transform import build_cache_metadata
 from veadk.utils.logger import get_logger
 
 logger = get_logger(__name__)
@@ -49,6 +50,21 @@ async def wrapper(*args, **kwargs):
     setattr(obj, "get_session", wrapper)
 
 
+def enable_responses_api_for_session_service(result, *args, **kwargs):
+    """Rebuild Responses API cache metadata on events loaded from a backend."""
+    if result and isinstance(result, Session):
+        if result.events:
+            for event in result.events:
+                if (
+                    event.actions
+                    and event.actions.state_delta
+                    and not event.cache_metadata
+                    and "response_id" in event.actions.state_delta
+                ):
+                    event.cache_metadata = build_cache_metadata(
+                        response_id=event.actions.state_delta.get("response_id"),
+                    )
+
+
 class ShortTermMemory(BaseModel):
     """Short term memory for agent execution.
 
@@ -170,6 +186,11 @@ def model_post_init(self, __context: Any) -> None:
                 db_kwargs=self.db_kwargs, **self.backend_configs
             ).session_service
 
+            if self.backend != "local":
+                wrap_get_session_with_callbacks(
+                    self._session_service, enable_responses_api_for_session_service
+                )
+
         if self.after_load_memory_callback:
             wrap_get_session_with_callbacks(
                 self._session_service, self.after_load_memory_callback
diff --git a/veadk/models/ark_llm.py b/veadk/models/ark_llm.py
new file mode 100644
index 00000000..54295b7e
--- /dev/null
+++ b/veadk/models/ark_llm.py
@@ -0,0 +1,252 @@
+# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Adapted from Google ADK's LiteLlm implementation:
+# https://github.com/google/adk-python/blob/f1f44675e4a86b75e72cfd838efd8a0399f23e24/src/google/adk/models/lite_llm.py
+
+import json
+from typing import Any, AsyncGenerator, Dict, Optional, Union
+
+import litellm
+import openai
+from openai.types.responses import Response as OpenAITypeResponse, ResponseStreamEvent
+from google.adk.models import LlmRequest, LlmResponse
+from google.adk.models.lite_llm import (
+    LiteLlm,
+    _get_completion_inputs,
+    FunctionChunk,
+    TextChunk,
+    _message_to_generate_content_response,
+    UsageMetadataChunk,
+)
+from google.genai import types
+from litellm import ChatCompletionAssistantMessage
+from litellm.types.utils import (
+    ChatCompletionMessageToolCall,
+    Function,
+)
+from pydantic import Field
+
+from veadk.models.ark_transform import (
+    CompletionToResponsesAPIHandler,
+)
+from veadk.utils.logger import get_logger
+
+# This will add functions to prompts if functions are provided.
+litellm.add_function_to_prompt = True
+
+logger = get_logger(__name__)
+
+
+class ArkLlmClient:
+    """Thin async client for an OpenAI-compatible Responses endpoint."""
+
+    async def aresponse(
+        self, **kwargs
+    ) -> Union[OpenAITypeResponse, openai.AsyncStream[ResponseStreamEvent]]:
+        # Pull the auth params out of the request kwargs.
+        api_base = kwargs.pop("api_base", None)
+        api_key = kwargs.pop("api_key", None)
+
+        # Call the OpenAI-compatible Responses endpoint.
+        client = openai.AsyncOpenAI(
+            base_url=api_base,
+            api_key=api_key,
+        )
+
+        raw_response = await client.responses.create(**kwargs)
+        return raw_response
+
+
+class ArkLlm(LiteLlm):
+    """LiteLlm subclass that talks to Volcano Ark via the OpenAI Responses API."""
+
+    llm_client: ArkLlmClient = Field(default_factory=ArkLlmClient)
+    _additional_args: Optional[Dict[str, Any]] = None
+    transform_handler: CompletionToResponsesAPIHandler = Field(
+        default_factory=CompletionToResponsesAPIHandler
+    )
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    async def generate_content_async(
+        self, llm_request: LlmRequest, stream: bool = False
+    ) -> AsyncGenerator[LlmResponse, None]:
+        """Generates content asynchronously.
+
+        Args:
+            llm_request: LlmRequest, the request to send to the model.
+            stream: bool = False, whether to stream the response.
+
+        Yields:
+            LlmResponse: The model response.
+        """
+        self._maybe_append_user_content(llm_request)
+
+        messages, tools, response_format, generation_params = _get_completion_inputs(
+            llm_request
+        )
+
+        if "functions" in self._additional_args:
+            # LiteLLM does not support both tools and functions together.
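+            # (Behavior inherited from the upstream ADK implementation: when
+            # callers pass `functions` through the additional args, those take
+            # precedence and `tools` must be dropped.)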
+ tools = None + # ------------------------------------------------------ # + # get previous_response_id + previous_response_id = None + if llm_request.cache_metadata and llm_request.cache_metadata.cache_name: + previous_response_id = llm_request.cache_metadata.cache_name + completion_args = { + "model": self.model, + "messages": messages, + "tools": tools, + "response_format": response_format, + "previous_response_id": previous_response_id, # supply previous_response_id + } + # ------------------------------------------------------ # + completion_args.update(self._additional_args) + + if generation_params: + completion_args.update(generation_params) + response_args = self.transform_handler.transform_request(**completion_args) + + if stream: + text = "" + # Track function calls by index + function_calls = {} # index -> {name, args, id} + response_args["stream"] = True + aggregated_llm_response = None + aggregated_llm_response_with_tool_call = None + usage_metadata = None + fallback_index = 0 + raw_response = await self.llm_client.aresponse(**response_args) + async for part in raw_response: + for ( + model_response, + chunk, + finish_reason, + ) in self.transform_handler.stream_event_to_chunk( + part, model=self.model + ): + if isinstance(chunk, FunctionChunk): + index = chunk.index or fallback_index + if index not in function_calls: + function_calls[index] = {"name": "", "args": "", "id": None} + + if chunk.name: + function_calls[index]["name"] += chunk.name + if chunk.args: + function_calls[index]["args"] += chunk.args + + # check if args is completed (workaround for improper chunk + # indexing) + try: + json.loads(function_calls[index]["args"]) + fallback_index += 1 + except json.JSONDecodeError: + pass + + function_calls[index]["id"] = ( + chunk.id or function_calls[index]["id"] or str(index) + ) + elif isinstance(chunk, TextChunk): + text += chunk.text + yield _message_to_generate_content_response( + ChatCompletionAssistantMessage( + role="assistant", + content=chunk.text, + ), + is_partial=True, + ) + elif isinstance(chunk, UsageMetadataChunk): + usage_metadata = types.GenerateContentResponseUsageMetadata( + prompt_token_count=chunk.prompt_tokens, + candidates_token_count=chunk.completion_tokens, + total_token_count=chunk.total_tokens, + ) + # ------------------------------------------------------ # + if model_response.get("usage", {}).get("prompt_tokens_details"): + usage_metadata.cached_content_token_count = ( + model_response.get("usage", {}) + .get("prompt_tokens_details") + .cached_tokens + ) + # ------------------------------------------------------ # + + if ( + finish_reason == "tool_calls" or finish_reason == "stop" + ) and function_calls: + tool_calls = [] + for index, func_data in function_calls.items(): + if func_data["id"]: + tool_calls.append( + ChatCompletionMessageToolCall( + type="function", + id=func_data["id"], + function=Function( + name=func_data["name"], + arguments=func_data["args"], + index=index, + ), + ) + ) + aggregated_llm_response_with_tool_call = ( + _message_to_generate_content_response( + ChatCompletionAssistantMessage( + role="assistant", + content=text, + tool_calls=tool_calls, + ) + ) + ) + self.transform_handler.adapt_responses_api( + model_response, + aggregated_llm_response_with_tool_call, + stream=True, + ) + text = "" + function_calls.clear() + elif finish_reason == "stop" and text: + aggregated_llm_response = _message_to_generate_content_response( + ChatCompletionAssistantMessage( + role="assistant", content=text + ) + ) + 
self.transform_handler.adapt_responses_api(
+                        model_response,
+                        aggregated_llm_response,
+                        stream=True,
+                    )
+                    text = ""
+
+            # Wait until streaming ends before yielding the aggregated responses:
+            # litellm tends to send the chunk carrying usage_metadata after the
+            # chunk whose finish_reason is set to tool_calls or stop.
+            if aggregated_llm_response:
+                if usage_metadata:
+                    aggregated_llm_response.usage_metadata = usage_metadata
+                    usage_metadata = None
+                yield aggregated_llm_response
+
+            if aggregated_llm_response_with_tool_call:
+                if usage_metadata:
+                    aggregated_llm_response_with_tool_call.usage_metadata = (
+                        usage_metadata
+                    )
+                yield aggregated_llm_response_with_tool_call
+
+        else:
+            raw_response = await self.llm_client.aresponse(**response_args)
+            for (
+                llm_response
+            ) in self.transform_handler.openai_response_to_generate_content_response(
+                llm_request, raw_response
+            ):
+                yield llm_response
diff --git a/veadk/models/ark_transform.py b/veadk/models/ark_transform.py
new file mode 100644
index 00000000..dc2301e5
--- /dev/null
+++ b/veadk/models/ark_transform.py
@@ -0,0 +1,397 @@
+# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Adapted from Google ADK's LiteLlm implementation:
+# https://github.com/google/adk-python/blob/f1f44675e4a86b75e72cfd838efd8a0399f23e24/src/google/adk/models/lite_llm.py
+
+import uuid
+from typing import Any, Dict, Generator, List, Optional, Tuple, Union, cast
+
+import litellm
+from google.adk.models import LlmRequest, LlmResponse
+from google.adk.models.cache_metadata import CacheMetadata
+from google.adk.models.lite_llm import (
+    TextChunk,
+    FunctionChunk,
+    UsageMetadataChunk,
+    _model_response_to_chunk,
+    _model_response_to_generate_content_response,
+)
+from openai.types.responses import (
+    Response as OpenAITypeResponse,
+    ResponseCompletedEvent,
+    ResponseFunctionToolCall,
+    ResponseOutputMessage,
+    ResponseStreamEvent,
+    ResponseTextDeltaEvent,
+)
+from litellm.completion_extras.litellm_responses_transformation.transformation import (
+    LiteLLMResponsesTransformationHandler,
+)
+from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
+from litellm.types.llms.openai import ResponsesAPIResponse
+from litellm.types.utils import (
+    ModelResponse,
+    LlmProviders,
+    Choices,
+    Message,
+)
+from litellm.utils import ProviderConfigManager
+
+from veadk.utils.logger import get_logger
+
+# This will add functions to prompts if functions are provided.
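+# NOTE: this flips a module-level litellm flag (ark_llm.py sets it as well),
+# so importing this module has a global side effect.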
+litellm.add_function_to_prompt = True
+
+logger = get_logger(__name__)
+
+
+openai_supported_fields = [
+    "stream",
+    "background",
+    "include",
+    "input",
+    "instructions",
+    "max_output_tokens",
+    "max_tool_calls",
+    "metadata",
+    "model",
+    "parallel_tool_calls",
+    "previous_response_id",
+    "prompt",
+    "prompt_cache_key",
+    "reasoning",
+    "safety_identifier",
+    "service_tier",
+    "store",
+    "stream_options",
+    "temperature",
+    "text",
+    "tool_choice",
+    "tools",
+    "top_logprobs",
+    "top_p",
+    "truncation",
+    "user",
+    "extra_headers",
+    "extra_query",
+    "extra_body",
+    "timeout",
+    # auth params
+    "api_key",
+    "api_base",
+]
+
+
+def ark_field_reorganization(request_data: dict) -> dict:
+    # [Note: Ark limitations] tools and previous_response_id
+    # Remove tools in subsequent rounds (when previous_response_id is present).
+    if (
+        "tools" in request_data
+        and "previous_response_id" in request_data
+        and request_data["previous_response_id"] is not None
+    ):
+        # Remove tools in subsequent rounds regardless of caching status.
+        del request_data["tools"]
+
+    # [Note: Ark limitations] caching and store
+    # Ensure `store` is true (or left at its default) when caching is enabled.
+    if (
+        "extra_body" in request_data
+        and isinstance(request_data["extra_body"], dict)
+        and "caching" in request_data["extra_body"]
+        and isinstance(request_data["extra_body"]["caching"], dict)
+        and request_data["extra_body"]["caching"].get("type") == "enabled"
+    ):
+        # Set store to true when caching is enabled for writing.
+        if "store" not in request_data:
+            request_data["store"] = True
+        elif request_data["store"] is False:
+            # Override false to true for cache writing.
+            request_data["store"] = True
+
+    # [Note: Ark limitations] instructions -> input (because of caching)
+    # On Volcano Ark, response caching conflicts with the `instructions` field:
+    # a system prompt must be sent as a system-role message inside `input`
+    # rather than via the `instructions` parameter.
+    # https://www.volcengine.com/docs/82379/1585128
+    instructions = request_data.pop("instructions", None)
+    if instructions:
+        request_data["input"] = [
+            {
+                "content": [{"text": instructions, "type": "input_text"}],
+                "role": "system",
+                "type": "message",
+            }
+        ] + request_data["input"]
+
+    return request_data
+
+
+def build_cache_metadata(response_id: str) -> CacheMetadata:
+    """Create a new CacheMetadata instance for agent response tracking.
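+
+    The response id is stored in ``cache_name``; on the next turn ArkLlm reads
+    it back and sends it as ``previous_response_id`` so the stored server-side
+    conversation can be resumed. Illustrative use:
+    ``build_cache_metadata("resp_123").cache_name == "resp_123"``.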
+
+    Args:
+        response_id: Response ID to track.
+
+    Returns:
+        A new CacheMetadata instance carrying the response id.
+    """
+    if "contents_count" in CacheMetadata.model_fields:  # adk >= 1.17
+        cache_metadata = CacheMetadata(
+            cache_name=response_id,
+            expire_time=0,
+            fingerprint="",
+            invocations_used=0,
+            contents_count=0,
+        )
+    else:  # 1.15 <= adk < 1.17
+        cache_metadata = CacheMetadata(
+            cache_name=response_id,
+            expire_time=0,
+            fingerprint="",
+            invocations_used=0,
+            cached_contents_count=0,
+        )
+    return cache_metadata
+
+
+class CompletionToResponsesAPIHandler:
+    """Translates Chat Completions-style requests/responses to the Responses API."""
+
+    def __init__(self):
+        self.litellm_handler = LiteLLMResponsesTransformationHandler()
+
+    def transform_request(
+        self, model: str, messages: list, tools: Optional[list], **kwargs
+    ) -> dict:
+        # Keep the first message (typically the system prompt) plus the trailing
+        # run of user/tool messages; earlier turns are replayed server-side via
+        # previous_response_id.
+        filtered_messages = messages[:1]
+
+        # Collect the consecutive user/tool messages from the end.
+        user_messages_from_end = []
+        for message in reversed(messages[1:]):  # Skip the first message
+            if message.get("role") in {"user", "tool"}:
+                user_messages_from_end.append(message)
+            else:
+                break  # Stop at the first non-user/tool message
+
+        # Reverse to restore the original order and add to filtered messages.
+        filtered_messages.extend(reversed(user_messages_from_end))
+
+        messages = filtered_messages
+        # Convert the completion-style request into a Responses API request.
+        # 1. model and custom_llm_provider
+        model, custom_llm_provider, _, _ = get_llm_provider(model=model)
+
+        # 2. input and instructions
+        if custom_llm_provider is not None and custom_llm_provider in [
+            provider.value for provider in LlmProviders
+        ]:
+            provider_config = ProviderConfigManager.get_provider_chat_config(
+                model=model, provider=LlmProviders(custom_llm_provider)
+            )
+            if provider_config is not None:
+                messages = provider_config.translate_developer_role_to_system_role(
+                    messages=messages
+                )
+
+        input_items, instructions = (
+            self.litellm_handler.convert_chat_completion_messages_to_responses_api(
+                messages
+            )
+        )
+        if tools is not None:
+            tools = self.litellm_handler._convert_tools_to_responses_format(
+                cast(List[Dict[str, Any]], tools)
+            )
+
+        response_args = {
+            "input": input_items,
+            "instructions": instructions,
+            "tools": tools,
+            "stream": kwargs.get("stream", False),
+            "model": model,
+            **kwargs,
+        }
+        result = {
+            key: value
+            for key, value in response_args.items()
+            if key in openai_supported_fields
+        }
+
+        # Drop fields Ark does not accept and apply the Ark-specific workarounds.
+        return ark_field_reorganization(result)
+
+    def transform_response(
+        self, openai_response: OpenAITypeResponse, stream: bool = False
+    ) -> list[ModelResponse]:
+        # openai_type_response -> responses_api_response -> completion_response
+        result_list = []
+        raw_response_list = construct_responses_api_response(openai_response)
+        for raw_response in raw_response_list:
+            model_response = ModelResponse(stream=stream)
+            setattr(model_response, "usage", litellm.Usage())
+            response = self.litellm_handler.transform_response(
+                model=raw_response.model,
+                raw_response=raw_response,
+                model_response=model_response,
+                logging_obj=None,
+                request_data={},
+                messages=[],
+                optional_params={},
+                litellm_params={},
+                encoding=None,
+            )
+            if raw_response and hasattr(raw_response, "id"):
+                response.id = raw_response.id
+            result_list.append(response)
+
+        return result_list
+
+    def openai_response_to_generate_content_response(
+        self, llm_request: LlmRequest, raw_response: OpenAITypeResponse
+    ) -> list[LlmResponse]:
+        """
+        Convert a non-streaming Responses API payload into LlmResponse objects
+        (OpenAITypeResponse -> litellm.ModelResponse -> LlmResponse), used in
+        place of calling `_model_response_to_generate_content_response` alone.
+        """
+        # Non-streaming response.
+        model_response_list = self.transform_response(
+            openai_response=raw_response, stream=False
+        )
+        llm_response_list = []
+        for model_response in model_response_list:
+            llm_response = _model_response_to_generate_content_response(model_response)
+
+            llm_response = self.adapt_responses_api(
+                model_response,
+                llm_response,
+            )
+            llm_response_list.append(llm_response)
+        return llm_response_list
+
+    def adapt_responses_api(
+        self,
+        model_response: ModelResponse,
+        llm_response: LlmResponse,
+        stream: bool = False,
+    ):
+        """
+        Attach Responses API metadata (response id, cached token count) to an
+        LlmResponse.
+        """
+        # A Responses API id (as opposed to a synthetic "chatcmpl-..." id)
+        # identifies a stored response that the next turn can resume from.
+        if not model_response.id.startswith("chatcmpl"):
+            previous_response_id = model_response["id"]
+            llm_response.cache_metadata = build_cache_metadata(
+                previous_response_id,
+            )
+        # Add the cached-token accounting from the Responses API usage block.
+        if not stream:
+            if model_response.get("usage", {}).get("prompt_tokens_details"):
+                if llm_response.usage_metadata:
+                    llm_response.usage_metadata.cached_content_token_count = (
+                        model_response.get("usage", {})
+                        .get("prompt_tokens_details")
+                        .cached_tokens
+                    )
+        return llm_response
+
+    def stream_event_to_chunk(
+        self, event: ResponseStreamEvent, model: str
+    ) -> Generator[
+        Tuple[
+            ModelResponse,
+            Optional[Union[TextChunk, FunctionChunk, UsageMetadataChunk]],
+            Optional[str],
+        ],
+        None,
+        None,
+    ]:
+        """
+        Map Responses API stream events to chunks. Used in place of
+        `_model_response_to_chunk` so the Responses API is supported.
+        """
+        choices = []
+
+        if isinstance(event, ResponseTextDeltaEvent):
+            delta = Message(content=event.delta)
+            choices.append(
+                Choices(delta=delta, index=event.output_index, finish_reason=None)
+            )
+            model_response = ModelResponse(
+                stream=True, choices=choices, model=model, id=str(uuid.uuid4())
+            )
+            for chunk, _ in _model_response_to_chunk(model_response):
+                # Delta text, not finished yet.
+                yield model_response, chunk, None
+        elif isinstance(event, ResponseCompletedEvent):
+            response = event.response
+            model_response_list = self.transform_response(response, stream=True)
+            for model_response in model_response_list:
+                model_response = fix_model_response(model_response)
+
+                for chunk, finish_reason in _model_response_to_chunk(model_response):
+                    if isinstance(chunk, TextChunk):
+                        # Text was already streamed as partial chunks above;
+                        # only surface the finish_reason here.
+                        yield model_response, None, finish_reason
+                    else:
+                        yield model_response, chunk, finish_reason
+        else:
+            # Ignore other event types (ResponseOutputItemAddedEvent, etc.).
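+            # Everything needed downstream arrives via the delta and completed
+            # events handled above.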
+            pass
+
+
+def fix_model_response(model_response: ModelResponse) -> ModelResponse:
+    """
+    Work around tool_calls that lack an `index` attribute, which
+    `_model_response_to_chunk` requires.
+    """
+    for i, choice in enumerate(model_response.choices):
+        if choice.message.tool_calls:
+            for idx, tool_call in enumerate(choice.message.tool_calls):
+                if tool_call.get("index") is None:
+                    model_response.choices[i].message.tool_calls[idx].index = 0
+
+    return model_response
+
+
+def construct_responses_api_response(
+    openai_response: OpenAITypeResponse,
+) -> list[ResponsesAPIResponse]:
+    output = openai_response.output
+
+    # Check whether we need to split the response.
+    if len(output) >= 2:
+        # Check if output contains both ResponseOutputMessage and
+        # ResponseFunctionToolCall items.
+        has_message = any(isinstance(item, ResponseOutputMessage) for item in output)
+        has_tool_call = any(
+            isinstance(item, ResponseFunctionToolCall) for item in output
+        )
+
+        if has_message and has_tool_call:
+            # Split into a separate response per item.
+            raw_response_list = []
+            for item in output:
+                if isinstance(item, (ResponseOutputMessage, ResponseFunctionToolCall)):
+                    raw_response_list.append(
+                        ResponsesAPIResponse(
+                            **{
+                                k: v
+                                for k, v in openai_response.model_dump().items()
+                                if k != "output"
+                            },
+                            output=[item],
+                        )
+                    )
+            return raw_response_list
+
+    # Otherwise, return the original response structure.
+    return [ResponsesAPIResponse(**openai_response.model_dump())]
diff --git a/veadk/utils/misc.py b/veadk/utils/misc.py
index 24866922..6c20cf0c 100644
--- a/veadk/utils/misc.py
+++ b/veadk/utils/misc.py
@@ -182,3 +182,35 @@ def get_agent_dir():
         full_path = os.getcwd()
 
     return full_path
+
+
+def check_litellm_version(min_version: str):
+    """
+    Check that the installed litellm version meets the minimum requirement.
+
+    Args:
+        min_version (str): The minimum required version of litellm.
+    """
+    try:
+        from packaging.version import InvalidVersion
+        from packaging.version import parse as parse_version
+        import pkg_resources
+    except ImportError:
+        # Keep this separate from the checks below so a missing litellm is not
+        # misreported as missing packaging/setuptools.
+        raise ImportError(
+            "packaging or pkg_resources not found. Please install them: pip install packaging setuptools"
+        ) from None
+
+    try:
+        installed = parse_version(pkg_resources.get_distribution("litellm").version)
+    except pkg_resources.DistributionNotFound:
+        raise ImportError(
+            "litellm installation not detected, please install it first: pip install 'litellm>=1.79.3'"
+        ) from None
+    except InvalidVersion as e:
+        raise ValueError(f"Invalid format of litellm version number: {e}") from None
+
+    if installed < parse_version(min_version):
+        raise ValueError(
+            "You have set `enable_responses=True`. The Responses API requires "
+            f"litellm>={min_version}; install the optional dependencies:"
+            "\npip install 'veadk-python[responses]'"
+        )
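
Usage sketch (illustrative, not part of the patch): how the new
`enable_responses` flag is meant to be switched on. The agent name below is a
placeholder, and model credentials are assumed to come from the usual VeADK
config/env:

    from veadk.agent import Agent

    # Requires the optional extra: pip install 'veadk-python[responses]'
    agent = Agent(
        name="demo_agent",       # hypothetical name
        enable_responses=True,   # route model calls through ArkLlm (Responses API)
    )
    # When context_cache_config is left unset, model_post_init fills in a
    # long-lived ContextCacheConfig automatically (see veadk/agent.py above).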