Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
098a973
feat: ark litellm client
zakahan Nov 5, 2025
92b170a
feat: use the patch method to pass it
zakahan Nov 5, 2025
d2b75fe
chore: organize the code
zakahan Nov 5, 2025
3603050
feat: openai sdk for responses api
zakahan Nov 5, 2025
15fb480
feat: use callback instead of patch
zakahan Nov 5, 2025
37bc971
feat: format patches
zakahan Nov 5, 2025
0ab4d40
feat: system-prompt
zakahan Nov 5, 2025
13564de
fix: async openai
zakahan Nov 6, 2025
5b9e6d7
fix: acompletion to aresponse
zakahan Nov 9, 2025
c668164
fix: stream type done but response.id
zakahan Nov 9, 2025
ecdc6f6
fix: send response.id usage metadata
zakahan Nov 10, 2025
102f88e
fix: add license header
zakahan Nov 10, 2025
d3132ec
fix: finish reason and chunk send
zakahan Nov 10, 2025
c802b24
fix: without instruction bug
zakahan Nov 10, 2025
8e4874d
fix: update google-adk version >=1.18
zakahan Nov 10, 2025
143ad50
fix: back
zakahan Nov 10, 2025
45a6070
fix: version and cache metadata
zakahan Nov 10, 2025
bd6658d
fix: multi-agent and multi llm_response scenario
zakahan Nov 11, 2025
8aa0ec7
fix: transport response_id by session state
zakahan Nov 11, 2025
894d4c3
fix: multi-agent bug
zakahan Nov 11, 2025
b453d48
fix: remove before_model_callback
zakahan Nov 12, 2025
44a249e
fix: clarify the transmission of response_id
zakahan Nov 12, 2025
c86cdbb
Merge branch 'main' into feat/responses_api_2
zakahan Nov 29, 2025
5045497
feat: enable persistent short-term memory to pass `response-id`
zakahan Nov 29, 2025
904204d
Merge branch 'main' into feat/responses_api_2
zakahan Dec 8, 2025
80a3b72
feat: add package
zakahan Dec 9, 2025
8f08e5a
fix: check litellm version
zakahan Dec 9, 2025
805c9da
Merge branch 'main' into feat/responses_api_2
zakahan Dec 17, 2025
a85b890
chore: litellm version for pyproject
zakahan Dec 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ dependencies = [
"a2a-sdk==0.3.7", # For Google Agent2Agent protocol
"deprecated==1.2.18",
"google-adk==1.19.0", # For basic agent architecture
"litellm==1.74.3", # For model inference
"litellm>=1.74.3", # For model inference
"loguru==0.7.3", # For better logging
"opentelemetry-exporter-otlp==1.37.0",
"opentelemetry-instrumentation-logging>=0.56b0",
Expand Down Expand Up @@ -73,6 +73,9 @@ dev = [
"pytest-xdist>=3.8.0",
]

responses = [
"litellm>=1.79.3"
]

[dependency-groups]
dev = [
Expand Down
64 changes: 56 additions & 8 deletions veadk/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from __future__ import annotations

import os
from typing import Optional, Union
from typing import Optional, Union, AsyncGenerator

# If user didn't set LITELLM_LOCAL_MODEL_COST_MAP, set it to True
# to enable local model cost map.
Expand All @@ -24,10 +24,12 @@
if not os.getenv("LITELLM_LOCAL_MODEL_COST_MAP"):
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"

from google.adk.agents import LlmAgent, RunConfig
from google.adk.agents import LlmAgent, RunConfig, InvocationContext
from google.adk.agents.base_agent import BaseAgent
from google.adk.agents.context_cache_config import ContextCacheConfig
from google.adk.agents.llm_agent import InstructionProvider, ToolUnion
from google.adk.agents.run_config import StreamingMode
from google.adk.events import Event, EventActions
from google.adk.models.lite_llm import LiteLlm
from google.adk.runners import Runner
from google.genai import types
Expand All @@ -52,6 +54,7 @@
from veadk.tracing.base_tracer import BaseTracer
from veadk.utils.logger import get_logger
from veadk.utils.patches import patch_asyncio, patch_tracer
from veadk.utils.misc import check_litellm_version
from veadk.version import VERSION

patch_tracer()
Expand Down Expand Up @@ -109,6 +112,10 @@ class Agent(LlmAgent):

tracers: list[BaseTracer] = []

enable_responses: bool = False

context_cache_config: Optional[ContextCacheConfig] = None

run_processor: Optional[BaseRunProcessor] = Field(default=None, exclude=True)
"""Optional run processor for intercepting and processing agent execution flows.

Expand Down Expand Up @@ -157,12 +164,31 @@ def model_post_init(self, __context: Any) -> None:
logger.info(f"Model extra config: {self.model_extra_config}")

if not self.model:
self.model = LiteLlm(
model=f"{self.model_provider}/{self.model_name}",
api_key=self.model_api_key,
api_base=self.model_api_base,
**self.model_extra_config,
)
if self.enable_responses:
min_version = "1.79.3"
check_litellm_version(min_version)

from veadk.models.ark_llm import ArkLlm

self.model = ArkLlm(
model=f"{self.model_provider}/{self.model_name}",
api_key=self.model_api_key,
api_base=self.model_api_base,
**self.model_extra_config,
)
if not self.context_cache_config:
self.context_cache_config = ContextCacheConfig(
cache_intervals=100, # maximum number
ttl_seconds=315360000,
min_tokens=0,
)
else:
self.model = LiteLlm(
model=f"{self.model_provider}/{self.model_name}",
api_key=self.model_api_key,
api_base=self.model_api_base,
**self.model_extra_config,
)
logger.debug(
f"LiteLLM client created with config: {self.model_extra_config}"
)
Expand Down Expand Up @@ -218,6 +244,28 @@ def model_post_init(self, __context: Any) -> None:
f"Agent: {self.model_dump(include={'name', 'model_name', 'model_api_base', 'tools'})}"
)

async def _run_async_impl(
    self, ctx: InvocationContext
) -> AsyncGenerator[Event, None]:
    """Run the underlying agent while persisting Responses-API cache ids.

    When ``enable_responses`` is on, the agent's own context-cache config is
    injected into the invocation context (unless one is already set), and
    every emitted event that carries cache metadata is followed by a
    synthetic event writing the response id into session state.

    Args:
        ctx: The invocation context for this run.

    Yields:
        Every event produced by ``super()._run_async_impl``, interleaved
        with response-id bookkeeping events when Responses mode is enabled.
    """
    if self.enable_responses and not ctx.context_cache_config:
        ctx.context_cache_config = self.context_cache_config

    async for produced in super()._run_async_impl(ctx):
        yield produced
        if not (self.enable_responses and produced.cache_metadata):
            continue
        # Record the response id in session state so a persistent
        # short-term-memory backend can rebuild cache metadata on reload.
        yield Event(
            invocation_id=produced.invocation_id,
            author=produced.author,
            actions=EventActions(
                state_delta={
                    "response_id": produced.cache_metadata.cache_name,
                }
            ),
        )

async def _run(
self,
runner,
Expand Down
21 changes: 21 additions & 0 deletions veadk/memory/short_term_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from veadk.memory.short_term_memory_backends.sqlite_backend import (
SQLiteSTMBackend,
)
from veadk.models.ark_transform import build_cache_metadata
from veadk.utils.logger import get_logger

logger = get_logger(__name__)
Expand All @@ -49,6 +50,21 @@ async def wrapper(*args, **kwargs):
setattr(obj, "get_session", wrapper)


def enable_responses_api_for_session_service(result, *args, **kwargs):
    """Rehydrate cache metadata on a loaded session from stored response ids.

    Intended as a ``get_session`` post-callback: for each event whose state
    delta recorded a ``response_id`` but which lacks cache metadata, rebuild
    the metadata via ``build_cache_metadata`` so the Responses API can reuse
    the cached response after the session is reloaded from a backend.

    Args:
        result: The value returned by ``get_session`` (mutated in place when
            it is a non-empty ``Session``; otherwise ignored).
    """
    if not (result and isinstance(result, Session)) or not result.events:
        return
    for evt in result.events:
        actions = evt.actions
        if not actions or not actions.state_delta or evt.cache_metadata:
            continue
        if "response_id" in actions.state_delta:
            evt.cache_metadata = build_cache_metadata(
                response_id=actions.state_delta.get("response_id"),
            )


class ShortTermMemory(BaseModel):
"""Short term memory for agent execution.

Expand Down Expand Up @@ -170,6 +186,11 @@ def model_post_init(self, __context: Any) -> None:
db_kwargs=self.db_kwargs, **self.backend_configs
).session_service

if self.backend != "local":
wrap_get_session_with_callbacks(
self._session_service, enable_responses_api_for_session_service
)

if self.after_load_memory_callback:
wrap_get_session_with_callbacks(
self._session_service, self.after_load_memory_callback
Expand Down
Loading