Skip to content

Commit 3abde98

Browse files
authored
Move limit middlewares from splunklib.ai.hooks to splunklib.ai.limits (#759)
Also changed the TokenLimitExceededException to accept an int, instead of a float.
1 parent 92cd2a8 commit 3abde98

7 files changed

Lines changed: 194 additions & 167 deletions

File tree

splunklib/ai/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -958,7 +958,7 @@ class. The default for that limit is suppressed automatically - the other defaul
958958
remain active:
959959

960960
```py
961-
from splunklib.ai.hooks import (
961+
from splunklib.ai.limits import (
962962
TokenLimitMiddleware,
963963
StepLimitMiddleware,
964964
TimeoutLimitMiddleware,

splunklib/ai/base_agent.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from pydantic import BaseModel
2222

2323
from splunklib.ai.conversation_store import ConversationStore
24-
from splunklib.ai.hooks import (
24+
from splunklib.ai.limits import (
2525
DEFAULT_STEP_LIMIT,
2626
DEFAULT_STRUCTURED_OUTPUT_RETRY_LIMIT,
2727
DEFAULT_TIMEOUT_SECONDS,

splunklib/ai/hooks.py

Lines changed: 0 additions & 159 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import inspect
22
from collections.abc import Awaitable, Callable
3-
from time import monotonic
43
from typing import Any, override
54

65
from splunklib.ai.messages import AgentResponse
@@ -12,44 +11,6 @@
1211
ModelRequest,
1312
ModelResponse,
1413
)
15-
from splunklib.ai.structured_output import StructuredOutputGenerationException
16-
17-
DEFAULT_TIMEOUT_SECONDS: float = 600.0
18-
DEFAULT_STEP_LIMIT: int = 100
19-
DEFAULT_TOKEN_LIMIT: int = 200_000
20-
DEFAULT_STRUCTURED_OUTPUT_RETRY_LIMIT: int = 3
21-
22-
23-
class AgentStopException(Exception):
24-
"""Custom exception to indicate conversation stopping conditions."""
25-
26-
27-
class TokenLimitExceededException(AgentStopException):
28-
"""Raised by `Agent.invoke`, when token limit exceeds"""
29-
30-
def __init__(self, token_limit: float) -> None:
31-
super().__init__(f"Token limit of {token_limit} exceeded.")
32-
33-
34-
class StepsLimitExceededException(AgentStopException):
35-
"""Raised by `Agent.invoke`, when steps limit exceeds"""
36-
37-
def __init__(self, steps_limit: int) -> None:
38-
super().__init__(f"Steps limit of {steps_limit} exceeded.")
39-
40-
41-
class TimeoutExceededException(AgentStopException):
42-
"""Raised by `Agent.invoke`, when timeout exceeds"""
43-
44-
def __init__(self, timeout_seconds: float) -> None:
45-
super().__init__(f"Timed out after {timeout_seconds} seconds.")
46-
47-
48-
class StructuredOutputRetryLimitExceededException(AgentStopException):
49-
"""Raised by `Agent.invoke`, when structured output retry limit exceeds"""
50-
51-
def __init__(self, retry_count: int) -> None:
52-
super().__init__(f"Structured output retry limit of {retry_count} exceeded")
5314

5415

5516
def before_model(
@@ -132,123 +93,3 @@ async def agent_middleware(
13293
return handler_response
13394

13495
return _Middleware()
135-
136-
137-
class TokenLimitMiddleware(AgentMiddleware):
138-
"""Stops agent execution when the token count of messages passed to the model exceeds the given limit."""
139-
140-
_limit: int
141-
142-
def __init__(self, limit: int) -> None:
143-
self._limit = limit
144-
145-
@override
146-
async def model_middleware(
147-
self,
148-
request: ModelRequest,
149-
handler: ModelMiddlewareHandler,
150-
) -> ModelResponse:
151-
if request.state.token_count >= self._limit:
152-
raise TokenLimitExceededException(token_limit=self._limit)
153-
return await handler(request)
154-
155-
156-
class StepLimitMiddleware(AgentMiddleware):
157-
"""Stops agent execution when the number of steps taken reaches the given limit."""
158-
159-
_limit: int
160-
161-
def __init__(self, limit: int) -> None:
162-
self._limit = limit
163-
164-
@override
165-
async def model_middleware(
166-
self,
167-
request: ModelRequest,
168-
handler: ModelMiddlewareHandler,
169-
) -> ModelResponse:
170-
if request.state.total_steps >= self._limit:
171-
raise StepsLimitExceededException(steps_limit=self._limit)
172-
return await handler(request)
173-
174-
175-
class TimeoutLimitMiddleware(AgentMiddleware):
176-
"""Stops agent execution when wall-clock time within an invoke exceeds the given seconds.
177-
178-
The deadline resets on every invoke call - it measures time from the start of
179-
each invocation, not from agent construction.
180-
181-
Do not share instances between agents.
182-
"""
183-
184-
_seconds: float
185-
_deadline_per_thread_id: dict[str, float]
186-
187-
def __init__(self, seconds: float) -> None:
188-
self._seconds = seconds
189-
self._deadline_per_thread_id = {}
190-
191-
@override
192-
async def agent_middleware(
193-
self,
194-
request: AgentRequest,
195-
handler: AgentMiddlewareHandler,
196-
) -> AgentResponse[Any | None]:
197-
try:
198-
# Agent loop starting.
199-
self._deadline_per_thread_id[request.thread_id] = (
200-
monotonic() + self._seconds
201-
)
202-
return await handler(request)
203-
finally:
204-
del self._deadline_per_thread_id[request.thread_id] # don't leak memory
205-
206-
@override
207-
async def model_middleware(
208-
self,
209-
request: ModelRequest,
210-
handler: ModelMiddlewareHandler,
211-
) -> ModelResponse:
212-
if monotonic() >= self._deadline_per_thread_id[request.state.thread_id]:
213-
raise TimeoutExceededException(timeout_seconds=self._seconds)
214-
return await handler(request)
215-
216-
217-
class StructuredOutputRetryLimitMiddleware(AgentMiddleware):
218-
"""Stops agent execution when the agent exceeds structured output
219-
retry limit during a single agent loop invocation. Pass 0 to disable retries.
220-
"""
221-
222-
_limit: int
223-
_retries_per_thread_id: dict[str, int]
224-
225-
def __init__(self, limit: int) -> None:
226-
self._limit = limit
227-
self._retries_per_thread_id = {}
228-
229-
@override
230-
async def agent_middleware(
231-
self,
232-
request: AgentRequest,
233-
handler: AgentMiddlewareHandler,
234-
) -> AgentResponse[Any | None]:
235-
try:
236-
# Agent loop starting.
237-
self._retries_per_thread_id[request.thread_id] = 0
238-
return await handler(request)
239-
finally:
240-
del self._retries_per_thread_id[request.thread_id] # don't leak memory
241-
242-
@override
243-
async def model_middleware(
244-
self,
245-
request: ModelRequest,
246-
handler: ModelMiddlewareHandler,
247-
) -> ModelResponse:
248-
try:
249-
return await handler(request)
250-
except StructuredOutputGenerationException:
251-
self._retries_per_thread_id[request.state.thread_id] += 1
252-
if self._retries_per_thread_id[request.state.thread_id] > self._limit:
253-
raise StructuredOutputRetryLimitExceededException(self._limit)
254-
raise # re-raise, to retry structured output generation

splunklib/ai/limits.py

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
# Copyright © 2011-2026 Splunk, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"): you may
4+
# not use this file except in compliance with the License. You may obtain
5+
# a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12+
# License for the specific language governing permissions and limitations
13+
# under the License.
14+
15+
from time import monotonic
16+
from typing import Any, override
17+
18+
from splunklib.ai.messages import AgentResponse
19+
from splunklib.ai.middleware import (
20+
AgentMiddleware,
21+
AgentMiddlewareHandler,
22+
AgentRequest,
23+
ModelMiddlewareHandler,
24+
ModelRequest,
25+
ModelResponse,
26+
)
27+
from splunklib.ai.structured_output import StructuredOutputGenerationException
28+
29+
DEFAULT_TIMEOUT_SECONDS: float = 600.0
30+
DEFAULT_STEP_LIMIT: int = 100
31+
DEFAULT_TOKEN_LIMIT: int = 200_000
32+
DEFAULT_STRUCTURED_OUTPUT_RETRY_LIMIT: int = 3
33+
34+
35+
class AgentStopException(Exception):
36+
"""Custom exception to indicate conversation stopping conditions."""
37+
38+
39+
class TokenLimitExceededException(AgentStopException):
40+
"""Raised by `Agent.invoke` when the token limit is exceeded."""
41+
42+
def __init__(self, token_limit: int) -> None:
43+
super().__init__(f"Token limit of {token_limit} exceeded.")
44+
45+
46+
class StepsLimitExceededException(AgentStopException):
47+
"""Raised by `Agent.invoke` when the step limit is exceeded."""
48+
49+
def __init__(self, steps_limit: int) -> None:
50+
super().__init__(f"Steps limit of {steps_limit} exceeded.")
51+
52+
53+
class TimeoutExceededException(AgentStopException):
54+
"""Raised by `Agent.invoke` when the timeout is exceeded."""
55+
56+
def __init__(self, timeout_seconds: float) -> None:
57+
super().__init__(f"Timed out after {timeout_seconds} seconds.")
58+
59+
60+
class StructuredOutputRetryLimitExceededException(AgentStopException):
61+
"""Raised by `Agent.invoke` when the structured output retry limit is exceeded."""
62+
63+
def __init__(self, retry_count: int) -> None:
64+
super().__init__(f"Structured output retry limit of {retry_count} exceeded")
65+
66+
67+
class TokenLimitMiddleware(AgentMiddleware):
68+
"""Stops agent execution when the token count of messages passed to the model exceeds the given limit."""
69+
70+
_limit: int
71+
72+
def __init__(self, limit: int) -> None:
73+
self._limit = limit
74+
75+
@override
76+
async def model_middleware(
77+
self,
78+
request: ModelRequest,
79+
handler: ModelMiddlewareHandler,
80+
) -> ModelResponse:
81+
if request.state.token_count >= self._limit:
82+
raise TokenLimitExceededException(token_limit=self._limit)
83+
return await handler(request)
84+
85+
86+
class StepLimitMiddleware(AgentMiddleware):
87+
"""Stops agent execution when the number of steps taken reaches the given limit."""
88+
89+
_limit: int
90+
91+
def __init__(self, limit: int) -> None:
92+
self._limit = limit
93+
94+
@override
95+
async def model_middleware(
96+
self,
97+
request: ModelRequest,
98+
handler: ModelMiddlewareHandler,
99+
) -> ModelResponse:
100+
if request.state.total_steps >= self._limit:
101+
raise StepsLimitExceededException(steps_limit=self._limit)
102+
return await handler(request)
103+
104+
105+
class TimeoutLimitMiddleware(AgentMiddleware):
106+
"""Stops agent execution when wall-clock time within an invoke exceeds the given seconds.
107+
108+
The deadline resets on every invoke call - it measures time from the start of
109+
each invocation, not from agent construction.
110+
111+
Do not share instances between agents.
112+
"""
113+
114+
_seconds: float
115+
_deadline_per_thread_id: dict[str, float]
116+
117+
def __init__(self, seconds: float) -> None:
118+
self._seconds = seconds
119+
self._deadline_per_thread_id = {}
120+
121+
@override
122+
async def agent_middleware(
123+
self,
124+
request: AgentRequest,
125+
handler: AgentMiddlewareHandler,
126+
) -> AgentResponse[Any | None]:
127+
try:
128+
# Agent loop starting.
129+
self._deadline_per_thread_id[request.thread_id] = (
130+
monotonic() + self._seconds
131+
)
132+
return await handler(request)
133+
finally:
134+
del self._deadline_per_thread_id[request.thread_id] # don't leak memory
135+
136+
@override
137+
async def model_middleware(
138+
self,
139+
request: ModelRequest,
140+
handler: ModelMiddlewareHandler,
141+
) -> ModelResponse:
142+
if monotonic() >= self._deadline_per_thread_id[request.state.thread_id]:
143+
raise TimeoutExceededException(timeout_seconds=self._seconds)
144+
return await handler(request)
145+
146+
147+
class StructuredOutputRetryLimitMiddleware(AgentMiddleware):
148+
"""Stops agent execution when the agent exceeds structured output
149+
retry limit during a single agent loop invocation. Pass 0 to disable retries.
150+
"""
151+
152+
_limit: int
153+
_retries_per_thread_id: dict[str, int]
154+
155+
def __init__(self, limit: int) -> None:
156+
self._limit = limit
157+
self._retries_per_thread_id = {}
158+
159+
@override
160+
async def agent_middleware(
161+
self,
162+
request: AgentRequest,
163+
handler: AgentMiddlewareHandler,
164+
) -> AgentResponse[Any | None]:
165+
try:
166+
# Agent loop starting.
167+
self._retries_per_thread_id[request.thread_id] = 0
168+
return await handler(request)
169+
finally:
170+
del self._retries_per_thread_id[request.thread_id] # don't leak memory
171+
172+
@override
173+
async def model_middleware(
174+
self,
175+
request: ModelRequest,
176+
handler: ModelMiddlewareHandler,
177+
) -> ModelResponse:
178+
try:
179+
return await handler(request)
180+
except StructuredOutputGenerationException:
181+
self._retries_per_thread_id[request.state.thread_id] += 1
182+
if self._retries_per_thread_id[request.state.thread_id] > self._limit:
183+
raise StructuredOutputRetryLimitExceededException(self._limit)
184+
raise # re-raise, to retry structured output generation

0 commit comments

Comments
 (0)