Skip to content

Commit ee9570a

Browse files
fix: make endpoint_path required and document parameter
Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
1 parent 6827c1f commit ee9570a

4 files changed

Lines changed: 40 additions & 29 deletions

File tree

src/app/endpoints/responses.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -816,6 +816,7 @@ async def response_generator(
816816
turn_summary: TurnSummary to populate during streaming
817817
inline_rag_context: Inline RAG context to be used for the response
818818
filter_server_tools: Whether to filter server-deployed MCP tool events from the stream
819+
endpoint_path: API endpoint path used for metric labeling.
819820
Yields:
820821
SSE-formatted strings for streaming events, ending with [DONE]
821822
"""

src/app/endpoints/streaming_query.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -318,7 +318,7 @@ async def streaming_query_endpoint_handler( # pylint: disable=too-many-locals
318318
async def retrieve_response_generator(
319319
responses_params: ResponsesApiParams,
320320
context: ResponseGeneratorContext,
321-
endpoint_path: str = "",
321+
endpoint_path: str,
322322
) -> tuple[AsyncIterator[str], TurnSummary]:
323323
"""
324324
Retrieve the appropriate response generator.
@@ -330,6 +330,7 @@ async def retrieve_response_generator(
330330
Args:
331331
responses_params: The Responses API parameters
332332
context: The response generator context
333+
endpoint_path: API endpoint path used for metric labeling.
333334
Returns:
334335
tuple[AsyncIterator[str], TurnSummary]: The response generator and turn summary
335336
@@ -689,7 +690,7 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat
689690
turn_response: AsyncIterator[OpenAIResponseObjectStream],
690691
context: ResponseGeneratorContext,
691692
turn_summary: TurnSummary,
692-
endpoint_path: str = "",
693+
endpoint_path: str,
693694
) -> AsyncIterator[str]:
694695
"""Generate SSE formatted streaming response.
695696
@@ -701,6 +702,7 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat
701702
turn_response: The streaming response from Llama Stack
702703
context: The response generator context
703704
turn_summary: TurnSummary to populate during streaming
705+
endpoint_path: API endpoint path used for metric labeling.
704706
705707
Yields:
706708
SSE-formatted strings for tokens, tool calls, tool results,

src/metrics/recording.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ def record_rest_api_call(path: str, status_code: int) -> None:
4444
logger.warning("Failed to update REST API call metric", exc_info=True)
4545

4646

47-
def record_llm_call(provider: str, model: str, endpoint_path: str = "") -> None:
47+
def record_llm_call(provider: str, model: str, endpoint_path: str) -> None:
4848
"""Record one LLM call for a provider and model.
4949
5050
Args:
@@ -58,7 +58,7 @@ def record_llm_call(provider: str, model: str, endpoint_path: str = "") -> None:
5858
logger.warning("Failed to update LLM call metric", exc_info=True)
5959

6060

61-
def record_llm_failure(provider: str, model: str, endpoint_path: str = "") -> None:
61+
def record_llm_failure(provider: str, model: str, endpoint_path: str) -> None:
6262
"""Record one failed LLM call for a provider and model.
6363
6464
Args:
@@ -89,7 +89,7 @@ def record_llm_token_usage(
8989
model: str,
9090
input_tokens: int,
9191
output_tokens: int,
92-
endpoint_path: str = "",
92+
endpoint_path: str,
9393
) -> None:
9494
"""Record LLM token usage for a provider and model.
9595

tests/unit/app/endpoints/test_streaming_query.py

Lines changed: 32 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -855,7 +855,7 @@ async def mock_response_generator(
855855
)
856856

857857
generator, turn_summary = await retrieve_response_generator(
858-
mock_responses_params, mock_context
858+
mock_responses_params, mock_context, endpoint_path=""
859859
)
860860

861861
assert isinstance(turn_summary, TurnSummary)
@@ -894,7 +894,7 @@ async def test_retrieve_response_generator_shield_blocked(
894894
)
895895

896896
_generator, turn_summary = await retrieve_response_generator(
897-
mock_responses_params, mock_context
897+
mock_responses_params, mock_context, endpoint_path=""
898898
)
899899

900900
assert isinstance(turn_summary, TurnSummary)
@@ -949,7 +949,9 @@ async def test_retrieve_response_generator_connection_error(
949949
)
950950

951951
with pytest.raises(HTTPException) as exc_info:
952-
await retrieve_response_generator(mock_responses_params, mock_context)
952+
await retrieve_response_generator(
953+
mock_responses_params, mock_context, endpoint_path=""
954+
)
953955

954956
assert exc_info.value.status_code == 503
955957

@@ -999,7 +1001,9 @@ async def test_retrieve_response_generator_api_status_error(
9991001
)
10001002

10011003
with pytest.raises(HTTPException) as exc_info:
1002-
await retrieve_response_generator(mock_responses_params, mock_context)
1004+
await retrieve_response_generator(
1005+
mock_responses_params, mock_context, endpoint_path=""
1006+
)
10031007

10041008
assert exc_info.value.status_code == 500
10051009

@@ -1046,7 +1050,9 @@ async def test_retrieve_response_generator_runtime_error_context_length(
10461050
)
10471051

10481052
with pytest.raises(HTTPException) as exc_info:
1049-
await retrieve_response_generator(mock_responses_params, mock_context)
1053+
await retrieve_response_generator(
1054+
mock_responses_params, mock_context, endpoint_path=""
1055+
)
10501056

10511057
assert exc_info.value.status_code == 413
10521058

@@ -1083,7 +1089,9 @@ async def test_retrieve_response_generator_runtime_error_other(
10831089
)
10841090

10851091
with pytest.raises(RuntimeError):
1086-
await retrieve_response_generator(mock_responses_params, mock_context)
1092+
await retrieve_response_generator(
1093+
mock_responses_params, mock_context, endpoint_path=""
1094+
)
10871095

10881096

10891097
class TestGenerateResponse:
@@ -1870,7 +1878,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]:
18701878

18711879
result = []
18721880
async for item in response_generator(
1873-
mock_turn_response(), mock_context, mock_turn_summary
1881+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
18741882
):
18751883
result.append(item)
18761884

@@ -1900,7 +1908,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]:
19001908

19011909
result = []
19021910
async for item in response_generator(
1903-
mock_turn_response(), mock_context, mock_turn_summary
1911+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
19041912
):
19051913
result.append(item)
19061914

@@ -1938,7 +1946,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]:
19381946
)
19391947

19401948
async for _ in response_generator(
1941-
mock_turn_response(), mock_context, mock_turn_summary
1949+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
19421950
):
19431951
pass
19441952

@@ -1980,7 +1988,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]:
19801988

19811989
result = []
19821990
async for item in response_generator(
1983-
mock_turn_response(), mock_context, mock_turn_summary
1991+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
19841992
):
19851993
result.append(item)
19861994

@@ -2029,7 +2037,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]:
20292037

20302038
result = []
20312039
async for item in response_generator(
2032-
mock_turn_response(), mock_context, mock_turn_summary
2040+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
20332041
):
20342042
result.append(item)
20352043

@@ -2080,7 +2088,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]:
20802088

20812089
result = []
20822090
async for item in response_generator(
2083-
mock_turn_response(), mock_context, mock_turn_summary
2091+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
20842092
):
20852093
result.append(item)
20862094

@@ -2123,7 +2131,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]:
21232131
)
21242132

21252133
async for _ in response_generator(
2126-
mock_turn_response(), mock_context, mock_turn_summary
2134+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
21272135
):
21282136
pass
21292137

@@ -2172,7 +2180,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]:
21722180

21732181
result = []
21742182
async for item in response_generator(
2175-
mock_turn_response(), mock_context, mock_turn_summary
2183+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
21762184
):
21772185
result.append(item)
21782186

@@ -2218,7 +2226,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]:
22182226

22192227
result = []
22202228
async for item in response_generator(
2221-
mock_turn_response(), mock_context, mock_turn_summary
2229+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
22222230
):
22232231
result.append(item)
22242232

@@ -2263,7 +2271,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]:
22632271

22642272
result = []
22652273
async for item in response_generator(
2266-
mock_turn_response(), mock_context, mock_turn_summary
2274+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
22672275
):
22682276
result.append(item)
22692277

@@ -2306,7 +2314,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]:
23062314

23072315
result = []
23082316
async for item in response_generator(
2309-
mock_turn_response(), mock_context, mock_turn_summary
2317+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
23102318
):
23112319
result.append(item)
23122320

@@ -2350,7 +2358,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]:
23502358

23512359
result = []
23522360
async for item in response_generator(
2353-
mock_turn_response(), mock_context, mock_turn_summary
2361+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
23542362
):
23552363
result.append(item)
23562364

@@ -2392,7 +2400,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]:
23922400

23932401
result = []
23942402
async for item in response_generator(
2395-
mock_turn_response(), mock_context, mock_turn_summary
2403+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
23962404
):
23972405
result.append(item)
23982406

@@ -2445,7 +2453,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]:
24452453
)
24462454

24472455
async for _ in response_generator(
2448-
mock_turn_response(), mock_context, mock_turn_summary
2456+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
24492457
):
24502458
pass
24512459

@@ -2572,7 +2580,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]:
25722580

25732581
result = []
25742582
async for item in response_generator(
2575-
mock_turn_response(), mock_context, mock_turn_summary
2583+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
25762584
):
25772585
result.append(item)
25782586

@@ -2635,7 +2643,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]:
26352643

26362644
result = []
26372645
async for item in response_generator(
2638-
mock_turn_response(), mock_context, mock_turn_summary
2646+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
26392647
):
26402648
result.append(item)
26412649

@@ -2728,7 +2736,7 @@ def build_mcp_tool_call_side_effect(
27282736

27292737
result = []
27302738
async for item in response_generator(
2731-
mock_turn_response(), mock_context, mock_turn_summary
2739+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
27322740
):
27332741
result.append(item)
27342742

@@ -2798,7 +2806,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]:
27982806

27992807
result = []
28002808
async for item in response_generator(
2801-
mock_turn_response(), mock_context, mock_turn_summary
2809+
mock_turn_response(), mock_context, mock_turn_summary, endpoint_path=""
28022810
):
28032811
result.append(item)
28042812

0 commit comments

Comments
 (0)