Skip to content

Commit bc59b98

Browse files
authored
Merge pull request #1029 from asimurka/tool_call_extraction_improvement
Restored rag_chunks attribute in query response
2 parents 6132941 + c8cb85a commit bc59b98

5 files changed

Lines changed: 61 additions & 6 deletions

File tree

docs/openapi.json

Lines changed: 48 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7648,6 +7648,14 @@
76487648
"Kubernetes is an open-source container orchestration system for automating ..."
76497649
]
76507650
},
7651+
"rag_chunks": {
7652+
"items": {
7653+
"$ref": "#/components/schemas/RAGChunk"
7654+
},
7655+
"type": "array",
7656+
"title": "Rag Chunks",
7657+
"description": "Deprecated: List of RAG chunks used to generate the response."
7658+
},
76517659
"referenced_documents": {
76527660
"items": {
76537661
"$ref": "#/components/schemas/ReferencedDocument"
@@ -7732,7 +7740,7 @@
77327740
"response"
77337741
],
77347742
"title": "QueryResponse",
7735-
"description": "Model representing LLM response to a query.\n\nAttributes:\n conversation_id: The optional conversation ID (UUID).\n response: The response.\n rag_chunks: List of RAG chunks used to generate the response.\n referenced_documents: The URLs and titles for the documents used to generate the response.\n tool_calls: List of tool calls made during response generation.\n truncated: Whether conversation history was truncated.\n input_tokens: Number of tokens sent to LLM.\n output_tokens: Number of tokens received from LLM.\n available_quotas: Quota available as measured by all configured quota limiters.",
7743+
"description": "Model representing LLM response to a query.\n\nAttributes:\n conversation_id: The optional conversation ID (UUID).\n response: The response.\n rag_chunks: Deprecated. List of RAG chunks used to generate the response.\n This information is now available in tool_results under file_search_call type.\n referenced_documents: The URLs and titles for the documents used to generate the response.\n tool_calls: List of tool calls made during response generation.\n tool_results: List of tool results.\n truncated: Whether conversation history was truncated.\n input_tokens: Number of tokens sent to LLM.\n output_tokens: Number of tokens received from LLM.\n available_quotas: Quota available as measured by all configured quota limiters.",
77367744
"examples": [
77377745
{
77387746
"available_quotas": {
@@ -7965,6 +7973,45 @@
79657973
"title": "QuotaSchedulerConfiguration",
79667974
"description": "Quota scheduler configuration."
79677975
},
7976+
"RAGChunk": {
7977+
"properties": {
7978+
"content": {
7979+
"type": "string",
7980+
"title": "Content",
7981+
"description": "The content of the chunk"
7982+
},
7983+
"source": {
7984+
"anyOf": [
7985+
{
7986+
"type": "string"
7987+
},
7988+
{
7989+
"type": "null"
7990+
}
7991+
],
7992+
"title": "Source",
7993+
"description": "Source document or URL"
7994+
},
7995+
"score": {
7996+
"anyOf": [
7997+
{
7998+
"type": "number"
7999+
},
8000+
{
8001+
"type": "null"
8002+
}
8003+
],
8004+
"title": "Score",
8005+
"description": "Relevance score"
8006+
}
8007+
},
8008+
"type": "object",
8009+
"required": [
8010+
"content"
8011+
],
8012+
"title": "RAGChunk",
8013+
"description": "Model representing a RAG chunk used in the response."
8014+
},
79688015
"RAGInfoResponse": {
79698016
"properties": {
79708017
"id": {

src/app/endpoints/query.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -441,6 +441,7 @@ async def query_endpoint_handler_base( # pylint: disable=R0914
441441
response=summary.llm_response,
442442
tool_calls=summary.tool_calls,
443443
tool_results=summary.tool_results,
444+
rag_chunks=summary.rag_chunks,
444445
referenced_documents=referenced_documents,
445446
truncated=False, # TODO: implement truncation detection
446447
input_tokens=token_usage.input_tokens,

src/app/endpoints/query_v2.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -492,7 +492,7 @@ def extract_rag_chunks_from_file_search_item(
492492
if item.results is not None:
493493
for result in item.results:
494494
rag_chunk = RAGChunk(
495-
content=result.text, source="file_search", score=result.score
495+
content=result.text, source=result.filename, score=result.score
496496
)
497497
rag_chunks.append(rag_chunk)
498498

src/models/responses.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010

1111
from quota.quota_exceed_error import QuotaExceedError
1212
from models.config import Action, Configuration
13-
from utils.types import ToolCallSummary, ToolResultSummary
13+
from utils.types import RAGChunk, ToolCallSummary, ToolResultSummary
1414

1515
SUCCESSFUL_RESPONSE_DESCRIPTION = "Successful response"
1616
BAD_REQUEST_DESCRIPTION = "Invalid request format"
@@ -348,9 +348,11 @@ class QueryResponse(AbstractSuccessfulResponse):
348348
Attributes:
349349
conversation_id: The optional conversation ID (UUID).
350350
response: The response.
351-
rag_chunks: List of RAG chunks used to generate the response.
351+
rag_chunks: Deprecated. List of RAG chunks used to generate the response.
352+
This information is now available in tool_results under file_search_call type.
352353
referenced_documents: The URLs and titles for the documents used to generate the response.
353354
tool_calls: List of tool calls made during response generation.
355+
tool_results: List of tool results.
354356
truncated: Whether conversation history was truncated.
355357
input_tokens: Number of tokens sent to LLM.
356358
output_tokens: Number of tokens received from LLM.
@@ -370,6 +372,11 @@ class QueryResponse(AbstractSuccessfulResponse):
370372
],
371373
)
372374

375+
rag_chunks: list[RAGChunk] = Field(
376+
default_factory=list,
377+
description="Deprecated: List of RAG chunks used to generate the response.",
378+
)
379+
373380
referenced_documents: list[ReferencedDocument] = Field(
374381
default_factory=list,
375382
description="List of documents referenced in generating the response",

tests/unit/app/endpoints/test_query_v2.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -998,8 +998,8 @@ async def test_retrieve_response_parses_referenced_documents(
998998
# Verify RAG chunks were extracted from file_search_call results
999999
assert len(_summary.rag_chunks) == 2
10001000
assert _summary.rag_chunks[0].content == "Sample text from file2.pdf"
1001-
assert _summary.rag_chunks[0].source == "file_search"
1001+
assert _summary.rag_chunks[0].source == "file2.pdf"
10021002
assert _summary.rag_chunks[0].score == 0.95
10031003
assert _summary.rag_chunks[1].content == "Sample text from file3.docx"
1004-
assert _summary.rag_chunks[1].source == "file_search"
1004+
assert _summary.rag_chunks[1].source == "file3.docx"
10051005
assert _summary.rag_chunks[1].score == 0.85

0 commit comments

Comments
 (0)