Skip to content

Commit c8cb85a

Browse files
committed
Restored rag_chunks attribute in query response
1 parent d08f406 commit c8cb85a

5 files changed

Lines changed: 69 additions & 28 deletions

File tree

docs/openapi.json

Lines changed: 56 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -7648,6 +7648,14 @@
76487648
"Kubernetes is an open-source container orchestration system for automating ..."
76497649
]
76507650
},
7651+
"rag_chunks": {
7652+
"items": {
7653+
"$ref": "#/components/schemas/RAGChunk"
7654+
},
7655+
"type": "array",
7656+
"title": "Rag Chunks",
7657+
"description": "Deprecated: List of RAG chunks used to generate the response."
7658+
},
76517659
"referenced_documents": {
76527660
"items": {
76537661
"$ref": "#/components/schemas/ReferencedDocument"
@@ -7711,32 +7719,18 @@
77117719
]
77127720
},
77137721
"tool_calls": {
7714-
"anyOf": [
7715-
{
7716-
"items": {
7717-
"$ref": "#/components/schemas/ToolCallSummary"
7718-
},
7719-
"type": "array"
7720-
},
7721-
{
7722-
"type": "null"
7723-
}
7724-
],
7722+
"items": {
7723+
"$ref": "#/components/schemas/ToolCallSummary"
7724+
},
7725+
"type": "array",
77257726
"title": "Tool Calls",
77267727
"description": "List of tool calls made during response generation"
77277728
},
77287729
"tool_results": {
7729-
"anyOf": [
7730-
{
7731-
"items": {
7732-
"$ref": "#/components/schemas/ToolResultSummary"
7733-
},
7734-
"type": "array"
7735-
},
7736-
{
7737-
"type": "null"
7738-
}
7739-
],
7730+
"items": {
7731+
"$ref": "#/components/schemas/ToolResultSummary"
7732+
},
7733+
"type": "array",
77407734
"title": "Tool Results",
77417735
"description": "List of tool results"
77427736
}
@@ -7746,7 +7740,7 @@
77467740
"response"
77477741
],
77487742
"title": "QueryResponse",
7749-
"description": "Model representing LLM response to a query.\n\nAttributes:\n conversation_id: The optional conversation ID (UUID).\n response: The response.\n rag_chunks: List of RAG chunks used to generate the response.\n referenced_documents: The URLs and titles for the documents used to generate the response.\n tool_calls: List of tool calls made during response generation.\n truncated: Whether conversation history was truncated.\n input_tokens: Number of tokens sent to LLM.\n output_tokens: Number of tokens received from LLM.\n available_quotas: Quota available as measured by all configured quota limiters.",
7743+
"description": "Model representing LLM response to a query.\n\nAttributes:\n conversation_id: The optional conversation ID (UUID).\n response: The response.\n rag_chunks: Deprecated. List of RAG chunks used to generate the response.\n This information is now available in tool_results under file_search_call type.\n referenced_documents: The URLs and titles for the documents used to generate the response.\n tool_calls: List of tool calls made during response generation.\n tool_results: List of tool results.\n truncated: Whether conversation history was truncated.\n input_tokens: Number of tokens sent to LLM.\n output_tokens: Number of tokens received from LLM.\n available_quotas: Quota available as measured by all configured quota limiters.",
77507744
"examples": [
77517745
{
77527746
"available_quotas": {
@@ -7979,6 +7973,45 @@
79797973
"title": "QuotaSchedulerConfiguration",
79807974
"description": "Quota scheduler configuration."
79817975
},
7976+
"RAGChunk": {
7977+
"properties": {
7978+
"content": {
7979+
"type": "string",
7980+
"title": "Content",
7981+
"description": "The content of the chunk"
7982+
},
7983+
"source": {
7984+
"anyOf": [
7985+
{
7986+
"type": "string"
7987+
},
7988+
{
7989+
"type": "null"
7990+
}
7991+
],
7992+
"title": "Source",
7993+
"description": "Source document or URL"
7994+
},
7995+
"score": {
7996+
"anyOf": [
7997+
{
7998+
"type": "number"
7999+
},
8000+
{
8001+
"type": "null"
8002+
}
8003+
],
8004+
"title": "Score",
8005+
"description": "Relevance score"
8006+
}
8007+
},
8008+
"type": "object",
8009+
"required": [
8010+
"content"
8011+
],
8012+
"title": "RAGChunk",
8013+
"description": "Model representing a RAG chunk used in the response."
8014+
},
79828015
"RAGInfoResponse": {
79838016
"properties": {
79848017
"id": {

src/app/endpoints/query.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -441,6 +441,7 @@ async def query_endpoint_handler_base( # pylint: disable=R0914
441441
response=summary.llm_response,
442442
tool_calls=summary.tool_calls,
443443
tool_results=summary.tool_results,
444+
rag_chunks=summary.rag_chunks,
444445
referenced_documents=referenced_documents,
445446
truncated=False, # TODO: implement truncation detection
446447
input_tokens=token_usage.input_tokens,

src/app/endpoints/query_v2.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -492,7 +492,7 @@ def extract_rag_chunks_from_file_search_item(
492492
if item.results is not None:
493493
for result in item.results:
494494
rag_chunk = RAGChunk(
495-
content=result.text, source="file_search", score=result.score
495+
content=result.text, source=result.filename, score=result.score
496496
)
497497
rag_chunks.append(rag_chunk)
498498

src/models/responses.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010

1111
from quota.quota_exceed_error import QuotaExceedError
1212
from models.config import Action, Configuration
13-
from utils.types import ToolCallSummary, ToolResultSummary
13+
from utils.types import RAGChunk, ToolCallSummary, ToolResultSummary
1414

1515
SUCCESSFUL_RESPONSE_DESCRIPTION = "Successful response"
1616
BAD_REQUEST_DESCRIPTION = "Invalid request format"
@@ -348,9 +348,11 @@ class QueryResponse(AbstractSuccessfulResponse):
348348
Attributes:
349349
conversation_id: The optional conversation ID (UUID).
350350
response: The response.
351-
rag_chunks: List of RAG chunks used to generate the response.
351+
rag_chunks: Deprecated. List of RAG chunks used to generate the response.
352+
This information is now available in tool_results under file_search_call type.
352353
referenced_documents: The URLs and titles for the documents used to generate the response.
353354
tool_calls: List of tool calls made during response generation.
355+
tool_results: List of tool results.
354356
truncated: Whether conversation history was truncated.
355357
input_tokens: Number of tokens sent to LLM.
356358
output_tokens: Number of tokens received from LLM.
@@ -370,6 +372,11 @@ class QueryResponse(AbstractSuccessfulResponse):
370372
],
371373
)
372374

375+
rag_chunks: list[RAGChunk] = Field(
376+
default_factory=list,
377+
description="Deprecated: List of RAG chunks used to generate the response.",
378+
)
379+
373380
referenced_documents: list[ReferencedDocument] = Field(
374381
default_factory=list,
375382
description="List of documents referenced in generating the response",

tests/unit/app/endpoints/test_query_v2.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -998,8 +998,8 @@ async def test_retrieve_response_parses_referenced_documents(
998998
# Verify RAG chunks were extracted from file_search_call results
999999
assert len(_summary.rag_chunks) == 2
10001000
assert _summary.rag_chunks[0].content == "Sample text from file2.pdf"
1001-
assert _summary.rag_chunks[0].source == "file_search"
1001+
assert _summary.rag_chunks[0].source == "file2.pdf"
10021002
assert _summary.rag_chunks[0].score == 0.95
10031003
assert _summary.rag_chunks[1].content == "Sample text from file3.docx"
1004-
assert _summary.rag_chunks[1].source == "file_search"
1004+
assert _summary.rag_chunks[1].source == "file3.docx"
10051005
assert _summary.rag_chunks[1].score == 0.85

0 commit comments

Comments
 (0)