Skip to content

Commit 2adb747

Browse files
authored
Merge pull request lightspeed-core#893 from are-ces/rag-chunks-fix
LCORE-1094: Rag chunks are not parsed
2 parents 9ead098 + 4cd0215 commit 2adb747

3 files changed

Lines changed: 87 additions & 22 deletions

File tree

src/app/endpoints/query_v2.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,12 @@
4040
get_system_prompt,
4141
get_topic_summary_system_prompt,
4242
)
43-
from utils.suid import normalize_conversation_id, to_llama_stack_conversation_id
4443
from utils.mcp_headers import mcp_headers_dependency
4544
from utils.responses import extract_text_from_response_output_item
4645
from utils.shields import detect_shield_violations, get_available_shields
46+
from utils.suid import normalize_conversation_id, to_llama_stack_conversation_id
4747
from utils.token_counter import TokenCounter
48-
from utils.types import ToolCallSummary, ToolResultSummary, TurnSummary
48+
from utils.types import RAGChunk, ToolCallSummary, ToolResultSummary, TurnSummary
4949

5050
logger = logging.getLogger("app.endpoints.handlers")
5151
router = APIRouter(tags=["query_v1"])
@@ -419,11 +419,14 @@ async def retrieve_response( # pylint: disable=too-many-locals,too-many-branche
419419
len(llm_response),
420420
)
421421

422+
# Extract rag chunks
423+
rag_chunks = parse_rag_chunks_from_responses_api(response)
424+
422425
summary = TurnSummary(
423426
llm_response=llm_response,
424427
tool_calls=tool_calls,
425428
tool_results=tool_results,
426-
rag_chunks=[],
429+
rag_chunks=rag_chunks,
427430
)
428431

429432
# Extract referenced documents and token usage from Responses API response
@@ -447,6 +450,34 @@ async def retrieve_response( # pylint: disable=too-many-locals,too-many-branche
447450
)
448451

449452

453+
def parse_rag_chunks_from_responses_api(response_obj: Any) -> list[RAGChunk]:
    """
    Extract rag_chunks from the llama-stack OpenAI response.

    Only output items whose ``type`` is ``"file_search_call"`` are inspected;
    every entry of such an item's ``results`` becomes one ``RAGChunk``.

    Args:
        response_obj: The ResponseObject from OpenAI compatible response API in llama-stack.

    Returns:
        List of RAGChunk with content, source, score. The list is empty when
        the response has no output items, no file search calls, or when the
        file search calls carry no results.
    """
    rag_chunks: list[RAGChunk] = []

    # Tolerate a response whose ``output`` is None (nothing to scan).
    for output_item in response_obj.output or []:
        if getattr(output_item, "type", None) != "file_search_call":
            continue

        # ``results`` may be present but None (it is an optional field on the
        # file search tool call), so the attribute existing is not enough:
        # iterating None would raise TypeError.
        results = getattr(output_item, "results", None)
        if not results:
            continue

        rag_chunks.extend(
            RAGChunk(content=result.text, source="file_search", score=result.score)
            for result in results
        )

    return rag_chunks
479+
480+
450481
def parse_referenced_documents_from_responses_api(
451482
response: OpenAIResponseObject, # pylint: disable=unused-argument
452483
) -> list[ReferencedDocument]:

tests/integration/endpoints/test_query_v2_integration.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,7 @@ async def test_query_v2_endpoint_with_tool_calls(
362362
mock_result.file_id = "doc-1"
363363
mock_result.filename = "ansible-docs.txt"
364364
mock_result.score = 0.95
365+
mock_result.text = "Ansible is an open-source automation tool..."
365366
mock_result.attributes = {
366367
"doc_url": "https://example.com/ansible-docs.txt",
367368
"link": "https://example.com/ansible-docs.txt",

tests/unit/app/endpoints/test_query_v2.py

Lines changed: 52 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -794,17 +794,12 @@ async def test_retrieve_response_no_violation_with_shields(
794794
validation_metric.inc.assert_not_called()
795795

796796

797-
@pytest.mark.asyncio
798-
async def test_retrieve_response_parses_referenced_documents(
799-
mocker: MockerFixture,
800-
) -> None:
801-
"""Test that retrieve_response correctly parses referenced documents from response."""
802-
mock_client = mocker.AsyncMock()
803-
797+
def _create_message_output_with_citations(mocker: MockerFixture) -> Any:
798+
"""Create mock message output item with content annotations (citations)."""
804799
# 1. Output item with message content annotations (citations)
805-
output_item_1 = mocker.Mock()
806-
output_item_1.type = "message"
807-
output_item_1.role = "assistant"
800+
output_item = mocker.Mock()
801+
output_item.type = "message"
802+
output_item.role = "assistant"
808803

809804
# Mock content with annotations
810805
content_part = mocker.Mock()
@@ -823,19 +818,48 @@ async def test_retrieve_response_parses_referenced_documents(
823818
annotation2.title = None
824819

825820
content_part.annotations = [annotation1, annotation2]
826-
output_item_1.content = [content_part]
821+
output_item.content = [content_part]
822+
return output_item
823+
827824

825+
def _create_file_search_output(mocker: MockerFixture) -> Any:
    """Build a mocked ``file_search_call`` output item holding two results.

    The result mocks expose ``filename``, ``attributes``, ``text``, ``score``
    and ``file_id`` as plain attributes (not dict keys).
    """
    # 2. Output item with file search tool call results
    file_search_item = mocker.Mock()
    file_search_item.type = "file_search_call"
    # Ensure queries is a list to avoid iteration error in tool summary
    file_search_item.queries = []
    file_search_item.status = "completed"

    # One dict of attribute values per mocked search result, in result order.
    result_specs = (
        {
            "filename": "file2.pdf",
            "attributes": {"url": "http://example.com/doc2"},
            "text": "Sample text from file2.pdf",
            "score": 0.95,
            "file_id": "file-123",
        },
        {
            "filename": "file3.docx",
            "attributes": {},  # No URL
            "text": "Sample text from file3.docx",
            "score": 0.85,
            "file_id": "file-456",
        },
    )

    mocked_results = []
    for spec in result_specs:
        mocked_result = mocker.Mock()
        for attr_name, attr_value in spec.items():
            setattr(mocked_result, attr_name, attr_value)
        mocked_results.append(mocked_result)

    file_search_item.results = mocked_results
    return file_search_item
851+
852+
853+
@pytest.mark.asyncio
854+
async def test_retrieve_response_parses_referenced_documents(
855+
mocker: MockerFixture,
856+
) -> None:
857+
"""Test that retrieve_response correctly parses referenced documents from response."""
858+
mock_client = mocker.AsyncMock()
859+
860+
# Create output items using helper functions
861+
output_item_1 = _create_message_output_with_citations(mocker)
862+
output_item_2 = _create_file_search_output(mocker)
839863

840864
response_obj = mocker.Mock()
841865
response_obj.id = "resp-docs"
@@ -877,3 +901,12 @@ async def test_retrieve_response_parses_referenced_documents(
877901
doc4 = next((d for d in referenced_docs if d.doc_title == "file3.docx"), None)
878902
assert doc4
879903
assert doc4.doc_url is None
904+
905+
# Verify RAG chunks were extracted from file_search_call results
906+
assert len(_summary.rag_chunks) == 2
907+
assert _summary.rag_chunks[0].content == "Sample text from file2.pdf"
908+
assert _summary.rag_chunks[0].source == "file_search"
909+
assert _summary.rag_chunks[0].score == 0.95
910+
assert _summary.rag_chunks[1].content == "Sample text from file3.docx"
911+
assert _summary.rag_chunks[1].source == "file_search"
912+
assert _summary.rag_chunks[1].score == 0.85

0 commit comments

Comments
 (0)