@@ -443,7 +443,6 @@ async def _fetch_byok_rag( # pylint: disable=too-many-locals
443443 client : AsyncLlamaStackClient ,
444444 query : str ,
445445 vector_store_ids : Optional [list [str ]] = None ,
446- max_chunks : Optional [int ] = None ,
447446) -> tuple [list [RAGChunk ], list [ReferencedDocument ]]:
448447 """Fetch chunks and documents from BYOK RAG sources.
449448
@@ -453,15 +452,13 @@ async def _fetch_byok_rag( # pylint: disable=too-many-locals
453452 vector_store_ids: Optional list of vector store IDs to query.
454453 If provided, only these stores will be queried. If None, all stores
455454 (excluding Solr) will be queried.
456- max_chunks: Maximum number of chunks to return. If None, uses
457- rag.byok.max_chunks from configuration.
458455
459456 Returns:
460457 Tuple containing:
461458 - rag_chunks: RAG chunks from BYOK RAG
462459 - referenced_documents: Documents referenced in BYOK RAG results
463460 """
464- limit = max_chunks if max_chunks is not None else configuration .rag .byok .max_chunks
461+ limit = configuration .rag .byok .max_chunks
465462 rag_chunks : list [RAGChunk ] = []
466463 referenced_documents : list [ReferencedDocument ] = []
467464
@@ -550,7 +547,7 @@ async def _fetch_byok_rag( # pylint: disable=too-many-locals
550547 return rag_chunks , referenced_documents
551548
552549
553- async def _fetch_solr_rag ( # pylint: disable=too-many-locals
550+ async def _fetch_okp_rag ( # pylint: disable=too-many-locals
554551 client : AsyncLlamaStackClient ,
555552 query : str ,
556553 solr : Optional [SolrVectorSearchRequest ] = None ,
@@ -561,8 +558,6 @@ async def _fetch_solr_rag( # pylint: disable=too-many-locals
561558 client: The AsyncLlamaStackClient to use for the request
562559 query: The user's query
563560 solr: Structured Solr inline RAG request from the API (optional).
564- max_chunks: Maximum number of chunks to return. If None, uses
565- rag.okp.max_chunks from configuration.
566561
567562 Returns:
568563 Tuple containing:
@@ -658,10 +653,8 @@ async def build_rag_context( # pylint: disable=too-many-locals,too-many-branche
658653 top_k = configuration .rag .retrieval .inline .max_chunks
659654
660655 # Fetch from each source using per-source limits for the reranking pool
661- byok_chunks_task = _fetch_byok_rag (
662- client , query , vector_store_ids , max_chunks = configuration .rag .byok .max_chunks
663- )
664- solr_chunks_task = _fetch_solr_rag (client , query , solr )
656+ byok_chunks_task = _fetch_byok_rag (client , query , vector_store_ids )
657+ solr_chunks_task = _fetch_okp_rag (client , query , solr )
665658
666659 (byok_chunks , byok_documents ), (solr_chunks , solr_documents ) = await asyncio .gather (
667660 byok_chunks_task , solr_chunks_task
0 commit comments