From ac0b354feba31631b48a7e71ac7aaa6aa4cf9f7d Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Wed, 3 Jun 2026 11:01:26 +0200 Subject: [PATCH] LCORE-1037: address review nits in BYOK and RAG guides - Replace pdm with uv for embedding model download command - Remove unnecessary OpenAI API key note - Fix Ollama section: clarify no tool RAG but inline RAG supported - Remove empty References section from RAG guide - Fix incorrect embedding dimensions for all-mpnet-base-v2 (768, not 1024/384) Co-Authored-By: Claude Opus 4.6 --- docs/byok_guide.md | 6 +----- docs/rag_guide.md | 10 +--------- examples/lightspeed-stack-byok-okp-rag.yaml | 4 ++-- 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/docs/byok_guide.md b/docs/byok_guide.md index 86acc1c80..d1fb94b19 100644 --- a/docs/byok_guide.md +++ b/docs/byok_guide.md @@ -161,7 +161,7 @@ You can use the embedding generation step mentioned in the rag-content repo: ```bash mkdir ./embeddings_model -pdm run python ./scripts/download_embeddings_model.py -l ./embeddings_model/ -r sentence-transformers/all-mpnet-base-v2 +uv run python ./scripts/download_embeddings_model.py -l ./embeddings_model/ -r sentence-transformers/all-mpnet-base-v2 ``` #### Option 2: Manual Download and Configuration @@ -340,10 +340,6 @@ rag: - company-docs ``` -> [!NOTE] -> Your LLM inference provider (e.g., OpenAI, vLLM) must also be configured in your `run.yaml`. -> For OpenAI, set the `OPENAI_API_KEY` environment variable. - ### Example 2: Multiple Knowledge Sources with pgvector A configuration combining a local FAISS store (via `byok_rag`) with a remote pgvector store (configured directly in the Llama Stack configuration file): diff --git a/docs/rag_guide.md b/docs/rag_guide.md index 740e34c47..490e413ef 100644 --- a/docs/rag_guide.md +++ b/docs/rag_guide.md @@ -223,11 +223,7 @@ Not yet supported. ### Ollama -The `remote::ollama` provider can be used for inference. However, it does not support tool calling, including RAG. -While Ollama also exposes an OpenAI compatible endpoint that supports tool calling, it cannot currently be used due to limitations in the `remote::openai` provider. - -Tool calling with Ollama is not yet supported. -Currently, tool calling is not supported out of the box. Some experimental patches exist (including internal workarounds), but these are not officially released. +The `remote::ollama` provider does not support tool calling, so RAG as a tool is not available. However, inline RAG is supported. ### vLLM Mistral @@ -386,7 +382,3 @@ You are a helpful assistant with access to a 'knowledge_search' tool. When users The top-level `vector_stores` block in [`run.yaml`](../examples/run.yaml) may include `annotation_prompt_params` to control whether extra RAG annotation instructions are injected into the model prompt (for example, citation-style markers). The default configuration sets `enable_annotations: false` under that block to avoid unwanted annotations. ---- - -# References - diff --git a/examples/lightspeed-stack-byok-okp-rag.yaml b/examples/lightspeed-stack-byok-okp-rag.yaml index 3e2fb18b8..7cbb36fc8 100644 --- a/examples/lightspeed-stack-byok-okp-rag.yaml +++ b/examples/lightspeed-stack-byok-okp-rag.yaml @@ -38,14 +38,14 @@ byok_rag: - rag_id: ocp-docs # referenced in rag.inline / rag.tool rag_type: inline::faiss embedding_model: sentence-transformers/all-mpnet-base-v2 - embedding_dimension: 1024 + embedding_dimension: 768 vector_db_id: vs_123 # Vector store ID (from index generation) db_path: /tmp/ocp.faiss score_multiplier: 1.0 # Weight for this vector store's results (Inline RAG only) - rag_id: knowledge-base # referenced in rag.inline / rag.tool rag_type: inline::faiss embedding_model: sentence-transformers/all-mpnet-base-v2 - embedding_dimension: 384 + embedding_dimension: 768 vector_db_id: vs_456 # Vector store ID (from index generation) db_path: /tmp/kb.faiss score_multiplier: 1.2 # Weight for this vector store's results (Inline RAG only)