diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml
index 92ffe445b..873eccd6e 100644
--- a/.github/copy-pr-bot.yaml
+++ b/.github/copy-pr-bot.yaml
@@ -1,14 +1,13 @@
 # copy-pr-bot configuration for NVIDIA-AI-Blueprints/rag
 # Docs: https://docs.gha-runners.nvidia.com/platform/apps/copy-pr-bot/
 #
-# All NVIDIA org members with write access are auto-trusted and auto-vetters.
-# No individual names needed — scales to any number of contributors.
-# Commit signing required for trusted-change classification.
-# See: https://docs.github.com/en/authentication/managing-commit-signature-verification
-#
-# Only add to additional_vetters if someone needs vetting rights
-# but has read-only repo access (rare for internal repos).
+# additional_trustees: users explicitly trusted even if not org members.
+# Needed when author_association is CONTRIBUTOR (not MEMBER of NVIDIA-AI-Blueprints org).
 
 enabled: true
 auto_sync_draft: false
 auto_sync_ready: true
+
+additional_trustees:
+  - vidushig-nv
+  - richa-nvidia
diff --git a/.github/skill-eval/AGENTS.md b/.github/skill-eval/AGENTS.md
index aa605fcc9..d8b0d6e64 100644
--- a/.github/skill-eval/AGENTS.md
+++ b/.github/skill-eval/AGENTS.md
@@ -22,6 +22,18 @@ find /tmp/skill-eval/results -mindepth 1 -maxdepth 1 -type d \
   ! -name "${GITHUB_RUN_ID}" -exec rm -rf {} + 2>/dev/null || true
 
 mkdir -p /tmp/skill-eval/datasets /tmp/skill-eval/results
+
+# Log exact image digests for traceability (resolve :latest to sha256); pull only if the image is not local.
+echo "=== Image digests (for traceability) ==="
+for img in \
+  nvcr.io/nvstaging/blueprint/rag-server:${TAG:-latest} \
+  nvcr.io/nvstaging/blueprint/ingestor-server:${TAG:-latest}; do
+  digest=$(docker inspect "$img" --format '{{index .RepoDigests 0}}' 2>/dev/null \
+    || { docker pull "$img" -q >/dev/null 2>&1 \
+      && docker inspect "$img" --format '{{index .RepoDigests 0}}' 2>/dev/null; } \
+    || echo "$img — not yet pulled")
+  echo "  $img → $digest"
+done
 ```
 
 ## Your job, in order
@@ -54,7 +66,7 @@ mkdir -p /tmp/skill-eval/datasets /tmp/skill-eval/results
 
    Skills with no `eval/` dir are not yet migrated — skip them.
 
-3. **Check the shared adapter.** All rag-* skills use a single adapter
+3. **Check the shared adapter.** All rag-\* skills use a single adapter
    at `skill-eval/adapters/rag-blueprint/generate.py` with
    `--skill-name <skill>`. Verify it accepts `--skill-name`:
 
@@ -68,7 +80,7 @@ mkdir -p /tmp/skill-eval/datasets /tmp/skill-eval/results
    § 3c) with the fix and emit `BLOCKED: adapter missing --skill-name`.
 
    Unlike VSS, you do NOT create per-skill adapters — one shared
-   adapter serves all rag-* skills. If a skill genuinely needs custom
+   adapter serves all rag-\* skills. If a skill genuinely needs custom
    adapter logic (different PREAMBLE, non-standard platform), note it
    in the PR comment and raise a bot PR adding
    `skill-eval/adapters/<skill>/generate.py`.
@@ -78,8 +90,8 @@ mkdir -p /tmp/skill-eval/datasets /tmp/skill-eval/results
    is the spec filename without `.json`.
 
    Resolve `SKILL_DIR` based on where the skill lives:
-   - Decomposed skills:  `SKILL_DIR="$REPO_ROOT/skills/<skill>"`
-   - Monolithic skills:  `SKILL_DIR="$REPO_ROOT/skill-source/.agents/skills/<skill>"`
+   - Decomposed skills: `SKILL_DIR="$REPO_ROOT/skills/<skill>"`
+   - Monolithic skills: `SKILL_DIR="$REPO_ROOT/skill-source/.agents/skills/<skill>"`
 
    ```bash
    cd "$REPO_ROOT/skill-eval"
@@ -95,14 +107,13 @@ mkdir -p /tmp/skill-eval/datasets /tmp/skill-eval/results
    generation fails, read the traceback, fix the adapter, rerun.
 
 5. **Run Harbor trials.** Platform routing:
-
    - **`cpu` platform** (`nvidia_hosted.json` specs) → `LocalEnvironment`.
      Docker runs directly on the `rag-skill-validator` runner — no
      Brev VM needed. The runner IS the deploy host.
 
    - **`H100_x2` platform** (`h100.json` specs) → `BrevEnvironment`.
-     Pre-provision an ephemeral Brev VM, run Harbor against it,
-     delete it after. See § GPU provisioning below.
+     Pre-provision ONE ephemeral Brev VM for all H100 specs in this run
+     (see § GPU provisioning). Run all H100 trials against that single VM.
 
    For **cpu skills**, clean any leftover Docker state first:
 
@@ -119,6 +130,30 @@ mkdir -p /tmp/skill-eval/datasets /tmp/skill-eval/results
    skills set — it deploys the RAG stack that all other skills test
    against. Then run remaining cpu skills in any order.
 
+   **GPU pre-flight (automatic, no action required from skill authors):**
+   Before running ANY H100 spec for any skill, first sync the Brev VM's repo
+   to the PR base branch so compose files, env files, and skill docs all match
+   the branch under test (Harbor clones the default branch — main — not the PR):
+
+   ```bash
+   brev exec "$BREV_INSTANCE" -- \
+     "cd /home/nvidia/rag && git fetch origin ${PR_BASE} && git checkout ${PR_BASE} && git pull origin ${PR_BASE}" \
+     2>/dev/null || true
+   ```
+
+   Then check if the RAG stack is already running on the Brev VM:
+
+   ```bash
+   brev exec "$BREV_INSTANCE" "curl -sf http://localhost:8081/v1/health" \
+     2>/dev/null && RAG_RUNNING=true || RAG_RUNNING=false
+   ```
+
+   If `RAG_RUNNING=false` and `rag-blueprint/eval/h100.json` exists in
+   the repo, run it first to deploy the self-hosted RAG stack. This
+   happens automatically regardless of which skills are in the PR diff —
+   skill authors do NOT need to declare this dependency in their specs.
+   Once deployed, all subsequent H100 specs reuse the running stack.
+
    Use the canonical Harbor invocation from § Harbor invocation below.
    One step at a time, in order. Skip remaining steps if a step's
    reward < 1.0 (skip-on-prior-fail).
@@ -153,15 +188,29 @@ mkdir -p /tmp/skill-eval/datasets /tmp/skill-eval/results
 
 ## GPU provisioning (H100_x2 specs only)
 
-For specs with `platforms: ["H100_x2"]`:
+**One VM per platform per run.** If multiple skills have `H100_x2` specs
+(e.g. rag-eval/h100.json + rag-perf/h100.json), provision ONE Brev VM at
+the start and run ALL H100 trials against it sequentially. Do NOT provision
+a new VM per spec — that wastes 13+ min provisioning time and doubles cost.
+
+**Before processing specs**, collect all unique platforms needed:
 
 ```bash
-BREV_TYPE="dmz.h100x2.pcie"
+# Scan all changed skill specs for their platform requirements
+GPU_PLATFORMS_NEEDED=$(...)  # e.g. "H100_x2"
+```
+
+Then provision once per platform, store the instance name, reuse it for
+all specs of that platform:
+
+```bash
+# Provision ONCE for all H100_x2 specs in this run
+BREV_TYPE="dmz.h100x2,scaleway_H100x2,gpu-h100-sxm.1gpu-16vcpu-200gb"
 BREV_INSTANCE="rag-eval-gpu-$(date +%s | tail -c 8)"
 
-# Create with retry
+# Create with retry + fallback types
 for attempt in $(seq 1 5); do
-  echo "$BREV_TYPE" | brev create "$BREV_INSTANCE" --detached 2>&1 | tail -5
+  brev create "$BREV_INSTANCE" --type "$BREV_TYPE" --detached 2>&1 | tail -5
   brev ls 2>/dev/null | awk -v n="$BREV_INSTANCE" '$1==n {found=1} END{exit !found}' \
     && break
   sleep 15
@@ -169,18 +218,20 @@ done
 
 # Wait for RUNNING+READY (up to 30 min)
 DEADLINE=$(( $(date +%s) + 1800 ))
+last_state=""
 while [ "$(date +%s)" -lt "$DEADLINE" ]; do
   STATE=$(brev ls 2>/dev/null | awk -v n="$BREV_INSTANCE" '$1==n {print $2"+"$4}')
+  [ -n "$STATE" ] && [ "$STATE" != "$last_state" ] && echo "  $(date -u +%H:%M:%SZ) $BREV_INSTANCE: $STATE" && last_state="$STATE"
   [ "$STATE" = "RUNNING+READY" ] && break
   sleep 15
 done
-[ "$STATE" = "RUNNING+READY" ] || { echo "BLOCKED: H100 VM never reached RUNNING+READY"; exit 1; }
+[ "$last_state" = "RUNNING+READY" ] || { echo "BLOCKED: H100 VM never reached RUNNING+READY"; exit 1; }
 
-# Record for cleanup
+# Record for cleanup — workflow step deletes after 5-min cooldown
 mkdir -p /tmp/brev
 echo "$BREV_INSTANCE" >> "/tmp/brev/started-by-${GITHUB_RUN_ID}.txt"
 
-export BREV_INSTANCE
+export BREV_INSTANCE  # reuse this for ALL H100_x2 specs below
 ```
 
 ---
@@ -248,15 +299,23 @@ done
 ```
 
 **Never background harbor and poll.** Use foreground blocking calls only.
+`harbor run` MUST be called directly in a Bash tool call and allowed to block
+until it exits. Do NOT use TaskCreate, background processes (`&`), `nohup`,
+`Monitor`, or any other mechanism to run harbor asynchronously — not even
+wrapped in a shell script. The Bash tool call itself must block until harbor
+exits. The call will block for up to 90 minutes on GPU specs — that is
+expected and correct. Do NOT check on it with sleep loops, Read, or Monitor.
+Just wait. Violating this rule causes the agent to exit without DONE:/BLOCKED:
+(exit 4). This has happened multiple times — do not repeat the mistake.
 
 ---
 
 ## Platform topology
 
-| Platform | `spec.platforms` value | Environment | Instance | After run |
-|---|---|---|---|---|
-| CPU / cloud NIMs | `cpu` | LocalEnvironment | `rag-skill-validator` runner | docker down + volume cleanup |
-| 2× H100 80GB | `H100_x2` | BrevEnvironment | `rag-eval-gpu-<ts>` (`dmz.h100x2.pcie`) | workflow step deletes after 5-min cooldown |
+| Platform         | `spec.platforms` value | Environment      | Instance                                | After run                                  |
+| ---------------- | ---------------------- | ---------------- | --------------------------------------- | ------------------------------------------ |
+| CPU / cloud NIMs | `cpu`                  | LocalEnvironment | `rag-skill-validator` runner            | docker down + volume cleanup               |
+| 2× H100 80GB     | `H100_x2`              | BrevEnvironment  | `rag-eval-gpu-<ts>` (see `BREV_TYPE`)   | workflow step deletes after 5-min cooldown |
 
 `rag-skill-validator` is the CI runner host — **never** provision Brev against it.
 
@@ -270,10 +329,10 @@ done
 Head: `<short-sha>` · spec `<spec-sha>`
 First started: `<utc>` · Last finished: `<utc>` · Total: `<Xhr Ymin>`
 
-| Platform | Step | Query | Result | Reward | Duration | Turns |
-|---|---|---|---|---|---|---|
-| cpu | step-1 | Deploy via Docker Compose... | ✅ 1.0 (6/6) | 1.0 | 4m 29s | 18 |
-| cpu | step-2 | Get RAG Blueprint running... | ✅ 1.0 (5/5) | 1.0 | 1m 23s | 9 |
+| Platform | Step   | Query                        | Result       | Reward | Duration | Turns |
+| -------- | ------ | ---------------------------- | ------------ | ------ | -------- | ----- |
+| cpu      | step-1 | Deploy via Docker Compose... | ✅ 1.0 (6/6) | 1.0    | 4m 29s   | 18    |
+| cpu      | step-2 | Get RAG Blueprint running... | ✅ 1.0 (5/5) | 1.0    | 1m 23s   | 9     |
 
 ### Failing checks
 
@@ -320,6 +379,7 @@ END=$(jq -r '.trial_finished_at'  "$RESULTS"/*/*/step-${STEP}__*/result.json 2>/
 ## Manual full-sweep mode
 
 When `MANUAL_FULL_SWEEP=1` (workflow_dispatch):
+
 - **Step 1 override:** skip diff. Enumerate `skills/*/eval/*.json`;
   filter by `MANUAL_SKILLS_FILTER` (`*` = all skills).
 - **Step 3 override:** no bot-PR flow. Record missing adapter as
diff --git a/.github/workflows/ci-pipeline.yml b/.github/workflows/ci-pipeline.yml
index 20095e4c1..d617961e9 100644
--- a/.github/workflows/ci-pipeline.yml
+++ b/.github/workflows/ci-pipeline.yml
@@ -38,10 +38,10 @@ jobs:
     if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || github.event.pull_request.head.repo.full_name == github.repository
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
 
       - name: Setup Helm
-        uses: azure/setup-helm@v4
+        uses: azure/setup-helm@v5
         with:
           version: 'latest'
 
@@ -67,7 +67,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Check out repository code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
 
       - uses: actions/setup-python@v3
       - uses: pre-commit/action@v3.0.1
@@ -79,7 +79,7 @@ jobs:
       image: python:3.12-slim
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
 
       - name: Install system dependencies
         run: |
@@ -112,7 +112,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
 
       - name: Setup Node.js
         uses: actions/setup-node@v4
@@ -152,7 +152,7 @@ jobs:
           pnpm test:coverage
 
       - name: Upload coverage artifacts
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v5
         if: always()
         with:
           name: frontend-coverage-${{ steps.sanitize.outputs.ref_name }}-${{ github.sha }}
@@ -166,7 +166,7 @@ jobs:
       image: python:3.12-slim
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
 
       - name: Install required packages
         run: |
@@ -192,7 +192,7 @@ jobs:
       ENABLE_NRL_INTEGRATION_TESTS: "false"
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
 
       - name: Install NGC CLI
         env:          
@@ -1264,7 +1264,7 @@ jobs:
           echo "ref_name=$SANITIZED_REF" >> $GITHUB_OUTPUT
 
       - name: Upload all integration test logs
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v5
         if: always()
         with:
           name: integration-tests-logs-${{ steps.sanitize.outputs.ref_name }}-${{ github.sha }}
diff --git a/.github/workflows/publish-artifacts.yml b/.github/workflows/publish-artifacts.yml
index 8baf59f9e..001852b88 100644
--- a/.github/workflows/publish-artifacts.yml
+++ b/.github/workflows/publish-artifacts.yml
@@ -73,7 +73,7 @@ jobs:
       image: python:3.10
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
 
       - name: Set artifactory version
         run: |
@@ -101,7 +101,7 @@ jobs:
           ls -la dist/
 
       - name: Upload wheel artifact
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v5
         with:
           name: wheel-${{ env.ARTIFACTORY_VERSION }}
           path: dist/*.whl
@@ -156,10 +156,10 @@ jobs:
     if: github.event_name != 'workflow_dispatch' || ((github.event.inputs.JOBS_TO_RUN == 'all' || github.event.inputs.JOBS_TO_RUN == 'containers-only') && github.event.inputs.PUBLISH_RAG_SERVER != 'false')
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
 
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@v4
 
       - name: Determine TAG
         id: tag
@@ -177,7 +177,7 @@ jobs:
           echo "Final TAG value: $TAG"
 
       - name: Login to NGC Container Registry
-        uses: docker/login-action@v3
+        uses: docker/login-action@v4
         with:
           registry: nvcr.io
           username: '$oauthtoken'
@@ -216,10 +216,10 @@ jobs:
     if: github.event_name != 'workflow_dispatch' || ((github.event.inputs.JOBS_TO_RUN == 'all' || github.event.inputs.JOBS_TO_RUN == 'containers-only') && github.event.inputs.PUBLISH_INGESTOR_SERVER != 'false')
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
 
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@v4
 
       - name: Determine TAG
         id: tag
@@ -237,7 +237,7 @@ jobs:
           echo "Final TAG value: $TAG"
 
       - name: Login to NGC Container Registry
-        uses: docker/login-action@v3
+        uses: docker/login-action@v4
         with:
           registry: nvcr.io
           username: '$oauthtoken'
@@ -276,10 +276,10 @@ jobs:
     if: github.event_name != 'workflow_dispatch' || ((github.event.inputs.JOBS_TO_RUN == 'all' || github.event.inputs.JOBS_TO_RUN == 'containers-only') && github.event.inputs.PUBLISH_RAG_FRONTEND != 'false')
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
 
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@v4
 
       - name: Determine TAG
         id: tag
@@ -297,7 +297,7 @@ jobs:
           echo "Final TAG value: $TAG"
 
       - name: Login to NGC Container Registry
-        uses: docker/login-action@v3
+        uses: docker/login-action@v4
         with:
           registry: nvcr.io
           username: '$oauthtoken'
@@ -336,10 +336,10 @@ jobs:
     if: github.event_name != 'workflow_dispatch' || github.event.inputs.JOBS_TO_RUN == 'all' || github.event.inputs.JOBS_TO_RUN == 'helm-chart-only'
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
 
       - name: Install Helm
-        uses: azure/setup-helm@v4
+        uses: azure/setup-helm@v5
         with:
           version: 'v3.17.0'
 
diff --git a/.github/workflows/run-branch-script.yml b/.github/workflows/run-branch-script.yml
index 6100e30a2..c140f16a6 100644
--- a/.github/workflows/run-branch-script.yml
+++ b/.github/workflows/run-branch-script.yml
@@ -42,7 +42,7 @@ jobs:
 
     steps:
       - name: Checkout target ref
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
         with:
           ref: ${{ inputs.ref }}
 
@@ -64,7 +64,7 @@ jobs:
 
       - name: Upload artifacts
         if: always()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v5
         with:
           name: branch-script-${{ github.run_id }}
           path: |
diff --git a/.github/workflows/skills-eval.yml b/.github/workflows/skills-eval.yml
index 84c71b68e..4f28379d7 100644
--- a/.github/workflows/skills-eval.yml
+++ b/.github/workflows/skills-eval.yml
@@ -22,7 +22,7 @@ on:
     branches:
       - "pull-request/[0-9]+"
   schedule:
-    - cron: '0 2 * * *'    # 2am UTC nightly — all rag-* cpu skills
+    - cron: "0 2 * * *" # 2am UTC nightly — all rag-* cpu skills
   workflow_dispatch:
     inputs:
       skills:
@@ -58,7 +58,7 @@ defaults:
 jobs:
   eval:
     name: Eval changed skills against PR
-    runs-on: [self-hosted, rag-skill-validator]
+    runs-on: [self-hosted, rag-eval]
 
     # 4-hour cap: 8 cpu skills × ~15 min each = 2h max with 1.5x timeouts.
     # Nightly runs all skills; PR runs only changed ones so usually much faster.
@@ -83,7 +83,7 @@ jobs:
           done
 
       - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
         with:
           fetch-depth: 0
 
@@ -114,17 +114,6 @@ jobs:
               - '.github/workflows/skills-eval.yml'
               - 'ci/run_skill_eval.sh'
 
-      - name: Load coordinator env
-        if: github.event_name != 'push' || steps.changes.outputs.relevant == 'true'
-        run: |
-          # Secrets stored in /home/ubuntu/eval-coordinator/.env on the runner.
-          # Contains: NVIDIA_INFERENCE_KEY, NGC_API_KEY
-          set -a
-          source /home/ubuntu/eval-coordinator/.env
-          set +a
-          printf "COORDINATOR_ENV=loaded\n" >> "$GITHUB_ENV"
-          printf "CLAUDE_CODE_DISABLE_THINKING=1\n" >> "$GITHUB_ENV"
-
       - name: Run skills eval agent
         id: agent
         if: github.event_name != 'push' || steps.changes.outputs.relevant == 'true'
@@ -132,11 +121,16 @@ jobs:
           GH_TOKEN: ${{ github.token }}
           GH_CONFIG_DIR: ${{ runner.temp }}/gh-skill-eval-${{ github.run_id }}
           INPUT_SKILLS: ${{ inputs.skills }}
+          ANTHROPIC_API_KEY: ${{ secrets.NVBASE_INFERENCE_API_KEY }}
+          ANTHROPIC_BASE_URL: https://inference-api.nvidia.com
+          ANTHROPIC_MODEL: aws/anthropic/bedrock-claude-sonnet-4-6
+          NVIDIA_INFERENCE_KEY: ${{ secrets.NVBASE_INFERENCE_API_KEY }}
+          NVIDIA_API_KEY: ${{ secrets.NVBASE_INFERENCE_API_KEY }}
+          NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
+          CLAUDE_CODE_DISABLE_THINKING: "1"
+          TAG: latest
         run: |
           mkdir -p "$GH_CONFIG_DIR" /tmp/brev /tmp/skill-eval
-          set -a
-          source /home/ubuntu/eval-coordinator/.env
-          set +a
           export PR_NUMBER="${{ steps.pr.outputs.number }}"
           export PR_BASE="${{ steps.pr.outputs.base }}"
           export PR_HEAD_SHA="${{ github.sha }}"
@@ -150,9 +144,24 @@ jobs:
           fi
           python3 .github/skill-eval/skills_eval_agent.py
 
+      - name: Collect results for artifact
+        if: always() && (github.event_name != 'push' || steps.changes.outputs.relevant == 'true')
+        run: |
+          if [ ! -d /tmp/skill-eval/results ]; then
+            echo "no results dir — agent blocked before running trials"
+            exit 0
+          fi
+          RESULTS=$(find /tmp/skill-eval/results -maxdepth 3 -name "result.json" 2>/dev/null | head -50 || true)
+          if [ -n "$RESULTS" ]; then
+            tar czf /tmp/skills-eval-results.tar.gz -C /tmp/skill-eval results
+            echo "archived $(echo "$RESULTS" | wc -l) result.json files"
+          else
+            echo "results dir exists but empty — nothing to archive"
+          fi
+
       - name: Upload results artifact
         if: always() && (github.event_name != 'push' || steps.changes.outputs.relevant == 'true')
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v5
         with:
           name: >-
             ${{ github.event_name == 'schedule'
@@ -160,7 +169,7 @@ jobs:
               || github.event_name == 'workflow_dispatch'
               && format('skills-eval-manual-{0}', github.run_id)
               || format('skills-eval-pr-{0}-{1}', steps.pr.outputs.number, github.run_id) }}
-          path: eval-results/
+          path: /tmp/skills-eval-results.tar.gz
           if-no-files-found: ignore
           retention-days: 7
 
diff --git a/.github/workflows/skills-nv-base.yml b/.github/workflows/skills-nv-base.yml
index 39a9c5053..2752f56b9 100644
--- a/.github/workflows/skills-nv-base.yml
+++ b/.github/workflows/skills-nv-base.yml
@@ -2,17 +2,13 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # Tier 1 skills validation: schema, security, PII, naming, frontmatter.
-# Runs on every PR mirror branch push that touches skills/.
-# Uses dorny/paths-filter for cumulative diff (PR base ↔ HEAD) — GitHub's
-# top-level paths: filter evaluates per-push diff and misses copy-pr-bot
-# merge commits that don't themselves touch skills/.
+# The push trigger was removed because the job is disabled (`if: false`): nv-base
+# requires the NVIDIA internal network, which is unavailable on external runners.
+# Only workflow_dispatch is kept so the "Run workflow" button stays in the Actions UI.
 
 name: Skills NV-BASE
 
 on:
-  push:
-    branches:
-      - "pull-request/[0-9]+"
   workflow_dispatch:
     inputs:
       skills:
@@ -48,7 +44,7 @@ defaults:
 jobs:
   skills-check:
     name: skills-check
-    runs-on: [self-hosted, rag-skill-validator]
+    runs-on: [self-hosted, rag-eval]
     timeout-minutes: 20
 
     # Tier 1 disabled — nv-base requires pre-installation on the runner
@@ -58,7 +54,7 @@ jobs:
 
     steps:
       - name: Checkout mirror head
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
         with:
           fetch-depth: 0
 
diff --git a/README.md b/README.md
index 8d8b5d11c..09d1e12ce 100644
--- a/README.md
+++ b/README.md
@@ -164,7 +164,7 @@ The following is a step-by-step explanation of the workflow from the end-user pe
 
 ## AI Agent Skill
 
-An agent skill is included that enables AI coding assistants (Claude Code, Cursor, etc.) to deploy, configure, troubleshoot, and manage the RAG Blueprint autonomously.
+Agent skills in [`skill-source/`](skill-source/) let coding assistants (Claude Code, Cursor, Codex, etc.) operate this blueprint from natural language.
 
 ### Install
 
@@ -172,17 +172,19 @@ An agent skill is included that enables AI coding assistants (Claude Code, Curso
 npx skills add .
 ```
 
-This installs the `rag-blueprint` skill from `skill-source/`. After installation, the agent handles requests like:
+This installs every skill listed in the table below from `skill-source/.agents/skills/`.
 
-- *"Deploy RAG on Docker with NVIDIA-hosted models"*
-- *"Enable VLM image captioning and restart the ingestor"*
-- *"Ingestion failed for 3 files, can you check why?"*
-- *"Switch from Docker to library mode"*
-- *"Shut down all RAG services"*
+| Skill | Use for | Example prompts |
+|-------|---------|-----------------|
+| **`rag-blueprint`** | Deploy, configure, troubleshoot, shutdown; REST API usage (`/v1/generate`, ingestor upload) | *"Deploy RAG with self-hosted NIMs"*, *"Enable guardrails"*, *"Wide-net search then high-precision on my collection"* |
+| **`rag-eval`** | RAGAS quality benchmarks with `corpus/` + `train.json` and `scripts/eval/evaluate_rag.py` | *"Run RAGAS eval on my dataset"*, *"Compare reranker on vs off"* |
+| **`rag-perf`** | Latency/throughput benchmarks via `scripts/rag-perf` (profiling + aiperf) | *"Profile retrieval bottlenecks"*, *"Run a concurrency sweep"* |
 
-> **Note:** If the agent doesn't pick up the skill automatically (e.g., for short or ambiguous queries), invoke it explicitly with `/rag-blueprint <your request>`.
+Pick the skill that matches the task: operations → **rag-blueprint**; answer quality → **rag-eval**; performance → **rag-perf**.
 
-For skill architecture details, see [`skill-source/README.md`](skill-source/README.md).
+> **Note:** If routing is unclear, invoke explicitly: `/rag-blueprint`, `/rag-eval`, or `/rag-perf` plus your request.
+
+More detail: [`skill-source/README.md`](skill-source/README.md). OpenClaw plugin: [`.openclaw/README.md`](.openclaw/README.md).
 
 
 ## Get Started With NVIDIA RAG Blueprint
diff --git a/docs/agentic-rag.md b/docs/agentic-rag.md
index 225c3e1bf..c118c7d57 100644
--- a/docs/agentic-rag.md
+++ b/docs/agentic-rag.md
@@ -33,6 +33,7 @@ The pipeline defaults to off because Agentic RAG trades latency and extra LLM ca
 - The agentic path does not use NeMo Guardrails, Self-Reflection, Query Decomposition, or VLM Inference. Query rewriting, multi-turn history, multi-collection retrieval, citations, filter generation, and reranking are supported.
 - Verification runs once; there's no nested verification loop.
 - Tasks in a plan run at one parallel level; there's no DAG or depends-on construct.
+- Per-response retrieval metrics are not emitted. The agentic pipeline issues multiple retrieval calls across initial retrieval, per-task execution, and verification re-retrieval, so the single `metrics` block returned by the standard chain is not populated for agentic requests.
 
 ## Architecture Overview
 
diff --git a/docs/release-notes.md b/docs/release-notes.md
index 8683edd74..fae923eef 100644
--- a/docs/release-notes.md
+++ b/docs/release-notes.md
@@ -10,7 +10,7 @@ This documentation contains the release notes for [NVIDIA RAG Blueprint](readme.
 
 ## Release 2.6.0 (TBD)
 
-This release adds [Agentic RAG](./agentic-rag.md) support with plan-and-execute pipelines, streaming responses, and UI integration; changes the default vector database to Elasticsearch and the default object store to SeaweedFS; and introduces new [agent skills](../skill-source/README.md) for deployment, evaluation, and performance tooling.
+This release adds [Agentic RAG](./agentic-rag.md) support with plan-and-execute pipelines, streaming responses, and UI integration; changes the default vector database to Elasticsearch and the default object store to SeaweedFS; adds [Red Hat OpenShift](./deploy-helm-openshift.md) support for Helm-based deployment; and introduces new [agent skills](../skill-source/README.md) for deployment, evaluation, and performance tooling.
 
 ### Highlights
 
@@ -30,6 +30,7 @@ This release includes the following key updates:
 - Updated NV-Ingest to version 26.3.0.
 - Updated OCR NIM naming from `nemoretriever-ocr-v1` to `nemotron-ocr-v1`.
 - Added OpenClaw plugin for agent-driven deploy/configure/eval workflows.
+- Added [Red Hat OpenShift and OKD support](./deploy-helm-openshift.md) for Helm deployments.
 
 ### Fixed Known Issues
 
diff --git a/notebooks/rag_library_lite_usage.ipynb b/notebooks/rag_library_lite_usage.ipynb
index 90900e788..b3d36928f 100644
--- a/notebooks/rag_library_lite_usage.ipynb
+++ b/notebooks/rag_library_lite_usage.ipynb
@@ -243,8 +243,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Set logging level\n",
-    "First let's set the required logging level. Set to INFO for displaying basic important logs. Set to DEBUG for full verbosity."
+    "## Configure notebook logging\n",
+    "Set the notebook log level and hide known benign Milvus Lite compatibility noise. Unrelated errors remain visible."
    ]
   },
   {
@@ -255,6 +255,8 @@
    "source": [
     "import logging\n",
     "import os\n",
+    "import traceback\n",
+    "import warnings\n",
     "\n",
     "# Set the log level via environment variable before importing nvidia_rag\n",
     "# This ensures the package respects our log level setting\n",
@@ -264,12 +266,46 @@
     "# Configure logging\n",
     "logging.basicConfig(level=LOGLEVEL, force=True)\n",
     "\n",
-    "# Set log levels for specific loggers after package import\n",
-    "for name in logging.root.manager.loggerDict:\n",
-    "    if name == \"nvidia_rag\" or name.startswith(\"nvidia_rag.\"):\n",
+    "# Keep known benign Milvus Lite/PyMilvus compatibility noise out of notebook output.\n",
+    "# These messages can appear during collection creation even when the operation succeeds.\n",
+    "warnings.filterwarnings(\n",
+    "    \"ignore\",\n",
+    "    message=r\"`connections\\.(has_connection|connect)` is an ORM-style PyMilvus API.*\",\n",
+    ")\n",
+    "\n",
+    "\n",
+    "class _GrpcAllocTimestampFilter(logging.Filter):\n",
+    "    def filter(self, record: logging.LogRecord) -> bool:\n",
+    "        if record.name != \"grpc._server\":\n",
+    "            return True\n",
+    "        benign_message = \"Exception calling application: Method not implemented!\"\n",
+    "        if benign_message not in record.getMessage():\n",
+    "            return True\n",
+    "        if not record.exc_info:\n",
+    "            return True\n",
+    "        exc_text = \"\".join(traceback.format_exception(*record.exc_info))\n",
+    "        return \"AllocTimestamp\" not in exc_text\n",
+    "\n",
+    "\n",
+    "grpc_logger = logging.getLogger(\"grpc._server\")\n",
+    "if not any(\n",
+    "    item.__class__.__name__ == \"_GrpcAllocTimestampFilter\"\n",
+    "    for item in grpc_logger.filters\n",
+    "):\n",
+    "    grpc_logger.addFilter(_GrpcAllocTimestampFilter())\n",
+    "\n",
+    "# Set log levels for specific loggers used by the notebook.\n",
+    "for name in (\"nvidia_rag\", \"nv_ingest_client\"):\n",
+    "    logging.getLogger(name).setLevel(LOGLEVEL)\n",
+    "\n",
+    "for name in list(logging.root.manager.loggerDict):\n",
+    "    if name.startswith(\"nvidia_rag.\") or name.startswith(\"nv_ingest_client.\"):\n",
     "        logging.getLogger(name).setLevel(LOGLEVEL)\n",
-    "    if name == \"nv_ingest_client\" or name.startswith(\"nv_ingest_client.\"):\n",
-    "        logging.getLogger(name).setLevel(LOGLEVEL)"
+    "\n",
+    "print(\n",
+    "    f\"Notebook logging set to {logging.getLevelName(LOGLEVEL)}. \"\n",
+    "    \"Known benign Milvus Lite compatibility messages will be hidden.\"\n",
+    ")"
    ]
   },
   {
diff --git a/src/nvidia_rag/ingestor_server/main.py b/src/nvidia_rag/ingestor_server/main.py
index ff50af757..bffbec8cb 100644
--- a/src/nvidia_rag/ingestor_server/main.py
+++ b/src/nvidia_rag/ingestor_server/main.py
@@ -275,7 +275,10 @@ def __prepare_vdb_op_and_collection_name(
                 config=self.config,
                 vdb_auth_token=vdb_auth_token,
             )
-            return vdb_op, collection_name
+            # Return the backend-canonicalized name (e.g. Elasticsearch
+            # lowercases index names) so downstream summary keys in Redis
+            # and object storage align with what GET /collections reports.
+            return vdb_op, vdb_op.collection_name
 
         if not bypass_validation and (collection_name or custom_metadata):
             raise ValueError(
diff --git a/src/nvidia_rag/rag_server/agentic_rag/agentic_rag.py b/src/nvidia_rag/rag_server/agentic_rag/agentic_rag.py
index de56ac6c2..652d95582 100644
--- a/src/nvidia_rag/rag_server/agentic_rag/agentic_rag.py
+++ b/src/nvidia_rag/rag_server/agentic_rag/agentic_rag.py
@@ -54,6 +54,8 @@
 from opentelemetry import trace as otel_trace
 from pydantic import BaseModel, Field
 
+from nvidia_rag.rag_server.agentic_rag.response_parser import parse_json_response
+
 logger = logging.getLogger(__name__)
 
 _P = "[AGENTIC_RAG]"
@@ -453,44 +455,6 @@ def _filter_think_tokens(content: str) -> str:
         logger.warning("%s Truncated <think> block (no closing tag), discarding", _P)
         return ""
 
-    @staticmethod
-    def _sanitize_json_string(raw: str) -> str:
-        """Escape unescaped control characters inside JSON string values."""
-        out: list[str] = []
-        in_string = False
-        i = 0
-        length = len(raw)
-        while i < length:
-            ch = raw[i]
-            if ch == "\\" and in_string:
-                out.append(ch)
-                if i + 1 < length:
-                    i += 1
-                    out.append(raw[i])
-                i += 1
-                continue
-            if ch == '"':
-                in_string = not in_string
-                out.append(ch)
-                i += 1
-                continue
-            if in_string:
-                if ch == "\n":
-                    out.append("\\n")
-                    i += 1
-                    continue
-                if ch == "\r":
-                    out.append("\\r")
-                    i += 1
-                    continue
-                if ch == "\t":
-                    out.append("\\t")
-                    i += 1
-                    continue
-            out.append(ch)
-            i += 1
-        return "".join(out)
-
     @staticmethod
     async def _accumulate_astream(
         chain: Any,
@@ -745,37 +709,6 @@ async def _call_llm(
 
             return response_content
 
-    # =========================================================================
-    # JSON PARSING
-    # =========================================================================
-
-    def _parse_json_response(self, response: str) -> dict[str, Any]:
-        """Parse a JSON object from an LLM response, with fallback sanitization."""
-        try:
-            return json.loads(response)
-        except json.JSONDecodeError:
-            pass
-
-        start = response.find("{")
-        end = response.rfind("}") + 1
-        if start == -1 or end <= start:
-            logger.warning("%s No JSON object found in response: %.200s", _P, response)
-            return {"error": "Failed to parse JSON", "raw_response": response}
-
-        json_str = response[start:end]
-        try:
-            return json.loads(json_str)
-        except json.JSONDecodeError:
-            pass
-
-        try:
-            return json.loads(self._sanitize_json_string(json_str))
-        except json.JSONDecodeError:
-            pass
-
-        logger.warning("%s JSON parse failed: %.200s", _P, response)
-        return {"error": "Failed to parse JSON", "raw_response": response}
-
     # =========================================================================
     # CONTENT HELPERS
     # =========================================================================
@@ -913,7 +846,7 @@ def _parse_task_answer(self, raw_answer: str) -> dict:
         if not raw_answer:
             return {"completeness": "none", "answer": "[NO DATA]", "missing": ""}
 
-        parsed = self._parse_json_response(raw_answer)
+        parsed = parse_json_response(raw_answer)
         if parsed and "completeness" in parsed:
             return {
                 "completeness": parsed.get("completeness", "complete"),
@@ -999,7 +932,7 @@ def _finish_task(res: dict) -> dict:
                         json_mode=True,
                         # config intentionally omitted — see method docstring.
                     )
-                    seed_result = self._parse_json_response(seed_response)
+                    seed_result = parse_json_response(seed_response)
 
                     if seed_result.get("stop", False):
                         logger.debug(
@@ -1094,7 +1027,7 @@ def _finish_task(res: dict) -> dict:
                     {"question": task_question, "documents": docs_str},
                     step_name=f"Task {tid} answer (attempt {attempt + 1})",
                     # config intentionally omitted — see method docstring.
-                    json_mode=True,
+                    json_mode=False,
                 )
                 parsed = self._parse_task_answer(raw_answer)
 
@@ -1326,7 +1259,7 @@ async def plan_node(
                         json_mode=True,
                         config=config,
                     )
-                    plan = self._parse_json_response(response)
+                    plan = parse_json_response(response)
 
                     if "error" in plan:
                         logger.warning(
@@ -1875,7 +1808,7 @@ async def verify_node(
                     json_mode=True,
                     config=config,
                 )
-                result = self._parse_json_response(response)
+                result = parse_json_response(response)
 
                 passed = result.get("status") == "pass"
                 issues = result.get("issues", [])
diff --git a/src/nvidia_rag/rag_server/agentic_rag/response_parser.py b/src/nvidia_rag/rag_server/agentic_rag/response_parser.py
new file mode 100644
index 000000000..bff23bc05
--- /dev/null
+++ b/src/nvidia_rag/rag_server/agentic_rag/response_parser.py
@@ -0,0 +1,190 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""LLM response parsing with recovery for common malformed-output patterns.
+
+Handles common LLM output pathologies:
+  * Preamble / postscript text around the JSON object.
+  * "False start + restart" patterns from reasoning models — the model
+    emits a draft, then re-emits the full object. We pick the last
+    balanced top-level ``{...}`` candidate.
+  * Missing-colon typos like ``"tasks[`` instead of ``"tasks": [``.
+  * Unescaped control characters (newline / tab / carriage return)
+    inside JSON string values.
+  * Valid JSON whose top-level value is not an object (array, string,
+    number); only objects are ever returned.
+
+Public surface
+--------------
+* ``parse_json_response`` — the only function callers need; returns a dict
+  on success or ``{"error": ..., "raw_response": ...}`` on failure.
+"""
+
+import json
+import logging
+import re
+from typing import Any, Optional
+
+logger = logging.getLogger(__name__)
+
+_P = "[AGENTIC_RAG]"
+
+# Missing-colon typo: a key directly followed by its array/object value,
+# e.g. ``"tasks" [``.
+_KEY_WITHOUT_COLON = re.compile(r'"(\w+)"\s*(\[|\{)')
+# Same typo with the key's closing quote lost as well, e.g. ``"tasks[``.
+_UNCLOSED_KEY_WITHOUT_COLON = re.compile(r'"(\w+)(\[|\{)')
+_KEY_REPAIR = r'"\1": \2'
+
+# Raw control characters mapped to their JSON escape sequences.
+_CONTROL_ESCAPES = {"\n": "\\n", "\r": "\\r", "\t": "\\t"}
+
+
+def parse_json_response(response: str) -> dict[str, Any]:
+    """Parse a JSON object from an LLM response, with fallback recovery.
+
+    Recovery is attempted in this order:
+
+    1. The whole response as-is.
+    2. Each balanced top-level ``{...}`` candidate, last to first. Reasoning
+       models sometimes emit a draft and then restart; the last complete
+       candidate is typically the final revised output.
+    3. The broadest ``{`` .. ``}`` span, for malformed input that defeats
+       brace counting (e.g. the ``"tasks[`` missing-colon typo).
+
+    Args:
+        response: Raw text returned by the LLM.
+
+    Returns:
+        The decoded JSON object. Valid JSON that is not an object (array,
+        string, number, ...) is never returned as-is because callers use the
+        result as a dict. On failure, returns
+        ``{"error": "Failed to parse JSON", "raw_response": response}``.
+    """
+    parsed = _loads_object(response)
+    if parsed is not None:
+        return parsed
+
+    # Try balanced top-level candidates (handles "restart" patterns).
+    for cand in reversed(_extract_top_level_objects(response)):
+        parsed = _recover_object(cand)
+        if parsed is not None:
+            return parsed
+
+    # Fallback: broadest span (handles unterminated-string cases that
+    # confuse brace-counting, e.g. '"tasks[' missing-colon typos).
+    start = response.find("{")
+    end = response.rfind("}") + 1
+    if start == -1 or end <= start:
+        logger.warning("%s No JSON object found in response: %.200s", _P, response)
+        return {"error": "Failed to parse JSON", "raw_response": response}
+
+    parsed = _recover_object(response[start:end])
+    if parsed is not None:
+        return parsed
+
+    logger.warning("%s JSON parse failed: %s", _P, response)
+    return {"error": "Failed to parse JSON", "raw_response": response}
+
+
+def _loads_object(text: str) -> Optional[dict[str, Any]]:
+    """Decode ``text`` as JSON; return it only if it is a JSON object."""
+    try:
+        parsed = json.loads(text)
+    except json.JSONDecodeError:
+        return None
+    return parsed if isinstance(parsed, dict) else None
+
+
+def _recover_object(text: str) -> Optional[dict[str, Any]]:
+    """Decode ``text``, escalating through repairs until one yields an object.
+
+    Control-character escaping is tried on its own before the regex-based
+    typo repair: the typo repair is not string-aware and would corrupt a
+    value such as ``"arr[0]"`` that only needed its newlines escaped.
+    """
+    parsed = _loads_object(text)
+    if parsed is None:
+        parsed = _loads_object(_escape_control_chars(text))
+    if parsed is None:
+        parsed = _loads_object(_sanitize_json_string(text))
+    return parsed
+
+
+def _extract_top_level_objects(text: str) -> list[str]:
+    """Return all balanced top-level ``{...}`` substrings, in order.
+
+    Braces inside JSON strings are ignored. Quotes are only treated as
+    string delimiters while inside an object, so a stray ``"`` in the
+    surrounding prose (e.g. ``5" screen``) cannot hide the objects after it.
+    """
+    candidates: list[str] = []
+    depth = 0
+    start_idx = -1
+    in_string = False
+    escape = False
+    for i, ch in enumerate(text):
+        if in_string:
+            if escape:
+                escape = False
+            elif ch == "\\":
+                escape = True
+            elif ch == '"':
+                in_string = False
+            continue
+        if ch == '"':
+            # Outside any object a quote is just prose, not a JSON string.
+            in_string = depth > 0
+        elif ch == "{":
+            if depth == 0:
+                start_idx = i
+            depth += 1
+        elif ch == "}" and depth > 0:
+            depth -= 1
+            if depth == 0:
+                candidates.append(text[start_idx : i + 1])
+    return candidates
+
+
+def _sanitize_json_string(raw: str) -> str:
+    """Repair missing-colon typos, then escape control chars in strings."""
+    # Repair missing colon between key and array/object value:
+    # e.g. '"tasks[' → '"tasks": [' and '"task{' → '"task": {'
+    raw = _KEY_WITHOUT_COLON.sub(_KEY_REPAIR, raw)
+    raw = _UNCLOSED_KEY_WITHOUT_COLON.sub(_KEY_REPAIR, raw)
+    return _escape_control_chars(raw)
+
+
+def _escape_control_chars(raw: str) -> str:
+    """Escape raw newline / carriage return / tab inside JSON string values.
+
+    Characters outside strings and already-escaped characters are kept as-is.
+    """
+    out: list[str] = []
+    in_string = False
+    escaped = False
+    for ch in raw:
+        if escaped:
+            # Character following a backslash inside a string: copy verbatim.
+            escaped = False
+        elif in_string and ch == "\\":
+            escaped = True
+        elif ch == '"':
+            in_string = not in_string
+        elif in_string and ch in _CONTROL_ESCAPES:
+            out.append(_CONTROL_ESCAPES[ch])
+            continue
+        out.append(ch)
+    return "".join(out)
diff --git a/tests/unit/test_ingestor_server/test_ingestor_main_core_components.py b/tests/unit/test_ingestor_server/test_ingestor_main_core_components.py
index 48ca314e9..b85773ffc 100644
--- a/tests/unit/test_ingestor_server/test_ingestor_main_core_components.py
+++ b/tests/unit/test_ingestor_server/test_ingestor_main_core_components.py
@@ -303,6 +303,7 @@ def test_prepare_vdb_op_without_vdb_op_missing_collection_name(self):
     def test_prepare_vdb_op_without_vdb_op_with_collection_name(self, mock_get_vdb):
         """Test __prepare_vdb_op without vdb_op but with collection_name."""
         mock_vdb_op = Mock(spec=VDBRag)
+        mock_vdb_op.collection_name = "test_collection"
         mock_get_vdb.return_value = mock_vdb_op
 
         ingestor = NvidiaRAGIngestor()
@@ -314,10 +315,30 @@ def test_prepare_vdb_op_without_vdb_op_with_collection_name(self, mock_get_vdb):
         assert result == (mock_vdb_op, "test_collection")
         mock_get_vdb.assert_called_once()
 
+    @patch("nvidia_rag.ingestor_server.main._get_vdb_op")
+    def test_prepare_vdb_op_returns_backend_canonicalized_name(self, mock_get_vdb):
+        """Backends that normalize (e.g. Elasticsearch lowercases index names) must
+        have their canonical name flow back to the caller so downstream summary
+        keys in Redis and object storage align with what GET /collections reports.
+        Regression guard for bug 6206269.
+        """
+        mock_vdb_op = Mock(spec=VDBRag)
+        mock_vdb_op.collection_name = "mycollection"
+        mock_get_vdb.return_value = mock_vdb_op
+
+        ingestor = NvidiaRAGIngestor()
+
+        result = ingestor._NvidiaRAGIngestor__prepare_vdb_op_and_collection_name(
+            collection_name="MyCollection"
+        )
+
+        assert result == (mock_vdb_op, "mycollection")
+
     @patch("nvidia_rag.ingestor_server.main._get_vdb_op")
     def test_prepare_vdb_op_bypass_validation(self, mock_get_vdb):
         """Test __prepare_vdb_op with bypass_validation=True."""
         mock_vdb_op = Mock(spec=VDBRag)
+        mock_vdb_op.collection_name = None
         mock_get_vdb.return_value = mock_vdb_op
 
         ingestor = NvidiaRAGIngestor()
diff --git a/tests/unit/test_ingestor_server/test_ingestor_main_document_operations.py b/tests/unit/test_ingestor_server/test_ingestor_main_document_operations.py
index 5dd394f01..222e485eb 100644
--- a/tests/unit/test_ingestor_server/test_ingestor_main_document_operations.py
+++ b/tests/unit/test_ingestor_server/test_ingestor_main_document_operations.py
@@ -550,6 +550,7 @@ def test_private_methods_coverage(self):
         # Test __prepare_vdb_op_and_collection_name
         with patch("nvidia_rag.ingestor_server.main._get_vdb_op") as mock_get_vdb:
             mock_vdb_instance = Mock(spec=VDBRag)
+            mock_vdb_instance.collection_name = "test_collection"
             mock_get_vdb.return_value = mock_vdb_instance
 
             vdb_op, collection_name = (
diff --git a/tests/unit/test_rag_server/test_agentic_rag.py b/tests/unit/test_rag_server/test_agentic_rag.py
index 301307b29..daf7f11a7 100644
--- a/tests/unit/test_rag_server/test_agentic_rag.py
+++ b/tests/unit/test_rag_server/test_agentic_rag.py
@@ -117,12 +117,6 @@ def test_filter_think_tokens_strips_closed_block(self) -> None:
     def test_filter_think_tokens_truncated_block(self) -> None:
         assert AgenticRag._filter_think_tokens("<think>no close") == ""
 
-    def test_sanitize_json_string_escapes_newlines_in_strings(self) -> None:
-        dirty = '{"x": "line1\nline2"}'
-        clean = AgenticRag._sanitize_json_string(dirty)
-        assert "\n" not in clean.split('"x":')[1]
-        assert json.loads(clean)["x"] == "line1\nline2"
-
     def test_rebuild_result_text_vs_chart(self) -> None:
         text_chunk = {
             "doc_name": "a.pdf",
@@ -163,17 +157,6 @@ def test_clean_answer_strips_markdown_headers(self) -> None:
 
 
 class TestAgenticRagInstanceHelpers:
-    def test_parse_json_response_direct_and_embedded(self) -> None:
-        agent = _minimal_agent()
-        assert agent._parse_json_response('{"k": 1}') == {"k": 1}
-        wrapped = 'prefix {"k": 2} suffix'
-        assert agent._parse_json_response(wrapped) == {"k": 2}
-
-    def test_parse_json_response_invalid_returns_error_dict(self) -> None:
-        agent = _minimal_agent()
-        out = agent._parse_json_response("not json at all")
-        assert out.get("error") == "Failed to parse JSON"
-
     def test_extract_chunks_from_model_dump_shape(self) -> None:
         agent = _minimal_agent()
         dumped = {