lightspeed-core
diff --git a/‎tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml‎
Lines changed: 2 additions & 1 deletion b/‎tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-prow.yaml‎
Lines changed: 205 additions & 0 deletions b/‎tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-prow.yaml‎
Lines changed: 205 additions & 0 deletions
diff --git a/‎tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml‎
Lines changed: 0 additions & 62 deletions b/‎tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml‎
Lines changed: 0 additions & 62 deletions
diff --git a/‎tests/e2e-prow/rhoai/manifests/operators/ds-cluster.yaml‎
Lines changed: 0 additions & 1 deletion b/‎tests/e2e-prow/rhoai/manifests/operators/ds-cluster.yaml‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-cpu.yaml‎
Lines changed: 1 addition & 1 deletion b/‎tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-cpu.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml‎
Lines changed: 1 addition & 1 deletion b/‎tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/e2e-prow/rhoai/pipeline-services.sh‎
Lines changed: 17 additions & 14 deletions b/‎tests/e2e-prow/rhoai/pipeline-services.sh‎
Lines changed: 17 additions & 14 deletions
@@ -22,13 +22,14 @@ spec:
             secretKeyRef:
               name: llama-stack-ip-secret
               key: key
-        # Same vars as docker-compose / server-mode YAML (${env.FAISS_VECTOR_STORE_ID} in byok_rag).
         - name: FAISS_VECTOR_STORE_ID
           valueFrom:
             secretKeyRef:
               name: faiss-vector-store-secret
               key: id
               optional: true
+        - name: KV_RAG_PATH
+          value: "/app-root/src/.llama/storage/rag/kv_store.db"
       image: ${LIGHTSPEED_STACK_IMAGE}
       ports:
         - containerPort: 8080
 
@@ -0,0 +1,205 @@
+# Llama Stack pod for Prow: uses pre-built image with enrichment + RAG restore.
+#
+# Requires: ConfigMap llama-stack-config (run.yaml), ConfigMap rag-data (kv_store.db.gz),
+#           ConfigMap lightspeed-stack-config (lightspeed-stack.yaml).
+# Requires: Image built as ${LLAMA_STACK_IMAGE} (set by pipeline.sh).
+#
+# Single Pod (no Deployment/ReplicaSet) running the Llama Stack server.
+apiVersion: v1
+kind: Pod
+metadata:
+  name: llama-stack-service
+  labels:
+    # Same key/value that pipeline-services.sh sets with `oc label` before it
+    # runs `oc expose pod llama-stack-service`.
+    pod: llama-stack-service
+spec:
+  # Pod-wide default seccomp profile; each container below repeats it.
+  securityContext:
+    seccompProfile:
+      type: RuntimeDefault
+  # Init containers fill the shared rag-storage volume before the server starts.
+  initContainers:
+    # Init step: unpack the gzipped RAG KV store from the rag-data ConfigMap
+    # into the shared rag-storage emptyDir.
+    #
+    # rag-storage is mounted at /data here and at
+    # /opt/app-root/src/.llama/storage in llama-stack-container, so paths must
+    # be written relative to the volume root:
+    #   /data/rag/kv_store.db           -> the file KV_RAG_PATH points at
+    #   /data/.e2e-rag-seed/kv_store.db -> the copy restore_rag_seed uses when
+    #                                      the ConfigMap archive is not mounted
+    # (Writing under /data/src/.llama/storage would nest the tree a second
+    # time inside the volume, where nothing in the main container looks.)
+    - name: setup-rag-data
+      image: busybox:latest
+      securityContext:
+        allowPrivilegeEscalation: false
+        capabilities:
+          drop: ["ALL"]
+        runAsNonRoot: true
+        runAsUser: 65534
+        seccompProfile:
+          type: RuntimeDefault
+      command:
+        - /bin/sh
+        - -c
+        - |
+          set -e
+          mkdir -p /data/rag /data/files /data/.e2e-rag-seed
+          if [ ! -f /rag-data/kv_store.db.gz ]; then
+            echo "FATAL: missing /rag-data/kv_store.db.gz"
+            ls -la /rag-data || true
+            exit 1
+          fi
+          gunzip -c /rag-data/kv_store.db.gz > /data/.e2e-rag-seed/kv_store.db
+          cp -f /data/.e2e-rag-seed/kv_store.db /data/rag/kv_store.db
+          # Files are created as UID 65534 but the server runs as UID 1001.
+          chmod -R 777 /data/rag /data/files /data/.e2e-rag-seed
+          echo "RAG data extracted successfully"
+      volumeMounts:
+        - name: rag-storage
+          mountPath: /data
+        - name: rag-data
+          mountPath: /rag-data
+    # Init step: copy run.yaml out of the read-only llama-stack-config
+    # ConfigMap into the writable rag-storage volume. The volume root is
+    # mounted at /work here, so the file lands where LLAMA_STACK_CONFIG
+    # (/opt/app-root/src/.llama/storage/run.yaml) expects it in the main
+    # container.
+    - name: materialize-run-yaml
+      image: busybox:latest
+      volumeMounts:
+        - name: rag-storage
+          mountPath: /work
+        - name: config-cm
+          mountPath: /cm
+          readOnly: true
+      securityContext:
+        runAsNonRoot: true
+        runAsUser: 65534
+        allowPrivilegeEscalation: false
+        seccompProfile:
+          type: RuntimeDefault
+        capabilities:
+          drop:
+            - ALL
+      command:
+        - /bin/sh
+        - -c
+        - |
+          set -e
+          cp /cm/run.yaml /work/run.yaml
+          chmod 664 /work/run.yaml
+  containers:
+    # Llama Stack server. The image reference is the LLAMA_STACK_IMAGE
+    # placeholder, filled in by envsubst before the manifest is applied.
+    - name: llama-stack-container
+      image: ${LLAMA_STACK_IMAGE}
+      workingDir: /opt/app-root
+      securityContext:
+        runAsNonRoot: true
+        runAsUser: 1001
+        allowPrivilegeEscalation: false
+        seccompProfile:
+          type: RuntimeDefault
+        capabilities:
+          drop:
+            - ALL
+      env:
+        # --- Python / home layout ---
+        - name: PYTHONPATH
+          value: '/opt/app-root/src'
+        - name: HOME
+          value: '/opt/app-root/src'
+        # --- Storage paths, all inside the rag-storage volume mount ---
+        - name: KV_STORE_PATH
+          value: '/opt/app-root/src/.llama/storage/kv_store.db'
+        - name: KV_RAG_PATH
+          value: '/opt/app-root/src/.llama/storage/rag/kv_store.db'
+        - name: SQL_STORE_PATH
+          value: '/opt/app-root/src/.llama/storage/sql_store.db'
+        # --- vLLM inference endpoint and credentials ---
+        - name: KSVC_URL
+          valueFrom:
+            secretKeyRef:
+              name: api-url-secret
+              key: key
+        - name: VLLM_API_KEY
+          valueFrom:
+            secretKeyRef:
+              name: vllm-api-key-secret
+              key: key
+        - name: INFERENCE_MODEL
+          value: 'meta-llama/Llama-3.1-8B-Instruct'
+        # --- OpenAI (secret is optional; the pod starts without it) ---
+        - name: OPENAI_API_KEY
+          valueFrom:
+            secretKeyRef:
+              name: openai-api-key-secret
+              key: key
+              optional: true
+        - name: E2E_OPENAI_MODEL
+          value: 'gpt-4o-mini'
+        # run.yaml copied into the shared volume by the materialize-run-yaml
+        # init container.
+        - name: LLAMA_STACK_CONFIG
+          value: '/opt/app-root/src/.llama/storage/run.yaml'
+        # NOTE(review): unlike OPENAI_API_KEY, this secret reference is not
+        # marked optional, so the container cannot be created when
+        # faiss-vector-store-secret is absent - confirm that is intended.
+        - name: FAISS_VECTOR_STORE_ID
+          valueFrom:
+            secretKeyRef:
+              name: faiss-vector-store-secret
+              key: id
+        # Service hostname stored by pipeline-services.sh in
+        # llama-stack-ip-secret before this pod is applied.
+        - name: E2E_LLAMA_HOSTNAME
+          valueFrom:
+            secretKeyRef:
+              name: llama-stack-ip-secret
+              key: key
+      # Entrypoint script (bash, `set -e`). Statement order matters:
+      #
+      # 1. restore_rag_seed rewrites the RAG KV store at $KV_RAG_PATH:
+      #    - if the ConfigMap archive /opt/app-root/rag-data-cm/kv_store.db.gz
+      #      is present, Python gunzips it to "<target>.tmp", aborts the
+      #      container if the result is smaller than 1048576 bytes, then moves
+      #      it into place with os.replace and sets mode 0664;
+      #    - otherwise, if the seed copy under .e2e-rag-seed exists, it is
+      #      copied over the target (chmod errors are ignored);
+      #    - if neither file exists the function does nothing.
+      # 2. When lightspeed-stack.yaml is mounted, llama_stack_configuration.py
+      #    is run to write /opt/app-root/run.yaml and /opt/app-root/.env. A
+      #    non-zero exit is recorded in ENRICHMENT_FAILED rather than aborting.
+      # 3. If the .env file exists it is sourced with auto-export (set -a),
+      #    whether or not enrichment succeeded.
+      # 4. The server is exec'd with the enriched config when enrichment
+      #    succeeded and the file exists; otherwise with $LLAMA_STACK_CONFIG.
+      #    restore_rag_seed runs again immediately before either exec -
+      #    presumably so the store is pristine at server start; confirm.
+      command:
+        - /bin/bash
+        - -c
+        - |
+          set -e
+          RAG_SEED="/opt/app-root/src/.llama/storage/.e2e-rag-seed/kv_store.db"
+          RAG_CM_GZ="/opt/app-root/rag-data-cm/kv_store.db.gz"
+          RAG_WORK="${KV_RAG_PATH:-/opt/app-root/src/.llama/storage/rag/kv_store.db}"
+          restore_rag_seed() {
+            mkdir -p "$(dirname "$RAG_WORK")"
+            if [[ -f "$RAG_CM_GZ" ]]; then
+              RAG_WORK="$RAG_WORK" RAG_CM_GZ="$RAG_CM_GZ" python3 -c 'import gzip, os, shutil, sys; r, g = os.environ["RAG_WORK"], os.environ["RAG_CM_GZ"]; t = r + ".tmp"; i = gzip.open(g, "rb"); o = open(t, "wb"); shutil.copyfileobj(i, o); i.close(); o.close(); sz = os.path.getsize(t); (sz >= 1048576) or (print("FATAL: RAG from ConfigMap too small:", sz, file=sys.stderr) or sys.exit(1)); os.replace(t, r); os.chmod(r, 0o664)' || exit 1
+            elif [[ -f "$RAG_SEED" ]]; then
+              cp -f "$RAG_SEED" "$RAG_WORK"
+              chmod 664 "$RAG_WORK" 2>/dev/null || true
+            fi
+          }
+          restore_rag_seed
+          INPUT_CONFIG="${LLAMA_STACK_CONFIG:-/opt/app-root/run.yaml}"
+          ENRICHED_CONFIG="/opt/app-root/run.yaml"
+          LIGHTSPEED_CONFIG="${LIGHTSPEED_CONFIG:-/opt/app-root/lightspeed-stack.yaml}"
+          ENV_FILE="/opt/app-root/.env"
+          if [[ -f "$LIGHTSPEED_CONFIG" ]]; then
+            echo "Enriching llama-stack config..."
+            ENRICHMENT_FAILED=0
+            python3 /opt/app-root/src/llama_stack_configuration.py \
+              -c "$LIGHTSPEED_CONFIG" \
+              -i "$INPUT_CONFIG" \
+              -o "$ENRICHED_CONFIG" \
+              -e "$ENV_FILE" 2>&1 || ENRICHMENT_FAILED=1
+            if [[ -f "$ENV_FILE" ]]; then
+              set -a && . "$ENV_FILE" && set +a
+            fi
+            if [[ -f "$ENRICHED_CONFIG" ]] && [[ "$ENRICHMENT_FAILED" -eq 0 ]]; then
+              echo "Using enriched config: $ENRICHED_CONFIG"
+              restore_rag_seed
+              exec llama stack run "$ENRICHED_CONFIG"
+            fi
+          fi
+          echo "Using original config: $INPUT_CONFIG"
+          restore_rag_seed
+          exec llama stack run "$INPUT_CONFIG"
+      # Port 8321 is the one pipeline-services.sh exposes through the
+      # llama-stack-service-svc ClusterIP service.
+      ports:
+        - containerPort: 8321
+      # Readiness: first check after 20s, then every 5s; 36 consecutive
+      # failures correspond to roughly 3 minutes of polling.
+      readinessProbe:
+        httpGet:
+          path: /v1/health
+          port: 8321
+        initialDelaySeconds: 20
+        periodSeconds: 5
+        failureThreshold: 36
+      # Liveness: starts after 120s, checks every 20s; three consecutive
+      # failures (about 60s) cause the container to be restarted.
+      livenessProbe:
+        httpGet:
+          path: /v1/health
+          port: 8321
+        initialDelaySeconds: 120
+        periodSeconds: 20
+        failureThreshold: 3
+      volumeMounts:
+        # Writable scratch volume filled by the init containers (they mount the
+        # same volume at /data and /work); holds run.yaml and the RAG KV store.
+        - name: rag-storage
+          mountPath: /opt/app-root/src/.llama/storage
+        # Single-file mount (subPath); this is the default LIGHTSPEED_CONFIG
+        # path that the entrypoint script checks before enriching the config.
+        - name: lightspeed-config
+          mountPath: /opt/app-root/lightspeed-stack.yaml
+          subPath: lightspeed-stack.yaml
+          readOnly: true
+        # Directory the entrypoint script reads kv_store.db.gz from (RAG_CM_GZ).
+        - name: rag-data
+          mountPath: /opt/app-root/rag-data-cm
+          readOnly: true
+  volumes:
+    # Pod-lifetime scratch space shared by both init containers and the server.
+    - name: rag-storage
+      emptyDir: {}
+    # Source of run.yaml; mounted only by the materialize-run-yaml init container.
+    - name: config-cm
+      configMap:
+        name: llama-stack-config
+    # Source of lightspeed-stack.yaml, mounted into the server container.
+    - name: lightspeed-config
+      configMap:
+        name: lightspeed-stack-config
+    # Source of kv_store.db.gz; mounted by setup-rag-data and the server container.
+    - name: rag-data
+      configMap:
+        name: rag-data
@@ -2,7 +2,6 @@ apiVersion: datasciencecluster.opendatahub.io/v1
 kind: DataScienceCluster
 metadata:
   name: default-dsc
-  namespace: e2e-rhoai-dsc
 spec:
   serviceMesh:
     managementState: Managed
 
@@ -24,7 +24,7 @@ spec:
         - --port
         - "8080"
         - --max-model-len
-        - "2048"
+        - "32768"
       image: quay.io/rh-ee-cpompeia/vllm-cpu:latest
       name: kserve-container
       env:
 
@@ -24,7 +24,7 @@ spec:
         - --port
         - "8080"
         - --max-model-len
-        - "2048"
+        - "32768"
         - --gpu-memory-utilization
         - "0.9"
       image: ${VLLM_IMAGE}
 
@@ -1,27 +1,30 @@
 #!/bin/bash
 
 BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+NAMESPACE="${NAMESPACE:-e2e-rhoai-dsc}"
 
-# Deploy llama-stack
-envsubst < "$BASE_DIR/manifests/lightspeed/llama-stack.yaml" | oc apply -f -
+# Create or refresh llama-stack-ip-secret before deploying the pod (it references the secret as an env var).
+# Rendering the secret client-side and piping it to `oc apply` is idempotent:
+# an existing secret is updated to the current hostname, and real failures
+# (auth, missing namespace, API errors) stay visible instead of being
+# reported as "Secret exists".
+export E2E_LLAMA_HOSTNAME="llama-stack-service-svc.${NAMESPACE}.svc.cluster.local"
+oc create secret generic llama-stack-ip-secret \
+    --from-literal=key="$E2E_LLAMA_HOSTNAME" \
+    -n "$NAMESPACE" --dry-run=client -o yaml | oc apply -n "$NAMESPACE" -f -
+
+# Deploy llama-stack (substitute only LLAMA_STACK_IMAGE, leave other ${} intact)
+envsubst '${LLAMA_STACK_IMAGE}' < "$BASE_DIR/manifests/lightspeed/llama-stack-prow.yaml" | oc apply -n "$NAMESPACE" -f -
 
 oc wait pod/llama-stack-service \
-  -n e2e-rhoai-dsc --for=condition=Ready --timeout=600s
+  -n "$NAMESPACE" --for=condition=Ready --timeout=600s
 
-# Get url address of llama-stack pod
-oc label pod llama-stack-service pod=llama-stack-service -n e2e-rhoai-dsc
+# Expose llama-stack service
+oc label pod llama-stack-service pod=llama-stack-service -n "$NAMESPACE"
 
 oc expose pod llama-stack-service \
   --name=llama-stack-service-svc \
   --port=8321 \
   --type=ClusterIP \
-  -n e2e-rhoai-dsc
-
-export E2E_LLAMA_HOSTNAME="llama-stack-service-svc.e2e-rhoai-dsc.svc.cluster.local"
-
-oc create secret generic llama-stack-ip-secret \
-    --from-literal=key="$E2E_LLAMA_HOSTNAME" \
-    -n e2e-rhoai-dsc || echo "Secret exists"
+  -n "$NAMESPACE"
 
-# Deploy lightspeed-stack
-oc apply -f "$BASE_DIR/manifests/lightspeed/lightspeed-stack.yaml"
+# Deploy lightspeed-stack (substitute only LIGHTSPEED_STACK_IMAGE, leave other ${} intact)
+LIGHTSPEED_STACK_IMAGE="${LIGHTSPEED_STACK_IMAGE:-quay.io/lightspeed-core/lightspeed-stack:dev-latest}"
+export LIGHTSPEED_STACK_IMAGE
+envsubst '${LIGHTSPEED_STACK_IMAGE}' < "$BASE_DIR/manifests/lightspeed/lightspeed-stack.yaml" | oc apply -n "$NAMESPACE" -f -