Skip to content

Commit fbef706

Browse files
radofuchsRadovan Fuchscursoragent
authored
LCORE-2035- Add TLS fixes for konflux run (#1929)
* Add TLS fixes for konflux run Co-authored-by: Cursor <cursoragent@cursor.com> --------- Co-authored-by: Radovan Fuchs <rfuchs@rfuchs-thinkpadp1gen7.tpb.csb> Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 91d8ac7 commit fbef706

19 files changed

Lines changed: 1037 additions & 333 deletions

File tree

.tekton/integration-tests/pipeline/lightspeed-stack-integration-test.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,8 @@ spec:
167167
echo "========== End parameters =========="
168168
- name: lightspeed-stack-integration-tests
169169
description: Task to run integration tests from lightspeed-stack repository
170+
# Full Behave suite (proxy + tls) can exceed 2h; needs PipelineRun timeouts >= this value.
171+
timeout: 3h
170172
params:
171173
- name: SNAPSHOT
172174
value: $(params.SNAPSHOT)
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
# Mock HTTPS OpenAI API for tls-*.feature (Konflux / Prow; no Docker Compose).
2+
# Llama Stack run.yaml uses https://e2e-mock-tls-inference.<ns>.svc.cluster.local:8443|8444|8445/v1
3+
apiVersion: v1
4+
kind: Pod
5+
metadata:
6+
name: e2e-mock-tls-inference
7+
labels:
8+
app: e2e-mock-tls-inference
9+
spec:
10+
securityContext:
11+
runAsNonRoot: true
12+
seccompProfile:
13+
type: RuntimeDefault
14+
containers:
15+
- name: e2e-mock-tls-inference
16+
image: python:3.12-slim
17+
securityContext:
18+
allowPrivilegeEscalation: false
19+
capabilities:
20+
drop: ["ALL"]
21+
runAsNonRoot: true
22+
runAsUser: 1000
23+
seccompProfile:
24+
type: RuntimeDefault
25+
env:
26+
- name: POD_NAMESPACE
27+
valueFrom:
28+
fieldRef:
29+
fieldPath: metadata.namespace
30+
- name: PYTHONPATH
31+
value: /app:/tmp/pydeps
32+
command:
33+
- /bin/sh
34+
- -c
35+
- |
36+
set -e
37+
pip install --quiet --no-cache-dir --target /tmp/pydeps 'trustme>=1.2.1' 'cryptography>=42.0.0'
38+
NS="${POD_NAMESPACE:-default}"
39+
export TLS_CERT_DNS_NAMES="mock-tls-inference,localhost,127.0.0.1,e2e-mock-tls-inference,e2e-mock-tls-inference.${NS}.svc.cluster.local"
40+
exec python /app/server.py
41+
ports:
42+
- containerPort: 8443
43+
name: tls
44+
- containerPort: 8444
45+
name: mtls
46+
- containerPort: 8445
47+
name: mismatch
48+
volumeMounts:
49+
- name: server-script
50+
mountPath: /app/server.py
51+
subPath: server.py
52+
readOnly: true
53+
- name: certs-work
54+
mountPath: /certs
55+
readinessProbe:
56+
exec:
57+
command:
58+
- python3
59+
- -c
60+
- |
61+
import ssl, urllib.request
62+
ctx = ssl.create_default_context()
63+
ctx.check_hostname = False
64+
ctx.verify_mode = ssl.CERT_NONE
65+
urllib.request.urlopen("https://localhost:8443/health", context=ctx)
66+
initialDelaySeconds: 8
67+
periodSeconds: 5
68+
livenessProbe:
69+
exec:
70+
command:
71+
- python3
72+
- -c
73+
- |
74+
import ssl, urllib.request
75+
ctx = ssl.create_default_context()
76+
ctx.check_hostname = False
77+
ctx.verify_mode = ssl.CERT_NONE
78+
urllib.request.urlopen("https://localhost:8443/health", context=ctx)
79+
initialDelaySeconds: 15
80+
periodSeconds: 20
81+
volumes:
82+
- name: server-script
83+
configMap:
84+
name: e2e-mock-tls-inference-script
85+
- name: certs-work
86+
emptyDir: {}
87+
---
88+
apiVersion: v1
89+
kind: Service
90+
metadata:
91+
name: e2e-mock-tls-inference
92+
spec:
93+
selector:
94+
app: e2e-mock-tls-inference
95+
ports:
96+
- name: tls
97+
port: 8443
98+
targetPort: tls
99+
- name: mtls
100+
port: 8444
101+
targetPort: mtls
102+
- name: mismatch
103+
port: 8445
104+
targetPort: mismatch

tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-openai.yaml

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,28 @@ spec:
3838
- -c
3939
- |
4040
set -e
41+
# Fast-path: PVC already has a valid venv from a previous pod creation in this pipeline run.
42+
# TLS scenarios delete+recreate this pod up to 16 times; skipping the expensive install
43+
# reduces per-restart time from ~6-15 min to ~30-90 s (just RAG seed refresh + chown).
44+
if [[ -d /opt/app-root/.venv ]] \
45+
&& /opt/app-root/.venv/bin/python --version >/dev/null 2>&1 \
46+
&& [[ -d /opt/app-root/src ]]; then
47+
echo "PVC cache hit: app-root already provisioned — skipping full install"
48+
mkdir -p /opt/app-root/.e2e-rag-seed /opt/app-root/src/.llama/storage/rag /opt/app-root/src/.llama/storage/files
49+
if [[ -f /rag-seed/kv_store.db.gz ]]; then
50+
gzip -dc /rag-seed/kv_store.db.gz > /opt/app-root/.e2e-rag-seed/kv_store.db
51+
_sz=$(stat -c%s /opt/app-root/.e2e-rag-seed/kv_store.db)
52+
if [[ "${_sz}" -lt 1048576 ]]; then
53+
echo "FATAL: RAG seed too small (${_sz} bytes); check rag-data ConfigMap"
54+
exit 1
55+
fi
56+
cp -f /opt/app-root/.e2e-rag-seed/kv_store.db /opt/app-root/src/.llama/storage/rag/kv_store.db
57+
fi
58+
chmod -R 775 /opt/app-root && chown -R 1001:0 /opt/app-root
59+
echo "PVC fast-path complete"
60+
exit 0
61+
fi
62+
# Full provisioning (PVC is empty — first pod creation this pipeline run).
4163
REPO_URL="${REPO_URL:-https://github.com/lightspeed-core/lightspeed-stack.git}"
4264
REPO_REVISION="${REPO_REVISION:-main}"
4365
case "$REPO_URL" in git@github.com:*) REPO_URL="https://github.com/${REPO_URL#git@github.com:}"; esac
@@ -201,9 +223,14 @@ spec:
201223
mountPath: /tmp/interception-proxy-ca.pem
202224
subPath: ca.pem
203225
readOnly: true
226+
# tls-*.feature: client/CA PEMs from Secret e2e-mock-tls-certs (optional).
227+
- name: mock-tls-certs
228+
mountPath: /certs
229+
readOnly: true
204230
volumes:
205231
- name: app-root
206-
emptyDir: {}
232+
persistentVolumeClaim:
233+
claimName: llama-stack-app-root
207234
- name: config-cm
208235
configMap:
209236
name: llama-stack-config
@@ -217,3 +244,7 @@ spec:
217244
secret:
218245
secretName: e2e-interception-proxy-ca
219246
optional: true
247+
- name: mock-tls-certs
248+
secret:
249+
secretName: e2e-mock-tls-certs
250+
optional: true

tests/e2e-prow/rhoai/pipeline-konflux.sh

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,25 @@ log "✅ Mock servers deployed"
143143
#========================================
144144
progress "Deploying lightspeed-stack and llama-stack"
145145

146+
# PVC for llama-stack app-root: caches dnf/uv/git install so TLS per-scenario pod
147+
# recreates skip the expensive init (~6-15 min → ~1-2 min). Delete first to guarantee
148+
# a fresh checkout for this pipeline revision; re-create immediately so the pod can bind.
149+
log "Recreating llama-stack-app-root PVC (fresh per pipeline run)..."
150+
oc delete pvc llama-stack-app-root -n "$NAMESPACE" --ignore-not-found=true 2>/dev/null || true
151+
cat <<'EOF' | oc apply -n "$NAMESPACE" -f -
152+
apiVersion: v1
153+
kind: PersistentVolumeClaim
154+
metadata:
155+
name: llama-stack-app-root
156+
spec:
157+
accessModes:
158+
- ReadWriteOnce
159+
resources:
160+
requests:
161+
storage: 10Gi
162+
EOF
163+
log "✅ llama-stack-app-root PVC created"
164+
146165
# Llama run config: single source with GitHub E2E (tests/e2e/configs/run-ci.yaml).
147166
# Lightspeed stack: same tree as local/docker E2E (tests/e2e/configuration/server-mode).
148167
oc create configmap llama-stack-config -n "$NAMESPACE" \
@@ -393,6 +412,7 @@ if [[ -n "${E2E_LLAMA_PORT_FORWARD_PID_FILE:-}" && -f "$E2E_LLAMA_PORT_FORWARD_P
393412
fi
394413
rm -f "$E2E_LLAMA_PORT_FORWARD_PID_FILE"
395414
fi
415+
396416
kill $PF_LCS_PID 2>/dev/null || true
397417
kill $PF_JWKS_PID 2>/dev/null || true
398418
kill $PF_LLAMA_PID 2>/dev/null || true

tests/e2e-prow/rhoai/pipeline-services-konflux.sh

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,25 @@ oc create secret generic llama-stack-ip-secret \
2626
-n "$NAMESPACE" \
2727
--dry-run=client -o yaml | oc apply -f -
2828

29+
# PVC must exist before the pod (pipeline-konflux.sh creates it; guard here for standalone use).
30+
oc get pvc llama-stack-app-root -n "$NAMESPACE" >/dev/null 2>&1 || \
31+
oc apply -n "$NAMESPACE" -f - <<'PVCEOF'
32+
apiVersion: v1
33+
kind: PersistentVolumeClaim
34+
metadata:
35+
name: llama-stack-app-root
36+
spec:
37+
accessModes:
38+
- ReadWriteOnce
39+
resources:
40+
requests:
41+
storage: 10Gi
42+
PVCEOF
43+
2944
timeout 120 oc delete pod llama-stack-service -n "$NAMESPACE" --ignore-not-found=true --wait=true 2>/dev/null || true
3045
oc apply -n "$NAMESPACE" -f "$BASE_DIR/manifests/lightspeed/llama-stack-openai.yaml"
31-
oc wait pod/llama-stack-service -n "$NAMESPACE" --for=condition=Ready --timeout=600s
46+
# First boot runs the full init (dnf + git clone + uv sync ≈ 6-15 min); use a generous timeout.
47+
oc wait pod/llama-stack-service -n "$NAMESPACE" --for=condition=Ready --timeout=900s
3248
oc label pod llama-stack-service pod=llama-stack-service -n "$NAMESPACE"
3349
oc expose pod llama-stack-service --name=llama-stack-service-svc --port=8321 --type=ClusterIP -n "$NAMESPACE"
3450

0 commit comments

Comments
 (0)