Skip to content

Commit 4481123

Browse files
committed
Add k8s smoke test and update Makefile and documentation
- Introduced a new `k8s-smoke-generic` target in the Makefile for running Kubernetes smoke tests.
- Updated README.md to include instructions for the new `k8s-smoke-generic` target.
- Enhanced `generic-e2e.md` documentation to reflect the addition of the Kubernetes smoke test.
- Added an `ensure_kind` function in `acceptance_common.py` to manage the KIND binary for Kubernetes testing.
- Created the `live_k8s_smoke.py` script to implement the live Kubernetes smoke testing functionality.
1 parent 360f50b commit 4481123

6 files changed

Lines changed: 312 additions & 3 deletions

File tree

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
PYTHON ?= python
22

3-
.PHONY: lint validate smoke-generic e2e-generic smoke-otel-demo e2e-otel-demo package-assets chart-package release-dry-run stack-manifest
3+
.PHONY: lint validate smoke-generic e2e-generic k8s-smoke-generic smoke-otel-demo e2e-otel-demo package-assets chart-package release-dry-run stack-manifest
44

55
lint:
66
$(PYTHON) scripts/tasks.py lint
@@ -14,6 +14,9 @@ smoke-generic:
1414
e2e-generic:
1515
$(PYTHON) scripts/tasks.py e2e-generic
1616

17+
k8s-smoke-generic:
18+
$(PYTHON) scripts/tasks.py k8s-smoke-generic
19+
1720
smoke-otel-demo:
1821
$(PYTHON) scripts/tasks.py smoke-otel-demo
1922

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ make lint
6565
make validate
6666
make smoke-generic
6767
make e2e-generic
68+
make k8s-smoke-generic
6869
make smoke-otel-demo
6970
make e2e-otel-demo
7071
make stack-manifest

docs/verification/generic-e2e.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ This is the required non-OTel acceptance path for the repository.
44

55
```bash
66
make e2e-generic
7+
make k8s-smoke-generic
78
```
89

910
What it checks:
@@ -13,5 +14,6 @@ What it checks:
1314
- stable artifact emission from Bering
1415
- Sheaft batch and serve consumption of that artifact
1516
- report generation and persisted history output
17+
- optional live-cluster install smoke through `kind` with the same generic synthetic profile
1618

1719
If this path fails, generic stack readiness is not proven even if the OTel Demo profile still passes.

scripts/acceptance_common.py

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
HELM_VERSION = "v3.16.4"
2020
OTEL_DEMO_CHART_VERSION = "0.40.5"
21+
KIND_VERSION = "v0.31.0"
2122
BERING_VERSION = "0.1.0"
2223
SHEAFT_VERSION = "0.1.1"
2324

@@ -42,6 +43,8 @@ def run(
4243
cwd=cwd,
4344
env=env,
4445
text=True,
46+
encoding="utf-8",
47+
errors="replace",
4548
capture_output=capture,
4649
check=False,
4750
)
@@ -122,8 +125,7 @@ def ensure_helm() -> Path:
122125
return binary_path
123126

124127

125-
def ensure_release_binary(product: str, version: str) -> Path:
126-
os_name, arch = current_platform()
128+
def ensure_release_binary_for_platform(product: str, version: str, os_name: str, arch: str) -> Path:
127129
extension = "zip" if os_name == "windows" else "tar.gz"
128130
archive_name = f"{product}_{version}_{os_name}_{arch}.{extension}"
129131
archive_path = TOOLS_DIR / archive_name
@@ -149,6 +151,41 @@ def ensure_release_binary(product: str, version: str) -> Path:
149151
return matches[0]
150152

151153

154+
def ensure_release_binary(product: str, version: str) -> Path:
    """Resolve the release binary of ``product`` at ``version`` for the host platform.

    Thin wrapper that asks ``current_platform`` for the host OS/architecture
    pair and forwards it to ``ensure_release_binary_for_platform``.

    Returns:
        Whatever path ``ensure_release_binary_for_platform`` returns.
    """
    host_os, host_arch = current_platform()
    return ensure_release_binary_for_platform(
        product,
        version,
        os_name=host_os,
        arch=host_arch,
    )
157+
158+
159+
def ensure_kind() -> Path:
    """Return a path to a ``kind`` binary, downloading a pinned one if needed.

    Resolution order:
      1. ``KIND_BIN`` from the environment (the path must exist).
      2. A ``kind`` executable found on ``PATH``.
      3. A copy under ``TOOLS_DIR`` keyed by ``KIND_VERSION`` and platform,
         downloaded from the kind GitHub release when not already present.

    Returns:
        Path to the ``kind`` executable.
    """
    explicit = os_environ().get("KIND_BIN")
    if explicit:
        kind = Path(explicit)
        check(kind.exists(), f"KIND_BIN does not exist: {kind}")
        return kind

    found = shutil.which("kind")
    if found:
        return Path(found)

    os_name, arch = current_platform()
    is_windows = os_name == "windows"
    # Release assets are published as bare "kind-<os>-<arch>"; only the local
    # file name needs the ".exe" suffix on Windows.
    asset_name = f"kind-{os_name}-{arch}"
    binary_name = "kind.exe" if is_windows else "kind"
    destination_dir = TOOLS_DIR / f"kind-{KIND_VERSION}-{os_name}-{arch}"
    binary_path = destination_dir / binary_name
    if binary_path.exists():
        return binary_path

    destination_dir.mkdir(parents=True, exist_ok=True)
    print(f"[download] kind {KIND_VERSION}", flush=True)
    download(f"https://github.com/kubernetes-sigs/kind/releases/download/{KIND_VERSION}/{asset_name}", binary_path)
    # Verify the download before touching permissions: chmod on a missing file
    # would raise a bare FileNotFoundError and hide the intended message.
    check(binary_path.exists(), f"kind binary not found after download: {binary_path}")
    if not is_windows:
        binary_path.chmod(0o755)
    return binary_path
187+
188+
152189
def helm_env(helm_root: Path) -> dict[str, str]:
153190
env = os_environ()
154191
repo_dir = helm_root.parent / "helm-repo"

scripts/live_k8s_smoke.py

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
from __future__ import annotations
2+
3+
import argparse
4+
import json
5+
import os
6+
import shutil
7+
import subprocess
8+
import sys
9+
import time
10+
import urllib.request
11+
from pathlib import Path
12+
13+
from acceptance_common import (
14+
BERING_VERSION,
15+
ROOT,
16+
SHEAFT_VERSION,
17+
check,
18+
ensure_helm,
19+
ensure_kind,
20+
ensure_release_binary_for_platform,
21+
helm_env,
22+
synthetic_otlp_payload,
23+
wait_for_http,
24+
)
25+
26+
# Helm values file of the synthetic OTLP profile passed to `helm upgrade -f`.
PROFILE_VALUES = ROOT.joinpath("examples", "profiles", "synthetic-otlp", "values.yaml")
# Local chart directory that gets installed into the kind cluster.
CHART_DIR = ROOT.joinpath("charts", "mb3r-stack")
# Scratch directory for image build contexts and the port-forward log;
# it is wiped at the start of a run and again during cleanup.
WORKDIR = ROOT.joinpath(".tmp", "live-k8s-smoke")
# Name of the kind cluster created (and normally deleted) by the smoke run.
CLUSTER_NAME = "mb3r-stack-smoke"
# Kubernetes namespace the Helm release is installed into.
NAMESPACE = "mb3r-smoke"
# Helm release name.
RELEASE_NAME = "mb3r"
32+
33+
34+
def run(
35+
command: list[str],
36+
*,
37+
env: dict[str, str] | None = None,
38+
capture: bool = True,
39+
) -> subprocess.CompletedProcess[str]:
40+
result = subprocess.run(
41+
command,
42+
cwd=ROOT,
43+
env=env,
44+
text=True,
45+
encoding="utf-8",
46+
errors="replace",
47+
capture_output=capture,
48+
check=False,
49+
)
50+
if result.returncode != 0:
51+
raise RuntimeError(
52+
f"command failed ({result.returncode}): {' '.join(command)}\n"
53+
f"stdout:\n{result.stdout}\n"
54+
f"stderr:\n{result.stderr}"
55+
)
56+
return result
57+
58+
59+
def write_text(path: Path, content: str) -> None:
    """Write ``content`` to ``path`` as UTF-8, creating parent directories first."""
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(content)
62+
63+
64+
def build_local_image(product: str, version: str, image_ref: str) -> str:
    """Build a local Docker image that wraps a released ``product`` binary.

    A build context is assembled under ``WORKDIR / "images" / product``
    containing the binary and a generated Dockerfile, then ``docker build``
    tags it as ``image_ref``.

    Args:
        product: Product name; also used as the binary file name and the
            entrypoint name inside the image.
        version: Release version handed to
            ``ensure_release_binary_for_platform``.
        image_ref: Tag to give the built image.

    Returns:
        ``image_ref`` unchanged, for convenient chaining by the caller.
    """
    context_dir = WORKDIR / "images" / product
    context_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): the binary is always requested for linux/amd64 regardless
    # of the host architecture — confirm this is intended for arm64 hosts.
    release_binary = ensure_release_binary_for_platform(product, version, "linux", "amd64")
    shutil.copyfile(release_binary, context_dir / product)

    container_path = f"/usr/local/bin/{product}"
    dockerfile_lines = [
        "FROM debian:bookworm-slim",
        f"COPY {product} {container_path}",
        # copyfile does not carry the executable bit over, so set it in the image.
        f"RUN chmod +x {container_path}",
        f'ENTRYPOINT ["{container_path}"]',
    ]
    write_text(context_dir / "Dockerfile", "\n".join(dockerfile_lines) + "\n")

    run(["docker", "build", "-t", image_ref, str(context_dir)])
    return image_ref
79+
80+
81+
def wait_for_json(url: str, *, attempts: int = 120, delay: float = 1.0) -> dict[str, object]:
    """Poll ``url`` until it yields a response that parses as JSON.

    Args:
        url: Address to fetch (each request uses a 3 second timeout).
        attempts: Maximum number of fetches before giving up.
        delay: Seconds to sleep after every failed fetch.

    Returns:
        The decoded JSON document from the first successful fetch.

    Raises:
        RuntimeError: When every attempt failed; the message includes the
            text of the most recent error.
    """
    most_recent_failure = ""
    for _attempt in range(attempts):
        try:
            with urllib.request.urlopen(url, timeout=3) as response:
                raw_body = response.read()
            return json.loads(raw_body.decode("utf-8"))
        except Exception as exc:
            # Any failure (connection, HTTP status, decoding) counts as "not ready yet".
            most_recent_failure = str(exc)
        time.sleep(delay)
    raise RuntimeError(f"timed out waiting for JSON response from {url}: {most_recent_failure}")
91+
92+
93+
def wait_for_port_forward(url: str) -> None:
    """Wait for a freshly started port-forward by polling ``url``.

    Delegates to ``wait_for_http`` with 120 attempts spaced 0.5 seconds apart.
    """
    polling = {"attempts": 120, "delay": 0.5}
    wait_for_http(url, **polling)
95+
96+
97+
def pod_name(*, timeout_seconds: int = 120) -> str:
    """Return the name of the first pod labelled as the Bering component.

    The Helm install in this script does not wait for workloads, so the pod
    may not exist yet when this is called. The lookup is therefore repeated
    until a pod shows up or ``timeout_seconds`` elapses.

    Args:
        timeout_seconds: How long to keep polling for the pod to appear.

    Returns:
        The name of the first matching pod in ``NAMESPACE``.
    """
    query = [
        "kubectl",
        "get",
        "pods",
        "-n",
        NAMESPACE,
        "-l",
        "app.kubernetes.io/component=bering",
        "-o",
        # `[*]` yields an empty string when no pod matches, whereas `[0]`
        # makes kubectl exit non-zero on an empty list.
        "jsonpath={.items[*].metadata.name}",
    ]
    deadline = time.time() + timeout_seconds
    while True:
        names = run(query).stdout.split()
        if names or time.time() >= deadline:
            break
        time.sleep(2)
    check(bool(names), "failed to resolve live smoke pod name")
    return names[0]
114+
115+
116+
def wait_for_container_ready(pod: str, container_name: str, *, timeout_seconds: int = 240) -> None:
    """Block until ``container_name`` inside ``pod`` reports ready.

    The pod is fetched as JSON through ``kubectl`` every two seconds and its
    ``status.containerStatuses`` entries are inspected.

    Args:
        pod: Pod name in ``NAMESPACE``.
        container_name: Container whose ``ready`` flag is awaited.
        timeout_seconds: Overall time budget for the wait.

    Raises:
        RuntimeError: If the container is not ready before the deadline.
    """
    query = ["kubectl", "get", "pod", pod, "-n", NAMESPACE, "-o", "json"]
    stop_at = time.time() + timeout_seconds
    while time.time() < stop_at:
        pod_document = json.loads(run(query, capture=True).stdout)
        statuses = pod_document.get("status", {}).get("containerStatuses", [])
        is_ready = any(
            entry.get("name") == container_name and entry.get("ready") is True
            for entry in statuses
        )
        if is_ready:
            return
        time.sleep(2)
    raise RuntimeError(f"timed out waiting for container {container_name} in pod {pod} to become ready")
126+
127+
128+
def wait_for_service_endpoints(service_name: str, *, timeout_seconds: int = 180) -> None:
    """Block until the Endpoints object of ``service_name`` lists an address.

    The Endpoints resource is fetched as JSON through ``kubectl`` every two
    seconds; the wait ends once any subset has a non-empty ``addresses`` list.

    Args:
        service_name: Service (and Endpoints) name in ``NAMESPACE``.
        timeout_seconds: Overall time budget for the wait.

    Raises:
        RuntimeError: If no address shows up before the deadline.
    """
    query = ["kubectl", "get", "endpoints", service_name, "-n", NAMESPACE, "-o", "json"]
    stop_at = time.time() + timeout_seconds
    while time.time() < stop_at:
        endpoints_document = json.loads(run(query, capture=True).stdout)
        # "subsets" may be absent or null while nothing backs the service yet.
        for subset in endpoints_document.get("subsets") or []:
            if subset.get("addresses"):
                return
        time.sleep(2)
    raise RuntimeError(f"timed out waiting for endpoints on service {service_name}")
138+
139+
140+
def post_trace_payload() -> None:
    """POST the synthetic OTLP trace payload to the forwarded Bering endpoint.

    The payload comes from ``synthetic_otlp_payload`` and is sent as JSON to
    local port 14318 (the port-forward set up by ``main``). A response status
    other than 200 is reported through ``check``.
    """
    encoded_body = json.dumps(synthetic_otlp_payload()).encode("utf-8")
    trace_request = urllib.request.Request(
        "http://127.0.0.1:14318/v1/traces",
        data=encoded_body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(trace_request, timeout=10) as response:
        accepted = response.status == 200
        check(accepted, "Bering live-cluster OTLP/HTTP endpoint rejected the trace payload")
149+
150+
151+
def _stop_port_forward(process: subprocess.Popen[str]) -> None:
    """Terminate one port-forward process, killing it if it ignores the request."""
    process.terminate()
    try:
        process.wait(timeout=5)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait(timeout=5)


def cleanup(kind_bin: Path, keep_cluster: bool, port_forwards: list[subprocess.Popen[str]]) -> None:
    """Tear down everything the smoke run created.

    Args:
        kind_bin: Path to the ``kind`` executable used to delete the cluster.
        keep_cluster: When true, the kind cluster is left running.
        port_forwards: Background ``kubectl port-forward`` processes to stop.
    """
    for forward in port_forwards:
        _stop_port_forward(forward)

    if keep_cluster:
        pass
    else:
        # Best effort: a failed cluster delete must not mask the run's result.
        try:
            run([str(kind_bin), "delete", "cluster", "--name", CLUSTER_NAME])
        except Exception:
            pass

    if not WORKDIR.exists():
        return
    shutil.rmtree(WORKDIR, ignore_errors=True)
168+
169+
170+
def main() -> int:
    """Run the live generic Kubernetes smoke test against a fresh kind cluster.

    Steps: recreate the scratch directory, delete any stale cluster, create
    the kind cluster, build and load local Bering/Sheaft images, install the
    chart with the synthetic profile, port-forward the Bering pod, post a
    synthetic trace, and verify the Sheaft report and service endpoints.

    The cluster is kept afterwards when ``--keep-cluster`` is passed or
    ``MB3R_KEEP_CLUSTER=1`` is set.

    Returns:
        0 when every step succeeds, 1 when any step raises.
    """
    parser = argparse.ArgumentParser(description="Run a live generic Kubernetes smoke against a kind cluster.")
    parser.add_argument("--keep-cluster", action="store_true", help="Preserve the kind cluster after the run.")
    args = parser.parse_args()

    keep_cluster = args.keep_cluster or os.environ.get("MB3R_KEEP_CLUSTER") == "1"
    kind_bin = ensure_kind()
    helm_bin = ensure_helm()
    helm_environment = helm_env(helm_bin)
    port_forwards: list[subprocess.Popen[str]] = []

    if WORKDIR.exists():
        shutil.rmtree(WORKDIR, ignore_errors=True)
    WORKDIR.mkdir(parents=True, exist_ok=True)

    # Best effort: remove a cluster left behind by an earlier, interrupted run.
    try:
        run([str(kind_bin), "delete", "cluster", "--name", CLUSTER_NAME])
    except Exception:
        pass

    try:
        print("[cluster] create kind cluster", flush=True)
        run([str(kind_bin), "create", "cluster", "--name", CLUSTER_NAME, "--wait", "180s"], capture=True)

        print("[images] build and load local Bering and Sheaft images", flush=True)
        bering_image = build_local_image("bering", BERING_VERSION, "mb3r-local/bering:live-smoke")
        sheaft_image = build_local_image("sheaft", SHEAFT_VERSION, "mb3r-local/sheaft:live-smoke")
        run([str(kind_bin), "load", "docker-image", "--name", CLUSTER_NAME, bering_image, sheaft_image])

        print("[deploy] install generic synthetic profile", flush=True)
        run(
            [
                str(helm_bin),
                "upgrade",
                "--install",
                RELEASE_NAME,
                str(CHART_DIR),
                "--namespace",
                NAMESPACE,
                "--create-namespace",
                "-f",
                str(PROFILE_VALUES),
                # Point the chart at the locally built images; the empty
                # digest overrides drop any digest set by the chart values.
                "--set-string",
                "bering.image.repository=mb3r-local/bering",
                "--set-string",
                "bering.image.tag=live-smoke",
                "--set-string",
                "bering.image.digest=",
                "--set-string",
                "sheaft.image.repository=mb3r-local/sheaft",
                "--set-string",
                "sheaft.image.tag=live-smoke",
                "--set-string",
                "sheaft.image.digest=",
            ],
            env=helm_environment,
        )

        pod = pod_name()
        wait_for_container_ready(pod, "bering")

        print("[verify] port-forward live smoke pod", flush=True)
        # The child process inherits its own handle to the log file, so the
        # parent's handle can be closed as soon as the process is started.
        # Leaving it open leaks the handle until interpreter exit.
        with (WORKDIR / "pod-port-forward.log").open("w", encoding="utf-8") as port_forward_log:
            port_forwards.append(
                subprocess.Popen(
                    ["kubectl", "port-forward", f"pod/{pod}", "14318:4318", "18080:8080", "-n", NAMESPACE],
                    cwd=ROOT,
                    stdout=port_forward_log,
                    stderr=subprocess.STDOUT,
                    text=True,
                )
            )

        wait_for_port_forward("http://127.0.0.1:14318/readyz")

        print("[verify] post synthetic OTLP payload", flush=True)
        post_trace_payload()

        print("[verify] wait for Sheaft report", flush=True)
        report = wait_for_json("http://127.0.0.1:18080/current-report")
        decision = report["policy_evaluation"]["decision"]
        check(decision in {"warn", "pass", "fail", "report"}, "Sheaft live-cluster current-report is malformed")
        wait_for_service_endpoints("bering-discovery")
        wait_for_service_endpoints("sheaft-reports")

        print("k8s-smoke-generic: ok")
        return 0
    except Exception as exc:
        # Top-level boundary: report the failure and turn it into an exit code.
        print(f"k8s-smoke-generic: failed: {exc}", file=sys.stderr)
        return 1
    finally:
        cleanup(kind_bin, keep_cluster, port_forwards)


if __name__ == "__main__":
    raise SystemExit(main())

scripts/tasks.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
"validate": [["validate.py"]],
1414
"smoke-generic": [["acceptance_generic.py", "--mode", "smoke"]],
1515
"e2e-generic": [["acceptance_generic.py", "--mode", "e2e"]],
16+
"k8s-smoke-generic": [["live_k8s_smoke.py"]],
1617
"smoke-otel-demo": [["acceptance_otel_demo.py", "--mode", "smoke"]],
1718
"e2e-otel-demo": [["acceptance_otel_demo.py", "--mode", "e2e"]],
1819
"package-assets": [["package_assets.py"]],

0 commit comments

Comments
 (0)