Skip to content

Commit 7a47dc4

Browse files
committed
test: add testing to original
Signed-off-by: vsoch <vsoch@users.noreply.github.com>
1 parent 000baf4 commit 7a47dc4

21 files changed

Lines changed: 708 additions & 52 deletions

.dockerignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
README.md
12
bin/
23
.git/
3-
*.tar.gz
4+
*.tar.gz

.github/workflows/e2e-tests.yaml

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
name: e2e
2+
on:
3+
pull_request:
4+
branches: [main]
5+
push:
6+
branches: [main]
7+
concurrency:
8+
group: ${{ github.workflow }}-${{ github.ref }}
9+
cancel-in-progress: true
10+
11+
env:
12+
KIND_VERSION: v0.32.0
13+
IMAGE: ghcr.io/converged-computing/fluence:latest
14+
15+
jobs:
16+
e2e:
17+
runs-on: ubuntu-latest
18+
steps:
19+
- name: Checkout
20+
uses: actions/checkout@v4
21+
22+
#- name: Set up Docker Buildx
23+
# uses: docker/setup-buildx-action@v3
24+
25+
# - name: Build fluence image
26+
# uses: docker/build-push-action@v6
27+
# with:
28+
# context: .
29+
# file: ./Dockerfile
30+
# push: false
31+
# load: true
32+
# tags: ${{ env.IMAGE }}
33+
# cache-from: type=gha
34+
# cache-to: type=gha,mode=max
35+
# Bring up the KinD cluster that the e2e steps below run against.
- name: Create k8s Kind Cluster
  uses: helm/kind-action@v1.10.0
  with:
    # KinD CLI version, single-sourced from the workflow-level `env` block so
    # the value is not duplicated (and cannot drift) inside this step.
    version: ${{ env.KIND_VERSION }}
    node_image: kindest/node:v1.36.1
    config: ./deploy/kind-config.yaml
41+
42+
- name: Free Disk Space (Ubuntu)
43+
run: |
44+
echo "=== Disk space before cleanup ==="
45+
df -h
46+
47+
# Remove large software runtimes and tools
48+
sudo rm -rf /usr/share/dotnet
49+
sudo rm -rf /usr/local/lib/android
50+
sudo rm -rf /opt/ghc
51+
sudo rm -rf /opt/hostedtoolcache/CodeQL
52+
53+
# Clean package caches
54+
sudo apt-get clean
55+
echo "=== Disk space after cleanup ==="
56+
df -h
57+
58+
- name: Deploy fluence (base)
59+
run: |
60+
kubectl apply -f deploy/fluence-test.yaml
61+
kubectl rollout status -n kube-system deployment/fluence --timeout=180s
62+
POD=$(kubectl -n kube-system get pods -l app=fluence -o name | head -1)
63+
kubectl -n kube-system exec "${POD#pod/}" -- ls /tmp/
64+
kubectl -n kube-system logs "${POD#pod/}"
65+
kubectl -n kube-system exec "${POD#pod/}" -- /bin/bash -c "cat /tmp/fluence-graph-*.json"
66+
kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}{": cpu="}{.status.allocatable.cpu}{" mem="}{.status.allocatable.memory}{"\n"}{end}'
67+
68+
- name: E2E - classical gang
69+
run: bash test/e2e/01-classical-gang.sh
70+
71+
# Apply the resources add-on, restart the scheduler so it re-reads its
# resources, then wait until a node advertises the qpu extended resource.
- name: Deploy quantum add-on
  run: |
    # Includes the device plugin and oriented to testing container
    kubectl apply -f deploy/fluence-resources-test.yaml
    kubectl rollout restart -n kube-system deployment/fluence
    kubectl rollout status -n kube-system deployment/fluence --timeout=60s

    # Poll once per second (60 attempts) for the qpu resource. The node
    # allocatable map is fetched once per attempt, printed for the job log,
    # and then searched.
    advertised=false
    for i in $(seq 1 60); do
      allocatable=$(kubectl get nodes -o jsonpath='{range .items[*]}{.status.allocatable}{"\n"}{end}')
      echo "$allocatable"
      if echo "$allocatable" | grep -q 'fluxion.flux-framework.org/qpu'; then
        advertised=true
        break
      fi
      sleep 1
    done

    # Fail here with a clear message rather than letting a later e2e step
    # fail for a less obvious reason.
    if [ "$advertised" != true ]; then
      echo "::error::no node advertised fluxion.flux-framework.org/qpu after 60 attempts"
      exit 1
    fi

    POD=$(kubectl -n kube-system get pods -l app=fluence -o name | head -1)
    kubectl -n kube-system exec "${POD#pod/}" -- /bin/bash -c "cat /tmp/fluence-graph-*.json"
84+
85+
# Block until the webhook deployment is rolled out and its
# MutatingWebhookConfiguration carries a non-empty caBundle.
- name: Wait for webhook
  run: |
    # wait for the deployment AND for the caBundle to be populated on the webhook config
    kubectl -n kube-system rollout status deployment/fluence-webhook --timeout=120s

    cab=""
    for i in $(seq 1 30); do
      # `|| true`: the default step shell runs with errexit, so a failed
      # lookup (e.g. the config does not exist yet) must not abort the step;
      # treat it as "not ready" and keep polling.
      cab=$(kubectl get mutatingwebhookconfiguration fluence-webhook \
        -o jsonpath='{.webhooks[0].clientConfig.caBundle}' 2>/dev/null) || true
      [ -n "$cab" ] && break
      sleep 2
    done

    # Surface a timeout explicitly instead of proceeding with an empty caBundle.
    if [ -z "$cab" ]; then
      echo "::error::caBundle not populated on mutatingwebhookconfiguration/fluence-webhook after 30 attempts"
      exit 1
    fi

    # let TLS serving settle after caBundle patch
    sleep 3
98+
99+
- name: E2E - quantum placement
100+
run: bash test/e2e/02-quantum-placement.sh
101+
102+
#- name: E2E - restart recovery (no double-book)
103+
# run: bash test/e2e/03-restart-recovery.sh
104+
105+
# Runs only when an earlier step failed; collects cluster state and the logs
# of the components this job deploys.
- name: Dump diagnostics on failure
  if: failure()
  run: |
    # Best-effort: `|| true` keeps one failing command (for example a
    # component that was never deployed) from hiding the remaining output.
    kubectl get pods -A -o wide || true
    kubectl logs -n kube-system deployment/fluence || true
    kubectl logs -n kube-system deployment/fluence-webhook || true
    kubectl logs -n kube-system daemonset/fluence-deviceplugin || true

Dockerfile

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,5 @@
1-
# Mr. Fluence!
2-
# Multi-stage build for the fluence scheduler.
3-
# The scheduler binary cgo-links flux-sched (Fluxion) for resource matching.
1+
FROM ghcr.io/converged-computing/fluence-base:latest AS builder
42

5-
FROM fluxrm/flux-core:noble AS builder
6-
7-
USER root
8-
ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/lib
9-
ENV DEBIAN_FRONTEND=noninteractive
10-
11-
RUN apt-get update && apt-get install -y --no-install-recommends \
12-
libboost-graph-dev libboost-system-dev libboost-filesystem-dev \
13-
libboost-regex-dev libyaml-cpp-dev libedit-dev libczmq-dev \
14-
python3-yaml ninja-build cmake curl git wget ca-certificates \
15-
&& rm -rf /var/lib/apt/lists/*
16-
17-
# Go toolchain
18-
RUN wget -q https://go.dev/dl/go1.26.0.linux-amd64.tar.gz \
19-
&& tar -C /usr/local -xzf go1.26.0.linux-amd64.tar.gz && rm go1.26.0.linux-amd64.tar.gz
20-
ENV PATH=$PATH:/usr/local/go/bin
21-
22-
# flux-sched (Fluxion) with the Go reapi bindings -> /usr; build tree at /opt/flux-sched
23-
#RUN git clone https://github.com/flux-framework/flux-sched /opt/flux-sched \
24-
RUN git clone -b implement-reapi-cli-update-allocate https://github.com/vsoch/flux-sched /opt/flux-sched \
25-
&& export FLUX_SCHED_VERSION=0.53.0 \
26-
&& cd /opt/flux-sched && export WITH_GO=yes && ./configure --prefix=/usr \
27-
&& mkdir build && cd build && cmake ../ && cd ../ && make -j"$(nproc)" && make install
28-
ENV FLUX_SCHED_ROOT=/opt/flux-sched
29-
30-
# Build the scheduler
313
WORKDIR /src
324
COPY go.mod go.sum* ./
335
RUN go mod download || true

Makefile

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,25 @@ test-restore:
4343
image: ## Build the scheduler container image
4444
docker build -t $(IMG) .
4545

46+
# Builds the image under the "$(IMG)-test" tag and pushes it to the registry.
# The `##` text is what the `help` target prints, so it must describe this
# target rather than repeat the description of `image`.
.PHONY: test-image
test-image: ## Build and push the test scheduler container image
	docker build -t $(IMG)-test .
	docker push $(IMG)-test
50+
51+
# Rebuilds and pushes the test image (via `test-image`), then removes the
# currently running test workloads and the deploy/fluence-test.yaml resources.
# NOTE(review): this target only deletes; nothing here re-applies the manifest.
# Confirm whether a follow-up `kubectl apply` is intended.
.PHONY: test-image-deploy
test-image-deploy: test-image ## Rebuild the test image and tear down the running test deployment
	kubectl patch podgroup training -n default --type=merge -p '{"metadata":{"finalizers":null}}' || true
	kubectl delete deployments --all
	kubectl delete pods --all
	kubectl delete -f deploy/fluence-test.yaml --ignore-not-found
	kubectl delete pods --all
58+
59+
4660
.PHONY: deploy
4761
deploy: ## Install RBAC + scheduler into kube-system
4862
kubectl apply -f deploy/fluence.yaml
4963

5064
.PHONY: help
5165
help:
5266
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
53-
awk 'BEGIN{FS=":.*?## "}{printf " %-14s %s\n", $$1, $$2}'
67+
awk 'BEGIN{FS=":.*?## "}{printf " %-14s %s\n", $$1, $$2}'

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
![img/fluence.png](img/fluence.png)
44

5+
🚧 **UNDER DEVELOPMENT** 🚧 Not ready for production use! I rolled back features since the recorded demo and am going to add them back with proper testing. I have not finished this yet, but anticipate finishing later in the week of 6/16/2026. Thank you for your patience! -@vsoch
6+
57
A Kubernetes scheduler plugin that places **pod groups** (and individual pods)
68
by matching them against a [Fluxion](https://github.com/flux-framework/flux-sched)
79
(flux-sched) resource graph built from the live cluster.

deploy/fluence-resources-test.yaml

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# Resources add-on for fluence. Turns on fluence-managed resources by supplying
2+
# (1) the resources config and (2) the device plugin that advertises them.
3+
# Quantum backends are just the example payload here; any resource type fluence
4+
# can model goes in the same ConfigMap. Apply AFTER deploy/fluence-test.yaml:
5+
#
6+
# kubectl apply -f deploy/fluence-test.yaml # base scheduler (no devices)
7+
# kubectl apply -f deploy/fluence-resources-test.yaml # + resources config + device plugin
8+
# kubectl rollout restart deployment/fluence -n kube-system # scheduler re-reads resources
9+
#
10+
# The base scheduler already mounts the `fluence-resources` ConfigMap optionally
11+
# and reads FLUENCE_RESOURCES, so this add-on is purely additive — no edits to
12+
# the base Deployment.
13+
14+
# Resources config: the SINGLE source of truth for the resource types fluence
15+
# injects/advertises. The scheduler builds qpu/qubit graph vertices from it; the
16+
# device plugin derives which extended resources to advertise from the SAME
17+
# document (same rule), so the two cannot drift.
18+
apiVersion: v1
19+
kind: ConfigMap
20+
metadata:
21+
name: fluence-resources
22+
namespace: kube-system
23+
data:
24+
resources.yaml: |
25+
backends:
26+
- name: ibm_fez
27+
num_qubits: 156
28+
vendor: ibm
29+
qrmi_type: qiskit-runtime-service
30+
- name: ibm_marrakesh
31+
num_qubits: 156
32+
vendor: ibm
33+
qrmi_type: qiskit-runtime-service
34+
---
35+
# Device plugin: advertises the exotic Fluxion resource types (derived from the
36+
# resources config above) on every node, so pods can request them via resources
37+
# and NodeResourcesFit is satisfied.
38+
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: fluence-deviceplugin
  namespace: kube-system
  labels:
    app: fluence-deviceplugin
spec:
  selector:
    matchLabels:
      app: fluence-deviceplugin
  template:
    metadata:
      labels:
        app: fluence-deviceplugin
    spec:
      priorityClassName: system-node-critical
      tolerations:
        # run on every node, including tainted/control-plane
        - operator: Exists
      containers:
        - name: deviceplugin
          image: ghcr.io/converged-computing/fluence:test
          command:
            - /bin/fluence-deviceplugin
          env:
            # Path of the resources document; it lives under the `resources`
            # volume mount declared below.
            - name: FLUENCE_RESOURCES
              value: /etc/fluence/resources.yaml
            # Quoted so the value stays a string, as env values must be.
            - name: FLUENCE_RESOURCE_CAPACITY
              value: "1000"
          securityContext:
            privileged: true
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
            - name: resources
              mountPath: /etc/fluence
      volumes:
        # Host directory shared with the node at the same path as in the container.
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
        # Backed by the fluence-resources ConfigMap.
        - name: resources
          configMap:
            name: fluence-resources

0 commit comments

Comments
 (0)