From 95ed73245387d1bd85f7732d07fb5dce5c30af04 Mon Sep 17 00:00:00 2001 From: William Yao Date: Wed, 22 Apr 2026 15:36:54 -0700 Subject: [PATCH] Rename demo examples with descriptive names and add per-example documentation Signed-off-by: William Yao --- README.md | 82 ++++++++++--------- demo/README.md | 31 +++++++ demo/{gpu-test7.yaml => admin-access.yaml} | 38 +++++++-- ...est2.yaml => basic-multiple-requests.yaml} | 23 ++++-- ...=> basic-resourceclaim-opaque-config.yaml} | 30 +++++-- ....yaml => basic-resourceclaimtemplate.yaml} | 26 ++++-- ...basic-shared-claim-across-containers.yaml} | 24 ++++-- ...ml => basic-shared-claim-across-pods.yaml} | 29 +++++-- demo/{gpu-test8.yaml => cel-selector.yaml} | 25 ++++-- ...st6.yaml => initcontainer-shared-gpu.yaml} | 24 ++++-- demo/test-admin-access.sh | 16 ++-- .../helm/dra-example-driver/values.yaml | 2 +- test/e2e/e2e_setup_test.go | 16 ++-- test/e2e/e2e_test.go | 32 ++++---- 14 files changed, 280 insertions(+), 118 deletions(-) create mode 100644 demo/README.md rename demo/{gpu-test7.yaml => admin-access.yaml} (53%) rename demo/{gpu-test2.yaml => basic-multiple-requests.yaml} (57%) rename demo/{gpu-test5.yaml => basic-resourceclaim-opaque-config.yaml} (65%) rename demo/{gpu-test1.yaml => basic-resourceclaimtemplate.yaml} (58%) rename demo/{gpu-test3.yaml => basic-shared-claim-across-containers.yaml} (54%) rename demo/{gpu-test4.yaml => basic-shared-claim-across-pods.yaml} (53%) rename demo/{gpu-test8.yaml => cel-selector.yaml} (69%) rename demo/{gpu-test6.yaml => initcontainer-shared-gpu.yaml} (54%) diff --git a/README.md b/README.md index 9b221cad..783c6c5f 100644 --- a/README.md +++ b/README.md @@ -244,43 +244,40 @@ Next, deploy four example apps that demonstrate how `ResourceClaim`s, `ResourceClaimTemplate`s, and custom `GpuConfig` objects can be used to select and configure resources in various ways: ```bash -kubectl apply --filename=demo/gpu-test{1,2,3,4,5}.yaml +kubectl apply --filename=demo/basic-resourceclaimtemplate.yaml \ + --filename=demo/basic-multiple-requests.yaml \ + --filename=demo/basic-shared-claim-across-containers.yaml \ + --filename=demo/basic-shared-claim-across-pods.yaml \ + --filename=demo/basic-resourceclaim-opaque-config.yaml ``` And verify that they are coming up successfully: ```console $ kubectl get pod -A -NAMESPACE NAME READY STATUS RESTARTS AGE +NAMESPACE NAME READY STATUS RESTARTS AGE ... -gpu-test1 pod0 0/1 Pending 0 2s -gpu-test1 pod1 0/1 Pending 0 2s -gpu-test2 pod0 0/2 Pending 0 2s -gpu-test3 pod0 0/1 ContainerCreating 0 2s -gpu-test3 pod1 0/1 ContainerCreating 0 2s -gpu-test4 pod0 0/1 Pending 0 2s -gpu-test5 pod0 0/4 Pending 0 2s +basic-resourceclaimtemplate pod0 0/1 Pending 0 2s +basic-resourceclaimtemplate pod1 0/1 Pending 0 2s +basic-multiple-requests pod0 0/2 Pending 0 2s +basic-shared-claim-across-containers pod0 0/1 ContainerCreating 0 2s +basic-shared-claim-across-containers pod1 0/1 ContainerCreating 0 2s +basic-shared-claim-across-pods pod0 0/1 Pending 0 2s +basic-resourceclaim-opaque-config pod0 0/4 Pending 0 2s ... ``` -Use your favorite editor to look through each of the `gpu-test{1,2,3,4,5}.yaml` -files and see what they are doing. The semantics of each match the figure -below: - -![Demo Apps Figure](demo/demo-apps.png?raw=true "Semantics of the applications requesting resources from the example DRA resource driver.") +Use your favorite editor to look through each of the `basic-*.yaml` +files and see what they are doing. 
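+
+Before inspecting the logs, you can also confirm that each pod's claim was
+actually allocated. A quick sanity check (a suggested step; the exact columns
+printed depend on your Kubernetes version):
+
+```console
+$ kubectl get resourceclaims -A
+```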
Then dump the logs of each app to verify that GPUs were allocated to them according to these semantics: ```bash -for example in $(seq 1 5); do \ - echo "gpu-test${example}:" - for pod in $(kubectl get pod -n gpu-test${example} --output=jsonpath='{.items[*].metadata.name}'); do \ - for ctr in $(kubectl get pod -n gpu-test${example} ${pod} -o jsonpath='{.spec.containers[*].name}'); do \ +for ns in basic-resourceclaimtemplate basic-multiple-requests basic-shared-claim-across-containers basic-shared-claim-across-pods basic-resourceclaim-opaque-config; do \ + echo "${ns}:" + for pod in $(kubectl get pod -n ${ns} --output=jsonpath='{.items[*].metadata.name}'); do \ + for ctr in $(kubectl get pod -n ${ns} ${pod} -o jsonpath='{.spec.containers[*].name}'); do \ echo "${pod} ${ctr}:" - if [ "${example}" -lt 3 ]; then - kubectl logs -n gpu-test${example} ${pod} -c ${ctr}| grep -E "GPU_DEVICE_[0-9]+=" | grep -v "RESOURCE_CLAIM" - else - kubectl logs -n gpu-test${example} ${pod} -c ${ctr}| grep -E "GPU_DEVICE_[0-9]+" | grep -v "RESOURCE_CLAIM" - fi + kubectl logs -n ${ns} ${pod} -c ${ctr}| grep -E "GPU_DEVICE_[0-9]+" | grep -v "RESOURCE_CLAIM" done done echo "" @@ -289,18 +286,18 @@ done This should produce output similar to the following: ```bash -gpu-test1: +basic-resourceclaimtemplate: pod0 ctr0: declare -x GPU_DEVICE_6="gpu-6" pod1 ctr0: declare -x GPU_DEVICE_7="gpu-7" -gpu-test2: +basic-multiple-requests: pod0 ctr0: declare -x GPU_DEVICE_0="gpu-0" declare -x GPU_DEVICE_1="gpu-1" -gpu-test3: +basic-shared-claim-across-containers: pod0 ctr0: declare -x GPU_DEVICE_2="gpu-2" declare -x GPU_DEVICE_2_SHARING_STRATEGY="TimeSlicing" @@ -310,7 +307,7 @@ declare -x GPU_DEVICE_2="gpu-2" declare -x GPU_DEVICE_2_SHARING_STRATEGY="TimeSlicing" declare -x GPU_DEVICE_2_TIMESLICE_INTERVAL="Default" -gpu-test4: +basic-shared-claim-across-pods: pod0 ctr0: declare -x GPU_DEVICE_3="gpu-3" declare -x GPU_DEVICE_3_SHARING_STRATEGY="TimeSlicing" @@ -320,7 +317,7 @@ declare -x GPU_DEVICE_3="gpu-3" declare -x GPU_DEVICE_3_SHARING_STRATEGY="TimeSlicing" declare -x GPU_DEVICE_3_TIMESLICE_INTERVAL="Default" -gpu-test5: +basic-resourceclaim-opaque-config: pod0 ts-ctr0: declare -x GPU_DEVICE_4="gpu-4" declare -x GPU_DEVICE_4_SHARING_STRATEGY="TimeSlicing" @@ -353,14 +350,14 @@ This example driver includes support for the [DRA AdminAccess feature](https://k #### Usage Example -See `demo/gpu-test7.yaml` for a complete example. Key points: +See `demo/admin-access.yaml` for a complete example. Key points: 1. **Namespace**: Must have the `resource.kubernetes.io/admin-access` label set to create ResourceClaimTemplate and ResourceClaim with `adminAccess: true` for Kubernetes v1.34+. ```yaml apiVersion: v1 kind: Namespace metadata: - name: gpu-test7 + name: admin-access labels: resource.kubernetes.io/admin-access: "true" ``` @@ -399,22 +396,27 @@ This demonstration shows the end-to-end flow of the DRA AdminAccess feature. 
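+For reference, the admin-access request itself is an ordinary device request
+with `adminAccess: true` set. A minimal sketch (field layout per the
+`resource.k8s.io/v1` API; the `gpu.example.com` device class name is an
+assumption based on this driver's defaults):
+
+```yaml
+apiVersion: resource.k8s.io/v1
+kind: ResourceClaimTemplate
+metadata:
+  namespace: admin-access
+  name: multiple-gpus-admin
+spec:
+  spec:
+    devices:
+      requests:
+      - name: admin-gpu
+        exactly:
+          deviceClassName: gpu.example.com  # assumed default device class
+          allocationMode: All               # request every GPU on the node
+          adminAccess: true                 # requires the namespace label above
+```
+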
Once you have verified everything is running correctly, delete all of the example apps:

```bash
-kubectl delete --wait=false --filename=demo/gpu-test{1,2,3,4,5,7}.yaml
+kubectl delete --wait=false --filename=demo/basic-resourceclaimtemplate.yaml \
+  --filename=demo/basic-multiple-requests.yaml \
+  --filename=demo/basic-shared-claim-across-containers.yaml \
+  --filename=demo/basic-shared-claim-across-pods.yaml \
+  --filename=demo/basic-resourceclaim-opaque-config.yaml \
+  --filename=demo/admin-access.yaml
```

And wait for them to terminate:

```console
$ kubectl get pod -A
-NAMESPACE NAME READY STATUS RESTARTS AGE
+NAMESPACE NAME READY STATUS RESTARTS AGE
...
-gpu-test1 pod0 1/1 Terminating 0 31m
-gpu-test1 pod1 1/1 Terminating 0 31m
-gpu-test2 pod0 2/2 Terminating 0 31m
-gpu-test3 pod0 1/1 Terminating 0 31m
-gpu-test3 pod1 1/1 Terminating 0 31m
-gpu-test4 pod0 1/1 Terminating 0 31m
-gpu-test5 pod0 4/4 Terminating 0 31m
-gpu-test7 pod0 1/1 Terminating 0 31m
+basic-resourceclaimtemplate pod0 1/1 Terminating 0 31m
+basic-resourceclaimtemplate pod1 1/1 Terminating 0 31m
+basic-multiple-requests pod0 2/2 Terminating 0 31m
+basic-shared-claim-across-containers pod0 1/1 Terminating 0 31m
+basic-shared-claim-across-containers pod1 1/1 Terminating 0 31m
+basic-shared-claim-across-pods pod0 1/1 Terminating 0 31m
+basic-resourceclaim-opaque-config pod0 4/4 Terminating 0 31m
+admin-access pod0 1/1 Terminating 0 31m
...
```
diff --git a/demo/README.md b/demo/README.md
new file mode 100644
index 00000000..9335f4b3
--- /dev/null
+++ b/demo/README.md
@@ -0,0 +1,31 @@
+# Demo Examples
+
+This directory contains example workloads that demonstrate different ways to
+request and configure devices using Dynamic Resource Allocation (DRA).
+
+Examples prefixed with `basic-` are a good starting point for
+learning about DRA.
+
+Each example file has detailed comments at the top explaining what it
+demonstrates, what output to expect, and the driver and cluster requirements.
+
+## Running Examples
+
+Each example can be run individually:
+
+```bash
+kubectl apply -f demo/<example-name>.yaml
+```
+
+To clean up:
+
+```bash
+kubectl delete -f demo/<example-name>.yaml
+```
+
+## Notes
+
+- The default Helm chart configures **8 GPUs** per node, which is enough to run
+  several examples simultaneously.
+- Each example creates its own namespace, so examples don't interfere with
+  each other's resource names.
diff --git a/demo/gpu-test7.yaml b/demo/admin-access.yaml
similarity index 53%
rename from demo/gpu-test7.yaml
rename to demo/admin-access.yaml
index 0bfdc0f4..63440b60 100644
--- a/demo/gpu-test7.yaml
+++ b/demo/admin-access.yaml
@@ -1,22 +1,46 @@
-# One Namespace with admin access label
-# One pod with one container requesting all GPUs with admin access
-# This demo shows the DRA admin access feature with DRA_ADMIN_ACCESS environment variable
+# Example: DRA Admin Access
+#
+# One namespace with admin access label.
+# One pod with one container requesting all GPUs with admin access.
+# This demo shows the DRA admin access feature with DRA_ADMIN_ACCESS
+# environment variable.
+#
+# Key requirements:
+# - The namespace must have the label:
+#     resource.kubernetes.io/admin-access: "true"
+# - The request must set adminAccess: true
+# - "allocationMode: All" is used here to access all available GPUs on a Node.
+#   Admins typically require access to all devices on a node to perform
+#   maintenance or monitoring.
+#
+# Expected: The container has DRA_ADMIN_ACCESS=true and GPU_DEVICE env vars
+# for all available GPUs. 
Check with: +# kubectl logs -n admin-access pod0 -c ctr0 | grep DRA_ADMIN_ACCESS +# kubectl logs -n admin-access pod0 -c ctr0 | grep GPU_DEVICE +# +# Driver requirements: +# Profile: gpu +# GPUs: all available on a Node (uses allocationMode: All) +# +# Cluster requirements: +# Kubernetes 1.34+ +# Feature gate: DRAAdminAccess --- apiVersion: v1 kind: Namespace metadata: - name: gpu-test7 + name: admin-access labels: resource.kubernetes.io/admin-access: "true" --- apiVersion: resource.k8s.io/v1 kind: ResourceClaimTemplate metadata: - namespace: gpu-test7 + namespace: admin-access name: multiple-gpus-admin spec: - spec: + spec: devices: requests: - name: admin-gpu @@ -29,7 +53,7 @@ spec: apiVersion: v1 kind: Pod metadata: - namespace: gpu-test7 + namespace: admin-access name: pod0 spec: containers: diff --git a/demo/gpu-test2.yaml b/demo/basic-multiple-requests.yaml similarity index 57% rename from demo/gpu-test2.yaml rename to demo/basic-multiple-requests.yaml index bd428cae..0a97dd61 100644 --- a/demo/gpu-test2.yaml +++ b/demo/basic-multiple-requests.yaml @@ -1,17 +1,30 @@ -# One pod, one container -# Asking for 2 distinct GPUs +# Example: One Pod, Two GPUs +# +# One pod, one container. +# Asking for 2 distinct GPUs. +# +# Expected: The container gets 2 different GPUs. Check with: +# kubectl logs -n basic-multiple-requests pod0 -c ctr0 | grep GPU_DEVICE +# The container should have 2 GPU_DEVICE env vars with distinct GPU IDs. +# +# Driver requirements: +# Profile: gpu +# GPUs: 2 +# +# Cluster requirements: +# Kubernetes 1.34+ --- apiVersion: v1 kind: Namespace metadata: - name: gpu-test2 + name: basic-multiple-requests --- apiVersion: resource.k8s.io/v1 kind: ResourceClaimTemplate metadata: - namespace: gpu-test2 + namespace: basic-multiple-requests name: multiple-gpus spec: spec: @@ -28,7 +41,7 @@ spec: apiVersion: v1 kind: Pod metadata: - namespace: gpu-test2 + namespace: basic-multiple-requests name: pod0 labels: app: pod diff --git a/demo/gpu-test5.yaml b/demo/basic-resourceclaim-opaque-config.yaml similarity index 65% rename from demo/gpu-test5.yaml rename to demo/basic-resourceclaim-opaque-config.yaml index 31c9a328..cf80465c 100644 --- a/demo/gpu-test5.yaml +++ b/demo/basic-resourceclaim-opaque-config.yaml @@ -1,17 +1,37 @@ -# One pod, 1 container -# Run as deployment with 1 replica +# Example: GPU Sharing Strategies (TimeSlicing + SpacePartitioning) +# +# One pod, four containers, two GPUs with custom GpuConfig: +# +# - ts-gpu: Configured with TimeSlicing (interval: Long). Two containers +# (ts-ctr0, ts-ctr1) share this GPU by taking turns. +# +# - sp-gpu: Configured with SpacePartitioning (partitionCount: 10). Two +# containers (sp-ctr0, sp-ctr1) each get a partition of this GPU. +# +# Expected: ts-ctr0 and ts-ctr1 share one GPU with SHARING_STRATEGY=TimeSlicing +# and TIMESLICE_INTERVAL=Long. sp-ctr0 and sp-ctr1 share a different GPU with +# SHARING_STRATEGY=SpacePartitioning and PARTITION_COUNT=10. 
Check with: +# kubectl logs -n basic-resourceclaim-opaque-config pod0 -c ts-ctr0 | grep GPU_DEVICE +# kubectl logs -n basic-resourceclaim-opaque-config pod0 -c sp-ctr0 | grep GPU_DEVICE +# +# Driver requirements: +# Profile: gpu +# GPUs: 2 +# +# Cluster requirements: +# Kubernetes 1.34+ --- apiVersion: v1 kind: Namespace metadata: - name: gpu-test5 + name: basic-resourceclaim-opaque-config --- apiVersion: resource.k8s.io/v1 kind: ResourceClaimTemplate metadata: - namespace: gpu-test5 + namespace: basic-resourceclaim-opaque-config name: multiple-gpus spec: spec: @@ -49,7 +69,7 @@ spec: apiVersion: v1 kind: Pod metadata: - namespace: gpu-test5 + namespace: basic-resourceclaim-opaque-config name: pod0 spec: containers: diff --git a/demo/gpu-test1.yaml b/demo/basic-resourceclaimtemplate.yaml similarity index 58% rename from demo/gpu-test1.yaml rename to demo/basic-resourceclaimtemplate.yaml index 53edba06..2dc097cb 100644 --- a/demo/gpu-test1.yaml +++ b/demo/basic-resourceclaimtemplate.yaml @@ -1,17 +1,31 @@ -# Two pods, one container each -# Each container asking for 1 distinct GPU +# Example: Two Pods, One GPU Each +# +# Two pods, one container each. +# Each container asking for 1 distinct GPU. +# +# Expected: Each pod gets a different GPU. Check with: +# kubectl logs -n basic-resourceclaimtemplate pod0 -c ctr0 | grep GPU_DEVICE +# kubectl logs -n basic-resourceclaimtemplate pod1 -c ctr0 | grep GPU_DEVICE +# Each container should have 1 GPU_DEVICE env var with a distinct GPU ID. +# +# Driver requirements: +# Profile: gpu +# GPUs: 2 +# +# Cluster requirements: +# Kubernetes 1.34+ --- apiVersion: v1 kind: Namespace metadata: - name: gpu-test1 + name: basic-resourceclaimtemplate --- apiVersion: resource.k8s.io/v1 kind: ResourceClaimTemplate metadata: - namespace: gpu-test1 + namespace: basic-resourceclaimtemplate name: single-gpu spec: spec: @@ -25,7 +39,7 @@ spec: apiVersion: v1 kind: Pod metadata: - namespace: gpu-test1 + namespace: basic-resourceclaimtemplate name: pod0 labels: app: pod @@ -46,7 +60,7 @@ spec: apiVersion: v1 kind: Pod metadata: - namespace: gpu-test1 + namespace: basic-resourceclaimtemplate name: pod1 labels: app: pod diff --git a/demo/gpu-test3.yaml b/demo/basic-shared-claim-across-containers.yaml similarity index 54% rename from demo/gpu-test3.yaml rename to demo/basic-shared-claim-across-containers.yaml index 8082a25f..7ad0315d 100644 --- a/demo/gpu-test3.yaml +++ b/demo/basic-shared-claim-across-containers.yaml @@ -1,17 +1,31 @@ -# One pod, two containers -# Each asking for shared access to a single GPU +# Example: Shared GPU Across Containers +# +# One pod, two containers. +# Each asking for shared access to a single GPU. +# +# Expected: Both containers see the same GPU. Check with: +# kubectl logs -n basic-shared-claim-across-containers pod0 -c ctr0 | grep GPU_DEVICE +# kubectl logs -n basic-shared-claim-across-containers pod0 -c ctr1 | grep GPU_DEVICE +# Both containers should show the same GPU ID. 
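+# Because both containers consume the same claim, the driver also injects the
+# device's sharing settings: expect GPU_DEVICE_<n>_SHARING_STRATEGY and
+# GPU_DEVICE_<n>_TIMESLICE_INTERVAL env vars (see the top-level README output).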
+# +# Driver requirements: +# Profile: gpu +# GPUs: 1 +# +# Cluster requirements: +# Kubernetes 1.34+ --- apiVersion: v1 kind: Namespace metadata: - name: gpu-test3 + name: basic-shared-claim-across-containers --- apiVersion: resource.k8s.io/v1 kind: ResourceClaimTemplate metadata: - namespace: gpu-test3 + namespace: basic-shared-claim-across-containers name: single-gpu spec: spec: @@ -25,7 +39,7 @@ spec: apiVersion: v1 kind: Pod metadata: - namespace: gpu-test3 + namespace: basic-shared-claim-across-containers name: pod0 spec: containers: diff --git a/demo/gpu-test4.yaml b/demo/basic-shared-claim-across-pods.yaml similarity index 53% rename from demo/gpu-test4.yaml rename to demo/basic-shared-claim-across-pods.yaml index fca06776..96fc57fc 100644 --- a/demo/gpu-test4.yaml +++ b/demo/basic-shared-claim-across-pods.yaml @@ -1,17 +1,34 @@ -# One shared, global claim providing access to a GPU -# Two pods, each asking for access to the shared GPU +# Example: Shared ResourceClaim Across Pods +# +# One shared claim providing access to a GPU. +# Two pods, each asking for access to the shared GPU. +# +# Uses a ResourceClaim (not a ResourceClaimTemplate), which is a standalone +# object that multiple pods can reference by name. +# +# Expected: Both pods see the same GPU. Check with: +# kubectl logs -n basic-shared-claim-across-pods pod0 -c ctr0 | grep GPU_DEVICE +# kubectl logs -n basic-shared-claim-across-pods pod1 -c ctr0 | grep GPU_DEVICE +# Both pods should show the same GPU ID. +# +# Driver requirements: +# Profile: gpu +# GPUs: 1 +# +# Cluster requirements: +# Kubernetes 1.34+ --- apiVersion: v1 kind: Namespace metadata: - name: gpu-test4 + name: basic-shared-claim-across-pods --- apiVersion: resource.k8s.io/v1 kind: ResourceClaim metadata: - namespace: gpu-test4 + namespace: basic-shared-claim-across-pods name: single-gpu spec: devices: @@ -24,7 +41,7 @@ spec: apiVersion: v1 kind: Pod metadata: - namespace: gpu-test4 + namespace: basic-shared-claim-across-pods name: pod0 labels: app: pod @@ -45,7 +62,7 @@ spec: apiVersion: v1 kind: Pod metadata: - namespace: gpu-test4 + namespace: basic-shared-claim-across-pods name: pod1 labels: app: pod diff --git a/demo/gpu-test8.yaml b/demo/cel-selector.yaml similarity index 69% rename from demo/gpu-test8.yaml rename to demo/cel-selector.yaml index bb25d450..a865bf1d 100644 --- a/demo/gpu-test8.yaml +++ b/demo/cel-selector.yaml @@ -1,18 +1,31 @@ -# One pod, one container -# Uses CEL expression selectors instead of matching on a DeviceClass -# Selects a GPU with a specific model and at least 4Gi of memory +# Example: CEL Expression Selectors +# +# One pod, one container. +# Uses CEL expression selectors instead of matching on a DeviceClass. +# Selects a GPU with a specific model and at least 4Gi of memory. +# +# Expected: The container gets 1 GPU matching the CEL constraints. Check with: +# kubectl logs -n cel-selector pod0 -c ctr0 | grep GPU_DEVICE +# The container should have 1 GPU_DEVICE env var. 
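+# The CEL expressions are evaluated against each candidate device's advertised
+# attributes and capacity (device.attributes / device.capacity); if no device
+# matches, the pod stays Pending.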
+# +# Driver requirements: +# Profile: gpu +# GPUs: 1 +# +# Cluster requirements: +# Kubernetes 1.34+ --- apiVersion: v1 kind: Namespace metadata: - name: gpu-test8 + name: cel-selector --- apiVersion: resource.k8s.io/v1 kind: ResourceClaimTemplate metadata: - namespace: gpu-test8 + namespace: cel-selector name: single-gpu-cel spec: spec: @@ -31,7 +44,7 @@ spec: apiVersion: v1 kind: Pod metadata: - namespace: gpu-test8 + namespace: cel-selector name: pod0 labels: app: pod diff --git a/demo/gpu-test6.yaml b/demo/initcontainer-shared-gpu.yaml similarity index 54% rename from demo/gpu-test6.yaml rename to demo/initcontainer-shared-gpu.yaml index b97ac0ce..58cdbc16 100644 --- a/demo/gpu-test6.yaml +++ b/demo/initcontainer-shared-gpu.yaml @@ -1,17 +1,31 @@ -# One pod: one initContainer + one container -# Each asking for shared access to a single GPU +# Example: InitContainer Sharing a GPU with a Container +# +# One pod: one initContainer + one container. +# Each asking for shared access to a single GPU. +# +# Expected: Both the initContainer and container see the same GPU. Check with: +# kubectl logs -n initcontainer-shared-gpu pod0 -c init0 | grep GPU_DEVICE +# kubectl logs -n initcontainer-shared-gpu pod0 -c ctr0 | grep GPU_DEVICE +# Both should show the same GPU ID. +# +# Driver requirements: +# Profile: gpu +# GPUs: 1 +# +# Cluster requirements: +# Kubernetes 1.34+ --- apiVersion: v1 kind: Namespace metadata: - name: gpu-test6 + name: initcontainer-shared-gpu --- apiVersion: resource.k8s.io/v1 kind: ResourceClaimTemplate metadata: - namespace: gpu-test6 + namespace: initcontainer-shared-gpu name: single-gpu spec: spec: @@ -25,7 +39,7 @@ spec: apiVersion: v1 kind: Pod metadata: - namespace: gpu-test6 + namespace: initcontainer-shared-gpu name: pod0 spec: initContainers: diff --git a/demo/test-admin-access.sh b/demo/test-admin-access.sh index 84b18b81..cf729b3b 100755 --- a/demo/test-admin-access.sh +++ b/demo/test-admin-access.sh @@ -37,27 +37,27 @@ fi echo "✅ Kubernetes cluster is accessible" # Apply the demo -echo "📦 Applying gpu-test7.yaml demo..." -kubectl apply -f demo/gpu-test7.yaml +echo "📦 Applying admin-access.yaml demo..." +kubectl apply -f demo/admin-access.yaml echo "⏳ Waiting for pod to be ready..." 
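+# Generous timeout to allow for scheduling and image pulls on first run;
+# '|| true' keeps the script going even if the pod is not Ready in time.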
-kubectl wait --for=condition=Ready pod/pod0 -n gpu-test7 --timeout=120s || true +kubectl wait --for=condition=Ready pod/pod0 -n admin-access --timeout=120s || true echo echo "=== Pod Status ===" -kubectl get pods -n gpu-test7 +kubectl get pods -n admin-access echo echo "=== ResourceClaims Status ===" -kubectl get resourceclaims -n gpu-test7 +kubectl get resourceclaims -n admin-access echo echo "=== Pod0 Logs (showing admin access demo) ===" -kubectl logs pod0 -n gpu-test7 || echo "⚠️ Pod0 logs not ready yet" +kubectl logs pod0 -n admin-access || echo "⚠️ Pod0 logs not ready yet" echo echo "=== Checking DRA_ADMIN_ACCESS Environment Variable ===" -DRA_ADMIN_ACCESS_POD0=$(kubectl exec pod0 -n gpu-test7 -- printenv DRA_ADMIN_ACCESS 2>/dev/null || echo "not found") +DRA_ADMIN_ACCESS_POD0=$(kubectl exec pod0 -n admin-access -- printenv DRA_ADMIN_ACCESS 2>/dev/null || echo "not found") if [[ "$DRA_ADMIN_ACCESS_POD0" == "true" ]]; then echo "✅ Pod0: DRA_ADMIN_ACCESS=$DRA_ADMIN_ACCESS_POD0" @@ -67,4 +67,4 @@ fi echo echo "=== Test Complete ===" -echo "To clean up, run: kubectl delete namespace gpu-test7" +echo "To clean up, run: kubectl delete namespace admin-access" diff --git a/deployments/helm/dra-example-driver/values.yaml b/deployments/helm/dra-example-driver/values.yaml index 0e8553e7..9ddededa 100644 --- a/deployments/helm/dra-example-driver/values.yaml +++ b/deployments/helm/dra-example-driver/values.yaml @@ -37,7 +37,7 @@ serviceAccount: kubeletPlugin: # numDevices describes how many GPUs to advertise on each node when the "gpu" # deviceProfile is used. Not relevant for other profiles. - numDevices: 9 + numDevices: 8 priorityClassName: "system-node-critical" updateStrategy: type: RollingUpdate diff --git a/test/e2e/e2e_setup_test.go b/test/e2e/e2e_setup_test.go index bda0b9e2..fdffd8f6 100644 --- a/test/e2e/e2e_setup_test.go +++ b/test/e2e/e2e_setup_test.go @@ -46,14 +46,14 @@ import ( var rootDir, currentDir, demoManifestsDir string var observedGPUs map[string]string var demoFiles = []string{ - "gpu-test1.yaml", - "gpu-test2.yaml", - "gpu-test3.yaml", - "gpu-test7.yaml", // deploying this earlier to ensure the pod can access in-use devices and does not block future allocations of the same devices - "gpu-test4.yaml", - "gpu-test5.yaml", - "gpu-test6.yaml", - "gpu-test8.yaml", + "basic-resourceclaimtemplate.yaml", + "basic-multiple-requests.yaml", + "basic-shared-claim-across-containers.yaml", + "admin-access.yaml", // deploying this earlier to ensure the pod can access in-use devices and does not block future allocations of the same devices + "basic-shared-claim-across-pods.yaml", + "basic-resourceclaim-opaque-config.yaml", + "initcontainer-shared-gpu.yaml", + "cel-selector.yaml", } var clientset *kubernetes.Clientset var dynamicClient dynamic.Interface diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index fdfd7a17..9c5e895e 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -28,8 +28,8 @@ import ( ) var _ = Describe("Test GPU allocation", func() { - Context("GPU Test 1- Two pods, one container each, one GPU per container", func() { - namespace := "gpu-test1" + Context("Two pods, one container each, one GPU per container", func() { + namespace := "basic-resourceclaimtemplate" pods := []string{"pod0", "pod1"} containerName := "ctr0" expectedGPUCount := 1 @@ -43,8 +43,8 @@ var _ = Describe("Test GPU allocation", func() { } }) }) - Context("GPU Test 2- One pod, one container with two GPUs", func() { - namespace := "gpu-test2" + Context("One pod, one container with two 
GPUs", func() { + namespace := "basic-multiple-requests" pods := []string{"pod0"} containerName := "ctr0" expectedGPUCount := 2 @@ -56,8 +56,8 @@ var _ = Describe("Test GPU allocation", func() { verifyGPUAllocation(namespace, pods[0], containerName, expectedGPUCount) }) }) - Context("GPU Test 3- One pod, two containers and one GPU having TimeSlicing sharing strategy and Default TimeSlice interval", func() { - namespace := "gpu-test3" + Context("One pod, two containers sharing one GPU with TimeSlicing and Default interval", func() { + namespace := "basic-shared-claim-across-containers" pods := []string{"pod0"} containerNames := []string{"ctr0", "ctr1"} expectedGPUCount := 1 @@ -90,8 +90,8 @@ var _ = Describe("Test GPU allocation", func() { } }) }) - Context("GPU Test 4- Two pods, one container each, one GPU having TimeSlicing sharing strategy and Default TimeSlice interval", func() { - namespace := "gpu-test4" + Context("Two pods sharing a global ResourceClaim with TimeSlicing and Default interval", func() { + namespace := "basic-shared-claim-across-pods" pods := []string{"pod0", "pod1"} containers := []string{"ctr0"} expectedGPUCount := 1 @@ -124,8 +124,8 @@ var _ = Describe("Test GPU allocation", func() { } }) }) - Context("GPU Test 5- One pod, four containers, two shared GPUs having TimeSlicing & SpacePartitioning sharing strategy and Long TimeSlice interval", func() { - namespace := "gpu-test5" + Context("GPU sharing strategies: TimeSlicing with Long interval and SpacePartitioning", func() { + namespace := "basic-resourceclaim-opaque-config" pods := []string{"pod0"} tsContainers := []string{"ts-ctr0", "ts-ctr1"} spContainers := []string{"sp-ctr0", "sp-ctr1"} @@ -183,8 +183,8 @@ var _ = Describe("Test GPU allocation", func() { } }) }) - Context("GPU Test 6- One pod, one init container, one container, one GPU having TimeSlicing sharing strategy and Default TimeSlice interval", func() { - namespace := "gpu-test6" + Context("InitContainer and container sharing one GPU with TimeSlicing and Default interval", func() { + namespace := "initcontainer-shared-gpu" pods := []string{"pod0"} containerNames := []string{"init0", "ctr0"} expectedGPUCount := 1 @@ -217,8 +217,8 @@ var _ = Describe("Test GPU allocation", func() { } }) }) - Context("GPU Test 7- Test DRAAdminAccess set to true", func() { - namespace := "gpu-test7" + Context("DRA AdminAccess set to true", func() { + namespace := "admin-access" pods := []string{"pod0"} containerName := "ctr0" @@ -229,8 +229,8 @@ var _ = Describe("Test GPU allocation", func() { verifyDRAAdminAccess(namespace, pods[0], containerName, "true") }) }) - Context("GPU Test 8- One pod, one container with single gpu selected using cel expression", func() { - namespace := "gpu-test8" + Context("CEL expression selector for single GPU", func() { + namespace := "cel-selector" pods := []string{"pod0"} containerNames := []string{"ctr0"} expectedGPUCount := 1