@@ -326,3 +326,243 @@ docker-build:
# Run hyperagent in Docker (requires /dev/kvm or /dev/mshv); all arguments are forwarded to the wrapper script
docker-run *ARGS:
    ./scripts/hyperagent-docker {{ARGS}}
329+
# ── Kubernetes Deployment ─────────────────────────────────────────────
331+
# Internal: verify that docker and kubectl are installed
_k8s-check-common:
    #!/usr/bin/env bash
    source deploy/k8s/common.sh
    # The first missing tool ends the recipe with status 1.
    {
        require_cmd docker "https://docs.docker.com/get-docker/" &&
        require_cmd kubectl "https://kubernetes.io/docs/tasks/tools/"
    } || exit 1
338+
# Internal: verify the Azure tooling is installed and an Azure CLI session exists
_k8s-check-azure:
    #!/usr/bin/env bash
    source deploy/k8s/common.sh
    # The first missing tool ends the recipe with status 1.
    {
        require_cmd az "https://docs.microsoft.com/en-us/cli/azure/install-azure-cli" &&
        require_cmd kubectl "https://kubernetes.io/docs/tasks/tools/" &&
        require_cmd envsubst "apt install gettext-base"
    } || exit 1
    # A failing `az account show` is treated as "not logged in".
    az account show &>/dev/null && exit 0
    log_error "Not logged in to Azure CLI. Run 'az login' first."
    exit 1
350+
# Internal: verify the local (KIND) tooling is installed and /dev/kvm is present
_k8s-check-local:
    #!/usr/bin/env bash
    source deploy/k8s/common.sh
    # The first missing tool ends the recipe with status 1.
    {
        require_cmd docker "https://docs.docker.com/get-docker/" &&
        require_cmd kind "go install sigs.k8s.io/kind@latest" &&
        require_cmd kubectl "https://kubernetes.io/docs/tasks/tools/"
    } || exit 1
    # Guard clause: nothing more to do once the KVM device node is found.
    [ -e /dev/kvm ] && exit 0
    log_error "/dev/kvm not found — Hyperlight requires hardware virtualisation"
    exit 1
362+
# ── Local (KIND) ──────────────────────────────────────────────────────
364+
# Create the local KIND cluster (with /dev/kvm and a local registry) via the setup script
k8s-local-up: _k8s-check-local
    ./deploy/k8s/local/setup.sh
368+
# NOTE(review): the prerequisite also insists on /dev/kvm, which teardown
# presumably does not need — confirm whether a lighter check would do.
# Tear down the local KIND cluster and registry via the teardown script
k8s-local-down: _k8s-check-local
    ./deploy/k8s/local/teardown.sh
372+
# Build the base and K8s images, then push the K8s image to the local registry
k8s-local-build version="0.0.0-dev": _k8s-check-common
    #!/usr/bin/env bash
    # Stop at the first failure so a broken build is never tagged or pushed.
    set -euo pipefail
    # Resolve symlinks for Docker COPY: swap deps/js-host-api for a real copy
    # of its target while the images are built.
    if [ -L deps/js-host-api ]; then
        # Keep the link text exactly as stored so the restored symlink is
        # identical to the original (a relative link stays relative).
        link_text=$(readlink deps/js-host-api)
        resolved=$(readlink -f deps/js-host-api)
        restore_link() {
            rm -rf deps/js-host-api
            ln -sfn "$link_text" deps/js-host-api
        }
        # Install the trap before touching the link so a failed copy cannot
        # leave the working tree without it.
        trap restore_link EXIT
        rm deps/js-host-api
        cp -r "$resolved" deps/js-host-api
    fi
    docker build -t hyperagent --build-arg VERSION="{{version}}" .
    docker build -f deploy/k8s/Dockerfile -t hyperagent-k8s .
    # Push to local registry
    image_ref=localhost:5000/hyperagent:latest
    docker tag hyperagent-k8s "$image_ref"
    docker push "$image_ref"
388+
# Deploy device plugin to local KIND cluster
k8s-local-deploy-plugin: _k8s-check-common
    #!/usr/bin/env bash
    # Stop at the first failure; pipefail makes an envsubst error fail the
    # `envsubst | kubectl apply` pipeline instead of being masked by kubectl.
    set -euo pipefail
    source deploy/k8s/common.sh
    # The common prerequisite check does not cover envsubst, so check it here.
    require_cmd envsubst "apt install gettext-base" || exit 1
    # Values substituted into the device-plugin manifest template.
    export IMAGE="ghcr.io/hyperlight-dev/hyperlight-device-plugin:latest" DEVICE_COUNT="2000" DEVICE_UID="65534" DEVICE_GID="65534"
    envsubst < deploy/k8s/manifests/device-plugin.yaml | kubectl apply -f -
    kubectl apply -f deploy/k8s/manifests/namespace.yaml
    echo "Waiting for device plugin pods..."
    kubectl rollout status daemonset/hyperlight-device-plugin -n hyperlight-system --timeout=120s
398+
# Run a prompt on the local KIND cluster, using the image from the local registry
k8s-local-run +ARGS:
    HYPERAGENT_K8S_IMAGE=localhost:5000/hyperagent:latest ./scripts/hyperagent-k8s {{ARGS}}
402+
# ── Azure (AKS) ──────────────────────────────────────────────────────
404+
# Provision the AKS cluster, ACR, and KVM node pool via the Azure setup script
k8s-infra-up: _k8s-check-azure
    ./deploy/k8s/azure/setup.sh
408+
# Tear down all Azure resources via the Azure teardown script
k8s-infra-down: _k8s-check-azure
    ./deploy/k8s/azure/teardown.sh
412+
# Stop AKS cluster (save costs when not in use)
k8s-stop:
    #!/usr/bin/env bash
    # Abort if config.env cannot be sourced or leaves a name unset, rather
    # than calling `az` with empty resource-group / cluster names.
    set -euo pipefail
    source deploy/k8s/azure/config.env
    az aks stop -g "${RESOURCE_GROUP}" -n "${CLUSTER_NAME}"
418+
# Start AKS cluster
k8s-start:
    #!/usr/bin/env bash
    # Abort if config.env cannot be sourced or leaves a name unset, rather
    # than calling `az` with empty resource-group / cluster names.
    set -euo pipefail
    source deploy/k8s/azure/config.env
    az aks start -g "${RESOURCE_GROUP}" -n "${CLUSTER_NAME}"
424+
# Get AKS credentials for kubectl
k8s-credentials:
    #!/usr/bin/env bash
    # Abort if config.env cannot be sourced or leaves a name unset, rather
    # than calling `az` with empty resource-group / cluster names.
    set -euo pipefail
    source deploy/k8s/azure/config.env
    # --overwrite-existing is passed so an existing kubeconfig entry is replaced.
    az aks get-credentials -g "${RESOURCE_GROUP}" -n "${CLUSTER_NAME}" --overwrite-existing
430+
# Deploy hyperlight device plugin to cluster
k8s-deploy-plugin: _k8s-check-common
    #!/usr/bin/env bash
    # Stop at the first failure; -u turns a value missing from config.env into
    # an error instead of an empty string rendered into the manifest.
    set -euo pipefail
    # The common prerequisite check does not cover envsubst, so check it here.
    if ! command -v envsubst >/dev/null 2>&1; then
        echo "envsubst not found — install it with: apt install gettext-base" >&2
        exit 1
    fi
    source deploy/k8s/azure/config.env
    # Export the template variables so the envsubst child process can see them.
    export IMAGE="${DEVICE_PLUGIN_IMAGE}" DEVICE_COUNT="${DEVICE_COUNT}" DEVICE_UID="${DEVICE_UID}" DEVICE_GID="${DEVICE_GID}"
    envsubst < deploy/k8s/manifests/device-plugin.yaml | kubectl apply -f -
    kubectl apply -f deploy/k8s/manifests/namespace.yaml
    echo "Waiting for device plugin pods..."
    kubectl rollout status daemonset/hyperlight-device-plugin -n hyperlight-system --timeout=120s
440+
# Build HyperAgent K8s image (builds base image first)
k8s-build version="0.0.0-dev": _k8s-check-common
    #!/usr/bin/env bash
    # Stop at the first failure so the K8s image is never built on top of a
    # base image that failed to build.
    set -euo pipefail
    # Resolve symlinks for Docker COPY: swap deps/js-host-api for a real copy
    # of its target while the images are built.
    if [ -L deps/js-host-api ]; then
        # Keep the link text exactly as stored so the restored symlink is
        # identical to the original (a relative link stays relative).
        link_text=$(readlink deps/js-host-api)
        resolved=$(readlink -f deps/js-host-api)
        restore_link() {
            rm -rf deps/js-host-api
            ln -sfn "$link_text" deps/js-host-api
        }
        # Install the trap before touching the link so a failed copy cannot
        # leave the working tree without it.
        trap restore_link EXIT
        rm deps/js-host-api
        cp -r "$resolved" deps/js-host-api
    fi
    docker build -t hyperagent --build-arg VERSION="{{version}}" .
    docker build -f deploy/k8s/Dockerfile -t hyperagent-k8s .
453+
# Push HyperAgent K8s image to ACR
k8s-push: _k8s-check-azure
    #!/usr/bin/env bash
    # Stop at the first failure: a failed login or tag must not fall through
    # to `docker push`, and an unset config value must not yield a bogus ref.
    set -euo pipefail
    source deploy/k8s/azure/config.env
    az acr login --name "${ACR_NAME}"
    # Build the fully qualified image reference once and reuse it.
    image_ref="${ACR_NAME}.azurecr.io/${HYPERAGENT_IMAGE_NAME}:${HYPERAGENT_IMAGE_TAG}"
    docker tag hyperagent-k8s "${image_ref}"
    docker push "${image_ref}"
461+
# Configure GitHub authentication as a K8s Secret (simple but less secure)
k8s-setup-auth:
    ./deploy/k8s/setup-auth.sh
465+
# Configure GitHub authentication through Azure Key Vault
k8s-setup-auth-keyvault:
    ./deploy/k8s/setup-auth-keyvault.sh
469+
# Run a prompt as a K8s Job; all arguments are forwarded to the wrapper script
k8s-run +ARGS:
    ./scripts/hyperagent-k8s {{ARGS}}
473+
# Show cluster nodes, device plugin pods, and HyperAgent jobs
k8s-status:
    #!/usr/bin/env bash
    source deploy/k8s/common.sh
    # Columns shown per node: name, hypervisor label, allocatable hypervisor resource.
    node_columns='NAME:.metadata.name,HYPERVISOR:.metadata.labels.hyperlight\.dev/hypervisor,CAPACITY:.status.allocatable.hyperlight\.dev/hypervisor'

    # Each section hides kubectl's stderr and prints a placeholder on failure.
    echo ""
    log_step "Cluster nodes:"
    if ! kubectl get nodes -o custom-columns="$node_columns" 2>/dev/null; then
        echo "(not connected)"
    fi

    echo ""
    log_step "Device plugin:"
    if ! kubectl get pods -n hyperlight-system -l app.kubernetes.io/name=hyperlight-device-plugin 2>/dev/null; then
        echo "(not deployed)"
    fi

    echo ""
    log_step "HyperAgent jobs:"
    if ! kubectl get jobs -n hyperagent -l hyperagent.dev/type=prompt-job 2>/dev/null; then
        echo "(none)"
    fi
    echo ""
488+
# Smoke test: check cluster access, KVM nodes, device plugin, hypervisor resource, namespace, and auth secret
k8s-smoke-test:
    #!/usr/bin/env bash
    source deploy/k8s/common.sh
    passed=0
    failed=0
    # Log one check result and bump the matching counter.
    ok()  { log_success "$1"; passed=$((passed + 1)); }
    bad() { log_error "$1"; failed=$((failed + 1)); }

    echo ""
    log_step "Running K8s smoke tests..."
    echo ""

    # 1. Can kubectl reach a cluster?
    if ! kubectl cluster-info &>/dev/null; then
        bad "❌ kubectl not connected — run 'just k8s-credentials' or 'just k8s-local-up'"
    else
        ok "✅ kubectl connected to cluster"
    fi

    # 2. Count nodes labelled hyperlight.dev/hypervisor=kvm.
    kvm_node_count=$(kubectl get nodes -l hyperlight.dev/hypervisor=kvm -o name 2>/dev/null | wc -l)
    if [ "$kvm_node_count" -gt 0 ]; then
        ok "✅ ${kvm_node_count} KVM node(s) available"
    else
        bad "❌ No KVM nodes found — check node pool labels"
    fi

    # 3. Does any device plugin pod report the Running phase?
    plugin_phases=$(kubectl get pods -n hyperlight-system -l app.kubernetes.io/name=hyperlight-device-plugin -o jsonpath='{.items[*].status.phase}' 2>/dev/null)
    if grep -q "Running" <<<"$plugin_phases"; then
        ok "✅ Device plugin running"
    else
        bad "❌ Device plugin not running — run 'just k8s-deploy-plugin' or 'just k8s-local-deploy-plugin'"
    fi

    # 4. Take the first non-empty allocatable hyperlight.dev/hypervisor value across nodes.
    hv_capacity=$(kubectl get nodes -o jsonpath='{.items[*].status.allocatable.hyperlight\.dev/hypervisor}' 2>/dev/null | tr ' ' '\n' | grep -v '^$' | head -1)
    if [[ -n "$hv_capacity" && "$hv_capacity" != "0" ]]; then
        ok "✅ hyperlight.dev/hypervisor resource available (capacity: ${hv_capacity})"
    else
        bad "❌ No hyperlight.dev/hypervisor resource — device plugin may not be working"
    fi

    # 5. Is the hyperagent namespace present?
    if ! kubectl get namespace hyperagent &>/dev/null; then
        bad "❌ hyperagent namespace missing — run 'just k8s-deploy-plugin' (creates namespace)"
    else
        ok "✅ hyperagent namespace exists"
    fi

    # 6. Is the hyperagent-auth secret present in that namespace?
    if ! kubectl get secret hyperagent-auth -n hyperagent &>/dev/null; then
        bad "❌ hyperagent-auth secret missing — run 'just k8s-setup-auth'"
    else
        ok "✅ hyperagent-auth secret exists"
    fi

    # Summary banner
    echo ""
    echo "════════════════════════════════════════"
    if [ "$failed" -ne 0 ]; then
        log_error "${failed} check(s) failed, ${passed} passed"
        echo ""
        log_info "Fix the issues above, then re-run: just k8s-smoke-test"
    else
        log_success "All ${passed} checks passed — ready to run prompts! 🚀"
    fi
    echo "════════════════════════════════════════"
    echo ""
    # The recipe's exit status is non-zero when any check failed.
    [ "$failed" -eq 0 ]
0 commit comments