From 518fba3c4fa73a545932e944fff8c2ed6dde7f2b Mon Sep 17 00:00:00 2001 From: Mallory Hill Date: Tue, 9 Jun 2026 15:28:06 -0400 Subject: [PATCH] HYPERFLEET-1058 - feat: Update e2e repo and remove dead code Removes deploy scripts, Kind setup tooling, and adapter testdata that have been migrated to the infra repository. Adds environment files and cleanup scripts to support the new infra-based workflow. - Remove deploy-scripts/ directory (deploy-clm.sh, Kind scripts, adapter/API/common/helm/sentinel libraries) - Remove testdata/adapter-configs/ for all adapters (cl-deployment, cl-job, cl-maestro, cl-namespace, np-configmap) - Remove docs/local-kind-setup.md (replaced by infra repo setup) - Add env/env.ci and env/env.local for infra repo integration - Add scripts/cleanup-k8s-resources.sh and scripts/cleanup-pubsub-resources.sh - Add docs/setup.md with new setup instructions - Update docs/runbook.md, docs/getting-started.md, docs/development.md to reference infra repo - Update README.md, CONTRIBUTING.md, Dockerfile, and Makefile to remove old deployment targets --- AGENTS.md | 2 +- CONTRIBUTING.md | 20 +- Dockerfile | 3 - Makefile | 14 - README.md | 36 +- deploy-scripts/.env.example | 142 ----- deploy-scripts/README.md | 342 ---------- deploy-scripts/deploy-clm.sh | 602 ------------------ deploy-scripts/kind-build-images.sh | 92 --- deploy-scripts/kind-local.sh | 222 ------- deploy-scripts/lib/adapter.sh | 384 ----------- deploy-scripts/lib/api.sh | 146 ----- deploy-scripts/lib/common.sh | 305 --------- deploy-scripts/lib/gcp.sh | 320 ---------- deploy-scripts/lib/helm.sh | 69 -- deploy-scripts/lib/sentinel.sh | 177 ----- docs/development.md | 88 +-- docs/getting-started.md | 33 +- docs/local-kind-setup.md | 106 --- docs/runbook.md | 377 ++++------- docs/setup.md | 150 +++++ env/env.local | 60 ++ .../cl-deployment/adapter-config.yaml | 23 - .../cl-deployment/adapter-task-config.yaml | 177 ----- .../adapter-task-resource-deployment.yaml | 29 - 
.../adapter-configs/cl-deployment/values.yaml | 32 - .../cl-job/adapter-config.yaml | 23 - .../cl-job/adapter-task-config.yaml | 186 ------ .../cl-job/adapter-task-resource-job.yaml | 29 - testdata/adapter-configs/cl-job/values.yaml | 32 - .../cl-maestro/adapter-config.yaml | 63 -- .../cl-maestro/adapter-task-config.yaml | 247 ------- .../adapter-task-resource-manifestwork.yaml | 144 ----- .../adapter-configs/cl-maestro/values.yaml | 33 - .../cl-namespace/adapter-config.yaml | 23 - .../cl-namespace/adapter-task-config.yaml | 189 ------ .../adapter-configs/cl-namespace/values.yaml | 38 -- .../np-configmap/adapter-config.yaml | 23 - .../np-configmap/adapter-task-config.yaml | 178 ------ .../adapter-task-resource-configmap.yaml | 19 - .../adapter-configs/np-configmap/values.yaml | 32 - 41 files changed, 456 insertions(+), 4754 deletions(-) delete mode 100644 deploy-scripts/.env.example delete mode 100644 deploy-scripts/README.md delete mode 100755 deploy-scripts/deploy-clm.sh delete mode 100755 deploy-scripts/kind-build-images.sh delete mode 100755 deploy-scripts/kind-local.sh delete mode 100755 deploy-scripts/lib/adapter.sh delete mode 100755 deploy-scripts/lib/api.sh delete mode 100755 deploy-scripts/lib/common.sh delete mode 100644 deploy-scripts/lib/gcp.sh delete mode 100755 deploy-scripts/lib/helm.sh delete mode 100755 deploy-scripts/lib/sentinel.sh delete mode 100644 docs/local-kind-setup.md create mode 100644 docs/setup.md create mode 100644 env/env.local delete mode 100644 testdata/adapter-configs/cl-deployment/adapter-config.yaml delete mode 100644 testdata/adapter-configs/cl-deployment/adapter-task-config.yaml delete mode 100644 testdata/adapter-configs/cl-deployment/adapter-task-resource-deployment.yaml delete mode 100644 testdata/adapter-configs/cl-deployment/values.yaml delete mode 100644 testdata/adapter-configs/cl-job/adapter-config.yaml delete mode 100644 testdata/adapter-configs/cl-job/adapter-task-config.yaml delete mode 100644 
testdata/adapter-configs/cl-job/adapter-task-resource-job.yaml delete mode 100644 testdata/adapter-configs/cl-job/values.yaml delete mode 100644 testdata/adapter-configs/cl-maestro/adapter-config.yaml delete mode 100644 testdata/adapter-configs/cl-maestro/adapter-task-config.yaml delete mode 100644 testdata/adapter-configs/cl-maestro/adapter-task-resource-manifestwork.yaml delete mode 100644 testdata/adapter-configs/cl-maestro/values.yaml delete mode 100644 testdata/adapter-configs/cl-namespace/adapter-config.yaml delete mode 100644 testdata/adapter-configs/cl-namespace/adapter-task-config.yaml delete mode 100644 testdata/adapter-configs/cl-namespace/values.yaml delete mode 100644 testdata/adapter-configs/np-configmap/adapter-config.yaml delete mode 100644 testdata/adapter-configs/np-configmap/adapter-task-config.yaml delete mode 100644 testdata/adapter-configs/np-configmap/adapter-task-resource-configmap.yaml delete mode 100644 testdata/adapter-configs/np-configmap/values.yaml diff --git a/AGENTS.md b/AGENTS.md index c43b0a3..61f0bfd 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -28,7 +28,7 @@ Pre-flight order: `make check` then `make build`. 
| Test placement strategy | [architecture repo](https://github.com/openshift-hyperfleet/architecture/blob/main/hyperfleet/docs/e2e-testing/test-placement-strategy.md) — which layer a test belongs in (unit / integration / E2E) | | Test writing guide | `docs/development.md` | | Debugging | `docs/debugging.md` | -| Local kind setup | `docs/local-kind-setup.md` | +| Setup guide | `docs/setup.md` | | Runbook | `docs/runbook.md` | | Contributing | `CONTRIBUTING.md` | | Test case templates | `test-design/templates/` | diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 630c333..b8dbe7f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -54,16 +54,28 @@ hyperfleet-e2e/ │ ├── labels/ - Test label definitions │ └── logger/ - Structured logging (slog) ├── e2e/ - Test suites -│ ├── adapter/ - Adapter failover and Maestro tests +│ ├── adapter/ - Adapter lifecycle tests +│ ├── channel/ - Channel management tests │ ├── cluster/ - Cluster lifecycle tests -│ └── nodepool/ - NodePool management tests +│ ├── nodepool/ - NodePool management tests +│ └── version/ - Version management tests ├── testdata/ - Test payloads and fixtures +│ ├── adapter-configs/ - Adapter configuration files │ └── payloads/ │ ├── clusters/ - Cluster creation payloads -│ └── nodepools/- NodePool creation payloads +│ └── nodepools/ - NodePool creation payloads +├── test-design/ - Test design documentation +│ ├── templates/ - Test case templates +│ ├── testcases/ - Test case documents +│ └── user-journeys/ - User journey maps ├── configs/ - Configuration files │ └── config.yaml - Default configuration -└── docs/ - Documentation +├── docs/ - Documentation +├── env/ - Environment configuration files +├── hack/ - Build and development scripts +├── images/ - Container image definitions +├── openapi/ - OpenAPI spec and generation config +└── scripts/ - Utility scripts ``` ## Testing diff --git a/Dockerfile b/Dockerfile index 9aa27a7..be4ac01 100644 --- a/Dockerfile +++ b/Dockerfile @@ -62,9 +62,6 @@ COPY 
--from=builder /build/bin/hyperfleet-e2e /usr/local/bin/ # Copy test payloads and fixtures COPY --from=builder /build/testdata /e2e/testdata -# Copy deploy scripts -COPY --from=builder /build/deploy-scripts /e2e/deploy-scripts - # Copy env files COPY --from=builder /build/env /e2e/env diff --git a/Makefile b/Makefile index e109b26..5993620 100644 --- a/Makefile +++ b/Makefile @@ -137,20 +137,6 @@ verify: generate fmt-check vet ## Run all verification checks .PHONY: check check: verify lint test ## Run all checks (fmt, vet, lint, test) -##@ Local kind Development (see docs/local-kind-setup.md) - -.PHONY: local-up -local-up: ## Full local setup: kind cluster + deploy + port-forward - ./deploy-scripts/kind-local.sh up - -.PHONY: local-down -local-down: ## Remove all components from local kind cluster - ./deploy-scripts/kind-local.sh down - -.PHONY: local-rebuild -local-rebuild: ## Rebuild + restart. Usage: make local-rebuild C=hyperfleet-adapter - ./deploy-scripts/kind-local.sh rebuild $(if $(NO_CACHE),--no-cache) $(C) - ##@ Container Images .PHONY: image diff --git a/README.md b/README.md index 0215df0..bc77210 100644 --- a/README.md +++ b/README.md @@ -10,19 +10,8 @@ Black-box end-to-end testing for validating the HyperFleet cluster lifecycle man HyperFleet E2E is a Ginkgo-based testing framework that validates HyperFleet cluster lifecycle management through black-box tests. It creates ephemeral test clusters for each test, providing complete isolation and supporting parallel execution. ## Quick Start - -```bash -# Clone and build -git clone https://github.com/openshift-hyperfleet/hyperfleet-e2e.git -cd hyperfleet-e2e -make build - -# Set API URL and run tests -export HYPERFLEET_API_URL=https://api.hyperfleet.example.com -./bin/hyperfleet-e2e test --label-filter=tier0 -``` - -**Done!** The framework created a cluster, validated adapters, and cleaned up resources. 
+- **[Setup Guide](docs/setup.md)** - Set up the environment to run E2E tests +- **[Getting Started](docs/getting-started.md)** - Getting started guide ## Running Tests @@ -79,15 +68,28 @@ hyperfleet-e2e/ │ ├── labels/ - Test label definitions │ └── logger/ - Structured logging (slog) ├── e2e/ - Test suites +│ ├── adapter/ - Adapter lifecycle tests +│ ├── channel/ - Channel management tests │ ├── cluster/ - Cluster lifecycle tests -│ └── nodepool/ - NodePool management tests +│ ├── nodepool/ - NodePool management tests +│ └── version/ - Version management tests ├── testdata/ - Test payloads and fixtures +│ ├── adapter-configs/ - Adapter configuration files │ └── payloads/ │ ├── clusters/ - Cluster creation payloads │ └── nodepools/ - NodePool creation payloads +├── test-design/ - Test design documentation +│ ├── templates/ - Test case templates +│ ├── testcases/ - Test case documents +│ └── user-journeys/ - User journey maps ├── configs/ - Configuration files │ └── config.yaml - Default configuration -└── docs/ - Documentation +├── docs/ - Documentation +├── env/ - Environment configuration files +├── hack/ - Build and development scripts +├── images/ - Container image definitions +├── openapi/ - OpenAPI spec and generation config +└── scripts/ - Utility scripts ``` ## Key Features @@ -102,8 +104,8 @@ hyperfleet-e2e/ ## Documentation - **[System Architecture](https://github.com/openshift-hyperfleet/architecture)** - Single source of truth for all HyperFleet architectural documentation -- **[Local kind Setup](docs/local-kind-setup.md)** - Run E2E tests locally with kind and RabbitMQ -- **[Runbook](docs/runbook.md)** - E2E test runbook for GKE clusters +- **[Setup Guide](docs/setup.md)** - Set up the environment to run E2E tests +- **[Runbook](docs/runbook.md)** - E2E test runbook - **[Getting Started](docs/getting-started.md)** - Run your first test in 10 minutes - **[Framework Architecture](docs/architecture.md)** - Understand the framework design - 
**[Development](docs/development.md)** - Write new tests diff --git a/deploy-scripts/.env.example b/deploy-scripts/.env.example deleted file mode 100644 index 3b0b1e1..0000000 --- a/deploy-scripts/.env.example +++ /dev/null @@ -1,142 +0,0 @@ -# ============================================================================ -# CLM Components Deployment Configuration -# ============================================================================ -# This file contains environment variables used by deploy-clm.sh -# Copy this file to .env and modify values as needed for your environment: -# cp .env.example .env - -# Kubernetes Configuration -# NAMESPACE must be unique to prevent GCP Pub/Sub topic/subscription collisions. -# GCP resources are named using the namespace (e.g., ${NAMESPACE}-clusters), -# so multiple users sharing the same namespace will interfere with each other's tests. -export NAMESPACE="${NAMESPACE:-hyperfleet-e2e-$(echo ${USER:-default} | tr '[:upper:]' '[:lower:]')}" - -# Provider Configuration -export GCP_PROJECT_ID="${GCP_PROJECT_ID:-hcm-hyperfleet}" - -# Image Registry Configuration -export IMAGE_REGISTRY="${IMAGE_REGISTRY:-registry.ci.openshift.org}" - -# API Component Configuration -export API_IMAGE_REPO="${API_IMAGE_REPO:-ci/hyperfleet-api}" -export API_IMAGE_TAG="${API_IMAGE_TAG:-latest}" -export API_SERVICE_TYPE="${API_SERVICE_TYPE:-LoadBalancer}" - -# ============================================================================ -# Adapter Deployment Configuration -# ============================================================================ -# REQUIRED: Specify which adapters to deploy (comma-separated list) -# These environment variables are REQUIRED - auto-discovery has been removed - -# Cluster-level adapters to deploy -# Available adapters: cl-namespace, cl-job, cl-deployment, cl-maestro -export CLUSTER_TIER0_ADAPTERS_DEPLOYMENT="${CLUSTER_TIER0_ADAPTERS_DEPLOYMENT:-cl-namespace,cl-job,cl-deployment,cl-maestro}" - -# NodePool-level adapters 
to deploy -# Available adapters: np-configmap -export NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT="${NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT:-np-configmap}" -# Base directory containing adapter test data folders -# Each adapter must have its own folder: ${ADAPTERS_FILE_DIR}// -# Note: Uses TESTDATA_DIR environment variable if not explicitly set -# ADAPTERS_FILE_DIR="${TESTDATA_DIR}/adapter-configs" - -# ============================================================================ -# API Adapter Configuration -# ============================================================================ -# NOTE: These are SEPARATE from tier0 deployment configuration above -# These should be set based on specific test case requirements -# Set per test case as needed - -# Adapters for API cluster configuration -export API_ADAPTERS_CLUSTER="${API_ADAPTERS_CLUSTER:-cl-namespace,cl-job,cl-deployment,cl-maestro}" - -# Adapters for API nodepool configuration -export API_ADAPTERS_NODEPOOL="${API_ADAPTERS_NODEPOOL:-np-configmap}" - -# Sentinel Component Configuration -export SENTINEL_IMAGE_REPO="${SENTINEL_IMAGE_REPO:-ci/hyperfleet-sentinel}" -export SENTINEL_IMAGE_TAG="${SENTINEL_IMAGE_TAG:-latest}" -export SENTINEL_BROKER_TYPE="${SENTINEL_BROKER_TYPE:-googlepubsub}" -export SENTINEL_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING="${SENTINEL_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING:-true}" - -# Adapter Component Configuration -export ADAPTER_IMAGE_REPO="${ADAPTER_IMAGE_REPO:-ci/hyperfleet-adapter}" -export ADAPTER_IMAGE_TAG="${ADAPTER_IMAGE_TAG:-latest}" -export ADAPTER_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING="${ADAPTER_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING:-true}" -export ADAPTER_GOOGLEPUBSUB_CREATE_SUBSCRIPTION_IF_MISSING="${ADAPTER_GOOGLEPUBSUB_CREATE_SUBSCRIPTION_IF_MISSING:-true}" - -# Adapter Pub/Sub Configuration (Optional) -# If not set, these will be auto-generated based on namespace and resource type: -# ADAPTER_SUBSCRIPTION_ID: ${NAMESPACE}-${resource_type}-${adapter_name} -# ADAPTER_TOPIC: 
${NAMESPACE}-${resource_type} -# ADAPTER_DEAD_LETTER_TOPIC: ${NAMESPACE}-${resource_type}-dlq - - -# HyperFleet API Configuration -# Note: If namespace is different, use: http://hyperfleet-api.${NAMESPACE}.svc.cluster.local:8000 -export API_BASE_URL="${API_BASE_URL:-http://hyperfleet-api:8000}" - - -# Helm Chart Sources -# Note: ADAPTER_CHART_* and API_CHART_* vars are also required by tier2 E2E tests -# (e.g., crash recovery) which deploy dedicated adapters and upgrade API config at runtime. -# When running tier2 tests in CI, ensure these are exported alongside GINKGO_LABEL_FILTER=tier2. -export API_CHART_REPO="${API_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-api.git}" -export API_CHART_REF="${API_CHART_REF:-main}" -export API_CHART_PATH="${API_CHART_PATH:-charts}" - -export SENTINEL_CHART_REPO="${SENTINEL_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-sentinel.git}" -export SENTINEL_CHART_REF="${SENTINEL_CHART_REF:-main}" -export SENTINEL_CHART_PATH="${SENTINEL_CHART_PATH:-charts}" - -export ADAPTER_CHART_REPO="${ADAPTER_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-adapter.git}" -export ADAPTER_CHART_REF="${ADAPTER_CHART_REF:-main}" -export ADAPTER_CHART_PATH="${ADAPTER_CHART_PATH:-charts}" - -# Component Selection Flags (true/false) -export INSTALL_API="${INSTALL_API:-true}" -export INSTALL_SENTINEL="${INSTALL_SENTINEL:-true}" -export INSTALL_ADAPTER="${INSTALL_ADAPTER:-true}" - -# Uninstall Options (only used when --action uninstall) -# DELETE_K8S_RESOURCES: Delete Kubernetes resources (Helm releases + namespace) -export DELETE_K8S_RESOURCES="${DELETE_K8S_RESOURCES:-false}" - -# DELETE_CLOUD_RESOURCES: Delete GCP Pub/Sub topics and subscriptions -export DELETE_CLOUD_RESOURCES="${DELETE_CLOUD_RESOURCES:-false}" - -# DELETE_ALL: Delete everything (k8s + cloud resources) - overrides individual flags -export DELETE_ALL="${DELETE_ALL:-false}" - -# Execution Options -export DRY_RUN="${DRY_RUN:-false}" -export 
VERBOSE="${VERBOSE:-false}" - -# ============================================================================ -# Local kind Development (used by kind-local.sh, see docs/local-kind-setup.md) -# ============================================================================ -# Uncomment and modify for local kind setup. Not used by deploy-clm.sh or CI. - -# Parent directory containing component repos (hyperfleet-api, hyperfleet-sentinel, etc.) -# export PROJECTS_DIR="${HOME}/projects" - -# Path to hyperfleet-infra repo (for RabbitMQ + Maestro install) -# export INFRA_DIR="${HOME}/projects/hyperfleet-infra" - -# Kind cluster name (kubectl context becomes kind-) -# export KIND_CLUSTER="kind" - -# Kubernetes namespace for local deployment (overrides NAMESPACE above) -# export NAMESPACE="hyperfleet-local" - -# Maestro configuration -# export MAESTRO_NS="maestro" -# export MAESTRO_CONSUMER="cluster1" -# export MAESTRO_LOCAL_PORT="8100" - -# RabbitMQ connection URL (in-cluster service name) -# export RABBITMQ_URL="amqp://guest:guest@rabbitmq:5672" - -# E2E test URLs (set these so 'make e2e' works without extra env vars) -# export HYPERFLEET_API_URL="http://localhost:8000" -# export MAESTRO_URL="http://localhost:8100" diff --git a/deploy-scripts/README.md b/deploy-scripts/README.md deleted file mode 100644 index 7cadd4e..0000000 --- a/deploy-scripts/README.md +++ /dev/null @@ -1,342 +0,0 @@ -# CLM Deployment Scripts - -Automated deployment scripts for HyperFleet CLM (Cluster Lifecycle Management) components. - -## Overview - -The `deploy-clm.sh` script automates the installation and uninstallation of HyperFleet CLM components (API, Sentinel, and Adapters) using Helm for E2E testing environments. It provides a consistent and repeatable deployment process with comprehensive validation and error handling. 
- -## Features - -- **Install/Uninstall Operations**: Deploy or remove all CLM components with a single command -- **Configurable Components**: Install all components or selectively skip specific ones -- **Image Customization**: Configure custom image repositories and tags for each component -- **Helm Chart Management**: Automatically clone and use Helm charts from component repositories -- **Pod Health Verification**: Automatic verification of pod health after deployment with failure detection (CrashLoopBackOff, ImagePullBackOff, etc.) -- **Namespace Lifecycle**: Automatic namespace creation and deletion -- **Infrastructure Validation**: Pre-deployment checks for cluster readiness -- **Dry-Run Support**: Test deployment without making changes -- **Verbose Logging**: Detailed logging for troubleshooting -- **Error Handling**: Comprehensive validation and graceful error handling with automatic log retrieval on failures - -## Prerequisites - -The script requires the following tools to be installed: - -- `kubectl` - Kubernetes command-line tool -- `helm` - Helm package manager (v3+) -- `git` - Git version control - -Ensure you have: -- Valid kubeconfig with access to target cluster -- Appropriate RBAC permissions for namespace and resource management -- Network access to component Git repositories and image registries - -## Quick Start - -### Option 1: Using Command-Line Flags (Simple) - -Install all components with default settings: - -```bash -./deploy-scripts/deploy-clm.sh --action install --namespace -``` - -Install with custom image tags: - -```bash -./deploy-scripts/deploy-clm.sh --action install \ - --namespace \ - --api-image-tag v1.2.0 \ - --sentinel-image-tag v1.2.0 \ - --adapter-image-tag v1.2.0 -``` - -Uninstall all components: - -```bash -./deploy-scripts/deploy-clm.sh --action uninstall --namespace -``` - -### Option 2: Using .env File (Recommended for Complex Configurations) - -For easier management of deployment parameters, use a `.env` file: - -1. 
**Copy the example configuration:** - ```bash - cd deploy-scripts/ - cp .env.example .env - ``` - -2. **Edit `.env` with your settings:** - ```bash - vim .env # or your preferred editor - ``` - - Key parameters you can configure: - - `NAMESPACE` - Kubernetes namespace (default: `hyperfleet-e2e-$USER`) - - `IMAGE_REGISTRY` - Container image registry - - `API_IMAGE_TAG`, `SENTINEL_IMAGE_TAG`, `ADAPTER_IMAGE_TAG` - Image tags - - `GCP_PROJECT_ID` - Google Cloud Project ID for Pub/Sub - - `INSTALL_API`, `INSTALL_SENTINEL`, `INSTALL_ADAPTER` - Component selection - - See [.env.example](.env.example) for all available parameters. - -3. **Run the deployment:** - ```bash - ./deploy-clm.sh --action install - ``` - -**Configuration Priority:** -- Command-line flags override .env file values -- .env file values override script defaults -- This allows baseline config in `.env` with per-run overrides via flags - -## Command-Line Reference - -For basic usage, see [Quick Start](#quick-start) section above. 
- -### Basic Syntax - -```bash -./deploy-scripts/deploy-clm.sh --action [OPTIONS] -``` - -### Required Flags - -| Flag | Description | -|------|-------------| -| `--action ` | Action to perform: `install` or `uninstall` | - -### Optional Flags - -#### General Options - -| Flag | Description | Default | -|------|-------------|---------| -| `--namespace ` | Kubernetes namespace for deployment | `hyperfleet-e2e-$USER` | -| `--dry-run` | Print commands without executing | `false` | -| `--verbose` | Enable verbose logging | `false` | -| `--help` | Show help message | - | - -#### Component Selection - -| Flag | Description | -|------|-------------| -| `--skip-api` | Skip API component installation | -| `--skip-sentinel` | Skip Sentinel component installation | -| `--skip-adapter` | Skip Adapter component installation | - -#### Image Configuration - -| Flag | Description | Default | -|------|-------------|---------| -| `--image-registry ` | Image registry for all components | `registry.ci.openshift.org/ci` | -| `--api-image-repo ` | API image repository (without registry) | `hyperfleet-api` | -| `--api-image-tag ` | API image tag | `latest` | -| `--sentinel-image-repo ` | Sentinel image repository (without registry) | `hyperfleet-sentinel` | -| `--sentinel-image-tag ` | Sentinel image tag | `latest` | -| `--adapter-image-repo ` | Adapter image repository (without registry) | `hyperfleet-adapter` | -| `--adapter-image-tag ` | Adapter image tag | `latest` | - -**Notes**: -- Helm chart sources are fixed and pulled from the official component repositories at the `main` branch -- Final image path format: `${IMAGE_REGISTRY}/${IMAGE_REPO}:${IMAGE_TAG}` -- Example: `registry.ci.openshift.org/ci/hyperfleet-api:latest` - -## Examples - -### Installation Examples - -#### 1. Install with Default Settings - -```bash -./deploy-scripts/deploy-clm.sh --action install --namespace -``` - -This installs all three components (API, Sentinel, Adapter) with default configurations. - -#### 2. 
Install Only API and Sentinel - -```bash -./deploy-scripts/deploy-clm.sh --action install \ - --namespace \ - --skip-adapter -``` - -#### 3. Install with Custom Image Tags - -```bash -./deploy-scripts/deploy-clm.sh --action install \ - --namespace \ - --api-image-tag v1.2.0 \ - --sentinel-image-tag v1.2.0 \ - --adapter-image-tag v1.2.0 -``` - -#### 4. Install with Custom Image Repository - -```bash -./deploy-scripts/deploy-clm.sh --action install \ - --namespace \ - --api-image-repo myregistry.io/hyperfleet-api \ - --api-image-tag pr-123 -``` - -#### 5. Dry-Run Installation (No Changes) - -```bash -./deploy-scripts/deploy-clm.sh --action install \ - --namespace \ - --dry-run \ - --verbose -``` - -This simulates the installation without making any actual changes. - -### Uninstallation Examples - -#### 1. Uninstall All Components - -```bash -./deploy-scripts/deploy-clm.sh --action uninstall --namespace -``` - -This removes all Helm releases. - -#### 2. Dry-Run Uninstallation - -```bash -./deploy-scripts/deploy-clm.sh --action uninstall \ - --namespace \ - --dry-run \ - --verbose -``` - -#### 3. Uninstall Specific Components Only - -```bash -./deploy-scripts/deploy-clm.sh --action uninstall \ - --namespace \ - --skip-api \ - --skip-sentinel -``` - -This only uninstalls the Adapter component. - -## Script Workflow - -### Installation Flow - -1. **Dependency Checks**: Validates that `kubectl`, `helm`, and `git` are available -2. **Context Validation**: Verifies kubectl context and cluster connectivity -3. **Chart Cloning**: Clones Helm charts from Git repositories -4. **Component Installation**: Installs components in order (API → Sentinel → Adapter) using `helm upgrade --install` with `--create-namespace` -5. **Pod Health Verification**: Verifies all pods are running and healthy (detects CrashLoopBackOff, ImagePullBackOff, etc.) -6. 
**Status Reporting**: Displays deployment status and usage instructions - -If any component fails health verification, the script automatically retrieves pod logs for troubleshooting and exits with an error status. - -### Uninstallation Flow - -1. **Dependency Checks**: Validates required tools -2. **Context Validation**: Verifies kubectl context -3. **User Confirmation**: Prompts for confirmation (unless `--dry-run`) -4. **Component Removal**: Uninstalls Helm releases in reverse order (Adapter → Sentinel → API) - this automatically removes all resources -5. **Cleanup**: Removes temporary working directories - -## Namespace Management - -The script leverages Helm's built-in namespace management: - -- **Installation**: Namespace is automatically created by Helm using the `--create-namespace` flag -- **Uninstallation**: Resources are removed by `helm uninstall`, but the namespace is **not deleted** -- **Uniqueness**: Each deployment requires a unique namespace to prevent GCP Pub/Sub resource collisions. 
- -If you want to completely remove the namespace after uninstallation: - -```bash -# Uninstall components -./deploy-scripts/deploy-clm.sh --action uninstall --namespace - -# Manually delete namespace if desired -kubectl delete namespace -``` - -This design allows you to: -- Reuse the same namespace for multiple install/uninstall cycles -- Keep other resources in the namespace that aren't managed by Helm -- Manually inspect resources after uninstallation for debugging - -## Troubleshooting - -### Debugging - -Use `--dry-run --verbose` flags to see what the script would do without making changes: - -```bash -./deploy-scripts/deploy-clm.sh --action install \ - --namespace \ - --dry-run \ - --verbose -``` - -Check Helm deployment status: - -```bash -helm list -n -kubectl get pods -n -kubectl logs -n -``` - -View script execution with bash trace: - -```bash -bash -x deploy-scripts/deploy-clm.sh --action install --namespace -``` - -## Integration with E2E Tests - -### Pre-Test Setup - -Before running E2E tests, deploy the CLM components: - -```bash -# Deploy test environment -./deploy-scripts/deploy-clm.sh --action install --namespace - -# Configure E2E test API URL -EXTERNAL_IP=$(kubectl get svc hyperfleet-api -n $NAMESPACE -o jsonpath='{.status.loadBalancer.ingress[0].ip}') -export HYPERFLEET_API_URL="http://${EXTERNAL_IP}:8000" - -# Run E2E tests -./bin/hyperfleet-e2e test --label-filter=tier0 -``` - -### Post-Test Cleanup - -After tests complete: - -```bash -./deploy-scripts/deploy-clm.sh --action uninstall --namespace -``` - -## Script Output - -The script provides structured log output with the following levels: - -- **[INFO]**: Informational messages -- **[SUCCESS]**: Successful operations -- **[WARNING]**: Warnings (non-critical) -- **[ERROR]**: Errors (critical failures) -- **[VERBOSE]**: Detailed debug information (when `--verbose` is enabled) - -## Best Practices - -1. **Use Dry-Run First**: Always test with `--dry-run` before actual deployment -2. 
**Namespace Isolation**: Use dedicated namespaces for different test environments -3. **Tag Specificity**: Use specific image tags instead of `latest` for reproducible deployments -4. **Cleanup**: Always cleanup test environments after use to save resources -5. **Verbose Logging**: Use `--verbose` when troubleshooting issues -6. **Version Alignment**: Deploy matching versions of all components together - diff --git a/deploy-scripts/deploy-clm.sh b/deploy-scripts/deploy-clm.sh deleted file mode 100755 index c338447..0000000 --- a/deploy-scripts/deploy-clm.sh +++ /dev/null @@ -1,602 +0,0 @@ -#!/usr/bin/env bash - -# deploy-clm.sh - Automated CLM Components Deployment Script -# -# This script automates the installation and uninstallation of HyperFleet CLM components -# (API, Sentinel, and Adapters) using Helm for E2E testing environments. -# -# Usage: -# ./deploy-clm.sh --action install -# ./deploy-clm.sh --action uninstall --dry-run --namespace -# -# Set NAMESPACE in .env file or use --namespace flag. -# Each deployment requires a unique namespace to prevent GCP Pub/Sub resource collisions. - -set -euo pipefail - -# ============================================================================ -# Working Directories (must be set before loading .env) -# ============================================================================ -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" -WORK_DIR="${PROJECT_ROOT}/.deploy-work" -TESTDATA_DIR="${TESTDATA_DIR:-${PROJECT_ROOT}/testdata}" - -# ============================================================================ -# Load Environment Variables from .env file -# ============================================================================ -ENV_FILE="${SCRIPT_DIR}/.env" - -if [[ -f "${ENV_FILE}" ]]; then - set -a # automatically export all variables - source "${ENV_FILE}" - set +a -else - echo "[WARNING] .env file not found at ${ENV_FILE}" - echo "[WARNING] Using default configuration values" -fi - -# ============================================================================ -# Default Configuration (fallback if .env is not loaded) -# ============================================================================ - -ACTION="${ACTION:-}" -NAMESPACE="${NAMESPACE:-}" -DRY_RUN="${DRY_RUN:-false}" -VERBOSE="${VERBOSE:-false}" - -# Image Registry -IMAGE_REGISTRY="${IMAGE_REGISTRY:-registry.ci.openshift.org}" -IMAGE_PULL_POLICY="${IMAGE_PULL_POLICY:-Always}" - -# Provider Configuration -GCP_PROJECT_ID="${GCP_PROJECT_ID:-hcm-hyperfleet}" - -# API Component -API_IMAGE_REPO="${API_IMAGE_REPO:-ci/hyperfleet-api}" -API_IMAGE_TAG="${API_IMAGE_TAG:-latest}" -API_SERVICE_TYPE="${API_SERVICE_TYPE:-LoadBalancer}" -API_ADAPTERS_CLUSTER="${API_ADAPTERS_CLUSTER:-}" -API_ADAPTERS_NODEPOOL="${API_ADAPTERS_NODEPOOL:-}" - -# Adapter Test Data Configuration -ADAPTERS_FILE_DIR="${ADAPTERS_FILE_DIR:-${TESTDATA_DIR}/adapter-configs}" -CLUSTER_TIER0_ADAPTERS_DEPLOYMENT="${CLUSTER_TIER0_ADAPTERS_DEPLOYMENT:-}" -NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT="${NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT:-}" - -# Sentinel Component -SENTINEL_IMAGE_REPO="${SENTINEL_IMAGE_REPO:-ci/hyperfleet-sentinel}" -SENTINEL_IMAGE_TAG="${SENTINEL_IMAGE_TAG:-latest}" -SENTINEL_BROKER_TYPE="${SENTINEL_BROKER_TYPE:-googlepubsub}" -SENTINEL_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING="${SENTINEL_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING:-true}" 
-SENTINEL_BROKER_RABBITMQ_URL="${SENTINEL_BROKER_RABBITMQ_URL:-}" - -# Adapter Component -ADAPTER_IMAGE_REPO="${ADAPTER_IMAGE_REPO:-ci/hyperfleet-adapter}" -ADAPTER_IMAGE_TAG="${ADAPTER_IMAGE_TAG:-latest}" -ADAPTER_BROKER_TYPE="${ADAPTER_BROKER_TYPE:-googlepubsub}" -ADAPTER_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING="${ADAPTER_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING:-true}" -ADAPTER_GOOGLEPUBSUB_CREATE_SUBSCRIPTION_IF_MISSING="${ADAPTER_GOOGLEPUBSUB_CREATE_SUBSCRIPTION_IF_MISSING:-true}" -ADAPTER_BROKER_RABBITMQ_URL="${ADAPTER_BROKER_RABBITMQ_URL:-}" - -# HyperFleet API Configuration -API_BASE_URL="${API_BASE_URL:-http://hyperfleet-api:8000}" - -# Helm Chart Sources -API_CHART_REPO="${API_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-api.git}" -API_CHART_REF="${API_CHART_REF:-main}" -API_CHART_PATH="${API_CHART_PATH:-charts}" - -SENTINEL_CHART_REPO="${SENTINEL_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-sentinel.git}" -SENTINEL_CHART_REF="${SENTINEL_CHART_REF:-main}" -SENTINEL_CHART_PATH="${SENTINEL_CHART_PATH:-charts}" - -ADAPTER_CHART_REPO="${ADAPTER_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-adapter.git}" -ADAPTER_CHART_REF="${ADAPTER_CHART_REF:-main}" -ADAPTER_CHART_PATH="${ADAPTER_CHART_PATH:-charts}" - -# Component flags -INSTALL_API="${INSTALL_API:-true}" -INSTALL_SENTINEL="${INSTALL_SENTINEL:-true}" -INSTALL_ADAPTER="${INSTALL_ADAPTER:-true}" - -# Uninstall options -DELETE_K8S_RESOURCES="${DELETE_K8S_RESOURCES:-false}" -DELETE_CLOUD_RESOURCES="${DELETE_CLOUD_RESOURCES:-false}" -DELETE_ALL="${DELETE_ALL:-false}" - -# Debug logging -DEBUG_LOG_DIR="${DEBUG_LOG_DIR:-${PROJECT_ROOT}/.debug-work}" - -# ============================================================================ -# Load Library Modules -# ============================================================================ - -source "${SCRIPT_DIR}/lib/common.sh" -source "${SCRIPT_DIR}/lib/helm.sh" -source "${SCRIPT_DIR}/lib/api.sh" -source 
"${SCRIPT_DIR}/lib/sentinel.sh" -source "${SCRIPT_DIR}/lib/adapter.sh" -source "${SCRIPT_DIR}/lib/gcp.sh" - -# ============================================================================ -# Usage and Argument Parsing -# ============================================================================ - -print_usage() { - cat << EOF -Usage: ${0##*/} --action [OPTIONS] - -Automated deployment script for HyperFleet CLM components (API, Sentinel, Adapter) - -CONFIGURATION: - This script loads configuration from ${SCRIPT_DIR}/.env file. - Command-line flags override .env values. - - NAMESPACE should be unique to prevent GCP Pub/Sub collisions. - Recommended: set NAMESPACE=hyperfleet-e2e-\$USER in .env file. - -REQUIRED FLAGS: - --action Action to perform: install or uninstall - -OPTIONAL FLAGS: - --namespace Kubernetes namespace (default from .env: ${NAMESPACE}) - - # Component Selection - --skip-api Skip API installation - --skip-sentinel Skip Sentinel installation - --skip-adapter Skip Adapter installation - - # Image Configuration - --image-registry Image registry (default: ${IMAGE_REGISTRY}) - --api-image-repo API image repository (default: ${API_IMAGE_REPO}) - --api-image-tag API image tag (default: ${API_IMAGE_TAG}) - --sentinel-image-repo Sentinel image repository (default: ${SENTINEL_IMAGE_REPO}) - --sentinel-image-tag Sentinel image tag (default: ${SENTINEL_IMAGE_TAG}) - --adapter-image-repo Adapter image repository (default: ${ADAPTER_IMAGE_REPO}) - --adapter-image-tag Adapter image tag (default: ${ADAPTER_IMAGE_TAG}) - - # API Configuration - --api-base-url HyperFleet API base URL for Sentinel and Adapter - (default: http://hyperfleet-api..svc.cluster.local:8000) - --api-adapters-cluster Comma-separated list of cluster adapters for API config (e.g., "cl-namespace,cl-job") - --api-adapters-nodepool Comma-separated list of nodepool adapters for API config (e.g., "np-configmap") - - # Adapter Deployment Configuration - --cluster-tier0-adapters Comma-separated list 
of cluster-level adapters to deploy (e.g., "cl-namespace,cl-job") - --nodepool-tier0-adapters Comma-separated list of nodepool-level adapters to deploy (e.g., "np-configmap") - --adapters-file-dir Base directory containing adapter test data folders (default: ${TESTDATA_DIR}/adapter-configs) - - # Uninstall Options (only for --action uninstall) - --delete-k8s-resources Delete Kubernetes resources (Helm releases + namespace) - --delete-cloud-resources Delete GCP Pub/Sub topics and subscriptions - --all Delete everything (k8s resources + cloud resources) - - # Execution Options - --dry-run Print commands without executing - --verbose Enable verbose logging - --debug-log-dir Directory to save debug logs on deployment failures - (default: ${WORK_DIR}/debug-logs) - --help Show this help message - -ENVIRONMENT VARIABLES: - All configuration can be set in the .env file located at: ${SCRIPT_DIR}/.env - - Common variables: - - NAMESPACE Kubernetes namespace - - IMAGE_REGISTRY Container image registry - - API_IMAGE_TAG API image tag - - SENTINEL_IMAGE_TAG Sentinel image tag - - ADAPTER_IMAGE_TAG Adapter image tag - - GCP_PROJECT_ID Google Cloud Project ID for Pub/Sub - - TESTDATA_DIR Base directory for test data (default: PROJECT_ROOT/testdata) - - CLUSTER_TIER0_ADAPTERS_DEPLOYMENT Cluster-level adapters to deploy (comma-separated) - - NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT NodePool-level adapters to deploy (comma-separated) - - ADAPTERS_FILE_DIR Base directory for adapter test data (default: TESTDATA_DIR/adapter-configs) - - API_ADAPTERS_CLUSTER Adapters for API cluster config (set per test case) - - API_ADAPTERS_NODEPOOL Adapters for API nodepool config (set per test case) - - RabbitMQ broker (must be provisioned externally before running this script): - - SENTINEL_BROKER_TYPE=rabbitmq Use RabbitMQ instead of Google Pub/Sub for Sentinel - - SENTINEL_BROKER_RABBITMQ_URL RabbitMQ AMQP URL for Sentinel (e.g., amqp://user:pass@host:5672/) - - ADAPTER_BROKER_TYPE=rabbitmq Use 
RabbitMQ instead of Google Pub/Sub for Adapters - - ADAPTER_BROKER_RABBITMQ_URL RabbitMQ AMQP URL for Adapters - -EXAMPLES: - # Install with .env defaults - ${0##*/} --action install - - # Install with explicit namespace - ${0##*/} --action install --namespace - - # Install with custom image tags - ${0##*/} --action install \\ - --namespace \\ - --api-image-tag v1.0.0 \\ - --sentinel-image-tag v1.0.0 \\ - --adapter-image-tag v1.0.0 - - # Install only API and Sentinel - ${0##*/} --action install --namespace --skip-adapter - - # Dry-run to preview actions - ${0##*/} --action uninstall --namespace --dry-run --verbose - - # Delete Kubernetes resources - ${0##*/} --action uninstall --namespace --delete-k8s-resources - - # Delete GCP Pub/Sub resources - ${0##*/} --action uninstall --namespace --delete-cloud-resources - - # Complete cleanup: delete everything - ${0##*/} --action uninstall --namespace --all - - # Install with custom image repository - ${0##*/} --action install \\ - --namespace \\ - --api-image-repo myregistry.io/hyperfleet-api \\ - --api-image-tag dev-123 - -EOF -} - -parse_arguments() { - if [[ $# -eq 0 ]]; then - print_usage - exit 1 - fi - - while [[ $# -gt 0 ]]; do - case "$1" in - --action) - ACTION="$2" - shift 2 - ;; - --namespace) - NAMESPACE="$2" - shift 2 - ;; - --skip-api) - INSTALL_API=false - shift - ;; - --skip-sentinel) - INSTALL_SENTINEL=false - shift - ;; - --skip-adapter) - INSTALL_ADAPTER=false - shift - ;; - --image-registry) - IMAGE_REGISTRY="$2" - shift 2 - ;; - --api-image-repo) - API_IMAGE_REPO="$2" - shift 2 - ;; - --api-image-tag) - API_IMAGE_TAG="$2" - shift 2 - ;; - --sentinel-image-repo) - SENTINEL_IMAGE_REPO="$2" - shift 2 - ;; - --sentinel-image-tag) - SENTINEL_IMAGE_TAG="$2" - shift 2 - ;; - --adapter-image-repo) - ADAPTER_IMAGE_REPO="$2" - shift 2 - ;; - --adapter-image-tag) - ADAPTER_IMAGE_TAG="$2" - shift 2 - ;; - --api-base-url) - API_BASE_URL="$2" - shift 2 - ;; - --api-adapters-cluster) - API_ADAPTERS_CLUSTER="$2" - 
shift 2 - ;; - --api-adapters-nodepool) - API_ADAPTERS_NODEPOOL="$2" - shift 2 - ;; - --cluster-tier0-adapters) - CLUSTER_TIER0_ADAPTERS_DEPLOYMENT="$2" - shift 2 - ;; - --nodepool-tier0-adapters) - NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT="$2" - shift 2 - ;; - --adapters-file-dir) - ADAPTERS_FILE_DIR="$2" - shift 2 - ;; - --delete-k8s-resources) - DELETE_K8S_RESOURCES=true - shift - ;; - --delete-cloud-resources) - DELETE_CLOUD_RESOURCES=true - shift - ;; - --all) - DELETE_ALL=true - DELETE_K8S_RESOURCES=true - DELETE_CLOUD_RESOURCES=true - shift - ;; - --dry-run) - DRY_RUN=true - shift - ;; - --verbose) - VERBOSE=true - shift - ;; - --debug-log-dir) - DEBUG_LOG_DIR="$2" - shift 2 - ;; - --help|-h) - print_usage - exit 0 - ;; - *) - log_error "Unknown option: $1" - echo - print_usage - exit 1 - ;; - esac - done - - # Validate required arguments - if [[ -z "${ACTION}" ]]; then - log_error "Missing required flag: --action" - echo - print_usage - exit 1 - fi - - # Validate NAMESPACE variable is set - if [[ -z "${NAMESPACE}" ]]; then - log_error "Missing required flag: --namespace or env variable NAMESPACE" - echo - print_usage - exit 1 - fi - - if (( ${#NAMESPACE} > 63 )) || [[ ! "${NAMESPACE}" =~ ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ ]]; then - log_error "Invalid namespace: '${NAMESPACE}'. Must match DNS-1123 label format and be <= 63 chars." - exit 1 - fi - - if [[ "${ACTION}" != "install" && "${ACTION}" != "uninstall" ]]; then - log_error "Invalid action: ${ACTION}. 
Must be 'install' or 'uninstall'" - exit 1 - fi - - # Validate at least one component is selected - if [[ "${INSTALL_API}" == "false" && "${INSTALL_SENTINEL}" == "false" && "${INSTALL_ADAPTER}" == "false" ]]; then - log_error "At least one component must be selected for installation" - exit 1 - fi -} - -# ============================================================================ -# Main Installation Flow -# ============================================================================ - -perform_install() { - log_section "Starting CLM Components Installation" - - # Validate environment - check_dependencies || exit 1 - validate_kubectl_context || exit 1 - - # Prepare working directory - log_section "Preparing Working Directory" - mkdir -p "${WORK_DIR}" - log_verbose "Work directory: ${WORK_DIR}" - - # Clone Helm charts - log_section "Cloning Helm Charts" - - if [[ "${INSTALL_API}" == "true" ]]; then - clone_helm_chart "api" "${API_CHART_REPO}" "${API_CHART_REF}" "${API_CHART_PATH}" || exit 1 - fi - - if [[ "${INSTALL_SENTINEL}" == "true" ]]; then - clone_helm_chart "sentinel" "${SENTINEL_CHART_REPO}" "${SENTINEL_CHART_REF}" "${SENTINEL_CHART_PATH}" || exit 1 - fi - - if [[ "${INSTALL_ADAPTER}" == "true" ]]; then - clone_helm_chart "adapter" "${ADAPTER_CHART_REPO}" "${ADAPTER_CHART_REF}" "${ADAPTER_CHART_PATH}" || exit 1 - fi - - # Install components in order: API -> Sentinel -> Adapter - if [[ "${INSTALL_API}" == "true" ]]; then - install_api || exit 1 - fi - - if [[ "${INSTALL_SENTINEL}" == "true" ]]; then - install_sentinel || exit 1 - fi - - if [[ "${INSTALL_ADAPTER}" == "true" ]]; then - install_adapters || { - log_error "Adapter installation failed" - log_section "Installation Failed" - exit 1 - } - fi - - # Final status - log_section "Installation Complete" - - if [[ "${DRY_RUN}" == "false" ]]; then - log_info "Deployed components:" - helm list -n "${NAMESPACE}" - - echo - log_info "Pod status:" - kubectl get pods -n "${NAMESPACE}" - - echo - log_success "All 
components installed successfully!" - log_info "Namespace: ${NAMESPACE}" - log_info "To view logs: kubectl logs -n ${NAMESPACE} -l app.kubernetes.io/name=" - - # Display API external IP if available - if [[ "${INSTALL_API}" == "true" ]]; then - local external_ip - external_ip=$(kubectl get svc "hyperfleet-api" -n "${NAMESPACE}" -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null) - if [[ -n "${external_ip}" ]]; then - echo - log_info "HyperFleet API External IP: ${external_ip}" - log_info "API URL: http://${external_ip}:8000" - fi - fi - else - log_info "[DRY-RUN] Installation simulation complete" - fi - - # Clean up work directory - if [[ "${DRY_RUN}" == "false" && "${VERBOSE}" == "false" ]]; then - log_verbose "Cleaning up work directory" - rm -rf "${WORK_DIR}" - fi -} - -# ============================================================================ -# Main Uninstallation Flow -# ============================================================================ - -perform_uninstall() { - log_section "Starting CLM Components Uninstallation" - - # Validate environment - check_dependencies || exit 1 - validate_kubectl_context || exit 1 - - # Display uninstall configuration - log_info "Uninstall Configuration:" - log_info " Delete K8s Resources (including namespace): ${DELETE_K8S_RESOURCES}" - log_info " Delete Cloud Resources: ${DELETE_CLOUD_RESOURCES}" - - local uninstall_errors=0 - - # Uninstall Kubernetes resources (in reverse order: Adapter -> Sentinel -> API) - if [[ "${DELETE_K8S_RESOURCES}" == "true" ]]; then - log_section "Uninstalling Kubernetes Resources" - - if [[ "${INSTALL_ADAPTER}" == "true" ]]; then - if ! uninstall_adapters; then - ((uninstall_errors++)) - fi - fi - - if [[ "${INSTALL_SENTINEL}" == "true" ]]; then - if ! uninstall_sentinel; then - ((uninstall_errors++)) - fi - fi - - if [[ "${INSTALL_API}" == "true" ]]; then - if ! 
uninstall_api; then - log_warning "Failed to uninstall API" - ((uninstall_errors++)) - fi - fi - - # Delete namespace (this will remove any remaining k8s resources) - if ! delete_namespace "${NAMESPACE}"; then - log_warning "Failed to delete namespace" - ((uninstall_errors++)) - fi - else - log_info "Skipping Kubernetes resource deletion (use --delete-k8s-resources to enable)" - fi - - # Delete GCP resources (topics and subscriptions) - if [[ "${DELETE_CLOUD_RESOURCES}" == "true" ]]; then - log_section "Deleting Cloud Provider Resources" - if ! cleanup_gcp_resources "${NAMESPACE}"; then - log_warning "Some GCP resources failed to delete" - ((uninstall_errors++)) - fi - else - log_info "Skipping cloud resource deletion (use --delete-cloud-resources to enable)" - fi - - # Final status - log_section "Uninstallation Complete" - - if [[ "${DRY_RUN}" == "false" ]]; then - # Show summary of what was deleted - echo - log_info "Summary:" - [[ "${DELETE_K8S_RESOURCES}" == "true" ]] && log_info " ✓ K8s resources and namespace" - [[ "${DELETE_CLOUD_RESOURCES}" == "true" ]] && log_info " ✓ Cloud resources" - - echo - if [[ ${uninstall_errors} -eq 0 ]]; then - log_success "Uninstallation completed successfully!" 
- else - log_error "Uninstallation completed with ${uninstall_errors} error(s)" - log_error "Please check the logs above for details" - exit 1 - fi - else - log_info "[DRY-RUN] Uninstallation simulation complete" - fi - - # Clean up work directory - if [[ -d "${WORK_DIR}" ]]; then - log_verbose "Cleaning up work directory" - rm -rf "${WORK_DIR}" - fi -} - -# ============================================================================ -# Main Entry Point -# ============================================================================ - -main() { - parse_arguments "$@" - - log_section "CLM Components Deployment Script" - log_info "Action: ${ACTION}" - log_info "Namespace: ${NAMESPACE}" - log_info "Dry-run: ${DRY_RUN}" - log_info "Verbose: ${VERBOSE}" - - if [[ "${VERBOSE}" == "true" ]]; then - echo - log_verbose "Component Configuration:" - log_verbose " API: ${INSTALL_API} (${API_IMAGE_REPO}:${API_IMAGE_TAG})" - log_verbose " Sentinel: ${INSTALL_SENTINEL} (${SENTINEL_IMAGE_REPO}:${SENTINEL_IMAGE_TAG})" - log_verbose " Adapter: ${INSTALL_ADAPTER} (${ADAPTER_IMAGE_REPO}:${ADAPTER_IMAGE_TAG})" - fi - - case "${ACTION}" in - install) - perform_install - ;; - uninstall) - perform_uninstall - ;; - esac -} - -# Run main function -main "$@" diff --git a/deploy-scripts/kind-build-images.sh b/deploy-scripts/kind-build-images.sh deleted file mode 100755 index 467c442..0000000 --- a/deploy-scripts/kind-build-images.sh +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env bash -# -# kind-build-images.sh — Build and load HyperFleet images into kind -# -# Builds component images from local repos under PROJECTS_DIR and loads -# them into the kind cluster. No args = build all. Named args = build -# only those (use full repo names). 
-# -# Usage: -# ./deploy-scripts/kind-build-images.sh # Build all -# ./deploy-scripts/kind-build-images.sh hyperfleet-adapter # Build one -# ./deploy-scripts/kind-build-images.sh --no-cache # Force rebuild -# -# Env vars: -# PROJECTS_DIR Parent dir containing component repos (default: ~/projects) -# KIND_CLUSTER Kind cluster name (default: kind) - -set -euo pipefail - -PROJECTS_DIR="${PROJECTS_DIR:-${HOME}/projects}" -CI_REGISTRY="registry.ci.openshift.org/ci" -KIND_CLUSTER="${KIND_CLUSTER:-kind}" -CONTAINER_TOOL="${CONTAINER_TOOL:-$(command -v podman 2>/dev/null || command -v docker 2>/dev/null || true)}" -if [[ -z "${CONTAINER_TOOL}" ]]; then - echo "[ERROR] No container tool found (podman or docker). Install one or set CONTAINER_TOOL." - exit 1 -fi -NO_CACHE="" - -# The three platform components — each maps 1:1 to a Docker image. -# Adapter configs in testdata/ all share the same hyperfleet-adapter image. -COMPONENTS=("hyperfleet-api" "hyperfleet-sentinel" "hyperfleet-adapter") - -# ============================================================================ -# Parse args -# ============================================================================ - -TARGETS=() - -while [[ $# -gt 0 ]]; do - case "$1" in - --no-cache) NO_CACHE="--no-cache"; shift ;; - -h|--help) - echo "Usage: $0 [--no-cache] [COMPONENT...]" - echo "" - echo "Builds and loads HyperFleet images into kind from local repos." - echo "No args = build all. Named args = build only those." 
- echo "" - echo "Components: ${COMPONENTS[*]}" - echo "" - echo "Env: PROJECTS_DIR=${PROJECTS_DIR} KIND_CLUSTER=${KIND_CLUSTER} CONTAINER_TOOL=${CONTAINER_TOOL}" - exit 0 - ;; - -*) echo "Unknown option: $1"; exit 1 ;; - *) TARGETS+=("$1"); shift ;; - esac -done - -# Default: build all components -if [[ ${#TARGETS[@]} -eq 0 ]]; then - TARGETS=("${COMPONENTS[@]}") -fi - -# ============================================================================ -# Build and load -# ============================================================================ - -echo "=== Building HyperFleet images (cluster: ${KIND_CLUSTER}) ===" - -for name in "${TARGETS[@]}"; do - dir="${PROJECTS_DIR}/${name}" - - if [[ ! -d "${dir}" ]]; then - echo "[ERROR] ${name} not found at ${dir}" - echo " Clone it: git clone https://github.com/openshift-hyperfleet/${name}.git ${dir}" - echo " Or set PROJECTS_DIR to the parent directory containing your repos." - exit 1 - fi - - echo "[BUILD] ${name}..." - "${CONTAINER_TOOL}" build ${NO_CACHE} -t "${CI_REGISTRY}/${name}:latest" "${dir}" - - echo "[LOAD] ${name} -> kind..." - if [[ "$(basename "${CONTAINER_TOOL}")" == "podman" ]]; then - "${CONTAINER_TOOL}" save "${CI_REGISTRY}/${name}:latest" | kind load image-archive /dev/stdin --name "${KIND_CLUSTER}" - else - kind load docker-image "${CI_REGISTRY}/${name}:latest" --name "${KIND_CLUSTER}" - fi - echo "" -done - -echo "=== Done ===" diff --git a/deploy-scripts/kind-local.sh b/deploy-scripts/kind-local.sh deleted file mode 100755 index d6ad913..0000000 --- a/deploy-scripts/kind-local.sh +++ /dev/null @@ -1,222 +0,0 @@ -#!/usr/bin/env bash -# -# kind-local.sh — Manage a local kind cluster for E2E testing -# -# Wraps kind, helm, and deploy-clm.sh into a single workflow. -# Sources deploy-scripts/.env for all config (copy from .env.example). 
-# -# Usage: -# ./deploy-scripts/kind-local.sh up # Full setup -# ./deploy-scripts/kind-local.sh setup # Cluster + RabbitMQ + Maestro + images -# ./deploy-scripts/kind-local.sh deploy # Deploy components -# ./deploy-scripts/kind-local.sh port-forward # Forward API + Maestro -# ./deploy-scripts/kind-local.sh rebuild [component] # Rebuild image + restart -# ./deploy-scripts/kind-local.sh rebuild --no-cache [comp] # Force rebuild -# ./deploy-scripts/kind-local.sh down # Tear down -# -# See docs/local-kind-setup.md for the full guide. - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" - -# ============================================================================ -# Configuration — sourced from .env, with local-only defaults below -# ============================================================================ - -# shellcheck source=.env -[[ -f "${SCRIPT_DIR}/.env" ]] && source "${SCRIPT_DIR}/.env" - -# Local-only defaults (not in .env unless user added them) -KIND_CLUSTER="${KIND_CLUSTER:-kind}" -KIND_CONTEXT="kind-${KIND_CLUSTER}" -INFRA_DIR="${INFRA_DIR:-${HOME}/projects/hyperfleet-infra}" -PROJECTS_DIR="${PROJECTS_DIR:-${HOME}/projects}" -MAESTRO_NS="${MAESTRO_NS:-maestro}" -MAESTRO_CONSUMER="${MAESTRO_CONSUMER:-cluster1}" -MAESTRO_LOCAL_PORT="${MAESTRO_LOCAL_PORT:-8100}" -RABBITMQ_URL="${RABBITMQ_URL:-amqp://guest:guest@rabbitmq:5672}" - -# Override .env defaults for local kind -NAMESPACE="${NAMESPACE:-hyperfleet-local}" -IMAGE_PULL_POLICY="IfNotPresent" -API_SERVICE_TYPE="ClusterIP" - -# Map .env adapter names -CLUSTER_ADAPTERS="${CLUSTER_TIER0_ADAPTERS_DEPLOYMENT:-cl-namespace,cl-job,cl-deployment,cl-maestro}" -NODEPOOL_ADAPTERS="${NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT:-np-configmap}" - -# ============================================================================ -# Helpers -# ============================================================================ - -require_kind_context() { - 
if ! kubectl config get-contexts "${KIND_CONTEXT}" &>/dev/null; then - echo "ERROR: kind context ${KIND_CONTEXT} not found. Run: kind create cluster --name ${KIND_CLUSTER}" - exit 1 - fi - local current - current="$(kubectl config current-context 2>/dev/null || true)" - if [[ "${current}" != "${KIND_CONTEXT}" ]]; then - echo "Switching to kind context: ${KIND_CONTEXT}" - kubectl config use-context "${KIND_CONTEXT}" - fi -} - -kill_port_forwards() { - pkill -f "kubectl.*port-forward.*hyperfleet-api" 2>/dev/null || true - pkill -f "kubectl.*port-forward.*maestro" 2>/dev/null || true - sleep 1 -} - -# ============================================================================ -# Commands -# ============================================================================ - -cmd_setup() { - echo "=== Creating kind cluster ===" - kind get clusters 2>/dev/null | grep -q "^${KIND_CLUSTER}$" || kind create cluster --name "${KIND_CLUSTER}" - kubectl create namespace "${NAMESPACE}" --dry-run=client -o yaml | kubectl --context "${KIND_CONTEXT}" apply -f - - - echo "=== Installing RabbitMQ ===" - kubectl --context "${KIND_CONTEXT}" apply -f "${INFRA_DIR}/manifests/rabbitmq.yaml" --namespace "${NAMESPACE}" - echo "Waiting for RabbitMQ..." 
- local retries=60 - until kubectl --context "${KIND_CONTEXT}" get pod -l app=rabbitmq -n "${NAMESPACE}" --no-headers 2>/dev/null | grep -q .; do - ((retries--)) || { echo "ERROR: Timed out waiting for RabbitMQ pod"; exit 1; } - sleep 2 - done - kubectl --context "${KIND_CONTEXT}" wait --for=condition=ready pod -l app=rabbitmq --namespace "${NAMESPACE}" --timeout=120s - - echo "=== Installing Maestro ===" - make -C "${INFRA_DIR}" install-maestro NAMESPACE="${MAESTRO_NS}" KUBECONFIG="${HOME}/.kube/config" - make -C "${INFRA_DIR}" create-maestro-consumer MAESTRO_CONSUMER="${MAESTRO_CONSUMER}" NAMESPACE="${MAESTRO_NS}" KUBECONFIG="${HOME}/.kube/config" - - echo "=== Building images ===" - "${SCRIPT_DIR}/kind-build-images.sh" "$@" -} - -cmd_deploy() { - require_kind_context - - echo "=== Deploying API + Sentinels + Adapters ===" - SENTINEL_BROKER_RABBITMQ_URL="${RABBITMQ_URL}" \ - ADAPTER_BROKER_RABBITMQ_URL="${RABBITMQ_URL}" \ - ADAPTER_BROKER_TYPE=rabbitmq \ - SENTINEL_BROKER_TYPE=rabbitmq \ - IMAGE_PULL_POLICY="${IMAGE_PULL_POLICY}" \ - NAMESPACE="${NAMESPACE}" \ - API_SERVICE_TYPE="${API_SERVICE_TYPE}" \ - API_ADAPTERS_CLUSTER="${CLUSTER_ADAPTERS}" \ - API_ADAPTERS_NODEPOOL="${NODEPOOL_ADAPTERS}" \ - CLUSTER_TIER0_ADAPTERS_DEPLOYMENT="${CLUSTER_ADAPTERS}" \ - NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT="${NODEPOOL_ADAPTERS}" \ - "${SCRIPT_DIR}/deploy-clm.sh" --action install -} - -cmd_down() { - require_kind_context - - kill_port_forwards - - NAMESPACE="${NAMESPACE}" \ - CLUSTER_TIER0_ADAPTERS_DEPLOYMENT="${CLUSTER_ADAPTERS}" \ - NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT="${NODEPOOL_ADAPTERS}" \ - "${SCRIPT_DIR}/deploy-clm.sh" --action uninstall --delete-k8s-resources -} - -cmd_port_forward() { - kill_port_forwards - - kubectl --context "${KIND_CONTEXT}" port-forward -n "${NAMESPACE}" svc/hyperfleet-api 8000:8000 & - kubectl --context "${KIND_CONTEXT}" port-forward -n "${MAESTRO_NS}" svc/maestro "${MAESTRO_LOCAL_PORT}":8000 & - - local api_ready=false - for _ in $(seq 1 10); do - 
sleep 2 - if curl -sf http://localhost:8000/api/hyperfleet/v1/clusters > /dev/null 2>&1; then - api_ready=true - break - fi - done - if [[ "${api_ready}" == true ]]; then - echo "API ready at http://localhost:8000" - else - echo "ERROR: API not reachable at localhost:8000" - exit 1 - fi - if curl -sf "http://localhost:${MAESTRO_LOCAL_PORT}/api/maestro/v1/consumers" > /dev/null 2>&1; then - echo "Maestro ready at http://localhost:${MAESTRO_LOCAL_PORT}" - else - echo "WARNING: Maestro not reachable at localhost:${MAESTRO_LOCAL_PORT}" - fi -} - -# rebuild — Rebuild image(s), load into kind, restart affected deployments. -# Args forwarded to kind-build-images.sh (component names, --no-cache). -cmd_rebuild() { - require_kind_context - - "${SCRIPT_DIR}/kind-build-images.sh" "$@" - - # Figure out what to restart based on args (skip --no-cache flag) - local components=() - for arg in "$@"; do - [[ "${arg}" == --* ]] && continue - components+=("${arg}") - done - - if [[ ${#components[@]} -eq 0 ]]; then - echo "=== Restarting all deployments (excluding postgresql) ===" - local deploys - deploys=$(kubectl --context "${KIND_CONTEXT}" get deployments -n "${NAMESPACE}" -o name \ - | grep -v postgresql) - echo "${deploys}" | xargs kubectl --context "${KIND_CONTEXT}" rollout restart -n "${NAMESPACE}" - echo "${deploys}" | xargs -I{} kubectl --context "${KIND_CONTEXT}" rollout status {} -n "${NAMESPACE}" --timeout=120s - else - for comp in "${components[@]}"; do - echo "=== Restarting ${comp} ===" - kubectl --context "${KIND_CONTEXT}" rollout restart deployment \ - -n "${NAMESPACE}" -l "app.kubernetes.io/name=${comp},app.kubernetes.io/component!=postgresql" - kubectl --context "${KIND_CONTEXT}" rollout status deployment \ - -n "${NAMESPACE}" -l "app.kubernetes.io/name=${comp}" --timeout=120s - done - fi - - echo "=== Re-establishing port-forwards ===" - cmd_port_forward -} - -cmd_up() { - cmd_setup "$@" - cmd_deploy - cmd_port_forward -} - -# 
============================================================================ -# Entrypoint -# ============================================================================ - -case "${1:-}" in - up) shift; cmd_up "$@" ;; - setup) shift; cmd_setup "$@" ;; - deploy) cmd_deploy ;; - down|undeploy) cmd_down ;; - port-forward) cmd_port_forward ;; - rebuild) shift; cmd_rebuild "$@" ;; - *) - echo "Usage: $0 {up|setup|deploy|down|port-forward|rebuild}" - echo "" - echo " up [COMPONENTS...] Full setup from scratch" - echo " setup [COMPONENTS...] Cluster + RabbitMQ + Maestro + build images" - echo " deploy Deploy API + sentinels + adapters" - echo " down Remove all + kill port-forwards" - echo " port-forward Forward API (:8000) + Maestro (:${MAESTRO_LOCAL_PORT})" - echo " rebuild [--no-cache] [COMP...] Rebuild image(s) + restart + port-forward" - echo "" - echo " COMPONENTS: e.g. 'hyperfleet-adapter' (default: all)" - echo " Config: deploy-scripts/.env (copy from .env.example)" - exit 1 - ;; -esac diff --git a/deploy-scripts/lib/adapter.sh b/deploy-scripts/lib/adapter.sh deleted file mode 100755 index 6bd092d..0000000 --- a/deploy-scripts/lib/adapter.sh +++ /dev/null @@ -1,384 +0,0 @@ -#!/usr/bin/env bash - -# adapter.sh - HyperFleet Adapter component deployment functions -# -# This module handles discovery, installation, and uninstallation of adapters -# from the ${ADAPTERS_FILE_DIR} directory (defaults to ${TESTDATA_DIR}/adapter-configs) - -# ============================================================================ -# Adapter Discovery Functions -# ============================================================================ - -discover_adapters() { - # Use ADAPTERS_FILE_DIR env var, fallback to default - local adapter_configs_dir="${ADAPTERS_FILE_DIR:-${TESTDATA_DIR}/adapter-configs}" - - if [[ ! 
-d "${adapter_configs_dir}" ]]; then - log_verbose "Adapter configs directory not found: ${adapter_configs_dir}" >&2 - return 1 - fi - - # Read adapter names from environment variables - local cluster_adapters="${CLUSTER_TIER0_ADAPTERS_DEPLOYMENT:-}" - local nodepool_adapters="${NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT:-}" - - if [[ -z "${cluster_adapters}" && -z "${nodepool_adapters}" ]]; then - log_error "No adapters specified. Set CLUSTER_TIER0_ADAPTERS_DEPLOYMENT and/or NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT" >&2 - return 1 - fi - - # Build list of adapter directories from environment variables - local adapter_dirs=() - - # Add cluster adapters - if [[ -n "${cluster_adapters}" ]]; then - IFS=',' read -ra cluster_adapter_array <<<"${cluster_adapters}" - for adapter_name in "${cluster_adapter_array[@]}"; do - # Trim whitespace - adapter_name=$(echo "${adapter_name}" | xargs) - # Validate adapter name is not empty (prevents issues from trailing commas) - if [[ -z "${adapter_name}" ]]; then - log_error "Empty adapter name in CLUSTER_TIER0_ADAPTERS_DEPLOYMENT (check for trailing commas)" >&2 - return 1 - fi - if [[ -d "${adapter_configs_dir}/${adapter_name}" ]]; then - adapter_dirs+=("clusters|${adapter_name}") - else - log_error "Cluster adapter directory not found: ${adapter_configs_dir}/${adapter_name}" >&2 - return 1 - fi - done - fi - - # Add nodepool adapters - if [[ -n "${nodepool_adapters}" ]]; then - IFS=',' read -ra nodepool_adapter_array <<<"${nodepool_adapters}" - for adapter_name in "${nodepool_adapter_array[@]}"; do - # Trim whitespace - adapter_name=$(echo "${adapter_name}" | xargs) - # Validate adapter name is not empty (prevents issues from trailing commas) - if [[ -z "${adapter_name}" ]]; then - log_error "Empty adapter name in NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT (check for trailing commas)" >&2 - return 1 - fi - if [[ -d "${adapter_configs_dir}/${adapter_name}" ]]; then - adapter_dirs+=("nodepools|${adapter_name}") - else - log_error "NodePool adapter 
directory not found: ${adapter_configs_dir}/${adapter_name}" >&2 - return 1 - fi - done - fi - - if [[ ${#adapter_dirs[@]} -eq 0 ]]; then - log_verbose "No adapter configurations found" >&2 - return 1 - fi - - log_info "Found ${#adapter_dirs[@]} adapter(s) to deploy:" >&2 - for dir in "${adapter_dirs[@]}"; do - log_info " - ${dir}" >&2 - done - - # Export for use in other functions - # Format: resource_type|adapter_name (e.g., "clusters|cl-namespace") - printf '%s\n' "${adapter_dirs[@]}" -} - -# ============================================================================ -# Adapter Installation Functions -# ============================================================================ - -install_adapter_instance() { - local dir_name="$1" - - log_section "Installing Adapter: ${dir_name}" - - # Extract resource_type and adapter_name from format: resource_type|adapter_name - local resource_type="${dir_name%%|*}" - local adapter_name="${dir_name##*|}" - - # Validate the descriptor format and ensure both parts are non-empty - if [[ -z "${resource_type}" || -z "${adapter_name}" || "${dir_name}" != *"|"* ]]; then - log_error "Invalid adapter descriptor '${dir_name}'. Expected format: resource_type|adapter_name" - return 1 - fi - - log_info "Resource type: ${resource_type}" - log_info "Adapter name: ${adapter_name}" - - # Temporary workaround for installation - HYPERFLEET-1097 - local release_name="${adapter_name}" - - log_info "Release name: ${release_name} (length: ${#release_name})" - - # Source adapter config directory (using ADAPTERS_FILE_DIR env var) - local adapter_configs_dir="${ADAPTERS_FILE_DIR:-${TESTDATA_DIR}/adapter-configs}" - local source_adapter_dir="${adapter_configs_dir}/${adapter_name}" - - if [[ ! 
-d "${source_adapter_dir}" ]]; then - log_error "Adapter config directory not found: ${source_adapter_dir}" - return 1 - fi - - # Chart path - local full_chart_path="${WORK_DIR}/adapter/${ADAPTER_CHART_PATH}" - - # Copy adapter config folder to chart directory - local dest_adapter_dir="${full_chart_path}/${adapter_name}" - log_info "Copying adapter config from ${source_adapter_dir} to ${dest_adapter_dir}" - - if [[ -d "${dest_adapter_dir}" ]]; then - # Safety check: ensure dest_adapter_dir contains adapter_name to prevent accidental deletion - if [[ "${dest_adapter_dir}" != *"${adapter_name}" || "${dest_adapter_dir}" == "/" || "${dest_adapter_dir}" == "${full_chart_path}" ]]; then - log_error "Safety check failed: refusing to delete suspicious path: ${dest_adapter_dir}" - return 1 - fi - log_verbose "Removing existing adapter config directory: ${dest_adapter_dir}" - rm -rf "${dest_adapter_dir}" - fi - - cp -r "${source_adapter_dir}" "${dest_adapter_dir}" - - # Patch imagePullPolicy in task resource YAMLs (what adapters create at runtime, e.g. Jobs/Deployments); - # helm --set image.pullPolicy only controls the adapter pod itself — these are separate concerns - if [[ "${IMAGE_PULL_POLICY}" != "Always" ]]; then - log_verbose "Patching imagePullPolicy to ${IMAGE_PULL_POLICY} in adapter resource manifests" - find "${dest_adapter_dir}" -name '*.yaml' -exec \ - sed -i.bak "s/imagePullPolicy: Always/imagePullPolicy: ${IMAGE_PULL_POLICY}/g" {} + - find "${dest_adapter_dir}" -name '*.bak' -delete 2>/dev/null - fi - - # Values file path (now in the chart directory) - local values_file="${dest_adapter_dir}/values.yaml" - if [[ ! 
-f "${values_file}" ]]; then - log_error "Values file not found: ${values_file}" - return 1 - fi - - # Construct subscription ID and topic names - # Allow override from environment variables, otherwise use auto-generated defaults - local subscription_id="${ADAPTER_SUBSCRIPTION_ID:-${NAMESPACE}-${resource_type}-${adapter_name}}" - local topic="${ADAPTER_TOPIC:-${NAMESPACE}-${resource_type}}" - local dead_letter_topic="${ADAPTER_DEAD_LETTER_TOPIC:-${NAMESPACE}-${resource_type}-dlq}" - - if [[ "${ADAPTER_BROKER_TYPE}" == "rabbitmq" && -z "${ADAPTER_BROKER_RABBITMQ_URL}" ]]; then - log_error "ADAPTER_BROKER_RABBITMQ_URL must be set when ADAPTER_BROKER_TYPE=rabbitmq" - return 1 - fi - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would install adapter with:" - log_info " Release name: ${release_name}" - log_info " Namespace: ${NAMESPACE}" - log_info " Chart path: ${full_chart_path}" - log_info " Values file: ${values_file}" - log_info " Image: ${IMAGE_REGISTRY}/${ADAPTER_IMAGE_REPO}:${ADAPTER_IMAGE_TAG}" - log_info " Subscription ID: ${subscription_id}" - log_info " Topic: ${topic}" - log_info " Dead Letter Topic: ${dead_letter_topic}" - return 0 - fi - - # Build helm command with labels to track adapter metadata - local helm_cmd=( - helm upgrade --install - "${release_name}" - "${full_chart_path}" - --namespace "${NAMESPACE}" - --create-namespace - --wait - --timeout 5m - -f "${values_file}" - --set "fullnameOverride=${release_name}" - --set "image.registry=${IMAGE_REGISTRY}" - --set "image.repository=${ADAPTER_IMAGE_REPO}" - --set "image.tag=${ADAPTER_IMAGE_TAG}" - --set "image.pullPolicy=${IMAGE_PULL_POLICY}" - --set "broker.type=${ADAPTER_BROKER_TYPE}" - --set "broker.googlepubsub.projectId=${GCP_PROJECT_ID}" - --set "broker.googlepubsub.createTopicIfMissing=${ADAPTER_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING}" - --set "broker.googlepubsub.createSubscriptionIfMissing=${ADAPTER_GOOGLEPUBSUB_CREATE_SUBSCRIPTION_IF_MISSING}" - --set 
"broker.googlepubsub.subscriptionId=${subscription_id}" - --set "broker.googlepubsub.topic=${topic}" - --set "broker.googlepubsub.deadLetterTopic=${dead_letter_topic}" - --labels "adapter-resource-type=${resource_type},adapter-name=${adapter_name}" - ) - - if [[ "${ADAPTER_BROKER_TYPE}" == "rabbitmq" && -n "${ADAPTER_BROKER_RABBITMQ_URL}" ]]; then - local rabbitmq_queue="${ADAPTER_RABBITMQ_QUEUE:-${subscription_id}}" - local rabbitmq_exchange="${ADAPTER_RABBITMQ_EXCHANGE:-${topic}}" - local rabbitmq_routing_key="${ADAPTER_RABBITMQ_ROUTING_KEY:-#}" - helm_cmd+=( - --set "broker.rabbitmq.url=${ADAPTER_BROKER_RABBITMQ_URL}" - --set "broker.rabbitmq.queue=${rabbitmq_queue}" - --set "broker.rabbitmq.exchange=${rabbitmq_exchange}" - --set "broker.rabbitmq.routingKey=${rabbitmq_routing_key}" - ) - fi - - log_info "Executing Helm command:" - log_info "${helm_cmd[*]}" - echo - - if "${helm_cmd[@]}"; then - log_success "Adapter ${adapter_name} for ${resource_type} Helm release created successfully" - - # Verify pod health - log_info "Verifying pod health..." 
- if verify_pod_health "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "${adapter_name}" 120 5; then - log_success "Adapter ${adapter_name} for ${resource_type} is running and healthy" - else - log_error "Adapter ${adapter_name} for ${resource_type} deployment failed health check" - - # Capture debug logs before cleanup - local debug_log_dir="${DEBUG_LOG_DIR:-${WORK_DIR}/debug-logs}" - capture_debug_logs "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "${release_name}" "${debug_log_dir}" - - # Cleanup failed deployment - log_warning "Cleaning up failed adapter deployment: ${release_name}" - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_info "Failed adapter deployment cleaned up successfully" - else - log_warning "Failed to cleanup adapter deployment, it may need manual cleanup" - fi - return 1 - fi - else - log_error "Failed to install adapter ${adapter_name} for ${resource_type}" - - # Check if release was created (partial deployment) and cleanup - if helm list -n "${NAMESPACE}" 2>/dev/null | grep -q "^${release_name}"; then - # Capture debug logs before cleanup - local debug_log_dir="${DEBUG_LOG_DIR:-${WORK_DIR}/debug-logs}" - capture_debug_logs "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "${release_name}" "${debug_log_dir}" - - log_warning "Cleaning up failed adapter deployment: ${release_name}" - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_info "Failed adapter deployment cleaned up successfully" - else - log_warning "Failed to cleanup adapter deployment, it may need manual cleanup" - fi - fi - return 1 - fi -} - -install_adapters() { - log_section "Deploying All Adapters" - - # Discover adapters - local adapters - if ! adapters=$(discover_adapters); then - log_warning "No adapters found to deploy" - return 0 - fi - - # Install each adapter - local failed=0 - while IFS= read -r adapter_dir; do - if ! 
install_adapter_instance "${adapter_dir}"; then - log_warning "Failed to install adapter: ${adapter_dir}" - ((failed++)) - fi - done <<<"${adapters}" - - if [[ ${failed} -gt 0 ]]; then - log_error "${failed} adapter(s) failed to install" - return 1 - else - log_success "All adapters deployed successfully" - fi -} - -# ============================================================================ -# Adapter Uninstallation Functions -# ============================================================================ - -uninstall_adapter_instance() { - local dir_name="$1" - - log_section "Uninstalling Adapter: ${dir_name}" - - # Extract resource_type and adapter_name from format: resource_type|adapter_name - local resource_type="${dir_name%%|*}" - local adapter_name="${dir_name##*|}" - - # Validate the descriptor format and ensure both parts are non-empty - if [[ -z "${resource_type}" || -z "${adapter_name}" || "${dir_name}" != *"|"* ]]; then - log_error "Invalid adapter descriptor '${dir_name}'. Expected format: resource_type|adapter_name" - return 1 - fi - - log_info "Resource type: ${resource_type}" - log_info "Adapter name: ${adapter_name}" - - # Find all releases by searching for Helm labels (avoids pattern matching issues with truncated names) - log_info "Searching for releases with labels: adapter-resource-type=${resource_type}, adapter-name=${adapter_name}" - local matching_releases - matching_releases=$(helm list -n "${NAMESPACE}" --selector "adapter-resource-type=${resource_type},adapter-name=${adapter_name}" -q 2>/dev/null) - - if [[ -z "${matching_releases}" ]]; then - # Fallback: search by name prefix for releases created before labels were added - log_info "No releases found with labels. Trying fallback search by name prefix..." 
- local name_prefix="adapter-${resource_type}-${adapter_name}" - matching_releases=$(helm list -n "${NAMESPACE}" -q 2>/dev/null | grep "^${name_prefix}" || true) - - if [[ -z "${matching_releases}" ]]; then - log_warning "No releases found for adapter-resource-type=${resource_type}, adapter-name=${adapter_name} in namespace '${NAMESPACE}'" - return 0 - else - log_info "Found releases using name prefix fallback: ${matching_releases}" - fi - fi - - # Uninstall all matching releases - local uninstall_errors=0 - while IFS= read -r release_name; do - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would uninstall adapter (release: ${release_name})" - else - log_info "Uninstalling adapter ${adapter_name} for ${resource_type} (release: ${release_name})..." - log_info "Executing: helm uninstall ${release_name} -n ${NAMESPACE} --wait --timeout 5m" - - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_success "Adapter ${adapter_name} for ${resource_type} (release: ${release_name}) uninstalled successfully" - else - log_error "Failed to uninstall adapter ${adapter_name} for ${resource_type} (release: ${release_name})" - ((uninstall_errors++)) - fi - fi - done <<<"${matching_releases}" - - if [[ ${uninstall_errors} -gt 0 ]]; then - return 1 - fi - return 0 -} - -uninstall_adapters() { - log_section "Uninstalling All Adapters" - - # Discover adapters - local adapters - if ! adapters=$(discover_adapters); then - log_warning "No adapters found to uninstall" - return 0 - fi - - # Uninstall each adapter - local failed=0 - while IFS= read -r adapter_dir; do - if ! 
uninstall_adapter_instance "${adapter_dir}"; then - log_warning "Failed to uninstall adapter: ${adapter_dir}" - ((failed++)) - fi - done <<<"${adapters}" - - if [[ ${failed} -gt 0 ]]; then - log_error "${failed} adapter(s) failed to uninstall" - return 1 - else - log_success "All adapters uninstalled successfully" - fi -} diff --git a/deploy-scripts/lib/api.sh b/deploy-scripts/lib/api.sh deleted file mode 100755 index e57ea22..0000000 --- a/deploy-scripts/lib/api.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/usr/bin/env bash - -# api.sh - HyperFleet API component deployment functions -# -# This module handles installation and uninstallation of the HyperFleet API component - -# ============================================================================ -# API Component Functions -# ============================================================================ - -install_api() { - log_section "Installing API" - - local release_name="hyperfleet-api" - local full_chart_path="${WORK_DIR}/api/${API_CHART_PATH}" - - # Use API_ADAPTERS_* environment variables for API configuration - # These should be set dynamically based on specific test case requirements - local cluster_adapters="${API_ADAPTERS_CLUSTER:-}" - local nodepool_adapters="${API_ADAPTERS_NODEPOOL:-}" - - log_info "API Adapter Configuration:" - log_info " Cluster adapters: ${cluster_adapters:-}" - log_info " NodePool adapters: ${nodepool_adapters:-}" - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would install API with:" - log_info " Release name: ${release_name}" - log_info " Namespace: ${NAMESPACE}" - log_info " Chart path: ${full_chart_path}" - log_info " Image: ${IMAGE_REGISTRY}/${API_IMAGE_REPO}:${API_IMAGE_TAG}" - log_info " Service type: ${API_SERVICE_TYPE}" - [[ -n "${cluster_adapters}" ]] && log_info " Cluster adapters: ${cluster_adapters}" - [[ -n "${nodepool_adapters}" ]] && log_info " Nodepool adapters: ${nodepool_adapters}" - return 0 - fi - - log_info "Installing API..." 
- log_verbose "Release name: ${release_name}" - log_verbose "Image: ${IMAGE_REGISTRY}/${API_IMAGE_REPO}:${API_IMAGE_TAG}" - - # Build helm command with image overrides - local helm_cmd=( - helm upgrade --install - "${release_name}" - "${full_chart_path}" - --namespace "${NAMESPACE}" - --create-namespace - --wait - --timeout 3m - --set "image.registry=${IMAGE_REGISTRY}" - --set "image.repository=${API_IMAGE_REPO}" - --set "image.tag=${API_IMAGE_TAG}" - --set "image.pullPolicy=${IMAGE_PULL_POLICY}" - --set "service.type=${API_SERVICE_TYPE}" - ) - - # Add adapter configurations (always set both, use empty if not discovered) - # The API chart requires both config.adapters.required.cluster and config.adapters.required.nodepool to be set - if [[ -n "${cluster_adapters}" ]]; then - helm_cmd+=(--set "config.adapters.required.cluster={${cluster_adapters}}") - log_verbose "Cluster adapters (API): ${cluster_adapters}" - else - helm_cmd+=(--set "config.adapters.required.cluster={}") - log_verbose "Cluster adapters (API): none" - fi - - if [[ -n "${nodepool_adapters}" ]]; then - helm_cmd+=(--set "config.adapters.required.nodepool={${nodepool_adapters}}") - log_verbose "Nodepool adapters (API): ${nodepool_adapters}" - else - helm_cmd+=(--set "config.adapters.required.nodepool={}") - log_verbose "Nodepool adapters (API): none" - fi - - log_info "Executing: ${helm_cmd[*]}" - - if "${helm_cmd[@]}"; then - log_success "API Helm release created successfully" - - # Verify pod health - log_info "Verifying pod health..." 
- if verify_pod_health "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "API" 120 5; then - log_success "API is running and healthy" - else - log_error "API deployment failed health check" - - # Capture debug logs before cleanup - local debug_log_dir="${DEBUG_LOG_DIR:-${WORK_DIR}/debug-logs}" - capture_debug_logs "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "${release_name}" "${debug_log_dir}" - - # Cleanup failed deployment - log_warning "Cleaning up failed API deployment: ${release_name}" - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_info "Failed API deployment cleaned up successfully" - else - log_warning "Failed to cleanup API deployment, it may need manual cleanup" - fi - return 1 - fi - else - log_error "Failed to install API" - - # Check if release was created (partial deployment) and cleanup - if helm list -n "${NAMESPACE}" 2>/dev/null | grep -q "^${release_name}"; then - # Capture debug logs before cleanup - local debug_log_dir="${DEBUG_LOG_DIR:-${WORK_DIR}/debug-logs}" - capture_debug_logs "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "${release_name}" "${debug_log_dir}" - - log_warning "Cleaning up failed API deployment: ${release_name}" - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_info "Failed API deployment cleaned up successfully" - else - log_warning "Failed to cleanup API deployment, it may need manual cleanup" - fi - fi - return 1 - fi -} - -uninstall_api() { - log_section "Uninstalling API" - - local release_name="hyperfleet-api" - - # Check if release exists - if [[ -z "$(helm list -n "${NAMESPACE}" -q -f "^${release_name}$")" ]]; then - log_warning "Release '${release_name}' not found in namespace '${NAMESPACE}'" - return 0 - fi - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would uninstall API (release: ${release_name})" - return 0 - fi - - log_info "Uninstalling API..." 
- log_info "Executing: helm uninstall ${release_name} -n ${NAMESPACE} --wait --timeout 5m" - - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_success "API uninstalled successfully" - else - log_error "Failed to uninstall API" - return 1 - fi -} diff --git a/deploy-scripts/lib/common.sh b/deploy-scripts/lib/common.sh deleted file mode 100755 index 7890618..0000000 --- a/deploy-scripts/lib/common.sh +++ /dev/null @@ -1,305 +0,0 @@ -#!/usr/bin/env bash - -# common.sh - Common utilities for CLM deployment scripts -# -# This module provides shared functionality used across all deployment scripts: -# - Logging functions -# - Dependency checking -# - Kubernetes context validation - -# ============================================================================ -# Logging Functions -# ============================================================================ - -log_info() { - echo "[INFO] $*" -} - -log_success() { - echo "[SUCCESS] $*" -} - -log_warning() { - echo "[WARNING] $*" -} - -log_error() { - echo "[ERROR] $*" >&2 -} - -log_verbose() { - if [[ "${VERBOSE}" == "true" ]]; then - echo "[VERBOSE] $*" - fi -} - -log_section() { - echo - echo "===================================================================" - echo "$*" - echo "===================================================================" -} - -# ============================================================================ -# Dependency Checking -# ============================================================================ - -check_dependencies() { - log_section "Checking Dependencies" - - local missing_deps=() - - local deps=("kubectl" "helm" "git") - for dep in "${deps[@]}"; do - if ! 
command -v "${dep}" &> /dev/null; then - missing_deps+=("${dep}") - log_error "Required dependency '${dep}' not found" - else - local version - case "${dep}" in - kubectl) - version=$(kubectl version --client --short 2>/dev/null | head -n1 || echo "unknown") - ;; - helm) - version=$(helm version --short 2>/dev/null || echo "unknown") - ;; - git) - version=$(git --version || echo "unknown") - ;; - esac - log_verbose "Found ${dep}: ${version}" - fi - done - - if [[ ${#missing_deps[@]} -gt 0 ]]; then - log_error "Missing required dependencies: ${missing_deps[*]}" - log_error "Please install the missing dependencies and try again" - return 1 - fi - - log_success "All dependencies are available" - return 0 -} - -# ============================================================================ -# Kubernetes Context Validation -# ============================================================================ - -validate_kubectl_context() { - log_section "Validating Kubernetes Context" - - if ! kubectl cluster-info &> /dev/null; then - log_error "Unable to connect to Kubernetes cluster" - log_error "Please ensure your kubeconfig is properly configured" - return 1 - fi - - local context - context=$(kubectl config current-context) - log_info "Current kubectl context: ${context}" - - local cluster_info - cluster_info=$(kubectl cluster-info 2>&1 | head -n1 || echo "unknown") - log_verbose "Cluster info: ${cluster_info}" - - log_success "Kubectl context validated" - return 0 -} - -# ============================================================================ -# Pod Health Verification -# ============================================================================ - -verify_pod_health() { - local namespace="$1" - local selector="$2" - local component_name="${3:-component}" - local timeout="${4:-60}" - local interval="${5:-5}" - - log_info "Verifying pod health for ${component_name}..." 
- log_verbose "Namespace: ${namespace}, Selector: ${selector}" - - local elapsed=0 - while [[ ${elapsed} -lt ${timeout} ]]; do - # Get pod status - local pod_status - pod_status=$(kubectl get pods -n "${namespace}" -l "${selector}" \ - -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.status.phase}{"\t"}{range .status.containerStatuses[*]}{.state.waiting.reason}{" "}{.state.terminated.reason}{end}{"\n"}{end}' 2>/dev/null) - - if [[ -z "${pod_status}" ]]; then - log_warning "No pods found with selector ${selector} in namespace ${namespace}" - sleep ${interval} - ((elapsed += interval)) - continue - fi - - # Check for failure states - local has_failures=false - local failure_details="" - - while IFS=$'\t' read -r pod_name phase reasons; do - log_verbose "Pod ${pod_name}: phase=${phase}, reasons=${reasons}" - - # Check for problematic states - if [[ "${phase}" == "Failed" ]] || \ - [[ "${reasons}" == *"CrashLoopBackOff"* ]] || \ - [[ "${reasons}" == *"Error"* ]] || \ - [[ "${reasons}" == *"ImagePullBackOff"* ]] || \ - [[ "${reasons}" == *"ErrImagePull"* ]]; then - has_failures=true - failure_details="${failure_details}\n - ${pod_name}: ${phase} (${reasons})" - fi - done <<< "${pod_status}" - - if [[ "${has_failures}" == "true" ]]; then - log_error "Pod health check failed for ${component_name}:" - echo -e "${failure_details}" - log_info "Pod details:" - kubectl get pods -n "${namespace}" -l "${selector}" - return 1 - fi - - # Check if all pods are running - local running_count - running_count=$(kubectl get pods -n "${namespace}" -l "${selector}" \ - -o jsonpath='{range .items[*]}{.status.phase}{"\n"}{end}' 2>/dev/null | grep -c "^Running$" || echo "0") - - local total_count - total_count=$(kubectl get pods -n "${namespace}" -l "${selector}" --no-headers 2>/dev/null | wc -l | tr -d ' ') - - if [[ ${running_count} -gt 0 ]] && [[ ${running_count} -eq ${total_count} ]]; then - log_success "All pods for ${component_name} are running (${running_count}/${total_count})" - 
return 0 - fi - - log_verbose "Waiting for pods to be ready: ${running_count}/${total_count} running (${elapsed}s/${timeout}s)" - sleep ${interval} - ((elapsed += interval)) - done - - log_error "Timeout waiting for ${component_name} pods to become healthy" - log_info "Current pod status:" - kubectl get pods -n "${namespace}" -l "${selector}" - return 1 -} - -# ============================================================================ -# Debug Log Capture -# ============================================================================ - -capture_debug_logs() { - local namespace="$1" - local selector="$2" - local component_name="$3" - local output_dir="${4:-${WORK_DIR:-${PWD}}/debug-logs}" - local capture_failed=false - - log_section "Capturing Debug Logs for ${component_name}" - - # Create output directory - if ! mkdir -p "${output_dir}"; then - log_error "Failed to create debug log directory: ${output_dir}" - return 1 - fi - - local timestamp - timestamp=$(date +"%Y%m%d-%H%M%S") - local log_prefix="${output_dir}/${component_name}-${timestamp}-$$-${RANDOM}" - - log_info "Saving debug logs to: ${log_prefix}-*" - - # Capture pod logs - log_info "Capturing pod logs..." - kubectl logs -n "${namespace}" -l "${selector}" --all-containers=true --prefix=true > "${log_prefix}-pods.log" 2>&1 || { log_warning "Failed to capture current pod logs"; capture_failed=true; } - - # Capture previous pod logs (for crashed containers) - log_info "Capturing previous pod logs..." - kubectl logs -n "${namespace}" -l "${selector}" --all-containers=true --prefix=true --previous > "${log_prefix}-pods-previous.log" 2>&1 || true - - # Capture pod descriptions - log_info "Capturing pod descriptions..." - kubectl describe pods -n "${namespace}" -l "${selector}" > "${log_prefix}-pods-describe.txt" 2>&1 || { log_warning "Failed to capture pod descriptions"; capture_failed=true; } - - # Capture pod status - log_info "Capturing pod status..." 
- kubectl get pods -n "${namespace}" -l "${selector}" -o wide > "${log_prefix}-pods-status.txt" 2>&1 || { log_warning "Failed to capture pod status"; capture_failed=true; } - kubectl get pods -n "${namespace}" -l "${selector}" -o yaml > "${log_prefix}-pods-yaml.yaml" 2>&1 || { log_warning "Failed to capture pod YAML"; capture_failed=true; } - - # Capture events - log_info "Capturing namespace events..." - kubectl get events -n "${namespace}" --sort-by='.lastTimestamp' > "${log_prefix}-events.txt" 2>&1 || { log_warning "Failed to capture namespace events"; capture_failed=true; } - - # Capture deployment/statefulset status if exists - log_info "Capturing deployment/statefulset status..." - kubectl get deployments,statefulsets -n "${namespace}" -l "${selector}" -o wide > "${log_prefix}-workloads-status.txt" 2>&1 || { log_warning "Failed to capture workload status"; capture_failed=true; } - kubectl get deployments,statefulsets -n "${namespace}" -l "${selector}" -o yaml > "${log_prefix}-workloads-yaml.yaml" 2>&1 || { log_warning "Failed to capture workload YAML"; capture_failed=true; } - - # Capture services and endpoints - log_info "Capturing services and endpoints..." 
- kubectl get svc,endpoints -n "${namespace}" -l "${selector}" -o wide > "${log_prefix}-network.txt" 2>&1 || { log_warning "Failed to capture services and endpoints"; capture_failed=true; } - - # Create a summary file - cat > "${log_prefix}-summary.txt" < /dev/null; then - log_warning "Namespace '${namespace}' does not exist" - return 0 - fi - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would delete namespace: ${namespace}" - return 0 - fi - - log_info "Deleting namespace: ${namespace}" - log_warning "This will remove all resources in the namespace" - - if kubectl delete namespace "${namespace}" --wait --timeout=5m; then - log_success "Namespace '${namespace}' deleted successfully" - return 0 - else - log_error "Failed to delete namespace '${namespace}'" - log_info "You may need to manually remove finalizers or check for stuck resources" - return 1 - fi -} diff --git a/deploy-scripts/lib/gcp.sh b/deploy-scripts/lib/gcp.sh deleted file mode 100644 index 41b2be9..0000000 --- a/deploy-scripts/lib/gcp.sh +++ /dev/null @@ -1,320 +0,0 @@ -#!/usr/bin/env bash - -# gcp.sh - Google Cloud Platform resource management functions -# -# This module handles discovery and cleanup of GCP resources (Pub/Sub topics and subscriptions) -# created during deployment. 
-# -# NAMESPACE requirements -# - Must be unique to prevent Pub/Sub topic/subscription collisions across deployments -# - Must be DNS-1123 compliant (lowercase alphanumeric, hyphens, start/end with alphanumeric) -# - Default: hyperfleet-e2e-$USER (when using .env configuration) - -# ============================================================================ -# Constants -# ============================================================================ - -# Resource types managed by the system -readonly RESOURCE_TYPES=("clusters" "nodepools") - -# ============================================================================ -# GCP Dependency Checking -# ============================================================================ - -check_gcp_dependencies() { - log_verbose "Checking GCP CLI dependencies" - - if ! command -v gcloud &> /dev/null; then - log_error "gcloud CLI not found" - log_error "Please install Google Cloud SDK: https://cloud.google.com/sdk/docs/install" - return 1 - fi - - local gcloud_version - gcloud_version=$(gcloud --version 2>/dev/null | head -n1 || echo "unknown") - log_verbose "Found gcloud: ${gcloud_version}" - - return 0 -} - -# ============================================================================ -# GCP Pub/Sub Discovery Functions -# ============================================================================ - -discover_pubsub_topics() { - local namespace="$1" - local project_id="${GCP_PROJECT_ID}" - - log_verbose "Discovering Pub/Sub topics for namespace: ${namespace}" - - if [[ -z "${project_id}" ]]; then - log_error "GCP_PROJECT_ID is not set" - return 1 - fi - - # List topics that match the namespace pattern - # NAMESPACE must be unique and DNS-1123 compliant (default: hyperfleet-e2e-$USER when using .env) - # Topics are named: - # - ${NAMESPACE}-${resource_type} (e.g., hyperfleet-e2e-jdoe-clusters, hyperfleet-e2e-jdoe-nodepools) - # - ${NAMESPACE}-${resource_type}-dlq (e.g., hyperfleet-e2e-jdoe-clusters-dlq) - # - 
${NAMESPACE}-${resource_type}-${adapter_name}-dlq (e.g., hyperfleet-e2e-jdoe-clusters-adapter1-dlq) - local topics=() - local all_topics - - if ! all_topics=$(gcloud pubsub topics list --project="${project_id}" --format="value(name)" 2>/dev/null); then - log_error "Failed to list Pub/Sub topics in project ${project_id}" - log_error "Make sure you have authenticated with: gcloud auth login" - return 1 - fi - - while IFS= read -r topic; do - if [[ -z "${topic}" ]]; then - continue - fi - - # Extract topic name from full path (projects/{project}/topics/{topic-name}) - local topic_name="${topic##*/}" - - # Match topics with all naming patterns: - # 1. Main topics: ${namespace}-${resource_type} - # 2. DLQ topics (intended): ${namespace}-${resource_type}-dlq - # 3. DLQ topics (temporary/Helm bug): ${namespace}-${resource_type}-${adapter_name}-dlq - local matched=false - for resource_type in "${RESOURCE_TYPES[@]}"; do - if [[ "${topic_name}" == "${namespace}-${resource_type}" ]] || \ - [[ "${topic_name}" == "${namespace}-${resource_type}-dlq" ]] || \ - [[ "${topic_name}" =~ ^${namespace}-${resource_type}-.+-dlq$ ]]; then - matched=true - break - fi - done - - if [[ "${matched}" == "true" ]]; then - topics+=("${topic_name}") - fi - done <<< "${all_topics}" - - if [[ ${#topics[@]} -eq 0 ]]; then - log_verbose "No Pub/Sub topics found for namespace: ${namespace}" >&2 - return 1 - fi - - log_info "Found ${#topics[@]} Pub/Sub topic(s) for namespace ${namespace}:" >&2 - for topic in "${topics[@]}"; do - log_info " - ${topic}" >&2 - done - - # Export for use in other functions (stdout only) - printf '%s\n' "${topics[@]}" -} - -discover_pubsub_subscriptions() { - local namespace="$1" - local project_id="${GCP_PROJECT_ID}" - - log_verbose "Discovering Pub/Sub subscriptions for namespace: ${namespace}" - - if [[ -z "${project_id}" ]]; then - log_error "GCP_PROJECT_ID is not set" - return 1 - fi - - # List subscriptions that match the namespace pattern - # NAMESPACE must be unique 
and DNS-1123 compliant (default: hyperfleet-e2e-$USER when using .env) - # Subscriptions are named: ${NAMESPACE}-${resource_type}-${adapter_name} - # Example: hyperfleet-e2e-jdoe-clusters-adapter1, -clusters-adapter1 - local subscriptions=() - local all_subscriptions - - if ! all_subscriptions=$(gcloud pubsub subscriptions list --project="${project_id}" --format="value(name)" 2>/dev/null); then - log_error "Failed to list Pub/Sub subscriptions in project ${project_id}" - log_error "Make sure you have authenticated with: gcloud auth login" - return 1 - fi - - while IFS= read -r subscription; do - if [[ -z "${subscription}" ]]; then - continue - fi - - # Extract subscription name from full path (projects/{project}/subscriptions/{subscription-name}) - local subscription_name="${subscription##*/}" - - # Match subscriptions with the expected naming pattern: - # ${namespace}-${resource_type}-${adapter_name} - local matched=false - for resource_type in "${RESOURCE_TYPES[@]}"; do - if [[ "${subscription_name}" =~ ^${namespace}-${resource_type}-.+ ]]; then - matched=true - break - fi - done - - if [[ "${matched}" == "true" ]]; then - subscriptions+=("${subscription_name}") - fi - done <<< "${all_subscriptions}" - - if [[ ${#subscriptions[@]} -eq 0 ]]; then - log_verbose "No Pub/Sub subscriptions found for namespace: ${namespace}" >&2 - return 1 - fi - - log_info "Found ${#subscriptions[@]} Pub/Sub subscription(s) for namespace ${namespace}:" >&2 - for subscription in "${subscriptions[@]}"; do - log_info " - ${subscription}" >&2 - done - - # Export for use in other functions (stdout only) - printf '%s\n' "${subscriptions[@]}" -} - -# ============================================================================ -# GCP Pub/Sub Deletion Functions -# ============================================================================ - -delete_pubsub_subscription() { - local subscription_name="$1" - local project_id="${GCP_PROJECT_ID}" - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info 
"[DRY-RUN] Would delete subscription: ${subscription_name}" - return 0 - fi - - log_info "Deleting subscription: ${subscription_name}" - - if gcloud pubsub subscriptions delete "${subscription_name}" \ - --project="${project_id}" \ - --quiet 2>/dev/null; then - log_success "Deleted subscription: ${subscription_name}" - return 0 - else - log_error "Failed to delete subscription: ${subscription_name}" - return 1 - fi -} - -delete_pubsub_topic() { - local topic_name="$1" - local project_id="${GCP_PROJECT_ID}" - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would delete topic: ${topic_name}" - return 0 - fi - - log_info "Deleting topic: ${topic_name}" - - if gcloud pubsub topics delete "${topic_name}" \ - --project="${project_id}" \ - --quiet 2>/dev/null; then - log_success "Deleted topic: ${topic_name}" - return 0 - else - log_error "Failed to delete topic: ${topic_name}" - return 1 - fi -} - -delete_all_pubsub_subscriptions() { - local namespace="$1" - - log_section "Deleting Pub/Sub Subscriptions" - - # Discover subscriptions (stdout only contains resource names, stderr has logs) - local subscriptions - if ! subscriptions=$(discover_pubsub_subscriptions "${namespace}"); then - log_info "No Pub/Sub subscriptions to delete" - return 0 - fi - - # Delete each subscription - local failed=0 - while IFS= read -r subscription; do - if [[ -n "${subscription}" ]]; then - if ! delete_pubsub_subscription "${subscription}"; then - log_error "Failed to delete subscription: ${subscription}" - ((failed++)) - fi - fi - done <<< "${subscriptions}" - - if [[ ${failed} -gt 0 ]]; then - log_error "${failed} subscription(s) failed to delete" - return 1 - else - log_success "All subscriptions deleted successfully" - return 0 - fi -} - -delete_all_pubsub_topics() { - local namespace="$1" - - log_section "Deleting Pub/Sub Topics" - - # Discover topics (stdout only contains resource names, stderr has logs) - local topics - if ! 
topics=$(discover_pubsub_topics "${namespace}"); then - log_info "No Pub/Sub topics to delete" - return 0 - fi - - # Delete each topic - local failed=0 - while IFS= read -r topic; do - if [[ -n "${topic}" ]]; then - if ! delete_pubsub_topic "${topic}"; then - log_error "Failed to delete topic: ${topic}" - ((failed++)) - fi - fi - done <<< "${topics}" - - if [[ ${failed} -gt 0 ]]; then - log_error "${failed} topic(s) failed to delete" - return 1 - else - log_success "All topics deleted successfully" - return 0 - fi -} - -# ============================================================================ -# Main GCP Cleanup Function -# ============================================================================ - -cleanup_gcp_resources() { - local namespace="$1" - - log_section "Cleaning Up GCP Resources" - - # Check GCP CLI dependencies - if ! check_gcp_dependencies; then - log_error "GCP CLI dependencies not available" - return 1 - fi - - local cleanup_errors=0 - - # Delete subscriptions first (subscriptions depend on topics) - if ! delete_all_pubsub_subscriptions "${namespace}"; then - log_warning "Some subscriptions failed to delete" - ((cleanup_errors++)) - fi - - # Delete topics - if ! 
delete_all_pubsub_topics "${namespace}"; then - log_warning "Some topics failed to delete" - ((cleanup_errors++)) - fi - - if [[ ${cleanup_errors} -gt 0 ]]; then - log_warning "GCP resource cleanup completed with errors" - return 1 - else - log_success "GCP resource cleanup complete" - return 0 - fi -} diff --git a/deploy-scripts/lib/helm.sh b/deploy-scripts/lib/helm.sh deleted file mode 100755 index a835f49..0000000 --- a/deploy-scripts/lib/helm.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env bash - -# helm.sh - Helm chart management functions -# -# This module provides functions for cloning and managing Helm charts - -# ============================================================================ -# Helm Chart Management -# ============================================================================ - -clone_helm_chart() { - local component="$1" - local repo_url="$2" - local ref="$3" - local chart_path="$4" - - log_info "Cloning ${component} Helm chart from ${repo_url}@${ref} (sparse: ${chart_path})" - - local component_dir="${WORK_DIR}/${component}" - - if [[ -z "${WORK_DIR}" || "${WORK_DIR}" == "/" ]]; then - log_error "WORK_DIR must be set to a non-root directory" - return 1 - fi - if [[ -z "${component}" ]]; then - log_error "Component name is required" - return 1 - fi - - # Clean up any existing directory to ensure fresh clone - if [[ -d "${component_dir}" ]]; then - log_verbose "Removing existing directory: ${component_dir}" - rm -rf "${component_dir}" - fi - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would clone (sparse): git clone --depth 1 --filter=blob:none --sparse --branch ${ref} ${repo_url}" - log_info "[DRY-RUN] Would checkout: ${chart_path}" - return 0 - fi - - # Clone with sparse checkout - only download the chart directory - log_verbose "Executing sparse checkout: git clone --depth 1 --filter=blob:none --sparse --no-checkout --branch ${ref} ${repo_url} ${component_dir}" - if ! 
git clone --depth 1 --filter=blob:none --sparse --no-checkout --branch "${ref}" "${repo_url}" "${component_dir}" >/dev/null 2>&1; then - log_error "Failed to clone ${component} Helm chart" - return 1 - fi - - # Configure sparse checkout to only include the chart path (no cone mode to avoid root files) - log_verbose "Configuring sparse checkout for: ${chart_path}" - if ! (cd "${component_dir}" && \ - git sparse-checkout init --no-cone >/dev/null 2>&1 && \ - git sparse-checkout set "${chart_path}" >/dev/null 2>&1 && \ - git checkout "${ref}" >/dev/null 2>&1); then - log_error "Failed to checkout chart path: ${chart_path}" - return 1 - fi - - # Verify chart path exists - local full_chart_path="${component_dir}/${chart_path}" - if [[ ! -f "${full_chart_path}/Chart.yaml" ]]; then - log_error "Chart.yaml not found at ${full_chart_path}" - log_error "Please verify the chart path is correct" - return 1 - fi - - log_success "Cloned ${component} Helm chart" - log_verbose "Chart location: ${full_chart_path}" -} diff --git a/deploy-scripts/lib/sentinel.sh b/deploy-scripts/lib/sentinel.sh deleted file mode 100755 index 2033f33..0000000 --- a/deploy-scripts/lib/sentinel.sh +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/bin/env bash - -# sentinel.sh - HyperFleet Sentinel component deployment functions -# -# This module handles installation and uninstallation of HyperFleet Sentinel instances -# for both clusters and nodepools resource types - -# ============================================================================ -# Sentinel Component Functions -# ============================================================================ - -install_sentinel_instance() { - local resource_type="$1" # "clusters" or "nodepools" - - local component_name="Sentinel (${resource_type})" - local release_name="sentinel-${resource_type}" - local full_chart_path="${WORK_DIR}/sentinel/${SENTINEL_CHART_PATH}" - - log_section "Installing ${component_name}" - - # Determine API base URL - local 
api_url="${API_BASE_URL}" - - if [[ "${SENTINEL_BROKER_TYPE}" == "rabbitmq" && -z "${SENTINEL_BROKER_RABBITMQ_URL}" ]]; then - log_error "SENTINEL_BROKER_RABBITMQ_URL must be set when SENTINEL_BROKER_TYPE=rabbitmq" - return 1 - fi - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would install ${component_name} with:" - log_info " Release name: ${release_name}" - log_info " Namespace: ${NAMESPACE}" - log_info " Chart path: ${full_chart_path}" - log_info " Image: ${IMAGE_REGISTRY}/${SENTINEL_IMAGE_REPO}:${SENTINEL_IMAGE_TAG}" - log_info " API base URL: ${api_url} (config.clients.hyperfleetApi.baseUrl)" - log_info " Broker type: ${SENTINEL_BROKER_TYPE}" - log_info " Resource type: ${resource_type}" - log_info " Google Pub/Sub Project ID: ${GCP_PROJECT_ID}" - log_info " Google Pub/Sub Create Topic If Missing: ${SENTINEL_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING}" - return 0 - fi - - log_info "Installing ${component_name}..." - log_verbose "Release name: ${release_name}" - log_verbose "Image: ${IMAGE_REGISTRY}/${SENTINEL_IMAGE_REPO}:${SENTINEL_IMAGE_TAG}" - log_verbose "API base URL: ${api_url}" - log_verbose "Resource type: ${resource_type}" - - # Build helm command - local helm_cmd=( - helm upgrade --install - "${release_name}" - "${full_chart_path}" - --namespace "${NAMESPACE}" - --create-namespace - --wait - --timeout 3m - --set "image.registry=${IMAGE_REGISTRY}" - --set "image.repository=${SENTINEL_IMAGE_REPO}" - --set "image.tag=${SENTINEL_IMAGE_TAG}" - --set "image.pullPolicy=${IMAGE_PULL_POLICY}" - --set "config.clients.hyperfleetApi.baseUrl=${api_url}" - --set "config.resourceType=${resource_type}" - --set "broker.type=${SENTINEL_BROKER_TYPE}" - --set "broker.googlepubsub.projectId=${GCP_PROJECT_ID}" - --set "broker.googlepubsub.createTopicIfMissing=${SENTINEL_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING}" - ) - - if [[ "${SENTINEL_BROKER_TYPE}" == "rabbitmq" && -n "${SENTINEL_BROKER_RABBITMQ_URL}" ]]; then - helm_cmd+=(--set 
"broker.rabbitmq.url=${SENTINEL_BROKER_RABBITMQ_URL}") - fi - - # Add message_data.owner_references configuration for nodepools resource type - # This enables the sentinel to include ownerReferences from the Kubernetes resource - # in the message data sent to adapters, which is required for nodepools management - if [[ "${resource_type}" == "nodepools" ]]; then - helm_cmd+=( - --set "config.messageData.owner_references.id=resource.owner_references.id" - --set "config.messageData.owner_references.href=resource.owner_references.href" - --set "config.messageData.owner_references.kind=resource.owner_references.kind" - ) - fi - - log_info "Executing: ${helm_cmd[*]}" - - if "${helm_cmd[@]}"; then - log_success "${component_name} Helm release created successfully" - - # Verify pod health - log_info "Verifying pod health..." - if verify_pod_health "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "${component_name}" 120 5; then - log_success "${component_name} is running and healthy" - else - log_error "${component_name} deployment failed health check" - - # Capture debug logs before cleanup - local debug_log_dir="${DEBUG_LOG_DIR:-${WORK_DIR}/debug-logs}" - capture_debug_logs "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "${release_name}" "${debug_log_dir}" - - # Cleanup failed deployment - log_warning "Cleaning up failed ${component_name} deployment: ${release_name}" - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_info "Failed ${component_name} deployment cleaned up successfully" - else - log_warning "Failed to cleanup ${component_name} deployment, it may need manual cleanup" - fi - return 1 - fi - else - log_error "Failed to install ${component_name}" - - # Check if release was created (partial deployment) and cleanup - if helm list -n "${NAMESPACE}" 2>/dev/null | grep -q "^${release_name}"; then - # Capture debug logs before cleanup - local debug_log_dir="${DEBUG_LOG_DIR:-${WORK_DIR}/debug-logs}" - 
capture_debug_logs "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "${release_name}" "${debug_log_dir}" - - log_warning "Cleaning up failed ${component_name} deployment: ${release_name}" - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_info "Failed ${component_name} deployment cleaned up successfully" - else - log_warning "Failed to cleanup ${component_name} deployment, it may need manual cleanup" - fi - fi - return 1 - fi -} - -install_sentinel() { - - install_sentinel_instance "clusters" || return 1 - install_sentinel_instance "nodepools" || return 1 -} - -uninstall_sentinel_instance() { - local resource_type="$1" # "clusters" or "nodepools" - - # Capitalize first letter for display - local resource_type_display - if [[ "${resource_type}" == "clusters" ]]; then - resource_type_display="Clusters" - else - resource_type_display="Nodepools" - fi - - local component_name="Sentinel (${resource_type_display})" - local release_name="sentinel-${resource_type}" - - log_section "Uninstalling ${component_name}" - - # Check if release exists - if ! helm list -n "${NAMESPACE}" | grep -q "^${release_name}"; then - log_warning "Release '${release_name}' not found in namespace '${NAMESPACE}'" - return 0 - fi - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would uninstall ${component_name} (release: ${release_name})" - return 0 - fi - - log_info "Uninstalling ${component_name}..." 
- log_info "Executing: helm uninstall ${release_name} -n ${NAMESPACE} --wait --timeout 5m" - - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_success "${component_name} uninstalled successfully" - else - log_error "Failed to uninstall ${component_name}" - return 1 - fi -} - -uninstall_sentinel() { - # Uninstall in reverse order - uninstall_sentinel_instance "nodepools" || log_warning "Failed to uninstall Sentinel (Nodepools)" - uninstall_sentinel_instance "clusters" || log_warning "Failed to uninstall Sentinel (Clusters)" -} diff --git a/docs/development.md b/docs/development.md index 79b77c6..e091908 100644 --- a/docs/development.md +++ b/docs/development.md @@ -338,20 +338,6 @@ import ( No need to manually register tests. -### 4. Run Your Test - -```bash -# Run all cluster tests -make build -./bin/hyperfleet-e2e test --focus "\[Suite: cluster\]" - -# Run specific test by description -./bin/hyperfleet-e2e test --focus "Create Cluster via API" - -# Or run by label -./bin/hyperfleet-e2e test --label-filter "critical && lifecycle" -``` - ## Common Patterns ### Create Resource from Payload @@ -389,43 +375,65 @@ for _, adapter := range statuses.Items { } ``` -### Running a development environment with custom dev images and RabbitMQ +## Validating New E2E Tests -> **For a complete local setup guide using kind**, see [Local kind Setup](local-kind-setup.md). +After writing your test, validate it works properly: -While in development, it is common to use custom images for components (api, sentinel, adapters) instead of the CI images. +### 1. Set Up Your Development Environment -It is also convenient to use RabbitMQ to avoid dealing with GCP credentials for Pub/Sub. +You need a running HyperFleet environment before running tests. 
See the [Setup Guide](setup.md) for complete instructions: -RabbitMQ has to be installed beforehand, you can use the `hyperfleet-infra` repository to execute: +- **Kind (local):** Fast setup for local testing (recommended for development) +- **GCP:** Cloud environment for more realistic testing +The environment setup will configure required environment variables: +- `HYPERFLEET_API_URL` +- `MAESTRO_URL` +- `NAMESPACE` +- Additional settings from `env/env.local` (source this file if required, e.g. before running tier2 tests) + +### 2. Build the E2E Binary + +```bash +# Build the binary +make build ``` -make install-rabbitmq NAMESPACE=rabbitmq -``` -Then you can deploy the e2e test components with support for RabbitMQ and custom images executing: +### 3. Run Your Test + +```bash +# Run your specific test by description +./bin/hyperfleet-e2e test --focus "Your Test Description" +# Or run by suite +./bin/hyperfleet-e2e test --focus "\[Suite: Your new test suite\]" ``` -SENTINEL_BROKER_RABBITMQ_URL="amqp://guest:guest@rabbitmq.rabbitmq:5672" \ -ADAPTER_BROKER_RABBITMQ_URL="amqp://guest:guest@rabbitmq.rabbitmq:5672" \ -ADAPTER_BROKER_TYPE=rabbitmq \ -SENTINEL_BROKER_TYPE=rabbitmq \ -./deploy-scripts/deploy-clm.sh --action install \ ---namespace \ ---image-registry quay.io/ \ ---api-image-repo hyperfleet-api \ ---api-image-tag \ ---sentinel-image-repo hyperfleet-sentinel \ ---sentinel-image-tag \ ---adapter-image-repo hyperfleet-adapter \ ---adapter-image-tag \ ---api-base-url http://hyperfleet-api:8000 \ ---api-adapters-cluster cl-namespace,cl-maestro,cl-deployment,cl-job \ ---api-adapters-nodepool np-configmap \ ---cluster-tier0-adapters cl-namespace,cl-maestro,cl-deployment,cl-job,cl-invalid-resource,cl-precondition-error \ ---nodepool-tier0-adapters np-configmap + +### 4. Run Pre-Commit Checks + +Before committing, ensure your code passes all checks: + +```bash +# Run all checks (format, lint, unit tests) +make check ``` +### 5.
Verify Test Behavior + +Ensure your test: +- ✅ Creates resources successfully +- ✅ Waits for expected conditions +- ✅ Cleans up resources (check manually if needed) +- ✅ Passes consistently (run multiple times) +- ✅ Fails appropriately when conditions aren't met + +### 6. Check Test Output + +Review the test output for: +- Clear step descriptions (via `ginkgo.By()`) +- Appropriate timeout values +- Proper error messages on failure + ## Next Steps - **Architecture**: Understand the framework design in [Architecture](architecture.md) diff --git a/docs/getting-started.md b/docs/getting-started.md index 7a4a354..020609f 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -5,9 +5,11 @@ New to HyperFleet E2E? This guide will help you run your first test in 10 minute ## Prerequisites - **Go 1.25+** - Required for building the framework -- **HyperFleet API access** - API endpoint URL +- **HyperFleet deployment** - Running HyperFleet API and Maestro instance - **10 minutes** - Time to complete this guide +> **Need to set up a HyperFleet environment first?** See the [Setup Guide](setup.md) for complete instructions using Kind (local) or GCP. + ## Installation ### Clone and Build @@ -28,13 +30,15 @@ You should see the command help output.
## Your First Test -**Step 1**: Set API URL +**Step 1**: Set required environment variables ```bash -export HYPERFLEET_API_URL=https://api.hyperfleet.example.com +export HYPERFLEET_API_URL= +export MAESTRO_URL= +export NAMESPACE= ``` -**Step 2**: Run tests +**Step 2**: Run tier0 tests ```bash ./bin/hyperfleet-e2e test --label-filter=tier0 @@ -61,6 +65,12 @@ The framework: # Run critical tests only ./bin/hyperfleet-e2e test --label-filter=tier0 +# Run important features +./bin/hyperfleet-e2e test --label-filter=tier1 + +# Run edge cases (requires sourcing env/env.local first) +source env/env.local && ./bin/hyperfleet-e2e test --label-filter=tier2 + # Run all cluster suite tests ./bin/hyperfleet-e2e test --focus "\[Suite: cluster\]" @@ -105,9 +115,13 @@ make generate # Regenerate OpenAPI client **API connection errors**: ```bash -# Verify API URL +# Verify API URLs are set echo $HYPERFLEET_API_URL -curl -I $HYPERFLEET_API_URL +echo $MAESTRO_URL +echo $NAMESPACE + +# Test connectivity +curl -f -X GET ${HYPERFLEET_API_URL}/api/hyperfleet/v1/clusters/ ``` **Test timeouts**: Increase timeouts via environment variables: @@ -115,11 +129,13 @@ curl -I $HYPERFLEET_API_URL HYPERFLEET_TIMEOUTS_CLUSTER_RECONCILED=45m make e2e ``` +**Namespace mismatch**: Ensure `NAMESPACE` matches your deployment namespace. Some tests deploy adapters dynamically and must target the same namespace where HyperFleet components are running. + **Configuration not taking effect**: Priority order (highest to lowest): 1. CLI flags (`--api-url`) -2. Environment variables (`HYPERFLEET_API_URL`) +2. Environment variables (`HYPERFLEET_API_URL`, `MAESTRO_URL`, `NAMESPACE`) 3. Config file (`configs/config.yaml`) 4. Built-in defaults @@ -132,8 +148,11 @@ Priority order (highest to lowest): ./bin/hyperfleet-e2e test --log-level=debug ``` +For more troubleshooting help and environment issues, see the [Runbook](runbook.md#troubleshooting) or [Setup Guide](setup.md). 
+ ## Next Steps +- **[Runbook](runbook.md)** - Running tests and troubleshooting guide - **[Architecture](architecture.md)** - Understand how the framework works - **[Development](development.md)** - Write your own tests - **CLI Reference** - Run `./bin/hyperfleet-e2e --help` diff --git a/docs/local-kind-setup.md b/docs/local-kind-setup.md deleted file mode 100644 index aa4b17d..0000000 --- a/docs/local-kind-setup.md +++ /dev/null @@ -1,106 +0,0 @@ -# Local E2E Testing with kind - -Run E2E tests locally using [kind](https://kind.sigs.k8s.io/) and RabbitMQ — no GCP dependencies. - -## Prerequisites - -- **Go** 1.25+ — [go.dev](https://go.dev/doc/install) -- **Docker** — [docker.com](https://www.docker.com/) or **Podman** — [podman.io](https://podman.io/) -- **kind** — [kind.sigs.k8s.io](https://kind.sigs.k8s.io/docs/user/quick-start/#installation) -- **kubectl** 1.28+ — [kubernetes.io](https://kubernetes.io/docs/tasks/tools/) -- **helm** 3+ — [helm.sh](https://helm.sh/docs/intro/install/) - -## Clone Repositories - -All component repos are required — images are built locally. - -```bash -for repo in hyperfleet-e2e hyperfleet-infra hyperfleet-api hyperfleet-sentinel hyperfleet-adapter; do - git clone https://github.com/openshift-hyperfleet/${repo}.git ~/projects/${repo} -done -``` - -> Repos outside `~/projects`? Set `PROJECTS_DIR` in your `.env` — see [Configuration](#configuration). 
- -## Quick Start - -```bash -# Copy config template -cp deploy-scripts/.env.example deploy-scripts/.env -# Uncomment HYPERFLEET_API_URL and MAESTRO_URL at the bottom - -# One command: cluster + images + deploy + port-forward -make local-up - -# Run tests -make e2e -``` - -For individual steps: - -```bash -./deploy-scripts/kind-local.sh setup # Cluster + RabbitMQ + Maestro + images -./deploy-scripts/kind-local.sh deploy # Deploy API + sentinels + adapters -./deploy-scripts/kind-local.sh port-forward # Forward API (:8000) + Maestro (:8100) -./deploy-scripts/kind-local.sh rebuild # Rebuild all images + restart -./deploy-scripts/kind-local.sh down # Remove everything -``` - -## Rebuilding After Code Changes - -```bash -# Rebuild one component -./deploy-scripts/kind-local.sh rebuild hyperfleet-adapter - -# Force rebuild without cache (after git pull) -./deploy-scripts/kind-local.sh rebuild --no-cache hyperfleet-adapter - -# Rebuild everything -./deploy-scripts/kind-local.sh rebuild --no-cache -``` - -Or via Make: - -```bash -make local-rebuild C=hyperfleet-adapter -make local-rebuild C=hyperfleet-adapter NO_CACHE=1 -``` - -## Running Specific Tests - -With `HYPERFLEET_API_URL` and `MAESTRO_URL` set in `.env`, just run: - -```bash -./bin/hyperfleet-e2e test --focus="\[Suite: cluster\]" --log-level=info -``` - -## Configuration - -All config lives in `deploy-scripts/.env` (gitignored). 
Copy from `.env.example` and uncomment what you need: - -```bash -cp deploy-scripts/.env.example deploy-scripts/.env -``` - -Local kind settings are at the bottom of the file: - -| Variable | Default | Description | -|----------|---------|-------------| -| `PROJECTS_DIR` | `~/projects` | Parent directory containing component repos | -| `INFRA_DIR` | `~/projects/hyperfleet-infra` | Path to hyperfleet-infra repo | -| `KIND_CLUSTER` | `kind` | Kind cluster name | -| `NAMESPACE` | `hyperfleet-local` | Kubernetes namespace | -| `HYPERFLEET_API_URL` | — | API URL for tests (`http://localhost:8000`) | -| `MAESTRO_URL` | — | Maestro URL for tests (`http://localhost:8100`) | - -## Troubleshooting - -**ImagePullBackOff** — Image not loaded into kind. Run `kind load docker-image `. With Podman: `podman save | kind load image-archive /dev/stdin`. - -**db-migrate crashing** — API binary doesn't match Helm chart: `./deploy-scripts/kind-local.sh rebuild --no-cache hyperfleet-api` - -**Container build cache stale** — Use `--no-cache` after `git pull`. - -**Connection refused** — Port-forwards died: `./deploy-scripts/kind-local.sh port-forward` - -**`make local-down`** removes components but leaves kind cluster. Full cleanup: `kind delete cluster`. diff --git a/docs/runbook.md b/docs/runbook.md index 28f7e03..36ca8b0 100644 --- a/docs/runbook.md +++ b/docs/runbook.md @@ -1,282 +1,131 @@ # HyperFleet E2E Test Runbook -> **Audience:** Developers running e2e tests locally +> **Audience:** Developers running E2E tests locally -This runbook provides step-by-step instructions for setting up, running, and troubleshooting HyperFleet E2E tests in a local development environment. +This runbook provides step-by-step instructions for running and troubleshooting HyperFleet E2E tests in a local development environment. 
## Table of Contents - [Prerequisites](#prerequisites) -- [Prepare Test Environment](#prepare-test-environment) -- [Deploy CLM to Your Created GKE Cluster](#deploy-clm-to-your-created-gke-cluster) -- [Running E2E Tests Locally](#running-e2e-tests-locally) -- [Common Failure Modes and Troubleshooting](#common-failure-modes-and-troubleshooting) +- [Running E2E Tests](#running-e2e-tests) +- [Troubleshooting](#troubleshooting) - [Test Coverage in CI](#test-coverage-in-ci) ## Prerequisites -### Required Tools +**Environment Setup:** -The following tools must be installed on your local machine: +Before running tests, you need a running HyperFleet environment. See the [Setup Guide](setup.md) for complete instructions on deploying HyperFleet using: -| Tool | Minimum Version | Purpose | Installation | -|------|----------------|---------|--------------| -| **Go** | 1.25+ | Build and run the E2E framework | [go.dev](https://go.dev/doc/install) | -| **kubectl** | 1.28+ | Interact with Kubernetes clusters | [kubernetes.io](https://kubernetes.io/docs/tasks/tools/) | -| **helm** | 3.0+ | Deploy HyperFleet components | [helm.sh](https://helm.sh/docs/intro/install/) | -| **git** | 2.30+ | Clone repositories and manage Helm charts | [git-scm.com](https://git-scm.com/downloads) | -| **podman** or **docker** | Latest | Build container images (optional) | [podman.io](https://podman.io/) or [docker.com](https://www.docker.com/) | +- **Kind (local):** Fast setup, no cloud dependencies, uses port-forwarding +- **GCP:** Cloud environment, requires GCP access, uses LoadBalancer services +The environment guide covers: +- Tool installation and verification +- HyperFleet deployment (Kind or GCP) +- Port-forwarding / LoadBalancer setup +- Environment variable configuration +- Optional image settings override -### Verify Prerequisites +**Required environment variables** (set during environment setup): -Run these commands to verify your setup: +- `HYPERFLEET_API_URL` - HyperFleet API endpoint +- 
`MAESTRO_URL` - Maestro API endpoint +- `NAMESPACE` - Deployment namespace +- `source env/env.local` - Required only before running tier2 tests -```bash -# Check Go version -go version # Should show 1.25 or higher - -# Check kubectl -kubectl version --client - -# Check Helm -helm version - -# Check Git -git --version - -# Check container tool (optional) -podman --version || docker --version -``` - -## Prepare Test Environment - -### Clone and Configure Terraform - -First, clone the infrastructure repository and navigate to the terraform directory: - -```bash -git clone https://github.com/openshift-hyperfleet/hyperfleet-infra/ -cd hyperfleet-infra/terraform -``` - -### Install GKE Cluster - -Run the following Terraform commands to deploy your GKE cluster. - -#### Terraform Commands - -```bash -# Copy and update the terraform variable file -cp envs/gke/dev.tfvars.example envs/gke/dev-.tfvars -# Update the following settings in your tfvars file -# developer_name - set to your name, use_pubsub=false, enable_dead_letter=false - -# Copy and update the terraform backend file -cp envs/gke/dev.tfbackend.example envs/gke/dev-.tfbackend -# update the prefix field with your name - -# Initialize terraform with your backend configuration -terraform init -backend-config=envs/gke/dev-.tfbackend - -# Preview the infrastructure changes -terraform plan -var-file=envs/gke/dev-.tfvars - -# Apply the infrastructure changes -terraform apply -var-file=envs/gke/dev-.tfvars -``` -### Install Maestro - -After deploying the GKE cluster, install Maestro and create a consumer: - -```bash -# Install Maestro -make install-maestro - -# Create Maestro consumer (default: cluster1, test adapter are configured with it) -make create-maestro-consumer MAESTRO_CONSUMER=cluster1 - -# Patch the service type to LoadBalancer to expose a external IP -kubectl patch svc maestro -n maestro -p '{"spec":{"type":"LoadBalancer"}}' -``` - -### Login to Cluster - -After the deployment completes, log in to the cluster locally using the
output command (replace your name): - -```bash -gcloud container clusters get-credentials hyperfleet-dev- --zone us-central1-a --project hcm-hyperfleet -``` - -## Deploy CLM to Your Created GKE Cluster - -### Clone the Repository - -```bash -git clone https://github.com/openshift-hyperfleet/hyperfleet-e2e.git -cd hyperfleet-e2e -``` - -### Deploy HyperFleet Components - -The E2E tests require a running HyperFleet environment (API, Sentinel, and Adapters). - -```bash -# 1. Copy the example configuration -cd deploy-scripts/ -cp .env.example .env - -# 2. Edit .env with your settings -vim .env -source .env - -# 3. Deploy with custom configuration -./deploy-clm.sh --action install --namespace "${NAMESPACE}" - -``` - -**Key Configuration Parameters** (in `.env`): - -```bash -# GCP configuration (required for Pub/Sub) -export GCP_PROJECT_ID="${GCP_PROJECT_ID:-hcm-hyperfleet}" - -# Image configuration (optional - defaults to latest) -export API_IMAGE_TAG="${API_IMAGE_TAG:-latest}" -export SENTINEL_IMAGE_TAG="${SENTINEL_IMAGE_TAG:-latest}" -export ADAPTER_IMAGE_TAG="${ADAPTER_IMAGE_TAG:-latest}" - -# Adapters to deploy (optional) -export CLUSTER_TIER0_ADAPTERS_DEPLOYMENT="${CLUSTER_TIER0_ADAPTERS_DEPLOYMENT:-cl-namespace,cl-job,cl-deployment,cl-maestro}" -export NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT="${NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT:-np-configmap}" - -# Adapters for API cluster/nodepool configuration -export API_ADAPTERS_CLUSTER="${API_ADAPTERS_CLUSTER:-cl-namespace,cl-job,cl-deployment,cl-maestro}" -export API_ADAPTERS_NODEPOOL="${API_ADAPTERS_NODEPOOL:-np-configmap}" - - -# NAMESPACE must be unique to prevent GCP Pub/Sub topic/subscription collisions. 
-# Set in the .env.example file as: -export NAMESPACE="${NAMESPACE:-hyperfleet-e2e-$(echo ${USER:-default} | tr '[:upper:]' '[:lower:]')}" -# Or can manually set it with as the namespace is DNS-1123 compliant -export NAMESPACE= - -``` +## Running E2E Tests -#### Verify Deployment +### Build the E2E Binary ```bash -# Check Helm releases -helm list -n "${NAMESPACE}" - -# Verify all pods are running -kubectl get pods -n "${NAMESPACE}" - -# Check pod logs if any issues -kubectl logs -n "${NAMESPACE}" -``` - -**Expected State**: All pods should show status `Running` with `READY 1/1`. - - -## Running E2E Tests Locally - -### Build the E2E Framework - -```bash -# Generate API client from OpenAPI spec -make generate - -# Build the hyperfleet-e2e binary +# Generate API client from OpenAPI spec and build make build # Verify the build ./bin/hyperfleet-e2e --help ``` -### Configure API Access -If the Maestro and Hyperfleet API services are not exposed via LoadBalancer, you'll need to port-forward them locally: +### Run Tests -```bash -# Terminal 1 - Port-forward Maestro API (local port 8000) -kubectl port-forward -n maestro svc/maestro 8000:8000 +Make sure you've set the required environment variables from the [Prerequisites](#prerequisites) section: -# Terminal 2 - Port-forward Hyperfleet API (local port 8001) -kubectl port-forward -n ${NAMESPACE} svc/hyperfleet-api 8001:8000 -``` +- `HYPERFLEET_API_URL` +- `MAESTRO_URL` +- `NAMESPACE` -Then configure your environment variables: +**Run tests by tier:** ```bash -export MAESTRO_URL=http://localhost:8000 -export HYPERFLEET_API_URL=http://localhost:8001 +# Run tier0 tests (critical path) +./bin/hyperfleet-e2e test --label-filter=tier0 + +# Run tier1 tests (important features) +./bin/hyperfleet-e2e test --label-filter=tier1 + +# Run tier2 tests (edge cases - requires sourcing env/env.local first) +source env/env.local && ./bin/hyperfleet-e2e test --label-filter=tier2 ``` -### Basic Test Execution +**Run tests by suite:** ```bash 
-# Run tests with specific label -./bin/hyperfleet-e2e test --label-filter=tier0 - -# Run tests for specific suite +# Run all cluster tests ./bin/hyperfleet-e2e test --focus "\[Suite: cluster\]" -# Run specific test by description -./bin/hyperfleet-e2e test --focus "Create Cluster via API" +# Run all nodepool tests +./bin/hyperfleet-e2e test --focus "\[Suite: nodepool\]" +# Run all adapter tests +./bin/hyperfleet-e2e test --focus "\[Suite: adapter\]" ``` -**Example:** +**Run specific tests by description:** ```bash -# Using environment variable -export HYPERFLEET_API_URL= -export MAESTRO_URL= -export NAMESPACE= -# Run all tier0 cases -./bin/hyperfleet-e2e test --label-filter=tier0 - -# Run all tier1 cases -./bin/hyperfleet-e2e test --label-filter=tier1 +./bin/hyperfleet-e2e test --focus "Create Cluster via API" ``` -### View All Options +**View available options:** ```bash -# Show all available commands +# Show all commands ./bin/hyperfleet-e2e --help # Show test command options ./bin/hyperfleet-e2e test --help ``` -## Common Failure Modes and Troubleshooting +## Troubleshooting -### Tools and Tips +### Debugging Tools -The following tools are available to help debug and interact with HyperFleet components: +The following tools can help debug and interact with HyperFleet components: | Tool | Purpose | Link | |------|---------|------| -| **Hyperfleet Explorer** | View cluster/nodepool API responses | [https://github.com/rh-amarin/hyperfleet-explorer](https://github.com/rh-amarin/hyperfleet-explorer) | -| **Scripts** | Interact with various component APIs and perform operations | [https://github.com/rh-amarin/hyperfleet-scripts](https://github.com/rh-amarin/hyperfleet-scripts) | -| **k9s** | Kubernetes CLI to manage your clusters in style! 
| [https://k9scli.io/](https://k9scli.io/) | +| **HyperFleet Explorer** | View cluster/nodepool API responses in a UI | [hyperfleet-explorer](https://github.com/rh-amarin/hyperfleet-explorer) | +| **HyperFleet Scripts** | Interact with component APIs and perform operations | [hyperfleet-scripts](https://github.com/openshift-hyperfleet/hyperfleet-scripts) | +| **k9s** | Kubernetes CLI to manage clusters | [k9scli.io](https://k9scli.io/) | + +### Common Issues + +#### 1. Namespace Mismatch -### General Troubleshooting +**Problem:** Tests fail to find adapters or create resources. -#### Namespace Configuration +**Cause:** The `NAMESPACE` environment variable doesn't match the deployment namespace. Some tests deploy adapters dynamically and must target the same namespace where HyperFleet components are running. -**Important:** Set the `NAMESPACE` environment variable to match the namespace used during deployment. Some test cases deploy adapters dynamically and need to target the same namespace where your HyperFleet components are running. +**Solution:** ```bash -# Set NAMESPACE if you deployed to a unique namespace -export NAMESPACE= +export NAMESPACE= ./bin/hyperfleet-e2e test --label-filter=tier0 ``` -#### Timeout Errors +#### 2. Timeout Errors -If you encounter timeout errors like this: +**Problem:** Test failures with timeout errors: ``` [FAILED] cluster creation failed @@ -285,14 +134,16 @@ Unexpected error: context deadline exceeded (Client.Timeout exceeded while awaiting headers) ``` -**Troubleshooting steps:** +**Solution:** + +1. **Verify all pods are running:** -1. **Check if all pods are running:** ```bash kubectl get pods -n ${NAMESPACE} ``` - Expected output - all pods should show `Running` with `READY 1/1`: + Expected output — all pods should show `Running` with `READY 1/1`: + ``` NAME READY STATUS RESTARTS AGE hyperfleet-api-xxx 1/1 Running 0 10m @@ -302,67 +153,111 @@ Unexpected error: ``` 2. 
**Check pod logs for errors:** + ```bash - # Check API logs + # API logs kubectl logs -n ${NAMESPACE} deployment/hyperfleet-api --tail=50 - # Check Sentinel logs + # Sentinel logs kubectl logs -n ${NAMESPACE} deployment/hyperfleet-sentinel --tail=50 - # Check adapter logs + # Adapter logs (example) kubectl logs -n ${NAMESPACE} deployment/cl-namespace-adapter --tail=50 ``` -3. **Verify API connectivity:** +3. **Test API connectivity:** + ```bash - # Test API endpoint curl -f -X GET ${HYPERFLEET_API_URL}/api/hyperfleet/v1/clusters/ ``` + Expected: HTTP 200 response with JSON + 4. **Check service endpoints:** + ```bash - # Verify LoadBalancer has external IP + # For GCP deployments - verify LoadBalancer has external IP + kubectl get svc -n ${NAMESPACE} hyperfleet-api + + # For Kind deployments - verify port-forwarding is active + lsof -i :${API_LOCAL_PORT} + ``` + +#### 3. Image Pull Errors + +**Problem:** Pods stuck in `ImagePullBackOff` or `ErrImagePull` status. + +**Solution:** + +1. Check if `env/env.local` image settings match your infra deployment. See [Configure Test Settings](setup.md#configure-test-settings) in the Setup guide for how to override image settings. +2. Verify image registry credentials are configured in your cluster +3. Check pod events: + + ```bash + kubectl describe pod <pod-name> -n ${NAMESPACE} + ``` + +#### 4. Port-Forward Connection Refused (Kind) + +**Problem:** Tests fail with "connection refused" when using Kind. + +**Solution:** + +1. Verify port-forward processes are running: + + ```bash + ps aux | grep "port-forward" + ``` + +2. Restart port-forwarding in separate terminals (see [Kind setup](setup.md#option-1-kind-local) in the Setup guide) + +3. Verify services exist: + + ```bash + kubectl get svc -n maestro maestro kubectl get svc -n ${NAMESPACE} hyperfleet-api ``` ## Test Coverage in CI -### How Your Tests Run in CI +Tests you run locally are automatically executed in nightly CI jobs for continuous validation.
-The test cases you run locally are automatically picked up and executed in nightly CI jobs to ensure continuous validation of the system. +### CI Jobs -**Job Configuration File:** All job definitions can be found in the [openshift-hyperfleet-hyperfleet-e2e-main__e2e.yaml](https://github.com/openshift/release/blob/main/ci-operator/config/openshift-hyperfleet/hyperfleet-e2e/openshift-hyperfleet-hyperfleet-e2e-main__e2e.yaml) configuration file. +**Job Configuration:** [openshift-hyperfleet-hyperfleet-e2e-main__e2e.yaml](https://github.com/openshift/release/blob/main/ci-operator/config/openshift-hyperfleet/hyperfleet-e2e/openshift-hyperfleet-hyperfleet-e2e-main__e2e.yaml) | Job Name | Test Tier | Schedule | Description | |----------|-----------|----------|-------------| -| **tier0-nightly** | tier0 | Daily | Runs basic smoke tests and happy critical path validations | -| **tier1-nightly** | tier1 | Daily | Runs extended test suite | +| **tier0-nightly** | tier0 | Daily | Critical path smoke tests | +| **tier1-nightly** | tier1 | Daily | Extended test suite | -### Job Configuration and Management +### Managing CI Jobs -For comprehensive information about CI jobs, see the [Add HyperFleet E2E CI Job in Prow](https://github.com/openshift-hyperfleet/architecture/blob/main/hyperfleet/docs/test-release/add-hyperfleet-e2e-ci-job-in-prow.md) documentation, which covers: +For details on CI job configuration, viewing results, manual triggering, and debugging failures, see: -- How CI jobs are configured in Prow -- Viewing job results -- Triggering jobs manually -- Debugging job failures +- [Add HyperFleet E2E CI Job in Prow](https://github.com/openshift-hyperfleet/architecture/blob/main/hyperfleet/docs/test-release/add-hyperfleet-e2e-ci-job-in-prow.md) + +--- ## Changelog -All notable changes to this document will be documented in this section. +All notable changes to this document are documented below. + +Format based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
+ +### 2026-06-10 -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +#### Changed +- Restructured runbook for clarity and consistency +- Moved Kind and GCP setup into distinct sections of setup.md with clearer step-by-step instructions +- Improved troubleshooting section with numbered common issues and solutions +- Streamlined test execution instructions with better examples +- Cleaned up formatting and removed duplicate content ### 2026-03-30 #### Added - Initial runbook with prerequisites, environment setup, test execution, troubleshooting, and CI coverage sections -- Prerequisites section with required tools and verification steps -- Prepare Test Environment section with Terraform and GKE cluster setup -- Deploy CLM section with HyperFleet component deployment instructions -- Running E2E Tests Locally section with build and execution commands -- Common Failure Modes and Troubleshooting section with debugging tools and tips -- Test Coverage in CI section documenting nightly jobs and Prow integration diff --git a/docs/setup.md b/docs/setup.md new file mode 100644 index 0000000..b84ab5e --- /dev/null +++ b/docs/setup.md @@ -0,0 +1,150 @@ +# Setup Guide + +This guide covers setting up a HyperFleet environment for running E2E tests locally.
+ +## Table of Contents + +- [Deployment Options](#deployment-options) + - [Option 1: Kind (Local)](#option-1-kind-local) + - [Option 2: GCP](#option-2-gcp) +- [Configure Test Settings](#configure-test-settings) +- [Troubleshooting](#troubleshooting) + +## Deployment Options + +Clone the infrastructure repository: + +```bash +git clone https://github.com/openshift-hyperfleet/hyperfleet-infra/ +cd hyperfleet-infra/terraform +``` + +Choose one of the following deployment options based on your needs: + +- **Kind (local):** Fast setup, no cloud dependencies, uses port-forwarding +- **GCP:** Cloud environment, requires GCP access, slower setup, uses LoadBalancer services + +### Option 1: Kind (Local) + +**1. Deploy HyperFleet to Kind cluster:** + +```bash +export NAMESPACE=<your-namespace> +NAMESPACE=${NAMESPACE} HELMFILE_ENV=e2e-kind make local-up-kind +``` + +**2. Set up port-forwarding in two separate terminals:** + +```bash +# Terminal 1 - Port-forward Maestro API +export MAESTRO_LOCAL_PORT=8100 +kubectl port-forward -n maestro svc/maestro ${MAESTRO_LOCAL_PORT}:8000 + +# Terminal 2 - Port-forward HyperFleet API +export API_LOCAL_PORT=8000 +kubectl port-forward -n ${NAMESPACE} svc/hyperfleet-api ${API_LOCAL_PORT}:8000 +``` + +**3. Configure environment variables in the terminal where you will run tests (re-export `MAESTRO_LOCAL_PORT` and `API_LOCAL_PORT` there first):** + +```bash +export MAESTRO_URL=http://localhost:${MAESTRO_LOCAL_PORT} +export HYPERFLEET_API_URL=http://localhost:${API_LOCAL_PORT} +export NAMESPACE=<your-namespace> +``` + +**4. Verify deployment:** + +```bash +# Check Helm releases +helm list -n ${NAMESPACE} + +# Verify all pods are running +kubectl get pods -n ${NAMESPACE} + +# Test API connectivity +curl -f -X GET ${HYPERFLEET_API_URL}/api/hyperfleet/v1/clusters/ +``` + +### Option 2: GCP + +**1. Deploy HyperFleet to GCP cluster:** + +```bash +export NAMESPACE=<your-namespace> +NAMESPACE=${NAMESPACE} HELMFILE_ENV=e2e-gcp make local-up-gcp +``` + +**2.
Expose Maestro service via LoadBalancer:** + +```bash +# Patch Maestro service to expose external IP +kubectl patch svc maestro -n maestro -p '{"spec":{"type":"LoadBalancer"}}' + +# Wait for external IPs to be assigned (may take 1-2 minutes); each watch blocks, so press Ctrl+C once the IP appears +kubectl get svc maestro -n maestro -w +kubectl get svc hyperfleet-api -n ${NAMESPACE} -w +``` + +**3. Configure environment variables:** + +```bash +export API_EXTERNAL_IP=$(kubectl get svc hyperfleet-api -n ${NAMESPACE} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') +export MAESTRO_EXTERNAL_IP=$(kubectl get svc maestro -n maestro -o jsonpath='{.status.loadBalancer.ingress[0].ip}') +export HYPERFLEET_API_URL=http://${API_EXTERNAL_IP}:8000 +export MAESTRO_URL=http://${MAESTRO_EXTERNAL_IP}:8000 +export NAMESPACE=<your-namespace> +``` + +**4. Verify deployment:** + +```bash +# Check Helm releases +helm list -n ${NAMESPACE} + +# Verify all pods are running +kubectl get pods -n ${NAMESPACE} + +# Test API connectivity +curl -f -X GET ${HYPERFLEET_API_URL}/api/hyperfleet/v1/clusters/ +``` + +## Configure Test Settings + +### Override Image Settings (Optional) + +If your deployment uses custom image settings, update `env/env.local` in this repo to match your infrastructure deployment settings: + +- **Kind deployments:** Match settings from [`infra/terraform/env.kind`](https://github.com/openshift-hyperfleet/hyperfleet-infra/blob/main/env.kind#L18-L21) +- **GCP deployments:** Match settings from [`infra/terraform/env.gcp`](https://github.com/openshift-hyperfleet/hyperfleet-infra/blob/main/env.gcp#L18-L21) + +**Update `env/env.local`:** + +```bash +# env/env.local +IMAGE_REGISTRY=<your-registry> +<COMPONENT>_IMAGE_REPO=<your-image-repo> +<COMPONENT>_IMAGE_TAG=<your-image-tag> +``` + +**Source the configuration:** + +```bash +source env/env.local +``` + +Although overriding image settings is optional, sourcing `env/env.local` is required for running tier2 tests.
+ +## Troubleshooting + +### Infrastructure Setup Issues + +For additional help with infrastructure deployment and configuration, see: + +- [hyperfleet-infra README](https://github.com/openshift-hyperfleet/hyperfleet-infra/blob/main/README.md) - Main infrastructure documentation + +For test-specific troubleshooting (timeouts, API errors, namespace mismatches), see the [Runbook Troubleshooting](runbook.md#troubleshooting) section. + +--- + +**Next Steps:** Once your environment is set up, see the [Runbook](runbook.md) for running tests and troubleshooting. diff --git a/env/env.local b/env/env.local new file mode 100644 index 0000000..84e331c --- /dev/null +++ b/env/env.local @@ -0,0 +1,60 @@ +# ============================================================================ +# NOTE: Local development environment configuration +# Can be used to test E2E tests locally - kind or gcp clusters +# Can be modified without testing the changes in the E2E pipeline +# ============================================================================ + + +# Image Registry Configuration +export IMAGE_REGISTRY="${IMAGE_REGISTRY:-registry.ci.openshift.org}" + +# API Component Configuration +export API_IMAGE_REPO="${API_IMAGE_REPO:-ci/hyperfleet-api}" +export API_IMAGE_TAG="${API_IMAGE_TAG:-latest}" +export API_SERVICE_TYPE="${API_SERVICE_TYPE:-LoadBalancer}" + + +# ===================================================================== +# API Adapter Configuration +# ===================================================================== +# NOTE: These are SEPARATE from tier0 deployment configuration above +# These should be set based on specific test case requirements +# Set per test case as needed + +# Adapters for API cluster configuration +export API_ADAPTERS_CLUSTER="${API_ADAPTERS_CLUSTER:-cl-namespace,cl-job,cl-deployment,cl-maestro}" + +# Adapters for API nodepool configuration +export API_ADAPTERS_NODEPOOL="${API_ADAPTERS_NODEPOOL:-np-configmap}" + +# Sentinel Component Configuration 
+export SENTINEL_IMAGE_REPO="${SENTINEL_IMAGE_REPO:-ci/hyperfleet-sentinel}" +export SENTINEL_IMAGE_TAG="${SENTINEL_IMAGE_TAG:-latest}" +export SENTINEL_BROKER_TYPE="${SENTINEL_BROKER_TYPE:-googlepubsub}" +export SENTINEL_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING="${SENTINEL_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING:-true}" + +# Adapter Component Configuration +export ADAPTER_IMAGE_REPO="${ADAPTER_IMAGE_REPO:-ci/hyperfleet-adapter}" +export ADAPTER_IMAGE_TAG="${ADAPTER_IMAGE_TAG:-latest}" +export ADAPTER_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING="${ADAPTER_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING:-true}" +export ADAPTER_GOOGLEPUBSUB_CREATE_SUBSCRIPTION_IF_MISSING="${ADAPTER_GOOGLEPUBSUB_CREATE_SUBSCRIPTION_IF_MISSING:-true}" + + +# Helm Chart Sources +# Note: ADAPTER_CHART_* and API_CHART_* vars are also required by tier2 E2E tests +# (e.g., crash recovery) which deploy dedicated adapters and upgrade API config at runtime. +# When running tier2 tests in CI, ensure these are exported alongside GINKGO_LABEL_FILTER=tier2. 
+export API_CHART_REPO="${API_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-api.git}" +export API_CHART_REF="${API_CHART_REF:-main}" +export API_CHART_PATH="${API_CHART_PATH:-charts}" + +export SENTINEL_CHART_REPO="${SENTINEL_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-sentinel.git}" +export SENTINEL_CHART_REF="${SENTINEL_CHART_REF:-main}" +export SENTINEL_CHART_PATH="${SENTINEL_CHART_PATH:-charts}" + +export ADAPTER_CHART_REPO="${ADAPTER_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-adapter.git}" +export ADAPTER_CHART_REF="${ADAPTER_CHART_REF:-main}" +export ADAPTER_CHART_PATH="${ADAPTER_CHART_PATH:-charts}" + + +export GCP_PROJECT_ID="${GCP_PROJECT_ID:-hcm-hyperfleet}" \ No newline at end of file diff --git a/testdata/adapter-configs/cl-deployment/adapter-config.yaml b/testdata/adapter-configs/cl-deployment/adapter-config.yaml deleted file mode 100644 index d20254b..0000000 --- a/testdata/adapter-configs/cl-deployment/adapter-config.yaml +++ /dev/null @@ -1,23 +0,0 @@ -adapter: - name: cl-deployment - #version: "0.1.0" - -# Log the full merged configuration after load (default: false) -debug_config: false -log: - level: debug - -clients: - hyperfleet_api: - base_url: CHANGE_ME - version: v1 - timeout: 2s - retry_attempts: 3 - retry_backoff: exponential - - broker: - subscription_id: CHANGE_ME - topic: CHANGE_ME - - kubernetes: - api_version: "v1" diff --git a/testdata/adapter-configs/cl-deployment/adapter-task-config.yaml b/testdata/adapter-configs/cl-deployment/adapter-task-config.yaml deleted file mode 100644 index 406ae4e..0000000 --- a/testdata/adapter-configs/cl-deployment/adapter-task-config.yaml +++ /dev/null @@ -1,177 +0,0 @@ -# Simple valid HyperFleet Adapter Configuration for testing - -# Parameters with all required variables -params: - - name: "clusterId" - source: "event.id" - type: "string" - required: true - -# Preconditions with valid operators and CEL expressions -preconditions: - - name: 
"clusterStatus" - api_call: - method: "GET" - url: "/clusters/{{ .clusterId }}" - timeout: 10s - retry_attempts: 3 - retry_backoff: "exponential" - capture: - - name: "clusterName" - field: "name" - - name: "generationSpec" - field: "generation" - - name: "is_deleting" - expression: "has(clusterStatus.deleted_time)" - - name: "clusterNotReconciled" - expression: | - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? status.conditions.filter(c, c.type == "Reconciled")[0].status != "True" - : true - - name: "clusterReconciledTTL" - expression: | - (timestamp(now()) - timestamp( - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? status.conditions.filter(c, c.type == "Reconciled")[0].last_transition_time - : now() - )).getSeconds() > 300 - - - name: "clusterAdapterStatus" - api_call: - method: "GET" - url: "/clusters/{{ .clusterId }}/statuses" - timeout: 10s - retry_attempts: 3 - retry_backoff: "exponential" - capture: - - name: "clusterJobStatus" - field: "{.items[?(@.adapter=='cl-job')].conditions[?(@.type=='Available')].status}" - - - name: "validationCheck" - expression: | - is_deleting || (clusterJobStatus == "True" && (clusterNotReconciled || clusterReconciledTTL)) - -# Resources with valid K8s manifests -resources: - - name: "testDeployment" - transport: - client: "kubernetes" - manifest: - ref: "/etc/adapter/deployment.yaml" - discovery: - namespace: "{{ .clusterId }}" - by_selectors: - label_selector: - hyperfleet.io/resource-type: "deployment" - hyperfleet.io/cluster-id: "{{ .clusterId }}" - lifecycle: - delete: - propagationPolicy: Background - when: - expression: "is_deleting" - -# Post-processing with valid CEL expressions -post: - payloads: - - name: "clusterStatusPayload" - build: - adapter: "{{ .adapter.name }}" - conditions: - # Applied: Deployment successfully created - - type: "Applied" - status: - expression: | - has(resources.testDeployment) ? 
"True" : "False" - reason: - expression: | - has(resources.testDeployment) - ? "DeploymentApplied" - : "DeploymentPending" - message: - expression: | - has(resources.testDeployment) - ? "testDeployment manifest applied successfully" - : "testDeployment is pending to be applied" - # Available: Check deployment status conditions - - type: "Available" - status: - expression: | - has(resources.testDeployment) ? - ( resources.?testDeployment.?status.?conditions.orValue([]).exists(c, c.type == "Available") - ? resources.testDeployment.status.conditions.filter(c, c.type == "Available")[0].status : "False") - : "False" - reason: - expression: | - resources.?testDeployment.?status.?conditions.orValue([]).exists(c, c.type == "Available") - ? resources.testDeployment.status.conditions.filter(c, c.type == "Available")[0].reason - : resources.?testDeployment.?status.?conditions.orValue([]).exists(c, c.type == "Progressing" && c.status == "False") ? "DeploymentFailed" - : resources.?testDeployment.?status.hasValue() ? "DeploymentInProgress" : "DeploymentPending" - message: - expression: | - resources.?testDeployment.?status.?conditions.orValue([]).exists(c, c.type == "Available") - ? resources.testDeployment.status.conditions.filter(c, c.type == "Available")[0].message - : resources.?testDeployment.?status.?conditions.orValue([]).exists(c, c.type == "Progressing" && c.status == "False") ? "Deployment failed" - : resources.?testDeployment.?status.hasValue() ? "Deployment in progress" : "Deployment is pending" - # Health: Adapter execution status (runtime) - - type: "Health" - status: - expression: | - adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - ? "True" - : "False" - reason: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "ExecutionFailed:" + adapter.?executionError.?phase.orValue("unknown") - : adapter.?resourcesSkipped.orValue(false) - ? 
"ResourcesSkipped" - : "Healthy" - message: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "Adapter failed at phase [" - + adapter.?executionError.?phase.orValue("unknown") - + "] step [" - + adapter.?executionError.?step.orValue("unknown") - + "]: " - + adapter.?executionError.?message.orValue(adapter.?errorMessage.orValue("no details")) - : adapter.?resourcesSkipped.orValue(false) - ? "Resources skipped: " + adapter.?skipReason.orValue("unknown reason") - : "Adapter execution completed successfully" - # Finalized: True once deployment is confirmed deleted - - type: "Finalized" - status: - expression: | - is_deleting - && adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - && !resources.?testDeployment.hasValue() - ? "True" - : "False" - reason: - expression: | - !is_deleting ? "NotDeleting" - : !resources.?testDeployment.hasValue() - ? "CleanupConfirmed" - : "CleanupInProgress" - message: - expression: | - !is_deleting ? "Resource not marked for deletion" - : !resources.?testDeployment.hasValue() - ? 
"All resources deleted; cleanup confirmed" - : "Deletion in progress; waiting for deployment to be removed" - # Event generation ID metadata field needs to use expression to avoid interpolation issues - observed_generation: - expression: "generationSpec" - observed_time: '{{ now | date "2006-01-02T15:04:05Z07:00" }}' - - post_actions: - - name: "reportClusterStatus" - api_call: - method: "PUT" - url: "/clusters/{{ .clusterId }}/statuses" - headers: - - name: "Content-Type" - value: "application/json" - body: "{{ .clusterStatusPayload }}" diff --git a/testdata/adapter-configs/cl-deployment/adapter-task-resource-deployment.yaml b/testdata/adapter-configs/cl-deployment/adapter-task-resource-deployment.yaml deleted file mode 100644 index 6f66b37..0000000 --- a/testdata/adapter-configs/cl-deployment/adapter-task-resource-deployment.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Test deployment template -apiVersion: apps/v1 -kind: Deployment -metadata: - name: "test-nginx-{{ .clusterId }}" - namespace: "{{ .clusterId }}" - labels: - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/resource-type: "deployment" - annotations: - hyperfleet.io/generation: "{{ .generationSpec }}" -spec: - replicas: 1 - selector: - matchLabels: - app: test - hyperfleet.io/cluster-id: "{{ .clusterId }}" - template: - metadata: - labels: - app: test - hyperfleet.io/cluster-id: "{{ .clusterId }}" - spec: - containers: - - name: test - image: nginx:latest - ports: - - containerPort: 80 - diff --git a/testdata/adapter-configs/cl-deployment/values.yaml b/testdata/adapter-configs/cl-deployment/values.yaml deleted file mode 100644 index 994cbc3..0000000 --- a/testdata/adapter-configs/cl-deployment/values.yaml +++ /dev/null @@ -1,32 +0,0 @@ -adapterConfig: - create: true - files: - adapter-config.yaml: cl-deployment/adapter-config.yaml - log: - level: debug - -adapterTaskConfig: - create: true - files: - task-config.yaml: cl-deployment/adapter-task-config.yaml - deployment.yaml: 
cl-deployment/adapter-task-resource-deployment.yaml - -broker: - type: googlepubsub - create: true - googlepubsub: - projectId: CHANGE_ME - subscriptionId: CHANGE_ME - topic: CHANGE_ME - deadLetterTopic: CHANGE_ME - -image: - registry: CHANGE_ME - repository: ci/hyperfleet-adapter - pullPolicy: Always - tag: latest - -rbac: - resources: - - deployments - - deployments/status diff --git a/testdata/adapter-configs/cl-job/adapter-config.yaml b/testdata/adapter-configs/cl-job/adapter-config.yaml deleted file mode 100644 index 88098e0..0000000 --- a/testdata/adapter-configs/cl-job/adapter-config.yaml +++ /dev/null @@ -1,23 +0,0 @@ -adapter: - name: cl-job - #version: "0.1.0" - -# Log the full merged configuration after load (default: false) -debug_config: false -log: - level: debug - -clients: - hyperfleet_api: - base_url: CHANGE_ME - version: v1 - timeout: 2s - retry_attempts: 3 - retry_backoff: exponential - - broker: - subscription_id: CHANGE_ME - topic: CHANGE_ME - - kubernetes: - api_version: "v1" diff --git a/testdata/adapter-configs/cl-job/adapter-task-config.yaml b/testdata/adapter-configs/cl-job/adapter-task-config.yaml deleted file mode 100644 index a59aac1..0000000 --- a/testdata/adapter-configs/cl-job/adapter-task-config.yaml +++ /dev/null @@ -1,186 +0,0 @@ -# Simple valid HyperFleet Adapter Configuration for testing - -# Parameters with all required variables -params: - - name: "clusterId" - source: "event.id" - type: "string" - required: true - -# Preconditions with valid operators and CEL expressions -preconditions: - - name: "clusterStatus" - api_call: - method: "GET" - url: "/clusters/{{ .clusterId }}" - timeout: 10s - retry_attempts: 3 - retry_backoff: "exponential" - capture: - - name: "clusterName" - field: "name" - - name: "generationSpec" - field: "generation" - - name: "is_deleting" - expression: "has(clusterStatus.deleted_time)" - - name: "clusterNotReconciled" - expression: | - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? 
status.conditions.filter(c, c.type == "Reconciled")[0].status != "True" - : true - - name: "clusterReconciledTTL" - expression: | - (timestamp(now()) - timestamp( - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? status.conditions.filter(c, c.type == "Reconciled")[0].last_transition_time - : now() - )).getSeconds() > 300 - - - name: "clusterAdapterStatus" - api_call: - method: "GET" - url: "/clusters/{{ .clusterId }}/statuses" - timeout: 10s - retry_attempts: 3 - retry_backoff: "exponential" - capture: - - name: "clusterNamespaceStatus" - field: "{.items[?(@.adapter=='cl-namespace')].data.namespace.status}" - - - name: "validationCheck" - expression: | - is_deleting || (clusterNamespaceStatus == "Active" && (clusterNotReconciled || clusterReconciledTTL)) - -# Resources with valid K8s manifests -resources: - # Hello-world job created in the cluster namespace - - name: "testJob" - transport: - client: "kubernetes" - manifest: - ref: "/etc/adapter/job.yaml" - discovery: - namespace: "{{ .clusterId }}" - by_selectors: - label_selector: - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/resource-type: "job" - lifecycle: - delete: - propagationPolicy: Background - when: - expression: "is_deleting" - -# Post-processing with valid CEL expressions -# Reports Applied, Available, and Health conditions for the hello-world job -post: - payloads: - - name: "clusterStatusPayload" - build: - adapter: "{{ .adapter.name }}" - conditions: - # Applied: Job successfully created - - type: "Applied" - status: - expression: | - has(resources.testJob) ? "True" : "False" - reason: - expression: | - has(resources.testJob) - ? "JobApplied" - : "JobPending" - message: - expression: | - has(resources.testJob) - ? "testJob manifest applied successfully" - : "testJob is pending to be applied" - # Available: True when the job has completed successfully - - type: "Available" - status: - expression: | - has(resources.testJob) ? 
- ( resources.?testJob.?status.?conditions.orValue([]).exists(c, c.type == "Complete") - ? resources.testJob.status.conditions.filter(c, c.type == "Complete")[0].status - : "False") - : "False" - reason: - expression: | - resources.?testJob.?status.?conditions.orValue([]).exists(c, c.type == "Complete") - ? "JobComplete" - : resources.?testJob.?status.?conditions.orValue([]).exists(c, c.type == "Failed") - ? "JobFailed" - : resources.?testJob.?status.hasValue() - ? "JobRunning" - : "JobPending" - message: - expression: | - resources.?testJob.?status.?conditions.orValue([]).exists(c, c.type == "Complete") - ? "Hello-world job completed successfully" - : resources.?testJob.?status.?conditions.orValue([]).exists(c, c.type == "Failed") - ? "Hello-world job failed" - : resources.?testJob.?status.hasValue() - ? "Hello-world job is running" - : "Hello-world job is pending" - # Health: Adapter execution status (runtime) - - type: "Health" - status: - expression: | - adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - ? "True" - : "False" - reason: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "ExecutionFailed:" + adapter.?executionError.?phase.orValue("unknown") - : adapter.?resourcesSkipped.orValue(false) - ? "ResourcesSkipped" - : "Healthy" - message: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "Adapter failed at phase [" - + adapter.?executionError.?phase.orValue("unknown") - + "] step [" - + adapter.?executionError.?step.orValue("unknown") - + "]: " - + adapter.?executionError.?message.orValue(adapter.?errorMessage.orValue("no details")) - : adapter.?resourcesSkipped.orValue(false) - ? 
"Resources skipped: " + adapter.?skipReason.orValue("unknown reason") - : "Adapter execution completed successfully" - # Finalized: True once all job resources are confirmed deleted - - type: "Finalized" - status: - expression: | - is_deleting - && adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - && !resources.?testJob.hasValue() - ? "True" - : "False" - reason: - expression: | - !is_deleting ? "NotDeleting" - : !resources.?testJob.hasValue() - ? "CleanupConfirmed" - : "CleanupInProgress" - message: - expression: | - !is_deleting ? "Resource not marked for deletion" - : !resources.?testJob.hasValue() - ? "All resources deleted; cleanup confirmed" - : "Deletion in progress; waiting for resources to be removed" - # Event generation ID metadata field needs to use expression to avoid interpolation issues - observed_generation: - expression: "generationSpec" - observed_time: "{{ now | date \"2006-01-02T15:04:05Z07:00\" }}" - - post_actions: - - name: "reportClusterStatus" - api_call: - method: "PUT" - url: "/clusters/{{ .clusterId }}/statuses" - headers: - - name: "Content-Type" - value: "application/json" - body: "{{ .clusterStatusPayload }}" diff --git a/testdata/adapter-configs/cl-job/adapter-task-resource-job.yaml b/testdata/adapter-configs/cl-job/adapter-task-resource-job.yaml deleted file mode 100644 index bf61887..0000000 --- a/testdata/adapter-configs/cl-job/adapter-task-resource-job.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Simple hello-world Kubernetes Job for e2e testing -apiVersion: batch/v1 -kind: Job -metadata: - name: "test-job-{{ .clusterId }}-{{ .generationSpec }}" - namespace: "{{ .clusterId }}" - labels: - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/resource-type: "job" - app: test-job - annotations: - hyperfleet.io/generation: "{{ .generationSpec }}" -spec: - backoffLimit: 0 - template: - spec: - restartPolicy: Never - containers: - - name: hello-world - image: alpine:3.19 - 
imagePullPolicy: IfNotPresent - command: ["/bin/sh", "-c", "echo 'Hello, World!'"] - resources: - requests: - memory: "32Mi" - cpu: "50m" - limits: - memory: "64Mi" - cpu: "100m" diff --git a/testdata/adapter-configs/cl-job/values.yaml b/testdata/adapter-configs/cl-job/values.yaml deleted file mode 100644 index 3c84dfd..0000000 --- a/testdata/adapter-configs/cl-job/values.yaml +++ /dev/null @@ -1,32 +0,0 @@ -adapterConfig: - create: true - files: - adapter-config.yaml: cl-job/adapter-config.yaml - log: - level: debug - -adapterTaskConfig: - create: true - files: - task-config.yaml: cl-job/adapter-task-config.yaml - job.yaml: cl-job/adapter-task-resource-job.yaml - -broker: - type: googlepubsub - create: true - googlepubsub: - projectId: CHANGE_ME - subscriptionId: CHANGE_ME - topic: CHANGE_ME - deadLetterTopic: CHANGE_ME - -image: - registry: CHANGE_ME - repository: ci/hyperfleet-adapter - pullPolicy: Always - tag: latest - -rbac: - resources: - - namespaces - - jobs diff --git a/testdata/adapter-configs/cl-maestro/adapter-config.yaml b/testdata/adapter-configs/cl-maestro/adapter-config.yaml deleted file mode 100644 index dbe3a4e..0000000 --- a/testdata/adapter-configs/cl-maestro/adapter-config.yaml +++ /dev/null @@ -1,63 +0,0 @@ -# Example HyperFleet Adapter deployment configuration -adapter: - name: cl-maestro - #version: "0.1.0" - -# Log the full merged configuration after load (default: false) -debug_config: true -log: - level: debug - -clients: - hyperfleet_api: - base_url: CHANGE_ME - version: v1 - timeout: 2s - retry_attempts: 3 - retry_backoff: exponential - - broker: - # These values are overridden at deploy time via env vars from Helm values - subscription_id: CHANGE_ME - topic: CHANGE_ME - - maestro: - grpc_server_address: "maestro-grpc.maestro.svc.cluster.local:8090" - - # HTTPS server address for REST API operations (optional) - # Environment variable: HYPERFLEET_MAESTRO_HTTP_SERVER_ADDRESS - http_server_address: 
"http://maestro.maestro.svc.cluster.local:8000" - - # Source identifier for CloudEvents routing (must be unique across adapters) - # Environment variable: HYPERFLEET_MAESTRO_SOURCE_ID - source_id: "cl-maestro" - - # Client identifier (defaults to source_id if not specified) - # Environment variable: HYPERFLEET_MAESTRO_CLIENT_ID - client_id: "cl-maestro-client" - insecure: true - - # Authentication configuration - #auth: - # type: "tls" # TLS certificate-based mTLS - # - # tls_config: - # # gRPC TLS configuration - # # Certificate paths (mounted from Kubernetes secrets) - # # Environment variable: HYPERFLEET_MAESTRO_CA_FILE - # ca_file: "/etc/maestro/certs/grpc/ca.crt" - # - # # Environment variable: HYPERFLEET_MAESTRO_CERT_FILE - # cert_file: "/etc/maestro/certs/grpc/client.crt" - # - # # Environment variable: HYPERFLEET_MAESTRO_KEY_FILE - # key_file: "/etc/maestro/certs/grpc/client.key" - # - # # Server name for TLS verification - # # Environment variable: HYPERFLEET_MAESTRO_SERVER_NAME - # server_name: "maestro-grpc.maestro.svc.cluster.local" - # - # # HTTP API TLS configuration (may use different CA than gRPC) - # # If not set, falls back to ca_file for backwards compatibility - # # Environment variable: HYPERFLEET_MAESTRO_HTTP_CA_FILE - # http_ca_file: "/etc/maestro/certs/https/ca.crt" diff --git a/testdata/adapter-configs/cl-maestro/adapter-task-config.yaml b/testdata/adapter-configs/cl-maestro/adapter-task-config.yaml deleted file mode 100644 index 8978adf..0000000 --- a/testdata/adapter-configs/cl-maestro/adapter-task-config.yaml +++ /dev/null @@ -1,247 +0,0 @@ -# Example HyperFleet Adapter task configuration - -# Parameters with all required variables -params: - - - name: "clusterId" - source: "event.id" - type: "string" - required: true - - - name: "generation" - source: "event.generation" - type: "int" - required: true - - - name: "namespace" - source: "env.NAMESPACE" - type: "string" - -# Preconditions with valid operators and CEL expressions 
-preconditions: - - name: "clusterStatus" - api_call: - method: "GET" - url: "/clusters/{{ .clusterId }}" - timeout: 10s - retry_attempts: 3 - retry_backoff: "exponential" - capture: - - name: "clusterName" - field: "name" - - name: "generation" - field: "generation" - - name: "timestamp" - field: "created_time" - - name: "is_deleting" - expression: "has(clusterStatus.deleted_time)" - - name: "reconciledConditionStatus" - expression: | - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? status.conditions.filter(c, c.type == "Reconciled")[0].status - : "False" - - name: "placementClusterName" - expression: "\"cluster1\"" # TBC coming from placement adapter - - name: "platformType" - expression: | - has(spec.platform) && has(spec.platform.type) ? spec.platform.type : "" - - name: "subnets" - expression: | - has(spec.platform) && has(spec.platform.gcp) && has(spec.platform.gcp.subnets) - ? spec.platform.gcp.subnets - : [] - - - - name: "validationCheck" - expression: | - is_deleting || reconciledConditionStatus == "False" - -# Resources with valid K8s manifests -resources: - - name: "resource0" - transport: - client: "maestro" - maestro: - target_cluster: "{{ .placementClusterName }}" - - # ManifestWork manifest ref with Go template structural syntax ({{ if }}, {{ else }}) - # This validates that Go template conditionals work with Maestro transport - manifest: - ref: "/etc/adapter/manifestwork.yaml" - # Discover the ResourceBundle (ManifestWork) by name from Maestro - discovery: - by_name: "{{ .clusterId }}-{{ .adapter.name }}" - lifecycle: - delete: - propagationPolicy: Background - when: - expression: "is_deleting" - - # Discover nested resources deployed by the ManifestWork - nested_discoveries: - - name: "namespace0" - discovery: - by_name: "{{ .clusterId | lower }}-{{ .adapter.name }}-namespace" - - name: "configmap0" - discovery: - by_name: "{{ .clusterId | lower }}-{{ .adapter.name }}-configmap" - -post: - payloads: - - name: "statusPayload" - 
build: - adapter: "{{ .adapter.name }}" - conditions: - # Applied: Check if ManifestWork exists and has type="Applied", status="True" - - type: "Applied" - status: - expression: | - has(resources.resource0) && has(resources.resource0.status) && has(resources.resource0.status.conditions) && resources.resource0.status.conditions.filter(c, has(c.type) && c.type == "Applied").size() > 0 ? resources.resource0.status.conditions.filter(c, c.type == "Applied")[0].status : "False" - reason: - expression: | - has(resources.resource0) && has(resources.resource0.status) && has(resources.resource0.status.conditions) && resources.resource0.status.conditions.filter(c, has(c.type) && c.type == "Applied").size() > 0 ? resources.resource0.status.conditions.filter(c, c.type == "Applied")[0].reason : "ManifestWorkNotDiscovered" - message: - expression: | - has(resources.resource0) && has(resources.resource0.status) && has(resources.resource0.status.conditions) && resources.resource0.status.conditions.filter(c, has(c.type) && c.type == "Applied").size() > 0 ? resources.resource0.status.conditions.filter(c, c.type == "Applied")[0].message : "ManifestWork not discovered from Maestro or no Applied condition" - - # Available: Check if nested discovered manifests are available on the spoke cluster - # Each nested discovery is enriched with top-level "conditions" from status.resourceStatus.manifests[] - - type: "Available" - status: - expression: | - has(resources.namespace0) && has(resources.namespace0.conditions) - && resources.namespace0.conditions.exists(c, has(c.type) && c.type == "Available" && has(c.status) && c.status == "True") - && has(resources.configmap0) && has(resources.configmap0.conditions) - && resources.configmap0.conditions.exists(c, c.type == "Available" && has(c.status) && c.status == "True") - ? "True" - : "False" - reason: - expression: | - !(has(resources.namespace0) && has(resources.namespace0.conditions)) - ? 
"NamespaceNotDiscovered" - : !resources.namespace0.conditions.exists(c, has(c.type) && c.type == "Available" && has(c.status) && c.status == "True") - ? "NamespaceNotAvailable" - : !(has(resources.configmap0) && has(resources.configmap0.conditions)) - ? "ConfigMapNotDiscovered" - : !resources.configmap0.conditions.exists(c, c.type == "Available" && has(c.status) && c.status == "True") - ? "ConfigMapNotAvailable" - : "AllResourcesAvailable" - message: - expression: | - !(has(resources.namespace0) && has(resources.namespace0.conditions)) - ? "Namespace not discovered from ManifestWork" - : !resources.namespace0.conditions.exists(c, has(c.type) && c.type == "Available" && has(c.status) && c.status == "True") - ? "Namespace not yet available on spoke cluster" - : !(has(resources.configmap0) && has(resources.configmap0.conditions)) - ? "ConfigMap not discovered from ManifestWork" - : !resources.configmap0.conditions.exists(c, c.type == "Available" && has(c.status) && c.status == "True") - ? "ConfigMap not yet available on spoke cluster" - : "All manifests (namespace, configmap) are available on spoke cluster" - - # Health: Adapter execution status — surfaces errors from any phase - - type: "Health" - status: - expression: | - adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - ? "True" - : "False" - reason: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "ExecutionFailed:" + adapter.?executionError.?phase.orValue("unknown") - : adapter.?resourcesSkipped.orValue(false) - ? "ResourcesSkipped" - : "Healthy" - message: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "Adapter failed at phase [" - + adapter.?executionError.?phase.orValue("unknown") - + "] step [" - + adapter.?executionError.?step.orValue("unknown") - + "]: " - + adapter.?executionError.?message.orValue(adapter.?errorMessage.orValue("no details")) - : adapter.?resourcesSkipped.orValue(false) - ? 
"Resources skipped: " + adapter.?skipReason.orValue("unknown reason") - : "Adapter execution completed successfully" - - # Finalized: True once ManifestWork and all spoke resources are confirmed deleted - - type: "Finalized" - status: - expression: | - is_deleting - && adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - && !resources.?resource0.hasValue() - && !resources.?namespace0.hasValue() - && !resources.?configmap0.hasValue() - ? "True" - : "False" - reason: - expression: | - !is_deleting ? "NotDeleting" - : !resources.?resource0.hasValue() - && !resources.?namespace0.hasValue() - && !resources.?configmap0.hasValue() - ? "CleanupConfirmed" - : "CleanupInProgress" - message: - expression: | - !is_deleting ? "Resource not marked for deletion" - : !resources.?resource0.hasValue() - && !resources.?namespace0.hasValue() - && !resources.?configmap0.hasValue() - ? "All resources deleted; cleanup confirmed" - : "Deletion in progress; waiting for resources to be removed" - - observed_generation: - expression: "generation" - observed_time: "{{ now | date \"2006-01-02T15:04:05Z07:00\" }}" - - # Extract data from discovered ManifestWork from Maestro - data: - manifestwork: - name: - expression: | - has(resources.resource0) && has(resources.resource0.metadata) - ? resources.resource0.metadata.name - : "" - consumer: - expression: | - has(resources.resource0) && has(resources.resource0.metadata) - ? resources.resource0.metadata.namespace - : placementClusterName - configmap: - name: - expression: | - has(resources.configmap0) && has(resources.configmap0.metadata) - ? resources.configmap0.metadata.name - : "" - clusterId: - expression: | - has(resources.configmap0) && has(resources.configmap0.data) && has(resources.configmap0.data.cluster_id) - ? resources.configmap0.data.cluster_id - : clusterId - namespace: - name: - expression: | - has(resources.namespace0) && has(resources.namespace0.metadata) - ? 
resources.namespace0.metadata.name - : "" - phase: - expression: | - has(resources.namespace0) && has(resources.namespace0.statusFeedback) && has(resources.namespace0.statusFeedback.values) - && resources.namespace0.statusFeedback.values.exists(v, has(v.name) && v.name == "phase" && has(v.fieldValue)) - ? resources.namespace0.statusFeedback.values.filter(v, v.name == "phase")[0].fieldValue.string - : "Unknown" - - post_actions: - - name: "reportClusterStatus" - api_call: - method: "PUT" - url: "/clusters/{{ .clusterId }}/statuses" - headers: - - name: "Content-Type" - value: "application/json" - body: "{{ .statusPayload }}" diff --git a/testdata/adapter-configs/cl-maestro/adapter-task-resource-manifestwork.yaml b/testdata/adapter-configs/cl-maestro/adapter-task-resource-manifestwork.yaml deleted file mode 100644 index 87dd7a8..0000000 --- a/testdata/adapter-configs/cl-maestro/adapter-task-resource-manifestwork.yaml +++ /dev/null @@ -1,144 +0,0 @@ -# Go template — rendered by the adapter engine before YAML parsing -apiVersion: work.open-cluster-management.io/v1 -kind: ManifestWork -metadata: - # ManifestWork name - must be unique within consumer namespace - name: "{{ .clusterId }}-{{ .adapter.name }}" - - # Labels for identification, filtering, and management - labels: - # HyperFleet tracking labels - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/adapter: "{{ .adapter.name }}" - hyperfleet.io/component: "infrastructure" - hyperfleet.io/generation: "{{ .generation }}" - hyperfleet.io/resource-group: "cluster-setup" - - # Maestro-specific labels - maestro.io/source-id: "{{ .adapter.name }}" - maestro.io/resource-type: "manifestwork" - maestro.io/priority: "normal" - - # Standard Kubernetes application labels - app.kubernetes.io/name: "aro-hcp-cluster" - app.kubernetes.io/instance: "{{ .clusterId }}" - app.kubernetes.io/version: "v1.0.0" - app.kubernetes.io/component: "infrastructure" - app.kubernetes.io/part-of: "hyperfleet" - 
app.kubernetes.io/managed-by: "cl-maestro" - app.kubernetes.io/created-by: "{{ .adapter.name }}" -{{ if .platformType }} - hyperfleet.io/platform-type: "{{ .platformType }}" -{{ end }} - - # Annotations for metadata and operational information - annotations: - # Tracking and lifecycle - hyperfleet.io/created-by: "cl-maestro-framework" - hyperfleet.io/managed-by: "{{ .adapter.name }}" - hyperfleet.io/generation: "{{ .generation }}" - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/cluster-name: "{{ .clusterName }}" - hyperfleet.io/deployment-time: "{{ .timestamp }}" - - # Maestro-specific annotations - maestro.io/applied-time: "{{ .timestamp }}" - maestro.io/source-adapter: "{{ .adapter.name }}" - - # Documentation - description: "Complete cluster setup including namespace, configuration, and RBAC" - -# ManifestWork specification -spec: - # ============================================================================ - # Workload - Contains the Kubernetes manifests to deploy - # ============================================================================ - workload: - # Kubernetes manifests array - injected by framework from business logic config - manifests: - - apiVersion: v1 - kind: Namespace - metadata: - name: "{{ .clusterId | lower }}-{{ .adapter.name }}-namespace" - labels: - app.kubernetes.io/component: adapter-task-config - app.kubernetes.io/instance: "{{ .adapter.name }}" - app.kubernetes.io/name: cl-maestro - app.kubernetes.io/transport: maestro - annotations: - hyperfleet.io/generation: "{{ .generation }}" - - apiVersion: v1 - kind: ConfigMap - data: - cluster_id: "{{ .clusterId }}" - cluster_name: "{{ .clusterName }}" -{{ if eq .platformType "gcp" }} - platform_tier: "cloud" -{{ else }} - platform_tier: "onprem" -{{ end }} -{{ range $i, $subnet := .subnets }} - subnet_{{ $subnet.id }}_name: "{{ $subnet.name }}" - subnet_{{ $subnet.id }}_cidr: "{{ $subnet.cidr }}" - subnet_{{ $subnet.id }}_role: "{{ $subnet.role }}" -{{ end }} - metadata: - 
name: "{{ .clusterId | lower }}-{{ .adapter.name }}-configmap" - namespace: "{{ .clusterId | lower }}-{{ .adapter.name }}-namespace" - labels: - app.kubernetes.io/component: adapter-task-config - app.kubernetes.io/instance: "{{ .adapter.name }}" - app.kubernetes.io/name: cl-maestro - app.kubernetes.io/version: 1.0.0 - app.kubernetes.io/transport: maestro - annotations: - hyperfleet.io/generation: "{{ .generation }}" - - # ============================================================================ - # Delete Options - How resources should be removed - # ============================================================================ - deleteOption: - # Propagation policy for resource deletion - # - "Foreground": Wait for dependent resources to be deleted first - # - "Background": Delete immediately, let cluster handle dependents - # - "Orphan": Leave resources on cluster when ManifestWork is deleted - propagationPolicy: "Foreground" - - # Grace period for graceful deletion (seconds) - gracePeriodSeconds: 30 - - # ============================================================================ - # Manifest Configurations - Per-resource settings for update and feedback - # ============================================================================ - manifestConfigs: - - resourceIdentifier: - group: "" # Core API group (empty for v1 resources) - resource: "namespaces" # Resource type - name: "{{ .clusterId | lower }}-{{ .adapter.name }}-namespace" # Specific resource name - updateStrategy: - type: "ServerSideApply" # Use server-side apply for namespaces - feedbackRules: - - type: "JSONPaths" # Use JSON path expressions for status feedback - jsonPaths: - - name: "phase" - path: ".status.phase" - # ======================================================================== - # Configuration for ConfigMap resources - # ======================================================================== - - resourceIdentifier: - group: "" # Core API group (empty for v1 resources) - resource: 
"configmaps" # Resource type - name: "{{ .clusterId | lower }}-{{ .adapter.name }}-configmap" # Specific resource name - namespace: "{{ .clusterId | lower }}-{{ .adapter.name }}-namespace" - updateStrategy: - type: "ServerSideApply" # Use server-side apply for namespaces - serverSideApply: - fieldManager: "cl-maestro" # Field manager name for conflict resolution - force: false # Don't force conflicts (fail on conflicts) - feedbackRules: - - type: "JSONPaths" # Use JSON path expressions for status feedback - jsonPaths: - - name: "data" - path: ".data" - - name: "resourceVersion" - path: ".metadata.resourceVersion" diff --git a/testdata/adapter-configs/cl-maestro/values.yaml b/testdata/adapter-configs/cl-maestro/values.yaml deleted file mode 100644 index 246a6aa..0000000 --- a/testdata/adapter-configs/cl-maestro/values.yaml +++ /dev/null @@ -1,33 +0,0 @@ -adapterConfig: - create: true - files: - adapter-config.yaml: cl-maestro/adapter-config.yaml - log: - level: debug - -adapterTaskConfig: - create: true - files: - task-config.yaml: cl-maestro/adapter-task-config.yaml - manifestwork.yaml: cl-maestro/adapter-task-resource-manifestwork.yaml - -broker: - type: googlepubsub - create: true - googlepubsub: - projectId: CHANGE_ME - subscriptionId: CHANGE_ME - topic: CHANGE_ME - deadLetterTopic: CHANGE_ME - -image: - registry: CHANGE_ME - repository: ci/hyperfleet-adapter - pullPolicy: Always - tag: latest - -rbac: - resources: - - namespaces - - configmaps - - configmaps/status diff --git a/testdata/adapter-configs/cl-namespace/adapter-config.yaml b/testdata/adapter-configs/cl-namespace/adapter-config.yaml deleted file mode 100644 index 6578f95..0000000 --- a/testdata/adapter-configs/cl-namespace/adapter-config.yaml +++ /dev/null @@ -1,23 +0,0 @@ -adapter: - name: cl-namespace - #version: "0.1.0" - -# Log the full merged configuration after load (default: false) -debug_config: false -log: - level: debug - -clients: - hyperfleet_api: - base_url: CHANGE_ME - version: v1 - 
timeout: 2s - retry_attempts: 3 - retry_backoff: exponential - - broker: - subscription_id: CHANGE_ME - topic: CHANGE_ME - - kubernetes: - api_version: "v1" diff --git a/testdata/adapter-configs/cl-namespace/adapter-task-config.yaml b/testdata/adapter-configs/cl-namespace/adapter-task-config.yaml deleted file mode 100644 index fc519bd..0000000 --- a/testdata/adapter-configs/cl-namespace/adapter-task-config.yaml +++ /dev/null @@ -1,189 +0,0 @@ -# Simple valid HyperFleet Adapter Configuration for testing - -# Parameters with all required variables -params: - - name: "clusterId" - source: "event.id" - type: "string" - required: true - - name: "testRunId" - source: "env.TEST_RUN_ID" - type: "string" - required: false - default: "TEST_RUN_ID" - - name: "ci" - source: "env.CI" - type: "string" - required: false - default: "false" - -# Preconditions with valid operators and CEL expressions -preconditions: - - name: "clusterStatus" - api_call: - method: "GET" - url: "/clusters/{{ .clusterId }}" - timeout: 10s - retry_attempts: 3 - retry_backoff: "exponential" - capture: - - name: "clusterName" - field: "name" - - name: "generationSpec" - field: "generation" - - name: "is_deleting" - expression: "has(clusterStatus.deleted_time)" - - name: "clusterNotReconciled" - expression: | - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? status.conditions.filter(c, c.type == "Reconciled")[0].status != "True" - : true - - name: "clusterReconciledTTL" - expression: | - (timestamp(now()) - timestamp( - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? 
status.conditions.filter(c, c.type == "Reconciled")[0].last_transition_time - : now() - )).getSeconds() > 300 - - - name: "validationCheck" - # Precondition passes if cluster is NOT Reconciled OR if cluster is Reconciled and stable for >300 seconds since last transition (enables self-healing) - expression: | - is_deleting || clusterNotReconciled || clusterReconciledTTL - -# Resources with valid K8s manifests -resources: - - name: "clusterNamespace" - transport: - client: "kubernetes" - manifest: - apiVersion: v1 - kind: Namespace - metadata: - name: "{{ .clusterId }}" - labels: - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/cluster-name: "{{ .clusterName }}" - e2e.hyperfleet.io/test-run-id: "{{ .testRunId }}" - e2e.hyperfleet.io/ci: "{{ .ci }}" - e2e.hyperfleet.io/managed-by: "test-framework" - annotations: - hyperfleet.io/generation: "{{ .generationSpec }}" - discovery: - namespace: "*" # Cluster-scoped resource (Namespace) - by_selectors: - label_selector: - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/cluster-name: "{{ .clusterName }}" - lifecycle: - delete: - propagationPolicy: Foreground - when: - expression: "is_deleting" - -# Post-processing with valid CEL expressions -# This example contains multiple resources, we will only report on the conditions of the jobNamespace not to overcomplicate the example -post: - payloads: - - name: "clusterStatusPayload" - build: - adapter: "{{ .adapter.name }}" - conditions: - # Applied: Job successfully created - - type: "Applied" - status: - expression: | - resources.?clusterNamespace.?status.?phase.orValue("") == "Active" ? "True" : "False" - reason: - expression: | - resources.?clusterNamespace.?status.?phase.orValue("") == "Active" - ? "NamespaceCreated" - : "NamespacePending" - message: - expression: | - resources.?clusterNamespace.?status.?phase.orValue("") == "Active" - ? 
"Namespace created successfully" - : "Namespace creation in progress" - # Available: Check job status conditions - - type: "Available" - status: - expression: | - resources.?clusterNamespace.?status.?phase.orValue("") == "Active" ? "True" : "False" - reason: - expression: | - resources.?clusterNamespace.?status.?phase.orValue("") == "Active" ? "NamespaceReady" : "NamespaceNotReady" - message: - expression: | - resources.?clusterNamespace.?status.?phase.orValue("") == "Active" ? "Namespace is active and ready" : "Namespace is not active and ready" - # Health: Adapter execution status (runtime) - - type: "Health" - status: - expression: | - adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - ? "True" - : "False" - reason: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "ExecutionFailed:" + adapter.?executionError.?phase.orValue("unknown") - : adapter.?resourcesSkipped.orValue(false) - ? "ResourcesSkipped" - : "Healthy" - message: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "Adapter failed at phase [" - + adapter.?executionError.?phase.orValue("unknown") - + "] step [" - + adapter.?executionError.?step.orValue("unknown") - + "]: " - + adapter.?executionError.?message.orValue(adapter.?errorMessage.orValue("no details")) - : adapter.?resourcesSkipped.orValue(false) - ? "Resources skipped: " + adapter.?skipReason.orValue("unknown reason") - : "Adapter execution completed successfully" - # Finalized: True once namespace is confirmed deleted - - type: "Finalized" - status: - expression: | - is_deleting - && adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - && !resources.?clusterNamespace.hasValue() - ? "True" - : "False" - reason: - expression: | - !is_deleting ? "NotDeleting" - : !resources.?clusterNamespace.hasValue() - ? "CleanupConfirmed" - : "CleanupInProgress" - message: - expression: | - !is_deleting ? 
"Resource not marked for deletion" - : !resources.?clusterNamespace.hasValue() - ? "All resources deleted; cleanup confirmed" - : "Deletion in progress; waiting for namespace to be removed" - # Event generation ID metadata field needs to use expression to avoid interpolation issues - observed_generation: - expression: "generationSpec" - observed_time: "{{ now | date \"2006-01-02T15:04:05Z07:00\" }}" - - data: - namespace: - name: - expression: | - resources.?clusterNamespace.?metadata.?name.orValue("") - status: - expression: | - resources.?clusterNamespace.?status.?phase.orValue("") - - post_actions: - - name: "reportClusterStatus" - api_call: - method: "PUT" - url: "/clusters/{{ .clusterId }}/statuses" - headers: - - name: "Content-Type" - value: "application/json" - body: "{{ .clusterStatusPayload }}" diff --git a/testdata/adapter-configs/cl-namespace/values.yaml b/testdata/adapter-configs/cl-namespace/values.yaml deleted file mode 100644 index a82320a..0000000 --- a/testdata/adapter-configs/cl-namespace/values.yaml +++ /dev/null @@ -1,38 +0,0 @@ -adapterConfig: - create: true - files: - adapter-config.yaml: cl-namespace/adapter-config.yaml - log: - level: debug - -adapterTaskConfig: - create: true - files: - task-config.yaml: cl-namespace/adapter-task-config.yaml - -broker: - type: googlepubsub - create: true - googlepubsub: - projectId: CHANGE_ME - subscriptionId: CHANGE_ME - topic: CHANGE_ME - deadLetterTopic: CHANGE_ME - -image: - registry: CHANGE_ME - repository: ci/hyperfleet-adapter - pullPolicy: Always - tag: latest - -rbac: - resources: - - namespaces - - serviceaccounts - - configmaps - - deployments - - roles - - rolebindings - - jobs - - jobs/status - - pods diff --git a/testdata/adapter-configs/np-configmap/adapter-config.yaml b/testdata/adapter-configs/np-configmap/adapter-config.yaml deleted file mode 100644 index dd0c593..0000000 --- a/testdata/adapter-configs/np-configmap/adapter-config.yaml +++ /dev/null @@ -1,23 +0,0 @@ -adapter: - name: 
np-configmap - #version: "0.1.0" - -# Log the full merged configuration after load (default: false) -debug_config: false -log: - level: debug - -clients: - hyperfleet_api: - base_url: CHANGE_ME - version: v1 - timeout: 2s - retry_attempts: 3 - retry_backoff: exponential - - broker: - subscription_id: CHANGE_ME - topic: CHANGE_ME - - kubernetes: - api_version: "v1" diff --git a/testdata/adapter-configs/np-configmap/adapter-task-config.yaml b/testdata/adapter-configs/np-configmap/adapter-task-config.yaml deleted file mode 100644 index 0e56379..0000000 --- a/testdata/adapter-configs/np-configmap/adapter-task-config.yaml +++ /dev/null @@ -1,178 +0,0 @@ -# Simple valid HyperFleet Adapter Configuration for testing - -# Parameters with all required variables -params: - - name: "clusterId" - source: "event.owner_references.id" - type: "string" - required: true - - name: "nodepoolId" - source: "event.id" - type: "string" - required: true - -# Preconditions with valid operators and CEL expressions -preconditions: - - name: "nodepoolStatus" - api_call: - method: "GET" - url: "/clusters/{{ .clusterId }}/nodepools/{{ .nodepoolId }}" - timeout: 10s - retry_attempts: 3 - retry_backoff: "exponential" - capture: - - name: "nodepoolName" - field: "name" - - name: "generationSpec" - field: "generation" - - name: "is_deleting" - expression: "has(nodepoolStatus.deleted_time)" - - name: "nodepoolNotReconciled" - expression: | - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? status.conditions.filter(c, c.type == "Reconciled")[0].status != "True" - : true - - name: "nodepoolReconciledTTL" - expression: | - (timestamp(now()) - timestamp( - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? 
status.conditions.filter(c, c.type == "Reconciled")[0].last_transition_time - : now() - )).getSeconds() > 300 - - - name: "clusterAdapterStatus" - api_call: - method: "GET" - url: "/clusters/{{ .clusterId }}/statuses" - timeout: 10s - retry_attempts: 3 - retry_backoff: "exponential" - capture: - - name: "clusterNamespaceStatus" - field: "{.items[?(@.adapter=='cl-namespace')].data.namespace.status}" - - - name: "validationCheck" - # Precondition passes if deleting OR (namespace active AND (nodepool not reconciled OR reconciled TTL expired)) - expression: | - is_deleting || (clusterNamespaceStatus == "Active" && (nodepoolNotReconciled || nodepoolReconciledTTL)) - -# Resources with valid K8s manifests -resources: - - name: "nodepoolConfigMap" - transport: - client: "kubernetes" - manifest: - ref: "/etc/adapter/configmap.yaml" - discovery: - namespace: "{{ .clusterId }}" - by_selectors: - label_selector: - hyperfleet.io/resource-type: "configmap" - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/nodepool-id: "{{ .nodepoolId }}" - lifecycle: - delete: - propagationPolicy: Background - when: - expression: "is_deleting" - -# Post-processing with valid CEL expressions -post: - payloads: - - name: "nodepoolStatusPayload" - build: - adapter: "{{ .adapter.name }}" - conditions: - # Applied: ConfigMap successfully created - - type: "Applied" - status: - expression: | - has(resources.nodepoolConfigMap) ? "True" : "False" - reason: - expression: | - has(resources.nodepoolConfigMap) - ? "ConfigMapApplied" - : "ConfigMapPending" - message: - expression: | - has(resources.nodepoolConfigMap) - ? "nodepoolConfigMap manifest applied successfully" - : "nodepoolConfigMap is pending to be applied" - # Available: Check configmap exists - - type: "Available" - status: - expression: | - has(resources.nodepoolConfigMap) && has(resources.nodepoolConfigMap.data) ? 
"True" : "False" - reason: - expression: | - has(resources.nodepoolConfigMap) && has(resources.nodepoolConfigMap.data) - ? "ConfigMapReady" - : "ConfigMapNotReady" - message: - expression: | - has(resources.nodepoolConfigMap) && has(resources.nodepoolConfigMap.data) - ? "ConfigMap is available and contains data" - : "ConfigMap is not yet available" - # Health: Adapter execution status (runtime) - - type: "Health" - status: - expression: | - adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - ? "True" - : "False" - reason: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "ExecutionFailed:" + adapter.?executionError.?phase.orValue("unknown") - : adapter.?resourcesSkipped.orValue(false) - ? "ResourcesSkipped" - : "Healthy" - message: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "Adapter failed at phase [" - + adapter.?executionError.?phase.orValue("unknown") - + "] step [" - + adapter.?executionError.?step.orValue("unknown") - + "]: " - + adapter.?executionError.?message.orValue(adapter.?errorMessage.orValue("no details")) - : adapter.?resourcesSkipped.orValue(false) - ? "Resources skipped: " + adapter.?skipReason.orValue("unknown reason") - : "Adapter execution completed successfully" - # Finalized: True once configmap is confirmed deleted during deletion - - type: "Finalized" - status: - expression: | - is_deleting - && adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - && !resources.?nodepoolConfigMap.hasValue() - ? "True" - : "False" - reason: - expression: | - !is_deleting ? "NotDeleting" - : !resources.?nodepoolConfigMap.hasValue() - ? "CleanupConfirmed" - : "CleanupInProgress" - message: - expression: | - !is_deleting ? "Resource not marked for deletion" - : !resources.?nodepoolConfigMap.hasValue() - ? 
"All resources deleted; cleanup confirmed" - : "Deletion in progress; waiting for configmap to be removed" - # Event generation ID metadata field needs to use expression to avoid interpolation issues - observed_generation: - expression: "generationSpec" - observed_time: "{{ now | date \"2006-01-02T15:04:05Z07:00\" }}" - - post_actions: - - name: "reportNodepoolStatus" - api_call: - method: "PUT" - url: "/clusters/{{ .clusterId }}/nodepools/{{ .nodepoolId }}/statuses" - headers: - - name: "Content-Type" - value: "application/json" - body: "{{ .nodepoolStatusPayload }}" diff --git a/testdata/adapter-configs/np-configmap/adapter-task-resource-configmap.yaml b/testdata/adapter-configs/np-configmap/adapter-task-resource-configmap.yaml deleted file mode 100644 index 750344b..0000000 --- a/testdata/adapter-configs/np-configmap/adapter-task-resource-configmap.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# Test ConfigMap template for nodepool -apiVersion: v1 -kind: ConfigMap -metadata: - name: "nodepool-config-{{ .nodepoolId }}" - namespace: "{{ .clusterId }}" - labels: - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/nodepool-id: "{{ .nodepoolId }}" - hyperfleet.io/nodepool-name: "{{ .nodepoolName }}" - hyperfleet.io/resource-type: "configmap" - annotations: - hyperfleet.io/generation: "{{ .generationSpec }}" -data: - nodepool.id: "{{ .nodepoolId }}" - nodepool.name: "{{ .nodepoolName }}" - cluster.id: "{{ .clusterId }}" - test.key: "test-value" - environment: "e2e-testing" diff --git a/testdata/adapter-configs/np-configmap/values.yaml b/testdata/adapter-configs/np-configmap/values.yaml deleted file mode 100644 index 3e448c8..0000000 --- a/testdata/adapter-configs/np-configmap/values.yaml +++ /dev/null @@ -1,32 +0,0 @@ -adapterConfig: - create: true - files: - adapter-config.yaml: np-configmap/adapter-config.yaml - log: - level: debug - -adapterTaskConfig: - create: true - files: - task-config.yaml: np-configmap/adapter-task-config.yaml - configmap.yaml: 
np-configmap/adapter-task-resource-configmap.yaml - -broker: - type: googlepubsub - create: true - googlepubsub: - projectId: CHANGE_ME - subscriptionId: CHANGE_ME - topic: CHANGE_ME - deadLetterTopic: CHANGE_ME - -image: - registry: CHANGE_ME - repository: ci/hyperfleet-adapter - pullPolicy: Always - tag: latest - -rbac: - resources: - - configmaps - - configmaps/status